當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

收藏的2个正则html标签剔除方法

發布時間：2025/7/14 编程问答 24 豆豆

生活随笔收集整理的這篇文章主要介紹了收藏的2个正则html标签剔除方法小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

方法1

public string checkStr(string html)
????? {
????????? System.Text.RegularExpressions.Regex regex1 = new System.Text.RegularExpressions.Regex(@"<script[\s\S]+</script *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex2 = new System.Text.RegularExpressions.Regex(@" href *= *[\s\S]*script *:", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex3 = new System.Text.RegularExpressions.Regex(@" no[\s\S]*=", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex4 = new System.Text.RegularExpressions.Regex(@"<iframe[\s\S]+</iframe *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex5 = new System.Text.RegularExpressions.Regex(@"<frameset[\s\S]+</frameset *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex6 = new System.Text.RegularExpressions.Regex(@"\<img[^\>]+\>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex7 = new System.Text.RegularExpressions.Regex(@"</p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex8 = new System.Text.RegularExpressions.Regex(@"<p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? System.Text.RegularExpressions.Regex regex9 = new System.Text.RegularExpressions.Regex(@"<[^>]*>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
????????? html = regex1.Replace(html, ""); //過濾<script></script>標記
????????? html = regex2.Replace(html, ""); //過濾href=javascript: (<A>) 屬性
????????? html = regex3.Replace(html, " _disibledevent="); //過濾其它控件的on...事件
????????? html = regex4.Replace(html, ""); //過濾iframe
????????? html = regex5.Replace(html, ""); //過濾frameset
????????? html = regex6.Replace(html, ""); //過濾frameset
????????? html = regex7.Replace(html, ""); //過濾frameset
????????? html = regex8.Replace(html, ""); //過濾frameset
????????? html = regex9.Replace(html, "");
????????? html = html.Replace(" ", "");
????????? html = html.Replace("</strong>", "");
????????? html = html.Replace("<strong>", "");
????????? return html;
}

方法2

#region 過濾掉 html代碼
public static string StripHTML(string strHtml)
{
string [] aryReg ={
@"<script[^>]*?>.*?</script>",

@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
@"([\r\n])[\s]+",
@"&(quot|#34);",
@"&(amp|#38);",
@"&(lt|#60);",
@"&(gt|#62);",
@"&(nbsp|#160);",
@"&(iexcl|#161);",
@"&(cent|#162);",
@"&(pound|#163);",
@"&(copy|#169);",
@"&#(\d+);",
@"-->",
@"<!--.*\n"
};

string [] aryRep = {
"",
"",
"",
"\"",
"&",
"<",
">",
" ",
"\xa1",//chr(161),
"\xa2",//chr(162),
"\xa3",//chr(163),
"\xa9",//chr(169),
"",
"\r\n",
""
};

string newReg =aryReg[0];
string strOutput=strHtml;
for(int i = 0;i<aryReg.Length;i++)
{
System.Text.RegularExpressions.Regex regex = new System.Text.RegularExpressions.Regex(aryReg[i],System.Text.RegularExpressions.RegexOptions.IgnoreCase);
strOutput = regex.Replace(strOutput,aryRep[i]);
}
strOutput.Replace("<","");
strOutput.Replace(">","");
strOutput.Replace("\r\n","");
return strOutput;
}
#endregion

轉載于:https://www.cnblogs.com/yintian2/archive/2007/11/22/968127.html

總結

以上是生活随笔為你收集整理的收藏的2个正则html标签剔除方法的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。