生活随笔
收集整理的這篇文章主要介紹了
敏感词过滤算法实现
小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
敏感詞庫下載
FilterHelper類
#region Illegal keyword filter, beta 1.1
/// <summary>
/// Masks dictionary words ("illegal keywords") that occur in a text.
/// </summary>
/// <remarks>
/// Matching is done on a normalized copy of the text (full-width characters
/// folded to half-width, letters lower-cased), so "ABC" and its full-width
/// form both match the dictionary word "abc". Characters that are neither
/// CJK ideographs (U+4E00..U+9FA5), ASCII digits nor ASCII letters are treated
/// as noise and may be interleaved between the characters of a word
/// ("b-a.d" matches "bad"); the noise inside a match is masked as well.
/// When several dictionary words match at the same position, the longest
/// match wins.
/// </remarks>
public class FilterHelper
{
    // Full-width space (U+3000) and the full-width ASCII block (U+FF01..U+FF5E).
    private const char FullWidthSpace = (char)12288;
    private const int FullWidthLowerBound = 65280;
    private const int FullWidthUpperBound = 65375;
    private const int FullWidthOffset = 65248;

    private string dictionaryPath = string.Empty;
    private string sourctText = string.Empty;
    private readonly List<string> illegalWords = new List<string>();

    // One bucket per UTF-16 code unit, indexed by the first character of a
    // word. char.MaxValue itself is a valid index, hence the + 1.
    private readonly WordGroup[] lexicon = new WordGroup[char.MaxValue + 1];

    /// <summary>Creates a filter without a dictionary; set <see cref="DictionaryPath"/> before filtering.</summary>
    public FilterHelper() { }

    /// <summary>Creates a filter that loads its words from the given file.</summary>
    /// <param name="dictionaryPath">Path of a text file containing one word per line.</param>
    public FilterHelper(string dictionaryPath)
    {
        this.dictionaryPath = dictionaryPath;
    }

    /// <summary>Path of the dictionary file (one word per line). Read on every <see cref="Filter"/> call.</summary>
    public string DictionaryPath
    {
        get { return dictionaryPath; }
        set { dictionaryPath = value; }
    }

    /// <summary>The text to be filtered by the next <see cref="Filter"/> call.</summary>
    public string SourctText
    {
        get { return sourctText; }
        set { sourctText = value; }
    }

    /// <summary>
    /// The original (un-normalized) text spans masked by the most recent
    /// <see cref="Filter"/> call, in order of appearance.
    /// </summary>
    public List<string> IllegalWords
    {
        get { return illegalWords; }
    }

    /// <summary>
    /// Replaces every dictionary word found in <see cref="SourctText"/> with
    /// <paramref name="replaceChar"/> and records the hits in <see cref="IllegalWords"/>.
    /// </summary>
    /// <param name="replaceChar">Character written over each character of a match.</param>
    /// <returns>
    /// The masked text, same length as the input; <see cref="string.Empty"/>
    /// when <see cref="SourctText"/> is null or empty.
    /// </returns>
    /// <remarks>
    /// Exceptions thrown by <c>System.IO.File.ReadAllLines</c> while reading
    /// the dictionary are not caught here.
    /// </remarks>
    public string Filter(char replaceChar)
    {
        // The dictionary is re-read on every call so edits to the file are picked up.
        LoadDictionary();

        // Each call reports only its own hits.
        illegalWords.Clear();

        if (string.IsNullOrEmpty(sourctText))
        {
            return string.Empty;
        }

        // Normalize once; ToDBC preserves length, so indices line up with the raw text.
        string normalized = ToDBC(sourctText);
        char[] masked = sourctText.ToCharArray();

        int position = 0;
        while (position < normalized.Length)
        {
            int span = LongestMatch(normalized, position);
            if (span <= 0)
            {
                position++;
                continue;
            }

            illegalWords.Add(sourctText.Substring(position, span));
            for (int k = position; k < position + span; k++)
            {
                masked[k] = replaceChar;
            }

            // Resume after the masked span so matches never overlap.
            position += span;
        }

        return new string(masked);
    }

    /// <summary>
    /// Rebuilds the lexicon from <see cref="DictionaryPath"/>. Does nothing
    /// (and keeps any previously loaded words) when the path is null or empty.
    /// </summary>
    private void LoadDictionary()
    {
        if (string.IsNullOrEmpty(DictionaryPath))
        {
            return;
        }

        string[] lines = System.IO.File.ReadAllLines(DictionaryPath, System.Text.Encoding.Default);

        Array.Clear(lexicon, 0, lexicon.Length);
        HashSet<string> seen = new HashSet<string>();
        foreach (string line in lines)
        {
            // Words are stored normalized so they compare directly against normalized text.
            string word = ToDBC(line).Trim();
            if (word.Length == 0 || !seen.Add(word))
            {
                continue; // blank line or duplicate entry
            }

            WordGroup bucket = lexicon[word[0]];
            if (bucket == null)
            {
                bucket = new WordGroup();
                lexicon[word[0]] = bucket;
            }

            // Only the tail is stored; the first character is implied by the bucket index.
            bucket.Add(word.Substring(1));
        }
    }

    /// <summary>
    /// Returns the length of the longest dictionary match starting at
    /// <paramref name="start"/>, or 0 when no word matches there.
    /// </summary>
    private int LongestMatch(string text, int start)
    {
        WordGroup bucket = lexicon[text[start]];
        if (bucket == null)
        {
            return 0;
        }

        int best = 0;
        for (int n = 0; n < bucket.Count(); n++)
        {
            int span = MatchSpan(text, start, bucket.GetWord(n));
            if (span > best)
            {
                best = span;
            }
        }

        return best;
    }

    /// <summary>
    /// Tries to match <paramref name="tail"/> (a dictionary word without its
    /// first character) against <paramref name="text"/> right after
    /// <paramref name="start"/>, skipping noise characters between word characters.
    /// </summary>
    /// <returns>
    /// Number of text characters covered by the match, counting the first
    /// character and any skipped noise; -1 when the whole tail cannot be matched.
    /// </returns>
    private static int MatchSpan(string text, int start, string tail)
    {
        int pos = start + 1;
        foreach (char expected in tail)
        {
            // A direct hit is tried first so dictionary words that themselves
            // contain punctuation can still match; otherwise noise is skipped.
            while (pos < text.Length && text[pos] != expected && IsNoise(text[pos]))
            {
                pos++;
            }

            // Running out of text before the tail is complete is a miss, not a hit.
            if (pos >= text.Length || text[pos] != expected)
            {
                return -1;
            }

            pos++;
        }

        return pos - start;
    }

    /// <summary>True for characters that are neither CJK ideographs, ASCII digits nor ASCII letters.</summary>
    private static bool IsNoise(char character)
    {
        return !isCHS(character) && !isNum(character) && !isAlphabet(character);
    }

    /// <summary>True for code points in U+4E00..U+9FA5.</summary>
    private static bool isCHS(char character)
    {
        return character >= 0x4e00 && character <= 0x9fa5;
    }

    /// <summary>True for ASCII digits '0'..'9'.</summary>
    private static bool isNum(char character)
    {
        return character >= '0' && character <= '9';
    }

    /// <summary>True for ASCII letters 'a'..'z' and 'A'..'Z'.</summary>
    private static bool isAlphabet(char character)
    {
        return (character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z');
    }

    /// <summary>
    /// Normalizes text for comparison: the full-width space becomes an ASCII
    /// space, full-width ASCII forms become their half-width counterparts, and
    /// every character is lower-cased with the invariant culture. The result
    /// always has the same length as <paramref name="input"/>.
    /// </summary>
    private static string ToDBC(string input)
    {
        char[] chars = input.ToCharArray();
        for (int i = 0; i < chars.Length; i++)
        {
            char ch = chars[i];
            if (ch == FullWidthSpace)
            {
                ch = ' ';
            }
            else if (ch > FullWidthLowerBound && ch < FullWidthUpperBound)
            {
                ch = (char)(ch - FullWidthOffset);
            }

            // Per-character invariant lower-casing keeps indices aligned and
            // avoids culture-specific mappings.
            chars[i] = char.ToLowerInvariant(ch);
        }

        return new string(chars);
    }
}

/// <summary>
/// The tails (everything after the first character) of all dictionary words
/// that share the same first character.
/// </summary>
class WordGroup
{
    private List<string> groupList;

    public WordGroup()
    {
        groupList = new List<string>();
    }

    /// <summary>Appends a word tail to the group.</summary>
    public void Add(string word)
    {
        groupList.Add(word);
    }

    /// <summary>Number of word tails in the group.</summary>
    public int Count()
    {
        return groupList.Count;
    }

    /// <summary>Returns the word tail stored at <paramref name="index"/>.</summary>
    public string GetWord(int index)
    {
        return groupList[index];
    }
}
#endregion
調用
// Usage demo: build the dictionary path from the current directory with the
// "\bin\Debug\netcoreapp3.1" segment removed, filter one sample sentence, then
// print each recorded hit followed by the masked sentence.
var helper = new FilterHelper(
    Directory.GetCurrentDirectory().Replace("\\bin\\Debug\\netcoreapp3.1", "") + "/暴恐詞庫.txt");
helper.SourctText = "你個大推背";

string masked = helper.Filter('*');

var hits = helper.IllegalWords;
for (int n = 0; n < hits.Count; n++)
{
    Console.WriteLine(hits[n]);
}

Console.WriteLine(masked);
總結
以上是生活随笔為你收集整理的敏感词过滤算法实现的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。