java敏感词过滤_java敏感词过滤
java敏感詞過濾
敏感詞:“美元”,“中國”,“北京大學”,“北大”,“南京大學”
DFAUtils
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
/**
 * Sensitive-word filter backed by a character trie (the "DFA" of the class name).
 *
 * <p>Words are stored one character per trie level; a node flagged as terminal marks
 * the end of a registered word. The trie is a single process-wide instance shared by
 * all static methods.
 *
 * <p>Thread-safety: every public method acquires the same private lock, so readers
 * never observe a half-inserted or half-deleted word.
 */
public class DFAUtils {

    /**
     * First code unit (13312, U+3400) of the range this class treats as "Chinese
     * characters" when applying the single-character isolation rule.
     */
    private static final char CJK_RANGE_START = 0x3400;

    /** Last code unit (40895, U+9FBF) of the range treated as "Chinese characters". */
    private static final char CJK_RANGE_END = 0x9FBF;

    /** Guards every read and write of the trie. */
    private static final Object LOCK = new Object();

    /** Root of the trie; it represents the empty prefix and is never terminal. */
    private static final Node root = new Node();

    /** One trie node: outgoing edges keyed by character plus an end-of-word flag. */
    private static final class Node {
        private final Map<Character, Node> children = new HashMap<>();
        private boolean terminal;
    }

    /**
     * Adds a sensitive word to the trie. Adding a word that is already present is a
     * no-op.
     *
     * @param sensitiveWord the word to register; {@code null} or empty is ignored
     */
    public static void addSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return;
        }
        synchronized (LOCK) {
            Node node = root;
            for (int i = 0; i < sensitiveWord.length(); i++) {
                Character key = sensitiveWord.charAt(i);
                Node child = node.children.get(key);
                if (null == child) {
                    child = new Node();
                    node.children.put(key, child);
                }
                node = child;
            }
            node.terminal = true;
        }
    }

    /**
     * Scans {@code content} from left to right and returns the first sensitive word
     * found.
     *
     * <p>Matching rules at each start position:
     * <ul>
     *   <li>a registered single-character word matches only when it is isolated, i.e.
     *       neither the preceding nor the following character lies in the Chinese
     *       character range (the start and end of the text count as isolated);</li>
     *   <li>otherwise the shortest registered word of two or more characters starting
     *       at that position matches.</li>
     * </ul>
     *
     * @param content the text to scan; may be {@code null}
     * @return the matched word, or {@code null} when {@code content} is {@code null},
     *         empty, or contains no sensitive word
     */
    public static String checkSensitiveWord(String content) {
        if (null == content || content.length() == 0) {
            return null;
        }
        synchronized (LOCK) {
            if (root.children.isEmpty()) {
                return null;
            }
            for (int start = 0; start < content.length(); start++) {
                Node node = root.children.get(content.charAt(start));
                if (null == node) {
                    continue;
                }
                if (node.terminal && isIsolated(content, start)) {
                    return content.substring(start, start + 1);
                }
                /* Try to extend the match; the first terminal node reached wins. */
                for (int end = start + 1; end < content.length(); end++) {
                    node = node.children.get(content.charAt(end));
                    if (null == node) {
                        break;
                    }
                    if (node.terminal) {
                        return content.substring(start, end + 1);
                    }
                }
            }
            return null;
        }
    }

    /**
     * Removes a sensitive word from the trie and prunes any nodes that no longer lead
     * to a registered word. Words that merely share a prefix with, or extend, the
     * removed word are left untouched.
     *
     * @param sensitiveWord the word to remove; {@code null}, empty, or unregistered
     *                      words are ignored
     */
    public static void delSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return;
        }
        synchronized (LOCK) {
            int length = sensitiveWord.length();
            /* path[i] is the node reached after consuming the first i characters. */
            Node[] path = new Node[length + 1];
            path[0] = root;
            for (int i = 0; i < length; i++) {
                Node next = path[i].children.get(sensitiveWord.charAt(i));
                if (null == next) {
                    return;
                }
                path[i + 1] = next;
            }
            Node last = path[length];
            if (!last.terminal) {
                return;
            }
            last.terminal = false;
            /* Walk back up, dropping nodes that are neither a word end nor a prefix. */
            for (int i = length; i > 0; i--) {
                Node node = path[i];
                if (node.terminal || !node.children.isEmpty()) {
                    break;
                }
                path[i - 1].children.remove(sensitiveWord.charAt(i - 1));
            }
        }
    }

    /**
     * Returns every word currently registered in the trie.
     *
     * <p>The misspelled method name is kept so existing callers continue to compile.
     *
     * @return a new list owned by the caller, in no particular order; empty when no
     *         word is registered
     */
    public static LinkedList<String> getSevsitiveWords() {
        LinkedList<String> words = new LinkedList<>();
        synchronized (LOCK) {
            collectWords(root, new StringBuilder(), words);
        }
        return words;
    }

    /**
     * Tells whether exactly {@code sensitiveWord} is registered in the trie.
     *
     * <p>This is an exact lookup: the context-dependent rules of
     * {@link #checkSensitiveWord(String)} are not applied, so a word is reported even
     * when a shorter registered word is a prefix of it.
     *
     * @param sensitiveWord the word to look up; may be {@code null}
     * @return {@code true} if the word is registered, {@code false} otherwise
     */
    public static boolean containSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return false;
        }
        synchronized (LOCK) {
            Node node = root;
            for (int i = 0; i < sensitiveWord.length(); i++) {
                node = node.children.get(sensitiveWord.charAt(i));
                if (null == node) {
                    return false;
                }
            }
            return node.terminal;
        }
    }

    /** Removes every registered word. */
    public static void clearSensitiveWord() {
        synchronized (LOCK) {
            root.children.clear();
        }
    }

    /**
     * Depth-first walk that appends every word found below {@code node} to
     * {@code out}. {@code prefix} holds the characters on the path from the root and
     * is restored to its incoming state before returning.
     */
    private static void collectWords(Node node, StringBuilder prefix, LinkedList<String> out) {
        for (Map.Entry<Character, Node> edge : node.children.entrySet()) {
            prefix.append(edge.getKey().charValue());
            Node child = edge.getValue();
            if (child.terminal) {
                out.add(prefix.toString());
            }
            collectWords(child, prefix, out);
            prefix.setLength(prefix.length() - 1);
        }
    }

    /**
     * Returns {@code true} when the character at {@code index} has no Chinese
     * character directly before or after it; text boundaries count as non-Chinese.
     */
    private static boolean isIsolated(String content, int index) {
        boolean freeBefore = index == 0 || !isChineseChar(content.charAt(index - 1));
        boolean freeAfter = index + 1 >= content.length()
                || !isChineseChar(content.charAt(index + 1));
        return freeBefore && freeAfter;
    }

    /** Returns {@code true} when {@code c} lies in the inclusive range 13312..40895. */
    private static boolean isChineseChar(char c) {
        return c >= CJK_RANGE_START && c <= CJK_RANGE_END;
    }
}
DFAUtilsTest
import org.junit.Assert;
import org.junit.Test;
import java.util.LinkedList;
/**
 * JUnit 4 tests for {@link DFAUtils}.
 *
 * <p>{@code DFAUtils} keeps its words in static state, so every test starts by
 * clearing the trie.
 *
 * <p>NOTE(review): the check/contain fixtures previously asserted word counts and
 * matches for words that were never registered (8 words after three adds, a match for
 * a word that was never added). The missing registrations below were reconstructed
 * from the assertions; confirm them against the author's original fixtures.
 */
public class DFAUtilsTest {

    /** Asserts that the trie currently holds exactly {@code expected} words. */
    private static void assertWordCount(int expected) {
        Assert.assertEquals(expected, DFAUtils.getSevsitiveWords().size());
    }

    /*==========================AddSensitiveWord-start==========================*/
    /** Adding is idempotent, and each distinct word is counted once. */
    @Test
    public void testAddSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.addSensitiveWord("中哈");
        assertWordCount(2);
        /* Re-adding existing words must not change the count. */
        DFAUtils.addSensitiveWord("中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中哈");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.addSensitiveWord("中中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中中");
        assertWordCount(3);
        DFAUtils.addSensitiveWord("人");
        assertWordCount(4);
        DFAUtils.addSensitiveWord("中");
        assertWordCount(5);
    }
    /*==========================AddSensitiveWord-end============================*/

    /*==========================CheckSensitiveWord-start==========================*/
    /**
     * A single-character word only matches when it is not adjacent to a Chinese
     * character; longer words match anywhere.
     */
    @Test
    public void testCheckSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("滾");
        DFAUtils.addSensitiveWord("滾滾");
        DFAUtils.addSensitiveWord("操");
        DFAUtils.addSensitiveWord("大");
        DFAUtils.addSensitiveWord("大學");
        DFAUtils.addSensitiveWord("中中中國中中中");
        assertWordCount(6);
        Assert.assertEquals("滾", DFAUtils.checkSensitiveWord("滾"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("翻滾"));
        Assert.assertEquals("滾", DFAUtils.checkSensitiveWord("滾 "));
        Assert.assertEquals("滾", DFAUtils.checkSensitiveWord(" 滾"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("體操"));
        Assert.assertEquals("滾滾", DFAUtils.checkSensitiveWord("你好滾滾"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("滾你好滾"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("滾輪胎"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("你你國國"));
        Assert.assertEquals("大學", DFAUtils.checkSensitiveWord("大學"));
        Assert.assertEquals("中中中國中中中",
                DFAUtils.checkSensitiveWord("中中國中中 中中中中國中中中"));
    }
    /*==========================CheckSensitiveWord-end============================*/

    /*==========================DelSensitiveWord-start==========================*/
    /** Deleting an empty or unknown word is a no-op; deleting the only word empties the trie. */
    @Test
    public void testDelSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("國");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(0);
    }

    /** A prefix or an extension of a registered word is not itself deletable. */
    @Test
    public void testDelSensitiveWord02() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(0);
    }

    /** Deleting one of two nested words leaves the other intact. */
    @Test
    public void testDelSensitiveWord03() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中中");
        DFAUtils.addSensitiveWord("中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(2);
        /* Surrounding whitespace is significant: these are different words. */
        DFAUtils.delSensitiveWord(" 中中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中中 ");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(0);
        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(0);
    }

    /** Deleting along a shared path only removes the requested word. */
    @Test
    public void testDelSensitiveWord04() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中中中111");
        DFAUtils.addSensitiveWord("中中");
        DFAUtils.addSensitiveWord("中");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中中中111");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中中111");
        /* "中中中" is only a prefix of a registered word, so nothing is removed. */
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中 ");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(2);
        /* "中中" was already deleted above; "中中 " must survive. */
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(2);
    }
    /*==========================DelSensitiveWord-end============================*/

    /*==========================ContainSensitiveWord-start==========================*/
    /** Only registered words are reported as contained. */
    @Test
    public void testContainSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("滾");
        DFAUtils.addSensitiveWord("操");
        DFAUtils.addSensitiveWord("中中中國中中中");
        assertWordCount(3);
        Assert.assertFalse(DFAUtils.containSensitiveWord(" "));
        Assert.assertTrue(DFAUtils.containSensitiveWord("操"));
    }
    /*==========================ContainSensitiveWord-end============================*/
}
總結
以上是生活随笔為你收集整理的java敏感词过滤_java敏感词过滤的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: linux创建zip+函数,linux+
- 下一篇: RabbitMQ(1) - win+ra