生活随笔
收集整理的這篇文章主要介紹了
java 实现敏感词汇的过滤
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
功能目錄
1.CensorWords.txt文件為能過濾的詞匯
具體內容略;
2.SensitiveWordFilter為工具類,具體實現過濾的代碼
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;/*** @Author : Liuzz* @Description: 敏感詞過濾 工具類* @Date : 2018/5/24 09:21* @Modified By :*/
/**
 * Masks every occurrence of a configured sensitive word inside a text.
 *
 * <p>Typical use: construct an instance, call {@link #InitializationWork()} once to load the word
 * list from the classpath, then call {@link #filterInfo(String)} for each text to censor. The word
 * list can also be supplied directly through {@link #setArrayList(List)}.
 *
 * <p>The misspelled names ({@code replceStr}, {@code replceSize}, {@code InitializationWork}) are
 * kept as they are because they are part of the public interface.
 */
public class SensitiveWordFilter {

    /** Pre-built run of mask text; (re)built by {@link #InitializationWork()}. */
    private StringBuilder replaceAll;
    /** Charset name used to decode the word-list resource. */
    private String encoding = "UTF-8";
    /** Text repeated to build the mask. */
    private String replceStr = "*";
    /** Number of times {@link #replceStr} is repeated when the mask is pre-built. */
    private int replceSize = 500;
    /** Classpath resource holding one sensitive word per line. */
    private String fileName = "CensorWords.txt";
    /** Loaded sensitive words; {@code null} until loaded or set. */
    private List<String> arrayList;

    /**
     * Creates a filter that reads its word list from the given classpath resource.
     *
     * @param fileName word-list resource name including its extension
     */
    public SensitiveWordFilter(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Creates a filter with a custom mask.
     *
     * @param replceStr text that replaces the characters of a sensitive word
     * @param replceSize how many times {@code replceStr} is repeated when the mask is pre-built
     */
    public SensitiveWordFilter(String replceStr, int replceSize) {
        // The original assigned fileName here, which turned the mask into "CensorWords.txt".
        this.replceStr = replceStr;
        this.replceSize = replceSize;
    }

    /** Creates a filter with the default word-list resource and a {@code "*"} mask. */
    public SensitiveWordFilter() {
    }

    /**
     * Replaces every occurrence of every configured word with mask text of the same length.
     *
     * <p>Matching is done against the unmodified input, so masking one word never hides or creates
     * a match for another. Occurrences of the same word are searched left to right without
     * overlapping each other. {@code null} and empty words in the list are skipped.
     *
     * @param str text to filter; may be {@code null}
     * @return the filtered text; {@code str} itself when it is {@code null}, empty, or when no word
     *     list is available
     * @throws IllegalStateException if a mask has to be built but {@code replceStr} is
     *     {@code null} or empty
     */
    public String filterInfo(String str) {
        List<String> words = arrayList;
        if (str == null || str.isEmpty() || words == null || words.isEmpty()) {
            return str;
        }
        StringBuilder buffer = new StringBuilder(str);
        for (String word : words) {
            // An empty word matches at every index without advancing the cursor; searching for it
            // would never terminate.
            if (word == null || word.isEmpty()) {
                continue;
            }
            int from = 0;
            int start;
            while ((start = str.indexOf(word, from)) > -1) {
                int end = start + word.length();
                // The mask has exactly the matched length, so indexes into buffer stay aligned
                // with indexes into str.
                buffer.replace(start, end, mask(word.length()));
                from = end;
            }
        }
        return buffer.toString();
    }

    /** Returns mask text of exactly {@code length} characters without touching shared state. */
    private String mask(int length) {
        StringBuilder prebuilt = replaceAll;
        if (prebuilt != null && prebuilt.length() >= length) {
            return prebuilt.substring(0, length);
        }
        // Pre-built mask missing or too short for this word: build one of the required size.
        String unit = replceStr;
        if (unit == null || unit.isEmpty()) {
            throw new IllegalStateException("replceStr must be a non-empty string");
        }
        StringBuilder run = new StringBuilder(length + unit.length());
        while (run.length() < length) {
            run.append(unit);
        }
        return run.substring(0, length);
    }

    /**
     * Pre-builds the mask and loads the word list from the classpath resource {@code fileName}.
     *
     * <p>Blank lines and duplicate words are dropped, and a leading byte-order mark is removed
     * from the first line. If reading fails part-way, the stack trace is printed and the words
     * read so far are kept.
     *
     * @throws IllegalStateException if the resource cannot be found on the classpath
     */
    public void InitializationWork() {
        StringBuilder maskRun = new StringBuilder(Math.max(0, replceSize));
        for (int i = 0; i < replceSize; i++) {
            maskRun.append(replceStr);
        }
        replaceAll = maskRun;

        java.io.InputStream in =
                SensitiveWordFilter.class.getClassLoader().getResourceAsStream(fileName);
        if (in == null) {
            throw new IllegalStateException(
                    "Sensitive word list not found on classpath: " + fileName);
        }

        List<String> loaded = new ArrayList<String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(in, encoding));
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // Java's UTF-8 decoder keeps the BOM as a character; drop it so the first
                    // word can match.
                    if (line.length() > 0 && line.charAt(0) == '\uFEFF') {
                        line = line.substring(1);
                    }
                }
                if (!line.isEmpty() && !loaded.contains(line)) {
                    loaded.add(line);
                }
            }
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException. Loading is best-effort.
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                } else {
                    // Reader construction failed; the raw stream still has to be released.
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        arrayList = loaded;
    }

    /** @return the pre-built mask, or {@code null} if none has been built or set */
    public StringBuilder getReplaceAll() {
        return replaceAll;
    }

    /** @param replaceAll pre-built mask to use */
    public void setReplaceAll(StringBuilder replaceAll) {
        this.replaceAll = replaceAll;
    }

    /** @return the text repeated to build the mask */
    public String getReplceStr() {
        return replceStr;
    }

    /** @param replceStr text repeated to build the mask */
    public void setReplceStr(String replceStr) {
        this.replceStr = replceStr;
    }

    /** @return the repeat count used when pre-building the mask */
    public int getReplceSize() {
        return replceSize;
    }

    /** @param replceSize repeat count used when pre-building the mask */
    public void setReplceSize(int replceSize) {
        this.replceSize = replceSize;
    }

    /** @return the word-list resource name */
    public String getFileName() {
        return fileName;
    }

    /** @param fileName word-list resource name including its extension */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the current word list (the internal list, not a copy), or {@code null} */
    public List<String> getArrayList() {
        return arrayList;
    }

    /** @param arrayList word list to use instead of the one loaded from the resource */
    public void setArrayList(List<String> arrayList) {
        this.arrayList = arrayList;
    }

    /** @return the charset name used to decode the word-list resource */
    public String getEncoding() {
        return encoding;
    }

    /** @param encoding charset name used to decode the word-list resource */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }
}
3.Demo測試
public class demo {public static void main(String args[]) {SensitiveWordFilter sw = new SensitiveWordFilter();
// SensitiveWordFilter sw = new SensitiveWordFilter("CensorWords.txt");sw.InitializationWork();long startNumer = System.currentTimeMillis();String str = "這里是要過濾的內(nèi)容,當(dāng)里面含有txt文件的詞匯時(shí),就會(huì)用*****進(jìn)行替換";System.out.println("被檢測(cè)字符長(zhǎng)度:" + str.length());str = sw.filterInfo(str);long endNumber = System.currentTimeMillis();System.out.println("耗時(shí)(毫秒):" + (endNumber - startNumer));System.out.println("過濾之后:" + str);}
}
總結
以上是生活随笔為你收集整理的java 实现敏感词汇的过滤的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。