第一章:初识lucene
生活随笔
收集整理的這篇文章主要介紹了
第一章:初识lucene
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
lucene實戰程序示例:
建立索引(使用Indexer索引文本文件)
package lucene;import java.io.File; import java.io.FileFilter; import java.io.FileReader; import java.io.IOException;import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version;public class Indexer {public static String indexDir = "h:/lucene"; // 創(chuàng)建索引目錄public static String dataDir = "h:/listFile";// 對(duì)指定目錄中的(txt)文件進(jìn)行索引private IndexWriter writer;//索引類/*** 初始化writer* * @param indexDir* 索引存放目錄* @throws IOException*/public Indexer(String indexDir) throws IOException {Directory dir = FSDirectory.open(new File(indexDir));IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));writer = new IndexWriter(dir, config);}/*** 關(guān)閉writer* * @throws Exception*/public void close() throws Exception {writer.close();}/*** 過(guò)濾文件(只索引txt文件)* * @param dir* 要被索引的文件目錄* @param filter* 過(guò)濾器* @return 返回被索引的文檔數(shù)* @throws Exception*/public int index(String dir, FileFilter filter) throws Exception {File[] files = new File(dir).listFiles();for (File f : files) {if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()&& (filter == null || filter.accept(f))) {indexFile(f);}}return writer.numDocs();}/*** 添加索引文件* * @param f* 符合條件的索引文件* @throws Exception*/private void indexFile(File f) throws Exception {System.out.print("Indexing " + f.getCanonicalPath());Document doc = getDocument(f);writer.addDocument(doc);}/*** 向文檔中添加索引字段* * @param f* 索引文件* @return 文檔doc* @throws Exception*/public Document getDocument(File f) throws Exception {Document doc = new Document();doc.add(new Field("contents", new FileReader(f)));doc.add(new Field("filename", f.getName(), Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new 
Field("fullpath", f.getCanonicalPath(), Field.Store.YES,Field.Index.NOT_ANALYZED));return doc;}public static void main(String[] args) {long start = System.currentTimeMillis();Indexer indexer = null;int numIndexed = 0;try {indexer = new Indexer(indexDir);numIndexed = indexer.index(dataDir, new TextFilesFilter());} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}finally{try {//IndexerWriter寫索引操作關(guān)閉,提交寫索引(如沒(méi)關(guān)閉會(huì)造成索引無(wú)法完整創(chuàng)建,查詢時(shí)出錯(cuò))indexer.close();} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}}long end = System.currentTimeMillis();System.out.println("\n Indexing " + numIndexed + " files took "+ (end - start) + "milliseconds");}static class TextFilesFilter implements FileFilter {@Overridepublic boolean accept(File pathname) {return pathname.getName().toLowerCase().endsWith(".txt");}} }搜索索引(Searcher)
package lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches an existing Lucene index and prints the {@code fullpath} field of
 * each matching document.
 */
public class Searcher {

    /** Directory that holds the index to search. */
    public static String indexDir = "h:/lucene";

    /**
     * Runs a single-term query against the {@code contents} field and prints
     * the hit count, the elapsed time and the {@code fullpath} of up to ten
     * matching documents.
     *
     * @param indexDir directory that holds the index
     * @param q        term to look up in the {@code contents} field
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException declared for the QueryParser alternative shown in
     *                        the comment below; not thrown by the TermQuery path
     */
    public static void search(String indexDir, String q) throws IOException,
            ParseException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexReader reader = IndexReader.open(dir);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Alternative: parse q with the same analyzer that was
                    // used for indexing.
                    // QueryParser parser = new QueryParser(Version.LUCENE_35, "contents",
                    //         new StandardAnalyzer(Version.LUCENE_35));
                    // Query query = parser.parse(q);
                    // NOTE(review): TermQuery does not run q through an
                    // analyzer, so q presumably has to equal an indexed term
                    // exactly - confirm against how the index was built.
                    Query query = new TermQuery(new Term("contents", q));

                    long start = System.currentTimeMillis();
                    TopDocs hits = searcher.search(query, 10);
                    long end = System.currentTimeMillis();

                    System.err.println("Found " + hits.totalHits + " document(s) (in "
                            + (end - start) + "milliseconds) that matched query'" + q
                            + "':");
                    for (ScoreDoc scoreDoc : hits.scoreDocs) {
                        Document doc = searcher.doc(scoreDoc.doc);
                        System.out.println(doc.get("fullpath"));
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that
                // reader, so it has to be released here.
                reader.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Searches {@link #indexDir} for the term "apache".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            search(indexDir, "apache");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}
總結
以上是生活随笔為你收集整理的第一章:初识lucene的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: win10双击excel打开太慢怎么解决
- 下一篇: lucene 索引出错 no segme