package org.fnlp.app.lucene.demo; import java.io.File; import java.io.IOException; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.fnlp.app.lucene.FNLPAnalyzer; import edu.fudan.nlp.cn.CNFactory; import edu.fudan.nlp.cn.CNFactory.Models; import edu.fudan.util.exception.LoadModelException; public class BuildIndex { /** * @param args * @throws IOException * @throws LoadModelException */ public static void main(String[] args) throws IOException, LoadModelException { String indexPath = "./tmp/faqidx"; System.out.println("Indexing to directory '" + indexPath + "'..."); Date start = new Date(); Directory dir = FSDirectory.open(new File(indexPath)); //需要先初始化 CNFactory CNFactory factory = CNFactory.getInstance("./models",Models.SEG_TAG); Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); String[] strs = new String[]{ "终端的保修期为一年。", "凡在保修期内非人为损坏,均可免费保修。", "人为损坏的终端将视情况收取维修费用。" }; for(int i=0;i<strs.length;i++){ Document doc = new Document(); Field field = new TextField("content", strs[i] , Field.Store.YES); doc.add(field); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { writer.updateDocument(new Term("content",strs[i]), doc); } } writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } }