package net.ion.craken.node.problem.inf.v7; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import junit.framework.TestCase; import net.ion.framework.util.Debug; import net.ion.framework.util.InfinityThread; import net.ion.nsearcher.common.SearchConstant; import net.ion.nsearcher.common.WriteDocument; import net.ion.nsearcher.config.Central; import net.ion.nsearcher.config.CentralConfig; import net.ion.nsearcher.index.IndexJob; import net.ion.nsearcher.index.IndexSession; import net.ion.nsearcher.index.Indexer; import net.ion.radon.util.csv.CsvReader; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Version; import org.infinispan.Cache; import org.infinispan.configuration.cache.Configuration; import org.infinispan.configuration.cache.ConfigurationBuilder; import org.infinispan.configuration.cache.EvictionConfigurationBuilder; import org.infinispan.eviction.EvictionStrategy; import org.infinispan.lucene.directory.BuildContext; import org.infinispan.lucene.directory.DirectoryBuilder; import org.infinispan.manager.DefaultCacheManager; public class TestIndex extends TestCase { private DefaultCacheManager dm; @Override protected void setUp() throws Exception { super.setUp(); this.dm = new DefaultCacheManager() ; String path = "./resource/temp/chunk"; EvictionConfigurationBuilder builder = new ConfigurationBuilder() //.read(dm.getDefaultCacheConfiguration()) .invocationBatching().enable() .persistence().addSingleFileStore().location("./resource/temp/chunk") .fetchPersistentState(true).preload(false).shared(false).purgeOnStartup(false).ignoreModifications(false) .async().enabled(false).flushLockTimeout(20000).shutdownTimeout(1000).modificationQueueSize(1000).threadPoolSize(5) .eviction().maxEntries(20000).strategy(EvictionStrategy.LIRS) ; Configuration meta_config = new ConfigurationBuilder().persistence().passivation(false) .addSingleFileStore().fetchPersistentState(false).preload(true).shared(false).purgeOnStartup(false).ignoreModifications(false).location(path) .async().enable().flushLockTimeout(300000).shutdownTimeout(2000).modificationQueueSize(10).threadPoolSize(3) .build() ; dm.defineConfiguration("chunk", builder.build()) ; dm.defineConfiguration("meta", meta_config) ; dm.start(); } @Override protected void tearDown() throws Exception { this.dm.stop(); super.tearDown(); } public void testRead() throws Exception { Cache<Object, Object> chunk = dm.getCache("chunk") ; Cache<?, ?> meta = dm.getCache("meta") ; BuildContext bcontext = DirectoryBuilder.newDirectoryInstance(meta, chunk, meta, "search"); bcontext.chunkSize(1024 * 1024); Directory directory = bcontext.create(); IndexSearcher searcher = new IndexSearcher(IndexReader.open(directory)) ; TopDocs topdocs = searcher.search(new MatchAllDocsQuery(), 100); Debug.line(topdocs.totalHits); DirectoryReader reader = IndexReader.open(directory) ; for(ScoreDoc sd : topdocs.scoreDocs){ Document doc = reader.document(sd.doc); Debug.line(doc); } directory.close(); } public void testCentralIndex() throws Exception { Cache<Object, Object> chunk = dm.getCache("chunk") ; Cache<?, ?> meta = dm.getCache("meta") ; BuildContext bcontext = DirectoryBuilder.newDirectoryInstance(meta, chunk, meta, "search"); bcontext.chunkSize(1024 * 1024); Directory directory = bcontext.create(); Central central = CentralConfig.oldFromDir(directory).indexConfigBuilder().indexAnalyzer(new StandardAnalyzer(SearchConstant.LuceneVersion)).build() ; Indexer indexer = central.newIndexer() ; indexer.index(new IndexJob<Void>() { @Override public Void handle(IndexSession isession) throws Exception { File file = new File("C:/temp/freebase-datadump-tsv/data/medicine/drug_label_section.tsv") ; CsvReader reader = new CsvReader(new BufferedReader(new FileReader(file))); reader.setFieldDelimiter('\t') ; String[] headers = reader.readLine(); String[] line = reader.readLine() ; int max = 500000 ; while(line != null && line.length > 0 && max-- > 0 ){ createDoc("/bleujin/" + max, isession, headers, line).updateVoid() ; line = reader.readLine() ; if ((max % 4999) == 0) { System.out.print('.') ; isession.continueUnit() ; } } return null; } private WriteDocument createDoc(String id, IndexSession isession, String[] headers, String[] line) { WriteDocument doc = isession.newDocument(id) ; for(int i = 0 ; i <headers.length ; i++){ String header = headers[i] ; String value = line[i] ; doc.unknown(header, value) ; } return doc; } }) ; Debug.line("endJob") ; // new InfinityThread().startNJoin(); } public void testIndex() throws Exception { // 59 sec Cache<Object, Object> chunk = dm.getCache("chunk") ; Cache<?, ?> meta = dm.getCache("meta") ; BuildContext bcontext = DirectoryBuilder.newDirectoryInstance(meta, chunk, meta, "search"); bcontext.chunkSize(1024 * 1024); Directory directory = bcontext.create(); // directory = FSDirectory.open(new File("./resource/temp/index")) ; IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(SearchConstant.LuceneVersion)); IndexWriter iw = new IndexWriter(directory, iwconfig) ; File file = new File("C:/temp/freebase-datadump-tsv/data/medicine/drug_label_section.tsv") ; CsvReader reader = new CsvReader(new BufferedReader(new FileReader(file))); reader.setFieldDelimiter('\t') ; String[] headers = reader.readLine(); String[] line = reader.readLine() ; int max = 500000 ; int skipCount = 0 ; while(line != null && line.length > 0 && max-- > 0 ){ String path = "/bleujin/" + max; Document doc = createDoc(path, iw, headers, line) ; iw.updateDocument(new Term("id", path), doc); line = reader.readLine() ; if ((max % 4999) == 0) { System.out.print('.') ; iw.commit(); } } iw.commit(); iw.close(); directory.close(); Debug.line("endJob", skipCount) ; // new InfinityThread().startNJoin(); } private Document createDoc(String id, IndexWriter iw, String[] headers, String[] values) { Document doc = new Document() ; doc.add(new StringField("id", id, Store.YES)); for(int i = 0 ; i <headers.length ; i++){ String name = headers[i] ; String value = (values.length > i) ? values[i] : "" ; if (value.contains(" ")) doc.add(new TextField(name, value, Store.YES)); else doc.add(new StringField(name, value, Store.YES)) ; } return doc; } }