package experiments.collective.entdoccentric; import java.io.File; import java.io.IOException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class TestClass { public static final String indexDirectory = "/home/quh/Arbeitsfläche/Code_Data/LuceneCorpora/Lucene 4.1/PorterStemmerKnowledgeBaseCalbCSmall+UMLS+Uniprot/"; public static void main(String[] args) throws IOException { File indexDir = new File(indexDirectory); Directory dir = FSDirectory.open(indexDir); IndexSearcher iSearcher = new IndexSearcher(DirectoryReader.open(dir)); long time = System.currentTimeMillis(); String name = "The proteasome is a multicatalytic proteinase complex which is characterized by its ability to cleave"; // String name = "On the isolation of a prolactin inhibiting factor"; BooleanQuery query = new BooleanQuery(); // query.add(new FuzzyQuery(new Term("titleandabs", "lipopolysaccharide")), Occur.MUST); String[] words = name.split(" "); DefaultSimilarity sim = new DefaultSimilarity(); for (int i = 0; i < words.length; i++) { query.add(new TermQuery(new Term("title", words[i])), Occur.MUST); // query.add(new FuzzyQuery(new Term("abstract", words[i])), // Occur.SHOULD); } TopDocs top = iSearcher.search(query, 101); ScoreDoc[] score = top.scoreDocs; for (int i = 0; i < score.length; i++) { System.out.println(score[i].doc); } System.out.println(System.currentTimeMillis() - time); } }