package org.aksw.gerbil.semantic.sameas.index;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;

import org.aksw.gerbil.datatypes.ErrorTypes;
import org.aksw.gerbil.exceptions.GerbilException;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
//import org.apache.lucene.queryParser.ParseException;
//import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Read-only access to a Lucene index of sameAs relations.
 *
 * <p>
 * Each lookup issues an exact {@link TermQuery} against the {@code CONTENTS}
 * field and collects the {@code CONTENTS} and {@code SAMEAS} values of the
 * matching documents. The field names and the hit limit {@code MAX_SEARCH}
 * are not declared in this class (presumably inherited from
 * {@link LuceneConstants}).
 * </p>
 *
 * <p>
 * Instances hold an open index reader and directory and must be released
 * with {@link #close()}, e.g. via try-with-resources.
 * </p>
 */
public class Searcher extends LuceneConstants implements Closeable {

    private final IndexSearcher indexSearcher;
    private final Directory indexDirectory;
    private final IndexReader indexReader;

    /**
     * Opens the index stored in the given directory.
     *
     * @param indexDirectoryPath
     *            file system path of the Lucene index directory
     * @throws GerbilException
     *             if the directory or the index reader cannot be opened; the
     *             underlying {@link IOException} is attached as cause
     */
    public Searcher(String indexDirectoryPath) throws GerbilException {
        Directory directory = null;
        IndexReader reader;
        try {
            directory = FSDirectory.open(new File(indexDirectoryPath).toPath());
            reader = DirectoryReader.open(directory);
        } catch (IOException e) {
            // The directory may already be open when opening the reader
            // fails; release it so a failed construction does not leak it.
            IOUtils.closeQuietly(directory);
            throw new GerbilException("Could not initialize Searcher", e, ErrorTypes.UNEXPECTED_EXCEPTION);
        }
        indexDirectory = directory;
        indexReader = reader;
        indexSearcher = new IndexSearcher(reader);
    }

    /**
     * Returns the top hits whose {@code CONTENTS} field matches the given
     * string exactly.
     *
     * @param searchQuery
     *            the term text to look up
     * @return the hits, limited to {@code MAX_SEARCH} entries
     * @throws IOException
     *             if the index cannot be read
     */
    public TopDocs searchTops(String searchQuery) throws IOException {
        return searchTerm(searchQuery);
    }

    /**
     * Runs an exact term query on the {@code CONTENTS} field.
     */
    private TopDocs searchTerm(String searchQuery) throws IOException {
        TermQuery query = new TermQuery(new Term(CONTENTS, searchQuery));
        return indexSearcher.search(query, MAX_SEARCH);
    }

    /**
     * Loads the stored document a hit refers to.
     *
     * @param scoreDoc
     *            a hit returned by {@link #searchTops(String)}
     * @return the stored document of that hit
     * @throws CorruptIndexException
     *             if the index is corrupt
     * @throws IOException
     *             if the index cannot be read
     */
    public Document getDocument(ScoreDoc scoreDoc) throws CorruptIndexException, IOException {
        return indexSearcher.doc(scoreDoc.doc);
    }

    /**
     * Closes the index reader and the index directory.
     *
     * <p>
     * Both are closed quietly, i.e. errors raised while closing are
     * suppressed; the {@code throws} clause is kept for compatibility with
     * existing callers.
     * </p>
     *
     * @throws IOException
     *             declared for compatibility
     */
    @Override
    public void close() throws IOException {
        IOUtils.closeQuietly(indexReader);
        IOUtils.closeQuietly(indexDirectory);
    }

    /**
     * Looks up the given URI; delegates to {@link #searchSameAsTerm(String)}.
     *
     * @param uri
     *            the URI to look up
     * @return the URIs found for the given URI; empty if there is no hit
     * @throws GerbilException
     *             if the index cannot be read
     */
    public Collection<String> search(String uri) throws GerbilException {
        return searchSameAsTerm(uri);
    }

    /**
     * Collects, for every document whose {@code CONTENTS} field equals the
     * given URI, the {@code CONTENTS} value itself and all space separated
     * entries of its {@code SAMEAS} field.
     *
     * @param uri
     *            the URI to look up
     * @return the set of collected URIs; empty if there is no hit. Missing
     *         fields and empty entries are skipped.
     * @throws GerbilException
     *             if the index cannot be searched or a hit cannot be loaded;
     *             the underlying {@link IOException} is attached as cause
     */
    public Collection<String> searchSameAsTerm(String uri) throws GerbilException {
        TopDocs docs;
        try {
            docs = searchTops(uri);
        } catch (IOException e) {
            throw new GerbilException("Could not parse index files", e, ErrorTypes.UNEXPECTED_EXCEPTION);
        }
        Collection<String> uris = new HashSet<String>();
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc;
            try {
                doc = getDocument(scoreDoc);
            } catch (IOException e) {
                throw new GerbilException("Could not load Hits", e, ErrorTypes.UNEXPECTED_EXCEPTION);
            }
            // Document.get returns null when the document has no stored
            // field of that name, so both fields have to be checked.
            String content = doc.get(CONTENTS);
            if (content != null) {
                uris.add(content);
            }
            String sameAs = doc.get(SAMEAS);
            if (sameAs == null) {
                continue;
            }
            for (String uriStr : sameAs.split(" ")) {
                // An empty field value or consecutive spaces yield empty
                // tokens, which are not URIs.
                if (!uriStr.isEmpty()) {
                    uris.add(uriStr);
                }
            }
        }
        return uris;
    }
}