/*
 * Copyright (C) 2011 Andrea Schweer
 *
 * This file is part of the Digital Parrot.
 *
 * The Digital Parrot is free software; you can redistribute it and/or modify
 * it under the terms of the Eclipse Public License as published by the Eclipse
 * Foundation or its Agreement Steward, either version 1.0 of the License, or
 * (at your option) any later version.
 *
 * The Digital Parrot is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the Eclipse Public License for
 * more details.
 *
 * You should have received a copy of the Eclipse Public License along with the
 * Digital Parrot. If not, see http://www.eclipse.org/legal/epl-v10.html.
 *
 */
package net.schweerelos.parrot.model;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;

/**
 * In-memory full-text search over the string representation of
 * {@link NodeWrapper}s, backed by a Lucene {@link RAMDirectory}.
 *
 * <p>Nodes are indexed with {@link #add(NodeWrapper)}: the text returned by
 * the node's {@code toString()} is tokenised into a "label" field, and a
 * private sequential id is stored alongside it so that search hits can be
 * mapped back to the node that produced them.
 */
public class TextSearchEngine {

    /** Name of the tokenised field holding the node's text. */
    private static final String LABEL_FIELD_NAME = "label";

    /** Name of the stored, non-indexed field holding the document's id. */
    private static final String ID_FIELD_NAME = "id";

    /** Upper bound on the number of hits collected per search. */
    private static final int MAX_HITS = 10;

    private static final String INTERNAL_ERROR_MESSAGE = "Internal error. Can't search.";

    /** Stays {@code null} if the writer could not be created in the constructor. */
    private IndexWriter writer;
    private final Directory index;
    private final Analyzer analyser;

    /**
     * Maps the id stored in each indexed document to the node it was built
     * from. A sequential id is used rather than {@code node.hashCode()}
     * because distinct nodes may share a hash code, in which case one would
     * silently replace the other in this map.
     */
    private final Map<Integer, NodeWrapper> idToNodeWrapper;

    /** Lazily opened searcher; discarded whenever the index changes. */
    private IndexSearcher searcher;

    /** Id that will be assigned to the next added document. */
    private int nextId;

    /**
     * Creates an engine with an empty in-memory index.
     *
     * <p>A failure to create the index writer is reported on standard error
     * and otherwise ignored; subsequent calls to {@link #add(NodeWrapper)}
     * then fail with an {@link IllegalStateException}.
     */
    public TextSearchEngine() {
        index = new RAMDirectory();
        analyser = new StandardAnalyzer();
        idToNodeWrapper = new HashMap<Integer, NodeWrapper>();
        try {
            writer = new IndexWriter(index, analyser, true);
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are both
            // IOExceptions, so this single handler covers all three cases.
            e.printStackTrace();
        }
    }

    /**
     * Adds a node to the index so that later searches can find it.
     *
     * <p>I/O problems while writing to the index are reported on standard
     * error and otherwise ignored.
     *
     * @param node the node to index; its {@code toString()} value is the
     *            searchable text
     * @throws IllegalStateException if the index writer could not be created
     *             when this engine was constructed
     */
    public void add(NodeWrapper node) {
        if (writer == null) {
            throw new IllegalStateException("Index writer is not available. Can't add node.");
        }

        int id = nextId++;
        Document doc = new Document();
        doc.add(new Field(LABEL_FIELD_NAME, node.toString(), Field.Store.COMPRESS, Field.Index.TOKENIZED));
        doc.add(new Field(ID_FIELD_NAME, String.valueOf(id), Field.Store.YES, Field.Index.NO));

        try {
            writer.addDocument(doc);
            idToNodeWrapper.put(id, node);
            writer.flush();
        } catch (IOException e) {
            // ignore
            e.printStackTrace();
        } finally {
            // An open searcher only sees the index as it was when the
            // searcher was created; drop it so the next search reopens the
            // index and picks up this document.
            invalidateSearcher();
        }
    }

    /**
     * Searches the indexed node labels.
     *
     * @param queryString query in Lucene query parser syntax; leading
     *            wildcards are allowed
     * @return the nodes belonging to up to {@value #MAX_HITS} best hits;
     *         empty if nothing matches, never {@code null}
     * @throws SearchFailedException if the query string cannot be parsed or
     *             the index cannot be read
     */
    public Set<NodeWrapper> search(String queryString) throws SearchFailedException {
        Query query = parseQuery(queryString);
        IndexSearcher currentSearcher = openSearcherIfNeeded();

        // get up to MAX_HITS best hits
        TopDocCollector collector = new TopDocCollector(MAX_HITS);
        Set<NodeWrapper> results = new HashSet<NodeWrapper>();
        try {
            currentSearcher.search(query, collector);
            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
                Document doc = currentSearcher.doc(hit.doc);
                NodeWrapper node = idToNodeWrapper.get(Integer.valueOf(doc.get(ID_FIELD_NAME)));
                if (node != null) {
                    results.add(node);
                }
            }
        } catch (IOException e) {
            throw new SearchFailedException(INTERNAL_ERROR_MESSAGE, e);
        }
        return results;
    }

    /** Parses the query string against the label field. */
    private Query parseQuery(String queryString) throws SearchFailedException {
        try {
            QueryParser queryParser = new QueryParser(LABEL_FIELD_NAME, analyser);
            queryParser.setAllowLeadingWildcard(true);
            return queryParser.parse(queryString);
        } catch (ParseException e) {
            throw new SearchFailedException("Problem parsing query string '"
                    + queryString + "'. Can't search.", e);
        }
    }

    /** Returns the cached searcher, opening a new one on the index if there is none. */
    private IndexSearcher openSearcherIfNeeded() throws SearchFailedException {
        if (searcher == null) {
            try {
                searcher = new IndexSearcher(index);
            } catch (IOException e) {
                throw new SearchFailedException(INTERNAL_ERROR_MESSAGE, e);
            }
        }
        return searcher;
    }

    /** Closes and forgets the cached searcher, if any. */
    private void invalidateSearcher() {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            // Nothing useful can be done; the searcher is discarded either way.
            e.printStackTrace();
        } finally {
            searcher = null;
        }
    }
}