/* Copyright (C) 2016 maik.jablonski@jease.org This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ package jfix.search; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import java.util.ArrayList; import java.util.List; public class FullTextIndex<E> { // + - && || ! ( ) { } [ ] ^ " ~ * ? : \ private static final String[] STRINGS_TO_QUOTE = new String[]{"-", "&", "!", "{", "}", "[", "]", ":", "?"}; static { BooleanQuery.setMaxClauseCount(BooleanQuery.getMaxClauseCount() * 10); } private List<E> objects; private Directory indexDirectory; private IndexWriter indexWriter; private QueryParser queryParser; private Document document; private Field fulltext; public FullTextIndex() { try { objects = new ArrayList<>(); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(new LimitTokenCountAnalyzer(analyzer, Integer.MAX_VALUE)); indexDirectory = new RAMDirectory(); indexWriter = new IndexWriter(indexDirectory, config); queryParser = new QueryParser("text", analyzer); queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); fulltext = new TextField("text", "", Field.Store.NO); // Used as base-set for a NOT-Query Field inverse = new TextField("true", "yes", Field.Store.NO); document = new Document(); document.add(fulltext); document.add(inverse); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } public void add(E object, String text) { try { if (object != null && text != null) { objects.add(object); fulltext.setStringValue(appendWithoutPunctuation(text)); indexWriter.addDocument(document); } } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } public void commit() { try { indexWriter.commit(); indexWriter.close(); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } public List<E> search(String search) throws Exception { try { ObjectCollector<E> collector = new ObjectCollector<>(objects); String query = buildQueryString(search); IndexReader indexReader = DirectoryReader.open(indexDirectory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); indexSearcher.search(queryParser.parse(query), collector); indexReader.close(); return collector.getOutput(); } catch (Exception e) { throw new Exception("Query Syntax Error: " + search); } } protected String buildQueryString(String search) { for (String stringToQuote : STRINGS_TO_QUOTE) { search = search.replace(stringToQuote, "\\" + stringToQuote); } if (search.startsWith("^")) { search = "true:yes+" + search; } return search.replace("+", " AND ").replace("|", " OR ") .replace("^", " NOT "); } private String appendWithoutPunctuation(String str) { int strlen = str.length(); StringBuilder sb = new StringBuilder(2 * strlen + 2); sb.append(str); sb.append(" "); for (int i = 0; i < strlen; i++) { char c = str.charAt(i); if (Character.isLetterOrDigit(c) || Character.isWhitespace(c)) { sb.append(c); } else { sb.append(" "); } } return sb.toString(); } }