package querqy.lucene.contrib.parser; import java.io.IOException; import org.apache.commons.io.input.CharSequenceReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import querqy.model.Clause.Occur; import querqy.model.DisjunctionMaxQuery; import querqy.model.Query; import querqy.model.Term; import querqy.parser.QuerqyParser; /** * A {@linkplain QuerqyParser} that works solely on Lucene {@linkplain Analyzer} * s. The query is run through a query analyzer. The resulting tokens are used * to lookup synonyms with the synonym analyzer. The tokens remaining in that * analyzer are treated as synonyms. * * @author Shopping24 GmbH, Torsten Bøgh Köster (@tboeghk) */ public class AnalyzingQuerqyParser implements QuerqyParser { /** * {@link Analyzer} for the query. */ private final Analyzer queryAnalyzer; /** * {@link Analyzer} for the synonyms. */ private final Analyzer optSynonymAnalyzer; /** * Constructor. * * @param queryAnalyzer * {@link Analyzer} for the query. * @param optSynonymAnalyzer * {@link Analyzer} for the synonyms. */ public AnalyzingQuerqyParser(Analyzer queryAnalyzer, Analyzer optSynonymAnalyzer) { checkNotNull(queryAnalyzer); this.queryAnalyzer = queryAnalyzer; this.optSynonymAnalyzer = optSynonymAnalyzer; } /** * Generate query for the input. * * @param input * Search term. */ @Override public Query parse(String input) { checkNotNull(input); try (TokenStream queryTokens = queryAnalyzer.tokenStream("querqy", new CharSequenceReader(input))) { Query query = new Query(); queryTokens.reset(); CharTermAttribute original = queryTokens.addAttribute(CharTermAttribute.class); while (queryTokens.incrementToken()) { DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(query, Occur.SHOULD, false); // We need to copy "original" per toString() here, because // "original" is transient. dmq.addClause(new Term(dmq, original.toString())); query.addClause(dmq); if (optSynonymAnalyzer != null) { addSynonyms(dmq, original); } } queryTokens.end(); // if the stopwords eliminates all terms, we add the input to the query if (query.getClauses().isEmpty()) { DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(query, Occur.SHOULD, false); dmq.addClause(new Term(dmq, input)); query.addClause(dmq); } return query; } catch (IOException e) { throw new RuntimeException(e); } } /** * Add terms to the query for the synonyms. * * @param dmq * {@link DisjunctionMaxQuery} * @param original * Original term to determine synonyms for. */ private void addSynonyms(DisjunctionMaxQuery dmq, CharSequence original) throws IOException { try (TokenStream synonymTokens = optSynonymAnalyzer.tokenStream("querqy", new CharSequenceReader(original))) { synonymTokens.reset(); CharTermAttribute generated = synonymTokens.addAttribute(CharTermAttribute.class); while (synonymTokens.incrementToken()) { // We need to copy "generated" per toString() here, because // "generated" is transient. dmq.addClause(new Term(dmq, generated.toString(), true)); } synonymTokens.end(); } } public static void checkNotNull(Object obj) { if (obj == null) { throw new NullPointerException(); } } }