package org.apache.blur.lucene.search; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.io.StringReader; import java.text.DateFormat; import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.TimeZone; import java.util.UUID; import org.apache.blur.analysis.FieldManager; import org.apache.blur.analysis.FieldTypeDefinition; import org.apache.blur.utils.BlurConstants; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.Version; public class BlurQueryParser extends QueryParser { public static final String SUPER = "super"; protected final Map<Query, String> _fieldNames; protected final FieldManager _fieldManager; protected final Locale _locale = Locale.getDefault(); protected final TimeZone _timeZone = TimeZone.getDefault(); protected final boolean _allowLeadingWildcard; protected final int _fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; public BlurQueryParser(Version matchVersion, String f, Map<Query, String> fieldNames, FieldManager fieldManager) { super(matchVersion, f, fieldManager.getAnalyzerForQuery()); _fieldNames = fieldNames == null ? new HashMap<Query, String>() : fieldNames; _fieldManager = fieldManager; _allowLeadingWildcard = true; setAllowLeadingWildcard(_allowLeadingWildcard); setAutoGeneratePhraseQueries(true); } @Override protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { String resolvedField = _fieldManager.resolveField(term.field()); try { Boolean b = _fieldManager.checkSupportForFuzzyQuery(resolvedField); if (!(b == null || b)) { throw new RuntimeException("Field [" + resolvedField + "] is type [" + _fieldManager.getFieldTypeDefinition(resolvedField) + "] which does not support fuzzy queries."); } } catch (IOException e) { throw new RuntimeException(e); } customQueryCheck(resolvedField); return addField(super.newFuzzyQuery(new Term(resolvedField, term.text()), minimumSimilarity, prefixLength), resolvedField); } @Override protected Query newMatchAllDocsQuery() { return addField(super.newMatchAllDocsQuery(), UUID.randomUUID().toString()); } @Override protected MultiPhraseQuery newMultiPhraseQuery() { return new MultiPhraseQuery() { @Override public void add(Term[] terms, int position) { super.add(terms, position); for (Term term : terms) { String resolvedField = _fieldManager.resolveField(term.field()); customQueryCheck(resolvedField); addField(this, resolvedField); } } }; } @Override protected PhraseQuery newPhraseQuery() { return new PhraseQuery() { @Override public void add(Term term, int position) { super.add(term, position); String resolvedField = _fieldManager.resolveField(term.field()); customQueryCheck(resolvedField); addField(this, resolvedField); } }; } @Override protected Query newPrefixQuery(Term prefix) { String resolvedField = _fieldManager.resolveField(prefix.field()); try { Boolean b = _fieldManager.checkSupportForPrefixQuery(resolvedField); if (!(b == null || b)) { throw new RuntimeException("Field [" + resolvedField + "] is type [" + _fieldManager.getFieldTypeDefinition(resolvedField) + "] which does not support prefix queries."); } } catch (IOException e) { throw new RuntimeException(e); } customQueryCheck(resolvedField); return addField(super.newPrefixQuery(new Term(resolvedField, prefix.text())), resolvedField); } @Override protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { String resolvedField = _fieldManager.resolveField(field); customQueryCheck(resolvedField); Query q; try { q = _fieldManager.getNewRangeQuery(resolvedField, part1, part2, startInclusive, endInclusive); } catch (IOException e) { throw new RuntimeException(e); } if (q != null) { return addField(q, resolvedField); } return addField(super.newRangeQuery(resolvedField, part1, part2, startInclusive, endInclusive), resolvedField); } protected void customQueryCheck(String field) { try { Boolean b = _fieldManager.checkSupportForCustomQuery(field); if (b != null && b) { throw new RuntimeException("Field [" + field + "] is type [" + _fieldManager.getFieldTypeDefinition(field) + "] queries should exist with \" around them."); } } catch (IOException e) { throw new RuntimeException(e); } } @Override protected Query newTermQuery(Term term) { String resolvedField = _fieldManager.resolveField(term.field()); try { Boolean b = _fieldManager.checkSupportForCustomQuery(resolvedField); if (b != null && b) { return addField(_fieldManager.getCustomQuery(resolvedField, term.text()), resolvedField); } } catch (IOException e) { throw new RuntimeException(e); } Query q; try { q = _fieldManager.getTermQueryIfNumeric(resolvedField, term.text()); } catch (IOException e) { throw new RuntimeException(e); } if (q != null) { return addField(q, resolvedField); } return addField(super.newTermQuery(new Term(resolvedField, term.text())), resolvedField); } @Override protected Query newWildcardQuery(Term t) { if ("*".equals(t.text())) { String fieldName = t.field(); if (SUPER.equals(fieldName)) { return addField(new MatchAllDocsQuery(), fieldName); } else { String resolvedField = _fieldManager.resolveField(t.field()); return addField(new TermQuery(new Term(BlurConstants.FIELDS, fieldName)), resolvedField); } } String resolvedField = _fieldManager.resolveField(t.field()); try { Boolean b = _fieldManager.checkSupportForWildcardQuery(resolvedField); if (!(b == null || b)) { throw new RuntimeException("Field [" + resolvedField + "] is type [" + _fieldManager.getFieldTypeDefinition(resolvedField) + "] which does not support wildcard queries."); } } catch (IOException e) { throw new RuntimeException(e); } customQueryCheck(resolvedField); return addField(super.newWildcardQuery(new Term(resolvedField, t.text())), resolvedField); } @Override protected Query newRegexpQuery(Term t) { String resolvedField = _fieldManager.resolveField(t.field()); try { Boolean b = _fieldManager.checkSupportForRegexQuery(resolvedField); if (!(b == null || b)) { throw new RuntimeException("Field [" + resolvedField + "] is type [" + _fieldManager.getFieldTypeDefinition(resolvedField) + "] which does not support wildcard queries."); } } catch (IOException e) { throw new RuntimeException(e); } customQueryCheck(resolvedField); return addField(super.newRegexpQuery(new Term(resolvedField, t.text())), resolvedField); } protected Query addField(Query q, String field) { _fieldNames.put(q, field); return q; } protected String analyzeField(String field, String text) throws ParseException { try { FieldTypeDefinition fieldTypeDefinition = _fieldManager.getFieldTypeDefinition(field); if (fieldTypeDefinition == null) { return text; } Analyzer analyzerForQuery = fieldTypeDefinition.getAnalyzerForQuery(field); if (analyzerForQuery instanceof KeywordAnalyzer) { return text; } StringBuilder builder = new StringBuilder(); StringBuilder result = new StringBuilder(); for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (isSpecialChar(c) && !isEscaped(text, i - 1)) { if (builder.length() > 0) { result.append(analyze(field, builder.toString(), analyzerForQuery)); builder.setLength(0); } if (isSpecialRange(c)) { char closingChar = getClosingChar(c); int indexOf = text.indexOf(closingChar, i); if (indexOf < 0) { throw new ParseException("Could not find closing char [" + closingChar + "] in text [" + text + "]"); } String s = text.substring(i, indexOf + 1); result.append(s); i += s.length() - 1; } else { result.append(c); } } else { builder.append(c); } } if (builder.length() > 0) { result.append(analyze(field, builder.toString(), analyzerForQuery)); builder.setLength(0); } return result.toString(); } catch (IOException e) { throw new ParseException(e.getMessage()); } } private char getClosingChar(char c) throws ParseException { switch (c) { case '[': return ']'; default: throw new ParseException("Closing char for " + c + " not found."); } } private boolean isSpecialRange(char c) { switch (c) { case '[': return true; case '{': return true; default: return false; } } private boolean isSpecialChar(char c) { switch (c) { case '?': case '/': case '[': case ']': case '}': case '{': case '*': return true; default: return false; } } private boolean isEscaped(String text, int pos) { if (pos <= 0) { return false; } return text.charAt(pos) == '\\'; } private String analyze(String field, String text, Analyzer analyzerForQuery) throws IOException, ParseException { StringBuilder result = new StringBuilder(); TokenStream tokenStream = analyzerForQuery.tokenStream(field, new StringReader(text)); CharTermAttribute termAttribute = tokenStream.getAttribute(CharTermAttribute.class); tokenStream.reset(); if (tokenStream.incrementToken()) { result.append(termAttribute.toString()); } if (tokenStream.incrementToken()) { throw new ParseException("Should not have multiple tokens in text [" + text + "] for field [" + field + "]."); } return result.toString(); } @Override protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { part1 = part1 == null ? null : analyzeField(field, part1); part2 = part2 == null ? null : analyzeField(field, part2); DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, _locale); df.setLenient(true); DateTools.Resolution resolution = getDateResolution(field); try { part1 = DateTools.dateToString(df.parse(part1), resolution); } catch (Exception e) { } try { Date d2 = df.parse(part2); if (endInclusive) { // The user can only specify the date, not the time, so make sure // the time is set to the latest possible time of that date to really // include all documents: Calendar cal = Calendar.getInstance(_timeZone, _locale); cal.setTime(d2); cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); cal.set(Calendar.SECOND, 59); cal.set(Calendar.MILLISECOND, 999); d2 = cal.getTime(); } part2 = DateTools.dateToString(d2, resolution); } catch (Exception e) { } return newRangeQuery(field, part1, part2, startInclusive, endInclusive); } @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { if ("*".equals(field)) { if ("*".equals(termStr)) { return newMatchAllDocsQuery(); } } if (!_allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"))) { throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); } if (!"*".equals(termStr)) { termStr = analyzeField(field, termStr); } Term t = new Term(field, termStr); return newWildcardQuery(t); } @Override protected Query getRegexpQuery(String field, String termStr) throws ParseException { termStr = analyzeField(field, termStr); Term t = new Term(field, termStr); return newRegexpQuery(t); } @Override protected Query getPrefixQuery(String field, String termStr) throws ParseException { if (!_allowLeadingWildcard && termStr.startsWith("*")) throw new ParseException("'*' not allowed as first character in PrefixQuery"); termStr = analyzeField(field, termStr); Term t = new Term(field, termStr); return newPrefixQuery(t); } @Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { termStr = analyzeField(field, termStr); Term t = new Term(field, termStr); return newFuzzyQuery(t, minSimilarity, _fuzzyPrefixLength); } }