package org.apache.lucene.queryparser.flexible.aqp.processors;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.RangeQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.TextableQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.StandardBooleanQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.processors.AnalyzerQueryNodeProcessor;
/**
* This is an improved version of the {@link AnalyzerQueryNodeProcessor} it is
* better because it keeps track of the position offset which is absolutely
* indispensable for proper parsing of expanded queries. And also we save the
* type attribute name with the node
*
* TODO: send a patch and make them accept it
*
* This processor verifies if {@link ConfigurationKeys#ANALYZER} is defined in
* the {@link QueryConfigHandler}. If it is and the analyzer is not
* <code>null</code>, it looks for every {@link FieldQueryNode} that is not
* {@link WildcardQueryNode}, {@link FuzzyQueryNode} or {@link RangeQueryNode}
* contained in the query node tree, then it applies the analyzer to that
* {@link FieldQueryNode} object.
* <p>
* If the analyzer return only one term, the returned term is set to the
* {@link FieldQueryNode} and it's returned.
* <p>
* If the analyzer return more than one term, a {@link TokenizedPhraseQueryNode}
* or {@link MultiPhraseQueryNode} is created, whether there is one or more
* terms at the same position, and it's returned.
* <p>
* If no term is returned by the analyzer a {@link NoTokenFoundQueryNode} object
* is returned.
*
*
* @see ConfigurationKeys#ANALYZER
* @see Analyzer
* @see TokenStream
*/
public class AqpAnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
public String TYPE_ATTRIBUTE = "token_type_attribute";
private Analyzer analyzer;
private boolean positionIncrementsEnabled;
public AqpAnalyzerQueryNodeProcessor() {
// empty constructor
}
@Override
public QueryNode process(QueryNode queryTree) throws QueryNodeException {
Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
if (analyzer != null) {
this.analyzer = analyzer;
this.positionIncrementsEnabled = false;
Boolean positionIncrementsEnabled = getQueryConfigHandler().get(
ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
if (positionIncrementsEnabled != null) {
this.positionIncrementsEnabled = positionIncrementsEnabled;
}
if (this.analyzer != null) {
return super.process(queryTree);
}
}
return queryTree;
}
@Override
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
if (node instanceof TextableQueryNode
&& !(node instanceof WildcardQueryNode)
&& !(node instanceof FuzzyQueryNode)
&& !(node instanceof RegexpQueryNode)
&& !(node.getParent() instanceof RangeQueryNode)) {
FieldQueryNode fieldNode = ((FieldQueryNode) node);
int queryStart = Math.max(fieldNode.getBegin(), 0); // could be -1
String text = fieldNode.getTextAsString();
String field = fieldNode.getFieldAsString();
TokenStream source;
try {
source = this.analyzer.tokenStream(field, new StringReader(text));
source.reset();
} catch (IOException e1) {
throw new RuntimeException(e1);
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
}
TypeAttribute typeAtt = null;
if (buffer.hasAttribute(TypeAttribute.class)) {
typeAtt = buffer.getAttribute(TypeAttribute.class);
}
try {
while (buffer.incrementToken()) {
numTokens++;
int positionIncrement = (posIncrAtt != null) ? posIncrAtt
.getPositionIncrement() : 1;
if (positionIncrement != 0) {
positionCount += positionIncrement;
} else {
severalTokensAtSamePosition = true;
}
}
} catch (IOException e) {
// ignore
}
try {
// rewind the buffer stream
buffer.reset();
// close original stream - all tokens buffered
source.close();
} catch (IOException e) {
// ignore
}
if (!buffer.hasAttribute(CharTermAttribute.class)) {
return new NoTokenFoundQueryNode();
}
CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
int offsetStart = -1;
int offsetEnd = -1;
OffsetAttribute offsetAtt;
if (buffer.hasAttribute(OffsetAttribute.class)) {
offsetAtt = buffer.getAttribute(OffsetAttribute.class);
} else {
offsetAtt = null;
}
if (numTokens == 0) {
return new NoTokenFoundQueryNode();
} else if (numTokens == 1) {
String term = null;
try {
boolean hasNext;
hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
fieldNode.setText(term);
if (offsetAtt != null) {
fieldNode.setBegin(queryStart + offsetAtt.startOffset());
fieldNode.setEnd(queryStart + offsetAtt.endOffset());
}
if (typeAtt != null)
fieldNode.setTag(TYPE_ATTRIBUTE, typeAtt.type());
return fieldNode;
} else if (severalTokensAtSamePosition
|| !(node instanceof QuotedFieldQueryNode)) {
if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
// no phrase query:
LinkedList<QueryNode> children = new LinkedList<QueryNode>();
for (int i = 0; i < numTokens; i++) {
String term = null;
offsetStart = offsetEnd = -1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (offsetAtt != null) {
offsetStart = queryStart + offsetAtt.startOffset();
offsetEnd = queryStart + offsetAtt.endOffset();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
FieldQueryNode fq = new FieldQueryNode(field, term, offsetStart,
offsetEnd);
if (typeAtt != null)
fq.setTag(TYPE_ATTRIBUTE, typeAtt.type());
children.add(fq);
}
return new GroupQueryNode(new StandardBooleanQueryNode(children,
positionCount == 1));
} else {
// phrase query:
MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
int position = -1;
int i = 0;
int termGroupCount = 0;
for (; i < numTokens; i++) {
String term = null;
offsetStart = offsetEnd = -1;
int positionIncrement = 1;
String tokenType = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
if (offsetAtt != null) {
offsetStart = queryStart + offsetAtt.startOffset();
offsetEnd = queryStart + offsetAtt.endOffset();
}
if (typeAtt != null)
tokenType = typeAtt.type();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (positionIncrement > 0 && multiTerms.size() > 0) {
for (FieldQueryNode termNode : multiTerms) {
if (this.positionIncrementsEnabled) {
termNode.setPositionIncrement(position);
} else {
termNode.setPositionIncrement(termGroupCount);
}
mpq.add(termNode);
}
// Only increment once for each "group" of
// terms that were in the same position:
termGroupCount++;
multiTerms.clear();
}
position += positionIncrement;
FieldQueryNode fq = new FieldQueryNode(field, term, offsetStart,
offsetEnd);
fq.setTag(TYPE_ATTRIBUTE, tokenType);
multiTerms.add(fq);
}
for (FieldQueryNode termNode : multiTerms) {
if (this.positionIncrementsEnabled) {
termNode.setPositionIncrement(position);
} else {
termNode.setPositionIncrement(termGroupCount);
}
mpq.add(termNode);
}
return mpq;
}
} else {
TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
offsetStart = offsetEnd = -1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
if (offsetAtt != null) {
offsetStart = queryStart + offsetAtt.startOffset();
offsetEnd = queryStart + offsetAtt.endOffset();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
FieldQueryNode newFieldNode = new FieldQueryNode(field, term,
offsetStart, offsetEnd);
if (typeAtt != null)
newFieldNode.setTag(TYPE_ATTRIBUTE, typeAtt.type());
if (this.positionIncrementsEnabled) {
position += positionIncrement;
newFieldNode.setPositionIncrement(position);
} else {
newFieldNode.setPositionIncrement(i);
}
pq.add(newFieldNode);
}
return pq;
}
}
return node;
}
@Override
protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
return node;
}
@Override
protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
throws QueryNodeException {
return children;
}
}