/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2013-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.snippet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import com.jaeksoft.searchlib.snippet.SnippetVectors.SnippetVector;
import com.jaeksoft.searchlib.util.Timer;
/**
 * Collects, from a Lucene query tree, the terms and phrases that target one
 * given field, and uses them to flag the term vectors of a document that
 * actually match the query.
 * <p>
 * Only {@link BooleanQuery} (MUST and SHOULD clauses), {@link TermQuery} and
 * {@link PhraseQuery} nodes are analysed; any other query type is ignored.
 * Every distinct term text is given an integer index (its position in
 * {@link #terms}); the rest of the class works with these indexes.
 */
class SnippetQueries {

	/** Field whose terms are collected; terms of any other field are skipped. */
	private final String field;

	/** Term text to term index (index into {@link #termList}). */
	private final Map<String, Integer> termMap;

	/** Distinct term texts, in order of first appearance in the query. */
	private final List<String> termList;

	/** Indexes of the terms that match on their own (term queries). */
	private final Set<Integer> termQuerySet;

	/** Indexes of the terms that were found inside phrase queries. */
	private final Set<Integer> termPhraseSet;

	/** One entry per phrase query made of at least two terms. */
	private final List<TermSequence> termSequenceList;

	/** Snapshot of the distinct term texts, indexed by term index. */
	final String[] terms;

	/**
	 * Parses the query and builds the term and phrase tables.
	 * 
	 * @param query
	 *            the query to analyse, may be null (nothing is collected)
	 * @param field
	 *            the name of the field whose terms must be collected
	 */
	SnippetQueries(final Query query, final String field) {
		this.field = field;
		termQuerySet = new TreeSet<Integer>();
		termPhraseSet = new TreeSet<Integer>();
		termMap = new TreeMap<String, Integer>();
		termList = new ArrayList<String>();
		termSequenceList = new ArrayList<TermSequence>(2);
		parse(query);
		terms = termList.toArray(new String[termList.size()]);
	}

	/**
	 * Returns the index of the given term text, registering the term first
	 * if it has not been seen yet.
	 * 
	 * @param term
	 *            the term text
	 * @return the index of the term in the term list
	 */
	private int checkTerm(final String term) {
		final Integer known = termMap.get(term);
		if (known != null)
			return known;
		final int pos = termList.size();
		termMap.put(term, pos);
		termList.add(term);
		return pos;
	}

	/**
	 * Registers the term of a term query, provided it targets the field.
	 * 
	 * @param query
	 *            the term query
	 */
	private void parse(final TermQuery query) {
		final Term term = query.getTerm();
		if (!field.equals(term.field()))
			return;
		termQuerySet.add(checkTerm(term.text()));
	}

	/**
	 * An ordered sequence of term indexes (a phrase) together with its slop.
	 */
	private static final class TermSequence {

		/** Slop of the originating phrase query. */
		private final int slop;

		/** Term indexes, in phrase order. */
		private final int[] terms;

		private TermSequence(final List<Integer> termPosSequence, final int slop) {
			terms = new int[termPosSequence.size()];
			int i = 0;
			for (Integer termPos : termPosSequence)
				terms[i++] = termPos;
			this.slop = slop;
		}
	}

	/**
	 * Registers the terms of a phrase query that target the field.
	 * <p>
	 * A phrase holding two terms or more is recorded as a
	 * {@link TermSequence}. A phrase holding exactly one term is equivalent
	 * to a term query, so its term is recorded as such instead of being
	 * dropped.
	 * 
	 * @param query
	 *            the phrase query
	 */
	private void parse(final PhraseQuery query) {
		final Term[] phraseTerms = query.getTerms();
		if (phraseTerms == null)
			return;
		final List<Integer> termPosSequence = new ArrayList<Integer>(
				phraseTerms.length);
		for (Term term : phraseTerms) {
			if (!field.equals(term.field()))
				continue;
			final int pos = checkTerm(term.text());
			termPosSequence.add(pos);
			termPhraseSet.add(pos);
		}
		if (termPosSequence.isEmpty())
			return;
		if (termPosSequence.size() == 1) {
			// Not a real phrase. When the query itself is a single-term
			// phrase it matches like a term query, so highlight it as one.
			if (phraseTerms.length == 1)
				termQuerySet.add(termPosSequence.get(0));
			return;
		}
		termSequenceList.add(new TermSequence(termPosSequence, query
				.getSlop()));
	}

	/**
	 * Walks through the clauses of a boolean query. Only MUST and SHOULD
	 * clauses are analysed; the others are ignored.
	 * 
	 * @param query
	 *            the boolean query
	 */
	private void parse(final BooleanQuery query) {
		final BooleanClause[] clauses = query.getClauses();
		if (clauses == null)
			return;
		for (BooleanClause clause : clauses) {
			switch (clause.getOccur()) {
			case MUST:
			case SHOULD:
				parse(clause.getQuery());
				break;
			default:
				break;
			}
		}
	}

	/**
	 * Dispatches the query to the parser matching its concrete type.
	 * Unsupported query types and null are silently ignored.
	 * 
	 * @param query
	 *            the query to analyse, may be null
	 */
	private void parse(final Query query) {
		if (query == null)
			return;
		if (query instanceof BooleanQuery)
			parse((BooleanQuery) query);
		else if (query instanceof TermQuery)
			parse((TermQuery) query);
		else if (query instanceof PhraseQuery)
			parse((PhraseQuery) query);
	}

	/**
	 * Flags every vector whose term belongs to a term query.
	 * 
	 * @param vectors
	 *            the vectors to check
	 * @param expiration
	 *            time limit, compared with System.currentTimeMillis(); 0
	 *            disables the limit
	 */
	private void checkTermQueries(final Collection<SnippetVector> vectors,
			final long expiration) {
		if (termQuerySet.isEmpty())
			return;
		for (SnippetVector vector : vectors) {
			if (vector.query)
				continue;
			if (termQuerySet.contains(vector.term))
				vector.query = true;
			if (expiration != 0 && System.currentTimeMillis() > expiration)
				return;
		}
	}

	/**
	 * Tracks one attempt to match a {@link TermSequence}, starting from the
	 * vector that matched the first term of the sequence.
	 */
	private static class SequenceCollector {

		private enum Result {
			WRONG, CONTINUE, FULL
		}

		private final TermSequence termSequence;

		/** Vectors matched so far, in sequence order. */
		private final SnippetVector[] vectors;

		/** Number of terms of the sequence already matched. */
		private int foundPos;

		/** Highest position at which the next term is still accepted. */
		private int nextPosition;

		/** Index of the term that is expected next. */
		private int nextTerm;

		private SequenceCollector(final TermSequence termSequence,
				final SnippetVector vector) {
			this.termSequence = termSequence;
			vectors = new SnippetVector[termSequence.terms.length];
			foundPos = 0;
			addVector(vector);
		}

		/**
		 * Stores the vector as the next matched element and moves the
		 * expectation to the following term of the sequence.
		 * 
		 * @return FULL when the sequence is complete, CONTINUE otherwise
		 */
		private Result addVector(final SnippetVector vector) {
			vectors[foundPos++] = vector;
			nextPosition = vector.position + termSequence.slop + 1;
			if (foundPos == vectors.length)
				return Result.FULL;
			nextTerm = termSequence.terms[foundPos];
			return Result.CONTINUE;
		}

		/**
		 * Offers a vector to this collector.
		 * 
		 * @return WRONG when the vector lies beyond the accepted position
		 *         (the attempt has failed), CONTINUE when the attempt is
		 *         still pending, FULL when the sequence has just been
		 *         completed; in that last case all the collected vectors
		 *         are flagged
		 */
		private Result collect(final SnippetVector vector) {
			if (vector.position > nextPosition)
				return Result.WRONG;
			if (vector.term != nextTerm)
				return Result.CONTINUE;
			if (addVector(vector) != Result.FULL)
				return Result.CONTINUE;
			for (SnippetVector v : vectors)
				v.query = true;
			return Result.FULL;
		}
	}

	/**
	 * Flags the vectors that take part in a complete phrase match.
	 * <p>
	 * NOTE(review): the position window logic presumably relies on the
	 * vectors being iterated in ascending position order; confirm against
	 * the caller.
	 * 
	 * @param vectors
	 *            the vectors to check
	 * @param expiration
	 *            time limit, compared with System.currentTimeMillis(); 0
	 *            disables the limit
	 */
	private void checkPhraseQueries(final Collection<SnippetVector> vectors,
			final long expiration) {
		if (termSequenceList.isEmpty())
			return;
		final Set<SequenceCollector> collectors = new HashSet<SequenceCollector>();
		final List<SequenceCollector> finished = new ArrayList<SequenceCollector>();
		for (SnippetVector vector : vectors) {
			if (!termPhraseSet.contains(vector.term))
				continue;
			// Offer the vector to the attempts already running BEFORE
			// starting new ones: a collector must never be offered its own
			// seed vector, otherwise a phrase repeating its first term
			// ("foo foo") would match a single occurrence of that term.
			for (SequenceCollector collector : collectors) {
				switch (collector.collect(vector)) {
				case WRONG:
				case FULL:
					finished.add(collector);
					break;
				default:
					break;
				}
			}
			if (!finished.isEmpty()) {
				collectors.removeAll(finished);
				finished.clear();
			}
			// Start a new attempt for every phrase beginning with this term
			for (TermSequence termSequence : termSequenceList)
				if (termSequence.terms[0] == vector.term)
					collectors.add(new SequenceCollector(termSequence,
							vector));
			if (expiration != 0 && System.currentTimeMillis() > expiration)
				return;
		}
	}

	/**
	 * Flags the vectors matching the term queries, then those matching the
	 * phrase queries. Each pass is measured by its own child timer.
	 * 
	 * @param vectors
	 *            the vectors to check, may be null (nothing is done)
	 * @param parentTimer
	 *            the timer the two child timers are attached to
	 * @param expiration
	 *            time limit, compared with System.currentTimeMillis(); 0
	 *            disables the limit
	 */
	final void checkQueries(final Collection<SnippetVector> vectors,
			final Timer parentTimer, final long expiration) {
		if (vectors == null)
			return;
		Timer t = new Timer(parentTimer, "checkTermQueries");
		checkTermQueries(vectors, expiration);
		t.end(null);
		t = new Timer(parentTimer, "checkPhraseQueries");
		checkPhraseQueries(vectors, expiration);
		t.end(null);
	}
}