/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2013 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.snippet;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.CompiledAnalyzer;
import com.jaeksoft.searchlib.analysis.TokenTerm;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.ReaderInterface;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.schema.FieldValueItem;
import com.jaeksoft.searchlib.util.Timer;
class SnippetVectors {
/**
 * One occurrence of a query term inside a field: where it starts and ends,
 * which term it is and at which token position it was found.
 */
static class SnippetVector {

    /** Start offset, as reported by {@link TermVectorOffsetInfo#getStartOffset()}. */
    public final int start;

    /** End offset, as reported by {@link TermVectorOffsetInfo#getEndOffset()}. */
    public final int end;

    /** Term identifier given at construction (populate passes the index in the terms array). */
    public final int term;

    /** Token position given at construction. */
    public final int position;

    /** Initially false; removeIncludes sets it on entries that must be discarded. */
    public boolean remove;

    /**
     * Initially false; removeNonQuery keeps only the entries where it is true.
     * NOTE(review): never set in this file, presumably updated by
     * SnippetQueries.checkQueries - confirm.
     */
    public boolean query;

    /**
     * @param term
     *            identifier of the term this occurrence belongs to
     * @param offsetInfo
     *            source of the start and end offsets, must not be null
     * @param position
     *            token position of the occurrence
     */
    private SnippetVector(final int term,
            final TermVectorOffsetInfo offsetInfo, final int position) {
        // Both flags start cleared
        this.remove = false;
        this.query = false;
        this.term = term;
        this.position = position;
        this.start = offsetInfo.getStartOffset();
        this.end = offsetInfo.getEndOffset();
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Term: ").append(term);
        sb.append(" Start: ").append(start);
        sb.append(" End: ").append(end);
        sb.append(" Pos:").append(position);
        return sb.toString();
    }
}
/**
 * Collects the occurrences of the query terms in one field of one document
 * and returns an iterator over those flagged as matching the query.
 * <p>
 * The work is done in five timed phases: obtain the term position vector,
 * turn it into {@link SnippetVector}s, drop the entries marked by
 * removeIncludes, let the {@link SnippetQueries} check the remaining
 * entries, and finally keep only those whose <code>query</code> flag is
 * set.
 * 
 * @param docId
 *            the document identifier passed to the reader
 * @param reader
 *            the reader used to look up the stored term vector
 * @param snippetQueries
 *            provides the terms and performs the query check
 * @param fieldName
 *            the field to inspect
 * @param values
 *            the field values, analyzed when no stored vector is usable
 * @param analyzer
 *            the analyzer used for that fallback, may be null
 * @param parentTimer
 *            parent of the timers created for each phase
 * @param expiration
 *            forwarded unchanged to SnippetQueries.checkQueries
 * @return an iterator over the retained vectors, or null when there are no
 *         terms or no term position vector could be obtained
 */
final static Iterator<SnippetVector> extractTermVectorIterator(
        final int docId, final ReaderInterface reader,
        final SnippetQueries snippetQueries, final String fieldName,
        List<FieldValueItem> values, CompiledAnalyzer analyzer,
        final Timer parentTimer, final long expiration) throws IOException,
        ParseException, SyntaxError, SearchLibException {
    final String[] queryTerms = snippetQueries.terms;
    if (ArrayUtils.isEmpty(queryTerms))
        return null;

    // Phase 1: stored vector, or one rebuilt from the values
    final Timer vectorTimer = new Timer(parentTimer,
            "getTermPositionVector " + fieldName);
    final TermPositionVector termVector = getTermPositionVector(queryTerms,
            reader, docId, fieldName, values, analyzer, vectorTimer);
    vectorTimer.end(null);
    if (termVector == null)
        return null;

    // Phase 2: one SnippetVector per term occurrence
    final Collection<SnippetVector> found = new ArrayList<SnippetVector>();
    final Timer populateTimer = new Timer(parentTimer, "populate");
    populate(termVector, queryTerms, found, populateTimer);
    populateTimer.end(null);

    // Phase 3: discard the entries flagged by removeIncludes
    final Timer includesTimer = new Timer(parentTimer, "removeIncludes");
    final Collection<SnippetVector> withoutIncludes = removeIncludes(found);
    includesTimer.end(null);

    // Phase 4: query check on the remaining entries
    final Timer checkTimer = new Timer(parentTimer, "checkQueries");
    snippetQueries.checkQueries(withoutIncludes, checkTimer, expiration);
    checkTimer.end(null);

    // Phase 5: keep only the entries flagged as query matches
    final Timer nonQueryTimer = new Timer(parentTimer, "removeNonQuery");
    final Collection<SnippetVector> matching = removeNonQuery(withoutIncludes);
    nonQueryTimer.end(null);

    return matching.iterator();
}
/**
 * Returns a term position vector for the given field of the given document.
 * <p>
 * The vector stored in the index is used when the reader returns one that
 * implements {@link TermPositionVector}. Otherwise, if an analyzer is
 * available, a {@link SnippetTermPositionVector} is built by analyzing each
 * non-null field value in turn.
 * 
 * @param terms
 *            the query terms given to the rebuilt vector
 * @param readerInterface
 *            the reader asked for the stored term vector
 * @param docId
 *            the document identifier
 * @param field
 *            the field name
 * @param values
 *            the field values to analyze in the fallback case; null is
 *            handled like an empty list
 * @param analyzer
 *            the analyzer used in the fallback case, may be null
 * @param timer
 *            not used by this method
 * @return the stored or rebuilt vector, or null when no stored position
 *         vector exists and no analyzer was given
 */
private static final TermPositionVector getTermPositionVector(
        final String[] terms, final ReaderInterface readerInterface,
        final int docId, final String field, List<FieldValueItem> values,
        CompiledAnalyzer analyzer, Timer timer) throws IOException,
        SearchLibException, ParseException, SyntaxError {
    final TermFreqVector termFreqVector = readerInterface.getTermFreqVector(
            docId, field);
    // instanceof is false for null, so no separate null test is required
    if (termFreqVector instanceof TermPositionVector)
        return (TermPositionVector) termFreqVector;
    if (analyzer == null)
        return null;
    final SnippetTermPositionVector stpv = new SnippetTermPositionVector(
            field, terms);
    // A missing value list is treated as "nothing to analyze" instead of
    // failing with a NullPointerException in the loop below.
    if (values != null) {
        int positionOffset = 0;
        int characterOffset = 0;
        final List<TokenTerm> tokenTerms = new ArrayList<TokenTerm>();
        for (FieldValueItem fieldValueItem : values) {
            if (fieldValueItem == null || fieldValueItem.value == null)
                continue;
            analyzer.populate(fieldValueItem.value, tokenTerms);
            positionOffset = stpv.addCollection(tokenTerms, characterOffset,
                    positionOffset);
            // The next value starts one character after the end of this one
            characterOffset += fieldValueItem.value.length() + 1;
            // The same list is reused for every value
            tokenTerms.clear();
        }
    }
    stpv.compile();
    return stpv;
}
/**
 * Adds to <code>vectors</code> one {@link SnippetVector} for every
 * occurrence of every query term found in the term vector.
 * <p>
 * A term is skipped when the vector does not know it (index -1) or when
 * the vector cannot provide both its offsets and its positions: Lucene
 * documents that {@link TermPositionVector#getOffsets(int)} and
 * {@link TermPositionVector#getTermPositions(int)} may return null when
 * that information was not stored. Should the two arrays differ in length,
 * only the occurrences present in both are added.
 * 
 * @param termVector
 *            the vector to read offsets and positions from
 * @param terms
 *            the query terms; the index of a term in this array becomes
 *            the <code>term</code> value of its vectors
 * @param vectors
 *            the collection receiving the new vectors
 * @param parentTimer
 *            parent of the timers created by this method
 */
private static final void populate(final TermPositionVector termVector,
        final String[] terms, final Collection<SnippetVector> vectors,
        Timer parentTimer) throws SearchLibException {
    Timer t = new Timer(parentTimer, "indexesOf");
    final int[] termsIdx = termVector.indexesOf(terms, 0, terms.length);
    t.end(null);
    int i = 0;
    for (int termId : termsIdx) {
        final Timer termTimer = new Timer(parentTimer, "term " + terms[i]);
        if (termId != -1) {
            t = new Timer(termTimer, "getOffsets");
            final TermVectorOffsetInfo[] offsets = termVector
                    .getOffsets(termId);
            t.end(null);
            t = new Timer(termTimer, "getTermPositions");
            final int[] positions = termVector.getTermPositions(termId);
            t.end(null);
            t = new Timer(termTimer, "SnippetVector");
            // Both arrays are needed to build a SnippetVector; either may
            // be null when the field was indexed without that information.
            if (offsets != null && positions != null) {
                final int count = Math.min(offsets.length, positions.length);
                for (int j = 0; j < count; j++)
                    vectors.add(new SnippetVector(i, offsets[j],
                            positions[j]));
            }
            t.end(null);
        }
        termTimer.end(null);
        i++;
    }
}
/**
 * Sorts the vectors by start then end, and drops every vector that is
 * directly followed, in that order, by another vector with the same start
 * and an end that is not smaller.
 * 
 * @param vectorCollection
 *            the vectors to filter; their <code>remove</code> flag may be
 *            set as a side effect
 * @return a new list with the vectors whose <code>remove</code> flag is
 *         not set, in sorted order
 */
private static final Collection<SnippetVector> removeIncludes(
        final Collection<SnippetVector> vectorCollection) {
    final SnippetVector[] sorted = vectorCollection
            .toArray(new SnippetVector[vectorCollection.size()]);
    // The constructor sorts the array in place
    new SnippetVectorSort(sorted);

    // Compare each entry with its predecessor
    for (int idx = 1; idx < sorted.length; idx++) {
        final SnippetVector previous = sorted[idx - 1];
        final SnippetVector current = sorted[idx];
        if (current.start != previous.start)
            continue;
        if (current.end < previous.end)
            continue;
        previous.remove = true;
    }

    final List<SnippetVector> kept = new ArrayList<SnippetVector>(
            sorted.length);
    for (int idx = 0; idx < sorted.length; idx++) {
        final SnippetVector candidate = sorted[idx];
        if (candidate.remove)
            continue;
        kept.add(candidate);
    }
    return kept;
}
/**
 * Returns the vectors whose <code>query</code> flag is set, in their
 * iteration order.
 * 
 * @param vectors
 *            the vectors to filter; the collection itself is not modified
 * @return a new list holding only the flagged vectors
 */
private static final Collection<SnippetVector> removeNonQuery(
        final Collection<SnippetVector> vectors) {
    final List<SnippetVector> matched = new ArrayList<SnippetVector>(
            vectors.size());
    final Iterator<SnippetVector> cursor = vectors.iterator();
    while (cursor.hasNext()) {
        final SnippetVector candidate = cursor.next();
        if (!candidate.query)
            continue;
        matched.add(candidate);
    }
    return matched;
}
/**
 * Sorts an array of {@link SnippetVector} in place, by ascending start and
 * then by ascending end. The sort is performed by the constructor.
 */
private static class SnippetVectorSort implements IntComparator, Swapper {

    private final SnippetVector[] vectors;

    /**
     * Sorts the given array immediately.
     * 
     * @param vectors
     *            the array to sort in place
     */
    private SnippetVectorSort(final SnippetVector[] vectors) {
        this.vectors = vectors;
        Arrays.quickSort(0, vectors.length, this, this);
    }

    @Override
    final public int compare(final Integer k1, final Integer k2) {
        return compare(k1.intValue(), k2.intValue());
    }

    /** Exchanges the two array entries at the given indexes. */
    @Override
    final public void swap(final int k1, final int k2) {
        final SnippetVector v1 = vectors[k1];
        final SnippetVector v2 = vectors[k2];
        vectors[k2] = v1;
        vectors[k1] = v2;
    }

    /**
     * Orders the entries at the given indexes by start, then by end.
     */
    @Override
    final public int compare(final int k1, final int k2) {
        final SnippetVector v1 = vectors[k1];
        final SnippetVector v2 = vectors[k2];
        final int byStart = compareInts(v1.start, v2.start);
        if (byStart != 0)
            return byStart;
        return compareInts(v1.end, v2.end);
    }

    /**
     * Three-way comparison without subtraction, so that the sign stays
     * correct even when the difference would not fit in an int.
     */
    private static int compareInts(final int a, final int b) {
        if (a < b)
            return -1;
        return a > b ? 1 : 0;
    }
}
}