/**
* License Agreement for OpenSearchServer
* <p>
* Copyright (C) 2008-2016 Emmanuel Keller / Jaeksoft
* <p>
* http://www.open-search-server.com
* <p>
* This file is part of OpenSearchServer.
* <p>
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* <p>
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* <p>
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.snippet;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.CompiledAnalyzer;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.ReaderInterface;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.request.AbstractLocalSearchRequest;
import com.jaeksoft.searchlib.schema.*;
import com.jaeksoft.searchlib.snippet.SnippetVectors.SnippetVector;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.Timer;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.lucene.search.Query;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Snippet (highlighting) settings attached to one field, plus the logic that
 * turns the stored values of that field into highlighted snippets.
 * <p>
 * The persistent configuration is made of the highlighting tag, the
 * separator, the maximum snippet size and number, the fragmenter template and
 * an optional time limit. The query-dependent state ({@code snippetQueries},
 * {@code query} and the two analyzers) is transient: it is populated by
 * {@link #initSearchTerms(AbstractLocalSearchRequest)} and cleared by
 * {@link #reset()}.
 */
public class SnippetField extends AbstractField<SnippetField> {

    private static final long serialVersionUID = 1989504404725110730L;

    /** Fragmenter used as a template; a new instance is derived from it for each getSnippets call. */
    private FragmenterAbstract fragmenterTemplate;

    /** Name of the highlighting tag (without angle brackets), e.g. {@code em}. */
    private String tag;

    /** Opening ({@code tags[0]}) and closing ({@code tags[1]}) markup, or null when no tag is set. */
    private String[] tags;

    /** Separator exactly as configured. */
    private String separator;

    /** Separator after HTML entity unescaping; this is the value handed to the SnippetBuilder. */
    private String unescapedSeparator;

    private int maxSnippetSize;

    private int maxSnippetNumber;

    /** Time limit; 0 disables the expiration checks in getSnippets. */
    private int timeLimit;

    private transient SnippetQueries snippetQueries;

    private transient Query query;

    private transient CompiledAnalyzer queryAnalyzer;

    private transient CompiledAnalyzer indexAnalyzer;

    private SnippetField(String fieldName, String tag, String separator, int maxSnippetSize, int maxSnippetNumber,
            FragmenterAbstract fragmenterTemplate, int timeLimit) {
        super(fieldName);
        this.snippetQueries = null;
        // The setters also derive "tags" and "unescapedSeparator".
        setTag(tag);
        setSeparator(separator);
        this.maxSnippetSize = maxSnippetSize;
        this.maxSnippetNumber = maxSnippetNumber;
        this.fragmenterTemplate = fragmenterTemplate;
        this.timeLimit = timeLimit;
    }

    /**
     * Creates a snippet field with the default settings: tag {@code em},
     * separator {@code ...}, maximum size 200, one snippet, no fragmenter and
     * no time limit.
     *
     * @param fieldName the name of the field
     */
    public SnippetField(String fieldName) {
        this(fieldName, "em", "...", 200, 1, FragmenterAbstract.NOFRAGMENTER, 0);
    }

    /**
     * @return a new SnippetField carrying the same configuration; the
     *         transient query state is not copied
     */
    @Override
    public SnippetField duplicate() {
        return new SnippetField(name, tag, separator, maxSnippetSize, maxSnippetNumber, fragmenterTemplate, timeLimit);
    }

    /**
     * @return the simple class name of the fragmenter template, or null when
     *         no fragmenter template is set
     */
    public String getFragmenter() {
        // Same null handling as writeXmlConfig.
        return fragmenterTemplate == null ? null : fragmenterTemplate.getClass().getSimpleName();
    }

    /**
     * Replaces the fragmenter template with a new instance created from the
     * given name.
     *
     * @param fragmenterName the name passed to FragmenterAbstract.newInstance
     * @throws SearchLibException if the fragmenter cannot be instantiated
     */
    public void setFragmenter(String fragmenterName) throws SearchLibException {
        try {
            fragmenterTemplate = FragmenterAbstract.newInstance(fragmenterName);
        } catch (InstantiationException e) {
            throw new SearchLibException(e);
        } catch (IllegalAccessException e) {
            throw new SearchLibException(e);
        }
    }

    /**
     * @return the tag
     */
    public String getTag() {
        return tag;
    }

    /**
     * Sets the highlighting tag and rebuilds the opening/closing markup. A
     * null or empty tag disables the markup.
     *
     * @param tag the tag to set
     */
    public void setTag(String tag) {
        this.tag = tag == null ? null : tag.intern();
        if (tag != null && tag.length() > 0) {
            tags = new String[2];
            tags[0] = '<' + tag + '>';
            tags[1] = "</" + tag + '>';
        } else
            tags = null;
    }

    /**
     * @return the separator
     */
    public String getSeparator() {
        return separator;
    }

    /**
     * Sets the separator and its HTML-unescaped form.
     *
     * @param separator the separator to set
     */
    public void setSeparator(String separator) {
        this.separator = separator == null ? null : separator.intern();
        unescapedSeparator = separator == null ? null : StringEscapeUtils.unescapeHtml4(separator).intern();
    }

    /**
     * @return the maxSnippetSize
     */
    public int getMaxSnippetSize() {
        return maxSnippetSize;
    }

    /**
     * @param maxSnippetSize the maxSnippetSize to set
     */
    public void setMaxSnippetSize(int maxSnippetSize) {
        this.maxSnippetSize = maxSnippetSize;
    }

    /**
     * @return the maxSnippetNumber
     */
    public int getMaxSnippetNumber() {
        return maxSnippetNumber;
    }

    /**
     * @param maxSnippetNumber the maxSnippetNumber to set
     */
    public void setMaxSnippetNumber(int maxSnippetNumber) {
        this.maxSnippetNumber = maxSnippetNumber;
    }

    /**
     * Builds a SnippetField from the attributes of an XML node and puts it
     * into the target list. Nothing is added when the source schema has no
     * field matching the node's {@code name} attribute.
     * <p>
     * Defaults applied to missing attributes: tag {@code em}, separator
     * {@code ...}; a maxSnippetNumber of 0 becomes 1, a maxSnippetSize of 0
     * becomes 200, and timeLimit defaults to 0.
     *
     * @param node   the XML node describing the snippet field
     * @param source the schema fields used to resolve the field name
     * @param target the list receiving the new snippet field
     * @throws IllegalAccessException if the fragmenter cannot be instantiated
     * @throws InstantiationException if the fragmenter cannot be instantiated
     */
    public static void copySnippetFields(Node node, SchemaFieldList source, SnippetFieldList target)
            throws InstantiationException, IllegalAccessException {
        String fieldName = XPathParser.getAttributeString(node, "name");
        String tag = XPathParser.getAttributeString(node, "tag");
        if (tag == null)
            tag = "em";
        int maxSnippetNumber = XPathParser.getAttributeValue(node, "maxSnippetNumber");
        if (maxSnippetNumber == 0)
            maxSnippetNumber = 1;
        int maxSnippetSize = XPathParser.getAttributeValue(node, "maxSnippetSize");
        if (maxSnippetSize == 0)
            maxSnippetSize = 200;
        int timeLimit = DomUtils.getAttributeInteger(node, "timeLimit", 0);
        FragmenterAbstract fragmenter =
                FragmenterAbstract.newInstance(XPathParser.getAttributeString(node, "fragmenterClass"));
        // The fragmenter reads its own settings from the same node.
        fragmenter.setAttributes(node.getAttributes());
        String separator = XPathParser.getAttributeString(node, "separator");
        if (separator == null)
            separator = "...";
        SchemaField schemaField = source.get(fieldName);
        if (schemaField == null)
            return;
        SnippetField field =
                new SnippetField(schemaField.getName(), tag, separator, maxSnippetSize, maxSnippetNumber, fragmenter,
                        timeLimit);
        target.put(field);
    }

    /**
     * Clears the query-dependent state so that the next call to
     * {@link #initSearchTerms(AbstractLocalSearchRequest)} computes it again.
     */
    public final void reset() {
        snippetQueries = null;
        query = null;
        queryAnalyzer = null;
        indexAnalyzer = null;
    }

    /**
     * Populates the query-dependent state (snippet query, query analyzer,
     * index analyzer and snippet queries) from the search request. Does
     * nothing when the state is already initialized.
     *
     * @param searchRequest the request providing the snippet query, the
     *                      analyzers and the language
     */
    public void initSearchTerms(AbstractLocalSearchRequest searchRequest)
            throws ParseException, SyntaxError, IOException, SearchLibException {
        synchronized (this) {
            if (snippetQueries != null)
                return;
            this.query = searchRequest.getSnippetQuery();
            this.queryAnalyzer = searchRequest.getAnalyzer().getCompiledAnalyzer(name);
            this.indexAnalyzer = searchRequest.getConfig()
                    .getSchema()
                    .getIndexPerFieldAnalyzer(searchRequest.getLang())
                    .getCompiledAnalyzer(name);
            snippetQueries = new SnippetQueries(this.query, name);
        }
    }

    /**
     * Appends {@code text[start, end)} to the builder, clamping both bounds
     * to the text. Nothing is appended when the text is null or when the
     * clamped range is empty or inverted.
     */
    private void appendSubString(String text, int start, int end, StringBuilder sb) {
        if (text == null)
            return;
        // Guard against a negative offset coming from a malformed vector.
        if (start < 0)
            start = 0;
        int l = text.length();
        if (end > l)
            end = l;
        if (end < start)
            return;
        sb.append(text, start, end);
    }

    /**
     * Applies to one fragment every vector that ends within it, wrapping each
     * matched range with the highlighting tags, and stores the result as the
     * fragment's highlighted text. The fragment is left untouched when no
     * vector produced any output.
     *
     * @param currentVector  the first vector not consumed yet, may be null
     * @param vectorIterator the source of the following vectors
     * @param startOffset    the offset of the fragment, i.e. the sum of the
     *                       lengths of the fragments preceding it
     * @param fragment       the fragment to highlight
     * @return the first vector that does not end within this fragment, or
     *         null when the vectors are exhausted
     */
    private SnippetVector checkValue(SnippetVector currentVector, Iterator<SnippetVector> vectorIterator,
            int startOffset, Fragment fragment) {
        if (currentVector == null)
            return null;
        StringBuilder result = new StringBuilder();
        String originalText = fragment.getOriginalText();
        int originalTextLength = originalText.length();
        int endOffset = startOffset + originalTextLength;
        // Position, relative to the fragment, of the first character not copied yet.
        int pos = 0;
        while (currentVector != null) {
            int end = currentVector.end - fragment.vectorOffset;
            if (end > endOffset)
                break;
            int start = currentVector.start - fragment.vectorOffset;
            if (start >= startOffset) {
                // Plain text located before the match.
                appendSubString(originalText, pos, start - startOffset, result);
                if (tags != null)
                    result.append(tags[0]);
                // The matched text itself.
                appendSubString(originalText, start - startOffset, end - startOffset, result);
                if (tags != null)
                    result.append(tags[1]);
                pos = end - startOffset;
            }
            currentVector = vectorIterator.hasNext() ? vectorIterator.next() : null;
        }
        if (result.length() == 0)
            return currentVector;
        // Remaining plain text located after the last match.
        if (pos < originalTextLength)
            appendSubString(originalText, pos, originalTextLength, result);
        fragment.setHighlightedText(result.toString());
        return currentVector;
    }

    /**
     * Builds up to {@code maxSnippetNumber} snippets from the given values
     * and adds them to {@code snippets}.
     * <p>
     * The values are split into fragments, the fragments are highlighted
     * using the term vectors, and for each snippet the best scoring
     * highlighted fragment is chosen. When no highlighted fragment is left,
     * the snippet is built from the first remaining fragment instead.
     *
     * @param docId       the document identifier
     * @param reader      the index reader used to extract the term vectors
     * @param values      the field values; when null the method returns false
     * @param snippets    the list receiving the generated snippets
     * @param parentTimer the parent timer of the timers created here
     * @return true if at least one snippet was built around a highlighted
     *         fragment, false otherwise
     */
    public boolean getSnippets(final int docId, final ReaderInterface reader, final List<FieldValueItem> values,
            final List<FieldValueItem> snippets, final Timer parentTimer)
            throws IOException, ParseException, SyntaxError, SearchLibException {
        if (values == null)
            return false;
        final Timer timer = new Timer(parentTimer, "SnippetField " + this.name);
        // A value of 0 means "no expiration". "expiration" is compared with
        // System.currentTimeMillis() below; half of the budget is granted to
        // the term vector extraction.
        final long halfTimeExpiration = this.timeLimit == 0 ? 0 : timer.getStartOffset(this.timeLimit / 2);
        final long expiration = this.timeLimit == 0 ? 0 : timer.getStartOffset(this.timeLimit);

        FragmenterAbstract fragmenter = fragmenterTemplate.newInstance();
        SnippetVector currentVector = null;

        // Step 1: extract the term vectors matching the snippet queries.
        Timer t = new Timer(timer, "extractTermVectorIterator");
        Iterator<SnippetVector> vectorIterator =
                SnippetVectors.extractTermVectorIterator(docId, reader, snippetQueries, name, values, indexAnalyzer, t,
                        halfTimeExpiration);
        if (vectorIterator != null)
            currentVector = vectorIterator.hasNext() ? vectorIterator.next() : null;
        t.end(null);

        // Step 2: split every value into fragments.
        t = new Timer(timer, "getFraments");
        int startOffset = 0;
        FragmentList fragments = new FragmentList();
        int vectorOffset = 0;
        for (FieldValueItem valueItem : values) {
            String value = valueItem.getValue();
            if (value != null) {
                // The vectorOffset increment is tied to the EndOffset bug
                // (see Lucene patches 579 and 1458).
                fragmenter.getFragments(value, fragments, vectorOffset++);
            }
        }
        t.end(null);
        if (fragments.size() == 0) {
            timer.end(null);
            return false;
        }

        // Step 3: highlight the fragments.
        t = new Timer(timer, "checkValue");
        Fragment fragment = fragments.first();
        while (fragment != null) {
            currentVector = checkValue(currentVector, vectorIterator, startOffset, fragment);
            startOffset += fragment.getOriginalText().length();
            fragment = fragment.next();
        }
        t.end(null);

        // Step 4: build the snippets, one per iteration.
        Timer sbTimer = new Timer(timer, "snippetBuilder");
        boolean result = false;
        int snippetCounter = maxSnippetNumber;
        int scoredFragment = 0;
        while (snippetCounter-- != 0) {
            Fragment bestScoreFragment = null;
            fragment = Fragment.findNextHighlightedFragment(fragments.first());
            List<Fragment> scoreFragments = new ArrayList<Fragment>(0);
            double maxSearchScore = 0;
            t = new Timer(sbTimer, "fragmentScore");
            boolean expired = false;
            // Score the highlighted fragments until the time limit is reached.
            while (fragment != null) {
                double sc = fragment.searchScore(name, queryAnalyzer, query);
                if (sc > maxSearchScore)
                    maxSearchScore = sc;
                scoreFragments.add(fragment);
                fragment = Fragment.findNextHighlightedFragment(fragment.next());
                scoredFragment++;
                if (expiration != 0) {
                    if (System.currentTimeMillis() > expiration) {
                        expired = true;
                        break;
                    }
                }
            }
            t.end("fragmentScore " + scoredFragment + " " + expired);
            for (Fragment frag : scoreFragments)
                bestScoreFragment = Fragment.bestScore(bestScoreFragment, frag, maxSearchScore, maxSnippetSize);
            if (bestScoreFragment != null) {
                SnippetBuilder snippetBuilder =
                        new SnippetBuilder(maxSnippetSize, unescapedSeparator, tags, bestScoreFragment);
                if (snippetBuilder.length() > 0)
                    snippets.add(new FieldValueItem(FieldValueOriginEnum.SNIPPET, snippetBuilder.toString()));
                // The fragments used by this snippet are not reused by the next ones.
                fragments.remove(snippetBuilder.getFragments());
                result = true;
                continue;
            }
            // No highlighted fragment left: fall back to the first remaining
            // fragment. This path does not set the result to true.
            if (fragments.first() == null)
                break;
            SnippetBuilder snippetBuilder =
                    new SnippetBuilder(maxSnippetSize, unescapedSeparator, tags, fragments.first());
            if (snippetBuilder.length() > 0) {
                snippets.add(new FieldValueItem(FieldValueOriginEnum.SNIPPET, snippetBuilder.toString()));
                fragments.remove(snippetBuilder.getFragments());
            }
        }
        sbTimer.end(null);
        timer.end(null);
        return result;
    }

    /**
     * Writes this snippet field as a {@code field} element whose attributes
     * are the ones read by
     * {@link #copySnippetFields(Node, SchemaFieldList, SnippetFieldList)}.
     */
    @Override
    public void writeXmlConfig(XmlWriter xmlWriter) throws SAXException {
        xmlWriter.startElement("field", "name", name, "tag", tag, "separator", separator, "maxSnippetSize",
                Integer.toString(maxSnippetSize), "maxSnippetNumber", Integer.toString(maxSnippetNumber),
                "fragmenterClass", fragmenterTemplate != null ? fragmenterTemplate.getClass().getSimpleName() : null,
                "timeLimit", Integer.toString(timeLimit));
        xmlWriter.endElement();
    }

    /**
     * Compares, in that order: the inherited ordering, the fragmenter class
     * name, the tag, the separator, the maximum snippet size and the maximum
     * snippet number. Null values are ordered before non-null values.
     * <p>
     * NOTE(review): timeLimit is not part of the comparison; confirm whether
     * that is intended.
     */
    @Override
    public int compareTo(SnippetField f) {
        int c = super.compareTo(f);
        if (c != 0)
            return c;
        if ((c = compareNullable(fragmenterClassName(fragmenterTemplate), fragmenterClassName(f.fragmenterTemplate)))
                != 0)
            return c;
        if ((c = compareNullable(tag, f.tag)) != 0)
            return c;
        if ((c = compareNullable(separator, f.separator)) != 0)
            return c;
        if ((c = compareInt(maxSnippetSize, f.maxSnippetSize)) != 0)
            return c;
        return compareInt(maxSnippetNumber, f.maxSnippetNumber);
    }

    /** Returns the fully qualified class name of the fragmenter, or null when there is none. */
    private static String fragmenterClassName(FragmenterAbstract fragmenter) {
        return fragmenter == null ? null : fragmenter.getClass().getName();
    }

    /** Null-safe string comparison; null is ordered before any non-null value. */
    private static int compareNullable(String a, String b) {
        if (a == null)
            return b == null ? 0 : -1;
        if (b == null)
            return 1;
        return a.compareTo(b);
    }

    /** Integer comparison that cannot overflow, unlike a subtraction. */
    private static int compareInt(int a, int b) {
        return a < b ? -1 : (a > b ? 1 : 0);
    }

    /**
     * @return the timeLimit
     */
    public int getTimeLimit() {
        return timeLimit;
    }

    /**
     * @param timeLimit the timeLimit to set
     */
    public void setTimeLimit(int timeLimit) {
        this.timeLimit = timeLimit;
    }
}