package org.xbib.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import java.util.HashMap;
import java.util.Map;
/**
 * Analyzer whose token stream is a {@link MockTokenizer} driven by a
 * {@link CharacterRunAutomaton}, followed by a {@link MockTokenFilter}.
 *
 * <p>Instances are created with {@code PER_FIELD_REUSE_STRATEGY}. The position
 * increment gap, offset gap and maximum token length can be changed through
 * the setters after construction.
 */
public final class MockAnalyzer extends Analyzer {

    /** Automaton handed to every {@link MockTokenizer} this analyzer creates. */
    private final CharacterRunAutomaton runAutomaton;

    /** Lower-casing flag handed to every {@link MockTokenizer} this analyzer creates. */
    private final boolean lowerCase;

    /** Automaton handed to every {@link MockTokenFilter} this analyzer creates. */
    private final CharacterRunAutomaton filter;

    /** Value reported by {@link #getPositionIncrementGap(String)}; 0 until set. */
    private int positionIncrementGap;

    /** Explicit offset gap; {@code null} means "defer to the superclass". */
    private Integer offsetGap;

    /** Per-field marker recorded the first time a field name is seen. */
    private final Map<String, Integer> previousMappings = new HashMap<>();

    /** Maximum token length handed to each newly created {@link MockTokenizer}. */
    private int maxTokenLength = MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH;

    /**
     * Creates an analyzer that uses {@link MockTokenFilter#EMPTY_STOPSET} as its filter automaton.
     *
     * @param runAutomaton automaton passed to the tokenizer
     * @param lowerCase    lower-casing flag passed to the tokenizer
     */
    public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase) {
        this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET);
    }

    /**
     * Creates an analyzer with an explicit filter automaton.
     *
     * @param runAutomaton automaton passed to the tokenizer
     * @param lowerCase    lower-casing flag passed to the tokenizer
     * @param filter       automaton passed to the {@link MockTokenFilter}
     */
    public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter) {
        super(PER_FIELD_REUSE_STRATEGY);
        this.runAutomaton = runAutomaton;
        this.lowerCase = lowerCase;
        this.filter = filter;
    }

    /**
     * Builds the tokenizer/filter pair for a field, using the current maximum token length.
     *
     * @param fieldName name of the field the components are created for
     * @return components consisting of a {@link MockTokenizer} and a {@link MockTokenFilter} wrapping it
     */
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
        MockTokenizer source = new MockTokenizer(runAutomaton, lowerCase, maxTokenLength);
        TokenFilter sink = rememberField(new MockTokenFilter(source, filter), fieldName);
        return new TokenStreamComponents(source, sink);
    }

    /**
     * Records the field name on first sight and hands the stream back untouched.
     *
     * @param stream    filter chain built for the field
     * @param fieldName field the chain belongs to
     * @return {@code stream}, unchanged
     */
    private synchronized TokenFilter rememberField(TokenFilter stream, String fieldName) {
        // -1 means "no payloads"; it is stored so the choice stays consistent for this field.
        previousMappings.putIfAbsent(fieldName, -1);
        return stream;
    }

    /**
     * Sets the position increment gap reported for every field.
     *
     * @param positionIncrementGap the gap to report
     */
    public void setPositionIncrementGap(int positionIncrementGap) {
        this.positionIncrementGap = positionIncrementGap;
    }

    /**
     * Returns the configured position increment gap; the field name is ignored.
     *
     * @param fieldName not used
     * @return the value last given to {@link #setPositionIncrementGap(int)}, or 0 if never set
     */
    @Override
    public int getPositionIncrementGap(String fieldName) {
        return positionIncrementGap;
    }

    /**
     * Sets a new offset gap, which is added to the offset when several fields
     * with the same name are indexed.
     *
     * @param offsetGap the offset gap that should be used
     */
    public void setOffsetGap(int offsetGap) {
        this.offsetGap = offsetGap;
    }

    /**
     * Returns the offset gap between tokens when several fields with the same name were added.
     *
     * @param fieldName only consulted (by the superclass) when no explicit gap has been set
     * @return the explicitly configured gap if there is one, otherwise the superclass value
     */
    @Override
    public int getOffsetGap(String fieldName) {
        if (offsetGap != null) {
            return offsetGap;
        }
        return super.getOffsetGap(fieldName);
    }

    /**
     * Sets the maximum token length that {@link #createComponents(String)} passes to
     * each {@link MockTokenizer} it constructs.
     *
     * @param length the new maximum token length
     */
    public void setMaxTokenLength(int length) {
        this.maxTokenLength = length;
    }
}