package de.spinscale.elasticsearch.service.suggest; import de.spinscale.elasticsearch.action.suggest.refresh.ShardSuggestRefreshRequest; import de.spinscale.elasticsearch.action.suggest.refresh.ShardSuggestRefreshResponse; import de.spinscale.elasticsearch.action.suggest.statistics.FstStats; import de.spinscale.elasticsearch.action.suggest.statistics.ShardSuggestStatisticsResponse; import de.spinscale.elasticsearch.action.suggest.suggest.ShardSuggestRequest; import de.spinscale.elasticsearch.action.suggest.suggest.ShardSuggestResponse; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.spell.HighFrequencyDictionary; import org.apache.lucene.search.spell.SpellChecker; import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; import org.apache.lucene.search.suggest.fst.FSTCompletionLookup; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.base.Function; import org.elasticsearch.common.base.Joiner; import org.elasticsearch.common.base.Objects; import org.elasticsearch.common.cache.CacheBuilder; import org.elasticsearch.common.cache.CacheLoader; import org.elasticsearch.common.cache.LoadingCache; import org.elasticsearch.common.collect.Collections2; import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.internal.ToStringBuilder; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Streamable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.settings.IndexSettings; import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.IndexShardState; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.service.IndexShard; import java.io.IOException; import java.io.Serializable; import java.util.*; import java.util.concurrent.locks.ReentrantLock; public class ShardSuggestService extends AbstractIndexShardComponent { private final IndexShard indexShard; private final ReentrantLock lock = new ReentrantLock(); private IndexReader indexReader; private final LoadingCache<String, FSTCompletionLookup> lookupCache; private final LoadingCache<FieldType, AnalyzingSuggester> analyzingSuggesterCache; private final LoadingCache<FieldType, FuzzySuggester> fuzzySuggesterCache; private final LoadingCache<String, HighFrequencyDictionary> dictCache; private final LoadingCache<String, SpellChecker> spellCheckerCache; private final LoadingCache<String, RAMDirectory> ramDirectoryCache; @Inject public ShardSuggestService(ShardId shardId, @IndexSettings Settings indexSettings, IndexShard indexShard, final AnalysisService analysisService, final MapperService mapperService) { super(shardId, indexSettings); this.indexShard = indexShard; ramDirectoryCache = CacheBuilder.newBuilder().build( new CacheLoader<String, RAMDirectory>() { @Override public RAMDirectory load(String field) throws Exception { return new RAMDirectory(); } } ); dictCache = CacheBuilder.newBuilder().build( new CacheLoader<String, HighFrequencyDictionary>() { @Override public HighFrequencyDictionary load(String field) throws Exception { return new HighFrequencyDictionary(createOrGetIndexReader(), field, 0.00001f); } } ); spellCheckerCache = CacheBuilder.newBuilder().build( new CacheLoader<String, SpellChecker>() { @Override public SpellChecker load(String field) throws Exception { SpellChecker spellChecker = new SpellChecker(ramDirectoryCache.get(field)); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, new WhitespaceAnalyzer(Version.LUCENE_44)); spellChecker.indexDictionary(dictCache.getUnchecked(field), indexWriterConfig, false); return spellChecker; } } ); lookupCache = CacheBuilder.newBuilder().build( new CacheLoader<String, FSTCompletionLookup>() { @Override public FSTCompletionLookup load(String field) throws Exception { FSTCompletionLookup lookup = new FSTCompletionLookup(); lookup.build(dictCache.getUnchecked(field)); return lookup; } } ); analyzingSuggesterCache = CacheBuilder.newBuilder().build( new AbstractCacheLoaderSuggester.CacheLoaderAnalyzingSuggester(mapperService, analysisService, dictCache)); fuzzySuggesterCache = CacheBuilder.newBuilder().build( new AbstractCacheLoaderSuggester.CacheLoaderFuzzySuggester(mapperService, analysisService, dictCache)); } public ShardSuggestRefreshResponse refresh(ShardSuggestRefreshRequest shardSuggestRefreshRequest) { String field = shardSuggestRefreshRequest.field(); if (!Strings.hasLength(field)) { update(); } else { resetIndexReader(); HighFrequencyDictionary dict = dictCache.getIfPresent(field); if (dict != null) dictCache.refresh(field); RAMDirectory ramDirectory = ramDirectoryCache.getIfPresent(field); if (ramDirectory != null) { ramDirectory.close(); ramDirectoryCache.invalidate(field); } SpellChecker spellChecker = spellCheckerCache.getIfPresent(field); if (spellChecker != null) { spellCheckerCache.refresh(field); try { spellChecker.close(); } catch (IOException e) { logger.error("Could not close spellchecker in indexshard [{}] for field [{}]", e, indexShard, field); } } FSTCompletionLookup lookup = lookupCache.getIfPresent(field); if (lookup != null) lookupCache.refresh(field); for (FieldType fieldType : analyzingSuggesterCache.asMap().keySet()) { if (fieldType.field().equals(shardSuggestRefreshRequest.field())) { analyzingSuggesterCache.refresh(fieldType); } } for (FieldType fieldType : fuzzySuggesterCache.asMap().keySet()) { if (fieldType.field().equals(shardSuggestRefreshRequest.field())) { fuzzySuggesterCache.refresh(fieldType); } } } return new ShardSuggestRefreshResponse(shardId.index().name(), shardId.id()); } public void shutDown() { resetIndexReader(); dictCache.invalidateAll(); for (Map.Entry<String, SpellChecker> entry : spellCheckerCache.asMap().entrySet()) { try { ramDirectoryCache.getUnchecked(entry.getKey()).close(); entry.getValue().close(); } catch (IOException e) { logger.error("Could not close spellchecker in indexshard [{}] for field [{}]", e, indexShard, entry.getKey()); } } spellCheckerCache.invalidateAll(); ramDirectoryCache.invalidateAll(); lookupCache.invalidateAll(); analyzingSuggesterCache.invalidateAll(); fuzzySuggesterCache.invalidateAll(); } public void update() { resetIndexReader(); for (String field : dictCache.asMap().keySet()) { dictCache.refresh(field); } try { for (String field : spellCheckerCache.asMap().keySet()) { SpellChecker oldSpellchecker = spellCheckerCache.getUnchecked(field); RAMDirectory oldRamDirectory = ramDirectoryCache.getUnchecked(field); ramDirectoryCache.refresh(field); spellCheckerCache.refresh(field); oldRamDirectory.close(); oldSpellchecker.close(); } } catch (IOException e ) { logger.error("Error refreshing spell checker cache [{}]", e, shardId); } for (String field : lookupCache.asMap().keySet()) { lookupCache.refresh(field); } for (FieldType fieldType : analyzingSuggesterCache.asMap().keySet()) { analyzingSuggesterCache.refresh(fieldType); } for (FieldType fieldType : fuzzySuggesterCache.asMap().keySet()) { fuzzySuggesterCache.refresh(fieldType); } } public ShardSuggestResponse suggest(ShardSuggestRequest shardSuggestRequest) { List<String> suggestions; try { suggestions = Lists.newArrayList(getSuggestions(shardSuggestRequest)); } catch (IOException e) { throw new ElasticsearchException("Error getting suggestions", e); } return new ShardSuggestResponse(shardId.index().name(), shardId.id(), suggestions); } private Collection<String> getSimilarSuggestions(ShardSuggestRequest shardSuggestRequest) { String field = shardSuggestRequest.field(); String term = shardSuggestRequest.term(); Integer limit = shardSuggestRequest.size(); Float similarity = shardSuggestRequest.similarity(); try { String[] suggestSimilar = spellCheckerCache.getUnchecked(field).suggestSimilar(term, limit, similarity); return Arrays.asList(suggestSimilar); } catch (IOException e) { logger.error("Error getting spellchecker suggestions for shard [{}] field [{}] term [{}] limit [{}] similarity [{}]", e, shardId, field, term, limit, similarity); } return Collections.emptyList(); } private Collection<String> getSuggestions(ShardSuggestRequest shardSuggestRequest) throws IOException { List<LookupResult> lookupResults = Lists.newArrayList(); if ("full".equals(shardSuggestRequest.suggestType())) { AnalyzingSuggester analyzingSuggester = analyzingSuggesterCache.getUnchecked(new FieldType(shardSuggestRequest)); lookupResults.addAll(analyzingSuggester.lookup(shardSuggestRequest.term(), false, shardSuggestRequest.size())); } else if ("fuzzy".equals(shardSuggestRequest.suggestType())) { lookupResults.addAll(fuzzySuggesterCache.getUnchecked(new FieldType(shardSuggestRequest)) .lookup(shardSuggestRequest.term(), false, shardSuggestRequest.size())); } else { lookupResults.addAll(lookupCache.getUnchecked(shardSuggestRequest.field()) .lookup(shardSuggestRequest.term(), true, shardSuggestRequest.size() + 1)); Collection<String> suggestions = Collections2.transform(lookupResults, new LookupResultToStringFunction()); float similarity = shardSuggestRequest.similarity(); if (similarity < 1.0f && suggestions.size() < shardSuggestRequest.size()) { suggestions = Lists.newArrayList(suggestions); suggestions.addAll(getSimilarSuggestions(shardSuggestRequest)); } return suggestions; } return Collections2.transform(lookupResults, new LookupResultToStringFunction()); } private class LookupResultToStringFunction implements Function<LookupResult, String> { @Override public String apply(LookupResult result) { return result.key.toString(); } } public void resetIndexReader() { IndexReader currentIndexReader = null; if (indexShard.state() == IndexShardState.STARTED) { try (Engine.Searcher currentIndexSearcher = indexShard.acquireSearcher( "suggest" )) { currentIndexReader = currentIndexSearcher.reader(); } } // if this index reader is not used in the current index searcher, we need to decrease the old refcount if (indexReader != null && indexReader.getRefCount() > 0 && !indexReader.equals(currentIndexReader)) { try { indexReader.decRef(); } catch (IOException e) { logger.error("Error decreasing indexreader ref count [{}] of shard [{}]", e, indexReader.getRefCount(), shardId); } } indexReader = null; } public ShardSuggestStatisticsResponse getStatistics() { ShardSuggestStatisticsResponse shardSuggestStatisticsResponse = new ShardSuggestStatisticsResponse(shardId()); for (FieldType fieldType : analyzingSuggesterCache.asMap().keySet()) { long sizeInBytes = analyzingSuggesterCache.getIfPresent(fieldType).ramBytesUsed(); FstStats.FstIndexShardStats fstIndexShardStats = new FstStats.FstIndexShardStats(shardId, "analyzingsuggester", fieldType, sizeInBytes); shardSuggestStatisticsResponse.getFstIndexShardStats().add(fstIndexShardStats); } for (FieldType fieldType : fuzzySuggesterCache.asMap().keySet()) { long sizeInBytes = fuzzySuggesterCache.getIfPresent(fieldType).ramBytesUsed(); FstStats.FstIndexShardStats fstIndexShardStats = new FstStats.FstIndexShardStats(shardId, "fuzzysuggester", fieldType, sizeInBytes); shardSuggestStatisticsResponse.getFstIndexShardStats().add(fstIndexShardStats); } return shardSuggestStatisticsResponse; } // this does not look thread safe and nice... private IndexReader createOrGetIndexReader() { try { if (indexReader == null) { lock.lock(); if (indexReader == null) { try (Engine.Searcher indexSearcher = indexShard.acquireSearcher( "suggest" )) { indexReader = indexSearcher.reader(); // logger.info("1 shard {} : ref count {}", shardId, indexReader.getRefCount()); } // If an indexreader closes, we have to refresh all our data structures! indexReader.addReaderClosedListener(new IndexReader.ReaderClosedListener() { @Override public void onClose(IndexReader reader) { update(); } }); } } } finally { if (lock.isLocked()) { lock.unlock(); } } return indexReader; } public static class FieldType implements Streamable, Serializable, ToXContent { private String field; private List<String> types = Lists.newArrayList(); private String queryAnalyzer; private String indexAnalyzer; private boolean preservePositionIncrements = true; public FieldType() {} public FieldType(ShardSuggestRequest shardSuggestRequest) { this.field = shardSuggestRequest.field(); this.types = Arrays.asList(shardSuggestRequest.types()); this.queryAnalyzer = shardSuggestRequest.queryAnalyzer(); this.indexAnalyzer = shardSuggestRequest.indexAnalyzer(); this.preservePositionIncrements = shardSuggestRequest.preservePositionIncrements(); } public String field() { return field; } public String[] types() { return types.toArray(new String[]{}); } public String queryAnalyzer() { return queryAnalyzer; } public String indexAnalyzer() { return indexAnalyzer; } public boolean preservePositionIncrements() { return preservePositionIncrements; } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } final FieldType other = (FieldType) obj; return Objects.equal(this.field(), other.field()) && Objects.equal(this.queryAnalyzer(), other.queryAnalyzer()) && Objects.equal(this.indexAnalyzer(), other.indexAnalyzer()) && Objects.equal(this.types, other.types) && Objects.equal(this.preservePositionIncrements(), other.preservePositionIncrements()); } @Override public int hashCode() { int hashCode = this.field().hashCode(); hashCode += this.types.hashCode(); if (this.queryAnalyzer != null) hashCode += this.queryAnalyzer.hashCode(); if (this.indexAnalyzer != null) hashCode += this.indexAnalyzer.hashCode(); hashCode += Boolean.valueOf(preservePositionIncrements).hashCode(); return hashCode; } @Override public String toString() { ToStringBuilder toStringBuilder = new ToStringBuilder(this.getClass()) .add("field", this.field()); toStringBuilder.add("preservePositionIncrements", this.preservePositionIncrements); if (queryAnalyzer != null && queryAnalyzer.equals(indexAnalyzer)) { toStringBuilder.add("analyzer", this.queryAnalyzer); } else { if (queryAnalyzer != null) { toStringBuilder.add("queryAnalyzer", queryAnalyzer); } if (indexAnalyzer != null) { toStringBuilder.add("indexAnalyzer", indexAnalyzer); } } if (types.size() > 0) { toStringBuilder.add("types", Joiner.on("-").join(types)); } return toStringBuilder.toString(); } @Override public void readFrom(StreamInput in) throws IOException { field = in.readString(); queryAnalyzer = in.readOptionalString(); indexAnalyzer = in.readOptionalString(); types = (List<String>) in.readGenericValue(); preservePositionIncrements = in.readBoolean(); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(field); out.writeOptionalString(queryAnalyzer); out.writeOptionalString(indexAnalyzer); out.writeGenericValue(types); out.writeBoolean(preservePositionIncrements); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { //builder.startObject(field); builder.field("field", field); if (queryAnalyzer != null && queryAnalyzer.equals(indexAnalyzer)) { builder.field("analyzer", this.queryAnalyzer); } else { if (queryAnalyzer != null) builder.field("queryAnalyzer", queryAnalyzer); if (indexAnalyzer != null) builder.field("indexAnalyzer", indexAnalyzer); } if (!preservePositionIncrements) builder.field("preservePositionIncrements", preservePositionIncrements); if (types.size() > 0) builder.field("types", types()); //builder.endObject(); return builder; } } }