/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.fielddata.plain; import org.apache.lucene.index.*; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRefBuilder; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder; import org.elasticsearch.index.mapper.MappedFieldType.Names; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.search.MultiValueMode; import java.io.IOException; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldData<AtomicOrdinalsFieldData> implements IndexOrdinalsFieldData { protected Settings frequency; protected Settings regex; protected final CircuitBreakerService breakerService; protected AbstractIndexOrdinalsFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache, CircuitBreakerService breakerService) { super(index, indexSettings, fieldNames, fieldDataType, cache); final Map<String, Settings> groups = fieldDataType.getSettings().getGroups("filter"); frequency = groups.get("frequency"); regex = groups.get("regex"); this.breakerService = breakerService; } @Override public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) { return new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested); } @Override public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) { if (indexReader.leaves().size() <= 1) { // ordinals are already global return this; } boolean fieldFound = false; for (LeafReaderContext context : indexReader.leaves()) { if (context.reader().getFieldInfos().fieldInfo(getFieldNames().indexName()) != null) { fieldFound = true; break; } } if (fieldFound == false) { // Some directory readers may be wrapped and report different set of fields and use the same cache key. // If a field can't be found then it doesn't mean it isn't there, // so if a field doesn't exist then we don't cache it and just return an empty field data instance. // The next time the field is found, we do cache. try { return GlobalOrdinalsBuilder.buildEmpty(index(), indexSettings(), indexReader, this); } catch (IOException e) { throw new RuntimeException(e); } } try { return cache.load(indexReader, this); } catch (Throwable e) { if (e instanceof ElasticsearchException) { throw (ElasticsearchException) e; } else { throw new ElasticsearchException(e.getMessage(), e); } } } @Override public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception { return GlobalOrdinalsBuilder.build(indexReader, this, indexSettings(), breakerService, logger); } @Override protected AtomicOrdinalsFieldData empty(int maxDoc) { return AbstractAtomicOrdinalsFieldData.empty(); } protected TermsEnum filter(Terms terms, LeafReader reader) throws IOException { TermsEnum iterator = terms.iterator(); if (iterator == null) { return null; } if (iterator != null && frequency != null) { iterator = FrequencyFilter.filter(iterator, terms, reader, frequency); } if (iterator != null && regex != null) { iterator = RegexFilter.filter(iterator, terms, reader, regex); } return iterator; } private static final class FrequencyFilter extends FilteredTermsEnum { private int minFreq; private int maxFreq; public FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) { super(delegate, false); this.minFreq = minFreq; this.maxFreq = maxFreq; } public static TermsEnum filter(TermsEnum toFilter, Terms terms, LeafReader reader, Settings settings) throws IOException { int docCount = terms.getDocCount(); if (docCount == -1) { docCount = reader.maxDoc(); } final double minFrequency = settings.getAsDouble("min", 0d); final double maxFrequency = settings.getAsDouble("max", docCount+1d); final double minSegmentSize = settings.getAsInt("min_segment_size", 0); if (minSegmentSize < docCount) { final int minFreq = minFrequency > 1.0? (int) minFrequency : (int)(docCount * minFrequency); final int maxFreq = maxFrequency > 1.0? (int) maxFrequency : (int)(docCount * maxFrequency); assert minFreq < maxFreq; return new FrequencyFilter(toFilter, minFreq, maxFreq); } return toFilter; } @Override protected AcceptStatus accept(BytesRef arg0) throws IOException { int docFreq = docFreq(); if (docFreq >= minFreq && docFreq <= maxFreq) { return AcceptStatus.YES; } return AcceptStatus.NO; } } private static final class RegexFilter extends FilteredTermsEnum { private final Matcher matcher; private final CharsRefBuilder spare = new CharsRefBuilder(); public RegexFilter(TermsEnum delegate, Matcher matcher) { super(delegate, false); this.matcher = matcher; } public static TermsEnum filter(TermsEnum iterator, Terms terms, LeafReader reader, Settings regex) { String pattern = regex.get("pattern"); if (pattern == null) { return iterator; } Pattern p = Pattern.compile(pattern); return new RegexFilter(iterator, p.matcher("")); } @Override protected AcceptStatus accept(BytesRef arg0) throws IOException { spare.copyUTF8Bytes(arg0); matcher.reset(spare.get()); if (matcher.matches()) { return AcceptStatus.YES; } return AcceptStatus.NO; } } }