/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import java.io.IOException;
public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldData<AtomicOrdinalsFieldData> implements IndexOrdinalsFieldData {
private final double minFrequency, maxFrequency;
private final int minSegmentSize;
protected final CircuitBreakerService breakerService;
protected AbstractIndexOrdinalsFieldData(IndexSettings indexSettings, String fieldName,
IndexFieldDataCache cache, CircuitBreakerService breakerService,
double minFrequency, double maxFrequency, int minSegmentSize) {
super(indexSettings, fieldName, cache);
this.breakerService = breakerService;
this.minFrequency = minFrequency;
this.maxFrequency = maxFrequency;
this.minSegmentSize = minSegmentSize;
}
@Override
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
if (indexReader.leaves().size() <= 1) {
// ordinals are already global
return this;
}
boolean fieldFound = false;
for (LeafReaderContext context : indexReader.leaves()) {
if (context.reader().getFieldInfos().fieldInfo(getFieldName()) != null) {
fieldFound = true;
break;
}
}
if (fieldFound == false) {
// Some directory readers may be wrapped and report different set of fields and use the same cache key.
// If a field can't be found then it doesn't mean it isn't there,
// so if a field doesn't exist then we don't cache it and just return an empty field data instance.
// The next time the field is found, we do cache.
try {
return GlobalOrdinalsBuilder.buildEmpty(indexSettings, indexReader, this);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
try {
return cache.load(indexReader, this);
} catch (Exception e) {
if (e instanceof ElasticsearchException) {
throw (ElasticsearchException) e;
} else {
throw new ElasticsearchException(e);
}
}
}
@Override
public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
return GlobalOrdinalsBuilder.build(indexReader, this, indexSettings, breakerService, logger,
AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
}
@Override
protected AtomicOrdinalsFieldData empty(int maxDoc) {
return AbstractAtomicOrdinalsFieldData.empty();
}
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
if (iterator == null) {
return null;
}
int docCount = terms.getDocCount();
if (docCount == -1) {
docCount = reader.maxDoc();
}
if (docCount >= minSegmentSize) {
final int minFreq = minFrequency > 1.0
? (int) minFrequency
: (int)(docCount * minFrequency);
final int maxFreq = maxFrequency > 1.0
? (int) maxFrequency
: (int)(docCount * maxFrequency);
if (minFreq > 1 || maxFreq < docCount) {
iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
}
}
return iterator;
}
private static final class FrequencyFilter extends FilteredTermsEnum {
private int minFreq;
private int maxFreq;
FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) {
super(delegate, false);
this.minFreq = minFreq;
this.maxFreq = maxFreq;
}
@Override
protected AcceptStatus accept(BytesRef arg0) throws IOException {
int docFreq = docFreq();
if (docFreq >= minFreq && docFreq <= maxFreq) {
return AcceptStatus.YES;
}
return AcceptStatus.NO;
}
}
}