/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.field.data.support;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.StringHelper;
import org.elasticsearch.index.field.data.FieldData;
import java.io.IOException;
import java.util.ArrayList;
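/**
* Utility that walks the terms of a single field in an {@link IndexReader}, records for every
* document which term(s) it contains as ordinals, and delegates construction of the resulting
* {@link FieldData} to a {@link FieldDataLoader.TypeLoader}.
*/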
public class FieldDataLoader {

    /** Upper bound on the size of the buffers passed to each {@code TermDocs.read} call. */
    private static final int BULK_READ_SIZE = 128;

    /**
     * Enumerates the terms of {@code field} and builds field data from them.
     *
     * <p>The loader is first initialised via {@link TypeLoader#init()}. Terms are then visited in
     * the order the reader's term enumeration returns them, starting at {@code new Term(field)};
     * each one is passed to {@link TypeLoader#collectTerm(String)} and assigned a term number
     * starting at 1. For every document returned for a term, that term number is stored in an
     * {@code int[maxDoc]} ordinal array indexed by document id. The first term seen for a document
     * goes into the first array, the second into a second array (allocated on demand), and so on.
     * Slots that are never written keep the array default of 0.
     *
     * @param reader the index reader to read terms and postings from
     * @param field  the field name whose terms are loaded
     * @param loader callback that collects term values and builds the final result
     * @return {@link TypeLoader#buildSingleValue} when only one ordinal array was needed (no
     *         document had more than one term), otherwise {@link TypeLoader#buildMultiValue}
     * @throws IOException if reading from or closing the underlying index resources fails
     */
    @SuppressWarnings({"StringEquality"})
    public static <T extends FieldData> T load(IndexReader reader, String field, TypeLoader<T> loader) throws IOException {
        loader.init();

        // Interned so that the identity comparison against term.field() below can work;
        // this relies on the reader handing back interned field names.
        field = StringHelper.intern(field);

        final int maxDoc = reader.maxDoc();

        // ordinals.get(level)[doc] holds the term number of the (level + 1)-th term seen for doc.
        ArrayList<int[]> ordinals = new ArrayList<int[]>();
        // idx[doc] counts how many terms have been recorded for doc so far.
        int[] idx = new int[maxDoc];
        ordinals.add(new int[maxDoc]);

        int t = 1; // current term number

        TermDocs termDocs = reader.termDocs();
        try {
            // Acquired inside the outer try so termDocs is still closed if this call throws.
            TermEnum termEnum = reader.terms(new Term(field));
            try {
                // bulk read (in lucene 4 it won't be needed).
                int size = Math.min(BULK_READ_SIZE, maxDoc);
                int[] docs = new int[size];
                int[] freqs = new int[size];
                do {
                    Term term = termEnum.term();
                    // Stop once the enumeration is exhausted or has moved on to another field.
                    if (term == null || term.field() != field) {
                        break;
                    }
                    loader.collectTerm(term.text());
                    termDocs.seek(termEnum);
                    int number = termDocs.read(docs, freqs);
                    while (number > 0) {
                        for (int i = 0; i < number; i++) {
                            int doc = docs[i];
                            int[] ordinal;
                            if (idx[doc] >= ordinals.size()) {
                                // This doc has more terms than any doc seen before: add a level.
                                ordinal = new int[maxDoc];
                                ordinals.add(ordinal);
                            } else {
                                ordinal = ordinals.get(idx[doc]);
                            }
                            ordinal[doc] = t;
                            idx[doc]++;
                        }
                        number = termDocs.read(docs, freqs);
                    }
                    t++;
                } while (termEnum.next());
            } catch (RuntimeException e) {
                if (e.getClass().getName().endsWith("StopFillCacheException")) {
                    // all is well, in case numeric parsers are used.
                } else {
                    throw e;
                }
            } finally {
                termEnum.close();
            }
        } finally {
            // Runs even if termEnum.close() threw, so neither resource is leaked.
            termDocs.close();
        }

        if (ordinals.size() == 1) {
            return loader.buildSingleValue(field, ordinals.get(0));
        } else {
            int[][] nativeOrdinals = new int[ordinals.size()][];
            for (int i = 0; i < nativeOrdinals.length; i++) {
                nativeOrdinals[i] = ordinals.get(i);
            }
            return loader.buildMultiValue(field, nativeOrdinals);
        }
    }

    /**
     * Callback used by {@link #load} to collect term values and build the final field data.
     */
    public static interface TypeLoader<T extends FieldData> {

        /** Called once by {@link #load} before any term is visited. */
        void init();

        /**
         * Called once per visited term, in enumeration order; the n-th call (1-based) corresponds
         * to ordinal value n in the arrays later passed to the build methods.
         */
        void collectTerm(String term);

        /**
         * Builds the result when a single ordinal array was sufficient.
         *
         * @param fieldName the (interned) field name
         * @param ordinals  term number per document id; 0 where no term was recorded
         */
        T buildSingleValue(String fieldName, int[] ordinals);

        /**
         * Builds the result when at least one document had more than one term.
         *
         * @param fieldName the (interned) field name
         * @param ordinals  one array per level, each indexed by document id; 0 where no term was
         *                  recorded at that level
         */
        T buildMultiValue(String fieldName, int[][] ordinals);
    }

    /**
     * Convenience base class for {@link TypeLoader} implementations that provides a no-op
     * {@link #init()}.
     */
    public static abstract class FreqsTypeLoader<T extends FieldData> implements TypeLoader<T> {

        protected FreqsTypeLoader() {
        }

        @Override
        public void init() {
        }
    }
}