package org.apache.solr.search.field;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.solr.core.HS;
import org.apache.solr.search.SolrIndexSearcher;
import java.io.IOException;
/**
 * Top-level (searcher-scoped) cache of string field values, backed by native
 * (off-heap) memory. Per-segment values are built by un-inverting the field's
 * term index in {@link #createValue}.
 */
public class StrTopValues extends TopValues {

  /**
   * @param strFieldValues the field-value source this instance caches values for
   */
  public StrTopValues(StrFieldValues strFieldValues) {
    super(strFieldValues);
  }

  /**
   * Builds the leaf values for one segment by un-inverting the term index:
   * every term's bytes are appended (length-prefixed) into a single native
   * array, and each document is mapped to the ord of the term it contains.
   * Ords are 1-based ({@code termOrd} is incremented before use); documents
   * never written keep the array default — presumably 0, read as "no value"
   * (NOTE(review): depends on {@code LongArray.create} zero-initializing; confirm).
   *
   * @param topValues     owner providing the field name / field metadata
   * @param create        creation context required by the API (not used here)
   * @param readerContext the leaf reader to un-invert
   * @return native-memory-backed leaf values, or an empty {@code Str0Values}
   *         when the field has no terms in this segment
   * @throws IOException if term or postings enumeration fails
   */
  @Override
  public StrLeafValues createValue(TopValues topValues, CreationLeafValue create, AtomicReaderContext readerContext) throws IOException {
    AtomicReader reader = readerContext.reader();
    final int maxDoc = reader.maxDoc();

    Terms terms = reader.terms(topValues.fieldValues.getFieldName());
    if (terms == null) {
      // Field absent in this segment: return an empty placeholder instead of
      // allocating ord/offset arrays.
      return new Str0Values(topValues.fieldValues, new StrFieldStats());
    }

    StrFieldStats stats = new StrFieldStats();
    NativePagedBytes bytes = new NativePagedBytes(15);

    // Cap the number of terms loaded at maxDoc+1, guarding the +1 against
    // int overflow near Integer.MAX_VALUE.
    final int termCountHardLimit;
    if (maxDoc >= Integer.MAX_VALUE - 1) {
      termCountHardLimit = Integer.MAX_VALUE - 1;
    } else {
      termCountHardLimit = maxDoc + 1;
    }

    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) { // TODO: which codecs don't provide the number of terms???
      if (numUniqueTerms > termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        numUniqueTerms = termCountHardLimit;
      }
    } else {
      numUniqueTerms = termCountHardLimit;
    }

    // Add one since we aren't using unsigned values for ords... (i.e. we would
    // need to convert to unsigned or bias the values).
    int bitsRequired = PackedInts.bitsRequired(numUniqueTerms) + 1;
    // TODO: a good test that reliably fails if we didn't add 1 here! CursorPagingTest is the only one that does fail.
    final LongArray docToOrd = LongArray.create(maxDoc, bitsRequired);

    int termOrd = 0; // 1-based ord of the current term; 0 is never assigned to a doc
    final TermsEnum termsEnum = terms.iterator(null);
    DocsEnum docs = null;

    while (true) {
      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      if (termOrd >= termCountHardLimit) {
        break;
      }

      termOrd++; // increment first: ords start at 1
      bytes.copyUsingLengthPrefix(term);

      // Record this term's ord for every document that contains it.
      docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        docToOrd.setLong(docID, termOrd);
      }
    }

    stats.numUniqueValues = termOrd;

    long termBytesLength = bytes.getUsedSize();
    long termBytes = bytes.buildSingleArray();
    assert termBytesLength == HS.arraySizeBytes(termBytes);
    bytes.close(); // close early before building offset array to lower memory requirements

    // Two passes over the length-prefixed entries: the first feeds the tracker
    // so it can size the monotonic packed array, the second stores the offsets.
    // NOTE(review): offsets are indexed 0..termOrd-1 while docToOrd holds
    // 1..termOrd; StrArrLeafValues presumably accounts for the bias — confirm.
    MonotonicLongArray.Tracker tracker = new MonotonicLongArray.Tracker(termOrd, termBytesLength);
    long pos = 0;
    for (int i = 0; i < termOrd; i++) {
      assert pos < termBytesLength;
      tracker.add(i, pos);
      int len = NativePagedBytes.getEntrySize(termBytes, pos);
      pos += len;
    }

    LongArray offsets = tracker.createArray();
    pos = 0;
    for (int i = 0; i < termOrd; i++) {
      assert pos < termBytesLength;
      offsets.setLong(i, pos);
      int len = NativePagedBytes.getEntrySize(termBytes, pos);
      pos += len;
    }

    return new StrArrLeafValues(topValues.fieldValues, docToOrd, offsets, termBytes, stats);
  }

  /***
  public static class OrdIndexBuilder {
    StrFieldStats stats = new StrFieldStats();
    NativePagedBytes termBytesPaged = new NativePagedBytes(15);
    long termOrd = 0;

    // Results of the build.
    public LongArray docToOrd;
    public long termBytesArr;
    public LongArray offsets;

    public void addTerm(BytesRef term) throws IOException {
      termOrd++;
      termBytesPaged.copyUsingLengthPrefix(term);
    }

    public void addDoc(int doc) throws IOException {
      docToOrd.setLong(doc, termOrd);
    }

    public long getNumOrds() {
      return termOrd;
    }

    public void build() throws IOException {
      stats.numUniqueValues = termOrd;
      long termBytesLength = termBytesPaged.getUsedSize();
      termBytesArr = termBytesPaged.buildSingleArray();
      assert termBytesLength == HS.arraySizeBytes(termBytesArr);
      termBytesPaged.close(); // close early before building offset array to lower memory requirements
      termBytesPaged = null;
      MonotonicLongArray.Tracker tracker = new MonotonicLongArray.Tracker(termOrd, termBytesLength);
      long pos = 0;
      for (int i=0; i<termOrd; i++) {
        assert pos < termBytesLength;
        tracker.add(i, pos);
        int len = NativePagedBytes.getEntrySize(termBytesArr, pos);
        assert len > 0;
        pos += len;
      }
      LongArray offsets = tracker.createArray();
      pos = 0;
      for (int i=0; i<termOrd; i++) {
        assert pos < termBytesLength;
        offsets.setLong(i, pos);
        int len = NativePagedBytes.getEntrySize(termBytesArr, pos);
        pos += len;
      }
    }

    public StrLeafValues buildValues(FieldValues fieldValues) throws IOException {
      build();
      return new StrArrLeafValues(fieldValues, docToOrd, offsets, termBytesArr, stats);
    }
  }
  ***/

  /**
   * Creates a fresh {@code StrTopValues} for a new searcher, warming it from
   * this (previous) instance via {@code create(warmContext, this)}.
   */
  @Override
  public StrTopValues create(SolrIndexSearcher.WarmContext warmContext) {
    StrTopValues tv = new StrTopValues((StrFieldValues) fieldValues);
    tv.create(warmContext, this);
    return tv;
  }
}