Java Examples for org.apache.lucene.index.SortedDocValues
The following Java examples show how org.apache.lucene.index.SortedDocValues is used in practice. The source code samples are taken from different open source projects.
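Note on API versions: the samples below span several Lucene generations. Older ones use the random-access methods (getOrd(docID), get(docID), lookupOrd(ord, result)), while newer ones use the iterator-style methods introduced in Lucene 7 (advanceExact(docID), ordValue()). As a baseline, here is a minimal sketch of reading a sorted doc-values field segment by segment with the iterator API; it is an illustration only, and the class name and field handling are assumptions rather than code from any of the projects below.

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;

public final class SortedDocValuesBasics {

    // Prints each document's value for a sorted doc-values field, segment by segment.
    public static void dump(DirectoryReader reader, String field) throws IOException {
        for (LeafReaderContext leaf : reader.leaves()) {
            // Unlike LeafReader.getSortedDocValues, DocValues.getSorted never returns
            // null; it substitutes an empty instance when the segment lacks the field.
            SortedDocValues values = DocValues.getSorted(leaf.reader(), field);
            for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
                if (values.advanceExact(doc)) {
                    int ord = values.ordValue();            // per-document ordinal
                    BytesRef term = values.lookupOrd(ord);  // ordinal -> term bytes
                    System.out.println((leaf.docBase + doc) + " -> " + term.utf8ToString());
                }
            }
        }
    }
}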
Example 1
Project: crate-master | File: IpColumnReference.java
@Override
public void setNextReader(LeafReaderContext context) throws IOException {
SortedSetDocValues setDocValues = context.reader().getSortedSetDocValues(columnName);
final SortedDocValues singleton = DocValues.unwrapSingleton(setDocValues);
if (singleton != null) {
values = singleton;
} else {
values = new SortedDocValues() {
@Override
public int getOrd(int docID) {
setDocValues.setDocument(docID);
int ord = (int) setDocValues.nextOrd();
if (setDocValues.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
throw new GroupByOnArrayUnsupportedException(columnName);
}
return ord;
}
@Override
public BytesRef lookupOrd(int ord) {
return setDocValues.lookupOrd(ord);
}
@Override
public int getValueCount() {
return (int) setDocValues.getValueCount();
}
};
}
}
Example 2
Project: elassandra-master | File: ChildrenQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSet parentsSet = parentFilter.getDocIdSet(context, null);
if (Lucene.isEmpty(parentsSet) || remaining == 0) {
return null;
}
// We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
// count down (short circuit) logic will then work as expected.
DocIdSetIterator parents = BitsFilteredDocIdSet.wrap(parentsSet, context.reader().getLiveDocs()).iterator();
if (parents != null) {
SortedDocValues bytesValues = collector.globalIfd.load(context).getOrdinalsValues(parentType);
if (bytesValues == null) {
return null;
}
if (minChildren > 0 || maxChildren != 0 || scoreType == ScoreType.NONE) {
switch(scoreType) {
case NONE:
DocIdSetIterator parentIdIterator = new CountParentOrdIterator(this, parents, collector, bytesValues, minChildren, maxChildren);
return ConstantScorer.create(parentIdIterator, this, queryWeight);
case AVG:
return new AvgParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
default:
return new ParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
}
}
switch(scoreType) {
case AVG:
return new AvgParentScorer(this, parents, collector, bytesValues);
default:
return new ParentScorer(this, parents, collector, bytesValues);
}
}
return null;
}
Example 3
Project: montysolr-master | File: CitationLRUCache.java
/*
* Given the set of fields, we'll look inside them and retrieve (into memory)
* all values
*/
private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List<String> fields, KVSetter setter) throws IOException {
IndexSchema schema = searcher.getCore().getLatestSchema();
List<LeafReaderContext> leaves = searcher.getIndexReader().getContext().leaves();
Bits liveDocs;
LeafReader lr;
Transformer transformer;
for (LeafReaderContext leave : leaves) {
int docBase = leave.docBase;
liveDocs = leave.reader().getLiveDocs();
lr = leave.reader();
FieldInfos fInfo = lr.getFieldInfos();
for (String field : fields) {
FieldInfo fi = fInfo.fieldInfo(field);
if (fi == null) {
log.error("Field " + field + " has no schema entry; skipping it!");
continue;
}
SchemaField fSchema = schema.getField(field);
DocValuesType fType = fi.getDocValuesType();
Map<String, Type> mapping = new HashMap<String, Type>();
final LeafReader unReader;
if (fType.equals(DocValuesType.NONE)) {
// no doc values indexed: choose an uninverting mapping from the schema field type
Class<? extends FieldType> c = fSchema.getType().getClass();
if (c.isAssignableFrom(TextField.class) || c.isAssignableFrom(StrField.class)) {
if (fSchema.multiValued()) {
mapping.put(field, Type.SORTED);
} else {
mapping.put(field, Type.BINARY);
}
} else if (c.isAssignableFrom(TrieIntField.class)) {
if (fSchema.multiValued()) {
mapping.put(field, Type.SORTED_SET_INTEGER);
} else {
mapping.put(field, Type.INTEGER_POINT);
}
} else {
continue;
}
unReader = new UninvertingReader(lr, mapping);
// refresh the doc values type: the uninverting view reports the mapped type,
// which the switch below dispatches on
fType = unReader.getFieldInfos().fieldInfo(field).getDocValuesType();
} else {
unReader = lr;
}
switch(fType) {
case NUMERIC:
transformer = new Transformer() {
NumericDocValues dv = unReader.getNumericDocValues(field);
@Override
public void process(int docBase, int docId) {
int v = (int) dv.get(docId);
setter.set(docBase, docId, v);
}
};
break;
case SORTED_NUMERIC:
transformer = new Transformer() {
SortedNumericDocValues dv = unReader.getSortedNumericDocValues(field);
@Override
public void process(int docBase, int docId) {
dv.setDocument(docId);
int max = dv.count();
int v;
for (int i = 0; i < max; i++) {
v = (int) dv.valueAt(i);
setter.set(docBase, docId, v);
}
}
};
break;
case SORTED_SET:
transformer = new Transformer() {
SortedSetDocValues dv = unReader.getSortedSetDocValues(field);
int errs = 0;
@Override
public void process(int docBase, int docId) {
if (errs > 5)
return;
dv.setDocument(docId);
for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
final BytesRef value = dv.lookupOrd(ord);
setter.set(docBase, docId, value.utf8ToString());
}
}
};
break;
case SORTED:
transformer = new Transformer() {
SortedDocValues dv = unReader.getSortedDocValues(field);
TermsEnum te;
@Override
public void process(int docBase, int docId) {
BytesRef v = dv.get(docId);
if (v.length == 0)
return;
setter.set(docBase, docId, v.utf8ToString());
}
};
break;
default:
throw new IllegalArgumentException("The field " + field + " is of type that cannot be un-inverted");
}
int i = 0;
while (i < lr.maxDoc()) {
if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
i++;
continue;
}
transformer.process(docBase, i);
i++;
}
}
}
}
Example 4
Project: elasticsearch-master | File: MultiValueMode.java
@Override
protected int pick(SortedDocValues values, DocIdSetIterator docItr, int startDoc, int endDoc) throws IOException {
int ord = Integer.MAX_VALUE;
boolean hasValue = false;
for (int doc = startDoc; doc < endDoc; doc = docItr.nextDoc()) {
if (values.advanceExact(doc)) {
final int innerOrd = values.ordValue();
ord = Math.min(ord, innerOrd);
hasValue = true;
}
}
return hasValue ? ord : -1;
}
Example 5
Project: spatial-solr-sandbox-master | File: GeometryOperationFilter.java
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
AtomicReader areader = context.reader();
SortedDocValues sortedDocValues = areader.getSortedDocValues(fieldName);
if (sortedDocValues == null)
return null;
OpenBitSet bits = new OpenBitSet(areader.maxDoc());
BytesRef bytes = bstream.getBytesRef();
for (int docID = 0; docID < areader.maxDoc(); docID++) {
if (acceptDocs == null || acceptDocs.get(docID)) {
sortedDocValues.get(docID, bytes);
if (bytes.length > 0) {
try {
// likely the same
bstream.setBytesRef(bytes);
Geometry geo = wkbReader.read(bstream);
if (tester.matches(geo)) {
bits.set(docID);
}
} catch (ParseException ex) {
log.warn("error reading indexed geometry", ex);
}
}
}
}
return bits;
}
Example 6
Project: lucene-solr-master | File: TestJoinUtil.java
public void testSimpleOrdinalsJoin() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically create this field with the values
final String joinField = idField + productIdField;
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
IndexReader r = indexSearcher.getIndexReader();
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
Query toQuery = new TermQuery(new Term(typeField, "price"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
// Search for product and return prices
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
fromQuery = new TermQuery(new Term("name", "name1"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for prices and return products
fromQuery = new TermQuery(new Term("price", "20.0"));
toQuery = new TermQuery(new Term(typeField, "product"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
Example 7
Project: simple-category-extraction-component-master | File: CategoryExtractionComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrQueryRequest req = rb.req;
SolrIndexSearcher searcher = req.getSearcher();
SortedDocValues fieldValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), categoryField);
SolrParams params = req.getParams();
ModifiableSolrParams modParams = new ModifiableSolrParams(params);
String qStr = params.get(CommonParams.Q);
// tokenize the query string, if any part of it matches, remove the token from the list and
// add a filter query with <categoryField>:value
StringTokenizer strtok = new StringTokenizer(qStr, " .,:;\"'");
StringBuilder strbldr = new StringBuilder();
while (strtok.hasMoreTokens()) {
String tok = strtok.nextToken().toLowerCase();
Log.info("got token: " + tok);
BytesRef key = new BytesRef(tok.getBytes());
if (fieldValues.lookupTerm(key) >= 0) {
String fq = categoryField + ":" + tok;
Log.info("adding fq " + fq);
modParams.add("fq", fq);
} else {
if (strbldr.length() > 0) {
strbldr.append(" ");
}
strbldr.append(tok);
}
}
String modQ = strbldr.toString();
// if the query is now empty, make sure it hits on everything
if (modQ.trim().length() == 0) {
modQ = "*:*";
}
Log.info("final q string is: '" + modQ + "'");
modParams.set("q", modQ);
req.setParams(modParams);
}
Example 8
Project: incubator-blur-master | File: SecureAtomicReader.java
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
final SortedDocValues sortedDocValues = in.getSortedDocValues(field);
if (sortedDocValues == null) {
return null;
}
return new SortedDocValues() {
@Override
public void lookupOrd(int ord, BytesRef result) {
sortedDocValues.lookupOrd(ord, result);
}
@Override
public int getValueCount() {
return sortedDocValues.getValueCount();
}
@Override
public int getOrd(int docID) {
try {
if (_accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)) {
return sortedDocValues.getOrd(docID);
}
// Default missing value.
return -1;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
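Example 8 is written against the older random-access API (getOrd plus the void lookupOrd(int, BytesRef) overload). A sketch of the same delegation idea against the iterator-style SortedDocValues of Lucene 9.x follows (in 7.x/8.x the signatures differ slightly, since SortedDocValues still extended BinaryDocValues there). The hasAccess predicate is a hypothetical stand-in for the _accessControl check above:

import java.io.IOException;
import java.util.function.IntPredicate;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;

public final class AccessFilteredSortedDocValues {

    // Sketch only: documents rejected by the (hypothetical) hasAccess predicate
    // appear to have no value for the field at all.
    public static SortedDocValues wrap(SortedDocValues in, IntPredicate hasAccess) {
        return new SortedDocValues() {
            @Override
            public int docID() {
                return in.docID();
            }

            @Override
            public int nextDoc() throws IOException {
                int doc = in.nextDoc();
                while (doc != NO_MORE_DOCS && !hasAccess.test(doc)) {
                    doc = in.nextDoc();
                }
                return doc;
            }

            @Override
            public int advance(int target) throws IOException {
                int doc = in.advance(target);
                while (doc != NO_MORE_DOCS && !hasAccess.test(doc)) {
                    doc = in.nextDoc();
                }
                return doc;
            }

            @Override
            public boolean advanceExact(int target) throws IOException {
                // Report a value only when one exists and access is granted.
                return in.advanceExact(target) && hasAccess.test(target);
            }

            @Override
            public long cost() {
                return in.cost();
            }

            @Override
            public int ordValue() throws IOException {
                return in.ordValue();
            }

            @Override
            public BytesRef lookupOrd(int ord) throws IOException {
                return in.lookupOrd(ord);
            }

            @Override
            public int getValueCount() {
                return in.getValueCount();
            }
        };
    }
}

Filtering inside nextDoc/advance keeps the ordinal path untouched: ordValue() is only legal once the iterator is positioned on a document, and with this wrapper it can only be positioned on accessible ones.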
Example 9
Project: heliosearch-master | File: SimpleFacetsHS.java
/**
* Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
* The field must have at most one indexed token per document.
*/
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
// TODO: If the number of terms is high compared to docs.size(), and zeros==false,
// we should use an alternate strategy to avoid
// 1) creating another huge int[] for the counts
// 2) looping over that huge int[] looking for the rare non-zeros.
//
// Yet another variation: if docs.size() is small and termvectors are stored,
// then use them instead of the FieldCache.
//
// TODO: this function is too big and could use some refactoring, but
// we also need a facet cache, and refactoring of SimpleFacets instead of
// trying to pass all the various params around.
FieldType ft = searcher.getSchema().getFieldType(fieldName);
NamedList<Integer> res = new NamedList<Integer>();
SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);
final BytesRef br = new BytesRef();
final BytesRef prefixRef;
if (prefix == null) {
prefixRef = null;
} else if (prefix.length() == 0) {
prefix = null;
prefixRef = null;
} else {
prefixRef = new BytesRef(prefix);
}
int startTermIndex, endTermIndex;
if (prefix != null) {
startTermIndex = si.lookupTerm(prefixRef);
if (startTermIndex < 0)
startTermIndex = -startTermIndex - 1;
prefixRef.append(UnicodeUtil.BIG_TERM);
endTermIndex = si.lookupTerm(prefixRef);
assert endTermIndex < 0;
endTermIndex = -endTermIndex - 1;
} else {
startTermIndex = -1;
endTermIndex = si.getValueCount();
}
final int nTerms = endTermIndex - startTermIndex;
int missingCount = -1;
final CharsRef charsRef = new CharsRef(10);
if (nTerms > 0 && docs.size() >= mincount) {
// count collection array only needs to be as big as the number of terms we are
// going to collect counts for.
final int[] counts = new int[nTerms];
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int term = si.getOrd(iter.nextDoc());
int arrIdx = term - startTermIndex;
if (arrIdx >= 0 && arrIdx < nTerms)
counts[arrIdx]++;
}
if (startTermIndex == -1) {
missingCount = counts[0];
}
// IDEA: we could also maintain a count of "other"... everything that fell outside
// of the top 'N'
int off = offset;
int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
maxsize = Math.min(maxsize, nTerms);
LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
// the smallest value in the top 'N' values
int min = mincount - 1;
for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
int c = counts[i];
if (c > min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead
long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
boolean displaced = queue.insert(pair);
if (displaced)
min = (int) (queue.top() >>> 32);
}
}
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
assert collectCount <= lim;
// the start and end indexes of our list "sorted" (starting with the highest value)
int sortedIdxStart = queue.size() - (collectCount - 1);
int sortedIdxEnd = queue.size() + 1;
final long[] sorted = queue.sort(collectCount);
for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
long pair = sorted[i];
int c = (int) (pair >>> 32);
int tnum = Integer.MAX_VALUE - (int) pair;
si.lookupOrd(startTermIndex + tnum, br);
ft.indexedToReadable(br, charsRef);
res.add(charsRef.toString(), c);
}
} else {
// add results in index order
int i = (startTermIndex == -1) ? 1 : 0;
if (mincount <= 0) {
// if mincount<=0, then we won't discard any terms and we know exactly
// where to start.
i += off;
off = 0;
}
for (; i < nTerms; i++) {
int c = counts[i];
if (c < mincount || --off >= 0)
continue;
if (--lim < 0)
break;
si.lookupOrd(startTermIndex + i, br);
ft.indexedToReadable(br, charsRef);
res.add(charsRef.toString(), c);
}
}
}
if (missing) {
if (missingCount < 0) {
missingCount = getFieldMissingCount(searcher, docs, fieldName);
}
res.add(null, missingCount);
}
return res;
}
Example 10
Project: clue-master | File: DocValCommand.java
private void showDocId(int docid, int docBase, Object docVals, DocValuesType docValType, BytesRef bytesRef, PrintStream out, int segmentid) throws Exception {
int subid = docid - docBase;
if (docVals != null) {
String val;
switch(docValType) {
case NUMERIC:
NumericDocValues dv = (NumericDocValues) docVals;
val = String.valueOf(dv.get(subid));
break;
case BINARY:
BinaryDocValues bv = (BinaryDocValues) docVals;
bytesRef = bv.get(subid);
val = bytesRef.utf8ToString();
break;
case SORTED:
{
SortedDocValues sv = (SortedDocValues) docVals;
bytesRef = sv.get(subid);
StringBuilder sb = new StringBuilder();
sb.append(NUM_TERMS_IN_FIELD).append(sv.getValueCount()).append(", ");
sb.append("value: [");
sb.append(bytesRef.utf8ToString());
sb.append("]");
val = sb.toString();
break;
}
case SORTED_SET:
{
SortedSetDocValues sv = (SortedSetDocValues) docVals;
sv.setDocument(subid);
long nextOrd;
long count = sv.getValueCount();
StringBuilder sb = new StringBuilder();
sb.append(NUM_TERMS_IN_FIELD).append(count).append(", ");
sb.append("values: [");
boolean firstPass = true;
while ((nextOrd = sv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
bytesRef = sv.lookupOrd(nextOrd);
if (!firstPass) {
sb.append(", ");
}
sb.append(bytesRef.utf8ToString());
firstPass = false;
}
sb.append("]");
val = sb.toString();
break;
}
case SORTED_NUMERIC:
{
SortedNumericDocValues sv = (SortedNumericDocValues) docVals;
sv.setDocument(subid);
int count = sv.count();
StringBuilder sb = new StringBuilder();
sb.append(NUM_TERMS_IN_FIELD).append(count).append(", ");
sb.append("values: [");
boolean firstPass = true;
for (int i = 0; i < count; ++i) {
long nextVal = sv.valueAt(i);
if (!firstPass) {
sb.append(", ");
}
sb.append(String.valueOf(nextVal));
firstPass = false;
}
sb.append("]");
val = sb.toString();
break;
}
default:
val = null;
}
if (val == null) {
out.println("cannot read doc value type: " + docValType);
} else {
out.println("type: " + docValType + ", val: " + val + ", segment: " + segmentid + ", docid: " + docid + ", subid: " + subid);
}
} else {
out.println("doc value unavailable");
}
}
Example 11
Project: bobo-master | File: DocComparatorSource.java
@Override
public DocComparator getComparator(AtomicReader reader, int docbase) throws IOException {
final SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
return new DocComparator() {
@Override
public int compare(ScoreDoc doc1, ScoreDoc doc2) {
return values.getOrd(doc1.doc) - values.getOrd(doc2.doc);
}
@Override
public String value(ScoreDoc doc) {
int ord = values.getOrd(doc.doc);
BytesRef term = new BytesRef();
values.lookupOrd(ord, term);
return term.utf8ToString();
}
};
}
Example 12
Project: stargate-core-master | File: IndexEntryCollector.java
IndexEntry getIndexEntry(int slot, int doc, float score) throws IOException {
String pkName = LuceneUtils.primaryKeyName(pkNames, doc);
ByteBuffer primaryKey = LuceneUtils.byteBufferDocValue(primaryKeys, doc);
ByteBuffer rowKey = LuceneUtils.byteBufferDocValue(rowKeys, doc);
Map<String, Number> numericDocValues = new HashMap<>();
Map<String, String> binaryDocValues = new HashMap<>();
for (Map.Entry<String, NumericDocValues> entry : numericDocValuesMap.entrySet()) {
Type type = AggregateFunction.getLuceneType(options, entry.getKey());
Number number = LuceneUtils.numericDocValue(entry.getValue(), doc, type);
numericDocValues.put(entry.getKey(), number);
}
for (Map.Entry<String, SortedDocValues> entry : stringDocValues.entrySet()) {
binaryDocValues.put(entry.getKey(), LuceneUtils.stringDocValue(entry.getValue(), doc));
}
return new IndexEntry(rowKey, pkName, primaryKey, slot, docBase + doc, score, numericDocValues, binaryDocValues);
}
Example 13
Project: elk-master | File: EmptyReader.java
@Override
public SortedDocValues getSortedDocValues(final String field) throws IOException {
return null;
}