Java Examples for org.apache.lucene.index.SortedDocValues
The following Java examples will help you understand the usage of org.apache.lucene.index.SortedDocValues. These source code samples are taken from different open source projects.
Example 1
Project: crate-master File: IpColumnReference.java View source code |
/**
 * Switches this reference to the given segment and prepares ordinal access for {@code columnName}.
 *
 * <p>If the segment's sorted-set doc values can be unwrapped to a single-valued view, that view
 * is used directly. Otherwise a thin adapter is installed that exposes the multi-valued source
 * as {@link SortedDocValues} and rejects any document carrying more than one ordinal.
 *
 * @param context the segment that subsequent lookups will refer to
 * @throws IOException if the doc values cannot be obtained from the reader
 */
@Override
public void setNextReader(LeafReaderContext context) throws IOException {
    SortedSetDocValues multiValued = context.reader().getSortedSetDocValues(columnName);
    final SortedDocValues singleValued = DocValues.unwrapSingleton(multiValued);
    if (singleValued != null) {
        values = singleValued;
        return;
    }

    values = new SortedDocValues() {

        @Override
        public int getOrd(int docID) {
            multiValued.setDocument(docID);
            final long firstOrd = multiValued.nextOrd();
            // A second ordinal means the document holds an array, which is not supported here.
            if (multiValued.nextOrd() == SortedSetDocValues.NO_MORE_ORDS) {
                return (int) firstOrd;
            }
            throw new GroupByOnArrayUnsupportedException(columnName);
        }

        @Override
        public BytesRef lookupOrd(int ord) {
            return multiValued.lookupOrd(ord);
        }

        @Override
        public int getValueCount() {
            final long count = multiValued.getValueCount();
            return (int) count;
        }
    };
}
Example 2
Project: elassandra-master File: ChildrenQuery.java View source code |
/**
 * Builds the scorer that matches parent documents in the given segment.
 *
 * @param context the segment to score
 * @return a scorer over the matching parents, or {@code null} when the segment has no parent
 *         candidates, no parents remain to be matched, or no ordinal values are available
 * @throws IOException if the parent filter or the field data cannot be read
 */
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    DocIdSet parentsSet = parentFilter.getDocIdSet(context, null);
    if (Lucene.isEmpty(parentsSet) || remaining == 0) {
        return null;
    }

    // Live docs may not have been applied to the filter yet; applying them here keeps the
    // "remaining" count-down (short circuit) logic accurate.
    DocIdSetIterator parents = BitsFilteredDocIdSet.wrap(parentsSet, context.reader().getLiveDocs()).iterator();
    if (parents == null) {
        return null;
    }

    SortedDocValues bytesValues = collector.globalIfd.load(context).getOrdinalsValues(parentType);
    if (bytesValues == null) {
        return null;
    }

    // Child counts are needed when a min/max bound is configured or when no score is computed.
    final boolean needsChildCount = minChildren > 0 || maxChildren != 0 || scoreType == ScoreType.NONE;
    if (!needsChildCount) {
        switch (scoreType) {
            case AVG:
                return new AvgParentScorer(this, parents, collector, bytesValues);
            default:
                return new ParentScorer(this, parents, collector, bytesValues);
        }
    }

    switch (scoreType) {
        case NONE: {
            DocIdSetIterator countingIterator =
                    new CountParentOrdIterator(this, parents, collector, bytesValues, minChildren, maxChildren);
            return ConstantScorer.create(countingIterator, this, queryWeight);
        }
        case AVG:
            return new AvgParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
        default:
            return new ParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
    }
}
Example 3
Project: montysolr-master File: CitationLRUCache.java View source code |
/*
 * Given the set of fields, we'll look inside them and retrieve (into memory)
 * all values.
 *
 * For every index segment (leaf) and every requested field, a Transformer is
 * chosen according to the field's doc-values type and then invoked once per
 * live document, handing each value to the supplied setter together with the
 * segment's docBase and the segment-local document id.
 *
 * searcher - supplies the index reader (its leaves) and the latest schema
 * fields   - names of the fields whose values are read
 * setter   - callback receiving (docBase, docId, value) for every value found
 *
 * Throws IOException when reading from the index fails, and
 * IllegalArgumentException when a field's doc-values type has no matching
 * case in the switch below.
 */
private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List<String> fields, KVSetter setter) throws IOException {
IndexSchema schema = searcher.getCore().getLatestSchema();
List<LeafReaderContext> leaves = searcher.getIndexReader().getContext().leaves();
Bits liveDocs;
LeafReader lr;
Transformer transformer;
// One pass per segment; docBase is passed to the setter alongside the segment-local doc id.
for (LeafReaderContext leave : leaves) {
int docBase = leave.docBase;
liveDocs = leave.reader().getLiveDocs();
lr = leave.reader();
FieldInfos fInfo = lr.getFieldInfos();
for (String field : fields) {
FieldInfo fi = fInfo.fieldInfo(field);
// The segment has no FieldInfo for this field: log and move on to the next field.
if (fi == null) {
log.error("Field " + field + " has no schema entry; skipping it!");
continue;
}
SchemaField fSchema = schema.getField(field);
DocValuesType fType = fi.getDocValuesType();
Map<String, Type> mapping = new HashMap<String, Type>();
final LeafReader unReader;
// Fields without doc values are meant to be read through an UninvertingReader.
if (fType.equals(DocValuesType.NONE)) {
// NOTE(review): c is the class of the DocValuesType constant, not of the schema field type,
// so the isAssignableFrom(TextField/StrField/TrieIntField) checks below look like they can
// never succeed and such fields would always hit the final "continue" -- confirm intent.
Class<? extends DocValuesType> c = fType.getClass();
if (c.isAssignableFrom(TextField.class) || c.isAssignableFrom(StrField.class)) {
if (fSchema.multiValued()) {
mapping.put(field, Type.SORTED);
} else {
mapping.put(field, Type.BINARY);
}
} else if (c.isAssignableFrom(TrieIntField.class)) {
if (fSchema.multiValued()) {
mapping.put(field, Type.SORTED_SET_INTEGER);
} else {
mapping.put(field, Type.INTEGER_POINT);
}
} else {
continue;
}
unReader = new UninvertingReader(lr, mapping);
} else {
unReader = lr;
}
// NOTE(review): fType is still the type read from FieldInfo; if it is NONE and an
// UninvertingReader was created above, this switch has no NONE case and falls to the
// throwing default -- confirm.
switch(fType) {
case NUMERIC:
// Single numeric value per document, narrowed to int before being handed to the setter.
transformer = new Transformer() {
NumericDocValues dv = unReader.getNumericDocValues(field);
@Override
public void process(int docBase, int docId) {
int v = (int) dv.get(docId);
setter.set(docBase, docId, v);
}
};
break;
case SORTED_NUMERIC:
// Several numeric values per document; each one is narrowed to int and reported separately.
transformer = new Transformer() {
SortedNumericDocValues dv = unReader.getSortedNumericDocValues(field);
@Override
public void process(int docBase, int docId) {
dv.setDocument(docId);
int max = dv.count();
int v;
for (int i = 0; i < max; i++) {
v = (int) dv.valueAt(i);
setter.set(docBase, docId, v);
}
}
};
break;
case SORTED_SET:
// Several term values per document; each ordinal is resolved and reported as a UTF-8 string.
transformer = new Transformer() {
SortedSetDocValues dv = unReader.getSortedSetDocValues(field);
// NOTE(review): errs is never incremented in the visible code, so the guard below never trips.
int errs = 0;
@Override
public void process(int docBase, int docId) {
if (errs > 5)
return;
dv.setDocument(docId);
for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
final BytesRef value = dv.lookupOrd(ord);
setter.set(docBase, docId, value.utf8ToString());
}
}
};
break;
case SORTED:
// Single term value per document; zero-length values are not reported.
transformer = new Transformer() {
SortedDocValues dv = unReader.getSortedDocValues(field);
// NOTE(review): te is declared but not used in the visible code.
TermsEnum te;
@Override
public void process(int docBase, int docId) {
BytesRef v = dv.get(docId);
if (v.length == 0)
return;
setter.set(docBase, docId, v.utf8ToString());
}
};
break;
default:
throw new IllegalArgumentException("The field " + field + " is of type that cannot be un-inverted");
}
// Visit every document of the segment, skipping those not marked live
// (a null liveDocs means every document is processed).
int i = 0;
while (i < lr.maxDoc()) {
if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
i++;
continue;
}
transformer.process(docBase, i);
i++;
}
}
}
}
Example 4
Project: elasticsearch-master File: MultiValueMode.java View source code |
/**
 * Selects the smallest ordinal among the documents visited in {@code [startDoc, endDoc)}.
 *
 * <p>The first document examined is {@code startDoc}; every following one is obtained from
 * {@code docItr.nextDoc()} until a document at or beyond {@code endDoc} is returned.
 *
 * @param values   ordinal source, positioned per document via {@code advanceExact}
 * @param docItr   iterator supplying the documents after {@code startDoc}
 * @param startDoc first document to examine
 * @param endDoc   exclusive upper bound of the document range
 * @return the minimum ordinal seen, or {@code -1} if none of the visited documents has a value
 * @throws IOException if reading the values or advancing the iterator fails
 */
@Override
protected int pick(SortedDocValues values, DocIdSetIterator docItr, int startDoc, int endDoc) throws IOException {
    int smallestOrd = Integer.MAX_VALUE;
    boolean sawValue = false;

    int current = startDoc;
    while (current < endDoc) {
        if (values.advanceExact(current)) {
            final int candidate = values.ordValue();
            if (candidate < smallestOrd) {
                smallestOrd = candidate;
            }
            sawValue = true;
        }
        current = docItr.nextDoc();
    }

    if (!sawValue) {
        return -1;
    }
    return smallestOrd;
}
Example 5
Project: spatial-solr-sandbox-master File: GeometryOperationFilter.java View source code |
/**
 * Collects the documents of one segment whose stored geometry satisfies {@code tester}.
 *
 * <p>Each accepted document's value is read from the sorted doc values of {@code fieldName},
 * parsed with {@code wkbReader}, and tested. Documents with an empty value are skipped, and
 * values that fail to parse are logged and skipped.
 *
 * @param context    the segment to scan
 * @param acceptDocs documents that may match, or {@code null} to consider all of them
 * @return the matching documents, or {@code null} if the field has no sorted doc values here
 * @throws IOException if the doc values cannot be read
 */
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    AtomicReader areader = context.reader();
    SortedDocValues geometryValues = areader.getSortedDocValues(fieldName);
    if (geometryValues == null) {
        return null;
    }

    OpenBitSet matches = new OpenBitSet(areader.maxDoc());
    // The stream's own BytesRef is reused as the scratch buffer for every lookup.
    BytesRef scratch = bstream.getBytesRef();

    for (int docID = 0; docID < areader.maxDoc(); docID++) {
        if (acceptDocs != null && !acceptDocs.get(docID)) {
            continue;
        }
        geometryValues.get(docID, scratch);
        if (scratch.length <= 0) {
            continue;
        }
        try {
            // likely the same instance, but make sure the stream reads the bytes just fetched
            bstream.setBytesRef(scratch);
            Geometry geometry = wkbReader.read(bstream);
            if (tester.matches(geometry)) {
                matches.set(docID);
            }
        } catch (ParseException ex) {
            log.warn("error reading indexed geometry", ex);
        }
    }
    return matches;
}
Example 6
Project: lucene-solr-master File: TestJoinUtil.java View source code |
/**
 * Indexes three "product" / "price" document groups across two commits and verifies that
 * ordinal-based join queries return the expected documents in both join directions.
 */
public void testSimpleOrdinalsJoin() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // A field indicating to what type a document belongs, which is then used to distinguish
    // between documents during joining.
    final String typeField = "type";
    // A single sorted doc values field that holds the join values for all document types.
    // Typically during indexing a schema will automatically create this field with the values.
    final String joinField = idField + productIdField;

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
            random(),
            dir,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    // 0
    Document document = new Document();
    document.add(new TextField(idField, "1", Field.Store.NO));
    document.add(new TextField(typeField, "product", Field.Store.NO));
    document.add(new TextField("description", "random text", Field.Store.NO));
    document.add(new TextField("name", "name1", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(document);

    // 1
    document = new Document();
    document.add(new TextField(productIdField, "1", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "10.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(document);

    // 2
    document = new Document();
    document.add(new TextField(productIdField, "1", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "20.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    writer.addDocument(document);

    // 3
    document = new Document();
    document.add(new TextField(idField, "2", Field.Store.NO));
    document.add(new TextField(typeField, "product", Field.Store.NO));
    document.add(new TextField("description", "more random text", Field.Store.NO));
    document.add(new TextField("name", "name2", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(document);
    writer.commit();

    // 4
    document = new Document();
    document.add(new TextField(productIdField, "2", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "10.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(document);

    // 5
    document = new Document();
    document.add(new TextField(productIdField, "2", Field.Store.NO));
    document.add(new TextField(typeField, "price", Field.Store.NO));
    document.add(new TextField("price", "20.0", Field.Store.NO));
    document.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    writer.addDocument(document);

    IndexSearcher indexSearcher = new IndexSearcher(writer.getReader());
    writer.close();

    // Build the global ordinal map from the per-segment join field values.
    IndexReader reader = indexSearcher.getIndexReader();
    SortedDocValues[] segmentValues = new SortedDocValues[reader.leaves().size()];
    for (int leaf = 0; leaf < segmentValues.length; leaf++) {
        LeafReader leafReader = reader.leaves().get(leaf).reader();
        segmentValues[leaf] = DocValues.getSorted(leafReader, joinField);
    }
    MultiDocValues.OrdinalMap ordinalMap =
            MultiDocValues.OrdinalMap.build(null, segmentValues, PackedInts.DEFAULT);

    Query toQuery = new TermQuery(new Term(typeField, "price"));
    Query fromQuery = new TermQuery(new Term("name", "name2"));

    // Search for product and return prices
    Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    TopDocs hits = indexSearcher.search(joinQuery, 10);
    assertEquals(2, hits.totalHits);
    assertEquals(4, hits.scoreDocs[0].doc);
    assertEquals(5, hits.scoreDocs[1].doc);

    fromQuery = new TermQuery(new Term("name", "name1"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    hits = indexSearcher.search(joinQuery, 10);
    assertEquals(2, hits.totalHits);
    assertEquals(1, hits.scoreDocs[0].doc);
    assertEquals(2, hits.scoreDocs[1].doc);

    // Search for prices and return products
    fromQuery = new TermQuery(new Term("price", "20.0"));
    toQuery = new TermQuery(new Term(typeField, "product"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    hits = indexSearcher.search(joinQuery, 10);
    assertEquals(2, hits.totalHits);
    assertEquals(0, hits.scoreDocs[0].doc);
    assertEquals(3, hits.scoreDocs[1].doc);

    indexSearcher.getIndexReader().close();
    dir.close();
}
Example 7
Project: simple-category-extraction-component-master File: CategoryExtractionComponent.java View source code |
/**
 * Rewrites the incoming request so that query tokens naming a known category become filter
 * queries instead of free-text terms.
 *
 * <p>The {@code q} parameter is split on spaces and basic punctuation. Every lower-cased token
 * that exists as a term of {@code categoryField} is removed from the query and added as an
 * {@code fq} of the form {@code <categoryField>:<token>}; all other tokens are re-joined with
 * single spaces to form the new {@code q}. If no tokens remain, {@code q} becomes {@code *:*}.
 * A request without a {@code q} parameter is left untouched.
 *
 * @param rb the response builder whose request parameters are rewritten in place
 * @throws IOException if the category terms cannot be loaded from the index
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String qStr = params.get(CommonParams.Q);
    if (qStr == null) {
        // Nothing to tokenize; StringTokenizer would throw a NullPointerException on null input.
        return;
    }

    SolrIndexSearcher searcher = req.getSearcher();
    SortedDocValues fieldValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), categoryField);
    ModifiableSolrParams modParams = new ModifiableSolrParams(params);

    // tokenize the query string, if any part of it matches, remove the token from the list and
    // add a filter query with <categoryField>:value
    StringTokenizer strtok = new StringTokenizer(qStr, " .,:;\"'");
    StringBuilder strbldr = new StringBuilder();
    while (strtok.hasMoreTokens()) {
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        String tok = strtok.nextToken().toLowerCase(java.util.Locale.ROOT);
        Log.info("got token: " + tok);
        // BytesRef(CharSequence) encodes as UTF-8, unlike getBytes() which uses the platform charset.
        BytesRef key = new BytesRef(tok);
        if (fieldValues.lookupTerm(key) >= 0) {
            String fq = categoryField + ":" + tok;
            Log.info("adding fq " + fq);
            modParams.add("fq", fq);
        } else {
            // Separate tokens with a single space, without leaving a trailing one.
            if (strbldr.length() > 0) {
                strbldr.append(' ');
            }
            strbldr.append(tok);
        }
    }

    String modQ = strbldr.toString();
    // if the query is now empty, make sure it hits on everything
    if (modQ.trim().length() == 0) {
        modQ = "*:*";
    }
    Log.info("final q string is: '" + modQ + "'");
    modParams.set("q", modQ);
    req.setParams(modParams);
}
Example 8
Project: incubator-blur-master File: SecureAtomicReader.java View source code |
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { final SortedDocValues sortedDocValues = in.getSortedDocValues(field); if (sortedDocValues == null) { return null; } return new SortedDocValues() { @Override public void lookupOrd(int ord, BytesRef result) { sortedDocValues.lookupOrd(ord, result); } @Override public int getValueCount() { return sortedDocValues.getValueCount(); } @Override public int getOrd(int docID) { try { if (_accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)) { return sortedDocValues.getOrd(docID); } // Default missing value. return -1; } catch (IOException e) { throw new RuntimeException(e); } } }; }
Example 9
Project: heliosearch-master File: SimpleFacetsHS.java View source code |
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 *
 * @param searcher  supplies the schema and the atomic reader the terms index is loaded from
 * @param docs      the documents to count values for
 * @param fieldName the field whose values are counted
 * @param offset    number of leading result entries to skip
 * @param limit     maximum number of entries to return; a negative value means no limit
 * @param mincount  minimum count a term needs in order to be returned
 * @param missing   if true, an entry with a <code>null</code> name holding the count of
 *                  documents without a value is appended
 * @param sort      {@link FacetParams#FACET_SORT_COUNT} (or its legacy form) for count order;
 *                  any other value yields index order
 * @param prefix    if non-empty, only terms starting with this prefix are counted;
 *                  <code>null</code> and the empty string both mean "no prefix"
 * @return the term/count pairs in the requested order
 * @throws IOException if the terms index cannot be loaded
 */
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
// TODO: If the number of terms is high compared to docs.size(), and zeros==false,
// we should use an alternate strategy to avoid
// 1) creating another huge int[] for the counts
// 2) looping over that huge int[] looking for the rare non-zeros.
//
// Yet another variation: if docs.size() is small and termvectors are stored,
// then use them instead of the FieldCache.
//
// TODO: this function is too big and could use some refactoring, but
// we also need a facet cache, and refactoring of SimpleFacets instead of
// trying to pass all the various params around.
FieldType ft = searcher.getSchema().getFieldType(fieldName);
NamedList<Integer> res = new NamedList<Integer>();
SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);
final BytesRef br = new BytesRef();
final BytesRef prefixRef;
// Normalize the prefix: an empty prefix is treated exactly like no prefix at all.
if (prefix == null) {
prefixRef = null;
} else if (prefix.length() == 0) {
prefix = null;
prefixRef = null;
} else {
prefixRef = new BytesRef(prefix);
}
// Determine the ordinal window [startTermIndex, endTermIndex) that counts are collected for.
int startTermIndex, endTermIndex;
if (prefix != null) {
startTermIndex = si.lookupTerm(prefixRef);
// A negative lookup result is converted to an index via -(result) - 1.
if (startTermIndex < 0)
startTermIndex = -startTermIndex - 1;
prefixRef.append(UnicodeUtil.BIG_TERM);
endTermIndex = si.lookupTerm(prefixRef);
assert endTermIndex < 0;
endTermIndex = -endTermIndex - 1;
} else {
// Starting at -1 reserves slot 0 of the counts array for ordinal -1
// (presumably documents without a value -- see the missingCount handling below).
startTermIndex = -1;
endTermIndex = si.getValueCount();
}
final int nTerms = endTermIndex - startTermIndex;
int missingCount = -1;
final CharsRef charsRef = new CharsRef(10);
if (nTerms > 0 && docs.size() >= mincount) {
// count collection array only needs to be as big as the number of terms we are
// going to collect counts for.
final int[] counts = new int[nTerms];
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int term = si.getOrd(iter.nextDoc());
// Shift the ordinal into the window; ordinals outside the window are ignored.
int arrIdx = term - startTermIndex;
if (arrIdx >= 0 && arrIdx < nTerms)
counts[arrIdx]++;
}
if (startTermIndex == -1) {
missingCount = counts[0];
}
// IDEA: we could also maintain a count of "other"... everything that fell outside
// of the top 'N'
int off = offset;
int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
maxsize = Math.min(maxsize, nTerms);
LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
// the smallest value in the top 'N' values
int min = mincount - 1;
// Slot 0 is skipped when it holds the count for ordinal -1 rather than a real term.
for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
int c = counts[i];
if (c > min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead
// The count goes into the upper 32 bits and (Integer.MAX_VALUE - i) into the lower 32 bits.
long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
boolean displaced = queue.insert(pair);
if (displaced)
min = (int) (queue.top() >>> 32);
}
}
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
assert collectCount <= lim;
// the start and end indexes of our list "sorted" (starting with the highest value)
int sortedIdxStart = queue.size() - (collectCount - 1);
int sortedIdxEnd = queue.size() + 1;
final long[] sorted = queue.sort(collectCount);
for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
long pair = sorted[i];
// Unpack the count (upper 32 bits) and the counts-array index (lower 32 bits).
int c = (int) (pair >>> 32);
int tnum = Integer.MAX_VALUE - (int) pair;
si.lookupOrd(startTermIndex + tnum, br);
ft.indexedToReadable(br, charsRef);
res.add(charsRef.toString(), c);
}
} else {
// add results in index order
int i = (startTermIndex == -1) ? 1 : 0;
if (mincount <= 0) {
// if mincount<=0, then we won't discard any terms and we know exactly
// where to start.
i += off;
off = 0;
}
for (; i < nTerms; i++) {
int c = counts[i];
// Terms below mincount are dropped; the first "off" qualifying terms are skipped.
if (c < mincount || --off >= 0)
continue;
if (--lim < 0)
break;
si.lookupOrd(startTermIndex + i, br);
ft.indexedToReadable(br, charsRef);
res.add(charsRef.toString(), c);
}
}
}
if (missing) {
// missingCount is still negative when it was not obtained from counts[0] above
// (a prefix was given, or the counting block was skipped); compute it separately then.
if (missingCount < 0) {
missingCount = getFieldMissingCount(searcher, docs, fieldName);
}
res.add(null, missingCount);
}
return res;
}
Example 10
Project: clue-master File: DocValCommand.java View source code |
private void showDocId(int docid, int docBase, Object docVals, DocValuesType docValType, BytesRef bytesRef, PrintStream out, int segmentid) throws Exception { int subid = docid - docBase; if (docVals != null) { String val; switch(docValType) { case NUMERIC: NumericDocValues dv = (NumericDocValues) docVals; val = String.valueOf(dv.get(subid)); break; case BINARY: BinaryDocValues bv = (BinaryDocValues) docVals; bytesRef = bv.get(subid); val = bytesRef.utf8ToString(); break; case SORTED: { SortedDocValues sv = (SortedDocValues) docVals; bytesRef = sv.get(subid); StringBuilder sb = new StringBuilder(); sb.append(NUM_TERMS_IN_FIELD).append(sv.getValueCount()).append(", "); sb.append("value: ["); sb.append(bytesRef.utf8ToString()); sb.append("]"); val = sb.toString(); break; } case SORTED_SET: { SortedSetDocValues sv = (SortedSetDocValues) docVals; sv.setDocument(subid); long nextOrd; long count = sv.getValueCount(); StringBuilder sb = new StringBuilder(); sb.append(NUM_TERMS_IN_FIELD).append(count).append(", "); sb.append("values: ["); boolean firstPass = true; while ((nextOrd = sv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { bytesRef = sv.lookupOrd(nextOrd); if (!firstPass) { sb.append(", "); } sb.append(bytesRef.utf8ToString()); firstPass = false; } sb.append("]"); val = sb.toString(); break; } case SORTED_NUMERIC: { SortedNumericDocValues sv = (SortedNumericDocValues) docVals; sv.setDocument(subid); int count = sv.count(); StringBuilder sb = new StringBuilder(); sb.append(NUM_TERMS_IN_FIELD).append(count).append(", "); sb.append("values: ["); boolean firstPass = true; for (int i = 0; i < count; ++i) { long nextVal = sv.valueAt(i); if (!firstPass) { sb.append(", "); } sb.append(String.valueOf(nextVal)); firstPass = false; } sb.append("]"); val = sb.toString(); break; } default: val = null; } if (val == null) { out.println("cannot read doc value type: " + docValType); } else { out.println("type: " + docValType + ", val: " + val + ", segment: " + segmentid + ", 
docid: " + docid + ", subid: " + subid); } } else { out.println("doc value unavailable"); } }
Example 11
Project: bobo-master File: DocComparatorSource.java View source code |
/**
 * Creates a comparator that orders documents by the term ordinal of {@code field}.
 *
 * @param reader  the segment reader whose terms index is loaded from the field cache
 * @param docbase not used by this implementation
 * @return a comparator that compares documents by ordinal and reports the term text as value
 * @throws IOException if the terms index cannot be loaded
 */
@Override
public DocComparator getComparator(AtomicReader reader, int docbase) throws IOException {
    final SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    return new DocComparator() {

        /** Orders two documents by the ordinal of their term. */
        @Override
        public int compare(ScoreDoc doc1, ScoreDoc doc2) {
            return values.getOrd(doc1.doc) - values.getOrd(doc2.doc);
        }

        /**
         * Returns the term text of the document, or an empty string when the document has no
         * term for the field.
         */
        @Override
        public String value(ScoreDoc doc) {
            int ord = values.getOrd(doc.doc);
            if (ord < 0) {
                // A negative ordinal marks a document without a value; it is not a valid
                // argument for lookupOrd, so report an empty value instead of failing.
                return "";
            }
            BytesRef term = new BytesRef();
            values.lookupOrd(ord, term);
            return term.utf8ToString();
        }
    };
}
Example 12
Project: stargate-core-master File: IndexEntryCollector.java View source code |
/**
 * Assembles the {@link IndexEntry} for one collected document.
 *
 * <p>Reads the primary key name, primary key and row key for {@code doc}, then gathers the
 * document's value from every registered numeric and string doc values source, keyed by the
 * name each source is registered under.
 *
 * @param slot  the slot passed through to the entry
 * @param doc   the document id used for all doc value lookups; {@code docBase} is added to it
 *              for the id stored in the entry
 * @param score the score passed through to the entry
 * @return the populated entry
 * @throws IOException if a doc value cannot be read
 */
IndexEntry getIndexEntry(int slot, int doc, float score) throws IOException {
    String pkName = LuceneUtils.primaryKeyName(pkNames, doc);
    ByteBuffer primaryKey = LuceneUtils.byteBufferDocValue(primaryKeys, doc);
    ByteBuffer rowKey = LuceneUtils.byteBufferDocValue(rowKeys, doc);

    Map<String, Number> numericDocValues = new HashMap<>();
    for (Map.Entry<String, NumericDocValues> numericField : numericDocValuesMap.entrySet()) {
        String fieldName = numericField.getKey();
        Type luceneType = AggregateFunction.getLuceneType(options, fieldName);
        Number fieldValue = LuceneUtils.numericDocValue(numericField.getValue(), doc, luceneType);
        numericDocValues.put(fieldName, fieldValue);
    }

    Map<String, String> binaryDocValues = new HashMap<>();
    for (Map.Entry<String, SortedDocValues> stringField : stringDocValues.entrySet()) {
        String fieldValue = LuceneUtils.stringDocValue(stringField.getValue(), doc);
        binaryDocValues.put(stringField.getKey(), fieldValue);
    }

    int globalDoc = docBase + doc;
    return new IndexEntry(rowKey, pkName, primaryKey, slot, globalDoc, score, numericDocValues, binaryDocValues);
}
Example 13
Project: elk-master File: EmptyReader.java View source code |
/**
 * Always returns {@code null}, whatever field is requested.
 *
 * @param field the requested field; not consulted
 * @return {@code null}
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    return null;
}