Java Examples for org.apache.lucene.index.SortedDocValues

The following Java examples will help you understand the usage of org.apache.lucene.index.SortedDocValues. These source code samples are taken from different open source projects.

Example 1
Project: crate-master  File: View source code
/**
 * Points {@code values} at the doc values of {@code columnName} for the given segment.
 * <p>
 * When the sorted-set doc values can be unwrapped into a single-valued view, that view is used
 * directly. Otherwise an adapter is installed whose {@code getOrd} throws
 * {@link GroupByOnArrayUnsupportedException} as soon as a second ordinal is seen.
 * <p>
 * NOTE(review): this excerpt is truncated -- closing braces (and possibly other lines) are missing.
 *
 * @param context leaf context whose reader supplies the doc values
 * @throws IOException declared by the signature
 */
public void setNextReader(LeafReaderContext context) throws IOException {
    SortedSetDocValues setDocValues = context.reader().getSortedSetDocValues(columnName);
    // A null result is treated below as "no single-valued view available".
    final SortedDocValues singleton = DocValues.unwrapSingleton(setDocValues);
    if (singleton != null) {
        values = singleton;
    } else {
        // Adapter exposing the sorted-set values through the single-valued SortedDocValues API.
        values = new SortedDocValues() {

            // NOTE(review): docID is not used in the visible lines -- confirm that the
            // underlying iterator is positioned on the document elsewhere.
            public int getOrd(int docID) {
                int ord = (int) setDocValues.nextOrd();
                // A second ordinal means the column holds more than one value.
                if (setDocValues.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                    throw new GroupByOnArrayUnsupportedException(columnName);
                return ord;

            // Delegates the ordinal-to-term lookup to the wrapped sorted-set values.
            public BytesRef lookupOrd(int ord) {
                return setDocValues.lookupOrd(ord);

            // Narrows the wrapped value count to int.
            public int getValueCount() {
                return (int) setDocValues.getValueCount();
Example 2
Project: elassandra-master  File: View source code
/**
 * Creates the scorer for one segment, or returns {@code null} when nothing can match.
 * <p>
 * {@code null} is returned when the parent doc-id set is empty, when {@code remaining} is 0,
 * when the parent iterator is {@code null}, or when no ordinal values exist for
 * {@code parentType}. Otherwise the scorer implementation is chosen from {@code scoreType}
 * and from whether child-count limits ({@code minChildren}/{@code maxChildren}) apply.
 * <p>
 * NOTE(review): this excerpt is truncated -- closing braces are missing, and the two
 * back-to-back {@code return} statements suggest that {@code default:} labels were dropped.
 *
 * @param context the segment to score
 * @return a scorer over matching parent documents, or {@code null}
 * @throws IOException declared by the signature
 */
public Scorer scorer(LeafReaderContext context) throws IOException {
    DocIdSet parentsSet = parentFilter.getDocIdSet(context, null);
    if (Lucene.isEmpty(parentsSet) || remaining == 0) {
        return null;
    // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
    // count down (short circuit) logic will then work as expected.
    DocIdSetIterator parents = BitsFilteredDocIdSet.wrap(parentsSet, context.reader().getLiveDocs()).iterator();
    if (parents != null) {
        SortedDocValues bytesValues = collector.globalIfd.load(context).getOrdinalsValues(parentType);
        if (bytesValues == null) {
            return null;
        // Count-aware scorers: used when child-count limits are set or no scoring is requested.
        if (minChildren > 0 || maxChildren != 0 || scoreType == ScoreType.NONE) {
            switch(scoreType) {
                case NONE:
                    DocIdSetIterator parentIdIterator = new CountParentOrdIterator(this, parents, collector, bytesValues, minChildren, maxChildren);
                    return ConstantScorer.create(parentIdIterator, this, queryWeight);
                case AVG:
                    return new AvgParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
                    return new ParentCountScorer(this, parents, collector, bytesValues, minChildren, maxChildren);
        // Plain scorers: no child-count limits to enforce.
        switch(scoreType) {
            case AVG:
                return new AvgParentScorer(this, parents, collector, bytesValues);
                return new ParentScorer(this, parents, collector, bytesValues);
    return null;
Example 3
Project: montysolr-master  File: View source code
/**
 * Given the set of fields, we'll look inside them and retrieve (into memory)
 * all values.
 */
// Walks every leaf (segment) of the searcher's index reader and, for each requested field,
// pushes the per-document values into `setter`. The way values are read is picked from the
// field's DocValuesType; fields reporting DocValuesType.NONE are read through an
// UninvertingReader configured with a per-field type mapping.
//
// NOTE(review): this excerpt is truncated -- closing braces and several statements
// (apparently `continue`/`return`/`break`, `default:` and the loop increment) are missing.
private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List<String> fields, KVSetter setter) throws IOException {
    IndexSchema schema = searcher.getCore().getLatestSchema();
    List<LeafReaderContext> leaves = searcher.getIndexReader().getContext().leaves();
    Bits liveDocs;
    LeafReader lr;
    Transformer transformer;
    for (LeafReaderContext leave : leaves) {
        // docBase is handed to the setter together with the segment-local doc id.
        int docBase = leave.docBase;
        liveDocs = leave.reader().getLiveDocs();
        lr = leave.reader();
        FieldInfos fInfo = lr.getFieldInfos();
        for (String field : fields) {
            FieldInfo fi = fInfo.fieldInfo(field);
            if (fi == null) {
                // NOTE(review): fi is dereferenced below; presumably a `continue` followed here.
                log.error("Field " + field + " has no schema entry; skipping it!");
            SchemaField fSchema = schema.getField(field);
            DocValuesType fType = fi.getDocValuesType();
            Map<String, Type> mapping = new HashMap<String, Type>();
            final LeafReader unReader;
            if (fType.equals(DocValuesType.NONE)) {
                // NOTE(review): `c` is the class of the DocValuesType value, yet it is compared
                // against field-type classes -- confirm whether the schema field's type was intended.
                Class<? extends DocValuesType> c = fType.getClass();
                if (c.isAssignableFrom(TextField.class) || c.isAssignableFrom(StrField.class)) {
                    if (fSchema.multiValued()) {
                        mapping.put(field, Type.SORTED);
                    } else {
                        mapping.put(field, Type.BINARY);
                } else if (c.isAssignableFrom(TrieIntField.class)) {
                    if (fSchema.multiValued()) {
                        mapping.put(field, Type.SORTED_SET_INTEGER);
                    } else {
                        mapping.put(field, Type.INTEGER_POINT);
                } else {
                unReader = new UninvertingReader(lr, mapping);
            } else {
                // The field already has doc values: read them straight from the leaf reader.
                unReader = lr;
            switch(fType) {
                case NUMERIC:
                    // One numeric value per document, narrowed to int.
                    transformer = new Transformer() {

                        NumericDocValues dv = unReader.getNumericDocValues(field);

                        public void process(int docBase, int docId) {
                            int v = (int) dv.get(docId);
                            setter.set(docBase, docId, v);
                case SORTED_NUMERIC:
                    // Several numeric values per document, each narrowed to int.
                    transformer = new Transformer() {

                        SortedNumericDocValues dv = unReader.getSortedNumericDocValues(field);

                        public void process(int docBase, int docId) {
                            // NOTE(review): docId is not used to position dv in the visible lines -- confirm.
                            int max = dv.count();
                            int v;
                            for (int i = 0; i < max; i++) {
                                v = (int) dv.valueAt(i);
                                setter.set(docBase, docId, v);
                case SORTED_SET:
                    // Several term values per document, emitted as UTF-8 decoded strings.
                    transformer = new Transformer() {

                        SortedSetDocValues dv = unReader.getSortedSetDocValues(field);

                        int errs = 0;

                        public void process(int docBase, int docId) {
                            // NOTE(review): the body of this `if` is not visible in the excerpt.
                            if (errs > 5)
                            for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
                                final BytesRef value = dv.lookupOrd(ord);
                                setter.set(docBase, docId, value.utf8ToString());
                case SORTED:
                    // One term value per document, emitted as a UTF-8 decoded string.
                    transformer = new Transformer() {

                        SortedDocValues dv = unReader.getSortedDocValues(field);

                        TermsEnum te;

                        public void process(int docBase, int docId) {
                            BytesRef v = dv.get(docId);
                            // NOTE(review): the body of this `if` is not visible in the excerpt.
                            if (v.length == 0)
                            setter.set(docBase, docId, v.utf8ToString());
                    throw new IllegalArgumentException("The field " + field + " is of type that cannot be un-inverted");
            // Visit every document slot of the segment.
            int i = 0;
            while (i < lr.maxDoc()) {
                // True when live-docs information exists and does not mark doc i as live.
                if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
                transformer.process(docBase, i);
Example 4
Project: elasticsearch-master  File: View source code
protected int pick(SortedDocValues values, DocIdSetIterator docItr, int startDoc, int endDoc) throws IOException {
    int ord = Integer.MAX_VALUE;
    boolean hasValue = false;
    for (int doc = startDoc; doc < endDoc; doc = docItr.nextDoc()) {
        if (values.advanceExact(doc)) {
            final int innerOrd = values.ordValue();
            ord = Math.min(ord, innerOrd);
            hasValue = true;
    return hasValue ? ord : -1;
Example 5
Project: spatial-solr-sandbox-master  File: View source code
/**
 * Builds the set of documents in this segment whose indexed geometry satisfies {@code tester}.
 * <p>
 * Returns {@code null} when the segment has no sorted doc values for {@code fieldName}.
 * Documents rejected by {@code acceptDocs}, or whose stored bytes are empty, are skipped.
 * A {@link ParseException} raised while handling a document is logged as a warning.
 * <p>
 * NOTE(review): this excerpt is truncated and garbled -- the expression assigned to
 * {@code geo}, the statement run when the tester matches, and all closing braces are missing.
 *
 * @param context    the segment being filtered
 * @param acceptDocs documents allowed to match, or {@code null} to allow all
 * @return the matching documents, or {@code null} when the field has no doc values
 * @throws IOException declared by the signature
 */
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    AtomicReader areader = context.reader();
    SortedDocValues sortedDocValues = areader.getSortedDocValues(fieldName);
    if (sortedDocValues == null)
        return null;
    OpenBitSet bits = new OpenBitSet(areader.maxDoc());
    // The same BytesRef is refilled for every document.
    BytesRef bytes = bstream.getBytesRef();
    for (int docID = 0; docID < areader.maxDoc(); docID++) {
        if (acceptDocs == null || acceptDocs.get(docID)) {
            sortedDocValues.get(docID, bytes);
            // Empty bytes are skipped; presumably they mean the document has no geometry.
            if (bytes.length > 0) {
                try {
                    // likely the same
                    Geometry geo =;
                    if (tester.matches(geo)) {
                } catch (ParseException ex) {
                    log.warn("error reading indexed geometry", ex);
    return bits;
Example 6
Project: lucene-solr-master  File: View source code
/**
 * Exercises ordinal-based joins between "product" and "price" documents.
 * <p>
 * Six documents are built (two products, each with two prices) that share one sorted doc
 * values join field. Join queries are then created in both directions, and the hit counts
 * and doc ids are asserted.
 * <p>
 * NOTE(review): this excerpt is truncated and garbled -- no call adding the documents to the
 * writer is visible, the right-hand sides of the {@code ordinalMap} and {@code result}
 * assignments are cut off, and closing braces are missing.
 */
public void testSimpleOrdinalsJoin() throws Exception {
    final String idField = "id";
    final String productIdField = "productId";
    // A field indicating to what type a document belongs, which is then used to distinguish between documents during joining.
    final String typeField = "type";
    // A single sorted doc values field that holds the join values for all document types.
    // Typically during indexing a schema will automatically create this field with the values
    final String joinField = idField + productIdField;
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
    // 0
    Document doc = new Document();
    doc.add(new TextField(idField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "random text", Field.Store.NO));
    doc.add(new TextField("name", "name1", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    // 1
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    // 2
    doc = new Document();
    doc.add(new TextField(productIdField, "1", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
    // 3
    doc = new Document();
    doc.add(new TextField(idField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "product", Field.Store.NO));
    doc.add(new TextField("description", "more random text", Field.Store.NO));
    doc.add(new TextField("name", "name2", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    // 4
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "10.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    // 5
    doc = new Document();
    doc.add(new TextField(productIdField, "2", Field.Store.NO));
    doc.add(new TextField(typeField, "price", Field.Store.NO));
    doc.add(new TextField("price", "20.0", Field.Store.NO));
    doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
    IndexReader r = indexSearcher.getIndexReader();
    // Collect the join field's sorted doc values of every segment.
    SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
    for (int i = 0; i < values.length; i++) {
        LeafReader leafReader = r.leaves().get(i).reader();
        values[i] = DocValues.getSorted(leafReader, joinField);
    MultiDocValues.OrdinalMap ordinalMap =, values, PackedInts.DEFAULT);
    Query toQuery = new TermQuery(new Term(typeField, "price"));
    Query fromQuery = new TermQuery(new Term("name", "name2"));
    // Search for product and return prices
    Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    TopDocs result =, 10);
    assertEquals(2, result.totalHits);
    assertEquals(4, result.scoreDocs[0].doc);
    assertEquals(5, result.scoreDocs[1].doc);
    fromQuery = new TermQuery(new Term("name", "name1"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result =, 10);
    assertEquals(2, result.totalHits);
    assertEquals(1, result.scoreDocs[0].doc);
    assertEquals(2, result.scoreDocs[1].doc);
    // Search for prices and return products
    fromQuery = new TermQuery(new Term("price", "20.0"));
    toQuery = new TermQuery(new Term(typeField, "product"));
    joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
    result =, 10);
    assertEquals(2, result.totalHits);
    assertEquals(0, result.scoreDocs[0].doc);
    assertEquals(3, result.scoreDocs[1].doc);
Example 7
Project: simple-category-extraction-component-master  File: View source code
/**
 * Rewrites the incoming query so that tokens matching a known category become filter queries.
 * <p>
 * The {@code q} parameter is tokenized and lower-cased. Every token found among the terms of
 * {@code categoryField} is added as an {@code fq} of the form {@code <categoryField>:token}.
 * If the rebuilt query string is blank, it is replaced by {@code *:*}.
 * <p>
 * NOTE(review): this excerpt is truncated and garbled -- the logging calls lost their
 * receiver and are fused onto neighbouring lines, the code appending non-matching tokens to
 * {@code strbldr} is incomplete, and closing braces are missing.
 *
 * @param rb the response builder carrying the request
 * @throws IOException declared by the signature
 */
public void prepare(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrIndexSearcher searcher = req.getSearcher();
    // Terms index of the category field; used below to test whether a token is a known term.
    SortedDocValues fieldValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), categoryField);
    SolrParams params = req.getParams();
    ModifiableSolrParams modParams = new ModifiableSolrParams(params);
    String qStr = params.get(CommonParams.Q);
    // tokenize the query string, if any part of it matches, remove the token from the list and
    // add a filter query with <categoryField>:value
    StringTokenizer strtok = new StringTokenizer(qStr, " .,:;\"'");
    StringBuilder strbldr = new StringBuilder();
    while (strtok.hasMoreTokens()) {
        String tok = strtok.nextToken().toLowerCase();"got token: " + tok);
        // NOTE(review): getBytes() without an explicit charset uses the platform default.
        BytesRef key = new BytesRef(tok.getBytes());
        // A non-negative result is treated as "token is a known category term".
        if (fieldValues.lookupTerm(key) >= 0) {
            String fq = new String(categoryField + ":" + tok);
  "adding fq " + fq);
            modParams.add("fq", fq);
        } else {
            if (strbldr.length() > 0) {
                strbldr.append(" ");
    String modQ = strbldr.toString();
    // if the query is now empty, make sure it hits on everything
    if (modQ.trim().length() == 0) {
        modQ = "*:*";
    }"final q string is: '" + modQ + "'");
    modParams.set("q", modQ);
Example 8
Project: incubator-blur-master  File: View source code
public SortedDocValues getSortedDocValues(String field) throws IOException {
    final SortedDocValues sortedDocValues = in.getSortedDocValues(field);
    if (sortedDocValues == null) {
        return null;
    return new SortedDocValues() {

        public void lookupOrd(int ord, BytesRef result) {
            sortedDocValues.lookupOrd(ord, result);

        public int getValueCount() {
            return sortedDocValues.getValueCount();

        public int getOrd(int docID) {
            try {
                if (_accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)) {
                    return sortedDocValues.getOrd(docID);
                // Default missing value.
                return -1;
            } catch (IOException e) {
                throw new RuntimeException(e);
Example 9
Project: heliosearch-master  File: View source code
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
// Counts, per term of `fieldName`, how many documents of `docs` carry that term, and returns
// the (readable term, count) pairs as a NamedList. `prefix` restricts the term range,
// `offset`/`limit` page through the result, `mincount` drops rare terms, `sort` selects count
// order versus index order, and `missing` appends a null-keyed entry with the missing count.
//
// NOTE(review): this excerpt is truncated and garbled -- closing braces and several statements
// (loop bodies after bare `if`s, `continue`/`break`, part of the `min = ...` expression) are missing.
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    //  we should use an alternate strategy to avoid
    //  1) creating another huge int[] for the counts
    //  2) looping over that huge int[] looking for the rare non-zeros.
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    NamedList<Integer> res = new NamedList<Integer>();
    SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);
    final BytesRef br = new BytesRef();
    // An empty prefix is normalised to "no prefix".
    final BytesRef prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRef(prefix);
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = si.lookupTerm(prefixRef);
        // A negative lookup result is converted to an index via -(result) - 1.
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // NOTE(review): as visible, this repeats the lookup with the same prefixRef; a line
        // modifying prefixRef before the second lookup appears to be missing -- confirm.
        endTermIndex = si.lookupTerm(prefixRef);
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        // Without a prefix the range starts at -1, so slot 0 of `counts` collects ordinal -1.
        startTermIndex = -1;
        endTermIndex = si.getValueCount();
    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRef charsRef = new CharsRef(10);
    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
            int term = si.getOrd(iter.nextDoc());
            int arrIdx = term - startTermIndex;
            // NOTE(review): the statement guarded by this `if` is not visible in the excerpt.
            if (arrIdx >= 0 && arrIdx < nTerms)
        if (startTermIndex == -1) {
            missingCount = counts[0];
        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
            // the smallest value in the top 'N' values
            int min = mincount - 1;
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered.  This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    // smaller term numbers sort higher, so subtract the term number instead
                    // The count sits in the high 32 bits, (Integer.MAX_VALUE - i) in the low bits.
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        min = (int) ( >>> 32);
            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;
            // the start and end indexes of our list "sorted" (starting with the highest value)
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                // Unpack the count and the term number from the packed long.
                long pair = sorted[i];
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                si.lookupOrd(startTermIndex + tnum, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i += off;
                off = 0;
            for (; i < nTerms; i++) {
                int c = counts[i];
                // NOTE(review): the statements guarded by the next two `if`s are not visible
                // in the excerpt.
                if (c < mincount || --off >= 0)
                if (--lim < 0)
                si.lookupOrd(startTermIndex + i, br);
                ft.indexedToReadable(br, charsRef);
                res.add(charsRef.toString(), c);
    if (missing) {
        // Fall back to an explicit computation when the counting above did not produce it.
        if (missingCount < 0) {
            missingCount = getFieldMissingCount(searcher, docs, fieldName);
        res.add(null, missingCount);
    return res;
Example 10
Project: clue-master  File: View source code
/**
 * Prints the doc value(s) of one document to {@code out}.
 * <p>
 * {@code docVals} is cast according to {@code docValType} and rendered as a string. The
 * segment-local id used for lookups is {@code docid - docBase}. When {@code docVals} is
 * {@code null}, "doc value unavailable" is printed instead.
 * <p>
 * NOTE(review): this excerpt is truncated -- closing braces, the per-case block braces (the
 * locals {@code sv}, {@code sb}, {@code count} and {@code firstPass} are declared in several
 * cases), the {@code break}/{@code default} lines and the statements appending values to
 * the builders are missing.
 *
 * @param docid      document id as given by the caller
 * @param docBase    value subtracted from {@code docid} to obtain the segment-local id
 * @param docVals    doc values instance matching {@code docValType}, or {@code null}
 * @param docValType kind of doc values held by {@code docVals}
 * @param bytesRef   reassigned within the method; the incoming value is not read in the
 *                   visible lines
 * @param out        destination stream
 * @param segmentid  segment identifier echoed in the output
 * @throws Exception declared by the signature
 */
private void showDocId(int docid, int docBase, Object docVals, DocValuesType docValType, BytesRef bytesRef, PrintStream out, int segmentid) throws Exception {
    int subid = docid - docBase;
    if (docVals != null) {
        String val;
        switch(docValType) {
            case NUMERIC:
                NumericDocValues dv = (NumericDocValues) docVals;
                val = String.valueOf(dv.get(subid));
            case BINARY:
                BinaryDocValues bv = (BinaryDocValues) docVals;
                bytesRef = bv.get(subid);
                val = bytesRef.utf8ToString();
            case SORTED:
                    SortedDocValues sv = (SortedDocValues) docVals;
                    bytesRef = sv.get(subid);
                    StringBuilder sb = new StringBuilder();
                    sb.append(NUM_TERMS_IN_FIELD).append(sv.getValueCount()).append(", ");
                    sb.append("value: [");
                    val = sb.toString();
            case SORTED_SET:
                    SortedSetDocValues sv = (SortedSetDocValues) docVals;
                    long nextOrd;
                    long count = sv.getValueCount();
                    StringBuilder sb = new StringBuilder();
                    sb.append(NUM_TERMS_IN_FIELD).append(count).append(", ");
                    sb.append("values: [");
                    // firstPass suppresses the ", " separator before the first element.
                    boolean firstPass = true;
                    while ((nextOrd = sv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                        bytesRef = sv.lookupOrd(nextOrd);
                        if (!firstPass) {
                            sb.append(", ");
                        firstPass = false;
                    val = sb.toString();
            case SORTED_NUMERIC:
                    SortedNumericDocValues sv = (SortedNumericDocValues) docVals;
                    int count = sv.count();
                    StringBuilder sb = new StringBuilder();
                    sb.append(NUM_TERMS_IN_FIELD).append(count).append(", ");
                    sb.append("values: [");
                    boolean firstPass = true;
                    for (int i = 0; i < count; ++i) {
                        long nextVal = sv.valueAt(i);
                        if (!firstPass) {
                            sb.append(", ");
                        firstPass = false;
                    val = sb.toString();
                val = null;
        // A null val means the doc value type could not be rendered.
        if (val == null) {
            out.println("cannot read doc value type: " + docValType);
        } else {
            out.println("type: " + docValType + ", val: " + val + ", segment: " + segmentid + ", docid: " + docid + ", subid: " + subid);
    } else {
        out.println("doc value unavailable");
Example 11
Project: bobo-master  File: View source code
public DocComparator getComparator(AtomicReader reader, int docbase) throws IOException {
    final SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    return new DocComparator() {

        public int compare(ScoreDoc doc1, ScoreDoc doc2) {
            return values.getOrd(doc1.doc) - values.getOrd(doc2.doc);

        public String value(ScoreDoc doc) {
            int ord = values.getOrd(doc.doc);
            BytesRef term = new BytesRef();
            values.lookupOrd(ord, term);
            return term.utf8ToString();
Example 12
Project: stargate-core-master  File: View source code
IndexEntry getIndexEntry(int slot, int doc, float score) throws IOException {
    String pkName = LuceneUtils.primaryKeyName(pkNames, doc);
    ByteBuffer primaryKey = LuceneUtils.byteBufferDocValue(primaryKeys, doc);
    ByteBuffer rowKey = LuceneUtils.byteBufferDocValue(rowKeys, doc);
    Map<String, Number> numericDocValues = new HashMap<>();
    Map<String, String> binaryDocValues = new HashMap<>();
    for (Map.Entry<String, NumericDocValues> entry : numericDocValuesMap.entrySet()) {
        Type type = AggregateFunction.getLuceneType(options, entry.getKey());
        Number number = LuceneUtils.numericDocValue(entry.getValue(), doc, type);
        numericDocValues.put(entry.getKey(), number);
    for (Map.Entry<String, SortedDocValues> entry : stringDocValues.entrySet()) {
        binaryDocValues.put(entry.getKey(), LuceneUtils.stringDocValue(entry.getValue(), doc));
    return new IndexEntry(rowKey, pkName, primaryKey, slot, docBase + doc, score, numericDocValues, binaryDocValues);
Example 13
Project: elk-master  File: View source code
public SortedDocValues getSortedDocValues(final String field) throws IOException {
    return null;