/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.request;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.StatsValuesFactory;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.*;
import org.apache.solr.util.LongPriorityQueue;
import org.apache.solr.util.PrimUtils;

/**
 * Final form of the un-inverted field:
 * Each document points to a list of term numbers that are contained in that document.
 *
 * Term numbers are in sorted order, and are encoded as variable-length deltas from the
 * previous term number.  Real term numbers start at 2 since 0 and 1 are reserved.  A
 * term number of 0 signals the end of the termNumber list.
 *
 * There is a single int[maxDoc()] which either contains a pointer into a byte[] for
 * the termNumber lists, or directly contains the termNumber list if it fits in the 4
 * bytes of an integer.  If the first byte in the integer is 1, the next 3 bytes
 * are a pointer into a byte[] where the termNumber list starts.
 *
 * There are actually 256 byte arrays, to compensate for the fact that the pointers
 * into the byte arrays are only 3 bytes long.  The correct byte array for a document
 * is a function of its id.
 *
 * To save space and speed up faceting, any term that matches enough documents will
 * not be un-inverted... it will be skipped while building the un-inverted field structure,
 * and will use a set intersection method during faceting.
 *
 * To further save memory, the terms (the actual string values) are not all stored in
 * memory, but a TermIndex is used to convert term numbers to term values only
 * for the terms needed after faceting has completed.
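 *
 * As an illustrative sketch of the encoding (the concrete values below are
 * examples, not data stored by this class): a document containing term numbers
 * 5, 12 and 1027 is encoded as the deltas 5, 7 and 1015; each delta has
 * TNUM_OFFSET (2) added and is written as big-endian 7-bit groups with the
 * high bit set on every byte except the last, followed by a 0 terminator:
 * <pre>
 *   delta 5    -> +2 ->    7 -> 0x07
 *   delta 7    -> +2 ->    9 -> 0x09
 *   delta 1015 -> +2 -> 1017 -> 0x87 0x79
 *   end of list         ->      0x00
 * </pre>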
 *
 * Only every 128th term value is stored, along with its corresponding term number,
 * and this is used as an index to find the closest term and iterate until the
 * desired number is hit (very much like Lucene's own internal term index).
 */
public class UnInvertedField extends DocTermOrds {
  private static final int TNUM_OFFSET = 2;

  static class TopTerm {
    BytesRef term;
    int termNum;

    long memSize() {
      return 8 +   // obj header
          8 + 8 + term.length +  // term
          4;  // int
    }
  }

  long memsz;
  final AtomicLong use = new AtomicLong(); // number of uses

  int[] maxTermCounts = new int[1024];

  final Map<Integer,TopTerm> bigTerms = new LinkedHashMap<Integer,TopTerm>();

  private SolrIndexSearcher.DocsEnumState deState;
  private final SolrIndexSearcher searcher;
  private final boolean isPlaceholder;

  private static final UnInvertedField uifPlaceholder = new UnInvertedField();

  private UnInvertedField() { // Dummy for synchronization.
    super("fake", 0, 0); // cheapest initialization I can find.
    isPlaceholder = true;
    searcher = null;
  }

  @Override
  protected void visitTerm(TermsEnum te, int termNum) throws IOException {
    if (termNum >= maxTermCounts.length) {
      // resize by doubling - for a very large number of unique terms, expanding
      // by 4K at a time and the resultant GC would dominate un-invert times.
      // Shrink at the end if the waste is material.
      int[] newMaxTermCounts = new int[maxTermCounts.length * 2];
      System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
      maxTermCounts = newMaxTermCounts;
    }

    final BytesRef term = te.term();

    if (te.docFreq() > maxTermDocFreq) {
      // this term matches too many documents to be un-inverted; remember it
      // and count it later via a set intersection.
      TopTerm topTerm = new TopTerm();
      topTerm.term = BytesRef.deepCopyOf(term);
      topTerm.termNum = termNum;
      bigTerms.put(topTerm.termNum, topTerm);

      if (deState == null) {
        deState = new SolrIndexSearcher.DocsEnumState();
        deState.fieldName = field;
        deState.liveDocs = searcher.getAtomicReader().getLiveDocs();
        deState.termsEnum = te;  // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
        deState.docsEnum = docsEnum;
        deState.minSetSizeCached = maxTermDocFreq;
      }

      docsEnum = deState.docsEnum;
      DocSet set = searcher.getDocSet(deState);
      maxTermCounts[termNum] = set.size();
      set.decref();
    }
  }

  @Override
  protected void setActualDocFreq(int termNum, int docFreq) {
    maxTermCounts[termNum] = docFreq;
  }

  public long memSize() {
    // can cache the mem size since it shouldn't change
    if (memsz != 0) return memsz;
    long sz = super.ramUsedInBytes();
    sz += 8*8 + 32; // local fields
    sz += bigTerms.size() * 64;
    for (TopTerm tt : bigTerms.values()) {
      sz += tt.memSize();
    }
    if (maxTermCounts != null)
      sz += maxTermCounts.length * 4;
    if (indexedTermsArray != null) {
      // assume 8 byte references?
      sz += 8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings;
    }
    memsz = sz;
    return sz;
  }
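
  // A worked example of the big-term threshold chosen by the constructor below
  // (the numbers are illustrative, not taken from any particular index):
  // with maxDoc = 1,000,000, maxTermDocFreq = 1,000,000/20 + 2 = 50,002, so a
  // term matching more than roughly 5% of the documents is treated as a "big
  // term" and counted with a set intersection rather than being un-inverted.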

  public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
    super(field,
          // threshold, over which we use set intersections instead of counting
          // to (1) save memory, and (2) speed up faceting.
          // Add 2 for testing purposes so that there will always be some terms under
          // the threshold even when the index is very small.
          searcher.maxDoc()/20 + 2,
          DEFAULT_INDEX_INTERVAL_BITS);
    //System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc());

    isPlaceholder = false;
    final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field));
    this.searcher = searcher;
    try {
      AtomicReader r = searcher.getAtomicReader();
      uninvert(r, r.getLiveDocs(), prefix == null ? null : new BytesRef(prefix));
    } catch (IllegalStateException ise) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise.getMessage());
    }
    if (tnums != null) {
      for (byte[] target : tnums) {
        if (target != null && target.length > (1<<24)*.9) {
          SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length);
        }
      }
    }

    // free space if outrageously wasteful (tradeoff memory/cpu)
    if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
      int[] newMaxTermCounts = new int[numTermsInField];
      System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
      maxTermCounts = newMaxTermCounts;
    }

    SolrCore.log.info("UnInverted multi-valued field " + toString());
    //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index);
  }

  public int getNumTerms() {
    return numTermsInField;
  }

  public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
    use.incrementAndGet();

    FieldType ft = searcher.getSchema().getFieldType(field);

    NamedList<Integer> res = new NamedList<Integer>();  // order is important

    DocSet docs = baseDocs;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();

    try {
      //System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField);
      if (baseSize >= mincount) {

        final int[] index = this.index;
        // tricky: we add one more element than we need because we will reuse this array later
        // for ordering term ords before converting to term labels.
        final int[] counts = new int[numTermsInField + 1];

        //
        // If there is a prefix, find its start and end term numbers
        //
        int startTerm = 0;
        int endTerm = numTermsInField;  // one past the end

        TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());
        if (te != null && prefix != null && prefix.length() > 0) {
          final BytesRef prefixBr = new BytesRef(prefix);
          if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
            startTerm = numTermsInField;
          } else {
            startTerm = (int) te.ord();
          }
          prefixBr.append(UnicodeUtil.BIG_TERM);
          if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
            endTerm = numTermsInField;
          } else {
            endTerm = (int) te.ord();
          }
        }

        /***********
        // Alternative 2: get the docSet of the prefix (could take a while) and
        // then do the intersection with the baseDocSet first.
        if (prefix != null && prefix.length() > 0) {
          docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
          // The issue with this method is that it can return 0 counts for terms w/o
          // the prefix.  We can't just filter out those terms later because it may
          // mean that we didn't collect enough terms in the queue (in the sorted case).
        }
        ***********/

        boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0
                && startTerm == 0 && endTerm == numTermsInField
                && (docs instanceof BitDocSet || docs instanceof BitDocSetNative);

        if (doNegative) {
          // TODO: when an iterator across the negative elements is available,
          // use that instead of creating a new bitset and inverting.
          if (docs instanceof BitDocSet) {
            FixedBitSet bs = ((BitDocSet)docs).getBits().clone(); // don't mess with the internal bits of the BitDocSet
            bs.flip(0, maxDoc);
            docs = new BitDocSet(bs, maxDoc - baseSize);
          } else {
            BitDocSetNative negSet = ((BitDocSetNative)docs).clone();
            negSet.flip(0, maxDoc);
            negSet.setSize(maxDoc - baseSize);
            docs = negSet;
          }
          // simply negating will mean that we have deleted docs in the set.
          // that should be OK, as their entries in our table should be empty.
          //System.out.println("  NEG");
        }
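
        // Complement-counting sketch (illustrative numbers): if the base set
        // matches 900,000 of 1,000,000 docs, it is cheaper to iterate the
        // 100,000-doc complement and later report
        //   count(term) = maxTermCounts[term] - complementCount(term)
        // which is exactly what the doNegative path above sets up.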

        // For the biggest terms, do straight set intersections
        for (TopTerm tt : bigTerms.values()) {
          //System.out.println("  do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString());
          // TODO: counts could be deferred if sorted==false
          if (tt.termNum >= startTerm && tt.termNum < endTerm) {
            counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs);
            //System.out.println("    count=" + counts[tt.termNum]);
          } else {
            //System.out.println("SKIP term=" + tt.termNum);
          }
        }

        // TODO: we could short-circuit counting altogether for sorted faceting
        // where we already have enough terms from the bigTerms

        // TODO: we could shrink the size of the collection array, and
        // additionally break when the termNumber got above endTerm, but
        // it would require two extra conditionals in the inner loop (although
        // they would be predictable for the non-prefix case).
        // Perhaps a different copy of the code would be warranted.

        if (termInstances > 0) {
          DocIterator iter = docs.iterator();
          while (iter.hasNext()) {
            int doc = iter.nextDoc();
            //System.out.println("iter doc=" + doc);
            int code = index[doc];

            if ((code & 0xff) == 1) {
              // lowest byte is 1: the upper 3 bytes point into one of the tnums arrays
              //System.out.println("  ptr");
              int pos = code >>> 8;
              int whichArray = (doc >>> 16) & 0xff;
              byte[] arr = tnums[whichArray];
              int tnum = 0;
              for (;;) {
                // decode one big-endian 7-bit vInt delta
                int delta = 0;
                for (;;) {
                  byte b = arr[pos++];
                  delta = (delta << 7) | (b & 0x7f);
                  if ((b & 0x80) == 0) break;
                }
                if (delta == 0) break;  // 0 terminates the term number list
                tnum += delta - TNUM_OFFSET;
                //System.out.println("  tnum=" + tnum);
                counts[tnum]++;
              }
            } else {
              // the term number list is packed inline in the 4 bytes of the int
              //System.out.println("  inlined");
              int tnum = 0;
              int delta = 0;
              for (;;) {
                delta = (delta << 7) | (code & 0x7f);
                if ((code & 0x80) == 0) {
                  if (delta == 0) break;
                  tnum += delta - TNUM_OFFSET;
                  //System.out.println("  tnum=" + tnum);
                  counts[tnum]++;
                  delta = 0;
                }
                code >>>= 8;
              }
            }
          }
        }

        final CharsRef charsRef = new CharsRef();

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
          int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
          maxsize = Math.min(maxsize, numTermsInField);
          LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

          int min = mincount - 1;  // the smallest value in the top 'N' values
          //System.out.println("START=" + startTerm + " END=" + endTerm);
          for (int i = startTerm; i < endTerm; i++) {
            int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
            if (c > min) {
              // NOTE: we use c>min rather than c>=min as an optimization because we are going in
              // index order, so we already know that the keys are ordered.  This can be very
              // important if a lot of the counts are repeated (like zero counts would be).

              // smaller term numbers sort higher, so subtract the term number instead
              long pair = (((long)c) << 32) + (Integer.MAX_VALUE - i);
              boolean displaced = queue.insert(pair);
              if (displaced) min = (int)(queue.top() >>> 32);
            }
          }
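
          // Packing sketch: count and term number share one long so the queue
          // orders by count first, then by ascending term number on ties.
          // For example (illustrative values), count=3 and termNum=7 pack to
          //   (3L << 32) + (Integer.MAX_VALUE - 7)
          // and unpack below via (pair >>> 32) and Integer.MAX_VALUE - (int)pair.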

          // now select the right page from the results
          // if we are deep paging, we don't have to order the highest "offset" counts.
          int collectCount = Math.max(0, queue.size() - off);
          assert collectCount <= lim;

          // the start and end indexes of our list "sorted" (starting with the highest value)
          int sortedIdxStart = queue.size() - (collectCount - 1);
          int sortedIdxEnd = queue.size() + 1;
          final long[] sorted = queue.sort(collectCount);

          final int[] indirect = counts;  // reuse the counts array for the index into the tnums array
          assert indirect.length >= sortedIdxEnd;

          for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
            long pair = sorted[i];
            int c = (int)(pair >>> 32);
            int tnum = Integer.MAX_VALUE - (int)pair;

            indirect[i] = i;   // store the index for indirect sorting
            sorted[i] = tnum;  // reuse the "sorted" array to store the term numbers for indirect sorting

            // add a null label for now... we'll fill it in later.
            res.add(null, c);
          }

          // now sort the indexes by the term numbers
          PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() {
            @Override
            public int compare(int a, int b) {
              return (int)sorted[a] - (int)sorted[b];
            }

            @Override
            public boolean lessThan(int a, int b) {
              return sorted[a] < sorted[b];
            }

            @Override
            public boolean equals(int a, int b) {
              return sorted[a] == sorted[b];
            }
          });

          // convert the term numbers to term values and set as the label
          //System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd);
          for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
            int idx = indirect[i];
            int tnum = (int)sorted[idx];
            final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef);
            //System.out.println("  label=" + label);
            res.setName(idx - sortedIdxStart, label);
          }

        } else {
          // add results in index order
          int i = startTerm;
          if (mincount <= 0) {
            // if mincount<=0, then we won't discard any terms and we know exactly
            // where to start.
            i = startTerm + off;
            off = 0;
          }

          for (; i < endTerm; i++) {
            int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
            if (c < mincount || --off >= 0) continue;
            if (--lim < 0) break;

            final String label = getReadableValue(getTermValue(te, i), ft, charsRef);
            res.add(label, c);
          }
        }
      }
    } finally {
      if (docs != baseDocs) {
        // if doNegative, release the negative set
        docs.decref();
        docs = null;
      }
    }

    if (missing) {
      // TODO: a faster solution for this?
      res.add(null, SimpleFacetsHS.getFieldMissingCount(searcher, baseDocs, field));
    }

    //System.out.println("  res=" + res);
    return res;
  }

  /**
   * Collect statistics about the UninvertedField.  Code is very similar to
   * {@link #getCounts(org.apache.solr.search.SolrIndexSearcher, org.apache.solr.search.DocSet, int, int, Integer, boolean, String, String)}.
   * It can be used to calculate stats on multivalued fields.
   * <p/>
   * This method is mainly used by the {@link org.apache.solr.handler.component.StatsComponent}.
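   * <p/>
   * A hypothetical invocation (the {@code searcher} and {@code matchingDocs}
   * variables are assumed to exist in the caller; they are not part of this class):
   * <pre>
   *   UnInvertedField uif = UnInvertedField.getUnInvertedField("price", searcher);
   *   StatsValues stats = uif.getStats(searcher, matchingDocs, false, new String[0]);
   * </pre>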
   *
   * @param searcher The Searcher to use to gather the statistics
   * @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on
   * @param calcDistinct whether distinct values should be collected and counted
   * @param facet One or more fields to facet on.
   * @return The {@link org.apache.solr.handler.component.StatsValues} collected
   * @throws IOException If there is a low-level I/O error.
   */
  public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, boolean calcDistinct, String[] facet) throws IOException {
    // This method is adapted nearly wholesale from getCounts() for use with
    // multi-valued fields within the StatsComponent.  It may be worth finding
    // the common functionality between the two and refactoring at some point.
    use.incrementAndGet();

    SchemaField sf = searcher.getSchema().getField(field);
    // FieldType ft = sf.getType();

    QueryContext qcontext = QueryContext.newContext(searcher);
    StatsValues allstats = StatsValuesFactory.createStatsValues(qcontext, sf, calcDistinct);

    DocSet docs = baseDocs;
    int baseSize = docs.size();
    int maxDoc = searcher.maxDoc();

    if (baseSize <= 0) return allstats;

    DocSet all = searcher.getDocSet(new TermRangeQuery(field, null, null, false, false));
    DocSet missing = docs.andNot(all);
    all.decref();

    try {
      int i = 0;
      final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
      // Initialize facetstats, if facets have been passed in
      for (String f : facet) {
        SchemaField facet_sf = searcher.getSchema().getField(f);
        finfo[i] = new FieldFacetStats(searcher, f, sf, facet_sf, calcDistinct);
        i++;
      }

      final int[] index = this.index;
      // keep track of the number of times we see each word in the field for all the documents in the docset
      final int[] counts = new int[numTermsInField];

      TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());

      boolean doNegative = false;
      if (finfo.length == 0) {
        // we can only use the inverted (complement) counting optimization when
        // no facet fields were passed in; with facet fields we must visit every
        // matching document.
        doNegative = baseSize > maxDoc >> 1 && termInstances > 0
                && (docs instanceof BitDocSet || docs instanceof BitDocSetNative);
      }

      if (doNegative) {
        FixedBitSet bs = docs.getBits();
        if (docs instanceof BitDocSet) {
          bs = bs.clone(); // don't mess with the internal bits of the BitDocSet
        }
        bs.flip(0, maxDoc);
        // TODO: when an iterator across the negative elements is available,
        // use that instead of creating a new bitset and inverting.
        docs = new BitDocSet(bs, maxDoc - baseSize);
        // simply negating will mean that we have deleted docs in the set.
        // that should be OK, as their entries in our table should be empty.
      }

      // For the biggest terms, do straight set intersections
      for (TopTerm tt : bigTerms.values()) {
        // TODO: counts could be deferred if sorted==false
        if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
          final Term t = new Term(field, tt.term);
          if (finfo.length == 0) {
            counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs);
          } else {
            // COULD BE VERY SLOW
            // if we're collecting stats for facet fields, we need to iterate on all matching documents
            try (
              DocSet tdocs = searcher.getDocSet(new TermQuery(t));
              DocSet bigTermDocSet = tdocs.intersection(docs);
            ) {
              DocIterator iter = bigTermDocSet.iterator();
              while (iter.hasNext()) {
                int doc = iter.nextDoc();
                counts[tt.termNum]++;
                for (FieldFacetStats f : finfo) {
                  f.facetTermNum(doc, tt.termNum);
                }
              }
            } // end try-with-resources
          }
        }
      }
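
      // Index-entry sketch (values are illustrative): if (index[doc] & 0xff) == 1,
      // the upper 24 bits are an offset into tnums[(doc >>> 16) & 0xff]; otherwise
      // the int itself holds the vInt-encoded term list.  A doc whose only term
      // number is 5 stores delta 5 + TNUM_OFFSET = 7 inline, i.e. index[doc] == 0x00000007.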

      if (termInstances > 0) {
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
          int doc = iter.nextDoc();
          int code = index[doc];

          if ((code & 0xff) == 1) {
            // lowest byte is 1: the upper 3 bytes point into one of the tnums arrays
            int pos = code >>> 8;
            int whichArray = (doc >>> 16) & 0xff;
            byte[] arr = tnums[whichArray];
            int tnum = 0;
            for (;;) {
              // decode one big-endian 7-bit vInt delta
              int delta = 0;
              for (;;) {
                byte b = arr[pos++];
                delta = (delta << 7) | (b & 0x7f);
                if ((b & 0x80) == 0) break;
              }
              if (delta == 0) break;  // 0 terminates the term number list
              tnum += delta - TNUM_OFFSET;
              counts[tnum]++;
              for (FieldFacetStats f : finfo) {
                f.facetTermNum(doc, tnum);
              }
            }
          } else {
            // the term number list is packed inline in the 4 bytes of the int
            int tnum = 0;
            int delta = 0;
            for (;;) {
              delta = (delta << 7) | (code & 0x7f);
              if ((code & 0x80) == 0) {
                if (delta == 0) break;
                tnum += delta - TNUM_OFFSET;
                counts[tnum]++;
                for (FieldFacetStats f : finfo) {
                  f.facetTermNum(doc, tnum);
                }
                delta = 0;
              }
              code >>>= 8;
            }
          }
        }
      }

      // add results in index order
      for (i = 0; i < numTermsInField; i++) {
        int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
        if (c == 0) continue;
        BytesRef value = getTermValue(te, i);
        allstats.accumulate(value, c);
        // as we've parsed the termnum into a value, let's also accumulate fieldfacet statistics
        for (FieldFacetStats f : finfo) {
          f.accumulateTermNum(i, value);
        }
      }

      int c = missing.size();
      allstats.addMissing(c);

      if (finfo.length > 0) {
        for (FieldFacetStats f : finfo) {
          Map<String,StatsValues> facetStatsValues = f.facetStatsValues;
          FieldType facetType = searcher.getSchema().getFieldType(f.name);
          for (Map.Entry<String,StatsValues> entry : facetStatsValues.entrySet()) {
            String termLabel = entry.getKey();
            int missingCount = searcher.numDocs(new TermQuery(new Term(f.name, facetType.toInternal(termLabel))), missing);
            entry.getValue().addMissing(missingCount);
          }
          allstats.addFacet(f.name, facetStatsValues);
        }
      }

      return allstats;
    } finally {
      missing.decref();
    }
  }

  String getReadableValue(BytesRef termval, FieldType ft, CharsRef charsRef) {
    return ft.indexedToReadable(termval, charsRef).toString();
  }

  /** may return a reused BytesRef */
  BytesRef getTermValue(TermsEnum te, int termNum) throws IOException {
    //System.out.println("getTermValue termNum=" + termNum + " this=" + this + " numTerms=" + numTermsInField);
    if (bigTerms.size() > 0) {
      // see if the term is one of our big terms.
      TopTerm tt = bigTerms.get(termNum);
      if (tt != null) {
        //System.out.println("  return big " + tt.term);
        return tt.term;
      }
    }

    return lookupTerm(te, termNum);
  }

  @Override
  public String toString() {
    final long indexSize = indexedTermsArray == null ? 0 : (8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings);  // assume 8 byte references?
return "{field=" + field + ",memSize="+memSize() + ",tindexSize="+indexSize + ",time="+total_time + ",phase1="+phase1_time + ",nTerms="+numTermsInField + ",bigTerms="+bigTerms.size() + ",termInstances="+termInstances + ",uses="+use.get() + "}"; } ////////////////////////////////////////////////////////////////// //////////////////////////// caching ///////////////////////////// ////////////////////////////////////////////////////////////////// public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { SolrCache<String,UnInvertedField> cache = searcher.getFieldValueCache(); if (cache == null) { return new UnInvertedField(field, searcher); } UnInvertedField uif = null; Boolean doWait = false; synchronized (cache) { uif = cache.get(field); if (uif == null) { cache.put(field, uifPlaceholder); // This thread will load this field, don't let other threads try. } else { if (uif.isPlaceholder == false) { return uif; } doWait = true; // Someone else has put the place holder in, wait for that to complete. } } while (doWait) { try { synchronized (cache) { uif = cache.get(field); // Should at least return the placeholder, NPE if not is OK. if (uif.isPlaceholder == false) { // OK, another thread put this in the cache we should be good. return uif; } cache.wait(); } } catch (InterruptedException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Thread interrupted in getUninvertedField."); } } uif = new UnInvertedField(field, searcher); synchronized (cache) { cache.put(field, uif); // Note, this cleverly replaces the placeholder. cache.notifyAll(); } return uif; } }