QueryTermVector.java example

Explorer
solrcene-master
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;

import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.util.BytesRef;

/**
 * A {@link TermFreqVector} built from the terms of a query rather than from
 * an indexed field.
 * <p>
 * The vector holds each distinct term once, sorted by the natural ordering of
 * {@link BytesRef}, together with the number of times that term occurred in
 * the input. {@link #getField()} always returns <code>null</code>.
 **/
public class QueryTermVector implements TermFreqVector {
  /** Distinct terms, sorted so that {@link #indexOf(BytesRef)} can binary-search them. */
  private BytesRef [] terms = new BytesRef[0];
  /** Occurrence counts, parallel to {@link #terms}. */
  private int [] termFreqs = new int[0];

  /**
   * @return always <code>null</code>; this vector is not bound to a field
   */
  public String getField() { return null;  }

  /**
   * Builds the vector from an explicit list of terms.
   *
   * @param queryTerms The original list of terms from the query, can contain duplicates.
   *                   The array itself is not modified. A <code>null</code> array yields
   *                   an empty vector.
   */ 
  public QueryTermVector(BytesRef [] queryTerms) {
    processTerms(queryTerms);
  }

  /**
   * Builds the vector by running <code>queryString</code> through <code>analyzer</code>
   * and collecting every token it produces.
   * <p>
   * If <code>analyzer</code> is <code>null</code>, if it returns a <code>null</code>
   * stream, or if an {@link IOException} occurs while consuming the stream, the
   * vector is left empty.
   *
   * @param queryString the text to analyze
   * @param analyzer the analyzer used to tokenize <code>queryString</code>, may be <code>null</code>
   */
  public QueryTermVector(String queryString, Analyzer analyzer) {
    if (analyzer == null) {
      return;
    }
    TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
    if (stream == null) {
      return;
    }
    List<BytesRef> collected = new ArrayList<BytesRef>();
    try {
      stream.reset();
      final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

      while (stream.incrementToken()) {
        // A fresh BytesRef per token: the attribute fills the instance we hand it,
        // so reusing one instance would make every list entry alias the last token.
        BytesRef bytes = new BytesRef();
        termAtt.toBytesRef(bytes);
        collected.add(bytes);
      }
      processTerms(collected.toArray(new BytesRef[collected.size()]));
      // Signal end-of-stream only after the terms are stored, so a failure here
      // cannot discard an otherwise complete vector.
      stream.end();
    } catch (IOException e) {
      // Deliberately best effort: this constructor declares no checked exception,
      // so an I/O failure while analyzing leaves whatever state was reached so far
      // (an empty vector if the failure happened before processTerms ran).
    } finally {
      try {
        stream.close();
      } catch (IOException e) {
        // Nothing useful can be done if releasing the stream fails.
      }
    }
  }
  
  /**
   * Sorts the given terms, collapses duplicates and stores the distinct terms
   * and their occurrence counts in {@link #terms} and {@link #termFreqs}.
   * Does nothing when <code>queryTerms</code> is <code>null</code>.
   *
   * @param queryTerms terms to process, may contain duplicates; not modified
   */
  private void processTerms(BytesRef[] queryTerms) {
    if (queryTerms == null) {
      return;
    }
    // Sort a copy so the caller's array keeps its original order.
    final BytesRef[] sorted = queryTerms.clone();
    Arrays.sort(sorted);

    // Maps each distinct term to its slot in the output arrays.
    Map<BytesRef,Integer> slotByTerm = new HashMap<BytesRef,Integer>(sorted.length);
    List<BytesRef> distinct = new ArrayList<BytesRef>(sorted.length);
    // Sized for the worst case (no duplicates); trimmed below.
    int[] counts = new int[sorted.length];

    for (int i = 0; i < sorted.length; i++) {
      BytesRef term = sorted[i];
      Integer slot = slotByTerm.get(term);
      if (slot == null) {
        slot = Integer.valueOf(distinct.size());
        slotByTerm.put(term, slot);
        distinct.add(term);
      }
      counts[slot.intValue()]++;
    }

    terms = distinct.toArray(new BytesRef[distinct.size()]);
    termFreqs = new int[distinct.size()];
    System.arraycopy(counts, 0, termFreqs, 0, termFreqs.length);
  }
  
  /**
   * @return the vector rendered as <code>{term/freq, term/freq, ...}</code>,
   *         with each term decoded via {@link BytesRef#utf8ToString()}
   */
  @Override
  public final String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append('{');
    for (int i=0; i<terms.length; i++) {
      if (i>0) sb.append(", ");
      sb.append(terms[i].utf8ToString()).append('/').append(termFreqs[i]);
    }
    sb.append('}');
    return sb.toString();
  }

  /**
   * @return the number of distinct terms in this vector
   */
  public int size() {
    return terms.length;
  }

  /**
   * @return the distinct terms in sorted order; this is the internal array, not a copy
   */
  public BytesRef[] getTerms() {
    return terms;
  }

  /**
   * @return the occurrence count of each term, parallel to {@link #getTerms()};
   *         this is the internal array, not a copy
   */
  public int[] getTermFrequencies() {
    return termFreqs;
  }

  /**
   * Looks up a term with a binary search over the sorted term array.
   *
   * @param term the term to find
   * @return the index of <code>term</code> in {@link #getTerms()}, or -1 if it is absent
   */
  public int indexOf(BytesRef term) {
    int res = Arrays.binarySearch(terms, term);
    return res >= 0 ? res : -1;
  }

  /**
   * Looks up a contiguous slice of terms.
   *
   * @param terms array containing the terms to look up
   * @param start index in <code>terms</code> of the first term to look up
   * @param len number of terms to look up
   * @return an array of length <code>len</code> whose i-th entry is the result of
   *         {@link #indexOf(BytesRef)} for <code>terms[start + i]</code>
   */
  public int[] indexesOf(BytesRef[] terms, int start, int len) {
    int res[] = new int[len];

    for (int i=0; i < len; i++) {
      // Offset by start; indexing with i alone would always scan from element 0.
      res[i] = indexOf(terms[start + i]);
    }
    return res;                  
  }

}