TermVectorCommand.java example

Explorer

clue-master
- src
  - main
    - java
      - com
        senseidb
        clue
        ClueApplication.java
        ClueConfiguration.java
        ClueContext.java
        HdfsDirectory.java
        api
        BytesRefDisplay.java
        BytesRefPrinter.java
        DefaultDirectoryBuilder.java
        DefaultIndexReaderFactory.java
        DefaultQueryBuilder.java
        DirectoryBuilder.java
        IndexReaderFactory.java
        QueryBuilder.java
        RawBytesRefDisplay.java
        StringBytesRefDisplay.java
        commands
        ClueCommand.java
        DeleteCommand.java
        DeleteUserCommitData.java
        DirectoryCommand.java
        DocSetInfoCommand.java
        DocValCommand.java
        DumpDocCommand.java
        ExitCommand.java
        ExplainCommand.java
        ExportCommand.java
        GetUserCommitDataCommand.java
        HelpCommand.java
        IndexTrimCommand.java
        InfoCommand.java
        MergeCommand.java
        NormsCommand.java
        PostingsCommand.java
        ReadonlyCommand.java
        ReconstructCommand.java
        SaveUserCommitData.java
        SearchCommand.java
        StoredFieldCommand.java
        TermVectorCommand.java
        TermsCommand.java
        util
        CustomBufferedIndexInput.java
        IntArrayDocIdSetIterator.java
        MatchSomeDocsQuery.java
  - test
    - java
      - com
        senseidb
        clue
        test
        BuildSampleIndex.java
        PayloadTokenizer.java

package com.senseidb.clue.commands;

import java.io.PrintStream;
import java.util.List;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

import com.senseidb.clue.ClueContext;

/**
 * Clue command {@code tv}: prints the term vector stored for one field of one document.
 *
 * <p>Output is one line per term in the form {@code term (freq)}, where {@code freq} is the
 * value reported by {@link TermsEnum#totalTermFreq()} on the document's term vector.
 */
public class TermVectorCommand extends ClueCommand {

  /**
   * Creates the command.
   *
   * @param ctx context handed to the {@link ClueCommand} superclass; this command uses it to
   *     obtain the index reader
   */
  public TermVectorCommand(ClueContext ctx) {
    super(ctx);
  }

  /** Returns the command name, {@code "tv"}. */
  @Override
  public String getName() {
    return "tv";
  }

  /** Returns the one-line help text for this command. */
  @Override
  public String help() {
    return "shows term vector of a field for a doc";
  }

  /**
   * Prints the term vector of a field for a single document.
   *
   * @param args exactly two elements: the field name and the index-wide (global) document id
   * @param out stream that receives the term listing or a diagnostic message
   * @throws Exception if reading from the index fails
   */
  @Override
  public void execute(String[] args, PrintStream out) throws Exception {
    if (args.length != 2) {
      // Only a single doc id is parsed below, so the usage text names a single doc.
      out.println("usage: field doc");
      return;
    }

    String field = args[0];

    int doc;
    try {
      doc = Integer.parseInt(args[1]);
    } catch (NumberFormatException e) {
      // Report bad input to the user instead of propagating a parse exception.
      out.println("invalid doc id: " + args[1]);
      return;
    }

    IndexReader reader = ctx.getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();

    boolean found = false;
    boolean tvFound = false;
    for (LeafReaderContext leaf : leaves) {
      LeafReader leafReader = leaf.reader();
      FieldInfo finfo = leafReader.getFieldInfos().fieldInfo(field);
      if (finfo == null || !finfo.hasVectors()) {
        continue;
      }

      // At least one segment stores term vectors for this field.
      tvFound = true;

      // Translate the global doc id into this segment's local id space and skip
      // segments that do not contain it. The upper bound matters: without it the
      // first segment (docBase 0) would be queried for every non-negative id.
      int docID = doc - leaf.docBase;
      if (docID < 0 || docID >= leafReader.maxDoc()) {
        continue;
      }

      Terms terms = leafReader.getTermVector(docID, field);
      if (terms == null) {
        // The document has no term vector for this field; no other segment can hold it.
        break;
      }

      printTerms(terms, out);
      found = true;
      break;
    }

    if (!tvFound) {
      out.println("term vector is not available for field: " + field);
      return;
    }

    if (!found) {
      out.println(doc + " not found");
    }
  }

  /** Writes every term of {@code terms} to {@code out} as {@code term (freq)}, one per line. */
  private static void printTerms(Terms terms, PrintStream out) throws java.io.IOException {
    TermsEnum te = terms.iterator();
    BytesRef text;
    while ((text = te.next()) != null) {
      long tf = te.totalTermFreq();
      out.println(text.utf8ToString() + " (" + tf + ")");
    }
  }

}