package com.senseidb.clue.commands;
import java.io.PrintStream;
import java.util.List;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import com.senseidb.clue.ClueContext;
/**
 * Command {@code tv}: prints the term vector of one field for a single document.
 *
 * <p>Arguments are the field name followed by a document id, which is interpreted
 * as an id in the doc-id space of the reader returned by
 * {@code ctx.getIndexReader()} (it is translated to a segment-local id using each
 * leaf's {@code docBase}). Every term of the vector is written on its own line,
 * followed in parentheses by the value of {@link TermsEnum#totalTermFreq()}.
 */
public class TermVectorCommand extends ClueCommand {

  /**
   * Creates the command.
   *
   * @param ctx context passed through to the {@code ClueCommand} constructor
   */
  public TermVectorCommand(ClueContext ctx) {
    super(ctx);
  }

  /** Returns the command name, {@code "tv"}. */
  @Override
  public String getName() {
    return "tv";
  }

  /** Returns a one-line description of the command. */
  @Override
  public String help() {
    return "shows term vector of a field for a doc";
  }

  /**
   * Looks up the term vector of {@code args[0]} for document {@code args[1]} and
   * prints it to {@code out}.
   *
   * <p>Prints a usage line when the argument count is not exactly two, a message
   * when the doc id is not an integer, a message when no segment stores term
   * vectors for the field, and {@code "<doc> not found"} when no term vector could
   * be read for the requested document.
   *
   * @param args {@code [field, doc]}
   * @param out  stream all output is written to
   * @throws Exception if the underlying index access fails
   */
  @Override
  public void execute(String[] args, PrintStream out) throws Exception {
    if (args.length != 2) {
      // Only a single doc id is parsed below, so the usage text advertises exactly that.
      out.println("usage: field doc");
      return;
    }

    String field = args[0];
    int doc;
    try {
      doc = Integer.parseInt(args[1]);
    } catch (NumberFormatException e) {
      out.println("invalid doc id: " + args[1]);
      return;
    }

    IndexReader reader = ctx.getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();

    boolean found = false;
    boolean tvFound = false;

    // The loop variable is deliberately not called "ctx" so it does not shadow
    // the ClueContext field used above.
    for (LeafReaderContext leaf : leaves) {
      LeafReader atomicReader = leaf.reader();
      FieldInfo finfo = atomicReader.getFieldInfos().fieldInfo(field);
      if (finfo == null || !finfo.hasVectors()) {
        continue;
      }
      tvFound = true;

      // Translate to a segment-local id and make sure it falls inside THIS segment.
      // Checking only the lower bound would send ids that belong to a later segment
      // to an earlier one, where they are out of range.
      int docID = doc - leaf.docBase;
      if (docID < 0 || docID >= atomicReader.maxDoc()) {
        continue;
      }

      Terms terms = atomicReader.getTermVector(docID, field);
      if (terms != null) {
        TermsEnum te = terms.iterator();
        BytesRef text;
        while ((text = te.next()) != null) {
          long tf = te.totalTermFreq();
          out.println(text.utf8ToString() + " (" + tf + ")");
        }
        found = true;
      }
      // The id can only fall inside one segment's range, so no need to look further.
      break;
    }

    if (!tvFound) {
      out.println("term vector is not available for field: " + field);
      return;
    }
    if (!found) {
      out.println(doc + " not found");
    }
  }
}