package com.senseidb.clue.commands;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import com.senseidb.clue.ClueContext;
/**
 * Clue command that prints the doc values stored for a field.
 *
 * <p>Usage: {@code docval <field> [docid1,docid2,...]}. When a doc id list is
 * given, each id is treated as an index-wide doc id and is resolved to the
 * segment that contains it. When no (or an unparsable) list is given, every
 * document of every segment is printed, pausing periodically in interactive
 * mode.
 */
public class DocValCommand extends ClueCommand {

  private static final String NUM_TERMS_IN_FIELD = "numTerms in field: ";

  /** Number of documents printed between "Ctrl-D to break" prompts in interactive mode. */
  private static final int DOCS_PER_PAGE = 20;

  public DocValCommand(ClueContext ctx) {
    super(ctx);
  }

  @Override
  public String getName() {
    return "docval";
  }

  @Override
  public String help() {
    return "gets doc value for a given doc, <field> <docid>, if <docid> not specified, all docs are shown";
  }

  /**
   * Renders the doc value(s) of one document as text.
   *
   * @param docVals    doc-values accessor as returned by {@link #readDocValues}; its runtime
   *                   type must correspond to {@code docValType}
   * @param docValType the doc-values type declared for the field
   * @param subid      segment-local doc id
   * @return the rendered value, or {@code null} if {@code docValType} is not one of the
   *         handled types
   */
  private static String formatDocValue(Object docVals, DocValuesType docValType, int subid) {
    switch (docValType) {
      case NUMERIC: {
        NumericDocValues numeric = (NumericDocValues) docVals;
        return String.valueOf(numeric.get(subid));
      }
      case BINARY: {
        BinaryDocValues binary = (BinaryDocValues) docVals;
        BytesRef bytes = binary.get(subid);
        return bytes.utf8ToString();
      }
      case SORTED: {
        SortedDocValues sorted = (SortedDocValues) docVals;
        BytesRef term = sorted.get(subid);
        StringBuilder sb = new StringBuilder();
        sb.append(NUM_TERMS_IN_FIELD).append(sorted.getValueCount()).append(", ");
        sb.append("value: [").append(term.utf8ToString()).append("]");
        return sb.toString();
      }
      case SORTED_SET: {
        SortedSetDocValues sortedSet = (SortedSetDocValues) docVals;
        sortedSet.setDocument(subid);
        StringBuilder sb = new StringBuilder();
        sb.append(NUM_TERMS_IN_FIELD).append(sortedSet.getValueCount()).append(", ");
        sb.append("values: [");
        boolean first = true;
        long ord;
        while ((ord = sortedSet.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          if (!first) {
            sb.append(", ");
          }
          sb.append(sortedSet.lookupOrd(ord).utf8ToString());
          first = false;
        }
        sb.append("]");
        return sb.toString();
      }
      case SORTED_NUMERIC: {
        SortedNumericDocValues sortedNumeric = (SortedNumericDocValues) docVals;
        sortedNumeric.setDocument(subid);
        // NOTE(review): count() is the number of values for this document, yet it is
        // printed under the "numTerms in field" label; label kept for output compatibility.
        int count = sortedNumeric.count();
        StringBuilder sb = new StringBuilder();
        sb.append(NUM_TERMS_IN_FIELD).append(count).append(", ");
        sb.append("values: [");
        for (int i = 0; i < count; ++i) {
          if (i > 0) {
            sb.append(", ");
          }
          sb.append(sortedNumeric.valueAt(i));
        }
        sb.append("]");
        return sb.toString();
      }
      default:
        return null;
    }
  }

  /**
   * Prints one line describing the doc value of a single document.
   *
   * @param docid      index-wide doc id (used for display and to derive the local id)
   * @param docBase    doc base of the segment holding the document
   * @param docVals    doc-values accessor for the segment, or {@code null} if unavailable
   * @param docValType the doc-values type declared for the field
   * @param out        destination stream
   * @param segmentid  ordinal of the segment, for display only
   */
  private void printDocValue(int docid, int docBase, Object docVals,
      DocValuesType docValType, PrintStream out, int segmentid) throws Exception {
    if (docVals == null) {
      out.println("doc value unavailable");
      return;
    }
    int subid = docid - docBase;
    String val = formatDocValue(docVals, docValType, subid);
    if (val == null) {
      out.println("cannot read doc value type: " + docValType);
    } else {
      out.println("type: " + docValType + ", val: " + val + ", segment: "
          + segmentid + ", docid: " + docid + ", subid: " + subid);
    }
  }

  /**
   * Fetches the doc-values accessor matching {@code docValType} from a segment reader.
   *
   * @return the accessor, or {@code null} when {@code docValType} is not one of the five
   *         handled types (or the reader itself returns {@code null})
   */
  private Object readDocValues(String field, DocValuesType docValType, LeafReader atomicReader)
      throws IOException {
    switch (docValType) {
      case NUMERIC:
        return atomicReader.getNumericDocValues(field);
      case BINARY:
        return atomicReader.getBinaryDocValues(field);
      case SORTED:
        return atomicReader.getSortedDocValues(field);
      case SORTED_NUMERIC:
        return atomicReader.getSortedNumericDocValues(field);
      case SORTED_SET:
        return atomicReader.getSortedSetDocValues(field);
      default:
        return null;
    }
  }

  /**
   * Looks up the field's doc-values type in the given segment and prints the value for
   * one document, or a notice if the field has no doc values in that segment.
   */
  private void showDocId(int docid, int docBase, String field,
      LeafReader atomicReader, PrintStream out, int segmentid)
      throws Exception {
    FieldInfo finfo = atomicReader.getFieldInfos().fieldInfo(field);
    if (finfo == null || finfo.getDocValuesType() == DocValuesType.NONE) {
      out.println("docvalue does not exist for field: " + field);
      return;
    }
    DocValuesType docValType = finfo.getDocValuesType();
    printDocValue(docid, docBase, readDocValues(field, docValType, atomicReader),
        docValType, out, segmentid);
  }

  /**
   * Parses a comma-separated list of doc ids.
   *
   * @return the parsed ids; an empty list if the spec contains no ids or any entry is not
   *         an integer (in which case a notice is printed and the caller shows all docs)
   */
  private static List<Integer> parseDocIds(String spec, PrintStream out) {
    List<Integer> ids = new ArrayList<Integer>();
    try {
      for (String token : spec.split(",")) {
        ids.add(Integer.parseInt(token.trim()));
      }
    } catch (NumberFormatException e) {
      out.println("invalid docid, all docs are shown");
      return new ArrayList<Integer>();
    }
    return ids;
  }

  /**
   * Returns the index of the leaf whose doc id range contains {@code docid},
   * or {@code -1} if no leaf contains it (negative or past the end of the index).
   */
  private static int findSegment(int docid, List<LeafReaderContext> leaves) {
    for (int i = 0; i < leaves.size(); ++i) {
      LeafReaderContext leaf = leaves.get(i);
      int local = docid - leaf.docBase;
      if (local >= 0 && local < leaf.reader().maxDoc()) {
        return i;
      }
    }
    return -1;
  }

  /** Prints the doc value of each requested doc id, in the order the ids were given. */
  private void showSelectedDocs(List<Integer> docids, String field,
      List<LeafReaderContext> leaves, PrintStream out) throws Exception {
    for (int docid : docids) {
      int segment = findSegment(docid, leaves);
      if (segment < 0) {
        // Previously such ids were pushed into a segment that does not hold them.
        out.println("docid out of range: " + docid);
        continue;
      }
      LeafReaderContext leaf = leaves.get(segment);
      showDocId(docid, leaf.docBase, field, leaf.reader(), out, segment);
    }
    out.flush();
  }

  /**
   * Prints the doc value of every document, segment by segment. In interactive mode the
   * listing pauses every {@link #DOCS_PER_PAGE} documents of a segment and stops when
   * standard input reports end-of-stream.
   */
  private void showAllDocs(String field, List<LeafReaderContext> leaves, PrintStream out)
      throws Exception {
    for (int i = 0; i < leaves.size(); ++i) {
      LeafReaderContext leaf = leaves.get(i);
      LeafReader atomicReader = leaf.reader();
      FieldInfo finfo = atomicReader.getFieldInfos().fieldInfo(field);
      if (finfo == null || finfo.getDocValuesType() == DocValuesType.NONE) {
        out.println("docvalue does not exist for field: " + field);
        // Field info is looked up per segment, so keep going: a later segment may have it.
        continue;
      }
      DocValuesType docValType = finfo.getDocValuesType();
      // The accessor is addressed by local doc id, so fetch it once per segment
      // instead of once per document.
      Object docVals = readDocValues(field, docValType, atomicReader);
      int maxDoc = atomicReader.maxDoc();
      for (int k = 0; k < maxDoc; ++k) {
        printDocValue(k + leaf.docBase, leaf.docBase, docVals, docValType, out, i);
        if (getContext().isInteractiveMode() && (k + 1) % DOCS_PER_PAGE == 0) {
          out.println("Ctrl-D to break");
          int ch = System.in.read();
          if (ch == -1) {
            out.flush();
            return;
          }
        }
      }
      out.flush();
    }
  }

  /**
   * Runs the command.
   *
   * @param args {@code args[0]} is the field name; optional {@code args[1]} is a
   *             comma-separated list of index-wide doc ids
   * @param out  stream that receives all output
   */
  @Override
  public void execute(String[] args, PrintStream out) throws Exception {
    if (args.length < 1) {
      out.println("usage: field doc1,doc2...");
      return;
    }
    String field = args[0];
    // Omitting the doc id list is a valid request for "all docs", not a parse error.
    List<Integer> docidList =
        args.length > 1 ? parseDocIds(args[1], out) : new ArrayList<Integer>();

    IndexReader reader = ctx.getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();

    if (!docidList.isEmpty()) {
      showSelectedDocs(docidList, field, leaves, out);
    } else {
      showAllDocs(field, leaves, out);
    }
  }
}