package com.senseidb.clue.commands;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import com.senseidb.clue.ClueContext;
import com.senseidb.clue.api.BytesRefPrinter;
public class TermsCommand extends ClueCommand {
public TermsCommand(ClueContext ctx) {
super(ctx);
}
@Override
public String getName() {
return "terms";
}
@Override
public String help() {
return "gets terms from the index, <field:term>, term can be a prefix";
}
@Override
public void execute(String[] args, PrintStream out) throws Exception {
String field;
String termVal = null;
try{
field = args[0];
}
catch(Exception e){
field = null;
}
if (field != null){
String[] parts = field.split(":");
if (parts.length > 1){
field = parts[0];
termVal = parts[1];
}
}
else{
out.println("Usage: field:value");
return;
}
BytesRefPrinter bytesRefPrinter = ctx.getTermBytesRefDisplay().getBytesRefPrinter(field);
boolean isExact = false;
if (termVal != null){
if (termVal.endsWith("*")){
termVal = termVal.substring(0, termVal.length()-1);
}
else{
isExact = true;
}
}
IndexReader reader = ctx.getIndexReader();
List<LeafReaderContext> leaves = reader.leaves();
TreeMap<BytesRef,TermsEnum> termMap = null;
HashMap<BytesRef,AtomicInteger> termCountMap = new HashMap<BytesRef,AtomicInteger>();
int numCount = 0;
int numPerPage = 20;
for (LeafReaderContext leaf : leaves){
LeafReader atomicReader = leaf.reader();
Terms terms = atomicReader.fields().terms(field);
if (terms == null) {
continue;
}
if (termMap == null){
termMap = new TreeMap<BytesRef,TermsEnum>();
}
TermsEnum te = terms.iterator();
BytesRef termBytes;
if (termVal != null){
if (isExact){
if (!te.seekExact(new BytesRef(termVal))){
continue;
}
}
else{
te.seekCeil(new BytesRef(termVal));
}
termBytes = te.term();
}
else{
termBytes = te.next();
}
while(true){
if (termBytes == null) break;
AtomicInteger count = termCountMap.get(termBytes);
if (count == null){
termCountMap.put(termBytes, new AtomicInteger(te.docFreq()));
termMap.put(termBytes, te);
break;
}
count.getAndAdd(te.docFreq());
if (isExact){
termBytes = null;
}
else{
termBytes = te.next();
}
}
}
while(termMap != null && !termMap.isEmpty()){
numCount++;
Entry<BytesRef,TermsEnum> entry = termMap.pollFirstEntry();
if (entry == null) break;
BytesRef key = entry.getKey();
AtomicInteger count = termCountMap.remove(key);
out.println(bytesRefPrinter.print(key)+" ("+count+") ");
if (ctx.isInteractiveMode() && numCount % numPerPage == 0){
out.println("Press q to break");
int ch = System.in.read();
if (ch == 'q' || ch == 'Q') {
out.flush();
return;
}
}
TermsEnum te = entry.getValue();
BytesRef nextKey = null;
if (!isExact){
nextKey = te.next();
}
while(true){
if (nextKey == null) break;
count = termCountMap.get(nextKey);
if (count == null){
termCountMap.put(nextKey, new AtomicInteger(te.docFreq()));
termMap.put(nextKey, te);
break;
}
count.getAndAdd(te.docFreq());
nextKey = te.next();
}
}
out.flush();
}
}