package com.senseidb.clue.commands;

import java.io.PrintStream;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

import com.senseidb.clue.ClueContext;
import com.senseidb.clue.api.BytesRefPrinter;

public class PostingsCommand extends ClueCommand {

  public PostingsCommand(ClueContext ctx) {
    super(ctx);
  }

  @Override
  public String getName() {
    return "postings";
  }

  @Override
  public String help() {
    return "iterates postings for a given term, usage: postings <fieldname:fieldvalue>";
  }

  @Override
  public void execute(String[] args, PrintStream out) throws Exception {
    String field = args.length > 0 ? args[0] : null;
    String termVal = null;

    if (field != null) {
      // Split only on the first ':' so term values containing ':' are preserved.
      String[] parts = field.split(":", 2);
      if (parts.length > 1) {
        field = parts[0];
        termVal = parts[1];
      }
    }

    if (field == null || termVal == null) {
      out.println("usage: field:term");
      out.flush();
      return;
    }

    BytesRefPrinter payloadPrinter = ctx.getPayloadBytesRefDisplay().getBytesRefPrinter(field);

    IndexReader reader = ctx.getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int docBase = 0;
    int numPerPage = 20;
    PostingsEnum postings = null;

    for (LeafReaderContext leaf : leaves) {
      LeafReader leafReader = leaf.reader();
      Terms terms = leafReader.terms(field);
      if (terms == null) {
        // Field not indexed in this segment; still advance docBase so that
        // docids printed for later segments remain correct.
        docBase += leafReader.maxDoc();
        continue;
      }
      boolean hasPositions = terms.hasPositions();
      TermsEnum te = terms.iterator();
      int count = 0;
      if (te.seekExact(new BytesRef(termVal))) {
        if (hasPositions) {
          // Request frequencies plus positions, offsets, and payloads.
          postings = te.postings(postings, PostingsEnum.FREQS
              | PostingsEnum.PAYLOADS | PostingsEnum.POSITIONS | PostingsEnum.OFFSETS);
          int docid;
          while ((docid = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
            out.print("docid: " + (docid + docBase) + ", freq: " + postings.freq() + ", ");
            for (int i = 0; i < postings.freq(); ++i) {
              out.print("pos " + i + ": " + postings.nextPosition());
              out.print(", start offset: " + postings.startOffset());
              out.print(", end offset: " + postings.endOffset());
              // getPayload() is only valid after nextPosition() has been called.
              BytesRef payload = postings.getPayload();
              if (payload != null) {
                out.print(", payload: " + payloadPrinter.print(payload));
              }
              out.print(";");
            }
            out.println();
            // Paginate interactive output; EOF (Ctrl-D) stops the iteration.
            if (ctx.isInteractiveMode() && count % numPerPage == 0) {
              out.println("Ctrl-D to break");
              int ch = System.in.read();
              if (ch == -1) {
                out.flush();
                return;
              }
            }
          }
        } else {
          // No positions indexed; only docids and frequencies are available.
          postings = te.postings(postings, PostingsEnum.FREQS);
          int docid;
          while ((docid = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
            out.println("docid: " + (docid + docBase) + ", freq: " + postings.freq());
            if (ctx.isInteractiveMode() && count % numPerPage == 0) {
              out.println("Ctrl-D to break");
              int ch = System.in.read();
              if (ch == -1) {
                out.flush();
                return;
              }
            }
          }
        }
      }
      docBase += leafReader.maxDoc();
    }
  }
}
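
// Illustrative session, assuming a hypothetical index with a "contents" field
// indexed with positions and offsets (field name and values are examples only,
// not part of this repository):
//
//   > postings contents:lucene
//   docid: 0, freq: 2, pos 0: 3, start offset: 14, end offset: 20;pos 1: 97, ...
//   docid: 5, freq: 1, pos 0: 42, start offset: 210, end offset: 216;
//
// Positions, offsets, and payloads appear only if the field was indexed with
// them; otherwise the command falls back to printing docids and frequencies.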