package lux.index.field; import java.util.Iterator; import java.util.Map.Entry; import java.util.regex.Pattern; import lux.index.FieldRole; import lux.index.XmlIndexer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Field.Store; /** * Indexes each occurrence of each path as a separate term * TODO: also store freqs (but not positions), so as to enable path-cardinality queries */ public class PathOccurrenceField extends FieldDefinition { private final Pattern spacePattern = Pattern.compile(" "); public PathOccurrenceField () { super (FieldRole.PATH, new KeywordAnalyzer(), Store.NO, Type.STRING); } @Override public Iterable<?> getValues(XmlIndexer indexer) { return new PathOccurrenceIterator (indexer); } class PathOccurrenceIterator implements Iterable<String>, Iterator<String> { private Iterator<Entry<CharSequence, Integer>> pathCounts; private Entry<CharSequence, Integer> pathCount; private int iPathCount; public PathOccurrenceIterator(XmlIndexer indexer) { pathCounts = indexer.getPathMapper().getPathCounts().entrySet().iterator(); if (pathCounts.hasNext()) { pathCount = pathCounts.next(); } iPathCount = 0; } @Override public Iterator<String> iterator() { // better only call this once! return this; } @Override public boolean hasNext() { return pathCounts.hasNext() || pathCount != null; } @Override public String next() { StringBuilder buf = new StringBuilder(); CharSequence path = pathCount.getKey(); String [] names = spacePattern.split(path); if (names.length > 1) { buf.append (names[names.length-1]); // stop at 1 so we trim off leading "{}", reverse the names and splice with "/" for (int i = names.length-2; i > 0; i--) { // in reverse order buf.append ('/'); buf.append (names[i]); } } // advance the iteration if (iPathCount++ >= pathCount.getValue()) { iPathCount = 0; if (pathCounts.hasNext()) { pathCount = pathCounts.next(); } else { pathCount = null; } } return buf.toString(); } @Override public void remove() { throw new UnsupportedOperationException(); } } }