package lux; import static lux.index.IndexConfiguration.*; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import javax.xml.stream.XMLStreamException; import lux.index.XmlIndexer; import net.sf.saxon.s9api.Axis; import net.sf.saxon.s9api.SaxonApiException; import net.sf.saxon.s9api.Serializer; import net.sf.saxon.s9api.XdmNode; import net.sf.saxon.s9api.XdmNodeKind; import net.sf.saxon.s9api.XdmSequenceIterator; import org.slf4j.LoggerFactory; public abstract class IndexTestSupportBase { private int docLimit; protected int totalDocs; protected Compiler compiler; protected HashMap<String,Integer> elementCounts = new HashMap<String,Integer>(); public static final int QUERY_EXACT = 0x00000001; public static final int QUERY_NO_DOCS = 0x00000002; public static final int QUERY_MINIMAL = 0x00000004; public static final int QUERY_CONSTANT = 0x00000008; protected XmlIndexer indexer; public IndexTestSupportBase (int options) { docLimit = Integer.MAX_VALUE; indexer = new XmlIndexer (options); } public IndexTestSupportBase () { this (INDEX_QNAMES|INDEX_PATHS|STORE_DOCUMENT|INDEX_FULLTEXT); } protected abstract void addDocument (String uri, String xml) throws XMLStreamException, IOException; protected abstract void commit () throws IOException; /** * index and store all elements of an xml document found on the classpath, * remembering the count of each element QName (indexed by ClarkName) in elementCounts * * @param filename the pathname of the document to index * @throws XMLStreamException * @throws IOException * @throws SaxonApiException */ public void indexAllElements(String filename) throws XMLStreamException, IOException, SaxonApiException { InputStream in = SearchTest.class.getClassLoader().getResourceAsStream(filename); if (in == null) { throw new FileNotFoundException (filename + " not found"); } indexAllElements(filename, in); } public void indexAllElements(String uri, InputStream in) throws XMLStreamException, IOException, SaxonApiException { indexer.index(in, uri); Serializer outputter = new Serializer(); XdmNode doc = indexer.getXdmNode(); indexer.reset(); addDocument ('/' + uri, outputter.serializeNodeToString(doc)); // index all descendants totalDocs = 1; elementCounts.clear(); XdmSequenceIterator iter = doc.axisIterator(Axis.DESCENDANT); iter.next(); // skip the root element, we already indexed it while (iter.hasNext() && totalDocs < docLimit) { XdmNode e = (XdmNode) iter.next(); if (e.getNodeKind() != XdmNodeKind.ELEMENT) { continue; } Integer count = elementCounts.get (e.getNodeName().getClarkName()); if (count == null) { elementCounts.put (e.getNodeName().getClarkName(), 1); } else { elementCounts.put (e.getNodeName().getClarkName(), count + 1); } String xml = outputter.serializeNodeToString(e); addDocument ('/' + uri + '-' + totalDocs, xml); ++totalDocs; if (totalDocs % 50 == 0) { // fragment the index commit(); } } commit(); LoggerFactory.getLogger(getClass()).info("indexed " + totalDocs + " documents"); } public int getDocLimit() { return docLimit; } public void setDocLimit(int docLimit) { this.docLimit = docLimit; } }