package lux.index.field; import java.nio.charset.Charset; import java.util.Collections; import lux.index.FieldRole; import lux.index.XmlIndexer; import lux.xml.tinybin.TinyBinary; import net.sf.saxon.tree.tiny.TinyDocumentImpl; import net.sf.saxon.tree.tiny.TinyTree; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StoredField; /** * A field that stores XML documents in a binary form ({@link TinyBinary}) that is very close to the in-memory Saxon TinyTree format. * Reading and writing these documents avoids the cost of parsing and serialization, and they take up only * slightly more space than the serialized XML form. */ public class TinyBinaryField extends FieldDefinition { public static final Charset UTF8 = Charset.forName("utf-8"); public TinyBinaryField () { super (FieldRole.XML_STORE, null, Store.YES, Type.BYTES, true); } protected TinyBinary makeTinyBinary (XmlIndexer indexer) { TinyTree tinyTree = ((TinyDocumentImpl) indexer.getXdmNode().getUnderlyingNode()).getTree(); return new TinyBinary (tinyTree, UTF8); } @Override public Iterable<?> getValues(XmlIndexer indexer) { TinyBinary tinyBinary = makeTinyBinary(indexer); return Collections.singleton(new StoredField(getName(), tinyBinary.getBytes(), 0, tinyBinary.length())); } }