package org.basex.index.value;

import static org.basex.core.Text.*;
import static org.basex.data.DataText.*;
import static org.basex.util.Token.*;

import java.io.IOException;

import org.basex.data.Data;
import org.basex.index.IndexBuilder;
import org.basex.index.IndexTree;
import org.basex.io.out.DataOutput;
import org.basex.io.random.DataAccess;
import org.basex.util.Num;
import org.basex.util.Performance;
import org.basex.util.Util;
import org.basex.util.list.IntList;

/**
 * <p>This class builds an index for attribute values and text contents in a
 * tree structure and stores the result to disk.</p>
 *
 * <p>The data is stored on disk in the following format:</p>
 * <ul>
 * <li> {@code DATATXT/ATV + 'l'}: contains the index values, which are dense id
 * lists to all text nodes/attribute values, stored in the {@link Num} format:
 * [size0, id1, id2, ...]. The number of index keys is stored in the first 4
 * bytes of the file.</li>
 * <li> {@code DATATXT/ATV + 'r'}: contains 5-byte references to the id lists
 * for all keys. To save space, the keys themselves are not stored in the index
 * structure. Instead, they can be found by following the id references to
 * the main table.
 * </li>
 * </ul>
 *
 * <p>If the complete tree does not fit into main memory, partial trees are
 * written to numbered temporary files (with an additional {@code 't'} file
 * holding the keys), which are merged into the final structure at the end
 * of the build process.</p>
 *
 * @author BaseX Team 2005-12, BSD License
 * @author Christian Gruen
 */
public final class ValueBuilder extends IndexBuilder {
  /** Temporary value tree. */
  private IndexTree index = new IndexTree();
  /** Index type (attributes/texts). */
  private final boolean text;

  /**
   * Constructor.
   * @param d data reference
   * @param txt value type (text/attribute)
   */
  public ValueBuilder(final Data d, final boolean txt) {
    super(d);
    text = txt;
  }

  /**
   * Builds the index: drops an existing index, indexes all text or attribute
   * nodes whose value does not exceed {@code data.meta.maxlen}, writes the
   * result to disk and flags the index as available in the meta data.
   * @return index instance ({@link UpdatableDiskValues} if
   *   {@code data.meta.updindex} is set, {@link DiskValues} otherwise)
   * @throws IOException I/O exception
   */
  @Override
  public DiskValues build() throws IOException {
    // delete old index
    abort();

    final Performance perf = Util.debug ? new Performance() : null;
    Util.debug(det());

    final String f = text ? DATATXT : DATAATV;
    final int k = text ? Data.TEXT : Data.ATTR;

    for(pre = 0; pre < size; ++pre) {
      // the following checks are only performed every 4096 nodes
      if((pre & 0x0FFF) == 0) {
        check();
        // check if main memory is exhausted
        if(memFull()) {
          // flush the current tree to a numbered partial file and start over
          write(f + csize++, false);
          index = new IndexTree();
          Performance.gc(2);
        }
      }
      // skip too long values
      if(data.kind(pre) == k && data.textLen(pre, text) <= data.meta.maxlen)
        index.index(data.text(pre, text), data.meta.updindex ? data.id(pre) : pre);
    }

    if(merge) {
      // write the remaining tree as last partial file, release it and merge
      write(f + csize++, false);
      index = null;
      Performance.gc(1);
      merge();
    } else {
      // everything fit into memory: directly write the final structure
      write(f, true);
    }

    if(text) data.meta.textindex = true;
    else data.meta.attrindex = true;

    Util.gc(perf);
    return data.meta.updindex ? new UpdatableDiskValues(data, text) :
      new DiskValues(data, text);
  }

  /**
   * Merges cached index files. The partial files {@code 0 .. csize - 1} are
   * traversed in parallel; in each step, the id lists of all partial indexes
   * that share the smallest current key are combined and written as one entry
   * of the final index. All output files are closed even if merging fails.
   * @throws IOException I/O exception
   */
  private void merge() throws IOException {
    final String f = text ? DATATXT : DATAATV;
    int sz = 0;

    final DataOutput outL = new DataOutput(data.meta.dbfile(f + 'l'));
    try {
      final DataOutput outR = new DataOutput(data.meta.dbfile(f + 'r'));
      try {
        // placeholder for the number of entries, which is patched in below
        outL.write4(0);

        // initialize cached index iterators
        final IntList ml = new IntList();
        final IntList il = new IntList();
        final ValueMerger[] vm = new ValueMerger[csize];
        for(int i = 0; i < csize; ++i) vm[i] = new ValueMerger(data, text, i);

        // parse through all values
        while(true) {
          checkStop();

          // find first index which is not completely parsed yet
          // (an empty values array marks a partial index as exhausted)
          int min = -1;
          while(++min < csize && vm[min].values.length == 0);
          if(min == csize) break;

          // find index entry with smallest key;
          // ml collects all partial indexes that currently share this key
          ml.reset();
          for(int i = min; i < csize; ++i) {
            if(vm[i].values.length == 0) continue;
            final int d = diff(vm[min].key, vm[i].key);
            if(d < 0) continue;
            if(d > 0) {
              // smaller key found: restart the collection
              min = i;
              ml.reset();
            }
            ml.add(i);
          }

          // parse through all values, cache and sort id values
          final int ms = ml.size();
          for(int m = 0; m < ms; ++m) {
            final ValueMerger t = vm[ml.get(m)];
            final int vl = t.values.length;
            // the first 4 bytes are skipped
            // (presumably the size header of the Num array - TODO confirm)
            for(int l = 4, v; l < vl; l += Num.length(v)) {
              v = Num.get(t.values, l);
              il.add(v);
            }
            t.next();
          }
          // write final structure to disk
          write(outL, outR, il);
          ++sz;
        }
      } finally {
        // close index files, no matter if merging was successful
        outR.close();
      }
    } finally {
      outL.close();
    }

    // write number of entries to first position
    final DataAccess da = new DataAccess(data.meta.dbfile(f + 'l'));
    try {
      da.write4(sz);
    } finally {
      da.close();
    }
  }

  /**
   * Writes the current value tree to disk. All output files are closed even
   * if writing fails.
   * @param name name
   * @param all writes the complete tree; if {@code false}, a temporary
   *   structure is written, which additionally includes a file with all keys
   * @throws IOException I/O exception
   */
  private void write(final String name, final boolean all) throws IOException {
    // write id arrays and references
    final DataOutput outL = new DataOutput(data.meta.dbfile(name + 'l'));
    try {
      final DataOutput outR = new DataOutput(data.meta.dbfile(name + 'r'));
      try {
        outL.write4(index.size());

        final IntList il = new IntList();
        index.init();
        while(index.more()) {
          final byte[] values = index.values.get(index.next());
          final int vs = Num.size(values);

          if(all) {
            // cache and sort all values (ids are read from offset 4 onwards)
            for(int ip = 4; ip < vs; ip += Num.length(values, ip)) {
              il.add(Num.get(values, ip));
            }
            // write final structure to disk
            write(outL, outR, il);
          } else {
            // write temporary structure to disk:
            // number of entries, absolute values
            outR.write5(outL.size());
            outL.write(values, 0, vs);
          }
        }
      } finally {
        outR.close();
      }
    } finally {
      outL.close();
    }

    // temporarily write texts
    if(!all) {
      final DataOutput outT = new DataOutput(data.meta.dbfile(name + 't'));
      try {
        index.init();
        while(index.more()) outT.writeToken(index.keys.get(index.next()));
      } finally {
        outT.close();
      }
    }
  }

  /**
   * Writes the final value structure to disk: the current position of the
   * value file is stored as reference, followed by the number of ids and the
   * sorted ids, each written as distance to its predecessor. The id list is
   * reset afterwards.
   * @param outL index values
   * @param outR references
   * @param il values
   * @throws IOException I/O exception
   */
  private static void write(final DataOutput outL, final DataOutput outR,
      final IntList il) throws IOException {

    // sort values before writing
    il.sort();
    final int is = il.size();
    outR.write5(outL.size());
    outL.writeNum(is);
    // o: previously written id; only the difference is stored
    for(int i = 0, o = 0; i < is; i++) {
      final int v = il.get(i);
      outL.writeNum(v - o);
      o = v;
    }
    il.reset();
  }

  /**
   * Drops all files of this index (final and temporary ones) and flags the
   * index as unavailable in the meta data.
   */
  @Override
  public void abort() {
    data.meta.drop((text ? DATATXT : DATAATV) + ".+");
    if(text) data.meta.textindex = false;
    else data.meta.attrindex = false;
  }

  /**
   * Returns the description of the index that is currently built.
   * @return text or attribute index description
   */
  @Override
  protected String det() {
    return text ? INDEX_TEXT_D : INDEX_ATTRIBUTES_D;
  }
}