package org.basex.index;
import static org.basex.core.Text.*;
import java.io.*;
import org.basex.core.*;
import org.basex.core.jobs.*;
import org.basex.data.*;
import org.basex.index.value.*;
import org.basex.util.*;
/**
* This abstract class provides the common state and helper functions which are
* needed for building new index structures.
*
* @author BaseX Team 2005-17, BSD License
* @author Christian Gruen
*/
public abstract class IndexBuilder extends Job {
  /** Performance instance; appended to the debug output in {@link #finishIndex()}. */
  private final Performance perf = new Performance();

  /** Data reference. */
  protected final Data data;
  /**
   * Value of {@code data.meta.size} at construction time; used as reference value
   * for computing the progress.
   */
  protected final int size;
  /** Index type. */
  protected final IndexType type;
  /**
   * Text node flag: {@code true} for the {@link IndexType#TEXT} and
   * {@link IndexType#FULLTEXT} types (text nodes are indexed), {@code false}
   * otherwise (attributes are indexed).
   */
  protected final boolean text;

  /**
   * Number of index operations to perform before writing a partial index to disk
   * ({@code data.meta.splitsize} multiplied with {@link #splitFactor()}, limited to
   * {@link Integer#MAX_VALUE}). If the value is not positive, no fixed split size is
   * used, and the memory consumption is checked instead.
   */
  private final int splitSize;
  /** Maximum memory to consume (80% of the maximum memory of the runtime). */
  private final long maxMem = (long) (Runtime.getRuntime().maxMemory() * 0.8);
  /** Names and namespace uri of element or attributes to include. */
  private final IndexNames includeNames;

  /** Current pre value. */
  protected int pre;
  /** Total number of index operations (may get pretty large). */
  protected long count;
  /** Number of partial index structures. */
  protected int splits;
  /**
   * Countdown used to detect degenerated index splitting (only relevant if no fixed
   * split size is used). It is set to {@code 30} when a split is requested and
   * decremented (down to {@code -1}) on every call that requests no split. If another
   * split is requested while the value is still non-negative, the build is aborted.
   */
  private int gcCount;

  /**
   * Constructor.
   * @param data reference
   * @param type index type
   */
  protected IndexBuilder(final Data data, final IndexType type) {
    this.data = data;
    this.type = type;
    // type must be assigned before splitFactor() is called
    splitSize = (int) Math.min(Integer.MAX_VALUE, (long) data.meta.splitsize * splitFactor());
    size = data.meta.size;
    includeNames = new IndexNames(type, data);
    text = type == IndexType.TEXT || type == IndexType.FULLTEXT;

    // run garbage collection if memory maximum is already reached
    if(Performance.memory() >= maxMem) clean();
  }

  /**
   * Builds the index structure and returns an index instance.
   * @return index instance
   * @throws IOException I/O Exception
   */
  public abstract ValueIndex build() throws IOException;

  /**
   * Checks if the command was interrupted, and prints some debug output.
   * In debug mode, a dot is printed whenever the lower 21 bits of the current
   * pre value are zero (i.e., once per 2'097'152 pre values).
   * @throws IOException I/O Exception
   */
  @SuppressWarnings("unused")
  protected void check() throws IOException {
    checkStop();
    if(Prop.debug && (pre & 0x1FFFFF) == 0) Util.err(".");
  }

  /**
   * Checks if the current entry should be indexed: the node at the current pre value
   * must be of the indexed kind (text or attribute, depending on {@link #text}), and it
   * must be accepted by the names to include.
   * @return result of check
   */
  protected final boolean indexEntry() {
    return data.kind(pre) == (text ? Data.TEXT : Data.ATTR) && includeNames.contains(pre, text);
  }

  /**
   * Decides whether in-memory temporary index structures are so large
   * that we must flush them to disk before continuing.
   * <ul>
   *   <li> If a fixed split size exists, a split is requested as soon as the number of
   *     index operations reaches the next multiple of the split size.</li>
   *   <li> Otherwise, a split is requested if the consumed memory reaches
   *     the memory limit. If this happens again before at least 31 calls without
   *     split have occurred (or at the very first call), the operation is aborted.</li>
   * </ul>
   * @return true if structures shall be flushed to disk
   * @throws IOException I/O Exception (out of memory if index splitting degenerates)
   */
  protected final boolean splitRequired() throws IOException {
    // checks if a fixed split size has been specified
    final boolean split;
    if(splitSize > 0) {
      // long arithmetic: the product may exceed the integer range
      split = count >= (splits + 1L) * splitSize;
    } else {
      // if not, estimate how much main memory is left
      split = Performance.memory() >= maxMem;
      // stop operation if index splitting degenerates
      int gc = gcCount;
      if(split) {
        // the countdown of the previous split has not expired yet: give up
        if(gc >= 0) throw new BaseXException(OUT_OF_MEM + H_OUT_OF_MEM);
        gc = 30;
      } else {
        // count down; -1 indicates that the next split is allowed
        gc = Math.max(-1, gc - 1);
      }
      gcCount = gc;
    }
    if(split && Prop.debug) Util.err("|");
    return split;
  }

  /**
   * Performs memory cleanup after writing partial memory if necessary.
   * Garbage collection is only triggered if no fixed split size is used.
   */
  protected final void clean() {
    if(splitSize <= 0) Performance.gc(2);
  }

  /**
   * Prints some final debugging information: the number of index operations
   * (in millions), the performance and memory info and, if more than one partial
   * index was created without a fixed split size, a recommended split size.
   * Nothing is printed if debug mode is disabled.
   */
  protected final void finishIndex() {
    if(!Prop.debug) return;

    final StringBuilder sb = new StringBuilder();
    // integer division first: yields millions, truncated to two decimal places
    sb.append(' ').append((count / 10000) / 100.0d).append(" M operations, ");
    sb.append(perf).append(" (").append(Performance.getMemory()).append(").");
    if(splits > 1 && splitSize <= 0) {
      // average number of operations per partial index, converted back to option units
      sb.append(" Recommended ").append(MainOptions.SPLITSIZE.name()).append(": ");
      sb.append((int) Math.ceil(((double) count / splits) / splitFactor())).append('.');
    }
    Util.errln(sb);
  }

  /**
   * Returns the split factor dependent on the index type.
   * The following values are returned:
   * <ul>
   *   <li> Full-text index: 1'000'000</li>
   *   <li> Other value indexes: 100'000</li>
   * </ul>
   * @return split factor
   */
  private int splitFactor() {
    return type == IndexType.FULLTEXT ? 1000000 : 100000;
  }

  @Override
  public final String shortInfo() {
    return CREATING_INDEXES;
  }

  /**
   * Returns the progress as ratio of the current pre value and the reference size.
   * As soon as partial index structures exist, the reference size is enlarged by 2%.
   * @return progress value; {@code 0} if the reference size is not positive
   */
  @Override
  public final double progressInfo() {
    // empty input: avoid 0 / 0.0, which would yield NaN instead of a valid ratio
    if(size <= 0) return 0;
    return pre / (size + (splits > 0 ? size / 50.0d : 0.0d));
  }

  @Override
  public final String detailedInfo() {
    switch(type) {
      case TEXT:      return INDEX_TEXTS_D;
      case ATTRIBUTE: return INDEX_ATTRIBUTES_D;
      case TOKEN:     return INDEX_TOKENS_D;
      case FULLTEXT:  return INDEX_FULLTEXT_D;
      default:        throw Util.notExpected();
    }
  }
}