IndexBuilder.java example

Explorer
basex-master
package org.basex.index;

import static org.basex.core.Text.*;

import java.io.*;

import org.basex.core.*;
import org.basex.core.jobs.*;
import org.basex.data.*;
import org.basex.index.value.*;
import org.basex.util.*;

/**
 * This interface defines the functions which are needed for building
 * new index structures.
 *
 * @author BaseX Team 2005-17, BSD License
 * @author Christian Gruen
 */
public abstract class IndexBuilder extends Job {
  /** Performance. */
  private final Performance perf = new Performance();

  /** Data reference. */
  protected final Data data;
  /** Total parsing value. */
  protected final int size;
  /** Index type. */
  protected final IndexType type;
  /** Text node flag. */
  protected final boolean text;

  /** Number of index operations to perform before writing a partial index to disk. */
  private final int splitSize;
  /** Maximum memory to consume. */
  private final long maxMem = (long) (Runtime.getRuntime().maxMemory() * 0.8);

  /** Names and namespace uri of element or attributes to include. */
  private final IndexNames includeNames;

  /** Current pre value. */
  protected int pre;
  /** Total number of index operations (may get pretty large). */
  protected long count;
  /** Number of partial index structures. */
  protected int splits;
  /** Threshold for freeing memory when estimating main memory consumption. */
  private int gcCount;

  /**
   * Constructor.
   * @param data reference
   * @param type index type
   */
  protected IndexBuilder(final Data data, final IndexType type) {
    this.data = data;
    this.type = type;
    splitSize = (int) Math.min(Integer.MAX_VALUE, (long) data.meta.splitsize * splitFactor());
    size = data.meta.size;
    includeNames = new IndexNames(type, data);
    text = type == IndexType.TEXT || type == IndexType.FULLTEXT;

    // run garbage collection if memory maximum is already reached
    if(Performance.memory() >= maxMem) clean();
  }

  /**
   * Builds the index structure and returns an index instance.
   * @return index instance
   * @throws IOException I/O Exception
   */
  public abstract ValueIndex build() throws IOException;

  /**
   * Checks if the command was interrupted, and prints some debug output.
   * @throws IOException I/O Exception
   */
  @SuppressWarnings("unused")
  protected void check() throws IOException {
    checkStop();
    if(Prop.debug && (pre & 0x1FFFFF) == 0) Util.err(".");
  }

  /**
   * Checks if the current entry should be indexed.
   * @return result of check
   */
  protected final boolean indexEntry() {
    return data.kind(pre) == (text ? Data.TEXT : Data.ATTR) && includeNames.contains(pre, text);
  }

  /**
   * Decides whether in-memory temporary index structures are so large
   * that we must flush them to disk before continuing.
   * @return true if structures shall be flushed to disk
   * @throws IOException I/O Exception
   */
  protected final boolean splitRequired() throws IOException {
    // checks if a fixed split size has been specified
    final boolean split;
    if(splitSize > 0) {
      split = count >= (splits + 1L) * splitSize;
    } else {
      // if not, estimate how much main memory is left
      split = Performance.memory() >= maxMem;
      // stop operation if index splitting degenerates
      int gc = gcCount;
      if(split) {
        if(gc >= 0) throw new BaseXException(OUT_OF_MEM + H_OUT_OF_MEM);
        gc = 30;
      } else {
        gc = Math.max(-1, gc - 1);
      }
      gcCount = gc;
    }
    if(split && Prop.debug) Util.err("|");
    return split;
  }

  /**
   * Performs memory cleanup after writing partial memory if necessary.
   */
  protected final void clean() {
    if(splitSize <= 0) Performance.gc(2);
  }

  /**
   * Prints some final debugging information.
   */
  protected final void finishIndex() {
    if(!Prop.debug) return;

    final StringBuilder sb = new StringBuilder();
    sb.append(' ').append((count / 10000) / 100.0d).append(" M operations, ");
    sb.append(perf).append(" (").append(Performance.getMemory()).append(").");
    if(splits > 1 && splitSize <= 0) {
      sb.append(" Recommended ").append(MainOptions.SPLITSIZE.name()).append(": ");
      sb.append((int) Math.ceil(((double) count / splits) / splitFactor())).append('.');
    }
    Util.errln(sb);
  }

  /**
   * Returns the split factor dependent on the index type.
   * The following values are returned:
   * <ul>
   *   <li> Full-text index: 1'000'000</li>
   *   <li> Other value indexes: 100'000</li>
   * </ul>
   * @return split factor
   */
  private int splitFactor() {
    return type == IndexType.FULLTEXT ? 1000000 : 100000;
  }

  @Override
  public final String shortInfo() {
    return CREATING_INDEXES;
  }

  @Override
  public final double progressInfo() {
    return pre / (size + (splits > 0 ? size / 50.0d : 0.0d));
  }

  @Override
  public final String detailedInfo() {
    switch(type) {
      case TEXT: return INDEX_TEXTS_D;
      case ATTRIBUTE: return INDEX_ATTRIBUTES_D;
      case TOKEN: return INDEX_TOKENS_D;
      case FULLTEXT: return INDEX_FULLTEXT_D;
      default: throw Util.notExpected();
    }
  }
}