RowNodeWriter.java example

Explorer
pbase-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.codec.prefixtree.encode.row;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRangeUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;

/**
 * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
 * Please see the write() method for the order in which data is written.
 */
@InterfaceAudience.Private
public class RowNodeWriter{
  protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);

  /********************* fields ******************************/

  protected PrefixTreeEncoder prefixTreeEncoder;
  protected PrefixTreeBlockMeta blockMeta;
  protected TokenizerNode tokenizerNode;

  protected int tokenWidth;
  protected int fanOut;
  protected int numCells;

  protected int width;


  /*********************** construct *************************/

  public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
    reconstruct(keyValueBuilder, tokenizerNode);
  }

  public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
    this.prefixTreeEncoder = prefixTreeEncoder;
    reset(tokenizerNode);
  }

  public void reset(TokenizerNode node) {
    this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
    this.tokenizerNode = node;
    this.tokenWidth = 0;
    this.fanOut = 0;
    this.numCells = 0;
    this.width = 0;
    calculateOffsetsAndLengths();
  }


  /********************* methods ****************************/

  protected void calculateOffsetsAndLengths(){
    tokenWidth = tokenizerNode.getTokenLength();
    if(!tokenizerNode.isRoot()){
      --tokenWidth;//root has no parent
    }
    fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
    numCells = tokenizerNode.getNumOccurrences();
  }

  public int calculateWidth(){
    calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
    return width;
  }

  public int calculateWidthOverrideOffsetWidth(int offsetWidth){
    width = 0;
    width += UVIntTool.numBytes(tokenWidth);
    width += tokenWidth;

    width += UVIntTool.numBytes(fanOut);
    width += fanOut;

    width += UVIntTool.numBytes(numCells);

    if(tokenizerNode.hasOccurrences()){
      int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
        + blockMeta.getQualifierOffsetWidth()
        + blockMeta.getTagsOffsetWidth()
        + blockMeta.getTimestampIndexWidth()
        + blockMeta.getMvccVersionIndexWidth()
        + blockMeta.getKeyValueTypeWidth()
        + blockMeta.getValueOffsetWidth()
        + blockMeta.getValueLengthWidth();
      width += numCells * fixedBytesPerCell;
    }

    if( ! tokenizerNode.isLeaf()){
      width += fanOut * offsetWidth;
    }

    return width;
  }


  /*********************** writing the compiled structure to the OutputStream ***************/

  public void write(OutputStream os) throws IOException{
    //info about this row trie node
    writeRowToken(os);
    writeFan(os);
    writeNumCells(os);

    //UFInt indexes and offsets for each cell in the row (if nub or leaf)
    writeFamilyNodeOffsets(os);
    writeQualifierNodeOffsets(os);
    writeTagNodeOffsets(os);
    writeTimestampIndexes(os);
    writeMvccVersionIndexes(os);
    writeCellTypes(os);
    writeValueOffsets(os);
    writeValueLengths(os);
    //offsets to the children of this row trie node (if branch or nub)
    writeNextRowTrieNodeOffsets(os);
  }


  /**
   * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
   * fields can reproduce all the row keys that compose the block.
   */

  /**
   * UVInt: tokenWidth
   * bytes: token
   */
  protected void writeRowToken(OutputStream os) throws IOException {
    UVIntTool.writeBytes(tokenWidth, os);
    int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
    ByteRangeUtils.write(os, tokenizerNode.getToken(), tokenStartIndex);
  }

  /**
   * UVInt: numFanBytes/fanOut
   * bytes: each fan byte
   */
  public void writeFan(OutputStream os) throws IOException {
    UVIntTool.writeBytes(fanOut, os);
    if (fanOut <= 0) {
      return;
    }
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      os.write(child.getToken().get(0));// first byte of each child's token
    }
  }

  /**
   * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
   */
  protected void writeNumCells(OutputStream os) throws IOException {
    UVIntTool.writeBytes(numCells, os);
  }


  /**
   * The following methods write data for each cell in the row, mostly consisting of indexes or
   * offsets into the timestamp/column data structures that are written in the middle of the block.
   * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
   * search of a particular column/timestamp combination.
   * <p/>
   * Branch nodes will not have any data in these sections.
   */

  protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
    if (blockMeta.getFamilyOffsetWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
          .getFirstInsertionIndex() + i : 0;
      int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
        cellInsertionIndex);
      int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
        sortedIndex);
      UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
    }
  }

  protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
    if (blockMeta.getQualifierOffsetWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
        cellInsertionIndex);
      int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
        sortedIndex);
      UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
    }
  }

  protected void writeTagNodeOffsets(OutputStream os) throws IOException {
    if (blockMeta.getTagsOffsetWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      int sortedIndex = prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId(
        cellInsertionIndex);
      int indexedTagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset(
        sortedIndex);
      UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), indexedTagOffset, os);
    }
  }

  protected void writeTimestampIndexes(OutputStream os) throws IOException {
    if (blockMeta.getTimestampIndexWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
      int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
      UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
    }
  }

  protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
    if (blockMeta.getMvccVersionIndexWidth() <= 0) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
      int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
      UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
    }
  }

  protected void writeCellTypes(OutputStream os) throws IOException {
    if (blockMeta.isAllSameType()) {
      return;
    }
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
    }
  }

  protected void writeValueOffsets(OutputStream os) throws IOException {
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
      UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
    }
  }

  protected void writeValueLengths(OutputStream os) throws IOException {
    for (int i = 0; i < numCells; ++i) {
      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
      int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
      UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
    }
  }

  /**
   * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
   */
  protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      TokenizerNode child = children.get(i);
      int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
      UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
    }
  }
}