/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import com.google.common.collect.Lists;
/**
 * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
 * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
 * also contains references to offsets in the other sections of the data block that enable the
 * decoder to match a row key with its qualifier, timestamp, type, value, etc.
 * <p>
 * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
 * internals of each row node.
 * <p>
 * Typical usage visible in this class: {@link #reconstruct(PrefixTreeEncoder)} (or
 * {@link #reset()}) to prepare the instance, {@link #compile()} to size the section and assign
 * node positions, then {@link #writeBytes(OutputStream)} to emit it. {@link RowNodeWriter}
 * instances are kept across resets so they can be recycled.
 */
@InterfaceAudience.Private
public class RowSectionWriter {

  /***************** fields **************************/

  /** Encoder that owns this writer; supplies the row tokenizer and the block meta. */
  protected PrefixTreeEncoder prefixTreeEncoder;

  /** Block meta obtained from the encoder; receives max row length and offset width. */
  protected PrefixTreeBlockMeta blockMeta;

  /** Total size in bytes of the row section, as computed by {@link #compile()}. */
  protected int numBytes;

  /** Nodes collected from the row tokenizer by the first appendNodes call in compile(). */
  protected ArrayList<TokenizerNode> nonLeaves;

  /** Nodes collected from the row tokenizer by the second appendNodes call in compile(). */
  protected ArrayList<TokenizerNode> leaves;

  /** Pool of writers for leaves; not cleared by reset() so entries can be recycled. */
  protected ArrayList<RowNodeWriter> leafWriters;

  /** Pool of writers for non-leaves; not cleared by reset() so entries can be recycled. */
  protected ArrayList<RowNodeWriter> nonLeafWriters;

  /** Number of entries of {@link #leafWriters} in use since the last reset. */
  protected int numLeafWriters;

  /** Number of entries of {@link #nonLeafWriters} in use since the last reset. */
  protected int numNonLeafWriters;


  /********************* construct **********************/

  /**
   * Creates a writer with empty node lists and empty writer pools. An encoder must be attached
   * via {@link #reconstruct(PrefixTreeEncoder)} before {@link #compile()} is called.
   */
  public RowSectionWriter() {
    this.nonLeaves = Lists.newArrayList();
    this.leaves = Lists.newArrayList();
    this.leafWriters = Lists.newArrayList();
    this.nonLeafWriters = Lists.newArrayList();
  }

  /**
   * Creates a writer that is already attached to the given encoder.
   *
   * @param prefixTreeEncoder encoder providing the row tokenizer and block meta
   */
  public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
    // The lists must exist before reconstruct() runs, because it calls reset(), which clears
    // them. Without this delegation the constructor would fail with a NullPointerException.
    this();
    reconstruct(prefixTreeEncoder);
  }

  /**
   * Attaches this writer to an encoder, picks up its block meta and resets the per-block state.
   *
   * @param prefixTreeEncoder encoder providing the row tokenizer and block meta
   */
  public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
    this.prefixTreeEncoder = prefixTreeEncoder;
    this.blockMeta = prefixTreeEncoder.getBlockMeta();
    reset();
  }

  /**
   * Clears the per-block state: byte count, node lists and in-use writer counts. The writer
   * pools themselves are intentionally left populated so that
   * {@link #initializeWriter(List, int, TokenizerNode)} can reuse their entries.
   */
  public void reset() {
    numBytes = 0;
    nonLeaves.clear();
    leaves.clear();
    numLeafWriters = 0;
    numNonLeafWriters = 0;
  }


  /****************** methods *******************************/

  /**
   * Sizes the row section and assigns a negative index to every node.
   * <p>
   * Steps performed:
   * <ol>
   * <li>record the tokenizer's max element length as the max row length in the block meta;</li>
   * <li>collect nodes from the row tokenizer into {@link #nonLeaves} and {@link #leaves}
   * (the boolean flags passed to appendNodes presumably select non-leaves and leaves
   * respectively -- confirm against the tokenizer);</li>
   * <li>prepare one {@link RowNodeWriter} per node and sum their widths;</li>
   * <li>pick the smallest offset width for which the whole section is smaller than the largest
   * value representable in that many bytes, and store it in the block meta;</li>
   * <li>set each non-leaf's negative index using its final width.</li>
   * </ol>
   *
   * @return this writer, for call chaining
   */
  public RowSectionWriter compile() {
    blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
    prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();

    prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
    prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);

    // track the starting position of each node in final output
    // (running sum of node widths, accumulated while walking nodes from last to first)
    int negativeIndex = 0;

    // create leaf writer nodes
    // leaf widths are known at this point, so add them up
    int totalLeafBytes = 0;
    for (int i = leaves.size() - 1; i >= 0; --i) {
      TokenizerNode leaf = leaves.get(i);
      // writers are filled in reverse node order: writer k serves node (size - 1 - k)
      RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
      ++numLeafWriters;
      // leaves store all but their first token byte
      int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
      totalLeafBytes += leafNodeWidth;
      negativeIndex += leafNodeWidth;
      leaf.setNegativeIndex(negativeIndex);
    }

    // non-leaf widths depend on the offset width, which is not known yet, so first measure
    // them with an offset width of 0 and count the child pointers separately
    int totalNonLeafBytesWithoutOffsets = 0;
    int totalChildPointers = 0;
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
      TokenizerNode nonLeaf = nonLeaves.get(i);
      RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
      ++numNonLeafWriters;
      totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
      totalChildPointers += nonLeaf.getNumChildren();
    }

    // figure out how wide our offset FInts are
    int offsetWidth = 0;
    while (true) {
      ++offsetWidth;
      int offsetBytes = totalChildPointers * offsetWidth;
      int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
      if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
        // it fits
        numBytes = totalRowBytes;
        break;
      }
    }
    blockMeta.setNextNodeOffsetWidth(offsetWidth);

    // populate negativeIndexes
    // (continues the running sum started with the leaves, now that final widths are available)
    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
      TokenizerNode nonLeaf = nonLeaves.get(i);
      // mirror of the fill order used above: node i was given writer (size - 1 - i)
      int writerIndex = nonLeaves.size() - i - 1;
      RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
      int nodeWidth = nonLeafWriter.calculateWidth();
      negativeIndex += nodeWidth;
      nonLeaf.setNegativeIndex(negativeIndex);
    }

    return this;
  }

  /**
   * Returns the writer at {@code index} in {@code list}, reset to serve {@code builderNode}.
   * If the list has no entry at that index yet, a new writer is appended first.
   *
   * @param list pool of writers to draw from
   * @param index position in the pool to use
   * @param builderNode node the returned writer will serve
   * @return the writer at {@code index}, reset for {@code builderNode}
   */
  protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
      TokenizerNode builderNode) {
    // check if there is an existing node we can recycle
    if (index >= list.size()) {
      // there are not enough existing nodes, so add a new one which will be retrieved below
      list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
    }
    RowNodeWriter rowNodeWriter = list.get(index);
    rowNodeWriter.reset(builderNode);
    return rowNodeWriter;
  }

  /**
   * Writes all in-use non-leaf writers followed by all in-use leaf writers to the stream.
   * Each pool is walked from its highest in-use index down to 0; because compile() fills the
   * pools in reverse node order, this emits nodes in the order they appear in the node lists.
   *
   * @param os destination stream
   * @throws IOException if a node writer fails to write to the stream
   */
  public void writeBytes(OutputStream os) throws IOException {
    for (int i = numNonLeafWriters - 1; i >= 0; --i) {
      RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
      nonLeafWriter.write(os);
    }
    // duplicates above... written more for clarity right now
    for (int i = numLeafWriters - 1; i >= 0; --i) {
      RowNodeWriter leafWriter = leafWriters.get(i);
      leafWriter.write(os);
    }
  }


  /***************** static ******************************/

  /**
   * Returns a new list holding, in reverse order, the nodes of {@code ins} whose
   * {@code isLeaf()} result equals {@code leaves}.
   *
   * @param ins nodes to filter; not modified
   * @param leaves {@code true} to keep only leaves, {@code false} to keep only non-leaves
   * @return a new list with the matching nodes in reverse order
   */
  protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
      ArrayList<TokenizerNode> ins, boolean leaves) {
    ArrayList<TokenizerNode> outs = Lists.newArrayList();
    for (int i = ins.size() - 1; i >= 0; --i) {
      TokenizerNode n = ins.get(i);
      if (n.isLeaf() == leaves) {
        outs.add(n);
      }
    }
    return outs;
  }


  /************* get/set **************************/

  /** @return total size in bytes of the row section as computed by {@link #compile()} */
  public int getNumBytes() {
    return numBytes;
  }

  /** @return the live (not copied) list of non-leaf nodes */
  public ArrayList<TokenizerNode> getNonLeaves() {
    return nonLeaves;
  }

  /** @return the live (not copied) list of leaf nodes */
  public ArrayList<TokenizerNode> getLeaves() {
    return leaves;
  }

  /** @return the live (not copied) pool of non-leaf writers, which may include unused entries */
  public ArrayList<RowNodeWriter> getNonLeafWriters() {
    return nonLeafWriters;
  }

  /** @return the live (not copied) pool of leaf writers, which may include unused entries */
  public ArrayList<RowNodeWriter> getLeafWriters() {
    return leafWriters;
  }

}