/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cn.ac.ncic.mastiff.io.coding;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
//import org.apache.hadoop.hive.mastiff.StreamName;
//import org.apache.hadoop.hive.mastiff.ORCStringecnodingUtil.MyVisitor;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import FlexibleEncoding.ORC.BufferedStream;
import FlexibleEncoding.ORC.DynamicByteArray;
import FlexibleEncoding.ORC.DynamicIntArray;
import FlexibleEncoding.ORC.InStream;
import FlexibleEncoding.ORC.IntegerReader;
import FlexibleEncoding.ORC.IntegerWriter;
import FlexibleEncoding.ORC.OrcProto;
import FlexibleEncoding.ORC.OutStream;
import FlexibleEncoding.ORC.PositionedOutputStream;
import FlexibleEncoding.ORC.RedBlackTree;
import FlexibleEncoding.ORC.RunLengthIntegerReader;
import FlexibleEncoding.ORC.RunLengthIntegerReaderV2;
import FlexibleEncoding.ORC.RunLengthIntegerWriter;
import FlexibleEncoding.ORC.RunLengthIntegerWriterV2;
import FlexibleEncoding.ORC.StreamName;
import FlexibleEncoding.ORC.StringRedBlackTree;
import FlexibleEncoding.ORC.TestInStream;
import FlexibleEncoding.ORC.TestStringRedBlackTree;
/**
 * Dictionary-encodes a sequence of strings into three in-memory streams and
 * reads them back.
 *
 * <p>Write side: {@link #init()}, {@link #add(String)} for every value,
 * {@link #iterator()} to dump the dictionary, {@link #rowoutPut()} to dump the
 * per-row dictionary references, then {@link #flush()}. The dictionary bytes
 * end up in {@link #collect1}, the entry lengths in {@link #collect2} and the
 * row references in {@link #collect3}.
 *
 * <p>Read side: set {@link #dictionarySize}, call {@link #readerInit()} and
 * then {@link #readEachValue(Text)} once per row.
 *
 * <p>The class also carries red-black tree validation helpers
 * ({@link #checkTree}, {@link #checkSubtree}) that {@link #add(String)} runs
 * on every insertion.
 *
 * <p>Instances hold mutable state in public fields and perform no
 * synchronization.
 */
public class ORCStringEcnodingUtil {
  private static final int INITIAL_DICTIONARY_SIZE = 4096;

  /** Receives the raw bytes of every dictionary entry. */
  public OutStream stringOutput;
  /** Receives the byte length of every dictionary entry. */
  public IntegerWriter lengthOutput;
  /** Receives, for every added row, the dumped id of its dictionary entry. */
  public IntegerWriter rowOutput;
  /** Dictionary of the distinct strings passed to {@link #add(String)}. */
  public StringRedBlackTree dictionary = new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
  /** For every added row, the value returned by {@code dictionary.add}. */
  public DynamicIntArray rows = new DynamicIntArray();
  /**
   * Maps a dictionary entry's original position to the id it received while
   * the dictionary was dumped. Allocated on demand by the dump if the caller
   * has not supplied an array that is large enough.
   */
  public int[] dumpOrder;
  /**
   * Number of dictionary entries {@link #readerInit()} should expect. It is
   * not set by the write path; the caller must assign it before reading.
   */
  public int dictionarySize = 0;
  /** Dictionary bytes loaded by {@link #readerInit()}; null when there were none. */
  public DynamicByteArray dictionaryBuffer;
  /** Start offset of every entry in {@link #dictionaryBuffer}, plus the end offset. */
  public int[] dictionaryOffsets;
  /** Backing store of {@link #stringOutput}. */
  public final TestInStream.OutputCollector collect1 = new TestInStream.OutputCollector();
  /** Backing store of {@link #lengthOutput}. */
  public TestInStream.OutputCollector collect2 = new TestInStream.OutputCollector();
  /** Backing store of {@link #rowOutput}. */
  public TestInStream.OutputCollector collect3 = new TestInStream.OutputCollector();

  /** Next id to hand out while the dictionary is being dumped. */
  private int currentId = 0;
  /** Reader over the row-reference stream, created by {@link #readerInit()}. */
  private IntegerReader reader;

  /**
   * Checks the validity of the entire tree. Also ensures that the number of
   * nodes visited is the same as the size of the set.
   *
   * @param tree the tree to validate
   * @throws IllegalStateException if the root is red, a red-black property is
   *         violated, or the visited node count differs from {@code tree.size};
   *         the tree is printed to {@code System.err} first
   * @throws IOException declared for the tree accessors used during the walk
   */
  public void checkTree(StringRedBlackTree tree) throws IOException {
    if (tree.isRed(tree.root)) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("root is red");
    }
    IntWritable visited = new IntWritable(0);
    checkSubtree(tree, tree.root, visited);
    if (visited.get() != tree.size) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("Broken tree! visited= " + visited.get() +
          " size=" + tree.size);
    }
  }

  /**
   * Prints the subtree rooted at {@code node} to {@code System.err}, one node
   * per line, with children indented one step further than their parent.
   *
   * @param tree the tree that owns the node
   * @param indent prefix written before this node's line
   * @param node id of the subtree root, or {@code RedBlackTree.NULL}
   */
  void printTree(RedBlackTree tree, String indent, int node
                 ) throws IOException {
    if (node == RedBlackTree.NULL) {
      System.err.println(indent + "NULL");
      return;
    }
    String color = tree.isRed(node) ? "red" : "black";
    System.err.println(indent + "Node " + node + " color " + color);
    String childIndent = indent + " ";
    printTree(tree, childIndent, tree.getLeft(node));
    printTree(tree, childIndent, tree.getRight(node));
  }

  /**
   * Checks the red-black tree rules to make sure that we have correctly built
   * a valid tree.
   *
   * Properties:
   * 1. Red nodes must have black children
   * 2. Each node must have the same black height on both sides.
   *
   * @param tree The tree that owns the node.
   * @param node The id of the root of the subtree to check for the red-black
   *             tree properties.
   * @param count Incremented once for every non-NULL node visited.
   * @return The black-height of the subtree.
   * @throws IllegalStateException if either property is violated; the whole
   *         tree is printed to {@code System.err} first
   */
  public int checkSubtree(RedBlackTree tree, int node, IntWritable count
                          ) throws IOException {
    if (node == RedBlackTree.NULL) {
      return 1;
    }
    count.set(count.get() + 1);
    boolean red = tree.isRed(node);
    int left = tree.getLeft(node);
    int right = tree.getRight(node);
    if (red) {
      if (tree.isRed(left)) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException("Left node of " + node + " is " + left +
            " and both are red.");
      }
      if (tree.isRed(right)) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException("Right node of " + node + " is " +
            right + " and both are red.");
      }
    }
    int leftDepth = checkSubtree(tree, left, count);
    int rightDepth = checkSubtree(tree, right, count);
    if (leftDepth != rightDepth) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("Lopsided tree at node " + node +
          " with depths " + leftDepth + " and " + rightDepth);
    }
    // Only black nodes add to the black-height.
    return red ? leftDepth : leftDepth + 1;
  }

  /**
   * Dumps {@code tree} to the dictionary streams. Despite the name, nothing is
   * compared: {@code order} and {@code params} are accepted for signature
   * compatibility and are not consulted.
   */
  void checkContents(StringRedBlackTree tree, int[] order,
                     String... params
                     ) throws IOException {
    dumpDictionary(tree);
  }

  /** Dumps {@code tree} to the dictionary streams. */
  void checkContents(StringRedBlackTree tree) throws IOException {
    dumpDictionary(tree);
  }

  /**
   * Builds a tree from the given words, validating it after every insertion.
   *
   * @param params the words to add, in order
   * @return the populated tree
   */
  StringRedBlackTree buildTree(String... params) throws IOException {
    StringRedBlackTree built = new StringRedBlackTree(1000);
    for (String word : params) {
      built.add(word);
      checkTree(built);
    }
    return built;
  }

  /**
   * Visits every entry of {@code tree}, writing it to {@link #stringOutput}
   * and {@link #lengthOutput} and recording its id in {@link #dumpOrder}.
   */
  private void dumpDictionary(StringRedBlackTree tree) throws IOException {
    // The visitor indexes dumpOrder by original position, so it needs one slot
    // per tree node. Keep a caller-supplied array when it is large enough.
    if (dumpOrder == null || dumpOrder.length < tree.size) {
      dumpOrder = new int[tree.size];
    }
    // Ids are positions within this dump, so every dump starts again at zero.
    currentId = 0;
    tree.visit(new MyVisitor());
  }

  /**
   * Writes each visited dictionary entry to the enclosing instance's streams
   * and assigns it the next dump id.
   */
  private class MyVisitor implements StringRedBlackTree.Visitor {
    @Override
    public void visit(StringRedBlackTree.VisitorContext context
                      ) throws IOException {
      context.writeBytes(stringOutput);
      lengthOutput.write(context.getLength());
      dumpOrder[context.getOriginalPosition()] = currentId++;
    }
  }

  /**
   * Dumps {@link #dictionary} to {@link #stringOutput} and
   * {@link #lengthOutput} and fills {@link #dumpOrder}. {@link #init()} must
   * have been called first.
   */
  public void iterator() throws IOException {
    checkContents(dictionary);
  }

  /**
   * Creates a buffered stream named after the given column and stream kind and
   * returns its output side.
   *
   * @param column column id used in the stream name
   * @param kind stream kind used in the stream name
   * @return the {@code outStream} of the new buffered stream
   */
  public OutStream createStream(int column,
                                OrcProto.Stream.Kind kind
                                ) throws IOException {
    StreamName name = new StreamName(column, kind);
    BufferedStream stream =
        new BufferedStream(name.toString(), INITIAL_DICTIONARY_SIZE, null);
    return stream.outStream;
  }

  /**
   * Creates a run-length integer writer over {@code output}.
   *
   * @param output destination stream
   * @param signed passed through to the writer
   * @param isDirectV2 true for {@link RunLengthIntegerWriterV2}, false for
   *        {@link RunLengthIntegerWriter}
   * @return the new writer
   */
  public IntegerWriter createIntegerWriter(PositionedOutputStream output,
                                           boolean signed, boolean isDirectV2) {
    if (isDirectV2) {
      return new RunLengthIntegerWriterV2(output, signed);
    }
    return new RunLengthIntegerWriter(output, signed);
  }

  /**
   * Adds one row value: inserts {@code str} into {@link #dictionary} and
   * appends the value returned by the insertion to {@link #rows}.
   *
   * @param str the row value
   * @throws IllegalStateException if the dictionary tree fails validation
   */
  public void add(String str) throws IOException {
    // NOTE(review): this walks the whole tree on every insertion; it is kept
    // because callers may rely on the IllegalStateException it raises.
    checkTree(dictionary);
    rows.add(dictionary.add(str));
  }

  /**
   * Creates the three output streams, each backed by its own collector
   * ({@link #collect1}, {@link #collect2}, {@link #collect3}).
   */
  public void init() throws IOException {
    stringOutput = new OutStream("test1", 1000, null, collect1);
    lengthOutput = new RunLengthIntegerWriterV2(
        new OutStream("test2", 1000, null, collect2), false);
    rowOutput = new RunLengthIntegerWriterV2(
        new OutStream("test3", 1000, null, collect3), false);
  }

  /**
   * Flushes the three output streams. The dictionary, the rows and the
   * collectors are left untouched.
   */
  public void flush() throws IOException {
    // NOTE(review): debug trace on stdout, kept as-is for output compatibility.
    System.out.println("293 "+stringOutput.getBufferSize());
    stringOutput.flush();
    lengthOutput.flush();
    rowOutput.flush();
  }

  /**
   * Writes, for every entry of {@link #rows}, its dumped dictionary id to
   * {@link #rowOutput}. {@link #iterator()} must have filled
   * {@link #dumpOrder} first.
   */
  public void rowoutPut() throws IOException {
    int rowCount = rows.size();
    for (int i = 0; i < rowCount; i++) {
      rowOutput.write(dumpOrder[rows.get(i)]);
    }
  }

  /**
   * Prepares for reading: loads the dictionary bytes from {@link #collect1},
   * rebuilds the entry offsets from the lengths in {@link #collect2}, and
   * opens a reader over the row references in {@link #collect3}.
   *
   * <p>{@link #dictionarySize} must hold the number of dictionary entries
   * before this is called. The offsets and the row reader are set up even when
   * the dictionary holds no bytes, in which case {@link #dictionaryBuffer} is
   * set to null.
   */
  public void readerInit() throws IOException {
    // Dictionary blob.
    InStream in = openCollected("test1", collect1);
    if (in.available() > 0) {
      dictionaryBuffer = new DynamicByteArray(64, in.available());
      dictionaryBuffer.readAll(in);
    } else {
      dictionaryBuffer = null;
    }
    in.close();

    // Entry lengths, accumulated into start offsets plus one trailing end offset.
    in = openCollected("test2", collect2);
    IntegerReader lenReader =
        createIntegerReader(OrcProto.ColumnEncoding.Kind.DIRECT_V2, in, false);
    int offset = 0;
    dictionaryOffsets = new int[dictionarySize + 1];
    for (int i = 0; i < dictionarySize; ++i) {
      dictionaryOffsets[i] = offset;
      offset += (int) lenReader.next();
    }
    dictionaryOffsets[dictionarySize] = offset;
    in.close();

    // Row references; this stream stays open because the reader consumes it lazily.
    in = openCollected("test3", collect3);
    reader = createIntegerReader(OrcProto.ColumnEncoding.Kind.DIRECT_V2,
        in, false);
  }

  /** Copies a collector's bytes into a fresh buffer and opens an input stream on it. */
  private InStream openCollected(String streamName,
                                 TestInStream.OutputCollector collector
                                 ) throws IOException {
    ByteBuffer bytes = ByteBuffer.allocate(collector.buffer.size());
    collector.buffer.setByteBuffer(bytes, 0, collector.buffer.size());
    bytes.flip();
    return InStream.create(streamName, bytes, null, dictionarySize);
  }

  /**
   * Reads the next row reference and returns the dictionary string it points
   * to. {@link #readerInit()} must have been called first.
   *
   * @param previous a {@code Text} to reuse as scratch space, or null to
   *        allocate a new one
   * @return the decoded value; the empty string when no dictionary bytes were
   *         loaded
   */
  public String readEachValue(Text previous) throws IOException {
    int entry = (int) reader.next();
    Text result = (previous == null) ? new Text() : previous;
    int offset = dictionaryOffsets[entry];
    if (dictionaryBuffer == null) {
      // No dictionary bytes were loaded, so every value is the empty string.
      result.clear();
    } else {
      int length;
      // if it isn't the last entry, subtract the offsets otherwise use
      // the buffer length.
      if (entry < dictionaryOffsets.length - 1) {
        length = dictionaryOffsets[entry + 1] - offset;
      } else {
        length = dictionaryBuffer.size() - offset;
      }
      dictionaryBuffer.setText(result, offset, length);
    }
    return result.toString();
  }

  /**
   * Creates the run-length integer reader matching a column encoding.
   *
   * @param kind the column encoding
   * @param in the stream to read from
   * @param signed passed through to the reader
   * @return a V2 reader for {@code DIRECT_V2}/{@code DICTIONARY_V2}, a V1
   *         reader for {@code DIRECT}/{@code DICTIONARY}
   * @throws IllegalArgumentException for any other encoding
   */
  public IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
                                           InStream in,
                                           boolean signed) throws IOException {
    switch (kind) {
      case DIRECT_V2:
      case DICTIONARY_V2:
        return new RunLengthIntegerReaderV2(in, signed);
      case DIRECT:
      case DICTIONARY:
        return new RunLengthIntegerReader(in, signed);
      default:
        throw new IllegalArgumentException("Unknown encoding " + kind);
    }
  }

  /**
   * Decodes one value per entry of {@link #rows} and prints each to
   * {@code System.out}.
   */
  public void foreach() throws IOException {
    int rowCount = rows.size();
    for (int i = 0; i < rowCount; i++) {
      System.out.println("result "+readEachValue(null));
    }
  }

  /**
   * Runs the write and read steps in order on a fresh instance. No values are
   * added here, so the run exercises the pipeline with an empty dictionary.
   */
  public static void main(String[] args) throws Exception {
    ORCStringEcnodingUtil test = new ORCStringEcnodingUtil();
    test.init();
    test.iterator();
    test.rowoutPut();
    test.flush();
    test.readerInit();
    test.foreach();
  }
}