package FlexibleEncoding.Parquet;
/*
* This is used to test various encodings from Parquet and ORC files. It is also used to test some new combinations of the basic Parquet and ORC encodings, which were combined by wangmeng.
* @author wangmeng
*/
import static org.junit.Assert.assertEquals;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
public class TestOnlyByteDictionaryRLEBitPackingZIgzar {
static java.util.Calendar c=java.util.Calendar.getInstance();
static java.text.SimpleDateFormat f=new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒");
public static int fileLong =0 ,count =0 ,runcount=0 ,DictionarySize;
public static long startTime ,encodingReadTime ,encodingTime,encodingWriteTime, RencodingReadTime ,RencodingTime,RencodingWriteTime, finalTime;
public static long DeltaencodingReadTime=0 ,DeltaencodingTime,DeltaencodingWriteTime=0, DeltaRencodingReadTime=0 ,DeltaRencodingTime=0,DeltaRencodingWriteTime=0;
public static void testByteDictionary(String[] s) throws IOException {
// System.out.println("ByteOnlyDictionaryRLEBitPackingZIgzar : begin write to DictionaryRLE : " +new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒").format(java.util.Calendar.getInstance().getTime()));
encodingReadTime=System.currentTimeMillis() ;
File file=new File(s[0]) ;
FileInputStream fis =new FileInputStream(file);
DataInputStream dis=new DataInputStream(fis);
fileLong=(int) file.length();
byte[] initbytes=new byte[fileLong] ;
// for (int i = 0; i < fileLong; i++) {
// initbytes[i]=dis.readByte() ;
// }
dis.readFully(initbytes);
fis.close();
dis.close();
encodingTime=System.currentTimeMillis() ;
DeltaencodingReadTime=DeltaencodingReadTime+encodingTime-encodingReadTime ;
final OnlyDictionaryValuesWriter.PlainBinaryDictionaryValuesWriter cw = new OnlyDictionaryValuesWriter.PlainBinaryDictionaryValuesWriter(Integer.MAX_VALUE, fileLong);
//byte[] tmpByte=new byte[1];
//tmpByte[0]=initbytes[0];
// Binary binary=Binary.fromByteArray(tmpByte) ;
//
// cw.writeBytes(binary) ;
long tmps=System.currentTimeMillis();
for (int i = 0; i < fileLong; i++) {
// cw.writeBytes(binary.copyBianry(initbytes[i])) ;
// cw.writeBytes(v); ;
//Binary.fromString(prefix + i % 10)
// cw.writeBytes(v);
// Binary tmp= Binary.fromString("" +initbytes[i]);
// Binary.fromByteArray(initbytes[i]);
// tmpByte[0]=initbytes[i];
// Binary b=Binary.fromByteArray(tmpByte) ;
// b.
//binary.copyBianry(initbytes[i])
// System.out.println(initbytes[i]);
cw.writeBytes(Binary.fromString("" +initbytes[i]));
}
System.out.println("time 1 "+(System.currentTimeMillis()-tmps));
BytesInput bi=cw.getBytes() ;
encodingWriteTime=System.currentTimeMillis() ;
String[] str =new String[s.length-1];
for(int i=0 ;i<s.length-1;i++){
str[i]=s[i+1];
}
long tmp= cw.WriteDictionaryToDisk(s);
long tmp0=bi.writeToDisk(str);
DeltaencodingTime=DeltaencodingTime+encodingWriteTime-encodingTime +tmp +tmp0;
RencodingReadTime=System.currentTimeMillis() ;
DeltaencodingWriteTime=DeltaencodingWriteTime+RencodingReadTime-encodingWriteTime-tmp-tmp0 ;
// System.out.println("ByteOnlyDictionaryRLEBitPackingZIgzar : finish write to DictionaryRLE : " +new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒").format(java.util.Calendar.getInstance().getTime()));
//System.out.println("ByteDictionaryRLEBitPackingZIgzar: begin read from DictionaryRLE : " +new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒").format(java.util.Calendar.getInstance().getTime()));
byte[] bytes=getDictionaryIdBytes(s);
byte[] dictionaryBytes=getDictionaryBytes(s[3]);
RencodingTime=System.currentTimeMillis() ;
DeltaRencodingReadTime=DeltaRencodingReadTime+RencodingTime-RencodingReadTime;
DictionaryValuesReader cr = initDicReader(DictionarySize,dictionaryBytes,PrimitiveType.PrimitiveTypeName.BINARY);
cr.initFromPage(fileLong, bytes, 0);
// int count=0 ;
byte[] result=new byte[fileLong] ;
//byte[] b ;
// Binary m ;
// byte[] b ;
// Binary m ;
long tmp2 =System.currentTimeMillis() ;
for (int i = 0; i < fileLong; i++) {
//int back = cr.readInteger();
// m=cr.readBytes();
// b=m.getBytes();
// result[i]=b[0] ;
result[i]=Byte.parseByte(cr.readBytes().toStringUsingUTF8());
//System.out.println(cr.readBytes().toStringUsingUTF8());
// System.out.println(result[i]);
count ++ ;
}
System.out.println("time2 "+(System.currentTimeMillis()-tmp2));
// for (int i = 0; i < fileLong; i++) {
// //int back = cr.readInteger();
// // m=cr.readBytes();
// // b=m.getBytes();
// // result[i]=cr.readByte() ;
//
// System.out.println(result[i]);
// count ++ ;
// }
RencodingWriteTime=System.currentTimeMillis() ;
DeltaRencodingTime=DeltaRencodingTime+RencodingWriteTime-RencodingTime;
FileOutputStream revrseencodingFis=new FileOutputStream(new File(s[4])) ;
DataOutputStream revrseencodingDos=new DataOutputStream(revrseencodingFis) ;
revrseencodingDos.write(result);
revrseencodingFis.close();
revrseencodingDos.close();
finalTime=System.currentTimeMillis() ;
DeltaRencodingWriteTime=DeltaRencodingWriteTime+finalTime-RencodingWriteTime;
// System.out.println("total count "+ count);
// System.out.println("ByteOnlyDictionaryRLEBitPackingZIgzar : finish read from DictionaryRLE : " +new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒").format(java.util.Calendar.getInstance().getTime()));
}
private static byte[] getDictionaryIdBytes(String[] s) throws IOException{
// BytesInput bit=cw.getBIdytes();
// System.out.println("67 "+bit.size());
// byte[] byt= bit.toByteArray();////inner write data to disk wm
File file=new File(s[1]);
DataInputStream dis =new DataInputStream(new FileInputStream(file));
byte[] byt=new byte[(int) file.length()];
// System.out.println(dis.readInt());
// System.out.println(dis.readInt());
dis.readFully(byt);
dis.close();
return byt;
}
private static byte[] getDictionaryBytes(String s) throws IOException{
File file =new File(s);
DataInputStream dis =new DataInputStream(new FileInputStream(file));
byte[] bytes=new byte[(int) (file.length()-4)];
//System.out.println(file.length()-4);
DictionarySize = dis.readInt();
dis.readFully(bytes);
dis.close();
return bytes;
}
private static DictionaryValuesReader initDicReader(int DictionarySize ,byte[] bytes, PrimitiveType.PrimitiveTypeName type)
throws IOException {
//need know dictionary size(50) dictionaryBytesSize(50*4) dictionary BytsInput wm
// final DictionaryPage dictionaryPage = cw.createDictionaryPage().copy();
// new DictionaryPage(dictionaryEncoder.getBytes(), lastUsedDictionarySize, Encoding.PLAIN_DICTIONARY);
// final DictionaryPage dictionaryPage = cw.createDictionaryPage().copy() ;
// File file =new File(s[3]);
// DataInputStream dis =new DataInputStream(new FileInputStream(file));
// byte[] bytes=new byte[(int) (file.length()-4)];
// //System.out.println(file.length()-4);
//
// int DictionarySize = dis.readInt();
// dis.readFully(bytes);
// dis.close();
CapacityByteArrayOutputStream cbs=new CapacityByteArrayOutputStream(bytes.length);
cbs.write(bytes, 0, bytes.length);
BytesInput bytesInput =new BytesInput.CapacityBAOSBytesInput(cbs) ;
// DictionaryPage dictionaryPage =new DictionaryPage(200,50,Encoding.PLAIN_DICTIONARY);
DictionaryPage dictionaryPage =new DictionaryPage( bytesInput,DictionarySize,Encoding.PLAIN_DICTIONARY);
// System.out.println(bytesInput.toByteArray().length);
final ColumnDescriptor descriptor = new ColumnDescriptor(new String[] {"foo"}, type, 0, 0);
Encoding encoding=Encoding.PLAIN_DICTIONARY ;
final Dictionary dictionary = encoding.initDictionary(descriptor, dictionaryPage);
final DictionaryValuesReader cr = new DictionaryValuesReader(dictionary);
return cr;
}
private void roundTripInt(DictionaryValuesWriter cw, ValuesReader reader, int maxDictionaryByteSize) throws IOException {
int fallBackThreshold = maxDictionaryByteSize / 4;
for (int i = 0; i < 100; i++) {
cw.writeInteger(i);
if (i < fallBackThreshold) {
// assertEquals(cw.getEncoding(), PLAIN_DICTIONARY);
} else {
// assertEquals(cw.getEncoding(), PLAIN);
}
}
reader.initFromPage(100, cw.getBytes().toByteArray(), 0);
for (int i = 0; i < 100; i++) {
// assertEquals(i, reader.readInteger());
}
}
public static void main(String[] args) throws IOException {
// TODO Auto-generated method stub
File sourcefileDirectory =new File(args[0]);
File[] sourcefiles= sourcefileDirectory.listFiles() ;
int length=sourcefiles.length;
String[] sourcestr=new String[length];
String[] encodinstr=new String[length];
String[] dictionaryStr=new String[length];
String[] revrseEncodingstr=new String[length];
for (int i=0;i<sourcefiles.length;i++){
sourcestr[i]=sourcefiles[i].getAbsolutePath() ;
encodinstr[i]=args[1]+"/"+i ;
dictionaryStr[i]=args[3]+"/"+i ;
revrseEncodingstr[i]=args[4]+"/"+i ;
// System.out.println(finalstr[i]);
}
String[] runstr=new String[5] ;
runstr[2]=args[2];
startTime=System.currentTimeMillis() ;
System.out.println("ORCRLE : begin ORCRLE : " +new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒").format(java.util.Calendar.getInstance().getTime()));
for (int i=0;i<length;i++){
runstr[0]=sourcestr[i];
runstr[1]=encodinstr[i] ;
runstr[3]=dictionaryStr[i];
runstr[4]=revrseEncodingstr[i] ;
testByteDictionary(runstr);
runcount++ ;
}
System.out.println("ORCRLE : finish ORCRLE : " +new java.text.SimpleDateFormat("yyyy年MM月dd日hh时mm分ss秒").format(java.util.Calendar.getInstance().getTime()));
System.out.println("totoal run :"+runcount+" times");
System.out.println("totoal valuecount :"+count);
//DeltaencodingReadTime=0 ,DeltaencodingTime,DeltaencodingWriteTime=0, DeltaRencodingReadTime=0 ,DeltaRencodingTime=0,DeltaRencodingWriteTime=0;
System.out.println("DeltaencodingReadTime :"+DeltaencodingReadTime+" mis");
System.out.println("DeltaencodingTime :"+DeltaencodingTime+" mis");
System.out.println("DeltaencodingWriteTime :"+DeltaencodingWriteTime+" mis");
System.out.println("DeltaRencodingReadTime :"+DeltaRencodingReadTime+" mis");
System.out.println("DeltaRencodingTime :"+DeltaRencodingTime+" mis");
System.out.println("DeltaRencodingWriteTime :"+DeltaRencodingWriteTime+" mis");
System.out.println("total time : "+(finalTime-startTime)+" mis");
long encodingtotalLong=0 , revrsetotalLong=0 ;
for(int j=0 ;j<length;j++){
revrsetotalLong= revrsetotalLong+new File(revrseEncodingstr[j]).length();
encodingtotalLong=encodingtotalLong+new File(encodinstr[j]).length()+new File(dictionaryStr[j]).length();
}
System.out.println("encodingtotalLong : "+encodingtotalLong+" /1024/1024 "+encodingtotalLong/1024/1024);
System.out.println("revrsetotalLong : "+revrsetotalLong+" /1024/1024 "+revrsetotalLong/1024/1024);
}
}