package FlexibleEncoding.Parquet;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Random;

/**
 * Test driver for various encodings taken from Parquet and ORC files, and for new
 * combinations of those basic encodings (combined by wangmeng).
 *
 * <p>This class exercises the delta byte-array encoding: for every file in a source
 * directory it encodes the file byte by byte with {@link DeltaByteArrayWriter}, writes the
 * encoded page to disk, reads it back, decodes it with {@link DeltaByteArrayReader}, writes
 * the decoded bytes to a second file, and accumulates wall-clock timings for each phase in
 * the static counters below.
 *
 * <p>All state is held in mutable static fields; {@link #readingDelta(String[])} relies on
 * values ({@link #fileLong}, {@link #RencodingReadTime}) left behind by the preceding
 * {@link #prepare(String[])} call.
 *
 * @author wangmeng
 */
public class TestDeltaBytesEncoding {

    /** Pattern used for the begin/finish timestamps printed by {@link #main(String[])}. */
    private static final String TIMESTAMP_PATTERN = "yyyy年MM月dd日hh时mm分ss秒";

    // Not referenced by the active code of this class; kept because they are public.
    public static int blockSize = 128;
    public static int miniBlockNum = 4;
    public static byte[] deltaBytes;
    public static byte[] rleBytes;

    /** Length in bytes of the source file most recently read by {@link #prepare(String[])}. */
    public static int fileLong = 0;
    /** Total number of values decoded across all {@link #readingDelta(String[])} calls. */
    public static int count = 0;
    /** Number of prepare/readingDelta round trips completed by {@link #main(String[])}. */
    public static int runcount = 0;

    // Phase boundary timestamps (System.currentTimeMillis()) of the most recent round trip.
    public static long startTime, encodingReadTime, encodingTime, encodingWriteTime,
            RencodingReadTime, RencodingTime, RencodingWriteTime, finalTime;

    // Accumulated per-phase durations, in milliseconds, over all round trips.
    public static long DeltaencodingReadTime = 0, DeltaencodingTime, DeltaencodingWriteTime = 0,
            DeltaRencodingReadTime = 0, DeltaRencodingTime = 0, DeltaRencodingWriteTime = 0;

    // Not referenced by the active code of this class; kept for package-level compatibility.
    static java.util.Calendar c = java.util.Calendar.getInstance();
    static java.text.SimpleDateFormat f = new java.text.SimpleDateFormat(TIMESTAMP_PATTERN);

    /**
     * Reads the source file {@code s[0]}, encodes every byte as its decimal string through a
     * {@link DeltaByteArrayWriter}, and hands the encoded page to
     * {@code BytesInput.writeToDisk} together with the remaining elements {@code s[1..]}.
     *
     * <p>Updates {@link #fileLong} and the encoding-side timing counters.
     *
     * @param s run descriptor; {@code s[0]} is the source file path, the rest is forwarded
     *          unchanged to {@code writeToDisk}
     * @throws IOException if the source file cannot be read (including files larger than
     *                     {@code Integer.MAX_VALUE} bytes) or the encoded page cannot be written
     */
    public static void prepare(String[] s) throws IOException {
        encodingReadTime = System.currentTimeMillis();
        byte[] data = readWholeFile(new File(s[0]));
        fileLong = data.length;
        encodingTime = System.currentTimeMillis();
        DeltaencodingReadTime += encodingTime - encodingReadTime;

        DeltaByteArrayWriter delta = new DeltaByteArrayWriter(fileLong / 10);
        for (int i = 0; i < fileLong; i++) {
            // Each byte is written as its signed decimal text, e.g. "-12" or "97".
            delta.writeBytes(Binary.fromString("" + data[i]));
        }
        BytesInput bi = delta.getBytes();
        encodingWriteTime = System.currentTimeMillis();

        // writeToDisk receives everything except the source path.
        String[] str = new String[s.length - 1];
        System.arraycopy(s, 1, str, 0, str.length);
        // NOTE(review): the returned value is treated as time that belongs to encoding rather
        // than to disk writing - confirm against BytesInput.writeToDisk.
        long time = bi.writeToDisk(str);

        DeltaencodingTime += encodingWriteTime - encodingTime + time;
        RencodingReadTime = System.currentTimeMillis();
        DeltaencodingWriteTime += RencodingReadTime - encodingWriteTime - time;
    }

    /**
     * Reads the encoded file {@code s[1]}, decodes {@link #fileLong} values with a
     * {@link DeltaByteArrayReader}, parses each one back into a byte, and writes the
     * resulting byte array to {@code s[3]}.
     *
     * <p>Must be called after {@link #prepare(String[])} for the same run, because it uses
     * {@link #fileLong} and {@link #RencodingReadTime} set there. Updates {@link #count},
     * {@link #finalTime} and the decoding-side timing counters.
     *
     * @param s run descriptor; {@code s[1]} is the encoded file path and {@code s[3]} the
     *          path of the decoded output file
     * @throws IOException if the encoded file cannot be read or the output cannot be written
     */
    public static void readingDelta(String[] s) throws IOException {
        byte[] bytes = readWholeFile(new File(s[1]));
        RencodingTime = System.currentTimeMillis();
        DeltaRencodingReadTime += RencodingTime - RencodingReadTime;

        DeltaByteArrayReader reader = new DeltaByteArrayReader();
        reader.initFromPage(fileLong, bytes, 0);
        byte[] result = new byte[fileLong];
        for (int i = 0; i < fileLong; i++) {
            // Inverse of prepare(): the decimal text is parsed back into the original byte.
            result[i] = Byte.parseByte(reader.readBytes().toStringUsingUTF8());
            count++;
        }
        RencodingWriteTime = System.currentTimeMillis();
        DeltaRencodingTime += RencodingWriteTime - RencodingTime;

        writeWholeFile(new File(s[3]), result);
        finalTime = System.currentTimeMillis();
        DeltaRencodingWriteTime += finalTime - RencodingWriteTime;
    }

    /**
     * Initializes {@code reader} on {@code data} and reads {@code length} values from it.
     *
     * @param reader reader to initialize and drain
     * @param data   encoded page bytes, read from offset 0
     * @param length number of values to read
     * @return the values in read order
     * @throws IOException if the reader fails to initialize from the page
     */
    public static Binary[] RencodingByteData(ValuesReader reader, byte[] data, int length)
            throws IOException {
        Binary[] bins = new Binary[length];
        reader.initFromPage(length, data, 0);
        for (int i = 0; i < length; i++) {
            bins[i] = reader.readBytes();
        }
        return bins;
    }

    /**
     * Writes every string in {@code strings} to {@code writer} as a {@link Binary}.
     *
     * @param writer  destination writer
     * @param strings values to write, in order
     * @throws IOException declared for callers; propagated from the writer if it throws
     */
    public static void encodingByteData(ValuesWriter writer, String[] strings)
            throws IOException {
        for (int i = 0; i < strings.length; i++) {
            writer.writeBytes(Binary.fromString(strings[i]));
        }
    }

    /**
     * Runs one encode/decode round trip per file in the source directory and prints the
     * accumulated timings and total output sizes.
     *
     * @param args {@code args[0]} source directory; {@code args[1]} directory for encoded
     *             files; {@code args[2]} passed through to {@code writeToDisk} as part of the
     *             run descriptor; {@code args[3]} directory for decoded files. Output files
     *             are named by the index of the source file in the directory listing.
     * @throws IOException              if the source directory cannot be listed or any file
     *                                  operation fails
     * @throws IllegalArgumentException if fewer than four arguments are supplied
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 4) {
            throw new IllegalArgumentException(
                    "expected 4 arguments: <sourceDir> <encodedDir> <arg2> <decodedDir>, got "
                            + args.length);
        }

        File sourcefileDirectory = new File(args[0]);
        File[] sourcefiles = sourcefileDirectory.listFiles();
        if (sourcefiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("cannot list source directory: " + sourcefileDirectory);
        }

        int length = sourcefiles.length;
        String[] sourcestr = new String[length];
        String[] encodinstr = new String[length];
        String[] revrseEncodingstr = new String[length];
        for (int i = 0; i < length; i++) {
            sourcestr[i] = sourcefiles[i].getAbsolutePath();
            encodinstr[i] = args[1] + "/" + i;
            revrseEncodingstr[i] = args[3] + "/" + i;
        }

        String[] runstr = new String[4];
        runstr[2] = args[2];
        startTime = System.currentTimeMillis();
        // Seed finalTime so "total time" is 0 rather than negative when there are no files.
        finalTime = startTime;

        // NOTE(review): the "ORCRLE" label looks carried over from another test; left as-is
        // because it is runtime output.
        System.out.println("ORCRLE : begin ORCRLE  : " + timestamp());
        for (int i = 0; i < length; i++) {
            runstr[0] = sourcestr[i];
            runstr[1] = encodinstr[i];
            runstr[3] = revrseEncodingstr[i];
            prepare(runstr);
            readingDelta(runstr);
            runcount++;
        }
        System.out.println("ORCRLE : finish ORCRLE  : " + timestamp());

        System.out.println("totoal run  :" + runcount + "  times");
        System.out.println("totoal valuecount  :" + count);
        System.out.println("DeltaencodingReadTime  :" + DeltaencodingReadTime + "  mis");
        System.out.println("DeltaencodingTime  :" + DeltaencodingTime + "  mis");
        System.out.println("DeltaencodingWriteTime  :" + DeltaencodingWriteTime + "  mis");
        System.out.println("DeltaRencodingReadTime  :" + DeltaRencodingReadTime + "  mis");
        System.out.println("DeltaRencodingTime  :" + DeltaRencodingTime + "  mis");
        System.out.println("DeltaRencodingWriteTime  :" + DeltaRencodingWriteTime + "  mis");
        System.out.println("total  time  :  " + (finalTime - startTime) + "  mis");

        long encodingtotalLong = 0;
        long revrsetotalLong = 0;
        for (int j = 0; j < length; j++) {
            revrsetotalLong += new File(revrseEncodingstr[j]).length();
            encodingtotalLong += new File(encodinstr[j]).length();
        }
        System.out.println("encodingtotalLong  :  " + encodingtotalLong
                + "  /1024/1024  " + encodingtotalLong / 1024 / 1024);
        System.out.println("revrsetotalLong  :  " + revrsetotalLong
                + "  /1024/1024  " + revrsetotalLong / 1024 / 1024);
    }

    /** Formats the current time with {@link #TIMESTAMP_PATTERN}. */
    private static String timestamp() {
        return new java.text.SimpleDateFormat(TIMESTAMP_PATTERN)
                .format(java.util.Calendar.getInstance().getTime());
    }

    /** Reads the entire file into memory, closing the stream even if the read fails. */
    private static byte[] readWholeFile(File file) throws IOException {
        try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
            long size = file.length();
            if (size > Integer.MAX_VALUE) {
                // A plain (int) cast would silently wrap and size the buffer incorrectly.
                throw new IOException("file too large to load into one array: " + file
                        + " (" + size + " bytes)");
            }
            byte[] content = new byte[(int) size];
            in.readFully(content);
            return content;
        }
    }

    /** Writes {@code content} to {@code file}, closing the stream even if the write fails. */
    private static void writeWholeFile(File file, byte[] content) throws IOException {
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
            out.write(content);
        }
    }
}