package org.apache.hadoop.hive.mastiff;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.mastiff.SerializeUtil.PageId;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.orc.OutStream;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.orc.RunLengthByteWriter;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.orc.RunLengthIntegerWriter;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.orc.TestInStream;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.parquet.Binary;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.parquet.BytesInput;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.parquet.DeltaBinaryPackingValuesWriter;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.parquet.DeltaByteArrayWriter;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.parquet.OnlyDictionaryValuesWriter.PlainBinaryDictionaryValuesWriter;
import org.apache.hadoop.hive.mastiffFlexibleEncoding.parquet.OnlyDictionaryValuesWriter.PlainIntegerDictionaryValuesWriter;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;

import FlexibleEncoding.ORC.DynamicIntArray;

import cn.ac.ncic.mastiff.PosChunk;
import cn.ac.ncic.mastiff.ValPair;
import cn.ac.ncic.mastiff.etl.ETLUtils;
import cn.ac.ncic.mastiff.hive.serde.lazy.ClusterAccessor;
import cn.ac.ncic.mastiff.hive.serde.lazy.ClusterAccessor.DataType;
import cn.ac.ncic.mastiff.hive.serde.lazy.Row;
import cn.ac.ncic.mastiff.io.PosRLEChunk;
import cn.ac.ncic.mastiff.io.coding.Compression.Algorithm;
import cn.ac.ncic.mastiff.io.coding.EnDecode;
import cn.ac.ncic.mastiff.io.coding.Encoder;
import cn.ac.ncic.mastiff.io.coding.Encoder.CodingType;
import cn.ac.ncic.mastiff.io.coding.ORCStringEcnodingUtil;
import cn.ac.ncic.mastiff.io.segmentfile.BlockCache;
import cn.ac.ncic.mastiff.io.segmentfile.LruBlockCache;
import cn.ac.ncic.mastiff.io.segmentfile.PageCache;
import cn.ac.ncic.mastiff.io.segmentfile.PageMeta;
import cn.ac.ncic.mastiff.io.segmentfile.PageMeta.ScanMode;
import cn.ac.ncic.mastiff.io.segmentfile.PageMetaList;
import cn.ac.ncic.mastiff.io.segmentfile.PageMetaSection;
import cn.ac.ncic.mastiff.io.segmentfile.SimplePageCache;
import cn.ac.ncic.mastiff.mapred.MastiffMapReduce;
import cn.ac.ncic.mastiff.operators.ExprDesc;
import cn.ac.ncic.mastiff.operators.Predicate;
import cn.ac.ncic.mastiff.utils.Bytes;
import cn.ac.ncic.mastiff.utils.Utils;

/**
 * File format for mastiff.
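 *
 * <p>
 * Layout sketch, as produced by the {@link Writer} below: rows are grouped into column
 * clusters and each cluster's values are packed into pages by a coding-specific encoder.
 * Buffered pages are flushed as a <i>segment</i> holding one data section per cluster,
 * followed by a page meta section and the clusters' start offsets. The file ends with a
 * segment index (per-segment offset, page-meta-section offset and length, plus optional
 * per-cluster max/min metadata) and a trailer recording the cluster count, the segment
 * count and the index offset.
 * </p>
 *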
* * Copied from {@link cn.ac.ncic.mastiff.io.segmentfile.SegmentFile} */ public class SegmentFile { static final Log LOG = LogFactory.getLog(SegmentFile.class); public static final String SEGFILE_CACHE_SIZE_KEY = "mastiff.pagecache.size"; private static BlockCache globalPageCache = null; /** * SegmentFile Writer. */ public static class Writer implements Closeable { public class TabConfig { private String[] str=null ; private DeltaByteArrayWriter[] deltaByteArrayStringWriter =null; private DynamicIntArray[] dynamicIntArray =null ; private PlainBinaryDictionaryValuesWriter[] dictionaryBitPackingRLEZigZarByte=null; private PlainIntegerDictionaryValuesWriter[] dictionaryBitPackingRLEZigZarInt=null; private ORCStringEcnodingUtil[] oRCStringEcnodingUtil=null ; private TestInStream.OutputCollector[] collect=null; private DeltaBinaryPackingValuesWriter[] deltaBianryBitPackingInt=null ; private DeltaByteArrayWriter[] deltaByteArrayWriter=null ; private RunLengthIntegerWriter[] runLengthInteger=null; private RunLengthByteWriter[] runLengthByte=null; private boolean[] pageInit ,firstNumber; private int[] pagesizes ; private Algorithm[] algorithms ; public boolean isInit=false ,isFirst=true ; private DataType[][] originalTableSchema; private List<List<DataType>> originalSchema; private int numFields; private int[] cluster_pages; private List<List<DataType>> clusterSchema; private int numClusters; private ClusterAccessor[] accessors; private ClusterAccessor[] backups; private Row[] rows, prevRows, maxs, mins, segMaxs, segMins; private Encoder[] compressors; private CodingType[] codings; private ValPair[] vps; private ValPair[] backupVps; private PageMeta[] pms; private int[] pageIds; private long pageIdCount=0 ; private long sgementSize = 0; private int[] startPoss, numReps; private PageId segId; private final int count=0 ; private ArrayList<List<BytesWritable>> clusterValue =null; private final BytesWritable outValue = new BytesWritable(); private final DataOutputBuffer out = new DataOutputBuffer(); private final long SegmentSize=536870912-SerializeUtil.desc.clusterTypes.size()*131072; private final int[] tmpLength = new int[1]; private int[][] columnsMapping; private final DataInputBuffer in = new DataInputBuffer(); public void configure(JobConf job, Properties tbl) throws IOException { int numClusters = SerializeUtil.desc.clusterTypes.size(); if (numClusters != SerializeUtil.desc.clusterAlgos.length) { throw new RuntimeException("Please check the cluster algorithms, " + SerializeUtil.desc.clusterAlgos.length + " algorithms provided while there are " + numClusters + " clusters."); } if (numClusters != SerializeUtil.desc.clusterCodingTypes.length) { throw new RuntimeException("Please check the cluster coding types, " + SerializeUtil.desc.clusterAlgos.length + " coding types provided while there are " + numClusters + " clusters."); } numFields = SerializeUtil.desc.tableSchema[0].length; originalTableSchema = SerializeUtil.desc.tableSchema; DataType[][] storageSchema = new DataType[originalTableSchema.length][]; for (int i = 0; i < originalTableSchema.length; i++) { storageSchema[i] = new DataType[originalTableSchema[i].length]; for (int j = 0; j < storageSchema[i].length; j++) { if (originalTableSchema[i][j] == DataType.DATE) { storageSchema[i][j] = DataType.LONG; } else { storageSchema[i][j] = originalTableSchema[i][j]; } } } dynamicIntArray=new DynamicIntArray[numClusters] ; oRCStringEcnodingUtil=new ORCStringEcnodingUtil[numClusters] ; deltaByteArrayStringWriter=new DeltaByteArrayWriter[numClusters] 
;
runLengthInteger = new RunLengthIntegerWriter[numClusters];
runLengthByte = new RunLengthByteWriter[numClusters];
collect = new TestInStream.OutputCollector[numClusters];
deltaBianryBitPackingInt = new DeltaBinaryPackingValuesWriter[numClusters];
deltaByteArrayWriter = new DeltaByteArrayWriter[numClusters];
dictionaryBitPackingRLEZigZarInt = new PlainIntegerDictionaryValuesWriter[numClusters];
dictionaryBitPackingRLEZigZarByte = new PlainBinaryDictionaryValuesWriter[numClusters];
pageInit = new boolean[numClusters];
firstNumber = new boolean[numClusters];
str = new String[numClusters];
for (int i = 0; i < numClusters; i++) {
  pageInit[i] = false;
  firstNumber[i] = false;
  str[i] = new String();
}
// replace DataType.DATE with DataType.LONG
for (int i = 0; i < SerializeUtil.desc.clusterTypes.size(); i++) {
  for (int j = 0; j < SerializeUtil.desc.clusterTypes.get(i).size(); j++) {
    if (SerializeUtil.desc.clusterTypes.get(i).get(j) == DataType.DATE) {
      SerializeUtil.desc.clusterTypes.get(i).set(j, DataType.LONG);
    }
  }
}
// int pagesize = MastiffMapReduce.getTablePageSize(job);
int pagesize = 131072 * 4 * 10;
cluster_pages = new int[numClusters];
for (int i = 0; i < numClusters; i++) {
  cluster_pages[i] = pagesize;
  // cluster_pages[i] = SerializeUtil.desc.clusterAlgos[i] == null ?
  //     pagesize : SerializeUtil.desc.clusterAlgos[i].getScaleRatio() * pagesize;
}
preConfigure(job, storageSchema, ETLUtils.getSchema(SerializeUtil.desc.clusterTypes),
    cluster_pages, SerializeUtil.desc.clusterAlgos, SerializeUtil.desc.clusterCodingTypes,
    SerializeUtil.desc.columnsMapping);
}

public void preConfigure(JobConf job, DataType[][] originalTypes, DataType[][] clusterTypes,
    int[] pagesizes, Algorithm[] algorithms, CodingType[] codings, int[][] columnsMapping) {
  this.columnsMapping = columnsMapping;
  this.codings = codings;
  // derive the per-cluster schemas from the declared types
  originalSchema = ETLUtils.getSchema(originalTypes);
  numFields = originalSchema.get(0).size();
  clusterSchema = ETLUtils.getSchema(clusterTypes);
  numClusters = clusterSchema.size();
  accessors = new ClusterAccessor[numClusters];
  backups = new ClusterAccessor[numClusters];
  rows = new Row[numClusters];
  prevRows = new
Row[numClusters]; maxs = new Row[numClusters]; mins = new Row[numClusters]; segMaxs = new Row[numClusters]; segMins = new Row[numClusters]; compressors = new Encoder[numClusters]; vps = new ValPair[numClusters]; backupVps = new ValPair[numClusters]; pms = new PageMeta[numClusters]; pageIds = new int[numClusters]; startPoss = new int[numClusters]; numReps = new int[numClusters]; this.pagesizes=pagesizes; this.algorithms=algorithms; for (int i = 0; i < numClusters; i++) { accessors[i] = new ClusterAccessor(); accessors[i].init(clusterSchema.get(i)); backups[i] = new ClusterAccessor(accessors[i]); rows[i] = new Row(clusterSchema.get(i)); prevRows[i] = null; if (Algorithm.NONE ==SerializeUtil.desc.clusterAlgos[i]) { algorithms[i] = null; } if (accessors[i].getFixedLen() > 0) { compressors[i]=EnDecode.getEncoder(pagesizes[i],accessors[i].getFixedLen(), 0, algorithms[i],codings[i]); } else { compressors[i]=EnDecode.getEncoder(pagesizes[i],0, 0, algorithms[i],codings[i]); // compressors[i] = new VarLenEncoder(pagesizes[i], 0, algorithms[i]); } compressors[i].reset(); vps[i] = new ValPair(); backupVps[i] = new ValPair(); pms[i] = new PageMeta(); pms[i].startPos = 0; pms[i].numPairs = 0; pageIds[i] = 0; startPoss[i] = numReps[i] = 0; } segId = new PageId(); segId.setSegmentId(Math.random() * 100 + ""); // String taskId = job.get("mapred.tip.id"); // String taskId = ETLUtils.getTaskId(job); // segId.setSegmentId(taskId); } public void updateMaxMins(Row oldMax, Row oldMin, Row newMax, Row newMin) { int size = oldMax.size(); for (int i = 0; i < size; i++) { Comparable maxCp = (Comparable) newMax.get(i).getObject(); if (maxCp.compareTo(oldMax.get(i).getObject()) > 0) { oldMax.get(i).setObject(newMax.get(i).getPrimitiveObject()); } Comparable minCp = (Comparable) newMin.get(i).getObject(); if (minCp.compareTo(oldMin.get(i).getObject()) < 0) { oldMin.get(i).setObject(newMin.get(i).getPrimitiveObject()); } } } void outputPage(int i) throws IOException { // 1) prepare the page byte[] page = compressors[i].getPage(); int pageLen = compressors[i].getPageLen(); if (page == null || pageLen <= 0) { return; } out.reset(); out.writeInt(pageLen); out.write(page, 0, pageLen); // 2) write page meta backupVps[i].data = backups[i].serialize(maxs[i], tmpLength); backupVps[i].offset = 0; backupVps[i].length = tmpLength[0]; backupVps[i].write(out); backupVps[i].data = backups[i].serialize(mins[i], tmpLength); backupVps[i].length = tmpLength[0]; // write min row backupVps[i].write(out); out.writeInt(pms[i].startPos); out.writeInt(pms[i].numPairs); // set max & min row of current segment if (segMaxs[i] == null || segMins[i] == null) { segMaxs[i] = maxs[i]; segMins[i] = mins[i]; } else { // Row.updateMaxMins(segMaxs[i], segMins[i], maxs[i], mins[i]); updateMaxMins(segMaxs[i], segMins[i], maxs[i], mins[i]); } // 3) output the page segId.setPageId(pageIds[i]); segId.setClusterId(i); if(!isInit){ isInit=true ; clusterValue=new ArrayList<List<BytesWritable>>(SerializeUtil.desc.clusterTypes.size()); for (int b = 0; b < numClusters; b++) { clusterValue.add(new ArrayList<BytesWritable>(512)); } } clusterValue.get(i).add(new BytesWritable()); clusterValue.get(i).get(clusterValue.get(i).size() - 1) .set(out.getData(), 0, out.getLength()); sgementSize = sgementSize + cluster_pages[i]; // 4) reset pageIds[i]++; pms[i].startPos += pms[i].numPairs; pms[i].numPairs = 0; maxs[i] = mins[i] = null; compressors[i].reset(); } void SegmentMeta() throws IOException { for (int i = 0; i < numClusters; i++) { if(codings[i]==CodingType.MV){ 
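// For the MV coding the serialized rows are appended directly to the cluster encoder
// (see append() below), so the remaining partial page can be emitted as-is; the other
// codings first flush their specialised writers through the lastPage* helpers.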
outputPage(i); } else if(codings[i]==CodingType.RunLengthEncodingInt){ lastPageRunLengthEncodingInt( i); } else if(codings[i]==CodingType.RunLengthEncodingByte){ lastPageRunLengthEncodingByte( i); } else if(codings[i]==CodingType.RunLengthEncodingLong){ lastPageRunLengthEncodingLong(i); } else if(codings[i]==CodingType.DeltaBinaryArrayZigZarByte){ // lastPageRunLengthEncodingByte( i); lastPageDeltaBinaryArraysBitPackingByte(i); } else if(codings[i]==CodingType.DeltaBinaryBitPackingZigZarInt){ // lastPageRunLengthEncodingByte( i); lastPageDeltaBinaryBitPackingInt(i); } else if(codings[i]==CodingType.DeltaBinaryBitPackingZigZarLong){ // lastPageRunLengthEncodingByte( i); lastPageDeltaBinaryBitPackingLong(i); } else if(codings[i]==CodingType.DictionaryBitPackingRLEByte){ // lastPageRunLengthEncodingByte( i); lastPageDictionaryBitPackingRLEZigZarByte(i); } else if(codings[i]==CodingType.DictionaryBitPackingRLEInt){ // lastPageRunLengthEncodingByte( i); lastPageDictionaryBitPackingRLEZigZarInt(i); } else if(codings[i]==CodingType.DictionaryBitPackingRLELong){ // lastPageRunLengthEncodingByte( i); lastPageDictionaryBitPackingRLEZigZarLong(i); } else if(codings[i]==CodingType.DeltaBinaryPackingString){ // lastPageRunLengthEncodingByte( i); lastPageDeltaBinaryPackingString(i); } else if(codings[i]==CodingType.RedBlackTreeString){ // lastPageRunLengthEncodingByte( i); lastPageRedBlackTreeString(i); } else { throw new UnsupportedEncodingException("Wrong encoding Type"); } } // write out the segment infomations for (int i = 0; i < numClusters; i++) { outputSegmentMeta(i); } } private void lastPageRedBlackTreeString(int i) throws IOException { oRCStringEcnodingUtil[i].dictionarySize= oRCStringEcnodingUtil[i].dictionary.size(); oRCStringEcnodingUtil[i].dumpOrder=new int[ oRCStringEcnodingUtil[i].dictionary.size()] ; oRCStringEcnodingUtil[i].init(); oRCStringEcnodingUtil[i].iterator(); oRCStringEcnodingUtil[i].rowoutPut() ; oRCStringEcnodingUtil[i].flush(); DataOutputBuffer dob=new DataOutputBuffer() ; dob.writeInt( oRCStringEcnodingUtil[i].dictionarySize); int length=oRCStringEcnodingUtil[i].collect1.buffer.size() ; ByteBuffer inBuf = ByteBuffer.allocate( length); oRCStringEcnodingUtil[i].collect1.buffer.setByteBuffer(inBuf, 0, length); inBuf.flip(); dob.writeInt(length); dob.write(inBuf.array(), 0, length); inBuf.clear() ; length=oRCStringEcnodingUtil[i].collect2.buffer.size() ; oRCStringEcnodingUtil[i].collect2.buffer.setByteBuffer(inBuf, 0, length); inBuf.flip(); dob.writeInt(length); dob.write(inBuf.array(), 0, length); inBuf.clear() ; length=oRCStringEcnodingUtil[i].collect3.buffer.size() ; oRCStringEcnodingUtil[i].collect3.buffer.setByteBuffer(inBuf, 0, length); inBuf.flip(); dob.writeInt(length); dob.write(inBuf.array(), 0, length); inBuf.clear() ; vps[i].data=dob.getData() ; vps[i].offset = 0; vps[i].length= vps[i].data.length ; compressors[i].appendPage(vps[i]); outputPage(i); LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); pageInit[i]=false; } private void lastPageDeltaBinaryPackingString(int i) throws IOException { vps[i].data=deltaByteArrayStringWriter[i].getBytes().toByteArray(); vps[i].offset = 0; vps[i].length= vps[i].data.length ; compressors[i].appendPage(vps[i]); outputPage(i); pageInit[i]=false; deltaByteArrayStringWriter[i].reset(); } void lastPageRunLengthEncodingInt(int i) throws IOException{ runLengthInteger[i].flush(); ByteBuffer inBuf = ByteBuffer.allocate(collect[i].buffer.size()); 
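// Drain the run-length writer's collected output into a byte[], hand it to the generic
// encoder as a single page, and emit it via outputPage(i).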
byte[] pageBytes=collect[i].buffer.getByteBuffer(inBuf, 0, collect[i].buffer.size()) ; collect[i].buffer.clear(); inBuf.clear(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; compressors[i].appendPage(vps[i]); outputPage(i); pageInit[i]=false; } void lastPageRunLengthEncodingByte(int i) throws IOException{ runLengthByte[i].flush(); ByteBuffer inBuf = ByteBuffer.allocate(collect[i].buffer.size()); byte[] pageBytes=collect[i].buffer.getByteBuffer(inBuf, 0, collect[i].buffer.size()) ; collect[i].buffer.clear(); inBuf.clear(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; compressors[i].appendPage(vps[i]); outputPage(i); pageInit[i]=false; } void lastPageRunLengthEncodingLong(int i) throws IOException{ throw new UnsupportedEncodingException("WangMeng has not implement this method on 03.27.2014 yet"); } void lastPageDeltaBinaryBitPackingInt(int i) throws IOException{ BytesInput bi=deltaBianryBitPackingInt[i].getBytes() ; byte[] pageBytes=bi.getBufferSize() ; vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } pageInit[i]=false; } void lastPageDeltaBinaryArraysBitPackingByte(int i) throws IOException{ BytesInput bi=deltaByteArrayWriter[i].getBytes(); byte[] pageBytes=bi.getBufferSize() ; vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } pageInit[i]=false; } void lastPageDeltaBinaryBitPackingLong(int i) throws IOException{ throw new UnsupportedEncodingException("WangMeng has not implement this method on 03.27.2014 yet"); } void lastPageDictionaryBitPackingRLEZigZarByte(int i) throws IOException{ BytesInput sbi= dictionaryBitPackingRLEZigZarByte[i].getBytes(); byte[] dictionaryBuffer=dictionaryBitPackingRLEZigZarByte[i].getDictionaryBuffer(); byte[] dictionaryID=sbi.getBufferSize() ; DataOutputBuffer dob=new DataOutputBuffer() ; dob.writeInt(dictionaryBuffer.length); dob.write(dictionaryBuffer, 0, dictionaryBuffer.length); dob.write(dictionaryID, 0, dictionaryID.length); byte[] pageBytes=dob.getData() ; dob.close(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } pageInit[i]=false; } void lastPageDictionaryBitPackingRLEZigZarInt(int i) throws IOException{ BytesInput sbi= dictionaryBitPackingRLEZigZarInt[i].getBytes(); byte[] dictionaryBuffer=dictionaryBitPackingRLEZigZarInt[i].getDictionaryBuffer(); byte[] dictionaryID=sbi.getBufferSize() ; DataOutputBuffer dob=new DataOutputBuffer() ; dob.writeInt(dictionaryBuffer.length); dob.write(dictionaryBuffer, 0, dictionaryBuffer.length); dob.write(dictionaryID, 0, dictionaryID.length); byte[] pageBytes=dob.getData() ; dob.close(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } pageInit[i]=false; } void lastPageDictionaryBitPackingRLEZigZarLong(int i) throws IOException{ throw new UnsupportedEncodingException("WangMeng has not implement this method on 03.27.2014 
yet"); } private void outputSegmentMeta(int i) throws IOException { out.reset(); // 1) write segment meta backupVps[i].data = backups[i].serialize(segMaxs[i], tmpLength); backupVps[i].offset = 0; backupVps[i].length = tmpLength[0]; backupVps[i].write(out); backupVps[i].data = backups[i].serialize(segMins[i], tmpLength); backupVps[i].length = tmpLength[0]; // write min row backupVps[i].write(out); out.writeInt(0); // start pos of a segment is zero out.writeInt(pms[i].startPos); // 2) output the page segId.setPageId(-1); // meta data page is -1 segId.setClusterId(i); outValue.set(out.getData(), 0, out.getLength()); clusterValue.get(i).add(new BytesWritable()); clusterValue.get(i).get(clusterValue.get(i).size() - 1) .set(out.getData(), 0, out.getLength()); sgementSize = sgementSize + cluster_pages[i]; // segMaxs[i]=null ; // segMins[i]=null ; pms[i].startPos =0; pms[i].numPairs = 0; ///////////////////////////////// if (accessors[i].getFixedLen() > 0) { compressors[i]=EnDecode.getEncoder(pagesizes[i], accessors[i].getFixedLen(), 0, algorithms[i],codings[i]); compressors[i].reset(); } else { // compressors[i] = new VarLenEncoder(pagesizes[i], 0, algorithms[i]); compressors[i]=EnDecode.getEncoder(pagesizes[i],0, 0, algorithms[i],codings[i]); compressors[i].reset(); } maxs[i] = mins[i] = null; segMaxs[i]=null ; segMins[i]=null ; pms[i].startPos =0; pms[i].numPairs = 0; backups[i] = new ClusterAccessor(accessors[i]); vps[i] = new ValPair(); backupVps[i] = new ValPair(); pms[i] = new PageMeta(); pageIds[i] = 0; startPoss[i] = numReps[i] = 0; } void passValue(PageId key, ArrayList<List<BytesWritable>> segmentValue2) throws IOException { int m = 0; beginSegment(); beginCluster(); for (int i = 0; i < segmentValue2.size(); i++) { for (int j = -1; j < segmentValue2.get(i).size() - 1; j++) { PageMeta pm = new PageMeta(); BytesWritable bw; if (j != -1) { bw = (BytesWritable) segmentValue2.get(i).get(j); in.reset(bw.getBytes(), 0, bw.getLength()); } else { bw = (BytesWritable) segmentValue2.get(i).get(segmentValue2.get(i).size() - 1); in.reset(bw.getBytes(), 0, bw.getLength()); } if (m == 0) { pm.readFields(in); addSegmentMeta(i, pm); m++; } else { int length = in.readInt(); in.skip(length); pm.readFields(in); Segappend(bw.getBytes(), Bytes.SIZEOF_INT, length, pm); } } finishCluster(); m = 0; if (i < segmentValue2.size() - 1) { beginCluster(); } } finishSegment(); } public void runLengthEncodingInt(int i){ if( pageInit[i]==false){ collect[i] = new TestInStream.OutputCollector(); compressors[i].reset(); try { runLengthInteger[i] = new RunLengthIntegerWriter(new OutStream("test", 1000, null, collect[i]), true); } catch (IOException e) { e.printStackTrace(); } pageInit[i]=true; } if(compressors[i].dataOffset + compressors[i].valueLen < compressors[i].pageCapacity){ try { runLengthInteger[i].write(((Integer)(rows[i].getValue(0))).intValue()); } catch (IOException e) { e.printStackTrace(); } compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ try { runLengthInteger[i].write(((Integer)(rows[i].getValue(0))).intValue()); } catch (IOException e) { e.printStackTrace(); } compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], 
mins[i]); } pms[i].numPairs++; try { runLengthInteger[i].flush(); } catch (IOException e) { e.printStackTrace(); } ByteBuffer inBuf = ByteBuffer.allocate(collect[i].buffer.size()); byte[] pageBytes=collect[i].buffer.getByteBuffer(inBuf, 0, collect[i].buffer.size()) ; collect[i].buffer.clear(); inBuf.clear(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); pageInit[i]=false; } } public void runLengthEncodingByte(int i){ if( pageInit[i]==false){ collect[i] = new TestInStream.OutputCollector(); compressors[i].reset(); try { runLengthByte[i] = new RunLengthByteWriter(new OutStream("test", 1000, null, collect[i])); } catch (IOException e) { e.printStackTrace(); } pageInit[i]=true; } if(compressors[i].dataOffset + compressors[i].valueLen < compressors[i].pageCapacity){ try { runLengthByte[i].write(((Byte)(rows[i].getValue(0))).byteValue()); } catch (IOException e) { e.printStackTrace(); } compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ try { runLengthByte[i].write(((Byte)(rows[i].getValue(0))).byteValue()); } catch (IOException e) { e.printStackTrace(); } compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; try { runLengthByte[i].flush(); } catch (IOException e) { e.printStackTrace(); } ByteBuffer inBuf = ByteBuffer.allocate(collect[i].buffer.size()); // collect.buffer.write(dos, 0, collect.buffer.size()); byte[] pageBytes=collect[i].buffer.getByteBuffer(inBuf, 0, collect[i].buffer.size()) ; collect[i].buffer.clear(); inBuf.clear(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } // pms[i].numPairs++; pageInit[i]=false; } } public void runLengthEncodingLong(int i) throws UnsupportedEncodingException{ throw new UnsupportedEncodingException("WangMeng has not implement this method on 03.27.2014 yet"); } public void deltaBinaryArrayZigZarByte(int i) throws IOException{ if( pageInit[i]==false){ // int blockSize = 128; // int miniBlockNum = 4; deltaByteArrayWriter[i]=new DeltaByteArrayWriter( pagesizes[i]/4); compressors[i].reset(); pageInit[i]=true; } if(compressors[i].dataOffset + compressors[i].valueLen < compressors[i].pageCapacity){ deltaByteArrayWriter[i].writeBytes(Binary.fromString(""+((Byte)(rows[i].getValue(0))).byteValue())); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == 
null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ deltaByteArrayWriter[i].writeBytes(Binary.fromString(""+((Byte)(rows[i].getValue(0))).byteValue())); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; BytesInput bi=deltaByteArrayWriter[i].getBytes(); byte[] pageBytes=bi.getBufferSize() ; vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); // pms[i].numPairs++; pageInit[i]=false; } } public void deltaBinaryBitPackingZigZarInt(int i) throws IOException{ if( pageInit[i]==false){ int blockSize = 128; int miniBlockNum = 4; deltaBianryBitPackingInt[i] = new DeltaBinaryPackingValuesWriter(blockSize, miniBlockNum, pagesizes[i]/4); compressors[i].reset(); pageInit[i]=true; } if(compressors[i].dataOffset + compressors[i].valueLen < compressors[i].pageCapacity){ deltaBianryBitPackingInt[i].writeInteger(((Integer)(rows[i].getValue(0))).intValue()); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ deltaBianryBitPackingInt[i].writeInteger(((Integer)(rows[i].getValue(0))).intValue()); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; BytesInput bi=deltaBianryBitPackingInt[i].getBytes() ; byte[] pageBytes=bi.getBufferSize() ; vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); // pms[i].numPairs++; pageInit[i]=false; } } public void deltaBinaryBitPackingZigZarLong(int i) throws UnsupportedEncodingException{ throw new UnsupportedEncodingException("now I have not implement this method for twitter.Parquet do not implement this just now") ; } public void dictionaryBitPackingRLEByte(int i) throws IOException{ if( pageInit[i]==false){ // deltaBianryBitPackingInt[i] = new DeltaBinaryPackingValuesWriter(blockSize, miniBlockNum, pagesizes[i]/4); dictionaryBitPackingRLEZigZarByte[i]= new PlainBinaryDictionaryValuesWriter(Integer.MAX_VALUE, pagesizes[i]); compressors[i].reset(); pageInit[i]=true; } 
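// Same two-branch pattern as the other per-value encoders: while the page still has
// capacity, buffer the value and track max/min; once full, write the last value, pack
// the dictionary buffer and the encoded ids into one page, emit it through
// outputPage(i), and clear pageInit[i] so the next value starts a fresh page.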
if(compressors[i].dataOffset + compressors[i].valueLen < compressors[i].pageCapacity){ dictionaryBitPackingRLEZigZarByte[i].writeBytes(Binary.fromString("" +((Byte)(rows[i].getValue(0))).byteValue())); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ dictionaryBitPackingRLEZigZarByte[i].writeBytes(Binary.fromString("" +((Byte)(rows[i].getValue(0))).byteValue())); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; BytesInput sbi= dictionaryBitPackingRLEZigZarByte[i].getBytes(); byte[] dictionaryBuffer=dictionaryBitPackingRLEZigZarByte[i].getDictionaryBuffer(); byte[] dictionaryID=sbi.getBufferSize() ; DataOutputBuffer dob=new DataOutputBuffer() ; dob.writeInt(dictionaryBuffer.length); dob.write(dictionaryBuffer, 0, dictionaryBuffer.length); dob.write(dictionaryID, 0, dictionaryID.length); byte[] pageBytes=dob.getData() ; dob.close(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); // pms[i].numPairs++; pageInit[i]=false; } } public void dictionaryBitPackingRLEInt(int i) throws IOException{ if( pageInit[i]==false){ // deltaBianryBitPackingInt[i] = new DeltaBinaryPackingValuesWriter(blockSize, miniBlockNum, pagesizes[i]/4); dictionaryBitPackingRLEZigZarInt[i]= new PlainIntegerDictionaryValuesWriter(Integer.MAX_VALUE, pagesizes[i]); compressors[i].reset(); pageInit[i]=true; } if(compressors[i].dataOffset + compressors[i].valueLen < compressors[i].pageCapacity){ dictionaryBitPackingRLEZigZarInt[i].writeInteger(((Integer)(rows[i].getValue(0))).intValue()); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ dictionaryBitPackingRLEZigZarInt[i].writeInteger(((Integer)(rows[i].getValue(0))).intValue()); compressors[i].numPairs++; compressors[i].dataOffset += compressors[i].valueLen; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; BytesInput sbi= dictionaryBitPackingRLEZigZarInt[i].getBytes(); byte[] dictionaryBuffer=dictionaryBitPackingRLEZigZarInt[i].getDictionaryBuffer(); byte[] dictionaryID=sbi.getBufferSize() ; DataOutputBuffer dob=new DataOutputBuffer() ; dob.writeInt(dictionaryBuffer.length); dob.write(dictionaryBuffer, 0, dictionaryBuffer.length); dob.write(dictionaryID, 0, dictionaryID.length); byte[] pageBytes=dob.getData() ; dob.close(); vps[i].data=pageBytes ; vps[i].offset = 0; vps[i].length= pageBytes.length; try { 
compressors[i].appendPage(vps[i]); } catch (IOException e) { e.printStackTrace(); } try { outputPage(i); } catch (IOException e) { e.printStackTrace(); } if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); // pms[i].numPairs++; pageInit[i]=false; } } public void dictionaryBitPackingRLELong(int i) throws UnsupportedEncodingException{ throw new UnsupportedEncodingException("WangMeng has not implement this method on 03.27.2014 yet"); } public void append(Writable val) throws IOException { RowMap rowMap = (RowMap) val; for (int i = 0; i < numClusters; i++) { rows[i]=rowMap.row[i]; if(codings[i]!=CodingType.MV){ switch (codings[i]){ case RunLengthEncodingByte: runLengthEncodingByte(i); break ; case RunLengthEncodingInt: runLengthEncodingInt(i); break ; case RunLengthEncodingLong: runLengthEncodingLong(i); break ; case DeltaBinaryArrayZigZarByte :deltaBinaryArrayZigZarByte(i); break ; case DeltaBinaryBitPackingZigZarInt :deltaBinaryBitPackingZigZarInt(i); break ; case DeltaBinaryBitPackingZigZarLong :deltaBinaryBitPackingZigZarLong(i); break ; case DictionaryBitPackingRLEByte:dictionaryBitPackingRLEByte(i);break ; case DictionaryBitPackingRLEInt:dictionaryBitPackingRLEInt(i);break ; case DictionaryBitPackingRLELong:dictionaryBitPackingRLELong(i);break ; case DeltaBinaryPackingString:deltaBinaryPackingString(i);break ; case RedBlackTreeString:redBlackTreeString(i);break ; default : throw new UnsupportedEncodingException() ; } } else { vps[i].data = accessors[i].serialize(rows[i], tmpLength); vps[i].offset = 0; vps[i].length = tmpLength[0]; vps[i].pos = pms[i].startPos + pms[i].numPairs; while (!compressors[i].append(vps[i])) {//bytesLeft < ) // System.out.println("............................................................................. "+rows[i].getValue(0)+compressors[i].bytesLeft+" "+(vps[i].length + Bytes.SIZEOF_INT)); outputPage(i); } // update page meta if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } // ///very important ,jude whether write to disk or not if ((sgementSize>SegmentSize) && i == (numClusters - 1) ) { SegmentMeta(); passValue(segId, clusterValue); outputStream.flush(); segId = new PageId(); // segId.setSegmentId(Math.random() * 100 + ""); pageIdCount++ ; segId.setSegmentId(""+ pageIdCount); // for(int l=0 ;l< clusterValue.size();l++){ // // for(int j=0;j<clusterValue.get(i).size();j++){ // // clusterValue.get(i).get(j). 
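// Segment rollover: once the buffered pages exceed SegmentSize, the batch is flushed as
// one segment via SegmentMeta()/passValue(); sgementSize is then reset and isInit is
// cleared so that outputPage() re-creates the per-cluster page lists for the next segment.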
// // } // clusterValue.get(l).clear(); // } // clusterValue.clear(); // segmentValue.clear(); sgementSize = 0; isInit=false ; } } // rowMap=null ; // val=null; } private void redBlackTreeString(int i) throws IOException { if( pageInit[i]==false){ System.out.println("compressors[i].bytesLeft "+compressors[i].bytesLeft); oRCStringEcnodingUtil[i]=new ORCStringEcnodingUtil() ; compressors[i].reset(); pageInit[i]=true; } if(firstNumber[i]==true){ // System.out.println("1545 run one time ..............................................................."+str[i]+" "+compressors[i].numPairs+" "+ compressors[i].dataOffset); deltaByteArrayStringWriter[i].writeBytes(Binary.fromString(str[i])); compressors[i].numPairs++; compressors[i].dataOffset += str[i].length(); compressors[i].index.writeInt(compressors[i].dataOffset); compressors[i].bytesLeft -= str[i].length() ; compressors[i].bytesLeft -= Bytes.SIZEOF_INT; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } firstNumber[i]=false; } // if (bytesLeft < pair.length + Bytes.SIZEOF_INT) String str=((String)(rows[i].getValue(0))) ; if(compressors[i].bytesLeft>=( Bytes.SIZEOF_INT+str.length()+2)){ //deltaByteArrayStringWriter[i].writeBytes(Binary.fromString(str)); oRCStringEcnodingUtil[i].add(str); compressors[i].numPairs++; compressors[i].dataOffset += str.length()+2; //compressors[i].index.writeInt(compressors[i].dataOffset); compressors[i].bytesLeft -= str.length()+2 ; compressors[i].bytesLeft -= Bytes.SIZEOF_INT; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ firstNumber[i]=true; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; oRCStringEcnodingUtil[i].dictionarySize= oRCStringEcnodingUtil[i].dictionary.size(); oRCStringEcnodingUtil[i].dumpOrder=new int[ oRCStringEcnodingUtil[i].dictionary.size()] ; oRCStringEcnodingUtil[i].init(); oRCStringEcnodingUtil[i].iterator(); oRCStringEcnodingUtil[i].rowoutPut() ; oRCStringEcnodingUtil[i].flush(); DataOutputBuffer dob=new DataOutputBuffer() ; dob.writeInt( oRCStringEcnodingUtil[i].dictionarySize); int length=oRCStringEcnodingUtil[i].collect1.buffer.size() ; ByteBuffer inBuf = ByteBuffer.allocate( length); oRCStringEcnodingUtil[i].collect1.buffer.setByteBuffer(inBuf, 0, length); inBuf.flip(); dob.writeInt(length); dob.write(inBuf.array(), 0, length); inBuf.clear() ; length=oRCStringEcnodingUtil[i].collect2.buffer.size() ; oRCStringEcnodingUtil[i].collect2.buffer.setByteBuffer(inBuf, 0, length); inBuf.flip(); dob.writeInt(length); dob.write(inBuf.array(), 0, length); inBuf.clear() ; length=oRCStringEcnodingUtil[i].collect3.buffer.size() ; oRCStringEcnodingUtil[i].collect3.buffer.setByteBuffer(inBuf, 0, length); inBuf.flip(); dob.writeInt(length); dob.write(inBuf.array(), 0, length); inBuf.clear() ; vps[i].data=dob.getData() ; vps[i].offset = 0; vps[i].length= vps[i].data.length ; compressors[i].appendPage(vps[i]); outputPage(i); LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); pageInit[i]=false; } } private void deltaBinaryPackingString(int i) throws IOException { if( pageInit[i]==false){ // System.out.println("compressors[i].bytesLeft 
"+compressors[i].bytesLeft); deltaByteArrayStringWriter[i]=new DeltaByteArrayWriter(64*1024) ; // fixedEncoding= (FixedLenEncoder)compressors[i] ; compressors[i].reset(); pageInit[i]=true; } str[i]=((String)(rows[i].getValue(0))) ; if(compressors[i].bytesLeft-100>=( Bytes.SIZEOF_INT+str[i].length()+2)){ deltaByteArrayStringWriter[i].writeBytes(Binary.fromString(str[i])); compressors[i].numPairs++; compressors[i].dataOffset += str[i].length()+2; compressors[i].bytesLeft -= str[i].length()+2 ; compressors[i].bytesLeft -= Bytes.SIZEOF_INT; if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; } else{ deltaByteArrayStringWriter[i].writeBytes(Binary.fromString(str[i])); compressors[i].numPairs++; compressors[i].dataOffset += str[i].length(); compressors[i].index.writeInt(compressors[i].dataOffset); compressors[i].bytesLeft -= str[i].length() ; compressors[i].bytesLeft -= Bytes.SIZEOF_INT; firstNumber[i]=true; // System.out.println("1579 .................................... " +" firstNumber[i] "+ firstNumber[i]+" "+ str[i]+" "+( Bytes.SIZEOF_INT+str[i].length()+2)+" "+compressors[i].bytesLeft); if (maxs[i] == null || mins[i] == null) { maxs[i] = rows[i].duplicate(); mins[i] = rows[i].duplicate(); } else { rows[i].compareAndSetMaxMin(maxs[i], mins[i]); } pms[i].numPairs++; vps[i].data=deltaByteArrayStringWriter[i].getBytes().toByteArray(); vps[i].offset = 0; vps[i].length= vps[i].data.length ; compressors[i].appendPage(vps[i]); outputPage(i); LOG.info("dataoffset "+compressors[i].dataOffset); LOG.info("numPairs "+compressors[i].numPairs); LOG.info("startPos "+compressors[i].startPos); // pms[i].numPairs++; pageInit[i]=false; } } void close() throws IOException { SegmentMeta(); passValue(segId, clusterValue); for (int k = 0; k < numClusters; k++) { pageIds[k] = 0; pms[k].startPos = 0; pms[k].numPairs = 0; maxs[k] = mins[k] = null; } segId = new PageId(); segId.setSegmentId(Math.random() * 100 + ""); clusterValue.clear(); // segmentValue.clear(); sgementSize = 0; } } // FileSystem stream to write on. private FSDataOutputStream outputStream; // True if we opened the <code>outputStream</code> (and so will close it). private boolean closeOutputStream; // Name for this object used when logging or in toString. Is either // the result of a toString on stream or else toString of passed file Path. private String name; private boolean TabConfiInit = false; private final ArrayList<Long> segOffsets = new ArrayList<Long>(); private final ArrayList<Long> segLengths = new ArrayList<Long>(); private final ArrayList<Long> segPMSOffsets = new ArrayList<Long>(); // Segment Meta private PageMeta[] curSegMetas; private final List<PageMeta[]> segMetasList = new ArrayList<PageMeta[]>(); // Page Meta Section private PageMetaSection pms; private PageMetaList[] pageMetaLists; private List<PageMeta> curPageMetaList; private List<Long> curPMOffsetList; private TabConfig tabConfig; // Cluster Offset in current segment private long[] clusterOffsetInCurSegment; // May be null if we were passed a stream. private Path path = null; private Path tmpPath=null ; private Path tmpoutputPath=null ; private int curClusterIdx; private int curSegIdx; private int numClusters; private final boolean withPageMeta; private FileSystem fs =null ; /** * Constructor that takes a Path. 
* * @param fs * @param path * @param columns * @throws IOException */ public Writer(FileSystem fs, Path tmpPath, Path path, Path tmpoutputPath, List<List<DataType>> columns) throws IOException { this(fs, tmpPath, path, tmpoutputPath, columns, true); } /** * Constructor that takes a Path * * @param fs * @param path * @param columns * @param withPageMeta * @throws IOException */ public Writer(FileSystem fs, Path tmpPath, Path path, Path finalOutPath, List<List<DataType>> columns, boolean withPageMeta) throws IOException { this(fs.create(tmpPath, true, fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(), fs.getDefaultBlockSize()), columns, withPageMeta); fs.setVerifyChecksum(true); closeOutputStream = true; name = path.toString(); this.path = path; this.tmpPath = tmpPath; this.fs = fs; this.tmpoutputPath = tmpoutputPath; } /** * Constructor that takes a stream. * * @param ostream * Stream to use. * @param columns * @param withPageMeta * @throws IOException */ public Writer(final FSDataOutputStream ostream, List<List<DataType>> columns, boolean withPageMeta) throws IOException { // LOG.debug("create a writer..."); this.outputStream = ostream; this.closeOutputStream = false; this.name = this.outputStream.toString(); this.withPageMeta = withPageMeta; init(columns); } private void init(List<List<DataType>> columns) { if (columns != null) { numClusters = columns.size(); // LOG.debug("Init " + numClusters + " clusters."); pms = new PageMetaSection(withPageMeta); pageMetaLists = new PageMetaList[numClusters]; for (int i = 0; i < numClusters; i++) { pageMetaLists[i] = new PageMetaList(withPageMeta); pageMetaLists[i].setMetaList(new ArrayList<PageMeta>(), new ArrayList<Long>()); } clusterOffsetInCurSegment = new long[numClusters]; } curSegIdx = 0; } public void append(Writable r) throws IOException { if (TabConfiInit == false) { tabConfig = new TabConfig(); tabConfig.configure(SerializeUtil.jc, SerializeUtil.tableProperties); TabConfiInit = true; tabConfig.append(r); } else { tabConfig.append(r); } } private void resetPageMetaSection() { for (PageMetaList pageMetaList : pageMetaLists) { pageMetaList.getMetaList().clear(); pageMetaList.getOffsetList().clear(); } } public void addSegmentMeta(int clusterId, final PageMeta pm) { curSegMetas[clusterId] = pm; } public void beginSegment() throws IOException { LOG.info("Begin a new Segment from position " + outputStream.getPos()); // And the segment start offset; segOffsets.add(outputStream.getPos()); // reinit the arguments of a segment curClusterIdx = 0; if (withPageMeta) { curSegMetas = new PageMeta[numClusters]; } } public void finishSegment() throws IOException { LOG.info("Finish data section in a Segment at position " + outputStream.getPos()); segPMSOffsets.add(outputStream.getPos()); // write the pagemeta section LOG.info("And write its page meta section."); pms.setPageMetaLists(pageMetaLists); pms.write(outputStream); // write the cluter index LOG.info("And write its clusters' offsets."); for (long l : clusterOffsetInCurSegment) { outputStream.writeLong(l); } // record the length of the segment long length = outputStream.getPos() - segOffsets.get(curSegIdx); LOG.info("This segment length is " + length); segLengths.add(length); if (withPageMeta) { segMetasList.add(curSegMetas); curSegMetas = null; } resetPageMetaSection(); pageMetaLists = new PageMetaList[numClusters]; for (int i = 0; i < numClusters; i++) { pageMetaLists[i] = new PageMetaList(withPageMeta); pageMetaLists[i].setMetaList(new ArrayList<PageMeta>(), new 
ArrayList<Long>()); } curSegIdx++; } /** * @return Path or null if we were passed a stream rather than a Path. */ public Path getPath() { return this.path; } @Override public String toString() { return "writer=" + this.name; } public void beginCluster() throws IOException { LOG.info("Begin a new Cluster from position " + outputStream.getPos()); curPageMetaList = this.pageMetaLists[curClusterIdx].getMetaList(); curPMOffsetList = this.pageMetaLists[curClusterIdx].getOffsetList(); this.clusterOffsetInCurSegment[curClusterIdx] = outputStream.getPos(); } public void finishCluster() { LOG.info("Finish Cluster " + curClusterIdx + "."); LOG.info("Finish a Cluster while writing " + curPageMetaList.size() + " pages."); curClusterIdx++; } public void Segappend(final byte[] page, final int offset, final int length, final PageMeta pm) throws IOException { curPMOffsetList.add(outputStream.getPos()); outputStream.write(page, offset, length); curPageMetaList.add(pm); } public void fileClose() throws IOException { // outputStream.flush(); // outputStream.sync(); if (outputStream == null) { return; } LOG.info("Finish the segment file by writing its segments' index at position " + outputStream.getPos() + " ."); LOG.info("Total segments are " + curSegIdx); long segIdxOffset = outputStream.getPos(); // Write out the segment index for (int i = 0; i < curSegIdx; i++) { outputStream.writeLong(segOffsets.get(i)); outputStream.writeLong(segPMSOffsets.get(i)); outputStream.writeLong(segLengths.get(i)); if (withPageMeta) { for (int j = 0; j < numClusters; j++) { // segMetasList.get(i)[j].write(outputStream); PageMeta pageMeta = segMetasList.get(i)[j]; pageMeta.write(outputStream); } } } outputStream.writeInt(numClusters); outputStream.writeInt(curSegIdx); outputStream.writeLong(segIdxOffset); LOG.info("Finished @ position " + outputStream.getPos()); // if (this.closeOutputStream) { // LOG.info("fs.size"+fs.getLength(path)); // LOG.info("fs.name"+fs.getName()); // LOG.info("fs.backup"+fs.getReplication(path)); // LOG.info("fs.status"+fs.getFileStatus(path)); // // LOG.info(""+fs.setVerifyChecksum(verifyChecksum);); // fs.setVerifyChecksum(true); // fs.printStatistics(); outputStream.close(); // LOG.info("fs.size"+fs.getLength(path)); // LOG.info("fs.name"+fs.getName()); // LOG.info("fs.backup"+fs.getReplication(path)); // LOG.info("fs.status"+fs.getFileStatus(path)); outputStream = null; // DistributedFileSystem dfs=(DistributedFileSystem)fs ; if (fs.exists(path)) { fs.delete(path, true); } fs.rename(tmpPath, path); // fs.create(path); // dfs.moveToLocalFile(tmpPath,finalOutPath); fs.delete(tmpPath, true); // fs.delete(tmpoutputPath, true); // fs.MoveTask } @Override public synchronized void close() throws IOException { tabConfig.close(); fileClose(); } } public static class SegmentIndexRef { public int numSegs; public int numClusters; public long[] segOffsets; public long[] segPMSOffsets; public long[] segLengths; public PageMeta[][] segMetas; } /** * Segment Index Reader. (read in the segment index for M/R splitting.) 
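 * The index is the trailer written by Writer#fileClose(): for each segment its start
 * offset, page-meta-section offset and length (plus one max/min PageMeta per cluster
 * when page meta is enabled), followed by the cluster count, the segment count and the
 * index offset at the very end of the file.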
*/ public static class SegmentIndexReader implements Closeable { long segIndexOffset; SegmentIndexRef ref = new SegmentIndexRef(); // Stream to read from private FSDataInputStream istream = null; private final long fileSize; private final boolean withPageMeta; public SegmentIndexReader(FileSystem fs, Path path) throws IOException { this(fs, path, true); } public SegmentIndexReader(FileSystem fs, Path path, boolean withPM) throws IOException { istream = fs.open(path); fileSize = fs.getFileStatus(path).getLen(); this.withPageMeta = withPM; LOG.info("Open Segment File " + path + " : file length is " + fileSize + " , with page meta : " + withPageMeta); } public synchronized void readSegIndex() throws IOException { istream.seek(fileSize - 2 * Bytes.SIZEOF_INT - Bytes.SIZEOF_LONG); ref.numClusters = istream.readInt(); ref.numSegs = istream.readInt(); segIndexOffset = istream.readLong(); LOG.info("Trying to read " + ref.numSegs + " segments at position " + segIndexOffset); ref.segOffsets = new long[ref.numSegs]; ref.segPMSOffsets = new long[ref.numSegs]; ref.segLengths = new long[ref.numSegs]; if (withPageMeta) { ref.segMetas = new PageMeta[ref.numSegs][]; } istream.seek(segIndexOffset); for (int i = 0; i < ref.numSegs; i++) { ref.segOffsets[i] = istream.readLong(); ref.segPMSOffsets[i] = istream.readLong(); ref.segLengths[i] = istream.readLong(); if (withPageMeta) { ref.segMetas[i] = new PageMeta[ref.numClusters]; for (int j = 0; j < ref.numClusters; j++) { ref.segMetas[i][j] = new PageMeta(); ref.segMetas[i][j].readFields(istream); } } } } public synchronized long[] getSegOffsets() { return ref.segOffsets; } public synchronized long[] getSegPMSOffsets() { return ref.segPMSOffsets; } public synchronized long[] getSegLengths() { return ref.segLengths; } public synchronized PageMeta[][] getSegMetas() { return ref.segMetas; } public synchronized int getNumSegs() { return ref.numSegs; } public synchronized SegmentIndexRef getRef() { return ref; } @Override public synchronized void close() throws IOException { if (istream == null) { return; } istream.close(); istream = null; } } /** * Counters to calculate read pages */ public static enum SegmentReadPageCounter { CLUSTER1, CLUSTER2, CLUSTER3, CLUSTER4, CLUSTER5, CLUSTER6, CLUSTER7, CLUSTER8, CLUSTER9, CLUSTER10, CLUSTER11, CLUSTER12, CLUSTER13, CLUSTER14, CLUSTER15, CLUSTER16, OTHERCLUSTERS } /** * Counters to calculate skipped pages */ public static enum SegmentSkippedPageCounter { CLUSTER1, CLUSTER2, CLUSTER3, CLUSTER4, CLUSTER5, CLUSTER6, CLUSTER7, CLUSTER8, CLUSTER9, CLUSTER10, CLUSTER11, CLUSTER12, CLUSTER13, CLUSTER14, CLUSTER15, CLUSTER16, OTHERCLUSTERS } /** * Counters to calculate the cache hits */ public static enum SegmentCacheHitCounter { CLUSTER1, CLUSTER2, CLUSTER3, CLUSTER4, CLUSTER5, CLUSTER6, CLUSTER7, CLUSTER8, CLUSTER9, CLUSTER10, CLUSTER11, CLUSTER12, CLUSTER13, CLUSTER14, CLUSTER15, CLUSTER16, OTHERCLUSTERS } /** Counters to calculate the position seek times */ public static enum SegmentPosSeekCounter { MOVEON, MOVEBACK } /** * <p> * SegmentReader is used to read a segment. It wasn't used to read the actual data. A * <i>SegmentReader</i> is used to process the segment page meta section and generated the related * <i> ClusterReader</i>s to read the actual data. * </p> * */ public static class SegmentReader implements Closeable { public static enum READMODE { /** * During point query, we use a shared global lru page cache * to reduce the overhead of random access. 
*/ POINTQUERY, /** * During m/r scan query, we use a simple queue-based page cache * to reduce the overhead of random access in a segment split. */ MR } // Configuration conf; // stream to read in private final FSDataInputStream istream; // read in the page meta section PageMetaSection pms = null; // Segment information long segmentOffset; long segmentLength; long segmentPMSOffset; // Number of Clusters int numClusters; long[] clusterOffsets; // Cache Pool // pcp just used in m/r mode PageCache[] pcp; // pagecache used in POINTQUERY mode BlockCache pagecache; // Statistics of Cache Pool int pageLoads; int cacheHits; Map<Integer, ScanMode[]> scanMap = null; Reporter reporter = null; boolean withPageMeta; READMODE mode; int segId; /** * Create the Segment Reader. * * @param fs * which file system that store the segment file * @param file * the path of the segment file * @param buffersize * the length of the buffer size to read the data * @param segmentOffset * the file offset of the segment in the segment file * @param segmentLength * the length of the segment * @param segmentPMSOffset * the file offset of the page meta section of the segment * @throws IOException */ public SegmentReader(Configuration conf, FileSystem fs, Path file, int segId, int buffersize, long segmentOffset, long segmentLength, long segmentPMSOffset) throws IOException { this(conf, fs, file, segId, buffersize, segmentOffset, segmentLength, segmentPMSOffset, true, READMODE.MR); } public SegmentReader(Configuration conf, FileSystem fs, Path file, int segId, int buffersize, long segmentOffset, long segmentLength, long segmentPMSOffset, boolean withPageMeta) throws IOException { this(conf, fs, file, segId, buffersize, segmentOffset, segmentLength, segmentPMSOffset, withPageMeta, READMODE.MR); } /** * Create the Segment Reader * * @param fs * which file system that store the segment file * @param file * the path of the segment file * @param buffersize * the length of the buffer size to read the data * @param segmentOffset * the file offset of the segment in the segment file * @param segmentLength * the length of the segment * @param segmentPMSOffset * the file offset of the page meta section of the segment * @param withPageMeta * do we need to read the max/min page meta * @throws IOException */ public SegmentReader(Configuration conf, FileSystem fs, Path file, int segId, int buffersize, long segmentOffset, long segmentLength, long segmentPMSOffset, boolean withPageMeta, READMODE readMode) throws IOException { this.conf = conf; this.istream = fs.open(file, buffersize); this.segmentOffset = segmentOffset; this.segmentLength = segmentLength; this.segmentPMSOffset = segmentPMSOffset; this.withPageMeta = withPageMeta; this.mode = readMode; this.segId = segId; } /** * Init a M/R repoter, so we can collect the statistics of the activities * of a segment reader during query processing * * @param reporter * the M/R reporter */ public synchronized void initReporter(Reporter reporter) { this.reporter = reporter; } /** * Load the page meta section. 
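 * Seeks to the page meta section of this segment, reads the PageMetaSection and the
 * per-cluster start offsets, and sets up the page cache for the configured read mode
 * (a SimplePageCache per cluster for MR scans, the shared block cache for point queries).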
    /**
     * Load the page meta section.
     * Note: this method needs to be called before any other activity.
     *
     * @throws IOException
     */
    public synchronized void loadPMS() throws IOException {
      // move to the offset of the page meta section
      istream.seek(segmentPMSOffset);
      pms = new PageMetaSection(withPageMeta);
      pms.readFields(istream);
      // once we have the page meta section, we know the number of clusters
      numClusters = pms.getPageMetaLists().length;
      LOG.info("Load page meta section with " + numClusters + " clusters.");
      if (mode == READMODE.MR) {
        pcp = new PageCache[numClusters];
        for (int i = 0; i < numClusters; i++) {
          pcp[i] = new SimplePageCache();
        }
      } else if (mode == READMODE.POINTQUERY) {
        pagecache = getBlockCache(conf);
      }
      // read in the cluster offsets
      clusterOffsets = new long[numClusters];
      for (int i = 0; i < numClusters; i++) {
        clusterOffsets[i] = istream.readLong();
      }
    }

    public synchronized void buildScanMap(ExprDesc expr, ClusterAccessor[] accessors) {
      if (pms == null) {
        return;
      }
      pms.setClusterAccessors(accessors);
      this.scanMap = pms.computeScanMap(expr);
    }

    /**
     * Clear the scan map of the last query, so the segment reader can be re-used
     * in the following query processing.
     */
    public synchronized void clearScanMap() {
      scanMap = null;
    }

    /**
     * Create a cluster reader to read the actual data.
     *
     * @param clusterId which cluster to read data from
     * @param cachePage do we need to cache the page in the buffer
     * @return cluster reader
     */
    public synchronized ClusterReader newClusterReader(int clusterId, boolean cachePage) {
      long clusterLength;
      if (clusterId == numClusters - 1) {
        clusterLength = segmentPMSOffset - clusterOffsets[clusterId];
      } else {
        clusterLength = clusterOffsets[clusterId + 1] - clusterOffsets[clusterId];
      }
      ScanMode[] modes = null;
      if (scanMap != null) {
        modes = scanMap.get(clusterId);
      }
      return new ClusterReader(this, clusterId, clusterOffsets[clusterId], clusterLength,
          pms.getPageMetaLists()[clusterId], modes, cachePage);
    }

    /**
     * Read the page of clusterId that contains the given position.
     *
     * @param clusterId which cluster to read
     * @param position the row position whose page should be read
     * @param cachePage do we need to cache the page
     * @return the page wrapped in a ByteBuffer
     * @throws IOException
     */
    public synchronized ByteBuffer readPage(int clusterId, int position, boolean cachePage)
        throws IOException {
      PageMetaList pmList = pms.getPageMetaLists()[clusterId];
      int pageId = Utils.findTargetPos(pmList.getMetaList(), 0, pmList.getMetaList().size() - 1,
          position);
      if (pageId < 0) {
        throw new IOException("No page in segment " + this.segId + " contains position "
            + position);
      }
      long offset = pmList.getOffsetList().get(pageId);
      long length;
      if (pageId == pmList.getOffsetList().size() - 1) {
        long clusterLength;
        if (clusterId == numClusters - 1) {
          clusterLength = segmentPMSOffset - clusterOffsets[clusterId];
        } else {
          clusterLength = clusterOffsets[clusterId + 1] - clusterOffsets[clusterId];
        }
        length = clusterOffsets[clusterId] + clusterLength - offset;
      } else {
        length = pmList.getOffsetList().get(pageId + 1) - offset;
      }
      return readPage(clusterId, pageId, offset, length, cachePage);
    }
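    /**
     * Minimal flow sketch (illustrative only; the predicate and accessors are assumed to come
     * from the query layer): load the page meta section, build the scan map, then hand out a
     * ClusterReader for one cluster.
     */
    ClusterReader openClusterSketch(int clusterId, ExprDesc expr, ClusterAccessor[] accessors)
        throws IOException {
      loadPMS();                     // must run before any other read activity
      buildScanMap(expr, accessors); // marks each page Positive, Rough or Negative
      return newClusterReader(clusterId, false /* no page caching for a one-pass scan */);
    }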
    /**
     * Read in a file page.
     *
     * @param clusterId which cluster to read
     * @param pageId which page to read
     * @param offset the file offset of the page
     * @param length the length of the page in bytes
     * @param cachePage need to cache the page?
     * @return the page wrapped in a ByteBuffer
     * @throws IOException
     */
    synchronized ByteBuffer readPage(int clusterId, int pageId, long offset, long length,
        boolean cachePage) throws IOException {
      pageLoads++;
      ByteBuffer cachedPage = null;
      if (mode == READMODE.MR) {
        cachedPage = pcp[clusterId].getPage(pageId);
      } else if (mode == READMODE.POINTQUERY) {
        cachedPage = pagecache.getBlock(makePageName(segId, clusterId, pageId));
      }
      if (cachedPage != null) {
        cacheHits++;
        if (reporter != null) {
          if (clusterId < 16) {
            reporter.incrCounter(SegmentCacheHitCounter.values()[clusterId], 1);
          } else {
            reporter.incrCounter(SegmentCacheHitCounter.OTHERCLUSTERS, 1);
          }
        }
        return cachedPage.duplicate();
      }
      /*
       * Report the related activities to the M/R framework, so we will know
       * what happened during query processing.
       */
      if (reporter != null) {
        if (clusterId < 16) {
          reporter.incrCounter(SegmentReadPageCounter.values()[clusterId], 1);
        } else {
          reporter.incrCounter(SegmentReadPageCounter.OTHERCLUSTERS, 1);
        }
        if (istream.getPos() - offset > 0) {
          reporter.incrCounter(SegmentPosSeekCounter.MOVEBACK, 1);
        } else {
          reporter.incrCounter(SegmentPosSeekCounter.MOVEON, 1);
        }
      }
      // Read the page from the filesystem
      InputStream is = new BoundedRangeFileInputStream(istream, offset, length);
      ByteBuffer buf = ByteBuffer.allocate(longToInt(length));
      IOUtils.readFully(is, buf.array(), 0, buf.capacity());
      is.close();
      if (cachePage) {
        if (mode == READMODE.MR) {
          pcp[clusterId].cachePage(pageId, buf.duplicate());
        } else if (mode == READMODE.POINTQUERY) {
          pagecache.cacheBlock(makePageName(segId, clusterId, pageId), buf.duplicate(), true);
        }
      }
      return buf;
    }

    @Override
    public synchronized void close() throws IOException {
      if (istream != null) {
        istream.close();
      }
    }
  }
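  /**
   * Minimal point-query sketch (illustrative only): read the page that contains a given row
   * position from one cluster, letting the shared LRU cache keep it for later lookups.
   */
  static ByteBuffer readPageForPositionSketch(SegmentReader reader, int clusterId, int position)
      throws IOException {
    // readPage locates the page through the page meta list and serves it from the cache on a hit.
    return reader.readPage(clusterId, position, true /* cache the page for future point queries */);
  }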
  /**
   * A <i>ClusterReader</i> is used to handle the read action of a specified cluster.
   */
  public static class ClusterReader {
    private final SegmentReader sr;
    private final int clusterId;
    private final long clusterOffset;
    private final long clusterLength;
    private final List<PageMeta> pmList;
    private final List<Long> poList;
    private final ScanMode[] scanmode;
    private boolean cachePage;
    int curPageId;
    int numPages;
    PosRLEChunk prb = new PosRLEChunk();
    Reporter reporter = null;

    /**
     * ClusterReader constructor.
     *
     * @param sr the parent segment reader
     * @param clusterId which cluster to read
     * @param clusterOffset the file offset of the cluster
     * @param clusterLength the length of the cluster
     * @param pml the page meta list of the cluster
     * @param sm the scan mode of the query
     * @param cachePage do we need to cache pages for random access
     */
    public ClusterReader(SegmentReader sr, int clusterId, long clusterOffset, long clusterLength,
        PageMetaList pml, ScanMode[] sm, boolean cachePage) {
      this.sr = sr;
      this.clusterId = clusterId;
      this.clusterOffset = clusterOffset;
      this.clusterLength = clusterLength;
      this.pmList = pml.getMetaList();
      this.poList = pml.getOffsetList();
      this.cachePage = cachePage;
      scanmode = sm;
      numPages = poList.size();
      curPageId = 0;
    }

    public synchronized void setCachePage(boolean cachePage_) {
      this.cachePage = cachePage_;
    }

    public synchronized void initReporter(Reporter reporter) {
      this.reporter = reporter;
    }

    public synchronized int getNumPages() {
      return numPages;
    }

    public synchronized int getCurPageId() {
      return curPageId;
    }

    public synchronized boolean isPageCached() {
      return cachePage;
    }

    /**
     * Read the next page (sequentially).
     *
     * @return the next page data, or null when all pages have been read
     * @throws IOException
     */
    public synchronized byte[] nextPage() throws IOException {
      if (curPageId < numPages) {
        long offset = poList.get(curPageId);
        long length = curPageId == numPages - 1 ? clusterOffset + clusterLength - offset
            : poList.get(curPageId + 1) - offset;
        if (LOG.isDebugEnabled()) {
          LOG.debug("nextPage() length " + length + " clusterId " + clusterId + " curPageId "
              + curPageId + " offset " + offset);
        }
        byte[] page = sr.readPage(clusterId, curPageId, offset, length, cachePage).array();
        curPageId++;
        return page;
      } else {
        return null;
      }
    }

    /**
     * @deprecated
     * @param predicate
     * @return the page data
     * @throws IOException
     */
    @Deprecated
    public synchronized byte[] nextPage(Predicate predicate) throws IOException {
      return null;
    }

    /**
     * Skip to the page that contains the target position.
     * It is useful during position filtering.
     *
     * @param pos position
     * @return the page data
     * @throws IOException
     */
    public synchronized byte[] skipToPosAndGetPage(int pos) throws IOException {
      if (curPageId >= numPages) {
        return null;
      }
      int skipToIdx = Utils.findTargetPos(pmList, curPageId, numPages - 1, pos);
      if (skipToIdx >= 0) {
        if (reporter != null && skipToIdx > curPageId) {
          if (clusterId < 16) {
            reporter.incrCounter(SegmentSkippedPageCounter.values()[clusterId],
                skipToIdx - curPageId - 1);
          } else {
            reporter.incrCounter(SegmentSkippedPageCounter.OTHERCLUSTERS,
                skipToIdx - curPageId - 1);
          }
        }
        curPageId = skipToIdx;
      }
      return nextPage();
    }

    /**
     * Get the last position of the cluster.
     *
     * @return last position
     */
    public synchronized int getLastPos() {
      PageMeta pm = pmList.get(numPages - 1);
      return pm.startPos + pm.numPairs - 1;
    }
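    /**
     * Minimal sequential-scan sketch (illustrative only): walk every page of this cluster with
     * {@link #nextPage()}; a real consumer would decode each returned page instead of just
     * counting bytes.
     */
    long scanAllPagesSketch() throws IOException {
      long totalBytes = 0;
      for (byte[] page = nextPage(); page != null; page = nextPage()) {
        totalBytes += page.length;
      }
      return totalBytes;
    }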
    /**
     * <p>
     * Read in the next necessary page by predicate (skip all the negative pages). <br>
     * 1) if the page is a rough page, return the rough page and its scan mode. <br>
     * 2) if the page is a positive page, return the first positive page and its scan mode, and
     * also collect the continuous position range until we encounter a rough/negative page.
     * </p>
     *
     * <p>
     * This method is used when producing the position chunk stream.
     * </p>
     *
     * @param modes the scan mode of the page (output)
     * @param blks the position range of the predicate pages (output)
     * @return the reference to the page
     * @throws IOException
     */
    public synchronized byte[] nextPredicatePagePos(ScanMode[] modes, PosChunk[] blks)
        throws IOException {
      if (scanmode == null) {
        modes[0] = ScanMode.Rough;
        if (curPageId < numPages) {
          PageMeta tmpPm = pmList.get(curPageId);
          prb.setTriple(null, tmpPm.startPos, tmpPm.numPairs);
          blks[0] = prb;
          return nextPage();
        } else {
          return null;
        }
      }
      byte[] page = null;
      // skip all the negative pages
      int prevPageId = curPageId;
      while (curPageId < numPages && scanmode[curPageId] == ScanMode.Negative) {
        curPageId++;
      }
      if (reporter != null) {
        if (clusterId < 16) {
          reporter.incrCounter(SegmentSkippedPageCounter.values()[clusterId],
              curPageId - prevPageId - 1);
        } else {
          reporter.incrCounter(SegmentSkippedPageCounter.OTHERCLUSTERS,
              curPageId - prevPageId - 1);
        }
      }
      if (curPageId < numPages) {
        if (scanmode[curPageId] == ScanMode.Rough) {
          modes[0] = ScanMode.Rough;
          PageMeta pm = pmList.get(curPageId);
          prb.setTriple(null, pm.startPos, pm.numPairs);
          blks[0] = prb;
          page = nextPage();
        } else if (scanmode[curPageId] == ScanMode.Positive) {
          modes[0] = ScanMode.Positive;
          PageMeta pm = pmList.get(curPageId);
          int startPos = pm.startPos;
          int numPairs = pm.numPairs;
          page = nextPage();
          // collect the continuous run of positive pages into one position range
          while (curPageId < numPages && scanmode[curPageId] == ScanMode.Positive) {
            pm = pmList.get(curPageId);
            numPairs += pm.numPairs;
            curPageId++;
          }
          prb.setTriple(null, startPos, numPairs);
          blks[0] = prb;
        } else {
          assert (false);
        }
      }
      return page;
    }

    /**
     * Read in the next page by predicate.
     *
     * @param modes the scan mode of the next page (output)
     * @return the next page data
     * @throws IOException
     */
    public synchronized byte[] nextPredicatePageValue(ScanMode[] modes) throws IOException {
      if (scanmode == null) {
        modes[0] = ScanMode.Rough;
        return nextPage();
      }
      byte[] page = null;
      int prevPageId = curPageId;
      // skip all the negative pages
      while (curPageId < numPages && scanmode[curPageId] == ScanMode.Negative) {
        curPageId++;
      }
      if (reporter != null) {
        if (clusterId < 16) {
          reporter.incrCounter(SegmentSkippedPageCounter.values()[clusterId],
              curPageId - prevPageId - 1);
        } else {
          reporter.incrCounter(SegmentSkippedPageCounter.OTHERCLUSTERS,
              curPageId - prevPageId - 1);
        }
      }
      if (curPageId < numPages) {
        modes[0] = scanmode[curPageId];
        page = nextPage();
      }
      return page;
    }
  }
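  /**
   * Minimal predicate-scan sketch (illustrative only): visit just the pages the scan map marked
   * Positive or Rough, skipping Negative pages entirely; mode[0] tells the caller whether a page
   * still needs per-row filtering (Rough) or is fully matched (Positive).
   */
  static int countPredicatePagesSketch(ClusterReader cr) throws IOException {
    ScanMode[] mode = new ScanMode[1];
    int visited = 0;
    for (byte[] page = cr.nextPredicatePageValue(mode); page != null;
        page = cr.nextPredicatePageValue(mode)) {
      visited++;
    }
    return visited;
  }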
  // Utility methods.

  /**
   * @param l Long to convert to an int.
   * @return <code>l</code> cast as an int.
   */
  static synchronized int longToInt(final long l) {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to a negative integer if it exceeds 2GB (from tfile).
    return (int) (l & 0x00000000ffffffffL);
  }

  /**
   * Return the global block cache.
   *
   * @param conf The current configuration
   * @return the block cache or null
   */
  public static synchronized BlockCache getBlockCache(Configuration conf) {
    if (globalPageCache != null) {
      return globalPageCache;
    }
    float cachePercentage = conf.getFloat(SEGFILE_CACHE_SIZE_KEY, 0.0f);
    if (cachePercentage == 0.0f) {
      return null;
    }
    if (cachePercentage > 1.0) {
      throw new IllegalArgumentException(SEGFILE_CACHE_SIZE_KEY
          + " must be between 0.0 and 1.0, not > 1.0");
    }
    // Calculate the amount of heap to give the cache
    MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
    long cacheSize = (long) (mu.getMax() * cachePercentage);
    LOG.info("Allocating LruBlockCache with maximum size "
        + StringUtils.humanReadableInt(cacheSize));
    globalPageCache = new LruBlockCache(cacheSize, MastiffMapReduce.getTablePageSize(conf));
    return globalPageCache;
  }

  static synchronized String makePageName(int segId, int clusterId, int pageId) {
    StringBuilder sb = new StringBuilder();
    sb.append(segId).append('-').append(clusterId).append('-').append(pageId);
    return sb.toString();
  }
}
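/**
 * Minimal configuration sketch (not part of the original file): shows how the shared LRU page
 * cache used in POINTQUERY mode could be sized before readers are created. The 0.2 fraction is an
 * illustrative assumption, not a recommended value.
 */
class SegmentFileCacheConfigSketch {
  static BlockCache openCacheForPointQueriesSketch(Configuration conf) {
    // Give 20% of the JVM heap to the global LRU page cache (assumption for illustration).
    conf.setFloat(SegmentFile.SEGFILE_CACHE_SIZE_KEY, 0.2f);
    return SegmentFile.getBlockCache(conf);
  }
}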