/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.data;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.pig.backend.executionengine.ExecException;

/**
 * A class to handle reading and writing of intermediate results of data
 * types. This class could also be used for storing permanent results.
 */
public class DataReaderWriter {
    private static TupleFactory mTupleFactory = TupleFactory.getInstance();
    private static BagFactory mBagFactory = BagFactory.getInstance();

    // Strings whose UTF-8 encoding fits under an unsigned-short length
    // prefix are written as CHARARRAY; longer ones as BIGCHARARRAY.
    static final int UNSIGNED_SHORT_MAX = 65535;
    static final String UTF8 = "UTF-8";

    public static Tuple bytesToTuple(DataInput in) throws IOException {
        // Don't use Tuple.readFields, because it requires you to
        // create a tuple with no size and then append fields.
        // That's less efficient than allocating the tuple size up
        // front and then filling in the spaces.

        // Read the size.
        int sz = in.readInt();
        // If sz == 0, we construct an "empty" tuple -
        // presumably the writer wrote an empty tuple!
        if (sz < 0) {
            throw new IOException("Invalid size " + sz + " for a tuple");
        }
        Tuple t = mTupleFactory.newTuple(sz);
        for (int i = 0; i < sz; i++) {
            t.set(i, readDatum(in));
        }
        return t;
    }
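    // Illustrative sketch, not part of the original class: round-trips a
    // tuple through writeDatum/readDatum over an in-memory stream. The
    // method name "roundTripTupleExample" and the ByteArrayOutputStream/
    // ByteArrayInputStream plumbing are assumptions for demonstration only;
    // the class itself works against any DataInput/DataOutput pair.
    static Tuple roundTripTupleExample(Tuple t)
            throws IOException, ExecException {
        java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
        java.io.DataOutputStream dos = new java.io.DataOutputStream(bos);
        // For tuples, writeDatum delegates to Tuple.write, which emits the
        // type marker itself (see the comment in writeDatum below).
        writeDatum(dos, t);
        dos.flush();
        java.io.DataInputStream dis = new java.io.DataInputStream(
                new java.io.ByteArrayInputStream(bos.toByteArray()));
        // readDatum consumes the marker byte and dispatches to bytesToTuple.
        return (Tuple) readDatum(dis);
    }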
    public static DataBag bytesToBag(DataInput in) throws IOException {
        // DataBag knows how to read its own serialized form.
        DataBag bag = mBagFactory.newDefaultBag();
        bag.readFields(in);
        return bag;
    }

    public static Map<Object, Object> bytesToMap(DataInput in)
            throws IOException {
        int size = in.readInt();
        Map<Object, Object> m = new HashMap<Object, Object>(size);
        for (int i = 0; i < size; i++) {
            // Each entry is stored as a key datum followed by a value datum.
            Object key = readDatum(in);
            m.put(key, readDatum(in));
        }
        return m;
    }

    public static String bytesToCharArray(DataInput in) throws IOException {
        // CHARARRAY payloads carry an unsigned-short length prefix.
        int size = in.readUnsignedShort();
        byte[] ba = new byte[size];
        in.readFully(ba);
        return new String(ba, DataReaderWriter.UTF8);
    }

    public static String bytesToBigCharArray(DataInput in) throws IOException {
        // BIGCHARARRAY payloads carry a full int length prefix.
        int size = in.readInt();
        byte[] ba = new byte[size];
        in.readFully(ba);
        return new String(ba, DataReaderWriter.UTF8);
    }

    public static Object readDatum(DataInput in)
            throws IOException, ExecException {
        // Read the data type.
        byte b = in.readByte();
        return readDatum(in, b);
    }

    public static Object readDatum(DataInput in, byte type)
            throws IOException, ExecException {
        switch (type) {
            case DataType.TUPLE:
                return bytesToTuple(in);
            case DataType.BAG:
                return bytesToBag(in);
            case DataType.MAP:
                return bytesToMap(in);
            case DataType.INTEGER:
                return Integer.valueOf(in.readInt());
            case DataType.LONG:
                return Long.valueOf(in.readLong());
            case DataType.FLOAT:
                return Float.valueOf(in.readFloat());
            case DataType.DOUBLE:
                return Double.valueOf(in.readDouble());
            case DataType.BOOLEAN:
                return Boolean.valueOf(in.readBoolean());
            case DataType.BYTE:
                return Byte.valueOf(in.readByte());
            case DataType.BYTEARRAY: {
                int size = in.readInt();
                byte[] ba = new byte[size];
                in.readFully(ba);
                return new DataByteArray(ba);
            }
            case DataType.BIGCHARARRAY:
                return bytesToBigCharArray(in);
            case DataType.CHARARRAY:
                return bytesToCharArray(in);
            case DataType.NULL:
                return null;
            default:
                throw new RuntimeException("Unexpected data type " + type +
                    " found in stream.");
        }
    }
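    // Illustrative sketch, not part of the original class: the stream
    // format readDatum expects is a one-byte type marker followed by the
    // payload. Here an INTEGER is hand-encoded (marker byte plus four
    // big-endian bytes, per the DataInput/DataOutput contract) and decoded
    // with readDatum. The method name is hypothetical.
    static Integer decodeIntegerExample() throws IOException, ExecException {
        java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
        java.io.DataOutputStream dos = new java.io.DataOutputStream(bos);
        dos.writeByte(DataType.INTEGER); // type marker
        dos.writeInt(42);                // payload
        dos.flush();
        java.io.DataInputStream dis = new java.io.DataInputStream(
                new java.io.ByteArrayInputStream(bos.toByteArray()));
        return (Integer) readDatum(dis); // yields Integer.valueOf(42)
    }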
    @SuppressWarnings("unchecked")
    public static void writeDatum(DataOutput out, Object val)
            throws IOException {
        // Determine the data type of the value so the matching type
        // marker and payload can be written.
        byte type = DataType.findType(val);
        switch (type) {
            case DataType.TUPLE:
                // Because tuples are written directly by hadoop, the
                // tuple's write method needs to write the indicator byte.
                // So don't write the indicator byte here as it is for
                // everyone else.
                ((Tuple) val).write(out);
                break;
            case DataType.BAG:
                out.writeByte(DataType.BAG);
                ((DataBag) val).write(out);
                break;
            case DataType.MAP: {
                out.writeByte(DataType.MAP);
                Map<Object, Object> m = (Map<Object, Object>) val;
                out.writeInt(m.size());
                for (Map.Entry<Object, Object> entry : m.entrySet()) {
                    writeDatum(out, entry.getKey());
                    writeDatum(out, entry.getValue());
                }
                break;
            }
            case DataType.INTEGER:
                out.writeByte(DataType.INTEGER);
                out.writeInt((Integer) val);
                break;
            case DataType.LONG:
                out.writeByte(DataType.LONG);
                out.writeLong((Long) val);
                break;
            case DataType.FLOAT:
                out.writeByte(DataType.FLOAT);
                out.writeFloat((Float) val);
                break;
            case DataType.DOUBLE:
                out.writeByte(DataType.DOUBLE);
                out.writeDouble((Double) val);
                break;
            case DataType.BOOLEAN:
                out.writeByte(DataType.BOOLEAN);
                out.writeBoolean((Boolean) val);
                break;
            case DataType.BYTE:
                out.writeByte(DataType.BYTE);
                out.writeByte((Byte) val);
                break;
            case DataType.BYTEARRAY: {
                out.writeByte(DataType.BYTEARRAY);
                DataByteArray bytes = (DataByteArray) val;
                out.writeInt(bytes.size());
                out.write(bytes.mData);
                break;
            }
            case DataType.CHARARRAY: {
                String s = (String) val;
                byte[] utfBytes = s.getBytes(DataReaderWriter.UTF8);
                int length = utfBytes.length;
                // Short strings get a CHARARRAY marker with an unsigned-short
                // length; anything at or above the limit is written as
                // BIGCHARARRAY with a full int length.
                if (length < DataReaderWriter.UNSIGNED_SHORT_MAX) {
                    out.writeByte(DataType.CHARARRAY);
                    out.writeShort(length);
                    out.write(utfBytes);
                } else {
                    out.writeByte(DataType.BIGCHARARRAY);
                    out.writeInt(length);
                    out.write(utfBytes);
                }
                break;
            }
            case DataType.NULL:
                out.writeByte(DataType.NULL);
                break;
            default:
                throw new RuntimeException("Unexpected data type " + type +
                    " found in stream.");
        }
    }
}
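/*
 * Illustrative usage note, not part of the original source: writeDatum
 * chooses between CHARARRAY and BIGCHARARRAY from the UTF-8 byte length of
 * a string, so a reader must accept either marker. A hypothetical round
 * trip (stream plumbing as in the sketches above):
 *
 *     writeDatum(dos, shortString); // under 65535 UTF-8 bytes ->
 *                                   // CHARARRAY, 2-byte length prefix
 *     writeDatum(dos, hugeString);  // otherwise -> BIGCHARARRAY,
 *                                   // 4-byte length prefix
 *     String a = (String) readDatum(dis); // decodes either form
 *     String b = (String) readDatum(dis);
 */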