/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.io;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.ColumnType;
import com.linkedin.cubert.block.DataType;
/**
* Deserializes tuples stored using variable length encoding.
*
* @author Maneesh Varshney
*
* @param <K>
*/
/**
 * Deserializes tuples stored using variable length encoding.
 *
 * @author Maneesh Varshney
 *
 * @param <K> the (tuple) type produced by this deserializer
 */
public class CompactDeserializer<K> implements Deserializer<K>
{
    private InputStream in;
    // One entry per column of the flat schema; fixed at construction time.
    private final DataType[] datatypes;
    // Scratch buffer for STRING fields; grown by doubling as needed and reused across calls.
    private byte[] buffer = new byte[32];

    /**
     * Creates a deserializer for tuples conforming to the given flat schema.
     *
     * @param schema the tuple schema; must be flat (no nested tuples/bags/maps)
     * @throws IllegalArgumentException if the schema is not flat
     */
    public CompactDeserializer(BlockSchema schema)
    {
        if (!schema.isFlatSchema())
            throw new IllegalArgumentException("CompactDeserializer can be used with flat tuple schema only");
        datatypes = new DataType[schema.getNumColumns()];
        for (int i = 0; i < datatypes.length; i++)
        {
            ColumnType ctype = schema.getColumnType(i);
            datatypes[i] = ctype.getType();
        }
    }

    /**
     * Attaches the input stream that subsequent {@link #deserialize(Object)} calls read from.
     *
     * @param in the stream containing variable-length-encoded tuples
     */
    @Override
    public void open(InputStream in) throws IOException
    {
        this.in = in;
    }

    /**
     * Reads the next tuple from the stream.
     * <p>
     * The supplied object is reused when it is a tuple of the right arity; otherwise a
     * fresh tuple is allocated. A {@code null} decoded writable maps to a {@code null}
     * field in the tuple.
     *
     * @param object a tuple to reuse, or {@code null} to allocate a new one
     * @return the deserialized tuple
     * @throws EOFException if the stream is exhausted before a tuple starts, or ends
     *             mid-string
     * @throws IOException on any other read failure
     */
    @SuppressWarnings("unchecked")
    @Override
    public K deserialize(K object) throws IOException
    {
        // NOTE(review): available() == 0 is only a heuristic for EOF; it is reliable for
        // in-memory streams but not guaranteed for arbitrary InputStreams.
        if (in.available() == 0)
            throw new EOFException();
        Tuple t = (Tuple) object;
        if (t == null || t.size() != datatypes.length)
            t = TupleFactory.getInstance().newTuple(datatypes.length);
        for (int i = 0; i < datatypes.length; i++)
        {
            Object field = null;
            switch (datatypes[i])
            {
            case BOOLEAN:
            {
                // Booleans are encoded as integers: 1 == true.
                IntWritable writable = VariableLengthEncoder.decodeInteger(in);
                field = (writable == null) ? null : (Boolean) (writable.get() == 1);
                break;
            }
            case BYTE:
            {
                IntWritable writable = VariableLengthEncoder.decodeInteger(in);
                field = (writable == null) ? null : (byte) writable.get();
                break;
            }
            case DOUBLE:
            {
                DoubleWritable writable = VariableLengthEncoder.decodeDouble(in);
                field = (writable == null) ? null : writable.get();
                break;
            }
            case FLOAT:
            {
                FloatWritable writable = VariableLengthEncoder.decodeFloat(in);
                field = (writable == null) ? null : writable.get();
                break;
            }
            case INT:
            {
                IntWritable writable = VariableLengthEncoder.decodeInteger(in);
                field = (writable == null) ? null : writable.get();
                break;
            }
            case LONG:
            {
                LongWritable writable = VariableLengthEncoder.decodeLong(in);
                field = (writable == null) ? null : writable.get();
                break;
            }
            case STRING:
            {
                // Strings are encoded as a length prefix followed by raw bytes.
                IntWritable writable = VariableLengthEncoder.decodeInteger(in);
                if (writable == null)
                    field = null;
                else
                {
                    int length = writable.get();
                    while (length > buffer.length)
                        buffer = new byte[2 * buffer.length];
                    // BUGFIX: InputStream.read() may return fewer bytes than requested;
                    // loop until the full string payload has been consumed.
                    int offset = 0;
                    while (offset < length)
                    {
                        int nread = in.read(buffer, offset, length - offset);
                        if (nread < 0)
                            throw new EOFException("Unexpected end of stream while reading string of length "
                                    + length);
                        offset += nread;
                    }
                    // NOTE(review): uses the platform default charset; should be UTF-8 if
                    // the matching serializer writes UTF-8 bytes — verify before changing.
                    field = new String(buffer, 0, length);
                }
                break;
            }
            default:
                throw new RuntimeException("Cannot deserialize column of type "
                        + datatypes[i]);
            }
            t.set(i, field);
        }
        return (K) t;
    }

    /**
     * No-op: this deserializer does not own the input stream; the caller that supplied it
     * via {@link #open(InputStream)} is responsible for closing it.
     */
    @Override
    public void close() throws IOException
    {
    }
}