/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.io;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.pig.data.Tuple;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.DataType;
/**
 * Serializes flat tuples, column by column, using {@link VariableLengthEncoder}.
 * <p>
 * The column types are taken from the {@link BlockSchema} supplied at construction
 * time. Each object handed to {@link #serialize(Object)} is cast to a {@link Tuple}
 * and its fields are written in schema order; no type tags or field separators are
 * written by this class, so a reader needs the same schema to decode the stream.
 * <p>
 * Null fields are written through the {@code encodeNull*} method that corresponds to
 * the column type (STRING columns use {@code encodeNullInteger} in place of the
 * length prefix).
 *
 * @author Maneesh Varshney
 *
 * @param <K> the static type accepted by {@link #serialize(Object)}; instances must
 *            be {@link Tuple} objects at runtime
 */
public class CompactSerializer<K> implements Serializer<K>
{
    /**
     * Charset used to convert STRING fields to bytes. Fixed explicitly so that the
     * serialized form does not depend on the JVM's platform default encoding.
     * NOTE(review): the matching deserializer must decode STRING bytes with the same
     * charset -- confirm.
     */
    private static final String STRING_CHARSET = "UTF-8";

    /** Destination stream; set by {@link #open(OutputStream)}. */
    private OutputStream out;

    /** Type of each column, indexed by column position in the schema. */
    private final DataType[] datatypes;

    /**
     * Creates a serializer for tuples that conform to the given schema.
     *
     * @param schema the schema of the tuples to serialize
     * @throws IllegalArgumentException if {@code schema.isFlatSchema()} is false
     */
    public CompactSerializer(BlockSchema schema)
    {
        if (!schema.isFlatSchema())
            throw new IllegalArgumentException("CompactSerializer can be used with flat tuple schema only");

        datatypes = new DataType[schema.getNumColumns()];
        for (int i = 0; i < schema.getNumColumns(); i++)
            datatypes[i] = schema.getColumnType(i).getType();
    }

    /**
     * Does nothing. In particular, the stream passed to {@link #open(OutputStream)}
     * is not closed here.
     */
    @Override
    public void close() throws IOException
    {
    }

    /**
     * Sets the stream that subsequent {@link #serialize(Object)} calls write to.
     *
     * @param out the destination stream
     */
    @Override
    public void open(OutputStream out) throws IOException
    {
        this.out = out;
    }

    /**
     * Writes every column of the given tuple to the stream set by
     * {@link #open(OutputStream)}.
     * <p>
     * BOOLEAN fields are written as the integers 1 or 0. STRING fields are written as
     * an encoded integer holding the byte length, followed by the UTF-8 bytes of the
     * string. All other supported types are passed to the corresponding
     * {@code VariableLengthEncoder.encode*} method.
     *
     * @param object the tuple to serialize; must be a {@link Tuple}
     * @throws IOException if reading a field or writing to the stream fails
     * @throws RuntimeException if a column has a type other than BOOLEAN, BYTE,
     *         DOUBLE, FLOAT, INT, LONG or STRING
     */
    @Override
    public void serialize(K object) throws IOException
    {
        Tuple tuple = (Tuple) object;

        for (int i = 0; i < datatypes.length; i++)
        {
            Object field = tuple.get(i);

            switch (datatypes[i])
            {
            case BOOLEAN:
                if (field == null)
                    VariableLengthEncoder.encodeNullInteger(out);
                else
                    VariableLengthEncoder.encodeInteger((Boolean) field ? 1 : 0, out);
                break;
            case BYTE:
                if (field == null)
                    VariableLengthEncoder.encodeNullInteger(out);
                else
                    VariableLengthEncoder.encodeInteger((Byte) field, out);
                break;
            case DOUBLE:
                if (field == null)
                    VariableLengthEncoder.encodeNullDouble(out);
                else
                    VariableLengthEncoder.encodeDouble(((Number) field).doubleValue(),
                                                       out);
                break;
            case FLOAT:
                if (field == null)
                    VariableLengthEncoder.encodeNullFloat(out);
                else
                    VariableLengthEncoder.encodeFloat(((Number) field).floatValue(), out);
                break;
            case INT:
                if (field == null)
                    VariableLengthEncoder.encodeNullInteger(out);
                else
                    VariableLengthEncoder.encodeInteger(((Number) field).intValue(), out);
                break;
            case LONG:
                if (field == null)
                    VariableLengthEncoder.encodeNullLong(out);
                else
                    VariableLengthEncoder.encodeLong(((Number) field).longValue(), out);
                break;
            case STRING:
                if (field == null)
                    VariableLengthEncoder.encodeNullInteger(out);
                else
                {
                    String s = (String) field;
                    // Explicit charset: String.getBytes() with no argument uses the
                    // platform default, which makes both the payload and the length
                    // prefix vary from one JVM configuration to another.
                    byte[] bytes = s.getBytes(STRING_CHARSET);
                    // The prefix is the number of encoded bytes, not of characters.
                    VariableLengthEncoder.encodeInteger(bytes.length, out);
                    out.write(bytes);
                }
                break;
            default:
                throw new RuntimeException("Cannot serialize column of type "
                        + datatypes[i]);
            }
        }
    }
}