/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.accumulo.serializers;
import com.facebook.presto.accumulo.Types;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.BlockBuilderStatus;
import com.facebook.presto.spi.block.InterleavedBlockBuilder;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.TypeUtils;
import com.facebook.presto.spi.type.VarcharType;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
* Interface for deserializing the data in Accumulo into a Presto row.
* <p>
* Provides a means for end-users of the connector to customize how the data in an Accumulo row gets
* serialized and deserialized from/to a Presto row.
* <p>
* The workflow of how this class is called by the Accumulo connector for reading data is as
* follows:
* <ol>
* <li>setRowIdName - Sets the Presto name which is the Accumulo row ID</li>
* <li>setRowOnly - True if only the row ID is going to be retrieved, false if more data is
* necessary.</li>
* <li>setMapping - Multiple calls for each Presto column, setting the mapping of Presto column name
* to Accumulo column family and qualifier</li>
* <li>deserialize - Called for each Accumulo entry in the same row. Implementations should
* retrieve the Presto column value from the given key/value pair</li>
* <li>get* - Called to retrieve the typed value for the given Presto column name</li>
* <li>reset - Begins a new Row, serializer is expected to clear any state</li>
* <li>If there are more entries left, go back to deserialize, else end!</li>
* </ol>
*
* @see LexicoderRowSerializer
* @see StringRowSerializer
*/
public interface AccumuloRowSerializer
{
/**
* Creates a new instance of the default AccumuloRowSerializer implementation,
* {@link LexicoderRowSerializer}.
*
* @return A newly constructed default serializer
*/
static AccumuloRowSerializer getDefault()
{
    AccumuloRowSerializer defaultSerializer = new LexicoderRowSerializer();
    return defaultSerializer;
}
/**
* Sets the Presto name which maps to the Accumulo row ID.
* <p>
* This is listed as the first step of the read workflow described in the interface-level
* documentation.
*
* @param name Presto column name
*/
void setRowIdName(String name);
/**
* Sets the mapping for the Presto column name to Accumulo family and qualifier.
* <p>
* Per the read workflow described in the interface-level documentation, this is called
* multiple times, once for each Presto column.
*
* @param name Presto name
* @param family Accumulo family
* @param qualifier Accumulo qualifier
*/
void setMapping(String name, String family, String qualifier);
/**
* Sets a Boolean value indicating whether or not only the row ID is going to be retrieved from the serializer.
*
* @param rowOnly True if only the row ID is going to be retrieved, false if more data is necessary
*/
void setRowOnly(boolean rowOnly);
/**
* Reset the state of the serializer to prepare for a new set of entries with the same row ID.
* <p>
* Per the read workflow described in the interface-level documentation, this begins a new row
* and the serializer is expected to clear any state.
*/
void reset();
/**
* Deserialize the given Accumulo entry, retrieving data for the Presto column.
* <p>
* Per the read workflow described in the interface-level documentation, this is called for each
* Accumulo entry in the same row; implementations should retrieve the Presto column value from
* the given key/value pair.
*
* @param entry Entry to deserialize
* @throws IOException If an IO error occurs during deserialization
*/
void deserialize(Entry<Key, Value> entry)
throws IOException;
/**
* Gets a Boolean value indicating whether or not the Presto column is a null value.
* <p>
* Several getters of this interface return primitives (for example
* {@link #getBoolean(String)} and {@link #getLong(String)}) and therefore cannot express a null
* column themselves; this method is the null test this interface offers.
*
* @param name Column name
* @return True if null, false otherwise.
*/
boolean isNull(String name);
/**
* Gets the array Block of the given Presto column.
* <p>
* A Block of an ARRAY type can be converted to a Java List with
* {@link #getArrayFromBlock(Type, Block)}, which takes the element type of the array.
*
* @param name Column name
* @param type Array type
* @return Array block
*/
Block getArray(String name, Type type);
/**
* Encode the given array Block into the given Text object.
* <p>
* A Java List can be converted to a Block with {@link #getBlockFromArray(Type, List)}, which
* takes the element type of the array.
*
* @param text Text object to set
* @param type Array type
* @param block Array block
*/
void setArray(Text text, Type type, Block block);
/**
* Gets the Boolean value of the given Presto column.
* <p>
* The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Boolean value
*/
boolean getBoolean(String name);
/**
* Encode the given Boolean value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Boolean} as the Java object for the
* BOOLEAN type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setBoolean(Text text, Boolean value);
/**
* Gets the Byte value of the given Presto column.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Byte} as the Java object for the
* TINYINT type. The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Byte value
*/
byte getByte(String name);
/**
* Encode the given Byte value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Byte} as the Java object for the
* TINYINT type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setByte(Text text, Byte value);
/**
* Gets the Date value of the given Presto column.
* <p>
* The value is returned as a {@link java.sql.Date}. Use {@link #isNull(String)} to test whether
* the column is null.
*
* @param name Column name
* @return Date value
*/
Date getDate(String name);
/**
* Encode the given Date value into the given Text object.
* <p>
* The value is supplied as a {@link java.sql.Date}, one of the Java objects the table on
* {@link #encode(Type, Object)} lists for the DATE type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setDate(Text text, Date value);
/**
* Gets the Double value of the given Presto column.
* <p>
* The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Double value
*/
double getDouble(String name);
/**
* Encode the given Double value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Double} as the Java object for the
* DOUBLE type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setDouble(Text text, Double value);
/**
* Gets the Float value of the given Presto column.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Float} as the Java object for the
* REAL type. The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Float value
*/
float getFloat(String name);
/**
* Encode the given Float value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Float} as the Java object for the
* REAL type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setFloat(Text text, Float value);
/**
* Gets the Integer value of the given Presto column.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Integer} as the Java object for the
* INTEGER type. The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Integer value
*/
int getInt(String name);
/**
* Encode the given Integer value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Integer} as the Java object for the
* INTEGER type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setInt(Text text, Integer value);
/**
* Gets the Long value of the given Presto column.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Long} among the Java objects for the
* BIGINT type. The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Long value
*/
long getLong(String name);
/**
* Encode the given Long value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Long} among the Java objects for the
* BIGINT type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setLong(Text text, Long value);
/**
* Gets the Map value of the given Presto column and Map type.
* <p>
* The value is returned as a Block. A Block of a MAP type can be converted to a Java Map with
* {@link #getMapFromBlock(Type, Block)}.
*
* @param name Column name
* @param type Map type
* @return Map value, as a Block
*/
Block getMap(String name, Type type);
/**
* Encode the given map Block into the given Text object.
* <p>
* A Java Map can be converted to a Block with {@link #getBlockFromMap(Type, Map)}.
*
* @param text Text object to set
* @param type Map type
* @param block Map block
*/
void setMap(Text text, Type type, Block block);
/**
* Gets the Short value of the given Presto column.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Short} as the Java object for the
* SMALLINT type. The return type is a primitive and cannot represent a null column; use
* {@link #isNull(String)} to test for null.
*
* @param name Column name
* @return Short value
*/
short getShort(String name);
/**
* Encode the given Short value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code Short} as the Java object for the
* SMALLINT type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setShort(Text text, Short value);
/**
* Gets the Time value of the given Presto column.
* <p>
* The value is returned as a {@link java.sql.Time}. Use {@link #isNull(String)} to test whether
* the column is null.
*
* @param name Column name
* @return Time value
*/
Time getTime(String name);
/**
* Encode the given Time value into the given Text object.
* <p>
* The value is supplied as a {@link java.sql.Time}, one of the Java objects the table on
* {@link #encode(Type, Object)} lists for the TIME type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setTime(Text text, Time value);
/**
* Gets the Timestamp value of the given Presto column.
* <p>
* The value is returned as a {@link java.sql.Timestamp}. Use {@link #isNull(String)} to test
* whether the column is null.
*
* @param name Column name
* @return Timestamp value
*/
Timestamp getTimestamp(String name);
/**
* Encode the given Timestamp value into the given Text object.
* <p>
* The value is supplied as a {@link java.sql.Timestamp}, one of the Java objects the table on
* {@link #encode(Type, Object)} lists for the TIMESTAMP type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setTimestamp(Text text, Timestamp value);
/**
* Gets the Varbinary value of the given Presto column.
* <p>
* The value is returned as a {@code byte[]}. Use {@link #isNull(String)} to test whether the
* column is null.
*
* @param name Column name
* @return Varbinary value
*/
byte[] getVarbinary(String name);
/**
* Encode the given byte[] value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code byte[]} among the Java objects for
* the VARBINARY type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setVarbinary(Text text, byte[] value);
/**
* Gets the String value of the given Presto column.
* <p>
* Use {@link #isNull(String)} to test whether the column is null.
*
* @param name Column name
* @return String value
*/
String getVarchar(String name);
/**
* Encode the given String value into the given Text object.
* <p>
* The table on {@link #encode(Type, Object)} lists {@code String} among the Java objects for
* the VARCHAR type.
*
* @param text Text object to set
* @param value Value to encode
*/
void setVarchar(Text text, String value);
/**
* Encodes a Presto Java object to a byte array based on the given type.
* <p>
* Java Lists and Maps can be converted to Blocks using
* {@link AccumuloRowSerializer#getBlockFromArray(Type, java.util.List)} and
* {@link AccumuloRowSerializer#getBlockFromMap(Type, Map)}.
* <p>
* <table summary="Expected data types">
* <tr>
* <th>Type to Encode</th>
* <th>Expected Java Object</th>
* </tr>
* <tr>
* <td>ARRAY</td>
* <td>com.facebook.presto.spi.block.Block</td>
* </tr>
* <tr>
* <td>BIGINT</td>
* <td>Integer or Long</td>
* </tr>
* <tr>
* <td>BOOLEAN</td>
* <td>Boolean</td>
* </tr>
* <tr>
* <td>DATE</td>
* <td>java.sql.Date, Long</td>
* </tr>
* <tr>
* <td>DOUBLE</td>
* <td>Double</td>
* </tr>
* <tr>
* <td>INTEGER</td>
* <td>Integer</td>
* </tr>
* <tr>
* <td>MAP</td>
* <td>com.facebook.presto.spi.block.Block</td>
* </tr>
* <tr>
* <td>REAL</td>
* <td>Float</td>
* </tr>
* <tr>
* <td>SMALLINT</td>
* <td>Short</td>
* </tr>
* <tr>
* <td>TIME</td>
* <td>java.sql.Time, Long</td>
* </tr>
* <tr>
* <td>TIMESTAMP</td>
* <td>java.sql.Timestamp, Long</td>
* </tr>
* <tr>
* <td>TINYINT</td>
* <td>Byte</td>
* </tr>
* <tr>
* <td>VARBINARY</td>
* <td>io.airlift.slice.Slice or byte[]</td>
* </tr>
* <tr>
* <td>VARCHAR</td>
* <td>io.airlift.slice.Slice or String</td>
* </tr>
* </table>
*
* @param type The presto {@link com.facebook.presto.spi.type.Type}
* @param value The Java object per the table in the method description
* @return Encoded bytes
*/
byte[] encode(Type type, Object value);
/**
* Generic function to decode the given byte array to a Java object based on the given type.
* <p>
* Blocks from ARRAY and MAP types can be converted
* to Java Lists and Maps using {@link AccumuloRowSerializer#getArrayFromBlock(Type, Block)}
* and {@link AccumuloRowSerializer#getMapFromBlock(Type, Block)}.
* <p>
* <table summary="Expected data types">
* <tr>
* <th>Encoded Type</th>
* <th>Returned Java Object</th>
* </tr>
* <tr>
* <td>ARRAY</td>
* <td>{@code List<?>}</td>
* </tr>
* <tr>
* <td>BIGINT</td>
* <td>Long</td>
* </tr>
* <tr>
* <td>BOOLEAN</td>
* <td>Boolean</td>
* </tr>
* <tr>
* <td>DATE</td>
* <td>Long</td>
* </tr>
* <tr>
* <td>DOUBLE</td>
* <td>Double</td>
* </tr>
* <tr>
* <td>MAP</td>
* <td>{@code Map<?, ?>}</td>
* </tr>
* <tr>
* <td>REAL</td>
* <td>Double</td>
* </tr>
* <tr>
* <td>SMALLINT</td>
* <td>Long</td>
* </tr>
* <tr>
* <td>TIME</td>
* <td>Long</td>
* </tr>
* <tr>
* <td>TIMESTAMP</td>
* <td>Long</td>
* </tr>
* <tr>
* <td>TINYINT</td>
* <td>Long</td>
* </tr>
* <tr>
* <td>VARBINARY</td>
* <td>byte[]</td>
* </tr>
* <tr>
* <td>VARCHAR</td>
* <td>String</td>
* </tr>
* </table>
* <p>
* NOTE(review): unlike the table on {@link #encode(Type, Object)}, this table has no INTEGER
* row; confirm the Java type returned for INTEGER against the implementations.
*
* @param type The presto {@link com.facebook.presto.spi.type.Type}
* @param value Encoded bytes to decode
* @param <T> The Java type of the object that has been encoded to the given byte array
* @return The Java object per the table in the method description
*/
<T> T decode(Type type, byte[] value);
/**
* Given the array element type and Presto Block, decodes the Block into a list of values.
* <p>
* Every position of the block is decoded with {@link AccumuloRowSerializer#readObject}, so
* nested arrays and maps are decoded recursively. Null elements are kept as {@code null}
* entries of the returned list.
*
* @param elementType Array element type
* @param block Array block
* @return Unmodifiable list of values, in block position order
*/
static List<Object> getArrayFromBlock(Type elementType, Block block)
{
    int positionCount = block.getPositionCount();

    // An ArrayList is used instead of Guava's ImmutableList because readObject may yield null
    // for a null position, and ImmutableList.Builder#add rejects null with an NPE.
    List<Object> values = new ArrayList<>(positionCount);
    for (int position = 0; position < positionCount; position++) {
        values.add(readObject(elementType, block, position));
    }

    // No other reference to the backing list escapes, so the result stays immutable to callers
    return Collections.unmodifiableList(values);
}
/**
* Given the map type and Presto Block, decodes the Block into a map of values.
* <p>
* The block is read as interleaved pairs: each even position is decoded as a key using the
* map's key type and the position right after it as the matching value using the map's value
* type. Both are decoded with {@link AccumuloRowSerializer#readObject}.
*
* @param type Map type
* @param block Map block
* @return Map of decoded keys to decoded values
*/
static Map<Object, Object> getMapFromBlock(Type type, Block block)
{
    Map<Object, Object> decoded = new HashMap<>(block.getPositionCount() / 2);
    Type keyType = Types.getKeyType(type);
    Type valueType = Types.getValueType(type);

    int position = 0;
    while (position < block.getPositionCount()) {
        Object key = readObject(keyType, block, position);
        Object value = readObject(valueType, block, position + 1);
        decoded.put(key, value);
        position += 2;
    }
    return decoded;
}
/**
* Builds a Presto Block out of the elements of the given list.
* <p>
* A block builder is created from the element type, sized for the list, and every element is
* appended in list order through {@link AccumuloRowSerializer#writeObject}.
*
* @param elementType Element type of the array
* @param array Array of elements to encode
* @return Presto Block
*/
static Block getBlockFromArray(Type elementType, List<?> array)
{
    BlockBuilder blockBuilder = elementType.createBlockBuilder(new BlockBuilderStatus(), array.size());
    array.forEach(element -> writeObject(blockBuilder, elementType, element));
    return blockBuilder.build();
}
/**
* Builds a Presto Block out of the entries of the given map.
* <p>
* The key and value types are the first and second type parameters of the map type. Entries
* are written to an interleaved block builder as key followed by value, each through
* {@link AccumuloRowSerializer#writeObject}.
*
* @param mapType Presto type of the map
* @param map Map of key/value pairs to encode
* @return Presto Block
*/
static Block getBlockFromMap(Type mapType, Map<?, ?> map)
{
    Type keyType = mapType.getTypeParameters().get(0);
    Type valueType = mapType.getTypeParameters().get(1);
    List<Type> interleavedTypes = ImmutableList.of(keyType, valueType);

    // Two positions (key, value) are written per map entry
    BlockBuilder interleaved = new InterleavedBlockBuilder(interleavedTypes, new BlockBuilderStatus(), map.size() * 2);
    map.forEach((key, value) -> {
        writeObject(interleaved, keyType, key);
        writeObject(interleaved, valueType, value);
    });
    return interleaved.build();
}
/**
* Recursive helper function used by {@link AccumuloRowSerializer#getBlockFromArray} and
* {@link AccumuloRowSerializer#getBlockFromMap} to add the given object to the given block
* builder. Supports nested complex types!
* <p>
* Array types expect {@code obj} to be a {@link List} and map types expect a {@link Map};
* their contents are written recursively into a nested block entry. Any other type is
* delegated to {@code TypeUtils.writeNativeValue}. A {@code null} object is written as a null
* entry regardless of the type.
*
* @param builder Block builder
* @param type Presto type
* @param obj Object to write to the block builder, may be null
*/
static void writeObject(BlockBuilder builder, Type type, Object obj)
{
    if (obj == null) {
        // Without this guard a null array or map would open a block entry and then fail with
        // an NPE while iterating, leaving the builder with an unclosed entry. Scalar nulls
        // were already handed to TypeUtils.writeNativeValue, which is expected to append a
        // null as well, so their behavior is unchanged.
        builder.appendNull();
        return;
    }

    if (Types.isArrayType(type)) {
        BlockBuilder arrayBuilder = builder.beginBlockEntry();
        Type elementType = Types.getElementType(type);
        for (Object item : (List<?>) obj) {
            writeObject(arrayBuilder, elementType, item);
        }
        builder.closeEntry();
    }
    else if (Types.isMapType(type)) {
        BlockBuilder mapBuilder = builder.beginBlockEntry();
        Type keyType = Types.getKeyType(type);
        Type valueType = Types.getValueType(type);
        // Entries are written interleaved: key first, then its value
        for (Entry<?, ?> entry : ((Map<?, ?>) obj).entrySet()) {
            writeObject(mapBuilder, keyType, entry.getKey());
            writeObject(mapBuilder, valueType, entry.getValue());
        }
        builder.closeEntry();
    }
    else {
        TypeUtils.writeNativeValue(type, builder, obj);
    }
}
/**
* Recursive helper function used by {@link AccumuloRowSerializer#getArrayFromBlock} and
* {@link AccumuloRowSerializer#getMapFromBlock} to decode the Block into a Java type.
* <p>
* Array and map types are decoded recursively into a List or a Map. Types whose Java type is
* {@link Slice} are converted to a String when the type equals {@code VarcharType.VARCHAR}
* and to a {@code byte[]} otherwise. All remaining types return whatever
* {@code TypeUtils.readNativeValue} produces.
*
* @param type Presto type
* @param block Block to decode
* @param position Position in the block to get
* @return Java object from the Block; null when a Slice-backed value is null
*/
static Object readObject(Type type, Block block, int position)
{
    // NOTE(review): array and map positions are not null-checked here; confirm what
    // block.getObject returns for a null position before relying on nested null values.
    if (Types.isArrayType(type)) {
        Type elementType = Types.getElementType(type);
        return getArrayFromBlock(elementType, block.getObject(position, Block.class));
    }

    if (Types.isMapType(type)) {
        return getMapFromBlock(type, block.getObject(position, Block.class));
    }

    if (type.getJavaType() == Slice.class) {
        Slice slice = (Slice) TypeUtils.readNativeValue(type, block, position);
        if (slice == null) {
            // readNativeValue is expected to yield null for a null position; converting it
            // below would otherwise fail with an NPE
            return null;
        }

        // NOTE(review): only the exact VarcharType.VARCHAR instance is decoded to a String, so
        // any other Slice-backed type (possibly including parameterized varchar types) is
        // returned as byte[]. Confirm this is intended.
        return type.equals(VarcharType.VARCHAR) ? slice.toStringUtf8() : slice.getBytes();
    }

    return TypeUtils.readNativeValue(type, block, position);
}
}