/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.thrift;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hive.service.rpc.thrift.TColumn;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TIOStreamTransport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This SerDe serializes the final output directly into Thrift objects. Use this SerDe only for final output result sets.
* It is used if HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS is set to true. It buffers rows that come in from FileSink until the
* number of buffered rows reaches HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE (configurable), or until all rows are finished and FileSink.closeOp() is called.
*/
public class ThriftJDBCBinarySerDe extends AbstractSerDe {
  public static final Logger LOG = LoggerFactory.getLogger(ThriftJDBCBinarySerDe.class.getName());

  /** Column names parsed from the {@code serdeConstants.LIST_COLUMNS} table property. */
  private List<String> columnNames;
  /** Column types parsed from the {@code serdeConstants.LIST_COLUMN_TYPES} table property. */
  private List<TypeInfo> columnTypes;
  /** One buffer per column; rows are accumulated column-wise until a batch is flushed. */
  private ColumnBuffer[] columnBuffers;
  private TypeInfo rowTypeInfo;
  // Re-created together with the column buffers; not read anywhere else in this class.
  private ArrayList<Object> row;
  /** Reused for every batch returned by {@link #serialize(Object, ObjectInspector)}. */
  private final BytesWritable serializedBytesWritable = new BytesWritable();
  /** In-memory sink that {@link #protocol} writes into. */
  private final ByteStream.Output output = new ByteStream.Output();
  private final TProtocol protocol = new TCompactProtocol(new TIOStreamTransport(output));
  private final ThriftFormatter thriftFormatter = new ThriftFormatter();
  /** Number of buffered rows that triggers a flush; read from the configuration in initialize(). */
  private int maxBufferedRows;
  /** Number of rows buffered since the last flush. */
  private int count;
  private StructObjectInspector rowObjectInspector;

  /**
   * Reads the batch size from the configuration, parses the column names and types from the
   * table properties, builds the row object inspector, and initializes the formatter.
   *
   * @param conf configuration providing HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE
   * @param tbl table properties providing the column names, column types and, optionally,
   *          the column name delimiter
   * @throws SerDeException if the formatter cannot be initialized
   */
  @Override
  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    maxBufferedRows =
        HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
    LOG.info("ThriftJDBCBinarySerDe max number of buffered rows: {}", maxBufferedRows);

    // Get column names and types
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
        ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
        : String.valueOf(SerDeUtils.COMMA);

    // A missing property is treated like an empty one instead of failing with a
    // NullPointerException.
    if (columnNameProperty == null || columnNameProperty.length() == 0) {
      columnNames = new ArrayList<String>();
    } else {
      columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    if (columnTypeProperty == null || columnTypeProperty.length() == 0) {
      columnTypes = new ArrayList<TypeInfo>();
    } else {
      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector = (StructObjectInspector) TypeInfoUtils
        .getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    initializeRowAndColumns();

    try {
      thriftFormatter.initialize(conf, tbl);
    } catch (Exception e) {
      // The exception used to be constructed but never thrown, which silently hid
      // formatter initialization failures.
      throw new SerDeException(e);
    }
  }

  @Override
  public Class<? extends Writable> getSerializedClass() {
    return BytesWritable.class;
  }

  /**
   * Writes every column buffer as a TColumn into the output stream, resets the buffering
   * state, and returns the serialized bytes.
   *
   * @return the shared {@link BytesWritable} holding the serialized batch; the same instance
   *         is overwritten by the next batch
   * @throws SerDeException if writing a column through the Thrift protocol fails
   */
  private Writable serializeBatch() throws SerDeException {
    output.reset();
    for (ColumnBuffer columnBuffer : columnBuffers) {
      TColumn tColumn = columnBuffer.toTColumn();
      try {
        tColumn.write(protocol);
      } catch (TException e) {
        throw new SerDeException(e);
      }
    }
    // Start the next batch from a clean state. Resetting the counter here (rather than only
    // on the "buffer full" path) keeps it in step with the buffers when the flush was
    // triggered by a null row.
    initializeRowAndColumns();
    count = 0;
    serializedBytesWritable.set(output.getData(), 0, output.getLength());
    return serializedBytesWritable;
  }

  /**
   * Re-creates the row object and the column buffers from the column names and types.
   * This runs after every flush so that rows already written are not emitted again by a
   * later flush (for example the one triggered from closeOp()).
   */
  private void initializeRowAndColumns() {
    row = new ArrayList<Object>(columnNames.size());
    for (int i = 0; i < columnNames.size(); i++) {
      row.add(null);
    }
    // Initialize column buffers
    columnBuffers = new ColumnBuffer[columnNames.size()];
    for (int i = 0; i < columnBuffers.length; i++) {
      columnBuffers[i] = new ColumnBuffer(Type.getType(columnTypes.get(i)));
    }
  }

  /**
   * Buffers one row and, once enough rows are buffered, writes the TColumn objects to the
   * underlying stream of the TProtocol.
   *
   * @param obj the row to buffer, or {@code null} to flush whatever is currently buffered
   *          (there are no more rows, e.g. closeOp())
   * @param objInspector inspector passed to the formatter to convert {@code obj}
   * @return the serialized batch when a flush happened, otherwise {@code null}
   * @throws SerDeException if the row cannot be converted or the batch cannot be written
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    // A null row means there are no more rows: flush what has been buffered so far.
    if (obj == null) {
      return serializeBatch();
    }
    count += 1;
    try {
      Object[] formattedRow = (Object[]) thriftFormatter.convert(obj, objInspector);
      for (int i = 0; i < columnBuffers.length; i++) {
        columnBuffers[i].addValue(formattedRow[i]);
      }
    } catch (Exception e) {
      throw new SerDeException(e);
    }
    // The buffer is full: flush it. serializeBatch() resets the row counter.
    if (count == maxBufferedRows) {
      return serializeBatch();
    }
    return null;
  }

  @Override
  public SerDeStats getSerDeStats() {
    return null;
  }

  /**
   * Return the bytes from this writable blob.
   * Eventually the client of this method will interpret the byte using the Thrift Protocol
   *
   * NOTE(review): BytesWritable.getBytes() returns the backing array, which may be longer
   * than getLength(); presumably the consumer only reads the encoded columns - confirm.
   */
  @Override
  public Object deserialize(Writable blob) throws SerDeException {
    return ((BytesWritable) blob).getBytes();
  }

  @Override
  public ObjectInspector getObjectInspector() throws SerDeException {
    return rowObjectInspector;
  }
}