/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.hive.HiveWriteUtils.FieldSetter;
import com.facebook.presto.hive.metastore.StorageFormat;
import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.TypeManager;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
import org.apache.hadoop.hive.serde2.columnar.OptimizedLazyBinaryColumnarSerde;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.mapred.JobConf;

import java.io.IOException;
import java.util.List;
import java.util.Properties;

import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR;
import static com.facebook.presto.hive.HiveType.toHiveTypes;
import static com.facebook.presto.hive.HiveWriteUtils.createFieldSetter;
import static com.facebook.presto.hive.HiveWriteUtils.createRecordWriter;
import static com.facebook.presto.hive.HiveWriteUtils.getRowColumnInspectors;
import static com.google.common.base.MoreObjects.toStringHelper;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector;

public class RecordFileWriter
        implements HiveFileWriter
{
    private final Path path;
    private final JobConf conf;
    private final int fieldCount;
    @SuppressWarnings("deprecation")
    private final Serializer serializer;
    private final RecordWriter recordWriter;
    private final SettableStructObjectInspector tableInspector;
    private final List<StructField> structFields;
    private final Object row;
    private final FieldSetter[] setters;
    private final long estimatedWriterSystemMemoryUsage;

    public RecordFileWriter(
            Path path,
            List<String> inputColumnNames,
            StorageFormat storageFormat,
            Properties schema,
            DataSize estimatedWriterSystemMemoryUsage,
            JobConf conf,
            TypeManager typeManager)
    {
        this.path = requireNonNull(path, "path is null");
        this.conf = requireNonNull(conf, "conf is null");

        // existing tables may have columns in a different order
        List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
        List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
                .map(hiveType -> hiveType.getType(typeManager))
                .collect(toList());

        fieldCount = fileColumnNames.size();

        // swap in the optimized serde when writing binary columnar (RCBinary) files
        String serDe = storageFormat.getSerDe();
        if (serDe.equals(LazyBinaryColumnarSerDe.class.getName())) {
            serDe = OptimizedLazyBinaryColumnarSerde.class.getName();
        }
        serializer = initializeSerializer(conf, schema, serDe);
        recordWriter = createRecordWriter(path, conf, schema, storageFormat.getOutputFormat());

        List<ObjectInspector> objectInspectors = getRowColumnInspectors(fileColumnTypes);
        tableInspector = getStandardStructObjectInspector(fileColumnNames, objectInspectors);

        // reorder (and possibly reduce) struct fields to match input
        structFields = ImmutableList.copyOf(inputColumnNames.stream()
                .map(tableInspector::getStructFieldRef)
                .collect(toList()));

        row = tableInspector.create();

        setters = new FieldSetter[structFields.size()];
        for (int i = 0; i < setters.length; i++) {
            setters[i] = createFieldSetter(tableInspector, row, structFields.get(i), fileColumnTypes.get(structFields.get(i).getFieldID()));
        }

        this.estimatedWriterSystemMemoryUsage = estimatedWriterSystemMemoryUsage.toBytes();
    }

    @Override
    public long getSystemMemoryUsage()
    {
        return estimatedWriterSystemMemoryUsage;
    }

    @Override
    public void appendRows(Page dataPage)
    {
        for (int position = 0; position < dataPage.getPositionCount(); position++) {
            appendRow(dataPage, position);
        }
    }

    public void appendRow(Page dataPage, int position)
    {
        // copy one position of each block into the reusable row, then serialize it
        for (int field = 0; field < fieldCount; field++) {
            Block block = dataPage.getBlock(field);
            if (block.isNull(position)) {
                tableInspector.setStructFieldData(row, structFields.get(field), null);
            }
            else {
                setters[field].setField(block, position);
            }
        }

        try {
            recordWriter.write(serializer.serialize(row, tableInspector));
        }
        catch (SerDeException | IOException e) {
            throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
        }
    }

    @Override
    public void commit()
    {
        try {
            recordWriter.close(false);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_WRITER_CLOSE_ERROR, "Error committing write to Hive", e);
        }
    }

    @Override
    public void rollback()
    {
        try {
            try {
                recordWriter.close(true);
            }
            finally {
                // perform explicit deletion here as implementations of RecordWriter.close() often ignore the abort flag
                path.getFileSystem(conf).delete(path, false);
            }
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_WRITER_CLOSE_ERROR, "Error rolling back write to Hive", e);
        }
    }

    @SuppressWarnings("deprecation")
    private static Serializer initializeSerializer(Configuration conf, Properties properties, String serializerName)
    {
        try {
            Serializer result = (Serializer) Class.forName(serializerName).getConstructor().newInstance();
            result.initialize(conf, properties);
            return result;
        }
        catch (SerDeException | ReflectiveOperationException e) {
            throw Throwables.propagate(e);
        }
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("path", path)
                .toString();
    }
}
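
/*
 * A minimal usage sketch (an illustration only, not the actual Presto call site;
 * the variable names below are assumptions, and in the real codebase the writer is
 * driven through the HiveFileWriter interface by the Hive connector's page sink):
 *
 *     RecordFileWriter writer = new RecordFileWriter(
 *             path, inputColumnNames, storageFormat, schema,
 *             estimatedMemory, jobConf, typeManager);
 *     try {
 *         writer.appendRows(page);  // serializes every position of the Page
 *         writer.commit();          // closes the underlying RecordWriter
 *     }
 *     catch (RuntimeException e) {
 *         writer.rollback();        // closes with abort and deletes the partial file
 *         throw e;
 *     }
 */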