/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.storage.parquet;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import parquet.hadoop.api.WriteSupport;
import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.Type;
import org.apache.hadoop.conf.Configuration;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.datum.Datum;
/**
* Tajo implementation of {@link WriteSupport} for {@link Tuple}s.
* Users should use {@link ParquetAppender} and not this class directly.
*/
public class TajoWriteSupport extends WriteSupport<Tuple> {
  private RecordConsumer recordConsumer;
  private final MessageType rootSchema;
  private final Schema rootTajoSchema;

  /**
   * Creates a new TajoWriteSupport.
   *
   * @param tajoSchema The Tajo schema for the table.
   */
  public TajoWriteSupport(Schema tajoSchema) {
    this.rootSchema = new TajoSchemaConverter().convert(tajoSchema);
    this.rootTajoSchema = tajoSchema;
  }

  /**
   * Initializes the WriteSupport.
   *
   * @param configuration The job's configuration.
   * @return A WriteContext that describes how to write the file.
   */
  @Override
  public WriteContext init(Configuration configuration) {
    Map<String, String> extraMetaData = new HashMap<String, String>();
    // Store the Tajo schema in the file metadata so readers can recover it.
    extraMetaData.put(TajoReadSupport.TAJO_SCHEMA_METADATA_KEY,
        rootTajoSchema.toJson());
    return new WriteContext(rootSchema, extraMetaData);
  }

  /**
   * Called once per row group.
   *
   * @param recordConsumer The {@link RecordConsumer} to write to.
   */
  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    this.recordConsumer = recordConsumer;
  }

  /**
   * Writes a Tuple to the file.
   *
   * @param tuple The Tuple to write to the file.
   */
  @Override
  public void write(Tuple tuple) {
    recordConsumer.startMessage();
    writeRecordFields(rootSchema, rootTajoSchema, tuple);
    recordConsumer.endMessage();
  }

  /**
   * Writes each non-null field of the tuple to the current record.
   *
   * @param schema The Parquet schema of the record.
   * @param tajoSchema The Tajo schema of the record.
   * @param tuple The tuple whose values are written.
   * @throws RuntimeException If a REQUIRED field holds a null value.
   */
  private void writeRecordFields(GroupType schema, Schema tajoSchema,
                                 Tuple tuple) {
    List<Type> fields = schema.getFields();
    // Parquet ignores Tajo NULL_TYPE columns, so the Parquet field index
    // may lag behind the Tajo column index; track them separately.
    int index = 0;
    for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
      Column column = tajoSchema.getColumn(tajoIndex);
      if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
        continue;
      }
      Datum datum = tuple.get(tajoIndex);
      Type fieldType = fields.get(index);
      if (!tuple.isNull(tajoIndex)) {
        recordConsumer.startField(fieldType.getName(), index);
        writeValue(fieldType, column, datum);
        recordConsumer.endField(fieldType.getName(), index);
      } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
        // A null value in a REQUIRED field cannot be represented in Parquet.
        throw new RuntimeException("Null-value for required field: " +
            column.getSimpleName());
      }
      ++index;
    }
  }

  /**
   * Writes a single primitive value, dispatching on the Tajo column type.
   *
   * @param fieldType The Parquet type of the field being written.
   * @param column The Tajo column whose type drives the dispatch.
   * @param datum The non-null value to write.
   * @throws RuntimeException If the column's Tajo type is not supported.
   */
  private void writeValue(Type fieldType, Column column, Datum datum) {
    switch (column.getDataType().getType()) {
      case BOOLEAN:
        // addBoolean takes a primitive boolean; no boxing cast is needed.
        recordConsumer.addBoolean(datum.asBool());
        break;
      case BIT:
      case INT2:
      case INT4:
        recordConsumer.addInteger(datum.asInt4());
        break;
      case INT8:
        recordConsumer.addLong(datum.asInt8());
        break;
      case FLOAT4:
        recordConsumer.addFloat(datum.asFloat4());
        break;
      case FLOAT8:
        recordConsumer.addDouble(datum.asFloat8());
        break;
      case CHAR:
      case TEXT:
        recordConsumer.addBinary(Binary.fromString(datum.asChars()));
        break;
      case PROTOBUF:
      case BLOB:
      case INET4:
      case INET6:
        recordConsumer.addBinary(Binary.fromByteArray(datum.asByteArray()));
        break;
      default:
        // The caller has already started this field; silently writing no
        // value would corrupt the record, so fail fast instead.
        throw new RuntimeException("Cannot convert Tajo type: " +
            column.getDataType().getType());
    }
  }
}