/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.parquet.io.api.Binary;
import java.lang.Override;
import java.lang.RuntimeException;
import java.util.Arrays;
<@pp.dropOutputFile />
<@pp.changeOutputFile name="org/apache/drill/exec/store/ParquetOutputRecordWriter.java" />
<#include "/@includes/license.ftl" />
package org.apache.drill.exec.store;
import com.google.common.collect.Lists;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.expr.holders.*;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter;
import org.apache.drill.exec.store.parquet.ParquetTypeHelper;
import org.apache.drill.exec.vector.*;
import org.apache.drill.exec.util.DecimalUtility;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.io.api.Binary;
import io.netty.buffer.DrillBuf;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.common.types.TypeProtos;
import org.joda.time.DateTimeConstants;
import java.io.IOException;
import java.lang.UnsupportedOperationException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
* Abstract implementation of RecordWriter interface which exposes interface:
* {@link #writeHeader(List)}
* {@link #addField(int,String)}
* to output the data in string format instead of implementing addField for each type holder.
*
* This is useful for text format writers such as CSV, TSV etc.
*
* NB: Source code generated using FreeMarker template ${.template_name}
*/
public abstract class ParquetOutputRecordWriter extends AbstractRecordWriter implements RecordWriter {
private RecordConsumer consumer;
private MessageType schema;
public void setUp(MessageType schema, RecordConsumer consumer) {
this.schema = schema;
this.consumer = consumer;
}
<#list vv.types as type>
<#list type.minor as minor>
<#list vv.modes as mode>
@Override
public FieldConverter getNew${mode.prefix}${minor.class}Converter(int fieldId, String fieldName, FieldReader reader) {
return new ${mode.prefix}${minor.class}ParquetConverter(fieldId, fieldName, reader);
}
public class ${mode.prefix}${minor.class}ParquetConverter extends FieldConverter {
private Nullable${minor.class}Holder holder = new Nullable${minor.class}Holder();
<#if minor.class?contains("Interval")>
private final byte[] output = new byte[12];
</#if>
public ${mode.prefix}${minor.class}ParquetConverter(int fieldId, String fieldName, FieldReader reader) {
super(fieldId, fieldName, reader);
}
@Override
public void writeField() throws IOException {
<#if mode.prefix == "Nullable" >
if (!reader.isSet()) {
return;
}
<#elseif mode.prefix == "Repeated" >
// empty lists are represented by simply not starting a field, rather than starting one and putting in 0 elements
if (reader.size() == 0) {
return;
}
consumer.startField(fieldName, fieldId);
for (int i = 0; i < reader.size(); i++) {
</#if>
<#if minor.class == "TinyInt" ||
minor.class == "UInt1" ||
minor.class == "UInt2" ||
minor.class == "SmallInt" ||
minor.class == "Int" ||
minor.class == "Time" ||
minor.class == "Decimal9" ||
minor.class == "UInt4">
<#if mode.prefix == "Repeated" >
reader.read(i, holder);
consumer.addInteger(holder.value);
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
consumer.addInteger(holder.value);
consumer.endField(fieldName, fieldId);
</#if>
<#elseif
minor.class == "Float4">
<#if mode.prefix == "Repeated" >
reader.read(i, holder);
consumer.addFloat(holder.value);
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
consumer.addFloat(holder.value);
consumer.endField(fieldName, fieldId);
</#if>
<#elseif
minor.class == "BigInt" ||
minor.class == "Decimal18" ||
minor.class == "TimeStamp" ||
minor.class == "UInt8">
<#if mode.prefix == "Repeated" >
reader.read(i, holder);
consumer.addLong(holder.value);
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
consumer.addLong(holder.value);
consumer.endField(fieldName, fieldId);
</#if>
<#elseif minor.class == "Date">
<#if mode.prefix == "Repeated" >
reader.read(i, holder);
consumer.addInteger((int) (holder.value / DateTimeConstants.MILLIS_PER_DAY));
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
// convert from internal Drill date format to Julian Day centered around Unix Epoc
consumer.addInteger((int) (holder.value / DateTimeConstants.MILLIS_PER_DAY));
consumer.endField(fieldName, fieldId);
</#if>
<#elseif
minor.class == "Float8">
<#if mode.prefix == "Repeated" >
reader.read(i, holder);
consumer.addDouble(holder.value);
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
consumer.addDouble(holder.value);
consumer.endField(fieldName, fieldId);
</#if>
<#elseif
minor.class == "Bit">
<#if mode.prefix == "Repeated" >
reader.read(i, holder);
consumer.addBoolean(holder.value == 1);
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
consumer.addBoolean(holder.value == 1);
consumer.endField(fieldName, fieldId);
</#if>
<#elseif
minor.class == "Decimal28Sparse" ||
minor.class == "Decimal38Sparse">
<#if mode.prefix == "Repeated" >
<#else>
consumer.startField(fieldName, fieldId);
reader.read(holder);
byte[] bytes = DecimalUtility.getBigDecimalFromSparse(
holder.buffer, holder.start, ${minor.class}Holder.nDecimalDigits, holder.scale).unscaledValue().toByteArray();
byte[] output = new byte[ParquetTypeHelper.getLengthForMinorType(MinorType.${minor.class?upper_case})];
if (holder.getSign(holder.start, holder.buffer)) {
Arrays.fill(output, 0, output.length - bytes.length, (byte)0xFF);
} else {
Arrays.fill(output, 0, output.length - bytes.length, (byte)0x0);
}
System.arraycopy(bytes, 0, output, output.length - bytes.length, bytes.length);
consumer.addBinary(Binary.fromByteArray(output));
consumer.endField(fieldName, fieldId);
</#if>
<#elseif minor.class?contains("Interval")>
consumer.startField(fieldName, fieldId);
reader.read(holder);
<#if minor.class == "IntervalDay">
Arrays.fill(output, 0, 4, (byte) 0);
IntervalUtility.intToLEByteArray(holder.days, output, 4);
IntervalUtility.intToLEByteArray(holder.milliseconds, output, 8);
<#elseif minor.class == "IntervalYear">
IntervalUtility.intToLEByteArray(holder.value, output, 0);
Arrays.fill(output, 4, 8, (byte) 0);
Arrays.fill(output, 8, 12, (byte) 0);
<#elseif minor.class == "Interval">
IntervalUtility.intToLEByteArray(holder.months, output, 0);
IntervalUtility.intToLEByteArray(holder.days, output, 4);
IntervalUtility.intToLEByteArray(holder.milliseconds, output, 8);
</#if>
consumer.addBinary(Binary.fromByteArray(output));
consumer.endField(fieldName, fieldId);
<#elseif
minor.class == "TimeTZ" ||
minor.class == "Decimal28Dense" ||
minor.class == "Decimal38Dense">
<#if mode.prefix == "Repeated" >
<#else>
</#if>
<#elseif minor.class == "VarChar" || minor.class == "Var16Char" || minor.class == "VarBinary">
<#if mode.prefix == "Repeated">
reader.read(i, holder);
//consumer.startField(fieldName, fieldId);
consumer.addBinary(Binary.fromByteBuffer(holder.buffer.nioBuffer(holder.start, holder.end - holder.start)));
//consumer.endField(fieldName, fieldId);
<#else>
reader.read(holder);
DrillBuf buf = holder.buffer;
consumer.startField(fieldName, fieldId);
consumer.addBinary(Binary.fromByteBuffer(holder.buffer.nioBuffer(holder.start, holder.end - holder.start)));
consumer.endField(fieldName, fieldId);
</#if>
</#if>
<#if mode.prefix == "Repeated">
}
consumer.endField(fieldName, fieldId);
</#if>
}
}
</#list>
</#list>
</#list>
private static class IntervalUtility {
private static void intToLEByteArray(final int value, final byte[] output, final int outputIndex) {
int shiftOrder = 0;
for (int i = outputIndex; i < outputIndex + 4; i++) {
output[i] = (byte) (value >> shiftOrder);
shiftOrder += 8;
}
}
}
}