/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.realtime.impl.kafka;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.Array;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.AvroRecordReader;
/**
 * Fills {@link GenericRow} instances from Avro records, one entry per column of the indexing schema.
 *
 * <p>Two entry points exist: {@link #transform(GenericData.Record, org.apache.avro.Schema, GenericRow)}, which reads
 * the time column under its incoming name, and {@link #transform(GenericRecord, GenericRow)}, which reads every
 * column under the name listed in the indexing schema.
 */
public class AvroRecordToPinotRowGenerator {
  private final Schema indexingSchema;
  private final String incomingTimeColumnName;
  private final String outgoingTimeColumnName;
  private final FieldSpec incomingTimeFieldSpec;

  /**
   * Creates a generator for the given indexing schema.
   *
   * <p>The incoming and outgoing time column names are read from the schema's time field spec once here, so that
   * they do not have to be looked up again for every column of every row.
   *
   * @param indexingSchema schema listing the columns to populate; its time field spec is dereferenced here
   */
  public AvroRecordToPinotRowGenerator(Schema indexingSchema) {
    this.indexingSchema = indexingSchema;
    TimeFieldSpec timeFieldSpec = indexingSchema.getTimeFieldSpec();
    incomingTimeColumnName = timeFieldSpec.getIncomingTimeColumnName();
    outgoingTimeColumnName = timeFieldSpec.getOutgoingTimeColumnName();
    // NOTE(review): a spec built from the incoming granularity is used only when the schema's time column name equals
    // the incoming name; otherwise the schema's own time spec is used. This looks like it could be inverted relative
    // to the field's name -- confirm against TimeFieldSpec before changing. Behavior is deliberately left as-is.
    if (indexingSchema.getTimeColumnName().equals(incomingTimeColumnName)) {
      incomingTimeFieldSpec = new TimeFieldSpec(timeFieldSpec.getIncomingGranularitySpec());
    } else {
      incomingTimeFieldSpec = timeFieldSpec;
    }
  }

  /**
   * Copies every column of the indexing schema from {@code record} into {@code destination}.
   *
   * <p>The outgoing time column is read from, and stored under, the incoming time column name. Arrays are converted
   * through {@link AvroRecordReader#transformAvroArrayToObjectArray}; values of STRING columns and {@link Utf8} values
   * are converted with {@code toString()}. A missing single-value entry is replaced by
   * {@link AvroRecordReader#getDefaultNullValue}; a missing multi-value entry is passed (as {@code null}) through the
   * array conversion.
   *
   * @param record Avro record to read from
   * @param schema Avro schema of the record; not used by this method
   * @param destination row that receives the converted values
   * @return {@code destination}
   */
  public GenericRow transform(GenericData.Record record, org.apache.avro.Schema schema, GenericRow destination) {
    for (String column : indexingSchema.getColumnNames()) {
      final String sourceColumn;
      final FieldSpec fieldSpec;
      if (column.equals(outgoingTimeColumnName)) {
        sourceColumn = incomingTimeColumnName;
        fieldSpec = incomingTimeFieldSpec;
      } else {
        sourceColumn = column;
        fieldSpec = indexingSchema.getFieldSpecFor(column);
      }
      destination.putField(sourceColumn, convertEntry(record.get(sourceColumn), fieldSpec));
    }
    return destination;
  }

  /**
   * Copies every column of the indexing schema from {@code avroRecord} into {@code destination}, using the column
   * names exactly as listed in the indexing schema.
   *
   * <p>{@link Utf8} values become strings and arrays are converted through
   * {@link AvroRecordReader#transformAvroArrayToObjectArray}. A missing single-value entry is replaced by
   * {@link AvroRecordReader#getDefaultNullValue}; a missing multi-value entry goes through the array conversion, the
   * same way the three-argument overload handles it, instead of being stored as {@code null}.
   *
   * @param avroRecord Avro record to read from
   * @param destination row that receives the converted values
   * @return {@code destination}
   */
  @SuppressWarnings("rawtypes") // the Avro Array is passed on as a raw type, as the original calls did
  public GenericRow transform(GenericRecord avroRecord, GenericRow destination) {
    for (String column : indexingSchema.getColumnNames()) {
      FieldSpec fieldSpec = indexingSchema.getFieldSpecFor(column);
      Object entry = avroRecord.get(column);
      if (entry instanceof Utf8) {
        entry = ((Utf8) entry).toString();
      }
      if (entry instanceof Array) {
        entry = AvroRecordReader.transformAvroArrayToObjectArray((Array) entry, fieldSpec);
      }
      if (entry == null) {
        if (fieldSpec.isSingleValueField()) {
          entry = AvroRecordReader.getDefaultNullValue(fieldSpec);
        } else {
          // Relies on the array conversion accepting a null array, exactly as the other overload already does.
          entry = AvroRecordReader.transformAvroArrayToObjectArray(null, fieldSpec);
        }
      }
      destination.putField(column, entry);
    }
    return destination;
  }

  /** Converts one raw Avro value (possibly null) into the value stored in the row for {@code fieldSpec}. */
  @SuppressWarnings("rawtypes") // the Avro Array is passed on as a raw type, as the original calls did
  private static Object convertEntry(Object entry, FieldSpec fieldSpec) {
    boolean missingMultiValue = entry == null && !fieldSpec.isSingleValueField();
    if (entry instanceof Array || missingMultiValue) {
      // For a missing multi-value column a null array is handed to the conversion on purpose.
      Object converted = AvroRecordReader.transformAvroArrayToObjectArray((Array) entry, fieldSpec);
      DataType dataType = fieldSpec.getDataType();
      if (dataType == DataType.STRING || dataType == DataType.STRING_ARRAY) {
        stringifyElements((Object[]) converted);
      }
      return converted;
    }
    if (entry == null) {
      return AvroRecordReader.getDefaultNullValue(fieldSpec);
    }
    if (entry instanceof Utf8 || fieldSpec.getDataType() == DataType.STRING) {
      return entry.toString();
    }
    return entry;
  }

  /** Replaces every non-null element of {@code values} with its {@code toString()} form, in place. */
  private static void stringifyElements(Object[] values) {
    for (int i = 0; i < values.length; ++i) {
      if (values[i] != null) {
        values[i] = values[i].toString();
      }
    }
  }
}