/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.tools.segment.converter; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.common.data.FieldSpec.DataType; import com.linkedin.pinot.core.data.GenericRow; import com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader; import java.io.File; import java.util.Arrays; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.SchemaBuilder.FieldAssembler; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericDatumWriter; /** * The <code>PinotSegmentToAvroConverter</code> class is the tool to convert Pinot segment to AVRO format. */ public class PinotSegmentToAvroConverter implements PinotSegmentConverter { private final String _segmentDir; private final String _outputFile; public PinotSegmentToAvroConverter(String segmentDir, String outputFile) { _segmentDir = segmentDir; _outputFile = outputFile; } @Override public void convert() throws Exception { PinotSegmentRecordReader recordReader = new PinotSegmentRecordReader(new File(_segmentDir)); try { recordReader.init(); Schema avroSchema = buildAvroSchemaFromPinotSchema(recordReader.getSchema()); try (DataFileWriter<Record> recordWriter = new DataFileWriter<>(new GenericDatumWriter<Record>(avroSchema))) { recordWriter.create(avroSchema, new File(_outputFile)); while (recordReader.hasNext()) { GenericRow row = recordReader.next(); Record record = new Record(avroSchema); for (String field : row.getFieldNames()) { Object value = row.getValue(field); if (value instanceof Object[]) { record.put(field, Arrays.asList((Object[]) value)); } else { record.put(field, value); } } recordWriter.append(record); } } } finally { recordReader.close(); } } /** * Helper method to build Avro schema from Pinot schema. * * @param pinotSchema Pinot schema. * @return Avro schema. */ private Schema buildAvroSchemaFromPinotSchema(com.linkedin.pinot.common.data.Schema pinotSchema) { FieldAssembler<Schema> fieldAssembler = SchemaBuilder.record("record").fields(); for (FieldSpec fieldSpec : pinotSchema.getAllFieldSpecs()) { DataType dataType = fieldSpec.getDataType(); if (fieldSpec.isSingleValueField()) { switch (dataType) { case INT: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().intType().noDefault(); break; case LONG: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().longType().noDefault(); break; case FLOAT: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().floatType().noDefault(); break; case DOUBLE: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().doubleType().noDefault(); break; case STRING: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().stringType().noDefault(); break; default: throw new RuntimeException("Unsupported data type: " + dataType); } } else { switch (dataType) { case INT: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().array().items().intType().noDefault(); break; case LONG: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().array().items().longType().noDefault(); break; case FLOAT: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().array().items().floatType().noDefault(); break; case DOUBLE: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().array().items().doubleType().noDefault(); break; case STRING: fieldAssembler = fieldAssembler.name(fieldSpec.getName()).type().array().items().stringType().noDefault(); break; default: throw new RuntimeException("Unsupported data type: " + dataType); } } } return fieldAssembler.endRecord(); } }