/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.index.persist;
import com.linkedin.pinot.util.TestUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.Test;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.extractors.FieldExtractorFactory;
import com.linkedin.pinot.core.data.readers.AvroRecordReader;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.data.readers.RecordReaderFactory;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.indexsegment.utils.AvroUtils;
/**
 * Tests for {@link AvroRecordReader}: reads the sample Avro fixtures and checks the rows that
 * come back, either against a line-per-record JSON rendering of the same data or against the
 * expected row count.
 */
public class AvroDataPublisherTest {
  private static final String AVRO_DATA = "data/test_sample_data.avro";
  private static final String JSON_DATA = "data/test_sample_data.json";
  private static final String AVRO_MULTI_DATA = "data/test_sample_data_multi_value.avro";

  /** Encoding used to read the JSON fixture, so the test does not depend on the platform default. */
  private static final String JSON_ENCODING = "UTF-8";

  /** Number of lines expected in the JSON fixture. */
  private static final int EXPECTED_JSON_LINES = 10001;

  /** Number of rows expected in the multi-value Avro fixture. */
  private static final int EXPECTED_MULTI_VALUE_ROWS = 28949;

  /**
   * Index of the first JSON line whose values are compared with the Avro row.
   * NOTE(review): the first two lines are skipped by the original test; the reason is not
   * visible here (presumably a fixture quirk) -- confirm before tightening.
   */
  private static final int FIRST_COMPARED_LINE = 2;

  /**
   * Reads the sample Avro file through {@link RecordReaderFactory} and compares every field of
   * each row with the JSON line at the same position.
   */
  @Test
  public void TestReadAvro() throws Exception {
    final String jsonPath = resourcePath(JSON_DATA);
    final SegmentGeneratorConfig config = newAvroConfig(twoColumnSchema(), resourcePath(AVRO_DATA));

    final AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
    try {
      int cnt = 0;
      for (String line : FileUtils.readLines(new File(jsonPath), JSON_ENCODING)) {
        // Parse every line, even once the reader is exhausted, so malformed JSON still fails.
        final JSONObject obj = new JSONObject(line);
        if (avroDataPublisher.hasNext()) {
          final GenericRow recordRow = avroDataPublisher.next();
          for (String column : recordRow.getFieldNames()) {
            final String valueFromJson = obj.get(column).toString();
            final String valueFromAvro = recordRow.getValue(column).toString();
            if (cnt >= FIRST_COMPARED_LINE) {
              // TestNG order is (actual, expected).
              Assert.assertEquals(valueFromAvro, valueFromJson);
            }
          }
        }
        cnt++;
      }
      Assert.assertEquals(cnt, EXPECTED_JSON_LINES);
    } finally {
      // Release the underlying Avro file handle even when an assertion fails.
      avroDataPublisher.close();
    }
  }

  /**
   * Reads the sample Avro file through a directly constructed {@link AvroRecordReader} whose
   * schema contains only two columns, and checks that each row exposes exactly those two fields
   * with values matching the JSON fixture.
   */
  @Test
  public void TestReadPartialAvro() throws Exception {
    final String jsonPath = resourcePath(JSON_DATA);
    final SegmentGeneratorConfig config = newAvroConfig(twoColumnSchema(), resourcePath(AVRO_DATA));

    final AvroRecordReader avroDataPublisher =
        new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
    try {
      // NOTE(review): one row is consumed before the comparison loop starts, so JSON line i is
      // paired with Avro row i + 1. Kept as in the original test -- confirm this is intended.
      avroDataPublisher.next();

      int cnt = 0;
      for (final String line : FileUtils.readLines(new File(jsonPath), JSON_ENCODING)) {
        final JSONObject obj = new JSONObject(line);
        if (avroDataPublisher.hasNext()) {
          final GenericRow recordRow = avroDataPublisher.next();
          Assert.assertEquals(recordRow.getFieldNames().length, 2);
          for (final String column : recordRow.getFieldNames()) {
            final String valueFromJson = obj.get(column).toString();
            final String valueFromAvro = recordRow.getValue(column).toString();
            if (cnt >= FIRST_COMPARED_LINE) {
              Assert.assertEquals(valueFromAvro, valueFromJson);
            }
          }
        }
        cnt++;
      }
      Assert.assertEquals(cnt, EXPECTED_JSON_LINES);
    } finally {
      avroDataPublisher.close();
    }
  }

  /**
   * Reads the multi-value Avro file with a schema extracted from the file itself, checks that
   * every value (and every element of a multi-value field) is non-null, and verifies the total
   * row count.
   */
  @Test
  public void TestReadMultiValueAvro() throws Exception {
    final String filePath = resourcePath(AVRO_MULTI_DATA);
    final SegmentGeneratorConfig config =
        newAvroConfig(AvroUtils.extractSchemaFromAvro(new File(filePath)), filePath);

    final AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
    try {
      int cnt = 0;
      while (avroDataPublisher.hasNext()) {
        final GenericRow recordRow = avroDataPublisher.next();
        for (String column : recordRow.getFieldNames()) {
          final Object valueFromAvro = recordRow.getValue(column);
          Assert.assertNotNull(valueFromAvro, "null value for column " + column);
          if (!avroDataPublisher.getSchema().getFieldSpecFor(column).isSingleValueField()) {
            // Multi-value fields are read back as Object[]; the cast fails the test otherwise.
            for (Object valueObject : (Object[]) valueFromAvro) {
              Assert.assertNotNull(valueObject, "null element in multi-value column " + column);
            }
          }
        }
        cnt++;
      }
      Assert.assertEquals(cnt, EXPECTED_MULTI_VALUE_ROWS);
    } finally {
      avroDataPublisher.close();
    }
  }

  /** Resolves a classpath resource name to a file path via {@link TestUtils}. */
  private String resourcePath(String resourceName) {
    return TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(resourceName));
  }

  /** Builds the schema with the two single-value string dimensions used by the JSON comparisons. */
  private static Schema twoColumnSchema() {
    return new Schema.SchemaBuilder().addSingleValueDimension("column3", DataType.STRING)
        .addSingleValueDimension("column2", DataType.STRING).build();
  }

  /** Creates a v1 segment generator config that reads the given Avro file with the given schema. */
  private static SegmentGeneratorConfig newAvroConfig(Schema schema, String inputFilePath) {
    final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setFormat(FileFormat.AVRO);
    config.setInputFilePath(inputFilePath);
    config.setSegmentVersion(SegmentVersion.v1);
    return config;
  }
}