/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.data.readers; import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.RandomStringUtils; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import com.google.common.io.Files; import com.linkedin.pinot.common.data.DimensionFieldSpec; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.common.data.FieldSpec.DataType; import com.linkedin.pinot.common.data.MetricFieldSpec; import com.linkedin.pinot.common.data.Schema; import com.linkedin.pinot.common.data.TimeFieldSpec; import com.linkedin.pinot.common.data.TimeGranularitySpec; import com.linkedin.pinot.core.data.GenericRow; import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig; import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl; /** * Tests the PinotSegmentRecordReader to check that the records being generated * are the same as the records used to create the segment */ public class PinotSegmentRecordReaderTest { private String segmentName; private Schema schema; private String segmentOutputDir; private File segmentIndexDir; private List<GenericRow> rows; private TestRecordReader recordReader; private static String D_SV_1 = "d_sv_1"; private static String D_MV_1 = "d_mv_1"; private static String M1 = "m1"; private static String M2 = "m2"; private static String TIME = "t"; @BeforeClass public void setup() throws Exception { segmentName = "pinotSegmentRecordReaderTest"; schema = createPinotSchema(); segmentOutputDir = Files.createTempDir().toString(); segmentIndexDir = new File(segmentOutputDir, segmentName); rows = createTestData(); recordReader = new TestRecordReader(rows, schema); createSegment(); } private List<GenericRow> createTestData() { List<GenericRow> rows = new ArrayList<>(); Random random = new Random(); Map<String, Object> fields; for (int i = 0; i < 10000; i++) { fields = new HashMap<>(); fields.put(D_SV_1, D_SV_1 + "_" + RandomStringUtils.randomAlphabetic(2)); Object[] d2Array = new Object[5]; for (int j = 0; j < 5; j++) { d2Array[j] = D_MV_1 + "_" + j + "_" + RandomStringUtils.randomAlphabetic(2); } fields.put(D_MV_1, d2Array); fields.put(M1, Math.abs(random.nextInt())); fields.put(M2, Math.abs(random.nextFloat())); fields.put(TIME, Math.abs(random.nextLong())); GenericRow row = new GenericRow(); row.init(fields); rows.add(row); } return rows; } private Schema createPinotSchema() { Schema testSchema = new Schema(); testSchema.setSchemaName("schema"); FieldSpec spec; spec = new DimensionFieldSpec(D_SV_1, DataType.STRING, true); testSchema.addField(spec); spec = new DimensionFieldSpec(D_MV_1, DataType.STRING, false); testSchema.addField(spec); spec = new MetricFieldSpec(M1, DataType.INT); testSchema.addField(spec); spec = new MetricFieldSpec(M2, DataType.FLOAT); testSchema.addField(spec); spec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, TIME)); testSchema.addField(spec); return testSchema; } private void createSegment() throws Exception { SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema); segmentGeneratorConfig.setTableName(segmentName); segmentGeneratorConfig.setOutDir(segmentOutputDir); segmentGeneratorConfig.setSegmentName(segmentName); SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); driver.init(segmentGeneratorConfig, recordReader); driver.build(); if (!segmentIndexDir.exists()) { throw new IllegalStateException("Segment generation failed"); } } @Test public void testPinotSegmentRecordReader() throws Exception { List<GenericRow> outputRows = new ArrayList<>(); PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(segmentIndexDir); pinotSegmentRecordReader.init(); while (pinotSegmentRecordReader.hasNext()) { outputRows.add(pinotSegmentRecordReader.next()); } pinotSegmentRecordReader.close(); Assert.assertEquals(outputRows.size(), rows.size(), "Number of rows returned by PinotSegmentRecordReader is incorrect"); for (int i = 0; i < outputRows.size(); i++) { GenericRow outputRow = outputRows.get(i); GenericRow row = rows.get(i); Assert.assertEquals(outputRow.getValue(D_SV_1), row.getValue(D_SV_1)); Assert.assertEquals(outputRow.getValue(D_MV_1), row.getValue(D_MV_1)); Assert.assertEquals(outputRow.getValue(M1), row.getValue(M1)); Assert.assertEquals(outputRow.getValue(M2), row.getValue(M2)); Assert.assertEquals(outputRow.getValue(TIME), row.getValue(TIME)); } } @AfterClass public void cleanup() { FileUtils.deleteQuietly(new File(segmentOutputDir)); } }