/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.segment.index.creator; import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.joda.time.DateTime; import org.joda.time.LocalDateTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import com.linkedin.pinot.common.data.DimensionFieldSpec; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.common.data.Schema; import com.linkedin.pinot.common.data.TimeFieldSpec; import com.linkedin.pinot.core.data.GenericRow; import com.linkedin.pinot.core.data.readers.RecordReader; import com.linkedin.pinot.core.data.readers.TestRecordReader; import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig; import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl; import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl; import com.linkedin.pinot.core.segment.store.SegmentDirectory; public class SegmentGenerationWithTimeColumnTest { private static final String STRING_COL_NAME = "someString"; private static final String TIME_COL_NAME = "date"; private static final String TIME_COL_FORMAT = "yyyyMMdd"; private static final String SEGMENT_DIR_NAME = System.getProperty("java.io.tmpdir") + File.separator + "segmentGenTest"; private static final String SEGMENT_NAME = "testSegment"; private static final int NUM_ROWS = 10000; private Random _random = new Random(System.nanoTime()); private long minTime; private long maxTime; private long startTime = System.currentTimeMillis(); @BeforeMethod public void reset() { minTime = Long.MAX_VALUE; maxTime = Long.MIN_VALUE; FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME)); } @Test public void testSimpleDateSegmentGeneration() throws Exception { Schema schema = createSchema(true); File segmentDir = buildSegment(schema, true); SegmentMetadataImpl metadata = SegmentDirectory.loadSegmentMetadata(segmentDir); Assert.assertEquals(metadata.getStartTime(), sdfToMillis(minTime)); Assert.assertEquals(metadata.getEndTime(), sdfToMillis(maxTime)); } @Test public void testEpochDateSegmentGeneration() throws Exception { Schema schema = createSchema(false); File segmentDir = buildSegment(schema, false); SegmentMetadataImpl metadata = SegmentDirectory.loadSegmentMetadata(segmentDir); Assert.assertEquals(metadata.getStartTime(), minTime); Assert.assertEquals(metadata.getEndTime(), maxTime); } private Schema createSchema(boolean isSimpleDate) { Schema schema = new Schema(); schema.addField(new DimensionFieldSpec(STRING_COL_NAME, FieldSpec.DataType.STRING, true)); if (isSimpleDate) { schema.addField(new TimeFieldSpec(TIME_COL_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS)); } else { schema.addField(new TimeFieldSpec(TIME_COL_NAME, FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS)); } return schema; } private File buildSegment(Schema schema, boolean isSimpleDate) throws Exception { SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema); config.setRawIndexCreationColumns(schema.getDimensionNames()); config.setOutDir(SEGMENT_DIR_NAME); config.setSegmentName(SEGMENT_NAME); config.setTimeColumnName(TIME_COL_NAME); if (isSimpleDate) { config.setSimpleDateFormat(TIME_COL_FORMAT); } final List<GenericRow> rows = new ArrayList<>(); for (int row = 0; row < NUM_ROWS; row++) { HashMap<String, Object> map = new HashMap<>(); for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) { Object value; value = getRandomValueForColumn(fieldSpec, isSimpleDate); map.put(fieldSpec.getName(), value); } GenericRow genericRow = new GenericRow(); genericRow.init(map); rows.add(genericRow); } SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); RecordReader reader = new TestRecordReader(rows, schema); driver.init(config, reader); driver.build(); driver.getOutputDirectory().deleteOnExit(); return driver.getOutputDirectory(); } private Object getRandomValueForColumn(FieldSpec fieldSpec, boolean isSimpleDate) { if (fieldSpec.getName().equals(TIME_COL_NAME)) { return getRandomValueForTimeColumn(isSimpleDate); } return RawIndexCreatorTest.getRandomValue(_random, fieldSpec.getDataType()); } private Object getRandomValueForTimeColumn(boolean isSimpleDate) { long randomMs = ThreadLocalRandom.current().nextLong(startTime); long dateColVal = randomMs; Object result; if (isSimpleDate) { DateTime dateTime = new DateTime(randomMs); LocalDateTime localDateTime = dateTime.toLocalDateTime(); int year = localDateTime.getYear(); int month = localDateTime.getMonthOfYear(); int day = localDateTime.getDayOfMonth(); String dateColStr = String.format("%04d%02d%02d", year, month, day); dateColVal = Integer.valueOf(dateColStr); result = new Integer(Integer.valueOf(dateColStr)); } else { result = new Long(dateColVal); } if (dateColVal < minTime) { minTime = dateColVal; } if (dateColVal > maxTime) { maxTime = dateColVal; } return result; } private long sdfToMillis(long value) { DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(TIME_COL_FORMAT); DateTime dateTime = DateTime.parse(Long.toString(value), sdfFormatter); return dateTime.getMillis(); } }