/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.segment.index.loader;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.converter.SegmentV1V2ToV3FormatConverter;
import com.linkedin.pinot.core.segment.index.readers.StringDictionary;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import com.linkedin.pinot.core.segment.store.ColumnIndexType;
import com.linkedin.pinot.core.segment.store.SegmentDirectory;
import com.linkedin.pinot.core.segment.store.SegmentDirectoryPaths;
import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils;
import com.linkedin.pinot.util.TestUtils;
import java.io.File;
import java.nio.file.Files;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
/**
 * Tests for {@code Loaders}: loading segments across format versions (v1 vs. v3),
 * recovery when a stale v3 conversion directory is left behind, and correct
 * handling of dictionary padding characters for old- and new-format segments.
 */
public class LoadersTest {
  private static final String AVRO_DATA = "data/test_data-mv.avro";
  private static final String PADDING_OLD = "data/paddingOld.tar.gz";
  private static final String PADDING_PERCENT = "data/paddingPercent.tar.gz";
  private static final String PADDING_NULL = "data/paddingNull.tar.gz";

  private File _indexDir;
  private File _segmentDirectory;
  private IndexLoadingConfig _v1IndexLoadingConfig;
  private IndexLoadingConfig _v3IndexLoadingConfig;

  /**
   * Builds a fresh v1 segment from the bundled Avro data into a temp directory
   * before each test, and prepares mmap-based loading configs for v1 and v3.
   *
   * @throws Exception if segment generation fails
   */
  @BeforeMethod
  public void setUp()
      throws Exception {
    _indexDir = Files.createTempDirectory(LoadersTest.class.getName() + "_segmentDir").toFile();
    final String filePath = TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(AVRO_DATA));
    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), _indexDir, "daysSinceEpoch",
            TimeUnit.HOURS, "testTable");
    config.setSegmentNamePostfix("1");
    config.setTimeColumnName("daysSinceEpoch");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    _segmentDirectory = new File(_indexDir, driver.getSegmentName());

    _v1IndexLoadingConfig = new IndexLoadingConfig();
    _v1IndexLoadingConfig.setReadMode(ReadMode.mmap);
    _v1IndexLoadingConfig.setSegmentVersion(SegmentVersion.v1);

    _v3IndexLoadingConfig = new IndexLoadingConfig();
    _v3IndexLoadingConfig.setReadMode(ReadMode.mmap);
    _v3IndexLoadingConfig.setSegmentVersion(SegmentVersion.v3);
  }

  /**
   * Deletes the temp segment directory created by {@link #setUp()}.
   * Runs after each test method (per-method fixture, hence {@code @AfterMethod}).
   */
  @AfterMethod
  public void tearDown() {
    if (_indexDir != null) {
      FileUtils.deleteQuietly(_indexDir);
    }
  }

  /**
   * Verifies version handling on load: an explicit v1 config keeps the segment
   * in v1 (no v3 directory is created), while the default load path converts
   * the on-disk segment to v3.
   *
   * @throws Exception on load failure
   */
  @Test
  public void testLoad()
      throws Exception {
    SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(_segmentDirectory);
    Assert.assertEquals(originalMetadata.getSegmentVersion(), SegmentVersion.v1);
    // note: ordering of these two test blocks matters — the second block converts
    // the on-disk segment to v3, so the v1 check must come first
    {
      // Explicitly pass v1 format since we will convert by default to v3
      IndexSegment indexSegment = Loaders.IndexSegment.load(_segmentDirectory, _v1IndexLoadingConfig);
      Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), originalMetadata.getVersion());
      Assert.assertFalse(SegmentDirectoryPaths.segmentDirectoryFor(_segmentDirectory, SegmentVersion.v3).exists());
    }
    {
      // with this code and converter in place, make sure we still load original version
      // by default. We require specific configuration for v3.
      IndexSegment indexSegment = Loaders.IndexSegment.load(_segmentDirectory, ReadMode.mmap);
      Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v3.toString());
      Assert.assertTrue(SegmentDirectoryPaths.segmentDirectoryFor(_segmentDirectory, SegmentVersion.v3).exists());
    }
  }

  /**
   * Format converter will leave stale directories around if there was a
   * conversion failure. This test checks that loading still succeeds (and
   * converts to v3, cleaning up the stale temp dir) in that scenario.
   *
   * @throws Exception on load failure
   */
  @Test
  public void testLoadWithStaleConversionDir()
      throws Exception {
    SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(_segmentDirectory);
    Assert.assertEquals(originalMetadata.getSegmentVersion(), SegmentVersion.v1);
    Assert.assertFalse(SegmentDirectoryPaths.segmentDirectoryFor(_segmentDirectory, SegmentVersion.v3).exists());

    // Simulate a failed prior conversion by leaving a stale temp file behind.
    File v3TempDir = new SegmentV1V2ToV3FormatConverter().v3ConversionTempDirectory(_segmentDirectory);
    FileUtils.touch(v3TempDir);
    {
      IndexSegment indexSegment = Loaders.IndexSegment.load(_segmentDirectory, ReadMode.mmap);
      Assert.assertEquals(indexSegment.getSegmentMetadata().getVersion(), SegmentVersion.v3.toString());
      Assert.assertTrue(SegmentDirectoryPaths.segmentDirectoryFor(_segmentDirectory, SegmentVersion.v3).exists());
    }
    {
      IndexSegment indexSegment = Loaders.IndexSegment.load(_segmentDirectory, _v3IndexLoadingConfig);
      Assert.assertEquals(SegmentVersion.valueOf(indexSegment.getSegmentMetadata().getVersion()), SegmentVersion.v3);
      Assert.assertTrue(SegmentDirectoryPaths.segmentDirectoryFor(_segmentDirectory, SegmentVersion.v3).exists());
      // The stale temp dir must have been cleaned up by the load.
      Assert.assertFalse(v3TempDir.exists());
    }
  }

  /**
   * Verifies padding-character behavior of the string dictionary for three
   * segment flavors: the old format (implicit legacy '%' padding), the new
   * format with explicit '%' padding, and the new format with '\0' padding.
   *
   * @throws Exception on untar or load failure
   */
  @Test
  public void testPadding()
      throws Exception {
    // Old format (legacy '%' pad character)
    StringDictionary dict = loadNameDictionary(PADDING_OLD, "paddingOld", V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    assertPercentPaddedValues(dict);

    // New format, explicit padding character '%'
    dict = loadNameDictionary(PADDING_PERCENT, "paddingPercent", V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    assertPercentPaddedValues(dict);

    // New format, null ('\0') padding character: raw values keep the padding,
    // get() strips it, and indexOf() matches padded prefixes
    dict = loadNameDictionary(PADDING_NULL, "paddingNull", V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
    Assert.assertEquals(dict.getStringValue(0), "lynda\0\0\0\0");
    Assert.assertEquals(dict.getStringValue(1), "lynda 2.0");
    Assert.assertEquals(dict.get(0), "lynda");
    Assert.assertEquals(dict.get(1), "lynda 2.0");
    Assert.assertEquals(dict.indexOf("lynda\0"), 0);
    Assert.assertEquals(dict.indexOf("lynda\0\0"), 0);
  }

  /**
   * Untars the given segment resource into {@code _indexDir}, asserts the
   * padding character recorded in the "name" column metadata, and returns the
   * string dictionary for that column loaded in heap mode.
   *
   * @param tarGzResource classpath resource of the tarred segment
   * @param segmentDirName directory name the tarball expands to
   * @param expectedPaddingChar padding character expected in column metadata
   * @return the "name" column's string dictionary
   * @throws Exception on untar or load failure
   */
  private StringDictionary loadNameDictionary(String tarGzResource, String segmentDirName, char expectedPaddingChar)
      throws Exception {
    TarGzCompressionUtils.unTar(
        new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(tarGzResource))),
        _indexDir);
    File segmentDirectory = new File(_indexDir, segmentDirName);
    SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDirectory);
    ColumnMetadata columnMetadata = metadata.getColumnMetadataFor("name");
    Assert.assertEquals(columnMetadata.getPaddingCharacter(), expectedPaddingChar);
    SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, metadata, ReadMode.heap);
    // NOTE(review): reader/segmentDir are never closed here (same as the original
    // test); if SegmentDirectory.Reader is Closeable this leaks — confirm and
    // consider try-with-resources.
    SegmentDirectory.Reader reader = segmentDir.createReader();
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    return new StringDictionary(dictionaryBuffer, columnMetadata);
  }

  /**
   * Asserts dictionary contents for a segment padded with '%': raw values
   * retain the padding, get() strips it, and indexOf() matches padded prefixes.
   */
  private void assertPercentPaddedValues(StringDictionary dict) {
    Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
    Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
    Assert.assertEquals(dict.get(0), "lynda 2.0");
    Assert.assertEquals(dict.get(1), "lynda");
    Assert.assertEquals(dict.indexOf("lynda%"), 1);
    Assert.assertEquals(dict.indexOf("lynda%%"), 1);
  }
}