/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.data.extractors; import com.linkedin.pinot.common.data.FieldSpec.DataType; import com.linkedin.pinot.common.data.Schema; import com.linkedin.pinot.core.data.GenericRow; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; import org.testng.Assert; import org.testng.annotations.Test; public class PlainFieldExtractorTest { private static final DataType[] ALL_TYPES = {DataType.INT, DataType.LONG, DataType.FLOAT, DataType.DOUBLE, DataType.STRING}; // All types have single/multi-value version. private static final int NUMBER_OF_TYPES = 2 * ALL_TYPES.length; private static final int INDEX_OF_STRING_TYPE = NUMBER_OF_TYPES - 2; private static final String TEST_COLUMN = "testColumn"; private static final Schema[] ALL_TYPE_SCHEMAS = new Schema[NUMBER_OF_TYPES]; static { int i = 0; for (DataType dataType : ALL_TYPES) { ALL_TYPE_SCHEMAS[i++] = new Schema.SchemaBuilder().setSchemaName("testSchema") .addSingleValueDimension(TEST_COLUMN, dataType) .build(); ALL_TYPE_SCHEMAS[i++] = new Schema.SchemaBuilder().setSchemaName("testSchema") .addMultiValueDimension(TEST_COLUMN, dataType) .build(); } } private class AnyClassWithToString { @Override public String toString() { return "AnyClass"; } } @Test public void simpleTest() { Schema schema = new Schema.SchemaBuilder().setSchemaName("testSchema") .addSingleValueDimension("svDimensionInt", DataType.INT) .addSingleValueDimension("svDimensionDouble", DataType.DOUBLE) .addSingleValueDimension("svClassObject", DataType.STRING) .addMultiValueDimension("mvDimensionLong", DataType.LONG) .addMultiValueDimension("mvClassObject", DataType.STRING) .addMetric("metricInt", DataType.INT) .addTime("timeInt", TimeUnit.DAYS, DataType.INT) .build(); PlainFieldExtractor plainFieldExtractor = new PlainFieldExtractor(schema); GenericRow row = new GenericRow(); Map<String, Object> fieldMap = new HashMap<>(); fieldMap.put("svDimensionInt", (short) 5); fieldMap.put("svDimensionDouble", 3.2F); fieldMap.put("svClassObject", new AnyClassWithToString()); fieldMap.put("mvDimensionLong", 13); fieldMap.put("mvClassObject", new Object[]{new AnyClassWithToString(), new AnyClassWithToString()}); fieldMap.put("metricInt", 34.5); long currentDaysSinceEpoch = System.currentTimeMillis() / 1000 / 60 / 60 / 24; fieldMap.put("timeInt", currentDaysSinceEpoch); row.init(fieldMap); row = plainFieldExtractor.transform(row); Assert.assertTrue(row.getValue("svDimensionInt") instanceof Integer); Assert.assertEquals(row.getValue("svDimensionInt"), 5); Assert.assertTrue(row.getValue("svDimensionDouble") instanceof Double); Assert.assertEquals((double) row.getValue("svDimensionDouble"), 3.2, 0.1); Assert.assertTrue(row.getValue("svClassObject") instanceof String); Assert.assertEquals(row.getValue("svClassObject"), "AnyClass"); Assert.assertTrue(row.getValue("mvDimensionLong") instanceof Object[]); Assert.assertTrue(((Object[]) row.getValue("mvDimensionLong"))[0] instanceof Long); Assert.assertEquals(((Object[]) row.getValue("mvDimensionLong"))[0], 13L); Assert.assertTrue(row.getValue("mvClassObject") instanceof Object[]); Assert.assertTrue(((Object[]) row.getValue("mvClassObject"))[0] instanceof String); Assert.assertTrue(((Object[]) row.getValue("mvClassObject"))[1] instanceof String); Assert.assertEquals(((Object[]) row.getValue("mvClassObject"))[0], "AnyClass"); Assert.assertEquals(((Object[]) row.getValue("mvClassObject"))[1], "AnyClass"); Assert.assertTrue(row.getValue("metricInt") instanceof Integer); Assert.assertEquals(row.getValue("metricInt"), 34); Assert.assertTrue(row.getValue("timeInt") instanceof Integer); Assert.assertEquals(row.getValue("timeInt"), (int) currentDaysSinceEpoch); } @Test public void nullValueTest() { GenericRow row = new GenericRow(); Map<String, Object> fieldMap = new HashMap<>(); for (int i = 0; i < NUMBER_OF_TYPES; i++) { PlainFieldExtractor plainFieldExtractor = new PlainFieldExtractor(ALL_TYPE_SCHEMAS[i]); row.init(fieldMap); plainFieldExtractor.transform(row); Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 0); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 1); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 0); } } @Test public void classWithToStringTest() { GenericRow row = new GenericRow(); Map<String, Object> fieldMap = new HashMap<>(); for (int i = 0; i < NUMBER_OF_TYPES; i++) { PlainFieldExtractor plainFieldExtractor = new PlainFieldExtractor(ALL_TYPE_SCHEMAS[i]); fieldMap.put(TEST_COLUMN, new AnyClassWithToString()); row.init(fieldMap); plainFieldExtractor.transform(row); fieldMap.put(TEST_COLUMN, new Object[]{new AnyClassWithToString(), new AnyClassWithToString()}); row.init(fieldMap); plainFieldExtractor.transform(row); // AnyClassWithToString only works with String (array). if (i >= INDEX_OF_STRING_TYPE) { Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 0); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 0); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 2); } else { Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 2); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 0); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 0); } } } @Test public void automatedTest() { int numTypes = 19; Object[] objectArray = new Object[numTypes]; // Pinot data types. objectArray[0] = 500; // Integer objectArray[1] = new Object[]{500}; // Integer array objectArray[2] = 500L; // Long objectArray[3] = new Object[]{500L}; // Long array objectArray[4] = 500.5F; // Float objectArray[5] = new Object[]{500.5F}; // Float array objectArray[6] = 500.5; // Double objectArray[7] = new Object[]{500.5}; // Double array objectArray[8] = "500"; // String objectArray[9] = new Object[]{"500"}; // String array // Non-Pinot data types. objectArray[10] = true; // Boolean objectArray[11] = (byte) 65; // Byte objectArray[12] = new Object[]{(byte) 65}; // Byte array objectArray[13] = 'a'; // Character objectArray[14] = new Object[]{'a'}; // Character array objectArray[15] = (short) 500; // Short objectArray[16] = new Object[]{(short) 500}; // Short array objectArray[17] = new AnyClassWithToString(); // Object objectArray[18] = new Object[]{new AnyClassWithToString()}; // Object array GenericRow row = new GenericRow(); Map<String, Object> fieldMap = new HashMap<>(); for (int i = 0; i < NUMBER_OF_TYPES; i++) { for (int j = 0; j < numTypes; j++) { PlainFieldExtractor plainFieldExtractor = new PlainFieldExtractor(ALL_TYPE_SCHEMAS[i]); fieldMap.put(TEST_COLUMN, objectArray[j]); row.init(fieldMap); plainFieldExtractor.transform(row); // Check when schema and field match. if (i == j) { Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 0); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 0); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 0); continue; } // Check conversions from Object (array). if (j == 10 || j >= 17/* Index of AnyClassWithToString */) { if (i >= INDEX_OF_STRING_TYPE) { // Conversions from Boolean or Object (array) to String (array). (Allowed) Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 0); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 0); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 1); } else { // Conversions from Boolean or Object (array) to non-String (array). (Not allowed) Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 1); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 0); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 0); } continue; } // Check other conversions. (Because string value is "500", it can convert to any type) Assert.assertEquals(plainFieldExtractor.getTotalErrors(), 0); Assert.assertEquals(plainFieldExtractor.getTotalNulls(), 0); Assert.assertEquals(plainFieldExtractor.getTotalConversions(), 1); } } } @Test public void timeSpecStringTest() { Schema schema = new Schema.SchemaBuilder().setSchemaName("testSchema") .addTime("timeString", TimeUnit.DAYS, DataType.STRING) .build(); PlainFieldExtractor plainFieldExtractor = new PlainFieldExtractor(schema); GenericRow row = new GenericRow(); Map<String, Object> fieldMap = new HashMap<>(); fieldMap.put("timeString", "2016-01-01"); row.init(fieldMap); plainFieldExtractor.transform(row); Assert.assertTrue(row.getValue("timeString") instanceof String); Assert.assertEquals(row.getValue("timeString"), "2016-01-01"); } @Test public void differentIncomingOutgoingTimeSpecTest() { Schema schema = new Schema.SchemaBuilder().setSchemaName("testSchema") .addTime("incoming", TimeUnit.DAYS, DataType.INT, "outgoing", TimeUnit.HOURS, DataType.LONG) .build(); PlainFieldExtractor plainFieldExtractor = new PlainFieldExtractor(schema); GenericRow row = new GenericRow(); Map<String, Object> fieldMap = new HashMap<>(); long currentDaysSinceEpoch = System.currentTimeMillis() / 1000 / 60 / 60 / 24; fieldMap.put("incoming", currentDaysSinceEpoch); row.init(fieldMap); row = plainFieldExtractor.transform(row); Assert.assertNull(row.getValue("incoming")); Assert.assertTrue(row.getValue("outgoing") instanceof Long); Assert.assertEquals(row.getValue("outgoing"), currentDaysSinceEpoch * 24); } }