package com.thinkbiganalytics.discovery.util;

/*-
 * #%L
 * thinkbig-schema-discovery-api
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import org.apache.commons.lang3.StringUtils;
import org.junit.Before;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.sql.JDBCType;
import java.util.Arrays;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

/**
 * Unit tests for {@link ParserHelper}: sample-line extraction, JDBC type derivation from sample
 * values, JDBC-to-Hive type mapping, and per-field derived data types.
 */
public class ParserHelperTest {

    /**
     * Twelve newline-terminated lines: one header row followed by eleven identical data rows.
     */
    private static final String TWELVE_LINE_CSV =
        "col1,col2,col3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n"
        + "v1,v2,v3\n";

    /**
     * No shared fixtures are required; each test builds its own input.
     */
    @Before
    public void setUp() throws Exception {
    }

    /**
     * Runs {@link ParserHelper#extractSampleLines} over {@code text} encoded as UTF-8 and asserts
     * how many newline-separated segments the returned sample contains.
     *
     * @param text        the raw content to expose as an input stream
     * @param numRows     the row count passed to {@code extractSampleLines}
     * @param numExpected the number of segments expected when the result is split on {@code "\n"}
     * @throws Exception if extraction fails (for example with an {@link IOException})
     */
    public void textExtract(String text, int numRows, int numExpected) throws Exception {
        try (InputStream is = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8))) {
            String value = ParserHelper.extractSampleLines(is, StandardCharsets.UTF_8, numRows);
            assertEquals(numExpected, value.split("\n").length);
        }
    }

    /**
     * Requesting 10 rows from a 12-line input yields exactly 10 lines.
     */
    @Test
    public void testExtractSample10Lines() throws Exception {
        textExtract(TWELVE_LINE_CSV, 10, 10);
    }

    /**
     * Requesting more rows than the input holds yields every available line (12).
     */
    @Test
    public void testExtractMaxSampleLines() throws Exception {
        textExtract(TWELVE_LINE_CSV, 100, 12);
    }

    /**
     * An input of {@code ParserHelper.MAX_CHARS} characters containing no line break is expected
     * to be rejected with an {@link IOException}.
     */
    @Test
    public void testInvalidFile() throws Exception {
        String text = StringUtils.leftPad("Z", ParserHelper.MAX_CHARS, "Z");
        try {
            textExtract(text, 100, 1);
            fail("Expected an IOException for input without any line break");
        } catch (IOException expected) {
            // Intentionally ignored: the exception is the behavior under test.
        }
    }

    /**
     * Checks the JDBC type derived from sample values: decimals give DOUBLE, whole numbers give
     * INTEGER, and non-numeric or missing samples give VARCHAR.
     */
    @Test
    public void testDeriveJDBCDataType() throws Exception {
        assertEquals("DOUBLE", ParserHelper.deriveJDBCDataType(Arrays.asList("1.0", "20000", "-64.2001")).getName());
        assertEquals("INTEGER", ParserHelper.deriveJDBCDataType(Arrays.asList("1", "20000", "64")).getName());
        assertEquals("VARCHAR", ParserHelper.deriveJDBCDataType(Arrays.asList("1L", "200,00", "64")).getName());
        assertEquals("VARCHAR", ParserHelper.deriveJDBCDataType(Arrays.asList("BOB", "20", "64")).getName());
        assertEquals("VARCHAR", ParserHelper.deriveJDBCDataType(null).getName());
    }

    /**
     * Checks that {@link JDBCType#DOUBLE} maps to the Hive type name {@code "double"}.
     */
    @Test
    public void testSqlTypeToHiveType() throws Exception {
        // assertEquals takes (expected, actual); keeping that order gives a correct failure message.
        assertEquals("double", ParserHelper.sqlTypeToHiveType(JDBCType.DOUBLE));
    }

    /**
     * Checks the derived data type assigned to each field for the Hive schema type, covering
     * unset, empty, valid, and unrecognized native data types as well as missing sample values.
     */
    @Test
    public void testDeriveDataTypes() throws Exception {
        // Native data type deliberately left unset for this field.
        TestField f1 = new TestField();
        f1.setSampleValues(Arrays.asList("10", "20", "30"));

        TestField f2 = new TestField();
        f2.setNativeDataType("");
        f2.setSampleValues(Arrays.asList("10.2", "20.3", "30.4"));

        TestField f3 = new TestField();
        f3.setNativeDataType("");
        f3.setSampleValues(Arrays.asList("BOB", "20.3", "30.4"));

        TestField f4 = new TestField();
        f4.setNativeDataType("BIGINT");
        f4.setSampleValues(Arrays.asList("2015", "203", "304"));

        TestField f5 = new TestField();
        f5.setNativeDataType("INVALIDTYPE");
        f5.setSampleValues(Arrays.asList("BOB", "20.3", "30.4"));

        TestField f6 = new TestField();
        f6.setSampleValues(null);

        ParserHelper.deriveDataTypes(TableSchemaType.HIVE, Arrays.asList(f1, f2, f3, f4, f5, f6));

        assertEquals("int", f1.getDerivedDataType());
        assertEquals("double", f2.getDerivedDataType());
        assertEquals("string", f3.getDerivedDataType());
        assertEquals("bigint", f4.getDerivedDataType());
        assertEquals("string", f5.getDerivedDataType());
        assertEquals("string", f6.getDerivedDataType());
    }
}