/*********************************************************************************************************************** * * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. * **********************************************************************************************************************/ package eu.stratosphere.api.java.io; import java.util.Arrays; import org.junit.Assert; import org.junit.Test; import eu.stratosphere.api.java.ExecutionEnvironment; import eu.stratosphere.api.java.operators.DataSource; import eu.stratosphere.api.java.tuple.Tuple4; import eu.stratosphere.api.java.tuple.Tuple5; import eu.stratosphere.api.java.typeutils.BasicTypeInfo; import eu.stratosphere.api.java.typeutils.TupleTypeInfo; import eu.stratosphere.types.TypeInformation; import eu.stratosphere.api.java.typeutils.ValueTypeInfo; import eu.stratosphere.types.LongValue; import eu.stratosphere.types.StringValue; /** * Tests for the CSV reader builder. */ public class CSVReaderTest { @Test public void testIgnoreHeaderConfigure() { CsvReader reader = getCsvReader(); reader.ignoreFirstLine(); Assert.assertTrue(reader.skipFirstLineAsHeader); } @Test public void testIncludeFieldsDense() { CsvReader reader = getCsvReader(); reader.includeFields(true, true, true); Assert.assertTrue(Arrays.equals(new boolean[] {true, true, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("ttt"); Assert.assertTrue(Arrays.equals(new boolean[] {true, true, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("TTT"); Assert.assertTrue(Arrays.equals(new boolean[] {true, true, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("111"); Assert.assertTrue(Arrays.equals(new boolean[] {true, true, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields(0x7L); Assert.assertTrue(Arrays.equals(new boolean[] {true, true, true}, reader.includedMask)); } @Test public void testIncludeFieldsSparse() { CsvReader reader = getCsvReader(); reader.includeFields(false, true, true, false, false, true, false, false); Assert.assertTrue(Arrays.equals(new boolean[] {false, true, true, false, false, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("fttfftff"); Assert.assertTrue(Arrays.equals(new boolean[] {false, true, true, false, false, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("FTTFFTFF"); Assert.assertTrue(Arrays.equals(new boolean[] {false, true, true, false, false, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("01100100"); Assert.assertTrue(Arrays.equals(new boolean[] {false, true, true, false, false, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields("0t1f0TFF"); Assert.assertTrue(Arrays.equals(new boolean[] {false, true, true, false, false, true}, reader.includedMask)); reader = getCsvReader(); reader.includeFields(0x26L); Assert.assertTrue(Arrays.equals(new boolean[] {false, true, true, false, false, true}, reader.includedMask)); } @Test public void testIllegalCharInStringMask() { CsvReader reader = getCsvReader(); try { reader.includeFields("1t0Tfht"); Assert.fail("Reader accepted an invalid mask string"); } catch (IllegalArgumentException e) { // expected } } @Test public void testIncludeFieldsErrorWhenExcludingAll() { CsvReader reader = getCsvReader(); try { reader.includeFields(false, false, false, false, false, false); Assert.fail("The reader accepted a fields configuration that excludes all fields."); } catch (IllegalArgumentException e) { // all good } try { reader.includeFields(0); Assert.fail("The reader accepted a fields configuration that excludes all fields."); } catch (IllegalArgumentException e) { // all good } try { reader.includeFields("ffffffffffffff"); Assert.fail("The reader accepted a fields configuration that excludes all fields."); } catch (IllegalArgumentException e) { // all good } try { reader.includeFields("00000000000000000"); Assert.fail("The reader accepted a fields configuration that excludes all fields."); } catch (IllegalArgumentException e) { // all good } } @Test public void testReturnType() throws Exception { CsvReader reader = getCsvReader(); DataSource<Item> items = reader.tupleType(Item.class); Assert.assertTrue(items.getType().getTypeClass() == Item.class); } @Test public void testFieldTypes() throws Exception { CsvReader reader = getCsvReader(); DataSource<Item> items = reader.tupleType(Item.class); TypeInformation<?> info = items.getType(); if (!info.isTupleType()) { Assert.fail(); } else { TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info; Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0)); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1)); Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2)); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3)); } CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat(); Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes()); } @Test public void testSubClass() throws Exception { CsvReader reader = getCsvReader(); DataSource<SubItem> sitems = reader.tupleType(SubItem.class); TypeInformation<?> info = sitems.getType(); Assert.assertEquals(true, info.isTupleType()); Assert.assertEquals(SubItem.class, info.getTypeClass()); @SuppressWarnings("unchecked") TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info; Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0)); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1)); Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2)); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3)); CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat(); Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, String.class}, inputFormat.getFieldTypes()); } @Test public void testSubClassWithPartialsInHierarchie() throws Exception { CsvReader reader = getCsvReader(); DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class); TypeInformation<?> info = sitems.getType(); Assert.assertEquals(true, info.isTupleType()); Assert.assertEquals(FinalItem.class, info.getTypeClass()); @SuppressWarnings("unchecked") TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info; Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0)); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1)); Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2)); Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass()); Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass()); Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass()); Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass()); CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat(); Assert.assertArrayEquals(new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class}, inputFormat.getFieldTypes()); } @Test public void testUnsupportedPartialitem() throws Exception { CsvReader reader = getCsvReader(); try { reader.tupleType(PartialItem.class); Assert.fail("tupleType() accepted an underspecified generic class."); } catch (Exception e) { // okay. } } private static CsvReader getCsvReader() { return new CsvReader("/some/none/existing/path", ExecutionEnvironment.createLocalEnvironment(1)); } // -------------------------------------------------------------------------------------------- // Custom types for testing // -------------------------------------------------------------------------------------------- public static class Item extends Tuple4<Integer, String, Double, String> { private static final long serialVersionUID = -7444437337392053502L; } public static class SubItem extends Item { private static final long serialVersionUID = 1L; } public static class PartialItem<A, B, C> extends Tuple5<Integer, A, Double, B, C> { private static final long serialVersionUID = 1L; } public static class FinalItem extends PartialItem<String, StringValue, LongValue> { private static final long serialVersionUID = 1L; } }