/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.cascading.schemes; import cascading.flow.Flow; import cascading.flow.FlowDef; import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; import cascading.pipe.Pipe; import cascading.property.AppProps; import cascading.scheme.Scheme; import cascading.tap.SinkMode; import cascading.tap.Tap; import cascading.tap.hadoop.Hfs; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntryIterator; import hydrograph.engine.cascading.scheme.TextDelimitedAndFixedWidth; import org.apache.hadoop.conf.Configuration; import org.junit.Before; import org.junit.Test; import java.io.IOException; import java.lang.reflect.Type; import java.util.Date; import java.util.Properties; import static data.InputData.*; import static org.junit.Assert.assertEquals; @SuppressWarnings({ "unchecked", "rawtypes" }) public class TextDelimiterAndFixedWidthTest { String inPath; String outPath; Hadoop2MR1FlowConnector flowConnector; Fields fields, fields_new; Class[] types; Scheme inScheme, outScheme; Pipe pipe, pipe2, pipe3; Tap inTap; Tap outTap; Flow flow; TupleEntryIterator sourceIterator; TupleEntryIterator sinkIterator; FlowDef flowDef; @Before public void prepare() { outPath = "testData/schemes/TextMixed/output"; Configuration conf = new Configuration(); Properties properties = new Properties(); properties.putAll(conf.getValByRegex(".*")); AppProps.setApplicationJarClass(properties, TextDelimiterAndFixedWidthTest.class); flowConnector = new Hadoop2MR1FlowConnector(properties); fields = new Fields("f1", "f2", "f3", "f4", "f5"); fields_new = new Fields("f1", "f2", "f3", "f4", "f5", "newline"); } @Test public void itShouldProduceValidResultsForSimpleMixedScheme() throws IOException { String[] inputLengthsAndDelimiters = { "@,@", "3", "4", "3", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", "2", "6", "4", "\n" }; types = new Class[] { String.class, String.class, Integer.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, Integer.class, Integer.class, Integer.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters); inTap = new Hfs(inScheme, itShouldProduceValidResultsForSimpleMixedScheme); outTap = new Hfs(outScheme, outPath + "/itShouldProduceValidResultsForSimpleMixedScheme", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); } @Test public void itShouldProduceValidResultsForAllRecordsInOneLine() throws IOException { String[] inputLengthsAndDelimiters = { ":", "!", "4", ";", "@" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters); inTap = new Hfs(inScheme, itShouldProduceValidResultsForAllRecordsInOneLine); outTap = new Hfs(outScheme, outPath + "/itShouldProduceValidResultsForAllRecordsInOneLine", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(5, tupleRead.size()); assertEquals("abc asd 1234 qwe rty", tupleRead.toString().replace("\r", "")); } @Test public void itShouldProduceValidResultsForCedillaDelimitedRecords() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\\xC7", "4", "\\xC7", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters); inTap = new Hfs(inScheme, itShouldProduceValidResultsForCedillaDelimitedRecords); outTap = new Hfs(outScheme, outPath + "/itShouldProduceValidResultsForCedillaDelimitedRecords", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(5, tupleRead.size()); assertEquals("abc sed 1234 qwe rty", tupleRead.toString().replace("\r", "")); } @Test public void itShouldProduceValidResultsForRecordSpanningMultipleLines() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\n", "4", "\\xC7", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters); inTap = new Hfs(inScheme, itShouldProduceValidResultsForRecordSpanningMultipleLines); outTap = new Hfs(outScheme, outPath + "/itShouldProduceValidResultsForRecordSpanningMultipleLines", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(5, tupleRead.size()); assertEquals("abc ase 1234 qwe rty", tupleRead.toString().replace("\r", "")); } @Test public void itShouldProduceValidResultsForRecordWithLastFixedWidthField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\n", "\\xC7", "\n", "4" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, String.class, String.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForRecordWithLastFixedWidthField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForRecordWithLastFixedWidthField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(5, tupleRead.size()); assertEquals("abc ase qwe rty 1234", tupleRead.toString().replace("\r", "")); } @Test public void itShouldProduceValidResultsForSimpleMixedSchemeWithFixedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { "@", "3", "4", "3", "\n", "1" }; String[] outputLengthsAndDelimiters1 = { ",", "2", "6", "4", "\n", "1" }; types = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, Integer.class, Integer.class, Integer.class, String.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, Integer.class, Integer.class, Integer.class, String.class, Integer.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForSimpleMixedSchemeWithFixedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForSimpleMixedSchemeWithFixedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc ase 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForAllRecordsInOneLineWithFixedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "!", "4", ";", "@", "1" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "1" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForAllRecordsInOneLineWithFixedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForAllRecordsInOneLineWithFixedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); assertEquals(6, tupleRead.size()); assertEquals("abc asd 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForCedillaDelimitedRecordsWithFixedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\\xC7", "4", "\\xC7", "@", "1" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "1" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForCedillaDelimitedRecordsWithFixedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForCedillaDelimitedRecordsWithFixedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc sed 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForRecordSpanningMultipleLinesWithFixedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\n", "4", "\\xC7", "\n", "1" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "1" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForRecordSpanningMultipleLinesWithFixedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForRecordSpanningMultipleLinesWithFixedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc ase 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForRecordWithLastFixedWidthFieldAndFixedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\n", "\\xC7", "\n", "4", "1" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "1" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, String.class, String.class, Integer.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, Integer.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForRecordWithLastFixedWidthFieldAndFixedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForRecordWithLastFixedWidthFieldAndFixedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc ase qwe rty 1234 ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForSimpleMixedSchemeWithDelimitedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { "!", "3", "4", "3", "\n", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", "2", "6", "4", "\n", "\n" }; types = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, Integer.class, Integer.class, Integer.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, Integer.class, Integer.class, Integer.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForSimpleMixedSchemeWithDelimitedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForSimpleMixedSchemeWithDelimitedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); } @Test public void itShouldProduceValidResultsForAllRecordsInOneLineWithDelimitedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "!", "4", ";", "@", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForAllRecordsInOneLineWithDelimitedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForAllRecordsInOneLineWithDelimitedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc asd 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForCedillaDelimitedRecordsWithDelimitedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\\xC7", "4", "\\xC7", "@", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForCedillaDelimitedRecordsWithDelimitedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForCedillaDelimitedRecordsWithDelimitedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc sed 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForRecordSpanningMultipleLinesWithDelimitedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\n", "4", "\\xC7", "\n", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs( inScheme, itShouldProduceValidResultsForRecordSpanningMultipleLinesWithDelimitedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForRecordSpanningMultipleLinesWithDelimitedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc ase 1234 qwe rty ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForRecordWithLastFixedWidthFieldAndDelimitedNewlineField() throws IOException { String[] inputLengthsAndDelimiters = { ":", "\n", "\\xC7", "\n", "4", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ":", "4", "!", "@", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, String.class, String.class, Integer.class, String.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, Integer.class, String.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields_new, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields_new, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs( inScheme, itShouldProduceValidResultsForRecordWithLastFixedWidthFieldAndDelimitedNewlineField); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForRecordWithLastFixedWidthFieldAndDelimitedNewlineField", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(6, tupleRead.size()); assertEquals("abc ase qwe rty 1234 ", tupleRead.toString().replace("\r", "").replace("\n", "")); } @Test public void itShouldProduceValidResultsForFixedWidthRecordsInOneLine() throws IOException { String[] inputLengthsAndDelimiters = { "3", "3", "4", "3", "3" }; String[] outputLengthsAndDelimiters1 = { ",", ",", ",", ",", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { Integer.class, Integer.class, Integer.class, Integer.class, Integer.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, String.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForFixedWidthRecordsInOneLine); outTap = new Hfs(outScheme, outPath + "/itShouldProduceValidResultsForFixedWidthRecordsInOneLine", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); Tuple tupleWritten = sinkIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(5, tupleRead.size()); assertEquals("123 123 1234 123 123", tupleRead.toString().replace("\r", "")); // itShouldWriteAllRecords assertEquals("123 123 1234 123 123", tupleWritten.toString().replace("\r", "")); } @Test public void itShouldProduceValidResultsForDelimiterPresentInFixedWidthData() throws IOException { String[] inputLengthsAndDelimiters = { "2", "1", "2", "1", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", ",", ",", ",", "\n" }; types = new Class[] { String.class, String.class, String.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { Integer.class, Integer.class, Integer.class, Integer.class, String.class }; Type[] typesOfLengthsAndDelimiters1 = new Class[] { String.class, String.class, String.class, String.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters1); inTap = new Hfs(inScheme, itShouldProduceValidResultsForDelimiterPresentInFixedWidthData); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForDelimiterPresentInFixedWidthData", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); sourceIterator = flow.openSource(); sinkIterator = flow.openSink(); Tuple tupleRead = sourceIterator.next().getTuple(); Tuple tupleWritten = sinkIterator.next().getTuple(); // itShouldReadAllRecords assertEquals(5, tupleRead.size()); assertEquals("12 3 12 3 12", tupleRead.toString().replace("\r", "")); // itShouldWriteAllRecords assertEquals("12 3 12 3 12", tupleWritten.toString().replace("\r", "")); } @Test public void itShouldProduceValidResultsForAllDataTypes() throws IOException { String[] inputLengthsAndDelimiters = { "!", "\t", "|", "5", "\n" }; String[] outputLengthsAndDelimiters1 = { ",", "\t", "|", "3", "\n" }; types = new Class[] { Integer.class, String.class, Date.class, Long.class, Float.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, String.class, String.class, Integer.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(new Fields("id", "name", "DOB", "salary", "rating"), inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8"); outScheme = new TextDelimitedAndFixedWidth(new Fields("id", "name", "DOB", "salary", "rating"), outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters); inTap = new Hfs(inScheme, itShouldProduceValidResultsForAllDataTypes); outTap = new Hfs(outScheme, outPath + "/itShouldProduceValidResultsForAllDataTypes", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); } @Test public void itShouldProduceValidResultsForSimpleMixedSchemeWithQuoteChar() throws IOException { String[] inputLengthsAndDelimiters = { "@,@", "3", "4", "3", "|" }; String[] outputLengthsAndDelimiters1 = { ",", "2", "6", "4", "|" }; types = new Class[] { String.class, String.class, Integer.class, String.class, String.class }; Type[] typesOfLengthsAndDelimiters = new Class[] { String.class, Integer.class, Integer.class, Integer.class, String.class }; inScheme = new TextDelimitedAndFixedWidth(fields, inputLengthsAndDelimiters, typesOfLengthsAndDelimiters, types, false, false, "UTF-8", "."); outScheme = new TextDelimitedAndFixedWidth(fields, outputLengthsAndDelimiters1, typesOfLengthsAndDelimiters, types, false, false, "UTF-8","*"); inTap = new Hfs(inScheme, itShouldProduceValidResultsForSimpleMixedSchemeWithQuoteChar); outTap = new Hfs( outScheme, outPath + "/itShouldProduceValidResultsForSimpleMixedSchemeWithQuoteChar", SinkMode.REPLACE); pipe = new Pipe("pipe"); flowDef = FlowDef.flowDef().addSource(pipe, inTap) .addTailSink(pipe, outTap); flow = flowConnector.connect(flowDef); flow.complete(); } }