/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.cascading.assembly; import cascading.pipe.Pipe; import cascading.tuple.Fields; import cascading.tuple.Tuple; import com.hotels.plunger.Bucket; import com.hotels.plunger.Data; import com.hotels.plunger.DataBuilder; import com.hotels.plunger.Plunger; import hydrograph.engine.cascading.assembly.UnionAllAssembly; import hydrograph.engine.cascading.assembly.UnionAllAssembly.SchemaMismatchException; import hydrograph.engine.cascading.assembly.infra.ComponentParameters; import hydrograph.engine.core.component.entity.UnionAllEntity; import hydrograph.engine.core.component.entity.elements.OutSocket; import hydrograph.engine.core.component.entity.elements.SchemaField; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import java.lang.reflect.Type; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; /** * Test unionAll sub assembly. The tests are written using plunger framework * * @author Prabodh */ public class UnionAllAssemblyTest { @Before public void setup() { // TODO: Add setup related code here } /** * Test the unionAll component working */ @Test public void TestSimpleUnionAllComponentWorking() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder( new Fields("col1", "col2", "col3")) .addTuple(1, "C2R1", "C3R1").addTuple(2, "C2R2", "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding // to an input of // unionAll // component Data file2 = new DataBuilder( new Fields("col1", "col2", "col3")) .addTuple(3, "C2R3", "C3R3").addTuple(4, "C2R4", "C3R4").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); // pipe corresponding // to an input of // unionAll // component UnionAllEntity unionAllEntity = new UnionAllEntity(); unionAllEntity.setComponentId("unionAll"); unionAllEntity.setOutSocket(new OutSocket("out1")); ComponentParameters parameters = new ComponentParameters(); Fields in1Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); Fields in2Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); addSchemaFields(parameters, in1Fields); addSchemaFields(parameters, in2Fields); parameters.addInputPipe(pipe1); // first input to unionAll component parameters.addInputPipe(pipe2); // second input to unionAll component parameters.addInputFields(in1Fields); // list parameters.addInputFields(in2Fields); // fields // on // input // of // unionAll // component parameters.addOutputFields(new Fields("col1", "col2", "col3")); // list // of // fields // on // output // of // unionAll // component // parameters.s("testunionAll"); //set the name of the component UnionAllAssembly unionAll = new UnionAllAssembly(unionAllEntity, parameters); // create // a // dummy // component // to // be // tested Bucket bucket = plunger.newBucket(new Fields("col1", "col2", "col3"), unionAll); // create // bucket // for // the // unionAll // sub // assembly List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(4)); // assertThat(actual.get(0), is(new Tuple("C1R1", "C2R1", "C3R1"))); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple(1, "C2R1", "C3R1")); expectedOutput.add(new Tuple(2, "C2R2", "C3R2")); expectedOutput.add(new Tuple(3, "C2R3", "C3R3")); expectedOutput.add(new Tuple(4, "C2R4", "C3R4")); Assert.assertEquals(expectedOutput, output); } private void addSchemaFields(ComponentParameters parameters, Fields fields) { Type[] types = fields.getTypes(); Set<SchemaField> schemaFields = new LinkedHashSet<>(); for (int i = 0; i < fields.size(); i++) { SchemaField sc = new SchemaField(fields.get(i).toString(), types[i].toString()); schemaFields.add(sc); } parameters.addSchemaFields(schemaFields); } /** * Test unionAll component working with four inputs */ @Test public void TestUnionAllComponentWithFourInputs() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple(1, "C2R1", "C3R1") .addTuple(2, "C2R2", "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding // to an input of // unionAll // component Data file2 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple(3, "C2R3", "C3R3") .addTuple(4, "C2R4", "C3R4").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); // pipe corresponding // to an input of // unionAll // component Data file3 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple(5, "C2R5", "C3R5") .addTuple(6, "C2R6", "C3R6").build(); Pipe pipe3 = plunger.newNamedPipe("pipe3", file3); // pipe corresponding // to an input of // unionAll // component Data file4 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple(7, "C2R7", "C3R7") .addTuple(8, "C2R8", "C3R8").build(); Pipe pipe4 = plunger.newNamedPipe("pipe4", file4); // pipe corresponding // to an input of // unionAll // component // Map<String, String> test = new HashMap<String, String>(); // test.put("col1", "out"); UnionAllEntity unionAllEntity = new UnionAllEntity(); unionAllEntity.setComponentId("1"); unionAllEntity.setOutSocket(new OutSocket("abcc")); Fields in1Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); Fields in2Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); Fields in3Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); Fields in4Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); ComponentParameters parameters = new ComponentParameters(); addSchemaFields(parameters, in1Fields); addSchemaFields(parameters, in2Fields); addSchemaFields(parameters, in3Fields); addSchemaFields(parameters, in4Fields); parameters.addInputPipe(pipe1); // first input to unionAll component parameters.addInputPipe(pipe2); // second input to unionAll component parameters.addInputPipe(pipe3); // third input to unionAll component parameters.addInputPipe(pipe4); // fourth input to unionAll component parameters.addInputFields(in1Fields); // list parameters.addInputFields(in2Fields); // of parameters.addInputFields(in3Fields); // fields parameters.addInputFields(in4Fields); // on // input // of // unionAll // component parameters.addOutputFields(new Fields("col1", "col2", "col3")); // list // of // fields // on // output // of // unionAll // component // parameters.setComponentName("testunionAll"); //set the name of the // component UnionAllAssembly unionAllAssembly = new UnionAllAssembly(unionAllEntity, parameters); // create // a // dummy // component // to // be // tested Bucket bucket = plunger.newBucket(new Fields("col1", "col2", "col3"), unionAllAssembly); // create // bucket // for // the // unionAll // sub // assembly List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(8)); // assertThat(actual.get(0), is(new Tuple("C1R1", "C2R1", "C3R1"))); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple(1, "C2R1", "C3R1")); expectedOutput.add(new Tuple(2, "C2R2", "C3R2")); expectedOutput.add(new Tuple(3, "C2R3", "C3R3")); expectedOutput.add(new Tuple(4, "C2R4", "C3R4")); expectedOutput.add(new Tuple(5, "C2R5", "C3R5")); expectedOutput.add(new Tuple(6, "C2R6", "C3R6")); expectedOutput.add(new Tuple(7, "C2R7", "C3R7")); expectedOutput.add(new Tuple(8, "C2R8", "C3R8")); Assert.assertEquals(expectedOutput, output); } /** * Test unionAll component with different order of fields in input. The * unionAll component should re-align the fields on all the inputs to match * the first input. The first input is on port 0 */ @Test public void TestReAligningInputFieldsInUnionAllComponent() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("col1", "col3", "col2")).addTuple("C1R1", "C3R1", "C2R1") .addTuple("C1R2", "C3R2", "C2R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding // to an input of // unionAll // component Data file2 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple("C1R3", "C2R3", "C3R3") .addTuple("C1R4", "C2R4", "C3R4").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); // pipe corresponding // to an input of // unionAll // component UnionAllEntity unionAllEntity = new UnionAllEntity(); unionAllEntity.setComponentId("unionAll"); unionAllEntity.setOutSocket(new OutSocket("out1")); ComponentParameters parameters = new ComponentParameters(); Fields in1Fields=new Fields("col1", "col3", "col2").applyTypes(Integer.class, String.class, String.class); Fields in2Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); addSchemaFields(parameters, in1Fields); addSchemaFields(parameters, in2Fields); parameters.addInputPipe(pipe1); // first input to unionAll component parameters.addInputPipe(pipe2); // second input to unionAll component parameters.addInputFields(new Fields("col1", "col3", "col2")); // list parameters.addInputFields(new Fields("col1", "col2", "col3")); // of // fields // on // input // of // unionAll // component parameters.addOutputFields(new Fields("col1", "col2", "col3")); // list // of // fields // on // output // of // unionAll // component // parameters.setComponentName("testunionAll"); //set the name of the // component UnionAllAssembly unionAllAssembly = new UnionAllAssembly(unionAllEntity, parameters); // create // a // dummy // component // to // be // tested Bucket bucket = plunger.newBucket(new Fields("col1", "col2", "col3"), unionAllAssembly); // create // bucket // for // the // unionAll // sub // assembly List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(4)); } @Test(expected=SchemaMismatchException.class) public void itShouldThrowExceptionIfInSchemasAreMismatch() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder( new Fields("col1", "col2", "col3","col4")) .addTuple(1, "C2R1", "C3R1","C4R1").addTuple(2, "C2R2", "C3R2","C4R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding // to an input of // unionAll // component Data file2 = new DataBuilder( new Fields("col1", "col2", "col3")) .addTuple(3, "C2R3", "C3R3").addTuple(4, "C2R4", "C3R4").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); // pipe corresponding // to an input of // unionAll // component UnionAllEntity unionAllEntity = new UnionAllEntity(); unionAllEntity.setComponentId("unionAll"); unionAllEntity.setOutSocket(new OutSocket("out1")); ComponentParameters parameters = new ComponentParameters(); //create fields of inSockets with different no of fields Fields in1Fields=new Fields("col1", "col2", "col3","col4").applyTypes(Integer.class, String.class, String.class,String.class); Fields in2Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); addSchemaFields(parameters, in2Fields); addSchemaFields(parameters, in1Fields); parameters.addInputPipe(pipe1); // first input to unionAll component parameters.addInputPipe(pipe2); // second input to unionAll component parameters.addInputFields(in1Fields); // list parameters.addInputFields(in2Fields); // fields // on // input // of // unionAll // component parameters.addOutputFields(new Fields("col1", "col2", "col3")); // list // of // fields // on // output // of // unionAll // component // parameters.s("testunionAll"); //set the name of the component UnionAllAssembly unionAll = new UnionAllAssembly(unionAllEntity, parameters);// create // a // dummy // component // to // be // tested } @Test(expected=SchemaMismatchException.class) public void itShouldThrowExceptionIfInSchemasFieldsHaveDifferentDataTypes() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder( new Fields("col1", "col2", "col3")) .addTuple(1, "C2R1", "C3R1").addTuple(2, "C2R2", "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding // to an input of // unionAll // component Data file2 = new DataBuilder( new Fields("col1", "col2", "col3")) .addTuple(3, "C2R3", "C3R3").addTuple(4, "C2R4", "C3R4").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); // pipe corresponding // to an input of // unionAll // component UnionAllEntity unionAllEntity = new UnionAllEntity(); unionAllEntity.setComponentId("unionAll"); unionAllEntity.setOutSocket(new OutSocket("out1")); ComponentParameters parameters = new ComponentParameters(); //create fields for inSockets that have different data types Fields in1Fields=new Fields("col1", "col2", "col3").applyTypes(Integer.class, String.class, String.class); Fields in2Fields=new Fields("col1", "col2", "col3").applyTypes(String.class, String.class, String.class); addSchemaFields(parameters, in2Fields); addSchemaFields(parameters, in1Fields); parameters.addInputPipe(pipe1); // first input to unionAll component parameters.addInputPipe(pipe2); // second input to unionAll component parameters.addInputFields(in1Fields); // list parameters.addInputFields(in2Fields); // fields // on // input // of // unionAll // component parameters.addOutputFields(new Fields("col1", "col2", "col3")); // list // of // fields // on // output // of // unionAll // component // parameters.s("testunionAll"); //set the name of the component UnionAllAssembly unionAll = new UnionAllAssembly(unionAllEntity, parameters);// create // a // dummy // component // to // be // tested } @Test(expected=SchemaMismatchException.class) public void itShouldThrowExceptionIfInSchemasFieldsAreNotIdentical() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder( new Fields("col11", "col2", "col3")) .addTuple(1, "C2R1", "C3R1").addTuple(2, "C2R2", "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding // to an input of // unionAll // component Data file2 = new DataBuilder( new Fields("col1", "col2", "col3")) .addTuple(3, "C2R3", "C3R3").addTuple(4, "C2R4", "C3R4").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); // pipe corresponding // to an input of // unionAll // component UnionAllEntity unionAllEntity = new UnionAllEntity(); unionAllEntity.setComponentId("unionAll"); unionAllEntity.setOutSocket(new OutSocket("out1")); ComponentParameters parameters = new ComponentParameters(); //create fields for inSockets that are not identical Fields in1Fields=new Fields("col11", "col2", "col3").applyTypes(Integer.class, String.class, String.class); Fields in2Fields=new Fields("col1", "col2", "col3").applyTypes(String.class, String.class, String.class); addSchemaFields(parameters, in2Fields); addSchemaFields(parameters, in1Fields); parameters.addInputPipe(pipe1); // first input to unionAll component parameters.addInputPipe(pipe2); // second input to unionAll component parameters.addInputFields(in1Fields); // list parameters.addInputFields(in2Fields); // fields // on // input // of // unionAll // component parameters.addOutputFields(new Fields("col11", "col2", "col3")); // list // of // fields // on // output // of // unionAll // component // parameters.s("testunionAll"); //set the name of the component UnionAllAssembly unionAll = new UnionAllAssembly(unionAllEntity, parameters);// create // a // dummy // component // to // be // tested } }