/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.cascading.assembly; import cascading.pipe.Pipe; import cascading.tuple.Fields; import cascading.tuple.Tuple; import com.hotels.plunger.Bucket; import com.hotels.plunger.Data; import com.hotels.plunger.DataBuilder; import com.hotels.plunger.Plunger; import hydrograph.engine.cascading.assembly.TransformAssembly; import hydrograph.engine.cascading.assembly.infra.ComponentParameters; import hydrograph.engine.core.component.entity.TransformEntity; import hydrograph.engine.core.component.entity.elements.*; import org.junit.Assert; import org.junit.Test; import java.util.*; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; /** * Unit test class for transform sub assembly. The tests are written using * plunger framework * * @author Prabodh * */ public class TransformAssemblyTest { /* * Tests to write: 2. Wild card mapping 3. Multiple operations 4. Override * field assignment, field in operation output as well as pass through 6. * Map fields with similar field names in operation output fields 7. 
Map * fields with similar field names in pass through fields */ /** * Unit test with simple transform operation */ @Test public void simpleTransformWithOneOperationTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago") .addTuple(2, "Mary ", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); Properties userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.SimpleTransformTest"); operation.setOperationInputFields(new String[] { "name" }); operation.setOperationOutputFields(new String[] { "name_trimmed" }); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); transformEntity.setOperationsList(operationList); transformEntity.setOperation(operation); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("id", "in")); passThroughFieldsList1.add(new PassThroughField("name", "in")); passThroughFieldsList1.add(new PassThroughField("city", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new 
OperationField("name_trimmed", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); transformEntity.setComponentId("testTransform"); // set the name of the // component // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create bucket for the transform sub assembly Bucket bucket = plunger.newBucket(new Fields("name_trimmed", "id", "name", "city"), transform); // get results from bucket List<Tuple> actual = bucket.result().asTupleList(); // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("John", 1, " John ", "Chicago")); expectedOutput.add(new Tuple("Mary", 2, "Mary ", "Richmond")); Assert.assertEquals(expectedOutput, output); } /** * Unit test without any transform operation, to mimic drop fields * functionality */ @Test public void noOperationWithMapFieldsTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, "John", "Chicago") .addTuple(2, "Mary", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); 
mapFieldsList.add(new MapField("city", "new_city", "in")); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("id", "in")); passThroughFieldsList1.add(new PassThroughField("name", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); transformEntity.setComponentId("testTransform"); // set the name of the // component // set operation present and number of operations in the entity transformEntity.setOperationPresent(false); transformEntity.setNumOperations(0); transformEntity.setComponentId("testTransform"); // set the name of the // component TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create a dummy component to be tested Bucket bucket = plunger.newBucket(new Fields("new_city", "id", "name"), transform); // create bucket for the transform sub assembly List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("Chicago", 1, "John")); expectedOutput.add(new Tuple("Richmond", 2, "Mary")); Assert.assertEquals(expectedOutput, output); } /** * Unit test for testing renaming of fields in transform component */ @Test public void transformComponentWithWildCardPassthroughFields() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John 
", "Chicago") .addTuple(2, "Mary ", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); Properties userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.SimpleTransformTest"); operation.setOperationInputFields(new String[] { "name" }); operation.setOperationOutputFields(new String[] { "name_trimmed" }); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); transformEntity.setOperationsList(operationList); transformEntity.setOperation(operation); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("*", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("name_trimmed", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); transformEntity.setComponentId("testTransform"); // set the name of the // component // set operation present and number of operations in the entity 
transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create bucket for the transform sub assembly Bucket bucket = plunger.newBucket(new Fields("name_trimmed", "id", "name", "city"), transform); // get results from bucket List<Tuple> actual = bucket.result().asTupleList(); // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("John", 1, " John ", "Chicago")); expectedOutput.add(new Tuple("Mary", 2, "Mary ", "Richmond")); Assert.assertEquals(expectedOutput, output); } @Test public void renameFieldsTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, "John", "Chicago") .addTuple(2, "Mary", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); Properties userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass( "hydrograph.engine.cascading.test.customtransformclasses.TransformTest_RenameFields"); operation.setOperationInputFields(new String[] { "id", "name", "city" }); operation.setOperationOutputFields(new String[] { "new.id", "new.name", "new.city" }); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); 
transformEntity.setOperationsList(operationList); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("new.id", "operation1")); operationFieldsList.add(new OperationField("new.name", "operation1")); operationFieldsList.add(new OperationField("new.city", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); transformEntity.setComponentId("testTransform"); // set the name of the // component // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); // set the name of the component transformEntity.setComponentId("testTransform"); // create a dummy component to be tested TransformAssembly transform = new TransformAssembly(transformEntity, parameters); Bucket bucket = plunger.newBucket(new Fields("new.id", "new.name", "new.city"), transform); // create // bucket // for // the // transform // sub // assembly List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple(1, "John", "Chicago")); expectedOutput.add(new Tuple(2, "Mary", "Richmond")); Assert.assertEquals(expectedOutput, output); } /** * Unit test for 
testing user properties in transform component */ @Test public void userPropertiesTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, "John", "Chicago") .addTuple(2, "Mary", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass( "hydrograph.engine.cascading.test.customtransformclasses.TransformTest_UserProperties"); operation.setOperationInputFields(new String[] { "name" }); operation.setOperationOutputFields(new String[] { "name", "load_id" }); Properties userProps = new Properties(); userProps.put("LOAD_ID", 1); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); transformEntity.setOperationsList(operationList); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("id", "in")); passThroughFieldsList1.add(new PassThroughField("city", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("name", "operation1")); operationFieldsList.add(new OperationField("load_id", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // 
add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); // set the name of the component transformEntity.setComponentId("testTransform"); // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); TransformAssembly transform = new TransformAssembly(transformEntity, parameters); Bucket bucket = plunger.newBucket(new Fields("name", "load_id", "id", "city"), transform); List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("John", 1, 1, "Chicago")); expectedOutput.add(new Tuple("Mary", 1, 2, "Richmond")); Assert.assertEquals(expectedOutput, output); } /** * Unit test with multiple transform operations */ @Test public void simpleTransformWithMultipleOperationTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago") .addTuple(2, "Mary ", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); Properties userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation1 = new Operation(); operation1.setOperationId("operation1"); operation1.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.SimpleTransformTest"); 
operation1.setOperationInputFields(new String[] { "name" }); operation1.setOperationOutputFields(new String[] { "name_trimmed" }); operation1.setOperationProperties(userProps); // create another operation object and set the properties of operation Operation operation2 = new Operation(); operation2.setOperationId("operation2"); operation2.setOperationClass( "hydrograph.engine.cascading.test.customtransformclasses.TransformTest_RenameFields"); operation2.setOperationInputFields(new String[] { "id", "name", "city" }); operation2.setOperationOutputFields(new String[] { "new.id", "new.name", "new.city" }); userProps.put("LOAD_ID", 1); operation2.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation1); operationList.add(operation2); transformEntity.setOperationsList(operationList); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("id", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("name_trimmed", "operation1")); operationFieldsList.add(new OperationField("new.id", "operation2")); operationFieldsList.add(new OperationField("new.name", "operation2")); operationFieldsList.add(new OperationField("new.city", "operation2")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); // set the name of the component transformEntity.setComponentId("testTransform"); // set operation present and number of operations in the entity 
transformEntity.setOperationPresent(true); transformEntity.setNumOperations(2); // set the name of the component transformEntity.setComponentId("testTransform"); // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(2); TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create bucket for the transform sub assembly Bucket bucket = plunger.newBucket(new Fields("name_trimmed", "new.id", "new.name", "new.city", "id"), transform); List<Tuple> actual = bucket.result().asTupleList(); // get results from // bucket // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("John", 1, " John ", "Chicago", 1)); expectedOutput.add(new Tuple("Mary", 2, "Mary ", "Richmond", 2)); Assert.assertEquals(expectedOutput, output); } /** * Unit test for testing overriding fields in custom transform operation * when the field is also specified in pass through */ @Test public void overrideFieldsInCustomOperationtest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago") .addTuple(2, "Mary ", "Richmond").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); // pipe corresponding to an input of transform component ComponentParameters parameters = new ComponentParameters(); Properties userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass( 
"hydrograph.engine.cascading.test.customtransformclasses.TransformTest_UserProperties"); operation.setOperationInputFields(new String[] { "name" }); operation.setOperationOutputFields(new String[] { "name", "load_id" }); userProps.put("LOAD_ID", 1); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); transformEntity.setOperationsList(operationList); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("id", "in")); passThroughFieldsList1.add(new PassThroughField("name", "in")); passThroughFieldsList1.add(new PassThroughField("city", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("name", "operation1")); operationFieldsList.add(new OperationField("load_id", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); // set the name of the component transformEntity.setComponentId("testTransform"); // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); // create a dummy component to be tested TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create bucket for the transform sub assembly Bucket bucket = plunger.newBucket(new Fields("name", "load_id", "id", "city"), transform); // get results from bucket List<Tuple> actual = bucket.result().asTupleList(); // assert the actual results with expected results 
assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("John", 1, 1, "Chicago")); expectedOutput.add(new Tuple("Mary", 1, 2, "Richmond")); Assert.assertEquals(expectedOutput, output); } /** * Unit test with simple transform operation and a map field */ @Test public void simpleTransformWithOneOperationAndMapFieldsTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago") .addTuple(2, "Mary ", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); Properties userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.SimpleTransformTest"); operation.setOperationInputFields(new String[] { "name" }); operation.setOperationOutputFields(new String[] { "name_trimmed" }); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); transformEntity.setOperationsList(operationList); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); mapFieldsList.add(new MapField("city", "new_city", "in")); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new 
PassThroughField("id", "in")); passThroughFieldsList1.add(new PassThroughField("name", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("name_trimmed", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); transformEntity.setComponentId("testTransform"); // set the name of the // component // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create bucket for the transform sub assembly Bucket bucket = plunger.newBucket(new Fields("name_trimmed", "new_city", "id", "name"), transform); // get results from bucket List<Tuple> actual = bucket.result().asTupleList(); // assert the actual results with expected results assertThat(actual.size(), is(2)); // Use HashSet so that order of fields does not matter while comparison Set<Tuple> output = new HashSet<Tuple>(actual); Set<Tuple> expectedOutput = new HashSet<Tuple>(); expectedOutput.add(new Tuple("John", "Chicago", 1, " John ")); expectedOutput.add(new Tuple("Mary", "Richmond", 2, "Mary ")); Assert.assertEquals(expectedOutput, output); } /** * Unit test with simple transform operation and multiple map fields */ @Test public void simpleTransformWithOneOperationAndMulitpleMapFieldsTest() { Plunger plunger = new Plunger(); Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago") .addTuple(2, "Mary ", "Richmond").build(); // pipe corresponding to an input of transform component Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); ComponentParameters parameters = new ComponentParameters(); Properties 
userProps = new Properties(); parameters.addInputPipe(pipe1); // first input to transform component parameters.addInputFields(new Fields("id", "name", "city")); TransformEntity transformEntity = new TransformEntity(); // create the operation object and set the properties of operation Operation operation = new Operation(); operation.setOperationId("operation1"); operation.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.SimpleTransformTest"); operation.setOperationInputFields(new String[] { "name" }); operation.setOperationOutputFields(new String[] { "name_trimmed" }); operation.setOperationProperties(userProps); List<Operation> operationList = new ArrayList<>(); operationList.add(operation); transformEntity.setOperationsList(operationList); // create outSocket OutSocket outSocket1 = new OutSocket("out0"); // set map fields List<MapField> mapFieldsList = new ArrayList<>(); mapFieldsList.add(new MapField("id", "new_id", "in")); mapFieldsList.add(new MapField("city", "new_city", "in")); outSocket1.setMapFieldsList(mapFieldsList); // set pass through fields List<PassThroughField> passThroughFieldsList1 = new ArrayList<>(); passThroughFieldsList1.add(new PassThroughField("name", "in")); outSocket1.setPassThroughFieldsList(passThroughFieldsList1); // set operation field List<OperationField> operationFieldsList = new ArrayList<>(); operationFieldsList.add(new OperationField("name_trimmed", "operation1")); outSocket1.setOperationFieldList(operationFieldsList); // add outSocket in list List<OutSocket> outSocketList = new ArrayList<>(); outSocketList.add(outSocket1); transformEntity.setOutSocketList(outSocketList); transformEntity.setComponentId("testTransform"); // set the name of the // component // set operation present and number of operations in the entity transformEntity.setOperationPresent(true); transformEntity.setNumOperations(1); TransformAssembly transform = new TransformAssembly(transformEntity, parameters); // create bucket for the 
// transform sub assembly
		Bucket bucket = plunger.newBucket(new Fields("name_trimmed", "new_id", "new_city", "name"), transform);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results with expected results
		assertThat(actual.size(), is(2));

		// Use HashSet so that order of fields does not matter while comparison
		Set<Tuple> output = new HashSet<Tuple>(actual);

		Set<Tuple> expectedOutput = new HashSet<Tuple>();
		expectedOutput.add(new Tuple("John", 1, "Chicago", " John "));
		expectedOutput.add(new Tuple("Mary", 2, "Richmond", "Mary "));

		Assert.assertEquals(expectedOutput, output);
	}

	/**
	 * Unit test with same field name in map fields and pass through fields:
	 * "city" is mapped to "new_city" and is also listed as a pass through
	 * field, so both "new_city" and "city" are expected in the output.
	 */
	@Test
	public void sameFieldNameInMapFieldsAndPassThroughFieldsTest() {
		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago")
				.addTuple(2, "Mary ", "Richmond").build();

		// pipe corresponding to an input of transform component
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);
		ComponentParameters parameters = newInputParameters(pipe1, new Fields("id", "name", "city"));

		// operation reading "name" and emitting "name_trimmed"
		Operation operation = newOperation("operation1",
				"hydrograph.engine.cascading.test.customtransformclasses.SimpleTransformTest", "name",
				"name_trimmed");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("city", "new_city", "in"));

		// set pass through fields
		List<PassThroughField> passThroughFieldsList = new ArrayList<>();
		passThroughFieldsList.add(new PassThroughField("id", "in"));
		passThroughFieldsList.add(new PassThroughField("city", "in"));

		// set operation field
		List<OperationField> operationFieldsList = new ArrayList<>();
		operationFieldsList.add(new OperationField("name_trimmed", "operation1"));

		OutSocket outSocket1 = newOutSocket(mapFieldsList, passThroughFieldsList, operationFieldsList);
		TransformEntity transformEntity = newTransformEntity(operation, outSocket1);

		TransformAssembly transform = new TransformAssembly(transformEntity, parameters);

		// create bucket for the transform sub assembly
		Bucket bucket = plunger.newBucket(new Fields("name_trimmed", "new_city", "id", "city"), transform);

		assertTuples(bucket, new Tuple("John", "Chicago", 1, "Chicago"),
				new Tuple("Mary", "Richmond", 2, "Richmond"));
	}

	/**
	 * Unit test where the target name of a map field is the same as an
	 * operation input field: "old_city" is mapped to "city" while the
	 * operation reads the incoming "city" field. The expected output carries
	 * the incoming "city" value in "other_city" and the "old_city" value in
	 * "city".
	 */
	@Test
	public void sameFieldNameInMapFieldTargetAndOperationInputFieldsTest() {
		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("id", "name", "city", "old_city"))
				.addTuple(1, "John", "Chicago", "Wheeling").addTuple(2, "Mary", "Richmond", "Henrico").build();

		// pipe corresponding to an input of transform component
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);
		ComponentParameters parameters = newInputParameters(pipe1, new Fields("id", "name", "city", "old_city"));

		// operation reading "city" and emitting "other_city"
		Operation operation = newOperation("operation1",
				"hydrograph.engine.cascading.test.customtransformclasses.TransformTest_RenameCity", "city",
				"other_city");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("old_city", "city", "in"));

		// set pass through fields
		List<PassThroughField> passThroughFieldsList = new ArrayList<>();
		passThroughFieldsList.add(new PassThroughField("id", "in"));
		passThroughFieldsList.add(new PassThroughField("name", "in"));

		// set operation field
		List<OperationField> operationFieldsList = new ArrayList<>();
		operationFieldsList.add(new OperationField("other_city", "operation1"));

		OutSocket outSocket1 = newOutSocket(mapFieldsList, passThroughFieldsList, operationFieldsList);
		TransformEntity transformEntity = newTransformEntity(operation, outSocket1);

		TransformAssembly transform = new TransformAssembly(transformEntity, parameters);

		// create bucket for the transform sub assembly
		Bucket bucket = plunger.newBucket(new Fields("other_city", "city", "id", "name"), transform);

		assertTuples(bucket, new Tuple("Chicago", "Wheeling", 1, "John"),
				new Tuple("Richmond", "Henrico", 2, "Mary"));
	}

	/**
	 * Unit test with a wild card ("*") pass through field combined with an
	 * operation whose output field "name" has the same name as an input
	 * field. The expected tuples contain the trimmed names, i.e. the
	 * operation output is expected to take priority over the passed through
	 * value of "name".
	 */
	@Test
	public void transformComponentWithWildCardPassthroughFieldsWithPriority() {
		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("id", "name", "city")).addTuple(1, " John ", "Chicago")
				.addTuple(2, "Mary ", "Richmond").build();

		// pipe corresponding to an input of transform component
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);
		ComponentParameters parameters = newInputParameters(pipe1, new Fields("id", "name", "city"));

		// operation reading "name" and emitting a field that is also called "name"
		Operation operation = newOperation("operation1",
				"hydrograph.engine.cascading.test.customtransformclasses.TransformWithSameInputOutputField", "name",
				"name");

		// no map fields in this scenario
		List<MapField> mapFieldsList = new ArrayList<>();

		// set pass through fields: wild card selects every input field
		List<PassThroughField> passThroughFieldsList = new ArrayList<>();
		passThroughFieldsList.add(new PassThroughField("*", "in"));

		// set operation field
		List<OperationField> operationFieldsList = new ArrayList<>();
		operationFieldsList.add(new OperationField("name", "operation1"));

		OutSocket outSocket1 = newOutSocket(mapFieldsList, passThroughFieldsList, operationFieldsList);
		TransformEntity transformEntity = newTransformEntity(operation, outSocket1);
		// this test additionally registers the single operation on the entity
		transformEntity.setOperation(operation);

		TransformAssembly transform = new TransformAssembly(transformEntity, parameters);

		// create bucket for the transform sub assembly
		Bucket bucket = plunger.newBucket(new Fields("name", "id", "city"), transform);

		assertTuples(bucket, new Tuple("John", 1, "Chicago"), new Tuple("Mary", 2, "Richmond"));
	}

	/**
	 * Creates the component parameters for a transform component with a
	 * single input.
	 *
	 * @param inputPipe
	 *            pipe corresponding to the input of the transform component
	 * @param inputFields
	 *            fields available on that input
	 * @return parameters holding the given pipe and fields
	 */
	private static ComponentParameters newInputParameters(Pipe inputPipe, Fields inputFields) {
		ComponentParameters parameters = new ComponentParameters();
		parameters.addInputPipe(inputPipe);
		parameters.addInputFields(inputFields);
		return parameters;
	}

	/**
	 * Creates an operation with one input field, one output field and empty
	 * user properties.
	 *
	 * @param operationId
	 *            id of the operation, referenced by the operation fields of
	 *            the out socket
	 * @param operationClass
	 *            fully qualified name of the custom transform class
	 * @param inputField
	 *            name of the single operation input field
	 * @param outputField
	 *            name of the single operation output field
	 * @return the configured operation
	 */
	private static Operation newOperation(String operationId, String operationClass, String inputField,
			String outputField) {
		Operation operation = new Operation();
		operation.setOperationId(operationId);
		operation.setOperationClass(operationClass);
		operation.setOperationInputFields(new String[] { inputField });
		operation.setOperationOutputFields(new String[] { outputField });
		operation.setOperationProperties(new Properties());
		return operation;
	}

	/**
	 * Creates the "out0" out socket with the given field lists.
	 *
	 * @param mapFields
	 *            map fields of the socket
	 * @param passThroughFields
	 *            pass through fields of the socket
	 * @param operationFields
	 *            operation fields of the socket
	 * @return the configured out socket
	 */
	private static OutSocket newOutSocket(List<MapField> mapFields, List<PassThroughField> passThroughFields,
			List<OperationField> operationFields) {
		OutSocket outSocket = new OutSocket("out0");
		outSocket.setMapFieldsList(mapFields);
		outSocket.setPassThroughFieldsList(passThroughFields);
		outSocket.setOperationFieldList(operationFields);
		return outSocket;
	}

	/**
	 * Creates the transform entity "testTransform" with exactly one operation
	 * and one out socket.
	 *
	 * @param operation
	 *            the only operation of the component
	 * @param outSocket
	 *            the only out socket of the component
	 * @return the configured entity
	 */
	private static TransformEntity newTransformEntity(Operation operation, OutSocket outSocket) {
		TransformEntity transformEntity = new TransformEntity();

		List<Operation> operationList = new ArrayList<>();
		operationList.add(operation);
		transformEntity.setOperationsList(operationList);

		// add outSocket in list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket);
		transformEntity.setOutSocketList(outSocketList);

		// set the name of the component
		transformEntity.setComponentId("testTransform");

		// set operation present and number of operations in the entity
		transformEntity.setOperationPresent(true);
		transformEntity.setNumOperations(1);
		return transformEntity;
	}

	/**
	 * Reads the result of the bucket and asserts that it consists of exactly
	 * the expected tuples, in any order.
	 *
	 * @param bucket
	 *            bucket attached to the assembly under test
	 * @param expected
	 *            the tuples expected in the bucket
	 */
	private static void assertTuples(Bucket bucket, Tuple... expected) {
		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// check the count on the list, since duplicates would collapse in the
		// sets compared below
		assertThat(actual.size(), is(expected.length));

		// Use HashSet so that the order of the tuples does not matter while
		// comparing
		Set<Tuple> output = new HashSet<>(actual);
		Set<Tuple> expectedOutput = new HashSet<>(Arrays.asList(expected));

		Assert.assertEquals(expectedOutput, output);
	}
}