/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.assembly;

import cascading.pipe.Pipe;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import com.hotels.plunger.Bucket;
import com.hotels.plunger.Data;
import com.hotels.plunger.DataBuilder;
import com.hotels.plunger.Plunger;
import hydrograph.engine.cascading.assembly.NormalizeAssembly;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.core.component.entity.NormalizeEntity;
import hydrograph.engine.core.component.entity.elements.*;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.*;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

/**
 * Test normalize sub assembly. The tests are written using plunger framework
 *
 * @author Prabodh
 */
public class NormalizeAssemblyTest {

	@Before
	public void setup() {
		// TODO: add setup related code here
	}

	/**
	 * A simple unit test to test working of normalize component. The unit test
	 * passes denormalized (vector) records to component and checks for
	 * normalized output.
	 *
	 * @throws IOException
	 */
	@Test
	public void TestNormalizeRecords() throws IOException {
		Plunger plunger = new Plunger();

		// Input: one record per student carrying three marks fields (a vector)
		Fields fields = new Fields("id", "name", "marks1", "marks2", "marks3");
		Data file1 = new DataBuilder(fields).addTuple(1, "John", 94, 56, 84).addTuple(2, "Mary", 54, 89, 74).build();
		// pipe corresponding to an input of normalize component
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		ArrayList<Fields> fieldList = new ArrayList<Fields>();
		fieldList.add(fields);

		ComponentParameters parameters = new ComponentParameters();
		parameters.setInputFieldsList(fieldList);
		parameters.addInputPipe(pipe1);

		NormalizeEntity normalizeEntity = new NormalizeEntity();
		normalizeEntity.setComponentId("normalizeTest");

		// One operation that explodes the three marks fields into one "marks"
		// field per output row, using a custom transform with VectorSize=3
		ArrayList<Operation> operationList = new ArrayList<Operation>();
		Operation operation = new Operation();
		operation.setOperationId("operation1");
		operation.setOperationInputFields(new String[] { "marks1", "marks2", "marks3" });
		operation.setOperationOutputFields(new String[] { "marks" });
		Properties props = new Properties();
		props.setProperty("VectorSize", "3");
		operation.setOperationProperties(props);
		operation.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.NormalizeCustomTransform");
		operationList.add(operation);
		normalizeEntity.setOperationsList(operationList);
		normalizeEntity.setNumOperations(1);
		normalizeEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("name", "name_new", "in0"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set pass through fields
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		passThroughFieldsList1.add(new PassThroughField("id", "in0"));
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation field
		List<OperationField> operationFieldsList = new ArrayList<>();
		OperationField operationField = new OperationField("marks", "operation1");
		operationFieldsList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldsList);

		// add outSocket in list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		normalizeEntity.setOutSocketList(outSocketList);

		NormalizeAssembly normalize = new NormalizeAssembly(normalizeEntity, parameters);

		// create bucket for the normalize sub assembly
		Bucket bucket = plunger.newBucket(new Fields("marks", "id", "name_new"), normalize);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results with expected results: 2 input records x 3
		// marks each = 6 normalized rows
		assertThat(actual.size(), is(6));

		// Use HashSet so that order of fields does not matter while comparison
		Set<Tuple> output = new HashSet<Tuple>(actual);
		Set<Tuple> expectedOutput = new HashSet<Tuple>();
		expectedOutput.add(new Tuple(94, "John", 1));
		expectedOutput.add(new Tuple(56, "John", 1));
		expectedOutput.add(new Tuple(84, "John", 1));
		expectedOutput.add(new Tuple(54, "Mary", 2));
		expectedOutput.add(new Tuple(89, "Mary", 2));
		expectedOutput.add(new Tuple(74, "Mary", 2));

		Assert.assertEquals(expectedOutput, output);
	}

	/**
	 * Verifies that the normalize component does not carry state across input
	 * rows when the transform emits output only for some rows (here, rows with
	 * foo == 1). Only the flagged rows should contribute normalized output.
	 *
	 * @throws IOException
	 */
	@Test
	public void TestNormalizeForReuseableRowChanges() throws IOException {
		Plunger plunger = new Plunger();

		Fields fields = new Fields("id", "foo", "s1", "s2", "s3");
		Data file1 = new DataBuilder(fields).addTuple(1, 1, "A", "B", "C").addTuple(2, 0, "A", "B", "C")
				.addTuple(3, 0, "A", "B", "C").addTuple(4, 1, "A", "B", "C").addTuple(5, 1, "A", "B", "C")
				.addTuple(6, 0, "A", "B", "C").addTuple(7, 1, "A", "B", "C").build();
		// pipe corresponding to an input of normalize component
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		ArrayList<Fields> fieldList = new ArrayList<Fields>();
		fieldList.add(fields);

		ComponentParameters parameters = new ComponentParameters();
		parameters.setInputFieldsList(fieldList);
		parameters.addInputPipe(pipe1);

		NormalizeEntity normalizeEntity = new NormalizeEntity();
		normalizeEntity.setComponentId("normalizeTest");

		ArrayList<Operation> operationList = new ArrayList<Operation>();
		Operation operation = new Operation();
		operation.setOperationId("operation1");
		operation.setOperationInputFields(new String[] { "foo", "s1", "s2", "s3" });
		operation.setOperationOutputFields(new String[] { "string" });
		Properties props = new Properties();
		props.setProperty("VectorSize", "3");
		operation.setOperationProperties(props);
		operation.setOperationClass("hydrograph.engine.cascading.test.customtransformclasses.NormalizeTransform");
		operationList.add(operation);
		normalizeEntity.setOperationsList(operationList);
		normalizeEntity.setNumOperations(1);
		normalizeEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("foo", "foo_new", "in0"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set pass through fields
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		passThroughFieldsList1.add(new PassThroughField("id", "in0"));
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation field
		List<OperationField> operationFieldsList = new ArrayList<>();
		OperationField operationField = new OperationField("string", "operation1");
		operationFieldsList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldsList);

		// add outSocket in list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		normalizeEntity.setOutSocketList(outSocketList);

		NormalizeAssembly normalize = new NormalizeAssembly(normalizeEntity, parameters);

		// create bucket for the normalize sub assembly
		Bucket bucket = plunger.newBucket(new Fields("id", "foo_new", "string"), normalize);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// 4 flagged input rows (foo == 1) x 3 strings each = 12 output rows
		assertThat(actual.size(), is(12));

		// Use HashSet so that order of fields does not matter while comparison
		Set<Tuple> output = new HashSet<Tuple>(actual);
		Set<Tuple> expectedOutput = new HashSet<Tuple>();
		expectedOutput.add(new Tuple("A", 1, 1));
		expectedOutput.add(new Tuple("B", 1, 1));
		expectedOutput.add(new Tuple("C", 1, 1));
		expectedOutput.add(new Tuple("A", 1, 4));
		expectedOutput.add(new Tuple("B", 1, 4));
		expectedOutput.add(new Tuple("C", 1, 4));
		expectedOutput.add(new Tuple("A", 1, 5));
		expectedOutput.add(new Tuple("B", 1, 5));
		expectedOutput.add(new Tuple("C", 1, 5));
		expectedOutput.add(new Tuple("A", 1, 7));
		expectedOutput.add(new Tuple("B", 1, 7));
		expectedOutput.add(new Tuple("C", 1, 7));

		Assert.assertEquals(expectedOutput, output);
	}

	/**
	 * Tests the normalize component with a wild card ("*") pass through field,
	 * which should copy every input field to the output alongside the mapped
	 * and operation fields. Uses the regex-split transform to normalize the
	 * "names" field on the separator " and ".
	 *
	 * @throws IOException
	 */
	@Test
	public void TestNormalizeRecordsWithWildCardPassthroughFields() throws IOException {
		Plunger plunger = new Plunger();

		Fields fields = new Fields("id", "names", "acc_no", "city");
		Data file1 = new DataBuilder(fields).addTuple(1, "John and Smith", 1001, "AAA")
				.addTuple(2, "Mary and Bose", 1154, "BBB").build();
		// pipe corresponding to an input of normalize component
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		ComponentParameters parameters = new ComponentParameters();
		parameters.addInputPipe(pipe1);
		parameters.addInputFields(new Fields("id", "names", "acc_no", "city"));

		NormalizeEntity normalizeEntity = new NormalizeEntity();
		normalizeEntity.setComponentId("normalizeTest");

		ArrayList<Operation> operationList = new ArrayList<Operation>();
		Operation operation = new Operation();
		operation.setOperationId("operation1");
		operation.setOperationInputFields(new String[] { "names" });
		operation.setOperationOutputFields(new String[] { "name" });
		Properties props = new Properties();
		props.setProperty("regex", " and ");
		operation.setOperationProperties(props);
		operation.setOperationClass("hydrograph.engine.transformation.userfunctions.normalize.RegexSplitNormalize");
		operationList.add(operation);
		normalizeEntity.setOperationsList(operationList);
		normalizeEntity.setNumOperations(1);
		normalizeEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("acc_no", "new_acc", "in0"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set wild card pass through fields: all input fields flow through
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		passThroughFieldsList1.add(new PassThroughField("*", "in0"));
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation field
		List<OperationField> operationFieldsList = new ArrayList<>();
		OperationField operationField = new OperationField("name", "operation1");
		operationFieldsList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldsList);

		// add outSocket in list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		normalizeEntity.setOutSocketList(outSocketList);

		NormalizeAssembly normalize = new NormalizeAssembly(normalizeEntity, parameters);

		// create bucket for the normalize sub assembly
		Bucket bucket = plunger.newBucket(new Fields("name", "new_acc", "id", "names", "acc_no", "city"), normalize);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results with expected results: each input record
		// splits into two names, so 2 x 2 = 4 rows
		assertThat(actual.size(), is(4));
		assertTrue(actual.get(0).equals(new Tuple("John", 1001, 1, "John and Smith", 1001, "AAA")));
		assertTrue(actual.get(1).equals(new Tuple("Smith", 1001, 1, "John and Smith", 1001, "AAA")));
		assertTrue(actual.get(2).equals(new Tuple("Mary", 1154, 2, "Mary and Bose", 1154, "BBB")));
		assertTrue(actual.get(3).equals(new Tuple("Bose", 1154, 2, "Mary and Bose", 1154, "BBB")));
	}

	@After
	public void cleanup() {
		// TODO: add cleanup related code here
	}
}