/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.assembly;

import cascading.pipe.Pipe;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import com.hotels.plunger.Bucket;
import com.hotels.plunger.Data;
import com.hotels.plunger.DataBuilder;
import com.hotels.plunger.Plunger;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.core.component.entity.CumulateEntity;
import hydrograph.engine.core.component.entity.elements.*;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;

/**
 * Unit tests for {@link CumulateAssembly}, exercising the cumulate (running
 * aggregate) component with a count operation combined with map fields,
 * pass-through fields and wildcard pass-through fields.
 */
public class CumulateAssemblyTest {

	@Before
	public void setup() {
		// TODO: setup related things go here
	}

	/**
	 * Tests the cumulate component with a simple count operation, a map field
	 * and a pass-through field.
	 */
	@Test
	public void CumulateCountOfResults() {

		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("col1", "col2", "col3", "col4"))
				.addTuple("C1R1", "C2R1", "C3Rx", "C4R1").addTuple("C1R1", "C2R2", "C3Rx", "C4R2")
				.addTuple("C1R1", "C2R3", "C3Rx", "C4R3").build();
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		CumulateEntity cumulateEntity = new CumulateEntity();
		cumulateEntity.setComponentId("CumulateTest");

		KeyField keyField = new KeyField();
		keyField.setName("col1");
		keyField.setSortOrder("asc");
		cumulateEntity.setKeyFields(new KeyField[] { keyField });

		ArrayList<Operation> operationList = new ArrayList<>();
		Operation operation = new Operation();
		operation.setOperationId("operation1");
		operation.setOperationInputFields(new String[] { "col2" });
		operation.setOperationOutputFields(new String[] { "count" });
		operation.setOperationClass("hydrograph.engine.transformation.userfunctions.cumulate.Count");
		operation.setOperationProperties(new Properties());
		operationList.add(operation);
		cumulateEntity.setOperationsList(operationList);
		cumulateEntity.setNumOperations(1);
		cumulateEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("col4", "col4_new", "in0"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set pass-through fields
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		passThroughFieldsList1.add(new PassThroughField("col3", "in"));
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation fields
		List<OperationField> operationFieldsList = new ArrayList<>();
		OperationField operationField = new OperationField("count", "operation1");
		operationFieldsList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldsList);

		// add outSocket to the list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		cumulateEntity.setOutSocketList(outSocketList);

		ComponentParameters parameters = new ComponentParameters();
		parameters.addInputPipe(pipe1);
		parameters.addInputFields(new Fields("col1", "col2", "col3"));

		CumulateAssembly aggr = new CumulateAssembly(cumulateEntity, parameters);

		// create bucket for the cumulate sub-assembly
		Bucket bucket = plunger.newBucket(new Fields("count", "col4_new", "col3"), aggr);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results against the expected results
		assertThat(actual.size(), is(3));
		assertThat(actual.get(0), is(new Tuple(Long.valueOf("1"), "C4R1", "C3Rx")));
		assertThat(actual.get(1), is(new Tuple(Long.valueOf("2"), "C4R2", "C3Rx")));
		assertThat(actual.get(2), is(new Tuple(Long.valueOf("3"), "C4R3", "C3Rx")));
	}

	/**
	 * Tests the cumulate component with a simple count operation and map
	 * fields.
	 */
	@Test
	public void itShouldCumulateAndDoCountAndMapFields() {

		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple("C1R1", "C2R1", "C3Rx")
				.addTuple("C1R1", "C2R2", "C3Rx").addTuple("C1R1", "C2R3", "C3Rx").build();
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		CumulateEntity cumulateEntity = new CumulateEntity();
		cumulateEntity.setComponentId("CumulateTest");

		KeyField keyField = new KeyField();
		keyField.setName("col1");
		keyField.setSortOrder("asc");
		cumulateEntity.setKeyFields(new KeyField[] { keyField });

		ArrayList<Operation> operationList = new ArrayList<>();
		Operation operation = new Operation();
		operation.setOperationId("operationName1");
		operation.setOperationInputFields(new String[] { "col2" });
		operation.setOperationOutputFields(new String[] { "count" });
		operation.setOperationClass("hydrograph.engine.transformation.userfunctions.cumulate.Count");
		operation.setOperationProperties(new Properties());
		operationList.add(operation);
		cumulateEntity.setOperationsList(operationList);
		cumulateEntity.setNumOperations(1);
		cumulateEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("col3", "col3_new", "in"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set pass-through fields (none for this test)
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation fields
		List<OperationField> operationFieldList = new ArrayList<>();
		OperationField operationField = new OperationField("count", "operationName1");
		operationFieldList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldList);

		// add outSocket to the list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		cumulateEntity.setOutSocketList(outSocketList);

		ComponentParameters parameters = new ComponentParameters();
		parameters.addInputPipe(pipe1);
		parameters.addInputFields(new Fields("col1", "col2", "col3"));

		CumulateAssembly aggr = new CumulateAssembly(cumulateEntity, parameters);

		// create bucket for the cumulate sub-assembly
		Bucket bucket = plunger.newBucket(new Fields("count", "col3_new"), aggr);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results against the expected results
		assertThat(actual.size(), is(3));
		assertThat(actual.get(0), is(new Tuple(Long.valueOf("1"), "C3Rx")));
		assertThat(actual.get(1), is(new Tuple(Long.valueOf("2"), "C3Rx")));
		assertThat(actual.get(2), is(new Tuple(Long.valueOf("3"), "C3Rx")));
	}

	/**
	 * Tests the cumulate component with a count operation, a map field and a
	 * wildcard ("*") pass-through field.
	 */
	// @Test(expected=cascading.tuple.TupleException.class)
	@Test
	public void itShouldCumulateAndDoCountAndMapFieldsWithWildCardPassthroughFields() {

		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("col1", "col2", "col3")).addTuple("C1R1", "C2R1", "C3Rx")
				.addTuple("C1R1", "C2R2", "C3Rx").addTuple("C1R1", "C2R3", "C3Rx").build();
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		CumulateEntity cumulateEntity = new CumulateEntity();
		cumulateEntity.setComponentId("CumulateTest");

		KeyField keyField = new KeyField();
		keyField.setName("col1");
		keyField.setSortOrder("asc");
		cumulateEntity.setKeyFields(new KeyField[] { keyField });

		ArrayList<Operation> operationList = new ArrayList<>();
		Operation operation = new Operation();
		operation.setOperationId("operationName1");
		operation.setOperationInputFields(new String[] { "col2" });
		operation.setOperationOutputFields(new String[] { "count" });
		operation.setOperationClass("hydrograph.engine.transformation.userfunctions.cumulate.Count");
		operation.setOperationProperties(new Properties());
		operationList.add(operation);
		cumulateEntity.setOperationsList(operationList);
		cumulateEntity.setNumOperations(1);
		cumulateEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("col3", "col3_new", "in"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set wildcard pass-through field
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		passThroughFieldsList1.add(new PassThroughField("*", "in"));
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation fields
		List<OperationField> operationFieldList = new ArrayList<>();
		OperationField operationField = new OperationField("count", "operationName1");
		operationFieldList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldList);

		// add outSocket to the list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		cumulateEntity.setOutSocketList(outSocketList);

		ComponentParameters parameters = new ComponentParameters();
		parameters.addInputPipe(pipe1);
		parameters.addInputFields(new Fields("col1", "col2", "col3"));

		CumulateAssembly aggr = new CumulateAssembly(cumulateEntity, parameters);

		// create bucket for the cumulate sub-assembly
		Bucket bucket = plunger.newBucket(new Fields("count", "col3_new", "col1", "col2", "col3"), aggr);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results against the expected results
		assertThat(actual.size(), is(3));
		assertThat(actual.get(0), is(new Tuple(Long.valueOf("1"), "C3Rx", "C1R1", "C2R1", "C3Rx")));
	}

	/**
	 * Tests the cumulate component with a wildcard pass-through field when an
	 * input field ("count") has the same name as an operation output field,
	 * verifying that the operation output takes priority over the pass-through
	 * value.
	 */
	@Test
	public void itShouldCumulateAndCountWithWildCardPassthroughFieldsWithPriority() {

		Plunger plunger = new Plunger();
		Data file1 = new DataBuilder(new Fields("col1", "col2", "col3", "count")).addTuple("C1R1", "C2R1", "C3Rx", 1)
				.addTuple("C1R1", "C2R2", "C3Rx", 1).addTuple("C1R1", "C2R3", "C3Rx", 1).build();
		Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

		CumulateEntity cumulateEntity = new CumulateEntity();
		cumulateEntity.setComponentId("CumulateTest");

		KeyField keyField = new KeyField();
		keyField.setName("col1");
		keyField.setSortOrder("asc");
		cumulateEntity.setKeyFields(new KeyField[] { keyField });

		ArrayList<Operation> operationList = new ArrayList<>();
		Operation operation = new Operation();
		operation.setOperationId("operationName1");
		operation.setOperationInputFields(new String[] { "col2" });
		operation.setOperationOutputFields(new String[] { "count" });
		operation.setOperationClass("hydrograph.engine.transformation.userfunctions.cumulate.Count");
		operation.setOperationProperties(new Properties());
		operationList.add(operation);
		cumulateEntity.setOperationsList(operationList);
		cumulateEntity.setNumOperations(1);
		cumulateEntity.setOperationPresent(true);

		// create outSocket
		OutSocket outSocket1 = new OutSocket("out0");

		// set map fields
		List<MapField> mapFieldsList = new ArrayList<>();
		mapFieldsList.add(new MapField("col3", "col3_new", "in"));
		outSocket1.setMapFieldsList(mapFieldsList);

		// set wildcard pass-through field
		List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
		passThroughFieldsList1.add(new PassThroughField("*", "in"));
		outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

		// set operation fields
		List<OperationField> operationFieldList = new ArrayList<>();
		OperationField operationField = new OperationField("count", "operationName1");
		operationFieldList.add(operationField);
		outSocket1.setOperationFieldList(operationFieldList);

		// add outSocket to the list
		List<OutSocket> outSocketList = new ArrayList<>();
		outSocketList.add(outSocket1);
		cumulateEntity.setOutSocketList(outSocketList);

		ComponentParameters parameters = new ComponentParameters();
		parameters.addInputPipe(pipe1);
		parameters.addInputFields(new Fields("col1", "col2", "col3", "count"));

		CumulateAssembly aggr = new CumulateAssembly(cumulateEntity, parameters);

		// create bucket for the cumulate sub-assembly
		Bucket bucket = plunger.newBucket(new Fields("count", "col3_new", "col1", "col2", "col3"), aggr);

		// get results from bucket
		List<Tuple> actual = bucket.result().asTupleList();

		// assert the actual results against the expected results
		assertThat(actual.size(), is(3));
		assertThat(actual.get(0), is(new Tuple(Long.valueOf("1"), "C3Rx", "C1R1", "C2R1", "C3Rx")));
		assertThat(actual.get(2), is(new Tuple(Long.valueOf("3"), "C3Rx", "C1R1", "C2R3", "C3Rx")));
	}
}
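
// ---------------------------------------------------------------------------
// Illustrative sketch only: not part of the Hydrograph, Cascading or Plunger
// APIs. The tests above assert a running count that restarts whenever the key
// field changes (input sorted ascending on the key), so the three "C1R1" rows
// yield counts 1, 2, 3. The small helper below reproduces that expected
// semantics with plain JDK types as a sanity check; the class and method names
// are hypothetical and exist only for this illustration.
// ---------------------------------------------------------------------------
class RunningCountIllustration {

	// Returns the running count per key for rows already sorted on the key.
	static List<Long> runningCounts(List<String> sortedKeys) {
		List<Long> counts = new ArrayList<>();
		String previousKey = null;
		long count = 0;
		for (String key : sortedKeys) {
			count = key.equals(previousKey) ? count + 1 : 1; // restart on a new key
			counts.add(count);
			previousKey = key;
		}
		return counts;
	}

	public static void main(String[] args) {
		// Mirrors the three "C1R1" rows used in the tests above; prints [1, 2, 3].
		System.out.println(runningCounts(java.util.Arrays.asList("C1R1", "C1R1", "C1R1")));
	}
}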