/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.assembly;
import cascading.pipe.Pipe;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import com.hotels.plunger.Bucket;
import com.hotels.plunger.Data;
import com.hotels.plunger.DataBuilder;
import com.hotels.plunger.Plunger;
import hydrograph.engine.cascading.assembly.AggregateAssembly;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.core.component.entity.AggregateEntity;
import hydrograph.engine.core.component.entity.elements.*;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
/**
 * Unit tests for {@link AggregateAssembly}.
 *
 * <p>Every test follows the same recipe: build a small in-memory data set with plunger, describe
 * the aggregate component through an {@link AggregateEntity} (key fields, operations and a single
 * out socket named {@code out0}), run the assembly and assert on the tuples captured in the
 * output bucket. The repetitive wiring lives in the private helpers at the bottom of the class so
 * that each test only spells out what is specific to its scenario.
 */
public class AggregateAssemblyTest {

    /** Fully qualified name of the aggregate user function used for counting. */
    private static final String COUNT_CLASS = "hydrograph.engine.transformation.userfunctions.aggregate.Count";

    /** Fully qualified name of the aggregate user function used for summing. */
    private static final String SUM_CLASS = "hydrograph.engine.transformation.userfunctions.aggregate.Sum";

    /**
     * Intentionally empty: each test creates its own {@link Plunger} and fixtures, so there is no
     * shared state to prepare.
     */
    @Before
    public void setup() {
        // no shared fixture
    }

    /**
     * Tests the aggregate component with a single count operation, one mapped field
     * ({@code col4 -> col4_new}) and one pass-through field ({@code col3}).
     */
    @Test
    public void TestSimpleAggregateOperation() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3", "col4"))
                .addTuple("C1R1", "C2R1", "C3Rx", "C4R1")
                .addTuple("C1R1", "C2R2", "C3Rx", "C4R2")
                .addTuple("C1R1", "C2R3", "C3Rx", "C4R3")
                .build();
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        OutSocket outSocket = newOutSocket(
                mapFields(new MapField("col4", "col4_new", "in0")),
                passThroughFields("col3"),
                operationFields(new OperationField("count", "operation1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                new KeyField[] { ascendingKey("col1") },
                operations(newOperation("operation1", COUNT_CLASS, "col2", "count")),
                outSocket);

        // NOTE(review): the declared input fields omit col4 although it is mapped above; kept
        // exactly as the test was originally written.
        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3"),
                new Fields("count", "col4_new", "col3"));

        assertThat(actual.size(), is(1));
        assertThat(actual.get(0), is(new Tuple(Long.valueOf("3"), "C4R3", "C3Rx")));
    }

    /**
     * Tests the aggregate component with a single count operation where the only non-operation
     * output is a mapped field ({@code col3 -> col3_new}); no pass-through fields are configured.
     */
    @Test
    public void TestSimpleAggregateOperationWithMapFields() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3"))
                .addTuple("C1R1", "C2R1", "C3Rx")
                .addTuple("C1R1", "C2R2", "C3Rx")
                .addTuple("C1R1", "C2R3", "C3Rx")
                .build();
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        OutSocket outSocket = newOutSocket(
                mapFields(new MapField("col3", "col3_new", "in")),
                passThroughFields(),
                operationFields(new OperationField("count", "operationName1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                new KeyField[] { ascendingKey("col1") },
                operations(newOperation("operationName1", COUNT_CLASS, "col2", "count")),
                outSocket);

        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3"),
                new Fields("count", "col3_new"));

        assertThat(actual.size(), is(1));
        assertThat(actual.get(0), is(new Tuple(Long.valueOf("3"), "C3Rx")));
    }

    /**
     * Tests the aggregate component with two operations on the same key: a count over
     * {@code col2} and a sum over {@code col3}.
     *
     * <p>The number of operations is derived from the operation list, and every
     * {@link OperationField} references the id of the operation that actually declares that
     * output field ({@code sum -> operationName2}, {@code count -> operationName1}).
     */
    @Test
    public void TestAggregateWithMultipleOperations() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3"))
                .addTuple("C1R1", "C2R1", 100)
                .addTuple("C1R1", "C2R2", 100)
                .addTuple("C1R1", "C2R3", 100)
                .build();
        // pipe corresponding to an input of aggregate component
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        ArrayList<Operation> operationList = operations(
                newOperation("operationName1", COUNT_CLASS, "col2", "count"),
                newOperation("operationName2", SUM_CLASS, "col3", "sum"));

        OutSocket outSocket = newOutSocket(
                mapFields(),
                passThroughFields("col1", "col2"),
                operationFields(
                        new OperationField("sum", "operationName2"),
                        new OperationField("count", "operationName1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                new KeyField[] { ascendingKey("col1") }, operationList, outSocket);

        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3"),
                new Fields("sum", "count", "col1", "col2"));

        assertThat(actual.size(), is(1));
        // Output of count is long, whereas output of sum has the same type as its input.
        // col2 (position 3) is deliberately left out of the comparison.
        assertThat(actual.get(0).get(new int[] { 0, 1, 2 }), is(new Tuple(300, 3L, "C1R1")));
    }

    /**
     * Tests the aggregate component when the key fields are {@code null}: all input rows are
     * expected to collapse into a single output row.
     */
    @Test
    public void TestAggregateOnNullKeys() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3"))
                .addTuple("C1R1", "C2R1", 100)
                .addTuple("C1R1", "C2R2", 100)
                .addTuple("C1R1", "C2R3", 100)
                .build();
        // pipe corresponding to an input of aggregate component
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        OutSocket outSocket = newOutSocket(
                mapFields(),
                passThroughFields("col1", "col2"),
                operationFields(new OperationField("sum", "operationName1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                null,
                operations(newOperation("operationName1", SUM_CLASS, "col3", "sum")),
                outSocket);

        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3"),
                new Fields("sum", "col1", "col2"));

        assertThat(actual.size(), is(1));
        // only sum and col1 are compared; col2 is left out of the comparison
        assertThat(actual.get(0).get(new int[] { 0, 1 }), is(new Tuple(300, "C1R1")));
    }

    /**
     * Tests the aggregate component with {@code null} key fields and a secondary key on
     * {@code col2}: the sum must still cover every input row (3 x 200 + 2 x 100 = 800).
     */
    @Test
    public void TestAggregateWithSecondaryKeyFields() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3"))
                .addTuple("C1R1", "C2R1", 200)
                .addTuple("C1R1", "C2R2", 100)
                .addTuple("C1R1", "C2R2", 100)
                .addTuple("C1R1", "C2R1", 200)
                .addTuple("C1R1", "C2R1", 200)
                .build();
        // pipe corresponding to an input of aggregate component
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        OutSocket outSocket = newOutSocket(
                mapFields(),
                passThroughFields("col1", "col2"),
                operationFields(new OperationField("sum", "operationName1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                null,
                operations(newOperation("operationName1", SUM_CLASS, "col3", "sum")),
                outSocket);
        aggregateEntity.setSecondaryKeyFields(new KeyField[] { ascendingKey("col2") });

        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3"),
                new Fields("sum", "col1", "col2"));

        assertThat(actual.size(), is(1));
        // only sum and col1 are compared; col2 is left out of the comparison
        assertThat(actual.get(0).get(new int[] { 0, 1 }), is(new Tuple(800, "C1R1")));
    }

    /**
     * Tests a count aggregation where the pass-through fields are given as the wildcard
     * {@code "*"} and {@code col4} is additionally mapped to {@code col4_new}.
     */
    @Test
    public void itShouldTestSimpleAggregateWithWildCardPassthroughFields() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3", "col4"))
                .addTuple("C1K1", "C2R1", "C3R1", "C4Rx")
                .addTuple("C1K1", "C2R2", "C3R2", "C4Rx")
                .addTuple("C1K1", "C2R3", "C3R3", "C4Rx")
                .addTuple("C1K2", "C2R1", "C3R1", "C4Rx")
                .addTuple("C1K2", "C2R2", "C3R2", "C4Rx")
                .build();
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        OutSocket outSocket = newOutSocket(
                mapFields(new MapField("col4", "col4_new", "in0")),
                passThroughFields("*"),
                operationFields(new OperationField("count", "operation1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                new KeyField[] { ascendingKey("col1") },
                operations(newOperation("operation1", COUNT_CLASS, "col2", "count")),
                outSocket);

        // NOTE(review): the declared input fields omit col4 although it is mapped above; kept
        // exactly as the test was originally written.
        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3"),
                new Fields("count", "col4_new", "col1", "col2", "col3"));

        // one output row per distinct col1 value
        assertThat(actual.size(), is(2));
        assertThat(actual.get(0), is(new Tuple(Long.valueOf("3"), "C4Rx", "C1K1", "C2R3", "C3R3")));
        assertThat(actual.get(1), is(new Tuple(Long.valueOf("2"), "C4Rx", "C1K2", "C2R2", "C3R2")));
    }

    /**
     * Tests wildcard pass-through when the input already carries a field named {@code count}:
     * the expected tuples hold the value produced by the count operation (3 and 2) rather than
     * the input value (1).
     */
    @Test
    public void itShouldTestAggregateWithWildCardPassthroughFieldsWithPriority() {
        Plunger plunger = new Plunger();
        Data input = new DataBuilder(new Fields("col1", "col2", "col3", "count"))
                .addTuple("C1K1", "C2R1", "C3R1", 1)
                .addTuple("C1K1", "C2R2", "C3R2", 1)
                .addTuple("C1K1", "C2R3", "C3R3", 1)
                .addTuple("C1K2", "C2R1", "C3R1", 1)
                .addTuple("C1K2", "C2R2", "C3R2", 1)
                .build();
        Pipe pipe1 = plunger.newNamedPipe("pipe1", input);

        OutSocket outSocket = newOutSocket(
                mapFields(new MapField("col3", "col3_new", "in0")),
                passThroughFields("*"),
                operationFields(new OperationField("count", "operation1")));
        AggregateEntity aggregateEntity = newAggregateEntity(
                new KeyField[] { ascendingKey("col1") },
                operations(newOperation("operation1", COUNT_CLASS, "col2", "count")),
                outSocket);

        List<Tuple> actual = runAggregate(plunger, pipe1, aggregateEntity,
                new Fields("col1", "col2", "col3", "count"),
                new Fields("count", "col3_new", "col1", "col2", "col3"));

        // one output row per distinct col1 value
        assertThat(actual.size(), is(2));
        assertThat(actual.get(0), is(new Tuple(Long.valueOf("3"), "C3R3", "C1K1", "C2R3", "C3R3")));
        assertThat(actual.get(1), is(new Tuple(Long.valueOf("2"), "C3R2", "C1K2", "C2R2", "C3R2")));
    }

    // ------------------------------------------------------------------ helpers

    /** Creates a key field on {@code fieldName} with sort order {@code "asc"}. */
    private static KeyField ascendingKey(String fieldName) {
        KeyField keyField = new KeyField();
        keyField.setName(fieldName);
        keyField.setSortOrder("asc");
        return keyField;
    }

    /**
     * Creates an operation with one input field, one output field and empty properties.
     *
     * @param operationId    id that {@link OperationField}s use to refer to this operation
     * @param operationClass fully qualified class name of the aggregate user function
     * @param inputField     name of the single input field
     * @param outputField    name of the single output field
     */
    private static Operation newOperation(String operationId, String operationClass, String inputField,
            String outputField) {
        Operation operation = new Operation();
        operation.setOperationId(operationId);
        operation.setOperationInputFields(new String[] { inputField });
        operation.setOperationOutputFields(new String[] { outputField });
        operation.setOperationClass(operationClass);
        operation.setOperationProperties(new Properties());
        return operation;
    }

    /** Collects the given operations, in order, into a mutable list. */
    private static ArrayList<Operation> operations(Operation... items) {
        ArrayList<Operation> list = new ArrayList<>();
        for (Operation item : items) {
            list.add(item);
        }
        return list;
    }

    /** Collects the given map fields, in order, into a mutable (possibly empty) list. */
    private static List<MapField> mapFields(MapField... items) {
        List<MapField> list = new ArrayList<>();
        for (MapField item : items) {
            list.add(item);
        }
        return list;
    }

    /**
     * Builds a mutable (possibly empty) list of pass-through fields, one per name, each created
     * with the in-socket id {@code "in"}.
     */
    private static List<PassThroughField> passThroughFields(String... fieldNames) {
        List<PassThroughField> list = new ArrayList<>();
        for (String fieldName : fieldNames) {
            list.add(new PassThroughField(fieldName, "in"));
        }
        return list;
    }

    /** Collects the given operation fields, in order, into a mutable list. */
    private static List<OperationField> operationFields(OperationField... items) {
        List<OperationField> list = new ArrayList<>();
        for (OperationField item : items) {
            list.add(item);
        }
        return list;
    }

    /** Creates the out socket {@code "out0"} with the given map, pass-through and operation fields. */
    private static OutSocket newOutSocket(List<MapField> mapFieldsList, List<PassThroughField> passThroughFieldsList,
            List<OperationField> operationFieldsList) {
        OutSocket outSocket = new OutSocket("out0");
        outSocket.setMapFieldsList(mapFieldsList);
        outSocket.setPassThroughFieldsList(passThroughFieldsList);
        outSocket.setOperationFieldList(operationFieldsList);
        return outSocket;
    }

    /**
     * Creates the entity for component {@code "AggregateTest"} with a single out socket.
     *
     * @param keyFields     key fields to set; may be {@code null}, which is passed through as-is
     * @param operationList operations to register; the entity's operation count is taken from
     *                      this list's size so the two can never disagree
     * @param outSocket     the only out socket of the component
     */
    private static AggregateEntity newAggregateEntity(KeyField[] keyFields, ArrayList<Operation> operationList,
            OutSocket outSocket) {
        AggregateEntity entity = new AggregateEntity();
        entity.setComponentId("AggregateTest");
        entity.setKeyFields(keyFields);
        entity.setOperationsList(operationList);
        entity.setNumOperations(operationList.size());
        entity.setOperationPresent(true);

        List<OutSocket> outSocketList = new ArrayList<>();
        outSocketList.add(outSocket);
        entity.setOutSocketList(outSocketList);
        return entity;
    }

    /**
     * Wires {@code inputPipe} into an {@link AggregateAssembly} built from {@code entity},
     * attaches a plunger bucket to it and returns the tuples the bucket captured.
     *
     * @param plunger      the plunger instance that created {@code inputPipe}
     * @param inputPipe    pipe feeding the aggregate component
     * @param entity       fully configured aggregate entity
     * @param inputFields  fields declared as the component's input
     * @param outputFields fields the bucket is created with
     * @return the tuples captured by the bucket
     */
    private static List<Tuple> runAggregate(Plunger plunger, Pipe inputPipe, AggregateEntity entity,
            Fields inputFields, Fields outputFields) {
        ComponentParameters parameters = new ComponentParameters();
        parameters.addInputPipe(inputPipe);
        parameters.addInputFields(inputFields);

        AggregateAssembly assembly = new AggregateAssembly(entity, parameters);
        Bucket bucket = plunger.newBucket(outputFields, assembly);
        return bucket.result().asTupleList();
    }
}