/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.assembly;
import cascading.pipe.Pipe;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import com.hotels.plunger.Bucket;
import com.hotels.plunger.Data;
import com.hotels.plunger.DataBuilder;
import com.hotels.plunger.Plunger;
import hydrograph.engine.cascading.assembly.JoinAssembly;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.core.component.entity.JoinEntity;
import hydrograph.engine.core.component.entity.elements.JoinKeyFields;
import hydrograph.engine.core.component.entity.elements.MapField;
import hydrograph.engine.core.component.entity.elements.OutSocket;
import hydrograph.engine.core.component.entity.elements.PassThroughField;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
/**
* Test join sub assembly. The tests are written using plunger framework
*
* @author Prabodh
*/
public class JoinAssemblyTest {
/**
 * JUnit fixture hook that runs before each test method.
 * <p>
 * The body is intentionally empty at present: no shared state is
 * initialised here.
 */
@Before
public void setup() {
// TODO: add setup related code here
}
/**
 * Test simple inner join operation using join component with 2 inputs.
 * <p>
 * Both inputs are keyed on {@code col1}. {@code col1} is mapped from
 * {@code in0}, the remaining columns are passed through from the input that
 * owns them, and every joined row is expected to carry all five columns.
 */
@Test
public void TestInnerJoin() {
    Plunger plunger = new Plunger();

    // first input of the join component
    Data file1 = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2").build();
    Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);

    // second input of the join component
    Data file2 = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2").build();
    Pipe pipe2 = plunger.newNamedPipe("pipe2", file2);

    ComponentParameters parameters = new ComponentParameters();
    parameters.addInputPipe(pipe1); // first input to join component
    parameters.addInputPipe(pipe2); // second input to join component
    parameters.addInputFields(new Fields("col1", "col2", "col3"));
    parameters.addInputFields(new Fields("col1", "col4", "col5"));
    parameters.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    // The second input's fields belong to socket "in1". They were previously
    // registered under "in0" a second time, unlike every other test here.
    parameters.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    parameters.addinSocketId("in0");
    parameters.addinSocketId("in1");

    JoinEntity joinEntity = new JoinEntity();
    // set the name of the component
    joinEntity.setComponentId("testJoin");

    // set key fields: both inputs are joined on col1
    ArrayList<JoinKeyFields> keyFieldsList = new ArrayList<>();
    keyFieldsList.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    keyFieldsList.add(new JoinKeyFields("in1", true, new String[] { "col1" }));
    joinEntity.setKeyFields(keyFieldsList);

    // create outSocket
    OutSocket outSocket1 = new OutSocket("out0");
    outSocket1.setSocketType("out");

    // set map fields
    List<MapField> mapFieldsList = new ArrayList<>();
    mapFieldsList.add(new MapField("col1", "col1", "in0"));
    outSocket1.setMapFieldsList(mapFieldsList);

    // set pass through fields
    List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
    passThroughFieldsList1.add(new PassThroughField("col2", "in0"));
    passThroughFieldsList1.add(new PassThroughField("col3", "in0"));
    passThroughFieldsList1.add(new PassThroughField("col4", "in1"));
    passThroughFieldsList1.add(new PassThroughField("col5", "in1"));
    outSocket1.setPassThroughFieldsList(passThroughFieldsList1);

    // add outSocket in list
    List<OutSocket> outSocketList = new ArrayList<>();
    outSocketList.add(outSocket1);
    joinEntity.setOutSocketList(outSocketList);

    // create the join sub assembly under test
    JoinAssembly join = new JoinAssembly(joinEntity, parameters);

    // create bucket for the join sub assembly
    Bucket bucket = plunger.newBucket(new Fields("col1", "col2", "col3",
            "col4", "col5"), join);

    // get results from bucket
    List<Tuple> actual = bucket.result().asTupleList();

    // assert the actual results with expected results
    assertThat(actual.size(), is(2));

    // Compare as sets so that the order of the rows does not matter
    Set<Tuple> output = new HashSet<>(actual);
    Set<Tuple> expectedOutput = new HashSet<>();
    expectedOutput.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1"));
    expectedOutput.add(new Tuple("C1R2", "C2R2", "C3R2", "C4R2", "C5R2"));
    Assert.assertEquals(expectedOutput, output);
}
/**
 * Inner join of two inputs where the map-field list of the out socket is
 * left empty. Only a {@code *} pass-through from {@code in0} is configured,
 * and the bucket is declared with the three columns of the first input.
 */
@Test
public void TestInnerJoinWithEmptyInputOutputMapping() {
    Plunger harness = new Plunger();

    // first input of the join component
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input of the join component
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    // out socket with an empty mapping list
    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");
    List<MapField> mappings = new ArrayList<>();
    outPort.setMapFieldsList(mappings);

    // everything from in0 is passed through
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("*", "in0"));
    outPort.setPassThroughFieldsList(passThroughs);

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // bucket collecting the output of the join sub assembly
    Bucket sink = harness.newBucket(new Fields("col1", "col2", "col3"), assembly);
    Data joined = sink.result();
    List<Tuple> rows = joined.asTupleList();

    assertThat(rows.size(), is(2));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Inner join of two inputs where the first input carries grouped fields
 * ({@code G1.col2}, {@code G1.col3}). The group is mapped to the output with
 * a {@code G1.*} to {@code G1.*} mapping.
 */
@Test
public void TestInnerJoinWithGroupFields() {
    Plunger harness = new Plunger();

    // first input: one plain column plus two columns in group G1
    Data leftData = new DataBuilder(new Fields("col1", "G1.col2", "G1.col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "G1.col2", "G1.col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in0", new Fields("col1", "G1.col2", "G1.col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");

    // col1 one-to-one, group G1 via wildcard, both taken from in0
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("col1", "col1", "in0"));
    mappings.add(new MapField("G1.*", "G1.*", "in0"));
    outPort.setMapFieldsList(mappings);

    // remaining columns are passed through from in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // bucket collecting the output of the join sub assembly
    Fields sinkFields = new Fields("col1", "G1.col2", "G1.col3", "col4", "col5");
    Bucket sink = harness.newBucket(sinkFields, assembly);
    Data joined = sink.result();
    List<Tuple> rows = joined.asTupleList();

    assertThat(rows.size(), is(2));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", "C4R2", "C5R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Inner join of two inputs where the first input is mapped to the output
 * with a plain wildcard ({@code *} to {@code *}). The columns of the second
 * input, including its {@code col1}, are listed as pass-through fields.
 */
@Test
public void TestInnerJoinWithWildcardMapping() {
    Plunger harness = new Plunger();

    // first input; col2 holds integer values
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", 21, "C3R1")
            .addTuple("C1R2", 22, "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input; col4 holds integer values
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", 41, "C5R1")
            .addTuple("C1R2", 42, "C5R2")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");

    // wildcard mapping over everything in in0
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("*", "*", "in0"));
    outPort.setMapFieldsList(mappings);

    // explicit pass-through of every column of in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col1", "in1"));
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // bucket collecting the output of the join sub assembly
    Fields sinkFields = new Fields("col2", "col3", "col1", "col4", "col5");
    Bucket sink = harness.newBucket(sinkFields, assembly);
    Data joined = sink.result();
    List<Tuple> rows = joined.asTupleList();

    assertThat(rows.size(), is(2));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple(21, "C3R1", "C1R1", 41, "C5R1"));
    expectedRows.add(new Tuple(22, "C3R2", "C1R2", 42, "C5R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Inner join of two inputs where the first input has both a wildcard mapping
 * ({@code *} to {@code *}) and a one-to-one mapping that renames
 * {@code col2} to {@code RenamedColumn2}.
 */
@Test
public void TestInnerJoinWithWildcardAndOneToOneMapping() {
    Plunger harness = new Plunger();

    // first input; col2 holds integer values
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", 21, "C3R1")
            .addTuple("C1R2", 22, "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input; col4 holds integer values
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", 41, "C5R1")
            .addTuple("C1R2", 42, "C5R2")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");

    // wildcard over in0 plus an explicit rename of col2
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("*", "*", "in0"));
    mappings.add(new MapField("col2", "RenamedColumn2", "in0"));
    outPort.setMapFieldsList(mappings);

    // remaining columns are passed through from in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // bucket collecting the output of the join sub assembly
    Fields sinkFields = new Fields("col1", "col3", "RenamedColumn2", "col4", "col5");
    Bucket sink = harness.newBucket(sinkFields, assembly);
    Data joined = sink.result();
    List<Tuple> rows = joined.asTupleList();

    assertThat(rows.size(), is(2));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C3R1", 21, 41, "C5R1"));
    expectedRows.add(new Tuple("C1R2", "C3R2", 22, 42, "C5R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Inner join of two inputs where the first input has a wildcard mapping with
 * a prefixed source ({@code in0.*} to {@code *}) together with a one-to-one
 * mapping that renames {@code col2} to {@code RenamedColumn2}. The bucket is
 * declared with the unprefixed names {@code col1} and {@code col3}.
 */
@Test
public void TestInnerJoinWithWildcarWithPrefixAndOneToOneMapping() {
    Plunger harness = new Plunger();

    // first input: col1 and col3 carry the "in0." prefix, col2 does not
    Data leftData = new DataBuilder(new Fields("in0.col1", "col2", "in0.col3"))
            .addTuple("C1R1", 21, "C3R1")
            .addTuple("C1R2", 22, "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input; col4 holds integer values
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", 41, "C5R1")
            .addTuple("C1R2", 42, "C5R2")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("in0.col1", "col2", "in0.col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in0", new Fields("in0.col1", "col2", "in0.col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // in0 is keyed on its prefixed column, in1 on plain col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "in0.col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");

    // prefixed wildcard over in0 plus an explicit rename of col2
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("in0.*", "*", "in0"));
    mappings.add(new MapField("col2", "RenamedColumn2", "in0"));
    outPort.setMapFieldsList(mappings);

    // remaining columns are passed through from in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // bucket collecting the output of the join sub assembly
    Fields sinkFields = new Fields("col1", "col3", "RenamedColumn2", "col4", "col5");
    Bucket sink = harness.newBucket(sinkFields, assembly);
    Data joined = sink.result();
    List<Tuple> rows = joined.asTupleList();

    assertThat(rows.size(), is(2));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C3R1", 21, 41, "C5R1"));
    expectedRows.add(new Tuple("C1R2", "C3R2", 22, 42, "C5R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Inner join of two inputs where the first input has a wildcard mapping
 * whose target carries a prefix ({@code *} to {@code target_group.*})
 * together with a one-to-one mapping that renames {@code col2} to
 * {@code RenamedColumn2}. Both inputs declare a {@code col3}; the one from
 * {@code in1} is passed through under its own name.
 */
@Test
public void TestInnerJoinWithWildcardWithPrefixInTargetAndOneToOneMapping() {
    Plunger harness = new Plunger();

    // first input; col2 holds integer values
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", 21, "C3R1")
            .addTuple("C1R2", 22, "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input; it also has a column named col3
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col3"))
            .addTuple("C1R1", 41, "C5R1")
            .addTuple("C1R2", 42, "C5R2")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col3"));
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addInputFields(new Fields("col1", "col4", "col3"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");

    // wildcard over in0 into the target_group prefix, plus a rename of col2
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("*", "target_group.*", "in0"));
    mappings.add(new MapField("col2", "RenamedColumn2", "in0"));
    outPort.setMapFieldsList(mappings);

    // col3 and col4 are passed through from in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col3", "in1"));
    passThroughs.add(new PassThroughField("col4", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // bucket collecting the output of the join sub assembly
    // NOTE(review): the bucket declares (col4, col3) while the expected
    // tuples below list the in1 values as ("C5R1", 41), i.e. in pass-through
    // order (col3, col4) - confirm the declared order is intentional.
    Fields sinkFields = new Fields("target_group.col1",
            "target_group.col3", "RenamedColumn2", "col4", "col3");
    Bucket sink = harness.newBucket(sinkFields, assembly);
    Data joined = sink.result();
    List<Tuple> rows = joined.asTupleList();

    assertThat(rows.size(), is(2));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C3R1", 21, "C5R1", 41));
    expectedRows.add(new Tuple("C1R2", "C3R2", 22, "C5R2", 42));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Test the unused port of a simple inner join operation with 2 inputs.
 * <p>
 * The inputs share only the key {@code C1R1}. The {@code unused0} socket is
 * configured as a copy of {@code in0}, and the test expects it to emit the
 * single {@code in0} row whose key ({@code C1R2}) is absent from {@code in1}.
 */
@Test
public void TestInnerJoinWithUnusedPort() {
    Plunger harness = new Plunger();

    // first input: keys C1R1 and C1R2
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input: keys C1R1 and C1R3
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    // regular out socket and the unused socket
    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");
    OutSocket unusedPort = new OutSocket("unused0");
    unusedPort.setSocketType("unused");

    // wildcard mapping over in0 for the regular out socket
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("*", "*", "in0"));
    outPort.setMapFieldsList(mappings);

    // col4 and col5 are passed through from in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    // the unused socket mirrors in0
    unusedPort.setCopyOfInSocketId("in0");

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    sockets.add(unusedPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // create bucket for the join sub assembly
    // set the unused port explicitly. Default is out
    Pipe unusedLink = assembly.getOutLink("unused", "unused0", "testJoin");
    Bucket sink = harness.newBucket(new Fields("col1", "col2", "col3"), unusedLink);
    Data unmatched = sink.result();
    List<Tuple> rows = unmatched.asTupleList();

    assertThat(rows.size(), is(1));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualRows = new HashSet<>(rows);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Test both the unused ports of a simple inner join operation with 2 inputs.
 * <p>
 * The inputs share only the key {@code C1R1}. {@code unused0} is a copy of
 * {@code in0} and {@code unused1} is a copy of {@code in1}; each is expected
 * to emit the one row of its input whose key has no partner on the other
 * side.
 */
@Test
public void TestInnerJoinWithMultipleUnusedPorts() {
    Plunger harness = new Plunger();

    // first input: keys C1R1 and C1R2
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = harness.newNamedPipe("pipe1", leftData);

    // second input: keys C1R1 and C1R3
    Data rightData = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = harness.newNamedPipe("pipe2", rightData);

    // wire pipes, field declarations and socket ids into the parameters
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addinSocketId("in0");
    params.addinSocketId("in1");
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");

    // both inputs are keyed on col1
    JoinKeyFields leftKey = new JoinKeyFields("in0", true, new String[] { "col1" });
    JoinKeyFields rightKey = new JoinKeyFields("in1", true, new String[] { "col1" });
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(leftKey);
    joinKeys.add(rightKey);
    entity.setKeyFields(joinKeys);

    // regular out socket plus one unused socket per input
    OutSocket outPort = new OutSocket("out0");
    outPort.setSocketType("out");
    OutSocket unusedLeftPort = new OutSocket("unused0");
    unusedLeftPort.setSocketType("unused");
    OutSocket unusedRightPort = new OutSocket("unused1");
    unusedRightPort.setSocketType("unused");

    // wildcard mapping over in0 for the regular out socket
    List<MapField> mappings = new ArrayList<>();
    mappings.add(new MapField("*", "*", "in0"));
    outPort.setMapFieldsList(mappings);

    // col4 and col5 are passed through from in1
    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outPort.setPassThroughFieldsList(passThroughs);

    // each unused socket mirrors one of the inputs
    unusedLeftPort.setCopyOfInSocketId("in0");
    unusedRightPort.setCopyOfInSocketId("in1");

    List<OutSocket> sockets = new ArrayList<>();
    sockets.add(outPort);
    sockets.add(unusedLeftPort);
    sockets.add(unusedRightPort);
    entity.setOutSocketList(sockets);

    // the join sub assembly under test
    JoinAssembly assembly = new JoinAssembly(entity, params);

    // create bucket for the first unused port of join sub assembly
    // set the unused port explicitly. Default is out
    Pipe unusedLeftLink = assembly.getOutLink("unused", "unused0", "testJoin");
    Bucket leftSink = harness.newBucket(new Fields("col1", "col2", "col3"), unusedLeftLink);

    // create bucket for the second unused port of join sub assembly
    // set the unused port explicitly. Default is out
    Pipe unusedRightLink = assembly.getOutLink("unused", "unused1", "testJoin");
    Bucket rightSink = harness.newBucket(new Fields("col1", "col4", "col5"), unusedRightLink);

    // buckets need to be created before calling result() on any one of them
    Data leftUnmatched = leftSink.result();
    List<Tuple> leftRows = leftUnmatched.asTupleList();

    assertThat(leftRows.size(), is(1));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> actualLeft = new HashSet<>(leftRows);
    Set<Tuple> expectedLeft = new HashSet<>();
    expectedLeft.add(new Tuple("C1R2", "C2R2", "C3R2"));
    Assert.assertEquals(expectedLeft, actualLeft);

    // test second unused port
    Data rightUnmatched = rightSink.result();
    List<Tuple> rightRows = rightUnmatched.asTupleList();

    assertThat(rightRows.size(), is(1));

    Set<Tuple> actualRight = new HashSet<>(rightRows);
    Set<Tuple> expectedRight = new HashSet<>();
    expectedRight.add(new Tuple("C1R3", "C4R3", "C5R3"));
    Assert.assertEquals(expectedRight, actualRight);
}
/**
 * Verifies an inner join over three inputs that are all keyed on col1.
 * <p>
 * Each input contains the keys C1R1 and C1R2, so two joined rows are expected
 * on socket "out0". The socket maps in0 through the wildcard map field and
 * passes through col4/col5 from in1 and col7 from in2.
 */
@Test
public void TestInnerJoinWithMoreThanTwoInputs() {
    Plunger plunger = new Plunger();

    // source data and named pipes, one per join input
    Data firstInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe firstPipe = plunger.newNamedPipe("pipe1", firstInput);

    Data secondInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2")
            .build();
    Pipe secondPipe = plunger.newNamedPipe("pipe2", secondInput);

    Data thirdInput = new DataBuilder(new Fields("col1", "col6", "col7"))
            .addTuple("C1R1", "C6R1", "C7R1")
            .addTuple("C1R2", "C6R2", "C7R2")
            .build();
    Pipe thirdPipe = plunger.newNamedPipe("pipe3", thirdInput);

    // register pipe, schema and socket id input by input (in0, in1, in2)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(firstPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(secondPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    params.addInputPipe(thirdPipe);
    params.addInputFields(new Fields("col1", "col6", "col7"));
    params.addCopyOfInSocket("in2", new Fields("col1", "col6", "col7"));
    params.addinSocketId("in2");

    // join key is col1 on every input; the flag is true for all three
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", true, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in2", true, new String[] { "col1" }));

    // single "out" socket: wildcard map for in0 plus selected pass-through fields
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in0"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    passThroughs.add(new PassThroughField("col7", "in2"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col1", "col2", "col3", "col4", "col5", "col7"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(2));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1", "C7R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", "C4R2", "C5R2", "C7R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies a left join of two inputs keyed on col1.
 * <p>
 * in0 holds the keys C1R1 and C1R2, in1 holds C1R1 and C1R3. Both in0 rows
 * are expected on socket "out0"; the row for C1R2 has no partner in in1 and is
 * therefore expected with null values for col4 and col5.
 */
@Test
public void TestLeftJoin() {
    Plunger plunger = new Plunger();

    // source data and named pipes for the two join inputs
    Data leftInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = plunger.newNamedPipe("pipe1", leftInput);

    Data rightInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = plunger.newNamedPipe("pipe2", rightInput);

    // register pipe, schema and socket id input by input (in0, in1)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    // join key is col1 on both inputs; the flag is true for in0 only
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", false, new String[] { "col1" }));

    // single "out" socket: wildcard map for in0 plus col4/col5 from in1
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in0"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col1", "col2", "col3", "col4", "col5"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(2));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", null, null));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies a left join over three inputs keyed on col1.
 * <p>
 * in0 holds the keys C1R1 and C1R2, while in1 and in2 hold C1R1 and C1R3.
 * Both in0 rows are expected on socket "out0"; the C1R2 row has no partner in
 * in1 or in2 and is expected with null values for col4, col5 and col7.
 */
@Test
public void TestLeftJoinWithMoreThanTwoInputs() {
    Plunger plunger = new Plunger();

    // source data and named pipes, one per join input
    Data firstInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe firstPipe = plunger.newNamedPipe("pipe1", firstInput);

    Data secondInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R2", "C5R2")
            .build();
    Pipe secondPipe = plunger.newNamedPipe("pipe2", secondInput);

    Data thirdInput = new DataBuilder(new Fields("col1", "col6", "col7"))
            .addTuple("C1R1", "C6R1", "C7R1")
            .addTuple("C1R3", "C6R2", "C7R2")
            .build();
    Pipe thirdPipe = plunger.newNamedPipe("pipe3", thirdInput);

    // register pipe, schema and socket id input by input (in0, in1, in2)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(firstPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(secondPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    params.addInputPipe(thirdPipe);
    params.addInputFields(new Fields("col1", "col6", "col7"));
    params.addCopyOfInSocket("in2", new Fields("col1", "col6", "col7"));
    params.addinSocketId("in2");

    // join key is col1 on every input; the flag is true for in0 only
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", false, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in2", false, new String[] { "col1" }));

    // single "out" socket: wildcard map for in0 plus selected pass-through fields
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in0"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    passThroughs.add(new PassThroughField("col7", "in2"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col1", "col2", "col3", "col4", "col5", "col7"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(2));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1", "C7R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", null, null, null));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies the "unused1" port of a left join with two inputs keyed on col1.
 * <p>
 * Three sockets are configured: "out0" (type "out"), "unused0" (copy of in0)
 * and "unused1" (copy of in1). Only "unused1" is read here; it is expected to
 * carry the single in1 row (C1R3) whose key does not occur in in0.
 */
@Test
public void TestLeftJoinWithUnusedPort() {
    Plunger plunger = new Plunger();

    // source data and named pipes for the two join inputs
    Data leftInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = plunger.newNamedPipe("pipe1", leftInput);

    Data rightInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = plunger.newNamedPipe("pipe2", rightInput);

    // register pipe, schema and socket id input by input (in0, in1)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));

    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));

    // join key is col1 on both inputs; the flag is true for in0 only
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", false, new String[] { "col1" }));

    // regular output socket plus one unused socket per input
    OutSocket mainSocket = new OutSocket("out0", "out");
    OutSocket unusedLeftSocket = new OutSocket("unused0", "unused");
    OutSocket unusedRightSocket = new OutSocket("unused1", "unused");
    unusedLeftSocket.setCopyOfInSocketId("in0");
    unusedRightSocket.setCopyOfInSocketId("in1");

    // "out0": col4/col5 mapped from in1, everything from in0 passed through
    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("col4", "col4", "in1"));
    mappedFields.add(new MapField("col5", "col5", "in1"));
    mainSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("*", "in0"));
    mainSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(mainSocket);
    outSockets.add(unusedLeftSocket);
    outSockets.add(unusedRightSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test; the unused link must be requested explicitly
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket unusedBucket = plunger.newBucket(new Fields("col1", "col4", "col5"),
            join.getOutLink("unused", "unused1", "testJoin"));
    List<Tuple> actual = unusedBucket.result().asTupleList();

    assertThat(actual.size(), is(1));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R3", "C4R3", "C5R3"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies a mixed join over three inputs keyed on col1, where the flag of
 * every key-field entry is false.
 * <p>
 * in0 and in2 hold the keys C1R1, C1R2 and C1R3, in1 holds only C1R1 and
 * C1R2. Three rows are expected on socket "out0"; the C1R3 row is expected
 * with null values for col4/col5 (from in1) and C7R3 for col7 (from in2).
 */
@Test
public void TestMixedJoinWithMoreThanTwoInputs() {
    Plunger plunger = new Plunger();

    // source data and named pipes, one per join input
    Data firstInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .addTuple("C1R3", "C2R3", "C3R3")
            .build();
    Pipe firstPipe = plunger.newNamedPipe("pipe1", firstInput);

    Data secondInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2")
            .build();
    Pipe secondPipe = plunger.newNamedPipe("pipe2", secondInput);

    Data thirdInput = new DataBuilder(new Fields("col1", "col6", "col7"))
            .addTuple("C1R1", "C6R1", "C7R1")
            .addTuple("C1R2", "C6R2", "C7R2")
            .addTuple("C1R3", "C6R3", "C7R3")
            .build();
    Pipe thirdPipe = plunger.newNamedPipe("pipe3", thirdInput);

    // register pipe, schema and socket id input by input (in0, in1, in2)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(firstPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(secondPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    params.addInputPipe(thirdPipe);
    params.addInputFields(new Fields("col1", "col6", "col7"));
    params.addCopyOfInSocket("in2", new Fields("col1", "col6", "col7"));
    params.addinSocketId("in2");

    // join key is col1 on every input; the flag is false for all three
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", false, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", false, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in2", false, new String[] { "col1" }));

    // single "out" socket: wildcard map for in0 plus selected pass-through fields
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in0"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    passThroughs.add(new PassThroughField("col7", "in2"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col1", "col2", "col3", "col4", "col5", "col7"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(3));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1", "C7R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", "C4R2", "C5R2", "C7R2"));
    expectedRows.add(new Tuple("C1R3", "C2R3", "C3R3", null, null, "C7R3"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies a mixed join of two inputs on the single key field col1, where the
 * flag of both key-field entries is false.
 * <p>
 * in0 holds the keys C1R1, C1R2 and C1R3, in1 holds only C1R1 and C1R2. Three
 * rows are expected on socket "out0"; the C1R3 row has no partner in in1 and
 * is expected with null values for col4 and col5.
 */
@Test
public void TestMixedJoinWithTwoInputs() {
    Plunger plunger = new Plunger();

    // source data and named pipes for the two join inputs
    Data leftInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .addTuple("C1R3", "C2R3", "C3R3")
            .build();
    Pipe leftPipe = plunger.newNamedPipe("pipe1", leftInput);

    Data rightInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2")
            .build();
    Pipe rightPipe = plunger.newNamedPipe("pipe2", rightInput);

    // register pipe, schema and socket id input by input (in0, in1)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    // join key is col1 on both inputs; the flag is false for both
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", false, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", false, new String[] { "col1" }));

    // single "out" socket: wildcard map for in0 plus col4/col5 from in1
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in0"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col1", "col2", "col3", "col4", "col5"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(3));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", "C4R1", "C5R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", "C4R2", "C5R2"));
    expectedRows.add(new Tuple("C1R3", "C2R3", "C3R3", null, null));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies a mixed join of two inputs on a two-field key, where the flag of
 * both key-field entries is false.
 * <p>
 * in0 is keyed on (col1, col2) and in1 on (col1, col4). No key pair of in0
 * equals a key pair of in1 in the test data, so five rows are expected on
 * socket "out0": three carrying only in0 values and two carrying only the
 * col4/col5 values of in1, with null in every other position.
 */
@Test
public void TestMixedJoinWithMoreThanOneKeyFields() {
    Plunger plunger = new Plunger();

    // source data and named pipes for the two join inputs
    Data leftInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .addTuple("C1R3", "C2R3", "C3R3")
            .build();
    Pipe leftPipe = plunger.newNamedPipe("pipe1", leftInput);

    Data rightInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R2", "C4R2", "C5R2")
            .build();
    Pipe rightPipe = plunger.newNamedPipe("pipe2", rightInput);

    // register pipe, schema and socket id input by input (in0, in1)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    // composite keys: (col1, col2) on in0 and (col1, col4) on in1; flag false for both
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", false, new String[] { "col1", "col2" }));
    joinKeys.add(new JoinKeyFields("in1", false, new String[] { "col1", "col4" }));

    // single "out" socket: wildcard map for in0 plus col4/col5 from in1
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in0"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col4", "in1"));
    passThroughs.add(new PassThroughField("col5", "in1"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col1", "col2", "col3", "col4", "col5"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(5));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R1", "C2R1", "C3R1", null, null));
    expectedRows.add(new Tuple(null, null, null, "C4R1", "C5R1"));
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2", null, null));
    expectedRows.add(new Tuple(null, null, null, "C4R2", "C5R2"));
    expectedRows.add(new Tuple("C1R3", "C2R3", "C3R3", null, null));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies a right join of two inputs keyed on col1.
 * <p>
 * in0 holds the keys C1R1 and C1R2, in1 holds C1R1 and C1R3. Both in1 rows
 * are expected on socket "out0"; the row for C1R3 has no partner in in0 and is
 * therefore expected with null values for col2 and col3. The bucket reads
 * the fields in the order col2, col3, col1, col4, col5.
 */
@Test
public void TestRightJoin() {
    Plunger plunger = new Plunger();

    // source data and named pipes for the two join inputs
    Data leftInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = plunger.newNamedPipe("pipe1", leftInput);

    Data rightInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = plunger.newNamedPipe("pipe2", rightInput);

    // register pipe, schema and socket id input by input (in0, in1)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");

    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    // join key is col1 on both inputs; the flag is true for in1 only
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", false, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", true, new String[] { "col1" }));

    // single "out" socket: wildcard map for in1 plus col2/col3 from in0
    OutSocket outSocket = new OutSocket("out0");
    outSocket.setSocketType("out");

    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("*", "*", "in1"));
    outSocket.setMapFieldsList(mappedFields);

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("col2", "in0"));
    passThroughs.add(new PassThroughField("col3", "in0"));
    outSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(outSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test, drained into a bucket
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket joinBucket = plunger.newBucket(
            new Fields("col2", "col3", "col1", "col4", "col5"), join);
    List<Tuple> actual = joinBucket.result().asTupleList();

    assertThat(actual.size(), is(2));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C2R1", "C3R1", "C1R1", "C4R1", "C5R1"));
    expectedRows.add(new Tuple(null, null, "C1R3", "C4R3", "C5R3"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
 * Verifies the "unused0" port of a right join with two inputs keyed on col1.
 * <p>
 * Two sockets are configured: "out0" (type "out") and "unused0" (type
 * "unused", copy of in0). Only "unused0" is read here; it is expected to carry
 * the single in0 row (C1R2) whose key does not occur in in1.
 */
@Test
public void TestRightJoinWithUnusedPort() {
    Plunger plunger = new Plunger();

    // source data and named pipes for the two join inputs
    Data leftInput = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = plunger.newNamedPipe("pipe1", leftInput);

    Data rightInput = new DataBuilder(new Fields("col1", "col4", "col5"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = plunger.newNamedPipe("pipe2", rightInput);

    // register pipe, schema and socket id input by input (in0, in1)
    ComponentParameters params = new ComponentParameters();
    params.addInputPipe(leftPipe);
    params.addInputFields(new Fields("col1", "col2", "col3"));
    params.addinSocketId("in0");
    params.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));

    params.addInputPipe(rightPipe);
    params.addInputFields(new Fields("col1", "col4", "col5"));
    params.addCopyOfInSocket("in1", new Fields("col1", "col4", "col5"));
    params.addinSocketId("in1");

    // join key is col1 on both inputs; the flag is true for in1 only
    // NOTE(review): the boolean is presumably the record-required flag - confirm in JoinKeyFields
    ArrayList<JoinKeyFields> joinKeys = new ArrayList<>();
    joinKeys.add(new JoinKeyFields("in0", false, new String[] { "col1" }));
    joinKeys.add(new JoinKeyFields("in1", true, new String[] { "col1" }));

    // regular output socket plus the unused socket for in0
    OutSocket mainSocket = new OutSocket("out0");
    mainSocket.setSocketType("out");
    OutSocket unusedLeftSocket = new OutSocket("unused0");
    unusedLeftSocket.setSocketType("unused");

    // "out0": col2/col3 mapped from in0, everything from in1 passed through
    List<MapField> mappedFields = new ArrayList<>();
    mappedFields.add(new MapField("col2", "col2", "in0"));
    mappedFields.add(new MapField("col3", "col3", "in0"));
    mainSocket.setMapFieldsList(mappedFields);
    unusedLeftSocket.setCopyOfInSocketId("in0");

    List<PassThroughField> passThroughs = new ArrayList<>();
    passThroughs.add(new PassThroughField("*", "in1"));
    mainSocket.setPassThroughFieldsList(passThroughs);

    List<OutSocket> outSockets = new ArrayList<>();
    outSockets.add(mainSocket);
    outSockets.add(unusedLeftSocket);

    JoinEntity entity = new JoinEntity();
    entity.setComponentId("testJoin");
    entity.setKeyFields(joinKeys);
    entity.setOutSocketList(outSockets);

    // assembly under test; the unused link must be requested explicitly
    JoinAssembly join = new JoinAssembly(entity, params);
    Bucket unusedBucket = plunger.newBucket(new Fields("col1", "col2", "col3"),
            join.getOutLink("unused", "unused0", "testJoin"));
    List<Tuple> actual = unusedBucket.result().asTupleList();

    assertThat(actual.size(), is(1));

    // compare as sets so that the order of the rows is irrelevant
    Set<Tuple> actualRows = new HashSet<>(actual);
    Set<Tuple> expectedRows = new HashSet<>();
    expectedRows.add(new Tuple("C1R2", "C2R2", "C3R2"));
    Assert.assertEquals(expectedRows, actualRows);
}
/**
* Integration test of two join assemblies with copyOfInSocket in the outSocket
*/
@Test
public void testCopyOfInsocketOfTwoJoinWithThreeInputsHavingSameSchema(){
Plunger plunger = new Plunger();
Data file1 = new DataBuilder(new Fields("col1", "col2", "col3"))
.addTuple("C1R1", "C2R1", "C3R1")
.addTuple("C1R2", "C2R2", "C3R2").build();
// pipe corresponding to an input 1 of join component
Pipe pipe1 = plunger.newNamedPipe("pipe1", file1);
Data file2 = new DataBuilder(new Fields("col1", "col2", "col3"))
.addTuple("C1R1", "C4R1", "C5R1")
.addTuple("C1R3", "C4R3", "C5R3").build();
// pipe corresponding to an input 2 of join component
Pipe pipe2 = plunger.newNamedPipe("pipe2", file2);
ComponentParameters parameters_1 = new ComponentParameters();
parameters_1.addInputPipe(pipe1); // first input to join component
parameters_1.addInputPipe(pipe2); // second input to join component
parameters_1.addInputFields(new Fields("col1", "col2", "col3"));
parameters_1.addInputFields(new Fields("col1", "col2", "col3"));
parameters_1.addinSocketId("in0");
parameters_1.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
parameters_1.addCopyOfInSocket("in1", new Fields("col1", "col2", "col3"));
parameters_1.addinSocketId("in1");
JoinEntity join_1_Entity = new JoinEntity();
// set the name of the component
join_1_Entity.setComponentId("testJoin_1");
// set key fields
ArrayList<JoinKeyFields> keyFields_1_List = new ArrayList<JoinKeyFields>();
keyFields_1_List.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
keyFields_1_List.add(new JoinKeyFields("in1", true, new String[] { "col1" }));
join_1_Entity.setKeyFields(keyFields_1_List);
// create outSocket
OutSocket outSocket1 = new OutSocket("out0");
outSocket1.setSocketType("out");
outSocket1.setCopyOfInSocketId("in0");
// add outSocket in list
List<OutSocket> outSocketList = new ArrayList<>();
outSocketList.add(outSocket1);
join_1_Entity.setOutSocketList(outSocketList);
List<MapField> mapFieldsList = new ArrayList<>();
outSocket1.setMapFieldsList(mapFieldsList);
List<PassThroughField> passThroughFieldsList1 = new ArrayList<>();
outSocket1.setPassThroughFieldsList(passThroughFieldsList1);
JoinAssembly join = new JoinAssembly(join_1_Entity, parameters_1);
Bucket bucket_1 = plunger.newBucket(new Fields("col1", "col2", "col3"),
join.getOutLink("out", "out0", "testJoin_1"));
List<Tuple> actual_1 = bucket_1.result().asTupleList(); // get results from
// bucket
// assert the actual results with expected results
assertThat(actual_1.size(), is(1));
// Use HashSet so that order of fields does not matter while comparison
Set<Tuple> output_1 = new HashSet<Tuple>(actual_1);
Set<Tuple> expectedOutput_1 = new HashSet<Tuple>();
expectedOutput_1.add(new Tuple("C1R1", "C2R1", "C3R1"));
Assert.assertEquals(expectedOutput_1, output_1);
Plunger plunger2 = new Plunger();
ComponentParameters parameters_2 = new ComponentParameters();
Data file3 = new DataBuilder(new Fields("col1", "col2", "col3"))
.addTuple("C1R1", "C2R4", "C3R4")
.addTuple("C1R4", "C2R4", "C3R4").build();
// pipe corresponding to an input 3 of join component
Pipe pipe3 = plunger2.newNamedPipe("pipe3", file3);
parameters_2.addInputPipe(pipe3); // first input to join component
parameters_2.addInputPipe(plunger2.newNamedPipe("join_1_pipe", bucket_1.result())); // second input to join component
parameters_2.addInputFields(new Fields("col1", "col2", "col3"));
parameters_2.addInputFields(new Fields("col1", "col2", "col3"));
parameters_2.addinSocketId("in0");
parameters_2.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
parameters_2.addCopyOfInSocket("in1", new Fields("col1", "col2", "col3"));
parameters_2.addinSocketId("in1");
JoinEntity join_2_Entity = new JoinEntity();
// set the name of the component
join_2_Entity.setComponentId("testJoin_2");
// set key fields
ArrayList<JoinKeyFields> keyFields_2_List = new ArrayList<JoinKeyFields>();
keyFields_2_List.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
keyFields_2_List.add(new JoinKeyFields("in1", true, new String[] { "col1" }));
join_2_Entity.setKeyFields(keyFields_2_List);
// create outSocket
OutSocket outSocket2 = new OutSocket("out0");
outSocket2.setSocketType("out");
outSocket2.setCopyOfInSocketId("in0");
// add outSocket in list
List<OutSocket> outSocketList_2 = new ArrayList<>();
outSocketList_2.add(outSocket2);
join_2_Entity.setOutSocketList(outSocketList_2);
List<MapField> mapFieldsList_2 = new ArrayList<>();
outSocket2.setMapFieldsList(mapFieldsList_2);
List<PassThroughField> passThroughFieldsList2 = new ArrayList<>();
outSocket2.setPassThroughFieldsList(passThroughFieldsList2);
JoinAssembly join_2 = new JoinAssembly(join_2_Entity, parameters_2);
Bucket bucket_2 = plunger2.newBucket(new Fields("col1", "col2", "col3"),
join_2.getOutLink("out", "out0", "testJoin_2"));
List<Tuple> actual_2 = bucket_2.result().asTupleList(); // get results from
// bucket
// assert the actual results with expected results
assertThat(actual_2.size(), is(1));
// Use HashSet so that order of fields does not matter while comparison
Set<Tuple> output_2 = new HashSet<Tuple>(actual_2);
Set<Tuple> expectedOutput_2 = new HashSet<Tuple>();
expectedOutput_2.add(new Tuple("C1R1", "C2R4", "C3R4"));
Set<Tuple> unexpectedOutput_2 = new HashSet<Tuple>();
unexpectedOutput_2.add(new Tuple("C1R1", "C2R1", "C3R1"));
Assert.assertEquals(expectedOutput_2, output_2);
Assert.assertNotEquals(unexpectedOutput_2, output_2);
}
/**
 * Integration test that chains two join assemblies whose inputs all use the
 * column names col1, col2 and col3.
 * <p>
 * The first join selects its output through both mapFields (renaming col2 of
 * in1 and col3 of in0) and a passThroughField (col1 of in0). The result of the
 * first join is then fed as input in1 of a second join, whose out socket is
 * configured with passThroughFields only.
 */
@Test
public void testMapAndPassthroughFieldsOfTwoJoinWithThreeInputsHavingSameSchema() {
    // ------------------------------------------------------------------
    // First join: pipe1 (in0) and pipe2 (in1), keyed on col1
    // ------------------------------------------------------------------
    Plunger firstPlunger = new Plunger();

    // data and pipe for input 1 of the first join
    Data leftData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R1", "C3R1")
            .addTuple("C1R2", "C2R2", "C3R2")
            .build();
    Pipe leftPipe = firstPlunger.newNamedPipe("pipe1", leftData);

    // data and pipe for input 2 of the first join; only key C1R1 is shared
    // with input 1
    Data rightData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C4R1", "C5R1")
            .addTuple("C1R3", "C4R3", "C5R3")
            .build();
    Pipe rightPipe = firstPlunger.newNamedPipe("pipe2", rightData);

    ComponentParameters firstJoinParams = new ComponentParameters();
    firstJoinParams.addInputPipe(leftPipe); // becomes the first input
    firstJoinParams.addInputPipe(rightPipe); // becomes the second input
    firstJoinParams.addInputFields(new Fields("col1", "col2", "col3"));
    firstJoinParams.addInputFields(new Fields("col1", "col2", "col3"));
    firstJoinParams.addinSocketId("in0");
    firstJoinParams.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    firstJoinParams.addCopyOfInSocket("in1", new Fields("col1", "col2", "col3"));
    firstJoinParams.addinSocketId("in1");

    // both inputs are keyed on col1
    ArrayList<JoinKeyFields> firstJoinKeys = new ArrayList<>();
    firstJoinKeys.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    firstJoinKeys.add(new JoinKeyFields("in1", true, new String[] { "col1" }));

    // out socket of the first join: two mapped fields plus one pass-through
    List<MapField> firstJoinMapFields = new ArrayList<>();
    firstJoinMapFields.add(new MapField("col2", "col2_in1", "in1"));
    firstJoinMapFields.add(new MapField("col3", "col3_in0", "in0"));
    List<PassThroughField> firstJoinPassThroughFields = new ArrayList<>();
    firstJoinPassThroughFields.add(new PassThroughField("col1", "in0"));

    OutSocket firstJoinOutSocket = new OutSocket("out0");
    firstJoinOutSocket.setSocketType("out");
    firstJoinOutSocket.setMapFieldsList(firstJoinMapFields);
    firstJoinOutSocket.setPassThroughFieldsList(firstJoinPassThroughFields);
    List<OutSocket> firstJoinOutSockets = new ArrayList<>();
    firstJoinOutSockets.add(firstJoinOutSocket);

    JoinEntity firstJoinEntity = new JoinEntity();
    firstJoinEntity.setComponentId("testJoin_1"); // name of the component
    firstJoinEntity.setKeyFields(firstJoinKeys);
    firstJoinEntity.setOutSocketList(firstJoinOutSockets);

    JoinAssembly firstJoin = new JoinAssembly(firstJoinEntity, firstJoinParams);
    Bucket firstJoinBucket = firstPlunger.newBucket(
            new Fields("col3_in0", "col1", "col2_in1"),
            firstJoin.getOutLink("out", "out0", "testJoin_1"));

    // collect the rows produced by the first join and verify them
    List<Tuple> firstJoinRows = firstJoinBucket.result().asTupleList();
    assertThat(firstJoinRows.size(), is(1));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> firstJoinActual = new HashSet<>(firstJoinRows);
    Set<Tuple> firstJoinExpected = new HashSet<>();
    firstJoinExpected.add(new Tuple("C3R1", "C1R1", "C4R1"));
    Assert.assertEquals(firstJoinExpected, firstJoinActual);

    // ------------------------------------------------------------------
    // Second join: pipe3 (in0) and the first join's result (in1), keyed on col1
    // ------------------------------------------------------------------
    Plunger secondPlunger = new Plunger();
    ComponentParameters secondJoinParams = new ComponentParameters();

    // data and pipe for input 3, the first input of the second join
    Data thirdData = new DataBuilder(new Fields("col1", "col2", "col3"))
            .addTuple("C1R1", "C2R4", "C3R4")
            .addTuple("C1R4", "C2R4", "C3R4")
            .build();
    Pipe thirdPipe = secondPlunger.newNamedPipe("pipe3", thirdData);

    secondJoinParams.addInputPipe(thirdPipe); // becomes the first input
    // the output of the first join becomes the second input
    secondJoinParams.addInputPipe(
            secondPlunger.newNamedPipe("join_1_pipe", firstJoinBucket.result()));
    secondJoinParams.addInputFields(new Fields("col1", "col2", "col3"));
    secondJoinParams.addInputFields(new Fields("col1", "col2_in1", "col3_in0"));
    secondJoinParams.addinSocketId("in0");
    secondJoinParams.addCopyOfInSocket("in0", new Fields("col1", "col2", "col3"));
    secondJoinParams.addCopyOfInSocket("in1", new Fields("col1", "col2_in1", "col3_in0"));
    secondJoinParams.addinSocketId("in1");

    // both inputs are keyed on col1
    ArrayList<JoinKeyFields> secondJoinKeys = new ArrayList<>();
    secondJoinKeys.add(new JoinKeyFields("in0", true, new String[] { "col1" }));
    secondJoinKeys.add(new JoinKeyFields("in1", true, new String[] { "col1" }));

    // out socket of the second join: no mapped fields, pass-through only
    // (no copyOfInSocket is configured on this socket)
    List<MapField> secondJoinMapFields = new ArrayList<>();
    List<PassThroughField> secondJoinPassThroughFields = new ArrayList<>();
    secondJoinPassThroughFields.add(new PassThroughField("col1", "in0"));
    secondJoinPassThroughFields.add(new PassThroughField("col2_in1", "in1"));
    secondJoinPassThroughFields.add(new PassThroughField("col3", "in0"));

    OutSocket secondJoinOutSocket = new OutSocket("out0");
    secondJoinOutSocket.setSocketType("out");
    secondJoinOutSocket.setMapFieldsList(secondJoinMapFields);
    secondJoinOutSocket.setPassThroughFieldsList(secondJoinPassThroughFields);
    List<OutSocket> secondJoinOutSockets = new ArrayList<>();
    secondJoinOutSockets.add(secondJoinOutSocket);

    JoinEntity secondJoinEntity = new JoinEntity();
    secondJoinEntity.setComponentId("testJoin_2"); // name of the component
    secondJoinEntity.setKeyFields(secondJoinKeys);
    secondJoinEntity.setOutSocketList(secondJoinOutSockets);

    JoinAssembly secondJoin = new JoinAssembly(secondJoinEntity, secondJoinParams);
    Bucket secondJoinBucket = secondPlunger.newBucket(
            new Fields("col1", "col2_in1", "col3"),
            secondJoin.getOutLink("out", "out0", "testJoin_2"));

    // collect the rows produced by the second join and verify them
    List<Tuple> secondJoinRows = secondJoinBucket.result().asTupleList();
    assertThat(secondJoinRows.size(), is(1));

    // compare as sets so that the order of the rows does not matter
    Set<Tuple> secondJoinActual = new HashSet<>(secondJoinRows);
    Set<Tuple> secondJoinExpected = new HashSet<>();
    secondJoinExpected.add(new Tuple("C1R1", "C3R4", "C4R1"));
    // the matching row of the very first input must not appear as the output
    Set<Tuple> secondJoinUnexpected = new HashSet<>();
    secondJoinUnexpected.add(new Tuple("C1R1", "C2R1", "C3R1"));
    Assert.assertEquals(secondJoinExpected, secondJoinActual);
    Assert.assertNotEquals(secondJoinUnexpected, secondJoinActual);
}
/*
* Negative test cases start
*//**
* Test validation of join component when no key fields are provided for
* any input. The component should throw an exception as it expects key
* fields for each input
*/
/*
* @Test public void TestJoinWithEmptyKeyFields() {
*
* Plunger plunger = new Plunger();
*
* Data file1 = new DataBuilder(new Fields("col1", "col2", "col3"))
* .addTuple("C1R1", "C2R1", "C3R1") .addTuple("C1R2", "C2R2",
* "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); //
* pipe corresponding // to an input of // join component
*
* Data file2 = new DataBuilder(new Fields("col1", "col4", "col5"))
* .addTuple("C1R1", "C4R1", "C5R1") .addTuple("C1R3", "C4R2",
* "C5R2").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); //
* pipe corresponding // to an input of // join component
*
* ComponentParameters parameters = new ComponentParameters();
*
* parameters.addInputPipe(pipe1); // first input to join component
* parameters.addInputPipe(pipe2); // second input to join component
*
* parameters.addInputFields(new Fields("col1", "col2", "col3"));
* parameters.addInputFields(new Fields("col1", "col4", "col5"));
*
* parameters.addSourceTargetMap(0, "*", "*");
* parameters.addSourceTargetMap(1, "col4", "col4");
* parameters.addSourceTargetMap(1, "col5", "col5");
*
* parameters.addOutputFields(new Fields("col1", "col2", "col3", "col4",
* "col5"));
*
* parameters.setJoinTypes(new boolean[] { true, true });
*
* parameters.setUnusedPorts(new ArrayList<Boolean>(Arrays.asList(false,
* false)));
*
* parameters.setComponentName("testJoin"); // set the name of the //
* component
*
* JoinSubAssembly join;
*
* try { // The component should throw an exception on validation join = new
* JoinSubAssembly(parameters); // create a dummy component // to be tested
* assertThat("Validation is not working", is("Validation is working")); }
* catch (ParametersValidationException e) { assertThat( e.getMessage(),
* is("'Key Fields' parameter cannot be null for component 'testJoin'")); }
* }
*//**
* Test validation of join component for missing key fields for one of the
* inputs. The component should throw an exception as it expects key fields
* for each input
*/
/*
* @Test public void TestJoinWithEmptyKeyFieldsInOneInput() {
*
* Plunger plunger = new Plunger();
*
* Data file1 = new DataBuilder(new Fields("col1", "col2", "col3"))
* .addTuple("C1R1", "C2R1", "C3R1") .addTuple("C1R2", "C2R2",
* "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); //
* pipe corresponding // to an input of // join component
*
* Data file2 = new DataBuilder(new Fields("col1", "col4", "col5"))
* .addTuple("C1R1", "C4R1", "C5R1") .addTuple("C1R3", "C4R2",
* "C5R2").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); //
* pipe corresponding // to an input of // join component
*
* ComponentParameters parameters = new ComponentParameters();
*
* parameters.addInputPipe(pipe1); // first input to join component
* parameters.addInputPipe(pipe2); // second input to join component
*
* parameters.addInputFields(new Fields("col1", "col2", "col3"));
* parameters.addInputFields(new Fields("col1", "col4", "col5"));
*
* parameters.addSourceTargetMap(0, "*", "*");
* parameters.addSourceTargetMap(1, "col4", "col4");
* parameters.addSourceTargetMap(1, "col5", "col5");
*
* parameters.addOutputFields(new Fields("col1", "col2", "col3", "col4",
* "col5"));
*
* parameters.setJoinTypes(new boolean[] { true, true });
*
* parameters.addKeyFields(new Fields("col1"));
*
* parameters.setUnusedPorts(new ArrayList<Boolean>(Arrays.asList(false,
* false)));
*
* parameters.setComponentName("testJoin"); // set the name of the //
* component
*
* JoinSubAssembly join;
*
* try { // The component should throw an exception on validation join = new
* JoinSubAssembly(parameters); // create a dummy component // to be tested
* assertThat("Validation is not working", is("Validation is working")); }
* catch (ParametersValidationException e) { assertThat( e.getMessage(), is(
* "Number of input links (2) does not match number of key field instances (1) for component 'testJoin'"
* )); } }
*//**
* Test validation of join component for missing input fields for one of
* the inputs. The component should throw an exception as it expects input
* fields for each input
*/
/*
* @Test public void TestJoinWithEmptyInputFields() {
*
* Plunger plunger = new Plunger();
*
* Data file1 = new DataBuilder(new Fields("col1", "col2", "col3"))
* .addTuple("C1R1", "C2R1", "C3R1") .addTuple("C1R2", "C2R2",
* "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); //
* pipe corresponding // to an input of // join component
*
* Data file2 = new DataBuilder(new Fields("col1", "col4", "col5"))
* .addTuple("C1R1", "C4R1", "C5R1") .addTuple("C1R3", "C4R2",
* "C5R2").build(); Pipe pipe2 = plunger.newNamedPipe("pipe2", file2); //
* pipe corresponding // to an input of // join component
*
* ComponentParameters parameters = new ComponentParameters();
*
* parameters.addInputPipe(pipe1); // first input to join component
* parameters.addInputPipe(pipe2); // second input to join component
*
* parameters.addInputFields(new Fields("col1", "col2", "col3")); //
* parameters.addInputFields(new Fields("col1", "col4", "col5"));
*
* parameters.addSourceTargetMap(0, "*", "*");
* parameters.addSourceTargetMap(1, "col4", "col4");
* parameters.addSourceTargetMap(1, "col5", "col5");
*
* parameters.addOutputFields(new Fields("col1", "col2", "col3", "col4",
* "col5"));
*
* parameters.setJoinTypes(new boolean[] { true, true });
*
* parameters.addKeyFields(new Fields("col1")); parameters.addKeyFields(new
* Fields("col1"));
*
* parameters.setUnusedPorts(new ArrayList<Boolean>(Arrays.asList(false,
* false)));
*
* parameters.setComponentName("testJoin"); // set the name of the //
* component
*
* JoinSubAssembly join;
*
* try { // The component should throw an exception on validation join = new
* JoinSubAssembly(parameters); // create a dummy component // to be tested
* assertThat("Validation is not working", is("Validation is working")); }
* catch (ParametersValidationException e) { assertThat( e.getMessage(), is(
* "Number of input links (2) does not match number of input field instances (1) for component 'testJoin'"
* )); } }
*//**
* Test validation of join component for just one input pipe. The
* component should throw a validation exception as it expects at least
* two input pipes
*/
/*
* @Test public void TestJoinWithOneInputPipe() {
*
* Plunger plunger = new Plunger();
*
* Data file1 = new DataBuilder(new Fields("col1", "col2", "col3"))
* .addTuple("C1R1", "C2R1", "C3R1") .addTuple("C1R2", "C2R2",
* "C3R2").build(); Pipe pipe1 = plunger.newNamedPipe("pipe1", file1); //
* pipe corresponding // to an input of // join component
*
* ComponentParameters parameters = new ComponentParameters();
*
* parameters.addInputPipe(pipe1); // first input to join component
*
* parameters.addInputFields(new Fields("col1", "col2", "col3")); //
* parameters.addInputFields(new Fields("col1", "col4", "col5"));
*
* parameters.addSourceTargetMap(0, "*", "*");
* parameters.addSourceTargetMap(1, "col4", "col4");
* parameters.addSourceTargetMap(1, "col5", "col5");
*
* parameters.addOutputFields(new Fields("col1", "col2", "col3", "col4",
* "col5"));
*
* parameters.setJoinTypes(new boolean[] { true, true });
*
* parameters.addKeyFields(new Fields("col1")); parameters.addKeyFields(new
* Fields("col1"));
*
* parameters.setUnusedPorts(new ArrayList<Boolean>(Arrays.asList(false,
* false)));
*
* parameters.setComponentName("testJoin"); // set the name of the //
* component
*
* JoinSubAssembly join;
*
* try { // The component should throw an exception on validation join = new
* JoinSubAssembly(parameters); // create a dummy component // to be tested
* assertThat("Validation is not working", is("Validation is working")); }
* catch (ParametersValidationException e) { assertThat( e.getMessage(), is(
* "Atleast two input links should be provided in input pipes parameter for component 'testJoin'"
* )); } } Negative test cases end
*/
}