/*********************************************************************************************************************** * * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. * **********************************************************************************************************************/ package eu.stratosphere.api.java.operator; import java.io.Serializable; import java.util.ArrayList; import java.util.List; import junit.framework.Assert; import org.junit.BeforeClass; import org.junit.Test; import eu.stratosphere.api.common.InvalidProgramException; import eu.stratosphere.api.java.DataSet; import eu.stratosphere.api.java.ExecutionEnvironment; import eu.stratosphere.api.java.functions.KeySelector; import eu.stratosphere.api.java.tuple.Tuple5; import eu.stratosphere.api.java.typeutils.BasicTypeInfo; import eu.stratosphere.api.java.typeutils.TupleTypeInfo; @SuppressWarnings("serial") public class JoinOperatorTest { // TUPLE DATA private static final List<Tuple5<Integer, Long, String, Long, Integer>> emptyTupleData = new ArrayList<Tuple5<Integer, Long, String, Long, Integer>>(); private final TupleTypeInfo<Tuple5<Integer, Long, String, Long, Integer>> tupleTypeInfo = new TupleTypeInfo<Tuple5<Integer, Long, String, Long, Integer>>( BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO ); private static List<CustomType> customTypeData = new ArrayList<CustomType>(); @BeforeClass public static void insertCustomData() { customTypeData.add(new CustomType()); } @Test public void testJoinKeyFields1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0); } catch(Exception e) { Assert.fail(); } } @Test(expected = InvalidProgramException.class) public void testJoinKeyFields2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, incompatible join key types ds1.join(ds2).where(0).equalTo(2); } @Test(expected = InvalidProgramException.class) public void testJoinKeyFields3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, incompatible number of join keys ds1.join(ds2).where(0,1).equalTo(2); } @Test(expected = IllegalArgumentException.class) public void testJoinKeyFields4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, join key out of range ds1.join(ds2).where(5).equalTo(0); } @Test(expected = IllegalArgumentException.class) public void testJoinKeyFields5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, empty join key ds1.join(ds2).where().equalTo(); } @Test(expected = IllegalArgumentException.class) public void testJoinKeyFields6() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, negative key field position ds1.join(ds2).where(-1).equalTo(-1); } @Test(expected = IllegalArgumentException.class) public void testJoinKeyFields7() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<CustomType> ds2 = env.fromCollection(customTypeData); // should not work, join key fields on custom type ds1.join(ds2).where(5).equalTo(0); } @Test public void testJoinKeySelectors1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds1 = env.fromCollection(customTypeData); DataSet<CustomType> ds2 = env.fromCollection(customTypeData); // should work try { ds1.join(ds2) .where( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ) .equalTo( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinKeyMixing1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds1 = env.fromCollection(customTypeData); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2) .where( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ) .equalTo(3); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinKeyMixing2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<CustomType> ds2 = env.fromCollection(customTypeData); // should work try { ds1.join(ds2) .where(3) .equalTo( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ); } catch(Exception e) { Assert.fail(); } } @Test(expected = InvalidProgramException.class) public void testJoinKeyMixing3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<CustomType> ds2 = env.fromCollection(customTypeData); // should not work, incompatible types ds1.join(ds2) .where(2) .equalTo( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ); } @Test(expected = InvalidProgramException.class) public void testJoinKeyMixing4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<CustomType> ds2 = env.fromCollection(customTypeData); // should not work, more than one key field position ds1.join(ds2) .where(1,3) .equalTo( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ); } @Test public void testJoinProjection1() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0) .types(Integer.class); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinProjection2() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0,3) .types(Integer.class, Long.class); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinProjection3() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0) .projectSecond(3) .types(Integer.class, Long.class); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinProjection4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectFirst(0,2) .projectSecond(1,4) .projectFirst(1) .types(Integer.class, String.class, Long.class, Integer.class, Long.class); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinProjection5() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectSecond(0,2) .projectFirst(1,4) .projectFirst(1) .types(Integer.class, String.class, Long.class, Integer.class, Long.class); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinProjection6() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<CustomType> ds1 = env.fromCollection(customTypeData); DataSet<CustomType> ds2 = env.fromCollection(customTypeData); // should work try { ds1.join(ds2) .where( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ) .equalTo( new KeySelector<CustomType, Long>() { @Override public Long getKey(CustomType value) { return value.myLong; } } ) .projectFirst() .projectSecond() .types(CustomType.class, CustomType.class); } catch(Exception e) { Assert.fail(); } } @Test public void testJoinProjection7() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { ds1.join(ds2).where(0).equalTo(0) .projectSecond() .projectFirst(1,4) .types(Tuple5.class, Long.class, Integer.class); } catch(Exception e) { Assert.fail(); } } @Test(expected=IndexOutOfBoundsException.class) public void testJoinProjection8() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectFirst(5) .types(Integer.class); } @Test(expected=IndexOutOfBoundsException.class) public void testJoinProjection9() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectSecond(5) .types(Integer.class); } @Test(expected=IllegalArgumentException.class) public void testJoinProjection10() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, type does not match ds1.join(ds2).where(0).equalTo(0) .projectFirst(2) .types(Integer.class); } @Test(expected=IllegalArgumentException.class) public void testJoinProjection11() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, type does not match ds1.join(ds2).where(0).equalTo(0) .projectSecond(2) .types(Integer.class); } @Test(expected=IllegalArgumentException.class) public void testJoinProjection12() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, number of types and fields does not match ds1.join(ds2).where(0).equalTo(0) .projectSecond(2) .projectFirst(1) .types(String.class); } @Test(expected=IndexOutOfBoundsException.class) public void testJoinProjection13() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectSecond(0) .projectFirst(5) .types(Integer.class); } @Test(expected=IndexOutOfBoundsException.class) public void testJoinProjection14() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, index out of range ds1.join(ds2).where(0).equalTo(0) .projectFirst(0) .projectSecond(5) .types(Integer.class); } /* * #################################################################### */ public static class CustomType implements Serializable { private static final long serialVersionUID = 1L; public int myInt; public long myLong; public String myString; public CustomType() {}; public CustomType(int i, long l, String s) { myInt = i; myLong = l; myString = s; } @Override public String toString() { return myInt+","+myLong+","+myString; } } }