/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.java.operators.join;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.Utils;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.operators.JoinOperator.DefaultJoin;
import org.apache.flink.api.java.operators.JoinOperator.EquiJoin;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.typeutils.TypeExtractor;
/**
* Intermediate step of an Outer Join transformation. <br>
* To continue the Join transformation, select the join key of the first input {@link DataSet} by calling
* {@link JoinOperatorSetsBase#where(int...)} or
* {@link JoinOperatorSetsBase#where(KeySelector)}.
*
* @param <I1> The type of the first input DataSet of the Join transformation.
* @param <I2> The type of the second input DataSet of the Join transformation.
*/
@Public
public class JoinOperatorSetsBase<I1, I2> {
protected final DataSet<I1> input1;
protected final DataSet<I2> input2;
protected final JoinHint joinHint;
protected final JoinType joinType;
public JoinOperatorSetsBase(DataSet<I1> input1, DataSet<I2> input2) {
this(input1, input2, JoinHint.OPTIMIZER_CHOOSES);
}
public JoinOperatorSetsBase(DataSet<I1> input1, DataSet<I2> input2, JoinHint hint) {
this(input1, input2, hint, JoinType.INNER);
}
public JoinOperatorSetsBase(DataSet<I1> input1, DataSet<I2> input2, JoinHint hint, JoinType type) {
if (input1 == null || input2 == null) {
throw new NullPointerException();
}
this.input1 = input1;
this.input2 = input2;
this.joinHint = hint;
this.joinType = type;
}
/**
* Continues a Join transformation. <br>
* Defines the {@link Tuple} fields of the first join {@link DataSet} that should be used as join keys.<br>
* <b>Note: Fields can only be selected as join keys on Tuple DataSets.</b><br>
*
* @param fields The indexes of the other Tuple fields of the first join DataSets that should be used as keys.
* @return An incomplete Join transformation.
* Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
* {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
* to continue the Join.
*
* @see Tuple
* @see DataSet
*/
public JoinOperatorSetsPredicateBase where(int... fields) {
return new JoinOperatorSetsPredicateBase(new Keys.ExpressionKeys<>(fields, input1.getType()));
}
/**
* Continues a Join transformation. <br>
* Defines the fields of the first join {@link DataSet} that should be used as grouping keys. Fields
* are the names of member fields of the underlying type of the data set.
*
* @param fields The fields of the first join DataSets that should be used as keys.
* @return An incomplete Join transformation.
* Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
* {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
* to continue the Join.
*
* @see Tuple
* @see DataSet
*/
public JoinOperatorSetsPredicateBase where(String... fields) {
return new JoinOperatorSetsPredicateBase(new Keys.ExpressionKeys<>(fields, input1.getType()));
}
/**
* Continues a Join transformation and defines a {@link KeySelector} function for the first join {@link DataSet}.<br>
* The KeySelector function is called for each element of the first DataSet and extracts a single
* key value on which the DataSet is joined. <br>
*
* @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined.
* @return An incomplete Join transformation.
* Call {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
* {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}
* to continue the Join.
*
* @see KeySelector
* @see DataSet
*/
public <K> JoinOperatorSetsPredicateBase where(KeySelector<I1, K> keySelector) {
TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keySelector, input1.getType());
return new JoinOperatorSetsPredicateBase(new Keys.SelectorFunctionKeys<>(keySelector, input1.getType(), keyType));
}
/**
* Intermediate step of a Join transformation. <br>
* To continue the Join transformation, select the join key of the second input {@link DataSet} by calling
* {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(int...)} or
* {@link org.apache.flink.api.java.operators.join.JoinOperatorSetsBase.JoinOperatorSetsPredicateBase#equalTo(KeySelector)}.
*
*/
public class JoinOperatorSetsPredicateBase {
protected final Keys<I1> keys1;
protected JoinOperatorSetsPredicateBase(Keys<I1> keys1) {
if (keys1 == null) {
throw new NullPointerException();
}
if (keys1.isEmpty()) {
throw new InvalidProgramException("The join keys must not be empty.");
}
this.keys1 = keys1;
}
/**
* Continues a Join transformation and defines the {@link Tuple} fields of the second join
* {@link DataSet} that should be used as join keys.<br>
* <b>Note: Fields can only be selected as join keys on Tuple DataSets.</b><br>
*
* The resulting {@link JoinFunctionAssigner} needs to be finished by providing a
* {@link JoinFunction} by calling {@link JoinFunctionAssigner#with(JoinFunction)}
*
* @param fields The indexes of the Tuple fields of the second join DataSet that should be used as keys.
* @return A JoinFunctionAssigner.
*/
public JoinFunctionAssigner<I1, I2> equalTo(int... fields) {
return createJoinFunctionAssigner(new Keys.ExpressionKeys<>(fields, input2.getType()));
}
/**
* Continues a Join transformation and defines the fields of the second join
* {@link DataSet} that should be used as join keys.<br>
*
* The resulting {@link JoinFunctionAssigner} needs to be finished by providing a
* {@link JoinFunction} by calling {@link JoinFunctionAssigner#with(JoinFunction)}
*
* @param fields The fields of the second join DataSet that should be used as keys.
* @return A JoinFunctionAssigner.
*/
public JoinFunctionAssigner<I1, I2> equalTo(String... fields) {
return createJoinFunctionAssigner(new Keys.ExpressionKeys<>(fields, input2.getType()));
}
/**
* Continues a Join transformation and defines a {@link KeySelector} function for the second join {@link DataSet}.<br>
* The KeySelector function is called for each element of the second DataSet and extracts a single
* key value on which the DataSet is joined. <br>
*
* The resulting {@link JoinFunctionAssigner} needs to be finished by providing a
* {@link JoinFunction} by calling {@link JoinFunctionAssigner#with(JoinFunction)}
*
* @param keySelector The KeySelector function which extracts the key values from the second DataSet on which it is joined.
* @return A JoinFunctionAssigner.
*/
public <K> JoinFunctionAssigner<I1, I2> equalTo(KeySelector<I2, K> keySelector) {
TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keySelector, input2.getType());
return createJoinFunctionAssigner(new Keys.SelectorFunctionKeys<>(keySelector, input2.getType(), keyType));
}
protected JoinFunctionAssigner<I1, I2> createJoinFunctionAssigner(Keys<I2> keys2) {
DefaultJoin<I1, I2> join = createDefaultJoin(keys2);
return new DefaultJoinFunctionAssigner(join);
}
protected DefaultJoin<I1, I2> createDefaultJoin(Keys<I2> keys2) {
if (keys2 == null) {
throw new NullPointerException("The join keys may not be null.");
}
if (keys2.isEmpty()) {
throw new InvalidProgramException("The join keys may not be empty.");
}
try {
keys1.areCompatible(keys2);
} catch (Keys.IncompatibleKeysException e) {
throw new InvalidProgramException("The pair of join keys are not compatible with each other.",e);
}
return new DefaultJoin<>(input1, input2, keys1, keys2, joinHint, Utils.getCallLocationName(4), joinType);
}
private class DefaultJoinFunctionAssigner implements JoinFunctionAssigner<I1, I2> {
private final DefaultJoin<I1, I2> defaultJoin;
public DefaultJoinFunctionAssigner(DefaultJoin<I1, I2> defaultJoin) {
this.defaultJoin = defaultJoin;
}
public <R> EquiJoin<I1, I2, R> with(JoinFunction<I1, I2, R> joinFunction) {
return defaultJoin.with(joinFunction);
}
public <R> EquiJoin<I1, I2, R> with(FlatJoinFunction<I1, I2, R> joinFunction) {
return defaultJoin.with(joinFunction);
}
}
}
}