/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.java.tuple;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import com.google.common.io.Files;
/**
* Source code generator for tuple classes and classes which depend on the arity of tuples.
*/
class TupleGenerator {
// Default source root (relative path) under which the classes to modify are looked up;
// main() prefixes it with args[0] when an argument is given.
private static final String ROOT_DIRECTORY = "./flink-java/src/main/java";
// Prefix of the generated generic type parameter names; the field index is appended to it.
private static final String GEN_TYPE_PREFIX = "T";
// Marker texts searched for in the target files: the generated code is inserted after the
// line containing the begin marker, and existing lines are dropped up to the end marker line.
private static final String BEGIN_INDICATOR = "BEGIN_OF_TUPLE_DEPENDENT_CODE";
private static final String END_INDICATOR = "END_OF_TUPLE_DEPENDENT_CODE";
// Package and class name of the CsvReader source file to modify
private static final String CSV_READER_PACKAGE = "org.apache.flink.api.java.io";
private static final String CSV_READER_CLASSNAME = "CsvReader";
// Package and class name of the ProjectOperator source file to modify
private static final String PROJECT_OPERATOR_PACKAGE = "org.apache.flink.api.java.operators";
private static final String PROJECT_OPERATOR_CLASSNAME = "ProjectOperator";
// Package and class name of the JoinOperator source file to modify
private static final String JOIN_OPERATOR_PACKAGE = "org.apache.flink.api.java.operators";
private static final String JOIN_OPERATOR_CLASSNAME = "JoinOperator";
// Package and class name of the CrossOperator source file to modify
private static final String CROSS_OPERATOR_PACKAGE = "org.apache.flink.api.java.operators";
private static final String CROSS_OPERATOR_CLASSNAME = "CrossOperator";
// Smallest and largest tuple arity (both inclusive) for which code is generated
private static final int FIRST = 1;
private static final int LAST = 25;
/**
 * Runs the generator against all tuple-dependent source files.
 *
 * <p>The source root defaults to {@code ROOT_DIRECTORY}; when at least one argument is given,
 * the first argument is used as a base path in front of it.
 *
 * @param args optional; {@code args[0]} is the base path prepended to the default source root
 * @throws Exception if rewriting one of the target files fails
 */
public static void main(String[] args) throws Exception {
	System.err.println("Current directory " + System.getProperty("user.dir"));

	// an explicit base path takes precedence over the plain relative default
	final String sourceRoot = (args.length > 0)
			? args[0] + "/" + ROOT_DIRECTORY
			: ROOT_DIRECTORY;
	System.err.println("Using root directory: " + sourceRoot);

	final File root = new File(sourceRoot);
	modifyCsvReader(root);
	modifyProjectOperator(root);
	modifyJoinProjectOperator(root);
	modifyCrossProjectOperator(root);
}
/**
 * Resolves the directory that corresponds to a Java package below the given source root.
 *
 * <p>If the resolved path does not exist or is not a directory, an error is printed to
 * {@code System.err} and the JVM is terminated with exit code 1.
 *
 * @param root the source root directory
 * @param packageString the dot-separated package name
 * @return the directory of the package
 */
private static File getPackage(File root, String packageString) {
	File dir = new File(root, packageString.replace('.', '/'));
	// File.isDirectory() is false for a missing path as well as for a regular file, so this
	// single check covers both. The previous condition (!exists() && isDirectory()) could
	// never be true and therefore never rejected anything.
	if (!dir.isDirectory()) {
		System.err.println("None existent directory: " + dir.getAbsolutePath());
		System.exit(1);
	}
	return dir;
}
/**
 * Replaces the generated region of a source file with the given code.
 *
 * <p>The file is copied line by line up to and including the first line that contains
 * {@code BEGIN_INDICATOR}. A generator signature comment and {@code code} are appended, the
 * existing lines up to the line containing {@code END_INDICATOR} are dropped, and the end
 * indicator line plus the remainder of the file are copied. Every copied line is terminated
 * with {@code "\n"}.
 *
 * <p>If either indicator is missing, a message is printed and the file is left untouched.
 * Without the end-indicator check, everything after the begin indicator would be silently
 * discarded and the truncated content written back.
 *
 * @param code the generated code to insert between the two indicator lines
 * @param file the source file to rewrite in place (read and written as UTF-8)
 * @throws IOException if the file cannot be read or written
 */
private static void insertCodeIntoFile(String code, File file) throws IOException {
	String fileContent = Files.toString(file, StandardCharsets.UTF_8);
	StringBuilder sb = new StringBuilder();

	// try-with-resources closes the scanner on every exit path, including the early returns
	try (Scanner s = new Scanner(fileContent)) {
		// add file beginning, up to and including the begin indicator line
		boolean beginFound = false;
		while (!beginFound && s.hasNextLine()) {
			String line = s.nextLine();
			sb.append(line).append("\n");
			beginFound = line.contains(BEGIN_INDICATOR);
		}
		if (!beginFound) {
			System.out.println("No indicator found in '" + file + "'. Will skip code generation.");
			return;
		}

		// add generator signature
		sb.append("\t// GENERATED FROM ").append(TupleGenerator.class.getName()).append(".\n");
		// add tuple dependent code
		sb.append(code).append("\n");

		// skip previously generated code; only the end indicator line itself is kept
		boolean endFound = false;
		while (!endFound && s.hasNextLine()) {
			String line = s.nextLine();
			if (line.contains(END_INDICATOR)) {
				sb.append(line).append("\n");
				endFound = true;
			}
		}
		if (!endFound) {
			// writing now would drop everything that followed the begin indicator
			System.out.println("No end indicator found in '" + file + "'. Will skip code generation.");
			return;
		}

		// add file ending
		while (s.hasNextLine()) {
			sb.append(s.nextLine()).append("\n");
		}
	}

	Files.write(sb.toString(), file, StandardCharsets.UTF_8);
}
/**
 * Generates the {@code projectTupleX()} dispatcher and one {@code projectTupleN()} method per
 * arity from {@code FIRST} to {@code LAST}, and inserts them into the CrossOperator source file.
 *
 * @param root the source root directory below which the CrossOperator package is resolved
 * @throws IOException if the target file cannot be read or written
 */
private static void modifyCrossProjectOperator(File root) throws IOException {
	final StringBuilder code = new StringBuilder();

	// dispatcher: leading blank line and Javadoc
	code.append("\n")
		.append("\t\t/**\n")
		.append("\t\t * Chooses a projectTupleX according to the length of\n")
		.append("\t\t * {@link org.apache.flink.api.java.operators.CrossOperator.CrossProjection#fieldIndexes} \n")
		.append("\t\t * \n")
		.append("\t\t * @return The projected DataSet.\n")
		.append("\t\t */\n");

	// dispatcher: signature and switch over the number of selected fields
	code.append("\t\t@SuppressWarnings(\"unchecked\")\n")
		.append("\t\tpublic <OUT extends Tuple> ProjectCross<I1, I2, OUT> projectTupleX() {\n")
		.append("\t\t\tProjectCross<I1, I2, OUT> projectionCross = null;\n\n")
		.append("\t\t\tswitch (fieldIndexes.length) {\n");
	for (int arity = FIRST; arity <= LAST; arity++) {
		code.append("\t\t\tcase ").append(arity).append(":")
			.append(" projectionCross = (ProjectCross<I1, I2, OUT>) projectTuple").append(arity)
			.append("(); break;\n");
	}
	code.append("\t\t\tdefault: throw new IllegalStateException(\"Excessive arity in tuple.\");\n")
		.append("\t\t\t}\n\n")
		.append("\t\t\treturn projectionCross;\n")
		.append("\t\t}\n");

	// one typed projection method per arity
	for (int arity = FIRST; arity <= LAST; arity++) {
		// leading blank line and Javadoc
		code.append("\n")
			.append("\t\t/**\n")
			.append("\t\t * Projects a pair of crossed elements to a {@link Tuple} with the previously selected fields. \n")
			.append("\t\t * \n")
			.append("\t\t * @return The projected data set.\n")
			.append("\t\t * \n")
			.append("\t\t * @see Tuple\n")
			.append("\t\t * @see DataSet\n")
			.append("\t\t */\n");

		// signature
		code.append("\t\tpublic <");
		appendTupleTypeGenerics(code, arity);
		code.append("> ProjectCross<I1, I2, Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> projectTuple").append(arity).append("(").append(") {\n");

		// field type extraction
		code.append("\t\t\tTypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes);\n");

		// tuple type info
		code.append("\t\t\tTupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> tType = new TupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(fTypes);\n\n");

		// construction of the returned operator
		code.append("\t\t\treturn new ProjectCross<I1, I2, Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(this.ds1, this.ds2, this.fieldIndexes, this.isFieldInFirst, tType, this, hint);\n");

		// closing brace of the method
		code.append("\t\t}\n");
	}

	// write the generated region into the target file
	final File packageDir = getPackage(root, CROSS_OPERATOR_PACKAGE);
	final File crossOperatorFile = new File(packageDir, CROSS_OPERATOR_CLASSNAME + ".java");
	insertCodeIntoFile(code.toString(), crossOperatorFile);
}
/**
 * Generates the {@code projectTupleX()} dispatcher and one {@code projectTupleN()} method per
 * arity from {@code FIRST} to {@code LAST}, and inserts them into the ProjectOperator source file.
 *
 * @param root the source root directory below which the ProjectOperator package is resolved
 * @throws IOException if the target file cannot be read or written
 */
private static void modifyProjectOperator(File root) throws IOException {
	final StringBuilder code = new StringBuilder();

	// dispatcher: leading blank line and Javadoc
	code.append("\n")
		.append("\t\t/**\n")
		.append("\t\t * Chooses a projectTupleX according to the length of\n")
		.append("\t\t * {@link org.apache.flink.api.java.operators.ProjectOperator.Projection#fieldIndexes} \n")
		.append("\t\t * \n")
		.append("\t\t * @return The projected DataSet.\n")
		.append("\t\t * \n")
		.append("\t\t * @see org.apache.flink.api.java.operators.ProjectOperator.Projection\n")
		.append("\t\t */\n");

	// dispatcher: signature and switch over the number of selected fields
	code.append("\t\t@SuppressWarnings(\"unchecked\")\n")
		.append("\t\tpublic <OUT extends Tuple> ProjectOperator<T, OUT> projectTupleX() {\n")
		.append("\t\t\tProjectOperator<T, OUT> projOperator;\n\n")
		.append("\t\t\tswitch (fieldIndexes.length) {\n");
	for (int arity = FIRST; arity <= LAST; arity++) {
		code.append("\t\t\tcase ").append(arity).append(":")
			.append(" projOperator = (ProjectOperator<T, OUT>) projectTuple").append(arity)
			.append("(); break;\n");
	}
	code.append("\t\t\tdefault: throw new IllegalStateException(\"Excessive arity in tuple.\");\n")
		.append("\t\t\t}\n\n")
		.append("\t\t\treturn projOperator;\n")
		.append("\t\t}\n");

	// one typed projection method per arity
	for (int arity = FIRST; arity <= LAST; arity++) {
		// leading blank line and Javadoc
		code.append("\n")
			.append("\t\t/**\n")
			.append("\t\t * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. \n")
			.append("\t\t * \n")
			.append("\t\t * @return The projected DataSet.\n")
			.append("\t\t * \n")
			.append("\t\t * @see Tuple\n")
			.append("\t\t * @see DataSet\n")
			.append("\t\t */\n");

		// signature
		code.append("\t\tpublic <");
		appendTupleTypeGenerics(code, arity);
		code.append("> ProjectOperator<T, Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> projectTuple").append(arity).append("(").append(") {\n");

		// field type extraction
		code.append("\t\t\tTypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType());\n");

		// tuple type info
		code.append("\t\t\tTupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> tType = new TupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(fTypes);\n\n");

		// construction of the returned operator
		code.append("\t\t\treturn new ProjectOperator<T, Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(this.ds, this.fieldIndexes, tType);\n");

		// closing brace of the method
		code.append("\t\t}\n");
	}

	// write the generated region into the target file
	final File packageDir = getPackage(root, PROJECT_OPERATOR_PACKAGE);
	final File projectOperatorFile = new File(packageDir, PROJECT_OPERATOR_CLASSNAME + ".java");
	insertCodeIntoFile(code.toString(), projectOperatorFile);
}
/**
 * Generates the {@code projectTupleX()} dispatcher and one {@code projectTupleN()} method per
 * arity from {@code FIRST} to {@code LAST}, and inserts them into the JoinOperator source file.
 *
 * @param root the source root directory below which the JoinOperator package is resolved
 * @throws IOException if the target file cannot be read or written
 */
private static void modifyJoinProjectOperator(File root) throws IOException {
	final StringBuilder code = new StringBuilder();

	// dispatcher: leading blank line and Javadoc
	code.append("\n")
		.append("\t\t/**\n")
		.append("\t\t * Chooses a projectTupleX according to the length of\n")
		.append("\t\t * {@link org.apache.flink.api.java.operators.JoinOperator.JoinProjection#fieldIndexes}\n")
		.append("\t\t * \n")
		.append("\t\t * @return The projected DataSet.\n")
		.append("\t\t * \n")
		.append("\t\t * @see org.apache.flink.api.java.operators.JoinOperator.ProjectJoin\n")
		.append("\t\t */\n");

	// dispatcher: signature and switch over the number of selected fields
	code.append("\t\t@SuppressWarnings(\"unchecked\")\n")
		.append("\t\tpublic <OUT extends Tuple> ProjectJoin<I1, I2, OUT> projectTupleX() {\n")
		.append("\t\t\tProjectJoin<I1, I2, OUT> projectJoin = null;\n\n")
		.append("\t\t\tswitch (fieldIndexes.length) {\n");
	for (int arity = FIRST; arity <= LAST; arity++) {
		code.append("\t\t\tcase ").append(arity).append(":")
			.append(" projectJoin = (ProjectJoin<I1, I2, OUT>) projectTuple").append(arity)
			.append("(); break;\n");
	}
	code.append("\t\t\tdefault: throw new IllegalStateException(\"Excessive arity in tuple.\");\n")
		.append("\t\t\t}\n\n")
		.append("\t\t\treturn projectJoin;\n")
		.append("\t\t}\n");

	// one typed projection method per arity
	for (int arity = FIRST; arity <= LAST; arity++) {
		// leading blank line and Javadoc
		code.append("\n")
			.append("\t\t/**\n")
			.append("\t\t * Projects a pair of joined elements to a {@link Tuple} with the previously selected fields. \n")
			.append("\t\t * Requires the classes of the fields of the resulting tuples. \n")
			.append("\t\t * \n")
			.append("\t\t * @return The projected data set.\n")
			.append("\t\t * \n")
			.append("\t\t * @see Tuple\n")
			.append("\t\t * @see DataSet\n")
			.append("\t\t */\n");

		// signature
		code.append("\t\tpublic <");
		appendTupleTypeGenerics(code, arity);
		code.append("> ProjectJoin<I1, I2, Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> projectTuple").append(arity).append("(").append(") {\n");

		// field type extraction
		code.append("\t\t\tTypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes);\n");

		// tuple type info
		code.append("\t\t\tTupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> tType = new TupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(fTypes);\n\n");

		// construction of the returned operator
		code.append("\t\t\treturn new ProjectJoin<I1, I2, Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(this.ds1, this.ds2, this.keys1, this.keys2, this.hint, this.fieldIndexes, this.isFieldInFirst, tType, this);\n");

		// closing brace of the method
		code.append("\t\t}\n");
	}

	// write the generated region into the target file
	final File packageDir = getPackage(root, JOIN_OPERATOR_PACKAGE);
	final File joinOperatorFile = new File(packageDir, JOIN_OPERATOR_CLASSNAME + ".java");
	insertCodeIntoFile(code.toString(), joinOperatorFile);
}
/**
 * Generates one {@code types(...)} method per arity from {@code FIRST} to {@code LAST} and
 * inserts them into the CsvReader source file.
 *
 * @param root the source root directory below which the CsvReader package is resolved
 * @throws IOException if the target file cannot be read or written
 */
private static void modifyCsvReader(File root) throws IOException {
	final StringBuilder code = new StringBuilder(1000);

	for (int arity = FIRST; arity <= LAST; arity++) {
		// leading blank line and Javadoc header
		code.append("\n")
			.append("\t/**\n")
			.append("\t * Specifies the types for the CSV fields. This method parses the CSV data to a ").append(arity).append("-tuple\n")
			.append("\t * which has fields of the specified types.\n")
			.append("\t * This method is overloaded for each possible length of the tuples to support type safe\n")
			.append("\t * creation of data sets through CSV parsing.\n")
			.append("\t *\n");

		// one @param entry per tuple field
		for (int field = 0; field < arity; field++) {
			code.append("\t * @param type").append(field)
				.append(" The type of CSV field ").append(field).append(" and the type of field ")
				.append(field).append(" in the returned tuple type.\n");
		}
		code.append("\t * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.\n")
			.append("\t */\n");

		// signature with one Class<Ti> parameter per field
		code.append("\tpublic <");
		appendTupleTypeGenerics(code, arity);
		code.append("> DataSource<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> types(");
		String paramSeparator = "";
		for (int field = 0; field < arity; field++) {
			code.append(paramSeparator)
				.append("Class<")
				.append(GEN_TYPE_PREFIX).append(field)
				.append("> type").append(field);
			paramSeparator = ", ";
		}
		code.append(") {\n");

		// tuple type info built from the type arguments
		code.append("\t\tTupleTypeInfo<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> types = TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(");
		String argSeparator = "";
		for (int field = 0; field < arity; field++) {
			code.append(argSeparator).append("type").append(field);
			argSeparator = ", ";
		}
		code.append(");\n");

		// csv input format creation
		code.append("\t\tCsvInputFormat<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">> inputFormat = new TupleCsvInputFormat<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(path, types, this.includedMask);\n");

		// input format configuration
		code.append("\t\tconfigureInputFormat(inputFormat);\n");

		// return statement
		code.append("\t\treturn new DataSource<Tuple").append(arity).append("<");
		appendTupleTypeGenerics(code, arity);
		code.append(">>(executionContext, inputFormat, types, Utils.getCallLocationName());\n");

		// closing brace of the method
		code.append("\t}\n");
	}

	// write the generated region into the target file
	final File packageDir = getPackage(root, CSV_READER_PACKAGE);
	final File csvReaderFile = new File(packageDir, CSV_READER_CLASSNAME + ".java");
	insertCodeIntoFile(code.toString(), csvReaderFile);
}
/**
 * Appends a comma-separated list of generic type names to the builder: {@code GEN_TYPE_PREFIX}
 * followed by the indexes {@code 0} to {@code numFields - 1}.
 *
 * @param sb the builder to append to
 * @param numFields the number of type names to append; nothing is appended for values below 1
 */
private static void appendTupleTypeGenerics(StringBuilder sb, int numFields) {
	// empty before the first name, ", " before every following one
	String separator = "";
	for (int index = 0; index < numFields; index++) {
		sb.append(separator).append(GEN_TYPE_PREFIX).append(index);
		separator = ", ";
	}
}
/**
 * File header text: the Apache license comment followed by a banner that marks a source file
 * as generated by this class.
 *
 * <p>Declared {@code final} because it is a constant that is never reassigned. It is not
 * referenced by the generator methods of this class.
 */
private static final String HEADER =
	"/*\n"
	+ " * Licensed to the Apache Software Foundation (ASF) under one\n"
	+ " * or more contributor license agreements. See the NOTICE file\n"
	+ " * distributed with this work for additional information\n"
	+ " * regarding copyright ownership. The ASF licenses this file\n"
	+ " * to you under the Apache License, Version 2.0 (the\n"
	+ " * \"License\"); you may not use this file except in compliance\n"
	+ " * with the License. You may obtain a copy of the License at\n"
	+ " *\n"
	+ " * http://www.apache.org/licenses/LICENSE-2.0\n"
	+ " *\n"
	+ " * Unless required by applicable law or agreed to in writing, software\n"
	+ " * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
	+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
	+ " * See the License for the specific language governing permissions and\n"
	+ " * limitations under the License.\n"
	+ " */"
	+ "\n"
	+ "\n"
	+ "\n"
	+ "// --------------------------------------------------------------\n"
	+ "// THIS IS A GENERATED SOURCE FILE. DO NOT EDIT!\n"
	+ "// GENERATED FROM " + TupleGenerator.class.getName() + ".\n"
	+ "// --------------------------------------------------------------\n\n\n";
}