/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.java.typeutils;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.types.Value;
/**
 * Parser that turns a textual type description (for example
 * {@code "Tuple2<String, Integer[]>"}) into the corresponding
 * {@code TypeInformation} object.
 *
 * <p>The parser is a small recursive-descent parser working on a
 * {@code StringBuilder}: every recognized token is removed from the front of
 * the builder, so the builder always holds the not-yet-consumed remainder.
 */
@Public
public class TypeInfoParser {
    private static final String TUPLE_PACKAGE = "org.apache.flink.api.java.tuple";
    private static final String VALUE_PACKAGE = "org.apache.flink.types";
    private static final String WRITABLE_PACKAGE = "org.apache.hadoop.io";

    // All patterns are anchored at the start of the remaining input. Most of them end with a
    // look-at-the-terminator group "(,|>|$|\[)" so that e.g. "Integer" does not match the
    // prefix of a custom class named "IntegerFoo".
    private static final Pattern tuplePattern = Pattern.compile("^(" + TUPLE_PACKAGE.replaceAll("\\.", "\\\\.") + "\\.)?((Tuple[1-9][0-9]?)<|(Tuple0))");
    private static final Pattern writablePattern = Pattern.compile("^((" + WRITABLE_PACKAGE.replaceAll("\\.", "\\\\.") + "\\.)?Writable)<([^\\s,>]*)(,|>|$|\\[)");
    private static final Pattern enumPattern = Pattern.compile("^((java\\.lang\\.)?Enum)<([^\\s,>]*)(,|>|$|\\[)");
    private static final Pattern basicTypePattern = Pattern
            .compile("^((java\\.lang\\.)?(String|Integer|Byte|Short|Character|Double|Float|Long|Boolean|Void))(,|>|$|\\[)");
    private static final Pattern basicTypeDatePattern = Pattern.compile("^((java\\.util\\.)?Date)(,|>|$|\\[)");
    private static final Pattern basicTypeBigIntPattern = Pattern.compile("^((java\\.math\\.)?BigInteger)(,|>|$|\\[)");
    private static final Pattern basicTypeBigDecPattern = Pattern.compile("^((java\\.math\\.)?BigDecimal)(,|>|$|\\[)");
    private static final Pattern primitiveTypePattern = Pattern.compile("^(int|byte|short|char|double|float|long|boolean|void)(,|>|$|\\[)");
    private static final Pattern valueTypePattern = Pattern.compile("^((" + VALUE_PACKAGE.replaceAll("\\.", "\\\\.")
            + "\\.)?(String|Int|Byte|Short|Char|Double|Float|Long|Boolean|List|Map|Null))Value(,|>|$|\\[)");
    private static final Pattern pojoGenericObjectPattern = Pattern.compile("^([^\\s,<>\\[]+)(<)?");
    private static final Pattern fieldPattern = Pattern.compile("^([^\\s,<>\\[]+)=");

    /**
     * Generates an instance of {@code TypeInformation} by parsing a type
     * information string. All whitespace in the string is ignored. A type
     * information string can contain the following types:
     *
     * <ul>
     * <li>Basic types such as {@code Integer}, {@code String}, etc.</li>
     * <li>Basic type arrays such as {@code Integer[]}, {@code String[]}, etc.</li>
     * <li>Primitive types and primitive arrays such as {@code int},
     * {@code double[]}, etc.</li>
     * <li>Tuple types such as {@code Tuple1<TYPE0>},
     * {@code Tuple2<TYPE0, TYPE1>}, etc.</li>
     * <li>Pojo types such as
     * {@code org.my.MyPojo<myFieldName=TYPE0,myFieldName2=TYPE1>}, etc.</li>
     * <li>Generic types such as {@code java.lang.Class}, etc.</li>
     * <li>Custom type arrays such as {@code org.my.CustomClass[]},
     * {@code org.my.CustomClass$StaticInnerClass[]}, etc.</li>
     * <li>Value types such as {@code DoubleValue}, {@code StringValue},
     * {@code IntValue}, etc.</li>
     * <li>Tuple array types such as {@code Tuple2<TYPE0,TYPE1>[]}, etc.</li>
     * <li>Writable types such as {@code Writable<org.my.CustomWritable>}</li>
     * <li>Enum types such as {@code Enum<org.my.CustomEnum>}</li>
     * </ul>
     *
     * <p>Example:
     * {@code "Tuple2<String,Tuple2<Integer,org.my.MyJob$Pojo<word=String>>>"}
     *
     * @param infoString
     *            type information string to be parsed
     * @return {@code TypeInformation} representation of the string
     * @throws IllegalArgumentException
     *             if the string is {@code null}, empty, refers to a class that
     *             cannot be loaded, or cannot be parsed completely
     */
    @SuppressWarnings("unchecked")
    public static <X> TypeInformation<X> parse(String infoString) {
        try {
            if (infoString == null) {
                throw new IllegalArgumentException("String is null.");
            }
            String clearedString = infoString.replaceAll("\\s", "");
            if (clearedString.length() == 0) {
                throw new IllegalArgumentException("String must not be empty.");
            }
            StringBuilder sb = new StringBuilder(clearedString);
            TypeInformation<X> ti = (TypeInformation<X>) parse(sb);
            // anything left over means the input had trailing garbage
            if (sb.length() > 0) {
                throw new IllegalArgumentException("String could not be parsed completely.");
            }
            return ti;
        } catch (Exception e) {
            // single failure type for callers; the original exception is kept as the cause
            throw new IllegalArgumentException("String could not be parsed: " + e.getMessage(), e);
        }
    }

    /**
     * Parses exactly one type (including trailing array brackets and one
     * trailing separator comma) from the front of {@code sb} and removes the
     * consumed characters from it.
     *
     * @param sb
     *            remaining, whitespace-free input; modified in place
     * @return the parsed type, or {@code null} if {@code sb} is empty
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private static TypeInformation<?> parse(StringBuilder sb) {
        String infoString = sb.toString();
        if (infoString.length() == 0) {
            return null;
        }

        final Matcher tupleMatcher = tuplePattern.matcher(infoString);
        final Matcher writableMatcher = writablePattern.matcher(infoString);
        final Matcher enumMatcher = enumPattern.matcher(infoString);
        final Matcher basicTypeMatcher = basicTypePattern.matcher(infoString);
        final Matcher basicTypeDateMatcher = basicTypeDatePattern.matcher(infoString);
        final Matcher basicTypeBigIntMatcher = basicTypeBigIntPattern.matcher(infoString);
        final Matcher basicTypeBigDecMatcher = basicTypeBigDecPattern.matcher(infoString);
        final Matcher primitiveTypeMatcher = primitiveTypePattern.matcher(infoString);
        final Matcher valueTypeMatcher = valueTypePattern.matcher(infoString);
        final Matcher pojoGenericMatcher = pojoGenericObjectPattern.matcher(infoString);

        TypeInformation<?> returnType = null;
        boolean isPrimitiveType = false;

        // The order of the checks matters: the pojo/generic pattern at the end matches
        // almost any identifier, so the more specific patterns have to be tried first.

        // tuples
        if (tupleMatcher.find()) {
            returnType = parseTuple(sb, tupleMatcher);
        }
        // writable types
        else if (writableMatcher.find()) {
            Class<?> clazz = loadClass(consumeWrappedClassName(sb, writableMatcher));
            returnType = TypeExtractor.createHadoopWritableTypeInfo(clazz);
        }
        // enum types
        else if (enumMatcher.find()) {
            Class<?> clazz = loadClass(consumeWrappedClassName(sb, enumMatcher));
            returnType = new EnumTypeInfo(clazz);
        }
        // basic types
        else if (basicTypeMatcher.find()) {
            returnType = parseBasicType(sb, basicTypeMatcher, "java.lang");
        }
        // special basic type "Date"
        else if (basicTypeDateMatcher.find()) {
            returnType = parseBasicType(sb, basicTypeDateMatcher, "java.util");
        }
        // special basic type "BigInteger"
        else if (basicTypeBigIntMatcher.find()) {
            returnType = parseBasicType(sb, basicTypeBigIntMatcher, "java.math");
        }
        // special basic type "BigDecimal"
        else if (basicTypeBigDecMatcher.find()) {
            returnType = parseBasicType(sb, basicTypeBigDecMatcher, "java.math");
        }
        // primitive types
        else if (primitiveTypeMatcher.find()) {
            String keyword = primitiveTypeMatcher.group(1);
            sb.delete(0, keyword.length());
            returnType = BasicTypeInfo.getInfoFor(primitiveClassFor(keyword));
            isPrimitiveType = true;
        }
        // values
        else if (valueTypeMatcher.find()) {
            returnType = parseValueType(sb, valueTypeMatcher);
        }
        // pojo objects or generic types
        else if (pojoGenericMatcher.find()) {
            returnType = parsePojoOrGenericType(sb, pojoGenericMatcher);
        }

        if (returnType == null) {
            throw new IllegalArgumentException("Error at '" + infoString + "'");
        }

        // arrays: count and consume every trailing "[]"
        int arrayDimensionCount = 0;
        while (sb.length() > 1 && sb.charAt(0) == '[' && sb.charAt(1) == ']') {
            arrayDimensionCount++;
            sb.delete(0, 2);
        }
        if (sb.length() > 0 && sb.charAt(0) == '[') {
            throw new IllegalArgumentException("Closing square bracket missing.");
        }
        if (arrayDimensionCount > 0) {
            returnType = createArrayInfo(returnType, isPrimitiveType, arrayDimensionCount);
        }

        // remove possible ',' that separates this type from the next one
        if (sb.length() > 0 && sb.charAt(0) == ',') {
            sb.deleteCharAt(0);
        }
        return returnType;
    }

    /**
     * Parses a tuple type whose header ({@code TupleN<} or {@code Tuple0},
     * optionally package-qualified) has been matched by {@code tupleMatcher}.
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private static TypeInformation<?> parseTuple(StringBuilder sb, Matcher tupleMatcher) {
        // group(3) is only set for the generic form "TupleN<"; for "Tuple0" it is null
        String className = tupleMatcher.group(3);
        final boolean isGenericTuple = className != null;
        if (!isGenericTuple) {
            className = tupleMatcher.group(2);
        }
        // the match covers the optional package prefix, the class name and (if present) '<'
        sb.delete(0, tupleMatcher.end());

        int arity = Integer.parseInt(className.replaceAll("\\D", ""));
        Class<?> clazz = loadClass(TUPLE_PACKAGE + "." + className);

        TypeInformation<?>[] types = new TypeInformation<?>[arity];
        for (int i = 0; i < arity; i++) {
            types[i] = parse(sb);
            if (types[i] == null) {
                throw new IllegalArgumentException("Tuple arity does not match given parameters.");
            }
        }
        if (isGenericTuple) {
            if (sb.length() == 0) {
                // input ended before the tuple was closed
                throw new IllegalArgumentException("Closing angle bracket of tuple missing.");
            }
            if (sb.charAt(0) != '>') {
                throw new IllegalArgumentException("Tuple arity does not match given parameters.");
            }
            // remove '>'
            sb.deleteCharAt(0);
        }
        return new TupleTypeInfo(clazz, types);
    }

    /**
     * Consumes a {@code Wrapper<fully.qualified.Name>} expression matched by the
     * writable or enum pattern and returns the wrapped class name.
     */
    private static String consumeWrappedClassName(StringBuilder sb, Matcher matcher) {
        String wrapperName = matcher.group(1);
        String fullyQualifiedName = matcher.group(3);
        // wrapper name + '<' + wrapped name + one closing character
        sb.delete(0, wrapperName.length() + 1 + fullyQualifiedName.length() + 1);
        return fullyQualifiedName;
    }

    /**
     * Consumes a basic type name (group 1 of {@code matcher}) and resolves it,
     * prepending {@code packageName} if the name is not fully qualified.
     */
    private static TypeInformation<?> parseBasicType(StringBuilder sb, Matcher matcher, String packageName) {
        String className = matcher.group(1);
        sb.delete(0, className.length());
        Class<?> clazz;
        // check if fully qualified
        if (className.startsWith(packageName)) {
            clazz = loadClass(className);
        } else {
            clazz = loadClass(packageName + "." + className);
        }
        return BasicTypeInfo.getInfoFor(clazz);
    }

    /** Maps a primitive keyword accepted by the primitive pattern to its class object. */
    private static Class<?> primitiveClassFor(String keyword) {
        if (keyword.equals("int")) {
            return int.class;
        } else if (keyword.equals("byte")) {
            return byte.class;
        } else if (keyword.equals("short")) {
            return short.class;
        } else if (keyword.equals("char")) {
            return char.class;
        } else if (keyword.equals("double")) {
            return double.class;
        } else if (keyword.equals("float")) {
            return float.class;
        } else if (keyword.equals("long")) {
            return long.class;
        } else if (keyword.equals("boolean")) {
            return boolean.class;
        } else if (keyword.equals("void")) {
            return void.class;
        }
        // not reachable for keywords that passed the primitive pattern
        throw new IllegalArgumentException("Unknown primitive type '" + keyword + "'.");
    }

    /** Consumes a {@code XyzValue} type name and resolves it inside the value package. */
    @SuppressWarnings("unchecked")
    private static TypeInformation<?> parseValueType(StringBuilder sb, Matcher valueTypeMatcher) {
        // group(1) is the name without the "Value" suffix
        String className = valueTypeMatcher.group(1);
        sb.delete(0, className.length() + 5); // +5 for "Value"
        Class<?> clazz;
        // check if fully qualified
        if (className.startsWith(VALUE_PACKAGE)) {
            clazz = loadClass(className + "Value");
        } else {
            clazz = loadClass(VALUE_PACKAGE + "." + className + "Value");
        }
        return ValueTypeInfo.getValueTypeInfo((Class<Value>) clazz);
    }

    /**
     * Parses either a pojo ({@code Name<field=TYPE,...>}) or, when no
     * {@code <} follows the class name, a generic type.
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private static TypeInformation<?> parsePojoOrGenericType(StringBuilder sb, Matcher pojoGenericMatcher) {
        String fullyQualifiedName = pojoGenericMatcher.group(1);
        sb.delete(0, fullyQualifiedName.length());
        boolean isPojo = pojoGenericMatcher.group(2) != null;

        // generic type
        if (!isPojo) {
            return new GenericTypeInfo(loadClass(fullyQualifiedName));
        }

        // pojo
        sb.deleteCharAt(0); // remove '<'
        Class<?> clazz = loadClass(fullyQualifiedName);
        ArrayList<PojoField> fields = new ArrayList<PojoField>();
        while (true) {
            if (sb.length() == 0) {
                // input ended before the field list was closed
                throw new IllegalArgumentException("Closing angle bracket of POJO missing.");
            }
            if (sb.charAt(0) == '>') {
                break;
            }
            final Matcher fieldMatcher = fieldPattern.matcher(sb);
            if (!fieldMatcher.find()) {
                throw new IllegalArgumentException("Field name missing.");
            }
            String fieldName = fieldMatcher.group(1);
            sb.delete(0, fieldName.length() + 1); // +1 for "="
            Field field = TypeExtractor.getDeclaredField(clazz, fieldName);
            if (field == null) {
                throw new IllegalArgumentException("Field '" + fieldName + "' could not be accessed.");
            }
            TypeInformation<?> fieldType = parse(sb);
            if (fieldType == null) {
                // nothing followed "fieldName="
                throw new IllegalArgumentException("Type of field '" + fieldName + "' missing.");
            }
            fields.add(new PojoField(field, fieldType));
        }
        sb.deleteCharAt(0); // remove '>'
        return new PojoTypeInfo(clazz, fields);
    }

    /**
     * Wraps {@code elementType} into {@code dimensions} array levels.
     *
     * <p>The innermost level uses the dedicated primitive or basic array type
     * information where one is listed below; every other element type (for
     * example Date, BigInteger, BigDecimal, tuples, pojos) and every further
     * dimension is represented as an object array.
     *
     * @param elementType
     *            type of the array elements
     * @param isPrimitiveType
     *            whether the element was written as a primitive keyword
     * @param dimensions
     *            number of {@code []} pairs, at least 1
     */
    private static TypeInformation<?> createArrayInfo(TypeInformation<?> elementType, boolean isPrimitiveType,
            int dimensions) {
        if (elementType == BasicTypeInfo.VOID_TYPE_INFO) {
            throw new IllegalArgumentException("Can not create an array of void.");
        }

        // first dimension
        TypeInformation<?> arrayInfo = null;
        if (isPrimitiveType) {
            arrayInfo = primitiveArrayInfoFor(elementType);
        } else if (elementType instanceof BasicTypeInfo) {
            arrayInfo = basicArrayInfoFor(elementType);
        }

        final String elementClassName = elementType.getTypeClass().getName();
        if (arrayInfo == null) {
            // No specialized array type available: fall back to an object array instead of
            // silently producing null (previously the case for BigInteger[] and BigDecimal[]).
            arrayInfo = ObjectArrayTypeInfo.getInfoFor(loadClass("[L" + elementClassName + ";"), elementType);
        }

        // further dimensions
        // NOTE(review): the array class name is always built from the element's type class with
        // an "L" prefix, also for primitive elements; presumably this yields e.g. Integer[][]
        // rather than int[][] as the type class of "int[][]" - confirm whether that is intended.
        StringBuilder arrayPrefix = new StringBuilder("[");
        for (int i = 1; i < dimensions; i++) {
            arrayPrefix.append('[');
            arrayInfo = ObjectArrayTypeInfo.getInfoFor(loadClass(arrayPrefix + "L" + elementClassName + ";"),
                    arrayInfo);
        }
        return arrayInfo;
    }

    /** Returns the primitive array type information for a basic type, or {@code null} if none is listed. */
    private static TypeInformation<?> primitiveArrayInfoFor(TypeInformation<?> elementType) {
        if (elementType == BasicTypeInfo.INT_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.BYTE_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.SHORT_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.SHORT_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.CHAR_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.CHAR_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.DOUBLE_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.DOUBLE_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.FLOAT_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.FLOAT_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.LONG_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.LONG_PRIMITIVE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
            return PrimitiveArrayTypeInfo.BOOLEAN_PRIMITIVE_ARRAY_TYPE_INFO;
        }
        return null;
    }

    /** Returns the basic (boxed) array type information for a basic type, or {@code null} if none is listed. */
    private static TypeInformation<?> basicArrayInfoFor(TypeInformation<?> elementType) {
        if (elementType == BasicTypeInfo.INT_TYPE_INFO) {
            return BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.BYTE_TYPE_INFO) {
            return BasicArrayTypeInfo.BYTE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.SHORT_TYPE_INFO) {
            return BasicArrayTypeInfo.SHORT_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.CHAR_TYPE_INFO) {
            return BasicArrayTypeInfo.CHAR_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.DOUBLE_TYPE_INFO) {
            return BasicArrayTypeInfo.DOUBLE_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.FLOAT_TYPE_INFO) {
            return BasicArrayTypeInfo.FLOAT_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.LONG_TYPE_INFO) {
            return BasicArrayTypeInfo.LONG_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.BOOLEAN_TYPE_INFO) {
            return BasicArrayTypeInfo.BOOLEAN_ARRAY_TYPE_INFO;
        } else if (elementType == BasicTypeInfo.STRING_TYPE_INFO) {
            return BasicArrayTypeInfo.STRING_ARRAY_TYPE_INFO;
        }
        return null;
    }

    /**
     * Loads a class by name via {@code Class.forName}.
     *
     * @param fullyQualifiedName
     *            binary class name (or JVM array name such as {@code [Lfoo.Bar;})
     * @return the loaded class
     * @throws IllegalArgumentException
     *             if the class cannot be found; the original
     *             {@code ClassNotFoundException} is attached as the cause
     */
    private static Class<?> loadClass(String fullyQualifiedName) {
        try {
            return Class.forName(fullyQualifiedName);
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("Class '" + fullyQualifiedName
                    + "' could not be found. Please note that inner classes must be declared static.", e);
        }
    }
}