package com.aliyun.odps.data;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.type.ArrayTypeInfo;
import com.aliyun.odps.type.CharTypeInfo;
import com.aliyun.odps.type.DecimalTypeInfo;
import com.aliyun.odps.type.MapTypeInfo;
import com.aliyun.odps.type.StructTypeInfo;
import com.aliyun.odps.type.TypeInfo;
import com.aliyun.odps.type.VarcharTypeInfo;
/**
* Created by zhenhong.gzh on 16/12/13.
*/
public class OdpsTypeTransformer {
private static final int STRING_MAX_LENGTH = 8 * 1024 * 1024;
// 9999-12-31 23:59:59
private static final long DATETIME_MAX_TICKS = 253402271999000L;
// 0001-01-01 00:00:00
private static final long DATETIME_MIN_TICKS = -62135798400000L;
private static Map<OdpsType, Class> ODPS_TYPE_MAPPER = new HashMap<OdpsType, Class>() ;
OdpsTypeTransformer() {
}
static {
ODPS_TYPE_MAPPER.put(OdpsType.BIGINT, Long.class);
ODPS_TYPE_MAPPER.put(OdpsType.STRING, String.class);
ODPS_TYPE_MAPPER.put(OdpsType.DATETIME, java.util.Date.class);
ODPS_TYPE_MAPPER.put(OdpsType.DOUBLE, Double.class);
ODPS_TYPE_MAPPER.put(OdpsType.BOOLEAN, Boolean.class);
ODPS_TYPE_MAPPER.put(OdpsType.DECIMAL, BigDecimal.class);
ODPS_TYPE_MAPPER.put(OdpsType.ARRAY, List.class);
ODPS_TYPE_MAPPER.put(OdpsType.MAP, Map.class);
ODPS_TYPE_MAPPER.put(OdpsType.STRUCT, Struct.class);
ODPS_TYPE_MAPPER.put(OdpsType.INT, Integer.class);
ODPS_TYPE_MAPPER.put(OdpsType.TINYINT, Byte.class);
ODPS_TYPE_MAPPER.put(OdpsType.SMALLINT, Short.class);
ODPS_TYPE_MAPPER.put(OdpsType.DATE, java.sql.Date.class);
ODPS_TYPE_MAPPER.put(OdpsType.TIMESTAMP, java.sql.Timestamp.class);
ODPS_TYPE_MAPPER.put(OdpsType.FLOAT, Float.class);
ODPS_TYPE_MAPPER.put(OdpsType.CHAR, Char.class);
ODPS_TYPE_MAPPER.put(OdpsType.BINARY, Binary.class);
ODPS_TYPE_MAPPER.put(OdpsType.VARCHAR, Varchar.class);
ODPS_TYPE_MAPPER.put(OdpsType.INTERVAL_YEAR_MONTH, IntervalYearMonth.class);
ODPS_TYPE_MAPPER.put(OdpsType.INTERVAL_DAY_TIME, IntervalDayTime.class);
}
public static Class odpsTypeToJavaType(OdpsType type) {
if (ODPS_TYPE_MAPPER.containsKey(type)) {
return ODPS_TYPE_MAPPER.get(type);
}
throw new IllegalArgumentException("Cannot get Java type for Odps type: " + type);
}
private static void validateString(String value) {
try {
if ((value.length() * 6 > STRING_MAX_LENGTH) && (value.getBytes("utf-8").length
> STRING_MAX_LENGTH)) {
throw new IllegalArgumentException("InvalidData: The string's length is more than "
+ STRING_MAX_LENGTH / 1024 / 1024 + "M.");
}
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException(e.getMessage(), e);
}
}
private static void validateChar(Char value, CharTypeInfo typeInfo) {
if (value.length() > typeInfo.getLength()) {
throw new IllegalArgumentException(String.format(
"InvalidData: %s data is overflow, pls check data length: %s.", typeInfo.getTypeName(),
(value).length()));
}
}
private static void validateVarChar(Varchar value, VarcharTypeInfo typeInfo) {
if (value.length() > typeInfo.getLength()) {
throw new IllegalArgumentException(String.format(
"InvalidData: %s data is overflow, pls check data length: %s.", typeInfo.getTypeName(),
(value).length()));
}
}
private static void validateBigint(Long value) {
if (value == Long.MIN_VALUE) {
throw new IllegalArgumentException("InvalidData: Bigint out of range.");
}
}
private static void validateDateTime(java.util.Date value) {
if ((value.getTime() > DATETIME_MAX_TICKS || value.getTime() < DATETIME_MIN_TICKS)) {
throw new IllegalArgumentException("InvalidData: Datetime out of range.");
}
}
private static void validateDecimal(BigDecimal value, DecimalTypeInfo typeInfo) {
BigDecimal tmpValue = value.setScale(typeInfo.getScale(), RoundingMode.HALF_UP);
int intLength = tmpValue.precision() - tmpValue.scale();
if (intLength > (typeInfo.getPrecision() - typeInfo.getScale())) {
throw new IllegalArgumentException(
String.format("InvalidData: decimal value %s overflow, max integer digit number is %s.",
value, (typeInfo.getPrecision() - typeInfo.getScale())));
}
}
private static List transformArray(List value, ArrayTypeInfo typeInfo) {
List<Object> newList = new ArrayList<Object>(value.size());
TypeInfo elementTypeInfo = typeInfo.getElementTypeInfo();
for (Object obj : value) {
newList.add(transform(obj, elementTypeInfo));
}
return newList;
}
private static Map transformMap(Map value, MapTypeInfo typeInfo) {
TypeInfo keyTypeInfo = typeInfo.getKeyTypeInfo();
TypeInfo valTypeInfo = typeInfo.getValueTypeInfo();
Map newMap = new HashMap(value.size(), 1.0f);
Iterator iter = value.entrySet().iterator();
while (iter.hasNext()) {
Map.Entry entry = (Map.Entry) iter.next();
Object entryKey = transform(
entry.getKey(), keyTypeInfo);
Object entryValue = transform(
entry.getValue(), valTypeInfo);
newMap.put(entryKey, entryValue);
}
return newMap;
}
private static Struct transformStruct(Struct value, StructTypeInfo typeInfo) {
List<Object> elements = new ArrayList<Object>();
for (int i = 0; i < typeInfo.getFieldCount(); ++i) {
TypeInfo fieldTypeInfo = value.getFieldTypeInfo(i);
elements.add(transform(value.getFieldValue(i), fieldTypeInfo));
}
return new SimpleStruct(typeInfo, elements);
}
static Object transform(Object value, TypeInfo typeInfo) {
if (value == null) {
return null;
}
switch (typeInfo.getOdpsType()) {
case STRING:
// allow byte [] to set on STRING column, ugly
if (value instanceof byte []) {
value = ArrayRecord.bytesToString((byte []) value);
}
validateString((String) value);
break;
case BIGINT:
validateBigint((Long) value);
break;
case DATETIME:
validateDateTime((java.util.Date) value);
break;
case DECIMAL:
validateDecimal((BigDecimal) value, (DecimalTypeInfo) typeInfo);
break;
case CHAR:
validateChar((Char) value, (CharTypeInfo) typeInfo);
break;
case VARCHAR:
validateVarChar((Varchar) value, (VarcharTypeInfo) typeInfo);
break;
case ARRAY:
return transformArray((List) value, (ArrayTypeInfo) typeInfo);
case MAP:
return transformMap((Map) value, (MapTypeInfo) typeInfo);
case STRUCT:
return transformStruct((Struct) value, (StructTypeInfo) typeInfo);
}
return odpsTypeToJavaType(typeInfo.getOdpsType()).cast(value);
}
}