/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.mapred.utils;
import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsType;
import java.util.ArrayList;
import java.util.Arrays;
/**
* 表达MapReduce任务行属性的工具类
*/
public class SchemaUtils {
private static final String SEPERATOR = ",";
private static final String DELIMITER = ":";
/**
* 从字符串描述解析行属性。其中行属性的分隔符是',',字段分隔符为':'。例如,"word:string,count:bigint"表示两列的一行,
* 其中第一列名称为word,类型为string;第二列名称为count,类型为bigint。
*
* @param str
* 字符串描述
* @return 行属性
* @see #toString()
*/
public static Column[] fromString(String str) {
if (str == null || str.isEmpty()) {
return new Column[0];
}
String remain = str;
ArrayList<Column> cols = new ArrayList<Column>();
int pos;
while (remain.length() > 0) {
pos = remain.indexOf(SEPERATOR);
if (pos < 0) {
// Last one
pos = remain.length();
}
String tok = remain.substring(0, pos);
String[] knv = tok.split(DELIMITER, 2);
if (knv.length != 2) {
throw new IllegalArgumentException(
"Malformed schema definition, expecting \"name:type\" but was \"" + tok + "\"");
}
if (knv[1].toUpperCase().startsWith("MAP")) {
// We need to find the next SEPARATOR
pos = remain.indexOf(SEPERATOR, pos + 1);
if (pos < 0) {
pos = remain.length();
}
tok = remain.substring(0, pos);
// Re-split
knv = tok.split(DELIMITER, 2);
}
if (pos == remain.length()) {
remain = "";
} else {
remain = remain.substring(pos + 1);
}
cols.add(getColumn(knv[0], knv[1]));
}
return cols.toArray(new Column[]{});
}
/**
* 行属性序列化为描述字符串
*
* @param cols
* 行属性
* @return 描述字符串
* @see #fromString(String)
*/
public static String toString(Column[] cols) {
if (cols == null) {
return "";
}
StringBuilder sb = new StringBuilder();
for (Column c : cols) {
if (c == null) {
continue;
}
if (sb.length() > 0) {
sb.append(SEPERATOR);
}
sb.append(c.getName()).append(DELIMITER).append(getOdpsTypeString(c));
}
return sb.toString();
}
/**
* 获取行属性的名称数组
*
* @param cols
* 行属性
* @return 名称数组
*/
public static String[] getNames(Column[] cols) {
String[] names = new String[cols.length];
for (int i = 0; i < cols.length; i++) {
names[i] = cols[i].getName();
}
return names;
}
/**
* 获取行属性的类型数组
*
* @param cols
* 行属性
* @return 类型数组
*/
public static OdpsType[] getTypes(Column[] cols) {
OdpsType[] types = new OdpsType[cols.length];
for (int i = 0; i < cols.length; i++) {
types[i] = cols[i].getType();
}
return types;
}
private static String getOdpsTypeString(Column tp) {
StringBuilder sb = new StringBuilder();
if (tp.getType() == OdpsType.ARRAY) {
sb.append("ARRAY<");
sb.append(tp.getGenericTypeList().get(0).toString());
sb.append(">");
} else if (tp.getType() == OdpsType.MAP) {
sb.append("MAP<");
sb.append(tp.getGenericTypeList().get(0).toString());
sb.append(",");
sb.append(tp.getGenericTypeList().get(1).toString());
sb.append(">");
} else {
sb.append(tp.getType().toString());
}
return sb.toString();
}
private static Column getColumn(String name, String typeStr) {
name = name.trim();
typeStr = typeStr.toUpperCase().trim();
if (typeStr.startsWith("ARRAY<")) {
String remain = typeStr.substring(6);
if (remain.length() < 2 || remain.charAt(remain.length() -1) != '>') {
throw new IllegalArgumentException("Malformed schema, not a valid array type: " + typeStr);
}
OdpsType elementType = OdpsType.valueOf(remain.substring(0, remain.length()-1));
Column col = new Column(name, OdpsType.ARRAY);
col.setGenericTypeList(Arrays.asList(elementType));
return col;
} else if (typeStr.startsWith("MAP<")) {
String remain = typeStr.substring(4);
if (remain.length() < 4 || remain.charAt(remain.length() - 1) != '>') {
throw new IllegalArgumentException("Malformed schema, not a valid map type: " + typeStr);
}
remain = remain.substring(0, remain.length() -1);
String[] knv = remain.split(",", 2);
if (knv.length != 2) {
throw new IllegalArgumentException(
"Malformed schema , not a valid map type: " + typeStr);
}
Column col = new Column(name, OdpsType.MAP);
col.setGenericTypeList(Arrays.asList(OdpsType.valueOf(knv[0]), OdpsType.valueOf(knv[1])));
return col;
} else {
return new Column(name, OdpsType.valueOf(typeStr));
}
}
}