/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.internal.io;
import co.cask.cdap.api.data.schema.Schema;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
/**
 * Parses a SQL-like schema into a {@link Schema}.
 *
 * <p>The input is a comma separated list of columns of the form {@code name type, name type, ...}. A type is one of
 * the simple types {@code boolean}, {@code int}, {@code long}, {@code float}, {@code double}, {@code bytes},
 * {@code string} or {@code null}, or one of the complex types {@code array<type>}, {@code map<type,type>},
 * {@code record<name:type,...>} or {@code union<type,...>}. Every type is wrapped as nullable unless it is
 * followed by {@code not null}.
 *
 * <p>The whole input is lower-cased before parsing, so column and field names are returned lower-cased as well.
 *
 * <p>An instance keeps a mutable cursor into the input and performs no synchronization, so it must not be used by
 * several threads at once.
 */
public final class SQLSchemaParser {
  // keyword that marks a type as non-nullable; matched literally, which is why whitespace is normalized up front
  private static final String NOT_NULL = "not null";

  // normalized (trimmed, whitespace-collapsed, lower-cased) schema text
  private final String schema;
  // length of the normalized schema; the cursor is at EOF once pos >= end
  private final int end;
  // current cursor position in the normalized schema
  private int pos;
  // used so record names are unique
  private int recordNum;

  /**
   * Creates a parser for the given schema text.
   *
   * @param schema the SQL-like schema to parse
   */
  public SQLSchemaParser(String schema) {
    // Replace every run of whitespace with a single space so that the two-word keyword "not null" can be matched
    // literally even when it was written with tabs, newlines or several spaces. Lower-case with a fixed locale so
    // that keywords are recognized regardless of the JVM default locale (e.g. "INT" under a Turkish locale).
    this.schema = schema.trim().replaceAll("\\s+", " ").toLowerCase(Locale.ROOT);
    this.end = this.schema.length();
    this.pos = 0;
    this.recordNum = 1;
  }

  /**
   * Parses the schema given at construction time. The expected format is {@code name type, name type, ...}.
   * The method may be called repeatedly; every call parses the input from the beginning.
   *
   * @return a record schema named {@code rec} with one field per column
   * @throws IOException if the schema is malformed, or if building the schema fails, for example because a record
   *                     contains several fields with the same name
   */
  public Schema parse() throws IOException {
    // start from a clean state so that a repeated call does not see the cursor left at EOF by the previous one
    pos = 0;
    recordNum = 1;
    try {
      List<Schema.Field> fields = new ArrayList<>();
      while (pos < end) {
        String name = nextToken();
        errorIf(name.isEmpty(), "Expecting a column name");
        expectWhitespace("Expecting whitespace between column name and type");
        fields.add(Schema.Field.of(name, parseType()));
        // stop if we're at the last field
        if (pos >= end) {
          break;
        }
        advancePastComma("Expected a comma separating schema columns");
      }
      return Schema.recordOf("rec", fields);
    } catch (RuntimeException e) {
      // can happen if, for example, there are multiple fields in a record with the same name
      throw new IOException(e);
    }
  }

  /**
   * Parses a single type starting at the cursor, including an optional trailing {@code not null}.
   *
   * @return the parsed type, wrapped as nullable unless it was followed by {@code not null}
   * @throws IOException if the input ends before a type is found or the type name is unknown
   */
  private Schema parseType() throws IOException {
    errorIf(pos >= end, "Unexpected EOF");
    Schema type;
    String typeStr = nextToken();
    switch (typeStr) {
      case "boolean":
        type = Schema.of(Schema.Type.BOOLEAN);
        break;
      case "int":
        type = Schema.of(Schema.Type.INT);
        break;
      case "long":
        type = Schema.of(Schema.Type.LONG);
        break;
      case "float":
        type = Schema.of(Schema.Type.FLOAT);
        break;
      case "double":
        type = Schema.of(Schema.Type.DOUBLE);
        break;
      case "bytes":
        type = Schema.of(Schema.Type.BYTES);
        break;
      case "string":
        type = Schema.of(Schema.Type.STRING);
        break;
      case "null":
        type = Schema.of(Schema.Type.NULL);
        break;
      case "array":
        type = parseArray();
        break;
      case "map":
        type = parseMap();
        break;
      case "record":
        type = parseRecord();
        break;
      case "union":
        type = parseUnion();
        break;
      default:
        throw new IOException("Unknown data type " + typeStr);
    }

    skipWhitespace();
    if (schema.startsWith(NOT_NULL, pos)) {
      pos += NOT_NULL.length();
      return type;
    }
    return Schema.nullableOf(type);
  }

  /**
   * Parses the body of a record, {@code <col1:type1,col2:type2,...>}, with the cursor placed right after the
   * {@code record} keyword. Each parsed record gets a generated name ({@code rec1}, {@code rec2}, ...).
   *
   * @throws IOException if the record body is malformed or the input ends before the closing {@code '>'}
   */
  private Schema parseRecord() throws IOException {
    expectChar('<', "record must be followed with a '<'");
    skipWhitespace();
    String recordName = "rec" + recordNum;
    recordNum++;
    List<Schema.Field> fields = new ArrayList<>();
    // keep going until we get to the enclosing '>'
    while (true) {
      errorIf(pos >= end, "Unexpected EOF");
      // colName:type
      String colName = nextToken();
      // nextToken returns an empty string when the cursor already sits on a delimiter such as ':'
      errorIf(colName.isEmpty(), "Expecting a field name in record");
      expectChar(':', "Expecting a ':' between field name and type");
      fields.add(Schema.Field.of(colName, parseType()));
      // must be at the end or at a comma
      if (tryAdvancePastEndBracket()) {
        break;
      }
      advancePastComma("Expected a comma separating record fields");
    }
    return Schema.recordOf(recordName, fields);
  }

  /**
   * Parses the body of a map, {@code <keyType,valueType>}, with the cursor placed right after the {@code map}
   * keyword.
   *
   * @throws IOException if the map body is malformed or the input ends before the closing {@code '>'}
   */
  private Schema parseMap() throws IOException {
    expectChar('<', "map must be followed by a '<'");
    skipWhitespace();
    Schema keyType = parseType();
    // key and value must be separated by a comma
    advancePastComma("Expected a comma separating map key and value types");
    Schema valueType = parseType();
    skipWhitespace();
    expectChar('>', "map must end with a '>'");
    return Schema.mapOf(keyType, valueType);
  }

  /**
   * Parses the body of a union, {@code <type,...>}, with the cursor placed right after the {@code union} keyword.
   *
   * @throws IOException if the union body is malformed or the input ends before the closing {@code '>'}
   */
  private Schema parseUnion() throws IOException {
    expectChar('<', "union must be followed by a '<'");
    skipWhitespace();
    List<Schema> unionTypes = new ArrayList<>();
    // keep going until we see the closing '>'
    while (true) {
      unionTypes.add(parseType());
      if (tryAdvancePastEndBracket()) {
        break;
      }
      advancePastComma("Expected a comma separating union types");
    }
    return Schema.unionOf(unionTypes);
  }

  /**
   * Parses the body of an array, {@code <type>}, with the cursor placed right after the {@code array} keyword.
   *
   * @throws IOException if the array body is malformed or the input ends before the closing {@code '>'}
   */
  private Schema parseArray() throws IOException {
    expectChar('<', "array must be followed by a '<'");
    skipWhitespace();
    Schema componentType = parseType();
    skipWhitespace();
    expectChar('>', "array must end with a '>'");
    return Schema.arrayOf(componentType);
  }

  /**
   * Skips whitespace and, if the cursor is then on a {@code '>'}, moves past it.
   *
   * @return true if a closing bracket was consumed; false otherwise, including when the input has ended
   */
  private boolean tryAdvancePastEndBracket() {
    skipWhitespace();
    if (pos < end && schema.charAt(pos) == '>') {
      pos++;
      return true;
    }
    return false;
  }

  // moves the cursor forward to the next non-whitespace character, or to EOF
  private void skipWhitespace() {
    while (pos < end && Character.isWhitespace(schema.charAt(pos))) {
      pos++;
    }
  }

  /**
   * Advances past the next token and returns it. A token runs until whitespace, {@code ':'}, {@code ','},
   * {@code '<'}, {@code '>'} or the end of the input.
   *
   * @return the token, which is empty if the cursor is already on a delimiter or at EOF
   */
  private String nextToken() {
    int endPos = pos;
    while (endPos < end && !isDelimiter(schema.charAt(endPos))) {
      endPos++;
    }
    String token = schema.substring(pos, endPos);
    pos = endPos;
    return token;
  }

  // true for the characters that terminate a token
  private static boolean isDelimiter(char c) {
    return Character.isWhitespace(c) || c == ':' || c == ',' || c == '<' || c == '>';
  }

  // move past a comma and optional whitespace on either side of it.
  // in other words, moves past a ", " or ",".
  private void advancePastComma(String errMsg) throws IOException {
    skipWhitespace();
    expectChar(',', errMsg);
    skipWhitespace();
  }

  // error if the input has ended or the current character is not what is expected, and move past the character
  private void expectChar(char c, String errMsg) throws IOException {
    errorIf(pos >= end || schema.charAt(pos) != c, errMsg);
    pos++;
  }

  // error if the input has ended or the current character is not whitespace, and move past the whitespace
  private void expectWhitespace(String errMsg) throws IOException {
    errorIf(pos >= end || !Character.isWhitespace(schema.charAt(pos)), errMsg);
    skipWhitespace();
  }

  // throws an IOException carrying the given message, prefixed with "schema is malformed", if the condition holds
  private void errorIf(boolean condition, String errMsg) throws IOException {
    if (condition) {
      throw new IOException(String.format("schema is malformed. %s.", errMsg));
    }
  }
}