/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.io;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.internal.io.ReflectionSchemaGenerator;
import co.cask.cdap.internal.io.SchemaTypeAdapter;
import com.google.common.reflect.TypeToken;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.junit.Assert;
import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
/**
* Tests for {@link Schema}: reflection-based generation, hashing, compatibility,
* JSON and SQL parsing, and Java serialization.
*/
public class SchemaTest {
/**
* Self-referential test type: an {@code int} payload plus a list of child {@code Node}s.
* It is only handed to {@link ReflectionSchemaGenerator} by the tests in this class; its
* fields are never read or written directly in this file.
*/
public final class Node {
private int data;
// Refers back to the enclosing type, which makes the type recursive.
private List<Node> children;
}
/**
* Generic base type for {@link Child}, used to test schema generation for a
* parameterized class hierarchy.
* @param <T> type of the {@code data} field
*/
public class Parent<T> {
// Child binds T to Map<String, T'> where T' is Child's own type parameter.
private T data;
private ByteBuffer buffer;
}
/**
* Generic test type that extends {@link Parent} with the parent's type argument bound to
* {@code Map<String, T>}, and adds an {@code int}, a {@link Node} and a {@link State} field.
* The tests in this class use it as {@code Child<Node>} through a {@code TypeToken}.
* @param <T> value type of the map held by the inherited {@code data} field
*/
public class Child<T> extends Parent<Map<String, T>> {
private int height;
private Node rootNode;
private State state;
}
/**
* Two-constant enum used as the type of {@link Child}'s {@code state} field.
*/
public enum State {
OK, ERROR
}
/**
 * Generates a schema for {@code Child<Node>} by reflection and checks that it is unchanged
 * after being written to JSON and read back through Gson with a {@link SchemaTypeAdapter}.
 */
@Test
public void testGenerateSchema() throws UnsupportedTypeException {
  TypeToken<Child<Node>> childOfNode = new TypeToken<Child<Node>>() { };
  Schema generated = new ReflectionSchemaGenerator().generate(childOfNode.getType());

  GsonBuilder builder = new GsonBuilder();
  builder.registerTypeAdapter(Schema.class, new SchemaTypeAdapter());
  Gson codec = builder.create();

  // Round trip: Schema -> JSON text -> Schema.
  String json = codec.toJson(generated);
  Schema roundTripped = codec.fromJson(json, Schema.class);
  Assert.assertEquals(generated, roundTripped);
}
/**
* Test type with the same field names and field types as {@link Node} (apart from the
* self-reference pointing at {@code Node2}). {@code testSchemaHash} asserts that the schemas
* generated from the two classes are equal and share the same schema hash.
*/
public final class Node2 {
private int data;
private List<Node2> children;
}
/**
 * Checks that the schemas generated from {@link Node} and {@link Node2} are equal and have
 * equal schema hashes, and that the hash of the {@code Child<Node>} schema differs from them.
 */
@Test
public void testSchemaHash() throws UnsupportedTypeException {
  Schema nodeSchema = new ReflectionSchemaGenerator().generate(Node.class);
  Schema node2Schema = new ReflectionSchemaGenerator().generate(Node2.class);

  // Same fields in both classes: hash first, then full equality.
  Assert.assertEquals(nodeSchema.getSchemaHash(), node2Schema.getSchemaHash());
  Assert.assertEquals(nodeSchema, node2Schema);

  // A schema with a different set of fields must hash differently.
  TypeToken<Child<Node>> childOfNode = new TypeToken<Child<Node>>() { };
  Schema childSchema = new ReflectionSchemaGenerator().generate(childOfNode.getType());
  Assert.assertNotEquals(nodeSchema.getSchemaHash(), childSchema.getSchemaHash());
}
/**
* Variant of {@link Node} whose {@code data} field is a {@code long} instead of an
* {@code int} and which has an additional {@code String tag} field. Used by
* {@code testCompatible} as the second schema in the compatibility checks.
*/
public final class Node3 {
private long data;
private String tag;
private List<Node3> children;
}
/**
* Test type with a single {@code String} field. {@code SCHEMA} is a hand-built schema for
* it: a record named after the class with one nullable string field {@code data}. It is
* reused inside {@link Node5#SCHEMA} and {@link Node6#SCHEMA}.
*/
public static final class Node4 {
// Hand-built schema for this class, embedded by Node5.SCHEMA and Node6.SCHEMA.
private static final Schema SCHEMA = Schema.recordOf(
Node4.class.getName(),
Schema.Field.of("data", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
private String data;
}
/**
* Test type wrapping a single {@link Node4} field. {@code SCHEMA} is a hand-built schema for
* it: a record named after the class with one nullable field {@code x} whose type is
* {@link Node4#SCHEMA}. It is reused inside {@link Node6#SCHEMA}.
*/
public static final class Node5 {
// Hand-built schema for this class; nests Node4.SCHEMA one level down.
private static final Schema SCHEMA = Schema.recordOf(
Node5.class.getName(),
Schema.Field.of("x", Schema.nullableOf(Node4.SCHEMA)));
private Node4 x;
}
/**
* Tests a record as a field, and that record as an inner field of another record.
* Field {@code x} is a {@link Node4} directly, while field {@code y} is a {@link Node5},
* which itself contains a {@link Node4}; so the {@code Node4} record appears at two nesting
* levels. {@code SCHEMA} is the hand-built schema that {@code testSameRecordDifferentLevels}
* compares against the reflectively generated one.
*/
public static final class Node6 {
// Node4.SCHEMA is referenced directly by "x" and indirectly (via Node5.SCHEMA) by "y".
private static final Schema SCHEMA = Schema.recordOf(
Node6.class.getName(),
Schema.Field.of("x", Schema.nullableOf(Node4.SCHEMA)),
Schema.Field.of("y", Schema.nullableOf(Node5.SCHEMA)));
private Node4 x;
private Node5 y;
}
/**
 * Exercises {@code Schema.isCompatible} on the schemas generated from {@link Node},
 * {@link Node3} and {@link Node4}: Node is expected to be compatible with Node3 but not the
 * other way round, and Node3 is expected to be compatible with Node4.
 */
@Test
public void testCompatible() throws UnsupportedTypeException {
  Schema intNode = new ReflectionSchemaGenerator().generate(Node.class);
  Schema longTaggedNode = new ReflectionSchemaGenerator().generate(Node3.class);
  Schema stringNode = new ReflectionSchemaGenerator().generate(Node4.class);

  // The two schemas differ (int vs. long data, extra tag field) ...
  Assert.assertNotEquals(intNode, longTaggedNode);

  // ... and compatibility between them is expected in one direction only.
  Assert.assertTrue(intNode.isCompatible(longTaggedNode));
  Assert.assertFalse(longTaggedNode.isCompatible(intNode));

  Assert.assertTrue(longTaggedNode.isCompatible(stringNode));
}
/**
 * Checks that the schema generated for {@code int[]} is an array-of-INT schema.
 */
@Test
public void testPrimitiveArray() throws UnsupportedTypeException {
  Schema generated = new ReflectionSchemaGenerator().generate(int[].class);
  Schema intArray = Schema.arrayOf(Schema.of(Schema.Type.INT));
  Assert.assertEquals(intArray, generated);
}
/**
 * Checks that parsing the {@code toString()} output of the {@link Node} schema with
 * {@code Schema.parseJson} yields a schema equal to the original.
 */
@Test
public void testParseJson() throws IOException, UnsupportedTypeException {
  Schema generated = new ReflectionSchemaGenerator().generate(Node.class);
  String json = generated.toString();
  Schema parsed = Schema.parseJson(json);
  Assert.assertEquals(generated, parsed);
}
/**
 * Checks schema generation for {@link Node6}, in which the {@link Node4} record appears both
 * as a direct field and nested inside {@link Node5}. The generated schema must equal the
 * hand-built {@code Node6.SCHEMA}, and must still equal it after a trip through
 * {@code toString()} and {@code Schema.parseJson}.
 */
@Test
public void testSameRecordDifferentLevels() throws UnsupportedTypeException, IOException {
  Schema generated = new ReflectionSchemaGenerator().generate(Node6.class);
  Assert.assertEquals(Node6.SCHEMA, generated);

  // Serialize to text and parse back; the result must still match the hand-built schema.
  String json = generated.toString();
  Schema reparsed = Schema.parseJson(json);
  Assert.assertEquals(Node6.SCHEMA, reparsed);
}
/**
 * Parses a SQL-like schema string with one column per type and compares the result with a
 * hand-built record schema. In the expected schema, columns and nested element types that
 * are not followed by "not null" are nullable, the top-level record is named "rec" and the
 * nested record is named "rec1".
 */
@Test
public void testParseFlatSQL() throws IOException {
  // One column for each type; "not null" appears in both lower and upper case.
  String sql = "bool_field boolean, " +
    "int_field int not null, " +
    "long_field long not null, " +
    "float_field float NOT NULL, " +
    "double_field double NOT NULL, " +
    "bytes_field bytes not null, " +
    "array_field array<string> not null, " +
    "map_field map<string,int> not null, " +
    "record_field record<x:int,y:double>, " +
    "string_field string";

  // array<string> not null: the array is required, its elements are nullable.
  Schema stringArray = Schema.arrayOf(Schema.nullableOf(Schema.of(Schema.Type.STRING)));

  // map<string,int> not null: the map is required, its keys and values are nullable.
  Schema stringToIntMap = Schema.mapOf(
    Schema.nullableOf(Schema.of(Schema.Type.STRING)),
    Schema.nullableOf(Schema.of(Schema.Type.INT)));

  // record<x:int,y:double>: a nullable record with nullable fields.
  Schema nullableXyRecord = Schema.nullableOf(Schema.recordOf(
    "rec1",
    Schema.Field.of("x", Schema.nullableOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("y", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE)))));

  Schema expected = Schema.recordOf(
    "rec",
    Schema.Field.of("bool_field", Schema.nullableOf(Schema.of(Schema.Type.BOOLEAN))),
    Schema.Field.of("int_field", Schema.of(Schema.Type.INT)),
    Schema.Field.of("long_field", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("float_field", Schema.of(Schema.Type.FLOAT)),
    Schema.Field.of("double_field", Schema.of(Schema.Type.DOUBLE)),
    Schema.Field.of("bytes_field", Schema.of(Schema.Type.BYTES)),
    Schema.Field.of("array_field", stringArray),
    Schema.Field.of("map_field", stringToIntMap),
    Schema.Field.of("record_field", nullableXyRecord),
    Schema.Field.of("string_field", Schema.nullableOf(Schema.of(Schema.Type.STRING))));

  Schema parsed = Schema.parseSQL(sql);
  Assert.assertEquals(expected, parsed);
}
/**
 * Parses a deeply nested SQL-like schema string in which every type is marked "not null" and
 * compares the result with a hand-built schema containing no nullable types: a map from a
 * record to an array of records, plus an int column.
 */
@Test
public void testNestedSQL() throws IOException {
  // Map key: record<x:string, y:array<string>, z:map<bytes,double>>
  Schema keyRecord = Schema.recordOf(
    "rec1",
    Schema.Field.of("x", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("y", Schema.arrayOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("z", Schema.mapOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.DOUBLE))));

  // Innermost field: map<array<bytes>, map<boolean,bytes>>
  Schema innerMap = Schema.mapOf(
    Schema.arrayOf(Schema.of(Schema.Type.BYTES)),
    Schema.mapOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.BYTES)));

  // Map value: array<record<x: innerMap>>
  Schema valueArray = Schema.arrayOf(Schema.recordOf("rec2", Schema.Field.of("x", innerMap)));

  Schema expected = Schema.recordOf(
    "rec",
    Schema.Field.of("x", Schema.mapOf(keyRecord, valueArray)),
    Schema.Field.of("y", Schema.of(Schema.Type.INT)));

  String sql =
    "x map<" +
      "record<" +
        "x:string not null," +
        "y:array<string not null> not null," +
        "z:map<bytes not null,double not null> not null" +
      "> not null," +
      "array<" +
        "record<" +
          "x:map<" +
            "array<bytes not null> not null," +
            "map<boolean not null,bytes not null> not null" +
          "> not null" +
        "> not null" +
      "> not null" +
    "> not null, y int not null";

  Schema parsed = Schema.parseSQL(sql);
  Assert.assertEquals(expected, parsed);
}
/**
 * Checks that {@code Schema.parseSQL} accepts extra spaces, a tab and a newline around type
 * names and delimiters, and produces the same schema as the compact form would.
 */
@Test
public void testParseSQLWithWhitespace() throws IOException {
  String sql = "map_field map< string , int > not null,\n" +
    "arr_field array< record< x:int , y:double >\t> not null";

  // map<string,int> not null: nullable keys and values.
  Schema stringToIntMap = Schema.mapOf(
    Schema.nullableOf(Schema.of(Schema.Type.STRING)),
    Schema.nullableOf(Schema.of(Schema.Type.INT)));

  // array<record<x:int,y:double>> not null: nullable record elements with nullable fields.
  Schema xyRecord = Schema.recordOf(
    "rec1",
    Schema.Field.of("x", Schema.nullableOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("y", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))));
  Schema recordArray = Schema.arrayOf(Schema.nullableOf(xyRecord));

  Schema expected = Schema.recordOf(
    "rec",
    Schema.Field.of("map_field", stringToIntMap),
    Schema.Field.of("arr_field", recordArray));

  Schema parsed = Schema.parseSQL(sql);
  Assert.assertEquals(expected, parsed);
}
/**
 * Checks that {@code Schema.parseSQL} rejects each of a list of malformed schema strings.
 * The per-entry notes are the reviewer's reading of why each string is malformed, based on
 * the well-formed strings used by the other tests in this class.
 */
@Test
public void testInvalidSQL() {
  String[] malformed = {
    "int x",            // presumably: type given before the column name
    "x map<int, int",   // map type never closed
    "x array<string",   // array type never closed
    "x bool",           // presumably not a type name; valid tests use "boolean"
    "x integer",        // presumably not a type name; valid tests use "int"
    "x record<y int>",  // valid tests separate record field name and type with ':'
    "x array<>"         // no element type
  };
  for (String sql : malformed) {
    verifyThrowsException(sql);
  }
}
/**
 * Checks that a record schema with one field of each kind (primitives, enum, array, map,
 * union) survives Java serialization: the restored schema must equal the original and must
 * produce the same {@code toString()} output.
 */
@Test
public void testSerializable() throws IOException, ClassNotFoundException {
  Schema original = Schema.recordOf(
    "record",
    Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
    Schema.Field.of("int", Schema.of(Schema.Type.INT)),
    Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
    Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
    Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
    Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
    Schema.Field.of("enum", Schema.enumWith("a", "b", "c")),
    Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
    Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
    Schema.Field.of("union", Schema.unionOf(Schema.of(Schema.Type.NULL), Schema.of(Schema.Type.STRING)))
  );

  // Call toString() before serializing so the schema's string form has already been
  // computed when the object is written (the backing field lives inside Schema and is
  // not visible from this file).
  String expectedText = original.toString();

  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
    out.writeObject(original);
  }
  byte[] serialized = buffer.toByteArray();

  Schema restored;
  try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(serialized))) {
    restored = (Schema) in.readObject();
  }

  Assert.assertEquals(original, restored);
  Assert.assertEquals(expectedText, restored.toString());
}
/**
 * Asserts that {@code Schema.parseSQL} rejects the given text with an {@link IOException}.
 * If parsing succeeds, the test fails with a message naming the offending input and the
 * schema that was produced, so the failing case can be identified among several calls.
 *
 * @param toParse SQL-like schema text that is expected to be invalid
 */
private void verifyThrowsException(String toParse) {
  Schema parsed;
  try {
    parsed = Schema.parseSQL(toParse);
  } catch (IOException expected) {
    // Parsing was rejected, which is what the caller asked us to verify.
    return;
  }
  Assert.fail("Expected IOException when parsing invalid schema \"" + toParse + "\" but got " + parsed);
}
}