/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.format;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.FormatSpecification;
import co.cask.cdap.api.data.format.Formats;
import co.cask.cdap.api.data.format.RecordFormat;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.format.UnexpectedFormatException;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import com.google.common.collect.ImmutableMap;
import org.junit.Assert;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
/**
*
*/
public class DelimitedStringsRecordFormatTest {
@Test
public void testSimpleSchemaValidation() throws UnsupportedTypeException {
Schema simpleSchema = Schema.recordOf("event",
Schema.Field.of("f1", Schema.of(Schema.Type.BOOLEAN)),
Schema.Field.of("f2", Schema.of(Schema.Type.INT)),
Schema.Field.of("f3", Schema.of(Schema.Type.FLOAT)),
Schema.Field.of("f4", Schema.of(Schema.Type.DOUBLE)),
Schema.Field.of("f5", Schema.of(Schema.Type.BYTES)),
Schema.Field.of("f6", Schema.of(Schema.Type.STRING))
);
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
simpleSchema, Collections.<String, String>emptyMap());
format.initialize(formatSpec);
}
@Test
public void testArrayOfNullableStringsSchema() throws UnsupportedTypeException {
Schema schema = Schema.recordOf(
"event",
Schema.Field.of("arr", Schema.arrayOf(Schema.nullableOf(Schema.of(Schema.Type.STRING)))));
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, Collections.<String, String>emptyMap());
format.initialize(formatSpec);
}
@Test
public void testNullableFieldsAllowedInSchema() throws UnsupportedTypeException {
Schema schema = Schema.recordOf(
"event",
Schema.Field.of("f1", Schema.unionOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f2", Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f3", Schema.unionOf(Schema.of(Schema.Type.FLOAT), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f4", Schema.unionOf(Schema.of(Schema.Type.DOUBLE), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f5", Schema.unionOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f6", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL)))
);
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, Collections.<String, String>emptyMap());
format.initialize(formatSpec);
}
@Test
public void testSimpleArraySchemaValidation() throws UnsupportedTypeException {
Schema schema = Schema.recordOf("event",
Schema.Field.of("f1", Schema.of(Schema.Type.BOOLEAN)),
Schema.Field.of("f2", Schema.of(Schema.Type.INT)),
Schema.Field.of("f3", Schema.of(Schema.Type.FLOAT)),
Schema.Field.of("f4", Schema.of(Schema.Type.DOUBLE)),
Schema.Field.of("f5", Schema.of(Schema.Type.BYTES)),
Schema.Field.of("f6", Schema.of(Schema.Type.STRING)),
Schema.Field.of("f7", Schema.arrayOf(Schema.of(Schema.Type.STRING)))
);
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, Collections.<String, String>emptyMap());
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = UnsupportedTypeException.class)
public void testComplexArraySchemaValidation() throws UnsupportedTypeException {
Schema mapSchema = Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", Schema.arrayOf(mapSchema)));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, Collections.<String, String>emptyMap());
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = UnsupportedTypeException.class)
public void testMapFieldInvalid() throws UnsupportedTypeException {
Schema mapSchema = Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", mapSchema));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, Collections.<String, String>emptyMap());
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = UnsupportedTypeException.class)
public void testRecordFieldInvalid() throws UnsupportedTypeException {
Schema recordSchema = Schema.recordOf("record", Schema.Field.of("recordField", Schema.of(Schema.Type.STRING)));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", recordSchema));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, Collections.<String, String>emptyMap());
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = IllegalArgumentException.class)
public void testRecordMappingWithNonSimpleSchema() throws UnsupportedTypeException {
Schema arraySchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", arraySchema));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, ImmutableMap.of(DelimitedStringsRecordFormat.MAPPING, "0:f1"));
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = IllegalArgumentException.class)
public void testRecordMappingTooFewMappings() throws UnsupportedTypeException {
Schema arraySchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", arraySchema));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, ImmutableMap.of(DelimitedStringsRecordFormat.MAPPING, ""));
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = IllegalArgumentException.class)
public void testRecordMappingTooManyMappings() throws UnsupportedTypeException {
Schema arraySchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", arraySchema));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, ImmutableMap.of(DelimitedStringsRecordFormat.MAPPING, "0:f1,1:f2"));
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test(expected = IllegalArgumentException.class)
public void testRecordMappingWrongMapping() throws UnsupportedTypeException {
Schema arraySchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
Schema schema = Schema.recordOf("event", Schema.Field.of("f1", arraySchema));
FormatSpecification formatSpec =
new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema, ImmutableMap.of(DelimitedStringsRecordFormat.MAPPING, "0:f2"));
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(formatSpec);
}
@Test
public void testStringArrayFormat() throws UnsupportedTypeException, UnexpectedFormatException {
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
format.initialize(null);
String body = "userX,actionY,itemZ";
StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
String[] actual = output.get("body");
String[] expected = body.split(",");
Assert.assertTrue(Arrays.equals(expected, actual));
}
@Test
public void testDelimiter() throws UnsupportedTypeException, UnexpectedFormatException {
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
FormatSpecification spec = new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
null,
ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, " "));
format.initialize(spec);
String body = "userX actionY itemZ";
StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
String[] actual = output.get("body");
String[] expected = body.split(" ");
Assert.assertArrayEquals(expected, actual);
}
@Test
public void testCSV() throws Exception {
FormatSpecification spec = new FormatSpecification(Formats.CSV, null, Collections.<String, String>emptyMap());
RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
String body = "userX,actionY,itemZ";
StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
String[] actual = output.get("body");
String[] expected = body.split(",");
Assert.assertArrayEquals(expected, actual);
}
@Test
public void testTSV() throws Exception {
FormatSpecification spec = new FormatSpecification(Formats.TSV, null, Collections.<String, String>emptyMap());
RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
String body = "userX\tactionY\titemZ";
StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
String[] actual = output.get("body");
String[] expected = body.split("\t");
Assert.assertArrayEquals(expected, actual);
}
@Test
public void testFormatRecordWithMapping() throws UnsupportedTypeException {
Schema schema = Schema.recordOf(
"event",
Schema.Field.of("f3", Schema.unionOf(Schema.of(Schema.Type.FLOAT), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f4", Schema.unionOf(Schema.of(Schema.Type.DOUBLE), Schema.of(Schema.Type.NULL))));
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
FormatSpecification spec = new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema,
ImmutableMap.of(
DelimitedStringsRecordFormat.DELIMITER, ",",
DelimitedStringsRecordFormat.MAPPING, "2:f3,3:f4"));
format.initialize(spec);
boolean booleanVal = false;
int intVal = Integer.MAX_VALUE;
float floatVal = Float.MAX_VALUE;
double doubleVal = Double.MAX_VALUE;
byte[] bytesVal = new byte[] { 0, 1, 2 };
String stringVal = "foo bar";
String[] arrayVal = new String[] { "extra1", "extra2", "extra3" };
String body = new StringBuilder()
.append(booleanVal).append(",")
.append(intVal).append(",")
.append(floatVal).append(",")
.append(doubleVal).append(",")
.append(Bytes.toStringBinary(bytesVal)).append(",")
.append(stringVal).append(",")
.append(arrayVal[0]).append(",")
.append(arrayVal[1]).append(",")
.append(arrayVal[2])
.toString();
StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
Assert.assertEquals(2, output.getSchema().getFields().size());
Assert.assertNull(output.get("f1"));
Assert.assertNull(output.get("f2"));
Assert.assertEquals(floatVal, output.get("f3"));
Assert.assertEquals(doubleVal, output.get("f4"));
Assert.assertNull(output.get("f5"));
Assert.assertNull(output.get("f6"));
Assert.assertNull(output.get("f7"));
// now try with null fields.
output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes("true,,3.14159,,,hello world,extra1"))));
Assert.assertEquals(2, output.getSchema().getFields().size());
Assert.assertNull(output.get("f1"));
Assert.assertNull(output.get("f2"));
Assert.assertTrue(Math.abs(3.14159 - (Float) output.get("f3")) < 0.000001);
Assert.assertNull(output.get("f4"));
Assert.assertNull(output.get("f5"));
Assert.assertNull(output.get("f6"));
Assert.assertNull(output.get("f7"));
}
@Test
public void testFormatRecordWithSchema() throws UnsupportedTypeException, UnexpectedFormatException {
Schema schema = Schema.recordOf(
"event",
Schema.Field.of("f1", Schema.unionOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f2", Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f3", Schema.unionOf(Schema.of(Schema.Type.FLOAT), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f4", Schema.unionOf(Schema.of(Schema.Type.DOUBLE), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f5", Schema.unionOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f6", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
Schema.Field.of("f7", Schema.arrayOf(Schema.of(Schema.Type.STRING)))
);
DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
FormatSpecification spec = new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(),
schema,
ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, ","));
format.initialize(spec);
boolean booleanVal = false;
int intVal = Integer.MAX_VALUE;
float floatVal = Float.MAX_VALUE;
double doubleVal = Double.MAX_VALUE;
byte[] bytesVal = new byte[] { 0, 1, 2 };
String stringVal = "foo bar";
String[] arrayVal = new String[] { "extra1", "extra2", "extra3" };
String body = new StringBuilder()
.append(booleanVal).append(",")
.append(intVal).append(",")
.append(floatVal).append(",")
.append(doubleVal).append(",")
.append(Bytes.toStringBinary(bytesVal)).append(",")
.append(stringVal).append(",")
.append(arrayVal[0]).append(",")
.append(arrayVal[1]).append(",")
.append(arrayVal[2])
.toString();
StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
Assert.assertEquals(booleanVal, output.get("f1"));
Assert.assertEquals(intVal, output.get("f2"));
Assert.assertEquals(floatVal, output.get("f3"));
Assert.assertEquals(doubleVal, output.get("f4"));
Assert.assertArrayEquals(bytesVal, (byte[]) output.get("f5"));
Assert.assertEquals(stringVal, output.get("f6"));
Assert.assertArrayEquals(arrayVal, (String[]) output.get("f7"));
// now try with null fields.
output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes("true,,3.14159,,,hello world,extra1"))));
Assert.assertTrue((Boolean) output.get("f1"));
Assert.assertNull(output.get("f2"));
Assert.assertTrue(Math.abs(3.14159 - (Float) output.get("f3")) < 0.000001);
Assert.assertNull(output.get("f4"));
Assert.assertNull(output.get("f5"));
Assert.assertEquals("hello world", output.get("f6"));
Assert.assertArrayEquals(new String[] {"extra1"}, (String[]) output.get("f7"));
}
}