/**
 * Copyright 2014 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.data.spi;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.junit.Assert;
import org.junit.Test;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.TestHelpers;
import org.kitesdk.data.ValidationException;

/**
 * Tests for {@link PartitionStrategyParser}.
 *
 * <p>The tests compare strategies parsed from JSON against strategies built
 * with {@link PartitionStrategy.Builder}, check that malformed definitions
 * are rejected with {@link ValidationException}, check that I/O failures
 * surface as {@link DatasetIOException}, and exercise embedding a strategy
 * in an Avro {@link Schema}.
 */
public class TestPartitionStrategyParser {

  /**
   * Asserts that {@code json} parses to {@code expected}, and that the
   * string form of {@code expected} parses back to an equal strategy.
   *
   * @param expected the strategy the JSON should produce
   * @param json a JSON partition strategy definition
   */
  public static void checkParser(PartitionStrategy expected, String json) {
    Assert.assertEquals(expected, PartitionStrategyParser.parse(json));

    // round trip: the strategy's own string form must parse to an equal strategy
    PartitionStrategy reparsed = PartitionStrategyParser.parse(expected.toString());
    Assert.assertEquals("Should reparse properly", expected, reparsed);
  }

  /**
   * Asserts that parsing {@code json} throws a {@link ValidationException}.
   *
   * @param message the assertion message passed to {@code TestHelpers}
   * @param json the strategy definition that should be rejected
   */
  private static void assertRejects(String message, final String json) {
    TestHelpers.assertThrows(message, ValidationException.class,
        new Runnable() {
          @Override
          public void run() {
            PartitionStrategyParser.parse(json);
          }
        });
  }

  /** Builds the hash + identity strategy on "username" shared by the embedding tests. */
  private static PartitionStrategy usernameStrategy() {
    return new PartitionStrategy.Builder()
        .hash("username", 16)
        .identity("username", "u")
        .build();
  }

  /** Identity partitioners, with an explicit name and with the name omitted. */
  @Test
  public void testIdentity() {
    // right now, the field type is taken from the Schema
    PartitionStrategy named = new PartitionStrategy.Builder()
        .identity("username", "id")
        .build();
    checkParser(named,
        "[ {\"type\": \"identity\", \"source\": \"username\", \"name\": \"id\"} ]");

    // no "name" in the JSON: the expected partitioner name is "username_copy"
    PartitionStrategy unnamed = new PartitionStrategy.Builder()
        .identity("username", "username_copy")
        .build();
    checkParser(unnamed,
        "[ {\"type\": \"identity\", \"source\": \"username\"} ]");
  }

  /** Hash partitioners, plus rejection of missing or non-numeric buckets. */
  @Test
  public void testHash() {
    PartitionStrategy unnamed = new PartitionStrategy.Builder().hash("id", 64).build();
    checkParser(unnamed,
        "[ {\"type\": \"hash\", \"source\": \"id\", \"buckets\": 64} ]");

    PartitionStrategy named = new PartitionStrategy.Builder().hash("id", "h", 64).build();
    checkParser(named,
        "[ {\"type\": \"hash\", \"source\": \"id\", \"name\": \"h\", \"buckets\": 64} ]");

    assertRejects("Should reject missing buckets",
        "[ {\"type\": \"hash\", \"source\": \"id\", \"name\": \"h\"} ]");
    assertRejects("Should reject invalid buckets",
        "[ {\"type\": \"hash\", \"source\": \"id\", \"name\": \"h\", "
            + "\"buckets\": \"green\"} ]");
  }

  /** Fixed-size range partitioners, plus rejection of missing or non-numeric size. */
  @Test
  public void testFixedSizedRange() {
    PartitionStrategy unnamed =
        new PartitionStrategy.Builder().fixedSizeRange("id", 64).build();
    checkParser(unnamed,
        "[ {\"type\": \"range\", \"source\": \"id\", \"size\": 64} ]");

    PartitionStrategy named =
        new PartitionStrategy.Builder().fixedSizeRange("id", "rng", 64).build();
    checkParser(named,
        "[ {\"type\": \"range\", \"source\": \"id\", \"name\": \"rng\", \"size\": 64} ]");

    assertRejects("Should reject missing size",
        "[ {\"type\": \"range\", \"source\": \"id\", \"name\": \"rng\"} ]");
    assertRejects("Should reject invalid size",
        "[ {\"type\": \"range\", \"source\": \"id\", \"name\": \"rng\", "
            + "\"size\": \"green\"} ]");
  }

  /** Date-format partitioners, plus rejection when format or name is missing. */
  @Test
  public void testDateFormat() {
    PartitionStrategy expected = new PartitionStrategy.Builder()
        .dateFormat("time", "date", "yyyyMMdd")
        .build();
    checkParser(expected,
        "[ {\"type\": \"dateFormat\", \"source\": \"time\", "
            + "\"name\": \"date\", \"format\": \"yyyyMMdd\"} ]");

    assertRejects("Should reject missing format",
        "[ {\"type\": \"dateFormat\", \"source\": \"time\", \"name\": \"date\"} ]");
    assertRejects("Should reject missing name",
        "[ {\"type\": \"dateFormat\", \"source\": \"time\", \"format\": \"yyyyMMdd\"} ]");
  }

  /** Year partitioners, with and without an explicit name. */
  @Test
  public void testYear() {
    PartitionStrategy unnamed = new PartitionStrategy.Builder().year("time").build();
    checkParser(unnamed, "[ {\"type\": \"year\", \"source\": \"time\"} ]");

    PartitionStrategy named = new PartitionStrategy.Builder().year("time", "y").build();
    checkParser(named, "[ {\"type\": \"year\", \"source\": \"time\", \"name\": \"y\"} ]");
  }

  /** Month partitioners, with and without an explicit name. */
  @Test
  public void testMonth() {
    PartitionStrategy unnamed = new PartitionStrategy.Builder().month("time").build();
    checkParser(unnamed, "[ {\"type\": \"month\", \"source\": \"time\"} ]");

    PartitionStrategy named = new PartitionStrategy.Builder().month("time", "m").build();
    checkParser(named, "[ {\"type\": \"month\", \"source\": \"time\", \"name\": \"m\"} ]");
  }

  /** Day partitioners, with and without an explicit name. */
  @Test
  public void testDay() {
    PartitionStrategy unnamed = new PartitionStrategy.Builder().day("time").build();
    checkParser(unnamed, "[ {\"type\": \"day\", \"source\": \"time\"} ]");

    PartitionStrategy named = new PartitionStrategy.Builder().day("time", "d").build();
    checkParser(named, "[ {\"type\": \"day\", \"source\": \"time\", \"name\": \"d\"} ]");
  }

  /** Hour partitioners, with and without an explicit name. */
  @Test
  public void testHour() {
    PartitionStrategy unnamed = new PartitionStrategy.Builder().hour("time").build();
    checkParser(unnamed, "[ {\"type\": \"hour\", \"source\": \"time\"} ]");

    PartitionStrategy named = new PartitionStrategy.Builder().hour("time", "h").build();
    checkParser(named, "[ {\"type\": \"hour\", \"source\": \"time\", \"name\": \"h\"} ]");
  }

  /** Minute partitioners, with and without an explicit name. */
  @Test
  public void testMinute() {
    PartitionStrategy unnamed = new PartitionStrategy.Builder().minute("time").build();
    checkParser(unnamed, "[ {\"type\": \"minute\", \"source\": \"time\"} ]");

    PartitionStrategy named = new PartitionStrategy.Builder().minute("time", "m").build();
    checkParser(named, "[ {\"type\": \"minute\", \"source\": \"time\", \"name\": \"m\"} ]");
  }

  /**
   * Provided partitioners: accepted "values" types, and rejection of a
   * missing name or an unsupported/unknown values type.
   */
  @Test
  public void testProvided() {
    String noValuesJson = "[ {\"type\": \"provided\", \"name\": \"version\"} ]";
    String stringValuesJson =
        "[ {\"type\": \"provided\", \"name\": \"version\", \"values\": \"string\"} ]";

    checkParser(new PartitionStrategy.Builder().provided("version").build(),
        noValuesJson);
    // a strategy built without a values type must equal one parsed with "string"
    checkParser(new PartitionStrategy.Builder().provided("version").build(),
        stringValuesJson);
    checkParser(new PartitionStrategy.Builder().provided("version", "string").build(),
        stringValuesJson);
    checkParser(new PartitionStrategy.Builder().provided("version", "int").build(),
        "[ {\"type\": \"provided\", \"name\": \"version\", \"values\": \"int\"} ]");
    checkParser(new PartitionStrategy.Builder().provided("version", "long").build(),
        "[ {\"type\": \"provided\", \"name\": \"version\", \"values\": \"long\"} ]");

    assertRejects("Should reject missing name",
        "[ {\"type\": \"provided\"} ]");
    assertRejects("Should reject unsupported values type",
        "[ {\"type\": \"provided\", \"name\": \"version\", \"values\": \"float\"} ]");
    assertRejects("Should reject invalid values type",
        "[ {\"type\": \"provided\", \"name\": \"version\", \"values\": \"unknown\"} ]");
  }

  /** A strategy that combines several partitioner types in one JSON array. */
  @Test
  public void testMultipleFields() {
    PartitionStrategy expected = new PartitionStrategy.Builder()
        .provided("version")
        .hash("username", 64)
        .identity("username", "u")
        .year("time")
        .month("time")
        .day("time")
        .hour("time")
        .minute("time")
        .dateFormat("time", "datetime", "yyyy_MM_dd_HHmmss")
        .build();

    // one literal per partitioner, in the same order as the builder calls
    String json = "[ "
        + "{\"type\": \"provided\", \"name\": \"version\"},"
        + "{\"type\": \"hash\", \"source\": \"username\", \"buckets\": 64},"
        + "{\"type\": \"identity\",\"source\": \"username\", \"name\": \"u\"},"
        + "{\"type\": \"year\", \"source\": \"time\"},"
        + "{\"type\": \"month\", \"source\": \"time\"},"
        + "{\"type\": \"day\", \"source\": \"time\"},"
        + "{\"type\": \"hour\", \"source\": \"time\"},"
        + "{\"type\": \"minute\", \"source\": \"time\"},"
        + "{\"type\": \"dateFormat\", \"source\": \"time\", "
        + "\"name\": \"datetime\", \"format\": \"yyyy_MM_dd_HHmmss\"}"
        + " ]";

    checkParser(expected, json);
  }

  /** A numeric "source" value is expected to match the string source "34". */
  @Test
  public void testNumericInsteadOfString() {
    // coerced to a string
    PartitionStrategy expected = new PartitionStrategy.Builder().year("34").build();
    checkParser(expected, "[ {\"type\": \"year\", \"source\": 34} ]");
  }

  /** Each listed partitioner type must be rejected when it has no "source". */
  @Test
  public void testMissingSource() {
    String[] sourceBasedTypes = {
        "identity", "hash", "year", "month", "day", "hour", "minute", "dateFormat"
    };
    for (String partitionerType : sourceBasedTypes) {
      assertRejects("Should reject missing source",
          "[ {\"type\": \"" + partitionerType + "\"} ]");
    }
  }

  /** A partitioner without a "type" must be rejected. */
  @Test
  public void testMissingType() {
    assertRejects("Should reject missing partitioner type",
        "[ {\"source\": \"banana\"} ]");
  }

  /** A partitioner with an unrecognized "type" must be rejected. */
  @Test
  public void testUnknownType() {
    assertRejects("Should reject unknown partitioner type",
        "[ {\"type\": \"cats\", \"source\": \"banana\"} ]");
  }

  /** A strategy given as a JSON object instead of an array must be rejected. */
  @Test
  public void testJsonObject() {
    assertRejects("Should reject non-array strategy",
        "{\"type\": \"year\", \"source\": \"banana\"}");
  }

  /** Array elements that are not JSON objects must be rejected. */
  @Test
  public void testNonRecordPartitioner() {
    // every case starts with one valid partitioner followed by a bad element
    String validThenComma = "[ " + "{\"type\": \"year\", \"source\": \"time\"},";
    String close = " ]";

    assertRejects("Should reject JSON string partitioner",
        validThenComma + "\"cheese!\"" + close);
    assertRejects("Should reject JSON number partitioner",
        validThenComma + "34" + close);
    assertRejects("Should reject JSON array partitioner",
        validThenComma + "[ 1, 2, 3 ]" + close);
  }

  /** Syntactically invalid JSON must be rejected. */
  @Test
  public void testInvalidJson() {
    // note the comma after "type" where a colon is required
    assertRejects("Should reject bad JSON",
        "[ {\"type\", \"year\", \"source\": \"banana\"} ]");
  }

  /** An IOException raised while reading the stream must surface as DatasetIOException. */
  @Test
  public void testInputStreamIOException() {
    final InputStream angryStream = new InputStream() {
      @Override
      public int read() throws IOException {
        throw new IOException("InputStream angry.");
      }
    };

    TestHelpers.assertThrows("Should pass DatasetIOException",
        DatasetIOException.class, new Runnable() {
          @Override
          public void run() {
            PartitionStrategyParser.parse(angryStream);
          }
        });
  }

  /** Parsing a file that does not exist must surface as DatasetIOException. */
  @Test
  public void testMissingFile() {
    final File missing = new File("target/missing.json");

    TestHelpers.assertThrows("Should pass DatasetIOException",
        DatasetIOException.class, new Runnable() {
          @Override
          public void run() {
            PartitionStrategyParser.parse(missing);
          }
        });
  }

  /** Embedding a strategy in a schema that has none makes it readable from the schema. */
  @Test
  public void testAddEmbeddedPartitionStrategy() {
    PartitionStrategy strategy = usernameStrategy();

    String schemaJson = "{"
        + " \"type\": \"record\","
        + " \"name\": \"User\","
        + " \"fields\": ["
        + " {\"name\": \"id\", \"type\": \"long\"},"
        + " {\"name\": \"username\", \"type\": \"string\"},"
        + " {\"name\": \"real_name\", \"type\": \"string\"}"
        + " ]"
        + "}";
    Schema original = new Schema.Parser().parse(schemaJson);

    Schema embedded = PartitionStrategyParser.embedPartitionStrategy(original, strategy);

    Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(embedded));
    Assert.assertEquals(strategy, PartitionStrategyParser.parseFromSchema(embedded));
  }

  /** Embedding a strategy in a schema that already has one replaces the existing one. */
  @Test
  public void testReplaceEmbeddedPartitionStrategy() {
    PartitionStrategy strategy = usernameStrategy();

    String schemaJson = "{"
        + " \"type\": \"record\","
        + " \"name\": \"User\","
        + " \"partitions\": ["
        + " {\"type\": \"hash\", \"source\": \"real_name\", \"buckets\": 64},"
        + " {\"type\": \"identity\", \"source\": \"real_name\", \"name\": \"r\"}"
        + " ],"
        + " \"fields\": ["
        + " {\"name\": \"id\", \"type\": \"long\"},"
        + " {\"name\": \"username\", \"type\": \"string\"},"
        + " {\"name\": \"real_name\", \"type\": \"string\"}"
        + " ]"
        + "}";
    Schema original = new Schema.Parser().parse(schemaJson);

    // precondition: the schema starts out with a different embedded strategy
    Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(original));
    Assert.assertFalse(PartitionStrategyParser.parseFromSchema(original).equals(strategy));

    Schema embedded = PartitionStrategyParser.embedPartitionStrategy(original, strategy);

    Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(embedded));
    Assert.assertEquals(strategy, PartitionStrategyParser.parseFromSchema(embedded));
  }
}