package com.alexholmes.avro.sort.avrokey; import org.apache.avro.Schema; import org.apache.avro.hadoop.io.AvroSerialization; import org.apache.avro.io.AvroDataHack; import org.apache.avro.mapred.AvroKey; import org.apache.avro.specific.SpecificRecordBase; import org.apache.hadoop.conf.Configuration; import org.junit.Test; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import static org.junit.Assert.*; /** * */ public class TestAvroSort { public static AvroDataHack.OrderedField of(Schema s, String fieldName, boolean ascending) { return new AvroDataHack.OrderedField().setField(s.getField(fieldName)).setAscendingOrder(ascending); } static Map<Schema, List<AvroDataHack.OrderedField>> generateMap() { Map<Schema, List<AvroDataHack.OrderedField>> input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo2", true), of(Foo.SCHEMA$, "foo1", true), of(Foo.SCHEMA$, "foo3", true))); input.put(Bar.SCHEMA$, Arrays.asList(of(Bar.SCHEMA$, "bar2", true))); return input; } static String generateJsonString() { return AvroSort.schemaFieldsToJson(generateMap()); } @Test public void testJson() { assertEquals(0, Foo.SCHEMA$.getField("foo1").pos()); assertEquals(1, Foo.SCHEMA$.getField("foo2").pos()); assertEquals(2, Foo.SCHEMA$.getField("foo3").pos()); assertEquals(0, Bar.SCHEMA$.getField("bar1").pos()); assertEquals(1, Bar.SCHEMA$.getField("bar2").pos()); Map<Schema, List<AvroDataHack.OrderedField>> input = generateMap(); String json = AvroSort.schemaFieldsToJson(input); System.out.println("JSON = '" + json + "'"); Map<Schema, List<AvroDataHack.OrderedField>> output = AvroSort.jsonToSchemaFields(json); assertEquals(input, output); } @Test public void testSingleFieldPartitioner() { Map<Schema, List<AvroDataHack.OrderedField>> input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo1", true))); assertPartitionsEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertPartitionsEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(2).setBar2(3).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); } @Test public void testTwoFieldPartitioner() { Map<Schema, List<AvroDataHack.OrderedField>> input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo1", true), of(Foo.SCHEMA$, "foo2", true))); assertPartitionsEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertPartitionsEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(2).setBar2(3).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); } @Test public void testChildRecordFieldPartitioner() { Map<Schema, List<AvroDataHack.OrderedField>> input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo2", true), of(Foo.SCHEMA$, "foo3", true))); input.put(Bar.SCHEMA$, Arrays.asList( of(Bar.SCHEMA$, "bar2", true))); assertPartitionsEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(3).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertPartitionsNotEqual(input, Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(3).build()).build()); } public static void assertPartitionsEqual(Map<Schema, List<AvroDataHack.OrderedField>> input, Foo x, Foo y) { Configuration c = new Configuration(); c.set(AvroSort.PARTITIONING_FIELD_POSITIONS, AvroSort.schemaFieldsToJson(input)); AvroSort.AvroSecondarySortPartitioner part = new AvroSort.AvroSecondarySortPartitioner(); part.setConf(c); int partitionX = part.getPartition(new AvroKey<SpecificRecordBase>(x), null, 10); int partitionY = part.getPartition(new AvroKey<SpecificRecordBase>(y), null, 10); assertEquals("x=" + x + " y=" + y, partitionX, partitionY); } public static void assertPartitionsNotEqual(Map<Schema, List<AvroDataHack.OrderedField>> input, Foo x, Foo y) { Configuration c = new Configuration(); c.set(AvroSort.PARTITIONING_FIELD_POSITIONS, AvroSort.schemaFieldsToJson(input)); AvroSort.AvroSecondarySortPartitioner part = new AvroSort.AvroSecondarySortPartitioner(); part.setConf(c); int partitionX = part.getPartition(new AvroKey<SpecificRecordBase>(x), null, 10); int partitionY = part.getPartition(new AvroKey<SpecificRecordBase>(y), null, 10); assertNotSame("x=" + x + " y=" + y, partitionX, partitionY); } @Test public void testSingleFieldComparator() { Map<Schema, List<AvroDataHack.OrderedField>> input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo1", true))); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(2).setBar2(3).build()).build()); assertComparatorLhsLess(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsMore(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(4).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); // descending sort input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo1", false))); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsMore(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsLess(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(4).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); } @Test public void testTwoFieldComparator() { Map<Schema, List<AvroDataHack.OrderedField>> input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo2", true), of(Foo.SCHEMA$, "foo1", true))); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(2).setBar2(3).build()).build()); assertComparatorLhsLess(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsLess(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsLess(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(6).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); // descending sort input = new HashMap<Schema, List<AvroDataHack.OrderedField>>(); input.put(Foo.SCHEMA$, Arrays.asList( of(Foo.SCHEMA$, "foo2", false), of(Foo.SCHEMA$, "foo1", false))); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorEquals(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(2).setBar2(3).build()).build()); assertComparatorLhsMore(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(6).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsMore(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(5).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); assertComparatorLhsMore(Foo.SCHEMA$, input, Foo.newBuilder().setFoo1(6).setFoo2("bar").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build(), Foo.newBuilder().setFoo1(5).setFoo2("foo").setFoo3(Bar.newBuilder().setBar1(1).setBar2(2).build()).build()); } public static void assertComparatorEquals(Schema schema, Map<Schema, List<AvroDataHack.OrderedField>> input, Foo x, Foo y) { int result = getComparatorResult(schema, input, x, y); assertEquals("x=" + x + " y=" + y, 0, result); } public static void assertComparatorLhsLess(Schema schema, Map<Schema, List<AvroDataHack.OrderedField>> input, Foo x, Foo y) { int result = getComparatorResult(schema, input, x, y); assertTrue("x=" + x + " y=" + y, result < 0); } public static void assertComparatorLhsMore(Schema schema, Map<Schema, List<AvroDataHack.OrderedField>> input, Foo x, Foo y) { int result = getComparatorResult(schema, input, x, y); assertTrue("x=" + x + " y=" + y, result > 0); } public static int getComparatorResult(Schema schema, Map<Schema, List<AvroDataHack.OrderedField>> input, Foo x, Foo y) { Configuration c = new Configuration(); c.set(AvroSort.SORTING_FIELD_POSITIONS, AvroSort.schemaFieldsToJson(input)); AvroSerialization.setKeyWriterSchema(c, schema); AvroSerialization.setKeyReaderSchema(c, schema); AvroSort.AvroSortingComparator comp = new AvroSort.AvroSortingComparator(); comp.setConf(c); return comp.compare(new AvroKey<SpecificRecordBase>(x), new AvroKey<SpecificRecordBase>(y)); } }