/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import java.util.Set;
import java.util.UUID;
import org.apache.avro.SchemaBuilder;
import org.junit.Assert;
import org.junit.Test;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.impl.Accessor;
public class TestKeyRangeIterable {
public static final Constraints emptyConstraints = new Constraints(
SchemaBuilder.record("Event").fields()
.requiredString("id")
.requiredLong("timestamp")
.optionalString("component")
.optionalInt("number")
.optionalInt("number2")
.endRecord());
public static final PartitionStrategy id = new PartitionStrategy.Builder()
.identity("component", "id_component")
.build();
public static final MarkerComparator idCmp = new MarkerComparator(id);
public static final PartitionStrategy hash = new PartitionStrategy.Builder()
.hash("id", 64)
.identity("id")
.build();
public static final MarkerComparator hashCmp = new MarkerComparator(hash);
public static final PartitionStrategy num = new PartitionStrategy.Builder()
.identity("number", "id_number")
.build();
public static final MarkerComparator numCmp = new MarkerComparator(num);
public static final PartitionStrategy num2 = new PartitionStrategy.Builder()
.identity("number", "id_number")
.identity("number2", "id_number2")
.build();
public static final MarkerComparator num2Cmp = new MarkerComparator(num2);
public static final PartitionStrategy time = new PartitionStrategy.Builder()
.year("timestamp").month("timestamp").day("timestamp").build();
public static final MarkerComparator timeCmp = new MarkerComparator(time);
public static final PartitionStrategy strategy = new PartitionStrategy.Builder()
.hash("id", "id_hash", 64)
.year("timestamp").month("timestamp").day("timestamp")
.identity("component", "id_component")
.identity("id")
.build();
public static final MarkerComparator cmp = new MarkerComparator(strategy);
@Test
public void testUnbounded() {
Constraints c = emptyConstraints.partitionedBy(id);
assertIterableEquals(
Sets.newHashSet(new MarkerRange(idCmp)),
c.toKeyRanges());
}
@Test
public void testSingleSet() {
Constraints c = emptyConstraints.partitionedBy(id)
.with("component", "com.company.Main");
Marker main = new Marker.Builder("id_component", "com.company.Main").build();
MarkerRange actual = Iterables.getOnlyElement(c.toKeyRanges());
Assert.assertEquals(main, actual.getStart().getBound());
Assert.assertEquals(main, actual.getEnd().getBound());
Assert.assertEquals(new MarkerRange(idCmp).of(main), actual);
c = emptyConstraints.partitionedBy(id).with("component",
"com.company.Main", "com.company.SomeClass");
Marker sc = new Marker.Builder("id_component", "com.company.SomeClass").build();
assertIterableEquals(
Sets.newHashSet(
new MarkerRange(idCmp).of(main),
new MarkerRange(idCmp).of(sc)),
c.toKeyRanges());
}
@Test
@SuppressWarnings("unchecked")
public void testGroupSet() {
String[] ids = new String[]{
UUID.randomUUID().toString(), UUID.randomUUID().toString()};
FieldPartitioner hashFunc = Accessor.getDefault().getFieldPartitioners(hash).get(0);
Constraints c = emptyConstraints.partitionedBy(hash).with("id", ids[0]);
Marker marker0 = new Marker.Builder()
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0]).build();
MarkerRange actual = Iterables.getOnlyElement(c.toKeyRanges());
Assert.assertEquals(marker0, actual.getStart().getBound());
Assert.assertEquals(marker0, actual.getEnd().getBound());
c = emptyConstraints.partitionedBy(hash).with("id", (Object[]) ids);
Marker marker1 = new Marker.Builder()
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1]).build();
assertIterableEquals(
Sets.newHashSet(
new MarkerRange(hashCmp).of(marker0),
new MarkerRange(hashCmp).of(marker1)),
c.toKeyRanges());
}
@Test
public void testSingleRange() {
Constraints c = emptyConstraints.partitionedBy(num2)
.from("number", 5).toBefore("number", 18)
.to("number2", 9);
Marker start = new Marker.Builder("id_number", 5).build();
Marker stop = new Marker.Builder().add("id_number", 18).add("id_number2", 9).build();
Assert.assertEquals(
new MarkerRange(num2Cmp).from(start).to(stop),
Iterables.getOnlyElement(c.toKeyRanges()));
}
@Test
public void testHashRange() {
Constraints c = emptyConstraints.partitionedBy(hash)
.from("id", "0000").toBefore("id", "0001");
// note the lack of a hash field -- ranges cannot be projected through hash
Marker start = new Marker.Builder("id_copy", "0000").build();
Marker stop = new Marker.Builder("id_copy", "0001").build();
Assert.assertEquals(
new MarkerRange(hashCmp).from(start).to(stop),
Iterables.getOnlyElement(c.toKeyRanges()));
}
@Test
public void testGroupRange() {
Constraints c = emptyConstraints.partitionedBy(num)
.from("number", 5).toBefore("number", 18);
Marker start = new Marker.Builder("id_number", 5).build();
Marker stop = new Marker.Builder("id_number", 18).build();
Assert.assertEquals(
new MarkerRange(numCmp).from(start).to(stop),
Iterables.getOnlyElement(c.toKeyRanges()));
}
@Test
public void testTimeSet() {
Long[] timestamps = new Long[] {
1379020547042L, // Thu Sep 12 14:15:47 PDT 2013
1381612547042L, // Sat Oct 12 14:15:47 PDT 2013
1384204547042L // Mon Nov 11 13:15:47 PST 2013
};
Constraints c = emptyConstraints.partitionedBy(time)
.with("timestamp", (Object[]) timestamps);
Marker sep = new Marker.Builder().add("year", 2013).add("month", 9).add("day", 12).build();
Marker oct = new Marker.Builder().add("year", 2013).add("month", 10).add("day", 12).build();
Marker nov = new Marker.Builder().add("year", 2013).add("month", 11).add("day", 11).build();
assertIterableEquals(Sets.newHashSet(
new MarkerRange(timeCmp).of(sep),
new MarkerRange(timeCmp).of(oct),
new MarkerRange(timeCmp).of(nov)),
c.toKeyRanges());
}
@Test
public void testTimeRange() {
Long[] timestamps = new Long[] {
1379020547042L, // Thu Sep 12 14:15:47 PDT 2013
1381612547042L, // Sat Oct 12 14:15:47 PDT 2013
1384204547042L // Mon Nov 11 13:15:47 PST 2013
};
Constraints c = emptyConstraints.partitionedBy(time)
.from("timestamp", timestamps[0]).to("timestamp", timestamps[2]);
Marker sep = new Marker.Builder().add("year", 2013).add("month", 9).add("day", 12).build();
Marker nov = new Marker.Builder().add("year", 2013).add("month", 11).add("day", 11).build();
Assert.assertEquals(
new MarkerRange(timeCmp).from(sep).to(nov),
Iterables.getOnlyElement(c.toKeyRanges()));
}
@Test
@SuppressWarnings("unchecked")
public void testCombined() {
Long[] timestamps = new Long[] {
1379020547042L, // Thu Sep 12 14:15:47 PDT 2013
1384204547042L // Mon Nov 11 13:15:47 PST 2013
};
String[] ids = new String[]{
UUID.randomUUID().toString(), UUID.randomUUID().toString()};
FieldPartitioner hashFunc = Accessor.getDefault().getFieldPartitioners(hash).get(0);
Constraints c = emptyConstraints.partitionedBy(strategy)
.from("timestamp", timestamps[0]).to("timestamp", timestamps[1])
.with("id", (Object[]) ids);
// first range
Marker sep0 = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12)
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0])
.build();
Marker nov0 = new Marker.Builder()
.add("year", 2013).add("month", 11).add("day", 11)
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0])
.build();
// second range
Marker sep1 = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12)
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1])
.build();
Marker nov1 = new Marker.Builder()
.add("year", 2013).add("month", 11).add("day", 11)
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1])
.build();
assertIterableEquals(Sets.newHashSet(
new MarkerRange(cmp).from(sep0).to(nov0),
new MarkerRange(cmp).from(sep1).to(nov1)),
c.toKeyRanges());
// more complication
Marker sep2 = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12)
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0])
.add("id_component", "com.company.Main")
.build();
Marker nov2 = new Marker.Builder()
.add("year", 2013).add("month", 11).add("day", 11)
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0])
.add("id_component", "com.company.Main")
.build();
Marker sep3 = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12)
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1])
.add("id_component", "com.company.Main")
.build();
Marker nov3 = new Marker.Builder()
.add("year", 2013).add("month", 11).add("day", 11)
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1])
.add("id_component", "com.company.Main")
.build();
Marker sep4 = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12)
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0])
.add("id_component", "com.company.SomeClass")
.build();
Marker nov4 = new Marker.Builder()
.add("year", 2013).add("month", 11).add("day", 11)
.add("id_hash", hashFunc.apply(ids[0])).add("id_copy", ids[0])
.add("id_component", "com.company.SomeClass")
.build();
Marker sep5 = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12)
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1])
.add("id_component", "com.company.SomeClass")
.build();
Marker nov5 = new Marker.Builder()
.add("year", 2013).add("month", 11).add("day", 11)
.add("id_hash", hashFunc.apply(ids[1])).add("id_copy", ids[1])
.add("id_component", "com.company.SomeClass")
.build();
assertIterableEquals(Sets.newHashSet(
new MarkerRange(cmp).from(sep2).to(nov2),
new MarkerRange(cmp).from(sep3).to(nov3),
new MarkerRange(cmp).from(sep4).to(nov4),
new MarkerRange(cmp).from(sep5).to(nov5)),
c.with("component", "com.company.Main", "com.company.SomeClass")
.toKeyRanges());
}
@Test
public void testOneSidedRange() {
// Thu Sep 12 14:15:47 PDT 2013
Constraints c = emptyConstraints.partitionedBy(time)
.from("timestamp", 1379020547042L);
Marker sep = new Marker.Builder()
.add("year", 2013).add("month", 9).add("day", 12).build();
assertIterableEquals(Sets.newHashSet(new MarkerRange(timeCmp).from(sep)),
c.toKeyRanges());
}
public static <T> void assertIterableEquals(
Iterable<T> expected, Iterable<T> actualIterable) {
Set<T> expectedSet = Sets.newHashSet(expected);
for (T actual : actualIterable) {
// need to check as iteration happens because the object is reused
Assert.assertTrue("Unexpected record: " + actual + " expected: " + expectedSet,
expectedSet.remove(actual));
}
Assert.assertEquals("Not all expected records were present: " + expectedSet,
0, expectedSet.size());
}
}