/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi.partition; import com.google.common.base.Predicate; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import java.text.SimpleDateFormat; import java.util.List; import java.util.Set; import java.util.TimeZone; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.junit.Assert; import org.junit.Ignore; import org.junit.Test; import org.kitesdk.data.TestHelpers; import org.kitesdk.data.spi.FieldPartitioner; import org.kitesdk.data.spi.predicates.In; import org.kitesdk.data.spi.predicates.Predicates; import org.kitesdk.data.spi.predicates.Range; import org.kitesdk.data.spi.predicates.Ranges; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class TestPartitionerProjection { private static final Logger LOG = LoggerFactory .getLogger(TestPartitionerProjection.class); public long sepInstant = 1379020547042l; // Thu Sep 12 14:15:47 PDT 2013 public long octInstant = 1381612547042l; // Sat Oct 12 14:15:47 PDT 2013 public long novInstant = 1384204547042l; // Mon Nov 11 13:15:47 PST 2013 public static final long ONE_DAY_MILLIS = 86400000; // 24 * 60 * 60 * 1000 public static final long ONE_YEAR_MILLIS = ONE_DAY_MILLIS * 365; @Test public void testDateFormatPartitionerRangePredicate() { FieldPartitioner<Long, String> fp = new DateFormatPartitioner("timestamp", "date", "yyyy-MM-dd"); Predicate<String> projected = fp.project( Ranges.open(octInstant, octInstant + ONE_DAY_MILLIS)); Assert.assertEquals(Ranges.closed("2013-10-12", "2013-10-13"), projected); } @Test @Ignore // Not yet implemented public void testDateFormatPartitionerProjectStrict() { FieldPartitioner<Long, String> fp = new DateFormatPartitioner("timestamp", "date", "yyyy-MM-dd"); SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); format.setTimeZone(TimeZone.getTimeZone("UTC")); Predicate<String> projected = fp.projectStrict( Ranges.open(sepInstant, novInstant)); Assert.assertEquals(Ranges.closed("2013-09-13", "2013-11-10"), projected); } @Test public void testDateFormatPartitionerSetPredicate() { FieldPartitioner<Long, String> fp = new DateFormatPartitioner("timestamp", "date", "yyyy-MM-dd"); Assert.assertEquals( Predicates.in("2013-09-12", "2013-10-12", "2013-11-11"), fp.project(Predicates.in(sepInstant, octInstant, novInstant))); Assert.assertNull(fp.projectStrict( Predicates.in(sepInstant, octInstant, novInstant))); } @Test public void testYearFieldPartitionerRangePredicate() { FieldPartitioner<Long, Integer> fp = new YearFieldPartitioner("timestamp", "year"); // Range within a year Assert.assertEquals(Ranges.singleton(2013), fp.project(Ranges.open(sepInstant, novInstant))); Assert.assertNull("No year value definitely satisfies original predicate", fp.projectStrict(Ranges.open(sepInstant, novInstant))); // Range spanning a year Assert.assertEquals(Ranges.closed(2012, 2013), fp.project( Ranges.open(sepInstant - ONE_YEAR_MILLIS, novInstant))); Assert.assertNull("No year value definitely satisfies original predicate", fp.projectStrict(Ranges.open( sepInstant - ONE_YEAR_MILLIS, novInstant))); // Range spanning two years Assert.assertEquals(Ranges.closed(2012, 2014), fp.project(Ranges.open( sepInstant - ONE_YEAR_MILLIS, novInstant + ONE_YEAR_MILLIS))); Assert.assertEquals(Ranges.singleton(2013), fp.projectStrict(Ranges.open( sepInstant - ONE_YEAR_MILLIS, novInstant + ONE_YEAR_MILLIS))); // open ended ranges Assert.assertEquals(Ranges.atLeast(2013), fp.project(Ranges.greaterThan(sepInstant))); Assert.assertEquals(Ranges.atLeast(2014), fp.projectStrict(Ranges.greaterThan(sepInstant))); Assert.assertEquals(Ranges.atMost(2013), fp.project(Ranges.atMost(sepInstant))); Assert.assertEquals(Ranges.atMost(2012), fp.projectStrict(Ranges.atMost(sepInstant))); // edge cases long first2013 = new DateTime(2013, 1, 1, 0, 0, DateTimeZone.UTC) .getMillis(); long last2012 = first2013 - 1; Assert.assertEquals(Ranges.atMost(2012), fp.projectStrict(Ranges.atMost(last2012))); Assert.assertEquals(Ranges.atMost(2012), fp.projectStrict(Ranges.lessThan(first2013))); Assert.assertEquals(Ranges.atLeast(2013), fp.projectStrict(Ranges.atLeast(first2013))); Assert.assertEquals(Ranges.atLeast(2013), fp.projectStrict(Ranges.greaterThan(last2012))); } @Test public void testYearFieldPartitionerSetPredicate() { FieldPartitioner<Long, Integer> fp = new YearFieldPartitioner("timestamp", "year"); // A single year Assert.assertEquals(Predicates.in(2013), fp.project(Predicates.in(sepInstant, octInstant))); Assert.assertNull(fp.projectStrict(Predicates.in(sepInstant))); // Multiple years Assert.assertEquals(Predicates.in(2012, 2013), fp.project(Predicates.in(sepInstant - ONE_YEAR_MILLIS, octInstant))); Assert.assertNull(fp.projectStrict( Predicates.in(sepInstant - ONE_YEAR_MILLIS, octInstant))); } @Test public void testCalendarFieldPartitioners() { List<CalendarFieldPartitioner> fps = Lists.newArrayList( new MonthFieldPartitioner("timestamp", "month"), new DayOfMonthFieldPartitioner("timestamp", "day"), new HourFieldPartitioner("timestamp", "hour"), new MinuteFieldPartitioner("timestamp", "min")); // none of these fields can produce a valid predicate independently for (CalendarFieldPartitioner fp : fps) { Assert.assertNull(fp.project(Predicates.in(octInstant))); Assert.assertNull(fp.projectStrict(Predicates.in(octInstant))); Assert.assertNull(fp.project(Ranges.greaterThan(sepInstant))); Assert.assertNull(fp.projectStrict(Ranges.open(octInstant, novInstant))); } } @Test @SuppressWarnings("unchecked") public void testHashFieldPartitionerRangePredicate() { FieldPartitioner<Object, Integer> fp = new HashFieldPartitioner("name", 50); // cannot enumerate all inputs, so we can't calculate the set of potential // hash values other than all hash values mod the number of buckets Assert.assertNull(fp.project((Predicate)Ranges.open("a", "b"))); Assert.assertNull(fp.projectStrict((Predicate) Ranges.open("a", "b"))); } @Test @SuppressWarnings("unchecked") public void testHashFieldPartitionerSetPredicate() { FieldPartitioner<Object, Integer> fp = new HashFieldPartitioner("name", 50); Assert.assertEquals(Predicates.in(fp.apply("a"), fp.apply("b")), fp.project((Predicate)Predicates.in("a", "b"))); // the set of inputs that result in a particular value is not closed Assert.assertNull(fp.projectStrict((Predicate) Predicates.in("a"))); } @Test public void testIdentityFieldPartitionerRangePredicate() { FieldPartitioner<String, String> fp = new IdentityFieldPartitioner<String>("str", "str_copy", String.class, 50); Range<String> r = Ranges.openClosed("a", "b"); Assert.assertEquals(r, fp.project(r)); Assert.assertEquals(r, fp.projectStrict(r)); } @Test public void testIdentityFieldPartitionerSetPredicate() { FieldPartitioner<String, String> fp = new IdentityFieldPartitioner<String>("str", "str_copy", String.class, 50); In<String> s = Predicates.in("a", "b"); Assert.assertEquals(s, fp.project(s)); Assert.assertEquals(s, fp.projectStrict(s)); } @Test public void testIntRangeFieldPartitionerRangePredicate() { final FieldPartitioner<Integer, Integer> fp = new IntRangeFieldPartitioner("num", 5, 10, 15, 20); Assert.assertEquals(Ranges.closed(1, 2), fp.project(Ranges.open(5, 15))); Assert.assertEquals(Ranges.closed(0, 2), fp.project(Ranges.open(4, 15))); // even though 21 is above the last bound, the range is valid if open Assert.assertEquals(Ranges.closed(0, 3), fp.project(Ranges.open(4, 21))); TestHelpers.assertThrows("Should not project an invalid range", IllegalArgumentException.class, new Runnable() { @Override public void run() { fp.project(Ranges.openClosed(5, 21)); } }); Assert.assertEquals(Ranges.closed(1, 2), fp.projectStrict(Ranges.open(5, 15))); Assert.assertEquals(Ranges.singleton(1), fp.projectStrict(Ranges.open(5, 14))); Assert.assertEquals(Ranges.atMost(2), fp.projectStrict(Ranges.atMost(15))); Assert.assertEquals(Ranges.atMost(3), fp.projectStrict(Ranges.lessThan(21))); Assert.assertNull(fp.projectStrict(Ranges.closed(15, 16))); // unbounded range is no problem, although accepted values would be // rejected if partitioned Assert.assertEquals(Ranges.atLeast(3), fp.projectStrict(Ranges.atLeast(14))); TestHelpers.assertThrows("Should not project an invalid range", IllegalArgumentException.class, new Runnable() { @Override public void run() { fp.projectStrict(Ranges.openClosed(5, 21)); } }); } @Test public void testIntRangeFieldPartitionerSetPredicate() { final FieldPartitioner<Integer, Integer> fp = new IntRangeFieldPartitioner("num", 5, 10, 15, 20); Assert.assertEquals(Predicates.in(1, 3), fp.project(Predicates.in(6, 7, 16, 17))); TestHelpers.assertThrows("Should not project invalid set", IllegalArgumentException.class, new Runnable() { @Override public void run() { fp.project(Predicates.in(21)); } }); // null if no full range is included Assert.assertNull(fp.projectStrict(Predicates.in(6, 7, 16, 17))); Assert.assertEquals(Predicates.in(1), fp.projectStrict(Predicates.in(6, 7, 8, 9, 10, 16, 17))); Assert.assertEquals(Predicates.in(1, 3), fp.projectStrict(Predicates.in( 5, 6, 7, 8, 9, 10, 16, 17, 18, 19, 20))); // doesn't complain about values that are too large Assert.assertEquals(Predicates.in(1), fp.projectStrict(Predicates.in(6, 7, 8, 9, 10, 16, 17, 22))); } @Test public void testLongFixedSizeRangeFieldPartitionerRangePredicate() { final FieldPartitioner<Long, Long> fp = new LongFixedSizeRangeFieldPartitioner("num", 5); Assert.assertEquals(Ranges.closed(-5L, 10L), fp.project(Ranges.open(-2L, 15L))); Assert.assertEquals(Ranges.closed(5L, 10L), fp.project(Ranges.open(5L, 15L))); Assert.assertEquals(Ranges.closed(5L, 10L), fp.project(Ranges.open(4L, 15L))); Assert.assertEquals(Ranges.closed(0L, 15L), fp.project(Ranges.closed(4L, 15L))); Assert.assertEquals(Ranges.closed(5L, 20L), fp.project(Ranges.openClosed(5L, 21L))); Assert.assertEquals(Ranges.atMost(15L), fp.project(Ranges.atMost(15L))); Assert.assertEquals(Ranges.atMost(20L), fp.project(Ranges.lessThan(21L))); Assert.assertEquals(Ranges.atLeast(10L), fp.project(Ranges.atLeast(14L))); Assert.assertEquals(Ranges.singleton(10L), fp.projectStrict(Ranges.open(5L, 15L))); Assert.assertNull(fp.projectStrict(Ranges.open(5L, 14L))); Assert.assertEquals(Ranges.atMost(10L), fp.projectStrict(Ranges.atMost(15L))); Assert.assertEquals(Ranges.atMost(15L), fp.projectStrict(Ranges.lessThan(21L))); Assert.assertEquals(Ranges.atLeast(15L), fp.projectStrict(Ranges.atLeast(14L))); } @Test public void testLongFixedSizeRangeFieldPartitionerSetPredicate() { final FieldPartitioner<Long, Long> fp = new LongFixedSizeRangeFieldPartitioner("num", 5); Assert.assertEquals(Predicates.in(5L, 15L), fp.project(Predicates.in(5L, 6L, 15L, 16L))); // null if no full range is included Assert.assertNull(fp.projectStrict(Predicates.in(5L, 6L, 15L, 16L))); Assert.assertEquals(Predicates.in(5L), fp.projectStrict(Predicates.in(5L, 6L, 7L, 8L, 9L, 15L, 16L))); Assert.assertEquals(Predicates.in(5L, 15L), fp.projectStrict(Predicates.in( 4L, 5L, 6L, 7L, 8L, 9L, 10L, 15L, 16L, 17L, 18L, 19L, 20L))); } @Test public void testRangeFieldPartitionerRangePredicate() { final FieldPartitioner<String, String> fp = new RangeFieldPartitioner("str", "str_bound", new String[]{"a", "b", "c"}); // projected to sets because the range ["a", "b"] includes "aa", etc. Assert.assertEquals(Predicates.in("a"), fp.project(Ranges.atMost("a"))); Assert.assertEquals(Predicates.in("a", "b"), fp.project(Ranges.closedOpen("a", "b"))); Assert.assertEquals(Predicates.in("a", "b"), fp.project(Ranges.closedOpen("a", "aa"))); Assert.assertEquals(Predicates.in("a", "b", "c"), fp.project(Ranges.closedOpen("a", "ba"))); Assert.assertEquals(Predicates.in("a", "b", "c"), fp.project(Ranges.closedOpen("0", "c"))); Assert.assertEquals(Predicates.in("c"), fp.project(Ranges.atLeast("c"))); TestHelpers.assertThrows("Cannot project endpoint outside of bounds", IllegalArgumentException.class, new Runnable() { @Override public void run() { fp.project(Ranges.atMost("cc")); } }); Assert.assertNull(fp.projectStrict(Ranges.lessThan("a"))); Assert.assertEquals(Predicates.in("a"), fp.projectStrict(Ranges.atMost("a"))); Assert.assertEquals(Predicates.in("a"), fp.projectStrict(Ranges.lessThan("b"))); Assert.assertEquals(Predicates.in("a", "b"), fp.projectStrict(Ranges.atMost("b"))); Assert.assertEquals(Predicates.in("c"), fp.projectStrict(Ranges.atLeast("b"))); Assert.assertEquals(Predicates.in("c"), fp.projectStrict(Ranges.greaterThan("b"))); Assert.assertEquals(Predicates.in("a"), fp.projectStrict(Ranges.atMost("ab"))); } @Test public void testRangeFieldPartitionerSetPredicate() { final FieldPartitioner<String, String> fp = new RangeFieldPartitioner("str", new String[]{"a", "b", "c"}); Assert.assertEquals(Predicates.in("a"), fp.project(Predicates.in("0"))); Assert.assertEquals(Predicates.in("a"), fp.project(Predicates.in("a"))); Assert.assertEquals(Predicates.in("b"), fp.project(Predicates.in("aa"))); Assert.assertEquals(Predicates.in("a", "b"), fp.project(Predicates.in("a", "aa", "b"))); TestHelpers.assertThrows("Cannot project endpoint outside of bounds", IllegalArgumentException.class, new Runnable() { @Override public void run() { fp.project(Predicates.in("cc")); } }); // cannot enumerate all of the potential input, so no satisfied projection Assert.assertNull(fp.projectStrict(Predicates.in("0"))); Assert.assertNull(fp.projectStrict(Predicates.in("a"))); Assert.assertNull(fp.projectStrict(Predicates.in("aa"))); Assert.assertNull(fp.projectStrict(Predicates.in("a", "aa", "b"))); Assert.assertNull(fp.projectStrict(Predicates.in("cc"))); } @Test public void testListFieldPartitionerRangePredicate() { List<Set<Integer>> sets = Lists.newArrayList(); sets.add(Sets.newHashSet(7, 14, 21, 28, 35, 42, 49)); sets.add(Sets.newHashSet(11, 22, 33, 44)); sets.add(Sets.newHashSet(13, 26, 39)); final FieldPartitioner<Integer, Integer> fp = new ListFieldPartitioner<Integer>("name", sets, Integer.class); // projection: any set that has one element matching the predicate Assert.assertEquals(Predicates.in(0, 1, 2), fp.project(Ranges.closed(0, 50))); Assert.assertEquals(Predicates.in(0, 1), fp.project(Ranges.closed(5, 12))); Assert.assertEquals(Predicates.in(1, 2), fp.project(Ranges.open(7, 14))); Assert.assertEquals(Predicates.in(0, 1), fp.project(Ranges.closedOpen(7, 13))); Assert.assertEquals(Predicates.in(1, 2), fp.project(Ranges.openClosed(7, 13))); Assert.assertEquals(Predicates.in(0), fp.project(Ranges.atMost(10))); Assert.assertEquals(Predicates.in(0, 1), fp.project(Ranges.atLeast(40))); Assert.assertEquals(Predicates.in(0), fp.project(Ranges.greaterThan(44))); Assert.assertEquals(Predicates.in(0, 1), fp.project(Ranges.lessThan(13))); Assert.assertEquals(null, fp.project(Ranges.lessThan(5))); // strict projection: any set for where all elements match the predicate Assert.assertEquals(Predicates.in(0, 1, 2), fp.projectStrict(Ranges.closed(0, 50))); Assert.assertEquals(Predicates.in(1, 2), fp.projectStrict(Ranges.closed(11, 44))); Assert.assertEquals(Predicates.in(2), fp.projectStrict(Ranges.open(11, 44))); Assert.assertEquals(Predicates.in(2), fp.projectStrict(Ranges.closedOpen(13, 44))); Assert.assertEquals(Predicates.in(2), fp.projectStrict(Ranges.openClosed(11, 39))); Assert.assertEquals(Predicates.in(1, 2), fp.projectStrict(Ranges.atMost(44))); Assert.assertEquals(Predicates.in(1, 2), fp.projectStrict(Ranges.atLeast(11))); Assert.assertEquals(Predicates.in(2), fp.projectStrict(Ranges.greaterThan(11))); Assert.assertEquals(Predicates.in(2), fp.projectStrict(Ranges.lessThan(44))); Assert.assertEquals(null, fp.projectStrict(Ranges.closed(20, 30))); } @Test public void testListFieldPartitionerSetPredicate() { List<Set<Integer>> sets = Lists.newArrayList(); sets.add(Sets.newHashSet(7, 14, 21, 28, 35, 42, 49)); sets.add(Sets.newHashSet(11, 22, 33, 44)); sets.add(Sets.newHashSet(13, 26, 39)); final FieldPartitioner<Integer, Integer> fp = new ListFieldPartitioner<Integer>("name", sets, Integer.class); TestHelpers.assertThrows("Values not in any set are invalid", IllegalArgumentException.class, new Runnable() { @Override public void run() { fp.project(Predicates.in(12)); } }); Assert.assertEquals(Predicates.in(0, 1, 2), fp.project(Predicates.in(7, 11, 13))); Assert.assertEquals(Predicates.in(0, 1, 2), fp.project(Predicates.in(7, 11, 22, 33, 44, 13))); Assert.assertEquals(Predicates.in(0, 2), fp.project(Predicates.in(7, 13))); Assert.assertEquals(Predicates.in(0), fp.project(Predicates.in(7))); Assert.assertEquals(Predicates.in(2), fp.project(Predicates.in(13))); Assert.assertEquals(null, fp.projectStrict(Predicates.in(7, 11, 13))); Assert.assertEquals(Predicates.in(1), fp.projectStrict(Predicates.in(7, 11, 22, 33, 44, 13))); } }