/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi; import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.base.Predicate; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.DiscreteDomains; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import java.util.Arrays; import java.util.Calendar; import java.util.Iterator; import java.util.List; import java.util.Map; import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; import org.kitesdk.data.PartitionStrategy; import org.kitesdk.data.impl.Accessor; import org.kitesdk.data.spi.partition.CalendarFieldPartitioner; import org.kitesdk.data.spi.predicates.Exists; import org.kitesdk.data.spi.predicates.In; import org.kitesdk.data.spi.predicates.Predicates; import org.kitesdk.data.spi.predicates.Range; import org.kitesdk.data.spi.predicates.Ranges; @Immutable public class TimeDomain { private static final List<Integer> order = Lists.newArrayList( Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND ); private static final LoadingCache<Pair<PartitionStrategy, String>, TimeDomain> domains = CacheBuilder.newBuilder().build( new CacheLoader<Pair<PartitionStrategy, String>, TimeDomain>() { @Override public TimeDomain load(Pair<PartitionStrategy, String> entry) { return new TimeDomain(entry.first(), entry.second()); } }); public static TimeDomain get(PartitionStrategy strategy, String source) { return domains.getUnchecked(Pair.of(strategy, source)); } // the calendar field partitioners from the strategy, in the correct order private final List<CalendarFieldPartitioner> partitioners; public TimeDomain(PartitionStrategy strategy, String sourceName) { Map<Integer, CalendarFieldPartitioner> mapping = Maps.newHashMap(); for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { // there may be partitioners for more than one source field if (sourceName.equals(fp.getSourceName()) && fp instanceof CalendarFieldPartitioner) { mapping.put( ((CalendarFieldPartitioner) fp).getCalendarField(), (CalendarFieldPartitioner) fp); } } // get the partitioners to check for this strategy this.partitioners = Lists.newArrayList(); for (int field : order) { // if there is no partition for the next field, then all are included // example: yyyy/mm/dd partitioning accepts when field is hour if (mapping.containsKey(field)) { partitioners.add(mapping.get(field)); } else if (!partitioners.isEmpty()) { break; } } } public Predicate<Marker> project(Predicate<Long> predicate) { if (predicate instanceof In) { return new TimeSetPredicate((In<Long>) predicate); } else if (predicate instanceof Range) { return new TimeRangePredicate((Range<Long>) predicate); } else { return null; } } public Predicate<Marker> projectStrict(Predicate<Long> predicate) { if (predicate instanceof Exists) { return Predicates.exists(); } else if (predicate instanceof In) { return null; } else if (predicate instanceof Range) { return new TimeRangeStrictPredicate((Range<Long>) predicate); } else { return null; } } private class TimeSetPredicate implements Predicate<Marker> { private final In<List<Integer>> times; private TimeSetPredicate(In<Long> times) { this.times = times.transform(new Function<Long, List<Integer>>() { @Override public List<Integer> apply(@Nullable Long timestamp) { List<Integer> time = Lists .newArrayListWithExpectedSize(partitioners.size()); for (CalendarFieldPartitioner fp : partitioners) { time.add(fp.apply(timestamp)); } return time; } }); } @Override public boolean apply(@Nullable Marker key) { List<Integer> time = Lists .newArrayListWithExpectedSize(partitioners.size()); for (CalendarFieldPartitioner fp : partitioners) { time.add((Integer) key.get(fp.getName())); } //if contains null is partial so allow it for now... return time.contains(null) ? true : times.apply(time); } @Override public String toString() { return Objects.toStringHelper(this).add("in", times).toString(); } } /** * Predicate that accepts a {@link StorageKey} if could include an entity * that would be accepted by the original time range. */ private class TimeRangePredicate extends TimeRangePredicateImpl { private TimeRangePredicate(Range<Long> timeRange) { // adjust the range end-points if exclusive to avoid extra partitions super(timeRange, true /* accept end-points */ ); } } /** * Predicate that accepts a {@link StorageKey} only if entities it includes * must be accepted by the original time range. */ private class TimeRangeStrictPredicate extends TimeRangePredicateImpl { private TimeRangeStrictPredicate(Range<Long> timeRange) { super(timeRange, false /* exclude end-points */ ); } } /** * A common implementation class for time-based range predicates. */ private class TimeRangePredicateImpl implements Predicate<Marker> { private final Range<Long> range; private final String[] names; private final int[] lower; private final int[] upper; private final boolean acceptEqual; private TimeRangePredicateImpl(Range<Long> timeRange, boolean acceptEqual) { this.range = Ranges.adjustClosed(timeRange, DiscreteDomains.longs()); this.acceptEqual = acceptEqual; int length = partitioners.size(); this.names = new String[length]; for (int i = 0; i < length; i += 1) { names[i] = partitioners.get(i).getName(); } if (range.hasLowerBound()) { long start = range.lowerEndpoint() - (acceptEqual ? 0 : 1); this.lower = new int[length]; for (int i = 0; i < length; i += 1) { lower[i] = partitioners.get(i).apply(start); } } else { this.lower = new int[0]; } if (range.hasUpperBound()) { long stop = range.upperEndpoint() + (acceptEqual ? 0 : 1); this.upper = new int[length]; for (int i = 0; i < length; i += 1) { upper[i] = partitioners.get(i).apply(stop); } } else { this.upper = new int[0]; } } @Override public boolean apply(@Nullable Marker key) { if (key == null) { return false; } boolean returnVal = true; // no bounds => accept if (lower.length > 0) { returnVal = checkLower(key); } if (returnVal && upper.length > 0) { returnVal = checkUpper(key); } return returnVal; } private boolean checkLower(Marker key) { for (int i = 0; i < names.length; i += 1) { Object markerValue = key.get(names[i]); if(markerValue != null) { int value = (Integer) markerValue; if (value < lower[i]) { // strictly within range, so all other levels must be // example: 2013-4-10 to 2013-10-4 => 4 < month < 10 => accept return false; } else if (value > lower[i]) { // falls out of the range at this level return true; } }else { //returning true because at this point no value is known so nothing to disqualify on return true; } // value was equal to one endpoint, continue checking } // each position was satisfied, defer to acceptEqual return acceptEqual; } private boolean checkUpper(Marker key) { // same as checkLower, see comments there for (int i = 0; i < names.length; i += 1) { Object markerValue = key.get(names[i]); if(markerValue != null) { int value = (Integer) markerValue; if (value > upper[i]) { return false; } else if (value < upper[i]) { return true; } }else{ //returning true because at this point no value is known so nothing to disqualify on return true; } } return acceptEqual; } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null || !(obj instanceof TimeRangePredicateImpl)) { return false; } TimeRangePredicateImpl that = (TimeRangePredicateImpl) obj; if (!range.equals(that.range)) { return false; } // both permissive or both strict if (this.acceptEqual == that.acceptEqual) { return true; } // one is strict and the other permissive. the only time the two agree is // when the range aligns with a boundary in the partitions. we can detect // a boundary when the values differ for the same range. if (lower.length > 0) { boolean differ = false; for (int i = 0; i < lower.length; i += 1) { if (lower[i] != that.lower[i]) { differ = true; break; } } if (!differ) { return false; } } if (upper.length > 0) { boolean differ = false; for (int i = 0; i < upper.length; i += 1) { if (upper[i] != that.upper[i]) { differ = true; break; } } if (!differ) { return false; } } return true; } @Override public int hashCode() { return Objects.hashCode(range, acceptEqual); } @Override public String toString() { Objects.ToStringHelper helper = Objects.toStringHelper(this); if (lower.length > 0) { helper.add("lower", Arrays.toString(lower)); } if (upper.length > 0) { helper.add("upper", Arrays.toString(upper)); } return helper.toString(); } } @SuppressWarnings("unchecked") Iterator<MarkerRange.Builder> addStackedIterator( Predicate<Long> timePredicate, Iterator<MarkerRange.Builder> inner) { if (timePredicate instanceof In) { // normal group handling is sufficient for a set of specific times // instantiate directly because the add method projects the predicate return new KeyRangeIterable.SetGroupIterator( (In) timePredicate, (List) partitioners, inner); } else if (timePredicate instanceof Range) { return new TimeRangeIterator( (Range<Long>) timePredicate, partitioners, inner); } return null; } private static class TimeRangeIterator extends KeyRangeIterable.StackedIterator<Range<Long>, MarkerRange.Builder> { private final List<CalendarFieldPartitioner> fields; private TimeRangeIterator(Range<Long> timeRange, List<CalendarFieldPartitioner> fps, Iterator<MarkerRange.Builder> inner) { this.fields = fps; setItem(timeRange); setInner(inner); } @Override public MarkerRange.Builder update( MarkerRange.Builder current, Range<Long> range) { // FIXME: this assumes all of the partition fields are in order // This should identify out-of-order fields and alter the range for (CalendarFieldPartitioner cfp : fields) { boolean hasLower = range.hasLowerBound(); boolean hasUpper = range.hasUpperBound(); if (hasLower) { current.addToStart(cfp.getName(), cfp.apply(range.lowerEndpoint())); } if (hasUpper) { current.addToEnd(cfp.getName(), cfp.apply(range.upperEndpoint())); } } return current; } } }