/**
 * Copyright 2012 Willet Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.willetinc.hadoop.mapreduce.dynamodb;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.amazonaws.services.dynamodb.model.AttributeValue;
import com.amazonaws.services.dynamodb.model.ComparisonOperator;

/**
 * Implements Splitter over DynamoDB Number datatype values.
 *
 * <p>
 * The range between a minimum and a maximum range key is cut into
 * consecutive closed intervals, each of which becomes one BETWEEN query
 * split.
 * </p>
 */
public class BigDecimalSplitter extends AbstractSplitter {

    private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class);

    /**
     * Separator 'e' added to the end of one interval to obtain the start of
     * the next one. This class treats it as the smallest step between two
     * distinct range key values, so it is also the smallest split size used.
     */
    private static final BigDecimal MIN_POSITIVE_VALUE = new BigDecimal(
            "0.0000000000000000000000000000000000001");

    /**
     * Appends one BETWEEN query split per interval of the range
     * [minRangeKeyValue, maxRangeKeyValue] to {@code splits}.
     *
     * @param conf job configuration (not read by this implementation)
     * @param splits list the generated splits are appended to
     * @param hashKeyType type of the hash key
     * @param hashKeyValue hash key value shared by every generated split
     * @param rangeKeyType type of the range key
     * @param minRangeKeyValue lower bound; its N value must parse as a
     *            BigDecimal
     * @param maxRangeKeyValue upper bound; its N value must parse as a
     *            BigDecimal
     * @param numRangeSplits requested number of splits, used as a hint
     */
    @Override
    void generateRangeKeySplits(
            Configuration conf,
            List<InputSplit> splits,
            Types hashKeyType,
            AttributeValue hashKeyValue,
            Types rangeKeyType,
            AttributeValue minRangeKeyValue,
            AttributeValue maxRangeKeyValue,
            int numRangeSplits) {

        BigDecimal numSplits = BigDecimal.valueOf(numRangeSplits);
        BigDecimal minVal = new BigDecimal(minRangeKeyValue.getN());
        BigDecimal maxVal = new BigDecimal(maxRangeKeyValue.getN());

        // Get all the split points together.
        List<BigDecimal> splitPoints = split(numSplits, minVal, maxVal);

        // Turn the split points into a set of closed intervals.
        BigDecimal start = splitPoints.get(0);
        for (int i = 1; i < splitPoints.size(); i++) {
            BigDecimal end = splitPoints.get(i);

            // The previous interval ended less than MIN_POSITIVE_VALUE
            // below this boundary, so [start, end] would be inverted.
            // Skip it instead of emitting a BETWEEN whose lower bound
            // exceeds its upper bound; start is kept for the next boundary.
            if (start.compareTo(end) > 0) {
                continue;
            }

            List<AttributeValue> rangeKeyValues = new ArrayList<AttributeValue>();
            rangeKeyValues.add(new AttributeValue().withN(start.toString()));
            rangeKeyValues.add(new AttributeValue().withN(end.toString()));

            splits.add(new DynamoDBQueryInputFormat.DynamoDBQueryInputSplit(
                    hashKeyType,
                    hashKeyValue,
                    rangeKeyType,
                    rangeKeyValues,
                    ComparisonOperator.BETWEEN));

            // BETWEEN is inclusive on both ends, so the next interval
            // starts at the end of this one plus MIN_POSITIVE_VALUE. That
            // keeps consecutive intervals from overlapping on the shared
            // boundary.
            start = end.add(MIN_POSITIVE_VALUE);
        }
    }

    /**
     * Divides numerator by denominator. If the exact quotient has a
     * non-terminating decimal expansion, the quotient is rounded half-up.
     *
     * <p>
     * The rounded quotient keeps at least as many fractional digits as
     * {@code MIN_POSITIVE_VALUE} (more if the numerator already has more).
     * Rounding at the numerator's own scale alone would turn, for example,
     * 1 / 3 into 0.
     * </p>
     *
     * @param numerator the dividend
     * @param denominator the divisor
     * @return the exact quotient when representable, otherwise the rounded
     *         quotient
     * @throws ArithmeticException if denominator is zero
     */
    protected static BigDecimal tryDivide(
            BigDecimal numerator,
            BigDecimal denominator) {
        try {
            return numerator.divide(denominator);
        } catch (ArithmeticException ae) {
            int scale = Math.max(numerator.scale(), MIN_POSITIVE_VALUE.scale());
            return numerator.divide(denominator, scale, RoundingMode.HALF_UP);
        }
    }

    /**
     * <p>
     * Returns the list of boundaries between input splits. The list always
     * starts with minVal, ends with maxVal and has at least two elements,
     * so it is one element longer than the number of intervals it
     * describes.
     * </p>
     *
     * <p>
     * Intervals are closed on both ends. The separator 'e' is added to the
     * end of one interval to obtain the start of the next:
     * </p>
     *
     * <p>
     * e = 0.0000000000000000000000000000000000001
     * </p>
     *
     * <p>
     * So the list [0, 5, 8, 12, 18] represents splits capturing the
     * intervals:
     * </p>
     *
     * <p>
     * [0, 5] [5+e, 8] [8+e, 12] [12+e, 18]
     * </p>
     *
     * @param numSplits requested number of splits; used as a hint, values
     *            below one are treated as one
     * @param minVal lower bound of the range
     * @param maxVal upper bound of the range
     * @return the ascending list of split boundaries
     * @throws IllegalArgumentException if minVal is greater than maxVal
     */
    List<BigDecimal> split(
            BigDecimal numSplits,
            BigDecimal minVal,
            BigDecimal maxVal) {

        if (minVal.compareTo(maxVal) > 0) {
            throw new IllegalArgumentException(
                    "minVal (" + minVal + ") must not be greater than maxVal ("
                            + maxVal + ")");
        }

        // A zero or negative split count would give a zero-division or a
        // negative step; fall back to a single split.
        BigDecimal effectiveSplits = numSplits;
        if (effectiveSplits.signum() <= 0) {
            LOG.warn("Non-positive numSplits " + numSplits + "; using 1");
            effectiveSplits = BigDecimal.ONE;
        }

        List<BigDecimal> splits = new ArrayList<BigDecimal>();

        // Use numSplits as a hint. May need an extra task if the size doesn't
        // divide cleanly.
        BigDecimal splitSize = tryDivide(maxVal.subtract(minVal), effectiveSplits);

        // Never step by less than the interval separator: a smaller step
        // would produce intervals narrower than the gap inserted between
        // them, and a zero step would never reach maxVal.
        if (splitSize.compareTo(MIN_POSITIVE_VALUE) < 0) {
            splitSize = MIN_POSITIVE_VALUE;
            LOG.warn("Set BigDecimal splitSize to MIN_POSITIVE_VALUE");
        }

        BigDecimal curVal = minVal;
        while (curVal.compareTo(maxVal) <= 0) {
            splits.add(curVal);
            curVal = curVal.add(splitSize);
        }

        if (splits.get(splits.size() - 1).compareTo(maxVal) != 0
                || splits.size() == 1) {
            // We didn't end on the maxVal. Add that to the end of the list.
            splits.add(maxVal);
        }

        return splits;
    }
}