/**
 * Copyright 2012 Willet Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.willetinc.hadoop.mapreduce.dynamodb;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * Contract for producing the InputSplits consumed by
 * DynamoDBQueryInputFormat.
 *
 * <p>DynamoDBQueryInputFormat has to partition the HashKey values and, when
 * requested, interpolate between the two RangeKey values that mark the
 * lowest and highest valued records to import. How that interpolation is
 * carried out depends on the data type of the column, so each
 * DynamoDBSplitter implementation is expected to handle one data type, or
 * one family of related data types.</p>
 */
public interface DynamoDBSplitter {

    /**
     * Produces the input splits that cover the values of a DynamoDB table.
     *
     * <p>The table can be divided in one of two ways:</p>
     * <ol>
     * <li>by HashKey value only: {@code hashKey = value}</li>
     * <li>by HashKey value and, within it, by a range of RangeKey values:
     * {@code hashKey = value && (rangeKey BETWEEN value1 and value2)}</li>
     * </ol>
     *
     * @param conf the Hadoop configuration
     * @return the generated InputSplits
     * @throws IOException if an error occurs while generating the input
     *         splits
     */
    List<InputSplit> split(Configuration conf) throws IOException;
}