DynamoDBSplitter.java example

Explorer

Hadoop-DynamoDB-master
- src
  - main
    - java
      - com
        willetinc
        hadoop
        mapreduce
        dynamodb
        AbstractSplitter.java
        AttributeValueIOUtils.java
        BigDecimalSplitter.java
        BinarySplitter.java
        DefaultSplitter.java
        DynamoDBConfiguration.java
        DynamoDBOutputFormat.java
        DynamoDBQueryInputFormat.java
        DynamoDBQueryRecordReader.java
        DynamoDBRecordReader.java
        DynamoDBScanInputFormat.java
        DynamoDBScanRecordReader.java
        DynamoDBSplitter.java
        TextSplitter.java
        Types.java
        io
        AttributeValueWritable.java
        BSWritable.java
        BWritable.java
        DynamoDBItemWritable.java
        DynamoDBKeyWritable.java
        NSWritable.java
        NWritable.java
        SSWritable.java
        SWritable.java
  - test
    - java
      - com
        willetinc
        hadoop
        mapreduce
        dynamodb
        AbstractSplitterTest.java
        AttributeValueIOUtilsTest.java
        BigDecimalSplitterTest.java
        BinarySplitterTest.java
        CSRecord.java
        DynamoDBOutputFormatTest.java
        DynamoDBQueryInputFormatTest.java
        DynamoDBQueryRecordReaderTest.java
        DynamoDBScanRecordReaderTest.java
        TestSuite.java
        TextSplitterTest.java
        io
        AttributeValueWritableTest.java
        BSWritableTest.java
        BWritableTest.java
        IOTestSuite.java
        NSWritableTest.java
        NWritableTest.java
        SSWritableTest.java
        SWritableTest.java

/**
 * Copyright 2012 Willet Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.willetinc.hadoop.mapreduce.dynamodb;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * A splitter generates the {@code InputSplit}s used by
 * {@code DynamoDBQueryInputFormat}.
 *
 * <p>{@code DynamoDBQueryInputFormat} needs to partition HashKey values and,
 * optionally, interpolate between two RangeKey values that represent the
 * lowest and highest valued records to import. Depending on the data type of
 * the column, this requires different behavior. {@code DynamoDBSplitter}
 * implementations should perform this for a data type or family of data
 * types.</p>
 */
public interface DynamoDBSplitter {

	/**
	 * Generates input splits across values in a DynamoDB table.
	 * 
	 * <p>There are two ways the input table can be split:</p>
	 * <ol>
	 * 		<li>Across HashKey values:<br>
	 *    	{@code hashKey = value}</li>
	 * 		<li>Across HashKey values and within a range of RangeKey values:<br>
	 *    	{@code hashKey = value && (rangeKey BETWEEN value1 AND value2)}</li>
	 * </ol>
	 * 
	 * @param conf Hadoop configuration
	 * @return the generated {@code InputSplit}s
	 * @throws IOException if an error occurs while generating the input splits
	 */
	List<InputSplit> split(Configuration conf) throws IOException;
	
}