package org.apache.cassandra.hadoop;
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.SortedMap;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.thrift.TException;
/**
 * The <code>SampleColumnFamilyOutputTool</code> provides a tool interface which
 * runs a {@link SampleColumnMapper} on the {@code <key, value>} pairs obtained
 * from a sequence file, and then reduces it through the default
 * {@link ColumnFamilyOutputReducer}.
 *
 * @author Karthick Sankarachary
 *
 */
public class SampleColumnFamilyOutputTool extends Configured implements Tool
{
    /** Directory containing the sequence-file input for the job. */
    private Path inputdir;

    /** Target column family; a null value selects the test default. */
    private String columnFamily;

    /**
     * @param inputdir     directory of sequence files to read map input from
     * @param columnFamily the column family to write reducer output to
     *                     (null falls back to
     *                     {@link ColumnFamilyOutputFormatTest#COLUMN_FAMILY})
     */
    public SampleColumnFamilyOutputTool(Path inputdir, String columnFamily)
    {
        this.inputdir = inputdir;
        // Previously this argument was silently discarded and the test
        // constant was always used; retain it so callers can target an
        // arbitrary column family.
        this.columnFamily = columnFamily;
    }

    /**
     * Configures and runs the map/reduce job, blocking until it completes.
     *
     * @param args unused command-line arguments (per the {@link Tool} contract)
     * @return 0 on success, 1 if the job failed
     */
    public int run(String[] args)
        throws InvalidRequestException, TException, IOException, InterruptedException, ClassNotFoundException
    {
        // Honor any Configuration injected via Configured/ToolRunner;
        // only create a fresh one when none was supplied.
        Configuration conf = getConf();
        if (conf == null)
        {
            conf = new Configuration();
        }
        Job job = new Job(conf);

        // In case your job runs out of memory, use this setting
        // (provided you're on Hadoop 0.20.1 or later)
        // job.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);

        ConfigHelper.setOutputColumnFamily(job.getConfiguration(),
                                           ColumnFamilyOutputFormatTest.KEYSPACE,
                                           columnFamily != null
                                               ? columnFamily
                                               : ColumnFamilyOutputFormatTest.COLUMN_FAMILY);

        SequenceFileInputFormat.addInputPath(job, inputdir);
        job.setMapperClass(SampleColumnMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ColumnWritable.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setReducerClass(ColumnFamilyOutputReducer.class);
        job.setOutputKeyClass(byte[].class);
        job.setOutputValueClass(SortedMap.class);
        job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

        // Propagate the job's success/failure instead of always returning 0,
        // as the Tool contract expects a nonzero exit code on failure.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}