/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.aliyun.odps.mapred.local;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.JobClient;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.ReducerBase;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.local.utils.TestUtils;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
/**
 * This is the trivial map/reduce program that does absolutely nothing other
 * than use the framework to fragment and sort the input values.
 * <p>
 * In this local-mode port the job is submitted from the JUnit {@code test()}
 * method below. The original command-line form of the example was: jar
 * -libjars mapreduce-examples.jar -classpath clt/lib/mapreduce-examples.jar
 * com.aliyun.odps.mapreduce.examples.Sort [-m <i>maps</i>]
 * [-mapoutKey <i>output key class</i>] [-mapoutValue <i>output value
 * class</i>] <i>in-tbl</i> <i>out-tbl</i>
 */
public class NullSort {

  @Before
  public void setUp() throws Exception {
    // Point the local runner at the shared MR-task test environment.
    TestUtils.setEnvironment(TestUtils.odps_test_mrtask);
  }

  /**
   * Prints the command-line usage string for the sort example.
   * <p>
   * Note: despite historical comments, this method does NOT terminate the
   * JVM; it only returns an error code for the caller to act on.
   *
   * @return -1, so callers can propagate it as an error exit code
   */
  static int printUsage() {
    System.out.println("sort [-m <maps>] [-r <reduces>] "
        + "[-inFormat <input format class>] "
        + "[-outFormat <output format class>] "
        + "[-outKey <output key class>] " + "[-outValue <output value class>] "
        + "[-totalOrder <pcnt> <num samples> <max splits>] "
        + "<input> <output>");
    return -1;
  }

  /**
   * Implements the identity function, mapping record's first two columns to
   * outputs.
   */
  public static class IdentityMapper extends MapperBase {

    // Reusable map-output records, created once in setup() and refilled per
    // input record (schemas "key:bigint" / "value:bigint", see the driver).
    private Record word;
    private Record one;

    @Override
    public void setup(TaskContext context) throws IOException {
      word = context.createMapOutputKeyRecord();
      one = context.createMapOutputValueRecord();
    }

    @Override
    public void map(long recordNum, Record record, TaskContext context)
        throws IOException {
      // Copy the first two input columns through unchanged. Using
      // Record.set(int, Object) avoids the per-record Object[] allocation
      // and the redundant (Long) cast of the original code — get(0) already
      // returns an Object that set() accepts as-is.
      word.set(0, record.get(0));
      one.set(0, record.get(1));
      context.write(word, one);
    }
  }

  /**
   * Identity reducer: emits every (key, value) pair it receives directly to
   * the output table, preserving the framework's sort order of the keys.
   */
  public static class IdentityReducer extends ReducerBase {

    // Reusable output record: column 0 = key, column 1 = value.
    private Record result = null;

    @Override
    public void setup(TaskContext context) throws IOException {
      result = context.createOutputRecord();
    }

    /**
     * Writes all keys and values directly to output.
     */
    @Override
    public void reduce(Record key, Iterator<Record> values, TaskContext context)
        throws IOException {
      result.set(0, key.get(0));
      while (values.hasNext()) {
        Record val = values.next();
        result.set(1, val.get(0));
        context.write(result);
      }
    }
  }

  /**
   * The main driver for the sort program. Invoke this method to submit the
   * map/reduce job: reads "nullsort_in", passes every record through the
   * identity mapper/reducer, and writes "nullsort_out".
   *
   * @throws IOException
   *           When there is communication problems with the job tracker.
   */
  @Test
  public void test() throws Exception {
    // Fixed table names stand in for the command-line <input> <output> args
    // of the original example.
    String[] args = new String[2];
    args[0] = "nullsort_in";
    args[1] = "nullsort_out";
    JobConf jobConf = new JobConf();
    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);
    // Argument scan retained from the original example. With the hard-coded
    // args above, neither option branch is taken and both catch blocks are
    // unreachable; they are kept for fidelity with the upstream Sort example.
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-mapoutKey".equals(args[i])) {
          ++i; // skip the option's value; key class is fixed below
        } else if ("-mapoutValue".equals(args[i])) {
          ++i; // skip the option's value; value class is fixed below
        } else {
          otherArgs.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from "
            + args[i - 1]);
        // NOTE: printUsage() only returns -1 — it does not exit, so
        // execution falls through to job submission.
        printUsage();
      }
    }
    // Set user-supplied (possibly default) job configs.
    jobConf.setNumReduceTasks(1);
    // Map-output key/value schemas replace the Writable classes of the
    // original Hadoop example.
    jobConf.setMapOutputKeySchema(SchemaUtils.fromString("key:bigint"));
    jobConf.setMapOutputValueSchema(SchemaUtils.fromString("value:bigint"));
    // Make sure there are exactly 2 parameters left. With the fixed args
    // this never triggers; if it did, the job would still be submitted —
    // printUsage() does not abort.
    if (otherArgs.size() != 2) {
      System.out.println("ERROR: Wrong number of parameters: "
          + otherArgs.size() + " instead of 2.");
      printUsage();
    }
    InputUtils.addTable(TableInfo.builder().tableName(args[0])
        .build(), jobConf);
    OutputUtils.addTable(TableInfo.builder().tableName(args[1]).build(),
        jobConf);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
        + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  }
}