/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.mapred.local;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Before;
import org.junit.Test;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.JobClient;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.ReducerBase;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.local.utils.TestUtils;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
/**
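* Right outer join example: joins two input tables of schema (key bigint,
* value string), e.g. mr_join_src1/mr_join_src2, on key and writes rows of
* schema (key bigint, value1 string, value2 string), e.g. mr_join_out. Rows of
* the second table that have no match in the first table are written with a
* null value1.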
*
* @author mingdi
*/
public class JoinRight {
public static final Log LOG = LogFactory.getLog(JoinRight.class);
/**
 * Runs before each test method (JUnit {@code @Before}) and passes
 * {@code TestUtils.odps_test_mrtask} to {@code TestUtils.setEnvironment}.
 * NOTE(review): presumably this selects the local test project/environment
 * used by the job below; confirm against TestUtils.
 *
 * @throws Exception if the environment setup fails
 */
@Before
public void setUp() throws Exception {
TestUtils.setEnvironment(TestUtils.odps_test_mrtask);
}
/**
 * Mapper that tags each input row with the side of the join it belongs to.
 *
 * <p>A row whose column 1, rendered as a string, starts with {@code "valb_"}
 * gets tag 2; every other row gets tag 1. The output key is
 * (column 0 of the input, tag) and the output value is (tag, input columns
 * 1..n-1).
 */
public static class JoinMapper extends MapperBase {
  /** Reusable map output key record: (join key, tag). */
  private Record outKey;
  /** Reusable map output value record: (tag, remaining input columns). */
  private Record outValue;

  /**
   * Allocates the key and value records that are reused for every call to map.
   *
   * @param context task context used to create the output records
   * @throws IOException declared by the overridden method
   */
  @Override
  public void setup(TaskContext context) throws IOException {
    outKey = context.createMapOutputKeyRecord();
    outValue = context.createMapOutputValueRecord();
  }

  /**
   * Emits one tagged (key, value) pair for the given input row.
   *
   * @param key record number supplied by the framework (not used here)
   * @param record input row; column 0 is the join key, column 1 decides the tag
   * @param context task context the pair is written to
   * @throws IOException if writing the pair fails
   */
  @Override
  public void map(long key, Record record, TaskContext context)
      throws IOException {
    // The side of the join is derived from the value prefix, not from the table name.
    final boolean secondSide = record.get(1).toString().startsWith("valb_");
    final long sourceTag = secondSide ? 2L : 1L;

    outKey.set(0, record.get(0));
    outKey.set(1, sourceTag);

    outValue.set(0, sourceTag);
    final int columnCount = record.getColumnCount();
    for (int col = 1; col < columnCount; col++) {
      outValue.set(col, record.get(col));
    }

    context.write(outKey, outValue);
  }
}
/**
 * Reducer that produces the right outer join for one key.
 *
 * <p>Rows with tag 1 (column 0 of the value) are buffered. Every row with any
 * other tag is written once per buffered tag-1 row, or once with nulls in the
 * left columns when no tag-1 row has been seen yet for this key. This assumes
 * tag-1 rows arrive before the other rows of the same key; the job in this
 * file sorts by (key, tag).
 */
public static class JoinReducer extends ReducerBase {
  /** Reusable output record, filled and written for every joined row. */
  private Record outputRow = null;

  /**
   * Allocates the output record that is reused for every written row.
   *
   * @param context task context used to create the output record
   * @throws IOException declared by the overridden method
   */
  @Override
  public void setup(TaskContext context) throws IOException {
    outputRow = context.createOutputRecord();
  }

  /**
   * Joins all values of one key and writes the resulting rows.
   *
   * @param key grouping key; column 0 is the join key
   * @param values tagged rows for this key
   * @param context task context the joined rows are written to
   * @throws IOException if writing a row fails
   */
  @Override
  public void reduce(Record key, Iterator<Record> values, TaskContext context)
      throws IOException {
    final long joinKey = (Long) key.get(0);
    final List<Object[]> leftRows = new ArrayList<Object[]>();

    while (values.hasNext()) {
      final Record current = values.next();
      final long sourceTag = (Long) current.get(0);

      if (sourceTag == 1) {
        // Copy the columns: the array is cloned so later iterations cannot alter the buffered row.
        leftRows.add(current.toArray().clone());
        continue;
      }

      if (leftRows.isEmpty()) {
        // No left-side row seen so far for this key: pad the left columns with nulls.
        emit(joinKey, null, current, context);
        continue;
      }

      for (Object[] leftRow : leftRows) {
        emit(joinKey, leftRow, current, context);
      }
    }
  }

  /**
   * Fills the output record with (key, left columns, right columns) and writes it.
   *
   * @param joinKey value written to output column 0
   * @param leftRow buffered left-side columns (index 0 is the tag and is skipped),
   *     or {@code null} to write nulls in the left columns
   * @param rightRow right-side row; column 0 is the tag and is skipped
   * @param context task context the row is written to
   * @throws IOException if writing the row fails
   */
  private void emit(long joinKey, Object[] leftRow, Record rightRow,
      TaskContext context) throws IOException {
    int col = 0;
    outputRow.set(col++, joinKey);

    if (leftRow == null) {
      // The number of padded columns follows the right-side row's width.
      for (int i = 1; i < rightRow.getColumnCount(); i++) {
        outputRow.set(col++, null);
      }
    } else {
      for (int i = 1; i < leftRow.length; i++) {
        outputRow.set(col++, leftRow[i]);
      }
    }

    for (int i = 1; i < rightRow.getColumnCount(); i++) {
      outputRow.set(col++, rightRow.get(i));
    }

    context.write(outputRow);
  }
}
/**
 * Configures and runs the right-join job with inputs {@code join_in1} and
 * {@code join_in2} and output {@code join_out_right}.
 *
 * <p>The test only checks that the job runs without throwing; it makes no
 * assertions on the output table.
 *
 * @throws Exception if configuring or running the job fails
 */
@Test
public void test() throws Exception {
  // Fixed table names: <input table1> <input table2> <out>. The former
  // length check with System.exit(2) could never trigger and was removed.
  final String[] args = {"join_in1", "join_in2", "join_out_right"};

  JobConf job = new JobConf();
  job.setMapperClass(JoinMapper.class);
  job.setReducerClass(JoinReducer.class);

  job.setMapOutputKeySchema(SchemaUtils.fromString("key:bigint,tag:bigint"));
  job.setMapOutputValueSchema(SchemaUtils
      .fromString("tagx:bigint,value:string"));

  // Partition and group by key only, but sort by (key, tag) so that rows
  // tagged 1 reach the reducer before rows tagged 2 for the same key.
  job.setPartitionColumns(new String[]{"key"});
  job.setOutputKeySortColumns(new String[]{"key", "tag"});
  job.setOutputGroupingColumns(new String[]{"key"});

  // NOTE(review): these two settings are not read by the mapper or reducer
  // in this file; kept unchanged in case other code depends on them.
  job.set("tablename2", args[1]);
  job.setNumReduceTasks(1);
  job.setInt("table.counter", 0);

  InputUtils.addTable(TableInfo.builder().tableName(args[0]).build(), job);
  InputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), job);
  OutputUtils.addTable(TableInfo.builder().tableName(args[2]).build(), job);

  JobClient.runJob(job);
}
}