/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.zebra.mapreduce;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.zebra.mapreduce.BasicTableOutputFormat;
import org.apache.hadoop.zebra.mapreduce.TableInputFormat;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.data.Tuple;
/**
 * This is a sample that shows how to use Zebra tables to do a simple basic
 * union in map/reduce. To run this, we need to have two basic tables ready,
 * containing data as in Sample 1, i.e., (word, count). In this example they
 * are at /homes/chaow/mapredu/t1 and /homes/chaow/mapredu/t2. The resulting
 * table is put at /homes/chaow/mapredu2/t1.
 */
public class TableMRSample2 {
    static class MapClass extends
            Mapper<BytesWritable, Tuple, BytesWritable, Tuple> {

        /**
         * Identity map: echoes each (row-key, row) pair to the output table
         * unchanged.
         *
         * @param key     the Zebra row key for this record
         * @param value   the projected row (here just the "word" column)
         * @param context the map task context used to emit output
         * @throws IOException          on write failure
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        public void map(BytesWritable key, Tuple value, Context context)
                throws IOException, InterruptedException {
            // Demo-only trace: emits one line per input record to the task's
            // stdout log; remove for anything beyond a sample.
            System.out.println(key.toString() + value.toString());
            context.write(key, value);
        }

        /**
         * Fetches and parses the configured output schema once per task so a
         * bad or missing schema fails the task at startup rather than
         * mid-write. The original version also cached a BytesWritable and a
         * Tuple in fields here, but neither was ever read; those dead fields
         * have been removed.
         */
        @Override
        public void setup(Context context) {
            try {
                Schema outSchema = BasicTableOutputFormat.getSchema(context);
                TypesUtils.createTuple(outSchema);
            } catch (IOException e) {
                throw new RuntimeException(e);
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Configures and submits the map-only union job described in the class
     * javadoc. Moved out of MapClass so the entry point is the class this
     * file is named after ("java ... TableMRSample2") rather than the nested
     * TableMRSample2$MapClass, where the original (almost certainly
     * unintentionally) placed it.
     *
     * @param args unused
     * @throws ParseException         if a schema or projection fails to parse
     * @throws IOException            on HDFS or job-submission errors
     * @throws InterruptedException   if submission is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void main(String[] args) throws ParseException, IOException,
            InterruptedException, ClassNotFoundException {
        Job job = new Job();
        job.setJobName("tableMRSample");
        Configuration conf = job.getConfiguration();
        // Compress the output TFile blocks with gzip.
        conf.set("table.output.tfile.compression", "gz");

        // Input/output formats plus the identity mapper above.
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(BasicTableOutputFormat.class);
        job.setMapperClass(TableMRSample2.MapClass.class);

        // Union the two sample input tables (see class javadoc for layout).
        List<Path> paths = new ArrayList<Path>(2);
        Path p = new Path("/homes/chaow/mapredu/t1");
        System.out.println("path = " + p);
        paths.add(p);
        p = new Path("/homes/chaow/mapredu/t2");
        paths.add(p);
        // Size the array from the list instead of the hard-coded 2 the
        // original used, so adding a third input path can't silently
        // produce a trailing null slot.
        TableInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
        // Read only the "word" column from each input table.
        TableInputFormat.setProjection(job, "word");

        BasicTableOutputFormat.setOutputPath(job, new Path(
                "/homes/chaow/mapredu2/t1"));
        BasicTableOutputFormat.setSchema(job, "word:string");
        BasicTableOutputFormat.setStorageHint(job, "[word]");

        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);
        // TODO: need to find a replacement for the removed
        // job.setNumMapTasks(2) API.
        // NOTE(review): submit() is fire-and-forget — the JVM exits without
        // waiting for the job; use job.waitForCompletion(true) to block and
        // report progress instead.
        job.submit();
    }
}