/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.zebra.mapreduce;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.zebra.mapreduce.BasicTableOutputFormat;
import org.apache.hadoop.zebra.mapreduce.TableInputFormat;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.data.Tuple;
/**
 * This is a sample that shows how to use Zebra tables to do a simple basic
 * union in map/reduce. To run this, we need to have two basic tables ready,
 * containing data as in Sample 1, i.e., (word, count). In this example they
 * are at /homes/chaow/mapredu/t1 and /homes/chaow/mapredu/t2. The resulting
 * table is put at /homes/chaow/mapredu2/t1.
 */
public class TableMRSample2 {
    static class MapClass extends
            Mapper<BytesWritable, Tuple, BytesWritable, Tuple> {

        /**
         * Identity map: echoes each (row-key, row) pair to the output table
         * unchanged.
         *
         * @param key     the Zebra row key for this record
         * @param value   the projected row (here just the "word" column)
         * @param context the map task context used to emit output
         * @throws IOException          on write failure
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        public void map(BytesWritable key, Tuple value, Context context)
                throws IOException, InterruptedException {
            // Demo-only trace: emits one line per input record to the task's
            // stdout log; remove for anything beyond a sample.
            System.out.println(key.toString() + value.toString());
            context.write(key, value);
        }

        /**
         * Fetches and parses the configured output schema once per task so a
         * bad or missing schema fails the task at startup rather than
         * mid-write. The original version also cached a BytesWritable and a
         * Tuple in fields here, but neither was ever read; those dead fields
         * have been removed.
         */
        @Override
        public void setup(Context context) {
            try {
                Schema outSchema = BasicTableOutputFormat.getSchema(context);
                TypesUtils.createTuple(outSchema);
            } catch (IOException e) {
                throw new RuntimeException(e);
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Configures and submits the map-only union job described in the class
     * javadoc. Moved out of MapClass so the entry point is the class this
     * file is named after ("java ... TableMRSample2") rather than the nested
     * TableMRSample2$MapClass, where the original (almost certainly
     * unintentionally) placed it.
     *
     * @param args unused
     * @throws ParseException         if a schema or projection fails to parse
     * @throws IOException            on HDFS or job-submission errors
     * @throws InterruptedException   if submission is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     */
    public static void main(String[] args) throws ParseException, IOException,
            InterruptedException, ClassNotFoundException {
        Job job = new Job();
        job.setJobName("tableMRSample");
        Configuration conf = job.getConfiguration();
        // Compress the output TFile blocks with gzip.
        conf.set("table.output.tfile.compression", "gz");

        // Input/output formats plus the identity mapper above.
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(BasicTableOutputFormat.class);
        job.setMapperClass(TableMRSample2.MapClass.class);

        // Union the two sample input tables (see class javadoc for layout).
        List<Path> paths = new ArrayList<Path>(2);
        Path p = new Path("/homes/chaow/mapredu/t1");
        System.out.println("path = " + p);
        paths.add(p);
        p = new Path("/homes/chaow/mapredu/t2");
        paths.add(p);
        // Size the array from the list instead of the hard-coded 2 the
        // original used, so adding a third input path can't silently
        // produce a trailing null slot.
        TableInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
        // Read only the "word" column from each input table.
        TableInputFormat.setProjection(job, "word");

        BasicTableOutputFormat.setOutputPath(job, new Path(
                "/homes/chaow/mapredu2/t1"));
        BasicTableOutputFormat.setSchema(job, "word:string");
        BasicTableOutputFormat.setStorageHint(job, "[word]");

        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);
        // TODO: need to find a replacement for the removed
        // job.setNumMapTasks(2) API.
        // NOTE(review): submit() is fire-and-forget — the JVM exits without
        // waiting for the job; use job.waitForCompletion(true) to block and
        // report progress instead.
        job.submit();
    }
}