/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.testlib; import java.io.IOException; import com.google.common.base.Preconditions; import org.apache.hadoop.io.NullWritable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.kiji.mapreduce.KijiTableContext; import org.kiji.mapreduce.framework.HFileKeyValue; import org.kiji.mapreduce.gather.GathererContext; import org.kiji.mapreduce.gather.KijiGatherer; import org.kiji.mapreduce.gather.impl.InternalGathererContext; import org.kiji.mapreduce.impl.KijiTableContextFactory; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiRowData; /** * Example of a «table mapper» implemented as a gatherer that output to a Kiji table. * * <p> A table mapper reads from a Kiji table and writes to another Kiji table * (or possible the same). * * <p> This mapper expects an input table with the layout specified in * src/test/resources/org/kiji/mapreduce/layout/test.json, * reads basic users info (info:first_name, info:last_name and info:zip_code), * and writes rows whose ID are the zip codes. * It writes "first_name last_name" at column "primitives:string". * * Most of the boilerplate could be avoided :( */ public class SimpleTableMapperAsGatherer extends KijiGatherer<HFileKeyValue, NullWritable> { private static final Logger LOG = LoggerFactory.getLogger(SimpleTableMapperAsGatherer.class); /** {@inheritDoc} */ @Override public Class<?> getOutputKeyClass() { return HFileKeyValue.class; } /** {@inheritDoc} */ @Override public Class<?> getOutputValueClass() { return NullWritable.class; } /** {@inheritDoc} */ @Override public KijiDataRequest getDataRequest() { return KijiDataRequest.create("info"); } private KijiTableContext mTableContext = null; /** {@inheritDoc} */ @Override public void setup(GathererContext<HFileKeyValue, NullWritable> context) throws IOException { Preconditions.checkState(mTableContext == null); super.setup(context); mTableContext = KijiTableContextFactory.create(((InternalGathererContext)context).getMapReduceContext()); } /** {@inheritDoc} */ @Override public void gather(KijiRowData input, GathererContext<HFileKeyValue, NullWritable> unused) throws IOException { Preconditions.checkState(mTableContext != null); final String firstName = input.getMostRecentValue("info", "first_name").toString(); final String lastName = input.getMostRecentValue("info", "last_name").toString(); final Integer zipCode = input.getMostRecentValue("info", "zip_code"); LOG.info(String.format("Processing row: %s %s %d", firstName, lastName, zipCode)); // Note: this is actually dangerous, // to accumulating several persons with the same zip-code, // we must ensure different timestamps. mTableContext.put( mTableContext.getEntityId(zipCode.toString()), "primitives", "string", System.currentTimeMillis(), String.format("%s %s", firstName, lastName)); LOG.info(String.format("Processed row: %s %s %d", firstName, lastName, zipCode)); } /** {@inheritDoc} */ @Override public void cleanup(GathererContext<HFileKeyValue, NullWritable> context) throws IOException { Preconditions.checkState(mTableContext != null); mTableContext.close(); mTableContext = null; super.cleanup(context); } }