/* * Licensed to Think Big Analytics, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Think Big Analytics, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2010 Think Big Analytics. All Rights Reserved. */ package tap.core; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; import org.apache.avro.Schema; import org.apache.avro.mapred.*; import org.apache.hadoop.mapred.*; import tap.Phase; import tap.Pipe; import tap.core.io.BinaryKey; import tap.core.io.avro.BinaryKeyDatumWriter; import tap.core.io.avro.BinaryKeyEncoder; import tap.util.ObjectFactory; /** Base class for a combiner or a reducer */ @SuppressWarnings("deprecation") abstract class BaseAvroReducer<V, OUT, KO, VO> extends MapReduceBase implements Reducer<AvroKey<BinaryKey>, AvroValue<V>, KO, VO> { private TapReducerInterface<V, OUT> reducer; private AvroMultiCollector<OUT> collector; private ReduceIterable reduceIterable = new ReduceIterable(); private TapContext<OUT> context; protected boolean isPipeReducer = false; protected OUT out; protected Pipe<OUT> outpipe = null; protected abstract TapReducerInterface<V, OUT> getReducer(JobConf conf); protected abstract AvroMultiCollector<OUT> getCollector(OutputCollector<KO, VO> c, Reporter reporter); @SuppressWarnings({ "unchecked" }) @Override public void configure(JobConf conf) { this.reducer = getReducer(conf); try { this.out = (OUT) ObjectFactory.newInstance(Class.forName(conf.get(Phase.REDUCE_OUT_CLASS))); } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } // Determine if we are using legacy reduce signature or newer Pipe based signature isPipeReducer = (null != conf.get(Phase.REDUCER_OUT_PIPE_CLASS)); if (isPipeReducer) { outpipe = new Pipe<OUT>(out); } if (null != reducer) { reducer.init(conf.get("mapred.output.dir")); } } class ReduceIterable implements Iterable<V>, Iterator<V> { private Iterator<AvroValue<V>> values; public boolean hasNext() { return values.hasNext(); } public V next() { return values.next().datum(); } public void remove() { throw new UnsupportedOperationException(); } public Iterator<V> iterator() { return this; } } class ReuseableByteArrayOutputStream extends ByteArrayOutputStream { public byte[] getBuffer() { return buf; } public int getCount() { return count; } } @SuppressWarnings("unchecked") @Override public final void reduce(AvroKey<BinaryKey> key, Iterator<AvroValue<V>> values, OutputCollector<KO, VO> collector, Reporter reporter) throws IOException { if (this.collector == null) { this.collector = getCollector(collector, reporter); } if (this.isPipeReducer) { // create an Iterator inPipe Pipe<V> inPipe = new Pipe<V>((Iterator<AvroValue<V>>)values); if (null == this.outpipe.getContext()) { this.outpipe.setContext(new TapContext<OUT>(this.collector, reporter)); } if(this.collector instanceof BinaryKeyAwareCollector) { ((BinaryKeyAwareCollector) this.collector).setCurrentKey(key.datum()); } reducer.reduce(inPipe, outpipe); } } @Override public void close() throws IOException { reducer.close(outpipe); } interface BinaryKeyAwareCollector { void setCurrentKey(BinaryKey key); } }