/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.streaming; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Iterator; import java.net.URLDecoder; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.Writable; /** A generic Reducer bridge. * It delegates operations to an external program via stdin and stdout. */ public class PipeReducer extends PipeMapRed implements Reducer<WritableComparable, Writable, WritableComparable, Writable> { protected Reducer preReducer; protected BufferingOutputCollector oc; private boolean ignoreKey = false; String getPipeCommand(JobConf job) { String str = job.get("stream.reduce.streamprocessor"); if (str == null) { return str; } try { return URLDecoder.decode(str, "UTF-8"); } catch (UnsupportedEncodingException e) { System.err.println("stream.reduce.streamprocessor in jobconf not found"); return null; } } public void configure(JobConf job) { super.configure(job); Class<?> c = job.getClass("stream.reduce.posthook", null, Mapper.class); if(c != null) { postMapper = (Mapper)ReflectionUtils.newInstance(c, job); LOG.info("PostHook="+c.getName()); } c = job.getClass("stream.reduce.prehook", null, Reducer.class); if(c != null) { preReducer = (Reducer)ReflectionUtils.newInstance(c, job); oc = new InmemBufferingOutputCollector(); LOG.info("PreHook="+c.getName()); } this.ignoreKey = job.getBoolean("stream.reduce.ignoreKey", false); } boolean getDoPipe() { String argv = getPipeCommand(job_); // Currently: null is identity reduce. REDUCE_NONE is no-map-outputs. return (argv != null) && !StreamJob.REDUCE_NONE.equals(argv); } private void blowPipe(WritableComparable key, Writable val, OutputCollector output) throws IOException { numRecRead_++; maybeLogRecord(); // i took out the check for doPipe_. it's ridiculous. // doPipes is set under conditions where the reducer is // IdentityReducer. so the code would never come through this // path. if (outerrThreadsThrowable != null) { mapRedFinished(); throw new IOException ("MROutput/MRErrThread failed:" + StringUtils.stringifyException(outerrThreadsThrowable)); } if(!this.ignoreKey) { write(key); clientOut_.write('\t'); } write(val); clientOut_.write('\n'); // clientOut_.flush(); } public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { Writable [] prret; Writable val; // init if (doPipe_ && outThread_ == null) { startOutputThreads(output, reporter); } try { if (preReducer != null) { preReducer.reduce(key, values, oc, reporter); while((prret = oc.retrieve()) != null) { key = (WritableComparable)prret[0]; val = prret[1]; blowPipe(key, val, output); } } else { while (values.hasNext()) { val = (Writable) values.next(); blowPipe(key, val, output); } } } catch (IOException io) { // a common reason to get here is failure of the subprocess. // Document that fact, if possible. String extraInfo = ""; try { int exitVal = sim.exitValue(); if (exitVal == 0) { extraInfo = "subprocess exited successfully\n"; } else { extraInfo = "subprocess exited with error code " + exitVal + "\n"; }; } catch (IllegalThreadStateException e) { // hmm, but child is still running. go figure. extraInfo = "subprocess still running\n"; }; appendLogToJobLog("failure"); mapRedFinished(); throw new IOException(extraInfo + getContext() + io.getMessage()); } } public void close() throws IOException { if (preReducer != null) { preReducer.close(); } appendLogToJobLog("success"); mapRedFinished(); } @Override char getFieldSeparator() { return super.reduceOutFieldSeparator; } @Override int getNumOfKeyFields() { return super.numOfReduceOutputKeyFields; } }