/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.streaming; import java.io.*; import java.net.URLDecoder; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.Writable; /** A generic Mapper bridge. * It delegates operations to an external program via stdin and stdout. */ public class PipeMapper extends PipeMapRed implements Mapper<WritableComparable, Writable, WritableComparable, Writable> { private boolean ignoreKey = false; private boolean skipNewline = false; String getPipeCommand(JobConf job) { String str = job.get("stream.map.streamprocessor"); if (str == null) { return str; } try { return URLDecoder.decode(str, "UTF-8"); } catch (UnsupportedEncodingException e) { System.err.println("stream.map.streamprocessor in jobconf not found"); return null; } } boolean getDoPipe() { return true; } public void configure(JobConf job) { super.configure(job); String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName(); this.ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()) || job.getBoolean("stream.map.ignoreKey", false); this.skipNewline = job.getBoolean("stream.map.skipNewline", false); Class<?> c = job.getClass("stream.map.posthook", null, Mapper.class); if(c != null) { postMapper = (Mapper)ReflectionUtils.newInstance(c, job); LOG.info("PostHook="+c.getName()); } } // Do NOT declare default constructor // (MapRed creates it reflectively) public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException { // init if (outThread_ == null) { startOutputThreads(output, reporter); } if (outerrThreadsThrowable != null) { mapRedFinished(); throw new IOException ("MROutput/MRErrThread failed:" + StringUtils.stringifyException( outerrThreadsThrowable)); } try { // 1/4 Hadoop in numRecRead_++; maybeLogRecord(); if (debugFailDuring_ && numRecRead_ == 3) { throw new IOException("debugFailDuring_"); } // 2/4 Hadoop to Tool if (numExceptions_ == 0) { if (!this.ignoreKey) { write(key); clientOut_.write('\t'); } write(value); if(!this.skipNewline) { clientOut_.write('\n'); } // clientOut_.flush(); } else { numRecSkipped_++; } } catch (IOException io) { numExceptions_++; if (numExceptions_ > 1 || numRecWritten_ < minRecWrittenToEnableSkip_) { // terminate with failure String msg = logFailure(io); appendLogToJobLog("failure"); mapRedFinished(); throw new IOException(msg); } else { // terminate with success: // swallow input records although the stream processor failed/closed } } } public void close() { appendLogToJobLog("success"); mapRedFinished(); } @Override char getFieldSeparator() { return super.mapOutputFieldSeparator; } @Override int getNumOfKeyFields() { return super.numOfMapOutputKeyFields; } }