/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.streaming;
import java.io.*;
import java.net.URLDecoder;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.Writable;
/**
 * A generic Mapper bridge.
 * It delegates operations to an external program via stdin and stdout.
 *
 * <p>Each input record is written to {@code clientOut_} as
 * {@code key TAB value NEWLINE}; the key (with its tab) and the trailing
 * newline can each be suppressed through job configuration, see
 * {@link #configure(JobConf)}.
 */
public class PipeMapper extends PipeMapRed
    implements Mapper<WritableComparable, Writable, WritableComparable, Writable> {

  /** When true, {@link #map} writes only the value (no key, no tab separator). */
  private boolean ignoreKey = false;

  /** When true, {@link #map} does not write a newline after the value. */
  private boolean skipNewline = false;

  /**
   * Reads the map-side command from the job configuration.
   *
   * @param job the job configuration to read {@code stream.map.streamprocessor} from
   * @return the URL-decoded (UTF-8) property value, or {@code null} when the
   *         property is not set or the value could not be decoded
   */
  String getPipeCommand(JobConf job) {
    String str = job.get("stream.map.streamprocessor");
    if (str == null) {
      return str;
    }
    try {
      return URLDecoder.decode(str, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      // The property exists at this point; the failure is in decoding it,
      // so report that instead of claiming the property is missing.
      System.err.println(
          "stream.map.streamprocessor in jobconf could not be URL-decoded: " + e);
      return null;
    }
  }

  /**
   * @return always {@code true}
   */
  boolean getDoPipe() {
    return true;
  }

  /**
   * Configures this mapper from the job configuration.
   *
   * <p>After delegating to the superclass this method:
   * <ul>
   * <li>sets {@code ignoreKey} when the class configured under
   *     {@code mapred.input.format.class} (default {@link TextInputFormat}) is
   *     {@link TextInputFormat}, or when {@code stream.map.ignoreKey} is true;</li>
   * <li>sets {@code skipNewline} from {@code stream.map.skipNewline}
   *     (default false);</li>
   * <li>if {@code stream.map.posthook} names a {@link Mapper} class,
   *     instantiates it reflectively and stores it in {@code postMapper}.</li>
   * </ul>
   *
   * @param job the job configuration
   */
  public void configure(JobConf job) {
    super.configure(job);

    String inputFormatClassName =
        job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    this.ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName())
        || job.getBoolean("stream.map.ignoreKey", false);
    this.skipNewline = job.getBoolean("stream.map.skipNewline", false);

    Class<?> c = job.getClass("stream.map.posthook", null, Mapper.class);
    if (c != null) {
      postMapper = (Mapper) ReflectionUtils.newInstance(c, job);
      LOG.info("PostHook=" + c.getName());
    }
  }

  // Do NOT declare default constructor
  // (MapRed creates it reflectively)

  /**
   * Forwards one input record to the external program.
   *
   * <p>On the first call (while {@code outThread_} is still null) the output
   * threads are started. If {@code outerrThreadsThrowable} has been set, the
   * task is finished and an {@link IOException} is thrown. Otherwise the record
   * is written to {@code clientOut_}, unless an earlier write already failed,
   * in which case the record is only counted as skipped.
   *
   * <p>The first {@link IOException} raised while writing is swallowed as long
   * as at least {@code minRecWrittenToEnableSkip_} records have been written;
   * any later one, or one that occurs before that threshold, fails the task.
   *
   * @param key      the input key; written followed by a tab unless keys are ignored
   * @param value    the input value; written followed by a newline unless newlines are skipped
   * @param output   collector handed to the output threads when they are started
   * @param reporter reporter handed to the output threads when they are started
   * @throws IOException if the output/error threads reported a failure, or if
   *         writing to the external program failed and the failure is not skippable;
   *         in the latter case the original exception is attached as the cause
   */
  public void map(WritableComparable key, Writable value, OutputCollector output,
                  Reporter reporter) throws IOException {
    // init
    if (outThread_ == null) {
      startOutputThreads(output, reporter);
    }
    if (outerrThreadsThrowable != null) {
      mapRedFinished();
      throw new IOException("MROutput/MRErrThread failed:"
                            + StringUtils.stringifyException(
                                outerrThreadsThrowable));
    }
    try {
      // 1/4 Hadoop in
      numRecRead_++;
      maybeLogRecord();
      // Debug hook: deliberately fail on the third record when enabled.
      if (debugFailDuring_ && numRecRead_ == 3) {
        throw new IOException("debugFailDuring_");
      }

      // 2/4 Hadoop to Tool
      if (numExceptions_ == 0) {
        if (!this.ignoreKey) {
          write(key);
          clientOut_.write('\t');
        }
        write(value);
        if (!this.skipNewline) {
          clientOut_.write('\n');
        }
        // clientOut_.flush();
      } else {
        // A previous write failed and was tolerated: drop the record.
        numRecSkipped_++;
      }
    } catch (IOException io) {
      numExceptions_++;
      if (numExceptions_ > 1 || numRecWritten_ < minRecWrittenToEnableSkip_) {
        // terminate with failure
        String msg = logFailure(io);
        appendLogToJobLog("failure");
        mapRedFinished();
        // Keep the original exception reachable for whoever handles this one.
        IOException failure = new IOException(msg);
        failure.initCause(io);
        throw failure;
      } else {
        // terminate with success:
        // swallow input records although the stream processor failed/closed
      }
    }
  }

  /**
   * Appends a "success" entry to the job log and finishes the task via
   * {@code mapRedFinished()}.
   */
  public void close() {
    appendLogToJobLog("success");
    mapRedFinished();
  }

  /**
   * @return the superclass's {@code mapOutputFieldSeparator}
   */
  @Override
  char getFieldSeparator() {
    return super.mapOutputFieldSeparator;
  }

  /**
   * @return the superclass's {@code numOfMapOutputKeyFields}
   */
  @Override
  int getNumOfKeyFields() {
    return super.numOfMapOutputKeyFields;
  }
}