/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.stage.output;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.Progressable;
import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DirectDataSource;
import com.asakusafw.runtime.directio.DirectDataSourceConstants;
import com.asakusafw.runtime.directio.DirectDataSourceRepository;
import com.asakusafw.runtime.directio.FilePattern;
import com.asakusafw.runtime.directio.OutputAttemptContext;
import com.asakusafw.runtime.directio.OutputTransactionContext;
import com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil;
import com.asakusafw.runtime.directio.hadoop.ProgressableCounter;
import com.asakusafw.runtime.stage.StageConstants;
import com.asakusafw.runtime.stage.StageOutput;
import com.asakusafw.runtime.util.VariableTable;
/**
* A bridge implementation for Hadoop {@link OutputFormat}.
* @since 0.2.5
* @version 0.9.0
*/
public final class BridgeOutputFormat extends OutputFormat<Object, Object> {
static final Log LOG = LogFactory.getLog(BridgeOutputFormat.class);
private static final Charset ASCII = StandardCharsets.US_ASCII;
private static final long SERIAL_VERSION = 1;
private static final String KEY = "com.asakusafw.output.bridge"; //$NON-NLS-1$
private final Map<TaskAttemptID, OutputCommitter> commiterCache = new WeakHashMap<>();
/**
 * Returns whether this stage has an output corresponding this format.
 * @param context current context
 * @return {@code true} if such output exists, otherwise {@code false}
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static boolean hasOutput(JobContext context) {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    // outputs were registered iff set() stored the serialized spec list under KEY
    String serialized = context.getConfiguration().getRaw(KEY);
    return serialized != null;
}
/**
 * Sets current output information into the current context.
 * @param context current context
 * @param outputList output information to be set
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static void set(JobContext context, List<StageOutput> outputList) {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (outputList == null) {
        throw new IllegalArgumentException("outputList must not be null"); //$NON-NLS-1$
    }
    // convert each stage output into an (unresolved) spec and serialize them into the configuration
    List<OutputSpec> specs = new ArrayList<>();
    for (StageOutput output : outputList) {
        specs.add(new OutputSpec(output.getName(), getDeletePatterns(output)));
    }
    save(context.getConfiguration(), specs);
}
/**
 * Extracts the delete pattern attribute values from a stage output.
 * Only attributes whose key starts with the delete-pattern prefix are collected.
 * @param output the stage output (must not be {@code null})
 * @return the raw (unresolved) delete patterns; may be empty
 */
private static List<String> getDeletePatterns(StageOutput output) {
    assert output != null;
    List<String> patterns = new ArrayList<>();
    for (Map.Entry<String, String> attribute : output.getAttributes().entrySet()) {
        String key = attribute.getKey();
        if (key.startsWith(DirectDataSourceConstants.PREFIX_DELETE_PATTERN)) {
            patterns.add(attribute.getValue());
        }
    }
    return patterns;
}
/**
 * Serializes the output specs into the configuration under {@link #KEY}.
 * The payload is gzip-compressed, base64-encoded, and prefixed with {@link #SERIAL_VERSION}.
 * @param conf the target configuration
 * @param specs the specs to store; all must still be unresolved
 * @throws IllegalStateException if any spec is already resolved, or serialization fails
 */
private static void save(Configuration conf, List<OutputSpec> specs) {
    assert conf != null;
    assert specs != null;
    // resolved specs must never be re-serialized (variables were already expanded)
    for (OutputSpec spec : specs) {
        if (spec.resolved) {
            throw new IllegalStateException();
        }
    }
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(new GZIPOutputStream(new Base64OutputStream(buffer)))) {
        WritableUtils.writeVLong(out, SERIAL_VERSION);
        WritableUtils.writeVInt(out, specs.size());
        for (OutputSpec spec : specs) {
            WritableUtils.writeString(out, spec.basePath);
            List<String> patterns = spec.deletePatterns;
            WritableUtils.writeVInt(out, patterns.size());
            for (String pattern : patterns) {
                WritableUtils.writeString(out, pattern);
            }
        }
    } catch (IOException e) {
        // in-memory serialization should not fail; surface as a programming error
        throw new IllegalStateException(e);
    }
    conf.set(KEY, new String(buffer.toByteArray(), ASCII));
}
/**
 * Restores the output specs previously stored by {@link #save(Configuration, List)}.
 * Variables embedded in base paths and delete patterns are resolved using the
 * batch arguments from the context.
 * @param context the current job context
 * @return the resolved specs, or an empty list if nothing was stored
 * @throws IllegalStateException if the stored payload is corrupted, has an
 *     unexpected serial version, or contains unresolvable variables
 */
private static List<OutputSpec> getSpecs(JobContext context) {
    assert context != null;
    String encoded = context.getConfiguration().getRaw(KEY);
    if (encoded == null) {
        return Collections.emptyList();
    }
    VariableTable table = getVariableTable(context);
    ByteArrayInputStream source = new ByteArrayInputStream(encoded.getBytes(ASCII));
    // FIX: close the decoding stream chain; the original never closed it, leaking the
    // GZIPInputStream's native Inflater until finalization
    try (DataInputStream input = new DataInputStream(new GZIPInputStream(new Base64InputStream(source)))) {
        long version = WritableUtils.readVLong(input);
        if (version != SERIAL_VERSION) {
            throw new IOException(MessageFormat.format(
                    "Invalid StageOutput version: framework={0}, saw={1}",
                    SERIAL_VERSION,
                    version));
        }
        List<OutputSpec> results = new ArrayList<>();
        int specCount = WritableUtils.readVInt(input);
        for (int specIndex = 0; specIndex < specCount; specIndex++) {
            String basePath = WritableUtils.readString(input);
            try {
                basePath = table.parse(basePath);
            } catch (IllegalArgumentException e) {
                throw new IllegalStateException(MessageFormat.format(
                        "Invalid basePath: {0}",
                        basePath), e);
            }
            int patternCount = WritableUtils.readVInt(input);
            List<String> patterns = new ArrayList<>();
            for (int patternIndex = 0; patternIndex < patternCount; patternIndex++) {
                String pattern = WritableUtils.readString(input);
                try {
                    pattern = table.parse(pattern);
                } catch (IllegalArgumentException e) {
                    throw new IllegalStateException(MessageFormat.format(
                            "Invalid delete pattern: {0}",
                            pattern), e);
                }
                patterns.add(pattern);
            }
            // specs restored from the configuration are marked as resolved
            results.add(new OutputSpec(basePath, patterns, true));
        }
        return results;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Loads the Direct I/O data source repository from the job configuration.
 * @param context the current job context (must not be {@code null})
 * @return the loaded repository
 */
private static DirectDataSourceRepository getDataSourceRepository(JobContext context) {
    assert context != null;
    Configuration conf = context.getConfiguration();
    return HadoopDataSourceUtil.loadRepository(conf);
}
/**
 * Creates output context from Hadoop context.
 * @param context current context in Hadoop
 * @param datasourceId datasource ID
 * @return the created context
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static OutputTransactionContext createContext(JobContext context, String datasourceId) {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (datasourceId == null) {
        throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
    }
    Counter counter = createCounter(context);
    String transactionId = getTransactionId(context, datasourceId);
    return new OutputTransactionContext(transactionId, datasourceId, counter);
}
/**
 * Creates output context from Hadoop context.
 * @param context current context in Hadoop
 * @param datasourceId datasource ID
 * @return the created context
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static OutputAttemptContext createContext(TaskAttemptContext context, String datasourceId) {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (datasourceId == null) {
        throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
    }
    Counter counter = createCounter(context);
    String transactionId = getTransactionId(context, datasourceId);
    String attemptId = getAttemptId(context, datasourceId);
    return new OutputAttemptContext(transactionId, attemptId, datasourceId, counter);
}
/**
 * Computes the transaction ID for the current job.
 * Falls back to the Hadoop job ID when no execution ID is configured.
 * @param jobContext the current job context (must not be {@code null})
 * @param datasourceId the datasource ID (must not be {@code null})
 * @return the transaction ID
 */
private static String getTransactionId(JobContext jobContext, String datasourceId) {
    assert jobContext != null;
    assert datasourceId != null;
    Configuration conf = jobContext.getConfiguration();
    String executionId = conf.get(StageConstants.PROP_EXECUTION_ID);
    if (executionId == null) {
        // no execution ID configured: use the Hadoop job ID instead
        executionId = jobContext.getJobID().toString();
    }
    return getTransactionId(executionId);
}
/**
 * Maps an execution ID to a transaction ID.
 * Currently the transaction ID is the execution ID itself.
 * @param executionId the execution ID
 * @return the corresponding transaction ID
 */
private static String getTransactionId(String executionId) {
    return executionId;
}
/**
 * Computes the attempt ID for the current task attempt.
 * @param taskContext the current task context (must not be {@code null})
 * @param datasourceId the datasource ID (must not be {@code null})
 * @return the attempt ID (string form of the Hadoop task attempt ID)
 */
private static String getAttemptId(TaskAttemptContext taskContext, String datasourceId) {
    assert taskContext != null;
    assert datasourceId != null;
    TaskAttemptID attemptId = taskContext.getTaskAttemptID();
    return attemptId.toString();
}
/**
 * Creates a counter which also reports progress to Hadoop when possible.
 * @param context the current job context (must not be {@code null})
 * @return a progress-aware counter if the context is progressable, otherwise a plain counter
 */
private static Counter createCounter(JobContext context) {
    assert context != null;
    if (context instanceof Progressable) {
        return new ProgressableCounter((Progressable) context);
    }
    if (context instanceof org.apache.hadoop.mapred.JobContext) {
        // old-API contexts expose progress through getProgressible()
        return new ProgressableCounter(((org.apache.hadoop.mapred.JobContext) context).getProgressible());
    }
    return new Counter();
}
/**
 * Validates that every registered output has a corresponding data source and
 * that all of its delete patterns are well-formed.
 * @param context the current job context
 * @throws IOException if a base path has no data source or a delete pattern is invalid
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    DirectDataSourceRepository repo = getDataSourceRepository(context);
    for (OutputSpec spec : getSpecs(context)) {
        // probe the repository; an unmapped base path raises IOException
        try {
            repo.getContainerPath(spec.basePath);
        } catch (IOException e) {
            throw new IOException(MessageFormat.format(
                    "There are no corresponded data sources for the base path: {0}",
                    spec.basePath), e);
        }
        // compile each pattern just to validate its syntax
        for (String pattern : spec.deletePatterns) {
            try {
                FilePattern.compile(pattern);
            } catch (IllegalArgumentException e) {
                throw new IOException(MessageFormat.format(
                        "Invalid delete pattern: {0}",
                        pattern), e);
            }
        }
    }
}
/**
 * Returns a record writer; actual records are written through Direct I/O,
 * so this delegates to {@link EmptyFileOutputFormat} which produces no output files.
 */
@Override
public RecordWriter<Object, Object> getRecordWriter(
        TaskAttemptContext context) throws IOException, InterruptedException {
    EmptyFileOutputFormat delegate = new EmptyFileOutputFormat();
    return delegate.getRecordWriter(context);
}
/**
 * Returns the output committer for the task attempt, caching one committer per
 * attempt ID so repeated calls within the same attempt reuse the same instance.
 * @param context the current task attempt context
 * @return the (possibly cached) committer
 * @throws IOException if committer creation fails
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    synchronized (this) {
        TaskAttemptID id = context.getTaskAttemptID();
        OutputCommitter committer = commiterCache.get(id);
        if (committer == null) {
            committer = createOutputCommitter(context);
            // FIX: only store on a cache miss; the original re-put the same entry on
            // every call, performing a redundant map write on each cache hit
            commiterCache.put(id, committer);
        }
        return committer;
    }
}
/**
 * Builds a fresh {@link BridgeOutputCommitter} for the stored output specs.
 * @param context the current job context (must not be {@code null})
 * @return the new committer
 * @throws IOException if the committer cannot resolve its data sources
 */
private OutputCommitter createOutputCommitter(JobContext context) throws IOException {
    assert context != null;
    List<OutputSpec> specs = getSpecs(context);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "Creating output commiter: {0}", //$NON-NLS-1$
                specs));
    }
    return new BridgeOutputCommitter(getDataSourceRepository(context), specs);
}
/**
 * Builds a variable table populated with the batch arguments from the context.
 * Redefinitions of the same variable are ignored.
 * @param context the current job context (must not be {@code null})
 * @return the populated variable table
 */
static VariableTable getVariableTable(JobContext context) {
    assert context != null;
    VariableTable variables = new VariableTable(VariableTable.RedefineStrategy.IGNORE);
    String arguments = context.getConfiguration().get(StageConstants.PROP_ASAKUSA_BATCH_ARGS, ""); //$NON-NLS-1$
    variables.defineVariables(arguments);
    return variables;
}
/**
 * A single output description: a base path plus the delete patterns applied
 * before the output is produced. {@code resolved} tells whether embedded
 * variables have already been expanded.
 */
private static final class OutputSpec {
    final String basePath;
    final List<String> deletePatterns;
    final boolean resolved;

    /**
     * Creates an unresolved spec.
     * @param path the output base path
     * @param patterns the raw delete patterns
     */
    OutputSpec(String path, List<String> patterns) {
        this(path, patterns, false);
    }

    /**
     * Creates a spec.
     * @param path the output base path (must not be {@code null})
     * @param patterns the delete patterns
     * @param isResolved whether variables in path/patterns are already expanded
     */
    OutputSpec(String path, List<String> patterns, boolean isResolved) {
        assert path != null;
        this.basePath = path;
        this.deletePatterns = patterns;
        this.resolved = isResolved;
    }

    @Override
    public String toString() {
        return MessageFormat.format(
                "Output(path={0}, delete={1})", //$NON-NLS-1$
                basePath,
                deletePatterns);
    }
}
private static final class BridgeOutputCommitter extends OutputCommitter {
private final DirectDataSourceRepository repository;
private final Map<String, String> outputMap;
private final List<OutputSpec> outputSpecs;
/**
 * Creates a committer for the given output specs.
 * @param repository the data source repository (must not be {@code null})
 * @param outputList the output specs to commit (must not be {@code null})
 * @throws IOException if a base path cannot be resolved to a data source
 */
BridgeOutputCommitter(
        DirectDataSourceRepository repository,
        List<OutputSpec> outputList) throws IOException {
    assert repository != null;
    assert outputList != null;
    this.repository = repository;
    this.outputSpecs = outputList;
    // pre-resolve container path -> datasource ID for all outputs
    this.outputMap = createMap(repository, outputList);
}
/**
 * Resolves each spec's base path into a (container path, datasource ID) entry.
 * @param repo the data source repository (must not be {@code null})
 * @param specs the output specs (must not be {@code null})
 * @return a sorted map from container path to datasource ID
 * @throws IOException if a base path has no corresponding data source
 */
private static Map<String, String> createMap(
        DirectDataSourceRepository repo,
        List<OutputSpec> specs) throws IOException {
    assert repo != null;
    assert specs != null;
    Map<String, String> pathToId = new TreeMap<>();
    for (OutputSpec spec : specs) {
        pathToId.put(repo.getContainerPath(spec.basePath), repo.getRelatedId(spec.basePath));
    }
    return pathToId;
}
/**
 * Task commit is required only when at least one Direct I/O output is registered.
 */
@Override
public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
    return !outputMap.isEmpty();
}
/**
 * Prepares the attempt output area of every registered datasource for this task attempt.
 * No-op when there are no Direct I/O outputs.
 * @throws IOException if any datasource fails to set up (wraps interruption as InterruptedIOException)
 */
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {
// nothing to do when this job has no Direct I/O outputs
if (outputMap.isEmpty()) {
return;
}
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"start Direct I/O output task: {1} ({0})", //$NON-NLS-1$
taskContext.getJobName(),
taskContext.getTaskAttemptID()));
}
long t0 = System.currentTimeMillis();
// set up the attempt output on each datasource (containerPath -> datasource ID)
for (Map.Entry<String, String> entry : outputMap.entrySet()) {
String containerPath = entry.getKey();
String id = entry.getValue();
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"start Direct I/O output task setup for datasource: " //$NON-NLS-1$
+ "datasource={0} ({2} ({1}))", //$NON-NLS-1$
id,
taskContext.getJobName(),
taskContext.getTaskAttemptID()));
}
OutputAttemptContext context = createContext(taskContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(containerPath);
repo.setupAttemptOutput(context);
} catch (IOException e) {
// log and rethrow so Hadoop fails the attempt
LOG.error(MessageFormat.format(
"Failed Direct I/O output task setup: datasource={0} ({2} ({1}))",
id,
taskContext.getJobName(),
taskContext.getTaskAttemptID()), e);
throw e;
} catch (InterruptedException e) {
// translate interruption into an IOException the committer API allows
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while setup attempt: {0}, {1} (path={2})",
context.getTransactionId(),
context.getAttemptId(),
containerPath)).initCause(e);
}
// tick the progress counter per datasource
context.getCounter().add(1);
}
if (LOG.isDebugEnabled()) {
long t1 = System.currentTimeMillis();
LOG.debug(MessageFormat.format(
"finish Direct I/O output task setup: task={1} ({0}), elapsed={2}ms", //$NON-NLS-1$
taskContext.getJobName(),
taskContext.getTaskAttemptID(),
t1 - t0));
}
}
/**
 * Stages the attempt output of every datasource into the transaction area, then
 * cleans up the attempt area. No-op when there are no Direct I/O outputs.
 * @throws IOException if any datasource fails to commit (wraps interruption as InterruptedIOException)
 */
@Override
public void commitTask(TaskAttemptContext taskContext) throws IOException {
if (outputMap.isEmpty()) {
return;
}
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"start Direct I/O output task commit: {1} ({0})", //$NON-NLS-1$
taskContext.getJobName(),
taskContext.getTaskAttemptID()));
}
long t0 = System.currentTimeMillis();
for (Map.Entry<String, String> entry : outputMap.entrySet()) {
String containerPath = entry.getKey();
String id = entry.getValue();
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"start Direct I/O output task commit for datasource: " //$NON-NLS-1$
+ "datasource={0} ({2} ({1}))", //$NON-NLS-1$
id,
taskContext.getJobName(),
taskContext.getTaskAttemptID()));
}
OutputAttemptContext context = createContext(taskContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(containerPath);
repo.commitAttemptOutput(context);
} catch (IOException e) {
LOG.error(MessageFormat.format(
"Failed Direct I/O output task commit: datasource={0} ({2} ({1}))",
id,
taskContext.getJobName(),
taskContext.getTaskAttemptID()), e);
throw e;
} catch (InterruptedException e) {
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while commit task attempt: {0}, {1} (path={2})",
context.getTransactionId(),
context.getAttemptId(),
containerPath)).initCause(e);
} catch (RuntimeException e) {
// unexpected failure during commit is unrecoverable for this attempt: log loudly, rethrow
LOG.fatal("TASK COMMIT FAILED", e);
throw e;
}
context.getCounter().add(1);
}
// remove the per-attempt work area once all datasources have staged their output
doCleanupTask(taskContext);
if (LOG.isInfoEnabled()) {
long t1 = System.currentTimeMillis();
LOG.info(MessageFormat.format(
"staged Direct I/O output task: task={1} ({0}), elapsed={2}ms",
taskContext.getJobName(),
taskContext.getTaskAttemptID(),
t1 - t0));
}
}
/**
 * Aborts the task attempt by discarding its attempt output areas.
 * No-op when there are no Direct I/O outputs.
 * @throws IOException if cleanup fails on any datasource
 */
@Override
public void abortTask(TaskAttemptContext taskContext) throws IOException {
    if (outputMap.isEmpty()) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "Start Direct I/O output task abort: {1} ({0})", //$NON-NLS-1$
                taskContext.getJobName(),
                taskContext.getTaskAttemptID()));
    }
    long start = System.currentTimeMillis();
    doCleanupTask(taskContext);
    if (LOG.isInfoEnabled()) {
        long elapsed = System.currentTimeMillis() - start;
        LOG.info(MessageFormat.format(
                "aborted Direct I/O output task: task={1} ({0}), elapsed={2}ms",
                taskContext.getJobName(),
                taskContext.getTaskAttemptID(),
                elapsed));
    }
}
/**
 * Removes the per-attempt work area on every datasource; shared by both
 * commit (after staging) and abort paths.
 * @throws IOException if any datasource fails to clean up (wraps interruption as InterruptedIOException)
 */
private void doCleanupTask(TaskAttemptContext taskContext) throws IOException {
assert taskContext != null;
for (Map.Entry<String, String> entry : outputMap.entrySet()) {
String containerPath = entry.getKey();
String id = entry.getValue();
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Start directio task cleanup for datasource: datasource={0} ({2} ({1}))", //$NON-NLS-1$
id,
taskContext.getJobName(),
taskContext.getTaskAttemptID()));
}
OutputAttemptContext context = createContext(taskContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(containerPath);
repo.cleanupAttemptOutput(context);
} catch (IOException e) {
// log and rethrow so the caller (commit/abort) can fail the attempt
LOG.error(MessageFormat.format(
"Failed directio task cleanup: datasource={0} ({2} ({1}))",
id,
taskContext.getJobName(),
taskContext.getTaskAttemptID()), e);
throw e;
} catch (InterruptedException e) {
// translate interruption into an IOException the committer API allows
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while cleanup attempt: {0}, {1} (path={2})",
context.getTransactionId(),
context.getAttemptId(),
containerPath)).initCause(e);
}
context.getCounter().add(1);
}
}
/**
 * Begins the output transaction for the job: deletes stale outputs matching the
 * delete patterns, writes the transaction info file, then sets up the transaction
 * area on each datasource. No-op when there are no Direct I/O outputs.
 * @throws IOException if any step fails (wraps interruption as InterruptedIOException)
 */
@Override
public void setupJob(JobContext jobContext) throws IOException {
if (outputMap.isEmpty()) {
return;
}
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"start Direct I/O output job setup: {0} ({1})", //$NON-NLS-1$
jobContext.getJobID(),
jobContext.getJobName()));
}
long t0 = System.currentTimeMillis();
// order matters: delete old outputs first, then mark the transaction as in-flight
cleanOutput(jobContext);
setTransactionInfo(jobContext, true);
for (Map.Entry<String, String> entry : outputMap.entrySet()) {
String containerPath = entry.getKey();
String id = entry.getValue();
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Start Direct I/O output job setup: datasource={0} ({1} ({2}))", //$NON-NLS-1$
id,
jobContext.getJobID(),
jobContext.getJobName()));
}
OutputTransactionContext context = createContext(jobContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(containerPath);
repo.setupTransactionOutput(context);
} catch (IOException e) {
LOG.error(MessageFormat.format(
"Failed Direct I/O output job setup: datasource={0} ({1} ({2}))",
id,
jobContext.getJobID(),
jobContext.getJobName()), e);
throw e;
} catch (InterruptedException e) {
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while setup transaction: {0}, (path={1})",
context.getTransactionId(),
containerPath)).initCause(e);
}
context.getCounter().add(1);
}
if (LOG.isInfoEnabled()) {
long t1 = System.currentTimeMillis();
LOG.info(MessageFormat.format(
"initialized Direct I/O output: job={0} ({1}), elapsed={2}ms",
jobContext.getJobID(),
jobContext.getJobName(),
t1 - t0));
}
}
/**
 * Deletes existing files matching each spec's delete patterns before the new
 * output is produced. Specs without delete patterns are skipped.
 * @throws IOException if any deletion fails (wraps interruption as InterruptedIOException)
 */
private void cleanOutput(JobContext jobContext) throws IOException {
assert jobContext != null;
for (OutputSpec spec : outputSpecs) {
// skip outputs with nothing to delete
if (spec.deletePatterns.isEmpty()) {
continue;
}
String id = repository.getRelatedId(spec.basePath);
OutputTransactionContext context = createContext(jobContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(spec.basePath);
// component path: the base path relative to the datasource's container
String basePath = repository.getComponentPath(spec.basePath);
for (String pattern : spec.deletePatterns) {
FilePattern resources = FilePattern.compile(pattern);
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Deleting output: datasource={0}, basePath={1}, pattern={2}", //$NON-NLS-1$
id,
basePath,
pattern));
}
// recursive delete; returns whether anything was actually removed
boolean succeed = repo.delete(basePath, resources, true, context.getCounter());
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Deleted output (succeed={3}): " //$NON-NLS-1$
+ "datasource={0}, basePath={1}, pattern={2}", //$NON-NLS-1$
id,
basePath,
pattern,
succeed));
}
}
} catch (IOException e) {
LOG.error(MessageFormat.format(
"Failed directio job setup: datasource={0} ({1} ({2}))",
id,
jobContext.getJobID(),
jobContext.getJobName()), e);
throw e;
} catch (InterruptedException e) {
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while setup cleaning output: datasource={0} ({1} ({2}))",
id,
jobContext.getJobID(),
jobContext.getJobName())).initCause(e);
}
}
}
/**
 * Commits the job: places the commit mark, then rolls the staged outputs forward
 * and cleans up via {@link #doCleanupJob(JobContext)}.
 * No-op when there are no Direct I/O outputs.
 * @throws IOException if commit processing fails
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
    if (outputMap.isEmpty()) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "start Direct I/O output job commit: {0} ({1})", //$NON-NLS-1$
                jobContext.getJobID(),
                jobContext.getJobName()));
    }
    long start = System.currentTimeMillis();
    // the commit mark makes the transaction roll forward even after a crash here
    setCommitted(jobContext, true);
    doCleanupJob(jobContext);
    if (LOG.isInfoEnabled()) {
        long elapsed = System.currentTimeMillis() - start;
        LOG.info(MessageFormat.format(
                "committed Direct I/O output: job={0} ({1}), elapsed={2}ms",
                jobContext.getJobID(),
                jobContext.getJobName(),
                elapsed));
    }
}
/**
 * Creates ({@code value == true}) or deletes ({@code value == false}) the
 * transaction info file which records job metadata for this transaction.
 * @throws IOException if the file system operation fails
 */
private void setTransactionInfo(JobContext jobContext, boolean value) throws IOException {
Configuration conf = jobContext.getConfiguration();
Path transactionInfo = getTransactionInfoPath(jobContext);
FileSystem fs = transactionInfo.getFileSystem(conf);
if (value) {
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Creating Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
jobContext.getJobID(),
jobContext.getJobName(),
fs.makeQualified(transactionInfo)));
}
// SafeOutputStream guards against the writer closing the underlying stream twice;
// create(..., false) fails if the info file already exists
try (OutputStream output = new SafeOutputStream(fs.create(transactionInfo, false));
PrintWriter writer = new PrintWriter(
new OutputStreamWriter(output, HadoopDataSourceUtil.COMMENT_CHARSET))) {
writer.printf(" User Name: %s%n", //$NON-NLS-1$
conf.getRaw(StageConstants.PROP_USER));
writer.printf(" Batch ID: %s%n", //$NON-NLS-1$
conf.getRaw(StageConstants.PROP_BATCH_ID));
writer.printf(" Flow ID: %s%n", //$NON-NLS-1$
conf.getRaw(StageConstants.PROP_FLOW_ID));
writer.printf(" Execution ID: %s%n", //$NON-NLS-1$
conf.getRaw(StageConstants.PROP_EXECUTION_ID));
writer.printf("Batch Arguments: %s%n", //$NON-NLS-1$
conf.getRaw(StageConstants.PROP_ASAKUSA_BATCH_ARGS));
writer.printf(" Hadoop Job ID: %s%n", //$NON-NLS-1$
jobContext.getJobID());
writer.printf("Hadoop Job Name: %s%n", //$NON-NLS-1$
jobContext.getJobName());
}
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Finish creating Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
jobContext.getJobID(),
jobContext.getJobName(),
fs.makeQualified(transactionInfo)));
}
// at trace level, echo the file content back into the log for diagnostics
if (LOG.isTraceEnabled()) {
try (FSDataInputStream input = fs.open(transactionInfo);
Scanner scanner = new Scanner(new InputStreamReader(
input, HadoopDataSourceUtil.COMMENT_CHARSET))) {
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
LOG.trace(">> " + line); //$NON-NLS-1$
}
}
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Deleting Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
jobContext.getJobID(),
jobContext.getJobName(),
fs.makeQualified(transactionInfo)));
}
fs.delete(transactionInfo, false);
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Finish deleting Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
jobContext.getJobID(),
jobContext.getJobName(),
fs.makeQualified(transactionInfo)));
}
}
}
/**
 * Places ({@code value == true}) or removes ({@code value == false}) the commit
 * mark file; its presence tells recovery that the transaction must roll forward.
 * @throws IOException if the file system operation fails
 */
private void setCommitted(JobContext jobContext, boolean value) throws IOException {
    Configuration conf = jobContext.getConfiguration();
    Path commitMark = getCommitMarkPath(jobContext);
    FileSystem fs = commitMark.getFileSystem(conf);
    boolean debug = LOG.isDebugEnabled();
    if (value) {
        if (debug) {
            LOG.debug(MessageFormat.format(
                    "Creating Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                    jobContext.getJobID(),
                    jobContext.getJobName(),
                    fs.makeQualified(commitMark)));
        }
        // an empty file; create(..., false) fails if the mark already exists
        fs.create(commitMark, false).close();
        if (debug) {
            LOG.debug(MessageFormat.format(
                    "Finish creating Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                    jobContext.getJobID(),
                    jobContext.getJobName(),
                    fs.makeQualified(commitMark)));
        }
    } else {
        if (debug) {
            LOG.debug(MessageFormat.format(
                    "Deleting Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                    jobContext.getJobID(),
                    jobContext.getJobName(),
                    fs.makeQualified(commitMark)));
        }
        fs.delete(commitMark, false);
        if (debug) {
            LOG.debug(MessageFormat.format(
                    "Finish deleting Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                    jobContext.getJobID(),
                    jobContext.getJobName(),
                    fs.makeQualified(commitMark)));
        }
    }
}
/**
 * Checks whether the commit mark exists, i.e. the job reached the committed state.
 * @throws IOException if the file system cannot be queried
 */
private boolean isCommitted(JobContext jobContext) throws IOException {
    Path commitMark = getCommitMarkPath(jobContext);
    return commitMark.getFileSystem(jobContext.getConfiguration()).exists(commitMark);
}
/**
 * Aborts the job; for the FAILED state the transaction work area is cleaned up
 * (rolling forward first if the commit mark was already placed).
 * No-op when there are no Direct I/O outputs.
 * @throws IOException if cleanup fails
 */
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
    if (outputMap.isEmpty()) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "Start Direct I/O output job abort: job={0} ({1}), state={2}", //$NON-NLS-1$
                jobContext.getJobID(),
                jobContext.getJobName(),
                state));
    }
    long start = System.currentTimeMillis();
    if (state == State.FAILED) {
        doCleanupJob(jobContext);
    }
    if (LOG.isInfoEnabled()) {
        long elapsed = System.currentTimeMillis() - start;
        LOG.info(MessageFormat.format(
                "aborted Direct I/O output: job={0} ({1}), state={2}, elapsed={3}ms",
                jobContext.getJobID(),
                jobContext.getJobName(),
                state,
                elapsed));
    }
}
/**
 * Finishes the transaction: rolls staged output forward if the commit mark exists,
 * then removes the work area, the commit mark, and the transaction info — in that
 * order, so a crash at any point leaves a recoverable state.
 * @throws IOException if any step fails
 */
private void doCleanupJob(JobContext jobContext) throws IOException {
if (isCommitted(jobContext)) {
rollforward(jobContext);
}
cleanup(jobContext);
setCommitted(jobContext, false);
setTransactionInfo(jobContext, false);
}
/**
 * Moves the staged transaction output of every datasource into its final location.
 * Called only after the commit mark has been placed.
 * @throws IOException if any datasource fails (wraps interruption as InterruptedIOException)
 */
private void rollforward(JobContext jobContext) throws IOException {
assert jobContext != null;
for (Map.Entry<String, String> entry : outputMap.entrySet()) {
String containerPath = entry.getKey();
String id = entry.getValue();
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Start Direct I/O output job rollforward: datasource={0} ({1} ({2}))", //$NON-NLS-1$
id,
jobContext.getJobID(),
jobContext.getJobName()));
}
OutputTransactionContext context = createContext(jobContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(containerPath);
repo.commitTransactionOutput(context);
} catch (IOException e) {
// log and rethrow; the commit mark stays so recovery can retry the rollforward
LOG.error(MessageFormat.format(
"Failed Direct I/O output job rollforward: datasource={0} ({1} ({2}))",
id,
jobContext.getJobID(),
jobContext.getJobName()), e);
throw e;
} catch (InterruptedException e) {
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while commit transaction: {0}, (path={1})",
context.getTransactionId(),
containerPath)).initCause(e);
}
context.getCounter().add(1);
}
}
/**
 * Removes the transaction work area on every datasource.
 * @throws IOException if any datasource fails (wraps interruption as InterruptedIOException)
 */
private void cleanup(JobContext jobContext) throws IOException {
for (Map.Entry<String, String> entry : outputMap.entrySet()) {
String containerPath = entry.getKey();
String id = entry.getValue();
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format(
"Start Direct I/O output job cleanup: datasource={0} ({1} ({2}))", //$NON-NLS-1$
id,
jobContext.getJobID(),
jobContext.getJobName()));
}
OutputTransactionContext context = createContext(jobContext, id);
try {
DirectDataSource repo = repository.getRelatedDataSource(containerPath);
repo.cleanupTransactionOutput(context);
} catch (IOException e) {
LOG.error(MessageFormat.format(
"Failed Direct I/O output job cleanup: datasource={0} ({1} ({2}))",
id,
jobContext.getJobID(),
jobContext.getJobName()), e);
throw e;
} catch (InterruptedException e) {
throw (IOException) new InterruptedIOException(MessageFormat.format(
"Interrupted while cleanup transaction: {0}, (path={1})",
context.getTransactionId(),
containerPath)).initCause(e);
}
context.getCounter().add(1);
}
}
/**
 * Resolves the transaction info file path for the job's execution ID.
 * @param context the current job context (must not be {@code null})
 * @return the transaction info path
 * @throws IOException if the path cannot be resolved
 */
private static Path getTransactionInfoPath(JobContext context) throws IOException {
    assert context != null;
    Configuration conf = context.getConfiguration();
    return HadoopDataSourceUtil.getTransactionInfoPath(conf, conf.get(StageConstants.PROP_EXECUTION_ID));
}
/**
 * Resolves the commit mark file path for the job's execution ID.
 * @param context the current job context (must not be {@code null})
 * @return the commit mark path
 * @throws IOException if the path cannot be resolved
 */
private static Path getCommitMarkPath(JobContext context) throws IOException {
    assert context != null;
    Configuration conf = context.getConfiguration();
    return HadoopDataSourceUtil.getCommitMarkPath(conf, conf.get(StageConstants.PROP_EXECUTION_ID));
}
}
/**
 * An {@link OutputStream} wrapper whose {@code close()} is idempotent: the
 * delegate is closed at most once even if wrapping writers close it again.
 */
private static class SafeOutputStream extends OutputStream {
    private final OutputStream delegate;
    // flips to true on the first close(); later closes become no-ops
    private final AtomicBoolean closed = new AtomicBoolean();
    SafeOutputStream(OutputStream delegate) {
        this.delegate = delegate;
    }
    @Override
    public void write(int b) throws IOException {
        delegate.write(b);
    }
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        delegate.write(b, off, len);
    }
    @Override
    public void flush() throws IOException {
        // FIX: OutputStream.flush() is a no-op by default, so flushes from wrapping
        // writers never reached the delegate; forward them explicitly
        delegate.flush();
    }
    @Override
    public void close() throws IOException {
        if (closed.compareAndSet(false, true)) {
            delegate.close();
        }
    }
}
}