/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.io.text.directio;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;
import com.asakusafw.runtime.directio.hadoop.ConfigurableBinaryStreamFormat;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.text.TextFormat;
import com.asakusafw.runtime.io.text.TextInput;
import com.asakusafw.runtime.io.text.TextOutput;
import com.asakusafw.runtime.io.text.driver.InputOption;
import com.asakusafw.runtime.io.text.driver.OutputOption;
import com.asakusafw.runtime.io.text.driver.RecordDefinition;
import com.asakusafw.runtime.value.StringOption;
/**
* An abstract implementation of Direct I/O data format for formatted text files.
* @param <T> the data type
* @since 0.9.1
*/
public abstract class AbstractTextStreamFormat<T> extends ConfigurableBinaryStreamFormat<T> {

    /** Input options used when the fragment starts at offset {@code 0} of the file. */
    private static final Set<InputOption> INPUT_OPTS_HEAD =
            Collections.unmodifiableSet(EnumSet.of(InputOption.FROM_FILE_HEAD));

    /** Input options used for every fragment that does not start at offset {@code 0}. */
    private static final Set<InputOption> INPUT_OPTS_REST = Collections.emptySet();

    /** Output options passed to every output created by this format. */
    private static final Set<OutputOption> OUTPUT_OPTS = Collections.emptySet();

    /** Holds the result of {@link #createTextFormat()} once it has been obtained. */
    private final AtomicReference<TextFormat> textFormatCache = new AtomicReference<>();

    /** Holds the result of {@link #createRecordDefinition()} once it has been obtained. */
    private final AtomicReference<RecordDefinition<T>> recordDefinitionCache = new AtomicReference<>();

    /**
     * Creates the {@link TextFormat} of this format.
     * The result is cached by {@link #getTextFormat()}.
     * @return the text format
     */
    protected abstract TextFormat createTextFormat();

    /**
     * Creates the {@link RecordDefinition} of this format.
     * The result is cached by {@link #getRecordDefinition()}.
     * @return the record structure definition
     */
    protected abstract RecordDefinition<T> createRecordDefinition();

    /**
     * Returns the input splitter of this format.
     * This implementation always returns {@code null}.
     * @return the input splitter, or {@code null} if input splitting is disabled
     */
    protected InputSplitter getInputSplitter() {
        return null;
    }

    /**
     * Returns the compression codec class.
     * This implementation always returns {@code null}.
     * @return the compression codec class, or {@code null} if compression is disabled
     */
    protected Class<? extends CompressionCodec> getCompressionCodecClass() {
        return null;
    }

    /**
     * Hook invoked after a record has been successfully read into the data object.
     * This implementation does nothing.
     * @param model the data object which has been filled with the current record
     * @param path the current input path
     * @param input the current input
     */
    protected void afterInput(T model, StringOption path, TextInput<T> input) {
        // nothing to do by default
    }

    /**
     * Hook invoked just before the data object is written to the output.
     * This implementation does nothing.
     * @param model the data object about to be written
     * @param path the current output path
     * @param output the current output
     */
    protected void beforeOutput(T model, StringOption path, TextOutput<T> output) {
        // nothing to do by default
    }

    /**
     * Returns the {@link TextFormat}, creating it via {@link #createTextFormat()} if it is not cached yet.
     * @return the text format
     */
    public final TextFormat getTextFormat() {
        return cached(this::createTextFormat, textFormatCache);
    }

    /**
     * Returns the {@link RecordDefinition}, creating it via {@link #createRecordDefinition()}
     * if it is not cached yet.
     * @return the record structure definition
     */
    public final RecordDefinition<T> getRecordDefinition() {
        return cached(this::createRecordDefinition, recordDefinitionCache);
    }

    /**
     * Returns the value held by the cache, asking the factory for one when the cache is empty.
     * The factory result is only stored if the cache is still empty at that moment, and the
     * value returned is always whatever the cache holds afterwards.
     * @param <U> the value type
     * @param factory supplies a new value when the cache holds {@code null}
     * @param cache the cache cell
     * @return the cached value
     */
    private static <U> U cached(Supplier<U> factory, AtomicReference<U> cache) {
        U value = cache.get();
        if (value == null) {
            // only the first stored value wins; re-read so every caller sees the same instance
            cache.compareAndSet(null, factory.get());
            value = cache.get();
        }
        return value;
    }

    /**
     * Returns the preferred fragment size reported by the input splitter.
     * @return the splitter's preferred size, or {@code -1} if there is no input splitter
     */
    @Override
    public final long getPreferredFragmentSize() {
        InputSplitter splitter = getInputSplitter();
        if (splitter == null) {
            return -1L;
        }
        return splitter.getPreferredSize();
    }

    /**
     * Returns the minimum fragment size reported by the input splitter.
     * @return the splitter's lower limit size, or {@code -1} if there is no input splitter
     */
    @Override
    public final long getMinimumFragmentSize() {
        InputSplitter splitter = getInputSplitter();
        if (splitter == null) {
            return -1L;
        }
        return splitter.getLowerLimitSize();
    }

    @Override
    public ModelInput<T> createInput(
            Class<? extends T> dataType,
            String path,
            InputStream stream,
            long offset, long fragmentSize) throws IOException, InterruptedException {
        TextFormat format = getTextFormat();
        RecordDefinition<T> record = getRecordDefinition();
        // only the fragment at the very beginning of the file is flagged as the file head
        Set<InputOption> options;
        if (offset == 0) {
            options = INPUT_OPTS_HEAD;
        } else {
            options = INPUT_OPTS_REST;
        }
        InputStream source = decorate(stream, offset, fragmentSize);
        TextInput<T> input = record.newInput(format.open(source), path, options);
        return new DecoratedInput(input, new StringOption(path));
    }

    @Override
    public ModelOutput<T> createOutput(
            Class<? extends T> dataType,
            String path,
            OutputStream stream) throws IOException, InterruptedException {
        TextFormat format = getTextFormat();
        RecordDefinition<T> record = getRecordDefinition();
        OutputStream destination = decorate(stream);
        TextOutput<T> output = record.newOutput(format.open(destination), path, OUTPUT_OPTS);
        return new DecoratedOutput(output, new StringOption(path));
    }

    /**
     * Wraps the raw input stream: trims it with the input splitter when one is available,
     * otherwise decompresses it when a compression codec class is available,
     * otherwise returns it unchanged.
     * @param stream the raw input stream
     * @param offset the fragment offset
     * @param splitSize the fragment size, where {@code -1} is replaced by {@link Long#MAX_VALUE}
     * @return the stream to read text from
     * @throws IOException if the wrapping stream could not be created
     */
    private InputStream decorate(InputStream stream, long offset, long splitSize) throws IOException {
        InputSplitter splitter = getInputSplitter();
        if (splitter == null) {
            Class<? extends CompressionCodec> codecClass = getCompressionCodecClass();
            if (codecClass == null) {
                return stream;
            }
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, getConf());
            return codec.createInputStream(stream);
        }
        // the splitter path never applies a codec, so one must not be configured alongside it
        assert getCompressionCodecClass() == null;
        long limit = splitSize == -1L ? Long.MAX_VALUE : splitSize;
        return splitter.trim(stream, offset, limit);
    }

    /**
     * Wraps the raw output stream with the compression codec when a codec class is available,
     * otherwise returns it unchanged.
     * @param stream the raw output stream
     * @return the stream to write text to
     * @throws IOException if the wrapping stream could not be created
     */
    private OutputStream decorate(OutputStream stream) throws IOException {
        Class<? extends CompressionCodec> codecClass = getCompressionCodecClass();
        if (codecClass == null) {
            return stream;
        }
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, getConf());
        return codec.createOutputStream(stream);
    }

    /**
     * A {@link ModelInput} that delegates to a {@link TextInput} and invokes
     * {@link AbstractTextStreamFormat#afterInput(Object, StringOption, TextInput)}
     * after each successful read.
     */
    private class DecoratedInput implements ModelInput<T> {

        private final TextInput<T> delegate;

        private final StringOption location;

        DecoratedInput(TextInput<T> input, StringOption path) {
            this.delegate = input;
            this.location = path;
        }

        @Override
        public boolean readTo(T model) throws IOException {
            if (!delegate.readTo(model)) {
                return false;
            }
            afterInput(model, location, delegate);
            return true;
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }
    }

    /**
     * A {@link ModelOutput} that invokes
     * {@link AbstractTextStreamFormat#beforeOutput(Object, StringOption, TextOutput)}
     * before delegating each write to a {@link TextOutput}.
     */
    private class DecoratedOutput implements ModelOutput<T> {

        private final TextOutput<T> delegate;

        private final StringOption location;

        DecoratedOutput(TextOutput<T> output, StringOption path) {
            this.delegate = output;
            this.location = path;
        }

        @Override
        public void write(T model) throws IOException {
            beforeOutput(model, location, delegate);
            delegate.write(model);
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }
    }
}