/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.stage.directio;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.util.ReflectionUtils;

import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DataDefinition;
import com.asakusafw.runtime.directio.DataFormat;
import com.asakusafw.runtime.directio.DirectDataSource;
import com.asakusafw.runtime.directio.DirectDataSourceRepository;
import com.asakusafw.runtime.directio.OutputAttemptContext;
import com.asakusafw.runtime.directio.SimpleDataDefinition;
import com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil;
import com.asakusafw.runtime.flow.MapperWithRuntimeResource;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.stage.StageConstants;
import com.asakusafw.runtime.stage.output.BridgeOutputFormat;
import com.asakusafw.runtime.util.VariableTable;

/**
 * Mapper which directly creates files for direct output.
 * @param <T> target data type
 * @since 0.4.0
 * @version 0.8.1
 */
public abstract class AbstractNoReduceDirectOutputMapper<T> extends MapperWithRuntimeResource<
        Object, T, Object, Object> {

    private final Log log;

    private final Class<? extends T> dataType;

    private final String outputId;

    private final String rawBasePath;

    private final String rawResourcePath;

    private final Class<? extends DataFormat<? super T>> dataFormatClass;

    /**
     * Creates a new instance.
     * @param dataType target data type
     * @param rawBasePath target base path
     * @param rawResourcePath target resource path
     * @param dataFormatClass output data format
     * @throws IllegalArgumentException if some parameters are {@code null}
     */
    public AbstractNoReduceDirectOutputMapper(
            Class<? extends T> dataType,
            String rawBasePath, String rawResourcePath,
            Class<? extends DataFormat<? super T>> dataFormatClass) {
        this(dataType, null, rawBasePath, rawResourcePath, dataFormatClass);
    }

    /**
     * Creates a new instance.
     * @param dataType target data type
     * @param outputId the output ID (nullable)
     * @param rawBasePath target base path
     * @param rawResourcePath target resource path
     * @param dataFormatClass output data format
     * @throws IllegalArgumentException if some parameters are {@code null}
     * @since 0.8.1
     */
    public AbstractNoReduceDirectOutputMapper(
            Class<? extends T> dataType,
            String outputId,
            String rawBasePath, String rawResourcePath,
            Class<? extends DataFormat<? super T>> dataFormatClass) {
        if (dataType == null) {
            throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
        }
        if (rawBasePath == null) {
            throw new IllegalArgumentException("rawBasePath must not be null"); //$NON-NLS-1$
        }
        if (rawResourcePath == null) {
            throw new IllegalArgumentException("rawResourcePath must not be null"); //$NON-NLS-1$
        }
        if (dataFormatClass == null) {
            throw new IllegalArgumentException("dataFormatClass must not be null"); //$NON-NLS-1$
        }
        this.log = LogFactory.getLog(getClass());
        this.dataType = dataType;
        this.outputId = outputId;
        this.rawBasePath = rawBasePath;
        this.rawResourcePath = rawResourcePath;
        this.dataFormatClass = dataFormatClass;
    }

    @Override
    protected void runInternal(Context context) throws IOException, InterruptedException {
        if (context.nextKeyValue() == false) {
            if (log.isDebugEnabled()) {
                log.debug(MessageFormat.format(
                        "There is no input for direct output Mapper {0}@{1}", //$NON-NLS-1$
                        getClass().getName(),
                        context.getTaskAttemptID()));
            }
        } else {
            if (log.isDebugEnabled()) {
                log.debug(MessageFormat.format(
                        "Start setup of direct output Mapper {0}@{1}", //$NON-NLS-1$
                        getClass().getName(),
                        context.getTaskAttemptID()));
            }
            // resolve the target data source from the base path,
            // after expanding batch arguments as variables
            DirectDataSourceRepository repository = HadoopDataSourceUtil.loadRepository(context.getConfiguration());
            String arguments = context.getConfiguration().get(StageConstants.PROP_ASAKUSA_BATCH_ARGS, ""); //$NON-NLS-1$
            VariableTable variables = new VariableTable(VariableTable.RedefineStrategy.IGNORE);
            variables.defineVariables(arguments);
            String path = variables.parse(rawBasePath, false);
            String sourceId = repository.getRelatedId(path);
            OutputAttemptContext outputContext = BridgeOutputFormat.createContext(context, sourceId);
            DataFormat<? super T> format = ReflectionUtils.newInstance(dataFormatClass, context.getConfiguration());
            DirectDataSource datasource = repository.getRelatedDataSource(path);
            String basePath = repository.getComponentPath(path);
            // replace each '*' in the resource path with the zero-padded task ID,
            // so that concurrent tasks write to distinct resources
            String unresolvedResourcePath = rawResourcePath.replaceAll(
                    Pattern.quote("*"), //$NON-NLS-1$
                    String.format("%04d", context.getTaskAttemptID().getTaskID().getId())); //$NON-NLS-1$
            String resourcePath = variables.parse(unresolvedResourcePath);
            DataDefinition<? super T> definition = SimpleDataDefinition.newInstance(dataType, format);
            if (log.isDebugEnabled()) {
                log.debug(MessageFormat.format(
                        "Open mapper output (id={0}, basePath={1}, resourcePath={2})", //$NON-NLS-1$
                        sourceId,
                        basePath,
                        resourcePath));
            }
            // drain all input records into the direct output
            Counter counter = new Counter();
            int records = 0;
            try (ModelOutput<? super T> output = datasource.openOutput(
                    outputContext, definition, basePath, resourcePath, counter)) {
                do {
                    output.write(context.getCurrentValue());
                    records++;
                } while (context.nextKeyValue());
            } finally {
                if (log.isDebugEnabled()) {
                    log.debug(MessageFormat.format(
                            "Start cleanup of direct output Mapper {0}@{1}", //$NON-NLS-1$
                            getClass().getName(),
                            context.getTaskAttemptID()));
                }
            }
            org.apache.hadoop.mapreduce.Counter recordCounter = context.getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
            recordCounter.increment(records);
            Constants.putCounts(context, sourceId, outputId, 1, records, counter.get());
        }
    }
}
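
// Usage sketch (commented out; not part of the framework sources): a concrete
// subclass normally only supplies the constructor arguments. All names below
// (SalesModel, SalesModelCsvFormat, the "sales" output ID, and the paths) are
// hypothetical, chosen to illustrate the parameters of the 5-argument
// constructor above. Note that the base path may contain batch-argument
// variables resolved via VariableTable, and each '*' in the resource path is
// replaced with the zero-padded task ID at runtime.
//
//     public class SalesDirectOutputMapper
//             extends AbstractNoReduceDirectOutputMapper<SalesModel> {
//         public SalesDirectOutputMapper() {
//             super(SalesModel.class,
//                     "sales",              // output ID (nullable)
//                     "result/${date}",     // base path; "${date}" comes from batch arguments
//                     "sales-*.csv",        // resource path; '*' becomes e.g. "0003"
//                     SalesModelCsvFormat.class);
//         }
//     }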