/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.local;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.counter.Counter;
import com.aliyun.odps.counter.Counters;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.data.VolumeInfo;
import com.aliyun.odps.local.common.FileSplit;
import com.aliyun.odps.local.common.TableMeta;
import com.aliyun.odps.local.common.WareHouse;
import com.aliyun.odps.local.common.utils.PartitionUtils;
import com.aliyun.odps.mapred.Mapper;
import com.aliyun.odps.mapred.Mapper.TaskContext;
import com.aliyun.odps.mapred.Partitioner;
import com.aliyun.odps.mapred.Reducer;
import com.aliyun.odps.mapred.TaskId;
import com.aliyun.odps.mapred.bridge.ErrorCode;
import com.aliyun.odps.mapred.bridge.WritableRecord;
import com.aliyun.odps.mapred.bridge.type.ColumnBasedRecordComparator;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.utils.ReflectionUtils;
import com.aliyun.odps.volume.FileSystem;

/**
 * Runs a single map task in the local job runner. Records are read from a
 * CSV file split and fed to the user's {@link Mapper}; map output is either
 * written straight to the output writers (map-only jobs) or staged in a
 * {@link MapOutputBuffer} for the reduce side, optionally passing through a
 * combiner first.
 */
public class MapDriver extends DriverBase {

  public static final Log LOG = LogFactory.getLog(MapDriver.class);

  private TaskContext mapContext;
  MapOutputBuffer outputBuffer;
  private Counters counters;
  private FileSplit split;

  /**
   * Map context that forwards intermediate records directly to the shared
   * {@link MapOutputBuffer}, with no combiner in between.
   */
  class DirectMapContextImpl extends LocalTaskContext implements TaskContext {

    int rowNumber = 0; // 1-based record number, advanced by nextRecord()
    protected RecordReader reader;
    Record record;
    protected Counter mapOutputRecordCounter;
    protected TableInfo inputTableInfo;
    protected Partitioner partitioner;

    public DirectMapContextImpl(BridgeJobConf conf, TaskId taskid, Counters counters,
                                RecordReader reader, TableInfo inputTableInfo) throws IOException {
      super(conf, taskid, counters);
      this.reader = reader;
      mapOutputRecordCounter = counters.findCounter(JobCounter.MAP_OUTPUT_RECORDS);
      this.inputTableInfo = inputTableInfo;

      // In pipe mode the mapper and partitioner come from the current
      // pipeline node; otherwise they are taken from the job configuration.
      Class<? extends Partitioner> partitionerClass;
      if (pipeMode) {
        conf.setMapperClass(pipeNode.getTransformClass());
        partitionerClass = pipeNode.getPartitionerClass();
      } else {
        partitionerClass = getJobConf().getPartitionerClass();
      }
      if (partitionerClass != null) {
        partitioner = ReflectionUtils.newInstance(partitionerClass, getJobConf());
        partitioner.configure(conf);
      }
    }
    @Override
    public long getCurrentRecordNum() {
      return rowNumber;
    }

    @Override
    public Record getCurrentRecord() {
      return record;
    }

    @Override
    public boolean nextRecord() {
      try {
        record = reader.read();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      if (record == null) {
        return false;
      }
      rowNumber++; // keep getCurrentRecordNum() in step with the record just read
      return true;
    }

    @Override
    public void write(Record record) throws IOException {
      mapOutputRecordCounter.increment(1);
      write(record, TableInfo.DEFAULT_LABEL);
    }

    @Override
    public void write(Record record, String label) throws IOException {
      // Writing final output from a mapper is only legal in map-only jobs.
      if (getNumReduceTasks() > 0) {
        throw new UnsupportedOperationException(ErrorCode.UNEXPECTED_MAP_WRITE_OUTPUT.toString());
      }
      if (outputBuffer != null) {
        outputBuffer.add(record, label);
      }
      recordWriters.get(label).write(record);
      counters.findCounter(JobCounter.__EMPTY_OUTPUT_RECORD_COUNT).increment(1);
    }

    @Override
    public void write(Record key, Record value) {
      // Writing intermediate (key, value) pairs requires a reduce phase.
      if (getNumReduceTasks() == 0) {
        throw new UnsupportedOperationException(ErrorCode.UNEXPECTED_MAP_WRITE_INTER.toString());
      }
      mapOutputRecordCounter.increment(1);
      if (partitioner != null) {
        int part = partitioner.getPartition(key, value, getNumReduceTasks());
        if (part < 0 || part >= getNumReduceTasks()) {
          throw new RuntimeException("partitioner returned invalid partition value: " + part);
        }
        outputBuffer.add(key, value, part);
      } else {
        outputBuffer.add(key, value);
      }
      counters.findCounter(JobCounter.__EMPTY_OUTPUT_RECORD_COUNT).increment(1);
    }

    public void close() throws IOException {
      reader.close();
      closeWriters();
    }

    @Override
    public TableInfo getInputTableInfo() {
      return inputTableInfo;
    }

    // Output-record creation and volume access are not supported by the local
    // map driver; the methods below deliberately return null.

    @Override
    public Record createOutputKeyRecord() throws IOException {
      return null;
    }

    @Override
    public Record createOutputValueRecord() throws IOException {
      return null;
    }

    @Override
    public VolumeInfo getInputVolumeInfo() throws IOException {
      return null;
    }

    @Override
    public VolumeInfo getInputVolumeInfo(String label) throws IOException {
      return null;
    }

    @Override
    public VolumeInfo getOutputVolumeInfo() throws IOException {
      return null;
    }

    @Override
    public VolumeInfo getOutputVolumeInfo(String label) throws IOException {
      return null;
    }

    @Override
    public FileSystem getInputVolumeFileSystem() throws IOException {
      return null;
    }

    @Override
    public FileSystem getInputVolumeFileSystem(String label) throws IOException {
      return null;
    }

    @Override
    public FileSystem getOutputVolumeFileSystem() throws IOException {
      return null;
    }

    @Override
    public FileSystem getOutputVolumeFileSystem(String label) throws IOException {
      return null;
    }
  }
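  // A minimal sketch of the Partitioner contract that write(key, value) above
  // enforces: getPartition must return a value in [0, numPartitions). The
  // class name and the first-column hashing scheme are illustrative
  // assumptions, not part of this driver.
  //
  //   public class FirstColumnHashPartitioner extends Partitioner {
  //     @Override
  //     public int getPartition(Record key, Record value, int numPartitions) {
  //       Object first = key.get(0);
  //       int hash = (first == null) ? 0 : first.hashCode();
  //       return (hash & Integer.MAX_VALUE) % numPartitions; // always in range
  //     }
  //   }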
  /**
   * Map context that buffers intermediate (key, value) pairs in a local queue
   * and, on close, sorts the queue and runs the configured combiner over it
   * before the data reaches the reduce side.
   */
  class ProxiedMapContextImpl extends DirectMapContextImpl implements TaskContext {

    /**
     * Reducer-style context the combiner runs in; it walks the sorted
     * map-output queue one key group at a time.
     */
    class CombinerContextImpl extends DirectMapContextImpl
        implements com.aliyun.odps.mapred.Reducer.TaskContext {

      private Record key;
      private Iterator<Record> itr;
      private Counter combineInputGroupCounter;
      private Counter combineOutputRecordCounter;

      public CombinerContextImpl(BridgeJobConf conf, TaskId taskid, Counters counters)
          throws IOException {
        super(conf, taskid, counters, null, null);
        combineInputGroupCounter = counters.findCounter(JobCounter.COMBINE_INPUT_GROUPS);
        combineOutputRecordCounter = counters.findCounter(JobCounter.COMBINE_OUTPUT_RECORDS);
      }

      @Override
      public boolean nextKeyValue() {
        if (itr == null) {
          // First call: build a grouping iterator over the sorted queue.
          Object[] init = queue.peek();
          if (init == null) {
            return false;
          }
          key = createMapOutputKeyRecord();
          Record value = createMapOutputValueRecord();
          String[] groupingColumns = getGroupingColumns();
          Comparator<Object[]> grpComparator =
              new ColumnBasedRecordComparator(groupingColumns, key.getColumns());
          itr = new LocalGroupingRecordIterator(queue, (WritableRecord) key,
              (WritableRecord) value, grpComparator, false, counters);
          key.set(Arrays.copyOf(init, key.getColumnCount()));
        } else {
          // Drain whatever the combiner left unconsumed, then advance the
          // iterator to the next key group.
          while (itr.hasNext()) {
            itr.remove();
          }
          if (!((LocalGroupingRecordIterator) itr).reset()) {
            return false;
          }
        }
        combineInputGroupCounter.increment(1);
        return true;
      }

      @Override
      public Record getCurrentKey() {
        return key;
      }

      @Override
      public Iterator<Record> getValues() {
        return itr;
      }

      @Override
      public void write(Record record) throws IOException {
        write(record, TableInfo.DEFAULT_LABEL);
        combineOutputRecordCounter.increment(1);
      }

      @Override
      public void write(Record record, String label) throws IOException {
        recordWriters.get(label).write(record);
      }

      @Override
      public void write(Record key, Record value) {
        if (partitioner != null) {
          int part = partitioner.getPartition(key, value, this.getNumReduceTasks());
          if (part < 0 || part >= this.getNumReduceTasks()) {
            throw new RuntimeException("partitioner returned invalid partition value: " + part);
          }
          outputBuffer.add(key, value, part);
        } else {
          outputBuffer.add(key, value);
        }
        combineOutputRecordCounter.increment(1);
      }

      // As in DirectMapContextImpl, output-record creation and volume access
      // are unsupported here and deliberately return null.

      @Override
      public Record createOutputKeyRecord() throws IOException {
        return null;
      }

      @Override
      public Record createOutputValueRecord() throws IOException {
        return null;
      }

      @Override
      public VolumeInfo getInputVolumeInfo() throws IOException {
        return null;
      }

      @Override
      public VolumeInfo getInputVolumeInfo(String label) throws IOException {
        return null;
      }

      @Override
      public VolumeInfo getOutputVolumeInfo() throws IOException {
        return null;
      }

      @Override
      public VolumeInfo getOutputVolumeInfo(String label) throws IOException {
        return null;
      }

      @Override
      public FileSystem getInputVolumeFileSystem() throws IOException {
        return null;
      }

      @Override
      public FileSystem getInputVolumeFileSystem(String label) throws IOException {
        return null;
      }

      @Override
      public FileSystem getOutputVolumeFileSystem() throws IOException {
        return null;
      }

      @Override
      public FileSystem getOutputVolumeFileSystem(String label) throws IOException {
        return null;
      }
    }
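    // A minimal sketch of a combiner that CombinerContextImpl would drive: a
    // plain Reducer that pre-aggregates one summed value per key group.
    // ReducerBase and the single-bigint value layout are illustrative
    // assumptions, not part of this driver.
    //
    //   public class SumCombiner extends ReducerBase {
    //     private Record count;
    //
    //     @Override
    //     public void setup(TaskContext context) throws IOException {
    //       count = context.createMapOutputValueRecord();
    //     }
    //
    //     @Override
    //     public void reduce(Record key, Iterator<Record> values, TaskContext context)
    //         throws IOException {
    //       long sum = 0;
    //       while (values.hasNext()) {
    //         sum += (Long) values.next().get(0);
    //       }
    //       count.set(0, sum);
    //       context.write(key, count); // routed to CombinerContextImpl.write(key, value) above
    //     }
    //   }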
    private LinkedList<Object[]> queue = new LinkedList<Object[]>();

    public ProxiedMapContextImpl(BridgeJobConf conf, TaskId taskid, Counters counters,
                                 RecordReader reader, TableInfo inputTableInfo) throws IOException {
      super(conf, taskid, counters, reader, inputTableInfo);
    }

    @Override
    public void write(Record key, Record value) {
      // Buffer the flattened (key, value) pair; the combiner consumes the
      // queue when the task closes.
      mapOutputRecordCounter.increment(1);
      queue.add(ArrayUtils.addAll(((WritableRecord) key).toWritableArray(),
                                  ((WritableRecord) value).toWritableArray()));
      counters.findCounter(JobCounter.__EMPTY_OUTPUT_RECORD_COUNT).increment(1);
    }

    @Override
    public void close() throws IOException {
      // Sort the buffered map output, then run the combiner over each key group.
      Collections.sort(queue, outputBuffer.getComparator());
      Reducer combiner = ReflectionUtils.newInstance(getCombinerClass(), conf);
      CombinerContextImpl combineCtx = new CombinerContextImpl(conf, taskId, counters);
      LOG.info("Start to run Combiner, TaskId: " + taskId);
      combiner.setup(combineCtx);
      while (combineCtx.nextKeyValue()) {
        combiner.reduce(combineCtx.getCurrentKey(), combineCtx.getValues(), combineCtx);
      }
      combiner.cleanup(combineCtx);
      super.close();
      LOG.info("Finished running Combiner, TaskId: " + taskId);
    }
  }

  public MapDriver(BridgeJobConf job, FileSplit split, TaskId id, final MapOutputBuffer buffer,
                   final Counters counters, TableInfo tableInfo) throws IOException {
    super(job, id, counters);
    this.outputBuffer = buffer;
    this.split = split;
    Counter mapInputByteCounter = counters.findCounter(JobCounter.MAP_INPUT_BYTES);
    Counter mapInputRecordCounter = counters.findCounter(JobCounter.MAP_INPUT_RECORDS);
    TableMeta tableMeta = null;
    if (tableInfo != null) {
      tableMeta = new TableMeta(tableInfo.getProjectName(), tableInfo.getTableName(), null);
    }
    RecordReader reader = new CSVRecordReader(split, tableMeta, mapInputRecordCounter,
        mapInputByteCounter, counters, WareHouse.getInstance().getInputColumnSeperator());
    // Route map output through the combiner path when one is configured.
    if (job.getCombinerClass() != null) {
      mapContext = new ProxiedMapContextImpl(job, taskId, counters, reader, tableInfo);
    } else {
      mapContext = new DirectMapContextImpl(job, id, counters, reader, tableInfo);
    }
    this.counters = counters;
  }

  public void run() throws IOException {
    Mapper mapper = ((LocalTaskContext) mapContext).createMapper();
    // Fill in the partition info of the input split so the mapper sees a
    // fully populated TableInfo.
    if (split != null && split != FileSplit.NullSplit) {
      File whInputFile = WareHouse.getInstance().getJobDir().convertInputFile(split.getFile());
      PartitionSpec partitionSpec = WareHouse.getInstance().resolvePartition(
          mapContext.getInputTableInfo().getProjectName(),
          mapContext.getInputTableInfo().getTableName(), whInputFile);
      mapContext.getInputTableInfo().setPartSpec(PartitionUtils.convert(partitionSpec));
    }
    mapper.setup(mapContext);
    while (mapContext.nextRecord()) {
      mapper.map(mapContext.getCurrentRecordNum(), mapContext.getCurrentRecord(), mapContext);
    }
    mapper.cleanup(mapContext);
    ((DirectMapContextImpl) mapContext).close();
  }
}
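// A minimal sketch of the Mapper contract that MapDriver.run() drives above
// (setup, then map once per input record, then cleanup). MapperBase, the
// column layout, and the names below are illustrative assumptions following
// the standard WordCount pattern; they are not part of this driver.
//
//   public class TokenizerMapper extends MapperBase {
//     private Record word;
//     private Record one;
//
//     @Override
//     public void setup(TaskContext context) throws IOException {
//       word = context.createMapOutputKeyRecord();
//       one = context.createMapOutputValueRecord();
//       one.setBigint(0, 1L);
//     }
//
//     @Override
//     public void map(long recordNum, Record record, TaskContext context)
//         throws IOException {
//       for (int i = 0; i < record.getColumnCount(); i++) {
//         word.setString(0, String.valueOf(record.get(i)));
//         context.write(word, one); // staged in MapOutputBuffer by this driver
//       }
//     }
//   }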