/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.mapred; import java.io.BufferedInputStream; import java.io.IOException; import java.util.Iterator; import com.aliyun.odps.counter.Counter; import com.aliyun.odps.data.Record; import com.aliyun.odps.data.TableInfo; import com.aliyun.odps.data.VolumeInfo; import com.aliyun.odps.mapred.conf.JobConf; import com.aliyun.odps.volume.FileSystem; /** * MapReduce任务运行时的上下文信息 */ public interface TaskContext extends JobContext { /** * 获取当前运行Task的标识. * * @return 当前运行{@link TaskId} 的标识 */ TaskId getTaskID(); /** * 获取默认输出的表信息. * * @return 默认输出的表信息 {@link TableInfo} * @throws IOException */ TableInfo[] getOutputTableInfo() throws IOException; /** * 创建默认输出表的记录对象,创建的记录对象会包含表schema信息. * * <p> * 最好在{@link com.aliyun.odps.mapred.Mapper#setup(TaskContext)}和 * {@link com.aliyun.odps.mapred.Reducer#setup(TaskContext)} * 方法里创建输出记录对象然后重用,避免频繁进行输出记录对象的创建。 * * <p> * 代码示例: * * <pre> * private Record result; * * @Override * public void setup(TaskContext context) throws IOException { * result = context.createOutputRecord(); * } * * @Override * public void reduce(Record key, Iterator<Record> values, TaskContext context) * throws IOException { * long count = 0; * while (values.hasNext()) { * Record val = values.next(); * count += (Long) val.get(0); * } * result.set(0, key.get(0)); * result.set(1, count); * context.write(result); * } * * </pre> * * </p> * * @return 默认输出表的记录对象 * @throws IOException * @see #write(Record) */ Record createOutputRecord() throws IOException; /** * 创建给定label输出表的记录对象,创建的记录对象会包含表schema信息. * * <p> * 最好在{@link com.aliyun.odps.mapred.Mapper#setup(TaskContext)}和 * {@link com.aliyun.odps.mapred.Reducer#setup(TaskContext)} * 方法里创建输出记录对象然后重用,避免频繁进行输出记录对象的创建。 * * <p> * 代码示例: * * <pre> * private Map<String, Record> result; * * @Override * public void setup(TaskContext context) throws IOException { * TableInfo[] tbls = context.getOutputTableInfo(); * result = new HashMap<String, Record>(); * for (TableInfo tbl : tbls) { * String lable = tbl.getLabel(); * result.put(lable, context.createOutputRecord(lable)); * } * } * * @Override * public void reduce(Record key, Iterator<Record> values, TaskContext context) * throws IOException { * long count = 0; * while (values.hasNext()) { * Record val = values.next(); * count += (Long) val.get(0); * } * * for (String lable : result.keySet()) { * Record r = result.get(lable); * r.set(0, key.get(0)); * r.set(1, count); * context.write(r, lable); * } * } * </pre> * * </p> * * @param label * 输出标签 * @return 给定label输出表的记录对象 * @throws IOException * @see #write(Record, String) */ Record createOutputRecord(String label) throws IOException; Record createOutputKeyRecord() throws IOException; Record createOutputValueRecord() throws IOException; /** * 创建Map输出Key的记录对象. * * @return {@link Record} * @throws IOException */ Record createMapOutputKeyRecord() throws IOException; /** * 创建Map输出Value的记录对象 * * @return {@link Record} * @throws IOException */ Record createMapOutputValueRecord() throws IOException; /** * 读取文件类型资源,返回一个带缓存的输入流. * * <p> * readResourceFileAsStream 支持边读边处理 * </p> * * @param resourceName * 资源名称 * @return 资源内容的{@link BufferedInputStream} * @throws IOException * 资源未声明、资源类型不匹配以及其他读取错误抛异常 */ BufferedInputStream readResourceFileAsStream(String resourceName) throws IOException; /** * 读取Archive文件类型资源,返回一个带缓存的输入流迭代器. * * <p> * readResourceArchiveAsStream 支持边读边处理 * </p> * * @param resourceName 资源名称 * @return 资源内容的{@link BufferedInputStream}的迭代器 * @throws IOException 资源未声明、资源类型不匹配以及其他读取错误抛异常 */ Iterable<BufferedInputStream> readResourceArchiveAsStream(String resourceName) throws IOException; /** * 读取Archive文件类型资源,返回一个带缓存的输入流的迭代器. * * <p> * readResourceArchiveAsStream 支持边读边处理 * </p> * * @param resourceName 资源名称 * @param relativePath Archive内部相对路径 * @return 资源内容的{@link BufferedInputStream}的迭代器 * @throws IOException 资源未声明、资源类型不匹配以及其他读取错误抛异常 */ Iterable<BufferedInputStream> readResourceArchiveAsStream(String resourceName, String relativePath) throws IOException; /** * 读取表类型资源,ODPS的小表(Table)也可以作为资源,大小限制参见ODPS的相关文档. * * <p> * 使用表类型资源步骤:<br/> * <ol> * <li>增加资源表: console < add table my_table partition(ds='1') as res_table; * <li>运行M/R作业命令: console < jar -resources res_table ...,或者使用API( * {@link JobConf#setResources(String)}) * <li>M/R里读取资源:context.readResourceTable("res_table"); * </ol> * </p> * * <p> * 代码示例: * * <pre> * Iterator<Record> iter = context.readResourceTable("res_table"); * while (iter.hasNext()) { * Record r = iter.next(); * // handle record * } * </pre> * * @param resourceName * 表类型资源名称 * @return 记录迭代器 * @throws IOException * 资源未声明、资源类型不匹配以及其他错误抛异常 */ Iterator<Record> readResourceTable(String resourceName) throws IOException; /** * 获取给定名称的Counter对象,name为{@link Enum} * * @param name * Counter名称 * @return Counter */ Counter getCounter(Enum<?> name); /** * 获取给定组名和名称的 Counter 对象. * * @param group * Counter 组名 * @param name * Counter 名 * @return Counter */ Counter getCounter(String group, String name); /** * 向 MapReduce 框架报告进度. * * 如果用户方法处理时间很长,且中间没有调用框架,可以调用这个方法避免task超时,框架默认600秒超时. */ void progress(); /** * 写记录到默认输出. * * @param {@link * Record} * 待写出的记录对象 * @throws IOException * @see #createOutputRecord() */ public void write(Record record) throws IOException; /** * 写记录到给定标签输出. * * @param record * 待写出的记录对象 * @throws IOException * @see #createOutputRecord(String) */ public void write(Record record, String label) throws IOException; /** * Map写记录到中间结果 * * @param key * @param value * @throws IOException */ public void write(Record key, Record value) throws IOException; /** * 获得默认的输入volume的描述信息 * * @return VolumeInfo * @throws IOException */ VolumeInfo getInputVolumeInfo() throws IOException; /** * 获得给定label的输入volume的描述信息 * * @return VolumeInfo * @throws IOException */ VolumeInfo getInputVolumeInfo(String label) throws IOException; /** * 获取默认的输出volume的描述信息 * * @return VolumeInfo * @throws IOException */ VolumeInfo getOutputVolumeInfo() throws IOException; /** * 获取给定label的输出volume的描述信息 * * @param label * @return VolumeInfo * @throws IOException */ VolumeInfo getOutputVolumeInfo(String label) throws IOException; /** * 获取默认的输入volume fileSystem * * @return FileSystem * @throws IOException */ FileSystem getInputVolumeFileSystem() throws IOException; /** * 获取给定label的输入volume fileSystem * * @param label * 输出label * @return FileSystem * @throws IOException */ FileSystem getInputVolumeFileSystem(String label) throws IOException; /** * 获取默认的输出volume fileSystem * * @return FileSystem * @throws IOException */ FileSystem getOutputVolumeFileSystem() throws IOException; /** * 获取给定label的输出volume fileSystem * * @param label * 输出label * @return FileSystem * @throws IOException */ FileSystem getOutputVolumeFileSystem(String label) throws IOException; }