/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.udf;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.util.HashMap;
import com.aliyun.odps.counter.Counter;
import com.aliyun.odps.counter.Counters;
import com.aliyun.odps.data.VolumeInfo;
import com.aliyun.odps.volume.FileSystem;
/**
 * Runtime execution context made available to a UDF.
 * <p>
 * Exposes information such as the stage ID and worker ID the UDF is running in, user-defined
 * counters, and access to declared resources and volumes.
 * </p>
 */
public abstract class ExecutionContext {

  /** Default upper bound on the number of distinct user-defined counters. */
  private static final int MAX_COUNTERS_NUM = 64;

  /**
   * Upper bound on the combined length of a counter's group name and counter name, measured
   * with {@link String#length()}.
   */
  private static final int MAX_COUNTERS_LEN = 100;

  /** Names longer than this are truncated when echoed back in exception messages. */
  private static final int MAX_NAME_DISPLAY_LEN = 110;

  /** Stage ID returned by {@link #getStageID()}. */
  protected String stageID;

  /** Worker ID returned by {@link #getWorkerID()}. */
  protected int workerID;

  /** Table/partition description; {@code null} makes {@link #getTableInfo()} throw. */
  protected String tableInfo;

  // Not read anywhere in this class.
  // NOTE(review): presumably consumed by subclasses to locate resources — confirm.
  protected String localResourceDirectory;

  /** Effective limit on the number of user-defined counters; defaults to 64. */
  protected int countersNumberLimit = MAX_COUNTERS_NUM;

  /** Backing counter collection from which {@link Counter} objects are obtained. */
  protected Counters counters = new Counters();

  /** Cache of counters already handed out, keyed by {@code group + "#" + name}. */
  protected HashMap<String, Counter> userCounters = new HashMap<String, Counter>();

  /**
   * Returns the stage ID at runtime.
   *
   * @return the stage ID
   */
  public String getStageID() {
    return stageID;
  }

  /**
   * Returns the worker ID at runtime.
   *
   * @return the worker ID
   */
  public int getWorkerID() {
    return workerID;
  }

  /**
   * Returns the table or partition that the data block being processed belongs to.
   *
   * @return for a non-partitioned table, {@code "project_name.table_name"}; for a partitioned
   *         table, {@code "project_name.table_name/partition_name=partition_value"}. With
   *         multi-level partitions, the corresponding directory-like structure is returned.
   * @throws InvalidInvocationException if no table info is set, which is the case when the
   *         UDF is not running on the map side
   */
  public String getTableInfo() {
    if (tableInfo == null) {
      throw new InvalidInvocationException("table info is only available at mapper.");
    }
    return tableInfo;
  }

  /**
   * Call this while the {@link UDF} is running to keep the worker from being forcibly
   * terminated because of a timeout.
   * <p>
   * Each single invocation of a {@link UDF} at runtime ({@link UDF#evaluate()},
   * {@link UDTF#process(Object[])} and similar methods) may only consume a limited amount of
   * time; once that limit is exceeded the worker is forcibly terminated. When the {@link UDF}
   * performs compute-intensive work and one invocation needs more time, calling this method
   * resets the time limit and prevents early termination.
   * </p>
   */
  public abstract void claimAlive();

  /**
   * Returns the {@link Counter} identified by an {@link Enum} constant.
   * <p>
   * Equivalent to calling
   * </p>
   * <pre>
   * {@code
   * getCounter(name.getDeclaringClass().getName(), name.toString())
   * }
   * </pre>
   *
   * @param name
   *     identifier of the {@link Counter}
   * @return the {@link Counter} for that identifier
   * @throws IllegalArgumentException
   *     if {@code name} is {@code null}, or if the derived group/counter names violate the
   *     rules of {@link #getCounter(String, String)}
   * @see #getCounter(String, String)
   */
  public Counter getCounter(Enum<?> name) {
    if (name == null) {
      throw new IllegalArgumentException("Counter name must not be null.");
    }
    return getCounter(name.getDeclaringClass().getName(), name.toString());
  }

  /**
   * Returns the {@link Counter} identified by a group name and a counter name.
   * <p>
   * {@link Counter} information is eventually aggregated into the Summary of the SQL task.
   * </p>
   * <p>
   * Repeated calls with the same group and name return the same cached {@link Counter}.
   * </p>
   *
   * @param group
   *     group identifier
   * @param name
   *     counter identifier
   * @return the {@link Counter} for that identifier
   * @throws IllegalArgumentException
   *     if the group name or the counter name
   *     <ul>
   *     <li>is {@code null}</li>
   *     <li>is an empty string</li>
   *     <li>contains the character {@code '#'}</li>
   *     <li>when their lengths are added together, exceeds 100 characters</li>
   *     </ul>
   *     or if requesting a new counter would exceed the limit on the number of user-defined
   *     counters (64 by default)
   */
  public Counter getCounter(String group, String name) {
    // Validate presence before building the cache key: string concatenation turns a null
    // reference into the text "null", which would otherwise alias a counter legitimately
    // registered under the literal group/name "null" instead of being rejected.
    checkNamesPresent(group, name);

    String key = group + "#" + name;
    if (userCounters.containsKey(key)) {
      return userCounters.get(key);
    }

    // Remaining checks (illegal characters, length, counter count) only apply to new counters.
    checkUserDefinedCounters(group, name);
    Counter counter = counters.findCounter(group, name);
    userCounters.put(key, counter);
    return counter;
  }

  /**
   * Rejects {@code null} or empty group/counter names.
   *
   * @throws IllegalArgumentException if either name is {@code null} or empty
   */
  private void checkNamesPresent(String groupName, String counterName) {
    if (groupName == null || groupName.isEmpty()
        || counterName == null || counterName.isEmpty()) {
      throw new IllegalArgumentException("Group or counter name must not be null or empty.");
    }
  }

  /**
   * Validates a counter that is about to be created: neither name may contain {@code '#'}
   * (it is the separator used in the cache key), the combined name length may not exceed
   * {@link #MAX_COUNTERS_LEN}, and the number of counters already created must be below
   * {@link #countersNumberLimit}.
   * <p>
   * Both names must already have passed {@link #checkNamesPresent(String, String)}.
   * </p>
   *
   * @throws IllegalArgumentException if any of the rules above is violated
   */
  private void checkUserDefinedCounters(String groupName, String counterName) {
    if (groupName.contains("#")) {
      throw new IllegalArgumentException("Group name: '" + shortenName(groupName)
                                         + "' cannot contain character '#'");
    }
    if (counterName.contains("#")) {
      throw new IllegalArgumentException("Counter name: '" + shortenName(counterName)
                                         + "' cannot contain character '#'");
    }
    int maxLength = MAX_COUNTERS_LEN;
    if (groupName.length() + counterName.length() > maxLength) {
      throw new IllegalArgumentException("Length of group name '" + shortenName(groupName)
                                         + "' and counter name '" + shortenName(counterName)
                                         + "' exceeded limit " + maxLength);
    }
    if (userCounters.size() >= countersNumberLimit) {
      throw new IllegalArgumentException(
          "Total num of user defined counters exceeded limit "
          + countersNumberLimit);
    }
  }

  /**
   * Truncates an over-long name to its first {@link #MAX_NAME_DISPLAY_LEN} characters
   * followed by {@code "..."} so exception messages stay bounded in size.
   */
  private String shortenName(String name) {
    if (name.length() > MAX_NAME_DISPLAY_LEN) {
      return name.substring(0, MAX_NAME_DISPLAY_LEN) + "...";
    }
    return name;
  }

  /**
   * Reads a file-type resource and returns a buffered input stream.
   *
   * <p>
   * This method supports processing while reading (a common scenario is handling one line at
   * a time), which makes it suitable for larger file resources and helps avoid running out
   * of Java memory; it can equally be used for small file resources.<br />
   * {@link #readResourceFile(String)}, in contrast, reads the whole content into memory at
   * once.
   * </p>
   *
   * @param resourceName
   *     name of the resource
   * @return a {@link BufferedInputStream} over the resource content
   * @throws IOException
   *     if the resource is not declared, its type does not match, or another read error
   *     occurs
   * @see #readResourceFile(String)
   */
  public abstract BufferedInputStream readResourceFileAsStream(
      String resourceName) throws IOException;

  /**
   * Reads a file-type resource entirely into memory and returns it as a {@code byte[]}.
   *
   * <p>
   * For large resource files, use {@link #readResourceFileAsStream(String)} to obtain a
   * buffered input stream instead; it supports processing while reading and helps avoid
   * running out of Java memory.
   * </p>
   *
   * @param resourceName
   *     name of the resource
   * @return the resource content
   * @throws IOException
   *     if the resource is not declared, its type does not match, or another read error
   *     occurs
   * @see #readResourceFileAsStream(String)
   */
  public abstract byte[] readResourceFile(String resourceName)
      throws IOException;

  /**
   * Reads a table-type resource and returns it as an {@code Iterable<Object[]>}, where each
   * {@code Object} is the native Java type of the corresponding table column.
   *
   * @param resourceName
   *     name of the resource
   * @return an {@code Iterable<Object[]>} over the table records
   * @throws IOException
   *     if the resource is not declared, its type does not match, or another read error
   *     occurs
   */
  public abstract Iterable<Object[]> readResourceTable(String resourceName)
      throws IOException;

  /**
   * Reads an archive-type resource and returns an iterable of {@link BufferedInputStream}s.
   *
   * @param resourceName
   *     name of the resource
   * @return an iterable of {@link BufferedInputStream}s
   * @throws IOException
   *     if the resource is not declared, its type does not match, or another read error
   *     occurs
   */
  public abstract Iterable<BufferedInputStream> readCacheArchiveAsStream(String resourceName)
      throws IOException;

  /**
   * Reads an archive-type resource and returns an iterable of {@link BufferedInputStream}s.
   *
   * @param resourceName
   *     name of the resource
   * @param relativePath
   *     relative path of the content to read
   * @return an iterable of {@link BufferedInputStream}s
   * @throws IOException
   *     if the resource is not declared, its type does not match, or another read error
   *     occurs
   */
  public abstract Iterable<BufferedInputStream> readCacheArchiveAsStream(String resourceName,
      String relativePath) throws IOException;

  // NOTE(review): the volume accessors below carried no documentation; the descriptions are
  // derived from their names and signatures only and should be confirmed against the
  // implementations.

  /**
   * Returns the {@link VolumeInfo} of the input volume (variant without a label).
   *
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract VolumeInfo getInputVolumeInfo() throws IOException;

  /**
   * Returns the {@link VolumeInfo} of the input volume selected by {@code label}.
   *
   * @param label
   *     label identifying the input volume
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract VolumeInfo getInputVolumeInfo(String label) throws IOException;

  /**
   * Returns the {@link VolumeInfo} of the output volume (variant without a label).
   *
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract VolumeInfo getOutputVolumeInfo() throws IOException;

  /**
   * Returns the {@link VolumeInfo} of the output volume selected by {@code label}.
   *
   * @param label
   *     label identifying the output volume
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract VolumeInfo getOutputVolumeInfo(String label) throws IOException;

  /**
   * Returns the {@link FileSystem} of the input volume (variant without a label).
   *
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract FileSystem getInputVolumeFileSystem() throws IOException;

  /**
   * Returns the {@link FileSystem} of the input volume selected by {@code label}.
   *
   * @param label
   *     label identifying the input volume
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract FileSystem getInputVolumeFileSystem(String label) throws IOException;

  /**
   * Returns the {@link FileSystem} of the output volume (variant without a label).
   *
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract FileSystem getOutputVolumeFileSystem() throws IOException;

  /**
   * Returns the {@link FileSystem} of the output volume selected by {@code label}.
   *
   * @param label
   *     label identifying the output volume
   * @throws IOException declared by the contract; conditions are implementation-defined
   */
  public abstract FileSystem getOutputVolumeFileSystem(String label) throws IOException;
}