/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.mapred;
import java.io.IOException;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.conf.Configuration;
import com.aliyun.odps.counter.Counters;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.pipeline.Pipeline;
/**
 * Job submission and tracking. A {@code Job} can be used to define, submit,
 * control and query a job instance.
 *
 * <p>
 * Until the job has been submitted (via {@link #submit()}, {@link #run()} or
 * {@link #waitForCompletion()}) there is no running instance to query, and the
 * query methods of this class return neutral defaults instead of failing.
 * </p>
 *
 * @see JobClient
 */
public class Job extends JobConf {

  /** Handle of the submitted job instance; {@code null} until the job has been submitted. */
  private RunningJob info;

  /**
   * Default constructor; builds the job on top of a fresh {@link JobConf}.
   */
  public Job() {
    this(new JobConf());
  }

  /**
   * Constructs a job from the given {@link Configuration}.
   *
   * @param conf
   *     the configuration to build this job from
   */
  public Job(Configuration conf) {
    super(conf);
  }

  /**
   * Queries whether the job has finished.
   *
   * @return {@code true} if the job has finished; {@code false} otherwise,
   *     including when the job has not been submitted yet
   * @see RunningJob#isComplete()
   */
  public boolean isComplete() {
    if (info == null) {
      return false;
    }
    return info.isComplete();
  }

  /**
   * Queries whether the job instance ran successfully.
   *
   * @return {@code true} if the job succeeded; {@code false} otherwise,
   *     including when the job has not been submitted yet
   * @see RunningJob#isSuccessful()
   */
  public boolean isSuccessful() {
    if (info == null) {
      return false;
    }
    return info.isSuccessful();
  }

  /**
   * Kills the running instance of this job. Does nothing if the job has not
   * been submitted yet.
   *
   * @see RunningJob#killJob()
   */
  public void killJob() {
    if (info == null) {
      return;
    }
    info.killJob();
  }

  /**
   * Returns a copy of the counters of the current job instance.
   *
   * @return the counters, or {@code null} if the job has not been submitted
   *     or no counters are available yet (e.g. the job has not finished)
   */
  public Counters getCounters() {
    if (info == null) {
      return null;
    }
    // Honour the documented "null when not available" contract: a null result
    // from the running job is propagated instead of being handed to the
    // Counters copy constructor.
    Counters current = info.getCounters();
    if (current == null) {
      return null;
    }
    return new Counters(current);
  }

  /**
   * Submits the job in a non-blocking (asynchronous) way and returns
   * immediately, similar to {@link JobClient#submitJob(JobConf)}.
   *
   * <p>
   * Example:
   * </p>
   *
   * <pre>
   * Job job = new Job();
   * ... // config job
   * job.submit();
   * while (!job.isComplete()) {
   *   Thread.sleep(2000);
   * }
   * if (!job.isSuccessful()) {
   *   throw new Exception("Job failed!");
   * }
   * </pre>
   *
   * @throws OdpsException
   *     if the job submission fails
   */
  public void submit() throws OdpsException {
    info = JobClient.submitJob(this);
  }

  /**
   * Submits the job in a blocking (synchronous) way and waits for it to
   * finish, similar to {@link JobClient#runJob(JobConf)}. This is a
   * convenience alias for {@link #waitForCompletion()}.
   *
   * <p>
   * If {@link #submit()} has not been called yet, it is called first; the
   * method then waits until the job has finished.<br/>
   * The return value indicates whether the job succeeded. The job's main
   * program should check it to decide whether to exit with a non-zero status,
   * which in turn determines the return value seen by the console.
   * </p>
   *
   * @return {@code true} if the job succeeded, {@code false} otherwise
   * @throws OdpsException
   *     if submitting the job or waiting for it fails
   */
  public boolean run() throws OdpsException {
    return waitForCompletion();
  }

  /**
   * Submits the job in a blocking (synchronous) way and waits for it to
   * finish, similar to {@link JobClient#runJob(JobConf)}.
   *
   * <p>
   * If {@link #submit()} has not been called yet, it is called first; the
   * method then waits until the job has finished.<br/>
   * The return value indicates whether the job succeeded. The job's main
   * program should check it to decide whether to exit with a non-zero status,
   * which in turn determines the return value seen by the console.
   * </p>
   *
   * <p>
   * Example:
   * </p>
   *
   * <pre>
   * Job job = new Job();
   * ... // config job
   * boolean success = job.waitForCompletion();
   * if (!success) {
   *   throw new Exception("Job failed!");
   * }
   * </pre>
   *
   * @return {@code true} if the job succeeded, {@code false} otherwise
   * @throws OdpsException
   *     if submitting the job or polling its status fails
   */
  public boolean waitForCompletion() throws OdpsException {
    // Submit lazily so that callers may either call submit() themselves or
    // rely on this method to do it.
    if (info == null) {
      submit();
    }
    info.waitForCompletion();
    return isSuccessful();
  }

  /**
   * Returns the instance ID of the job.
   *
   * @return the instance ID, or {@code null} if the job has not been submitted
   */
  public String getInstanceID() {
    if (info == null) {
      return null;
    }
    return info.getInstanceID();
  }

  /**
   * Returns the status of the job.
   *
   * @return the job status; {@link JobStatus#PREP} if the job has not been
   *     submitted yet
   */
  public JobStatus getJobStatus() {
    if (info == null) {
      return JobStatus.PREP;
    }
    return info.getJobStatus();
  }

  /**
   * Returns the diagnostic information of the job.
   *
   * @return the diagnostic information; an empty string if the job has not
   *     been submitted yet
   */
  public String getDiagnostics() {
    if (info == null) {
      return "";
    }
    return info.getDiagnostics();
  }

  /**
   * Returns the progress of the job's map phase.
   *
   * @return the map progress as reported by the running job; {@code 0} if the
   *     job has not been submitted yet
   * @throws IOException
   *     if the progress cannot be retrieved from the running job
   */
  public float mapProgress() throws IOException {
    if (info == null) {
      return 0f;
    }
    return info.mapProgress();
  }

  /**
   * Returns the progress of the job's reduce phase.
   *
   * @return the reduce progress as reported by the running job; {@code 0} if
   *     the job has not been submitted yet
   * @throws IOException
   *     if the progress cannot be retrieved from the running job
   */
  public float reduceProgress() throws IOException {
    if (info == null) {
      return 0f;
    }
    return info.reduceProgress();
  }

  /**
   * Adds an input to the job.
   *
   * <p>
   * While the job runs, the framework reads the data of the input table as
   * individual {@link Record}s and passes them to the {@link Mapper}.
   * </p>
   *
   * <p>
   * <b>Example:</b>
   * </p>
   *
   * <pre>
   * Job job = new Job();
   *
   * job.addInput(TableInfo.builder().tableName(tblName).build());
   * </pre>
   *
   * <p>
   * <b>Limitations:</b>
   * </p>
   * <ul>
   * <li>The input table or partition must already exist, and read permission
   * on the specified columns is required.
   * <li>Each call to <i>addInput</i> counts as one input. A single ODPS
   * MapReduce job may have at most 1024 inputs and at most 64 tables; note
   * that this is not a limit of 1024 partitions read.
   * <li>Wildcard table names and partition range queries are not supported
   * for specifying job inputs.
   * </ul>
   *
   * @param table
   *     the input table information
   */
  public void addInput(TableInfo table) {
    InputUtils.addTable(table, this);
  }

  /**
   * Adds a default output to the job.
   *
   * @param table
   *     the output table information
   */
  public void addOutput(TableInfo table) {
    OutputUtils.addTable(table, this);
  }

  /**
   * Configures the job to run MapReduce in pipeline mode.
   *
   * <p>
   * <b>Note</b>: in pipeline mode, settings made through
   * job.setMapper/ReducerClass have no effect.
   * </p>
   *
   * @param pipeline
   *     the pipeline definition to apply to this job's configuration
   */
  public void setPipeline(Pipeline pipeline) {
    Pipeline.toJobConf(this, pipeline);
  }
}