/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.mapred;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.parser.Feature;
import com.aliyun.odps.Column;
import com.aliyun.odps.Instance;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.Resource;
import com.aliyun.odps.Table;
import com.aliyun.odps.conf.Configured;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.data.VolumeInfo;
import com.aliyun.odps.mapred.bridge.BridgeRunningJob;
import com.aliyun.odps.mapred.bridge.ErrorCode;
import com.aliyun.odps.mapred.bridge.MetaExplorer;
import com.aliyun.odps.mapred.bridge.MetaExplorerImpl;
import com.aliyun.odps.mapred.bridge.utils.Validator;
import com.aliyun.odps.mapred.bridge.utils.ValidatorFactory;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.conf.SessionState;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;
/**
 * Base class for job runners that prepare a {@link BridgeJobConf} and submit it as an instance.
 *
 * <p>{@link #submit()} validates the job, runs {@link #setUp()} (which resolves input/output
 * schemas and uploads the temporary resources the job needs) and then delegates the actual
 * submission to {@link #submitInternal()}. Temporary resources recorded in
 * {@link #aliasToTempResource} are deleted again by {@link #tearDown()}, which is triggered from
 * {@link #onComplete()} or when submission fails.
 */
public abstract class BridgeJobRunner extends Configured implements JobRunner, EventListener {

  protected static final Log LOG = LogFactory.getLog(BridgeJobRunner.class);

  /** Prefix (matched case-insensitively) marking a resource entry as a local file to upload. */
  private static final String TEMP_RESOURCE_PREFIX = "file:";

  /** Number of upload attempts made for one framework jar before giving up. */
  private static final int FRAMEWORK_RESOURCE_MAX_TRIES = 3;

  /** Pause between two upload attempts of a framework jar, in milliseconds. */
  private static final long FRAMEWORK_RESOURCE_RETRY_INTERVAL_MS = 3000L;

  protected BridgeJobConf job;
  protected String jobId;
  /** Set to {@code true} once {@link #tearDown()} has removed every temporary resource. */
  protected boolean isClean;
  protected String taskName;
  /** Maps a resource alias to the name of the temporary resource registered for it. */
  protected Map<String, String> aliasToTempResource = new HashMap<String, String>();
  protected MetaExplorer metaExplorer;

  /**
   * Creates an in-memory jar holding the job configuration as a single {@code jobconf.xml} entry.
   *
   * @return the buffer containing the finished jar
   * @throws OdpsException if writing the configuration or the jar fails
   */
  private ByteArrayOutputStream createJarArchive() throws OdpsException {
    ByteArrayOutputStream archiveOut = new ByteArrayOutputStream();
    try {
      JarOutputStream out = new JarOutputStream(archiveOut, new Manifest());
      try {
        ByteArrayOutputStream jobOut = new ByteArrayOutputStream();
        job.writeXml(jobOut);
        // Add jobconf entry
        out.putNextEntry(new JarEntry("jobconf.xml"));
        out.write(jobOut.toByteArray());
      } finally {
        // Always close so the stream is released even when writing fails;
        // on success this also finishes the jar.
        out.close();
      }
      return archiveOut;
    } catch (IOException ex) {
      throw new OdpsException(ErrorCode.UNEXPECTED.toString(), ex);
    }
  }

  /**
   * Uploads the jars that contain the framework classes and records them under fixed aliases.
   *
   * @throws OdpsException if one of the jars cannot be located or uploaded
   */
  private void applyFrameworkResources() throws OdpsException {
    String padding = "-" + jobId;
    // Paths already uploaded; several framework classes may live in the same jar.
    Set<String> added = new HashSet<String>();
    applyFrameworkResource(Odps.class, "odps-sdk-core.jar", padding, added);
    applyFrameworkResource(Mapper.class, "odps-sdk-mapred.jar", padding, added);
    applyFrameworkResource(BridgeJobRunner.class, "odps-mapred-bridge.jar", padding, added);
    applyFrameworkResource(JSON.class, "fastjson.jar", padding, added);
  }

  /**
   * Uploads the code source of {@code clz} as a jar resource and maps {@code alias} to it.
   * Nothing happens when the same path is already contained in {@code added}.
   *
   * @param clz     class whose code source location is uploaded
   * @param alias   alias under which the uploaded resource is recorded
   * @param padding value handed to the upload call together with the path
   * @param added   paths uploaded so far; updated on success
   * @throws OdpsException if the location is not a valid URI, if all attempts fail, or if the
   *                       thread is interrupted while waiting for the next attempt
   */
  private void applyFrameworkResource(Class<?> clz, String alias, String padding,
                                      Set<String> added) throws OdpsException {
    String jarFilePath;
    try {
      jarFilePath =
          new File(clz.getProtectionDomain().getCodeSource().getLocation().toURI())
              .getAbsolutePath();
    } catch (URISyntaxException ex) {
      throw new OdpsException(ex);
    }

    if (added.contains(jarFilePath)) {
      return;
    }

    int tryCount = 0;
    while (true) {
      try {
        aliasToTempResource.put(alias, metaExplorer
            .addFileResourceWithRetry(jarFilePath, Resource.Type.JAR, padding, true));
        added.add(jarFilePath);
        return;
      } catch (Exception ex) {
        tryCount++;
        if (tryCount >= FRAMEWORK_RESOURCE_MAX_TRIES) {
          throw new OdpsException(ex);
        }
        try {
          Thread.sleep(FRAMEWORK_RESOURCE_RETRY_INTERVAL_MS);
        } catch (InterruptedException interrupted) {
          // Keep the interrupt visible to callers and stop retrying.
          Thread.currentThread().interrupt();
          throw new OdpsException(ex);
        }
      }
    }
  }

  /**
   * Prepares the job for submission: merges streaming resource aliases, resolves projects and
   * schemas of input/output tables and volumes, uploads temporary resources, the job
   * configuration jar and the framework jars, and finally rewrites the job's resource list.
   *
   * @throws OdpsException if meta data cannot be fetched or a resource cannot be uploaded
   */
  protected void setUp() throws OdpsException {
    // merge streaming job alias resources if exist
    mergeStreamingResourceAliases();

    // for user defined partitioner, estimate reduce number if not set
    boolean isEstimateReduceNum =
        (job.getPartitionerClass() != null) && (job.get("odps.stage.reducer.num") == null);

    TableInfo[] inputTables = InputUtils.getTables(job);
    // for multi inputs not allow inner output in mapper
    if (inputTables != null && inputTables.length > 1) {
      job.setMapperInnerOutputEnable(false);
    }

    String project = metaExplorer.getDefaultProject();

    // Expand input columns if applicable.
    long inputSize = expandInputTables(inputTables, project, isEstimateReduceNum);
    if (isEstimateReduceNum) {
      job.setNumReduceTasks(estimateReduceNum(inputSize, job));
    }

    // add project information for volume if necessary
    VolumeInfo[] inputVolumes = InputUtils.getVolumes(job);
    if (fillMissingProject(inputVolumes, project)) {
      InputUtils.setVolumes(inputVolumes, job);
    }
    VolumeInfo[] outputVolumes = OutputUtils.getVolumes(job);
    if (fillMissingProject(outputVolumes, project)) {
      OutputUtils.setVolumes(outputVolumes, job);
    }

    // Expand output columns.
    expandOutputTables(project);

    processTempResources();

    // Adding jobconf jar.
    ByteArrayOutputStream jarOut = createJarArchive();
    String resName = metaExplorer.addTempResourceWithRetry(
        new ByteArrayInputStream(jarOut.toByteArray()), jobId + ".jar", Resource.Type.JAR);
    aliasToTempResource.put("jobconf.jar", resName);

    applyFrameworkResources();

    // The final resource list is the remaining job resources plus every registered alias.
    List<String> totalRes = new ArrayList<String>();
    String[] resources = job.getResources();
    if (resources != null) {
      Collections.addAll(totalRes, resources);
    }
    totalRes.addAll(aliasToTempResource.keySet());
    job.setResources(StringUtils.join(totalRes, ","));
  }

  /**
   * Adds the alias mapping stored as JSON under {@code stream.temp.resource.alias}, if present,
   * to {@link #aliasToTempResource}.
   *
   * @throws OdpsException if the stored value is not valid JSON
   */
  private void mergeStreamingResourceAliases() throws OdpsException {
    String aliasJson = job.get("stream.temp.resource.alias");
    if (aliasJson == null) {
      return;
    }
    try {
      // The parser returns a raw Map; the setting is expected to map alias to resource name.
      @SuppressWarnings("unchecked")
      Map<String, String> aliases =
          (Map<String, String>) JSON.parseObject(aliasJson, Map.class, Feature.OrderedField);
      if (aliases != null) {
        aliasToTempResource.putAll(aliases);
      }
    } catch (JSONException e) {
      throw new OdpsException("parse stream temp resource alias json failed!", e);
    }
  }

  /**
   * Fills in the default project and the input schema of every input table and writes the
   * tables back to the job when a table description was modified.
   *
   * @param infos          input tables of the job, may be {@code null}
   * @param project        project used for tables that do not name one
   * @param accumulateSize whether the sizes of the inputs should be summed up
   * @return the summed size of all inputs, or {@code 0} when {@code accumulateSize} is false
   * @throws OdpsException if table meta data cannot be fetched
   */
  private long expandInputTables(TableInfo[] infos, String project, boolean accumulateSize)
      throws OdpsException {
    if (infos == null) {
      return 0;
    }

    long inputSize = 0;
    boolean changed = false;
    for (TableInfo info : infos) {
      if (info.getProjectName() == null) {
        changed = true;
        info.setProjectName(project);
      }

      Table tbl = metaExplorer.getTable(info.getProjectName(), info.getTableName());
      List<Column> schema = tbl.getSchema().getColumns();
      String[] inputCols = getInputColumnsFromCommandSettings(job, info);

      if (inputCols.length == 0 && info.getCols() == null) {
        // No column selection at all: read every column of the table.
        changed = true;
        Column[] columns = schema.toArray(new Column[schema.size()]);
        job.setInputSchema(info, columns);
        info.setCols(SchemaUtils.getNames(columns));
      } else {
        // Columns from the command settings take precedence over those of the table info.
        if (inputCols.length == 0) {
          inputCols = info.getCols();
        }
        job.setInputSchema(info, selectColumns(schema, inputCols));
      }

      if (accumulateSize) {
        PartitionSpec part = info.getPartitionSpec();
        if (!part.isEmpty()) {
          // for partition table input
          inputSize += tbl.getPartition(part).getSize();
        } else {
          inputSize += tbl.getSize();
        }
      }
    }

    if (changed) {
      InputUtils.setTables(infos, job);
    }
    return inputSize;
  }

  /**
   * Picks the schema columns matching {@code names} (case-insensitively), in the order of
   * {@code names}.
   *
   * @param schema columns of the table
   * @param names  requested column names
   * @return an array as long as {@code names}
   */
  private static Column[] selectColumns(List<Column> schema, String[] names) {
    // NOTE(review): a name without a matching schema column leaves a null slot in the result;
    // confirm that validation upstream rules this out.
    Column[] columns = new Column[names.length];
    for (int k = 0; k < names.length; k++) {
      for (Column c : schema) {
        if (c.getName().equalsIgnoreCase(names[k])) {
          columns[k] = c;
          break;
        }
      }
    }
    return columns;
  }

  /**
   * Sets {@code project} on every volume that does not name a project.
   *
   * @param volumes volumes to inspect, may be {@code null}
   * @param project project to fill in
   * @return {@code true} if at least one volume was modified
   */
  private static boolean fillMissingProject(VolumeInfo[] volumes, String project) {
    if (volumes == null) {
      return false;
    }
    boolean changed = false;
    for (VolumeInfo volume : volumes) {
      if (volume.getProjectName() == null) {
        changed = true;
        volume.setProjectName(project);
      }
    }
    return changed;
  }

  /**
   * Sets the output schema for every output table from the table's own columns. A job without
   * output tables gets a single string column named {@code nil} under the default label.
   *
   * @param project project used for tables that do not name one
   * @throws OdpsException if table meta data cannot be fetched
   */
  private void expandOutputTables(String project) throws OdpsException {
    TableInfo[] infos = OutputUtils.getTables(job);
    if (infos == null) {
      job.setOutputSchema(new Column[]{new Column("nil", OdpsType.STRING)},
                          TableInfo.DEFAULT_LABEL);
      return;
    }

    for (TableInfo info : infos) {
      if (info.getProjectName() == null) {
        info.setProjectName(project);
      }
      List<Column> schema = metaExplorer.getTable(info.getProjectName(), info.getTableName())
          .getSchema().getColumns();
      Column[] schemaArray = schema.toArray(new Column[schema.size()]);
      info.setCols(SchemaUtils.getNames(schemaArray));
      job.setOutputSchema(schemaArray, info.getLabel());
    }
    OutputUtils.setTables(infos, job);
  }

  /**
   * Reads the column selection stored under {@code odps.mapred.input.columns.<project>.<table>}.
   *
   * @param job  job configuration to read from
   * @param info table whose setting is looked up
   * @return the comma-separated names with surrounding whitespace removed, or an empty array
   *         when the setting is missing or blank
   */
  private String[] getInputColumnsFromCommandSettings(BridgeJobConf job, TableInfo info) {
    String fullTableName = info.getProjectName() + "." + info.getTableName();
    String colsSetting = job.get("odps.mapred.input.columns." + fullTableName);
    if (StringUtils.isBlank(colsSetting)) {
      return new String[0];
    }
    String[] columns = colsSetting.split(",");
    for (int i = 0; i < columns.length; i++) {
      columns[i] = columns[i].trim();
    }
    return columns;
  }

  /**
   * Estimates the number of reduce tasks as one third of the number of map tasks, plus one.
   * The number of map tasks is derived from the input size and the split size when both are
   * positive, and taken from the job configuration otherwise.
   *
   * @param inputSize total input size in bytes
   * @param job       job configuration supplying split size and map task count
   * @return the estimated number of reduce tasks
   */
  private int estimateReduceNum(long inputSize, BridgeJobConf job) {
    // Multiply in long arithmetic so the conversion to bytes cannot overflow an int.
    long splitSize = job.getSplitSize() * 1024L * 1024L; // bytes
    int numMapTasks;
    if (inputSize > 0 && splitSize > 0) {
      numMapTasks = (int) (inputSize / splitSize) + 1;
    } else {
      numMapTasks = job.getNumMapTasks();
    }
    return numMapTasks / 3 + 1;
  }

  /**
   * Registers temporary resources: every job resource starting with {@code file:} is uploaded
   * and recorded under its file name, and removed from the job's resource list.
   *
   * @throws OdpsException if an entry is not a valid URL or the upload fails
   */
  private void processTempResources() throws OdpsException {
    String[] res = job.getResources();
    if (res == null) {
      return;
    }

    StringBuilder remaining = new StringBuilder();
    for (String r : res) {
      // Case-insensitive prefix test that does not depend on the default locale.
      boolean isTemp =
          r.regionMatches(true, 0, TEMP_RESOURCE_PREFIX, 0, TEMP_RESOURCE_PREFIX.length());
      if (!isTemp) {
        if (remaining.length() > 0) {
          remaining.append(',');
        }
        remaining.append(r);
        continue;
      }

      Resource.Type type = r.endsWith(".jar") ? Resource.Type.JAR : Resource.Type.FILE;
      URL url;
      try {
        url = new URL(r);
      } catch (MalformedURLException e) {
        throw new OdpsException(e);
      }
      aliasToTempResource.put(FilenameUtils.getName(url.getPath()),
                              metaExplorer.addFileResourceWithRetry(url.getPath(), type,
                                                                    "_" + jobId, true));
    }
    job.setResources(remaining.toString());
  }

  /**
   * Deletes every temporary resource recorded in {@link #aliasToTempResource}. All deletions are
   * attempted even if one of them fails; {@link #isClean} is only set when all succeeded.
   *
   * @throws OdpsException the first failure encountered, after all deletions were attempted
   */
  protected void tearDown() throws OdpsException {
    OdpsException firstFailure = null;
    for (String resource : aliasToTempResource.values()) {
      try {
        metaExplorer.deleteResource(resource);
      } catch (OdpsException e) {
        LOG.warn("Failed to delete temporary resource " + resource, e);
        if (firstFailure == null) {
          firstFailure = e;
        }
      }
    }
    if (firstFailure != null) {
      throw firstFailure;
    }
    isClean = true;
  }

  /**
   * Performs the actual submission of the prepared job.
   *
   * @return the submitted instance
   * @throws OdpsException if the submission fails
   */
  abstract protected Instance submitInternal() throws OdpsException;

  /**
   * Validates, prepares and submits the job. When preparation or submission fails, the
   * temporary resources registered so far are removed before the failure is propagated.
   *
   * @return a handle on the submitted job
   * @throws OdpsException if validation, preparation or submission fails
   */
  @Override
  public RunningJob submit() throws OdpsException {
    // JobConf field should be initialized here because ReflectionUtils is
    // designed to create object
    // and then set JobConf.
    job = new BridgeJobConf(getConf());
    metaExplorer = getMetaExplorer();
    jobId = System.currentTimeMillis() + RandomStringUtils.randomNumeric(8);
    // Set a prefix to job name because odps only accept job name whose pattern
    // is '([a-z]|[A-Z]){1,}([a-z]|[A-Z]|[\d]|_)*' ...
    taskName = "MRonSQL_" + jobId;

    Validator validator = ValidatorFactory.getValidator(job, metaExplorer);
    validator.validate();

    Instance instance;
    boolean submitted = false;
    try {
      setUp();
      instance = submitInternal();
      submitted = true;
    } finally {
      if (!submitted) {
        // No running job exists that would trigger onComplete(), so clean up here.
        releaseResourcesAfterFailedSubmit();
      }
    }

    BridgeRunningJob runningJob = new BridgeRunningJob(instance, taskName, this);
    if (SessionState.get().isCostMode()) {
      runningJob.setIsCostMode(true);
    }
    return runningJob;
  }

  /**
   * Removes temporary resources after a failed submission. Failures are only logged so that
   * they do not replace the exception that made the submission fail.
   */
  private void releaseResourcesAfterFailedSubmit() {
    try {
      if (!isClean) {
        tearDown();
      }
    } catch (OdpsException e) {
      LOG.warn("Failed to remove temporary resources of job " + jobId, e);
    } catch (RuntimeException e) {
      LOG.warn("Failed to remove temporary resources of job " + jobId, e);
    }
  }

  /**
   * Creates the meta data accessor used by this runner, based on the session's Odps object.
   *
   * @return a new {@link MetaExplorer}
   */
  protected MetaExplorer getMetaExplorer() {
    return new MetaExplorerImpl(SessionState.get().getOdps());
  }

  /**
   * Removes the temporary resources unless that has already happened. A failure to do so is
   * logged and not propagated.
   */
  @Override
  public void onComplete() {
    try {
      if (!isClean) {
        tearDown();
      }
    } catch (OdpsException e) {
      LOG.warn("Failed to remove temporary resources of job " + jobId, e);
    }
  }
}