/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.pig.PigRunner;
import org.apache.pig.backend.hadoop.executionengine.tez.TezExecType;
import org.apache.pig.tools.pigstats.InputStats;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.mapreduce.MRJobStats;
import org.apache.pig.tools.pigstats.mapreduce.SimplePigStats;
import org.apache.pig.tools.pigstats.tez.TezDAGStats;
import org.apache.pig.tools.pigstats.tez.TezPigScriptStats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Field;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
*
*/
public class PigUtils {
private static Logger LOGGER = LoggerFactory.getLogger(PigUtils.class);
protected static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
public static File createTempPigScript(String content) throws IOException {
File tmpFile = File.createTempFile("zeppelin", "pig");
LOGGER.debug("Create pig script file:" + tmpFile.getAbsolutePath());
FileWriter writer = new FileWriter(tmpFile);
IOUtils.write(content, writer);
writer.close();
return tmpFile.getAbsoluteFile();
}
public static File createTempPigScript(List<String> lines) throws IOException {
return createTempPigScript(StringUtils.join(lines, "\n"));
}
public static String extactJobStats(PigStats stats) {
if (stats instanceof SimplePigStats) {
return extractFromSimplePigStats((SimplePigStats) stats);
} else if (stats instanceof TezPigScriptStats) {
return extractFromTezPigStats((TezPigScriptStats) stats);
} else {
throw new RuntimeException("Unrecognized stats type:" + stats.getClass().getSimpleName());
}
}
public static String extractFromSimplePigStats(SimplePigStats stats) {
try {
Field userIdField = PigStats.class.getDeclaredField("userId");
userIdField.setAccessible(true);
String userId = (String) (userIdField.get(stats));
Field startTimeField = PigStats.class.getDeclaredField("startTime");
startTimeField.setAccessible(true);
long startTime = (Long) (startTimeField.get(stats));
Field endTimeField = PigStats.class.getDeclaredField("endTime");
endTimeField.setAccessible(true);
long endTime = (Long) (endTimeField.get(stats));
if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
LOGGER.warn("unknown return code, can't display the results");
return null;
}
if (stats.getPigContext() == null) {
LOGGER.warn("unknown exec type, don't display the results");
return null;
}
SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
StringBuilder sb = new StringBuilder();
sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
sb.append(stats.getHadoopVersion()).append("\t").append(stats.getPigVersion()).append("\t")
.append(userId).append("\t")
.append(sdf.format(new Date(startTime))).append("\t")
.append(sdf.format(new Date(endTime))).append("\t")
.append(stats.getFeatures()).append("\n");
sb.append("\n");
if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS) {
sb.append("Success!\n");
} else if (stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
sb.append("Some jobs have failed! Stop running all dependent jobs\n");
} else {
sb.append("Failed!\n");
}
sb.append("\n");
Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
jobPlanField.setAccessible(true);
PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stats);
if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS
|| stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
sb.append("Job Stats (time in seconds):\n");
sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
List<JobStats> arr = jobPlan.getSuccessfulJobs();
for (JobStats js : arr) {
sb.append(js.getDisplayString());
}
sb.append("\n");
}
if (stats.getReturnCode() == PigRunner.ReturnCode.FAILURE
|| stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
sb.append("Failed Jobs:\n");
sb.append(MRJobStats.FAILURE_HEADER).append("\n");
List<JobStats> arr = jobPlan.getFailedJobs();
for (JobStats js : arr) {
sb.append(js.getDisplayString());
}
sb.append("\n");
}
sb.append("Input(s):\n");
for (InputStats is : stats.getInputStats()) {
sb.append(is.getDisplayString());
}
sb.append("\n");
sb.append("Output(s):\n");
for (OutputStats ds : stats.getOutputStats()) {
sb.append(ds.getDisplayString());
}
sb.append("\nCounters:\n");
sb.append("Total records written : " + stats.getRecordWritten()).append("\n");
sb.append("Total bytes written : " + stats.getBytesWritten()).append("\n");
sb.append("Spillable Memory Manager spill count : "
+ stats.getSMMSpillCount()).append("\n");
sb.append("Total bags proactively spilled: "
+ stats.getProactiveSpillCountObjects()).append("\n");
sb.append("Total records proactively spilled: "
+ stats.getProactiveSpillCountRecords()).append("\n");
sb.append("\nJob DAG:\n").append(jobPlan.toString());
return "Script Statistics: \n" + sb.toString();
} catch (Exception e) {
LOGGER.error("Can not extract message from SimplePigStats", e);
return "Can not extract message from SimpelPigStats," + ExceptionUtils.getStackTrace(e);
}
}
private static String extractFromTezPigStats(TezPigScriptStats stats) {
try {
if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
LOGGER.warn("unknown return code, can't display the results");
return null;
}
if (stats.getPigContext() == null) {
LOGGER.warn("unknown exec type, don't display the results");
return null;
}
Field userIdField = PigStats.class.getDeclaredField("userId");
userIdField.setAccessible(true);
String userId = (String) (userIdField.get(stats));
Field startTimeField = PigStats.class.getDeclaredField("startTime");
startTimeField.setAccessible(true);
long startTime = (Long) (startTimeField.get(stats));
Field endTimeField = PigStats.class.getDeclaredField("endTime");
endTimeField.setAccessible(true);
long endTime = (Long) (endTimeField.get(stats));
SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
StringBuilder sb = new StringBuilder();
sb.append("\n");
sb.append(String.format("%1$20s: %2$-100s%n", "HadoopVersion", stats.getHadoopVersion()));
sb.append(String.format("%1$20s: %2$-100s%n", "PigVersion", stats.getPigVersion()));
sb.append(String.format("%1$20s: %2$-100s%n", "TezVersion", TezExecType.getTezVersion()));
sb.append(String.format("%1$20s: %2$-100s%n", "UserId", userId));
sb.append(String.format("%1$20s: %2$-100s%n", "FileName", stats.getFileName()));
sb.append(String.format("%1$20s: %2$-100s%n", "StartedAt", sdf.format(new Date(startTime))));
sb.append(String.format("%1$20s: %2$-100s%n", "FinishedAt", sdf.format(new Date(endTime))));
sb.append(String.format("%1$20s: %2$-100s%n", "Features", stats.getFeatures()));
sb.append("\n");
if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS) {
sb.append("Success!\n");
} else if (stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
sb.append("Some tasks have failed! Stop running all dependent tasks\n");
} else {
sb.append("Failed!\n");
}
sb.append("\n");
// Print diagnostic info in case of failure
if (stats.getReturnCode() == PigRunner.ReturnCode.FAILURE
|| stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
if (stats.getErrorMessage() != null) {
String[] lines = stats.getErrorMessage().split("\n");
for (int i = 0; i < lines.length; i++) {
String s = lines[i].trim();
if (i == 0 || !org.apache.commons.lang.StringUtils.isEmpty(s)) {
sb.append(String.format("%1$20s: %2$-100s%n", i == 0 ? "ErrorMessage" : "", s));
}
}
sb.append("\n");
}
}
Field tezDAGStatsMapField = TezPigScriptStats.class.getDeclaredField("tezDAGStatsMap");
tezDAGStatsMapField.setAccessible(true);
Map<String, TezDAGStats> tezDAGStatsMap =
(Map<String, TezDAGStats>) tezDAGStatsMapField.get(stats);
int count = 0;
for (TezDAGStats dagStats : tezDAGStatsMap.values()) {
sb.append("\n");
sb.append("DAG " + count++ + ":\n");
sb.append(dagStats.getDisplayString());
sb.append("\n");
}
sb.append("Input(s):\n");
for (InputStats is : stats.getInputStats()) {
sb.append(is.getDisplayString().trim()).append("\n");
}
sb.append("\n");
sb.append("Output(s):\n");
for (OutputStats os : stats.getOutputStats()) {
sb.append(os.getDisplayString().trim()).append("\n");
}
return "Script Statistics:\n" + sb.toString();
} catch (Exception e) {
LOGGER.error("Can not extract message from SimplePigStats", e);
return "Can not extract message from SimpelPigStats," + ExceptionUtils.getStackTrace(e);
}
}
public static List<String> extractJobIds(PigStats stat) {
if (stat instanceof SimplePigStats) {
return extractJobIdsFromSimplePigStats((SimplePigStats) stat);
} else if (stat instanceof TezPigScriptStats) {
return extractJobIdsFromTezPigStats((TezPigScriptStats) stat);
} else {
throw new RuntimeException("Unrecognized stats type:" + stat.getClass().getSimpleName());
}
}
public static List<String> extractJobIdsFromSimplePigStats(SimplePigStats stat) {
List<String> jobIds = new ArrayList<>();
try {
Field jobPlanField = PigStats.class.getDeclaredField("jobPlan");
jobPlanField.setAccessible(true);
PigStats.JobGraph jobPlan = (PigStats.JobGraph) jobPlanField.get(stat);
List<JobStats> arr = jobPlan.getJobList();
for (JobStats js : arr) {
jobIds.add(js.getJobId());
}
return jobIds;
} catch (Exception e) {
LOGGER.error("Can not extract jobIds from SimpelPigStats", e);
throw new RuntimeException("Can not extract jobIds from SimpelPigStats", e);
}
}
public static List<String> extractJobIdsFromTezPigStats(TezPigScriptStats stat) {
List<String> jobIds = new ArrayList<>();
try {
Field tezDAGStatsMapField = TezPigScriptStats.class.getDeclaredField("tezDAGStatsMap");
tezDAGStatsMapField.setAccessible(true);
Map<String, TezDAGStats> tezDAGStatsMap =
(Map<String, TezDAGStats>) tezDAGStatsMapField.get(stat);
for (TezDAGStats dagStats : tezDAGStatsMap.values()) {
LOGGER.debug("Tez JobId:" + dagStats.getJobId());
jobIds.add(dagStats.getJobId());
}
return jobIds;
} catch (Exception e) {
LOGGER.error("Can not extract jobIds from TezPigScriptStats", e);
throw new RuntimeException("Can not extract jobIds from TezPigScriptStats", e);
}
}
}