/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.falcon.logging;

import org.apache.commons.lang.StringUtils;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.process.EngineType;
import org.apache.falcon.hadoop.HadoopClientFactory;
import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.workflow.WorkflowExecutionContext;
import org.apache.falcon.workflow.util.OozieActionConfigurationHelper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.client.WorkflowJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Utility invoked in the post-processing step of an Oozie workflow
 * to move the Oozie action executor logs to the Falcon log directory.
 */
public class JobLogMover {

    private static final Logger LOG = LoggerFactory.getLogger(JobLogMover.class);

    private static final String YARN = "yarn";
    private static final String MAPREDUCE_FRAMEWORK = "mapreduce.framework.name";

    private static final Set<String> FALCON_ACTIONS =
            new HashSet<String>(Arrays.asList("eviction", "replication"));

    private Configuration getConf() {
        Configuration conf = null;
        try {
            conf = OozieActionConfigurationHelper.createActionConf();
        } catch (IOException ioe) {
            LOG.warn("Cannot get Oozie configuration. Returning default.");
        }
        return conf == null ? new Configuration() : conf;
    }

    public void moveLog(WorkflowExecutionContext context) {
        if (UserGroupInformation.isSecurityEnabled()) {
            LOG.info("Unable to move logs as security is enabled.");
            return;
        }
        try {
            run(context);
        } catch (Exception ignored) {
            // Mask the exception: a failed log mover must not fail the user workflow
            LOG.error("Exception in job log mover:", ignored);
        }
    }

    public int run(WorkflowExecutionContext context) {
        try {
            String engineUrl = context.getWorkflowEngineUrl();
            if (StringUtils.isBlank(engineUrl)) {
                LOG.warn("Unable to retrieve workflow url for {} with status {}",
                        context.getWorkflowId(), context.getWorkflowStatus());
                return 0;
            }

            String instanceOwner = context.getWorkflowUser();
            if (StringUtils.isNotBlank(instanceOwner)) {
                CurrentUser.authenticate(instanceOwner);
            } else {
                CurrentUser.authenticate(System.getProperty("user.name"));
            }

            OozieClient client = new OozieClient(engineUrl);
            WorkflowJob jobInfo;
            try {
                jobInfo = client.getJobInfo(context.getWorkflowId());
            } catch (OozieClientException e) {
                LOG.error("Error getting jobinfo for: {}", context.getUserSubflowId(), e);
                return 0;
            }

            // Assumption: each workflow run has its own directory and the
            // corresponding job logs are stored within that directory
            Path path = new Path(context.getLogDir() + "/"
                    + context.getNominalTime() + "/"
                    + String.format("%03d", context.getWorkflowRunId()));

            FileSystem fs = HadoopClientFactory.get()
                    .createProxiedFileSystem(path.toUri(), getConf());

            if (EntityType.FEED.name().equalsIgnoreCase(context.getEntityType())
                    || notUserWorkflowEngineIsOozie(context.getUserWorkflowEngine())) {
                // replication wf, retention wf or a PIG process
                copyOozieLog(client, fs, path, jobInfo.getId());
                for (WorkflowAction action : jobInfo.getActions()) {
                    if (FALCON_ACTIONS.contains(action.getName())) {
                        copyTTlogs(fs, path, action);
                        break;
                    }
                }
            } else {
                String flowId;
                if ("pig".equals(context.getUserWorkflowEngine())
                        || "hive".equals(context.getUserWorkflowEngine())) {
                    // process wf with a pig or hive engine
                    flowId = jobInfo.getId();
                } else {
                    // process wf with an oozie engine
                    jobInfo = client.getJobInfo(context.getUserSubflowId());
                    flowId = jobInfo.getExternalId();
                }
                copyOozieLog(client, fs, path, flowId);

                WorkflowJob subflowInfo = client.getJobInfo(flowId);
                for (WorkflowAction action : subflowInfo.getActions()) {
                    if (isActionTypeSupported(action)) {
                        LOG.info("Copying hadoop TT log for action: {} of type: {}",
                                action.getName(), action.getType());
                        copyTTlogs(fs, path, action);
                    } else {
                        LOG.info("Ignoring hadoop TT log for non supported action: {} of type: {}",
                                action.getName(), action.getType());
                    }
                }
            }
        } catch (Exception e) {
            // JobLogMover does not propagate exceptions: a failed log mover
            // must not fail the user workflow
            LOG.error("Exception in log mover:", e);
        }
        return 0;
    }

    private boolean notUserWorkflowEngineIsOozie(String userWorkflowEngine) {
        // userWorkflowEngine is null for replication and non-null for pig, hive, oozie
        return userWorkflowEngine != null && EngineType.fromValue(userWorkflowEngine) == null;
    }

    private void copyOozieLog(OozieClient client, FileSystem fs, Path path,
                              String id) throws OozieClientException, IOException {
        // encode using UTF-8 rather than the platform default charset
        InputStream in = new ByteArrayInputStream(
                client.getJobLog(id).getBytes(StandardCharsets.UTF_8));
        OutputStream out = fs.create(new Path(path, "oozie.log"));
        IOUtils.copyBytes(in, out, 4096, true);
        LOG.info("Copied oozie log to {}", path);
    }

    private void copyTTlogs(FileSystem fs, Path path,
                            WorkflowAction action) throws Exception {
        List<String> ttLogUrls = getTTlogURL(action.getExternalId());
        if (ttLogUrls != null) {
            int index = 1;
            for (String ttLogURL : ttLogUrls) {
                LOG.info("Fetching log for action: {} from url: {}",
                        action.getExternalId(), ttLogURL);
                InputStream in = getURLinputStream(new URL(ttLogURL));
                OutputStream out = fs.create(new Path(path, action.getName() + "_"
                        + action.getType() + "_" + getMappedStatus(action.getStatus())
                        + "-" + index + ".log"));
                IOUtils.copyBytes(in, out, 4096, true);
                LOG.info("Copied log to {}", path);
                index++;
            }
        }
    }

    private boolean isActionTypeSupported(WorkflowAction action) {
        return action.getType().equals("pig")
                || action.getType().equals("hive")
                || action.getType().equals("java")
                || action.getType().equals("map-reduce");
    }

    private String getMappedStatus(WorkflowAction.Status status) {
        if (status == WorkflowAction.Status.FAILED
                || status == WorkflowAction.Status.KILLED
                || status == WorkflowAction.Status.ERROR) {
            return "FAILED";
        } else {
            return "SUCCEEDED";
        }
    }

    private List<String> getTTlogURL(String jobId) throws Exception {
        TaskLogURLRetriever logRetriever = ReflectionUtils
                .newInstance(getLogRetrieverClassName(getConf()), getConf());
        return logRetriever.retrieveTaskLogURL(jobId);
    }

    @SuppressWarnings("unchecked")
    private Class<? extends TaskLogURLRetriever> getLogRetrieverClassName(Configuration conf) {
        // Pick the YARN retriever when running against the YARN framework,
        // else fall back to the default (JobTracker based) retriever
        if (YARN.equals(conf.get(MAPREDUCE_FRAMEWORK))) {
            return TaskLogRetrieverYarn.class;
        } else {
            return DefaultTaskLogRetriever.class;
        }
    }

    private InputStream getURLinputStream(URL url) throws IOException {
        // Read-only fetch: do not enable doOutput, which would turn an
        // HttpURLConnection GET into a POST
        URLConnection connection = url.openConnection();
        connection.connect();
        return connection.getInputStream();
    }
}
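
// Usage sketch (illustrative only, not part of this class): the log mover is
// typically driven from Falcon's Oozie post-processing step, roughly along the
// lines below. The factory call and the Type constant are assumptions about
// the surrounding Falcon codebase, not guaranteed by this file.
//
//     WorkflowExecutionContext context = WorkflowExecutionContext.create(
//             args, WorkflowExecutionContext.Type.POST_PROCESSING);
//     new JobLogMover().moveLog(context);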