/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.command.coord;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.CoordinatorActionBean;
import org.apache.oozie.CoordinatorActionInfo;
import org.apache.oozie.CoordinatorJobBean;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.XException;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.CoordinatorJob;
import org.apache.oozie.client.SLAEvent.SlaAppType;
import org.apache.oozie.client.rest.RestConstants;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.coord.CoordELFunctions;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.Services;
import org.apache.oozie.store.CoordinatorStore;
import org.apache.oozie.store.StoreException;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XmlUtils;
import org.apache.oozie.util.db.SLADbOperations;
import org.jdom.Element;
import org.jdom.JDOMException;

/**
 * Command that reruns a set of coordinator actions belonging to one coordinator job.
 * <p>
 * The actions to rerun are selected by <code>scope</code>, which is interpreted either as a list of
 * nominal dates / date ranges or as a list of action numbers / number ranges, depending on
 * <code>rerunType</code>. Every selected action must be in a terminal status; otherwise the whole
 * rerun is rejected.
 */
public class CoordRerunCommand extends CoordinatorCommand<CoordinatorActionInfo> {

    private String jobId;
    private String rerunType;
    private String scope;
    private boolean refresh;
    private boolean noCleanup;
    private final XLog log = XLog.getLog(getClass());

    /**
     * Create a rerun command.
     *
     * @param jobId coordinator job id, must not be empty
     * @param rerunType one of {@link RestConstants#JOB_COORD_RERUN_DATE} or
     *        {@link RestConstants#JOB_COORD_RERUN_ACTION}, must not be empty
     * @param scope comma separated list of dates/date ranges or action numbers/number ranges, must not be empty
     * @param refresh if true, the action XML is re-materialized from the job XML before the rerun
     * @param noCleanup if true, the output-events directories of the actions are NOT deleted before the rerun
     */
    public CoordRerunCommand(String jobId, String rerunType, String scope, boolean refresh, boolean noCleanup) {
        super("coord_rerun", "coord_rerun", 1, XLog.STD);
        this.jobId = ParamChecker.notEmpty(jobId, "jobId");
        this.rerunType = ParamChecker.notEmpty(rerunType, "rerunType");
        this.scope = ParamChecker.notEmpty(scope, "scope");
        this.refresh = refresh;
        this.noCleanup = noCleanup;
    }

    /**
     * Resolve the actions named by the scope, verify that all of them can be rerun, and for each one
     * optionally clean up its output, optionally refresh its XML, reset it to WAITING and queue the
     * follow-up commands.
     *
     * @param store coordinator store used to load and update the job and its actions
     * @return info object wrapping the list of actions that were scheduled for rerun
     * @throws StoreException declared by the overridden method
     * @throws CommandException if the job is KILLED or FAILED, the rerun type is unknown, the scope is
     *         malformed, any selected action is not in a terminal status, or any other failure occurs
     */
    @Override
    protected CoordinatorActionInfo call(CoordinatorStore store) throws StoreException, CommandException {
        try {
            CoordinatorJobBean coordJob = store.getCoordinatorJob(jobId, false);
            CoordinatorActionInfo coordInfo = null;
            setLogInfo(coordJob);
            if (coordJob.getStatus() != CoordinatorJob.Status.KILLED
                    && coordJob.getStatus() != CoordinatorJob.Status.FAILED) {
                incrJobCounter(1);

                List<CoordinatorActionBean> coordActions;
                if (rerunType.equals(RestConstants.JOB_COORD_RERUN_DATE)) {
                    coordActions = getCoordActionsFromDates(jobId, scope, store);
                }
                else if (rerunType.equals(RestConstants.JOB_COORD_RERUN_ACTION)) {
                    coordActions = getCoordActionsFromIds(jobId, scope, store);
                }
                else {
                    throw new CommandException(ErrorCode.E1018, "date or action expected.");
                }

                if (checkAllActionsRunnable(coordActions)) {
                    Configuration conf = new XConfiguration(new StringReader(coordJob.getConf()));
                    for (CoordinatorActionBean coordAction : coordActions) {
                        // Captured before any refresh: the cleanup targets the output of the previous run.
                        String actionXml = coordAction.getActionXml();
                        if (!noCleanup) {
                            Element eAction = XmlUtils.parseXml(actionXml);
                            cleanupOutputEvents(eAction, coordJob.getUser(), coordJob.getGroup(), conf);
                        }
                        if (refresh) {
                            refreshAction(coordJob, coordAction, store);
                        }
                        updateAction(coordJob, coordAction, actionXml, store);

                        // TODO: time 100s should be configurable
                        queueCallable(new CoordActionNotification(coordAction), 100);
                        queueCallable(new CoordActionInputCheckCommand(coordAction.getId()), 100);
                    }
                }
                else {
                    throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
                }
                coordInfo = new CoordinatorActionInfo(coordActions);
            }
            else {
                log.info("CoordRerunCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid="
                        + jobId);
                throw new CommandException(ErrorCode.E1018,
                        "coordinator job is killed or failed so all actions are not eligible to rerun!");
            }
            return coordInfo;
        }
        catch (XException xex) {
            throw new CommandException(xex);
        }
        catch (JDOMException jex) {
            throw new CommandException(ErrorCode.E0700, jex);
        }
        catch (Exception ex) {
            throw new CommandException(ErrorCode.E1018, ex);
        }
    }

    /**
     * Get the list of actions for the given action numbers and number ranges.
     * <p>
     * The scope is a comma separated list whose entries are either a single integer (<code>7</code>)
     * or an inclusive range (<code>3-5</code>). Duplicates are collapsed.
     *
     * @param jobId coordinator job id
     * @param scope comma separated action numbers / ranges
     * @param store coordinator store used to load the actions
     * @return the list of all actions to rerun
     * @throws CommandException if an entry is not an integer or a well-formed ascending range
     * @throws StoreException if an action cannot be loaded from the store
     */
    private List<CoordinatorActionBean> getCoordActionsFromIds(String jobId, String scope, CoordinatorStore store)
            throws CommandException, StoreException {
        ParamChecker.notEmpty(jobId, "jobId");
        ParamChecker.notEmpty(scope, "scope");

        Set<String> actions = new HashSet<String>();
        String[] list = scope.split(",");
        for (String s : list) {
            s = s.trim();
            if (s.contains("-")) {
                String[] range = s.split("-");
                if (range.length != 2) {
                    throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
                }
                int start;
                int end;
                try {
                    start = Integer.parseInt(range[0].trim());
                    end = Integer.parseInt(range[1].trim());
                    if (start > end) {
                        throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s
                                + "'");
                    }
                }
                catch (NumberFormatException ne) {
                    throw new CommandException(ErrorCode.E0302, ne);
                }
                for (int i = start; i <= end; i++) {
                    actions.add(jobId + "@" + i);
                }
            }
            else {
                int actionNumber;
                try {
                    actionNumber = Integer.parseInt(s);
                }
                catch (NumberFormatException ne) {
                    throw new CommandException(ErrorCode.E0302, "format is wrong for action id'" + s
                            + "'. Integer only.");
                }
                // Build the id from the parsed number (not the raw text) so that e.g. "05" names the
                // same action as the range "5-5" does.
                actions.add(jobId + "@" + actionNumber);
            }
        }

        List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
        for (String id : actions) {
            CoordinatorActionBean coordAction = store.getCoordinatorAction(id, false);
            coordActions.add(coordAction);
            log.debug("Rerun coordinator for actionId='" + id + "'");
        }
        return coordActions;
    }

    /**
     * Get the list of actions for the given nominal dates and date ranges.
     * <p>
     * The scope is a comma separated list whose entries are either a single date or a range written as
     * <code>start::end</code>. Dates are parsed with {@link DateUtils#parseDateUTC(String)}.
     *
     * @param jobId coordinator job id
     * @param scope comma separated dates / date ranges
     * @param store coordinator store used to look up the actions
     * @return the list of all actions to rerun
     * @throws CommandException if an entry cannot be parsed, a range starts after it ends, or no action
     *         is found for a single date
     * @throws StoreException if the store lookup fails
     */
    private List<CoordinatorActionBean> getCoordActionsFromDates(String jobId, String scope, CoordinatorStore store)
            throws CommandException, StoreException {
        ParamChecker.notEmpty(jobId, "jobId");
        ParamChecker.notEmpty(scope, "scope");

        Set<CoordinatorActionBean> actionSet = new HashSet<CoordinatorActionBean>();
        String[] list = scope.split(",");
        for (String s : list) {
            s = s.trim();
            if (s.contains("::")) {
                String[] dateRange = s.split("::");
                if (dateRange.length != 2) {
                    throw new CommandException(ErrorCode.E0302, "format is wrong for date's range '" + s + "'");
                }
                Date start;
                Date end;
                try {
                    start = DateUtils.parseDateUTC(dateRange[0].trim());
                    end = DateUtils.parseDateUTC(dateRange[1].trim());
                }
                catch (Exception e) {
                    throw new CommandException(ErrorCode.E0302, e);
                }
                // Checked outside the try block so this error is not re-wrapped by the generic handler.
                if (start.after(end)) {
                    throw new CommandException(ErrorCode.E0302, "start date is later than end date: '" + s + "'");
                }

                List<CoordinatorActionBean> listOfActions = getActionIdsFromDateRange(jobId, start, end, store);
                actionSet.addAll(listOfActions);
            }
            else {
                Date date;
                try {
                    date = DateUtils.parseDateUTC(s);
                }
                catch (Exception e) {
                    throw new CommandException(ErrorCode.E0302, e);
                }
                CoordinatorActionBean coordAction = store.getCoordActionForNominalTime(jobId, date);
                // NOTE(review): assumes the store may return null when nothing matches - confirm. A null
                // entry would otherwise surface later as an unexplained NullPointerException.
                if (coordAction == null) {
                    throw new CommandException(ErrorCode.E0302, "no action found for nominal time '" + s + "'");
                }
                actionSet.add(coordAction);
            }
        }

        List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
        for (CoordinatorActionBean coordAction : actionSet) {
            coordActions.add(coordAction);
            log.debug("Rerun coordinator for actionId='" + coordAction.getId() + "'");
        }
        return coordActions;
    }

    /**
     * Look up the actions of a job for a date range.
     *
     * @param jobId coordinator job id
     * @param start start of the range
     * @param end end of the range
     * @param store coordinator store used for the lookup
     * @return the actions returned by the store for the range
     * @throws StoreException if the store lookup fails
     */
    private List<CoordinatorActionBean> getActionIdsFromDateRange(String jobId, Date start, Date end,
            CoordinatorStore store) throws StoreException {
        return store.getCoordActionsForDates(jobId, start, end);
    }

    /**
     * Check if all given actions are eligible to rerun, i.e. every one is in a terminal status.
     *
     * @param coordActions list of CoordinatorActionBean
     * @return true if all actions are eligible to rerun (also true for an empty list)
     */
    private boolean checkAllActionsRunnable(List<CoordinatorActionBean> coordActions) {
        for (CoordinatorActionBean coordAction : coordActions) {
            if (!coordAction.isTerminalStatus()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Cleanup output-events directories.
     * <p>
     * This is best effort: a failure to delete one location is logged as a warning and does not stop
     * the cleanup of the remaining locations nor the rerun itself.
     *
     * @param eAction parsed action XML whose <code>output-events/data-out/uris</code> are deleted
     * @param user user on whose behalf the file system is accessed
     * @param group group on whose behalf the file system is accessed
     * @param conf configuration passed to the file system factory
     */
    @SuppressWarnings("unchecked")
    private void cleanupOutputEvents(Element eAction, String user, String group, Configuration conf) {
        Element outputList = eAction.getChild("output-events", eAction.getNamespace());
        if (outputList == null) {
            log.info("No output-events defined in coordinator xml. Therefore nothing to cleanup");
            return;
        }

        for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
            Element eUris = data.getChild("uris", data.getNamespace());
            if (eUris == null) {
                continue;
            }
            String uris = eUris.getTextTrim();
            if (uris == null) {
                continue;
            }
            for (String uri : uris.split(CoordELFunctions.INSTANCE_SEPARATOR)) {
                // An empty <uris> element yields one empty entry; Path rejects an empty string, so skip it.
                if (uri.trim().length() == 0) {
                    continue;
                }
                try {
                    // Built inside the try so that a malformed URI is handled like any other cleanup
                    // failure instead of aborting the whole rerun.
                    Path path = new Path(uri);
                    FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group,
                            path.toUri(), conf);
                    if (fs.exists(path)) {
                        if (!fs.delete(path, true)) {
                            throw new IOException("delete returned false for " + path);
                        }
                    }
                    log.debug("Cleanup the output dir " + path);
                }
                catch (Exception ex) {
                    log.warn("Failed to cleanup the output dir " + uri, ex);
                }
            }
        }
    }

    /**
     * Refresh an Action: re-materialize its XML from the job XML and store the result on the bean.
     * The bean is only modified in memory here; it is not written to the store by this method.
     *
     * @param coordJob job the action belongs to; supplies the job configuration and job XML
     * @param coordAction action whose XML is regenerated
     * @param store not used by this method
     * @throws Exception if the job configuration or job XML cannot be parsed, or materialization fails
     */
    private void refreshAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, CoordinatorStore store)
            throws Exception {
        Configuration jobConf = null;
        try {
            jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
        }
        catch (IOException ioe) {
            log.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
            throw new CommandException(ErrorCode.E1005, ioe);
        }
        String jobXml = coordJob.getJobXml();
        Element eJob = XmlUtils.parseXml(jobXml);
        Date actualTime = new Date();
        // A clone of the job element is handed over, so the parsed job XML itself is left untouched.
        String actionXml = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(),
                coordAction.getNominalTime(), actualTime, coordAction.getActionNumber(), jobConf, coordAction);
        log.debug("Refresh Action actionId=" + coordAction.getId() + ", actionXml="
                + XmlUtils.prettyPrint(actionXml).toString());
        coordAction.setActionXml(actionXml);
    }

    /**
     * Update an Action into database table: reset it to WAITING, clear its external id/status, stamp
     * the rerun time, persist it and write the SLA registration event.
     *
     * @param coordJob job the action belongs to; supplies user and group for the SLA event
     * @param coordAction action to reset and persist
     * @param actionXml not used; the XML currently held by <code>coordAction</code> is used instead, so
     *        that a preceding refresh is honoured
     * @param store coordinator store the action is written to
     * @throws Exception if the update or the SLA registration fails
     */
    private void updateAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, String actionXml,
            CoordinatorStore store) throws Exception {
        log.debug("updateAction for actionId=" + coordAction.getId());
        coordAction.setStatus(CoordinatorAction.Status.WAITING);
        coordAction.setExternalId("");
        coordAction.setExternalStatus("");
        coordAction.setRerunTime(new Date());
        store.updateCoordinatorAction(coordAction);
        writeActionRegistration(coordAction.getActionXml(), coordAction, store, coordJob.getUser(), coordJob
                .getGroup());
    }

    /**
     * Create SLA RegistrationEvent from the <code>sla:info</code> element under the <code>action</code>
     * element of the action XML.
     *
     * @param actionXml action XML to read the SLA definition from
     * @param actionBean action the event is registered for
     * @param store store the event is written to
     * @param user user recorded with the event
     * @param group group recorded with the event
     * @throws Exception if the XML cannot be parsed or the event cannot be written
     */
    private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, CoordinatorStore store,
            String user, String group) throws Exception {
        Element eAction = XmlUtils.parseXml(actionXml);
        Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info",
                eAction.getNamespace("sla"));
        SLADbOperations.writeSlaRegistrationEvent(eSla, store, actionBean.getId(), SlaAppType.COORDINATOR_ACTION,
                user, group);
    }

    /**
     * Acquire the job lock and run the rerun. If the lock cannot be acquired, or acquiring it is
     * interrupted, an identical rerun command is queued again after
     * <code>LOCK_FAILURE_REQUEUE_INTERVAL</code>.
     *
     * @param store coordinator store passed on to {@link #call(CoordinatorStore)}
     * @return the rerun info, or null if the command had to be requeued
     * @throws StoreException propagated from {@link #call(CoordinatorStore)}
     * @throws CommandException propagated from {@link #call(CoordinatorStore)}
     */
    @Override
    protected CoordinatorActionInfo execute(CoordinatorStore store) throws StoreException, CommandException {
        log.info("STARTED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
        CoordinatorActionInfo coordInfo = null;
        try {
            if (lock(jobId)) {
                coordInfo = call(store);
            }
            else {
                // Requeue this rerun with the same arguments (a resume command would silently drop it).
                queueCallable(new CoordRerunCommand(jobId, rerunType, scope, refresh, noCleanup),
                        LOCK_FAILURE_REQUEUE_INTERVAL);
                log.warn("CoordRerunCommand lock was not acquired - " + " failed " + jobId
                        + ". Requeing the same.");
            }
        }
        catch (InterruptedException e) {
            queueCallable(new CoordRerunCommand(jobId, rerunType, scope, refresh, noCleanup),
                    LOCK_FAILURE_REQUEUE_INTERVAL);
            log.warn("CoordRerunCommand lock acquiring failed " + " with exception " + e.getMessage()
                    + " for job id " + jobId + ". Requeing the same.");
        }
        finally {
            log.info("ENDED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
        }
        return coordInfo;
    }
}