/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package gobblin.cluster;

import com.google.common.base.Joiner;

import java.util.Map;
import java.util.Set;

import org.I0Itec.zkclient.DataUpdater;
import org.apache.helix.AccessOption;
import org.apache.helix.ConfigAccessor;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.HelixManager;
import org.apache.helix.PropertyPathConfig;
import org.apache.helix.PropertyType;
import org.apache.helix.ZNRecord;
import org.apache.helix.manager.zk.ZKHelixAdmin;
import org.apache.helix.manager.zk.ZKHelixDataAccessor;
import org.apache.helix.manager.zk.ZkBaseDataAccessor;
import org.apache.helix.manager.zk.ZkClient;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.apache.helix.task.JobDag;
import org.apache.helix.task.TaskConstants;
import org.apache.helix.task.TaskDriver;
import org.apache.helix.task.TaskState;
import org.apache.helix.task.TaskUtil;
import org.apache.helix.task.WorkflowConfig;
import org.apache.helix.task.WorkflowContext;
import org.apache.log4j.Logger;


/**
 * #HELIX-0.6.7-WORKAROUND
 * Replacement {@link TaskDriver} methods to work around bugs and changes in behavior for the 0.6.7 upgrade.
 */
public class GobblinHelixTaskDriver {
  /** For logging */
  private static final Logger LOG = Logger.getLogger(GobblinHelixTaskDriver.class);

  private final HelixDataAccessor _accessor;
  private final ConfigAccessor _cfgAccessor;
  private final HelixPropertyStore<ZNRecord> _propertyStore;
  private final HelixAdmin _admin;
  private final String _clusterName;
  private final TaskDriver _taskDriver;

  public GobblinHelixTaskDriver(HelixManager manager) {
    // Note: getClusterManagmentTool() is the actual (misspelled) HelixManager API method name
    this(manager.getClusterManagmentTool(), manager.getHelixDataAccessor(),
        manager.getConfigAccessor(), manager.getHelixPropertyStore(), manager.getClusterName());
  }

  public GobblinHelixTaskDriver(ZkClient client, String clusterName) {
    this(client, new ZkBaseDataAccessor<ZNRecord>(client), clusterName);
  }

  public GobblinHelixTaskDriver(ZkClient client, ZkBaseDataAccessor<ZNRecord> baseAccessor,
      String clusterName) {
    this(new ZKHelixAdmin(client), new ZKHelixDataAccessor(clusterName, baseAccessor),
        new ConfigAccessor(client), new ZkHelixPropertyStore<ZNRecord>(baseAccessor,
            PropertyPathConfig.getPath(PropertyType.PROPERTYSTORE, clusterName), null),
        clusterName);
  }

  public GobblinHelixTaskDriver(HelixAdmin admin, HelixDataAccessor accessor,
      ConfigAccessor cfgAccessor, HelixPropertyStore<ZNRecord> propertyStore, String clusterName) {
    _admin = admin;
    _accessor = accessor;
    _cfgAccessor = cfgAccessor;
    _propertyStore = propertyStore;
    _clusterName = clusterName;
    _taskDriver = new TaskDriver(admin, accessor, cfgAccessor, propertyStore, clusterName);
  }
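  // Usage sketch (illustrative only; the manager, queue, and job names below are
  // hypothetical). The target queue must be stopped before deleting a job from it:
  //
  //   HelixManager manager = ...; // an already-connected HelixManager
  //   GobblinHelixTaskDriver driver = new GobblinHelixTaskDriver(manager);
  //   driver.deleteJob("myJobQueue", "myJob");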

  /**
   * Delete a job from an existing named queue. The queue must be stopped prior to this call.
   *
   * @param queueName name of the queue containing the job
   * @param jobName name of the job to delete
   */
  public void deleteJob(final String queueName, final String jobName) {
    WorkflowConfig workflowCfg = _taskDriver.getWorkflowConfig(queueName);

    if (workflowCfg == null) {
      throw new IllegalArgumentException("Queue " + queueName + " does not yet exist!");
    }
    if (workflowCfg.isTerminable()) {
      throw new IllegalArgumentException(queueName + " is not a queue!");
    }

    boolean isRecurringWorkflow =
        (workflowCfg.getScheduleConfig() != null && workflowCfg.getScheduleConfig().isRecurring());

    if (isRecurringWorkflow) {
      WorkflowContext wCtx = _taskDriver.getWorkflowContext(queueName);

      String lastScheduledQueue = wCtx.getLastScheduledSingleWorkflow();

      // delete the currently scheduled workflow
      deleteJobFromScheduledQueue(lastScheduledQueue, jobName, true);

      // remove the job from the original queue template's DAG
      removeJobFromDag(queueName, jobName);

      // delete the ideal state and resource config for the template job
      final String namespacedJobName = TaskUtil.getNamespacedJobName(queueName, jobName);
      _admin.dropResource(_clusterName, namespacedJobName);

      // delete the job template from the property store
      String jobPropertyPath =
          Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, namespacedJobName);
      _propertyStore.remove(jobPropertyPath, AccessOption.PERSISTENT);
    } else {
      deleteJobFromScheduledQueue(queueName, jobName, false);
    }
  }

  /**
   * Delete a job from a scheduled (non-recurrent) queue.
   *
   * @param queueName name of the queue containing the job
   * @param jobName name of the job to delete
   * @param isRecurrent whether the queue is recurrent
   */
  private void deleteJobFromScheduledQueue(final String queueName, final String jobName,
      boolean isRecurrent) {
    WorkflowConfig workflowCfg = _taskDriver.getWorkflowConfig(queueName);

    if (workflowCfg == null) {
      // When trying to delete a recurrent job, it could be either not started yet or
      // already finished, so there may not be a workflow config.
      if (isRecurrent) {
        return;
      } else {
        throw new IllegalArgumentException("Queue " + queueName + " does not yet exist!");
      }
    }

    WorkflowContext wCtx = _taskDriver.getWorkflowContext(queueName);
    if (wCtx != null && wCtx.getWorkflowState() == null) {
      throw new IllegalStateException("Queue " + queueName + " does not have a valid workflow state!");
    }

    // #HELIX-0.6.7-WORKAROUND
    // This check is removed to get the same behavior as 0.6.6-SNAPSHOT until new APIs to
    // support delete are provided
    //String workflowState =
    //    (wCtx != null) ? wCtx.getWorkflowState().name() : TaskState.NOT_STARTED.name();
    //if (workflowState.equals(TaskState.IN_PROGRESS.name())) {
    //  throw new IllegalStateException("Queue " + queueName + " is still in progress!");
    //}

    removeJob(queueName, jobName);
  }

  private boolean removeJobContext(HelixPropertyStore<ZNRecord> propertyStore, String jobResource) {
    return propertyStore.remove(
        Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, jobResource),
        AccessOption.PERSISTENT);
  }

  private void removeJob(String queueName, String jobName) {
    // remove the job from the queue in the DAG
    removeJobFromDag(queueName, jobName);

    // delete the ideal state and resource config for the job
    final String namespacedJobName = TaskUtil.getNamespacedJobName(queueName, jobName);
    _admin.dropResource(_clusterName, namespacedJobName);

    // update the queue's property to remove the job from JOB_STATES if it is already started
    removeJobStateFromQueue(queueName, jobName);

    // Delete the job context from the property store. Contexts are stored under the
    // namespaced job name (queueName_jobName, matching Helix's TaskDriver), not the bare
    // job name.
    removeJobContext(_propertyStore, namespacedJobName);
  }
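  // Illustration (explanatory comment only, not original source): removeJobFromDag
  // below splices the removed job out of the queue's chain so the remaining jobs stay
  // connected. Assuming hypothetical jobs A, B, C queued as A -> B -> C, deleting B
  // re-links the DAG as A -> C before B's node is dropped.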
  /** Remove the job name from the DAG in the queue configuration. */
  private void removeJobFromDag(final String queueName, final String jobName) {
    final String namespacedJobName = TaskUtil.getNamespacedJobName(queueName, jobName);

    DataUpdater<ZNRecord> dagRemover = new DataUpdater<ZNRecord>() {
      @Override
      public ZNRecord update(ZNRecord currentData) {
        if (currentData == null) {
          LOG.error("Could not update DAG for queue: " + queueName + " ZNRecord is null.");
          return null;
        }
        // parse the DAG from the existing queue configuration
        JobDag jobDag = JobDag.fromJson(
            currentData.getSimpleField(WorkflowConfig.WorkflowConfigProperty.Dag.name()));
        Set<String> allNodes = jobDag.getAllNodes();
        if (!allNodes.contains(namespacedJobName)) {
          LOG.warn("Could not delete job from queue " + queueName + ", job " + jobName
              + " does not exist");
          return currentData;
        }
        String parent = null;
        String child = null;
        // remove the node from the queue
        for (String node : allNodes) {
          if (jobDag.getDirectChildren(node).contains(namespacedJobName)) {
            parent = node;
            jobDag.removeParentToChild(parent, namespacedJobName);
          } else if (jobDag.getDirectParents(node).contains(namespacedJobName)) {
            child = node;
            jobDag.removeParentToChild(namespacedJobName, child);
          }
        }
        // re-link the removed node's parent and child to keep the queue connected
        if (parent != null && child != null) {
          jobDag.addParentToChild(parent, child);
        }
        jobDag.removeNode(namespacedJobName);

        // save the updated DAG
        try {
          currentData
              .setSimpleField(WorkflowConfig.WorkflowConfigProperty.Dag.name(), jobDag.toJson());
        } catch (Exception e) {
          throw new IllegalStateException(
              "Could not remove job " + jobName + " from DAG of queue " + queueName, e);
        }
        return currentData;
      }
    };

    String path = _accessor.keyBuilder().resourceConfig(queueName).getPath();
    if (!_accessor.getBaseDataAccessor().update(path, dagRemover, AccessOption.PERSISTENT)) {
      throw new IllegalArgumentException(
          "Could not remove job " + jobName + " from DAG of queue " + queueName);
    }
  }

  /** Update the queue's context to remove the job from JOB_STATES if it has already started. */
  private void removeJobStateFromQueue(final String queueName, final String jobName) {
    final String namespacedJobName = TaskUtil.getNamespacedJobName(queueName, jobName);
    String queuePropertyPath =
        Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, queueName, TaskUtil.CONTEXT_NODE);

    DataUpdater<ZNRecord> updater = new DataUpdater<ZNRecord>() {
      @Override
      public ZNRecord update(ZNRecord currentData) {
        if (currentData != null) {
          Map<String, String> states = currentData.getMapField(WorkflowContext.JOB_STATES);
          if (states != null && states.containsKey(namespacedJobName)) {
            states.remove(namespacedJobName);
          }
        }
        return currentData;
      }
    };

    if (!_propertyStore.update(queuePropertyPath, updater, AccessOption.PERSISTENT)) {
      LOG.warn("Failed to remove job state for job " + namespacedJobName + " from queue "
          + queueName);
    }
  }
}