/**
 * Copyright 2012-2013 University Of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.workflowsim.failure;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.cloudbus.cloudsim.Log;

/**
 * FailureMonitor collects task failure records and estimates the task failure
 * rate that is used to suggest a clustering factor.
 *
 * @author Weiwei Chen
 * @since WorkflowSim Toolkit 1.0
 * @date Apr 9, 2013
 */
public class FailureMonitor {

    /**
     * Maps a VM id to its list of failure records.
     */
    protected static Map<Integer, List<FailureRecord>> vm2record;
    /**
     * Maps a job type (depth) to its list of failure records.
     */
    protected static Map<Integer, List<FailureRecord>> type2record;
    /**
     * Maps a job id to its failure record.
     */
    protected static Map<Integer, FailureRecord> jobid2record;
    /**
     * All failure records, in posting order.
     */
    protected static List<FailureRecord> recordList;
    /**
     * Maps an id to a job.
     */
    public static Map index2job;

    /**
     * Initializes the record maps and the record list. Call this before any
     * record is posted.
     */
    public static void init() {
        vm2record = new HashMap<>();
        type2record = new HashMap<>();
        jobid2record = new HashMap<>();
        recordList = new ArrayList<>();
    }

    /**
     * Computes the optimal clustering factor from the monitored failure rate.
     *
     * @param d delay
     * @param a monitored task failure rate
     * @param t task runtime
     * @return optimal clustering factor
     */
    protected static double getK(double d, double a, double t) {
        double k = (-d + Math.sqrt(d * d - 4 * d / Math.log(1 - a))) / (2 * t);
        return k;
    }

    /**
     * Gets the clustering factor suggested for a request.
     *
     * @param record the failure record describing the request
     * @return the suggested clustering factor
     */
    public static int getClusteringFactor(FailureRecord record) {
        double d = record.delayLength;
        double t = record.length;
        double a = 0.0;
        switch (FailureParameters.getMonitorMode()) {
            case MONITOR_JOB:
                // Not supported; falls through to MONITOR_ALL.
            case MONITOR_ALL:
                a = analyze(0, record.depth);
                break;
            case MONITOR_VM:
                a = analyze(0, record.vmId);
                break;
        }
        if (a <= 0.0) {
            // No failures observed so far: keep all tasks in one job.
            return record.allTaskNum;
        } else {
            double k = getK(d, a, t);
            if (k <= 1) {
                k = 1; // minimal clustering factor
            }
            return (int) k;
        }
    }

    /**
     * A post from a broker so that the record lists can be updated.
     *
     * @param record a failure record
     */
    public static void postFailureRecord(FailureRecord record) {
        if (record.workflowId < 0 || record.jobId < 0 || record.vmId < 0) {
            Log.printLine("Error in receiving failure record");
            return;
        }
        switch (FailureParameters.getMonitorMode()) {
            case MONITOR_VM:
                if (!vm2record.containsKey(record.vmId)) {
                    vm2record.put(record.vmId, new ArrayList<>());
                }
                vm2record.get(record.vmId).add(record);
                break;
            case MONITOR_JOB:
                if (!type2record.containsKey(record.depth)) {
                    type2record.put(record.depth, new ArrayList<>());
                }
                type2record.get(record.depth).add(record);
                break;
            case MONITOR_NONE:
                break;
        }
        recordList.add(record);
    }

    /**
     * Updates the detected task failure rate based on the record lists.
     *
     * @param workflowId not used in this version
     * @param type the job depth or the VM id, depending on the monitor mode
     * @return task failure rate
     */
    public static double analyze(int workflowId, int type) {
        // Workflow level: all jobs together (MONITOR_ALL).
        int sumFailures = 0;
        int sumJobs = 0;
        switch (FailureParameters.getMonitorMode()) {
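            // In every mode the failure rate is estimated as
            //   alpha = sum(failedTasksNum) / sum(allTaskNum)
            // over the records selected by the current monitor mode.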
            case MONITOR_ALL:
                for (FailureRecord record : recordList) {
                    sumFailures += record.failedTasksNum;
                    sumJobs += record.allTaskNum;
                }
                break;
            case MONITOR_JOB:
                if (type2record.containsKey(type)) {
                    for (FailureRecord record : type2record.get(type)) {
                        sumFailures += record.failedTasksNum;
                        sumJobs += record.allTaskNum;
                    }
                }
                break;
            case MONITOR_VM:
                if (vm2record.containsKey(type)) {
                    for (FailureRecord record : vm2record.get(type)) {
                        sumFailures += record.failedTasksNum;
                        sumJobs += record.allTaskNum;
                    }
                }
                break;
        }
        if (sumFailures == 0) {
            // No failures recorded; also avoids dividing by zero when no records exist.
            return 0;
        }
        return (double) sumFailures / sumJobs;
    }
}
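
/*
 * Usage sketch (illustrative only; not part of the original class). It assumes
 * a broker-like component that already holds a populated FailureRecord; how
 * records are constructed depends on the FailureRecord API and is not shown.
 *
 *     FailureMonitor.init();                      // once, before the simulation starts
 *
 *     // whenever a job finishes with failed tasks, the broker posts a record:
 *     FailureMonitor.postFailureRecord(record);
 *
 *     // when re-clustering the remaining tasks, ask for a suggested factor:
 *     int k = FailureMonitor.getClusteringFactor(record);
 *
 *     // or query the monitored failure rate directly (type = job depth or
 *     // VM id, depending on FailureParameters.getMonitorMode()):
 *     double alpha = FailureMonitor.analyze(0, record.depth);
 */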