/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.raid; import java.util.EnumMap; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.metrics.MetricsContext; import org.apache.hadoop.metrics.MetricsRecord; import org.apache.hadoop.metrics.MetricsUtil; import org.apache.hadoop.metrics.Updater; import org.apache.hadoop.metrics.util.MetricsBase; import org.apache.hadoop.metrics.util.MetricsLongValue; import org.apache.hadoop.metrics.util.MetricsRegistry; import org.apache.hadoop.metrics.util.MetricsTimeVaryingLong; import org.apache.hadoop.raid.DistBlockIntegrityMonitor; public class RaidNodeMetrics implements Updater { public static final Log LOG = LogFactory.getLog( "org.apache.hadoop.raid.RaidNodeMetrics"); public static final int DEFAULT_NAMESPACE_ID = 0; static long metricsResetInterval = 86400 * 1000; // 1 day. private static ConcurrentMap<Integer, RaidNodeMetrics> instances = new ConcurrentHashMap<Integer, RaidNodeMetrics>(); // Number of files currently raided. public static final String filesRaidedMetric = "files_raided"; // Number of files fixed by block fixer. public static final String filesFixedMetric = "files_fixed"; // Number of failures encountered by block fixer. public static final String fileFixFailuresMetric = "file_fix_failures"; // Number of failures encountered by using new code block fixer public static final String blockFixSimulationFailuresMetric = "block_fix_simulation_failures"; // Number of failures encountered by using new code block fixer public static final String blockFixSimulationSuccessMetric = "block_fix_simulation_success"; // Number of files that need to be fixed by block fixer. public static final String numFilesToFixMetric = "files_to_fix"; // Number of files copied by block copier. public static final String filesCopiedMetric = "files_copied"; // Number of failures encountered by block copier. public static final String fileCopyFailuresMetric = "file_copy_failures"; // Number of files that need to be copied by block copier. public static final String numFilesToCopyMetric = "files_to_copy"; // Number of failures encountered during raiding. public static final String raidFailuresMetric = "raid_failures"; // Number of purged files/directories. public static final String entriesPurgedMetric = "entries_purged"; // Slot-seconds used by RAID jobs public static final String raidSlotSecondsMetric = "raid_slot_seconds"; // Slot-seconds used by corrupt block fixing jobs. public static final String blockFixSlotSecondsMetric = "blockfix_slot_seconds"; // Slot-seconds used by decommissioning block copying jobs. public static final String blockCopySlotSecondsMetric = "blockcopy_slot_seconds"; // Number of block moved because of violation of the stripe block placement public static final String blockMoveMetric = "block_move"; // Number of scheduled block move public static final String blockMoveScheduledMetric = "block_move_scheduled"; // Number of skipped block move public static final String blockMoveSkippedMetric = "block_move_skipped"; // Number of blocks which are misplaced public static final String misplacedMetricHeader = "misplaced"; // Number of corrupt files being fixed with high priority public static final String corruptFilesHighPriMetric = "corrupt_files_high_pri"; // Number of corrupt files being fixed with low priority public static final String corruptFilesLowPriMetric = "corrupt_files_low_pri"; // Number of files being copied off decommissioning hosts with low priority public static final String decomFilesLowPriMetric = "decom_files_low_pri"; // Number of files being copied off decommissioning hosts with lowest priority public static final String decomFilesLowestPriMetric = "decom_files_lowest_pri"; // Monitor number of misplaced blocks in a stripe public static final int MAX_MONITORED_MISPLACED_BLOCKS = 5; //Number of files which have at least one block missing public static final String filesWithMissingBlksMetric = "files_with_missing_blks"; //Number of stripes using "rs" codec with certain number of blocks missing public static final String NumStrpsOneMissingBlkMetric = "stripes_with_one_missingBlk"; public static final String NumStrpsTwoMissingBlkMetric = "stripes_with_two_missingBlk"; public static final String NumStrpsThreeMissingBlkMetric = "stripes_with_three_missingBlk"; public static final String NumStrpsFourMissingBlkMetric = "stripes_with_four_missingBlk"; public static final String NumStrpsFiveMoreMissingBlkMetric = "stripes_with_fiveOrMore_missingBlk"; public static final String NumFilesToFixDroppedMetric = "files_to_fix_dropped"; public static final String numFileFixReadBytesRemoteRackMetric = "file_fix_bytes_read_remote_rack"; MetricsContext context; private MetricsRecord metricsRecord; private MetricsRegistry registry = new MetricsRegistry(); MetricsLongValue filesRaided = new MetricsLongValue(filesRaidedMetric, registry); MetricsTimeVaryingLong raidFailures = new MetricsTimeVaryingLong(raidFailuresMetric, registry); MetricsTimeVaryingLong filesFixed = new MetricsTimeVaryingLong(filesFixedMetric, registry); MetricsTimeVaryingLong fileFixFailures = new MetricsTimeVaryingLong(fileFixFailuresMetric, registry); MetricsTimeVaryingLong blockFixSimulationFailures = new MetricsTimeVaryingLong(blockFixSimulationFailuresMetric, registry); MetricsTimeVaryingLong blockFixSimulationSuccess = new MetricsTimeVaryingLong(blockFixSimulationSuccessMetric, registry); MetricsLongValue numFilesToFix = new MetricsLongValue(numFilesToFixMetric, registry); MetricsTimeVaryingLong filesCopied = new MetricsTimeVaryingLong(filesCopiedMetric, registry); MetricsTimeVaryingLong fileCopyFailures = new MetricsTimeVaryingLong(fileCopyFailuresMetric, registry); MetricsLongValue numFilesToCopy = new MetricsLongValue(numFilesToCopyMetric, registry); MetricsTimeVaryingLong entriesPurged = new MetricsTimeVaryingLong(entriesPurgedMetric, registry); MetricsTimeVaryingLong raidSlotSeconds = new MetricsTimeVaryingLong(raidSlotSecondsMetric, registry); MetricsTimeVaryingLong blockFixSlotSeconds = new MetricsTimeVaryingLong(blockFixSlotSecondsMetric, registry); MetricsTimeVaryingLong blockCopySlotSeconds = new MetricsTimeVaryingLong(blockCopySlotSecondsMetric, registry); MetricsTimeVaryingLong blockMove = new MetricsTimeVaryingLong(blockMoveMetric, registry); MetricsTimeVaryingLong blockMoveScheduled = new MetricsTimeVaryingLong(blockMoveScheduledMetric, registry); MetricsTimeVaryingLong blockMoveSkipped = new MetricsTimeVaryingLong(blockMoveSkippedMetric, registry); Map<String, Map<Integer, MetricsLongValue>> codecToMisplacedBlocks; MetricsLongValue numFilesWithMissingBlks = new MetricsLongValue(filesWithMissingBlksMetric, registry); MetricsLongValue numStrpsOneMissingBlk = new MetricsLongValue(NumStrpsOneMissingBlkMetric, registry); MetricsLongValue numStrpsTwoMissingBlk = new MetricsLongValue(NumStrpsTwoMissingBlkMetric, registry); MetricsLongValue numStrpsThreeMissingBlk = new MetricsLongValue(NumStrpsThreeMissingBlkMetric, registry); MetricsLongValue numStrpsFourMissingBlk = new MetricsLongValue(NumStrpsFourMissingBlkMetric, registry); MetricsLongValue numStrpsFiveMoreMissingBlk = new MetricsLongValue(NumStrpsFiveMoreMissingBlkMetric, registry); MetricsLongValue numFilesToFixDropped = new MetricsLongValue(NumFilesToFixDroppedMetric, registry); MetricsTimeVaryingLong numFileFixReadBytesRemoteRack = new MetricsTimeVaryingLong(numFileFixReadBytesRemoteRackMetric, registry); Map<String, Map<RaidState, MetricsLongValue>> sourceFiles; Map<String, Map<RaidState, MetricsLongValue>> sourceBlocks; Map<String, Map<RaidState, MetricsLongValue>> sourceBytes; Map<String, Map<RaidState, MetricsLongValue>> sourceLogical; Map<String, MetricsLongValue> parityFiles; Map<String, MetricsLongValue> parityBlocks; Map<String, MetricsLongValue> parityBytes; Map<String, MetricsLongValue> parityLogical; Map<String, MetricsLongValue> corruptFiles = null; Map<String, MetricsLongValue> underRedundantFiles = null; MetricsLongValue effectiveReplicationTimes1000 = new MetricsLongValue("effective_replication_1000", registry); MetricsLongValue saving = new MetricsLongValue("saving", registry); Map<String, MetricsLongValue> savingForCode = new HashMap<String, MetricsLongValue>(); MetricsLongValue corruptFilesHighPri = new MetricsLongValue(corruptFilesHighPriMetric, registry); MetricsLongValue corruptFilesLowPri = new MetricsLongValue(corruptFilesLowPriMetric, registry); MetricsLongValue decomFilesLowPri = new MetricsLongValue(decomFilesLowPriMetric, registry); MetricsLongValue decomFilesLowestPri = new MetricsLongValue(decomFilesLowestPriMetric, registry); // LogMetrics record the metrics for every logging into scribe // The key of logMetrics is cluster_logType_result Map<String, MetricsTimeVaryingLong> logMetrics = new HashMap<String, MetricsTimeVaryingLong>(); public static RaidNodeMetrics getInstance(int namespaceId) { RaidNodeMetrics metric = instances.get(namespaceId); if (metric == null) { metric = new RaidNodeMetrics(); RaidNodeMetrics old = instances.putIfAbsent(namespaceId, metric); if (old != null) { metric = old; } } return metric; } public static void clearInstances() { instances.clear(); } private RaidNodeMetrics() { // Create a record for raid metrics context = MetricsUtil.getContext("raidnode"); metricsRecord = MetricsUtil.createRecord(context, "raidnode"); context.registerUpdater(this); initPlacementMetrics(); initSourceMetrics(); initParityMetrics(); LOG.info("RaidNode Metrics is initialized"); } private void initPlacementMetrics() { codecToMisplacedBlocks = new HashMap<String, Map<Integer, MetricsLongValue>>(); for (Codec codec : Codec.getCodecs()) { Map<Integer, MetricsLongValue> m = new HashMap<Integer, MetricsLongValue>(); for (int i = 0; i < MAX_MONITORED_MISPLACED_BLOCKS; ++i) { m.put(i, new MetricsLongValue(misplacedMetricHeader + "_" + codec.id + "_" + i, registry)); } codecToMisplacedBlocks.put(codec.id, m); } } private void initSourceMetrics() { sourceFiles = createSourceMap(); sourceBlocks = createSourceMap(); sourceBytes = createSourceMap(); sourceLogical = createSourceMap(); for (Codec codec : Codec.getCodecs()) { for (RaidState state : RaidState.values()) { String head = (codec.id + "_" + state + "_").toLowerCase(); createSourceMetrics(sourceFiles, codec.id, state, head + "files"); createSourceMetrics(sourceBlocks, codec.id, state, head + "blocks"); createSourceMetrics(sourceBytes, codec.id, state, head + "bytes"); createSourceMetrics(sourceLogical, codec.id, state, head + "logical"); } } } public synchronized void initCorruptFilesMetrics(Configuration conf) { if (corruptFiles == null) { String[] dirs = DistBlockIntegrityMonitor.getCorruptMonitorDirs(conf); corruptFiles = new HashMap<String, MetricsLongValue>(); for (String dir: dirs) { String name = dir + "_corrupt_files"; corruptFiles.put(dir, new MetricsLongValue(name, registry)); } } } public synchronized void initUnderRedundantFilesMetrics(Configuration conf) { if (underRedundantFiles == null) { String[] dirs = DistBlockIntegrityMonitor.getCorruptMonitorDirs(conf); underRedundantFiles = new HashMap<String, MetricsLongValue>(); for (String dir: dirs) { String name = "under_redundant_files_" + dir; underRedundantFiles.put(dir, new MetricsLongValue(name, registry)); } String name = "under_redundant_files_" + BlockIntegrityMonitor.OTHERS; underRedundantFiles.put(BlockIntegrityMonitor.OTHERS, new MetricsLongValue(name, registry)); } } private void createSourceMetrics( Map<String, Map<RaidState, MetricsLongValue>> m, String code, RaidState state, String name) { Map<RaidState, MetricsLongValue> innerMap = m.get(code); innerMap.put(state, new MetricsLongValue(name, registry)); } private Map<String, Map<RaidState, MetricsLongValue>> createSourceMap() { Map<String, Map<RaidState, MetricsLongValue>> result = new HashMap<String, Map<RaidState, MetricsLongValue>>(); for (Codec codec : Codec.getCodecs()) { Map<RaidState, MetricsLongValue> m = new HashMap<RaidState, MetricsLongValue>(); for (RaidState state : RaidState.values()) { m.put(state, null); } m = new EnumMap<RaidState, MetricsLongValue>(m); result.put(codec.id, m); } return result; } private void initParityMetrics() { parityFiles = createParityMap(); parityBlocks = createParityMap(); parityBytes = createParityMap(); parityLogical = createParityMap(); for (Codec codec : Codec.getCodecs()) { String code = codec.id; String head = (code + "_parity_").toLowerCase(); createParityMetrics(parityFiles, code, head + "files"); createParityMetrics(parityBlocks, code, head + "blocks"); createParityMetrics(parityBytes, code, head + "bytes"); createParityMetrics(parityLogical, code, head + "logical"); String savingName = ("saving_" + code).toLowerCase(); savingForCode.put(code, new MetricsLongValue(savingName, registry)); } } private void createParityMetrics( Map<String, MetricsLongValue> m, String code, String name) { m.put(code, new MetricsLongValue(name, registry)); } private Map<String, MetricsLongValue> createParityMap() { Map<String, MetricsLongValue> m = new HashMap<String, MetricsLongValue>(); for (Codec codec : Codec.getCodecs()) { m.put(codec.id, null); } return m; } public MetricsRegistry getMetricsRegistry() { return this.registry; } @Override public void doUpdates(MetricsContext context) { synchronized (this) { for (MetricsBase m : registry.getMetricsList()) { m.pushMetric(metricsRecord); } } metricsRecord.update(); } }