/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode;

import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.FSDataset.FSVolume;
import org.apache.hadoop.hdfs.server.datanode.FSDataset.FSVolumeSet;
import org.apache.hadoop.hdfs.server.datanode.metrics.DatanodeThreadLivenessReporter.BackgroundThread;

/**
 * The class that periodically flushes Block CRC information to disk.
 *
 * It is supposed to run as a daemon. The daemon wakes up periodically and
 * flushes CRC information volume by volume. The procedure loops over every
 * known namespace; for each namespace:
 *
 * 1. Find all volumes for the namespace. Only include the volumes whose
 *    block CRC information has already been loaded from disk (whether the
 *    load succeeded or failed).
 * 2. Build the block -> CRC mapping for all those volumes, sorted by
 *    volume and bucket.
 * 3. For every volume, flush the CRC information to the volume's CRC file.
 *
 * Memory consumption could potentially be reduced by removing the temporary
 * map and streaming the data from the block map all the way to the files.
 * However, such code is tricky to write without disturbing normal block
 * read/write operations. Since we have not seen memory pressure on the
 * datanode so far, we go with the simpler approach.
 */
class BlockCrcMapFlusher implements Runnable {
  public static final Log LOG = LogFactory.getLog(BlockCrcMapFlusher.class);

  final private DataNode datanode;
  final private VolumeMap volumeMap;
  final private FSVolumeSet fsVolumeSet;
  final private long flushInterval;
  long lastFlushed;
  private volatile boolean isClosed = false;

  BlockCrcMapFlusher(DataNode datanode, VolumeMap volumeMap,
      FSVolumeSet fsVolumeSet, long flushInterval) {
    if (datanode == null) {
      throw new NullPointerException();
    }
    this.datanode = datanode;
    this.volumeMap = volumeMap;
    this.fsVolumeSet = fsVolumeSet;
    this.flushInterval = flushInterval;
    this.lastFlushed = System.currentTimeMillis();
  }

  public void setClose() {
    this.isClosed = true;
  }
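  // A minimal sketch of how a caller might drive this flusher as a daemon
  // thread. The thread name and the interval value are illustrative
  // assumptions, not taken from DataNode itself:
  //
  //   BlockCrcMapFlusher flusher = new BlockCrcMapFlusher(
  //       datanode, volumeMap, fsVolumeSet, 3600 * 1000L); // assumed 1h
  //   Thread t = new Thread(flusher, "BlockCrcMapFlusher");
  //   t.setDaemon(true);
  //   t.start();
  //   ...
  //   // On shutdown: ask the loop to exit, then wake it from sleep().
  //   flusher.setClose();
  //   t.interrupt();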
  @Override
  public void run() {
    try {
      while (!isClosed) {
        datanode
            .updateAndReportThreadLiveness(BackgroundThread.BLOCK_CRC_FLUSHER);
        long timeNow = System.currentTimeMillis();
        if (lastFlushed + flushInterval <= timeNow) {
          try {
            Integer[] namespaces = volumeMap.getNamespaceList();
            // Process every namespace.
            for (Integer ns : namespaces) {
              NamespaceMap nsm = volumeMap.getNamespaceMap(ns);
              if (nsm == null) {
                LOG.info("Cannot find namespace map for namespace " + ns
                    + ". It's probably deleted.");
                continue;
              }
              FSVolume[] fsVolumes = fsVolumeSet.getVolumes();
              List<FSVolume> volumesToFlush = new ArrayList<FSVolume>();
              // Only pick the volumes for which a load of the on-disk block
              // CRC file has been attempted, so that the new file contains
              // at least all the information from the previous on-disk file.
              for (FSVolume volume : fsVolumes) {
                if (volume.isNamespaceBlockCrcLoaded(ns)) {
                  volumesToFlush.add(volume);
                } else {
                  LOG.info("Block CRC file for Volume " + volume
                      + " for namespace " + ns
                      + " is not loaded yet. Skip flushing...");
                }
              }
              Map<FSVolume, List<Map<Block, DatanodeBlockInfo>>> map = nsm
                  .getBlockCrcPerVolume(volumesToFlush);
              if (map == null) {
                continue;
              }
              for (Map.Entry<FSVolume, List<Map<Block, DatanodeBlockInfo>>> entry : map
                  .entrySet()) {
                // For every volume of the namespace, write the information
                // to disk.
                if (entry.getValue().size() == 0) {
                  continue;
                }
                datanode
                    .updateAndReportThreadLiveness(BackgroundThread.BLOCK_CRC_FLUSHER);
                FSVolume volume = entry.getKey();
                File crcFile = volume.getBlockCrcFile(ns);
                File crcTmpFile = volume.getBlockCrcTmpFile(ns);
                if (crcFile == null || crcTmpFile == null) {
                  LOG.warn("Cannot find CRC file to flush for namespace "
                      + ns);
                  continue;
                }
                crcTmpFile.delete();
                if (crcTmpFile.exists()) {
                  LOG.warn("Not able to delete file "
                      + crcTmpFile.getAbsolutePath() + ". skip the volume");
                  continue;
                }
                try {
                  List<Map<Block, DatanodeBlockInfo>> mbds = entry.getValue();
                  // Write to a temporary file first and rename it into
                  // place, so that a crash mid-write never leaves a
                  // truncated CRC file under the final name.
                  DataOutputStream dos = new DataOutputStream(
                      new FileOutputStream(crcTmpFile));
                  try {
                    writeToCrcFile(mbds, dos);
                  } finally {
                    dos.close();
                  }
                  crcFile.delete();
                  crcTmpFile.renameTo(crcFile);
                  LOG.info("Flushed Block CRC file for Volume " + volume
                      + " for namespace " + ns + ".");
                } catch (InterruptedIOException e) {
                  LOG.info("InterruptedIOException ", e);
                  return;
                } catch (IOException e) {
                  LOG.warn("flushing namespace " + ns + " volume " + volume
                      + " failed.", e);
                }
              }
            }
          } finally {
            lastFlushed = timeNow;
          }
        } else {
          // Wait until the flush interval, reporting thread liveness about
          // once a second while sleeping.
          long sleepTimeLeft = lastFlushed + flushInterval - timeNow;
          while (sleepTimeLeft > 0) {
            long sleepInterval = 1000;
            long timeToSleep = Math.min(sleepInterval, sleepTimeLeft);
            Thread.sleep(timeToSleep);
            datanode
                .updateAndReportThreadLiveness(BackgroundThread.BLOCK_CRC_FLUSHER);
            sleepTimeLeft -= timeToSleep;
          }
        }
      }
    } catch (InterruptedException e) {
      LOG.info("BlockCrcMapFlusher interrupted");
    } finally {
      LOG.info("BlockCrcMapFlusher exiting...");
    }
  }

  static void writeToCrcFile(List<Map<Block, DatanodeBlockInfo>> mbds,
      DataOutput dataOut) throws IOException {
    BlockCrcFileWriter writer = new BlockCrcFileWriter(dataOut,
        BlockCrcInfoWritable.LATEST_BLOCK_CRC_FILE_VERSION, mbds.size());
    writer.writeHeader();
    for (Map<Block, DatanodeBlockInfo> mbd : mbds) {
      writer.writeBucket(mbd);
    }
    writer.checkFinish();
  }
}
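// A minimal sketch of exercising writeToCrcFile() in isolation, e.g. from a
// unit test. The bucket contents and the output path are illustrative
// assumptions:
//
//   List<Map<Block, DatanodeBlockInfo>> buckets =
//       new ArrayList<Map<Block, DatanodeBlockInfo>>();
//   buckets.add(new java.util.HashMap<Block, DatanodeBlockInfo>());
//   DataOutputStream out = new DataOutputStream(
//       new FileOutputStream(new File("/tmp/blockCrc.tmp")));
//   try {
//     BlockCrcMapFlusher.writeToCrcFile(buckets, out);
//   } finally {
//     out.close();
//   }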