/*************************************************************************
 * Copyright 2009-2012 Eucalyptus Systems, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses/.
 *
 * Please contact Eucalyptus Systems, Inc., 6755 Hollister Ave., Goleta
 * CA 93117, USA or visit http://www.eucalyptus.com/licenses/ if you need
 * additional information or have any questions.
 ************************************************************************/
package com.eucalyptus.troubleshooting.checker;

import java.io.File;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import com.eucalyptus.records.Logs;
import org.apache.log4j.Logger;

import com.eucalyptus.component.ComponentId;
import com.eucalyptus.component.ComponentIds;
import com.eucalyptus.component.Faults;
import com.eucalyptus.component.id.Eucalyptus;
import com.eucalyptus.system.Threads;

/**
 * <p>
 * DiskResourceCheck can be used by any eucalyptus component (walrus, SC, NC etc...) to perform periodic checks on disk space and warn the user when the system
 * runs low on space. This class provides a static method to {@link #start(Checker) start} the disk resource check for a particular location at a specified
 * interval.
 * </p>
 * <p>
 * {@link ScheduledExecutorService} is used for scheduling the disk space checks at configurable intervals. The thread pool size is limited to 1
 * </p>
 * <p>
 * If the system is running low on disk space a fault is recorded in the log file for the specified component. Subsequent faults for the same location are not
 * logged until the state is reset for that location. A state reset occurs when the file location has enough free space
 * </p>
 */
public class DiskResourceCheck {
  private final static Logger LOG = Logger.getLogger(DiskResourceCheck.class);

  /** Single-threaded scheduler shared by every checker started through {@link #start(Checker)}. */
  private static final ScheduledExecutorService pool = Executors.newSingleThreadScheduledExecutor( Threads.threadFactory( "ts-disk-check-pool-%d" ) );

  /** Identifier of the fault that is logged when a location drops below its free space threshold. */
  private static final int OUT_OF_DISK_SPACE_FAULT_ID = 1003;

  /** Delay between consecutive checks, in milliseconds, used when the caller does not supply one. */
  private final static long DEFAULT_POLL_INTERVAL = 5 * 1000;

  /** Component the fault is attributed to when the caller does not supply one. */
  private static final Class<? extends ComponentId> DEFAULT_COMPONENT_ID_CLASS = Eucalyptus.class;

  /**
   * Marking the constructor private on purpose, so that no code can instantiate an object of this class
   */
  private DiskResourceCheck() {
  }

  /**
   * <p>
   * Kicks off an infinite series of disk resource checks with a delay in between consecutive checks. {@link ScheduledExecutorService#scheduleWithFixedDelay
   * Executor service framework} is used for scheduling the worker thread, {@link Checker checker}, at regular intervals. The time delay, file location, logic
   * for disk space check and other configuration is provided by checker
   * </p>
   *
   * <p>
   * This method returns a {@link ScheduledFuture} object that can be used by the caller to cancel the execution. Thread execution can also be cancelled by
   * shutting down the executor service
   * </p>
   *
   * <p>
   * The first check is submitted with no initial delay. The checker's poll interval is interpreted as milliseconds and is passed straight to
   * {@code scheduleWithFixedDelay}, which rejects a delay that is not strictly positive with an {@link IllegalArgumentException}
   * </p>
   *
   * @param checker
   *          the worker to schedule; must not be null
   * @return ScheduledFuture handle for the scheduled series of checks
   */
  public static ScheduledFuture<?> start(Checker checker) {
    return pool.scheduleWithFixedDelay(checker, 0, checker.pollInterval, TimeUnit.MILLISECONDS);
  }

  // Someone should be calling this, currently no one is. Might be a nice thing to say hello to in the service shutdown hooks. Although it might
  // complicate stuff when multiple services are using it
  /**
   * Shuts the shared scheduler down immediately via {@link ScheduledExecutorService#shutdownNow()}. This affects every checker that was started through
   * {@link #start(Checker)}, since they all share the same pool
   */
  public static void shutdown() {
    pool.shutdownNow();
  }

  /**
   * A file system location to monitor together with its free space threshold. The threshold is either an absolute number of bytes or a percentage of the
   * total space of the location, depending on which constructor was used. Two instances are equal when they refer to the same {@link File}; the threshold
   * does not take part in {@link #equals(Object)} or {@link #hashCode()}
   */
  public static class LocationInfo {
    private final File file;
    private final Long minimumFreeSpace;
    private final Double percentFreeSpace;

    /**
     * @return the location being monitored
     */
    public File getFile() {
      return file;
    }

    /**
     * Returns the free space threshold in bytes. When an absolute minimum was supplied it is returned as is; otherwise the threshold is the configured
     * percentage of {@link File#getTotalSpace()}, evaluated on every call
     *
     * @return threshold in bytes
     * @throws NullPointerException
     *           if no absolute minimum was supplied and either the file or the percentage is null
     */
    public Long getThreshold() {
      if (null != this.minimumFreeSpace) {
        return this.minimumFreeSpace;
      }
      return (long) (this.file.getTotalSpace() * this.percentFreeSpace / 100);
    }

    /**
     * Constructor to be used when free space is an absolute quantity in bytes
     *
     * @param file
     *          location to monitor
     * @param minimumFreeSpace
     *          minimum usable space, in bytes, below which a fault is raised
     */
    public LocationInfo(File file, Long minimumFreeSpace) {
      this.file = file;
      this.minimumFreeSpace = minimumFreeSpace;
      this.percentFreeSpace = null;
    }

    /**
     * Constructor to be used when free space is a percentage of the total space available
     *
     * @param file
     *          location to monitor
     * @param percentFreeSpace
     *          minimum usable space, as a percentage of the total space, below which a fault is raised
     */
    public LocationInfo(File file, Double percentFreeSpace) {
      this.file = file;
      this.minimumFreeSpace = null;
      this.percentFreeSpace = percentFreeSpace;
    }

    // hashCode() and equals() are defined since instances are stored in Sets. Both are based on the file only
    @Override
    public int hashCode() {
      final int prime = 31;
      return prime + ((file == null) ? 0 : file.hashCode());
    }

    @Override
    public boolean equals(Object obj) {
      if (this == obj) {
        return true;
      }
      if (obj == null || getClass() != obj.getClass()) {
        return false;
      }
      LocationInfo other = (LocationInfo) obj;
      return (file == null) ? (other.file == null) : file.equals(other.file);
    }
  }

  /**
   * Worker thread that holds the logic for disk space checks and all the relevant information required. An instance of this class is fed to
   * {@link ScheduledExecutorService#scheduleWithFixedDelay} method
   */
  public static class Checker implements Runnable {
    private final Set<LocationInfo> locations = new HashSet<LocationInfo>();
    private final long pollInterval;
    private final Class<? extends ComponentId> componentIdClass;

    /** Locations for which a fault has been logged and that have not recovered since. */
    private final Set<LocationInfo> alreadyFaulted = new HashSet<LocationInfo>();

    /**
     * Creates a checker for a single location using the default poll interval and the default component
     *
     * @param locationInfo
     *          location to monitor
     */
    public Checker(LocationInfo locationInfo) {
      this.locations.add(locationInfo);
      this.pollInterval = DEFAULT_POLL_INTERVAL;
      this.componentIdClass = DEFAULT_COMPONENT_ID_CLASS;
    }

    /**
     * Creates a checker for a single location
     *
     * @param locationInfo
     *          location to monitor
     * @param componentIdClass
     *          component the fault is logged for
     * @param pollTime
     *          delay between consecutive checks, in milliseconds
     */
    public Checker(LocationInfo locationInfo, Class<? extends ComponentId> componentIdClass, long pollTime) {
      this.locations.add(locationInfo);
      this.componentIdClass = componentIdClass;
      this.pollInterval = pollTime;
    }

    /**
     * Creates a checker for several locations. Locations that are equal to each other (same file) are checked only once
     *
     * @param locations
     *          locations to monitor; must not be null
     * @param componentIdClass
     *          component the fault is logged for
     * @param pollTime
     *          delay between consecutive checks, in milliseconds
     */
    public Checker(List<LocationInfo> locations, Class<? extends ComponentId> componentIdClass, long pollTime) {
      this.locations.addAll(locations);
      this.componentIdClass = componentIdClass;
      this.pollInterval = pollTime;
    }

    /**
     * Checks every configured location once. A failure while checking one location is logged and does not prevent the remaining locations from being
     * checked
     */
    @Override
    public void run() {
      for (LocationInfo location : this.locations) {
        // Everything that touches the location, including the debug message and the error message, is kept inside the try/catch. An exception that
        // reaches the executor would suppress all subsequent executions of this task
        try {
          checkLocation(location);
        } catch (Exception ex) {
          // The location is left in the list, so it is checked again on the next poll
          LOG.error("Disk resource check failed for " + describe(location), ex);
        }
      }
    }

    /**
     * Compares the usable space of a single location with its threshold. A fault is logged the first time the location is found below the threshold; the
     * location is then remembered so the fault is not repeated until the location has recovered
     */
    private void checkLocation(LocationInfo location) {
      final File file = location.getFile();
      // The threshold may be derived from the total space of the location, so it is computed once per poll and reused
      final long threshold = location.getThreshold();
      Logs.extreme().debug("Polling disk " + file + ", pollInterval=" + pollInterval + ", threshold = " + threshold);

      final long usableSpace = file.getUsableSpace();
      if (usableSpace >= threshold) {
        // Enough space again: reset the state. Removing a location that is not in the set is a harmless no-op
        this.alreadyFaulted.remove(location);
        return;
      }

      if (!this.alreadyFaulted.contains(location)) {
        Faults.forComponent(this.componentIdClass).havingId(OUT_OF_DISK_SPACE_FAULT_ID)
            .withVar("component", ComponentIds.lookup(this.componentIdClass).getFaultLogPrefix())
            .withVar("file", file.getAbsolutePath()).log();
        // Recorded only after the fault was logged successfully, so a failed attempt is retried on the next poll
        this.alreadyFaulted.add(location);
      }
    }

    /**
     * Builds a description of a location for error messages without ever throwing, so that it is safe to call from an exception handler
     */
    private static String describe(LocationInfo location) {
      if (location == null) {
        return "<null location>";
      }
      final File file = location.getFile();
      if (file == null) {
        return "<null file>";
      }
      try {
        return file.getAbsolutePath();
      } catch (RuntimeException ignored) {
        // Resolving the absolute path can be refused by a security manager; the path as given is still useful for diagnosis
        return file.getPath();
      }
    }
  }
}