/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.IOException;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Deletes sub-directories of a given directory whose path matches a pattern
 * and whose modification time is older than a configurable threshold.
 *
 * <p>The cleaner can be used in two ways:
 * <ul>
 *   <li>as a periodic daemon thread (six-argument constructor, then
 *       {@link #start()}), which repeats the clean-up every
 *       {@code cleanInterval} milliseconds;</li>
 *   <li>as a one-shot task (five-argument constructor, then {@link #run()}),
 *       which performs a single pass and returns.</li>
 * </ul>
 */
public class ExpireUnusedJobFiles extends Thread {
  /** Logger. */
  private static final Log LOG =
      LogFactory.getLog(ExpireUnusedJobFiles.class);

  /** Clock used to obtain the current time for the age comparison. */
  private final Clock clock;
  /** The directory whose sub-directories are candidates for deletion. */
  private final Path dirToClean;
  /** Configuration used to resolve the filesystem of {@link #dirToClean}. */
  private final Configuration conf;
  /** Clean threshold in milliseconds. */
  private final long cleanThreshold;
  /** Pattern that a sub-directory path must match to be deleted. */
  private final Pattern fileToCleanPattern;
  /**
   * Pause between two passes, in milliseconds (passed to
   * {@link Thread#sleep(long)}); a non-positive value means a single pass.
   */
  private final long cleanInterval;

  /**
   * Creates a periodic cleaner. The thread is marked as a daemon.
   *
   * @param clock The clock.
   * @param conf The configuration used to obtain the filesystem.
   * @param dirToClean The directory to be cleaned
   * @param fileToCleanPattern the pattern for the sub-directory path
   * @param cleanThreshold the minimum age (in milliseconds) before a matching
   *        sub-directory is deleted
   * @param cleanInterval the interval (in milliseconds) between two passes;
   *        a non-positive value results in a single pass
   */
  public ExpireUnusedJobFiles(
      Clock clock, Configuration conf,
      Path dirToClean, Pattern fileToCleanPattern,
      long cleanThreshold, long cleanInterval) {
    this.clock = clock;
    this.conf = conf;
    this.dirToClean = dirToClean;
    this.fileToCleanPattern = fileToCleanPattern;
    this.cleanThreshold = cleanThreshold;
    this.cleanInterval = cleanInterval;
    setDaemon(true);
    LOG.info("ExpireUnusedJobFiles created with " +
        " path = " + dirToClean +
        " cleanInterval = " + cleanInterval +
        " cleanThreshold = " + cleanThreshold);
  }

  /**
   * Creates a one-shot cleaner: {@link #run()} performs a single pass.
   *
   * @param clock The clock.
   * @param conf The configuration used to obtain the filesystem.
   * @param dirToClean The directory to be cleaned
   * @param fileToCleanPattern the pattern for the sub-directory path
   * @param cleanThreshold the minimum age (in milliseconds) before a matching
   *        sub-directory is deleted
   */
  public ExpireUnusedJobFiles(
      Clock clock, Configuration conf,
      Path dirToClean, Pattern fileToCleanPattern,
      long cleanThreshold) {
    this.clock = clock;
    this.conf = conf;
    this.dirToClean = dirToClean;
    this.fileToCleanPattern = fileToCleanPattern;
    this.cleanThreshold = cleanThreshold;
    this.cleanInterval = 0;
  }

  /**
   * Runs the clean-up loop.
   *
   * <p>Each pass lists {@link #dirToClean} and deletes expired matching
   * sub-directories. The loop ends when the directory does not exist, when
   * no positive interval was configured (single pass), or when the thread is
   * interrupted while sleeping between passes.
   */
  @Override
  public void run() {
    while (true) {
      // Sampled once per pass so every entry is compared to the same instant.
      long currentTime = clock.getTime();
      try {
        LOG.info(Thread.currentThread().getId() + ":Trying to clean " + dirToClean);
        FileSystem fs = dirToClean.getFileSystem(conf);
        if (!fs.exists(dirToClean)) {
          // NOTE: this also terminates a periodic cleaner for good.
          LOG.info(dirToClean + " doesn't exist");
          return;
        }
        cleanOnce(fs, currentTime);
      } catch (IOException ioe) {
        LOG.error("IOException when clearing dir ", ioe);
      }
      // Non-positive interval: one-shot mode (a negative value would make
      // Thread.sleep throw IllegalArgumentException).
      if (cleanInterval <= 0) {
        return;
      }
      try {
        Thread.sleep(cleanInterval);
      } catch (InterruptedException e) {
        // Preserve the interrupt status and stop instead of silently
        // continuing, so the thread can be shut down by interruption.
        Thread.currentThread().interrupt();
        LOG.info("Interrupted, stop cleaning " + dirToClean);
        return;
      }
    }
  }

  /**
   * Performs one pass over the direct children of {@link #dirToClean} and
   * recursively deletes every expired, matching sub-directory.
   *
   * @param fs the filesystem that holds {@link #dirToClean}
   * @param currentTime the reference time for the age comparison
   * @throws IOException if the directory cannot be listed
   */
  private void cleanOnce(FileSystem fs, long currentTime) throws IOException {
    RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(dirToClean);
    while (entries.hasNext()) {
      LocatedFileStatus entry = entries.next();
      if (!isExpired(entry, currentTime)) {
        continue;
      }
      Path subDirPath = entry.getPath();
      LOG.info("Delete " + subDirPath);
      try {
        // recursively delete all the files/dirs
        if (!fs.delete(subDirPath, true)) {
          LOG.warn("Failed to delete " + subDirPath);
        }
      } catch (IOException ioe) {
        // A failure on one directory must not prevent cleaning the others.
        LOG.error("IOException when deleting " + subDirPath, ioe);
      }
    }
  }

  /**
   * Tells whether an entry is a directory whose path matches the pattern and
   * whose modification time is older than the clean threshold.
   *
   * @param entry the directory entry to examine
   * @param currentTime the reference time for the age comparison
   * @return {@code true} if the entry should be deleted
   */
  private boolean isExpired(LocatedFileStatus entry, long currentTime) {
    if (!entry.isDir()) {
      return false;
    }
    String dirname = entry.getPath().toUri().getPath();
    Matcher m = fileToCleanPattern.matcher(dirname);
    // find(): the pattern may match anywhere in the path.
    return m.find()
        && currentTime - entry.getModificationTime() > cleanThreshold;
  }

  /**
   * Command-line entry point: performs a single clean-up pass synchronously.
   *
   * @param args {@code path pattern thresholdsec}: the directory to clean,
   *        the regular expression for sub-directory paths and the age
   *        threshold in seconds
   * @throws IOException declared for compatibility with existing callers
   */
  public static void main(String[] args) throws IOException {
    String usage =
        "Usage: " + ExpireUnusedJobFiles.class + " path pattern thresholdsec";
    if (args.length < 3) {
      System.err.println(usage);
      System.exit(1);
    }
    long clearThreshold;
    try {
      clearThreshold = Long.parseLong(args[2]) * 1000L;
    } catch (NumberFormatException nfe) {
      System.err.println("Invalid thresholdsec: " + args[2]);
      System.err.println(usage);
      System.exit(1);
      return; // unreachable; satisfies definite assignment
    }
    Configuration conf = new Configuration();
    Path dir = new Path(args[0]);
    Pattern p = Pattern.compile(args[1]);
    ExpireUnusedJobFiles expire =
        new ExpireUnusedJobFiles(new Clock(), conf, dir, p, clearThreshold);
    // Called directly (not start()) so the pass completes before main exits.
    expire.run();
  }
}