/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.task.map;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import com.addthis.basis.util.LessStrings;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DataPurgeServiceImpl implements DataPurgeService {
private static final Logger logger = LoggerFactory.getLogger(DataPurgeServiceImpl.class);
private static final String dirSeperator = File.separator;
private static final String dirRegexSeperator = File.separator.equals("\\") ? "\\\\" : File.separator;
public DataPurgeServiceImpl() {
}
@Override
public boolean purgeData(DataPurgeConfig dataPurgeConfig, DateTime currentTime) {
if (!validatePurgeParameters(dataPurgeConfig, currentTime)) {
return false;
}
DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern(dataPurgeConfig.getDatePathFormat());
DateTime oldestDataAllowed;
if (dataPurgeConfig.getMaxAgeInDays() > 0) {
oldestDataAllowed = currentTime.plusDays(-dataPurgeConfig.getMaxAgeInDays());
} else {
oldestDataAllowed = currentTime.plusHours(-dataPurgeConfig.getMaxAgeInHours());
}
logger.debug("Oldest data allowed {} , current time is {}", new Object[]{oldestDataAllowed, currentTime});
for (String directoryPrefix : dataPurgeConfig.getDirectoryPrefix()) {
for (File prefixDirectory : expandPrefix(directoryPrefix)) {
List<File> subdirectories = getSubdirectoryList(prefixDirectory, null);
for (File subdirectory : subdirectories) {
logger.trace("Considering directory {} for purge", subdirectory);
safeDelete(prefixDirectory.getPath(), dateTimeFormatter, oldestDataAllowed, subdirectory, dataPurgeConfig.isFileBasedPurge(), dataPurgeConfig.getDateStartIndex(),
dataPurgeConfig.getDateStringLength());
}
if (dataPurgeConfig.getCleanEmptyParents()) {
for (File directory : subdirectories) {
if (directory.list() != null && directory.list().length == 0) {
try {
FileUtils.deleteDirectory(directory);
} catch (IOException e) {
logger.warn("Failed to delete empty directory {}", directory);
}
}
}
}
}
}
return true;
}
protected List<File> generateDirectoryList(String prefixDirectory) {
List<File> directoryList = new LinkedList<>();
for (File directory : expandPrefix(prefixDirectory)) {
logger.trace("prefix expanded {} to {}", prefixDirectory, directory);
getSubdirectoryList(directory, directoryList);
}
return directoryList;
}
protected void safeDelete(String directoryPrefix, DateTimeFormatter dateTimeFormatter, DateTime oldestDataAllowed, File directory, boolean fileBasedPurge, int dateStartIndex, int dateStringLength) {
String dateString;
if (fileBasedPurge) {
File[] fileList = directory.listFiles(new FileFilter() {
@Override
public boolean accept(File file) {
return file.isFile();
}
});
if (fileList != null && fileList.length > 0) {
for (File file : fileList) {
String fileName = file.getName();
dateString = fileName.substring(dateStartIndex, dateStringLength + dateStartIndex);
if (shouldDelete(dateTimeFormatter, oldestDataAllowed, dateString)) {
delete(file);
}
}
}
} else {
String directoryStr = directory.getPath().replace(directoryPrefix, "");
if (directoryStr.startsWith(dirSeperator)) {
directoryStr = directoryStr.substring(1);
}
if (shouldDelete(dateTimeFormatter, oldestDataAllowed, directoryStr)) {
delete(directory);
}
}
}
private void delete(File file) {
String name;
try {
name = file.getCanonicalPath();
} catch (IOException e) {
name = "(unk-path)" + file.getName();
}
logger.debug("Deleting: " + name);
try {
if (file.isDirectory()) {
FileUtils.deleteDirectory(file);
} else {
FileUtils.deleteQuietly(file);
}
} catch (IOException e) {
logger.error("error purging : " + file, e);
}
}
protected boolean shouldDelete(DateTimeFormatter dateTimeFormatter, DateTime oldestDataAllowed, String dateString) {
boolean result = false;
DateTime time = null;
try {
time = dateTimeFormatter.parseDateTime(dateString);
} catch (Exception e) {
// ignore this directory
}
if (time != null && time.isBefore(oldestDataAllowed)) {
result = true;
}
return result;
}
protected List<File> expandPrefix(String path) {
if (path.indexOf('*') == -1) {
LinkedList<File> list = new LinkedList<>();
list.add(new File(path));
return list;
}
File cur = path.startsWith(dirSeperator) ? new File(dirSeperator) : new File(".");
LinkedList<File> list = new LinkedList<>();
String[] tokens = LessStrings.splitArray(path, dirRegexSeperator);
expandPrefix(list, cur, tokens, 0);
return list;
}
protected void expandPrefix(List<File> list, File cur, String[] tokens, int index) {
if (index == tokens.length) {
if (cur.isDirectory() && cur.exists()) {
list.add(cur);
}
return;
}
String tok = tokens[index];
if (tok.indexOf('*') >= 0) {
FileFilter fileFilter = new WildcardFileFilter(tok);
File[] find = cur.listFiles(fileFilter);
if (find != null) {
for (File found : find) {
if (found.isDirectory()) {
expandPrefix(list, found, tokens, index + 1);
}
}
}
} else {
expandPrefix(list, new File(cur, tok), tokens, index + 1);
}
}
/**
* recursively add subdirectories into the directoryList
*/
protected List<File> getSubdirectoryList(File current, List<File> directoryList) {
if (directoryList == null) {
directoryList = new ArrayList<>();
}
directoryList.add(current);
if (current.isDirectory()) {
File[] fileArray = current.listFiles(new FileFilter() {
@Override
public boolean accept(File file) {
return file.isDirectory();
}
});
if (fileArray != null) {
for (File directory : fileArray) {
getSubdirectoryList(directory, directoryList);
}
}
}
return directoryList;
}
private boolean validatePurgeParameters(DataPurgeConfig dataPurgeConfig, DateTime currentTime) {
if (dataPurgeConfig.getDirectoryPrefix() == null || dataPurgeConfig.getDirectoryPrefix().length == 0) {
logger.error("Directory prefix can not be null or blank");
return false;
}
if (dataPurgeConfig.getDatePathFormat() == null || dataPurgeConfig.getDatePathFormat().isEmpty()) {
logger.error("Date path format can not be null or blank");
return false;
}
if (currentTime == null) {
logger.error("Current time can not be null");
return false;
}
if (dataPurgeConfig.getMaxAgeInDays() <= 0 && dataPurgeConfig.getMaxAgeInHours() <= 0) {
logger.error("max age must be > 0");
return false;
}
if (dataPurgeConfig.isFileBasedPurge() && dataPurgeConfig.getDateStartIndex() < 0) {
logger.error("File based purges require the dataStartIndex to be set");
return false;
}
if (dataPurgeConfig.getDateStartIndex() >= 0 && dataPurgeConfig.getDateStringLength() < 0) {
logger.error("Date start index was set but date string length was not defined");
return false;
}
return true;
}
}