package org.archive.hadoop.pig;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.archive.util.ArchiveUtils;
public class DateFilter extends FirstPigJobOnlyFilter implements PathFilter
{
protected final static Log LOGGER = LogFactory.getLog(DateFilter.class);
protected Date date1;
protected Date date2;
protected String paramString = null;
protected String extFilter = null;
protected PrintWriter fileWriter = null;
protected FileSystem fs = null;
protected DateFilter.CompareOp op1 = null;
protected DateFilter.CompareOp op2 = null;
public final static String DATE_FILTER_PARAM = "org.archive.pig.filter.date";
public final static String EXT_FILTER_PARAM = "org.archive.pig.filter.ext";
public final static String DATE_OUTPUT_LOG = "org.archive.pig.filter.date.logfile";
public final static String MTIME_VAR = "mtime";
static enum CompareOp
{
EQ,
LT,
GT,
GTEQ,
LTEQ,
}
private boolean compare(DateFilter.CompareOp op, long A, long B) {
switch (op) {
case EQ:
return (A == B);
case LT:
return (A < B);
case GT:
return (A > B);
case GTEQ:
return (A >= B);
case LTEQ:
return (A <= B);
default:
return (A > B);
}
}
private boolean dirSkipOp(DateFilter.CompareOp theOp, boolean second)
{
switch (theOp) {
case LT:
return (second ? false : true);
case LTEQ:
return (second ? false : true);
case GT:
return (second ? true : false);
case GTEQ:
return (second ? true : false);
}
return false;
}
protected DateFilter.CompareOp flipOp(CompareOp theOp)
{
switch (theOp) {
case LT:
return CompareOp.GT;
case GT:
return CompareOp.LT;
case GTEQ:
return CompareOp.LTEQ;
case LTEQ:
return CompareOp.GTEQ;
}
return CompareOp.EQ;
}
protected DateFilter.CompareOp parseOp(String op)
{
if (op.equals("=")) {
return CompareOp.EQ;
}
if (op.equals("<")) {
return CompareOp.LT;
}
if (op.equals(">")) {
return CompareOp.GT;
}
if (op.equals(">=")) {
return CompareOp.GTEQ;
}
if (op.equals("<=")) {
return CompareOp.LTEQ;
}
throw new IllegalArgumentException("Illegal comparison op: " + op);
}
@Override
public void setConfWhenEnabled(Configuration conf) {
if (conf == null) {
return;
}
init(conf.get(DATE_FILTER_PARAM), conf);
extFilter = conf.get(EXT_FILTER_PARAM, "");
}
public void init(String paramString, Configuration conf)
{
this.paramString = paramString;
if (this.paramString == null || this.paramString.isEmpty()) {
return;
}
String[] params = paramString.split("\\s+");
String dateStr1 = null;
String dateStr2 = null;
// < DATE1
if (params.length == 2) {
op1 = parseOp(params[0]);
dateStr1 = params[1];
} else if (params.length == 3) {
if (params[0].equals(MTIME_VAR)) {
// $mtype < DATE1
dateStr1 = params[2];
op1 = parseOp(params[1]);
} else if (params[2].equals(MTIME_VAR)) {
// DATE2 > $mtype
dateStr2 = params[0];
op2 = parseOp(params[1]);
} else {
throw new IllegalArgumentException("Must Specify mtime as param: X < mtime or mtime > X");
}
} else if (params.length == 5) {
// DATE2 > $mtype
dateStr2 = params[0];
op2 = parseOp(params[1]);
if (!params[2].equals(MTIME_VAR)) {
throw new IllegalArgumentException("Must Specify mtime as param: X < mtime < Y");
}
// $mtype < DATE1
op1 = parseOp(params[3]);
dateStr1 = params[4];
} else {
throw new IllegalArgumentException("Must use form: OP X, X OP mtime, mtime OP X, X OP mtime OP2 Y, where OP is one of <, >, <=, >=, =");
}
try {
this.fs = FileSystem.get(conf);
if (dateStr1 != null) {
date1 = parseDateForParam(dateStr1);
writeLog("Date 1: " + date1.toString());
}
if (dateStr2 != null) {
date2 = parseDateForParam(dateStr2);
writeLog("Date 2: " + date2.toString());
}
} catch (Exception e) {
e.printStackTrace();
}
// Date Output Log
String fileLog = conf.get(DATE_OUTPUT_LOG);
if (fileLog != null) {
try {
fileWriter = new PrintWriter(new File(fileLog));
} catch (IOException io) {
LOGGER.warn(io.toString());
}
}
}
protected Date parseDateForParam(String dateParam) throws IOException
{
// Try default format
try {
return new SimpleDateFormat().parse(dateParam);
} catch (ParseException pe) {
}
// Then ArchiveUtils
try {
return ArchiveUtils.getDate(dateParam);
} catch (ParseException pe) {
}
// Then file path
Path datePath = new Path(dateParam);
FileStatus status = fs.getFileStatus(datePath);
return new Date(status.getModificationTime());
}
public boolean acceptWhenEnabled(Path path) {
try {
FileStatus status = fs.getFileStatus(path);
boolean isDir = status.isDir();
// if (isDir) {
// return true;
// }
if ((extFilter != null) && !extFilter.isEmpty() && !isDir) {
if (!path.getName().endsWith(extFilter)) {
return false;
}
}
long mtime = status.getModificationTime();
// DATE2 > $mtype
if (date2 != null) {
if (!(isDir && dirSkipOp(op2, true)) && !compare(op2, date2.getTime(), mtime)) {
//if (!compare(op2, date2.getTime(), mtime)) {
return false;
}
}
// $mtype < DATE1
if (date1 != null) {
if (!(isDir && dirSkipOp(op1, false)) && !compare(op1, mtime, date1.getTime())) {
//if (!compare(op1, mtime, date1.getTime())) {
return false;
}
}
String msg = path.getName() + " (" + new Date(mtime).toString() + ")";
if (isDir) {
LOGGER.info(msg);
} else {
writeLog(msg);
}
return true;
} catch (IOException e) {
e.printStackTrace();
}
return false;
}
private void writeLog(String string) {
LOGGER.info(string);
if (fileWriter != null) {
fileWriter.println(string);
fileWriter.flush();
}
}
public String toString()
{
return paramString;
}
public void close()
{
if (fileWriter != null) {
fileWriter.close();
}
}
}