package org.apache.solr.analysis;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.util.DateMathParser;
/**
 * Token filter that replaces every incoming term with a normalized UTC timestamp of the form
 * {@code yyyy-MM-dd'T'HH:mm:ss'Z'}.
 *
 * <p>Each term is parsed with the first of the configured input patterns that accepts it. Terms
 * that look like fully specified dates are additionally shifted by the configured date-math
 * offset; partial dates (those using {@code -00} for an unknown month/day, or shorter than
 * {@value #FULL_DATE_MIN_LENGTH} characters) are emitted unshifted, so that they cluster before
 * the fully specified dates of the same day. Terms that no pattern can parse are replaced by
 * {@code 0000-00-00T00:00:00Z}.
 */
public final class DateNormalizerTokenFilter extends TokenFilter {

  /** Value emitted when none of the configured patterns can parse the term. */
  private static final String UNPARSEABLE_DATE = "0000-00-00T00:00:00Z";

  /** Marker used in incoming terms for an unknown month or day, e.g. {@code 2013-00-00}. */
  private static final String UNKNOWN_PART = "-00";

  /**
   * Terms shorter than this are treated as partial dates (presumably the length of a full
   * {@code yyyy-MM-dd} date -- confirm against the configured input patterns).
   */
  private static final int FULL_DATE_MIN_LENGTH = 10;

  /** Input patterns, tried in declaration order. */
  private final SimpleDateFormat[] format;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final DateMathParser dmp;
  /** Date-math expression handed to {@link DateMathParser#parseMath(String)} for full dates. */
  private final String offset;
  /** Output formatter; always renders in UTC. */
  private final SimpleDateFormat sdf =
      new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);

  /**
   * Creates the filter.
   *
   * @param input the token stream whose terms are to be normalized
   * @param incomingFormat one or more {@link SimpleDateFormat} patterns separated by {@code |};
   *     they are tried in the given order and interpreted in UTC
   * @param offset date-math expression applied to fully specified dates
   */
  public DateNormalizerTokenFilter(TokenStream input, String incomingFormat, String offset) {
    super(input);
    sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
    this.offset = offset;
    String[] parts = incomingFormat.split("\\|");
    format = new SimpleDateFormat[parts.length];
    for (int i = 0; i < parts.length; i++) {
      format[i] = new SimpleDateFormat(parts[i], Locale.US);
      format[i].setTimeZone(TimeZone.getTimeZone("UTC"));
    }
    dmp = new DateMathParser(TimeZone.getTimeZone("UTC"));
  }

  /**
   * Advances the wrapped stream and overwrites the current term with its normalized form.
   *
   * @return {@code false} when the wrapped stream is exhausted, {@code true} otherwise
   * @throws IOException if the wrapped stream fails
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    CharSequence v = normalize(termAtt.toString());
    termAtt.setEmpty().append(v);
    return true;
  }

  /**
   * Converts one raw term into the normalized timestamp.
   *
   * @param string the raw term text
   * @return the formatted UTC timestamp, or {@code 0000-00-00T00:00:00Z} if no configured
   *     pattern (or the offset expression) could be parsed
   */
  private CharSequence normalize(String string) {
    boolean normalDate = true;
    // Test for the same marker that gets stripped. Checking for a bare "00" would misclassify
    // complete dates such as 2000-05-12 or 2013-10-05T10:00:00Z as partial and skip the offset.
    if (string.contains(UNKNOWN_PART)) {
      string = string.replace(UNKNOWN_PART, "");
      normalDate = false;
    }
    if (string.length() < FULL_DATE_MIN_LENGTH) {
      normalDate = false;
    }
    for (SimpleDateFormat f : this.format) {
      try {
        Date date = f.parse(string);
        dmp.setNow(date);
        if (normalDate) {
          // Shift fully specified dates by the configured offset so that partial dates,
          // which stay at the parsed instant, sort ahead of them.
          date = dmp.parseMath(this.offset);
        }
        return sdf.format(date);
      } catch (ParseException ignored) {
        // This pattern (or the offset expression) did not parse; try the next pattern.
      }
    }
    return UNPARSEABLE_DATE; // error parsing input data
  }
}