package org.myrobotlab.document.transformer; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.myrobotlab.document.Document; import org.myrobotlab.logging.LoggerFactory; import org.slf4j.Logger; /** * This stage will take the values in the inputField and attempt to parse them * into a date object based on the formatString. The successfully parsed values * will be stored in the outputField. The values will overwrite the outputField * values. * * @author kwatters * */ public class ParseDate extends AbstractStage { public final static Logger log = LoggerFactory.getLogger(ParseDate.class.getCanonicalName()); private String inputField = null; private String outputField = "date"; private List<String> formatStrings; private List<SimpleDateFormat> sdfs = null; @Override public void startStage(StageConfiguration config) { // TODO Auto-generated method stub if (config != null) { inputField = config.getProperty("inputField"); outputField = config.getProperty("outputField", "date"); formatStrings = config.getListParam("formatStrings"); } // compile the date string parsers. sdfs = new ArrayList<SimpleDateFormat>(); for (String formatString : formatStrings) { SimpleDateFormat sdf = new SimpleDateFormat(formatString); sdfs.add(sdf); } } @Override public List<Document> processDocument(Document doc) { if (!doc.hasField(inputField)) { return null; } ArrayList<Date> dates = new ArrayList<Date>(); for (Object val : doc.getField(inputField)) { if (val instanceof String) { boolean parsed = false; for (SimpleDateFormat sdf : sdfs) { try { Date d = sdf.parse(val.toString()); dates.add(d); parsed = true; // we found a match break; } catch (ParseException e) { // log.warn("Unparsable date string doc id: {} value: {}", // doc.getId(), val); // e.printStackTrace(); } } if (!parsed) { log.warn("Doc ID : {} Did not parse date string: {}", doc.getId(), val); } } } // TODO: configure input/output overwrite vs append mode. if (inputField.equals(outputField)) { doc.removeField(outputField); } for (Date d : dates) { doc.addToField(outputField, d); } return null; } @Override public void stopStage() { // NOOP } @Override public void flush() { // NOOP } }