/*
* chombo: Hadoop Map Reduce utility
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.chombo.transformer;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import org.chombo.util.BasicUtils;
import org.chombo.util.ProcessorAttribute;
import org.chombo.util.Utility;
import com.typesafe.config.Config;
/**
* @author pranab
*
*/
public class DateTransformer {
/**
* @author pranab
*
*/
public static class EpochTimeGenerator extends AttributeTransformer {
/**
* @param prAttr
*/
public EpochTimeGenerator(ProcessorAttribute prAttr) {
super(prAttr.getTargetFieldOrdinals().length);
}
public EpochTimeGenerator() {
super(1);
}
@Override
public String[] tranform(String value) {
transformed[0] = "" + System.currentTimeMillis();
return transformed;
}
}
/**
* @author pranab
*
*/
public static class DateGenerator extends AttributeTransformer {
private SimpleDateFormat dateFormat;
/**
* @param prAttr
*/
public DateGenerator(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
dateFormat = new SimpleDateFormat(config.getString("dateFormat"));
}
public DateGenerator(String dateFormat) {
super(1);
this.dateFormat = new SimpleDateFormat(dateFormat);
}
@Override
public String[] tranform(String value) {
transformed[0] = dateFormat.format(Calendar.getInstance().getTime());
return transformed;
}
}
/**
* @author pranab
*
*/
public static class DateFormatTransformer extends AttributeTransformer {
private SimpleDateFormat sourceDateFormat;
private SimpleDateFormat targetDateFormat;
private boolean sourceEpochTime;
private boolean targetEpochTime;
/**
* @param prAttr
* @param config
*/
public DateFormatTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
intialize(config.getString("sourceDateFormat"), config.getString("sourceTimeZone"), config.getString("targetDateFormat"),
config.getString("targetTimeZone"));
}
/**
* @param sourceDateFormat
* @param sourceTimeZone
* @param targetDateFormat
* @param targetTimeZone
*/
public DateFormatTransformer(String sourceDateFormat, String sourceTimeZone, String targetDateFormat, String targetTimeZone) {
super(1);
intialize(sourceDateFormat, sourceTimeZone, targetDateFormat, targetTimeZone);
}
/**
* @param sourceDateFormatStr
* @param sourceTimeZone
* @param targetDateFormatStr
* @param targetTimeZone
*/
private void intialize(String sourceDateFormatStr, String sourceTimeZone, String targetDateFormatStr, String targetTimeZone) {
if (sourceDateFormatStr.equals("epochTime")) {
sourceEpochTime = true;
} else {
sourceDateFormat = new SimpleDateFormat(sourceDateFormatStr);
if (!Utility.isBlank(sourceTimeZone)) {
sourceDateFormat.setTimeZone(TimeZone.getTimeZone(sourceTimeZone));
}
}
if (targetDateFormatStr.equals("epochTime")) {
targetEpochTime = true;
} else {
targetDateFormat = new SimpleDateFormat(targetDateFormatStr);
if (!Utility.isBlank(targetTimeZone)) {
targetDateFormat.setTimeZone(TimeZone.getTimeZone(targetTimeZone));
}
}
}
@Override
public String[] tranform(String value) {
try {
Date date = null;
if (null != sourceDateFormat) {
//date format
date = sourceDateFormat.parse(value);
} else {
//epoch time
date = new Date(Long.parseLong(value));
}
if (null != targetDateFormat) {
transformed[0] = targetDateFormat.format(date);
} else {
transformed[0] = "" + date.getTime();
}
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
}
/**
* @author pranab
*
*/
public static class ElapsedTimeTransformer extends AttributeTransformer {
private SimpleDateFormat dateFormat;
private boolean epochTime;
private long refTime;
private String timeUnit;
private boolean failOnInvalid;
/**
* @param prAttr
* @param config
*/
public ElapsedTimeTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
String refDateStr = config.hasPath("refDateStr")? config.getString("refDateStr") : null;
intialize(config.getString("dateFormat"), config.getString("timeZone"),
config.getString("timeUnit"), config.getBoolean("failOnInvalid"), refDateStr);
}
/**
* @param dateFormat
* @param timeZone
* @param failOnInvalid
*/
public ElapsedTimeTransformer(String dateFormat, String timeZone, String timeUnit, boolean failOnInvalid,
String refDateStr) {
super(1);
intialize(dateFormat, timeZone, timeUnit, failOnInvalid, refDateStr);
}
/**
* @param dateFormatStr
* @param timeZone
* @param failOnInvalid
*/
private void intialize(String dateFormatStr, String timeZone, String timeUnit, boolean failOnInvalid,
String refDateStr) {
try {
if (dateFormatStr.equals("epochTime")) {
epochTime = true;
} else {
dateFormat = new SimpleDateFormat(dateFormatStr);
if (!Utility.isBlank(timeZone)) {
dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
}
}
//set reference time
if (null != refDateStr) {
if (epochTime) {
refTime = Long.parseLong(refDateStr);
} else {
Date refDate = dateFormat.parse(refDateStr);
refTime = refDate.getTime();
}
} else {
refTime = System.currentTimeMillis();
}
this.timeUnit = timeUnit;
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
}
@Override
public String[] tranform(String value) {
long elapsed = 0;
long time = 0;
try {
Date date = null;
if (null != dateFormat) {
//date format
date = dateFormat.parse(value);
time = date.getTime();
} else {
//epoch time
time = Long.parseLong(value);
}
if (time > refTime) {
elapsed = time - refTime;
elapsed = BasicUtils.convertTimeUnit(elapsed, timeUnit);
transformed[0] = "" + elapsed;
} else {
if (failOnInvalid) {
throw new IllegalArgumentException("date in future");
} else {
transformed[0] = "0";
}
}
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
}
/**
* @author pranab
*
*/
public static class ContextualElapsedTimeTransformer extends AttributeTransformer implements ContextAwareTransformer {
private String[] fields;
private boolean epochTime;
private long refTime;
private String timeUnit;
private boolean failOnInvalid;
private SimpleDateFormat dateFormat;
private int refDateFieldOrdinal;
/**
* @param prAttr
* @param config
*/
public ContextualElapsedTimeTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
refDateFieldOrdinal = config.getInt("refDateFieldOrdinal");
intialize(config.getString("dateFormat"), config.getString("timeZone"),
config.getString("timeUnit"), config.getBoolean("failOnInvalid"));
}
/**
* @param dateFormat
* @param timeZone
* @param timeUnit
* @param failOnInvalid
*/
public ContextualElapsedTimeTransformer(String dateFormat, String timeZone, String timeUnit, boolean failOnInvalid) {
super(1);
intialize(dateFormat, timeZone, timeUnit, failOnInvalid);
}
/**
* @param dateFormatStr
* @param timeZone
* @param timeUnit
* @param failOnInvalid
*/
private void intialize(String dateFormatStr, String timeZone, String timeUnit, boolean failOnInvalid) {
if (dateFormatStr.equals("epochTime")) {
epochTime = true;
} else {
dateFormat = new SimpleDateFormat(dateFormatStr);
if (!Utility.isBlank(timeZone)) {
dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
}
}
this.timeUnit = timeUnit;
}
@Override
public void setContext(Map<String, Object> context) {
fields = (String[])context.get("record");
}
@Override
public String[] tranform(String value) {
long elapsed = 0;
long time = 0;
try {
//reference date time
String refDateStr = fields[refDateFieldOrdinal];
if (epochTime) {
refTime = Long.parseLong(refDateStr);
} else {
Date refDate = dateFormat.parse(refDateStr);
refTime = refDate.getTime();
}
Date date = null;
if (null != dateFormat) {
//date format
date = dateFormat.parse(value);
time = date.getTime();
} else {
//epoch time
time = Long.parseLong(value);
}
if (time > refTime) {
elapsed = time - refTime;
elapsed = BasicUtils.convertTimeUnit(elapsed, timeUnit);
transformed[0] = "" + elapsed;
} else {
if (failOnInvalid) {
throw new IllegalArgumentException("date in future");
} else {
transformed[0] = "0";
}
}
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
}
/**
* @author pranab
*
*/
public static class TimeCyclicShiftTransformer extends AttributeTransformer {
private SimpleDateFormat dateFormat;
private boolean epochTime;
private long refTime;
private String timeUnit;
private boolean failOnInvalid;
/**
* @param prAttr
* @param config
*/
public TimeCyclicShiftTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
String refDateStr = config.hasPath("refDateStr")? config.getString("refDateStr") : null;
intialize(config.getString("dateFormat"), config.getString("timeZone"),
config.getString("timeUnit"), config.getBoolean("failOnInvalid"), refDateStr);
}
/**
* @param dateFormat
* @param timeZone
* @param failOnInvalid
*/
public TimeCyclicShiftTransformer(String dateFormat, String timeZone, String timeUnit, boolean failOnInvalid,
String refDateStr) {
super(1);
intialize(dateFormat, timeZone, timeUnit, failOnInvalid, refDateStr);
}
/**
* @param dateFormatStr
* @param timeZone
* @param failOnInvalid
*/
private void intialize(String dateFormatStr, String timeZone, String timeUnit, boolean failOnInvalid,
String refDateStr) {
try {
dateFormat = new SimpleDateFormat(dateFormatStr);
if (!Utility.isBlank(timeZone)) {
dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
}
//set reference time
if (null != refDateStr) {
Date refDate = dateFormat.parse(refDateStr);
refTime = refDate.getTime();
} else {
refTime = System.currentTimeMillis();
}
this.timeUnit = timeUnit;
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
}
@Override
public String[] tranform(String value) {
try {
Calendar date = Calendar.getInstance();
date.setTime(dateFormat.parse(value));
for (long time = date.getTimeInMillis(); time < refTime; time = date.getTimeInMillis()) {
if (timeUnit.equals(BasicUtils.TIME_UNIT_MONTH)) {
date.add(Calendar.MONTH, 1);
} else if (timeUnit.equals(BasicUtils.TIME_UNIT_QUARTER)) {
date.add(Calendar.MONTH, 3);
} else if (timeUnit.equals(BasicUtils.TIME_UNIT_SEMI_ANNUAL)) {
date.add(Calendar.MONTH, 6);
} else if (timeUnit.equals(BasicUtils.TIME_UNIT_YEAR)) {
date.add(Calendar.YEAR, 1);
} else {
throw new IllegalStateException("invalid time cycle unit for time shift");
}
}
transformed[0] = dateFormat.format(date.getTime());
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
}
/**
* @author pranab
*
*/
public static class ContextualTimeCyclicShiftTransformer extends AttributeTransformer implements ContextAwareTransformer {
private SimpleDateFormat dateFormat;
private long refTime;
private String timeUnit;
private String[] fields;
private int timeUnitColOrd;
private int refDateColOrd;
/**
* @param prAttr
* @param config
*/
public ContextualTimeCyclicShiftTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
int refDateColOrd = config.hasPath("refDateColOrd")? config.getInt("refDateColOrd") : -1;
String refDateStr = config.hasPath("refDateStr")? config.getString("refDateStr") : null;
int timeUnitColOrd = config.hasPath("timeUnitColOrd")? config.getInt("timeUnitColOrd") : -1;
String timeUnit = config.hasPath("timeUnit")? config.getString("timeUnit") : null;
intialize(config.getString("dateFormat"), config.getString("timeZone"),
timeUnitColOrd, timeUnit, config.getBoolean("failOnInvalid"), refDateColOrd, refDateStr);
}
/**
* @param dateFormat
* @param timeZone
* @param failOnInvalid
*/
public ContextualTimeCyclicShiftTransformer(String dateFormat, String timeZone, int timeUnitColOrd,
String timeUnit, boolean failOnInvalid, int refDateColOrd, String refDateStr) {
super(1);
intialize(dateFormat, timeZone, timeUnitColOrd, timeUnit, failOnInvalid, refDateColOrd, refDateStr);
}
/**
* @param dateFormatStr
* @param timeZone
* @param failOnInvalid
*/
private void intialize(String dateFormatStr, String timeZone, int timeUnitColOrd, String timeUnit,
boolean failOnInvalid, int refDateColOrd, String refDateStr) {
try {
dateFormat = new SimpleDateFormat(dateFormatStr);
if (!Utility.isBlank(timeZone)) {
dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
}
//set reference time
if (null != refDateStr) {
Date refDate = dateFormat.parse(refDateStr);
refTime = refDate.getTime();
}
this.timeUnit = timeUnit;
this.timeUnitColOrd = timeUnitColOrd;
this.refDateColOrd = refDateColOrd;
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
}
@Override
public String[] tranform(String value) {
try {
//reference date
long thisRefTime = 0;
if (refDateColOrd >= 0) {
Date refDate = dateFormat.parse(fields[refDateColOrd]);
thisRefTime = refDate.getTime();
} else if (refTime > 0) {
thisRefTime = refTime;
} else {
throw new IllegalStateException("either reference date column index or global ref date must be provided");
}
//time unit
String thisTimeUnit = null;
if (timeUnitColOrd >= 0) {
thisTimeUnit = fields[timeUnitColOrd];
} else if (null != timeUnit) {
thisTimeUnit = timeUnit;
} else {
throw new IllegalStateException("either cycle time unit column index or global cycle time unit must be provided");
}
//roll forward
Calendar date = Calendar.getInstance();
date.setTime(dateFormat.parse(value));
for (long time = date.getTimeInMillis(); time < thisRefTime; time = date.getTimeInMillis()) {
if (thisTimeUnit.equals(BasicUtils.TIME_UNIT_MONTH)) {
date.add(Calendar.MONTH, 1);
} else if (thisTimeUnit.equals(BasicUtils.TIME_UNIT_QUARTER)) {
date.add(Calendar.MONTH, 3);
} else if (thisTimeUnit.equals(BasicUtils.TIME_UNIT_YEAR)) {
date.add(Calendar.YEAR, 1);
} else {
throw new IllegalStateException("invalid time cycle unit for time shift");
}
}
transformed[0] = dateFormat.format(date.getTime());
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
@Override
public void setContext(Map<String, Object> context) {
fields = (String[])context.get("record");
}
}
/**
* @author pranab
*
*/
public static class DateComponentTransformer extends AttributeTransformer {
private SimpleDateFormat sourceDateFormat;
private Map<String, SimpleDateFormat> componentDateFormats = new HashMap<String, SimpleDateFormat>();
private boolean sourceEpochTime;
private List<String> dateComponents;
private Calendar cal = Calendar.getInstance();
/**
* @param prAttr
* @param config
*/
public DateComponentTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
List<String> dateComponents = config.getStringList("dateComponenets");
//component format string
Map<String, String> componentFormats = new HashMap<String, String>();
for (String dateComponent : dateComponents) {
if (!dateComponent.equals(BasicUtils.TIME_UNIT_QUARTER)) {
String compFormat = config.getString("format." + dateComponent);
componentFormats.put(dateComponent, compFormat);
}
}
intialize(config.getString("sourceDateFormat"), config.getString("sourceTimeZone"),
config.getStringList("dateComponenets"), componentFormats, config.getString("targetTimeZone"));
}
/**
* @param sourceDateFormat
* @param sourceTimeZone
* @param dateComponents
* @param targetComponentFormats
* @param targetTimeZone
*/
public DateComponentTransformer(String sourceDateFormat, String sourceTimeZone, List<String> dateComponents,
Map<String, String> targetComponentFormats,
String targetTimeZone) {
super(dateComponents.size());
intialize(sourceDateFormat, sourceTimeZone, dateComponents, targetComponentFormats, targetTimeZone);
}
/**
* @param sourceDateFormatStr
* @param sourceTimeZone
* @param dateComponents
* @param componentFormats
* @param targetTimeZone
*/
private void intialize(String sourceDateFormatStr, String sourceTimeZone, List<String> dateComponents,
Map<String, String> componentFormats, String targetTimeZone) {
this.dateComponents = dateComponents;
if (sourceDateFormatStr.equals("epochTime")) {
sourceEpochTime = true;
} else {
sourceDateFormat = new SimpleDateFormat(sourceDateFormatStr);
if (!Utility.isBlank(sourceTimeZone)) {
sourceDateFormat.setTimeZone(TimeZone.getTimeZone(sourceTimeZone));
}
}
//format object for all components
for (Map.Entry<String,String> entry :componentFormats.entrySet()) {
SimpleDateFormat targetDateFormat = new SimpleDateFormat(entry.getValue());
if (!Utility.isBlank(targetTimeZone)) {
targetDateFormat.setTimeZone(TimeZone.getTimeZone(targetTimeZone));
}
componentDateFormats.put(entry.getKey(), targetDateFormat);
}
}
@Override
public String[] tranform(String value) {
try {
Date date = null;
if (null != sourceDateFormat) {
//date format
date = sourceDateFormat.parse(value);
} else {
//epoch time
date = new Date(Long.parseLong(value));
}
int i = 0;
for (String dateComponent : dateComponents) {
if (dateComponent.equals(BasicUtils.TIME_UNIT_QUARTER)) {
cal.setTime(date);
int month = cal.get(Calendar.MONTH);
int quarter = month / 3 + 1;
transformed[i++] = "" + quarter;
} else {
SimpleDateFormat targetDateFormat = componentDateFormats.get(dateComponent);
transformed[i++] = targetDateFormat.format(date);
}
}
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
}
/**
* @author pranab
*
*/
public static class TimeCycleTransformer extends AttributeTransformer {
private SimpleDateFormat sourceDateFormat;
private boolean sourceEpochTime;
private String timeCycle;
private int hourGranularity;
private Calendar cal = Calendar.getInstance();
/**
* @param prAttr
* @param config
*/
public TimeCycleTransformer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
intialize(config.getString("sourceDateFormat"), config.getString("sourceTimeZone"), config.getString("timeCycle"),
config.getInt("hourGranularity"));
}
/**
* @param sourceDateFormatStr
* @param sourceTimeZone
* @param timeCycle
* @param hourGranularity
*/
public TimeCycleTransformer(String sourceDateFormatStr, String sourceTimeZone, String timeCycle, int hourGranularity) {
super(1);
intialize(sourceDateFormatStr, sourceTimeZone, timeCycle, hourGranularity);
}
/**
* @param sourceDateFormatStr
* @param sourceTimeZone
* @param timeCycle
* @param hourGranularity
*/
private void intialize(String sourceDateFormatStr, String sourceTimeZone, String timeCycle, int hourGranularity) {
if (sourceDateFormatStr.equals("epochTime")) {
sourceEpochTime = true;
} else {
sourceDateFormat = new SimpleDateFormat(sourceDateFormatStr);
if (!Utility.isBlank(sourceTimeZone)) {
sourceDateFormat.setTimeZone(TimeZone.getTimeZone(sourceTimeZone));
}
}
this.timeCycle = timeCycle;
if (hourGranularity % 2 == 1) {
throw new IllegalStateException("hour granularity should be even");
}
this.hourGranularity = hourGranularity;
}
/* (non-Javadoc)
* @see org.chombo.transformer.AttributeTransformer#tranform(java.lang.String)
*/
@Override
public String[] tranform(String value) {
try {
Date date = null;
if (null != sourceDateFormat) {
//date format
date = sourceDateFormat.parse(value);
} else {
//epoch time
date = new Date(Long.parseLong(value));
}
cal.setTime(date);
if (timeCycle.equals("hourOfDay")) {
int hour = cal.get(Calendar.HOUR_OF_DAY);
hour /= hourGranularity;
transformed[0] = "" + hour;
} else if (timeCycle.equals("dayOfWeek")) {
int day = cal.get(Calendar.DAY_OF_WEEK);
transformed[0] = "" + day;
} else if (timeCycle.equals("dayOfMonth")) {
int day = cal.get(Calendar.DAY_OF_MONTH);
transformed[0] = "" + day;
} else if (timeCycle.equals("monthOfYear")) {
int month = cal.get(Calendar.MONTH) + 1;
transformed[0] = "" + month;
} else if (timeCycle.equals("quarterOfYear")) {
int month = cal.get(Calendar.MONTH) + 1;
int quarter = (month + 2) / 3;
transformed[0] = "" + quarter;
} else {
throw new IllegalStateException("invalid time cycle");
}
} catch (ParseException ex) {
throw new IllegalArgumentException("failed to parse date " + ex.getMessage());
}
return transformed;
}
}
}