package com.thinkbiganalytics.policy.standardization;

/*-
 * #%L
 * thinkbig-field-policy-default
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.policy.PolicyProperty;
import com.thinkbiganalytics.policy.PolicyPropertyRef;
import com.thinkbiganalytics.policy.PolicyPropertyTypes;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.TimeZone;

/**
 * Converts a date/time string into one of the fixed output patterns listed in {@link OutputFormats}.
 *
 * <p>When no input format is configured the value is parsed as a number: a value of exactly ten digits is
 * treated as a Unix timestamp in seconds, any other numeric value as epoch milliseconds. When an input
 * format is configured it is used as a Joda-Time pattern to parse the value.</p>
 *
 * <p>Values that cannot be converted, and all values when the configuration itself is invalid, are returned
 * unchanged.</p>
 */
@Standardizer(name = "Date/Time", description = "Converts any date to Hive-friendly format with optional timezone conversion")
public class DateTimeStandardizer implements StandardizationPolicy {

    private static final Logger log = LoggerFactory.getLogger(DateTimeStandardizer.class);

    /**
     * Joda-Time pattern used to parse incoming values; blank or {@code null} means the value is numeric epoch time.
     */
    @PolicyProperty(name = "Date Format", hint = "Format Example: MM/dd/YYYY. If converting from Unix timestamp leave empty.")
    private String inputDateFormat;

    /**
     * Selects the pattern used to print the converted value.
     */
    @PolicyProperty(name = "Output Format", hint = "Choose an output format", type = PolicyPropertyTypes.PROPERTY_TYPE.select,
                    selectableValues = {"DATE_ONLY", "DATETIME", "DATETIME_NOMILLIS"}, required = true)
    private OutputFormats outputFormat = OutputFormats.DATE_ONLY;

    /**
     * Optional timezone id applied to the input formatter. An empty value leaves the formatter untouched.
     * Only used when an input date format is configured.
     */
    @PolicyProperty(name = "Input timezone", hint = "Input timezone (optional)", type = PolicyPropertyTypes.PROPERTY_TYPE.select,
                    selectableValues = {"", "ACT", "AET", "AGT", "ART", "AST", "BET", "BST", "CAT", "CNT", "CST", "CTT", "EAT", "ECT", "IET", "IST", "JST", "MIT", "NET", "NST", "PLT", "PNT",
                                        "PRT", "PST", "SST", "UTC", "VST", "EST", "MST", "HST"}, value = "")
    private String inputTimezone;

    /**
     * Optional timezone id applied to the output formatter. An empty value leaves the formatter untouched.
     */
    @PolicyProperty(name = "Output timezone", hint = "Targeted timezone (optional)", type = PolicyPropertyTypes.PROPERTY_TYPE.select,
                    selectableValues = {"", "ACT", "AET", "AGT", "ART", "AST", "BET", "BST", "CAT", "CNT", "CST", "CTT", "EAT", "ECT", "IET", "IST", "JST", "MIT", "NET", "NST", "PLT", "PNT",
                                        "PRT", "PST", "SST", "UTC", "VST", "EST", "MST", "HST"}, value = "")
    private String outputTimezone;

    /** Formatter used to print results; transient, rebuilt by {@link #initializeFormatters()}. */
    private transient DateTimeFormatter outputFormatter;

    /** Formatter used to parse input; {@code null} when no input format is configured. Transient, rebuilt on deserialization. */
    private transient DateTimeFormatter inputFormatter;

    /** {@code true} once the formatters were built successfully; when {@code false} values pass through unchanged. */
    private boolean valid;

    /**
     * Creates a standardizer for numeric epoch input without timezone conversion.
     *
     * @param outputFormat the output format, must not be {@code null}
     */
    public DateTimeStandardizer(OutputFormats outputFormat) {
        this(null, outputFormat, null, null);
    }

    /**
     * Creates a standardizer without timezone conversion.
     *
     * @param inputDateFormat the Joda-Time pattern of the input, or blank/{@code null} for numeric epoch input
     * @param outputFormat    the output format, must not be {@code null}
     */
    public DateTimeStandardizer(String inputDateFormat, OutputFormats outputFormat) {
        this(inputDateFormat, outputFormat, null, null);
    }

    /**
     * Creates a fully configured standardizer and builds its formatters.
     *
     * <p>NOTE(review): the timezone properties are declared as "Input timezone" / "Output timezone" but are
     * referenced here as "Input Timezone" / "Output Timezone" - confirm the property resolver matches names
     * case-insensitively.</p>
     *
     * @param inputDateFormat the Joda-Time pattern of the input, or blank/{@code null} for numeric epoch input
     * @param outputFormat    the output format, must not be {@code null}
     * @param inputTimezone   the timezone id for parsing, or empty/{@code null} for none
     * @param outputTimezone  the timezone id for printing, or empty/{@code null} for none
     */
    public DateTimeStandardizer(@PolicyPropertyRef(name = "Date Format") String inputDateFormat,
                                @PolicyPropertyRef(name = "Output Format") OutputFormats outputFormat,
                                @PolicyPropertyRef(name = "Input Timezone") String inputTimezone,
                                @PolicyPropertyRef(name = "Output Timezone") String outputTimezone) {
        Validate.notNull(outputFormat);
        this.inputDateFormat = inputDateFormat;
        this.outputFormat = outputFormat;
        this.inputTimezone = inputTimezone;
        this.outputTimezone = outputTimezone;
        initializeFormatters();
    }

    /**
     * Detects a Unix timestamp expressed in seconds rather than milliseconds: a non-blank, all-digit string of
     * exactly ten characters.
     *
     * @param value the raw input value
     * @return {@code true} if the value looks like a seconds-based Unix timestamp
     */
    private boolean isInputUnixTimestamp(String value) {
        return StringUtils.isNotBlank(value) && StringUtils.isNumeric(value) && value.length() == 10;
    }

    /**
     * Converts the value to the configured output format.
     *
     * @param value the raw input value, may be {@code null}
     * @return the formatted date/time, or the original value if the configuration is invalid, the value is
     * {@code null}, or the value cannot be parsed
     */
    @Override
    public String convertValue(String value) {
        // Nothing to convert: either the formatters could not be built or there is no value at all.
        if (!valid || value == null) {
            return value;
        }

        try {
            if (inputFormatter == null) {
                // No input pattern configured: the value is epoch time, in seconds or in milliseconds.
                long epochMillis = Long.parseLong(value);
                if (isInputUnixTimestamp(value)) {
                    epochMillis *= 1000;
                }
                return outputFormatter.print(epochMillis);
            }

            DateTime parsed = inputFormatter.parseDateTime(value);
            return outputFormatter.print(parsed);
        } catch (IllegalArgumentException e) {
            // Covers NumberFormatException from the epoch path as well as Joda-Time parse failures.
            log.debug("Failed to convert string [{}] to date pattern [{}]", value, inputDateFormat);
        }
        return value;
    }

    /**
     * Returns a time formatter for the specified timezone.
     *
     * <p>"UTC" is mapped directly; any other id is resolved through {@link TimeZone#getTimeZone(String)} and
     * converted with {@link DateTimeZone#forTimeZone(TimeZone)}.</p>
     *
     * @param format   the current formatter
     * @param timezone the timezone string, may be empty or {@code null}
     * @return a time formatter for the specified timezone, or {@code format} itself if no timezone is given
     */
    protected DateTimeFormatter formatterForTimezone(DateTimeFormatter format, String timezone) {
        if (StringUtils.isEmpty(timezone)) {
            return format;
        }
        if ("UTC".equals(timezone)) {
            return format.withZoneUTC();
        }
        return format.withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone(timezone)));
    }

    /**
     * Builds the input and output formatters from the current configuration and records in {@code valid}
     * whether that succeeded. A failure is logged and leaves the standardizer in pass-through mode.
     */
    protected void initializeFormatters() {
        try {
            valid = false;
            if (outputFormat == null) {
                outputFormat = OutputFormats.DATE_ONLY;
            }
            switch (outputFormat) {
                case DATE_ONLY:
                    this.outputFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
                    break;
                case DATETIME:
                    this.outputFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS");
                    break;
                case DATETIME_NOMILLIS:
                    this.outputFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
                    break;
            }
            this.outputFormatter = formatterForTimezone(this.outputFormatter, outputTimezone);

            if (StringUtils.isNotBlank(inputDateFormat)) {
                this.inputFormatter = DateTimeFormat.forPattern(this.inputDateFormat);
                this.inputFormatter = formatterForTimezone(this.inputFormatter, inputTimezone);
            }
            valid = true;
        } catch (IllegalArgumentException e) {
            // The trailing throwable is logged by SLF4J as the exception, not as a placeholder argument.
            log.warn("Illegal configuration input format [{}], tz [{}] Output format [{}], tz [{}]. Standardizer will be skipped.",
                     inputDateFormat, inputTimezone, outputFormat, outputTimezone, e);
        }
    }

    /**
     * Restores the serialized configuration and rebuilds the transient formatters.
     *
     * @param in the stream to read from
     * @throws IOException            if reading the stream fails
     * @throws ClassNotFoundException if a serialized class cannot be found
     */
    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        initializeFormatters();
    }

    /**
     * @return the configured input pattern, or blank/{@code null} for numeric epoch input
     */
    public String getInputDateFormat() {
        return inputDateFormat;
    }

    /**
     * @return the configured output format
     */
    public OutputFormats getOutputFormat() {
        return outputFormat;
    }

    /**
     * Supported output patterns.
     */
    public enum OutputFormats {
        /** Printed as {@code yyyy-MM-dd}. */
        DATE_ONLY,
        /** Printed as {@code yyyy-MM-dd HH:mm:ss.SSS}. */
        DATETIME,
        /** Printed as {@code yyyy-MM-dd HH:mm:ss}. */
        DATETIME_NOMILLIS
    }

    /**
     * Tells whether this standardizer can handle the given raw value.
     *
     * @param value the raw value
     * @return {@code true} only if the value is a {@link String}
     */
    public Boolean accepts(Object value) {
        return (value instanceof String);
    }

    /**
     * Converts a raw value if it is accepted by {@link #accepts(Object)}.
     *
     * @param value the raw value
     * @return the converted string for string input, otherwise the value unchanged
     */
    public Object convertRawValue(Object value) {
        if (accepts(value)) {
            return String.valueOf(convertValue(value.toString()));
        }
        return value;
    }
}