/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib;
import co.cask.cdap.api.annotation.Beta;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.TimeZone;
import javax.annotation.Nullable;
/**
 * Helpers for manipulating runtime arguments of time-partitioned file sets.
 *
 * <p>Every method is static and reads from or writes to a caller-supplied map of runtime arguments.
 * Times are stored in the map as decimal strings of milliseconds.
 */
@Beta
public class TimePartitionedFileSetArguments extends PartitionedFileSetArguments {

  /** Argument key under which the {@link SimpleDateFormat} pattern for the output path is stored. */
  public static final String OUTPUT_PATH_FORMAT = "output.file.path.format";
  /** Argument key under which the time zone ID used to format the output path is stored. */
  public static final String OUTPUT_TIME_ZONE = "output.time.zone";
  /** Argument key under which the time of the output partition (in milliseconds) is stored. */
  public static final String OUTPUT_PARTITION_TIME = "output.partition.time";
  /** Argument key under which the start (inclusive) of the input time range is stored. */
  public static final String INPUT_START_TIME = "input.start.time";
  /** Argument key under which the end (exclusive) of the input time range is stored. */
  public static final String INPUT_END_TIME = "input.end.time";

  /**
   * Set the time of the output partition when using TimePartitionedFileSet as an OutputFormatProvider.
   * This time is used as the partition key for the new file, and also to generate an output file path - if that path
   * is not explicitly given as an argument itself.
   *
   * @param arguments the runtime arguments to update; must be mutable
   * @param time The time in milliseconds.
   */
  public static void setOutputPartitionTime(Map<String, String> arguments, long time) {
    arguments.put(OUTPUT_PARTITION_TIME, Long.toString(time));
  }

  /**
   * Get the time of the output partition to be written.
   *
   * @param arguments the runtime arguments to read from
   * @return the time of the output partition in milliseconds, or null if it is not set
   * @throws NumberFormatException if the stored value is not a valid long
   */
  @Nullable
  public static Long getOutputPartitionTime(Map<String, String> arguments) {
    return parseTimeArgument(arguments, OUTPUT_PARTITION_TIME);
  }

  /**
   * Set the format of the output path for each partition when using TimePartitionedFileSet as an
   * OutputFormatProvider. It is used to generate the end of the output file path for each partition.
   * This variant does not set a time zone argument; a time zone already present in the arguments is left untouched.
   *
   * @param arguments the runtime arguments to update; must be mutable
   * @param pathFormat The format for the path, given as a {@link java.text.SimpleDateFormat} pattern;
   *                   for example 'yyyy-MM-dd/HH-mm' creates a file path ending in the form 2015-01-01/20-42.
   *                   If no path format is set at all, the partitions will be of the form
   *                   2015-01-01/20-42.142017372000, with the time being the time UTC.
   *                   Note that each partition must have a unique file path or a runtime exception will be thrown.
   * @throws IllegalArgumentException if pathFormat is null or not a valid {@link java.text.SimpleDateFormat} pattern
   */
  public static void setOutputPathFormat(Map<String, String> arguments, String pathFormat) {
    setOutputPathFormat(arguments, pathFormat, null);
  }

  /**
   * Set the format of the output path, and optionally the time zone used to format it, for each partition when
   * using TimePartitionedFileSet as an OutputFormatProvider. It is used to generate the end of the output file path
   * for each partition. The pattern is validated eagerly by formatting the current time with it; nothing is written
   * to the arguments if validation fails.
   *
   * @param arguments the runtime arguments to update; must be mutable
   * @param pathFormat The format for the path, given as a {@link java.text.SimpleDateFormat} pattern;
   *                   for example 'yyyy-MM-dd/HH-mm' combined with the time zone 'America/Los_Angeles' creates a
   *                   file path ending in the form 2015-01-01/20-42, with the time of the partition being the time
   *                   in the time zone of Los Angeles (PST or PDT).
   *                   If no path format is set at all, the partitions will be of the form
   *                   2015-01-01/20-42.142017372000, with the time being the time UTC.
   *                   Note that each partition must have a unique file path or a runtime exception will be thrown.
   * @param timeZone The string ID of the time zone. It is parsed by {@link TimeZone#getTimeZone(String)}, which
   *                 falls back to GMT if the ID is not recognized; an unrecognized ID is therefore not rejected here.
   *                 If null or empty, no time zone argument is written and an existing one is left untouched.
   * @throws IllegalArgumentException if pathFormat is null or not a valid {@link java.text.SimpleDateFormat} pattern;
   *                                  the underlying exception is attached as the cause
   */
  public static void setOutputPathFormat(Map<String, String> arguments, String pathFormat, @Nullable String timeZone) {
    boolean hasTimeZone = timeZone != null && !timeZone.isEmpty();
    try {
      // Fail fast: constructing the formatter rejects bad patterns (and null), and a trial format
      // of the current time confirms the pattern is usable before it is stored.
      SimpleDateFormat format = new SimpleDateFormat(pathFormat);
      if (hasTimeZone) {
        format.setTimeZone(TimeZone.getTimeZone(timeZone));
      }
      format.format(new Date());
    } catch (RuntimeException e) {
      // Message text is kept as before; the cause is now preserved so the original stack trace is not lost.
      throw new IllegalArgumentException("Invalid date format: " + pathFormat + '\n' + e, e);
    }
    arguments.put(OUTPUT_PATH_FORMAT, pathFormat);
    if (hasTimeZone) {
      arguments.put(OUTPUT_TIME_ZONE, timeZone);
    }
  }

  /**
   * Get the format of the output path for each partition when using TimePartitionedFileSet as an
   * OutputFormatProvider. It is used to generate the end of the output file path for each partition.
   *
   * @param arguments the runtime arguments to read from
   * @return the path format, or null if it is not set
   */
  @Nullable
  public static String getOutputPathFormat(Map<String, String> arguments) {
    return arguments.get(OUTPUT_PATH_FORMAT);
  }

  /**
   * Get the time zone used to format the date for the output partition.
   *
   * @param arguments the runtime arguments to read from
   * @return The String ID of the time zone of the date, or null if it is not set
   */
  @Nullable
  public static String getOutputPathTimeZone(Map<String, String> arguments) {
    return arguments.get(OUTPUT_TIME_ZONE);
  }

  /**
   * Set the start (inclusive) of the time range to be read as input.
   *
   * @param arguments the runtime arguments to update; must be mutable
   * @param time The time in milliseconds.
   */
  public static void setInputStartTime(Map<String, String> arguments, long time) {
    arguments.put(INPUT_START_TIME, Long.toString(time));
  }

  /**
   * Get the start (inclusive) of the time range to be read as input.
   *
   * @param arguments the runtime arguments to read from
   * @return the start time in milliseconds, or null if it is not set
   * @throws NumberFormatException if the stored value is not a valid long
   */
  @Nullable
  public static Long getInputStartTime(Map<String, String> arguments) {
    return parseTimeArgument(arguments, INPUT_START_TIME);
  }

  /**
   * Set the end (exclusive) of the time range to be read as input.
   *
   * @param arguments the runtime arguments to update; must be mutable
   * @param time The time in milliseconds.
   */
  public static void setInputEndTime(Map<String, String> arguments, long time) {
    arguments.put(INPUT_END_TIME, Long.toString(time));
  }

  /**
   * Get the end (exclusive) of the time range to be read as input.
   *
   * @param arguments the runtime arguments to read from
   * @return the end time in milliseconds, or null if it is not set
   * @throws NumberFormatException if the stored value is not a valid long
   */
  @Nullable
  public static Long getInputEndTime(Map<String, String> arguments) {
    return parseTimeArgument(arguments, INPUT_END_TIME);
  }

  /**
   * Reads the value stored under the given key and parses it as a long; returns null if the key is absent.
   */
  @Nullable
  private static Long parseTimeArgument(Map<String, String> arguments, String key) {
    String value = arguments.get(key);
    if (value == null) {
      return null;
    }
    return Long.parseLong(value);
  }
}