/*
 * Copyright © 2015-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.api.dataset.lib;

import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.api.dataset.lib.Partitioning.FieldType;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.reflect.TypeToken;

import java.lang.reflect.Type;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Helpers for manipulating runtime arguments of partitioned file sets.
 */
@Beta
public class PartitionedFileSetArguments {

  public static final String OUTPUT_PARTITION_KEY_PREFIX = "output.partition.key.";
  public static final String OUTPUT_PARTITION_METADATA_PREFIX = "output.partition.metadata.";
  public static final String DYNAMIC_PARTITIONER_CLASS_NAME = "output.dynamic.partitioner.class.name";
  public static final String INPUT_PARTITION_FILTER = "input.partition.filter";

  private static final Gson GSON =
    new GsonBuilder().registerTypeAdapter(PartitionFilter.Condition.class, new ConditionCodec()).create();
  private static final Type PARTITION_FILTER_LIST_TYPE = new TypeToken<List<PartitionFilter>>() { }.getType();

  /**
   * Set the partition key of the output partition when using PartitionedFileSet as an OutputFormatProvider.
   * This key is used as the partition key for the new file, and also to generate an output file path, if that
   * path is not explicitly given as an argument itself.
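   * <p>
   * A minimal usage sketch (the field names "league" and "season" are illustrative assumptions, not part of
   * this API):
   * <pre>{@code
   * Map<String, String> args = new HashMap<>();
   * PartitionedFileSetArguments.setOutputPartitionKey(args, PartitionKey.builder()
   *   .addStringField("league", "nfl")
   *   .addIntField("season", 1980)
   *   .build());
   * }</pre>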
   *
   * @param key the partition key
   * @param arguments the runtime arguments for a partitioned dataset
   */
  public static void setOutputPartitionKey(Map<String, String> arguments, PartitionKey key) {
    for (Map.Entry<String, ? extends Comparable> entry : key.getFields().entrySet()) {
      arguments.put(OUTPUT_PARTITION_KEY_PREFIX + entry.getKey(), entry.getValue().toString());
    }
  }

  /**
   * @return the partition key of the output partition to be written; or null if no partition key was found
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param partitioning the declared partitioning for the dataset, needed for proper interpretation of values
   */
  @Nullable
  public static PartitionKey getOutputPartitionKey(Map<String, String> arguments, Partitioning partitioning) {
    // extract the arguments that describe the output partition key
    Map<String, String> keyArguments = FileSetProperties.propertiesWithPrefix(arguments, OUTPUT_PARTITION_KEY_PREFIX);
    if (keyArguments.isEmpty()) {
      return null; // there is no output partition key
    }
    // there is a partition key; now it is required to match the partitioning
    PartitionKey.Builder builder = PartitionKey.builder();
    for (Map.Entry<String, FieldType> entry : partitioning.getFields().entrySet()) {
      String fieldName = entry.getKey();
      FieldType fieldType = entry.getValue();
      String stringValue = keyArguments.get(fieldName);
      Comparable fieldValue = convertFieldValue("key", "value", fieldName, fieldType, stringValue, false);
      builder.addField(fieldName, fieldValue);
    }
    return builder.build();
  }

  /**
   * Sets the metadata of the output partition when using PartitionedFileSet as an OutputFormatProvider.
   *
   * @param arguments the arguments in which to set the metadata
   * @param metadata the metadata to be written to the output partition
   */
  public static void setOutputPartitionMetadata(Map<String, String> arguments, Map<String, String> metadata) {
    for (Map.Entry<String, String> entry : metadata.entrySet()) {
      arguments.put(OUTPUT_PARTITION_METADATA_PREFIX + entry.getKey(), entry.getValue());
    }
  }

  /**
   * @return the metadata of the output partition to be written
   */
  public static Map<String, String> getOutputPartitionMetadata(Map<String, String> arguments) {
    return FileSetProperties.propertiesWithPrefix(arguments, OUTPUT_PARTITION_METADATA_PREFIX);
  }

  /**
   * Set the partition filter for the input to be read.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param filter the partition filter
   */
  public static void setInputPartitionFilter(Map<String, String> arguments, PartitionFilter filter) {
    // Serialize a singleton list for now. Support for multiple PartitionFilters can be added in the future.
    // See: https://issues.cask.co/browse/CDAP-5618
    arguments.put(INPUT_PARTITION_FILTER, GSON.toJson(Collections.singletonList(filter)));
  }

  /**
   * Get the partition filter for the input to be read.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param partitioning the declared partitioning for the dataset, needed for proper interpretation of values
   * @return the PartitionFilter specified in the arguments or null if no filter is specified.
   * @deprecated as of 3.4.0. Use {@link #getInputPartitionFilter(Map)} instead.
   */
  @Nullable
  @Deprecated
  public static PartitionFilter getInputPartitionFilter(Map<String, String> arguments, Partitioning partitioning) {
    return getInputPartitionFilter(arguments);
  }

  /**
   * Get the partition filter for the input to be read.
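   * <p>
   * A minimal round-trip sketch (the field name "year" and its bounds are illustrative assumptions):
   * <pre>{@code
   * Map<String, String> args = new HashMap<>();
   * PartitionedFileSetArguments.setInputPartitionFilter(
   *   args, PartitionFilter.builder().addRangeCondition("year", 2012, 2016).build());
   * PartitionFilter filter = PartitionedFileSetArguments.getInputPartitionFilter(args);
   * }</pre>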
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @return the PartitionFilter specified in the arguments or null if no filter is specified.
   */
  @Nullable
  public static PartitionFilter getInputPartitionFilter(Map<String, String> arguments) {
    if (!arguments.containsKey(INPUT_PARTITION_FILTER)) {
      return null;
    }
    List<PartitionFilter> singletonList =
      GSON.fromJson(arguments.get(INPUT_PARTITION_FILTER), PARTITION_FILTER_LIST_TYPE);
    // this shouldn't happen based upon how we are serializing in #setInputPartitionFilter.
    // however, user might not use that method and attempt to specify/construct the runtime arguments themselves.
    if (singletonList.size() != 1) {
      throw new IllegalArgumentException("Expected serialized list to have length 1. Actual: " + singletonList.size());
    }
    return singletonList.get(0);
  }

  // helper to convert a string value into a field value in a partition key or filter
  public static Comparable convertFieldValue(String where, String kind, String fieldName,
                                             FieldType fieldType, String stringValue, boolean acceptNull) {
    if (null == stringValue) {
      if (acceptNull) {
        return null;
      } else {
        throw new IllegalArgumentException(
          String.format("Incomplete partition %s: %s for field '%s' is missing", where, kind, fieldName));
      }
    }
    try {
      return fieldType.parse(stringValue);
    } catch (Exception e) {
      throw new IllegalArgumentException(
        String.format("Invalid partition %s: %s '%s' for field '%s' cannot be converted to %s.",
                      where, kind, stringValue, fieldName, fieldType.name()), e);
    }
  }

  /**
   * Sets partitions as input for a PartitionedFileSet. If both a PartitionFilter and Partition(s) are specified, the
   * PartitionFilter takes precedence and the specified Partition(s) will be ignored.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param partitionIterator the iterator of partitions to add as input
   */
  public static void addInputPartitions(Map<String, String> arguments,
                                        Iterator<? extends Partition> partitionIterator) {
    while (partitionIterator.hasNext()) {
      addInputPartition(arguments, partitionIterator.next());
    }
  }

  /**
   * Sets partitions as input for a PartitionedFileSet. If both a PartitionFilter and Partition(s) are specified, the
   * PartitionFilter takes precedence and the specified Partition(s) will be ignored.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param partitions an iterable of partitions to add as input
   */
  public static void addInputPartitions(Map<String, String> arguments, Iterable<? extends Partition> partitions) {
    addInputPartitions(arguments, partitions.iterator());
  }

  /**
   * Sets a partition as input for a PartitionedFileSet. If both a PartitionFilter and Partition(s) are specified, the
   * PartitionFilter takes precedence and the specified Partition(s) will be ignored.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param partition the partition to add as input
   */
  public static void addInputPartition(Map<String, String> arguments, Partition partition) {
    FileSetArguments.addInputPath(arguments, partition.getRelativePath());
  }

  /**
   * Sets a DynamicPartitioner class to be used during the output of a PartitionedFileSet.
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param dynamicPartitionerClass the class to set
   * @param <K> type of key
   * @param <V> type of value
   */
  public static <K, V> void setDynamicPartitioner(Map<String, String> arguments,
                                                  Class<? extends DynamicPartitioner<K, V>> dynamicPartitionerClass) {
    setDynamicPartitioner(arguments, dynamicPartitionerClass.getName());
  }

  /**
   * Sets a DynamicPartitioner class to be used during the output of a PartitionedFileSet.
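   * <p>
   * For example (sketch; "com.example.MyDynamicPartitioner" is a hypothetical class name):
   * <pre>{@code
   * Map<String, String> args = new HashMap<>();
   * PartitionedFileSetArguments.setDynamicPartitioner(args, "com.example.MyDynamicPartitioner");
   * }</pre>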
   *
   * @param arguments the runtime arguments for a partitioned dataset
   * @param dynamicPartitionerClassName the name of the class to set
   */
  public static void setDynamicPartitioner(Map<String, String> arguments, String dynamicPartitionerClassName) {
    arguments.put(DYNAMIC_PARTITIONER_CLASS_NAME, dynamicPartitionerClassName);
  }

  /**
   * Returns the name of the DynamicPartitioner class that was previously set in the runtime arguments.
   *
   * @param arguments the runtime arguments to get the class name from
   * @return the name of the DynamicPartitioner class
   */
  public static String getDynamicPartitioner(Map<String, String> arguments) {
    return arguments.get(DYNAMIC_PARTITIONER_CLASS_NAME);
  }
}