/* * Copyright © 2014 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.dataset2.lib.partitioned; import co.cask.cdap.api.dataset.DatasetContext; import co.cask.cdap.api.dataset.DatasetDefinition; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.api.dataset.DatasetSpecification; import co.cask.cdap.api.dataset.lib.FileSet; import co.cask.cdap.api.dataset.lib.FileSetArguments; import co.cask.cdap.api.dataset.lib.IndexedTable; import co.cask.cdap.api.dataset.lib.PartitionKey; import co.cask.cdap.api.dataset.lib.PartitionedFileSet; import co.cask.cdap.api.dataset.lib.PartitionedFileSetArguments; import co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties; import co.cask.cdap.api.dataset.lib.TimePartitionedFileSetArguments; import com.google.common.base.Strings; import com.google.common.collect.Maps; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Map; import java.util.TimeZone; /** * Defines the partitioned dataset type. At this time, the partitions are not managed by the * partitioned dataset, so all admin is simply on the partition table. */ public class TimePartitionedFileSetDefinition extends PartitionedFileSetDefinition { public TimePartitionedFileSetDefinition(String name, DatasetDefinition<? extends FileSet, ?> filesetDef, DatasetDefinition<? extends IndexedTable, ?> tableDef) { super(name, filesetDef, tableDef); } @Override public DatasetSpecification configure(String instanceName, DatasetProperties properties) { // add the partition key to the properties. properties = PartitionedFileSetProperties .builder() .setPartitioning(TimePartitionedFileSetDataset.PARTITIONING) .addAll(properties.getProperties()) .build(); return super.configure(instanceName, properties); } @Override public PartitionedFileSet getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException { // make any necessary updates to the arguments arguments = updateArgumentsIfNeeded(arguments); FileSet fileset = filesetDef.getDataset(datasetContext, spec.getSpecification(FILESET_NAME), arguments, classLoader); IndexedTable table = indexedTableDef.getDataset(datasetContext, spec.getSpecification(PARTITION_TABLE_NAME), arguments, classLoader); return new TimePartitionedFileSetDataset(datasetContext, spec.getName(), fileset, table, spec, arguments, getExploreProvider()); } // if the arguments do not contain an output path, but an output partition time, generate an output path from that; // also convert the output partition time to a partition key and add it to the arguments; // also call the super class' method to update arguments if it needs to protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) { Long time = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments); if (time != null) { // set the output path according to partition time if (FileSetArguments.getOutputPath(arguments) == null) { String outputPathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments); String path; if (Strings.isNullOrEmpty(outputPathFormat)) { path = String.format("%tF/%tH-%tM.%d", time, time, time, time); } else { SimpleDateFormat format = new SimpleDateFormat(outputPathFormat); String timeZoneID = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments); if (!Strings.isNullOrEmpty(timeZoneID)) { format.setTimeZone(TimeZone.getTimeZone(timeZoneID)); } path = format.format(new Date(time)); } arguments = Maps.newHashMap(arguments); FileSetArguments.setOutputPath(arguments, path); } // add the corresponding partition key to the arguments PartitionKey outputKey = TimePartitionedFileSetDataset.partitionKeyForTime(time); PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputKey); } // delegate to super class for anything it needs to do return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING); } }