/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib;
import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.api.dataset.DataSetException;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
/**
* Represents a dataset that is split into partitions that can be uniquely addressed
* by time. Each partition is a path in a file set, with a timestamp attached as metadata.
* The timestamp is mapped to a partition key of a {@link co.cask.cdap.api.dataset.lib.PartitionedFileSet}
* with five integer partitioning fields: the year, month, day, hour and minute. Partitions can
* be retrieved using time range or using a {@link co.cask.cdap.api.dataset.lib.PartitionFilter}.
* <p>
* The granularity of time is in minutes, that is, any seconds or milliseconds after the
* full minute are ignored for the partition keys. That means there cannot be two partitions
* in the same minute. Also, when retrieving partitions via time or time range using
* {@link #getPartitionByTime}, {@link #getPartitionsByTime}, or when writing a partition using
* {@link #getPartitionOutput}, the seconds and milliseconds on the
* time or time range are ignored.
* <p>
* NOTE(review): the unit of the {@code long} time arguments is not stated in this interface; the
* wording above suggests a millisecond-resolution timestamp, but this should be confirmed.
* <p>
* This dataset can be made available for querying with SQL (explore). This is enabled through dataset
* properties when the dataset is created. See {@link co.cask.cdap.api.dataset.lib.FileSetProperties}
* for details. If it is enabled for explore, a Hive external table will be created when the dataset is
* created. The Hive table is partitioned by year, month, day, hour and minute.
*/
@Beta
public interface TimePartitionedFileSet extends PartitionedFileSet {
/**
* Add a partition for a given time, stored at a given path (relative to the file set's base path).
*
* @param time the time that addresses the partition; seconds and milliseconds after the full minute
*             are ignored for the partition key
* @param path the path of the partition, relative to the file set's base path
*/
void addPartition(long time, String path);
/**
* Add a partition for a given time, stored at a given path (relative to the file set's base path),
* with given metadata.
*
* @param time the time that addresses the partition; seconds and milliseconds after the full minute
*             are ignored for the partition key
* @param path the path of the partition, relative to the file set's base path
* @param metadata the metadata entries to attach to the new partition
*/
void addPartition(long time, String path, Map<String, String> metadata);
/**
* Adds a new metadata entry for a particular partition.
* Note that existing entries can not be updated.
*
* @param time the time that addresses the partition
* @param metadataKey the key of the metadata entry to add
* @param metadataValue the value of the metadata entry to add
* @throws DataSetException in case an attempt is made to update existing entries.
*/
void addMetadata(long time, String metadataKey, String metadataValue);
/**
* Adds a set of new metadata entries for a particular partition.
* Note that existing entries can not be updated.
*
* @param time the time that addresses the partition
* @param metadata the metadata entries to add
* @throws DataSetException in case an attempt is made to update existing entries.
*/
void addMetadata(long time, Map<String, String> metadata);
/**
* Remove a partition for a given time.
*
* @param time the time that addresses the partition to remove
*/
void dropPartition(long time);
/**
* Return the partition associated with the given time, rounded to the minute;
* or null if no such partition exists.
*
* @param time the time that addresses the partition; seconds and milliseconds are ignored
* @return the partition for that minute, or {@code null} if no such partition exists
*/
@Nullable
TimePartitionDetail getPartitionByTime(long time);
/**
* Return all partitions within the time range given by startTime (inclusive) and endTime (exclusive),
* both rounded to the full minute.
*
* @param startTime the start of the time range (inclusive); seconds and milliseconds are ignored
* @param endTime the end of the time range (exclusive); seconds and milliseconds are ignored
* @return the partitions whose time falls within the given range
*/
Set<TimePartitionDetail> getPartitionsByTime(long startTime, long endTime);
/**
* Return a partition output for a specific time, rounded to the minute, in preparation for creating a new partition.
* Obtain the location to write to from the PartitionOutput, then call the {@link PartitionOutput#addPartition}
* to add the partition to this dataset.
*
* @param time the time of the partition to be created; seconds and milliseconds are ignored
* @return a partition output for the given time
*/
TimePartitionOutput getPartitionOutput(long time);
}