/** * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi.hive; import java.net.URI; import java.net.URISyntaxException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.kitesdk.data.Dataset; import org.kitesdk.data.DatasetDescriptor; import org.kitesdk.data.spi.DatasetRepository; import org.kitesdk.data.spi.DefaultConfiguration; import org.kitesdk.data.spi.MetadataProvider; import org.kitesdk.data.spi.filesystem.FileSystemDataset; /** * <p> * A {@link DatasetRepository} that uses the Hive/HCatalog metastore for metadata, * and stores data in a Hadoop {@link FileSystem}. * </p> * <p> * The location of the data directory is either chosen by Hive/HCatalog (so called * "managed tables"), or specified when creating an instance of this class by providing * a {@link FileSystem}, and a root directory in the constructor ("external tables"). * </p> * <p> * The primary methods of interest will be * {@link #create(String, String, DatasetDescriptor)}, {@link #load(String, String)}, and * {@link #delete(String, String)} which create a new dataset, load an existing * dataset, or delete an existing dataset, respectively. Once a dataset has been created * or loaded, users can invoke the appropriate {@link Dataset} methods to get a reader * or writer as needed. * </p> * * @see DatasetRepository * @see Dataset */ public class HiveManagedDatasetRepository extends HiveAbstractDatasetRepository { /** * Create an HCatalog dataset repository with managed tables. */ HiveManagedDatasetRepository(Configuration conf) { super(conf, new HiveManagedMetadataProvider(conf)); } /** * Create an HCatalog dataset repository with managed tables. */ HiveManagedDatasetRepository(Configuration conf, MetadataProvider provider) { super(conf, provider); } @Override @SuppressWarnings("unchecked") public <E> Dataset<E> create(String namespace, String name, DatasetDescriptor descriptor) { // avoids calling fsRepository.create, which creates the data path getMetadataProvider().create(namespace, name, descriptor); FileSystemDataset<E> dataset = (FileSystemDataset<E>) load(namespace, name); dataset.addExistingPartitions(); return dataset; } @Override public <E> Dataset<E> create(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) { // avoids calling fsRepository.create, which creates the data path getMetadataProvider().create(namespace, name, descriptor); return load(namespace, name, type); } /** * A fluent builder to aid in the construction of {@link HiveManagedDatasetRepository} * instances. * @since 0.3.0 */ public static class Builder { private Path rootDirectory; private Configuration configuration; /** * The root directory for dataset files. */ public Builder rootDirectory(Path path) { this.rootDirectory = path; return this; } /** * The root directory for dataset files. */ public Builder rootDirectory(URI uri) { this.rootDirectory = new Path(uri); return this; } /** * The root directory for metadata and dataset files. * * @param uri a String to parse as a URI * @return this Builder for method chaining. * @throws URISyntaxException * * @since 0.8.0 */ public Builder rootDirectory(String uri) throws URISyntaxException { this.rootDirectory = new Path(new URI(uri)); return this; } /** * The {@link Configuration} used to find the {@link FileSystem}. Optional. If not * specified, the default configuration will be used. */ public Builder configuration(Configuration configuration) { this.configuration = configuration; return this; } /** * Build an instance of the configured {@link HiveManagedDatasetRepository}. * * @since 0.9.0 */ @SuppressWarnings("deprecation") public DatasetRepository build() { if (configuration == null) { this.configuration = DefaultConfiguration.get(); } if (rootDirectory != null) { return new HiveExternalDatasetRepository(configuration, rootDirectory); } else { return new HiveManagedDatasetRepository(configuration); } } } }