/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.conversion.hive.source; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; import com.google.common.base.Optional; import gobblin.configuration.ConfigurationKeys; import gobblin.data.management.copy.hive.HiveDataset; import gobblin.source.workunit.WorkUnit; /** * A {@link WorkUnit} wrapper for {@link HiveSource}. This is class is meant to hide the keys at which workunit values are stored. * The source class is supposed to read/write values into the {@link WorkUnit} through getters/setters instead of directly accessing * through {@link #getProp(String, String)}/{@link #setProp(String, Object)} */ public class HiveWorkUnit extends WorkUnit { private static final String HIVE_DATASET_SERIALIZED_KEY = "hive.source.dataset.serialized"; private static final String HIVE_TABLE_SCHEMA_URL_KEY = "hive.source.dataset.table.schemaUrl"; private static final String HIVE_PARTITION_SCHEMA_URL_KEY = "hive.source.dataset.partition.schemaUrl"; private static final String HIVE_PARTITION_NAME_KEY = "hive.source.dataset.partition.name"; @SuppressWarnings("deprecation") public HiveWorkUnit() { super(); } @SuppressWarnings("deprecation") public HiveWorkUnit(WorkUnit workunit) { super(workunit); } /** * Automatically serializes the {@link HiveDataset} by calling {@link #setHiveDataset(HiveDataset)} * @param hiveDataset for which the workunit is being created */ @SuppressWarnings("deprecation") public HiveWorkUnit(HiveDataset hiveDataset) { super(); setHiveDataset(hiveDataset); } /** * Sets the {@link ConfigurationKeys#DATASET_URN_KEY} key. */ public void setDatasetUrn(String datasetUrn) { this.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn); } public String getDatasetUrn(String datasetUrn) { return this.getProp(ConfigurationKeys.DATASET_URN_KEY); } /** * Automatically sets the dataset urn by calling {@link #setDatasetUrn(String)} */ public void setHiveDataset(HiveDataset hiveDataset) { this.setProp(HIVE_DATASET_SERIALIZED_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(hiveDataset, HiveDataset.class)); setDatasetUrn(hiveDataset.getTable().getCompleteName()); } public HiveDataset getHiveDataset() { return HiveSource.GENERICS_AWARE_GSON.fromJson(this.getProp(HIVE_DATASET_SERIALIZED_KEY), HiveDataset.class); } /** * Set the schema url for this table into the {@link WorkUnit} */ public void setTableSchemaUrl(Path schemaUrl) { this.setProp(HIVE_TABLE_SCHEMA_URL_KEY, schemaUrl.toString()); } public Path getTableSchemaUrl() { return new Path(this.getProp(HIVE_TABLE_SCHEMA_URL_KEY)); } /** * Set the schema url for a partition into the {@link WorkUnit} */ public void setPartitionSchemaUrl(Path schemaUrl) { this.setProp(HIVE_PARTITION_SCHEMA_URL_KEY, schemaUrl.toString()); } /** * Get the schema url path for the partition if this {@link WorkUnit} is for a partitioned table. * If not, return {@link Optional#absent()} */ public Optional<Path> getPartitionSchemaUrl() { return StringUtils.isNotBlank(this.getProp(HIVE_PARTITION_SCHEMA_URL_KEY)) ? Optional.<Path> of(new Path(this.getProp(HIVE_PARTITION_SCHEMA_URL_KEY))) : Optional.<Path> absent(); } /** * Set the name of the partition into the {@link WorkUnit} * @param partitionName */ public void setPartitionName(String partitionName) { this.setProp(HIVE_PARTITION_NAME_KEY, partitionName); } /** * Get the name for the partition if this {@link WorkUnit} is for a partitioned table. * If not, return {@link Optional#absent()} */ public Optional<String> getPartitionName() { return Optional.fromNullable(this.getProp(HIVE_PARTITION_NAME_KEY)); } }