/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.hive.datasets;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.ConfigurationUtil;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.hive.context.CConfCodec;
import com.google.common.base.Throwables;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
/**
 * HiveStorageHandler to access Datasets.
 *
 * <p>
 * Supplies Hive with the input format, output format and SerDe classes used for datasets, and copies the
 * dataset name and namespace from the table properties into the job properties.
 * </p>
 *
 * <p>
 * Referred to by string rather than {@code Class.getName()} in {@code ExploreServiceUtils.traceExploreDependencies()}
 * because this class extends a Hive class, which isn't present in the {@code ExploreServiceUtils} class loader.
 * </p>
 */
public class DatasetStorageHandler extends DefaultStorageHandler {
  private static final Logger LOG = LoggerFactory.getLogger(DatasetStorageHandler.class);

  /**
   * @return {@link DatasetInputFormat}, the input format used to read from datasets
   */
  @SuppressWarnings("unchecked")
  @Override
  public Class<? extends InputFormat> getInputFormatClass() {
    return DatasetInputFormat.class;
  }

  /**
   * @return {@link DatasetOutputFormat} when writes are enabled, otherwise {@link SequenceFileOutputFormat}
   */
  @Override
  public Class<? extends OutputFormat> getOutputFormatClass() {
    // An output format has to be returned even when writes are not allowed, because it is used during
    // table creation.
    if (!writesEnabled()) {
      return SequenceFileOutputFormat.class;
    }
    return DatasetOutputFormat.class;
  }

  /**
   * @return {@link DatasetSerDe}, the SerDe used for datasets
   */
  @Override
  public Class<? extends SerDe> getSerDeClass() {
    return DatasetSerDe.class;
  }

  /**
   * Configures the job properties for reading; delegates to
   * {@link #configureTableJobProperties(TableDesc, Map)}.
   *
   * @param tableDesc descriptor of the table being read
   * @param jobProperties map that receives the dataset properties
   */
  @Override
  public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    configureTableJobProperties(tableDesc, jobProperties);
  }

  /**
   * Configures the job properties for writing; delegates to
   * {@link #configureTableJobProperties(TableDesc, Map)} when writes are enabled.
   *
   * @param tableDesc descriptor of the table being written
   * @param jobProperties map that receives the dataset properties
   * @throws UnsupportedOperationException if writes are not enabled
   */
  @Override
  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    if (!writesEnabled()) {
      throw new UnsupportedOperationException("Writing to datasets through Hive is not enabled.");
    }
    configureTableJobProperties(tableDesc, jobProperties);
  }

  /**
   * Copies the dataset name and dataset namespace from the table properties into the job properties.
   *
   * @param tableDesc descriptor of the table whose properties hold the dataset name and namespace
   * @param jobProperties map that receives the dataset name and namespace
   */
  @Override
  public void configureTableJobProperties(TableDesc tableDesc,
                                          Map<String, String> jobProperties) {
    // NOTE: the jobProperties map will be put in the jobConf passed to the DatasetOutputFormat/DatasetInputFormat.
    // Hive ensures that the properties of the right table will be passed at the right time to those classes.
    final String datasetName = tableDesc.getProperties().getProperty(Constants.Explore.DATASET_NAME);
    jobProperties.put(Constants.Explore.DATASET_NAME, datasetName);

    final String namespace = tableDesc.getProperties().getProperty(Constants.Explore.DATASET_NAMESPACE);
    jobProperties.put(Constants.Explore.DATASET_NAMESPACE, namespace);

    LOG.debug("Got dataset {} in namespace {} for external table {}", datasetName, namespace, tableDesc.getTableName());
  }

  /**
   * Looks up the {@link CConfiguration} stored in this handler's configuration under
   * {@code Constants.Explore.CCONF_KEY} and reads the {@code Constants.Explore.WRITES_ENABLED} flag from it.
   * An {@link IOException} raised during the lookup is logged and rethrown through
   * {@link Throwables#propagate(Throwable)}.
   *
   * @return the value of the writes-enabled flag
   */
  private boolean writesEnabled() {
    try {
      return ConfigurationUtil.get(getConf(), Constants.Explore.CCONF_KEY, CConfCodec.INSTANCE)
        .getBoolean(Constants.Explore.WRITES_ENABLED);
    } catch (IOException ioe) {
      LOG.error("Unable to get CDAP Configuration to check if writes are enabled.", ioe);
      throw Throwables.propagate(ioe);
    }
  }
}