/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data2.datafabric.dataset.service;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.common.BadRequestException;
import co.cask.cdap.common.DatasetAlreadyExistsException;
import co.cask.cdap.common.DatasetNotFoundException;
import co.cask.cdap.common.DatasetTypeNotFoundException;
import co.cask.cdap.common.HandlerException;
import co.cask.cdap.common.NamespaceNotFoundException;
import co.cask.cdap.common.NotFoundException;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.data2.audit.AuditPublisher;
import co.cask.cdap.data2.audit.AuditPublishers;
import co.cask.cdap.data2.datafabric.dataset.AbstractDatasetProvider;
import co.cask.cdap.data2.datafabric.dataset.DatasetsUtil;
import co.cask.cdap.data2.datafabric.dataset.instance.DatasetInstanceManager;
import co.cask.cdap.data2.datafabric.dataset.service.executor.DatasetAdminOpResponse;
import co.cask.cdap.data2.datafabric.dataset.service.executor.DatasetOpExecutor;
import co.cask.cdap.data2.datafabric.dataset.type.DatasetTypeManager;
import co.cask.cdap.data2.dataset2.DatasetDefinitionRegistryFactory;
import co.cask.cdap.data2.registry.UsageRegistry;
import co.cask.cdap.explore.client.ExploreFacade;
import co.cask.cdap.proto.DatasetInstanceConfiguration;
import co.cask.cdap.proto.DatasetMeta;
import co.cask.cdap.proto.DatasetTypeMeta;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.audit.AuditPayload;
import co.cask.cdap.proto.audit.AuditType;
import co.cask.cdap.store.NamespaceStore;
import co.cask.tephra.TransactionExecutorFactory;
import com.google.common.collect.ImmutableList;
import com.google.inject.Inject;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
/**
* Handles dataset instance management calls.
*/
public class DatasetInstanceService {
private static final Logger LOG = LoggerFactory.getLogger(DatasetInstanceService.class);
// Looks up dataset type metadata (see getTypeInfo()).
private final DatasetTypeManager implManager;
// Stores, reads, lists and deletes dataset instance specifications.
private final DatasetInstanceManager instanceManager;
// Runs create/drop/exists/truncate/upgrade operations for a dataset instance.
private final DatasetOpExecutor opExecutorClient;
// Used to enable and disable ad-hoc exploration of a dataset instance.
private final ExploreFacade exploreFacade;
// Read from Constants.Dataset.DATASET_UNCHECKED_UPGRADE; when true, create() does not reject an existing dataset.
private final boolean allowDatasetUncheckedUpgrade;
// Records which programs use which dataset instances (see registerUsage()).
private final UsageRegistry usageRegistry;
// Consulted by ensureNamespaceExists() to check that a namespace exists.
private final NamespaceStore nsStore;
// Not final: supplied through the optional setter injection in setAuditPublisher().
private AuditPublisher auditPublisher;
/**
 * Creates the service and wires up its collaborators.
 *
 * <p>The {@link UsageRegistry} is built on top of a dataset provider that delegates back to this service:
 * metadata lookups go through {@link #get(Id.DatasetInstance, List)} with no owners, and dataset creation
 * goes through the private create-if-not-exists path.
 *
 * @param implManager manager used to look up dataset type metadata
 * @param instanceManager manager used to store and read dataset instance specifications
 * @param opExecutorClient executor for dataset admin operations
 * @param exploreFacade facade used to enable and disable exploration of datasets
 * @param conf configuration from which {@code Constants.Dataset.DATASET_UNCHECKED_UPGRADE} is read
 * @param txFactory transaction executor factory handed to the {@link UsageRegistry}
 * @param registryFactory factory handed to the dataset provider backing the {@link UsageRegistry}
 * @param nsStore store used to check whether a namespace exists
 */
@Inject
public DatasetInstanceService(DatasetTypeManager implManager, DatasetInstanceManager instanceManager,
                              DatasetOpExecutor opExecutorClient, ExploreFacade exploreFacade, CConfiguration conf,
                              TransactionExecutorFactory txFactory,
                              DatasetDefinitionRegistryFactory registryFactory,
                              NamespaceStore nsStore) {
  this.implManager = implManager;
  this.instanceManager = instanceManager;
  this.opExecutorClient = opExecutorClient;
  this.exploreFacade = exploreFacade;

  // Dataset provider that routes the usage registry's own dataset needs back through this service.
  AbstractDatasetProvider selfBackedProvider = new AbstractDatasetProvider(registryFactory) {
    @Override
    public DatasetMeta getMeta(Id.DatasetInstance instance) throws Exception {
      // No owners are passed, so this lookup does not register any usage.
      return DatasetInstanceService.this.get(instance, ImmutableList.<Id>of());
    }

    @Override
    public void createIfNotExists(Id.DatasetInstance instance, String type,
                                  DatasetProperties creationProps) throws Exception {
      DatasetInstanceConfiguration config =
        new DatasetInstanceConfiguration(type, creationProps.getProperties(), creationProps.getDescription());
      DatasetInstanceService.this.createIfNotExists(instance.getNamespace(), instance.getId(), config);
    }
  };
  this.usageRegistry = new UsageRegistry(txFactory, selfBackedProvider);

  this.nsStore = nsStore;
  this.allowDatasetUncheckedUpgrade = conf.getBoolean(Constants.Dataset.DATASET_UNCHECKED_UPGRADE);
}
/**
 * Sets the publisher that this service passes along when it publishes audit events.
 * The injection is declared optional, so this setter may never be called.
 *
 * @param auditPublisher the audit publisher to use
 */
@SuppressWarnings("unused")
@Inject(optional = true)
public void setAuditPublisher(AuditPublisher auditPublisher) {
this.auditPublisher = auditPublisher;
}
/**
 * Returns the specifications of all dataset instances in a namespace.
 *
 * @param namespace the namespace whose datasets should be listed
 * @return the dataset specifications found in the given namespace
 * @throws NamespaceNotFoundException if the namespace is not the system namespace and does not exist
 * @throws Exception if the namespace check or the listing itself fails
 */
public Collection<DatasetSpecification> list(Id.Namespace namespace) throws Exception {
  // Fails with NamespaceNotFoundException when the namespace is unknown
  ensureNamespaceExists(namespace);
  Collection<DatasetSpecification> specsInNamespace = instanceManager.getAll(namespace);
  return specsInNamespace;
}
/**
 * Looks up a dataset instance together with the metadata of its type, and registers the given owners
 * as users of the instance.
 *
 * @param instance the dataset instance to look up
 * @param owners the {@link Id}s that will be using the dataset instance
 * @return a {@link DatasetMeta} holding the instance's specification and its type metadata
 * @throws NotFoundException if either the dataset instance or its dataset type cannot be found
 * @throws Exception if any of the underlying lookups fails
 */
public DatasetMeta get(Id.DatasetInstance instance, List<? extends Id> owners) throws Exception {
  // TODO: CDAP-3901 add back namespace existence check
  DatasetSpecification storedSpec = instanceManager.get(instance);
  if (storedSpec == null) {
    throw new NotFoundException(instance);
  }

  DatasetSpecification spec = DatasetsUtil.fixOriginalProperties(storedSpec);
  Id.Namespace namespace = instance.getNamespace();
  String typeName = spec.getType();

  // Built up front; only needed for the error below
  Id.DatasetType datasetTypeId = Id.DatasetType.from(namespace, typeName);
  DatasetTypeMeta typeMeta = getTypeInfo(namespace, typeName);
  if (typeMeta == null) {
    // TODO: This shouldn't happen unless CDAP is in an invalid state - maybe give different error
    throw new NotFoundException(datasetTypeId);
  }

  registerUsage(instance, owners);
  return new DatasetMeta(spec, typeMeta, null);
}
/**
 * Returns the original properties of a dataset instance, that is, the properties with which the dataset
 * was created or last reconfigured.
 *
 * @param instance the id of the dataset
 * @return the original properties as reported by the dataset's spec after it has been passed through
 *         {@code DatasetsUtil.fixOriginalProperties}
 * @throws NotFoundException if no specification is stored for the dataset instance
 */
public Map<String, String> getOriginalProperties(Id.DatasetInstance instance) throws Exception {
  DatasetSpecification storedSpec = instanceManager.get(instance);
  if (storedSpec != null) {
    DatasetSpecification fixedSpec = DatasetsUtil.fixOriginalProperties(storedSpec);
    return fixedSpec.getOriginalProperties();
  }
  throw new NotFoundException(instance);
}
/**
 * Records in the usage registry that each program among the owners uses the dataset instance.
 * Owners that are not programs are skipped. Registration is best effort: a failure for one owner is
 * logged, together with its cause, and does not prevent the remaining owners from being registered.
 *
 * @param instance the dataset instance being used
 * @param owners the {@link Id}s using the dataset instance
 */
private void registerUsage(Id.DatasetInstance instance, List<? extends Id> owners) {
  for (Id owner : owners) {
    if (!(owner instanceof Id.Program)) {
      continue;
    }
    try {
      usageRegistry.register((Id.Program) owner, instance);
    } catch (Exception e) {
      // Pass the exception as the trailing argument so the cause and stack trace are not lost
      LOG.warn("Failed to register usage of {} -> {}", owner, instance, e);
    }
  }
}
/**
 * Creates a dataset instance.
 *
 * <p>If a dataset with the same name already exists, the call is rejected unless unchecked dataset upgrade
 * is allowed by configuration; in that case exploration of the existing dataset is disabled before the
 * dataset is created again.
 *
 * @param namespaceId the namespace to create the dataset instance in
 * @param name the name of the new dataset instance
 * @param props the type name, properties and description for the new dataset instance
 * @throws NamespaceNotFoundException if the specified namespace was not found
 * @throws DatasetAlreadyExistsException if a dataset with the same name already exists and unchecked
 *                                       upgrade is not allowed
 * @throws DatasetTypeNotFoundException if the dataset type was not found
 * @throws Exception if something went wrong
 */
public void create(String namespaceId, String name, DatasetInstanceConfiguration props) throws Exception {
  Id.Namespace namespace = ConversionHelpers.toNamespaceId(namespaceId);
  // Throws NamespaceNotFoundException if the namespace does not exist
  ensureNamespaceExists(namespace);

  Id.DatasetInstance newInstance = ConversionHelpers.toDatasetInstanceId(namespaceId, name);
  DatasetSpecification existing = instanceManager.get(newInstance);
  boolean alreadyExists = existing != null;
  if (alreadyExists && !allowDatasetUncheckedUpgrade) {
    throw new DatasetAlreadyExistsException(newInstance);
  }

  DatasetTypeMeta typeMeta = getTypeInfo(namespace, props.getTypeName());
  if (typeMeta == null) {
    // Type not found in the instance's namespace and the system namespace. Bail out.
    throw new DatasetTypeNotFoundException(ConversionHelpers.toDatasetTypeId(namespace, props.getTypeName()));
  }

  LOG.info("Creating dataset {}.{}, type name: {}, properties: {}",
           namespaceId, name, props.getTypeName(), props.getProperties());

  // Disable explore if the table already existed
  if (alreadyExists) {
    disableExplore(newInstance);
  }

  DatasetProperties creationProps = DatasetProperties.builder()
    .addAll(props.getProperties())
    .setDescription(props.getDescription())
    .build();

  // Note how we execute configure() via opExecutorClient (outside of ds service) to isolate running user code
  DatasetSpecification createdSpec = opExecutorClient.create(newInstance, typeMeta, creationProps, false);
  instanceManager.add(namespace, createdSpec);
  publishAudit(newInstance, AuditType.CREATE);

  // Enable explore
  enableExplore(newInstance, props);
}
/**
 * Creates a dataset instance, treating "already exists" as success.
 *
 * @param namespace the namespace to create the dataset instance in
 * @param name the name of the dataset instance
 * @param props the type name, properties and description for the dataset instance
 * @throws Exception if creation fails for any reason other than the dataset already existing
 */
private void createIfNotExists(Id.Namespace namespace, String name,
                               DatasetInstanceConfiguration props) throws Exception {
  String namespaceId = namespace.getId();
  try {
    create(namespaceId, name, props);
  } catch (DatasetAlreadyExistsException ignored) {
    // ignore
  }
}
/**
 * Updates the properties of an existing dataset instance.
 *
 * <p>The dataset's type is resolved before anything is modified, so a request that fails because the type
 * is missing leaves exploration of the dataset untouched. After that, exploration is disabled, the dataset
 * is reconfigured with the new properties, the new specification is stored, exploration is enabled again,
 * and finally the "upgrade" admin operation is executed.
 *
 * @param instance the dataset instance
 * @param properties the dataset properties to be used
 * @throws NamespaceNotFoundException if the specified namespace was not found
 * @throws DatasetNotFoundException if the dataset was not found
 * @throws DatasetTypeNotFoundException if the type of the existing dataset was not found
 * @throws Exception if something else went wrong
 */
public void update(Id.DatasetInstance instance, Map<String, String> properties) throws Exception {
  // Throws NamespaceNotFoundException if the namespace does not exist
  ensureNamespaceExists(instance.getNamespace());
  DatasetSpecification existing = instanceManager.get(instance);
  if (existing == null) {
    throw new DatasetNotFoundException(instance);
  }

  LOG.info("Update dataset {}, properties: {}", instance.getId(), ConversionHelpers.toJson(properties));

  // Validate the type first: disabling explore is a side effect that must not happen for a request
  // that is going to be rejected anyway.
  DatasetTypeMeta typeMeta = getTypeInfo(instance.getNamespace(), existing.getType());
  if (typeMeta == null) {
    // Type not found in the instance's namespace and the system namespace. Bail out.
    throw new DatasetTypeNotFoundException(
      ConversionHelpers.toDatasetTypeId(instance.getNamespace(), existing.getType()));
  }

  disableExplore(instance);

  // Note how we execute configure() via opExecutorClient (outside of ds service) to isolate running user code
  DatasetSpecification spec = opExecutorClient.create(instance, typeMeta,
                                                      DatasetProperties.builder()
                                                        .addAll(properties)
                                                        .build(),
                                                      true);
  instanceManager.add(instance.getNamespace(), spec);

  DatasetInstanceConfiguration creationProperties =
    new DatasetInstanceConfiguration(existing.getType(), properties, null);
  enableExplore(instance, creationProperties);

  // Calling admin upgrade, after updating specification.
  // Note: audit information for upgrade is published in executeAdmin() method. Since executeAdmin() method can
  // be called directly too.
  executeAdmin(instance, "upgrade");
}
/**
 * Drops the specified dataset instance and publishes a {@code DELETE} audit event for it.
 *
 * @param instance the {@link Id.DatasetInstance} to drop
 * @throws NamespaceNotFoundException if the namespace was not found
 * @throws DatasetNotFoundException if the dataset instance was not found
 * @throws Exception if the namespace check or the drop itself fails
 */
public void drop(Id.DatasetInstance instance) throws Exception {
  // Throws NamespaceNotFoundException if the namespace does not exist
  ensureNamespaceExists(instance.getNamespace());

  final DatasetSpecification existingSpec = instanceManager.get(instance);
  if (existingSpec == null) {
    throw new DatasetNotFoundException(instance);
  }

  LOG.info("Deleting dataset {}.{}", instance.getNamespaceId(), instance.getId());
  dropDataset(instance, existingSpec);
  publishAudit(instance, AuditType.DELETE);
}
/**
 * Executes an admin operation on a dataset instance.
 *
 * <p>Supported operations are {@code "exists"}, {@code "truncate"} and {@code "upgrade"}. Truncate and
 * upgrade additionally publish a {@code TRUNCATE} and an {@code UPDATE} audit event, respectively.
 *
 * @param instance the instance to execute the admin operation on
 * @param method the type of admin operation to execute
 * @return a {@link DatasetAdminOpResponse} carrying the outcome of {@code "exists"}, or a {@code null}
 *         result for the other operations
 * @throws NamespaceNotFoundException if the requested namespace was not found
 * @throws HandlerException with status {@code NOT_FOUND} if the operation is not one of the supported ones
 * @throws Exception if the namespace check or the admin operation fails
 */
public DatasetAdminOpResponse executeAdmin(Id.DatasetInstance instance, String method) throws Exception {
  // Throws NamespaceNotFoundException if the namespace does not exist
  ensureNamespaceExists(instance.getNamespace());

  // NOTE: one cannot directly call create and drop, instead this should be called thru
  //       POST/DELETE @ /data/datasets/{instance-id}. Because we must create/drop metadata for these at same time
  final Object opResult;
  switch (method) {
    case "exists":
      opResult = opExecutorClient.exists(instance);
      break;
    case "truncate":
      opExecutorClient.truncate(instance);
      publishAudit(instance, AuditType.TRUNCATE);
      opResult = null;
      break;
    case "upgrade":
      opExecutorClient.upgrade(instance);
      publishAudit(instance, AuditType.UPDATE);
      opResult = null;
      break;
    default:
      throw new HandlerException(HttpResponseStatus.NOT_FOUND, "Invalid admin operation: " + method);
  }
  return new DatasetAdminOpResponse(opResult, null);
}
/**
 * Finds the {@link DatasetTypeMeta} for the specified dataset type name.
 * The type is looked up in the specified namespace first; only if it is not found there is the
 * system namespace consulted.
 *
 * @param namespaceId {@link Id.Namespace} for the specified namespace
 * @param typeName the name of the dataset type to search
 * @return {@link DatasetTypeMeta} for the type if found in either the specified namespace or in the system
 *         namespace, null otherwise.
 * TODO: This may need to move to a util class eventually
 */
@Nullable
private DatasetTypeMeta getTypeInfo(Id.Namespace namespaceId, String typeName) throws BadRequestException {
  Id.DatasetType localTypeId = ConversionHelpers.toDatasetTypeId(namespaceId, typeName);
  DatasetTypeMeta localTypeMeta = implManager.getTypeInfo(localTypeId);
  if (localTypeMeta != null) {
    return localTypeMeta;
  }

  // Type not found in the instance's namespace. Now try finding it in the system namespace
  Id.DatasetType systemTypeId = ConversionHelpers.toDatasetTypeId(Id.Namespace.SYSTEM, typeName);
  return implManager.getTypeInfo(systemTypeId);
}
/**
 * Drops a dataset: disables exploration, deletes the instance's stored specification, and then asks the
 * op executor to drop the dataset itself.
 *
 * @param instance the dataset instance to drop
 * @param spec specification of dataset to be dropped.
 * @throws DatasetNotFoundException if the instance could not be deleted from the instance manager, or if
 *                                  the dataset's type could not be found
 * @throws Exception on error.
 */
private void dropDataset(Id.DatasetInstance instance, DatasetSpecification spec) throws Exception {
  disableExplore(instance);

  boolean removed = instanceManager.delete(instance);
  if (!removed) {
    throw new DatasetNotFoundException(instance);
  }

  // NOTE(review): the type is resolved only after the instance has been deleted above, and a missing
  // type is reported as DatasetNotFoundException - confirm both are intended.
  DatasetTypeMeta typeMeta = getTypeInfo(instance.getNamespace(), spec.getType());
  if (typeMeta == null) {
    throw new DatasetNotFoundException(instance);
  }

  opExecutorClient.drop(instance, typeMeta, spec);
}
/**
 * Disables ad-hoc exploration of a dataset instance. Failures are logged and otherwise ignored, so the
 * caller proceeds even when exploration could not be disabled.
 *
 * @param datasetInstance the dataset instance to disable exploration for
 */
private void disableExplore(Id.DatasetInstance datasetInstance) {
  // Note: today explore enable is not transactional with dataset create - CDAP-8
  try {
    exploreFacade.disableExploreDataset(datasetInstance);
  } catch (Exception e) {
    // TODO: at this time we want to still allow using dataset even if it cannot be used for exploration
    LOG.error(String.format("Cannot disable exploration of dataset instance %s: %s",
                            datasetInstance, e.getMessage()),
              e);
  }
}
/**
 * Enables ad-hoc exploration of a dataset instance. Failures are logged and otherwise ignored, so the
 * dataset stays usable even when it cannot be explored.
 *
 * @param datasetInstance the dataset instance to enable exploration for
 * @param creationProperties the configuration the dataset was created or updated with; only its type name
 *                           is used, for the error message
 */
private void enableExplore(Id.DatasetInstance datasetInstance, DatasetInstanceConfiguration creationProperties) {
  // Enable ad-hoc exploration of dataset
  // Note: today explore enable is not transactional with dataset create - CDAP-8
  try {
    exploreFacade.enableExploreDataset(datasetInstance);
  } catch (Exception e) {
    // The message announces the dataset's type, so report the type name rather than the property map
    String msg = String.format("Cannot enable exploration of dataset instance %s of type %s: %s",
                               datasetInstance, creationProperties.getTypeName(), e.getMessage());
    LOG.error(msg, e);
    // TODO: at this time we want to still allow using dataset even if it cannot be used for exploration
  }
}
/**
 * Verifies that a namespace exists. The system namespace is always accepted without a lookup.
 *
 * @param namespace the namespace to check
 * @throws NamespaceNotFoundException if the namespace is not the system namespace and the namespace store
 *                                    has no entry for it
 */
private void ensureNamespaceExists(Id.Namespace namespace) throws Exception {
  if (Id.Namespace.SYSTEM.equals(namespace)) {
    return;
  }
  if (nsStore.get(namespace) == null) {
    throw new NamespaceNotFoundException(namespace);
  }
}
/**
 * Publishes an audit event of the given type for a dataset instance, using an empty payload.
 *
 * @param datasetInstance the dataset instance the event is about
 * @param auditType the type of the audit event
 */
private void publishAudit(Id.DatasetInstance datasetInstance, AuditType auditType) {
  // TODO: Add properties to Audit Payload (CDAP-5220)
  AuditPublishers.publishAudit(
    auditPublisher,
    datasetInstance,
    auditType,
    AuditPayload.EMPTY_PAYLOAD);
}
}