/* * Copyright © 2014-2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.datafabric.dataset.type; import co.cask.cdap.api.dataset.DatasetDefinition; import co.cask.cdap.api.dataset.DatasetManagementException; import co.cask.cdap.api.dataset.DatasetSpecification; import co.cask.cdap.api.dataset.module.DatasetDefinitionRegistry; import co.cask.cdap.api.dataset.module.DatasetModule; import co.cask.cdap.common.conf.CConfiguration; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.common.lang.ClassLoaders; import co.cask.cdap.common.lang.ProgramClassLoader; import co.cask.cdap.common.lang.jar.BundleJarUtil; import co.cask.cdap.common.utils.DirUtils; import co.cask.cdap.data2.datafabric.dataset.service.mds.DatasetTypeMDS; import co.cask.cdap.data2.datafabric.dataset.service.mds.MDSDatasets; import co.cask.cdap.data2.datafabric.dataset.service.mds.MDSDatasetsRegistry; import co.cask.cdap.data2.dataset2.InMemoryDatasetDefinitionRegistry; import co.cask.cdap.data2.dataset2.TypeConflictException; import co.cask.cdap.data2.dataset2.module.lib.DatasetModules; import co.cask.cdap.data2.dataset2.tx.TxCallable; import co.cask.cdap.proto.DatasetModuleMeta; import co.cask.cdap.proto.DatasetTypeMeta; import co.cask.cdap.proto.Id; import co.cask.tephra.TransactionFailureException; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Throwables; import 
com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import com.google.common.util.concurrent.AbstractIdleService;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import org.apache.twill.filesystem.Location;
import org.apache.twill.filesystem.LocationFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;

/**
 * Manages dataset types and modules metadata.
 *
 * <p>On start-up the service removes the system-namespace modules that have no jar location,
 * then re-registers the injected default modules followed by any extension modules named in
 * the configuration. All metadata reads and writes go through {@link MDSDatasetsRegistry}.
 */
public class DatasetTypeManager extends AbstractIdleService {
  private static final Logger LOG = LoggerFactory.getLogger(DatasetTypeManager.class);

  private final CConfiguration cConf;
  private final MDSDatasetsRegistry mdsDatasets;
  private final LocationFactory locationFactory;
  /** Default modules keyed by module name; copied so that iteration order is preserved. */
  private final Map<String, DatasetModule> defaultModules;
  /** When true, an already registered module or type may be overwritten instead of rejected. */
  private final boolean allowDatasetUncheckedUpgrade;
  /** Extension modules keyed by their fully qualified class name, in configuration order. */
  private final Map<String, DatasetModule> extensionModules;

  /**
   * @param cConf configuration; read for the unchecked-upgrade flag and the extension module list
   * @param mdsDatasets registry used to run operations against the metadata datasets
   * @param locationFactory factory used to resolve module jar locations
   * @param defaultModules modules to register in the system namespace on start-up
   */
  @Inject
  public DatasetTypeManager(CConfiguration cConf,
                            MDSDatasetsRegistry mdsDatasets,
                            LocationFactory locationFactory,
                            @Named("defaultDatasetModules") Map<String, DatasetModule> defaultModules) {
    this.cConf = cConf;
    this.mdsDatasets = mdsDatasets;
    this.locationFactory = locationFactory;
    this.defaultModules = new LinkedHashMap<>(defaultModules);
    this.allowDatasetUncheckedUpgrade = cConf.getBoolean(Constants.Dataset.DATASET_UNCHECKED_UPGRADE);
    this.extensionModules = getExtensionModules(this.cConf);
  }

  @Override
  protected void startUp() throws Exception {
    // Order matters: stale classpath-only system modules are removed before being re-registered.
    deleteSystemModules();
    deployDefaultModules();
    if (!extensionModules.isEmpty()) {
      deployExtensionModules();
    }
  }

  @Override
  protected void shutDown() throws Exception {
    // do nothing
  }

  /**
   * Instantiates the extension modules listed (comma separated) under
   * {@link Constants.Dataset.Extensions#MODULES}.
   *
   * <p>Entries are trimmed and blank entries are skipped. A class that cannot be loaded,
   * instantiated or cast to {@link DatasetModule} is logged and skipped; it does not abort
   * the remaining entries.
   *
   * @param cConf configuration to read the module list from
   * @return instantiated modules keyed by class name, in the order they were configured;
   *         empty if the property is not set
   */
  private Map<String, DatasetModule> getExtensionModules(CConfiguration cConf) {
    Map<String, DatasetModule> modules = new LinkedHashMap<>();
    String moduleStr = cConf.get(Constants.Dataset.Extensions.MODULES);
    if (moduleStr == null) {
      return modules;
    }

    // trimResults() lets "a.B, c.D" work; without it Class.forName(" c.D") would fail
    for (String moduleName : Splitter.on(',').trimResults().omitEmptyStrings().split(moduleStr)) {
      try {
        Class<?> moduleClass = Class.forName(moduleName);
        DatasetModule module = (DatasetModule) moduleClass.newInstance();
        modules.put(moduleName, module);
      } catch (ClassCastException | ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
        // trailing throwable keeps the stack trace in the log
        LOG.error("Failed to add {} extension module: {}", moduleName, ex.toString(), ex);
      }
    }
    return modules;
  }

  /**
   * Add datasets module in a namespace
   *
   * @param datasetModuleId the {@link Id.DatasetModule} to add
   * @param className module class
   * @param jarLocation location of the module jar, or {@code null} if the module class is
   *                    expected to be available on this class's classloader
   * @throws DatasetModuleConflictException if a module with the same id already exists, or one of
   *         the types it registers already exists, and unchecked upgrade is not allowed
   */
  public void addModule(final Id.DatasetModule datasetModuleId, final String className, final Location jarLocation)
    throws DatasetModuleConflictException {

    LOG.debug("adding module: {}, className: {}, jarLocation: {}",
              datasetModuleId, className, jarLocation == null ? "[local]" : jarLocation);

    try {
      mdsDatasets.execute(new TxCallable<MDSDatasets, Void>() {
        @Override
        public Void call(MDSDatasets datasets) throws DatasetModuleConflictException {
          DatasetModuleMeta existing = datasets.getTypeMDS().getModule(datasetModuleId);
          if (existing != null && !allowDatasetUncheckedUpgrade) {
            String msg = String.format("cannot add module %s, module with the same name already exists: %s",
                                       datasetModuleId, existing);
            throw new DatasetModuleConflictException(msg);
          }

          DatasetModule module;
          File unpackedLocation = Files.createTempDir();
          DependencyTrackingRegistry reg;
          try {
            // NOTE: if jarLocation is null, we assume that this is a system module, ie. always present in classpath
            ClassLoader cl = getClass().getClassLoader();
            if (jarLocation != null) {
              BundleJarUtil.unJar(jarLocation, unpackedLocation);
              cl = ProgramClassLoader.create(cConf, unpackedLocation, getClass().getClassLoader());
            }
            Class clazz = ClassLoaders.loadClass(className, cl, this);
            module = DatasetModules.getDatasetModule(clazz);
            // Registering against the tracking registry records which types the module adds and uses.
            reg = new DependencyTrackingRegistry(datasetModuleId.getNamespace(), datasets);
            module.register(reg);
          } catch (Exception e) {
            LOG.error("Could not instantiate instance of dataset module class {} for module {} using jarLocation {}",
                      className, datasetModuleId, jarLocation);
            throw Throwables.propagate(e);
          } finally {
            // The unpacked jar is only needed while registering; clean it up on every path.
            try {
              DirUtils.deleteDirectoryContents(unpackedLocation);
            } catch (IOException e) {
              LOG.warn("Failed to delete directory {}", unpackedLocation, e);
            }
          }

          // NOTE: we use set to avoid duplicated dependencies
          // NOTE: we use LinkedHashSet to preserve order in which dependencies must be loaded
          Set<String> moduleDependencies = new LinkedHashSet<>();
          for (Id.DatasetType usedType : reg.getUsedTypes()) {
            DatasetModuleMeta usedModule = datasets.getTypeMDS().getModuleByType(usedType);
            // lazy template: the message is only built when the check fails
            Preconditions.checkState(usedModule != null,
                                     "Found a null used module for type %s for while adding module %s",
                                     usedType, datasetModuleId);
            // adding all used types and the module itself, in this very order to keep the order of loading modules
            // for instantiating a type
            moduleDependencies.addAll(usedModule.getUsesModules());
            boolean added = moduleDependencies.add(usedModule.getName());
            if (added) {
              // also adding this module as a dependent for all modules it uses
              usedModule.addUsedByModule(datasetModuleId.getId());
              datasets.getTypeMDS().writeModule(usedType.getNamespace(), usedModule);
            }
          }

          URI jarURI = jarLocation == null ? null : jarLocation.toURI();
          DatasetModuleMeta moduleMeta = new DatasetModuleMeta(datasetModuleId.getId(), className, jarURI,
                                                               reg.getTypes(),
                                                               Lists.newArrayList(moduleDependencies));
          datasets.getTypeMDS().writeModule(datasetModuleId.getNamespace(), moduleMeta);
          return null;
        }
      });
    } catch (TransactionFailureException e) {
      // Surface conflicts raised inside the transaction as the checked exception callers expect.
      Throwable cause = e.getCause();
      if (cause instanceof DatasetModuleConflictException) {
        throw (DatasetModuleConflictException) cause;
      }
      if (cause instanceof TypeConflictException) {
        throw new DatasetModuleConflictException(cause);
      }
      throw Throwables.propagate(e);
    } catch (Exception e) {
      LOG.error("Operation failed", e);
      throw Throwables.propagate(e);
    }
  }

  /**
   * Lists the dataset types of a namespace.
   *
   * @param namespaceId the {@link Id.Namespace} to retrieve types from
   * @return collection of types available in the specified namespace
   */
  public Collection<DatasetTypeMeta> getTypes(final Id.Namespace namespaceId) {
    return mdsDatasets.executeUnchecked(new TxCallable<MDSDatasets, Collection<DatasetTypeMeta>>() {
      @Override
      public Collection<DatasetTypeMeta> call(MDSDatasets datasets) throws DatasetModuleConflictException {
        return datasets.getTypeMDS().getTypes(namespaceId);
      }
    });
  }

  /**
   * Get dataset type information
   *
   * @param datasetTypeId name of the type to get info for
   * @return instance of {@link DatasetTypeMeta} or {@code null} if type
   *         does NOT exist
   */
  @Nullable
  public DatasetTypeMeta getTypeInfo(final Id.DatasetType datasetTypeId) {
    return mdsDatasets.executeUnchecked(new TxCallable<MDSDatasets, DatasetTypeMeta>() {
      @Override
      public DatasetTypeMeta call(MDSDatasets datasets) throws DatasetModuleConflictException {
        return datasets.getTypeMDS().getType(datasetTypeId);
      }
    });
  }

  /**
   * Lists the dataset modules of a namespace.
   *
   * @param namespaceId {@link Id.Namespace} to retrieve the module list from
   * @return list of dataset modules information from the specified namespace
   */
  public Collection<DatasetModuleMeta> getModules(final Id.Namespace namespaceId) {
    return mdsDatasets.executeUnchecked(new TxCallable<MDSDatasets, Collection<DatasetModuleMeta>>() {
      @Override
      public Collection<DatasetModuleMeta> call(MDSDatasets datasets) throws Exception {
        return datasets.getTypeMDS().getModules(namespaceId);
      }
    });
  }

  /**
   * Looks up a single dataset module.
   *
   * @param datasetModuleId {@link Id.DatasetModule} of the module to return info for
   * @return dataset module info or {@code null} if module with given name does NOT exist
   */
  @Nullable
  public DatasetModuleMeta getModule(final Id.DatasetModule datasetModuleId) {
    return mdsDatasets.executeUnchecked(new TxCallable<MDSDatasets, DatasetModuleMeta>() {
      @Override
      public DatasetModuleMeta call(MDSDatasets datasets) throws Exception {
        return datasets.getTypeMDS().getModule(datasetModuleId);
      }
    });
  }

  /**
   * Deletes specified dataset module
   *
   * @param datasetModuleId {@link Id.DatasetModule} of the dataset module to delete
   * @return true if deleted successfully, false if module didn't exist: nothing to delete
   * @throws DatasetModuleConflictException when there are other modules or dataset instances that depend on the
   *         specified one, in which case deletion does NOT happen
   */
  public boolean deleteModule(final Id.DatasetModule datasetModuleId) throws DatasetModuleConflictException {
    LOG.info("Deleting module {}", datasetModuleId);
    try {
      return mdsDatasets.execute(new TxCallable<MDSDatasets, Boolean>() {
        @Override
        public Boolean call(MDSDatasets datasets) throws DatasetModuleConflictException, IOException {
          DatasetModuleMeta module = datasets.getTypeMDS().getModule(datasetModuleId);

          if (module == null) {
            return false;
          }

          // cannot delete when there's module that uses it
          if (!module.getUsedByModules().isEmpty()) {
            String msg =
              String.format("Cannot delete module %s: other modules depend on it. Delete them first", module);
            throw new DatasetModuleConflictException(msg);
          }

          Collection<DatasetSpecification> dependentInstances =
            datasets.getInstanceMDS().getByTypes(datasetModuleId.getNamespace(),
                                                 ImmutableSet.copyOf(module.getTypes()));
          // cannot delete when there's instance that uses it
          if (!dependentInstances.isEmpty()) {
            String msg =
              String.format("Cannot delete module %s: other instances depend on it. Delete them first", module);
            throw new DatasetModuleConflictException(msg);
          }

          // remove it from "usedBy" from other modules
          for (String usedModuleName : module.getUsesModules()) {
            Id.DatasetModule usedModuleId = Id.DatasetModule.from(datasetModuleId.getNamespace(), usedModuleName);
            // not using getModuleWithFallback here because we want to know the namespace in which usedModule was
            // found, so we can overwrite it in the MDS in the appropriate namespace
            DatasetModuleMeta usedModule = datasets.getTypeMDS().getModule(usedModuleId);
            // if the usedModule is not found in the current namespace, try finding it in the system namespace
            if (usedModule == null) {
              usedModuleId = Id.DatasetModule.from(Id.Namespace.SYSTEM, usedModuleName);
              usedModule = datasets.getTypeMDS().getModule(usedModuleId);
              Preconditions.checkState(usedModule != null, "Could not find a module %s that the module %s uses.",
                                       usedModuleName, datasetModuleId.getId());
            }
            usedModule.removeUsedByModule(datasetModuleId.getId());
            datasets.getTypeMDS().writeModule(usedModuleId.getNamespace(), usedModule);
          }

          datasets.getTypeMDS().deleteModule(datasetModuleId);

          // Also delete module jar. Modules added without a jar (classpath modules) have a null
          // jar location, so there is nothing to remove for them.
          // NOTE(review): the jar is deleted inside the transaction callback, before the metadata
          // change is known to be committed - confirm this ordering is intended.
          if (module.getJarLocation() != null) {
            Location moduleJarLocation = locationFactory.create(module.getJarLocation());
            if (!moduleJarLocation.delete()) {
              LOG.debug("Could not delete dataset module archive");
            }
          }

          return true;
        }
      });
    } catch (TransactionFailureException e) {
      if (e.getCause() instanceof DatasetModuleConflictException) {
        throw (DatasetModuleConflictException) e.getCause();
      }
      throw Throwables.propagate(e);
    } catch (Exception e) {
      LOG.error("Operation failed", e);
      throw Throwables.propagate(e);
    }
  }

  /**
   * Deletes all modules in a namespace, other than system.
   * Presumes that the namespace has already been checked to be non-system.
   *
   * @param namespaceId the {@link Id.Namespace} to delete modules from.
   * @throws DatasetModuleConflictException if dataset instances in the namespace still use a type
   *         provided by one of the modules, in which case nothing is deleted
   * @throws IllegalArgumentException if {@code namespaceId} is {@code null} or the system namespace
   */
  public void deleteModules(final Id.Namespace namespaceId) throws DatasetModuleConflictException {
    Preconditions.checkArgument(namespaceId != null && !Id.Namespace.SYSTEM.equals(namespaceId),
                                "Cannot delete modules from system namespace");
    LOG.warn("Deleting all modules from namespace {}", namespaceId);
    try {
      mdsDatasets.execute(new TxCallable<MDSDatasets, Void>() {
        @Override
        public Void call(MDSDatasets datasets) throws DatasetModuleConflictException, IOException {
          Set<String> typesToDelete = new HashSet<>();
          List<Location> moduleLocations = Lists.newArrayList();
          for (DatasetModuleMeta module : datasets.getTypeMDS().getModules(namespaceId)) {
            typesToDelete.addAll(module.getTypes());
            // modules registered without a jar have no archive to clean up
            if (module.getJarLocation() != null) {
              moduleLocations.add(locationFactory.create(module.getJarLocation()));
            }
          }

          // check if there are any instances that use types of these modules?
          Collection<DatasetSpecification> dependentInstances =
            datasets.getInstanceMDS().getByTypes(namespaceId, typesToDelete);
          // cannot delete when there's instance that uses it
          if (!dependentInstances.isEmpty()) {
            throw new DatasetModuleConflictException(
              "Cannot delete all modules: existing dataset instances depend on it. Delete them first"
            );
          }

          datasets.getTypeMDS().deleteModules(namespaceId);
          // Delete module locations
          for (Location moduleLocation : moduleLocations) {
            if (!moduleLocation.delete()) {
              LOG.debug("Could not delete dataset module archive - {}", moduleLocation);
            }
          }
          return null;
        }
      });
    } catch (TransactionFailureException e) {
      if (e.getCause() instanceof DatasetModuleConflictException) {
        throw (DatasetModuleConflictException) e.getCause();
      }
      LOG.error("Failed to delete all modules from namespace {}", namespaceId);
      throw Throwables.propagate(e);
    } catch (Exception e) {
      LOG.error("Operation failed", e);
      throw Throwables.propagate(e);
    }
  }

  /**
   * Registers every default module in the system namespace. A module that already exists is
   * skipped; any other failure is logged and rethrown, aborting the remaining modules.
   */
  private void deployDefaultModules() {
    // adding default modules to be available in dataset manager service
    for (Map.Entry<String, DatasetModule> module : defaultModules.entrySet()) {
      try {
        // NOTE: we assume default modules are always in classpath, hence passing null for jar location
        // NOTE: we add default modules in the system namespace
        Id.DatasetModule defaultModule = Id.DatasetModule.from(Id.Namespace.SYSTEM, module.getKey());
        addModule(defaultModule, module.getValue().getClass().getName(), null);
      } catch (DatasetModuleConflictException e) {
        // perfectly fine: we need to add default modules only the very first time service is started
        LOG.debug("Not adding {} module: it already exists", module.getKey());
      } catch (Throwable th) {
        LOG.error("Failed to add {} module. Aborting.", module.getKey(), th);
        throw Throwables.propagate(th);
      }
    }
  }

  /**
   * Registers every configured extension module in the system namespace. A module that already
   * exists is skipped; any other failure is logged and rethrown, aborting the remaining modules.
   */
  private void deployExtensionModules() {
    // adding any defined extension modules to be available in dataset manager service
    for (Map.Entry<String, DatasetModule> module : extensionModules.entrySet()) {
      try {
        // NOTE: we assume extension modules are always in classpath, hence passing null for jar location
        // NOTE: we add extension modules in the system namespace
        Id.DatasetModule theModule = Id.DatasetModule.from(Id.Namespace.SYSTEM, module.getKey());
        addModule(theModule, module.getValue().getClass().getName(), null);
      } catch (DatasetModuleConflictException e) {
        // perfectly fine: we need to add the modules only the very first time service is started
        LOG.debug("Not adding {} extension module: it already exists", module.getKey());
      } catch (Throwable th) {
        LOG.error("Failed to add {} extension module. Aborting.", module.getKey(), th);
        throw Throwables.propagate(th);
      }
    }
  }

  /**
   * Removes, in a single transaction, every system-namespace module whose jar location is
   * {@code null}. Modules that carry a jar location are left in place.
   */
  private void deleteSystemModules() throws DatasetManagementException, IOException,
    InterruptedException, TransactionFailureException {

    mdsDatasets.execute(new TxCallable<MDSDatasets, Void>() {
      @Override
      public Void call(MDSDatasets context) throws Exception {
        DatasetTypeMDS typeMDS = context.getTypeMDS();
        Collection<DatasetModuleMeta> allDatasets = typeMDS.getModules(Id.Namespace.SYSTEM);
        for (DatasetModuleMeta ds : allDatasets) {
          if (ds.getJarLocation() == null) {
            LOG.debug("Deleting system dataset module: {}", ds);
            typeMDS.deleteModule(Id.DatasetModule.from(Id.Namespace.SYSTEM, ds.getName()));
          }
        }
        return null;
      }
    });
  }

  /**
   * A {@link DatasetDefinitionRegistry} handed to a module while it registers itself. It records
   * the names of the types the module adds and the ids of the types the module looks up, so the
   * caller can derive the module's metadata and its dependencies.
   */
  private class DependencyTrackingRegistry implements DatasetDefinitionRegistry {
    private final MDSDatasets datasets;
    private final InMemoryDatasetDefinitionRegistry registry;
    private final Id.Namespace namespaceId;

    /** Names of the types added through {@link #add}, in registration order. */
    private final List<String> types = Lists.newArrayList();
    /** Ids of the types requested through {@link #get}, in first-request order. */
    private final LinkedHashSet<Id.DatasetType> usedTypes = new LinkedHashSet<>();

    private DependencyTrackingRegistry(Id.Namespace namespaceId, MDSDatasets datasets) {
      this.namespaceId = namespaceId;
      this.datasets = datasets;
      this.registry = new InMemoryDatasetDefinitionRegistry();
    }

    public List<String> getTypes() {
      return types;
    }

    public Set<Id.DatasetType> getUsedTypes() {
      return usedTypes;
    }

    public Id.Namespace getNamespaceId() {
      return namespaceId;
    }

    /**
     * Records and registers a new type definition.
     *
     * @throws TypeConflictException if the type already exists in this registry's namespace and
     *         unchecked upgrade is not allowed
     */
    @Override
    public void add(DatasetDefinition def) {
      String typeName = def.getName();
      Id.DatasetType typeId = Id.DatasetType.from(namespaceId, typeName);
      if (datasets.getTypeMDS().getType(typeId) != null && !allowDatasetUncheckedUpgrade) {
        String msg = "Cannot add dataset type: it already exists: " + typeName;
        throw new TypeConflictException(msg);
      }
      types.add(typeName);
      registry.add(def);
    }

    /**
     * Resolves a type definition, looking in this registry's namespace first and in the system
     * namespace second, and records the resolved type id as a used type.
     *
     * @throws IllegalArgumentException if the type exists in neither namespace
     */
    @Override
    public <T extends DatasetDefinition> T get(String datasetTypeName) {
      T def;
      // Find the typeMeta for the type from the right namespace
      Id.DatasetType datasetTypeId = Id.DatasetType.from(namespaceId, datasetTypeName);
      DatasetTypeMeta typeMeta = datasets.getTypeMDS().getType(datasetTypeId);
      if (typeMeta == null) {
        // not found in the user namespace. Try finding in the system namespace
        datasetTypeId = Id.DatasetType.from(Id.Namespace.SYSTEM, datasetTypeName);
        typeMeta = datasets.getTypeMDS().getType(datasetTypeId);
        if (typeMeta == null) {
          // not found in the user namespace as well as system namespace. Bail out.
          throw new IllegalArgumentException("Requested dataset type is not available: " + datasetTypeName);
        }
      }
      if (registry.hasType(datasetTypeName)) {
        def = registry.get(datasetTypeName);
      } else {
        try {
          def = new DatasetDefinitionLoader(cConf, locationFactory).load(typeMeta, registry);
        } catch (IOException e) {
          throw Throwables.propagate(e);
        }
      }
      // Here, datasetTypeId has the right namespace (either user or system) where the type was found.
      usedTypes.add(datasetTypeId);
      return def;
    }

    /**
     * Reports whether the type exists in this registry's namespace.
     *
     * <p>NOTE(review): unlike {@link #get}, this does not fall back to the system namespace, so it
     * can return {@code false} for a type that {@code get} would resolve - confirm this is intended.
     */
    @Override
    public boolean hasType(String datasetTypeName) {
      return datasets.getTypeMDS().getType(Id.DatasetType.from(namespaceId, datasetTypeName)) != null;
    }
  }
}