/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.data2.datafabric.dataset;

import co.cask.cdap.api.dataset.Dataset;
import co.cask.cdap.api.dataset.DatasetContext;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.api.dataset.module.DatasetDefinitionRegistry;
import co.cask.cdap.api.dataset.module.DatasetModule;
import co.cask.cdap.common.NotFoundException;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.data2.datafabric.dataset.service.DatasetInstanceService;
import co.cask.cdap.data2.datafabric.dataset.type.ConstantClassLoaderProvider;
import co.cask.cdap.data2.datafabric.dataset.type.DatasetClassLoaderProvider;
import co.cask.cdap.data2.dataset2.DatasetDefinitionRegistryFactory;
import co.cask.cdap.data2.dataset2.module.lib.DatasetModules;
import co.cask.cdap.proto.DatasetMeta;
import co.cask.cdap.proto.DatasetModuleMeta;
import co.cask.cdap.proto.DatasetTypeMeta;
import co.cask.cdap.proto.Id;
import com.google.common.base.Objects;
import com.google.common.base.Throwables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Provides {@link Dataset} instances using methods implemented by subclasses.
 * Used by {@link RemoteDatasetFramework} and {@link DatasetInstanceService}.
 * Use this when you want to control how dataset instances are created. For example, when
 * you want to obtain a {@link Dataset} instance without having to make remote calls.
 */
public abstract class AbstractDatasetProvider implements DatasetProvider {

  private static final Logger LOG = LoggerFactory.getLogger(AbstractDatasetProvider.class);

  private final DatasetDefinitionRegistryFactory registryFactory;

  /**
   * Creates a provider that builds a fresh {@link DatasetDefinitionRegistry} from the given factory
   * every time a dataset type is resolved.
   *
   * @param registryFactory the factory used to create dataset definition registries
   */
  protected AbstractDatasetProvider(DatasetDefinitionRegistryFactory registryFactory) {
    this.registryFactory = registryFactory;
  }

  /**
   * Gets the {@link DatasetMeta} for a dataset.
   *
   * @param instance the dataset
   * @return the {@link DatasetMeta}, or {@code null} if there is none for the given dataset
   */
  @Nullable
  public abstract DatasetMeta getMeta(Id.DatasetInstance instance) throws Exception;

  /**
   * Creates the dataset if it doesn't already exist.
   *
   * @param instance the dataset
   * @param type the type of dataset to create
   * @param creationProps creation properties
   */
  public abstract void createIfNotExists(Id.DatasetInstance instance, String type,
                                         DatasetProperties creationProps) throws Exception;

  /**
   * Returns the dataset, creating it first if the initial lookup reports that it does not exist.
   *
   * @param instance the dataset
   * @param type the type of dataset to create if it is missing
   * @param creationProps creation properties used if the dataset has to be created
   * @param classLoader the parent classloader for dataset modules, or {@code null} to use a default
   * @param arguments runtime arguments passed to the dataset, may be {@code null}
   * @param <T> the type of the dataset
   * @return the dataset instance
   */
  @Override
  public <T extends Dataset> T getOrCreate(Id.DatasetInstance instance, String type,
                                           DatasetProperties creationProps,
                                           @Nullable ClassLoader classLoader,
                                           @Nullable Map<String, String> arguments) throws Exception {
    try {
      return get(instance, classLoader, arguments);
    } catch (NotFoundException ignored) {
      // fall-through to create
    }

    createIfNotExists(instance, type, creationProps);
    return get(instance, classLoader, arguments);
  }

  /**
   * Returns the dataset described by the metadata obtained from {@link #getMeta(Id.DatasetInstance)}.
   *
   * @param instance the dataset
   * @param classLoader the parent classloader for dataset modules, or {@code null} to use a default
   * @param arguments runtime arguments passed to the dataset, may be {@code null}
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws NotFoundException if {@link #getMeta(Id.DatasetInstance)} returns {@code null}
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T extends Dataset> T get(Id.DatasetInstance instance,
                                   @Nullable ClassLoader classLoader,
                                   @Nullable Map<String, String> arguments) throws Exception {
    ConstantClassLoaderProvider classLoaderProvider = new ConstantClassLoaderProvider(classLoader);
    DatasetMeta meta = getMeta(instance);
    if (meta == null) {
      throw new NotFoundException(instance);
    }

    DatasetType type = getType(meta.getType(), classLoader, classLoaderProvider);
    return (T) type.getDataset(DatasetContext.from(instance.getNamespaceId()), meta.getSpec(), arguments);
  }

  /**
   * Returns a dataset built from an already known type and specification, without calling
   * {@link #getMeta(Id.DatasetInstance)}.
   *
   * @param instance the dataset; only its namespace is used, to build the {@link DatasetContext}
   * @param typeMeta the dataset type metadata to instantiate the type from
   * @param spec the specification of the dataset
   * @param classLoaderProvider the provider of classloaders for dataset modules; if {@code null}, a
   *                            {@link ConstantClassLoaderProvider} around {@code classLoader} is used
   * @param classLoader the parent classloader for dataset modules, or {@code null} to use a default
   * @param arguments runtime arguments passed to the dataset, may be {@code null}
   * @param <T> the type of the dataset
   * @return the dataset instance
   */
  @SuppressWarnings("unchecked")
  public <T extends Dataset> T get(Id.DatasetInstance instance,
                                   DatasetTypeMeta typeMeta,
                                   DatasetSpecification spec,
                                   @Nullable DatasetClassLoaderProvider classLoaderProvider,
                                   @Nullable ClassLoader classLoader,
                                   @Nullable Map<String, String> arguments) throws IOException {
    classLoaderProvider = classLoaderProvider == null
      ? new ConstantClassLoaderProvider(classLoader)
      : classLoaderProvider;

    DatasetType type = getType(typeMeta, classLoader, classLoaderProvider);
    return (T) type.getDataset(DatasetContext.from(instance.getNamespaceId()), spec, arguments);
  }

  /**
   * Return an instance of the {@link DatasetType} corresponding to given dataset modules. Uses the given
   * classloader as a parent for all dataset modules, and the given classloader provider to get classloaders for
   * each dataset module in the given dataset type meta. Order of dataset modules in the given
   * {@link DatasetTypeMeta} is important. The classloader for the first dataset module is used as the parent of
   * the second dataset module and so on until the last dataset module. The classloader for the last dataset module
   * is then used as the classloader for the returned {@link DatasetType}.
   *
   * <p>Can be used directly if the {@link DatasetTypeMeta} is known, like in create dataset by dataset ops
   * executor service.
   *
   * <p>Failures to create a module classloader, to load a module class, or to register a module are logged
   * and rethrown through {@link Throwables#propagate(Throwable)}.
   *
   * @param implementationInfo the dataset type metadata to instantiate the type from
   * @param classLoader the parent classloader to use for dataset modules; if {@code null}, the thread context
   *                    classloader is used, or the classloader of this class if there is no context classloader
   * @param classLoaderProvider the classloader provider to get classloaders for each dataset module
   * @param <T> the type of DatasetType
   * @return an instance of the DatasetType
   */
  @SuppressWarnings("unchecked")
  public <T extends DatasetType> T getType(DatasetTypeMeta implementationInfo,
                                           @Nullable ClassLoader classLoader,
                                           DatasetClassLoaderProvider classLoaderProvider) {
    if (classLoader == null) {
      classLoader = Objects.firstNonNull(Thread.currentThread().getContextClassLoader(),
                                         getClass().getClassLoader());
    }

    DatasetDefinitionRegistry registry = registryFactory.create();
    List<DatasetModuleMeta> modulesToLoad = implementationInfo.getModules();
    for (DatasetModuleMeta moduleMeta : modulesToLoad) {
      // adding dataset module jar to classloader; the result becomes the parent for the next module
      try {
        classLoader = classLoaderProvider.get(moduleMeta, classLoader);
      } catch (IOException e) {
        LOG.error("Was not able to init classloader for module {} while trying to load type {}",
                  moduleMeta, implementationInfo, e);
        throw Throwables.propagate(e);
      }

      Class<?> moduleClass = loadModuleClass(moduleMeta, classLoader, implementationInfo);

      try {
        DatasetModule module = DatasetModules.getDatasetModule(moduleClass);
        module.register(registry);
      } catch (Exception e) {
        // distinct message so that registration failures are not mistaken for class loading failures
        LOG.error("Was not able to register dataset module {} while trying to load type {}",
                  moduleMeta.getClassName(), implementationInfo, e);
        throw Throwables.propagate(e);
      }
    }

    // contract of DatasetTypeMeta is that the last module returned by getModules() is the one
    // that announces the dataset's type. The classloader for the returned DatasetType must be the classloader
    // for that last module.
    return (T) new DatasetType(registry.get(implementationInfo.getName()), classLoader);
  }

  /**
   * Loads the class of a dataset module, trying the module classloader first and then the classloader
   * of this class (the CDAP system classloader).
   *
   * @param moduleMeta the module whose class should be loaded
   * @param classLoader the classloader obtained for the module
   * @param implementationInfo the dataset type being loaded; only used for the error message
   * @return the loaded module class
   */
  private Class<?> loadModuleClass(DatasetModuleMeta moduleMeta, ClassLoader classLoader,
                                   DatasetTypeMeta implementationInfo) {
    String className = moduleMeta.getClassName();
    // try program class loader then cdap class loader
    try {
      return loadWithContextClassLoader(classLoader, className);
    } catch (ClassNotFoundException e) {
      // Load it with the CDAP system class loader
      try {
        return loadWithContextClassLoader(getClass().getClassLoader(), className);
      } catch (ClassNotFoundException e2) {
        // report the original failure, keeping the fallback failure attached to it
        e.addSuppressed(e2);
        LOG.error("Was not able to load dataset module class {} while trying to load type {}",
                  className, implementationInfo, e);
        throw Throwables.propagate(e);
      }
    }
  }

  /**
   * Loads a class from the given classloader while that classloader is set as the thread context
   * classloader, restoring the previous context classloader afterwards.
   *
   * @param classLoader the classloader to load from
   * @param className the name of the class to load
   * @return the loaded class
   * @throws ClassNotFoundException if the classloader cannot find the class
   */
  private static Class<?> loadWithContextClassLoader(ClassLoader classLoader,
                                                     String className) throws ClassNotFoundException {
    ClassLoader previousClassLoader = ClassLoaders.setContextClassLoader(classLoader);
    try {
      return classLoader.loadClass(className);
    } finally {
      ClassLoaders.setContextClassLoader(previousClassLoader);
    }
  }
}