/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.internal.app.runtime.artifact;
import co.cask.cdap.api.artifact.ApplicationClass;
import co.cask.cdap.api.artifact.ArtifactClasses;
import co.cask.cdap.api.artifact.ArtifactVersion;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.DatasetDefinition;
import co.cask.cdap.api.dataset.DatasetManagementException;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.table.ConflictDetection;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Scan;
import co.cask.cdap.api.dataset.table.Scanner;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.plugin.PluginClass;
import co.cask.cdap.common.ArtifactAlreadyExistsException;
import co.cask.cdap.common.ArtifactNotFoundException;
import co.cask.cdap.common.io.Locations;
import co.cask.cdap.common.namespace.NamespacedLocationFactory;
import co.cask.cdap.common.utils.ImmutablePair;
import co.cask.cdap.data2.datafabric.dataset.DatasetsUtil;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.dataset2.tx.DatasetContext;
import co.cask.cdap.data2.dataset2.tx.Transactional;
import co.cask.cdap.internal.app.runtime.plugin.PluginNotExistsException;
import co.cask.cdap.internal.io.SchemaTypeAdapter;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.artifact.ArtifactRange;
import co.cask.cdap.proto.artifact.InvalidArtifactRangeException;
import co.cask.cdap.proto.id.Ids;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.tephra.TransactionConflictException;
import co.cask.tephra.TransactionExecutor;
import co.cask.tephra.TransactionExecutorFactory;
import co.cask.tephra.TransactionFailureException;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Splitter;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.ByteStreams;
import com.google.common.io.InputSupplier;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonParseException;
import com.google.gson.JsonPrimitive;
import com.google.gson.JsonSerializationContext;
import com.google.gson.JsonSerializer;
import com.google.inject.Inject;
import org.apache.twill.filesystem.Location;
import org.apache.twill.filesystem.LocationFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Type;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
/**
* This class manages artifacts as well as metadata for each artifact. Artifacts and their metadata cannot be changed
* once they are written, with the exception of snapshot versions. An Artifact can contain
* plugin classes and/or application classes. We may want to extend this to include other types of classes, such
* as datasets.
*
* Every time an artifact is added, the artifact contents are stored at a base location based on its id:
* /namespaces/{namespace-id}/artifacts/{artifact-name}/{artifact-version}
* Several metadata writes are then performed.
*
* The first adds metadata about the artifact, with
* rowkey r:{namespace}:{artifact-name}, column {artifact-version}, and ArtifactData as the value
*
* The second adds metadata about any Application Class contained in the artifact, with
* rowkey a:{namespace}:{classname}, column {artifact-name}:{artifact-version}, and AppClass as the value
*
* The third adds metadata about any Plugin contained in the artifact, with
* rowkey p:{parent-namespace}:{parent-name}:{plugin-type}:{plugin-name},
* column {artifact-namespace}:{artifact-name}:{artifact-version},
* and PluginData as the value
*
* For example, suppose we add a system artifact etlbatch-3.1.0, which contains an ETLBatch application class.
* The meta table will look like:
*
* rowkey columns
* a:system:ETLBatch etlbatch:3.1.0 -> {AppData}
* r:system:etlbatch 3.1.0 -> {ArtifactData}
*
* After that, a system artifact etlbatch-lib-3.1.0 is added, which extends etlbatch and contains
* stream sink and table sink plugins. The meta table will look like:
*
* rowkey columns
* a:system:ETLBatch etlbatch:3.1.0 -> {AppData}
* p:system:etlbatch:sink:stream system:etlbatch-lib:3.1.0 -> {PluginData}
* p:system:etlbatch:sink:table system:etlbatch-lib:3.1.0 -> {PluginData}
* r:system:etlbatch 3.1.0 -> {ArtifactData}
* r:system:etlbatch-lib 3.1.0 -> {ArtifactData}
*
* Finally a user adds artifact custom-sources-1.0.0 to the default namespace,
* which extends etlbatch and contains a db source plugin. The meta table will look like:
*
* rowkey columns
* a:system:ETLBatch etlbatch:3.1.0 -> {AppData}
* p:system:etlbatch:sink:stream system:etlbatch-lib:3.1.0 -> {PluginData}
* p:system:etlbatch:sink:table system:etlbatch-lib:3.1.0 -> {PluginData}
* p:system:etlbatch:source:db default:custom-sources:1.0.0 -> {PluginData}
* r:default:custom-sources 1.0.0 -> {ArtifactData}
* r:system:etlbatch 3.1.0 -> {ArtifactData}
* r:system:etlbatch-lib 3.1.0 -> {ArtifactData}
*
* With this schema we can perform a scan to look up AppClasses, a scan to look up plugins that extend a specific
* artifact, and a scan to look up artifacts.
*/
public class ArtifactStore {
// directory name (under each namespace's location) where artifact jar files are stored
private static final String ARTIFACTS_PATH = "artifacts";
// rowkey prefix for artifact meta rows: r:{namespace}:{artifact-name}
private static final String ARTIFACT_PREFIX = "r";
// rowkey prefix for plugin meta rows: p:{parent-namespace}:{parent-name}:{plugin-type}:{plugin-name}
private static final String PLUGIN_PREFIX = "p";
// rowkey prefix for application class meta rows: a:{namespace}:{classname}
private static final String APPCLASS_PREFIX = "a";
// id of the single system table that holds all artifact metadata
private static final Id.DatasetInstance META_ID = Id.DatasetInstance.from(Id.Namespace.SYSTEM, "artifact.meta");
// column-level conflict detection (ConflictDetection.COLUMN) for the meta table
private static final DatasetProperties META_PROPERTIES =
  DatasetProperties.builder().add(Table.PROPERTY_CONFLICT_LEVEL, ConflictDetection.COLUMN.name()).build();
// used to turn location URIs stored in metadata back into Locations
private final LocationFactory locationFactory;
// used to resolve the per-namespace directory where artifact files are written
private final NamespacedLocationFactory namespacedLocationFactory;
// transactional wrapper around the meta table; all metadata reads and writes go through this
private final Transactional<DatasetContext<Table>, Table> metaTable;
// configured with custom codecs for Schema and ArtifactRange
private final Gson gson;
/**
 * Creates the store. The backing meta table is obtained (and created if missing) when it is first
 * accessed through a transaction, not at construction time.
 *
 * @param datasetFramework framework used to get or create the meta table
 * @param namespacedLocationFactory factory used to resolve per-namespace artifact directories
 * @param locationFactory factory used to re-create {@link Location}s from stored URIs
 * @param txExecutorFactory factory for the transaction executors wrapping meta table access
 */
@Inject
ArtifactStore(final DatasetFramework datasetFramework,
              NamespacedLocationFactory namespacedLocationFactory,
              LocationFactory locationFactory,
              TransactionExecutorFactory txExecutorFactory) {
  this.locationFactory = locationFactory;
  this.namespacedLocationFactory = namespacedLocationFactory;
  // Schema and ArtifactRange have custom JSON representations, so register codecs for them
  this.gson = new GsonBuilder()
    .registerTypeAdapter(Schema.class, new SchemaTypeAdapter())
    .registerTypeAdapter(ArtifactRange.class, new ArtifactRangeCodec())
    .create();
  this.metaTable = Transactional.of(txExecutorFactory, new Supplier<DatasetContext<Table>>() {
    @Override
    public DatasetContext<Table> get() {
      try {
        // get-or-create so the store still works if setupDatasets() was never called
        return DatasetContext.of((Table) DatasetsUtil.getOrCreateDataset(
          datasetFramework, META_ID, Table.class.getName(),
          META_PROPERTIES, DatasetDefinition.NO_ARGUMENTS, null));
      } catch (Exception e) {
        // there's nothing much we can do here
        throw Throwables.propagate(e);
      }
    }
  });
}
/**
 * Adds datasets and types to the given {@link DatasetFramework} used by artifact store.
 *
 * @param framework framework to add types and datasets to
 * @throws IOException if there was an IO error adding the meta table instance
 * @throws DatasetManagementException if the meta table instance could not be added to the framework
 */
public static void setupDatasets(DatasetFramework framework) throws IOException, DatasetManagementException {
  framework.addInstance(Table.class.getName(), META_ID, META_PROPERTIES);
}
/**
 * Get information about all artifacts in the given namespace. If there are no artifacts in the namespace,
 * this will return an empty list. Note that existence of the namespace is not checked.
 *
 * @param namespace the namespace to get artifact information about
 * @return unmodifiable list of artifact info about every artifact in the given namespace
 * @throws IOException if there was an exception reading the artifact information from the metastore
 */
public List<ArtifactDetail> getArtifacts(final NamespaceId namespace) throws IOException {
  return metaTable.executeUnchecked(new TransactionExecutor.Function<DatasetContext<Table>, List<ArtifactDetail>>() {
    @Override
    public List<ArtifactDetail> apply(DatasetContext<Table> context) throws Exception {
      List<ArtifactDetail> artifacts = Lists.newArrayList();
      Scanner scanner = context.get().scan(scanArtifacts(namespace));
      // close the scanner even if deserializing a row fails, so underlying resources aren't leaked
      try {
        Row row;
        while ((row = scanner.next()) != null) {
          addArtifactsToList(artifacts, row);
        }
      } finally {
        scanner.close();
      }
      return Collections.unmodifiableList(artifacts);
    }
  });
}
/**
 * Get all artifacts that match artifacts in the given ranges.
 *
 * @param range the range to match artifacts in
 * @return an unmodifiable list of all artifacts that match the given ranges. If none exist, an empty list
 *         is returned
 */
public List<ArtifactDetail> getArtifacts(final ArtifactRange range) {
  return metaTable.executeUnchecked(new TransactionExecutor.Function<DatasetContext<Table>, List<ArtifactDetail>>() {
    @Override
    public List<ArtifactDetail> apply(DatasetContext<Table> context) throws Exception {
      List<ArtifactDetail> matching = Lists.newArrayList();
      Table table = context.get();
      // a single row holds every version of the artifact named by the range
      ArtifactKey artifactKey = new ArtifactKey(range.getNamespace().toEntityId(), range.getName());
      Row row = table.get(artifactKey.getRowKey());
      for (Map.Entry<byte[], byte[]> columnEntry : row.getColumns().entrySet()) {
        // column qualifier is the version; skip versions outside the requested range
        String version = Bytes.toString(columnEntry.getKey());
        if (!range.versionIsInRange(new ArtifactVersion(version))) {
          continue;
        }
        ArtifactData data = gson.fromJson(Bytes.toString(columnEntry.getValue()), ArtifactData.class);
        Id.Artifact artifactId = Id.Artifact.from(artifactKey.namespace.toId(), artifactKey.name, version);
        ArtifactDescriptor descriptor =
          new ArtifactDescriptor(artifactId.toArtifactId(), locationFactory.create(data.locationURI));
        matching.add(new ArtifactDetail(descriptor, data.meta));
      }
      return Collections.unmodifiableList(matching);
    }
  });
}
/**
 * Get information about all versions of the given artifact.
 *
 * @param namespace the namespace to get artifacts from
 * @param artifactName the name of the artifact to get
 * @return unmodifiable list of information about all versions of the given artifact
 * @throws ArtifactNotFoundException if no version of the given artifact exists
 * @throws IOException if there was an exception reading the artifact information from the metastore
 */
public List<ArtifactDetail> getArtifacts(final NamespaceId namespace, final String artifactName)
  throws ArtifactNotFoundException, IOException {
  List<ArtifactDetail> artifacts = metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, List<ArtifactDetail>>() {
      @Override
      public List<ArtifactDetail> apply(DatasetContext<Table> context) throws Exception {
        List<ArtifactDetail> found = Lists.newArrayList();
        // every version of the artifact lives in one row, keyed by namespace and name
        Row row = context.get().get(new ArtifactKey(namespace, artifactName).getRowKey());
        if (!row.isEmpty()) {
          addArtifactsToList(found, row);
        }
        return found;
      }
    });
  // an empty result means no version of the artifact exists at all
  if (artifacts.isEmpty()) {
    throw new ArtifactNotFoundException(namespace.toId(), artifactName);
  }
  return Collections.unmodifiableList(artifacts);
}
/**
 * Get information about the given artifact.
 *
 * @param artifactId the artifact to get
 * @return information about the artifact
 * @throws ArtifactNotFoundException if the given artifact does not exist
 * @throws IOException if there was an exception reading the artifact information from the metastore
 */
public ArtifactDetail getArtifact(final Id.Artifact artifactId) throws ArtifactNotFoundException, IOException {
  ArtifactData data = metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, ArtifactData>() {
      @Override
      public ArtifactData apply(DatasetContext<Table> context) throws Exception {
        // a single cell holds the serialized ArtifactData for this exact id and version
        ArtifactCell artifactCell = new ArtifactCell(artifactId);
        byte[] serialized = context.get().get(artifactCell.rowkey, artifactCell.column);
        if (serialized == null) {
          return null;
        }
        return gson.fromJson(Bytes.toString(serialized), ArtifactData.class);
      }
    });
  if (data == null) {
    throw new ArtifactNotFoundException(artifactId);
  }
  ArtifactDescriptor descriptor =
    new ArtifactDescriptor(artifactId.toArtifactId(), locationFactory.create(data.locationURI));
  return new ArtifactDetail(descriptor, data.meta);
}
/**
 * Get all application classes that belong to the specified namespace.
 * Results are returned as a sorted map from artifact to application classes in that artifact.
 * Map entries are sorted by the artifact.
 *
 * @param namespace the namespace from which to get application classes
 * @return an unmodifiable map of artifact to a list of all application classes in that artifact.
 *         The map will never be null. If there are no application classes, an empty map will be returned.
 */
public SortedMap<ArtifactDescriptor, List<ApplicationClass>> getApplicationClasses(final NamespaceId namespace) {
  return metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, SortedMap<ArtifactDescriptor, List<ApplicationClass>>>() {
      @Override
      public SortedMap<ArtifactDescriptor, List<ApplicationClass>> apply(DatasetContext<Table> context) {
        SortedMap<ArtifactDescriptor, List<ApplicationClass>> result = Maps.newTreeMap();
        Scanner scanner = context.get().scan(scanAppClasses(namespace));
        // close the scanner even if parsing a row fails, so underlying resources aren't leaked
        try {
          Row row;
          while ((row = scanner.next()) != null) {
            // columns are {artifact-name}:{artifact-version}. vals are serialized AppData
            for (Map.Entry<byte[], byte[]> column : row.getColumns().entrySet()) {
              ArtifactColumn artifactColumn = ArtifactColumn.parse(column.getKey());
              AppData appData = gson.fromJson(Bytes.toString(column.getValue()), AppData.class);
              ArtifactDescriptor artifactDescriptor = new ArtifactDescriptor(
                artifactColumn.artifactId.toArtifactId(), locationFactory.create(appData.artifactLocationURI));
              // several app classes may come from the same artifact; group them per descriptor
              List<ApplicationClass> existingAppClasses = result.get(artifactDescriptor);
              if (existingAppClasses == null) {
                existingAppClasses = new ArrayList<>();
                result.put(artifactDescriptor, existingAppClasses);
              }
              existingAppClasses.add(appData.appClass);
            }
          }
        } finally {
          scanner.close();
        }
        return Collections.unmodifiableSortedMap(result);
      }
    });
}
/**
 * Get all application classes that belong to the specified namespace of the specified classname.
 * Results are returned as a sorted map from artifact to the application class in that artifact.
 * Map entries are sorted by the artifact.
 *
 * @param namespace the namespace from which to get application classes
 * @param className the classname of application classes to get
 * @return an unmodifiable map of artifact to the application class in that artifact.
 *         The map will never be null. If there are no application classes, an empty map will be returned.
 */
public SortedMap<ArtifactDescriptor, ApplicationClass> getApplicationClasses(final NamespaceId namespace,
                                                                             final String className) {
  return metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, SortedMap<ArtifactDescriptor, ApplicationClass>>() {
      @Override
      public SortedMap<ArtifactDescriptor, ApplicationClass> apply(DatasetContext<Table> context) {
        SortedMap<ArtifactDescriptor, ApplicationClass> appClasses = Maps.newTreeMap();
        Row row = context.get().get(new AppClassKey(namespace, className).getRowKey());
        if (row.isEmpty()) {
          return Collections.unmodifiableSortedMap(appClasses);
        }
        // each column is {artifact-name}:{artifact-version}, each value a serialized AppData
        for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
          ArtifactColumn artifactColumn = ArtifactColumn.parse(entry.getKey());
          AppData appData = gson.fromJson(Bytes.toString(entry.getValue()), AppData.class);
          ArtifactDescriptor descriptor = new ArtifactDescriptor(
            artifactColumn.artifactId.toArtifactId(), locationFactory.create(appData.artifactLocationURI));
          appClasses.put(descriptor, appData.appClass);
        }
        return Collections.unmodifiableSortedMap(appClasses);
      }
    });
}
/**
 * Get all plugin classes that extend the given parent artifact.
 * Results are returned as a sorted map from plugin artifact to plugins in that artifact.
 * Map entries are sorted by the artifact id of the plugin.
 *
 * @param namespace the namespace to look for plugin artifacts in
 * @param parentArtifactId the id of the artifact to find plugins for
 * @return an unmodifiable map of plugin artifact to plugin classes for all plugin classes accessible by the given
 *         artifact. The map will never be null. If there are no plugin classes, an empty map will be returned.
 * @throws ArtifactNotFoundException if the artifact to find plugins for does not exist
 * @throws IOException if there was an exception reading metadata from the metastore
 */
public SortedMap<ArtifactDescriptor, Set<PluginClass>> getPluginClasses(final NamespaceId namespace,
                                                                        final Id.Artifact parentArtifactId)
  throws ArtifactNotFoundException, IOException {
  SortedMap<ArtifactDescriptor, Set<PluginClass>> pluginClasses = metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, SortedMap<ArtifactDescriptor, Set<PluginClass>>>() {
      @Override
      public SortedMap<ArtifactDescriptor, Set<PluginClass>> apply(DatasetContext<Table> context) throws Exception {
        Table table = context.get();
        SortedMap<ArtifactDescriptor, Set<PluginClass>> result = getPluginsInArtifact(table, parentArtifactId);
        if (result == null) {
          // parent artifact does not exist; translated into ArtifactNotFoundException outside the tx
          return null;
        }
        // should be able to scan by column prefix as well... instead, we have to filter out by namespace
        Scanner scanner = table.scan(scanPlugins(parentArtifactId));
        // close the scanner even if parsing a row fails, so underlying resources aren't leaked
        try {
          Row row;
          while ((row = scanner.next()) != null) {
            addPluginsToMap(namespace, parentArtifactId, result, row);
          }
        } finally {
          scanner.close();
        }
        return Collections.unmodifiableSortedMap(result);
      }
    });
  if (pluginClasses == null) {
    throw new ArtifactNotFoundException(parentArtifactId);
  }
  return pluginClasses;
}
/**
 * Get all plugin classes of the given type that extend the given parent artifact.
 * Results are returned as a map from plugin artifact to plugins in that artifact.
 *
 * @param namespace the namespace to look for plugin artifacts in
 * @param parentArtifactId the id of the artifact to find plugins for
 * @param type the type of plugin to look for
 * @return an unmodifiable map of plugin artifact to plugin classes for all plugin classes accessible by the
 *         given artifact. The map will never be null. If there are no plugin classes, an empty map will be returned.
 * @throws ArtifactNotFoundException if the artifact to find plugins for does not exist
 * @throws IOException if there was an exception reading metadata from the metastore
 */
public SortedMap<ArtifactDescriptor, Set<PluginClass>> getPluginClasses(final NamespaceId namespace,
                                                                        final Id.Artifact parentArtifactId,
                                                                        final String type)
  throws IOException, ArtifactNotFoundException {
  SortedMap<ArtifactDescriptor, Set<PluginClass>> pluginClasses = metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, SortedMap<ArtifactDescriptor, Set<PluginClass>>>() {
      @Override
      public SortedMap<ArtifactDescriptor, Set<PluginClass>> apply(DatasetContext<Table> context) throws Exception {
        Table table = context.get();
        // only keep plugins in the parent artifact itself that match the requested type
        SortedMap<ArtifactDescriptor, Set<PluginClass>> result =
          getPluginsInArtifact(table, parentArtifactId, new Predicate<PluginClass>() {
            @Override
            public boolean apply(PluginClass input) {
              return type.equals(input.getType());
            }
          });
        if (result == null) {
          // parent artifact does not exist; translated into ArtifactNotFoundException outside the tx
          return null;
        }
        Scanner scanner = table.scan(scanPlugins(parentArtifactId, type));
        // close the scanner even if parsing a row fails, so underlying resources aren't leaked
        try {
          Row row;
          while ((row = scanner.next()) != null) {
            addPluginsToMap(namespace, parentArtifactId, result, row);
          }
        } finally {
          scanner.close();
        }
        return Collections.unmodifiableSortedMap(result);
      }
    });
  if (pluginClasses == null) {
    throw new ArtifactNotFoundException(parentArtifactId);
  }
  return pluginClasses;
}
/**
 * Get all plugin classes of the given type and name that extend the given parent artifact.
 * Results are returned as a map from plugin artifact to plugins in that artifact.
 *
 * @param namespace the namespace to look for plugin artifacts in
 * @param parentArtifactId the id of the artifact to find plugins for
 * @param type the type of plugin to look for
 * @param name the name of the plugin to look for
 * @return an unmodifiable map of plugin artifact to plugin classes of the given type and name, accessible by the
 *         given artifact. The map will never be null, and will never be empty.
 * @throws ArtifactNotFoundException if the artifact to find plugins for does not exist
 * @throws PluginNotExistsException if no plugin with the given type and name exists in the namespace
 * @throws IOException if there was an exception reading metadata from the metastore
 */
public SortedMap<ArtifactDescriptor, PluginClass> getPluginClasses(final NamespaceId namespace,
                                                                   final Id.Artifact parentArtifactId,
                                                                   final String type, final String name)
  throws IOException, ArtifactNotFoundException, PluginNotExistsException {
  SortedMap<ArtifactDescriptor, PluginClass> plugins = metaTable.executeUnchecked(
    new TransactionExecutor.Function<DatasetContext<Table>, SortedMap<ArtifactDescriptor, PluginClass>>() {
      @Override
      public SortedMap<ArtifactDescriptor, PluginClass> apply(DatasetContext<Table> context) throws Exception {
        Table table = context.get();
        SortedMap<ArtifactDescriptor, PluginClass> result = new TreeMap<>();
        // check parent exists; null return is translated into ArtifactNotFoundException outside the tx
        ArtifactCell parentCell = new ArtifactCell(parentArtifactId);
        byte[] parentDataBytes = table.get(parentCell.rowkey, parentCell.column);
        if (parentDataBytes == null) {
          return null;
        }
        // check if any plugins of that type and name exist in the parent artifact already
        ArtifactData parentData = gson.fromJson(Bytes.toString(parentDataBytes), ArtifactData.class);
        Set<PluginClass> parentPlugins = parentData.meta.getClasses().getPlugins();
        for (PluginClass pluginClass : parentPlugins) {
          if (pluginClass.getName().equals(name) && pluginClass.getType().equals(type)) {
            ArtifactDescriptor parentDescriptor =
              new ArtifactDescriptor(parentArtifactId.toArtifactId(), locationFactory.create(parentData.locationURI));
            result.put(parentDescriptor, pluginClass);
            break;
          }
        }
        PluginKey pluginKey = new PluginKey(parentArtifactId.getNamespace(), parentArtifactId.getName(), type, name);
        // reuse the table reference instead of fetching it from the context a second time
        Row row = table.get(pluginKey.getRowKey());
        if (!row.isEmpty()) {
          // column is the artifact name and version, value is the serialized PluginClass
          for (Map.Entry<byte[], byte[]> column : row.getColumns().entrySet()) {
            ImmutablePair<ArtifactDescriptor, PluginClass> pluginEntry =
              getPluginEntry(namespace, parentArtifactId, column);
            if (pluginEntry != null) {
              result.put(pluginEntry.getFirst(), pluginEntry.getSecond());
            }
          }
        }
        return result;
      }
    });
  if (plugins == null) {
    throw new ArtifactNotFoundException(parentArtifactId);
  }
  if (plugins.isEmpty()) {
    throw new PluginNotExistsException(parentArtifactId.getNamespace(), type, name);
  }
  return Collections.unmodifiableSortedMap(plugins);
}
/**
 * Update artifact properties using an update function. Functions will receive an immutable map.
 *
 * @param artifactId the id of the artifact to add
 * @param updateFunction the function used to update existing properties
 * @throws ArtifactNotFoundException if the artifact does not exist
 * @throws IOException if there was an exception writing the properties to the metastore
 */
public void updateArtifactProperties(final Id.Artifact artifactId,
                                     final Function<Map<String, String>, Map<String, String>> updateFunction)
  throws ArtifactNotFoundException, IOException {
  try {
    // read-modify-write of the artifact cell inside a single transaction
    boolean exists = metaTable.execute(new TransactionExecutor.Function<DatasetContext<Table>, Boolean>() {
      @Override
      public Boolean apply(DatasetContext<Table> context) throws Exception {
        Table table = context.get();
        ArtifactCell artifactCell = new ArtifactCell(artifactId);
        byte[] existingMetaBytes = table.get(artifactCell.rowkey, artifactCell.column);
        if (existingMetaBytes == null) {
          // artifact row is missing; reported as not-found outside the transaction
          return false;
        }
        // only the properties change; classes, parent ranges, and the jar location carry over unchanged
        ArtifactData old = gson.fromJson(Bytes.toString(existingMetaBytes), ArtifactData.class);
        ArtifactMeta updatedMeta = new ArtifactMeta(old.meta.getClasses(), old.meta.getUsableBy(),
                                                    updateFunction.apply(old.meta.getProperties()));
        ArtifactData updatedData = new ArtifactData(locationFactory.create(old.locationURI), updatedMeta);
        // write artifact metadata
        table.put(artifactCell.rowkey, artifactCell.column, Bytes.toBytes(gson.toJson(updatedData)));
        return true;
      }
    });
    if (!exists) {
      throw new ArtifactNotFoundException(artifactId);
    }
  } catch (TransactionFailureException | InterruptedException e) {
    throw new IOException(e);
  }
}
/**
 * Write the artifact and its metadata to the store. Once added, artifacts cannot be changed unless they are
 * snapshot versions.
 *
 * @param artifactId the id of the artifact to add
 * @param artifactMeta the metadata for the artifact
 * @param artifactContentSupplier the supplier for the input stream of the contents of the artifact
 * @return detail about the newly added artifact
 * @throws WriteConflictException if the artifact is already currently being written
 * @throws ArtifactAlreadyExistsException if a non-snapshot version of the artifact already exists
 * @throws IOException if there was an exception persisting the artifact contents to the filesystem,
 *                     or persisting the artifact metadata to the metastore
 */
public ArtifactDetail write(final Id.Artifact artifactId,
                            final ArtifactMeta artifactMeta,
                            final InputSupplier<? extends InputStream> artifactContentSupplier)
  throws WriteConflictException, ArtifactAlreadyExistsException, IOException {
  // if we're not a snapshot version, check that the artifact doesn't exist already.
  // this is an early, cheap check; the same check is repeated transactionally before the meta write below
  final ArtifactCell artifactCell = new ArtifactCell(artifactId);
  if (!artifactId.getVersion().isSnapshot()) {
    byte[] existingMeta = metaTable.executeUnchecked(
      new TransactionExecutor.Function<DatasetContext<Table>, byte[]>() {
        @Override
        public byte[] apply(DatasetContext<Table> context) throws Exception {
          Table table = context.get();
          return table.get(artifactCell.rowkey, artifactCell.column);
        }
      });
    if (existingMeta != null) {
      throw new ArtifactAlreadyExistsException(artifactId);
    }
  }
  // base location is /namespaces/{namespace-id}/artifacts/{artifact-name} (see class javadoc)
  Location fileDirectory =
    namespacedLocationFactory.get(artifactId.getNamespace(), ARTIFACTS_PATH).append(artifactId.getName());
  Locations.mkdirsIfNotExists(fileDirectory);
  // write the file contents
  // NOTE(review): getTempFile is presumably unique per call so concurrent writers don't clobber
  // each other's jar — confirm against Location#getTempFile
  final Location destination = fileDirectory.append(artifactId.getVersion().getVersion()).getTempFile(".jar");
  try (InputStream artifactContents = artifactContentSupplier.getInput();
       OutputStream destinationStream = destination.getOutputStream()) {
    ByteStreams.copy(artifactContents, destinationStream);
  }
  // now try and write the metadata for the artifact
  try {
    boolean written = metaTable.execute(new TransactionExecutor.Function<DatasetContext<Table>, Boolean>() {
      @Override
      public Boolean apply(DatasetContext<Table> context) throws Exception {
        Table table = context.get();
        // we have to check that the metadata doesn't exist again since somebody else may have written
        // the artifact while we were copying the artifact to the filesystem.
        byte[] existingMetaBytes = table.get(artifactCell.rowkey, artifactCell.column);
        boolean isSnapshot = artifactId.getVersion().isSnapshot();
        if (existingMetaBytes != null && !isSnapshot) {
          // non-snapshot artifacts are immutable. If there is existing metadata, stop here.
          return false;
        }
        ArtifactData data = new ArtifactData(destination, artifactMeta);
        // cleanup existing metadata if it exists and this is a snapshot
        // if we are overwriting a previous snapshot, need to clean up the old snapshot data
        // this means cleaning up the old jar, and deleting plugin and app rows.
        if (existingMetaBytes != null) {
          deleteMeta(table, artifactId, existingMetaBytes);
        }
        // write artifact metadata
        writeMeta(table, artifactId, data);
        return true;
      }
    });
    if (!written) {
      throw new ArtifactAlreadyExistsException(artifactId);
    }
  } catch (TransactionConflictException e) {
    // another writer committed first; remove the now-orphaned jar we just copied
    destination.delete();
    throw new WriteConflictException(artifactId);
  } catch (TransactionFailureException | InterruptedException e) {
    destination.delete();
    throw new IOException(e);
  }
  return new ArtifactDetail(new ArtifactDescriptor(artifactId.toArtifactId(), destination), artifactMeta);
}
/**
 * Delete the specified artifact. Programs that use the artifact will no longer be able to start.
 *
 * @param artifactId the id of the artifact to delete
 * @throws IOException if there was an IO error deleting the metadata or the actual artifact
 */
public void delete(final Id.Artifact artifactId) throws IOException {
  // all metadata for the artifact is removed within a single transaction
  metaTable.executeUnchecked(new TransactionExecutor.Function<DatasetContext<Table>, Void>() {
    @Override
    public Void apply(DatasetContext<Table> context) throws Exception {
      Table table = context.get();
      // look up the stored details first; they identify the plugin and app rows to remove
      ArtifactCell artifactCell = new ArtifactCell(artifactId);
      byte[] detailBytes = table.get(artifactCell.rowkey, artifactCell.column);
      if (detailBytes != null) {
        deleteMeta(table, artifactId, detailBytes);
      }
      // nothing stored for this artifact means nothing to delete
      return null;
    }
  });
}
/**
 * Clear all data in the given namespace. Used only in unit tests.
 *
 * @param namespace the namespace to delete data in
 * @throws IOException if there was some problem deleting the data
 */
@VisibleForTesting
void clear(final NamespaceId namespace) throws IOException {
  namespacedLocationFactory.get(namespace.toId(), ARTIFACTS_PATH).delete(true);
  metaTable.executeUnchecked(new TransactionExecutor.Function<DatasetContext<Table>, Void>() {
    @Override
    public Void apply(DatasetContext<Table> context) throws Exception {
      Table table = context.get();
      // delete all rows about artifacts in the namespace
      deleteAllRows(table, scanArtifacts(namespace));
      // delete all rows about artifacts in the namespace and the plugins they have access to
      deleteAllRows(table, new Scan(
        Bytes.toBytes(String.format("%s:%s:", PLUGIN_PREFIX, namespace.getNamespace())),
        Bytes.toBytes(String.format("%s:%s;", PLUGIN_PREFIX, namespace.getNamespace()))
      ));
      // delete app classes in this namespace
      deleteAllRows(table, scanAppClasses(namespace));
      // delete plugins in this namespace from system artifacts
      // for example, if there was an artifact in this namespace that extends a system artifact
      Scan systemPluginsScan = new Scan(
        Bytes.toBytes(String.format("%s:%s:", PLUGIN_PREFIX, Id.Namespace.SYSTEM.getId())),
        Bytes.toBytes(String.format("%s:%s;", PLUGIN_PREFIX, Id.Namespace.SYSTEM.getId()))
      );
      Scanner scanner = table.scan(systemPluginsScan);
      try {
        Row row;
        while ((row = scanner.next()) != null) {
          for (Map.Entry<byte[], byte[]> columnVal : row.getColumns().entrySet()) {
            // the column is the id of the artifact the plugin is from
            ArtifactColumn column = ArtifactColumn.parse(columnVal.getKey());
            // if the plugin artifact is in the namespace we're deleting, delete this column.
            if (column.artifactId.getNamespace().equals(namespace)) {
              table.delete(row.getRow(), column.getColumn());
            }
          }
        }
      } finally {
        scanner.close();
      }
      return null;
    }
  });
}

// deletes every row matched by the given scan, always closing the scanner even on failure
private void deleteAllRows(Table table, Scan scan) {
  Scanner scanner = table.scan(scan);
  try {
    Row row;
    while ((row = scanner.next()) != null) {
      table.delete(row.getRow());
    }
  } finally {
    scanner.close();
  }
}
/**
 * Writes the artifact cell plus one plugin row per (plugin, parent range) pair and one app class
 * row per application class contained in the artifact.
 */
private void writeMeta(Table table, Id.Artifact artifactId, ArtifactData data) throws IOException {
  ArtifactCell artifactCell = new ArtifactCell(artifactId);
  table.put(artifactCell.rowkey, artifactCell.column, Bytes.toBytes(gson.toJson(data)));
  // plugin and app rows share the column {artifact-name}:{artifact-version}; the namespace is
  // already part of their rowkeys, so it is not repeated in the column
  byte[] artifactColumn = new ArtifactColumn(artifactId).getColumn();
  ArtifactClasses classes = data.meta.getClasses();
  Location artifactLocation = locationFactory.create(data.locationURI);
  // one plugin row per artifact range this plugin extends: p:{namespace}:{parent-name}:{type}:{name}
  for (PluginClass pluginClass : classes.getPlugins()) {
    for (ArtifactRange parentRange : data.meta.getUsableBy()) {
      PluginKey pluginKey = new PluginKey(
        parentRange.getNamespace(), parentRange.getName(), pluginClass.getType(), pluginClass.getName());
      PluginData pluginData = new PluginData(pluginClass, parentRange, artifactLocation);
      table.put(pluginKey.getRowKey(), artifactColumn, Bytes.toBytes(gson.toJson(pluginData)));
    }
  }
  // one app class row per application class: a:{namespace}:{classname}
  for (ApplicationClass appClass : classes.getApps()) {
    AppClassKey appClassKey = new AppClassKey(artifactId.getNamespace().toEntityId(), appClass.getClassName());
    AppData appData = new AppData(appClass, artifactLocation);
    table.put(appClassKey.getRowKey(), artifactColumn, Bytes.toBytes(gson.toJson(appData)));
  }
}
// Removes every metadata row that was written for the artifact, then deletes its jar file.
private void deleteMeta(Table table, Id.Artifact artifactId, byte[] oldData) throws IOException {
  // remove the artifact cell
  ArtifactCell cell = new ArtifactCell(artifactId);
  table.delete(cell.rowkey, cell.column);

  // remove the plugin rows this artifact contributed, one per parent artifact range it extends
  ArtifactData previous = gson.fromJson(Bytes.toString(oldData), ArtifactData.class);
  byte[] column = new ArtifactColumn(artifactId).getColumn();
  for (PluginClass plugin : previous.meta.getClasses().getPlugins()) {
    for (ArtifactRange parentRange : previous.meta.getUsableBy()) {
      // rowkey format: p:{namespace}:{type}:{name}
      PluginKey pluginKey = new PluginKey(
        parentRange.getNamespace(), parentRange.getName(), plugin.getType(), plugin.getName());
      table.delete(pluginKey.getRowKey(), column);
    }
  }

  // remove the app class rows
  for (ApplicationClass appClass : previous.meta.getClasses().getApps()) {
    AppClassKey appClassKey = new AppClassKey(artifactId.getNamespace().toEntityId(), appClass.getClassName());
    table.delete(appClassKey.getRowKey(), column);
  }

  // remove the old jar file itself.
  // NOTE(review): the boolean result of delete() is ignored, so a failed delete leaves an
  // orphaned jar behind silently -- confirm whether this is intended best-effort cleanup.
  locationFactory.create(previous.locationURI).delete();
}
// Convenience overload that returns every plugin class in the artifact, unfiltered.
private SortedMap<ArtifactDescriptor, Set<PluginClass>> getPluginsInArtifact(Table table, Id.Artifact artifactId) {
  Predicate<PluginClass> acceptAll = Predicates.alwaysTrue();
  return getPluginsInArtifact(table, artifactId, acceptAll);
}
// Returns the plugin classes declared inside the given artifact that pass the filter, keyed by the
// artifact's descriptor, or null if the artifact does not exist.
private SortedMap<ArtifactDescriptor, Set<PluginClass>> getPluginsInArtifact(Table table, Id.Artifact artifactId,
                                                                             Predicate<PluginClass> filter) {
  // verify the artifact exists; null signals "no such artifact" to callers
  ArtifactCell cell = new ArtifactCell(artifactId);
  byte[] dataBytes = table.get(cell.rowkey, cell.column);
  if (dataBytes == null) {
    return null;
  }

  SortedMap<ArtifactDescriptor, Set<PluginClass>> plugins = new TreeMap<>();
  // include any plugin classes declared inside the artifact itself that match the filter
  ArtifactData artifactData = gson.fromJson(Bytes.toString(dataBytes), ArtifactData.class);
  Set<PluginClass> matching =
    Sets.newLinkedHashSet(Iterables.filter(artifactData.meta.getClasses().getPlugins(), filter));
  if (!matching.isEmpty()) {
    Location jarLocation = locationFactory.create(artifactData.locationURI);
    plugins.put(new ArtifactDescriptor(artifactId.toArtifactId(), jarLocation), matching);
  }
  return plugins;
}
// Deserializes every version column of the given artifact row into an ArtifactDetail
// and appends each to the supplied list.
private void addArtifactsToList(List<ArtifactDetail> artifactDetails, Row row) throws IOException {
  // the rowkey identifies namespace and artifact name; each column key is one version string
  ArtifactKey artifactKey = ArtifactKey.parse(row.getRow());
  for (Map.Entry<byte[], byte[]> entry : row.getColumns().entrySet()) {
    Id.Artifact artifactId =
      Id.Artifact.from(artifactKey.namespace.toId(), artifactKey.name, Bytes.toString(entry.getKey()));
    ArtifactData data = gson.fromJson(Bytes.toString(entry.getValue()), ArtifactData.class);
    ArtifactDescriptor descriptor =
      new ArtifactDescriptor(artifactId.toArtifactId(), locationFactory.create(data.locationURI));
    artifactDetails.add(new ArtifactDetail(descriptor, data.meta));
  }
}
// this method examines all plugins in the given row and checks if they extend the given parent artifact
// and are from an artifact in the given namespace.
// if so, information about the plugin artifact and the plugin details are added to the given map.
private void addPluginsToMap(NamespaceId namespace, Id.Artifact parentArtifactId,
                             SortedMap<ArtifactDescriptor, Set<PluginClass>> map,
                             Row row) throws IOException {
  // column is the artifact namespace, name, and version. value is the serialized PluginData
  for (Map.Entry<byte[], byte[]> column : row.getColumns().entrySet()) {
    ImmutablePair<ArtifactDescriptor, PluginClass> pluginEntry = getPluginEntry(namespace, parentArtifactId, column);
    if (pluginEntry != null) {
      ArtifactDescriptor artifactDescriptor = pluginEntry.getFirst();
      // single get + null check instead of the containsKey/put/get triple lookup
      Set<PluginClass> plugins = map.get(artifactDescriptor);
      if (plugins == null) {
        plugins = Sets.newHashSet();
        map.put(artifactDescriptor, plugins);
      }
      plugins.add(pluginEntry.getSecond());
    }
  }
}
/**
 * Decode the PluginClass from the table column if it is from an artifact in the given namespace and
 * extends the given parent artifact. If the plugin's artifact is not in the given namespace, or it does not
 * extend the given parent artifact, return null.
 */
private ImmutablePair<ArtifactDescriptor, PluginClass> getPluginEntry(NamespaceId namespace,
                                                                      Id.Artifact parentArtifactId,
                                                                      Map.Entry<byte[], byte[]> column) {
  // the column key encodes the plugin artifact's namespace, name and version
  ArtifactColumn artifactColumn = ArtifactColumn.parse(column.getKey());
  Id.Namespace pluginNamespace = artifactColumn.artifactId.getNamespace();

  // only plugins from the system namespace or the requested namespace are visible
  boolean visible = Id.Namespace.SYSTEM.equals(pluginNamespace) || pluginNamespace.equals(namespace.toId());
  if (!visible) {
    return null;
  }

  // skip plugins whose usable-by range does not cover this version of the parent artifact
  PluginData pluginData = gson.fromJson(Bytes.toString(column.getValue()), PluginData.class);
  if (!pluginData.usableBy.versionIsInRange(parentArtifactId.getVersion())) {
    return null;
  }

  ArtifactDescriptor descriptor = new ArtifactDescriptor(
    artifactColumn.artifactId.toArtifactId(), locationFactory.create(pluginData.artifactLocationURI));
  return ImmutablePair.of(descriptor, pluginData.pluginClass);
}
// Scan covering every artifact row in the namespace. ';' is the character after ':', so
// [prefix + ":", prefix + ";") spans exactly the keys that start with 'r:{namespace}:'.
private Scan scanArtifacts(NamespaceId namespace) {
  String prefix = ARTIFACT_PREFIX + ":" + namespace.getNamespace();
  return new Scan(Bytes.toBytes(prefix + ":"), Bytes.toBytes(prefix + ";"));
}
// Scan covering every plugin row extending the given parent artifact (any plugin type).
// The stop key uses ';', the character after ':', to bound all keys sharing the prefix.
private Scan scanPlugins(Id.Artifact parentArtifactId) {
  String prefix =
    PLUGIN_PREFIX + ":" + parentArtifactId.getNamespace().getId() + ":" + parentArtifactId.getName();
  return new Scan(Bytes.toBytes(prefix + ":"), Bytes.toBytes(prefix + ";"));
}
// Scan covering plugin rows of a single type that extend the given parent artifact.
// The stop key uses ';', the character after ':', to bound all keys sharing the prefix.
private Scan scanPlugins(Id.Artifact parentArtifactId, String type) {
  String prefix = PLUGIN_PREFIX + ":" + parentArtifactId.getNamespace().getId()
    + ":" + parentArtifactId.getName() + ":" + type;
  return new Scan(Bytes.toBytes(prefix + ":"), Bytes.toBytes(prefix + ";"));
}
// Scan covering every app-class row in the namespace, i.e. keys starting with 'a:{namespace}:'.
private Scan scanAppClasses(NamespaceId namespace) {
  String prefix = APPCLASS_PREFIX + ":" + namespace.getNamespace();
  return new Scan(Bytes.toBytes(prefix + ":"), Bytes.toBytes(prefix + ";"));
}
// Rowkey generator for app-class rows. Keys are of the form 'a:{namespace}:{classname}'.
private static class AppClassKey {
  private final NamespaceId namespace;
  private final String className;

  // private for consistency with the other key classes in this file (PluginKey, ArtifactKey, ...)
  private AppClassKey(NamespaceId namespace, String className) {
    this.namespace = namespace;
    this.className = className;
  }

  private byte[] getRowKey() {
    return Bytes.toBytes(Joiner.on(':').join(APPCLASS_PREFIX, namespace.getNamespace(), className));
  }
}
// Rowkey generator for plugin rows, e.g. p:system:etlbatch:sink:table.
private static class PluginKey {
  private final Id.Namespace parentNamespace;
  private final String parentName;
  private final String pluginType;
  private final String pluginName;

  private PluginKey(Id.Namespace parentArtifactNamespace, String parentArtifactName, String type, String name) {
    this.parentNamespace = parentArtifactNamespace;
    this.parentName = parentArtifactName;
    this.pluginType = type;
    this.pluginName = name;
  }

  // p:{parent-namespace}:{parent-name}:{plugin-type}:{plugin-name}
  private byte[] getRowKey() {
    return Bytes.toBytes(
      PLUGIN_PREFIX + ":" + parentNamespace.getId() + ":" + parentName + ":" + pluginType + ":" + pluginName);
  }
}
// Builds and parses artifact column names of the form {namespace}:{artifact-name}:{version}.
private static class ArtifactColumn {
  private final Id.Artifact artifactId;

  private ArtifactColumn(Id.Artifact artifactId) {
    this.artifactId = artifactId;
  }

  private byte[] getColumn() {
    return Bytes.toBytes(artifactId.getNamespace().getId() + ":" + artifactId.getName()
                           + ":" + artifactId.getVersion().getVersion());
  }

  private static ArtifactColumn parse(byte[] columnBytes) {
    // a limit of 3 keeps any ':' that appears inside the version in the final token
    String[] parts = Bytes.toString(columnBytes).split(":", 3);
    return new ArtifactColumn(Id.Artifact.from(Id.Namespace.from(parts[0]), parts[1], parts[2]));
  }
}
// utilities for creating and parsing row keys for artifacts. Keys are of the form 'r:{namespace}:{artifact-name}'
private static class ArtifactKey {
  private final NamespaceId namespace;
  private final String name;

  private ArtifactKey(NamespaceId namespace, String name) {
    this.namespace = namespace;
    this.name = name;
  }

  private byte[] getRowKey() {
    return Bytes.toBytes(ARTIFACT_PREFIX + ":" + namespace.getNamespace() + ":" + name);
  }

  private static ArtifactKey parse(byte[] rowkey) {
    // parts = [prefix, namespace, name, ...]; the limit of 4 matches the original Splitter behavior
    String[] parts = Bytes.toString(rowkey).split(":", 4);
    return new ArtifactKey(Ids.namespace(parts[1]), parts[2]);
  }
}
// The (rowkey, column) coordinates of a single artifact version's metadata cell:
// the rowkey identifies namespace + artifact name, the column is the version string.
private static class ArtifactCell {
  private final byte[] rowkey;
  private final byte[] column;
  private ArtifactCell(Id.Artifact artifactId) {
    rowkey = new ArtifactKey(artifactId.getNamespace().toEntityId(), artifactId.getName()).getRowKey();
    column = Bytes.toBytes(artifactId.getVersion().getVersion());
  }
}
// Data that will be stored for an artifact. Same as ArtifactDetail, except without the id since that is redundant.
// Serialized to JSON via gson before being written to the table; field order is left untouched so
// existing stored rows keep deserializing the same way.
private static class ArtifactData {
  // URI of the artifact's jar file; stored instead of a Location, which is not directly serializable
  private final URI locationURI;
  private final ArtifactMeta meta;
  public ArtifactData(Location location, ArtifactMeta meta) {
    this.locationURI = location.toURI();
    this.meta = meta;
  }
}
// Data that will be stored for a plugin. Serialized to JSON via gson before being written to the
// table; field order is left untouched so existing stored rows keep deserializing the same way.
private static class PluginData {
  private final PluginClass pluginClass;
  // range of parent artifact versions this plugin can extend
  private final ArtifactRange usableBy;
  // URI of the jar the plugin class lives in; stored instead of a Location, which is not directly serializable
  private final URI artifactLocationURI;
  public PluginData(PluginClass pluginClass, ArtifactRange usableBy, Location artifactLocation) {
    this.pluginClass = pluginClass;
    this.usableBy = usableBy;
    this.artifactLocationURI = artifactLocation.toURI();
  }
}
// Data that will be stored for an application class. Serialized to JSON via gson before being written
// to the table; field order is left untouched so existing stored rows keep deserializing the same way.
private static class AppData {
  private final ApplicationClass appClass;
  // URI of the jar the app class lives in; stored instead of a Location, which is not directly serializable
  private final URI artifactLocationURI;
  public AppData(ApplicationClass appClass, Location artifactLocation) {
    this.appClass = appClass;
    this.artifactLocationURI = artifactLocation.toURI();
  }
}
// Gson codec that round-trips an ArtifactRange through its canonical string form.
private static class ArtifactRangeCodec implements JsonDeserializer<ArtifactRange>, JsonSerializer<ArtifactRange> {

  @Override
  public ArtifactRange deserialize(JsonElement json, Type typeOfT,
                                   JsonDeserializationContext context) throws JsonParseException {
    String rangeString = json.getAsString();
    try {
      return ArtifactRange.parse(rangeString);
    } catch (InvalidArtifactRangeException e) {
      // surface a malformed range as a JSON parse failure, preserving the original cause
      throw new JsonParseException(e);
    }
  }

  @Override
  public JsonElement serialize(ArtifactRange src, Type typeOfSrc, JsonSerializationContext context) {
    return new JsonPrimitive(src.toString());
  }
}
}