/*
*
* Copyright 2013 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.netflix.nicobar.cassandra;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Future;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Iterables;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.model.Column;
import com.netflix.astyanax.model.ColumnList;
import com.netflix.astyanax.model.Row;
import com.netflix.astyanax.model.Rows;
import com.netflix.nicobar.core.archive.JarScriptArchive;
import com.netflix.nicobar.core.archive.ModuleId;
import com.netflix.nicobar.core.archive.ScriptArchive;
import com.netflix.nicobar.core.archive.ScriptModuleSpec;
import com.netflix.nicobar.core.persistence.ArchiveRepository;
import com.netflix.nicobar.core.persistence.ArchiveSummary;
import com.netflix.nicobar.core.persistence.RepositorySummary;
import com.netflix.nicobar.core.persistence.RepositoryView;
/**
* Data access object of {@link ScriptArchive}s stored in Cassandra.
* This implementation is based on the Astyanax and requires CQL 3 support to be enabled.
* <p>
* The query algorithm attempts to divide up read operations such that they won't overwhelm Cassandra
* if many instances are using this implementation to poll for updates.
* Upon insertion, all archives are assigned a shard number calculated as {@code (moduleId.hashCode() % shardNum)}.
* The shard number is subsequently inserted into a column for which a secondary index has been defined.
* RepositoryView poller methods will first search each shard for any rows with an update timestamp greater than
* the last poll time, and if any are found, the contents of those archives are loaded in small batches.
*
*
*<pre>
* Default Schema:
*
* CREATE TABLE script_repo (
* module_id varchar,
* module_name varchar,
* module_version varchar,
* shard_num int,
* last_update timestamp,
* module_spec varchar,
* archive_content_hash blob,
* archive_content blob,
* PRIMARY KEY (module_id)
* );
*
* CREATE INDEX script_repo_shard_num_index on script_repo (shard_num);
* </pre>
*
* See {@link CassandraArchiveRepositoryConfig} to override the default table name.
* @author James Kojo
* @author Vasanth Asokan
*/
public class CassandraArchiveRepository implements ArchiveRepository {
private final static Logger logger = LoggerFactory.getLogger(CassandraArchiveRepository.class);
/** column names */
public static enum Columns {
module_id,
module_name,
module_version,
shard_num,
last_update,
module_spec,
archive_content_hash,
archive_content;
}
protected final RepositoryView defaultView;
private final CassandraArchiveRepositoryConfig config;
private final CassandraGateway cassandra;
/**
* Construct a instance of the repository with the given configuration
* @param config
*/
public CassandraArchiveRepository(CassandraArchiveRepositoryConfig config) {
this.config = Objects.requireNonNull(config, "config");
this.cassandra = this.config.getCassandraGateway();
defaultView = new DefaultView();
}
/**
* Construct a instance of the repository with the given configuration
* @param config
*/
public CassandraArchiveRepository(CassandraArchiveRepositoryConfig config, RepositoryView defaultView) {
this.config = Objects.requireNonNull(config, "config");
this.cassandra = this.config.getCassandraGateway();
this.defaultView = defaultView;
}
@Override
public String getRepositoryId() {
return getConfig().getRepositoryId();
}
/**
* The default view reports all archives inserted into this repository.
* @return the default view into all archives.
*/
@Override
public RepositoryView getDefaultView() {
return defaultView;
}
/**
* No named views supported by this repository!
* Throws UnsupportedOperationException.
*/
@Override
public RepositoryView getView(String view) {
throw new UnsupportedOperationException();
}
/**
* insert a Jar into the script archive
*/
@Override
public void insertArchive(JarScriptArchive jarScriptArchive) throws IOException {
Objects.requireNonNull(jarScriptArchive, "jarScriptArchive");
ScriptModuleSpec moduleSpec = jarScriptArchive.getModuleSpec();
ModuleId moduleId = moduleSpec.getModuleId();
Path jarFilePath;
try {
jarFilePath = Paths.get(jarScriptArchive.getRootUrl().toURI());
} catch (URISyntaxException e) {
throw new IOException(e);
}
int shardNum = calculateShardNum(moduleId);
byte[] jarBytes = Files.readAllBytes(jarFilePath);
byte[] hash = calculateHash(jarBytes);
Map<String, Object> columns = new HashMap<String, Object>();
columns.put(Columns.module_id.name(), moduleId.toString());
columns.put(Columns.module_name.name(), moduleId.getName());
columns.put(Columns.module_version.name(), moduleId.getVersion());
columns.put(Columns.shard_num.name(), shardNum);
columns.put(Columns.last_update.name(), jarScriptArchive.getCreateTime());
columns.put(Columns.archive_content_hash.name(), hash);
columns.put(Columns.archive_content.name(), jarBytes);
String serialized = getConfig().getModuleSpecSerializer().serialize(moduleSpec);
columns.put(Columns.module_spec.name(), serialized);
try {
cassandra.upsert(moduleId.toString(), columns);
} catch (Exception e) {
throw new IOException(e);
}
}
/**
* Unsupported.
*/
@Override
public void insertArchive(JarScriptArchive jarScriptArchive, Map<String, Object> initialDeploySpecs)
throws IOException {
throw new UnsupportedOperationException("This repository does not support deployment specs.");
}
/**
* Get all of the {@link ScriptArchive}s for the given set of moduleIds. Will perform the operation in batches
* as specified by {@link CassandraArchiveRepositoryConfig#getArchiveFetchBatchSize()} and outputs the jar files in
* the path specified by {@link CassandraArchiveRepositoryConfig#getArchiveOutputDirectory()}.
*
* @param moduleIds keys to search for
* @return set of ScriptArchives retrieved from the database
*/
@Override
public Set<ScriptArchive> getScriptArchives(Set<ModuleId> moduleIds) throws IOException {
Set<ScriptArchive> archives = new LinkedHashSet<ScriptArchive>(moduleIds.size()*2);
Path archiveOuputDir = getConfig().getArchiveOutputDirectory();
List<ModuleId> moduleIdList = new LinkedList<ModuleId>(moduleIds);
int batchSize = getConfig().getArchiveFetchBatchSize();
int start = 0;
try {
while (start < moduleIdList.size()) {
int end = Math.min(moduleIdList.size(), start + batchSize);
List<ModuleId> batchModuleIds = moduleIdList.subList(start, end);
List<String> rowKeys = new ArrayList<String>(batchModuleIds.size());
for (ModuleId batchModuleId:batchModuleIds) {
rowKeys.add(batchModuleId.toString());
}
Rows<String, String> rows = cassandra.getRows(rowKeys.toArray(new String[0]));
for (Row<String, String> row : rows) {
String moduleId = row.getKey();
ColumnList<String> columns = row.getColumns();
Column<String> lastUpdateColumn = columns.getColumnByName(Columns.last_update.name());
Column<String> hashColumn = columns.getColumnByName(Columns.archive_content_hash.name());
Column<String> contentColumn = columns.getColumnByName(Columns.archive_content.name());
if (lastUpdateColumn == null || hashColumn == null || contentColumn == null) {
continue;
}
ScriptModuleSpec moduleSpec = getModuleSpec(columns);
long lastUpdateTime = lastUpdateColumn.getLongValue();
byte[] hash = hashColumn.getByteArrayValue();
byte[] content = contentColumn.getByteArrayValue();
// verify the hash
if (hash != null && hash.length > 0 && !verifyHash(hash, content)) {
logger.warn("Content hash validation failed for moduleId {}. size: {}", moduleId, content.length);
continue;
}
String fileName = new StringBuilder().append(moduleId).append("-").append(lastUpdateTime).append(".jar").toString();
Path jarFile = archiveOuputDir.resolve(fileName);
Files.write(jarFile, content);
JarScriptArchive scriptArchive = new JarScriptArchive.Builder(jarFile)
.setModuleSpec(moduleSpec)
.setCreateTime(lastUpdateTime)
.build();
archives.add(scriptArchive);
}
start = end;
}
} catch (Exception e) {
throw new IOException(e);
}
return archives;
}
/**
* Delete an archive by ID
* @param moduleId module id to delete
* @throws IOException
*/
@Override
public void deleteArchive(ModuleId moduleId) throws IOException {
Objects.requireNonNull(moduleId, "moduleId");
cassandra.deleteRow(moduleId.toString());
}
/**
* Get all of the rows in in the table. Attempts to reduce the load on cassandra by splitting up the query into smaller sub-queries
* @param columns which columns to select
* @return result rows
*/
protected Iterable<Row<String, String>> getRows(EnumSet<?> columns) throws Exception {
int shardCount = config.getShardCount();
List<Future<Rows<String, String>>> futures = new ArrayList<Future<Rows<String, String>>>();
for (int i = 0; i < shardCount; i++) {
futures.add(cassandra.selectAsync(generateSelectByShardCql(columns, i)));
}
List<Row<String, String>> rows = new LinkedList<Row<String, String>>();
for (Future<Rows<String, String>> f: futures) {
Rows<String, String> shardRows = f.get();
Iterables.addAll(rows, shardRows);
}
return rows;
}
/**
* Generate the CQL to select specific columns by shard number.
* <pre>
* SELECT ${columns}... FROM script_repo WHERE shard_num = ?
* </pre>
*/
protected String generateSelectByShardCql(EnumSet<?> columns, Integer shardNum) {
StringBuilder sb = new StringBuilder()
.append("SELECT ");
boolean first = true;
for (Enum<?> column : columns) {
if (first) {
first = false;
} else {
sb.append(",");
}
sb.append(column.name());
}
sb.append("\n")
.append("FROM ").append(cassandra.getColumnFamily())
.append("\n").append("WHERE ").append(Columns.shard_num.name())
.append(" = ").append(shardNum).append("\n");
return sb.toString();
}
protected boolean verifyHash(byte[] expectedHashCode, byte[] content) {
byte[] hashCode = calculateHash(content);
return Arrays.equals(expectedHashCode, hashCode);
}
protected byte[] calculateHash(byte[] content) {
MessageDigest digester;
try {
digester = MessageDigest.getInstance("SHA-1");
} catch (NoSuchAlgorithmException e) {
// should never happen
return null;
}
byte[] hashCode = digester.digest(content);
return hashCode;
}
protected int calculateShardNum(ModuleId moduleId) {
return Math.abs(moduleId.hashCode() % getConfig().getShardCount());
}
private ScriptModuleSpec getModuleSpec(ColumnList<String> columns) {
ScriptModuleSpec moduleSpec = null;
if (columns != null) {
Column<String> moduleSpecColumn = columns.getColumnByName(Columns.module_spec.name());
if (moduleSpecColumn != null && moduleSpecColumn.hasValue()) {
String moduleSpecString = moduleSpecColumn.getStringValue();
moduleSpec = getConfig().getModuleSpecSerializer().deserialize(moduleSpecString);
}
}
return moduleSpec;
}
/**
* @return configuration settings for this repository
*/
public CassandraArchiveRepositoryConfig getConfig() {
return config;
}
protected class DefaultView implements RepositoryView {
@Override
public String getName() {
return "Default View";
}
/**
* Get the last update times of all of the script archives managed by this Repository.
* @return map of moduleId to last update time
*/
@Override
public Map<ModuleId, Long> getArchiveUpdateTimes() throws IOException {
Iterable<Row<String, String>> rows;
try {
rows = getRows((EnumSet<?>)EnumSet.of(Columns.module_id, Columns.last_update));
} catch (Exception e) {
throw new IOException(e);
}
Map<ModuleId, Long> updateTimes = new LinkedHashMap<ModuleId, Long>();
for (Row<String, String> row : rows) {
String moduleId = row.getKey();
Column<String> lastUpdateColumn = row.getColumns().getColumnByName(Columns.last_update.name());
Long updateTime = lastUpdateColumn != null ? lastUpdateColumn.getLongValue() : null;
if (StringUtils.isNotBlank(moduleId) && updateTime != null) {
updateTimes.put(ModuleId.fromString(moduleId), updateTime);
}
}
return updateTimes;
}
@Override
public RepositorySummary getRepositorySummary() throws IOException {
Map<ModuleId, Long> updateTimes = getArchiveUpdateTimes();
int archiveCount = updateTimes.size();
long maxUpdateTime = 0;
for (Long updateTime : updateTimes.values()) {
if (updateTime > maxUpdateTime) {
maxUpdateTime = updateTime;
}
}
String description = String.format("Cassandra Keyspace: %s Column Family: %s",
cassandra.getKeyspace().getKeyspaceName(), cassandra.getColumnFamily());
RepositorySummary repositorySummary = new RepositorySummary(getRepositoryId(),
description, archiveCount, maxUpdateTime);
return repositorySummary;
}
/**
* Get a summary of all archives in this Repository
* @return List of summaries
*/
@Override
public List<ArchiveSummary> getArchiveSummaries() throws IOException {
List<ArchiveSummary> summaries = new LinkedList<ArchiveSummary>();
Iterable<Row<String, String>> rows;
try {
rows = getRows((EnumSet<?>)EnumSet.of(Columns.module_id, Columns.last_update, Columns.module_spec));
} catch (Exception e) {
throw new IOException(e);
}
for (Row<String, String> row : rows) {
String moduleId = row.getKey();
ColumnList<String> columns = row.getColumns();
Column<String> lastUpdateColumn = columns.getColumnByName(Columns.last_update.name());
long updateTime = lastUpdateColumn != null ? lastUpdateColumn.getLongValue() : 0;
ScriptModuleSpec moduleSpec = getModuleSpec(columns);
ArchiveSummary summary = new ArchiveSummary(ModuleId.fromString(moduleId), moduleSpec, updateTime, null);
summaries.add(summary);
}
return summaries;
}
}
}