/*
* Copyright (c) 2013-2015 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.db.server.impl;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import com.netflix.astyanax.AstyanaxContext;
import com.netflix.astyanax.CassandraOperationType;
import com.netflix.astyanax.Cluster;
import com.netflix.astyanax.KeyspaceTracerFactory;
import com.netflix.astyanax.connectionpool.ConnectionContext;
import com.netflix.astyanax.connectionpool.ConnectionPool;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.connectionpool.exceptions.OperationException;
import com.netflix.astyanax.ddl.ColumnFamilyDefinition;
import com.netflix.astyanax.ddl.KeyspaceDefinition;
import com.netflix.astyanax.model.ColumnFamily;
import com.netflix.astyanax.shallows.EmptyKeyspaceTracerFactory;
import com.netflix.astyanax.thrift.AbstractOperationImpl;
import com.netflix.astyanax.thrift.ddl.ThriftColumnFamilyDefinitionImpl;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.gms.Gossiper;
import org.apache.cassandra.locator.IEndpointSnitch;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.CfDef;
import org.apache.cassandra.thrift.KsDef;
import org.apache.commons.lang.StringUtils;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import com.emc.storageos.coordinator.client.model.Constants;
import com.emc.storageos.coordinator.client.model.MigrationStatus;
import com.emc.storageos.coordinator.client.model.Site;
import com.emc.storageos.coordinator.client.model.SiteInfo;
import com.emc.storageos.coordinator.client.model.SiteState;
import com.emc.storageos.coordinator.client.service.CoordinatorClient;
import com.emc.storageos.coordinator.client.service.DrUtil;
import com.emc.storageos.coordinator.common.Configuration;
import com.emc.storageos.coordinator.common.Service;
import com.emc.storageos.coordinator.common.impl.ConfigurationImpl;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.db.client.URIUtil;
import com.emc.storageos.db.client.constraint.AlternateIdConstraint;
import com.emc.storageos.db.client.constraint.ContainmentConstraint;
import com.emc.storageos.db.client.constraint.URIQueryResultList;
import com.emc.storageos.db.client.model.LongMap;
import com.emc.storageos.db.client.model.NamedURI;
import com.emc.storageos.db.client.model.PasswordHistory;
import com.emc.storageos.db.client.model.StringMap;
import com.emc.storageos.db.client.model.TenantOrg;
import com.emc.storageos.db.client.model.VdcVersion;
import com.emc.storageos.db.client.model.VirtualDataCenter;
import com.emc.storageos.db.client.impl.ClassNameTimeSeriesSerializer;
import com.emc.storageos.db.client.impl.CompositeColumnNameSerializer;
import com.emc.storageos.db.client.impl.DbClientImpl;
import com.emc.storageos.db.client.impl.IndexColumnNameSerializer;
import com.emc.storageos.db.client.impl.TimeSeriesColumnNameSerializer;
import com.emc.storageos.db.client.impl.DbClientContext;
import com.emc.storageos.db.client.impl.TimeSeriesType;
import com.emc.storageos.db.client.impl.TypeMap;
import com.emc.storageos.db.common.DataObjectScanner;
import com.emc.storageos.db.common.DbConfigConstants;
import com.emc.storageos.db.common.DbSchemaInterceptorImpl;
import com.emc.storageos.db.common.DbServiceStatusChecker;
import com.emc.storageos.db.common.VdcUtil;
import com.emc.storageos.db.exceptions.DatabaseException;
import com.emc.storageos.security.password.PasswordUtils;
/**
* Utility class for initializing DB schema from model classes
*/
public class SchemaUtil {
// Class-wide SLF4J logger.
private static final Logger _log = LoggerFactory.getLogger(SchemaUtil.class);
// NOTE(review): not referenced in the visible part of this file; presumably the package prefix of
// Cassandra comparator class names - confirm against matchComparator().
private static final String COMPARATOR_PACKAGE = "org.apache.cassandra.db.marshal.";
// Coordinator lock name returned by getBootstrapLockName() for the local db service.
private static final String DB_BOOTSTRAP_LOCK = "dbbootstrap";
// Prefix of the node ids ("node1", "node2", ...) generated by setVdcNodeList().
private static final String VDC_NODE_PREFIX = "node";
// Coordinator lock name returned by getBootstrapLockName() for geodbsvc.
private static final String GEODB_BOOTSTRAP_LOCK = "geodbbootstrap";
// Coordinator lock name taken by checkAndInitStorageSystemTypes().
private static final String STORAGE_SYSTEM_TYPE_INIT_LOCK = "storagesystemtypeinitlock";
// NOTE(review): the two replication factor bounds are not referenced in the visible part of this file;
// presumably consumed by getReplicationFactor() - confirm.
private static final int DEFAULT_REPLICATION_FACTOR = 1;
private static final int MAX_REPLICATION_FACTOR = 5;
// Seconds to sleep between two schema setup attempts in scanAndSetupDb().
private static final int DBINIT_RETRY_INTERVAL = 5;
// waiting 5 mins to init schema (60 retries, DBINIT_RETRY_INTERVAL seconds apart)
private static final int DBINIT_RETRY_MAX = 60;
// speculative_retry value that checkCf() applies to column families of the local keyspace.
public static final String PERCENTILE = "99.0PERCENTILE";
// Cluster and keyspace default to the local db; both can be overridden through their setters.
private String _clusterName = DbClientContext.LOCAL_CLUSTER_NAME;
private String _keyspaceName = DbClientContext.LOCAL_KEYSPACE_NAME;
private CoordinatorClient _coordinator;
// Service descriptor; its name decides local db vs geodb behaviour (see isGeoDbsvc()).
private Service _service;
// Supplies the column family maps returned by getCfMap().
private DataObjectScanner _doScanner;
private DbServiceStatusChecker _statusChecker;
// Short id of the current vdc; used as data center key in the keyspace strategy options.
private String _vdcShortId;
// Node id -> host, populated by setVdcNodeList().
private StringMap _vdcHosts;
private String _vdcEndpoint;
private List<String> _vdcList; // List of all joined vdc
// Optional db tuning properties; may be null (getIntProperty() then returns the supplied default).
private Properties _dbCommonInfo;
private PasswordUtils _passwordUtils;
private DbClientContext clientContext;
// Value reported by isStandby(). NOTE(review): no assignment is visible in this part of the file.
private boolean onStandby = false;
// NOTE(review): no setter or initialization is visible in this part of the file - confirm where it is wired.
private DrUtil drUtil;
// When true, checkStrategyOptionsForGeo() leaves the local db strategy options unchanged.
private Boolean backCompatPreYoda = false;
@Autowired
private DbRebuildRunnable dbRebuildRunnable;
/**
 * Injects the db client context through which this class reaches the Cassandra cluster.
 *
 * @param clientContext client context to use
 */
public void setClientContext(DbClientContext clientContext) {
    this.clientContext = clientContext;
}
/**
 * Injects the descriptor of the service this utility runs in.
 *
 * @param service service info (name and version are read by this class)
 */
public void setService(Service service) {
    this._service = service;
}
/**
 * Injects the coordinator client used for configuration, locks and schema version lookups.
 *
 * @param coordinator coordinator client
 */
public void setCoordinator(CoordinatorClient coordinator) {
    this._coordinator = coordinator;
}
/**
 * Tells whether the current ViPR instance runs in standby mode.
 *
 * @return the value of the {@code onStandby} flag
 */
public boolean isStandby() {
    return this.onStandby;
}
/**
 * Injects the scanner that provides the column family maps (see {@link #getCfMap()}).
 *
 * @param scanner data object scanner
 */
public void setDataObjectScanner(DataObjectScanner scanner) {
    this._doScanner = scanner;
}
/**
 * Injects the status checker; {@code checkCf(true)} asks it for the cluster node count.
 *
 * @param statusChecker db service status checker
 */
@Autowired
public void setStatusChecker(DbServiceStatusChecker statusChecker) {
    this._statusChecker = statusChecker;
}
/**
 * Overrides the keyspace this utility operates on (defaults to the local keyspace).
 *
 * @param keyspaceName keyspace name
 */
public void setKeyspaceName(String keyspaceName) {
    this._keyspaceName = keyspaceName;
}
/**
 * @return name of the keyspace this utility operates on
 */
public String getKeyspaceName() {
    return this._keyspaceName;
}
/**
 * Overrides the cluster name (defaults to the local cluster name).
 *
 * @param clusterName cluster name
 */
public void setClusterName(String clusterName) {
    this._clusterName = clusterName;
}
/**
 * Sets the short id of the vdc of the current site. Mandatory for geodbsvc.
 *
 * @param vdcId the vdc id of current site
 */
public void setVdcShortId(String vdcId) {
    this._vdcShortId = vdcId;
}
/**
 * Sets the endpoint of the current vdc, for example its vip.
 *
 * @param vdcEndpoint vdc end point
 */
public void setVdcEndpoint(String vdcEndpoint) {
    this._vdcEndpoint = vdcEndpoint;
}
/**
 * Injects the common db properties (GC grace periods, compaction strategy, ...).
 *
 * @param dbCommonInfo db properties; may be null, in which case defaults apply
 */
public void setDbCommonInfo(Properties dbCommonInfo) {
    this._dbCommonInfo = dbCommonInfo;
}
/**
 * Injects the password helper used when seeding the password history of local users.
 *
 * @param passwordUtils password utilities
 */
public void setPasswordUtils(PasswordUtils passwordUtils) {
    this._passwordUtils = passwordUtils;
}
/**
 * Replaces the node list of the current vdc. Hosts are keyed by a generated node id made of
 * {@code VDC_NODE_PREFIX} and the 1-based position of the host in the given list.
 *
 * @param nodelist vdc host list
 */
public void setVdcNodeList(List<String> nodelist) {
    if (_vdcHosts != null) {
        _vdcHosts.clear();
    } else {
        _vdcHosts = new StringMap();
    }
    int position = 0;
    for (String host : nodelist) {
        position++;
        // TODO: support both ipv4 and ipv6 later
        _vdcHosts.put(VDC_NODE_PREFIX + position, host);
    }
}
/**
 * Sets the ids of all joined vdcs.
 *
 * @param vdcList vdc id list
 */
public void setVdcList(List<String> vdcList) {
    this._vdcList = vdcList;
}
/**
 * @return ids of all joined vdcs, exactly as handed to {@link #setVdcList(List)}
 */
public List<String> getVdcList() {
    return this._vdcList;
}
/**
 * Returns the column families relevant for the running service.
 *
 * @return the scanner's geo CF map when running as geodbsvc, its regular CF map otherwise
 */
public Map<String, ColumnFamily> getCfMap() {
    if (isGeoDbsvc()) {
        return _doScanner.getGeoCfMap();
    }
    return _doScanner.getCfMap();
}
/**
 * Sets the flag that tells this instance was upgraded from a pre-yoda release.
 *
 * @param backCompatPreYoda true to keep the local db strategy options untouched in multi-vdc setups
 */
public void setBackCompatPreYoda(Boolean backCompatPreYoda) {
    this.backCompatPreYoda = backCompatPreYoda;
}
/**
 * Check if it is geodbsvc
 *
 * @return true when the service name equals {@code Constants.GEODBSVC_NAME}, ignoring case
 */
protected boolean isGeoDbsvc() {
    String serviceName = _service.getName();
    return serviceName.equalsIgnoreCase(Constants.GEODBSVC_NAME);
}
/**
 * Initializes database. Assumes that caller is serializing this call
 * across cluster.
 * <p>
 * Each attempt describes the keyspace and hands it to the standby or active initialization routine.
 * Attempts are repeated every {@code DBINIT_RETRY_INTERVAL} seconds until one of them reports success.
 *
 * @param waitForSchema - indicate we should wait from schema from other site.
 *            false to create keyspace by our own
 * @throws IllegalStateException when raised by the initialization routine (rethrown without retry) or when
 *             the schema is still not set up after more than {@code DBINIT_RETRY_MAX} attempts
 */
public void scanAndSetupDb(boolean waitForSchema) {
    final int retryIntervalSecs = DBINIT_RETRY_INTERVAL;
    for (int attempt = 1;; attempt++) {
        try {
            KeyspaceDefinition kd = clientContext.getCluster().describeKeyspace(_keyspaceName);
            boolean inited = onStandby
                    ? checkAndInitSchemaOnStandby(kd)
                    : checkAndInitSchemaOnActive(kd, waitForSchema);
            if (inited) {
                return;
            }
        } catch (ConnectionException e) {
            _log.warn("Unable to verify DB keyspace, will retry in {} secs", retryIntervalSecs, e);
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is only logged, the thread's interrupt flag is not restored.
            _log.warn("DB keyspace verification interrupted, will retry in {} secs", retryIntervalSecs, e);
        } catch (IllegalStateException e) {
            // not retryable - surface it to the caller
            _log.warn("IllegalStateException: ", e);
            throw e;
        }

        if (attempt > DBINIT_RETRY_MAX) {
            throw new IllegalStateException("Unable to setup schema");
        }

        try {
            Thread.sleep(retryIntervalSecs * 1000);
        } catch (InterruptedException ex) {
            _log.warn("Thread is interrupted during wait for retry", ex);
        }
    }
}
/**
 * Schema initialization step for the active site.
 * <p>
 * When the keyspace does not exist yet, it is either created with a single data center entry (fresh
 * install) or left alone so that the schema can arrive from another site; in both cases {@code false} is
 * returned so that the caller describes the keyspace again. When the keyspace exists, its strategy
 * options are verified and, unless an upgrade is in progress, the column families are checked.
 *
 * @param kd keyspace definition as described by the caller, null when the keyspace does not exist
 * @param waitForSchema true to wait for the schema from another site instead of creating the keyspace
 * @return true when the keyspace existed on entry, false otherwise
 * @throws InterruptedException propagated from the column family check
 * @throws ConnectionException on Cassandra connection problems
 */
private boolean checkAndInitSchemaOnActive(KeyspaceDefinition kd, boolean waitForSchema) throws InterruptedException,
        ConnectionException {
    _log.info("try scan and setup db ...");
    if (kd == null) {
        _log.info("keyspace not exist yet");
        if (waitForSchema) {
            _log.info("wait for schema from other site");
        } else {
            // fresh install
            _log.info("setting current version to {} in zk for fresh install", _service.getVersion());
            setCurrentVersion(_service.getVersion());

            // this must be a new cluster - no schema is present so we create keyspace first.
            // A plain HashMap is used on purpose: a double-brace initialized anonymous subclass would
            // carry a hidden reference to this SchemaUtil instance.
            Map<String, String> strategyOptions = new HashMap<String, String>();
            strategyOptions.put(_vdcShortId, Integer.toString(getReplicationFactor()));
            clientContext.setCassandraStrategyOptions(strategyOptions, true);
        }
        // The keyspace was absent when it was described; the caller has to describe it again.
        return false;
    }

    _log.info("keyspace exist already");
    checkStrategyOptions();

    // create CF's
    String currentDbSchemaVersion = _coordinator.getCurrentDbSchemaVersion();
    String targetVersion = _service.getVersion();

    // A known Cassandra behaviour is that schema changes cannot converge if Cassandra nodes are not in the same
    // version (MessagingService.currentVersion). As a result checkCf() will fail with schema disagreement errors. So
    // - During upgrade, we scan and create new column families before db migration starts (see MigrationHandlerImpl.run).
    //   All cassandra nodes have been upgraded to the same version at that time
    // - For each dbsvc startup, we run checkCf only when we are sure it is not in the middle of upgrade.
    _log.info("Current db schema version {}", currentDbSchemaVersion);
    if (StringUtils.isEmpty(currentDbSchemaVersion) || StringUtils.equals(currentDbSchemaVersion, targetVersion)) {
        checkCf();
        _log.info("scan and setup db schema succeed");
    }
    return true;
}
/**
 * Schema initialization step for a standby site. A standby never creates the keyspace itself.
 *
 * @param kd keyspace definition as described by the caller, null when the keyspace does not exist
 * @return false when the keyspace is not there yet, true once the strategy options have been checked
 * @throws ConnectionException on Cassandra connection problems
 */
private boolean checkAndInitSchemaOnStandby(KeyspaceDefinition kd) throws ConnectionException {
    _log.info("try scan and setup db on standby site ...");
    if (kd == null) {
        _log.info("keyspace not exist yet. Wait {} seconds for schema from active site", DBINIT_RETRY_INTERVAL);
        return false;
    }

    _log.info("keyspace exist already");
    String schemaVersionInZk = _coordinator.getCurrentDbSchemaVersion();
    if (schemaVersionInZk == null) {
        _log.info("set current version for standby site {}", _service.getVersion());
        setCurrentVersion(_service.getVersion());
    }

    Site localSite = drUtil.getLocalSite();
    boolean syncing = SiteState.STANDBY_SYNCING.equals(localSite.getState());
    if (syncing) {
        // Ensure schema agreement before checking the strategy options,
        // since the strategy options from the local site might be older than the active site
        // and shouldn't be relied on any more.
        // If there are unreachable nodes, wait until there is at least
        // one reachable node from the other site (which contains the latest db schema).
        while (clientContext.ensureSchemaAgreement() && getReachableDcCount() <= 1) {
            // keep polling; both calls above are re-evaluated on every round
        }
    }

    checkStrategyOptions();
    return true;
}
/**
 * Counts the distinct data centers that own at least one member the gossiper currently reports as live.
 *
 * @return number of reachable data centers
 */
private int getReachableDcCount() {
    IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch();
    Set<String> reachableDcs = new HashSet<>();
    for (InetAddress endpoint : Gossiper.instance.getLiveMembers()) {
        reachableDcs.add(snitch.getDatacenter(endpoint));
    }
    int dcCount = reachableDcs.size();
    _log.info("Number of reachable data centers: {}", dcCount);
    return dcCount;
}
/**
 * Triggers a data rebuild on a standby site that is being added, resumed or synced, provided the local
 * data revision matches the target data revision stored in the coordinator. Sites in any other state
 * are left untouched.
 *
 * @param localDataRevision data revision of the local site
 */
public void checkDataRevision(String localDataRevision) {
    Site currentSite = drUtil.getLocalSite();
    SiteState siteState = currentSite.getState();
    boolean rebuildCandidate = siteState == SiteState.STANDBY_ADDING
            || siteState == SiteState.STANDBY_RESUMING
            || siteState == SiteState.STANDBY_SYNCING;
    if (!rebuildCandidate) {
        return;
    }

    SiteInfo targetSiteInfo = _coordinator.getTargetInfo(_coordinator.getSiteId(), SiteInfo.class);
    String targetDataRevision = targetSiteInfo.getTargetDataRevision();
    _log.info("Target data revision {}", targetDataRevision);

    if (!localDataRevision.equals(targetDataRevision)) {
        _log.info("Incompatible data revision - local {} target {}. Skip data rebuild", localDataRevision, targetDataRevision);
        return;
    }

    if (siteState != SiteState.STANDBY_SYNCING) {
        // remember the previous state, then persist the switch to SYNCING before rebuilding
        _log.info("Change site state to SYNCING and rebuild data from active site");
        currentSite.setLastState(siteState);
        currentSite.setState(SiteState.STANDBY_SYNCING);
        _coordinator.persistServiceConfiguration(currentSite.toConfiguration());
    }
    dbRebuildRunnable.run();
}
/**
 * Remove paused sites from db/geodb strategy options on the active site.
 *
 * @param strategyOptions keyspace strategy options; modified in place
 * @return true to indicate keyspace strategy option is changed
 */
private boolean checkStrategyOptionsForDROnActive(Map<String, String> strategyOptions) {
    boolean removedAny = false;
    // walk all sites and drop the data center entry of every paused one
    for (Site site : drUtil.listSites()) {
        String dcId = drUtil.getCassandraDcId(site);
        if (!site.getState().equals(SiteState.STANDBY_PAUSED)) {
            continue;
        }
        if (!strategyOptions.containsKey(dcId)) {
            continue;
        }
        _log.info("Remove dc {} from strategy options", dcId);
        strategyOptions.remove(dcId);
        removedAny = true;
    }
    return removedAny;
}
/**
 * Put to be added or resumed standby site into the db/geodb strategy options on each new standby site
 * <p>
 * The local site is looked up once, so that its data center id and its state come from the same snapshot.
 *
 * @param strategyOptions keyspace strategy options; modified in place
 * @return true to indicate keyspace strategy option is changed
 */
private boolean checkStrategyOptionsForDROnStandby(Map<String, String> strategyOptions) {
    // no need to add new site on active site, since dbsvc/geodbsvc are not restarted
    Site localSite = drUtil.getLocalSite();
    String dcId = drUtil.getCassandraDcId(localSite);
    if (strategyOptions.containsKey(dcId)) {
        return false;
    }

    SiteState localState = localSite.getState();
    if (localState.equals(SiteState.STANDBY_PAUSED) ||
            localState.equals(SiteState.STANDBY_DEGRADED) ||
            localState.equals(SiteState.STANDBY_DEGRADING)) {
        // don't add back the paused site
        _log.info("local standby site has been paused/degraded and removed from strategy options. Do nothing");
        return false;
    }

    _log.info("Add {} to strategy options", dcId);
    strategyOptions.put(dcId, Integer.toString(getReplicationFactor()));

    // If we upgrade from pre-yoda versions, the strategy option does not contains active site.
    // we do it once during first add-standby operation on standby site
    Site activeSite = drUtil.getActiveSite();
    String activeSiteDcId = drUtil.getCassandraDcId(activeSite);
    if (!strategyOptions.containsKey(activeSiteDcId)) {
        _log.info("Add {} to strategy options", activeSiteDcId);
        // NOTE(review): the active site uses its raw node count here while the local site uses
        // getReplicationFactor() - confirm that the difference is intended.
        strategyOptions.put(activeSiteDcId, Integer.toString(activeSite.getNodeCount()));
        // Map.remove is a no-op for an absent key, no containsKey check required
        strategyOptions.remove("replication_factor");
    }
    return true;
}
/**
 * Add new VDC into the geodb strategy options
 * <p>
 * Standby sites never change the options. For the local db the options are only touched in a multi-vdc
 * configuration that was not upgraded from a pre-yoda release; for geodb the data center of the current
 * site is added when missing.
 *
 * @param strategyOptions keyspace strategy options; modified in place
 * @return true to indicate keyspace strategy option is changed
 */
private boolean checkStrategyOptionsForGeo(Map<String, String> strategyOptions) {
    if (onStandby) {
        _log.info("Only active site updates geo strategy operation. Do nothing on standby site");
        return false;
    }

    if (!isGeoDbsvc()) {
        // update local db strategy option in multivdc configuration only
        if (!drUtil.isMultivdc()) {
            return false;
        }

        // backCompatPreYoda is a nullable Boolean; compare explicitly instead of auto-unboxing it
        if (Boolean.TRUE.equals(backCompatPreYoda)) {
            _log.info("Upgraded from preyoda release. Keep db strategy options unchanged.");
            return false;
        }

        // for local db, check if current vdc id is in the list
        if (!strategyOptions.containsKey(_vdcShortId)) {
            strategyOptions.clear();
            _log.info("Add {} to strategy options", _vdcShortId);
            strategyOptions.put(_vdcShortId, Integer.toString(getReplicationFactor()));
            return true;
        }
        return false;
    }

    _log.debug("vdcList = {}", _vdcList);
    // on newly added vdc - vdc short id is changed
    if (_vdcList.size() == 1 && !_vdcList.contains(_vdcShortId)) {
        strategyOptions.clear();
    }

    // on removed vdc, its strategyOption need be reset
    boolean isDrConfig = drUtil.listSites().size() > 1;
    if (_vdcList.size() == 1 && strategyOptions.size() > 1 && !isDrConfig) {
        strategyOptions.clear();
    }

    String dcName = _vdcShortId;
    Site currentSite = null;
    try {
        currentSite = drUtil.getLocalSite();
    } catch (Exception e) {
        // Best effort: without a local site the vdc short id serves as data center name.
        // The failure is logged so that an unexpected fallback can be diagnosed.
        _log.warn("Unable to get local site, use vdc short id {} as data center name", _vdcShortId, e);
    }

    if (currentSite != null) {
        dcName = drUtil.getCassandraDcId(currentSite);
    }

    if (strategyOptions.containsKey(dcName)) {
        return false;
    }

    _log.info("Add {} to strategy options", dcName);
    strategyOptions.put(dcName, Integer.toString(getReplicationFactor()));
    return true;
}
/**
 * Check keyspace strategy options for an existing keyspace and update if necessary
 * <p>
 * The DR check (standby or active flavour) and the geo check are both always executed; the options are
 * written back to Cassandra when at least one of them reports a change.
 *
 * @throws ConnectionException on Cassandra connection problems
 */
private void checkStrategyOptions() throws ConnectionException {
    Map<String, String> strategyOptions =
            clientContext.getCluster().describeKeyspace(_keyspaceName).getStrategyOptions();
    _log.info("Current strategyOptions={}", strategyOptions);

    boolean drChanged;
    if (onStandby) {
        drChanged = checkStrategyOptionsForDROnStandby(strategyOptions);
    } else {
        drChanged = checkStrategyOptionsForDROnActive(strategyOptions);
    }
    // evaluated unconditionally - it may modify the options as well
    boolean geoChanged = checkStrategyOptionsForGeo(strategyOptions);

    if (drChanged || geoChanged) {
        _log.info("strategyOptions changed to {}", strategyOptions);
        clientContext.setCassandraStrategyOptions(strategyOptions, true);
    }
}
/**
 * Reads an integer from the common db properties.
 * <p>
 * Surrounding whitespace is ignored, since values loaded from a properties file keep their trailing
 * blanks.
 *
 * @param key property name
 * @param defValue value returned when no properties are configured or the key is absent; may be null
 * @return the parsed value, or {@code defValue}
 * @throws NumberFormatException when the property is present but not a valid integer; the message names
 *             the offending key
 */
private Integer getIntProperty(String key, Integer defValue) {
    if (_dbCommonInfo == null) {
        return defValue;
    }
    String rawValue = _dbCommonInfo.getProperty(key);
    if (rawValue == null) {
        return defValue;
    }
    try {
        return Integer.parseInt(rawValue.trim());
    } catch (NumberFormatException e) {
        // same exception type as before, but telling which property is broken
        NumberFormatException detailed = new NumberFormatException(
                "Invalid integer value \"" + rawValue + "\" for db property " + key);
        detailed.initCause(e);
        throw detailed;
    }
}
/**
 * Checks the column families without requiring all nodes to be online: it is enough that there is only
 * one Cassandra schema version across the current cluster.
 *
 * @throws InterruptedException propagated from {@link #checkCf(boolean)}
 * @throws ConnectionException on Cassandra connection problems
 */
public void checkCf() throws InterruptedException, ConnectionException {
    final boolean waitAllNodesConverge = false;
    checkCf(waitAllNodesConverge);
}
/**
 * Checks all required CF's against keyspace definition. Any missing
 * CF's are created on the fly.
 * Note: it will require all nodes are online and converge at target Cassandra schema version
 * if parameter waitAllNodesConverge is true, otherwise, only require there's only one schema
 * version across current cluster.
 * <p>
 * Existing CFs are updated when their comparator, compaction strategy, gc grace period or (local
 * keyspace only) speculative_retry setting differs from the expected one.
 *
 * @param waitAllNodesConverge true to wait until all cluster nodes agree on the resulting schema version
 * @throws InterruptedException propagated from the schema change helpers or the schema agreement wait
 * @throws ConnectionException on Cassandra connection problems
 * @throws IllegalArgumentException when a CF uses a composite column serializer this class does not know
 */
public void checkCf(boolean waitAllNodesConverge) throws InterruptedException, ConnectionException {
    KeyspaceDefinition kd = clientContext.getCluster().describeKeyspace(_keyspaceName);
    Cluster cluster = clientContext.getCluster();

    // Get default GC grace period for all index CFs in local DB
    Integer indexGcGrace = isGeoDbsvc() ? null : getIntProperty(DbClientImpl.DB_CASSANDRA_INDEX_GC_GRACE_PERIOD, null);

    // schema version returned by the last add/update; stays null when nothing had to be changed
    String latestSchemaVersion = null;
    for (ColumnFamily cf : getCfMap().values()) {
        String comparator = resolveComparatorName(cf);
        ThriftColumnFamilyDefinitionImpl cfd = (ThriftColumnFamilyDefinitionImpl) kd.getColumnFamily(cf.getName());

        // The CF's gc_grace_period will be set if it's an index CF
        Integer cfGcGrace = cf.getColumnSerializer() instanceof IndexColumnNameSerializer ? indexGcGrace : null;
        // If there's specific configuration particular for this CF, take it.
        cfGcGrace = getIntProperty(DbClientImpl.DB_CASSANDRA_GC_GRACE_PERIOD_PREFIX + cf.getName(), cfGcGrace);

        if (cfd == null) {
            // CF is missing - build its definition from scratch
            cfd = (ThriftColumnFamilyDefinitionImpl) cluster.makeColumnFamilyDefinition()
                    .setKeyspace(_keyspaceName)
                    .setName(cf.getName())
                    .setComparatorType(comparator)
                    .setKeyValidationClass(cf.getKeySerializer().getComparatorType().getTypeName());

            if (_keyspaceName.equals(DbClientContext.LOCAL_KEYSPACE_NAME)) {
                CfDef cdef = cfd.getThriftColumnFamilyDefinition();
                // constant-first comparison: a missing speculative_retry value is treated as "differs"
                if (!PERCENTILE.equals(cdef.getSpeculative_retry())) {
                    try {
                        cdef.setSpeculative_retry(PERCENTILE);
                    } catch (Exception e) {
                        _log.info("Failed to set speculative_retry e=", e);
                    }
                }
            }

            TimeSeriesType tsType = TypeMap.getTimeSeriesType(cf.getName());
            if (tsType != null &&
                    tsType.getCompactOptimized() &&
                    _dbCommonInfo != null &&
                    Boolean.TRUE.toString().equalsIgnoreCase(
                            _dbCommonInfo.getProperty(DbClientImpl.DB_STAT_OPTIMIZE_DISK_SPACE, "false"))) {
                String compactionStrategy = _dbCommonInfo.getProperty(DbClientImpl.DB_CASSANDRA_OPTIMIZED_COMPACTION_STRATEGY,
                        "SizeTieredCompactionStrategy");
                _log.info("Setting DB compaction strategy to {}", compactionStrategy);
                int gcGrace = Integer.parseInt(_dbCommonInfo.getProperty(DbClientImpl.DB_CASSANDRA_GC_GRACE_PERIOD,
                        "864000")); // default is 10 days
                _log.info("Setting DB GC grace period to {}", gcGrace);
                cfd.setCompactionStrategy(compactionStrategy)
                        .setGcGraceSeconds(gcGrace);
            } else if (cfGcGrace != null) {
                _log.info("Setting CF:{} gc_grace_period to {}", cf.getName(), cfGcGrace.intValue());
                cfd.setGcGraceSeconds(cfGcGrace.intValue());
            }
            latestSchemaVersion = addColumnFamily(cfd);
        } else {
            // CF exists - collect the differences and push a single update when there are any
            boolean modified = false;
            String existingComparator = cfd.getComparatorType();
            if (!matchComparator(existingComparator, comparator)) {
                _log.info("Comparator mismatch: db {} / schema {}", existingComparator, comparator);
                cfd.setComparatorType(comparator);
                modified = true;
            }

            TimeSeriesType tsType = TypeMap.getTimeSeriesType(cf.getName());
            if (tsType != null &&
                    tsType.getCompactOptimized() &&
                    _dbCommonInfo != null) {
                String compactionStrategy = _dbCommonInfo.getProperty(DbClientImpl.DB_CASSANDRA_OPTIMIZED_COMPACTION_STRATEGY,
                        "SizeTieredCompactionStrategy");
                String existingStrategy = cfd.getCompactionStrategy();
                if (existingStrategy == null || !existingStrategy.contains(compactionStrategy)) {
                    _log.info("Setting DB compaction strategy to {}", compactionStrategy);
                    cfd.setCompactionStrategy(compactionStrategy);
                    modified = true;
                }

                int gcGrace = Integer.parseInt(_dbCommonInfo.getProperty(DbClientImpl.DB_CASSANDRA_GC_GRACE_PERIOD,
                        "864000"));
                if (gcGrace != cfd.getGcGraceSeconds()) {
                    _log.info("Setting DB GC grace period to {}", gcGrace);
                    cfd.setGcGraceSeconds(gcGrace);
                    modified = true;
                }
            } else if (cfGcGrace != null && cfd.getGcGraceSeconds() != cfGcGrace.intValue()) {
                _log.info("Setting CF:{} gc_grace_period to {}", cf.getName(), cfGcGrace.intValue());
                cfd.setGcGraceSeconds(cfGcGrace.intValue());
                modified = true;
            }

            if (_keyspaceName.equals(DbClientContext.LOCAL_KEYSPACE_NAME)) {
                CfDef cdef = cfd.getThriftColumnFamilyDefinition();
                if (!PERCENTILE.equals(cdef.getSpeculative_retry())) {
                    try {
                        cdef.setSpeculative_retry(PERCENTILE);
                        modified = true;
                    } catch (Exception e) {
                        _log.info("Failed to set speculative retry e=", e);
                    }
                }
            }

            if (modified) {
                latestSchemaVersion = updateColumnFamily(cfd);
            }
        }
    }

    if (latestSchemaVersion != null) {
        // -1 means "do not insist on a particular number of nodes"
        if (waitAllNodesConverge) {
            clientContext.waitForSchemaAgreement(latestSchemaVersion, _statusChecker.getClusterNodeCount());
        } else {
            clientContext.waitForSchemaAgreement(latestSchemaVersion, -1);
        }
    }
}

/**
 * Determines the comparator name to use for a CF: the plain type name of its column serializer, or,
 * for composite types, the comparator name published by the matching serializer class.
 */
private String resolveComparatorName(ColumnFamily cf) {
    String comparator = cf.getColumnSerializer().getComparatorType().getTypeName();
    if (!comparator.equals("CompositeType")) {
        return comparator;
    }

    Object serializer = cf.getColumnSerializer();
    if (serializer instanceof CompositeColumnNameSerializer) {
        return CompositeColumnNameSerializer.getComparatorName();
    }
    if (serializer instanceof IndexColumnNameSerializer) {
        return IndexColumnNameSerializer.getComparatorName();
    }
    if (serializer instanceof ClassNameTimeSeriesSerializer) {
        return ClassNameTimeSeriesSerializer.getComparatorName();
    }
    if (serializer instanceof TimeSeriesColumnNameSerializer) {
        return TimeSeriesColumnNameSerializer.getComparatorName();
    }
    throw new IllegalArgumentException("Unsupported composite column serializer "
            + serializer.getClass().getName() + " for CF " + cf.getName());
}
/**
 * Stores the given schema version in the global db configuration of the current site.
 *
 * @param currentVersion schema version to persist
 * @throws IllegalStateException when the global db configuration does not exist
 */
void setCurrentVersion(String currentVersion) {
    String configKind = _coordinator.getDbConfigPath(_service.getName());
    Configuration globalConfig = _coordinator.queryConfiguration(_coordinator.getSiteId(), configKind, Constants.GLOBAL_ID);
    if (globalConfig == null) {
        // we are expecting this to exist, because its initialized from checkGlobalConfiguration
        throw new IllegalStateException("unexpected error, db global configuration is null");
    }
    globalConfig.setConfig(Constants.SCHEMA_VERSION, currentVersion);
    _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), globalConfig);
}
/**
 * Persists the migration status in the versioned global db configuration of the current site. The
 * configuration is created when it does not exist yet.
 *
 * @param status migration status to store
 */
void setMigrationStatus(MigrationStatus status) {
    Configuration globalConfig = _coordinator.queryConfiguration(_coordinator.getSiteId(), getDbConfigPath(), Constants.GLOBAL_ID);
    _log.debug("setMigrationStatus: target version \"{}\" status {}",
            _coordinator.getTargetDbSchemaVersion(), status.name());
    if (globalConfig == null) {
        ConfigurationImpl created = new ConfigurationImpl();
        created.setKind(getDbConfigPath());
        created.setId(Constants.GLOBAL_ID);
        globalConfig = created;
    }
    String statusName = status.name();
    globalConfig.setConfig(Constants.MIGRATION_STATUS, statusName);
    _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), globalConfig);
}
/**
 * Update migration checkpoint to ZK. Assume migration lock is acquired when entering this call.
 * The versioned global db configuration is created when it does not exist yet.
 *
 * @param checkpoint checkpoint to store
 */
void setMigrationCheckpoint(String checkpoint) {
    Configuration globalConfig = _coordinator.queryConfiguration(_coordinator.getSiteId(), getDbConfigPath(), Constants.GLOBAL_ID);
    _log.debug("setMigrationCheckpoint: target version \"{}\" checkpoint {}",
            _coordinator.getTargetDbSchemaVersion(), checkpoint);
    if (globalConfig == null) {
        ConfigurationImpl created = new ConfigurationImpl();
        created.setKind(getDbConfigPath());
        created.setId(Constants.GLOBAL_ID);
        globalConfig = created;
    }
    globalConfig.setConfig(DbConfigConstants.MIGRATION_CHECKPOINT, checkpoint);
    _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), globalConfig);
}
/**
 * Get migration check point from ZK. Db migration is supposed to start from this point.
 *
 * @return the stored checkpoint, or null when the versioned global db configuration does not exist
 */
String getMigrationCheckpoint() {
    Configuration globalConfig = _coordinator.queryConfiguration(_coordinator.getSiteId(), getDbConfigPath(), Constants.GLOBAL_ID);
    _log.debug("getMigrationCheckpoint: target version \"{}\"",
            _coordinator.getTargetDbSchemaVersion());
    if (globalConfig == null) {
        return null;
    }
    return globalConfig.getConfig(DbConfigConstants.MIGRATION_CHECKPOINT);
}
/**
 * Remove migration checkpoint from ZK. Assume migration lock is acquired when entering this call.
 * Nothing happens when the versioned global db configuration does not exist.
 */
void removeMigrationCheckpoint() {
    Configuration globalConfig = _coordinator.queryConfiguration(_coordinator.getSiteId(), getDbConfigPath(), Constants.GLOBAL_ID);
    _log.debug("removeMigrationCheckpoint: target version \"{}\"",
            _coordinator.getTargetDbSchemaVersion());
    if (globalConfig == null) {
        return;
    }
    globalConfig.removeConfig(DbConfigConstants.MIGRATION_CHECKPOINT);
    _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), globalConfig);
}
/**
 * @return coordinator path of the db configuration for this service and the target db schema version
 */
private String getDbConfigPath() {
    String serviceName = _service.getName();
    String targetSchemaVersion = _coordinator.getTargetDbSchemaVersion();
    return _coordinator.getVersionedDbConfigPath(serviceName, targetSchemaVersion);
}
/**
 * Checks whether a root tenant (a tenant whose parent is {@code TenantOrg.NO_PARENT}) exists.
 *
 * @param dbClient db client used for the query
 * @return true when the query yields at least one tenant
 * @throws DatabaseException when the query fails on the database side; callers are expected to retry
 * @throws IllegalStateException on any other failure; the original exception is attached as cause
 */
private boolean isRootTenantExist(DbClient dbClient) {
    URIQueryResultList tenants = new URIQueryResultList();
    try {
        dbClient.queryByConstraint(
                ContainmentConstraint.Factory.getTenantOrgSubTenantConstraint(URI.create(TenantOrg.NO_PARENT)),
                tenants);
        if (tenants.iterator().hasNext()) {
            return true;
        }
        _log.info("root tenant query returned no results");
        return false;
    } catch (DatabaseException ex) {
        _log.error("failed querying for root tenant", ex);
        throw ex; // Throw an DatabaseException and retry
    } catch (Exception ex) {
        _log.error("unexpected error during querying for root tenant", ex);
        // throw IllegalStateException and stop; keep the cause for diagnosis
        throw new IllegalStateException("root tenant query failed", ex);
    }
}
/**
 * Looks up the VirtualDataCenter record whose short id equals the vdc short id of the current site.
 *
 * @param dbClient db client used for the query
 * @return the vdc record, or null when the query returns no result
 * @throws DatabaseException when the query fails on the database side; callers are expected to retry
 * @throws IllegalStateException on any other failure; the original exception is attached as cause
 */
private VirtualDataCenter queryLocalVdc(DbClient dbClient) {
    // all vdc info stored in local db
    try {
        _log.debug("my vdcid: {}", _vdcShortId);
        URIQueryResultList list = new URIQueryResultList();
        AlternateIdConstraint constraints = AlternateIdConstraint.Factory.getVirtualDataCenterByShortIdConstraint(_vdcShortId);
        dbClient.queryByConstraint(constraints, list);

        // a single iterator serves both the emptiness check and the fetch of the first id
        Iterator<URI> vdcIds = list.iterator();
        if (!vdcIds.hasNext()) {
            _log.info("vdc resource query returned no results");
            return null;
        }
        return dbClient.queryObject(VirtualDataCenter.class, vdcIds.next());
    } catch (DatabaseException ex) {
        _log.error("failed querying for vdc resource", ex);
        throw ex; // Throw an DatabaseException and retry
    } catch (Exception ex) {
        _log.error("unexpected error during querying for vdc info", ex);
        // throw IllegalStateException and stop; keep the cause for diagnosis
        throw new IllegalStateException("vdc resource query failed", ex);
    }
}
/**
 * @param dbClient db client used for the lookup
 * @return true when a vdc record for the current site exists
 */
private boolean isVdcInfoExist(DbClient dbClient) {
    VirtualDataCenter localVdc = queryLocalVdc(dbClient);
    return localVdc != null;
}
/**
 * Insert default root tenant
 * <p>
 * Does nothing when the TenantOrg CF is not part of the current CF map or when a root tenant exists
 * already.
 *
 * @param dbClient db client used to query and create the tenant
 */
private void insertDefaultRootTenant(DbClient dbClient) {
    String tenantCfName = TypeMap.getDoType(TenantOrg.class).getCF().getName();
    if (!getCfMap().containsKey(tenantCfName)) {
        _log.error("No TenantOrg CF in geodb!");
        return;
    }

    if (isRootTenantExist(dbClient)) {
        _log.info("root provider tenant exist already, skip insert");
        return;
    }

    /*
     * Following needs to move to boot strapping wizard at some point
     */
    _log.info("insert root provider tenant ...");
    TenantOrg rootTenant = new TenantOrg();
    rootTenant.setId(URIUtil.createId(TenantOrg.class));
    rootTenant.setLabel("Provider Tenant");
    rootTenant.setDescription("Root Provider Tenant");
    // the root tenant has no parent; the named URI carries the tenant's own label
    URI noParent = URI.create(TenantOrg.NO_PARENT);
    rootTenant.setParentTenant(new NamedURI(noParent, rootTenant.getLabel()));
    rootTenant.addRole("SID,root", "TENANT_ADMIN");
    rootTenant.setCreationTime(Calendar.getInstance());
    rootTenant.setInactive(false);
    dbClient.createObject(rootTenant);
    _log.info("The root tenant {} has been inserted", rootTenant.getId());
}
/**
 * @return name of the bootstrap lock: the geodb one when running as geodbsvc, the local db one otherwise
 */
private String getBootstrapLockName() {
    if (isGeoDbsvc()) {
        return GEODB_BOOTSTRAP_LOCK;
    }
    return DB_BOOTSTRAP_LOCK;
}
/**
 * Insert vdc info of current site
 * <p>
 * Does nothing when the VirtualDataCenter CF is not part of the current CF map or when a record for the
 * current vdc short id exists already.
 *
 * @param dbClient db client used to query and create the vdc record
 */
private void insertMyVdcInfo(DbClient dbClient) throws UnknownHostException {
    String vdcCfName = TypeMap.getDoType(VirtualDataCenter.class).getCF().getName();
    if (!getCfMap().containsKey(vdcCfName)) {
        _log.error("Unable to find VirtualDataCenter CF in current keyspace");
        return;
    }

    if (isVdcInfoExist(dbClient)) {
        return;
    }

    _log.info("insert vdc info of current site...");
    VirtualDataCenter localVdc = new VirtualDataCenter();
    localVdc.setId(URIUtil.createVirtualDataCenterId(_vdcShortId));
    localVdc.setShortId(_vdcShortId);
    localVdc.setLabel(_vdcShortId);
    localVdc.setConnectionStatus(VirtualDataCenter.ConnectionStatus.ISOLATED);
    localVdc.setRepStatus(VirtualDataCenter.GeoReplicationStatus.REP_NONE);
    localVdc.setVersion(System.currentTimeMillis()); // timestamp
    localVdc.setApiEndpoint(_vdcEndpoint);
    localVdc.setLocal(true);
    dbClient.createObject(localVdc);
}
/**
 * initialize PasswordHistory CF
 * <p>
 * Creates a password history record for every built-in local user that has none yet. The record holds
 * a single entry: the user's current encrypted password with timestamp 0.
 *
 * @param dbClient db client used to create the records
 */
private void insertPasswordHistory(DbClient dbClient) {
    final String[] localUsers = { "root", "sysmonitor", "svcuser", "proxyuser" };
    for (String user : localUsers) {
        if (_passwordUtils.getPasswordHistory(user) != null) {
            // history exists already
            continue;
        }

        PasswordHistory history = new PasswordHistory();
        history.setId(PasswordUtils.getLocalPasswordHistoryURI(user));

        String encpassword = user.equals("proxyuser")
                ? _passwordUtils.getEncryptedString("ChangeMe")
                : _passwordUtils.getUserPassword(user);

        // set the first password history entry's time to 0, to remove the impact of ChangeInterval
        // rule, if local users want to change their own password just after the installation.
        LongMap passwordHash = new LongMap();
        passwordHash.put(encpassword, 0L);
        history.setUserPasswordHash(passwordHash);
        dbClient.createObject(history);
    }
}
/**
 * Initializes the StorageSystemType CF under the STORAGE_SYSTEM_TYPE_INIT_LOCK lock.
 * <p>
 * Skipped entirely on a standby site. Failures are logged and not propagated, so
 * this method is best-effort.
 *
 * @param dbClient client handed to StorageSystemTypesInitUtils
 */
public void checkAndInitStorageSystemTypes(DbClient dbClient) {
    if (onStandby) {
        _log.info("Skip StorageSystemType CF initialization on standby site");
        return;
    }
    InterProcessLock lock = null;
    // Tracks whether acquire() returned normally, so that we never try to release
    // a lock this thread does not hold.
    boolean acquired = false;
    try {
        lock = _coordinator.getLock(STORAGE_SYSTEM_TYPE_INIT_LOCK);
        _log.info("StorageSystemType check - waiting for StorageSystemType CF init lock");
        lock.acquire();
        acquired = true;
        StorageSystemTypesInitUtils utils = new StorageSystemTypesInitUtils(dbClient);
        utils.initializeStorageSystemTypes();
    } catch (Exception e) {
        if (acquired) {
            _log.warn("Exception happened while initializing StorageSystemType CF", e);
        } else {
            _log.warn("Exception happened when trying to acquire lock", e);
        }
    } finally {
        if (acquired) {
            try {
                lock.release();
            } catch (Exception e) {
                _log.error("Fail to release lock", e);
            }
        }
    }
}
/**
 * Init the bootstrap info, including:
 * check and setup root tenant or my vdc info, if it doesn't exist.
 * <p>
 * The work is done under the lock named by getBootstrapLockName() and is retried
 * every DBINIT_RETRY_INTERVAL seconds until it succeeds. An IllegalStateException
 * is rethrown immediately instead of being retried. An interrupt during the retry
 * sleep is only logged and the loop keeps retrying.
 *
 * @param dbClient client used to query and insert the bootstrap records
 * @throws IllegalStateException if one is raised while taking the lock or inserting records
 */
public void checkAndSetupBootStrapInfo(DbClient dbClient) {
    // Standby site need not do the bootstrap
    if (onStandby) {
        _log.info("Skip boot strap info initialization on standby site");
        return;
    }
    // Only the first VDC need check root tenant
    if (_vdcList != null && _vdcList.size() > 1) {
        _log.info("Skip root tenant check for more than one vdcs. Current number of vdcs: {}", _vdcList.size());
        return;
    }
    int retryIntervalSecs = DBINIT_RETRY_INTERVAL;
    boolean done = false;
    while (!done) {
        boolean wait = false;
        InterProcessLock lock = null;
        // Set only after acquire() returns, so a failed getLock()/acquire() does not
        // lead to releasing a lock this thread never held.
        boolean acquired = false;
        try {
            lock = _coordinator.getLock(getBootstrapLockName());
            _log.info("bootstrap info check - waiting for bootstrap lock");
            lock.acquire();
            acquired = true;
            if (isGeoDbsvc()) {
                // insert root tenant if not exist for geodb
                insertDefaultRootTenant(dbClient);
            } else {
                // insert default vdc info if not exist for local db
                insertMyVdcInfo(dbClient);
                // insert VdcVersion if not exist; this is done by the local db service only
                // (not by the geo db service) to avoid race condition.
                insertVdcVersion(dbClient);
                // insert local user's password history if not exist for local db
                insertPasswordHistory(dbClient);
            }
            done = true;
        } catch (IllegalStateException e) {
            // not retryable, hand it to the caller
            throw e;
        } catch (Exception e) {
            _log.warn("Exception while checking for bootstrap info, will retry in {} secs", retryIntervalSecs, e);
            wait = true;
        } finally {
            if (acquired) {
                try {
                    lock.release();
                } catch (Exception e) {
                    _log.error("Fail to release lock", e);
                }
            }
        }
        if (wait) {
            try {
                // long arithmetic so that a large interval cannot overflow int
                Thread.sleep(retryIntervalSecs * 1000L);
            } catch (InterruptedException ex) {
                _log.warn("Thread is interrupted during wait for retry", ex);
            }
        }
    }
}
/**
 * Matches comparator names from db against code schema.
 * <p>
 * The code-side name is prefixed with COMPARATOR_PACKAGE unless it already starts
 * with it, and is then compared to the db-side name with String.equals.
 *
 * @param dbschema comparator name as recorded in the db
 * @param codeschema comparator name from the code schema, with or without the package prefix
 * @return true if the db name equals the (prefixed) code name
 */
public boolean matchComparator(String dbschema, String codeschema) {
    // todo this should take schema versions into account
    // data object types should have version annotation + version info recorded into CF
    final String qualifiedCodeSchema = codeschema.startsWith(COMPARATOR_PACKAGE)
            ? codeschema
            : COMPARATOR_PACKAGE + codeschema;
    return dbschema.equals(qualifiedCodeSchema);
}
/**
 * Adds CF to keyspace.
 * <p>
 * The operation runs through the cluster connection pool with a copy of the
 * configured retry policy. Before creating, it checks whether a CF with the same
 * name is already listed for the keyspace.
 *
 * @param def definition of the column family to create
 * @return the string returned by system_add_column_family, or null when the CF
 *         already exists
 */
@SuppressWarnings("unchecked")
public String addColumnFamily(final ThriftColumnFamilyDefinitionImpl def) {
    AstyanaxContext<Cluster> clusterContext = clientContext.getClusterContext();
    final KeyspaceTracerFactory tracerFactory = EmptyKeyspaceTracerFactory.getInstance();
    ConnectionPool<Cassandra.Client> connectionPool =
            (ConnectionPool<Cassandra.Client>) clusterContext.getConnectionPool();
    final String cfname = def.getName();
    _log.info("Adding CF: {}", cfname);
    try {
        AbstractOperationImpl<String> addIfAbsent = new AbstractOperationImpl<String>(
                tracerFactory.newTracer(CassandraOperationType.ADD_COLUMN_FAMILY)) {
            @Override
            public String internalExecute(Cassandra.Client client, ConnectionContext connectionContext)
                    throws Exception {
                client.set_keyspace(_keyspaceName);
                // This method can be retried several times, so server may already have received the
                // 'creating CF' request and created the CF, we check the existence of the CF first
                // before issuing another 'creating CF' request which will cause the
                // 'CF already exists' exception
                KsDef keyspaceDef = client.describe_keyspace(_keyspaceName);
                List<CfDef> existingCfs = keyspaceDef.getCf_defs();
                for (CfDef existing : existingCfs) {
                    if (existing.getName().equals(cfname)) {
                        _log.info("The CF {} has already been created", cfname);
                        return null;
                    }
                }
                _log.info("To create CF {}", cfname);
                return client.system_add_column_family(def.getThriftColumnFamilyDefinition());
            }
        };
        return connectionPool
                .executeWithFailover(addIfAbsent,
                        clusterContext.getAstyanaxConfiguration().getRetryPolicy().duplicate())
                .getResult();
    } catch (final OperationException e) {
        throw DatabaseException.retryables.operationFailed(e);
    } catch (final ConnectionException e) {
        throw DatabaseException.retryables.connectionFailed(e);
    }
}
/**
 * Updates CF.
 * <p>
 * The operation runs through the cluster connection pool with a copy of the
 * configured retry policy.
 *
 * @param def new definition of the column family
 * @return the string returned by system_update_column_family
 */
@SuppressWarnings("unchecked")
public String updateColumnFamily(final ThriftColumnFamilyDefinitionImpl def) {
    AstyanaxContext<Cluster> clusterContext = clientContext.getClusterContext();
    final KeyspaceTracerFactory tracerFactory = EmptyKeyspaceTracerFactory.getInstance();
    ConnectionPool<Cassandra.Client> connectionPool =
            (ConnectionPool<Cassandra.Client>) clusterContext.getConnectionPool();
    _log.info("Updating CF: {}", def.getName());
    try {
        AbstractOperationImpl<String> updateCf = new AbstractOperationImpl<String>(
                tracerFactory.newTracer(CassandraOperationType.UPDATE_COLUMN_FAMILY)) {
            @Override
            public String internalExecute(Cassandra.Client client, ConnectionContext connectionContext)
                    throws Exception {
                client.set_keyspace(_keyspaceName);
                return client.system_update_column_family(def.getThriftColumnFamilyDefinition());
            }
        };
        return connectionPool
                .executeWithFailover(updateCf,
                        clusterContext.getAstyanaxConfiguration().getRetryPolicy().duplicate())
                .getResult();
    } catch (final OperationException e) {
        throw DatabaseException.retryables.operationFailed(e);
    } catch (final ConnectionException e) {
        throw DatabaseException.retryables.connectionFailed(e);
    }
}
/**
 * Drop CF.
 * <p>
 * The operation runs through the connection pool of the given context with a copy
 * of its configured retry policy, and is traced as a DROP_COLUMN_FAMILY operation.
 *
 * @param cfName column family name
 * @param context cluster context supplying the connection pool and retry policy
 * @return the string returned by system_drop_column_family; dropUnusedCfsIfExists
 *         passes it on as the schema version to wait for
 */
@SuppressWarnings("unchecked")
public String dropColumnFamily(final String cfName, AstyanaxContext<Cluster> context) {
    final KeyspaceTracerFactory ks = EmptyKeyspaceTracerFactory.getInstance();
    ConnectionPool<Cassandra.Client> pool = (ConnectionPool<Cassandra.Client>) context.getConnectionPool();
    _log.info("Dropping CF: {}", cfName);
    try {
        return pool.executeWithFailover(
                new AbstractOperationImpl<String>(
                        // trace under the drop operation type, not the update one
                        ks.newTracer(CassandraOperationType.DROP_COLUMN_FAMILY)) {
                    @Override
                    public String internalExecute(Cassandra.Client client, ConnectionContext context) throws Exception {
                        client.set_keyspace(_keyspaceName);
                        return client.system_drop_column_family(cfName);
                    }
                }, context.getAstyanaxConfiguration().getRetryPolicy().duplicate()).getResult();
    } catch (final OperationException e) {
        throw DatabaseException.retryables.operationFailed(e);
    } catch (final ConnectionException e) {
        throw DatabaseException.retryables.connectionFailed(e);
    }
}
/**
 * Get replication factor. MAX_REPLICATION_FACTOR is the maximum replication factor
 * we will use. If the cluster has fewer nodes than that, the replication factor is
 * the number of nodes.
 *
 * @return DEFAULT_REPLICATION_FACTOR when no coordinator is set, otherwise the
 *         cluster node count capped at MAX_REPLICATION_FACTOR
 */
private int getReplicationFactor() {
    if (_coordinator == null) {
        return DEFAULT_REPLICATION_FACTOR;
    }
    final int nodeCount = _statusChecker.getClusterNodeCount();
    return Math.min(nodeCount, MAX_REPLICATION_FACTOR);
}
/**
 * Inserts the VdcVersion record of the local vdc if it does not exist yet; an
 * existing record is left as it is.
 *
 * @param dbClient client used to query and create the record
 */
public void insertVdcVersion(final DbClient dbClient) {
    final boolean updateExisting = false;
    insertOrUpdateVdcVersion(dbClient, updateExisting);
}
/**
 * Makes sure a VdcVersion record exists for the local vdc and, optionally, brings
 * its version up to date.
 * <p>
 * The db version is built from the first two parts of the service version, joined
 * with a dot.
 *
 * @param dbClient client used to query, create and persist VdcVersion records
 * @param update when true, an existing record whose version differs is overwritten
 *            with the current db version
 */
public void insertOrUpdateVdcVersion(final DbClient dbClient, boolean update) {
    final String serviceVersion = this._service.getVersion();
    final String[] versionParts = StringUtils.split(serviceVersion, DbConfigConstants.VERSION_PART_SEPERATOR);
    final String version = versionParts[0] + "." + versionParts[1];
    final URI vdcId = VdcUtil.getLocalVdc().getId();

    List<URI> allIds = dbClient.queryByType(VdcVersion.class, true);
    List<VdcVersion> allVersions = dbClient.queryObject(VdcVersion.class, allIds);
    VdcVersion vdcVersion = getVdcVersion(allVersions, vdcId);

    if (vdcVersion != null) {
        _log.info("Skip inserting because Vdc version exists for vdc={}, dbVersion={}", vdcId, version);
    } else {
        _log.info("insert new Vdc db version vdc={}, dbVersion={}", vdcId, version);
        vdcVersion = new VdcVersion();
        vdcVersion.setId(URIUtil.createId(VdcVersion.class));
        vdcVersion.setVdcId(vdcId);
        vdcVersion.setVersion(version);
        dbClient.createObject(vdcVersion);
    }

    // Only touch the stored record when asked to and when it is actually out of date
    if (!update) {
        return;
    }
    if (!vdcVersion.getVersion().equals(version)) {
        _log.info("update Vdc db version vdc={} to dbVersion={}", vdcId, version);
        vdcVersion.setVersion(version);
        dbClient.persistObject(vdcVersion);
    }
}
/**
 * Finds the VdcVersion record that belongs to the given vdc.
 *
 * @param vdcVersions records to search, may be null
 * @param vdcId id of the vdc to look for
 * @return the first record whose vdc id equals vdcId, or null if the list is null,
 *         has no elements, or holds no match
 */
private static VdcVersion getVdcVersion(List<VdcVersion> vdcVersions, URI vdcId) {
    if (vdcVersions == null) {
        return null;
    }
    if (!vdcVersions.iterator().hasNext()) {
        return null;
    }
    VdcVersion match = null;
    Iterator<VdcVersion> cursor = vdcVersions.iterator();
    while (match == null && cursor.hasNext()) {
        VdcVersion candidate = cursor.next();
        if (candidate.getVdcId().equals(vdcId)) {
            match = candidate;
        }
    }
    return match;
}
/**
 * Drops every column family named in DbSchemaInterceptorImpl.getIgnoreCfList() that
 * still exists in the keyspace, waiting for schema agreement after each drop.
 * <p>
 * Any exception, including the IllegalStateException raised here when the keyspace
 * cannot be described, is logged and reported through the return value rather than
 * propagated.
 *
 * @return true if all unused column families were handled without error, false otherwise
 */
public boolean dropUnusedCfsIfExists() {
    AstyanaxContext<Cluster> context = clientContext.getClusterContext();
    try {
        // Look the CFs up in the keyspace that dropColumnFamily operates on
        KeyspaceDefinition kd = context.getClient().describeKeyspace(_keyspaceName);
        if (kd == null) {
            String errMsg = "Fatal error: Keyspace not exist when drop cf";
            _log.error(errMsg);
            throw new IllegalStateException(errMsg);
        }
        for (String cfName : DbSchemaInterceptorImpl.getIgnoreCfList()) {
            ColumnFamilyDefinition cfd = kd.getColumnFamily(cfName);
            if (cfd != null) {
                _log.info("drop cf {} from db", cfName);
                String schemaVersion = dropColumnFamily(cfName, context);
                clientContext.waitForSchemaAgreement(schemaVersion);
            }
        }
    } catch (Exception e) {
        _log.error("drop Cf error ", e);
        return false;
    }
    return true;
}
/**
 * Sets the DrUtil and caches whether it reports this site as a standby.
 *
 * @param drUtil helper whose isStandby() result is stored in onStandby
 */
public void setDrUtil(DrUtil drUtil) {
    this.drUtil = drUtil;
    final boolean standby = drUtil.isStandby();
    onStandby = standby;
}
}