/*
* ToroDB
* Copyright © 2014 8Kdata Technology (www.8kdata.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.torodb.mongodb.commands.pojos;
import com.eightkdata.mongowp.ErrorCode;
import com.eightkdata.mongowp.Status;
import com.eightkdata.mongowp.WriteConcern;
import com.eightkdata.mongowp.WriteConcern.SyncMode;
import com.eightkdata.mongowp.WriteConcern.WType;
import com.eightkdata.mongowp.bson.BsonArray;
import com.eightkdata.mongowp.bson.BsonDocument;
import com.eightkdata.mongowp.bson.BsonDocument.Entry;
import com.eightkdata.mongowp.bson.BsonType;
import com.eightkdata.mongowp.bson.BsonValue;
import com.eightkdata.mongowp.bson.utils.DefaultBsonValues;
import com.eightkdata.mongowp.exceptions.BadValueException;
import com.eightkdata.mongowp.exceptions.FailedToParseException;
import com.eightkdata.mongowp.exceptions.NoSuchKeyException;
import com.eightkdata.mongowp.exceptions.TypesMismatchException;
import com.eightkdata.mongowp.fields.ArrayField;
import com.eightkdata.mongowp.fields.BooleanField;
import com.eightkdata.mongowp.fields.DocField;
import com.eightkdata.mongowp.fields.IntField;
import com.eightkdata.mongowp.fields.LongField;
import com.eightkdata.mongowp.fields.StringField;
import com.eightkdata.mongowp.utils.BsonArrayBuilder;
import com.eightkdata.mongowp.utils.BsonDocumentBuilder;
import com.eightkdata.mongowp.utils.BsonReaderTool;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.net.HostAndPort;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.OptionalInt;
import java.util.Set;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class ReplicaSetConfig {
// Descriptors of the keys this class reads from / writes to a replica set configuration document.
private static final IntField VERSION_FIELD = new IntField("version");
private static final StringField ID_FIELD = new StringField("_id");
private static final ArrayField MEMBERS_FIELD = new ArrayField("members");
private static final DocField SETTINGS_FIELD = new DocField("settings");
// Only referenced from VALID_FIELD_NAMES: the key is tolerated at top level but never read here.
private static final String STEP_DOWN_CHECK_WRITE_CONCERN_MODE_FIELD_NAME = "$stepDownCheck";
private static final LongField PROTOCOL_VERSION_FIELD = new LongField("protocolVersion");
// The following descriptors are looked up inside the "settings" sub-document.
// NOTE(review): "HEARTHBEAT" is a misspelling of "HEARTBEAT"; the BSON key itself is spelled
// correctly.
private static final IntField HEARTHBEAT_TIMEOUT_FIELD = new IntField("heartbeatTimeoutSecs");
private static final BooleanField CHAINING_ALLOWED_FIELD = new BooleanField("chainingAllowed");
@SuppressWarnings("checkstyle:LineLength")
private static final DocField GET_LAST_ERROR_DEFAULTS_FIELD = new DocField("getLastErrorDefaults");
private static final DocField GET_LAST_ERROR_MODES_FIELD = new DocField("getLastErrorModes");
// Defaults applied when the corresponding setting is absent.
private static final int DEFAULT_HEARTBEAT_TIMEOUT_SECONDS = 10;
private static final int DEFAULT_HEARTBEAT_TIMEOUT_MILLIS =
DEFAULT_HEARTBEAT_TIMEOUT_SECONDS * 1000;
private static final boolean DEFAULT_CHAINING_ALLOWED = true;
// Whitelist of top-level keys handed to BsonReaderTool.checkOnlyHasFields by fromDocument.
private static final ImmutableSet<String> VALID_FIELD_NAMES = ImmutableSet.of(
VERSION_FIELD.getFieldName(), ID_FIELD.getFieldName(),
MEMBERS_FIELD.getFieldName(), SETTINGS_FIELD.getFieldName(),
STEP_DOWN_CHECK_WRITE_CONCERN_MODE_FIELD_NAME,
PROTOCOL_VERSION_FIELD.getFieldName()
);
// Replica set name (the "_id" of the configuration document).
private final String setName;
// Configuration version; validate() requires it to be strictly positive.
private final int version;
private final ImmutableList<MemberConfig> members;
private final WriteConcern defaultWriteConcern;
// Documented by its getter as milliseconds.
// NOTE(review): fromDocument stores the raw "heartbeatTimeoutSecs" value here - confirm the unit.
private final int heartbeatTimeoutPeriod;
private final boolean chainingAllowed;
// The three counters below are derived from the member list by the constructor.
private final int majorityVoteCount;
private final int writeMajority;
private final int totalVotingMembers;
// Custom write concern modes, keyed by mode name.
private final Map<String, ReplicaSetTagPattern> customWriteConcern;
private final long protocolVersion;
/**
 * Creates a configuration from already parsed values and derives the vote counters from the
 * given member list.
 *
 * <p>No validation is performed here; see {@link #validate(int, int)}.
 *
 * @param id                     the replica set name
 * @param version                the configuration version
 * @param members                the members of the replica set
 * @param defaultWriteConcern    the write concern used when a request does not specify one
 * @param heartbeatTimeoutPeriod the heartbeat timeout
 * @param chainingAllowed        whether chained replication is allowed
 * @param customWriteConcern     the custom write concern modes, keyed by mode name; the map is
 *                               stored as given, not copied
 * @param protocolVersion        the replication protocol version
 */
public ReplicaSetConfig(
    String id,
    int version,
    ImmutableList<MemberConfig> members,
    WriteConcern defaultWriteConcern,
    int heartbeatTimeoutPeriod,
    boolean chainingAllowed,
    Map<String, ReplicaSetTagPattern> customWriteConcern,
    long protocolVersion) {
  this.setName = id;
  this.version = version;
  this.members = members;
  this.defaultWriteConcern = defaultWriteConcern;
  this.heartbeatTimeoutPeriod = heartbeatTimeoutPeriod;
  this.chainingAllowed = chainingAllowed;
  this.customWriteConcern = customWriteConcern;
  this.protocolVersion = protocolVersion;

  // A member may be both an arbiter and a voter, so the two tallies are independent.
  int votingMembers = 0;
  int arbiterMembers = 0;
  for (MemberConfig candidate : members) {
    arbiterMembers += candidate.isArbiter() ? 1 : 0;
    votingMembers += candidate.isVoter() ? 1 : 0;
  }

  final int strictMajority = votingMembers / 2 + 1;
  this.totalVotingMembers = votingMembers;
  this.majorityVoteCount = strictMajority;
  // The write majority is capped by the number of voters left after subtracting the arbiters.
  this.writeMajority = Math.min(strictMajority, votingMembers - arbiterMembers);
}
/**
 * @return the replica set name this configuration was created with
 */
public String getReplSetName() {
return setName;
}
/**
 * @return the version of this configuration
 */
public int getConfigVersion() {
return version;
}
/**
 * @return the immutable list of member configurations
 */
public ImmutableList<MemberConfig> getMembers() {
return members;
}
/**
 * @return the default write concern this configuration was created with
 */
public WriteConcern getDefaultWriteConcern() {
return defaultWriteConcern;
}
/**
 * The heartbeat timeout period, documented as milliseconds.
 *
 * <p>It cannot be negative if this config {@linkplain #validate(int, int) is valid}.
 *
 * <p>NOTE(review): {@code fromDocument} stores the value of the {@code heartbeatTimeoutSecs}
 * setting here without converting it, so the actual unit should be confirmed.
 *
 * @return the heartbeat timeout period
 */
public int getHeartbeatTimeoutPeriod() {
return heartbeatTimeoutPeriod;
}
/**
 * @return the {@code chainingAllowed} flag this configuration was created with
 */
public boolean isChainingAllowed() {
return chainingAllowed;
}
/**
 * @return the number of votes that form a majority: half of the voting members (rounded down)
 *         plus one
 */
public int getMajorityVoteCount() {
return majorityVoteCount;
}
/**
 * @return the smaller of {@link #getMajorityVoteCount()} and the number of voting members minus
 *         the number of arbiters
 */
public int getWriteMajority() {
return writeMajority;
}
/**
 * @return the number of members for which {@code isVoter()} is true
 */
public int getTotalVotingMembers() {
return totalVotingMembers;
}
/**
 * @return the replication protocol version this configuration was created with
 */
public long getProtocolVersion() {
return protocolVersion;
}
/**
 * @return a read-only view of the custom write concern modes, keyed by mode name
 */
public Map<String, ReplicaSetTagPattern> getCustomWriteConcerns() {
return Collections.unmodifiableMap(customWriteConcern);
}
/**
 * Checks that this configuration is internally consistent.
 *
 * <p>The following conditions are verified, in this order:
 * <ul>
 * <li>the version is strictly positive and the set name is not empty,</li>
 * <li>the heartbeat timeout is not negative,</li>
 * <li>there is at least one member and no more than {@code maxMembers},</li>
 * <li>each member is valid by itself and no two members share an id or a host and port,</li>
 * <li>either all members are localhost references or none is,</li>
 * <li>there is at least one voter and no more than {@code maxVotingMembers},</li>
 * <li>there is at least one non-arbiter member whose priority is greater than 0,</li>
 * <li>the default write concern waits for at least one member or names either
 * {@code "majority"} or a custom write concern mode defined on this configuration.</li>
 * </ul>
 *
 * @param maxMembers       the maximum number of members a replication set should have. It could
 *                         be different on different MongoDB versions.
 * @param maxVotingMembers the maximum number of members who can vote in a replication set. It
 *                         could be different on different MongoDB versions.
 * @return {@code true} whenever the method returns normally
 * @throws BadValueException if any of the conditions above does not hold
 */
public boolean validate(int maxMembers, int maxVotingMembers) throws BadValueException {
  if (version <= 0) {
    throw new BadValueException(VERSION_FIELD.getFieldName() + " field value of " + version
        + " is out of range");
  }
  if (setName.isEmpty()) {
    throw new BadValueException("Replica set configuration must have non-empty "
        + ID_FIELD.getFieldName() + " field");
  }
  if (heartbeatTimeoutPeriod < 0) {
    throw new BadValueException(SETTINGS_FIELD.getFieldName() + "."
        + HEARTHBEAT_TIMEOUT_FIELD.getFieldName() + " field value must be non-negative, "
        + "but found " + heartbeatTimeoutPeriod);
  }
  if (members.size() > maxMembers || members.isEmpty()) {
    throw new BadValueException("Replica set configuration contains " + members.size()
        + " members, but must have at least 1 and no more than " + maxMembers);
  }

  int localhostCount = 0;
  int voterCount = 0;
  int electableCount = 0;
  for (int i = 0; i < members.size(); ++i) {
    MemberConfig memberI = members.get(i);
    memberI.validate();
    if (isLocalhost(memberI.getHostAndPort())) {
      ++localhostCount;
    }
    if (memberI.isVoter()) {
      ++voterCount;
    }
    // Nodes may be arbiters or electable, or neither, but never both.
    if (!memberI.isArbiter() && memberI.getPriority() > 0) {
      ++electableCount;
    }
    // Every pair (j, i) with j < i has already been compared while the outer loop was at j,
    // so only the members that follow i need to be checked.
    for (int j = i + 1; j < members.size(); ++j) {
      MemberConfig memberJ = members.get(j);
      if (memberI.getId() == memberJ.getId()) {
        throw new BadValueException("Found two member configurations with same "
            + ID_FIELD.getFieldName() + " field, "
            + MEMBERS_FIELD.getFieldName() + "." + i + "." + ID_FIELD.getFieldName()
            + " == "
            + MEMBERS_FIELD.getFieldName() + "." + j + "." + ID_FIELD.getFieldName()
            + " == " + memberI.getId());
      }
      if (memberI.getHostAndPort().equals(memberJ.getHostAndPort())) {
        throw new BadValueException("Found two member configurations with same "
            + MemberConfig.HOST_FIELD + " field, "
            + MEMBERS_FIELD.getFieldName() + "." + i + "." + MemberConfig.HOST_FIELD
            + " == "
            + MEMBERS_FIELD.getFieldName() + "." + j + "." + MemberConfig.HOST_FIELD
            + " == " + memberI.getHostAndPort().toString());
      }
    }
  }

  if (localhostCount != 0 && localhostCount != members.size()) {
    throw new BadValueException("Either all host names in a replica set configuration must "
        + "be localhost references, or none must be; found " + localhostCount + " out "
        + "of " + members.size());
  }
  if (voterCount > maxVotingMembers || voterCount == 0) {
    throw new BadValueException("Replica set configuration contains " + voterCount
        + " voting members, but must be at least 1 and no more than " + maxVotingMembers);
  }
  if (electableCount == 0) {
    throw new BadValueException("Replica set configuration must contain at least one "
        + "non-arbiter member with priority > 0");
  }

  switch (defaultWriteConcern.getWType()) {
    case INT:
      if (defaultWriteConcern.getWInt() == 0) {
        throw new BadValueException("Default write concern mode must wait for at least "
            + "1 member");
      }
      break;
    case TEXT:
      // "majority" is always accepted; any other name must be a mode defined on this config.
      if (!defaultWriteConcern.getWString().equals("majority")
          && !customWriteConcern.containsKey(defaultWriteConcern.getWString())) {
        throw new BadValueException("Default write concern requires undefined write "
            + "mode " + defaultWriteConcern.getWString());
      }
      break;
    default:
      throw new AssertionError("Unexpected write concern type " + defaultWriteConcern.getWType());
  }
  return true;
}
/**
 * Tells whether the given address refers to the local machine.
 *
 * <p>An address is considered local when its host is {@code localhost}, starts with
 * {@code 127.}, is {@code ::1}, is the literal {@code anonymous unix socket} or starts with
 * {@code /} (a unix socket path). An empty host is not local.
 *
 * @param hostAndPort the address to check
 * @return true iff the host part of the address is one of the local forms listed above
 */
private static boolean isLocalhost(HostAndPort hostAndPort) {
  String host = hostAndPort.getHostText();
  return host.equals("localhost")
      || host.startsWith("127.")
      || host.equals("::1")
      || host.equals("anonymous unix socket")
      // startsWith, unlike charAt(0), does not throw when the host is empty
      || host.startsWith("/"); // unix socket
}
/**
 * Parses a replica set configuration document.
 *
 * <p>{@code _id}, {@code version} and {@code members} are mandatory. {@code settings}, every
 * key read inside it and {@code protocolVersion} are optional; when one of them is absent the
 * default of the {@link Builder} is kept.
 *
 * <p>The returned configuration is not validated; see {@link #validate(int, int)}.
 *
 * @param bson the configuration document
 * @return the parsed configuration
 * @throws BadValueException      if the document contains an unexpected top-level key or a
 *                                custom write concern constraint that is not positive
 * @throws TypesMismatchException if a key has an unexpected type
 * @throws NoSuchKeyException     if a mandatory key is missing
 * @throws FailedToParseException if the default write concern cannot be parsed
 */
public static ReplicaSetConfig fromDocument(@Nonnull BsonDocument bson)
    throws BadValueException, TypesMismatchException, NoSuchKeyException, FailedToParseException {
  BsonReaderTool.checkOnlyHasFields("replica set configuration", bson, VALID_FIELD_NAMES);
  String id = BsonReaderTool.getString(bson, ID_FIELD);
  int version = BsonReaderTool.getInteger(bson, VERSION_FIELD);
  Builder builder = new Builder(id, version);

  BsonArray uncastedMembers = BsonReaderTool.getArray(bson, MEMBERS_FIELD);
  int i = 0;
  for (BsonValue uncastedMember : uncastedMembers) {
    if (uncastedMember == null || !uncastedMember.isDocument()) {
      throw new TypesMismatchException(
          Integer.toString(i),
          "object",
          uncastedMember == null ? null : uncastedMember.getType()
      );
    }
    builder.addMemberConfig(MemberConfig.fromDocument(uncastedMember.asDocument()));
    i++;
  }

  BsonDocument settings;
  try {
    settings = BsonReaderTool.getDocument(bson, SETTINGS_FIELD);
  } catch (NoSuchKeyException ex) {
    settings = DefaultBsonValues.EMPTY_DOC;
  }

  // NOTE(review): the value of "heartbeatTimeoutSecs" is stored without conversion while the
  // default is expressed in milliseconds; toBson() writes the stored value back under the same
  // key. Confirm the intended unit before changing either side.
  builder
      .setHbTimeout(BsonReaderTool.getInteger(settings, HEARTHBEAT_TIMEOUT_FIELD,
          DEFAULT_HEARTBEAT_TIMEOUT_MILLIS))
      .setChainingAllowed(BsonReaderTool.getBoolean(settings, CHAINING_ALLOWED_FIELD,
          DEFAULT_CHAINING_ALLOWED));

  // getLastErrorDefaults is optional, like the settings document that contains it. Only the
  // lookup is guarded so that errors raised while parsing the write concern still propagate.
  BsonDocument uncastedGetLastErrorDefaults;
  try {
    uncastedGetLastErrorDefaults = BsonReaderTool.getDocument(
        settings,
        GET_LAST_ERROR_DEFAULTS_FIELD
    );
  } catch (NoSuchKeyException ex) {
    uncastedGetLastErrorDefaults = null;
  }
  if (uncastedGetLastErrorDefaults != null) {
    builder.setWriteConcern(WriteConcern.fromDocument(uncastedGetLastErrorDefaults));
  }

  BsonDocument uncastedCustomWriteConcerns;
  try {
    uncastedCustomWriteConcerns = BsonReaderTool.getDocument(
        settings, GET_LAST_ERROR_MODES_FIELD);
  } catch (NoSuchKeyException ex) {
    uncastedCustomWriteConcerns = DefaultBsonValues.EMPTY_DOC;
  }
  Map<String, ReplicaSetTagPattern> customWriteConcerns = parseCustomWriteConcerns(
      uncastedCustomWriteConcerns);
  for (Map.Entry<String, ReplicaSetTagPattern> customWriteConcern
      : customWriteConcerns.entrySet()) {
    builder.putCustomWriteConcern(customWriteConcern.getKey(), customWriteConcern.getValue());
  }

  // protocolVersion is optional: when absent the builder default is kept.
  try {
    builder.setProtocolVersion(BsonReaderTool.getLong(bson, PROTOCOL_VERSION_FIELD));
  } catch (NoSuchKeyException ignored) {
    // intentionally ignored, see above
  }
  return builder.build();
}
/**
 * Mutable builder of {@link ReplicaSetConfig} instances.
 *
 * <p>Unless changed, the built configuration uses the default heartbeat timeout, the default
 * chaining flag, the write concern {@code WriteConcern.with(SyncMode.NONE, 0, 0)}, no custom
 * write concern and protocol version 0.
 */
public static class Builder {

  private final String setName;
  private final int version;
  private final ImmutableList.Builder<MemberConfig> membersBuilder = ImmutableList.builder();
  private int hbTimeout = DEFAULT_HEARTBEAT_TIMEOUT_MILLIS;
  // Kept package-private, as originally declared, so existing same-package code still compiles.
  boolean chainingAllowed = DEFAULT_CHAINING_ALLOWED;
  private WriteConcern wc = WriteConcern.with(SyncMode.NONE, 0, 0);
  private final Map<String, ReplicaSetTagPattern> customWriteConcernsBuilder = new HashMap<>();
  private long protocolVersion = 0;

  /**
   * Creates a builder with no members and default settings.
   *
   * @param setName the replica set name
   * @param version the configuration version
   */
  public Builder(String setName, int version) {
    this.setName = setName;
    this.version = version;
  }

  /**
   * Creates a builder initialized with a copy of the values of the given configuration.
   *
   * @param other the configuration to copy
   */
  public Builder(ReplicaSetConfig other) {
    this.setName = other.setName;
    this.version = other.version;
    this.membersBuilder.addAll(other.members);
    this.hbTimeout = other.heartbeatTimeoutPeriod;
    this.chainingAllowed = other.chainingAllowed;
    this.wc = other.defaultWriteConcern;
    this.customWriteConcernsBuilder.putAll(other.customWriteConcern);
    this.protocolVersion = other.protocolVersion;
  }

  /** Appends a member to the configuration. */
  public Builder addMemberConfig(MemberConfig memberConfig) {
    this.membersBuilder.add(memberConfig);
    return this;
  }

  /** Appends all the given members, in order, to the configuration. */
  public Builder addAllMemberConfig(List<MemberConfig> memberConfigList) {
    this.membersBuilder.addAll(memberConfigList);
    return this;
  }

  /** Sets the heartbeat timeout. */
  public Builder setHbTimeout(int hbTimeout) {
    this.hbTimeout = hbTimeout;
    return this;
  }

  /** Sets whether chained replication is allowed. */
  public Builder setChainingAllowed(boolean chainingAllowed) {
    this.chainingAllowed = chainingAllowed;
    return this;
  }

  /** Sets the default write concern. */
  public Builder setWriteConcern(WriteConcern wc) {
    this.wc = wc;
    return this;
  }

  /** Sets the replication protocol version. */
  public Builder setProtocolVersion(long protocolVersion) {
    this.protocolVersion = protocolVersion;
    return this;
  }

  /** Adds a custom write concern mode, replacing any previous mode with the same name. */
  public Builder putCustomWriteConcern(String key, ReplicaSetTagPattern value) {
    this.customWriteConcernsBuilder.put(key, value);
    return this;
  }

  /**
   * Creates a configuration with the current values of this builder.
   *
   * <p>The builder can still be used afterwards: later changes do not affect configurations
   * that were already built.
   *
   * @return the new configuration
   */
  public ReplicaSetConfig build() {
    return new ReplicaSetConfig(
        setName,
        version,
        membersBuilder.build(),
        wc,
        hbTimeout,
        chainingAllowed,
        // snapshot, so the built config does not alias the mutable state of this builder
        new HashMap<>(customWriteConcernsBuilder),
        protocolVersion
    );
  }
}
/**
 * Looks up a custom write concern mode by name.
 *
 * @param patternName the name of the custom write concern mode
 * @return an ok status wrapping the tag pattern of the mode, or a status with
 *         {@link ErrorCode#UNKNOWN_REPL_WRITE_CONCERN} if this configuration has no mode with
 *         that name
 */
public Status<ReplicaSetTagPattern> getCustomWriteConcernTagPattern(String patternName) {
  final ReplicaSetTagPattern tagPattern = customWriteConcern.get(patternName);
  if (tagPattern != null) {
    return Status.ok(tagPattern);
  }
  final String errorMessage = "No write concern mode named '" + patternName
      + "' found in replica set configuration";
  return Status.from(ErrorCode.UNKNOWN_REPL_WRITE_CONCERN, errorMessage);
}
/**
 * Parses the {@code settings.getLastErrorModes} document.
 *
 * <p>Each key of the document is the name of a custom write concern mode and its value must be
 * a document that maps tag names to strictly positive numbers.
 *
 * @param bson the {@code getLastErrorModes} document; it can be empty
 * @return a new map from mode name to the tag pattern it requires
 * @throws TypesMismatchException if a mode is not a document or a constraint is not a number
 * @throws NoSuchKeyException     declared by the document lookup
 * @throws BadValueException      if a constraint is not strictly positive
 */
private static Map<String, ReplicaSetTagPattern> parseCustomWriteConcerns(BsonDocument bson)
    throws TypesMismatchException, NoSuchKeyException, BadValueException {
  Map<String, ReplicaSetTagPattern> map = new HashMap<>(bson.size());
  for (Entry<?> customWriteNameEntry : bson) {
    String modeName = customWriteNameEntry.getKey();
    BsonDocument constraintDoc = BsonReaderTool.getDocument(bson, modeName);
    Map<String, Integer> constraintMap = new HashMap<>(constraintDoc.size());
    for (Entry<?> tagEntry : constraintDoc) {
      String tagName = tagEntry.getKey();
      int intValue;
      try {
        intValue = tagEntry.getValue().asNumber().intValue();
      } catch (UnsupportedOperationException ex) {
        // the constraint cannot be seen as a number
        String fieldName = customWriteConcernTagPath(modeName, tagName);
        BsonType tagType = tagEntry.getValue().getType();
        throw new TypesMismatchException(
            fieldName,
            "number",
            tagType,
            "Expected " + fieldName + " to be a number, not " + tagType.toString().toLowerCase(
                Locale.ROOT)
        );
      }
      if (intValue <= 0) {
        throw new BadValueException("Value of " + customWriteConcernTagPath(modeName, tagName)
            + " must be positive, but found " + intValue);
      }
      constraintMap.put(tagName, intValue);
    }
    map.put(modeName, new ReplicaSetTagPattern(constraintMap));
  }
  return map;
}

/**
 * Builds the dotted path of a tag constraint, {@code settings.getLastErrorModes.<mode>.<tag>},
 * as used on error messages.
 */
private static String customWriteConcernTagPath(String modeName, String tagName) {
  return SETTINGS_FIELD.getFieldName()
      + '.' + GET_LAST_ERROR_MODES_FIELD.getFieldName()
      + '.' + modeName
      + '.' + tagName;
}
/**
 * Serializes this configuration as a replica set configuration document.
 *
 * <p>{@code _id}, {@code version}, {@code protocolVersion} and {@code members} are written at
 * top level, which is where {@code fromDocument} reads them; the remaining values are written
 * inside the {@code settings} sub-document. Custom write concern modes whose name starts with
 * {@code $} are not serialized.
 *
 * @return the document that represents this configuration
 */
public BsonDocument toBson() {
  BsonDocumentBuilder result = new BsonDocumentBuilder();
  result.append(ID_FIELD, setName);
  result.append(VERSION_FIELD, version);
  // Top-level key: fromDocument does not look for the protocol version inside settings.
  result.append(PROTOCOL_VERSION_FIELD, protocolVersion);

  BsonArrayBuilder membersList = new BsonArrayBuilder();
  for (MemberConfig member : members) {
    membersList.add(member.toBson());
  }
  result.append(MEMBERS_FIELD, membersList.build());

  BsonDocumentBuilder settingsBuilder = new BsonDocumentBuilder();
  settingsBuilder.append(CHAINING_ALLOWED_FIELD, chainingAllowed);
  // NOTE(review): the stored period is written as is under "heartbeatTimeoutSecs", mirroring
  // what fromDocument reads; confirm the intended unit before changing either side.
  settingsBuilder.append(HEARTHBEAT_TIMEOUT_FIELD, heartbeatTimeoutPeriod);

  BsonDocumentBuilder customWrites = new BsonDocumentBuilder();
  for (Map.Entry<String, ReplicaSetTagPattern> entry : customWriteConcern.entrySet()) {
    String customWriteName = entry.getKey();
    if (customWriteName.startsWith("$")) { //MongoDB uses $ as an internal mode
      continue;
    }
    BsonDocument tagMap = entry.getValue().toBson();
    customWrites.appendUnsafe(customWriteName, tagMap);
  }
  settingsBuilder.append(GET_LAST_ERROR_MODES_FIELD, customWrites);
  settingsBuilder.append(GET_LAST_ERROR_DEFAULTS_FIELD, defaultWriteConcern.toDocument());

  result.append(SETTINGS_FIELD, settingsBuilder);
  return result.build();
}
/**
 * @return the textual form of the document produced by {@link #toBson()}
 */
@Override
public String toString() {
  final BsonDocument serialized = toBson();
  return serialized.toString();
}
/**
 * Checks whether the members of this configuration could ever satisfy the given write concern.
 *
 * <p>A textual write concern other than {@code "majority"} is resolved as a custom write concern
 * mode and checked against the tags of the members. Any other write concern is checked by
 * comparing its numeric value with the number of non-arbiter members.
 *
 * <p>NOTE(review): the {@code "majority"} case also goes through {@code getWInt()}; confirm
 * what that call returns for a textual write concern.
 *
 * @param writeConcern the write concern to check
 * @return an ok status if the write concern can be satisfied, the failed lookup status if it
 *         names an unknown custom mode, or a status with
 *         {@link ErrorCode#CANNOT_SATISFY_WRITE_CONCERN} otherwise
 */
public Status<?> checkIfWriteConcernCanBeSatisfied(WriteConcern writeConcern) {
  final boolean namesCustomMode = writeConcern.getWType() == WType.TEXT
      && !writeConcern.getWString().equals("majority");

  if (!namesCustomMode) {
    final int requestedNodes = writeConcern.getWInt();
    int dataBearingNodes = 0;
    for (MemberConfig candidate : getMembers()) {
      if (!candidate.isArbiter()) {
        dataBearingNodes++;
      }
    }
    if (requestedNodes - dataBearingNodes > 0) {
      return Status.from(ErrorCode.CANNOT_SATISFY_WRITE_CONCERN, "Not enough data-bearing nodes");
    }
    return Status.ok();
  }

  final String modeName = writeConcern.getWString();
  final Status<ReplicaSetTagPattern> lookup = getCustomWriteConcernTagPattern(modeName);
  if (!lookup.isOk()) {
    return lookup;
  }
  final ReplicaSetTagMatch tagMatch = lookup.getResult().matcher();
  for (MemberConfig candidate : getMembers()) {
    for (Map.Entry<String, String> tag : candidate.getTags().entrySet()) {
      final boolean satisfied = tagMatch.update(tag.getKey(), tag.getValue());
      if (satisfied) {
        return Status.ok();
      }
    }
  }
  // Even with every node of the set acknowledging a write, the mode would not be satisfied.
  return Status.from(ErrorCode.CANNOT_SATISFY_WRITE_CONCERN,
      "Not enough nodes match write concern mode \"" + writeConcern.getWString() + "\"");
}
/**
 * Looks for the member with the given id.
 *
 * @param memberId the id of the member
 * @return the first member whose id is the given one, or null if there is none
 */
@Nullable
public MemberConfig findMemberById(int memberId) {
  return members.stream()
      .filter(candidate -> candidate.getId() == memberId)
      .findFirst()
      .orElse(null);
}
/**
 * Looks for the position, on the member list, of the member with the given address.
 *
 * @param currentSource the host and port to look for
 * @return the index of the first member with the given host and port, or empty if there is no
 *         member with that host and port
 */
public OptionalInt findMemberIndexByHostAndPort(HostAndPort currentSource) {
  int position = 0;
  for (MemberConfig candidate : members) {
    if (candidate.getHostAndPort().equals(currentSource)) {
      return OptionalInt.of(position);
    }
    position++;
  }
  return OptionalInt.empty();
}
/**
 * A tag matching pattern, like { "dc": 2, "rack": 3 }, of the form used by tagged replica set
 * writes.
 *
 * <p>A pattern matches a set of members iff, for each tag of the pattern, the members define at
 * least <em>i</em> different values for that tag, where <em>i</em> is the number associated with
 * the tag on the pattern.
 */
public static class ReplicaSetTagPattern {

  private final Map<String, Integer> constraints;

  /**
   * @param constraints the minimum number of different values required for each tag name; the
   *                    map is stored as given, not copied
   */
  public ReplicaSetTagPattern(Map<String, Integer> constraints) {
    this.constraints = constraints;
  }

  /**
   * @return a new matcher, with no observed tag, bound to this pattern
   */
  public ReplicaSetTagMatch matcher() {
    return new ReplicaSetTagMatch(this);
  }

  /** Serializes the pattern as a document that maps each tag name to its minimum count. */
  private BsonDocument toBson() {
    final BsonDocumentBuilder document = new BsonDocumentBuilder();
    constraints.forEach((tagName, minCount) ->
        document.appendUnsafe(tagName, DefaultBsonValues.newInt(minCount)));
    return document.build();
  }
}
/**
 * State of the progressive check of a {@link ReplicaSetTagPattern}.
 *
 * <p>A matcher is created from a pattern and is then fed, one at a time, with the tags of the
 * members. It keeps the different values observed for each tag of the pattern and becomes
 * satisfied once every tag has at least as many different values as the pattern requires.
 */
public static class ReplicaSetTagMatch {

  private final Map<String, BoundTagValue> boundTagValues;

  private ReplicaSetTagMatch(ReplicaSetTagPattern pattern) {
    final Map<String, BoundTagValue> bindings = new HashMap<>(pattern.constraints.size());
    pattern.constraints.forEach(
        (tagName, minCount) -> bindings.put(tagName, new BoundTagValue(minCount)));
    this.boundTagValues = bindings;
  }

  /**
   * Records a tag observed on a member. Tags that are not part of the pattern are ignored.
   *
   * @param key   the name of the tag
   * @param value the value of the tag
   * @return whether the pattern is satisfied after recording the tag
   */
  public boolean update(String key, String value) {
    final BoundTagValue binding = boundTagValues.get(key);
    if (binding != null) {
      binding.boundTagValues.add(value);
    }
    return isSatisfied();
  }

  /**
   * @return true iff every tag of the pattern has been observed with at least as many different
   *         values as the pattern requires
   */
  public boolean isSatisfied() {
    return boundTagValues.values().stream()
        .allMatch(binding -> binding.boundTagValues.size() >= binding.min);
  }
}
/**
 * State related to a single tag name of a match pattern.
 *
 * <p>Consists of the minimum number of different values required for the tag and the set of
 * values observed so far.
 *
 * <p>A BoundTagValue is satisfied when the size of {@code boundTagValues} is at least
 * {@code min}.
 */
private static class BoundTagValue {

  private final int min;
  private final Set<String> boundTagValues;

  public BoundTagValue(int min) {
    this.min = min;
    // Not presized with min: the value comes from the configuration document, so a negative
    // number would make the HashSet constructor throw and a very large one would make it
    // allocate a huge table on the first insertion.
    this.boundTagValues = new HashSet<>();
  }
}
}