/* * Copyright 2015 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.atomix; import io.atomix.catalyst.concurrent.Listener; import io.atomix.catalyst.concurrent.ThreadContext; import io.atomix.catalyst.serializer.Serializer; import io.atomix.catalyst.transport.*; import io.atomix.catalyst.transport.local.LocalServerRegistry; import io.atomix.catalyst.transport.local.LocalTransport; import io.atomix.catalyst.util.Assert; import io.atomix.catalyst.util.ConfigurationException; import io.atomix.cluster.ClusterManager; import io.atomix.copycat.Command; import io.atomix.copycat.Query; import io.atomix.copycat.client.*; import io.atomix.copycat.server.CopycatServer; import io.atomix.copycat.server.cluster.Cluster; import io.atomix.copycat.server.cluster.Member; import io.atomix.copycat.server.storage.Storage; import io.atomix.copycat.session.Session; import io.atomix.manager.ResourceClient; import io.atomix.manager.ResourceManagerException; import io.atomix.manager.ResourceServer; import io.atomix.manager.internal.ResourceManagerState; import io.atomix.manager.options.ServerOptions; import io.atomix.manager.util.ResourceManagerTypeResolver; import io.atomix.resource.Resource; import io.atomix.resource.ResourceRegistry; import io.atomix.resource.ResourceType; import java.time.Duration; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.function.Consumer; import java.util.stream.Collectors; /** * Provides an 
interface for creating and operating on {@link io.atomix.resource.Resource}s as a stateful node. * <p> * Replicas serve as a hybrid {@link AtomixClient} and server to allow a server to be embedded * in an application. From the perspective of state, replicas behave like servers in that they * maintain a replicated state machine for {@link io.atomix.resource.Resource}s and fully participate in the underlying * consensus algorithm. From the perspective of resources, replicas behave like {@link AtomixClient}s in that * they may themselves create and modify distributed resources. * <p> * To create a replica, use the {@link #builder(Address)} builder factory. Each replica must * be initially configured with a replica {@link Address} and a list of addresses for other members of the * core cluster. Note that the list of member addresses does not have to include the local replica nor does * it have to include all the replicas in the cluster. As long as the replica can reach one live member of * the cluster, it can join. * <pre> * {@code * List<Address> members = Arrays.asList(new Address("123.456.789.0", 5000), new Address("123.456.789.1", 5000)); * Atomix atomix = AtomixReplica.builder(address, members) * .withTransport(new NettyTransport()) * .withStorage(new Storage(StorageLevel.MEMORY)) * .build(); * } * </pre> * Replicas must be configured with a {@link Transport} and {@link Storage}. By default, if no transport is * configured, the {@code NettyTransport} will be used and will thus be expected to be available on the classpath. * Similarly, if no storage module is configured, replicated commit logs will be written to * {@code System.getProperty("user.dir")} with a default log name. * <h2>Storage</h2> * Replicas manage resource and replicate and store resource state changes on disk. In order to do so, users * must configure the replica's {@link Storage} configuration via the {@link Builder#withStorage(Storage)} method. 
* Storage does not have to be identical on all replicas, but it is important to the desired level of fault-tolerance, * consistency, and performance. Users can configure where and how state changes are stored by configuring the * {@link io.atomix.copycat.server.storage.StorageLevel}. * <pre> * {@code * Atomix atomix = AtomixReplica.builder(address, members) * .withTransport(new NettyTransport()) * .withStorage(Storage.builder() * .withDirectory(new File("logs")) * .withStorageLevel(StorageLevel.MAPPED) * .withMaxSegmentSize(1024 * 1024) * .build()) * .build(); * } * </pre> * For the strongest level of consistency, it's recommended that users use the * {@link io.atomix.copycat.server.storage.StorageLevel#DISK DISK} storage level. For the greatest mix of consistency * and performance, the {@link io.atomix.copycat.server.storage.StorageLevel#MAPPED MAPPED} storage level uses memory * mapped files to persist state changes. The {@link io.atomix.copycat.server.storage.StorageLevel#MEMORY MEMORY} * storage level is recommended only for testing. Atomix cannot guarantee writes will not be lost with {@code MEMORY} * based logs. If a majority of the active replicas in the cluster are lost or partitioned, writes can be overwritten. * Using memory-based storage amounts to recreating the entire replica each time it's started. * <h2>Replica lifecycle</h2> * When the replica is {@link #bootstrap() started}, the replica will attempt to contact members in the configured * startup {@link Address} list. If any of the members are already in an active state, the replica will request * to join the cluster. During the process of joining the cluster, the replica will notify the current cluster * leader of its existence. If the leader already knows about the joining replica, the replica will immediately * join and become a full voting member. 
If the joining replica is not yet known to the rest of the cluster,
 * it will join the cluster in a <em>passive</em> state in which it receives replicated state from other
 * replicas in the cluster but does not participate in elections or other quorum-based aspects of the
 * underlying consensus algorithm. Once the joining replica is caught up with the rest of the cluster, the
 * leader will promote it to a full voting member.
 * <p>
 * Once the replica has joined the cluster, it will persist the updated cluster configuration to disk via
 * the replica's configured {@link Storage} module. This is important to note as in the event that the replica
 * crashes, <em>the replica will recover from its last known configuration</em> rather than the configuration
 * provided to the {@link #builder(Address) builder factory}. This allows Atomix cluster structures
 * to change transparently and independently of the code that configures any given replica. If a persistent
 * {@link io.atomix.copycat.server.storage.StorageLevel} is used, user code should simply configure the replica
 * consistently based on the <em>initial</em> replica configuration, and the replica will recover from the last
 * known cluster configuration in the event of a failure.
 *
 * @author <a href="http://github.com/kuujo">Jordan Halterman</a>
 */
public final class AtomixReplica extends Atomix {

  /**
   * Defines the behavior of a replica within the Atomix cluster.
   * <p>
   * Each replica can be configured with a replica type. The replica type defines how the replica behaves within
   * the Atomix cluster. Throughout the lifetime of a cluster, each replica within the cluster may be promoted
   * or demoted between types by the configured {@link ClusterManager}.
   */
  public enum Type {

    /**
     * Active replicas are full voting members of the Raft cluster.
     * <p>
     * Active replicas are stateful members of the cluster that participate fully in the Raft consensus
     * algorithm. Each Atomix cluster must consist of at least one active replica, and all writes to the
     * cluster go through an active member. The availability of the cluster is dependent on the number of
     * active replicas at any given time. An Atomix cluster can tolerate the failure of a minority of active
     * replicas.
     */
    ACTIVE,

    /**
     * Passive replicas are stateful members of the Atomix cluster that participate in replication via
     * an asynchronous gossip protocol.
     * <p>
     * Passive replicas serve as a mechanism to scale reads and to quickly replace failed or partitioned
     * active members. Writes to the cluster are synchronously replicated to active replicas, and once committed
     * are asynchronously replicated to passive replicas. This allows passive replicas to represent state slightly
     * behind the Raft cluster. Reads from passive replicas are still guaranteed to be sequentially consistent.
     * Resources created on a passive replica that execute sequential queries against the cluster will read from
     * the local replica's state, thus reducing read latency significantly. In the event an active replica
     * is partitioned or crashes, the configured {@link ClusterManager} can replace active replicas with passive
     * replicas to reduce the amount of time required to catch a server up with the Raft cluster.
     */
    PASSIVE,

    /**
     * Reserve replicas are stateless members of the Atomix cluster.
     * <p>
     * Reserve replicas act as standby nodes for the rest of the Atomix cluster. {@link ClusterManager}s configured
     * for the cluster may use reserve replicas to replace failed passive or active replicas as necessary.
     */
    RESERVE,

  }

  /**
   * Returns a new Atomix replica builder.
   * <p>
   * The replica {@link Address} is the address to which the replica will bind to communicate with both
   * clients and other replicas and through which clients and replicas will connect to the constructed replica.
   * <p>
   * The provided set of members will be used to connect to the other members in the Raft cluster.
The members * do not have to be representative of the full cluster membership. * <p> * When starting a new cluster, the cluster should be formed by providing the {@code members} list of the * replicas that make up the initial members of the cluster. If the replica being built is included in the * initial membership list, its {@link Address} should be listed in the {@code members} list. Otherwise, * if the replica is joining an existing cluster, its {@link Address} should not be listed in the membership * list. * <p> * If the replica uses a persistent {@link io.atomix.copycat.server.storage.StorageLevel} like * {@link io.atomix.copycat.server.storage.StorageLevel#DISK DISK} or {@link io.atomix.copycat.server.storage.StorageLevel#MAPPED MAPPED} * then the provided membership list only applies to the first time the replica is started. Once the replica * has been started and joined with other members of the cluster, the updated cluster configuration will be * stored on disk, and if the replica crashes and is restarted it will use the persisted configuration rather * than the user-provided configuration. This behavior cannot be overridden. * * @param address The address through which clients and replicas connect to the replica. * @return The replica builder. */ public static Builder builder(Address address) { return builder(address, address); } /** * Returns a new Atomix replica builder. * <p> * The provided server {@link Address} is the address to which the replica will bind for communication with * other replicas in the cluster. * <p> * The client {@link Address} is the address to which clients will connect to the replica to open and close * resources and submit state change operations. * <p> * The provided set of members will be used to connect to the other members in the Raft cluster. The members * do not have to be representative of the full cluster membership. 
* <p> * When starting a new cluster, the cluster should be formed by providing the {@code members} list of the * replicas that make up the initial members of the cluster. If the replica being built is included in the * initial membership list, its {@link Address} should be listed in the {@code members} list. Otherwise, * if the replica is joining an existing cluster, its {@link Address} should not be listed in the membership * list. * <p> * If the replica uses a persistent {@link io.atomix.copycat.server.storage.StorageLevel} like * {@link io.atomix.copycat.server.storage.StorageLevel#DISK DISK} or {@link io.atomix.copycat.server.storage.StorageLevel#MAPPED MAPPED} * then the provided membership list only applies to the first time the replica is started. Once the replica * has been started and joined with other members of the cluster, the updated cluster configuration will be * stored on disk, and if the replica crashes and is restarted it will use the persisted configuration rather * than the user-provided configuration. This behavior cannot be overridden. * * @param clientAddress The address through which clients connect to the replica. * @param serverAddress The address through which other replicas connect to the replica. * @return The replica builder. */ public static Builder builder(Address clientAddress, Address serverAddress) { return new Builder(clientAddress, serverAddress); } /** * Returns a new Atomix replica builder. * <p> * The replica {@link Address} is the address to which the replica will bind to communicate with both * clients and other replicas and through which clients and replicas will connect to the constructed replica. * <p> * The provided set of members will be used to connect to the other members in the Raft cluster. The members * do not have to be representative of the full cluster membership. 
* <p> * When starting a new cluster, the cluster should be formed by providing the {@code members} list of the * replicas that make up the initial members of the cluster. If the replica being built is included in the * initial membership list, its {@link Address} should be listed in the {@code members} list. Otherwise, * if the replica is joining an existing cluster, its {@link Address} should not be listed in the membership * list. * <p> * If the replica uses a persistent {@link io.atomix.copycat.server.storage.StorageLevel} like * {@link io.atomix.copycat.server.storage.StorageLevel#DISK DISK} or {@link io.atomix.copycat.server.storage.StorageLevel#MAPPED MAPPED} * then the provided membership list only applies to the first time the replica is started. Once the replica * has been started and joined with other members of the cluster, the updated cluster configuration will be * stored on disk, and if the replica crashes and is restarted it will use the persisted configuration rather * than the user-provided configuration. This behavior cannot be overridden. * * @param address The address through which clients and replicas connect to the replica. * @param properties The replica properties. * @return The replica builder. */ public static Builder builder(Address address, Properties properties) { return builder(address, address, properties); } /** * Returns a new Atomix replica builder. * <p> * The provided server {@link Address} is the address to which the replica will bind for communication with * other replicas in the cluster. * <p> * The client {@link Address} is the address to which clients will connect to the replica to open and close * resources and submit state change operations. * <p> * The provided set of members will be used to connect to the other members in the Raft cluster. The members * do not have to be representative of the full cluster membership. 
* <p> * When starting a new cluster, the cluster should be formed by providing the {@code members} list of the * replicas that make up the initial members of the cluster. If the replica being built is included in the * initial membership list, its {@link Address} should be listed in the {@code members} list. Otherwise, * if the replica is joining an existing cluster, its {@link Address} should not be listed in the membership * list. * <p> * If the replica uses a persistent {@link io.atomix.copycat.server.storage.StorageLevel} like * {@link io.atomix.copycat.server.storage.StorageLevel#DISK DISK} or {@link io.atomix.copycat.server.storage.StorageLevel#MAPPED MAPPED} * then the provided membership list only applies to the first time the replica is started. Once the replica * has been started and joined with other members of the cluster, the updated cluster configuration will be * stored on disk, and if the replica crashes and is restarted it will use the persisted configuration rather * than the user-provided configuration. This behavior cannot be overridden. * * @param clientAddress The address through which clients connect to the replica. * @param serverAddress The address through which other replicas connect to the replica. * @return The replica builder. 
*/ public static Builder builder(Address clientAddress, Address serverAddress, Properties properties) { ServerOptions options = new ServerOptions(properties); return new Builder(clientAddress, serverAddress) .withTransport(options.transport()) .withResourceTypes(options.resourceTypes()) .withStorage(Storage.builder() .withStorageLevel(options.storageLevel()) .withDirectory(options.storageDirectory()) .withMaxSegmentSize(options.maxSegmentSize()) .withMaxEntriesPerSegment(options.maxEntriesPerSegment()) .withRetainStaleSnapshots(options.retainStaleSnapshots()) .withCompactionThreads(options.compactionThreads()) .withMinorCompactionInterval(options.minorCompactionInterval()) .withMajorCompactionInterval(options.majorCompactionInterval()) .withCompactionThreshold(options.compactionThreshold()) .build()) .withSerializer(options.serializer()) .withElectionTimeout(options.electionTimeout()) .withHeartbeatInterval(options.heartbeatInterval()) .withSessionTimeout(options.sessionTimeout()); } private final ResourceServer server; private final ClusterManager clusterManager; private AtomixReplica(ResourceClient client, ResourceServer server, ClusterManager clusterManager) { super(client); this.server = Assert.notNull(server, "server"); this.clusterManager = Assert.notNull(clusterManager, "clusterManager"); } /** * Returns the underlying {@link ResourceServer}. * * @return the underlying {@link ResourceServer}. */ public ResourceServer server() { return server; } /** * Returns the replica type. * <p> * The replica type defines how the replica behaves within the Atomix cluster. {@link Type#ACTIVE} and * {@link Type#PASSIVE} replicas are stateful and participate in replication of state changes within the * cluster at different levels. {@link Type#RESERVE} replicas are stateless. * * @return The replica type. 
*/
  public Type type() {
    final Member.Type memberType = server.server().cluster().member().type();
    // A missing or INACTIVE member type has no replica-level counterpart.
    if (memberType == null || memberType == Member.Type.INACTIVE) {
      return null;
    }
    // The remaining member types are mapped to replica types by constant name.
    return Type.valueOf(memberType.name());
  }

  /**
   * Bootstraps a single-node cluster.
   * <p>
   * Bootstrapping a single-node cluster results in the server forming a new cluster to which additional servers
   * can be joined.
   * <p>
   * Only {@link Member.Type#ACTIVE} members can be included in a bootstrap configuration. If the local server is
   * not initialized as an active member, it cannot be part of the bootstrap configuration for the cluster.
   * <p>
   * When the cluster is bootstrapped, the local server will be transitioned into the active state and begin
   * participating in the Raft consensus algorithm. When the cluster is first bootstrapped, no leader will exist.
   * The bootstrapped members will elect a leader amongst themselves. Once a cluster has been bootstrapped, additional
   * members may be {@link #join(Address...) joined} to the cluster. In the event that the bootstrapped members cannot
   * reach a quorum to elect a leader, bootstrap will continue until successful.
   * <p>
   * It is critical that all servers in a bootstrap configuration be started with the same exact set of members.
   * Bootstrapping multiple servers with different configurations may result in split brain.
   * <p>
   * The {@link CompletableFuture} returned by this method will be completed once the cluster has been bootstrapped,
   * a leader has been elected, and the leader has been notified of the local server's client configurations.
   *
   * @return A completable future to be completed once the cluster has been bootstrapped.
   */
  public CompletableFuture<AtomixReplica> bootstrap() {
    // Typed empty list instead of the raw Collections.EMPTY_LIST: no unchecked conversion,
    // so no warning suppression is needed. This delegates to bootstrap(Collection<Address>).
    return bootstrap(Collections.<Address>emptyList());
  }

  /**
   * Bootstraps the cluster using the provided cluster configuration.
   * <p>
   * Bootstrapping the cluster results in a new cluster being formed with the provided configuration.
The initial * nodes in a cluster must always be bootstrapped. This is necessary to prevent split brain. If the provided * configuration is empty, the local server will form a single-node cluster. * <p> * Only {@link Member.Type#ACTIVE} members can be included in a bootstrap configuration. If the local server is * not initialized as an active member, it cannot be part of the bootstrap configuration for the cluster. * <p> * When the cluster is bootstrapped, the local server will be transitioned into the active state and begin * participating in the Raft consensus algorithm. When the cluster is first bootstrapped, no leader will exist. * The bootstrapped members will elect a leader amongst themselves. Once a cluster has been bootstrapped, additional * members may be {@link #join(Address...) joined} to the cluster. In the event that the bootstrapped members cannot * reach a quorum to elect a leader, bootstrap will continue until successful. * <p> * It is critical that all servers in a bootstrap configuration be started with the same exact set of members. * Bootstrapping multiple servers with different configurations may result in split brain. * <p> * The {@link CompletableFuture} returned by this method will be completed once the cluster has been bootstrapped, * a leader has been elected, and the leader has been notified of the local server's client configurations. * * @param cluster The bootstrap cluster configuration. * @return A completable future to be completed once the cluster has been bootstrapped. */ public CompletableFuture<AtomixReplica> bootstrap(Address... cluster) { return bootstrap(Arrays.asList(cluster)); } /** * Bootstraps the cluster using the provided cluster configuration. * <p> * Bootstrapping the cluster results in a new cluster being formed with the provided configuration. The initial * nodes in a cluster must always be bootstrapped. This is necessary to prevent split brain. 
If the provided * configuration is empty, the local server will form a single-node cluster. * <p> * Only {@link Member.Type#ACTIVE} members can be included in a bootstrap configuration. If the local server is * not initialized as an active member, it cannot be part of the bootstrap configuration for the cluster. * <p> * When the cluster is bootstrapped, the local server will be transitioned into the active state and begin * participating in the Raft consensus algorithm. When the cluster is first bootstrapped, no leader will exist. * The bootstrapped members will elect a leader amongst themselves. Once a cluster has been bootstrapped, additional * members may be {@link #join(Address...) joined} to the cluster. In the event that the bootstrapped members cannot * reach a quorum to elect a leader, bootstrap will continue until successful. * <p> * It is critical that all servers in a bootstrap configuration be started with the same exact set of members. * Bootstrapping multiple servers with different configurations may result in split brain. * <p> * The {@link CompletableFuture} returned by this method will be completed once the cluster has been bootstrapped, * a leader has been elected, and the leader has been notified of the local server's client configurations. * * @param cluster The bootstrap cluster configuration. * @return A completable future to be completed once the cluster has been bootstrapped. */ public CompletableFuture<AtomixReplica> bootstrap(Collection<Address> cluster) { return server.bootstrap(cluster) .thenCompose(v -> clusterManager.start(server.server().cluster(), this)) .thenCompose(v -> client.connect(cluster)) .thenApply(v -> this); } /** * Joins the cluster. * <p> * Joining the cluster results in the local server being added to an existing cluster that has already been * bootstrapped. The provided configuration will be used to connect to the existing cluster and submit a join * request. 
Once the server has been added to the existing cluster's configuration, the join operation is complete. * <p> * Any {@link Member.Type type} of server may join a cluster. In order to join a cluster, the provided list of * bootstrapped members must be non-empty and must include at least one active member of the cluster. If no member * in the configuration is reachable, the server will continue to attempt to join the cluster until successful. If * the provided cluster configuration is empty, the returned {@link CompletableFuture} will be completed exceptionally. * <p> * When the server joins the cluster, the local server will be transitioned into its initial state as defined by * the configured {@link Member.Type}. Once the server has joined, it will immediately begin participating in * Raft and asynchronous replication according to its configuration. * <p> * It's important to note that the provided cluster configuration will only be used the first time the server attempts * to join the cluster. Thereafter, in the event that the server crashes and is restarted by {@code join}ing the cluster * again, the last known configuration will be used assuming the server is configured with persistent storage. Only when * the server leaves the cluster will its configuration and log be reset. * <p> * In order to preserve safety during configuration changes, Copycat leaders do not allow concurrent configuration * changes. In the event that an existing configuration change (a server joining or leaving the cluster or a * member being {@link Member#promote() promoted} or {@link Member#demote() demoted}) is under way, the local * server will retry attempts to join the cluster until successful. If the server fails to reach the leader, * the join will be retried until successful. * * @param cluster A collection of cluster member addresses to join. * @return A completable future to be completed once the local server has joined the cluster. 
*/ public CompletableFuture<AtomixReplica> join(Address... cluster) { return join(Arrays.asList(cluster)); } /** * Joins the cluster. * <p> * Joining the cluster results in the local server being added to an existing cluster that has already been * bootstrapped. The provided configuration will be used to connect to the existing cluster and submit a join * request. Once the server has been added to the existing cluster's configuration, the join operation is complete. * <p> * Any {@link Member.Type type} of server may join a cluster. In order to join a cluster, the provided list of * bootstrapped members must be non-empty and must include at least one active member of the cluster. If no member * in the configuration is reachable, the server will continue to attempt to join the cluster until successful. If * the provided cluster configuration is empty, the returned {@link CompletableFuture} will be completed exceptionally. * <p> * When the server joins the cluster, the local server will be transitioned into its initial state as defined by * the configured {@link Member.Type}. Once the server has joined, it will immediately begin participating in * Raft and asynchronous replication according to its configuration. * <p> * It's important to note that the provided cluster configuration will only be used the first time the server attempts * to join the cluster. Thereafter, in the event that the server crashes and is restarted by {@code join}ing the cluster * again, the last known configuration will be used assuming the server is configured with persistent storage. Only when * the server leaves the cluster will its configuration and log be reset. * <p> * In order to preserve safety during configuration changes, Copycat leaders do not allow concurrent configuration * changes. 
In the event that an existing configuration change (a server joining or leaving the cluster or a * member being {@link Member#promote() promoted} or {@link Member#demote() demoted}) is under way, the local * server will retry attempts to join the cluster until successful. If the server fails to reach the leader, * the join will be retried until successful. * * @param cluster A collection of cluster member addresses to join. * @return A completable future to be completed once the local server has joined the cluster. */ public CompletableFuture<AtomixReplica> join(Collection<Address> cluster) { return server.join(cluster) .thenCompose(v -> clusterManager.start(server.server().cluster(), this)) .thenCompose(v -> client.connect(cluster)) .thenApply(v -> this); } /** * Shuts down the server without leaving the Copycat cluster. * * @return A completable future to be completed once the server has been shutdown. */ public CompletableFuture<Void> shutdown() { CompletableFuture<Void> future = new CompletableFuture<>(); clusterManager.stop(server.server().cluster(), this) .whenComplete((managerResult, managerError) -> { client.close().whenComplete((clientResult, clientError) -> { server.shutdown().whenComplete((serverResult, serverError) -> { if (managerError != null) { future.completeExceptionally(managerError); } else if (clientError != null) { future.completeExceptionally(clientError); } else if (serverError != null) { future.completeExceptionally(serverError); } else { future.complete(null); } }); }); }); return future; } /** * Leaves the Copycat cluster. * * @return A completable future to be completed once the server has left the cluster. 
*/
  public CompletableFuture<Void> leave() {
    final CompletableFuture<Void> result = new CompletableFuture<>();
    clusterManager.stop(server.server().cluster(), this).whenComplete((stopped, managerError) ->
      client.close().whenComplete((closed, clientError) ->
        server.leave().whenComplete((left, serverError) -> {
          // All three steps have run by now; surface the earliest failure, if there was one.
          if (managerError != null) {
            result.completeExceptionally(managerError);
            return;
          }
          if (clientError != null) {
            result.completeExceptionally(clientError);
            return;
          }
          if (serverError != null) {
            result.completeExceptionally(serverError);
            return;
          }
          result.complete(null);
        })));
    return result;
  }

  /**
   * Builder for programmatically constructing an {@link AtomixReplica}.
   * <p>
   * The replica builder configures an {@link AtomixReplica} to listen for connections from clients and other
   * servers/replicas, connect to other servers in a cluster, and manage a replicated log. To create a replica builder,
   * use the {@link #builder(Address)} method:
   * <pre>
   *   {@code
   *   Atomix replica = AtomixReplica.builder(address, members)
   *     .withTransport(new NettyTransport())
   *     .withStorage(Storage.builder()
   *       .withDirectory("logs")
   *       .withStorageLevel(StorageLevel.MAPPED)
   *       .build())
   *     .build();
   *   }
   * </pre>
   * The two most essential components of the builder are the {@link Transport} and {@link Storage}. The
   * transport provides the mechanism for the replica to communicate with clients and other replicas in the
   * cluster. All servers, clients, and replicas must implement the same {@link Transport} type. The {@link Storage}
   * module configures how the replica manages the replicated log. Logs can be written to disk or held in
   * memory or memory-mapped files.
*/
  public static class Builder implements io.atomix.catalyst.util.Builder<AtomixReplica> {
    private final Address clientAddress;
    private final CopycatClient.Builder clientBuilder;
    private final CopycatServer.Builder serverBuilder;
    private final ResourceRegistry registry = new ResourceRegistry(RESOURCES);
    private Transport clientTransport;
    private Transport serverTransport;
    private ClusterManager clusterManager;
    private LocalServerRegistry localRegistry = new LocalServerRegistry();

    private Builder(Address clientAddress, Address serverAddress) {
      // The client and server builders each receive their own clone of this serializer.
      final Serializer prototype = new Serializer();
      this.clientAddress = Assert.notNull(clientAddress, "clientAddress");

      this.clientBuilder = CopycatClient.builder()
        .withSerializer(prototype.clone())
        .withServerSelectionStrategy(ServerSelectionStrategies.ANY)
        .withConnectionStrategy(ConnectionStrategies.FIBONACCI_BACKOFF)
        .withRecoveryStrategy(RecoveryStrategies.RECOVER);

      this.serverBuilder = CopycatServer.builder(clientAddress, serverAddress)
        .withSerializer(prototype.clone());
    }

    /**
     * Sets the server member type.
     *
     * @param type The server member type.
     * @return The replica builder.
     */
    public Builder withType(Type type) {
      Assert.notNull(type, "type");
      // Replica types are translated to member types by constant name.
      final Member.Type memberType = Member.Type.valueOf(type.name());
      serverBuilder.withType(memberType);
      return this;
    }

    /**
     * Sets the replica transport, returning the replica builder for method chaining.
     * <p>
     * The configured transport should be the same transport as all other nodes in the cluster.
     * If no transport is explicitly provided, the instance will default to the {@code NettyTransport}
     * if available on the classpath.
     *
     * @param transport The replica transport.
     * @return The replica builder.
     * @throws NullPointerException if {@code transport} is null
     */
    public Builder withTransport(Transport transport) {
      // Stored in the same field that withServerTransport(Transport) sets.
      serverTransport = Assert.notNull(transport, "transport");
      return this;
    }

    /**
     * Sets the client transport, returning the server builder for method chaining.
* <p> * The configured transport should be the same transport as all clients. * If no transport is explicitly provided, the instance will default to the {@code NettyTransport} * if available on the classpath. * * @param transport The server transport. * @return The server builder. * @throws NullPointerException if {@code transport} is null */ public Builder withClientTransport(Transport transport) { this.clientTransport = Assert.notNull(transport, "transport"); return this; } /** * Sets the server transport, returning the server builder for method chaining. * <p> * The configured transport should be the same transport as all other servers in the cluster. * If no transport is explicitly provided, the instance will default to the {@code NettyTransport} * if available on the classpath. * * @param transport The server transport. * @return The server builder. * @throws NullPointerException if {@code transport} is null */ public Builder withServerTransport(Transport transport) { this.serverTransport = Assert.notNull(transport, "transport"); return this; } /** * Sets the serializer, returning the replica builder for method chaining. * <p> * The serializer will be used to serialize and deserialize operations that are sent over the wire. * * @param serializer The serializer. * @return The replica builder. * @throws NullPointerException if {@code serializer} is null */ public Builder withSerializer(Serializer serializer) { clientBuilder.withSerializer(serializer); serverBuilder.withSerializer(serializer); return this; } /** * Sets the replica cluster manager. * * @param clusterManager The replica cluster manager. * @return The replica builder. */ public Builder withClusterManager(ClusterManager clusterManager) { this.clusterManager = Assert.notNull(clusterManager, "clusterManager"); return this; } /** * Sets the replica storage module, returning the replica builder for method chaining. 
* <p> * The storage module is the interface the replica will use to store the persistent replicated log. * For simple configurations, users can simply construct a {@link Storage} object: * <pre> * {@code * Atomix replica = AtomixReplica.builder(address, members) * .withStorage(new Storage("logs")) * .build(); * } * </pre> * Users can configure how state changes are stored on the replica by setting the * {@link io.atomix.copycat.server.storage.StorageLevel}. Use the * {@link io.atomix.copycat.server.storage.Storage.Builder} for the greatest flexibility in configuring * the replica's storage layer: * <pre> * {@code * Atomix replica = AtomixReplica.builder(address, members) * .withStorage(Storage.builder() * .withDirectory("logs") * .withStorageLevel(StorageLevel.MAPPED) * .withCompactionThreads(2) * .build()) * .build(); * } * </pre> * For the greatest safety, it's recommended that users use the * {@link io.atomix.copycat.server.storage.StorageLevel#DISK DISK} storage level. Alternatively, the * {@link io.atomix.copycat.server.storage.StorageLevel#MAPPED MAPPED} storage level provides significantly * greater performance without significantly relaxing safety. The {@link io.atomix.copycat.server.storage.StorageLevel#MEMORY MEMORY} * storage level is not recommended for production. For replicas that use the {@code MEMORY} storage level, * Atomix cannot guarantee writes will not be lost of a majority of the cluster is lost. * * @param storage The replica storage module. * @return The replica builder. * @throws NullPointerException if {@code storage} is null */ public Builder withStorage(Storage storage) { serverBuilder.withStorage(storage); return this; } /** * Sets the replica election timeout, returning the replica builder for method chaining. * <p> * The election timeout is the duration since last contact with the cluster leader after which * the replica should start a new election. 
The election timeout should always be significantly * larger than {@link #withHeartbeatInterval(Duration)} in order to prevent unnecessary elections. * * @param electionTimeout The replica election timeout. * @return The replica builder. * @throws NullPointerException if {@code electionTimeout} is null */ public Builder withElectionTimeout(Duration electionTimeout) { serverBuilder.withElectionTimeout(electionTimeout); return this; } /** * Sets the replica heartbeat interval, returning the replica builder for method chaining. * <p> * The heartbeat interval is the interval at which the replica, if elected leader, should contact * other replicas within the cluster to maintain its leadership. The heartbeat interval should * always be some fraction of {@link #withElectionTimeout(Duration)}. * * @param heartbeatInterval The replica heartbeat interval. * @return The replica builder. * @throws NullPointerException if {@code heartbeatInterval} is null */ public Builder withHeartbeatInterval(Duration heartbeatInterval) { serverBuilder.withHeartbeatInterval(heartbeatInterval); return this; } /** * Sets the replica session timeout, returning the replica builder for method chaining. * <p> * The session timeout is assigned by the replica to a client which opens a new session. The session timeout * dictates the interval at which the client must send keep-alive requests to the cluster to maintain its * session. If a client fails to communicate with the cluster for larger than the configured session * timeout, its session may be expired. * * @param sessionTimeout The replica session timeout. * @return The replica builder. * @throws NullPointerException if {@code sessionTimeout} is null */ public Builder withSessionTimeout(Duration sessionTimeout) { clientBuilder.withSessionTimeout(sessionTimeout); serverBuilder.withSessionTimeout(sessionTimeout); return this; } /** * Sets the replica's global suspend timeout. 
* <p> * The global suspend timeout is an advanced configuration option that controls how long a leader * waits for a partitioned follower to rejoin the cluster before forcing that follower to truncate * its logs. Because of various consistency issues, followers must be forced to truncate their logs * after crashing or being partitioned for a lengthy amount of time in order to allow the cluster to * progress. Specifically, in Atomix while a follower is partitioned replicas cannot compact their * logs of certain types of commits like resource deletes and other tombstones. * <p> * By default, replicas will wait at least an hour for a partitioned follower to rejoin the cluster * before advancing compaction and forcing the follower to truncate its logs once the partition heals. * However, for clusters that have a surplus of replicas, Atomix may replace a partitioned follower * with a connected follower anyways, so this option frequently does not apply to larger clusters. * * @param globalSuspendTimeout The global suspend timeout. * @return The replica builder. * @throws NullPointerException if {@code globalSuspendTimeout} is null */ public Builder withGlobalSuspendTimeout(Duration globalSuspendTimeout) { serverBuilder.withGlobalSuspendTimeout(globalSuspendTimeout); return this; } /** * Sets the available resource types. * * @param types The available resource types. * @return The replica builder. */ public Builder withResourceTypes(Class<? extends Resource<?>>... types) { if (types != null) { return withResourceTypes(Arrays.asList(types).stream().map(ResourceType::new).collect(Collectors.toList())); } return this; } /** * Sets the available resource types. * * @param types The available resource types. * @return The replica builder. */ public Builder withResourceTypes(ResourceType... types) { if (types != null) { return withResourceTypes(Arrays.asList(types)); } return this; } /** * Sets the available resource types. * * @param types The available resource types. 
* @return The replica builder. */ public Builder withResourceTypes(Collection<ResourceType> types) { types.forEach(registry::register); return this; } /** * Adds a resource type to the replica. * * @param type The resource type. * @return The replica builder. */ public Builder addResourceType(Class<? extends Resource<?>> type) { return addResourceType(new ResourceType(type)); } /** * Adds a resource type to the replica. * * @param type The resource type. * @return The replica builder. */ public Builder addResourceType(ResourceType type) { registry.register(type); return this; } /** * Builds the replica. * <p> * If no {@link Transport} was configured for the replica, the builder will attempt to create a * {@code NettyTransport} instance. If {@code io.atomix.catalyst.transport.netty.NettyTransport} is not available * on the classpath, a {@link ConfigurationException} will be thrown. * <p> * Once the replica is built, it is not yet connected to the cluster. To connect the replica to the cluster, * call the asynchronous {@link #bootstrap()} method. * * @return The built replica. * @throws ConfigurationException if the replica is misconfigured */ @Override public AtomixReplica build() { // If no transport was configured by the user, attempt to load the Netty transport. 
if (serverTransport == null) { try { serverTransport = (Transport) Class.forName("io.atomix.catalyst.transport.netty.NettyTransport").newInstance(); } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { throw new ConfigurationException("transport not configured"); } } // Configure the client and server with a transport that routes all local client communication // directly through the local server, ensuring we don't incur unnecessary network traffic by // sending operations to a remote server when a local server is already available in the same JVM.= clientBuilder.withTransport(new CombinedClientTransport(clientAddress, new LocalTransport(localRegistry), clientTransport != null ? clientTransport : serverTransport)) .withServerSelectionStrategy(new CombinedSelectionStrategy(clientAddress)); CopycatClient client = clientBuilder.build(); client.serializer().resolve(new ResourceManagerTypeResolver()); // Iterate through registered resource types and register serializable types on the client serializer. for (ResourceType type : registry.types()) { try { type.factory().newInstance().createSerializableTypeResolver().resolve(client.serializer().registry()); } catch (InstantiationException | IllegalAccessException e) { throw new ResourceManagerException(e); } } // Create a default cluster manager if none was specified. ClusterManager clusterManager = this.clusterManager != null ? this.clusterManager : new ClusterManager() { @Override public CompletableFuture<Void> start(Cluster cluster, AtomixReplica replica) { return CompletableFuture.completedFuture(null); } @Override public CompletableFuture<Void> stop(Cluster cluster, AtomixReplica replica) { return CompletableFuture.completedFuture(null); } }; // Construct the underlying CopycatServer. The server should have been configured with a CombinedTransport // that facilitates the local client connecting directly to the server. 
if (clientTransport != null) { serverBuilder.withClientTransport(new CombinedServerTransport(new LocalTransport(localRegistry), clientTransport)) .withServerTransport(serverTransport); } else { serverBuilder.withTransport(new CombinedServerTransport(new LocalTransport(localRegistry), serverTransport)); } // Set the server resource state machine. serverBuilder.withStateMachine(ResourceManagerState::new); CopycatServer server = serverBuilder.build(); server.serializer().resolve(new ResourceManagerTypeResolver()); // Iterate through registered resource types and register serializable types on the server serializer. for (ResourceType type : registry.types()) { try { type.factory().newInstance().createSerializableTypeResolver().resolve(server.serializer().registry()); } catch (InstantiationException | IllegalAccessException e) { throw new ResourceManagerException(e); } } return new AtomixReplica(new ResourceClient(new CombinedCopycatClient(client, serverTransport)), new ResourceServer(server), clusterManager); } } /** * Copycat client wrapper. 
*/
  private static final class CombinedCopycatClient implements CopycatClient {
    /** Client that every operation except {@link #transport()} is forwarded to. */
    private final CopycatClient delegate;
    /** Transport returned by {@link #transport()} in place of the delegate's own transport. */
    private final Transport reportedTransport;

    /**
     * @param client    the client to delegate to
     * @param transport the transport to expose through {@link #transport()}
     * @throws NullPointerException if either argument is null
     */
    CombinedCopycatClient(CopycatClient client, Transport transport) {
      this.delegate = Assert.notNull(client, "client");
      this.reportedTransport = Assert.notNull(transport, "transport");
    }

    @Override
    public State state() {
      return delegate.state();
    }

    @Override
    public Listener<State> onStateChange(Consumer<State> consumer) {
      return delegate.onStateChange(consumer);
    }

    @Override
    public ThreadContext context() {
      return delegate.context();
    }

    /** Returns the transport supplied at construction rather than asking the delegate. */
    @Override
    public Transport transport() {
      return reportedTransport;
    }

    @Override
    public Serializer serializer() {
      return delegate.serializer();
    }

    @Override
    public Session session() {
      return delegate.session();
    }

    @Override
    public <T> CompletableFuture<T> submit(Command<T> command) {
      return delegate.submit(command);
    }

    @Override
    public <T> CompletableFuture<T> submit(Query<T> query) {
      return delegate.submit(query);
    }

    @Override
    public Listener<Void> onEvent(String event, Runnable callback) {
      return delegate.onEvent(event, callback);
    }

    @Override
    public <T> Listener<T> onEvent(String event, Consumer<T> callback) {
      return delegate.onEvent(event, callback);
    }

    @Override
    public CompletableFuture<CopycatClient> connect(Collection<Address> members) {
      return delegate.connect(members);
    }

    @Override
    public CompletableFuture<CopycatClient> recover() {
      return delegate.recover();
    }

    @Override
    public CompletableFuture<Void> close() {
      return delegate.close();
    }

    @Override
    public String toString() {
      return delegate.toString();
    }
  }

  /**
   * Combined server selection strategy.
*/
  private static class CombinedSelectionStrategy implements ServerSelectionStrategy {
    /** Address that is always placed first in the selection. */
    private final Address address;

    private CombinedSelectionStrategy(Address address) {
      this.address = address;
    }

    /**
     * Returns the configured address first, followed by the remaining servers in random order.
     * <p>
     * The {@code servers} list is copied before shuffling, so the caller's list is never reordered and
     * unmodifiable lists are accepted.
     *
     * @param leader  the current leader; not used by this strategy
     * @param servers the known server addresses
     * @return a new list starting with the configured address, then every other server shuffled
     */
    @Override
    public List<Address> selectConnections(Address leader, List<Address> servers) {
      List<Address> shuffled = new ArrayList<>(servers);
      Collections.shuffle(shuffled);

      // One extra slot because the configured address is added even when it is absent from servers.
      List<Address> ordered = new ArrayList<>(shuffled.size() + 1);
      ordered.add(address);
      for (Address candidate : shuffled) {
        if (!candidate.equals(address)) {
          ordered.add(candidate);
        }
      }
      return ordered;
    }
  }

  /**
   * Combined client transport.
   */
  private static class CombinedClientTransport implements Transport {
    private final Address address;
    private final Transport local;
    private final Transport remote;

    private CombinedClientTransport(Address address, Transport local, Transport remote) {
      this.address = address;
      this.local = local;
      this.remote = remote;
    }

    /** Creates a client that uses the local transport for {@code address} and the remote one otherwise. */
    @Override
    public Client client() {
      return new CombinedClient(address, local.client(), remote.client());
    }

    /** Servers are created by the remote transport only. */
    @Override
    public Server server() {
      return remote.server();
    }
  }

  /**
   * Combined client.
   */
  private static class CombinedClient implements Client {
    private final Address address;
    private final Client local;
    private final Client remote;

    private CombinedClient(Address address, Client local, Client remote) {
      this.address = address;
      this.local = local;
      this.remote = remote;
    }

    /**
     * Connects through the local client when the target equals the configured address, and through the
     * remote client for any other address.
     */
    @Override
    public CompletableFuture<Connection> connect(Address address) {
      if (this.address.equals(address)) {
        return local.connect(address);
      }
      return remote.connect(address);
    }

    /**
     * Closes the remote client and then the local client.
     *
     * @return a future that completes once both clients have closed, or fails with whichever close failed
     */
    @Override
    public CompletableFuture<Void> close() {
      // thenCompose (not thenRun) so the result waits for the local close and carries its failure.
      return remote.close().thenCompose(v -> local.close());
    }
  }

  /**
   * Combined transport that aids in the local client communicating directly with the local server.
*/
  private static class CombinedServerTransport implements Transport {
    private final Transport local;
    private final Transport remote;

    private CombinedServerTransport(Transport local, Transport remote) {
      this.local = local;
      this.remote = remote;
    }

    /** Clients are created by the remote transport only. */
    @Override
    public Client client() {
      return remote.client();
    }

    /** Creates a server that pairs a local-transport server with a remote-transport server. */
    @Override
    public Server server() {
      final Server localServer = local.server();
      final Server remoteServer = remote.server();
      return new CombinedServer(localServer, remoteServer);
    }
  }

  /**
   * Combined server that accepts connections from the local client directly.
   */
  private static class CombinedServer implements Server {
    private final Server local;
    private final Server remote;

    private CombinedServer(Server local, Server remote) {
      this.local = local;
      this.remote = remote;
    }

    /**
     * Starts the local server listening and, once that completes, the remote server, both on the same
     * address and with the same connection listener.
     *
     * @throws NullPointerException if {@code address} or {@code listener} is null
     */
    @Override
    public CompletableFuture<Void> listen(Address address, Consumer<Connection> listener) {
      Assert.notNull(address, "address");
      Assert.notNull(listener, "listener");
      final CompletableFuture<Void> localListening = local.listen(address, listener);
      return localListening.thenCompose(ignored -> remote.listen(address, listener));
    }

    /** Closes the local server and, once that completes, the remote server. */
    @Override
    public CompletableFuture<Void> close() {
      final CompletableFuture<Void> localClosed = local.close();
      return localClosed.thenCompose(ignored -> remote.close());
    }
  }

}