/**
 * Copyright (c) 2015 The original author or authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.reveno.atp.clustering.api;

import java.util.List;

public interface ClusterConfiguration {

    /**
     * The network address of the current node and its unique Node ID.
     * Depending on the concrete {@link Cluster} and {@link ClusterBuffer} implementations
     * used in the current Reveno instance, this host and port will be used for TCP binding,
     * and the Node ID for unique identification of the current node with multicast failover.
     *
     * @param host   of the current node in the cluster
     * @param port   of the current node in the cluster
     * @param nodeId unique node identifier in the cluster
     */
    void currentNodeAddress(String host, String port, String nodeId);

    /**
     * The network address of the current node and its unique Node ID.
     * Depending on the concrete {@link org.reveno.atp.clustering.core.buffer.ClusterProvider} implementation
     * used in the current Reveno instance, this host and port will be used for TCP binding,
     * and the Node ID for unique identification of the current node with multicast failover.
     *
     * @param host   of the current node in the cluster
     * @param port   of the current node in the cluster
     * @param nodeId unique node identifier in the cluster
     * @param mode   of IO between the other nodes
     */
    void currentNodeAddress(String host, String port, String nodeId, IOMode mode);

    /**
     * Abstract Address which identifies the current node in the cluster. It should be
     * provided in a format which the currently used
     * {@link org.reveno.atp.clustering.core.buffer.ClusterProvider} can understand.
     *
     * @param nodeAddress of the current node in the Reveno cluster
     */
    void currentNodeAddress(Address nodeAddress);

    /**
     * List of the other parties' network addresses.
     *
     * @param addresses of the other nodes
     */
    void nodesInetAddresses(List<InetAddress> addresses);

    /**
     * List of the other parties' addresses.
     *
     * @param nodeAddresses of the other nodes
     */
    void nodesAddresses(List<Address> nodeAddresses);

    /**
     * Denotes the style of network communication which will be used between all nodes in the
     * cluster to transfer commands from the Master to Slaves. Currently, two values are
     * supported: UNICAST and MULTICAST. Note that with either value all communication between
     * nodes is reliable, meaning that loss or corruption of data is always handled appropriately,
     * guaranteeing ordering, data integrity, etc.
     *
     * UNICAST uses one-to-one connections between all hosts in the cluster, built on TCP NIO2
     * under the hood. It is the most reliable option, since TCP itself provides ordering and
     * acknowledgement capabilities, but in some environments it might not scale as well.
     *
     * MULTICAST works like broadcast, with differences such as its subscription basis. It uses
     * the UDP protocol, which means that no connections to other parties are opened and each
     * packet is sent only once and routed to many receivers. With a good network configuration
     * it scales much better than TCP, but be aware that multicast is usually not possible over
     * the public Internet.
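     *
     * A minimal sketch of choosing the transport (the {@code config} variable is an assumed
     * {@code ClusterConfiguration} instance; how it is obtained depends on the concrete Reveno
     * cluster engine, and all values are purely illustrative):
     * <pre>{@code
     *     config.currentNodeAddress("192.168.1.10", "15100", "node-1");
     *     config.commandsXmitTransport(CommandsXmitTransport.UNICAST);
     * }</pre>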
     *
     * Read more about all the options on http://reveno.org
     *
     * @param transport type to be used for commands transfer
     */
    void commandsXmitTransport(CommandsXmitTransport transport);

    /**
     * Multicast configuration, which will be used if {@link #commandsXmitTransport(CommandsXmitTransport)}
     * was set to MULTICAST.
     *
     * @return multicast configuration
     */
    MulticastConfiguration multicast();

    /**
     * Unicast configuration, which will be used if {@link #commandsXmitTransport(CommandsXmitTransport)}
     * was set to UNICAST.
     *
     * @return unicast configuration
     */
    UnicastConfiguration unicast();

    /**
     * Timeouts which are used during the Leadership Election process, which is usually started
     * by membership changes or some unexpected critical situations.
     *
     * @return timeouts configuration
     */
    TimeoutsConfiguration electionTimeouts();

    /**
     * Data Synchronization configuration. Data Synchronization is a process which is issued at
     * some stage of the Leadership Election process. First, the nodes send each other the IDs of
     * the last transactions they processed. Some of them might then discover that their internal
     * state is not the latest in the cluster. In that case, they ask the node with the most
     * up-to-date state to share its last transactions / snapshot, efficiently syncing with it.
     * After all nodes share the same state, they can continue to operate.
     *
     * @return sync configuration
     */
    SyncConfiguration dataSync();

    void authToken(String authToken);

    /**
     * Priority of the node in the cluster, which makes it possible to effectively foresee the
     * next elected Masters in case of failure of the previous ones. The lower the value, the
     * higher the probability that this node will be elected as Master in the next election round.
     *
     * @param priority of the node
     */
    void priorityInCluster(int priority);

    interface TimeoutsConfiguration {
        void voteTimeoutNanos(long timeout);

        void syncTimeoutNanos(long timeout);

        void ackTimeoutNanos(long timeout);

        void barrierTimeoutNanos(long timeout);

        void syncBarrierTimeoutNanos(long timeout);
    }

    interface SyncConfiguration {

        /**
         * The mode in which two nodes perform synchronization during Leadership Election. The
         * first option means that all nodes make snapshots in advance and then simply send them
         * on request from other nodes. The second option is that the node sends the required
         * Journal files to other nodes, which contain events with transaction IDs greater than
         * or equal to the current node's last transaction ID.
         *
         * The first option is preferred, but it is not recommended for big models because of the
         * size of the snapshot and the cost of making it for each Leadership Election process.
         *
         * The second option is generally not recommended if there is a large number of Journal
         * files, or if rolling is performed very rarely, meaning big files will always be
         * transmitted.
         *
         * Default value is SyncMode.SNAPSHOT.
         *
         * @param mode of synchronization
         */
        void mode(SyncMode mode);

        /**
         * Thread pool size of the Sync Server, which handles synchronization requests and sends
         * the appropriate data.
         *
         * @param threads to be used
         */
        void threadPoolSize(int threads);

        /**
         * Number of retries to transmit state data before the transfer is considered failed.
         *
         * @param count of retries
         */
        void retries(int count);

        /**
         * Port on which the Sync Server will be listening.
         *
         * @param port of the Sync Server
         */
        void port(int port);
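
        /*
         * Example (illustrative sketch): tuning data synchronization through the SyncConfiguration
         * returned by ClusterConfiguration#dataSync(). The "config" variable and all values below
         * are assumptions for illustration only, not defaults.
         *
         *     SyncConfiguration sync = config.dataSync();
         *     sync.mode(SyncMode.SNAPSHOT);  // snapshot-based sync, the default mode
         *     sync.port(15300);              // port the Sync Server listens on
         *     sync.threadPoolSize(4);        // threads handling sync requests
         *     sync.retries(10);              // transmit retries before the transfer fails
         *     sync.waitAllNodesSync(true);   // safest option: wait for all nodes to finish syncing
         */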

        /**
         * The synchronization process lasts for the duration of the global Leadership Election
         * process, which means that no node can operate on production during it. Sometimes it is
         * obvious that the Master candidate has the latest state and can start to operate without
         * waiting for the other Slaves to catch up with its changes.
         * In that case, if waitAllNodesSync is set to false, the Master will start to handle
         * Commands normally already during the "sync" stage of the Leadership Election process.
         *
         * Warning! You should consider using that flag *very* carefully, as it means that all
         * failover data from the Master will not be handled by the Slaves as usual, but just
         * buffered in memory until they finish syncing. And if the synchronization process takes
         * too long, some data might even be lost. Use it primarily if you want the Master to
         * start operating as soon as possible (when milliseconds matter) and you know that the
         * sync process either does not happen at all or will be very short.
         *
         * @param wait for all nodes to finish syncing
         */
        void waitAllNodesSync(boolean wait);
    }

    interface MulticastConfiguration {

        /**
         * Multicast host.
         *
         * @param host of the multicast group
         */
        void host(String host);

        /**
         * Multicast port.
         *
         * @param port of the multicast group
         */
        void port(int port);

        /**
         * Network interface to be used by the Multicast socket. Default is lo.
         *
         * @param netInterface to be used
         */
        void netInterface(String netInterface);

        /**
         * Socket receive buffer size. Default is 1MB.
         *
         * @param size of the buffer
         */
        void receiveBufferSize(int size);

        /**
         * Socket send buffer size. Default is 1MB.
         *
         * @param size of the buffer
         */
        void sendBufferSize(int size);

        /**
         * Limits the number of packets per second that may be sent directly to the multicast
         * socket. You should configure this according to your network capabilities. Please note
         * that it is the same as "Commands per second" in Reveno terms only if
         * {@link #preferBatchingToLatency(boolean)} is set to {@code false}.
         *
         * Default value is 3k.
         *
         * @param pps packets per second
         */
        void packetsPerSecond(int pps);

        /**
         * Microseconds during which a busy spin loop awaiting new packets will be performed.
         * 0 means blocking mode.
         *
         * Default value is 0.
         *
         * @param micros to spin
         */
        void spinLoopMicros(int micros);

        /**
         * Microseconds for which LockSupport.parkNanos() will be called in the busy spin loop.
         * If {@link #spinLoopMicros(int)} is set to 0, this setting has no effect.
         *
         * @param micros to park
         */
        void threadParkMicros(int micros);

        /**
         * The number of packets which will be persistently stored in off-heap memory for NAK
         * retransmission purposes. For example, if pps is 3,000 and the packet history is 9,000,
         * then packets from the last 3 seconds can be retransmitted on request.
         *
         * @param packets to keep in history
         */
        void retransmitPacketsHistory(int packets);

        /**
         * For lowest latency, choose a packet size slightly lower than the network MTU.
         * For high throughput, choose larger packet sizes (up to 65k). The downside of large
         * packet sizes is that a packet gap has worse effects (because e.g. 64k needs to be
         * retransmitted instead of just 1k). As the history and receive buffers reserve
         * N * full packet size bytes, large packets also increase the memory required to hold
         * the buffers. It is good practice to choose multiples of the MTU for packet sizes,
         * though it is not that significant. Usual values are 1.5k, 3k, 8k, 16k; 64k is also a
         * possible setting (but requires large buffers). The recommendation is 4k to 8k. For low
         * latency requirements, set a small MTU size on your network adapter and a packet size
         * fitting into a single MTU.
         *
         * Default is 1k.
         *
         * @param size of the packet
         */
        void packetSize(int size);

        /**
         * Denotes whether to flush on every Command processed or to buffer until the max packet
         * size is reached. Set {@code false} for really low latency and stronger failover
         * guarantees.
         *
         * Default value is true.
         *
         * @param batching flag
         */
        void preferBatchingToLatency(boolean batching);
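
        /*
         * Example (illustrative sketch): a latency-oriented multicast tuning. The "config"
         * variable, the multicast group and all values are assumptions for illustration,
         * not recommended defaults.
         *
         *     MulticastConfiguration mcast = config.multicast();
         *     mcast.host("230.10.10.10");            // multicast group address
         *     mcast.port(15200);
         *     mcast.netInterface("eth0");            // instead of the default "lo"
         *     mcast.packetSize(1400);                // slightly below a typical 1500-byte MTU
         *     mcast.preferBatchingToLatency(false);  // flush on every Command
         *     mcast.spinLoopMicros(100);             // short busy spin instead of blocking reads
         */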

        /**
         * Multicast transport TTL (Time To Live).
         *
         * Read more about it here: http://www.tldp.org/HOWTO/Multicast-HOWTO-2.html
         *
         * @param ttl value
         */
        void ttl(int ttl);

        /**
         * The number of send retries in case the PPS limit is reached, the send buffers are full,
         * or initialization has not yet fully happened.
         *
         * Default value is 15.
         *
         * @param retries count
         */
        void sendRetries(int retries);
    }

    interface UnicastConfiguration {

        /**
         * Socket receive buffer size. Default 1MB.
         *
         * @param size of the buffer
         */
        void receiveBufferSize(int size);

        /**
         * Socket send buffer size. Default 1MB.
         *
         * @param size of the buffer
         */
        void sendBufferSize(int size);

        /**
         * Timeout for ping heartbeat and discovery requests.
         * Defaults to 5 seconds.
         *
         * @param pingTimeout in milliseconds
         */
        void pingTimeoutMillis(int pingTimeout);

        /**
         * Minimum number of threads to be used for message handling.
         * Default is 1.
         *
         * @param threads count
         */
        void minReceiveThreads(int threads);

        /**
         * Maximum number of threads to be used for message handling.
         * Default is 1.
         *
         * @param threads count
         */
        void maxReceiveThreads(int threads);

        /**
         * The size of the queue which will be used when all handling threads are busy.
         * 0 disables it.
         *
         * Default is 0.
         *
         * @param size of the queue
         */
        void receiveQueueMaxSize(int size);

        /**
         * Max number of messages a single read will try to read from the socket. Setting this to
         * a higher value will increase speed when receiving a lot of messages. However, when the
         * receive message rate is small, every read will still create an array of
         * max_read_batch_size messages.
         *
         * Default is 10.
         *
         * @param size of the read batch
         */
        void maxReadBatchMessages(int size);

        /**
         * Interval (in milliseconds) at which messages in the send windows are resent.
         *
         * Default is 1 sec.
         *
         * @param millis interval
         */
        void retransmitIntervalMillis(int millis);

        /**
         * Max number of milliseconds during which we try to retransmit a message to any given
         * member. After that, the connection is removed. 0 disables this.
         *
         * Default is 3 sec.
         *
         * @param millis limit
         */
        void maxRetransmitTimeMillis(int millis);
    }

    enum CommandsXmitTransport {
        UNICAST, MULTICAST
    }
}
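
/*
 * Example (illustrative sketch): a unicast-based setup expressed against this interface. The
 * "config" variable, the addresses and all values are assumptions for illustration only; how the
 * ClusterConfiguration instance is obtained depends on the concrete Reveno cluster engine.
 *
 *     config.currentNodeAddress("192.168.1.10", "15100", "node-1");
 *     config.nodesAddresses(Arrays.asList(node2Address, node3Address)); // Address instances of the other nodes
 *     config.commandsXmitTransport(ClusterConfiguration.CommandsXmitTransport.UNICAST);
 *     config.priorityInCluster(1); // the lower the value, the more likely this node becomes Master
 *
 *     ClusterConfiguration.UnicastConfiguration unicast = config.unicast();
 *     unicast.pingTimeoutMillis(3000);       // instead of the 5 second default
 *     unicast.maxReceiveThreads(2);
 *     unicast.retransmitIntervalMillis(500);
 */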