/**
* Copyright (c) 2002-2012 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.cluster.protocol.atomicbroadcast.multipaxos;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import org.neo4j.cluster.com.message.Message;
import org.neo4j.cluster.com.message.MessageProcessor;
import org.neo4j.cluster.protocol.cluster.ClusterMessage;
import org.neo4j.cluster.statemachine.State;
/**
* State machine for Paxos Proposer
*/
public enum ProposerState
implements State<MultiPaxosContext, ProposerMessage>
{
start
{
@Override
public ProposerState handle( MultiPaxosContext context,
Message<ProposerMessage> message,
MessageProcessor outgoing
)
throws Throwable
{
switch ( message.getMessageType() )
{
case join:
{
return proposer;
}
}
return this;
}
},
proposer
{
@Override
public ProposerState handle( MultiPaxosContext context,
Message<ProposerMessage> message,
MessageProcessor outgoing
)
throws Throwable
{
switch ( message.getMessageType() )
{
case propose:
{
Object payload = message.getPayload();
if ( payload instanceof ClusterMessage.ConfigurationChangeState )
{
ClusterMessage.ConfigurationChangeState state = message.getPayload();
List<URI> acceptors = context.getAcceptors();
// Never include node that is leaving
if ( state.getLeave() != null )
{
acceptors = new ArrayList<URI>( acceptors );
acceptors.remove( state.getLeave() );
}
propose( context, message, outgoing, payload, acceptors );
}
else
{
propose( context, message, outgoing, payload, context.getAcceptors() );
}
break;
}
case rejectPrepare:
{
// Denial of prepare
ProposerMessage.RejectPrepare rejectPropose = message.getPayload();
InstanceId instanceId = new InstanceId( message );
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( instanceId );
if ( instance.isState( PaxosInstance.State.p1_pending ) )
{
long ballot = instance.ballot;
while ( ballot <= rejectPropose.getBallot() )
{
ballot += 1000; // Make sure we win next time
}
instance.phase1Timeout( ballot, context.getAcceptors() );
for ( URI acceptor : instance.getAcceptors() )
{
if ( acceptor.equals( context.clusterContext.getMe() ) )
{
}
else
{
outgoing.process( message.copyHeadersTo( Message.to( AcceptorMessage.prepare,
acceptor, new AcceptorMessage.PrepareState( ballot ) ),
InstanceId.INSTANCE ) );
}
}
context.timeouts.setTimeout( instanceId, message.copyHeadersTo( Message.timeout(
ProposerMessage
.phase1Timeout, message ), InstanceId.INSTANCE ) );
}
break;
}
case phase1Timeout:
{
InstanceId instanceId = new InstanceId( message );
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( instanceId );
if ( instance.isState( PaxosInstance.State.p1_pending ) )
{
if ( instance.ballot > 10000 )
{
// Fail this propose
outgoing.process( Message.internal( AtomicBroadcastMessage.failed,
context.proposerContext.bookedInstances.get( instance.id ) ) );
}
else
{
long ballot = instance.ballot + 1000;
instance.phase1Timeout( ballot, context.getAcceptors() );
for ( URI acceptor : instance.getAcceptors() )
{
outgoing.process( message.copyHeadersTo( Message.to( AcceptorMessage.prepare,
acceptor, new AcceptorMessage.PrepareState( ballot ) ),
InstanceId.INSTANCE ) );
}
context.timeouts.setTimeout( instanceId, message.copyHeadersTo( Message.timeout(
ProposerMessage
.phase1Timeout, message ), InstanceId.INSTANCE ) );
}
}
break;
}
case promise:
{
// P
ProposerMessage.PromiseState promiseState = message.getPayload();
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( new InstanceId(
message ) );
if ( instance.isState( PaxosInstance.State.p1_pending ) && instance.ballot ==
promiseState.getBallot() )
{
instance.promise( promiseState );
if ( instance.promises.size() == context.getMinimumQuorumSize( instance.getAcceptors
() ) )
{
context.timeouts.cancelTimeout( instance.id );
// No promises contained a value
if ( instance.value_1 == null )
{
// R0
instance.ready( instance.value_2 == null ? context.proposerContext
.bookedInstances.get( instance.id ) : instance.value_2, true );
}
else
{
// R1
if ( instance.value_2 == null )
{
// Another value was already associated with this instance. Push value
// back onto pending list
context.proposerContext.pendingValues.offerFirst( context.proposerContext
.bookedInstances.remove( instance.id ) );
instance.ready( instance.value_1, false );
}
else if ( instance.value_1.equals( instance.value_2 == null ? context
.proposerContext.bookedInstances.get( instance.id ) : instance
.value_2 ) )
{
instance.ready( instance.value_2, instance.clientValue );
}
else if ( instance.clientValue )
{
// Another value was already associated with this instance. Push value
// back onto pending list
context.proposerContext.pendingValues.offerFirst( context.proposerContext
.bookedInstances.remove( instance.id ) );
instance.ready( instance.value_1, false );
}
else
{
// Another value was already associated with this instance. Push value
// back onto pending list
context.proposerContext.pendingValues.offerFirst( context.proposerContext
.bookedInstances.remove( instance.id ) );
instance.ready( instance.value_1, false );
}
}
// E: Send to Acceptors
instance.pending();
for ( URI acceptor : instance.getAcceptors() )
{
outgoing.process( message.copyHeadersTo( Message.to( AcceptorMessage.accept,
acceptor,
new AcceptorMessage.AcceptState( instance.ballot,
instance.value_2 ) ), InstanceId.INSTANCE ) );
}
context.timeouts.setTimeout( instance.id, message.copyHeadersTo( Message.timeout(
ProposerMessage
.phase2Timeout, message ), InstanceId.INSTANCE ) );
}
}
break;
}
case rejectAccept:
{
// This instance id has been used by another proposer - try next
ProposerMessage.RejectAcceptState state = message.getPayload();
InstanceId instanceId = new InstanceId( message );
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( instanceId );
context.timeouts.cancelTimeout( instanceId );
context.clusterContext.getLogger().debug( "Accept rejected:" + instance.state );
if ( instance.isState( PaxosInstance.State.p2_pending ) )
{
if ( instance.clientValue )
{
propose( context, message, outgoing, instance.value_2, instance.getAcceptors() );
}
instance.acceptRejected();
}
break;
}
case phase2Timeout:
{
InstanceId instanceId = new InstanceId( message );
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( instanceId );
if ( instance.isState( PaxosInstance.State.p2_pending ) )
{
long ballot = instance.ballot + 1000;
instance.phase2Timeout( ballot );
for ( URI acceptor : instance.getAcceptors() )
{
outgoing.process( message.copyHeadersTo( Message.to( AcceptorMessage.prepare,
acceptor, new AcceptorMessage.PrepareState( ballot ) ),
InstanceId.INSTANCE ) );
}
context.timeouts.setTimeout( instanceId, message.copyHeadersTo( Message.timeout(
ProposerMessage
.phase1Timeout, message ), InstanceId.INSTANCE ) );
}
break;
}
case accepted:
{
ProposerMessage.AcceptedState acceptedState = message.getPayload();
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( new InstanceId(
message ) );
if ( instance.isState( PaxosInstance.State.p2_pending ) )
{
instance.accepted( acceptedState );
// Value has been accepted! Now distribute to all learners
if ( instance.accepts.size() == context.getMinimumQuorumSize( instance.getAcceptors()
) )
{
context.timeouts.cancelTimeout( instance.id );
// Might have to extra-tell myself if not yet officially part of cluster
if ( instance.value_2 instanceof ClusterMessage.ConfigurationChangeState )
{
ClusterMessage.ConfigurationChangeState state = (ClusterMessage
.ConfigurationChangeState) instance.value_2;
// TODO getLearners might return wrong list if another join happens at the
// same time
// Proper fix is to wait with this learn until we have learned all previous
// configuration changes
for ( URI learner : context.getLearners() )
{
outgoing.process( message.copyHeadersTo( Message.to( LearnerMessage
.learn, learner,
new LearnerMessage.LearnState( instance.value_2 ) ),
InstanceId.INSTANCE ) );
}
// Tell joiner of this cluster configuration change
if ( state.getJoin() != null )
{
outgoing.process( message.copyHeadersTo( Message.to( LearnerMessage
.learn, state.getJoin(),
new LearnerMessage.LearnState( instance.value_2 ) ),
InstanceId.INSTANCE ) );
}
}
else
{
// Tell learners
for ( URI learner : context.getLearners() )
{
outgoing.process( message.copyHeadersTo( Message.to( LearnerMessage
.learn, learner,
new LearnerMessage.LearnState( instance.value_2 ) ),
InstanceId.INSTANCE ) );
}
}
context.proposerContext.bookedInstances.remove( instance.id );
// Check if we have anything pending - try to start process for it
if ( !context.proposerContext.pendingValues.isEmpty() && context.proposerContext
.bookedInstances.size() < MAX_CONCURRENT_INSTANCES )
{
Object value = context.proposerContext.pendingValues.remove();
context.clusterContext.getLogger().debug( "Restarting " + value + " booked:"
+ context.proposerContext.bookedInstances.size() );
outgoing.process( Message.internal( ProposerMessage.propose, value ) );
}
}
}
break;
}
case leave:
{
context.proposerContext.leave();
context.getPaxosInstances().leave();
return start;
}
}
return this;
}
};
public final int MAX_CONCURRENT_INSTANCES = 10;
private static void propose( MultiPaxosContext context, Message message, MessageProcessor outgoing,
Object payload, List<URI> acceptors )
{
InstanceId instanceId = context.proposerContext.newInstanceId( context.learnerContext
.getLastKnownLearnedInstanceInCluster() );
context.proposerContext.bookedInstances.put( instanceId, payload );
long ballot = 1000 + context.getServerId(); // First server will have first ballot id be 1001
PaxosInstance instance = context.getPaxosInstances().getPaxosInstance( instanceId );
if ( !(instance.isState( PaxosInstance.State.closed ) || instance.isState( PaxosInstance.State.delivered )) )
{
instance.propose( ballot, acceptors );
for ( URI acceptor : acceptors )
{
outgoing.process( Message.to( AcceptorMessage.prepare, acceptor, new AcceptorMessage.PrepareState(
ballot ) ).setHeader( InstanceId.INSTANCE, instanceId.toString() ) );
}
context.timeouts.setTimeout( instanceId, Message.timeout( ProposerMessage.phase1Timeout, message,
instanceId ).setHeader( InstanceId.INSTANCE, instanceId.toString() ) );
}
else
{
// Wait with this value - we have our hands full right now
context.proposerContext.pendingValues.offerFirst( payload );
}
}
}