/*
* Copyright (c) 2013 GigaSpaces Technologies Ltd. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package beans;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import akka.util.Duration;
import beans.config.Conf;
import com.avaje.ebean.Ebean;
import models.ServerNode;
import org.apache.commons.collections.Predicate;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import play.libs.Akka;
import server.*;
import utils.CollectionUtils;
import utils.StringUtils;
import javax.inject.Inject;
import javax.persistence.OptimisticLockException;
/**
* This class manages a server pool of available/busy bootstrapped machines (servers).
* A server pool is initialized with a configured minimum/maximum number of servers.
* The get() method returns a bootstrapped server instance.
* On init() the server pool deletes expired or orphaned servers.
* It also provides the ability to start up with a cold init, which deletes a running server.
*
* @author Igor Goldenberg
* @see ServerBootstrapperImpl
*/
public class ServerPoolImpl implements ServerPool
{
    private static final Logger logger = LoggerFactory.getLogger( ServerPoolImpl.class );

    @Inject
    private ServerBootstrapper serverBootstrapper;

    // counts the machines that are undergoing "create" process.
    // we need to count them as well when we want to know if pool is missing resources or not.
    private final AtomicInteger undergoingBootstrapCount = new AtomicInteger( 0 );

    @Inject
    private Conf conf;

    /**
     * Accepts nodes that are not remote and whose advanced params are empty
     * according to {@code StringUtils.isEmptyOrSpaces}. Every other predicate in this class
     * is combined with this one.
     */
    private static final Predicate nonRemotePredicate = new Predicate() {
        @Override
        public boolean evaluate( Object o )
        {
            ServerNode node = (ServerNode) o;
            boolean result = !node.isRemote() && StringUtils.isEmptyOrSpaces( node.getAdvancedParams() );
            logger.debug( "server [{}] is remote [{}] advanced params [{}] result [{}]", node.getId(), node.isRemote(), node.getAdvancedParams(), result );
            return result;
        }
    };

    /** Accepts busy nodes that also pass {@link #nonRemotePredicate}. */
    private static final Predicate busyServerPredicate = new Predicate() {
        @Override
        public boolean evaluate( Object o )
        {
            return ( (ServerNode) o ).isBusy() && nonRemotePredicate.evaluate( o );
        }
    };

    /** Accepts non-busy nodes that also pass {@link #nonRemotePredicate}. */
    private static final Predicate nonBusyServerPredicate = new Predicate() {
        @Override
        public boolean evaluate( Object o )
        {
            return !( (ServerNode) o ).isBusy() && nonRemotePredicate.evaluate( o );
        }
    };

    /** Accepts nodes without a node id (treated as failed bootstraps) that also pass {@link #nonRemotePredicate}. */
    private static final Predicate failedBootstrapsPredicate = new Predicate() {
        @Override
        public boolean evaluate( Object o )
        {
            return ( (ServerNode) o ).getNodeId() == null && nonRemotePredicate.evaluate( o );
        }
    };

    /**
     * Validates the bootstrap of every node in the given pool. Nodes that fail validation are
     * destroyed, the others are returned.
     *
     * @param pool - the pool we need to clean
     * @return - a clean pool (never <code>null</code>, possibly empty)
     */
    private List<ServerNode> cleanPool( Collection<ServerNode> pool )
    {
        List<ServerNode> cleanPool = new LinkedList<ServerNode>();
        if ( CollectionUtils.isEmpty( pool ) ) {
            return cleanPool;
        }

        for ( ServerNode serverNode : pool ) {
            BootstrapValidationResult bootstrapValidationResult = serverBootstrapper.validateBootstrap( serverNode );
            if ( !bootstrapValidationResult.isValid() ) {
                logger.info( "found a bad bootstrap on server [{}]. The test result showed the following [{}]. I should destroy this server..", serverNode, bootstrapValidationResult );
                destroy( serverNode );
            } else {
                logger.info( "Found a working management server [{}]:[{}], adding to clean pool", serverNode.getNodeId(), serverNode.getId() );
                cleanPool.add( serverNode );
            }
        }
        return cleanPool;
    }

    /**
     * Requests one additional server when the pool (including machines that are still
     * bootstrapping) is below its configured maximum; otherwise only logs the current stats.
     */
    @Override
    public void runHealthCheck()
    {
        if ( !isPoolWillBeSaturated() ) {
            logger.debug( "healthcheck :: creating more instance [{}]", getStats() );
            addNewServerToPool( 1 ); // lets create just one.
        } else {
            logger.debug( "healthcheck :: not creating more instances [{}]", getStats() );
        }
    }

    /**
     * Initializes the pool:
     * <ol>
     * <li>saves machines the bootstrapper reports as unmonitored,</li>
     * <li>validates the non-busy servers and destroys the invalid ones,</li>
     * <li>creates or destroys servers so the number of available servers respects the configured limits,</li>
     * <li>deletes the records of failed bootstraps (nodes without a node id).</li>
     * </ol>
     */
    @Override
    public void init()
    {
        recoverLostMachines();

        logger.info( "Started to initialize ServerPool, cold-init={}", conf.server.pool.coldInit );

        // get all available running servers
        List<ServerNode> servers = ServerNode.all();
        logger.info( "investigating [{}] servers", CollectionUtils.size( servers ) );

        Collection<ServerNode> busyServers = CollectionUtils.select( servers, busyServerPredicate );
        logger.info( "I found {} busy servers", CollectionUtils.size( busyServers ) );

        Collection<ServerNode> nonBusyServers = CollectionUtils.select( servers, nonBusyServerPredicate );
        Collection<ServerNode> availableServers = cleanPool( nonBusyServers );
        logger.info( " I have {} available servers, I need a minimum of {} and maximum of {}", new Object[]{ CollectionUtils.size( availableServers ), conf.server.pool.minNode, conf.server.pool.maxNodes } );

        resizePool( availableServers );
        deleteFailedBootstraps( servers );
    }

    /** Asks the bootstrapper for unmonitored machines and persists whatever it returns. */
    private void recoverLostMachines()
    {
        logger.info( "recovering lost machines" );
        List<ServerNode> lostMachines = serverBootstrapper.recoverUnmonitoredMachines();
        if ( CollectionUtils.isEmpty( lostMachines ) ) {
            logger.info( "no lost machines found" );
            return;
        }
        logger.info( "found [{}] lost machines [{}]", CollectionUtils.size( lostMachines ), lostMachines );
        Ebean.save( lostMachines );
    }

    /** Creates servers when below the configured minimum, destroys the surplus when above the configured maximum. */
    private void resizePool( Collection<ServerNode> availableServers )
    {
        int available = CollectionUtils.size( availableServers );

        if ( available < conf.server.pool.minNode ) {
            // create new servers if need
            int serversToInit = conf.server.pool.minNode - available;
            logger.info( "creating {} new Servers", serversToInit );
            logger.info( "ServerPool starting to initialize {} servers...", serversToInit );
            addNewServerToPool( serversToInit );
        } else if ( available > conf.server.pool.maxNodes ) {
            // remove servers if we have too much
            int serversToDelete = available - conf.server.pool.maxNodes;
            logger.info( "deleting {} servers", serversToDelete );
            int destroyed = 0;
            for ( ServerNode server : availableServers ) {
                if ( destroyed >= serversToDelete ) {
                    break;
                }
                destroyed++;
                destroy( server );
            }
        }
    }

    /**
     * Deletes the records of all failed bootstraps found in the given servers. A bulk delete is
     * attempted first; if it fails, every record is refreshed and deleted individually so one bad
     * record does not prevent the removal of the others.
     */
    private void deleteFailedBootstraps( List<ServerNode> servers )
    {
        Collection<ServerNode> failedBootstraps = new HashSet<ServerNode>( CollectionUtils.select( servers, failedBootstrapsPredicate ) );
        logger.info( "found [{}] failed bootstraps", CollectionUtils.size( failedBootstraps ) );

        Collection<Long> failedIds = new HashSet<Long>();
        for ( ServerNode sn : failedBootstraps ) {
            failedIds.add( sn.getId() );
        }

        // sanity check: fewer unique ids than nodes means the same record showed up more than once.
        if ( CollectionUtils.size( failedIds ) != CollectionUtils.size( failedBootstraps ) ) {
            logger.error( "ERROR : duplicate failed bootstrap machines! need to fix query. [{}] unique, [{}] total", CollectionUtils.size( failedIds ), CollectionUtils.size( failedBootstraps ) );
        }

        logger.info( "deleting {} failed bootstraps : {}", CollectionUtils.size( failedIds ), failedIds );
        if ( CollectionUtils.isEmpty( failedBootstraps ) ) {
            return;
        }

        try {
            Ebean.delete( failedBootstraps );
        } catch ( RuntimeException e ) {
            // keep the cause in the log instead of dropping it, then fall back to one-by-one deletion.
            logger.info( "unable to delete all server nodes with failed bootstraps. iterating", e );
            for ( ServerNode failedBootstrap : failedBootstraps ) {
                try {
                    failedBootstrap.refresh();
                    failedBootstrap.delete();
                } catch ( RuntimeException e1 ) {
                    logger.info( "unable to delete server [{}]", failedBootstrap.getId(), e1 );
                }
            }
        }
    }

    /** @return the nodes matching the criteria remote=false, with no filter on busy and max rows set to -1. */
    @Override
    public Collection<ServerNode> getPool()
    {
        return ServerNode.findByCriteria( new ServerNode.QueryConf().setMaxRows( -1 ).criteria().setBusy( null ).setRemote( false ).done() );
    }

    /**
     * Takes a snapshot of the pool: node counts per predicate, the configured limits and the number
     * of machines currently undergoing bootstrap.
     *
     * @return a freshly populated stats object
     */
    public ServerNodesPoolStats getStats()
    {
        ServerNodesPoolStats stats = new ServerNodesPoolStats();
        List<ServerNode> all = ServerNode.all();
        stats.all = CollectionUtils.size( all );
        stats.nonRemote = CollectionUtils.size( CollectionUtils.select( all, nonRemotePredicate ) );
        stats.busyServers = CollectionUtils.size( CollectionUtils.select( all, busyServerPredicate ) );
        stats.nonBusyServers = CollectionUtils.size( CollectionUtils.select( all, nonBusyServerPredicate ) );
        stats.minLimit = conf.server.pool.minNode;
        stats.maxLimit = conf.server.pool.maxNodes;
        stats.undergoingBootstrap = undergoingBootstrapCount.get();
        return stats;
    }

    /**
     * Tries to claim one of (at most 10) free, non-remote servers. Regardless of the outcome a
     * request for a new server is scheduled afterwards; that request creates nothing when the
     * pool will already be saturated.
     *
     * @return a ServerNode from the pool, otherwise <code>null</code> if no free server available
     */
    @Override
    synchronized public ServerNode get()
    {
        logger.info( "getting a server node" );
        printStats();

        List<ServerNode> freeServers = ServerNode.findByCriteria( new ServerNode.QueryConf().setMaxRows( 10 ).criteria().setBusy( false ).setRemote( false ).done() );
        ServerNode selectedServer = null;
        if ( CollectionUtils.isEmpty( freeServers ) ) {
            logger.info( "freeServers is empty, adding a new server. pool status is [{}]", getStats() );
        } else {
            for ( ServerNode freeServer : freeServers ) {
                if ( tryToGetFreeServer( freeServer ) ) {
                    logger.info( "successfully got a free server [{}]", freeServer );
                    selectedServer = freeServer;
                    break;
                }
            }
        }

        addNewServerToPool( NoOpCallback.instance );
        return selectedServer;
    }

    /** Logs the pool stats from a separate thread so the caller does not wait for the stats to be computed. */
    public void printStats()
    {
        new Thread( new PrintStats( this ) ).start();
    }

    /** Runnable that logs the stats of the pool it was created with. */
    public static class PrintStats implements Runnable
    {
        ServerPoolImpl poolImpl;

        public PrintStats( ServerPoolImpl poolImpl )
        {
            this.poolImpl = poolImpl;
        }

        @Override
        public void run()
        {
            logger.info( "pool stats [{}]", poolImpl.getStats() );
        }
    }

    /**
     * <p>
     * This method validates the bootstrap of the serverNode.<br/>
     * If the bootstrap is valid it will try to mark the node as "busy" which will effectively
     * get it out of the pool.<br/>
     * If the bootstrap is invalid the node is rebuilt in the background.
     * </p>
     *
     * @param serverNode - the server node we are trying to get
     * @return - true iff successfully got the serverNode
     */
    private boolean tryToGetFreeServer( ServerNode serverNode )
    {
        try {
            BootstrapValidationResult result = serverBootstrapper.validateBootstrap( serverNode );
            if ( result.isValid() ) {
                serverNode.setBusySince( System.currentTimeMillis() );
                serverNode.save(); // optimistic locking
                return true;
            }
            logger.info( "serverNode [{}] has an invalid bootstrap [{}]. I will rebuild it.", serverNode, result );
            rebuild( serverNode );
        } catch ( OptimisticLockException e ) {
            logger.info( "server [{}] already caught by another thread, could not get it", serverNode );
        }
        return false;
    }

    /**
     * Destroys the given machine on a new thread and then requests a replacement.
     * The replacement is created only if the pool will not be saturated.
     *
     * @param serverNode - the machine to rebuild
     */
    public void rebuild( final ServerNode serverNode )
    {
        logger.info( "rebuilding machine [{}]", serverNode );
        new Thread( new Runnable() {
            @Override
            public void run()
            {
                logger.info( "destroying machine [{}]", serverNode );
                serverBootstrapper.destroyServer( serverNode );
                addNewServerToPool( 1 );
            }
        } ).start();
    }

    /**
     * This function also counts the machines undergoing creation.
     *
     * @return true iff, once those machines finish their bootstrap, the number of non-busy servers
     *         reaches the configured maximum
     */
    public boolean isPoolWillBeSaturated()
    {
        return getStats().nonBusyServers + undergoingBootstrapCount.get() >= conf.server.pool.maxNodes;
    }

    /**
     * Destroys the given server through the bootstrapper.
     *
     * @param serverNode - the server to destroy; <code>null</code> is ignored
     */
    public void destroy( ServerNode serverNode )
    {
        if ( serverNode == null ) {
            return; // nothing to do.
        }
        logger.info( "destroying server {}", serverNode );
        // guy - removing "addNewServerToPool" - this is the destroy function, not create function.
        // guy - removing WidgetInstance.delete since we cascade removal
        // guy - removing ServerNode.delete, since we established it does not exist.
        serverBootstrapper.destroyServer( serverNode );
    }

    /** Schedules <code>number</code> independent requests for a new server. */
    private void addNewServerToPool( int number )
    {
        for ( int i = 0; i < number; i++ ) {
            addNewServerToPool( NoOpCallback.instance );
        }
    }

    /**
     * Same as {@link #isPoolWillBeSaturated()} but ignores the machines undergoing bootstrap.
     * Currently not called within this class.
     */
    private boolean isPoolSaturated()
    {
        return getStats().nonBusyServers >= conf.server.pool.maxNodes;
    }

    /**
     * Schedules an asynchronous attempt to create one new server. The server is created only if
     * the pool will not be saturated. The callback is scheduled afterwards in every case - also
     * when nothing was created or the creation failed.
     *
     * @param callback - scheduled once the attempt is over
     */
    @Override
    public void addNewServerToPool( final Runnable callback )
    {
        logger.info( "adding new server to the pool" );
        Akka.system().scheduler().scheduleOnce( Duration.Zero(),
                new Runnable() {
                    public void run()
                    {
                        try {
                            if ( !isPoolWillBeSaturated() ) {
                                int createNewServerCount = 1;
                                undergoingBootstrapCount.addAndGet( createNewServerCount );
                                try {
                                    logger.info( "creating new :: undergoing bootstrap count [{}]", getStats() );
                                    List<ServerNode> servers = serverBootstrapper.createServers( createNewServerCount );
                                    if ( CollectionUtils.isEmpty( servers ) ) {
                                        logger.warn( "bootstrapper returned no servers for a request of [{}]", createNewServerCount );
                                    } else {
                                        for ( ServerNode srv : servers ) {
                                            srv.save();
                                        }
                                    }
                                } finally {
                                    // always give back what we reserved: a failed or empty creation must not
                                    // leave the counter inflated, otherwise the pool looks saturated forever.
                                    undergoingBootstrapCount.addAndGet( -createNewServerCount );
                                }
                                logger.info( "after create :: undergoing bootstrap count [{}]", getStats() );
                            } else {
                                logger.info( "not creating :: non busy [{}] ; undergoing bootstrap [{}] ; max nodes [{}]", getStats().nonBusyServers, undergoingBootstrapCount.get(), conf.server.pool.maxNodes );
                            }
                        } catch ( Exception e ) {
                            logger.error( "ServerPool failed to create a new server node", e );
                        }
                        Akka.system().scheduler().scheduleOnce( Duration.Zero(), callback );
                    }
                }
        );
    }

    public void setServerBootstrapper( server.ServerBootstrapper serverBootstrapper )
    {
        this.serverBootstrapper = serverBootstrapper;
    }

    public void setConf( Conf conf )
    {
        this.conf = conf;
    }
}