/**
* Licensed to JumpMind Inc under one or more contributor
* license agreements. See the NOTICE file distributed
* with this work for additional information regarding
* copyright ownership. JumpMind Inc licenses this file
* to you under the GNU General Public License, version 3.0 (GPLv3)
* (the "License"); you may not use this file except in compliance
* with the License.
*
* You should have received a copy of the GNU General Public License,
* version 3.0 (GPLv3) along with this library; if not, see
* <http://www.gnu.org/licenses/>.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.jumpmind.symmetric.service.impl;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.lang.time.DateUtils;
import org.jumpmind.db.sql.ISqlRowMapper;
import org.jumpmind.db.sql.Row;
import org.jumpmind.symmetric.common.ParameterConstants;
import org.jumpmind.symmetric.db.ISymmetricDialect;
import org.jumpmind.symmetric.model.Node;
import org.jumpmind.symmetric.model.NodeCommunication;
import org.jumpmind.symmetric.model.NodeCommunication.CommunicationType;
import org.jumpmind.symmetric.model.RemoteNodeStatus;
import org.jumpmind.symmetric.model.RemoteNodeStatuses;
import org.jumpmind.symmetric.service.IClusterService;
import org.jumpmind.symmetric.service.INodeCommunicationService;
import org.jumpmind.symmetric.service.INodeService;
import org.jumpmind.symmetric.service.IParameterService;
import org.jumpmind.util.AppUtils;
import org.jumpmind.util.RandomTimeSlot;
public class NodeCommunicationService extends AbstractService implements INodeCommunicationService {
private Map<CommunicationType, ThreadPoolExecutor> executors = new HashMap<NodeCommunication.CommunicationType, ThreadPoolExecutor>();
private INodeService nodeService;
private IClusterService clusterService;
private boolean initialized = false;
private Map<CommunicationType, Set<String>> currentlyExecuting;
public NodeCommunicationService(IClusterService clusterService, INodeService nodeService, IParameterService parameterService,
ISymmetricDialect symmetricDialect) {
super(parameterService, symmetricDialect);
setSqlMap(new NodeCommunicationServiceSqlMap(symmetricDialect.getPlatform(),
createSqlReplacementTokens()));
this.clusterService = clusterService;
this.nodeService = nodeService;
this.currentlyExecuting = new HashMap<NodeCommunication.CommunicationType, Set<String>>();
CommunicationType[] types = CommunicationType.values();
for (CommunicationType communicationType : types) {
this.currentlyExecuting.put(communicationType, new HashSet<String>());
}
}
private final void initialize() {
if (!initialized) {
synchronized (this) {
if (!initialized) {
try {
int locksCleared = sqlTemplate.update(getSql("clearLocksOnRestartSql"),
clusterService.getServerId());
if (locksCleared > 0) {
log.info("Cleared {} node communication locks for {}", locksCleared,
clusterService.getServerId());
}
} finally {
initialized = true;
}
}
}
}
}
public NodeCommunication find(String nodeId, CommunicationType communicationType) {
NodeCommunication lock = sqlTemplate.queryForObject(
getSql("selectNodeCommunicationByNodeIdSql"), new NodeCommunicationMapper(),
nodeId, communicationType.name());
if (lock == null) {
lock = new NodeCommunication();
lock.setNodeId(nodeId);
lock.setCommunicationType(communicationType);
save(lock);
}
return lock;
}
public List<NodeCommunication> list(CommunicationType communicationType) {
initialize();
List<NodeCommunication> communicationRows = new ArrayList<NodeCommunication>(
sqlTemplate.query(getSql("selectNodeCommunicationSql"),
new NodeCommunicationMapper(), communicationType.name()));
List<Node> nodesToCommunicateWith = null;
switch (communicationType) {
case PULL:
case FILE_PULL:
nodesToCommunicateWith = nodeService.findNodesToPull();
break;
case FILE_PUSH:
case PUSH:
nodesToCommunicateWith = nodeService.findNodesToPushTo();
break;
default:
nodesToCommunicateWith = new ArrayList<Node>(0);
break;
}
for (Node nodeToCommunicateWith : nodesToCommunicateWith) {
NodeCommunication comm = null;
for (NodeCommunication nodeCommunication : communicationRows) {
if (nodeCommunication.getNodeId().equals(nodeToCommunicateWith.getNodeId())) {
comm = nodeCommunication;
break;
}
}
if (comm == null) {
comm = new NodeCommunication();
comm.setNodeId(nodeToCommunicateWith.getNodeId());
comm.setCommunicationType(communicationType);
save(comm);
communicationRows.add(comm);
}
comm.setNode(nodeToCommunicateWith);
}
Iterator<NodeCommunication> it = communicationRows.iterator();
while (it.hasNext()) {
NodeCommunication nodeCommunication = it.next();
Node node = null;
for (Node nodeToCommunicateWith : nodesToCommunicateWith) {
if (nodeCommunication.getNodeId().equals(nodeToCommunicateWith.getNodeId())) {
node = nodeToCommunicateWith;
break;
}
}
if (node == null) {
delete(nodeCommunication);
it.remove();
}
}
return communicationRows;
}
public boolean delete(NodeCommunication nodeCommunication) {
return 1 == sqlTemplate.update(getSql("deleteNodeCommunicationSql"),
nodeCommunication.getNodeId(), nodeCommunication.getCommunicationType().name());
}
public void save(NodeCommunication nodeCommunication) {
if (0 == sqlTemplate.update(getSql("updateNodeCommunicationSql"),
nodeCommunication.getLockTime(), nodeCommunication.getLockingServerId(),
nodeCommunication.getLastLockMillis(), nodeCommunication.getSuccessCount(),
nodeCommunication.getFailCount(), nodeCommunication.getTotalSuccessCount(),
nodeCommunication.getTotalFailCount(), nodeCommunication.getTotalSuccessMillis(),
nodeCommunication.getTotalFailMillis(), nodeCommunication.getLastLockTime(),
nodeCommunication.getNodeId(), nodeCommunication.getCommunicationType().name())) {
sqlTemplate.update(getSql("insertNodeCommunicationSql"),
nodeCommunication.getLockTime(), nodeCommunication.getLockingServerId(),
nodeCommunication.getLastLockMillis(), nodeCommunication.getSuccessCount(),
nodeCommunication.getFailCount(), nodeCommunication.getTotalSuccessCount(),
nodeCommunication.getTotalFailCount(),
nodeCommunication.getTotalSuccessMillis(),
nodeCommunication.getTotalFailMillis(), nodeCommunication.getLastLockTime(),
nodeCommunication.getNodeId(), nodeCommunication.getCommunicationType().name());
}
}
protected ThreadPoolExecutor getExecutor(final CommunicationType communicationType) {
ThreadPoolExecutor service = executors.get(communicationType);
String threadCountParameter = "";
switch (communicationType) {
case PULL:
threadCountParameter = ParameterConstants.PULL_THREAD_COUNT_PER_SERVER;
break;
case PUSH:
threadCountParameter = ParameterConstants.PUSH_THREAD_COUNT_PER_SERVER;
break;
case FILE_PULL:
threadCountParameter = ParameterConstants.FILE_PUSH_THREAD_COUNT_PER_SERVER;
break;
case FILE_PUSH:
threadCountParameter = ParameterConstants.FILE_PUSH_THREAD_COUNT_PER_SERVER;
break;
case EXTRACT:
threadCountParameter = ParameterConstants.INITIAL_LOAD_EXTRACT_THREAD_COUNT_PER_SERVER;
break;
default:
break;
}
int threadCount = parameterService.getInt(threadCountParameter, 1);
if (service != null && service.getCorePoolSize() != threadCount) {
log.info("{} has changed from {} to {}. Restarting thread pool", new Object[] { threadCountParameter, service.getCorePoolSize(), threadCount });
stop();
service = null;
}
if (service == null) {
synchronized (this) {
service = executors.get(communicationType);
if (service == null) {
if (threadCount <= 0) {
log.warn("{}={} is not a valid value. Defaulting to 1",
threadCountParameter, threadCount);
threadCount = 1;
} else if (threadCount > 1) {
log.info("{} will use {} threads", communicationType.name().toLowerCase(),
threadCount);
}
service = (ThreadPoolExecutor) Executors.newFixedThreadPool(threadCount,
new ThreadFactory() {
final AtomicInteger threadNumber = new AtomicInteger(1);
final String namePrefix = parameterService.getEngineName()
.toLowerCase()
+ "-"
+ communicationType.name().toLowerCase() + "-";
public Thread newThread(Runnable r) {
Thread t = new Thread(r);
t.setName(namePrefix + threadNumber.getAndIncrement());
if (t.isDaemon()) {
t.setDaemon(false);
}
if (t.getPriority() != Thread.NORM_PRIORITY) {
t.setPriority(Thread.NORM_PRIORITY);
}
return t;
}
});
executors.put(communicationType, service);
}
}
}
return service;
}
public int getAvailableThreads(CommunicationType communicationType) {
ThreadPoolExecutor service = getExecutor(communicationType);
return service.getMaximumPoolSize() - service.getActiveCount();
}
protected Date getLockTimeoutDate(CommunicationType communicationType) {
String parameter = "";
switch (communicationType) {
case PULL:
parameter = ParameterConstants.PULL_LOCK_TIMEOUT_MS;
break;
case PUSH:
parameter = ParameterConstants.PUSH_LOCK_TIMEOUT_MS;
break;
case FILE_PULL:
parameter = ParameterConstants.FILE_PULL_LOCK_TIMEOUT_MS;
break;
case FILE_PUSH:
parameter = ParameterConstants.FILE_PUSH_LOCK_TIMEOUT_MS;
break;
case EXTRACT:
parameter = ParameterConstants.INITIAL_LOAD_EXTRACT_TIMEOUT_MS;
break;
default:
break;
}
return DateUtils.addMilliseconds(new Date(),
-parameterService.getInt(parameter, 7200000));
}
public boolean execute(final NodeCommunication nodeCommunication, RemoteNodeStatuses statuses,
final INodeCommunicationExecutor executor) {
Date now = new Date();
Date lockTimeout = getLockTimeoutDate(nodeCommunication.getCommunicationType());
final Set<String> executing = this.currentlyExecuting.get(nodeCommunication.getCommunicationType());
boolean locked = !executing.contains(nodeCommunication.getNodeId()) &&
sqlTemplate.update(getSql("aquireLockSql"), clusterService.getServerId(), now, now,
nodeCommunication.getNodeId(), nodeCommunication.getCommunicationType().name(),
lockTimeout) == 1;
if (locked) {
executing.add(nodeCommunication.getNodeId());
nodeCommunication.setLastLockTime(now);
nodeCommunication.setLockingServerId(clusterService.getServerId());
final RemoteNodeStatus status = statuses.add(nodeCommunication.getNodeId());
Runnable r = new Runnable() {
public void run() {
long ts = System.currentTimeMillis();
boolean failed = false;
try {
executor.execute(nodeCommunication, status);
failed = status.failed();
} catch (Throwable ex) {
failed = true;
log.error(String.format("Failed to execute %s for node %s",
nodeCommunication.getCommunicationType().name(),
nodeCommunication.getNodeId()), ex);
} finally {
unlock(nodeCommunication, status, failed, ts);
executing.remove(nodeCommunication.getNodeId());
}
}
};
if (parameterService.is(ParameterConstants.SYNCHRONIZE_ALL_JOBS)) {
r.run();
} else {
ThreadPoolExecutor service = getExecutor(nodeCommunication.getCommunicationType());
service.execute(r);
}
}
return locked;
}
protected void unlock(NodeCommunication nodeCommunication,
RemoteNodeStatus status, boolean failed, long ts) {
boolean unlocked = false;
int attempts = 1;
do {
try {
long millis = System.currentTimeMillis() - ts;
nodeCommunication.setLockTime(null);
nodeCommunication.setLastLockMillis(millis);
if (failed) {
nodeCommunication.setFailCount(nodeCommunication
.getFailCount() + 1);
nodeCommunication.setTotalFailCount(nodeCommunication
.getTotalFailCount() + 1);
nodeCommunication.setTotalFailMillis(nodeCommunication
.getTotalFailMillis() + millis);
} else {
nodeCommunication.setSuccessCount(nodeCommunication
.getSuccessCount() + 1);
nodeCommunication.setTotalSuccessCount(nodeCommunication
.getTotalSuccessCount() + 1);
nodeCommunication.setTotalSuccessMillis(nodeCommunication
.getTotalSuccessMillis() + millis);
nodeCommunication.setFailCount(0);
}
status.setComplete(true);
save(nodeCommunication);
unlocked = true;
if (attempts > 1) {
log.info(String.format("Successfully unlocked %s node communication record for %s after %d attempts",
nodeCommunication.getCommunicationType().name(),
nodeCommunication.getNodeId(), attempts));
}
} catch (Throwable e) {
log.error(String.format(
"Failed to unlock %s node communication record for %s",
nodeCommunication.getCommunicationType().name(),
nodeCommunication.getNodeId()), e);
long sleepTime = DateUtils.MILLIS_PER_SECOND
* new RandomTimeSlot(nodeCommunication.getNodeId(), 30).getRandomValueSeededByExternalId();
log.warn("Sleeping for {} ms before attempting to unlock the node communication record again", sleepTime);
AppUtils.sleep(sleepTime);
attempts++;
};
} while (!unlocked);
}
public void stop() {
Collection<CommunicationType> services = new HashSet<NodeCommunication.CommunicationType>(
executors.keySet());
for (CommunicationType communicationType : services) {
try {
ExecutorService service = executors.get(communicationType);
service.shutdownNow();
} finally {
executors.remove(communicationType);
}
}
}
class NodeCommunicationMapper implements ISqlRowMapper<NodeCommunication> {
public NodeCommunication mapRow(Row rs) {
NodeCommunication nodeCommuncation = new NodeCommunication();
nodeCommuncation.setCommunicationType(CommunicationType.valueOf(rs.getString(
"communication_type").toUpperCase()));
nodeCommuncation.setNodeId(rs.getString("node_id"));
nodeCommuncation.setLockTime(rs.getDateTime("lock_time"));
nodeCommuncation.setLastLockMillis(rs.getLong("last_lock_millis"));
nodeCommuncation.setLockingServerId(rs.getString("locking_server_id"));
nodeCommuncation.setSuccessCount(rs.getLong("success_count"));
nodeCommuncation.setTotalSuccessCount(rs.getLong("total_success_count"));
nodeCommuncation.setTotalSuccessMillis(rs.getLong("total_success_millis"));
nodeCommuncation.setFailCount(rs.getLong("fail_count"));
nodeCommuncation.setTotalFailCount(rs.getLong("total_fail_count"));
nodeCommuncation.setTotalFailMillis(rs.getLong("total_fail_millis"));
nodeCommuncation.setLastLockTime(rs.getDateTime("last_lock_time"));
return nodeCommuncation;
}
}
}