/*
*
* * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com)
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
* *
* * For more information: http://www.orientechnologies.com
*
*/
package com.orientechnologies.orient.server.distributed.impl;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.spi.exception.DistributedObjectDestroyedException;
import com.orientechnologies.common.concur.OTimeoutException;
import com.orientechnologies.common.concur.lock.OModificationOperationProhibitedException;
import com.orientechnologies.common.log.OLogManager;
import com.orientechnologies.orient.core.Orient;
import com.orientechnologies.orient.core.config.OGlobalConfiguration;
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx;
import com.orientechnologies.orient.core.exception.OConfigurationException;
import com.orientechnologies.orient.core.exception.OStorageException;
import com.orientechnologies.orient.core.metadata.security.OSecurityUser;
import com.orientechnologies.orient.core.metadata.security.OUser;
import com.orientechnologies.orient.server.distributed.*;
import com.orientechnologies.orient.server.distributed.ODistributedServerLog.DIRECTION;
import com.orientechnologies.orient.server.distributed.task.ODistributedOperationException;
import com.orientechnologies.orient.server.distributed.task.ORemoteTask;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
/**
* Hazelcast implementation of a distributed peer. There is one instance per database. Each node creates its own instance to talk
* with the other nodes.
*
* @author Luca Garulli (l.garulli--at--orientechnologies.com)
*/
public class ODistributedWorker extends Thread {
/** Distributed database this worker has been created for. */
protected final ODistributedDatabaseImpl distributed;
/** Server manager, obtained from {@link #distributed} in the constructor. */
protected final ODistributedServerManager manager;
/** Message service, taken from {@link #distributed} in the constructor. */
protected final ODistributedMessageServiceImpl msgService;
/** Name of the local node as returned by {@link #manager}. */
protected final String localNodeName;
/** Name of the database served by this worker. */
protected final String databaseName;
/** Bounded queue holding the requests waiting to be executed by this worker. */
protected final ArrayBlockingQueue<ODistributedRequest> localQueue;
/** Worker identifier; it is part of the thread name and of the log messages. */
protected final int id;
/** Database instance, assigned by {@link #initDatabaseInstance()} and released by {@link #reset()}. */
protected volatile ODatabaseDocumentTx database;
/** Last user loaded from the database security metadata while executing a request. */
protected volatile OUser lastUser;
/** Turned to {@code false} by {@link #shutdown()} and {@link #sendShutdown()} to stop the worker loops. */
protected volatile boolean running = true;
/** Number of requests taken from {@link #localQueue} so far. */
private final AtomicLong processedRequests = new AtomicLong(0);
/** {@code true} while the worker is blocked waiting for the next request. */
private final AtomicBoolean waitingForNextRequest = new AtomicBoolean(true);
/** Maximum time, in milliseconds, {@link #shutdown()} waits for the worker thread to terminate. */
private static final long MAX_SHUTDOWN_TIMEOUT = 5000L;
/** Request currently handled by {@link #run()}, or {@code null}. */
private volatile ODistributedRequest currentExecuting;
public ODistributedWorker(final ODistributedDatabaseImpl iDistributed, final String iDatabaseName, final int i) {
id = i;
setName("OrientDB DistributedWorker node=" + iDistributed.getLocalNodeName() + " db=" + iDatabaseName + " id=" + i);
distributed = iDistributed;
localQueue = new ArrayBlockingQueue<ODistributedRequest>(OGlobalConfiguration.DISTRIBUTED_LOCAL_QUEUESIZE.getValueAsInteger());
databaseName = iDatabaseName;
manager = distributed.getManager();
msgService = distributed.msgService;
localNodeName = manager.getLocalNodeName();
}
/**
 * Enqueues a request for this worker, blocking while the local queue is full.
 * <p>
 * If the calling thread is interrupted while waiting for free space, the worker is shut down and the interrupt status
 * of the calling thread is restored, so that the caller can react to the interruption too.
 *
 * @param request request to be executed by this worker
 */
public void processRequest(final ODistributedRequest request) {
  try {
    localQueue.put(request);
  } catch (InterruptedException e) {
    ODistributedServerLog.warn(this, localNodeName, null, ODistributedServerLog.DIRECTION.NONE,
        "Received interruption signal, closing distributed worker thread (worker=%d)", id);
    try {
      shutdown();
    } finally {
      // RESTORED ONLY AFTER shutdown(): A PENDING INTERRUPT STATUS WOULD CUT SHORT THE join() IT MAY EXECUTE
      Thread.currentThread().interrupt();
    }
  }
}
/**
 * Main loop of the worker: reads the next request and executes it until {@link #running} is cleared or the thread is
 * interrupted.
 * <p>
 * An {@link InterruptedException}, a {@link DistributedObjectDestroyedException} or a
 * {@link HazelcastInstanceNotActiveException} terminates the loop with the interrupt status set. Any other error is logged
 * and the loop goes on with the next request. The request exposed by {@link #getProcessing()} is always cleared once the
 * request has been handled, even when its execution fails.
 */
@Override
public void run() {
  while (running) {
    ODistributedRequestId reqId = null;
    ODistributedRequest message = null;
    try {
      message = readRequest();
      currentExecuting = message;
      if (message != null) {
        reqId = message.getId();
        onMessage(message);
      }
    } catch (InterruptedException e) {
      // EXIT CURRENT THREAD
      Thread.currentThread().interrupt();
      break;
    } catch (DistributedObjectDestroyedException e) {
      Thread.currentThread().interrupt();
      break;
    } catch (HazelcastInstanceNotActiveException e) {
      Thread.currentThread().interrupt();
      break;
    } catch (Throwable e) {
      try {
        if (e.getCause() instanceof InterruptedException)
          Thread.currentThread().interrupt();
        else
          ODistributedServerLog.error(this, localNodeName, reqId != null ? manager.getNodeNameById(reqId.getNodeId()) : "?",
              ODistributedServerLog.DIRECTION.IN, "Error on executing distributed request %s: (%s) worker=%d", e,
              message != null ? message.getId() : -1, message != null ? message.getTask() : "-", id);
      } catch (Throwable t) {
        // RESOLVING THE SENDER NAME FAILED TOO: LOG THE ORIGINAL ERROR WITHOUT IT
        ODistributedServerLog.error(this, localNodeName, "?", ODistributedServerLog.DIRECTION.IN,
            "Error on executing distributed request %s: (%s) worker=%d", e, message != null ? message.getId() : -1,
            message != null ? message.getTask() : "-", id);
      }
    } finally {
      // ALWAYS CLEAR IT, OTHERWISE A FAILED REQUEST WOULD BE REPORTED AS STILL IN EXECUTION
      currentExecuting = null;
    }
  }
  ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "End of reading requests for database %s", databaseName);
}
/**
 * Makes sure a database instance is available to this worker.
 * <p>
 * When no instance is assigned yet, it is requested to the distributed database, retrying up to 100 times while an
 * {@link OStorageException} or an {@link OConfigurationException} is raised; {@link #dbNotAvailable(int)} is called between
 * attempts. If that wait is interrupted the method returns without a database. If no instance could be obtained after
 * all the attempts, the distributed database is shut down.
 * <p>
 * When an instance is assigned but closed, it is reopened on a freshly loaded storage.
 *
 * @throws ODistributedException if the database cannot be opened after all the attempts
 */
public void initDatabaseInstance() {
  if (database != null) {
    if (database.isClosed()) {
      // DATABASE CLOSED, REOPEN IT
      database.activateOnCurrentThread();
      database.close();
      database.replaceStorage(Orient.instance().loadStorage(database.getURL()));
      manager.getServerInstance().openDatabase(database, "internal", "internal", null, true);
    }
    return;
  }

  int attempt = 0;
  while (attempt < 100) {
    try {
      database = distributed.getDatabaseInstance();
      // OK
      break;
    } catch (OStorageException e) {
      // WAIT FOR A WHILE, THEN RETRY
      if (!dbNotAvailable(attempt))
        return;
    } catch (OConfigurationException e) {
      // WAIT FOR A WHILE, THEN RETRY
      if (!dbNotAvailable(attempt))
        return;
    }
    ++attempt;
  }

  if (database != null)
    return;

  ODistributedServerLog.info(this, manager.getLocalNodeName(), null, DIRECTION.NONE,
      "Database '%s' not present, shutting down database manager", databaseName);
  distributed.shutdown();
  throw new ODistributedException("Cannot open database '" + databaseName + "'");
}
/**
 * Logs that the database is not present yet and pauses for 300 milliseconds before the next attempt.
 *
 * @param retry number of the current attempt, reported in the log message
 * @return {@code true} if the pause completed, {@code false} if the thread was interrupted while waiting (the interrupt
 *         status is restored)
 */
protected boolean dbNotAvailable(int retry) {
  ODistributedServerLog.info(this, manager.getLocalNodeName(), null, DIRECTION.NONE,
      "Database '%s' not present, waiting for it (retry=%d/%d)...", databaseName, retry, 100);

  boolean waited = true;
  try {
    Thread.sleep(300);
  } catch (InterruptedException e1) {
    Thread.currentThread().interrupt();
    waited = false;
  }
  return waited;
}
/**
 * Stops the worker: clears {@link #running}, interrupts the worker thread and, only when requests were still queued at
 * the time of the call, waits up to {@code MAX_SHUTDOWN_TIMEOUT} milliseconds for the thread to terminate. Afterwards the
 * queue is emptied and the database, if any, is closed. Errors raised along the way are logged, never propagated.
 */
public void shutdown() {
  running = false;

  final int queued = localQueue.size();
  final boolean hadPendingRequests = queued > 0;
  if (hadPendingRequests) {
    ODistributedServerLog.info(this, localNodeName, null, DIRECTION.NONE,
        "Received shutdown signal, waiting for distributed worker queue is empty (pending msgs=%d)...", queued);
  }

  interrupt();

  try {
    if (hadPendingRequests) {
      try {
        join(MAX_SHUTDOWN_TIMEOUT);
      } catch (Exception e) {
        ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "Interrupted shutdown of distributed worker thread");
      }
    }

    ODistributedServerLog.debug(this, localNodeName, null, DIRECTION.NONE, "Shutdown distributed worker '%s' completed", getName());

    localQueue.clear();

    if (database != null) {
      database.activateOnCurrentThread();
      database.close();
    }
  } catch (Exception e) {
    ODistributedServerLog.warn(this, localNodeName, null, DIRECTION.NONE, "Error on shutting down distributed worker '%s'", e,
        getName());
  }
}
/**
 * @return the database instance currently assigned to this worker, or {@code null} if none has been assigned
 */
public ODatabaseDocumentTx getDatabase() {
  return this.database;
}
/**
 * Reads the next request to execute. Waits for a message if the local queue is empty and, when the manager reports the
 * node as offline, waits for the node to be online again before returning.
 *
 * @return the request to execute; {@link #run()} tolerates a {@code null} value, so the debug trace is skipped in that
 *         case instead of failing
 * @throws InterruptedException if the thread is interrupted while waiting for a message
 */
protected ODistributedRequest readRequest() throws InterruptedException {
  // GET FROM DISTRIBUTED QUEUE. IF EMPTY WAIT FOR A MESSAGE
  final ODistributedRequest req = nextMessage();
  if (manager.isOffline())
    waitNodeIsOnline();
  // nextMessage() CAN BE OVERRIDDEN: DO NOT DEREFERENCE A MISSING REQUEST JUST TO TRACE IT
  if (req != null && ODistributedServerLog.isDebugEnabled()) {
    final String senderNodeName = manager.getNodeNameById(req.getId().getNodeId());
    ODistributedServerLog
        .debug(this, localNodeName, senderNodeName, DIRECTION.IN, "Processing request=(%s) sourceNode=%s worker=%d", req,
            senderNodeName, id);
  }
  return req;
}
/**
 * @return {@code true} if the flag set by {@link #nextMessage()} before waiting on the queue is still raised
 */
public boolean isWaitingForNextRequest() {
  final boolean waiting = waitingForNextRequest.get();
  return waiting;
}
/**
 * Takes the next request from the local queue, blocking until one is available. The waiting flag is raised before
 * blocking and lowered once a request has been taken; the processed requests counter is then incremented.
 *
 * @return the request taken from the queue
 * @throws InterruptedException if the thread is interrupted while waiting
 */
protected ODistributedRequest nextMessage() throws InterruptedException {
  // MARK THE WORKER AS IDLE WHILE IT IS BLOCKED ON THE QUEUE
  waitingForNextRequest.set(true);
  final ODistributedRequest next = localQueue.take();
  waitingForNextRequest.set(false);

  processedRequests.incrementAndGet();
  return next;
}
/**
 * Executes the remote call on the local node and sends back the result.
 * <p>
 * The sender node name is resolved first (up to 10 attempts, 200 milliseconds apart); if it is still unknown an
 * {@link ODistributedException} is sent back as response and the request is discarded. Otherwise the task is executed
 * through the manager, retrying every second while the result is an {@link OModificationOperationProhibitedException}
 * (database frozen) and the worker is running. After the execution the database, if open, is rolled back, its local cache
 * is cleared and the user replaced for the request, if any, is put back.
 * <p>
 * An interruption received while waiting for a frozen database does not stop the retries, but the interrupt status of the
 * thread is restored before returning so the caller can still see it.
 *
 * @param iRequest request to execute
 * @throws ODistributedException if the thread is interrupted while waiting for the sender node to be registered
 * @throws RuntimeException      any runtime error raised by the execution; it is sent back as response before being
 *                               rethrown
 */
protected void onMessage(final ODistributedRequest iRequest) {
  String senderNodeName = null;
  for (int retry = 0; retry < 10; retry++) {
    senderNodeName = manager.getNodeNameById(iRequest.getId().getNodeId());
    if (senderNodeName != null)
      break;
    try {
      Thread.sleep(200);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new ODistributedException("Execution has been interrupted");
    }
  }
  if (senderNodeName == null) {
    ODistributedServerLog.warn(this, localNodeName, senderNodeName, DIRECTION.IN,
        "Sender server id %d is not registered in the cluster configuration, discard the request: (%s) (worker=%d)",
        iRequest.getId().getNodeId(), iRequest, id);
    sendResponseBack(iRequest, new ODistributedException("Sender server id " + iRequest.getId().getNodeId()
        + " is not registered in the cluster configuration, discard the request"));
    return;
  }
  final ORemoteTask task = iRequest.getTask();
  if (ODistributedServerLog.isDebugEnabled())
    ODistributedServerLog
        .debug(this, localNodeName, senderNodeName, DIRECTION.IN, "Received request: (%s) (worker=%d)", iRequest, id);
  // EXECUTE IT LOCALLY
  Object responsePayload = null;
  OSecurityUser origin = null;
  // REMEMBERS AN INTERRUPTION RECEIVED DURING THE RETRIES: THE STATUS IS RESTORED ONLY ON EXIT, SO THAT THE CLEANUP AND THE
  // RESPONSE ARE NOT EXECUTED WITH THE INTERRUPT STATUS SET
  boolean interrupted = false;
  try {
    try {
      waitNodeIsOnline();
      distributed.waitIsReady(task);
      if (task.isUsingDatabase()) {
        initDatabaseInstance();
        if (database == null)
          throw new ODistributedOperationException(
              "Error on executing remote request because the database '" + databaseName + "' is not available");
      }
      // keep original user in database, check the username passed in request and set new user in DB, after document saved,
      // reset to original user
      if (database != null) {
        database.activateOnCurrentThread();
        origin = database.getUser();
        try {
          if (iRequest.getUserRID() != null && iRequest.getUserRID().isValid() && (lastUser == null || !(lastUser.getIdentity())
              .equals(iRequest.getUserRID()))) {
            lastUser = database.getMetadata().getSecurity().getUser(iRequest.getUserRID());
            database.setUser(lastUser);// set to new user
          } else
            // NOTE(review): no switch is done when the request user equals lastUser, although the cleanup of the previous
            // request may have put the original user back - confirm which user the task is expected to run with
            origin = null;
        } catch (Throwable ex) {
          // THE MESSAGE IS PASSED AS ARGUMENT, NOT AS PART OF THE FORMAT STRING, AND THE CAUSE IS KEPT
          OLogManager.instance().error(this, "Failed on user switching database. %s", ex, ex.getMessage());
        }
      }
      // EXECUTE THE TASK
      for (int retry = 1; running; ++retry) {
        responsePayload = manager.executeOnLocalNode(iRequest.getId(), task, database);
        if (responsePayload instanceof OModificationOperationProhibitedException) {
          // RETRY
          try {
            ODistributedServerLog.info(this, localNodeName, senderNodeName, DIRECTION.IN,
                "Database is frozen, waiting and retrying. Request %s (retry=%d, worker=%d)", iRequest, retry, id);
            Thread.sleep(1000);
          } catch (InterruptedException e) {
            // KEEP RETRYING WHILE RUNNING, BUT DO NOT LOSE THE INTERRUPTION
            interrupted = true;
          }
        } else {
          // OPERATION EXECUTED (OK OR ERROR), NO RETRY NEEDED
          if (retry > 1)
            ODistributedServerLog
                .info(this, localNodeName, senderNodeName, DIRECTION.IN, "Request %s succeed after retry=%d", iRequest, retry);
          break;
        }
      }
    } catch (RuntimeException e) {
      sendResponseBack(iRequest, e);
      throw e;
    } finally {
      if (database != null && !database.isClosed()) {
        database.activateOnCurrentThread();
        if (!database.isClosed()) {
          database.rollback();
          database.getLocalCache().clear();
          if (origin != null)
            database.setUser(origin);
        }
      }
    }
    sendResponseBack(iRequest, responsePayload);
  } finally {
    if (interrupted)
      Thread.currentThread().interrupt();
  }
}
/**
 * @return the name of the local node, as read from the manager when the worker was created
 */
protected String getLocalNodeName() {
  return this.localNodeName;
}
/**
 * Sends the response of a request back to its sender, using this worker as log requester and its manager.
 *
 * @param iRequest        request the response refers to
 * @param responsePayload result of the execution, possibly an exception
 */
private void sendResponseBack(final ODistributedRequest iRequest, Object responsePayload) {
  ODistributedWorker.sendResponseBack(this, this.manager, iRequest, responsePayload);
}
/**
 * Builds the response for a request and sends it to the node that issued the request. Nothing is sent for requests
 * with a negative message id (internal messages). A failure on sending is only logged at debug level.
 *
 * @param current         object used as requester in the log messages
 * @param manager         server manager used to resolve node names and the remote server
 * @param iRequest        request the response refers to
 * @param responsePayload result of the execution, possibly an exception
 */
static void sendResponseBack(final Object current, final ODistributedServerManager manager, final ODistributedRequest iRequest,
    Object responsePayload) {
  final boolean internalMessage = iRequest.getId().getMessageId() < 0;
  if (internalMessage) {
    // INTERNAL MSG
    return;
  }

  final String localNodeName = manager.getLocalNodeName();
  final String senderNodeName = manager.getNodeNameById(iRequest.getId().getNodeId());

  final ODistributedResponse response = new ODistributedResponse(iRequest.getId(), localNodeName, senderNodeName,
      responsePayload);

  try {
    // GET THE SENDER'S RESPONSE QUEUE
    final ORemoteServerController target = manager.getRemoteServer(senderNodeName);

    // NOTE(review): the placeholder is labelled reqId but receives the whole request - confirm this is intended
    ODistributedServerLog.debug(current, localNodeName, senderNodeName, DIRECTION.OUT, "Sending response %s back (reqId=%s)",
        response, iRequest);

    target.sendResponse(response);
  } catch (Exception e) {
    ODistributedServerLog.debug(current, localNodeName, senderNodeName, DIRECTION.OUT,
        "Error on sending response '%s' back (reqId=%s err=%s)", response, iRequest.getId(), e.toString());
  }
}
/**
 * Blocks while the distributed manager of the server is enabled and offline, checking again every 2 seconds for as long
 * as the worker is running. A warning is logged when the local queue has reached its configured size. An interruption
 * of the pause is ignored: the loop simply re-evaluates {@link #running} and the node status.
 */
private void waitNodeIsOnline() throws OTimeoutException {
  // WAIT THE NODE IS ONLINE AGAIN
  final ODistributedServerManager mgr = manager.getServerInstance().getDistributedManager();
  if (mgr == null || !mgr.isEnabled() || !mgr.isOffline())
    return;

  int retry = 0;
  while (running) {
    if (!mgr.isOffline())
      // OK, RETURN
      return;

    // NODE NOT ONLINE YET, HOLD THE COMMAND
    ODistributedServerLog.info(this, localNodeName, null, DIRECTION.NONE,
        "Node is not online yet (status=%s), blocking the command until it is online (retry=%d, queue=%d worker=%d)",
        mgr.getNodeStatus(), retry + 1, localQueue.size(), id);

    final boolean queueFull = localQueue.size() >= OGlobalConfiguration.DISTRIBUTED_LOCAL_QUEUESIZE.getValueAsInteger();
    if (queueFull) {
      // QUEUE FULL: ONLY A WARNING IS LOGGED HERE, THE QUEUE IS LEFT UNTOUCHED
      ODistributedServerLog.warn(this, localNodeName, null, DIRECTION.NONE,
          "Replication queue is full (retry=%d, queue=%d worker=%d), replication could be delayed", retry + 1,
          localQueue.size(), id);
    }

    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // IGNORED: THE LOOP CONDITION CHECKS running AGAIN
    }
    ++retry;
  }
}
/**
 * @return the number of requests taken from the local queue so far
 */
public long getProcessedRequests() {
  final long total = processedRequests.get();
  return total;
}
/**
 * Discards all the queued requests and, if a database instance is assigned, closes and releases it.
 */
public void reset() {
  localQueue.clear();

  if (database == null)
    return;

  database.activateOnCurrentThread();
  database.close();
  database = null;
}
/**
 * Asks the worker to stop without waiting for it: clears {@link #running} and interrupts the worker thread.
 */
public void sendShutdown() {
  running = false;
  interrupt();
}
/**
 * @return the request the worker loop has most recently marked as in execution, or {@code null} if none is marked
 */
public ODistributedRequest getProcessing() {
  return this.currentExecuting;
}
}