/** * Copyright French Prime minister Office/SGMAP/DINSIC/Vitam Program (2015-2019) * * contact.vitam@culture.gouv.fr * * This software is a computer program whose purpose is to implement a digital archiving back-office system managing * high volumetry securely and efficiently. * * This software is governed by the CeCILL 2.1 license under French law and abiding by the rules of distribution of free * software. You can use, modify and/ or redistribute the software under the terms of the CeCILL 2.1 license as * circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". * * As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, * users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the * successive licensors have only limited liability. * * In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or * developing or reproducing the software by the user in light of its specific status of free software, that may mean * that it is complicated to manipulate, and that also therefore means that it is reserved for developers and * experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the * software's suitability as regards their requirements in conditions enabling the security of their systems and/or data * to be ensured and, more generally, to use and operate it in the same conditions as regards security. * * The fact that you are presently reading this means that you have had knowledge of the CeCILL 2.1 license and that you * accept its terms. */ package fr.gouv.vitam.processing.distributor.core; import java.io.File; import java.io.IOException; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Semaphore; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import fr.gouv.vitam.common.GlobalDataRest; import fr.gouv.vitam.common.ParametersChecker; import fr.gouv.vitam.common.PropertiesUtils; import fr.gouv.vitam.common.exception.InvalidParseOperationException; import fr.gouv.vitam.common.json.JsonHandler; import fr.gouv.vitam.common.logging.VitamLogger; import fr.gouv.vitam.common.logging.VitamLoggerFactory; import fr.gouv.vitam.common.model.ItemStatus; import fr.gouv.vitam.common.model.StatusCode; import fr.gouv.vitam.common.thread.VitamThreadPoolExecutor; import fr.gouv.vitam.common.thread.VitamThreadUtils; import fr.gouv.vitam.processing.common.exception.ProcessingBadRequestException; import fr.gouv.vitam.processing.common.exception.WorkerAlreadyExistsException; import fr.gouv.vitam.processing.common.exception.WorkerFamilyNotFoundException; import fr.gouv.vitam.processing.common.exception.WorkerNotFoundException; import fr.gouv.vitam.processing.common.model.WorkerBean; import fr.gouv.vitam.processing.common.model.WorkerRemoteConfiguration; import fr.gouv.vitam.processing.model.WorkerAsyncRequest; import fr.gouv.vitam.processing.model.WorkerAsyncResponse; import fr.gouv.vitam.worker.client.WorkerClient; import fr.gouv.vitam.worker.client.WorkerClientConfiguration; import fr.gouv.vitam.worker.client.WorkerClientFactory; import fr.gouv.vitam.worker.client.exception.WorkerNotFoundClientException; import fr.gouv.vitam.worker.client.exception.WorkerServerClientException; /** * Manage the parallelism calls to worker in the same distributor */ public class WorkerManager { private static final VitamLogger LOGGER = VitamLoggerFactory.getInstance(WorkerManager.class); // No need to have a concurrent map while there is no dymanic add/remove of queues private static final ConcurrentMap<String, BlockingQueue<WorkerAsyncRequest>> STEP_BLOCKINGQUEUE_MAP = new ConcurrentHashMap<>(); // The risk of collision between a register/unregister worker is not null private static final Map<String, Map<String, WorkerThreadManager>> WORKERS_LIST = new ConcurrentHashMap<>(); private static final int DEFAULT_QUEUE_BACKLOG_SIZE = 20; private static final String WORKKER_DB_PATH = "worker.db"; private static final File WORKKER_DB_FILE = PropertiesUtils.fileFromDataFolder(WORKKER_DB_PATH); /** * Empty Constructor */ private WorkerManager() { } /** * Initialize the WorkerManager */ public static void initialize() { if (WORKKER_DB_FILE.exists()) { WorkerManager.loadWorkerList(WORKKER_DB_FILE); } else { LOGGER.warn("No worker list serialization file : " + WORKKER_DB_FILE.getName()); } } /** * To load a registered worker list * * @param registerWorkerFile */ private static void loadWorkerList(File registerWorkerFile){ // Load the list of worker from database // for now it is a file content json data ArrayNode registeredWorkerList = null; try { registeredWorkerList = (ArrayNode) JsonHandler.getFromFile(registerWorkerFile); } catch (InvalidParseOperationException e) { LOGGER.error("Cannot load worker list from database.",e); // Ignore the rest of the file return; } // load to the list of WORKERS_LIST for (JsonNode worker : registeredWorkerList) { WorkerBean workerBean; try{ workerBean = JsonHandler.getFromJsonNodeLowerCamelCase(worker, WorkerBean.class); }catch(InvalidParseOperationException e){ LOGGER.error("Invalid structure for \'"+ worker.toString()+"\'",e); // Invalid structure : Continue with the next worker continue; } String workerId = workerBean.getWorkerId(); String familyId = workerBean.getFamily(); // Ignore if the familyId or the workerId is null if (familyId == null || workerId == null){ // Mandatory argument missing : Continue with the next worker continue; } WorkerRemoteConfiguration config = workerBean.getConfiguration(); if (checkStatusWorker(config.getServerHost(), config.getServerPort())) { try{ registerWorker(workerBean); }catch(WorkerAlreadyExistsException e){ // This case should almost never happened as we are in the initialization LOGGER.error("Worker already exists during the initialization",e); } } } marshallToDB(); } private static boolean checkStatusWorker(String serverHost, int serverPort) { WorkerClientConfiguration workerClientConfiguration = new WorkerClientConfiguration(serverHost, serverPort); WorkerClientFactory.changeMode(workerClientConfiguration); WorkerClient workerClient = WorkerClientFactory.getInstance(workerClientConfiguration).getClient(); try { workerClient.checkStatus(); return true; } catch (Exception e) { LOGGER.error("Worker server [" + serverHost + ":" + serverPort + "] is not active.", e); return false; } } /** * To register a worker in the processing * * @param familyId : family of this worker * @param workerId : ID of the worker * @param workerInformation : Worker Json representation * @throws WorkerAlreadyExistsException : when the worker is already registered * @throws ProcessingBadRequestException * @throws InvalidParseOperationException */ public static void registerWorker(String familyId, String workerId, String workerInformation) throws WorkerAlreadyExistsException, ProcessingBadRequestException, InvalidParseOperationException { ParametersChecker.checkParameter("familyId is a mandatory argument", familyId); ParametersChecker.checkParameter("workerId is a mandatory argument", workerId); ParametersChecker.checkParameter("workerInformation is a mandatory argument", workerInformation); WorkerBean worker = null; try { worker = JsonHandler.getFromString(workerInformation, WorkerBean.class); if (!worker.getFamily().equals(familyId)) { throw new ProcessingBadRequestException("Cannot register a worker of another family!"); } else { worker.setWorkerId(workerId); } registerWorker(worker); } catch (final InvalidParseOperationException e) { LOGGER.error("Worker Information incorrect", e); throw new ProcessingBadRequestException("Worker description is incorrect"); } } private static void registerWorker(WorkerBean workerBean) throws WorkerAlreadyExistsException{ String familyId = workerBean.getFamily(); String workerId = workerBean.getWorkerId(); // Create the blocking queue for familyId worker if (STEP_BLOCKINGQUEUE_MAP.get(familyId) == null) { STEP_BLOCKINGQUEUE_MAP.put(familyId, new ArrayBlockingQueue<WorkerAsyncRequest>(DEFAULT_QUEUE_BACKLOG_SIZE)); } // Create the WorkerThreadManager for this new Worker final WorkerThreadManager workerThreadManager = new WorkerThreadManager(workerBean, familyId); if (WORKERS_LIST.get(familyId) != null) { final Map<String, WorkerThreadManager> familyWorkers = WORKERS_LIST.get(familyId); if (familyWorkers.get(workerId) != null) { LOGGER.warn("Worker already registered"); throw new WorkerAlreadyExistsException("Worker already registered"); } else { // Add the new WorkerThreadManager to the existing Family familyWorkers.put(workerId, workerThreadManager); // Note: not mandatory WORKERS_LIST.put(familyId, familyWorkers); VitamThreadPoolExecutor.getDefaultExecutor().execute(workerThreadManager); } } else { // Add the new WorkerThreadManager to the new Family // Note: Concurrent to prevent issue on Adding/Removing Workers final Map<String, WorkerThreadManager> familyWorkers = new ConcurrentHashMap<>(); familyWorkers.put(workerId, workerThreadManager); WORKERS_LIST.put(familyId, familyWorkers); VitamThreadPoolExecutor.getDefaultExecutor().execute(workerThreadManager); } // update new worker in the database marshallToDB(); } /** * To unregister a worker in the processing * * @param familyId : family of this worker * @param workerId : ID of the worker * @throws WorkerFamilyNotFoundException : when the family is unknown * @throws WorkerNotFoundException : when the ID of the worker is unknown in the family * @throws InterruptedException */ public static void unregisterWorker(String familyId, String workerId) throws WorkerFamilyNotFoundException, WorkerNotFoundException, InterruptedException { ParametersChecker.checkParameter("familyId is a mandatory argument", familyId); ParametersChecker.checkParameter("workerId is a mandatory argument", workerId); final Map<String, WorkerThreadManager> familyWorkers = WORKERS_LIST.get(familyId); if (familyWorkers != null) { WorkerThreadManager workerThreadManager = familyWorkers.get(workerId); if (workerThreadManager != null) { // Stop giving new tasks to this job workerThreadManager.stopWorkerThreadManager(); // Waiting for the workerThreadManager.waitingRunningJobsDone(GlobalDataRest.TIMEOUT_END_WORKER_MILLISECOND); familyWorkers.remove(workerId); // delete from database marshallToDB(); } else { LOGGER.error("Worker does not exist in this family"); throw new WorkerNotFoundException("Worker does not exist in this family"); } } else { LOGGER.error("Worker Family does not exist"); throw new WorkerFamilyNotFoundException("Worker Family does not exist"); } } /** * To submit a Job to the workerManager (blocking method) * * @param workerAsyncRequest : Asynchronous request * @throws ProcessingBadRequestException : if the queueID is unknown * @throws InterruptedException */ public static void submitJob(WorkerAsyncRequest workerAsyncRequest) throws ProcessingBadRequestException, InterruptedException { ParametersChecker.checkParameter("queue is a mandatory argument", workerAsyncRequest.getQueueID()); ParametersChecker.checkParameter("workerAsyncRequest is a mandatory argument", workerAsyncRequest); BlockingQueue<WorkerAsyncRequest> blockingQueue = STEP_BLOCKINGQUEUE_MAP.get(workerAsyncRequest.getQueueID()); if (blockingQueue != null) { blockingQueue.put(workerAsyncRequest); } else { throw new ProcessingBadRequestException( "Unknown queue in the workerManager : " + workerAsyncRequest.getQueueID()); } } /** * To remove a Job from the workerManager (non blocking method) * * @param workerAsyncRequest : Asynchronous request that must be removed * @return true if the workerAsyncRequest was present, false if not * @throws ProcessingBadRequestException : if the queueID is unknown */ public static boolean removeJobs(WorkerAsyncRequest workerAsyncRequest) throws ProcessingBadRequestException { BlockingQueue<WorkerAsyncRequest> blockingQueue = STEP_BLOCKINGQUEUE_MAP.get(workerAsyncRequest.getQueueID()); if (blockingQueue != null) { return blockingQueue.remove(workerAsyncRequest); } else { throw new ProcessingBadRequestException( "Unknown queue in the workerManager : " + workerAsyncRequest.getQueueID()); } } protected static Map<String, Map<String, WorkerThreadManager>> getWorkersList() { return WORKERS_LIST; } private synchronized static void marshallToDB() { if (!WORKKER_DB_FILE.exists()) { try { WORKKER_DB_FILE.createNewFile(); } catch (IOException e) { LOGGER.warn("Cannot create worker list serialization file : " + WORKKER_DB_FILE.getName(),e); } } ArrayNode registeredWorkers = JsonHandler.createArrayNode(); for (Entry<String, Map<String, WorkerThreadManager>> family : WORKERS_LIST.entrySet()) { for (Entry<String, WorkerThreadManager> worker : family.getValue().entrySet()) { try { JsonNode workerBean = JsonHandler.toJsonNode(worker.getValue().getWorkerBean()); registeredWorkers.add(workerBean); JsonHandler.writeAsFile(registeredWorkers, WORKKER_DB_FILE); } catch (InvalidParseOperationException e) { LOGGER.error("Cannot update database worker",e); } } } } /** * The WorkerThreadManager manages all the threads for a given Worker */ private static class WorkerThreadManager implements Runnable { private WorkerBean workerBean; // No high need to have AtomicBoolean as it is only used to stop the WorkerThreadManager (ex: during the // unregister of a worker) private volatile boolean toBeRunnable = true; private final String queue; private final Semaphore semaphore; private volatile Thread myself; private final int capacity; public WorkerThreadManager(WorkerBean workerBean, String queue) { ParametersChecker.checkParameter("workerBean is a mandatory argument", workerBean); ParametersChecker.checkParameter("queue is a mandatory argument", queue); this.workerBean = workerBean; this.queue = queue; this.capacity = workerBean.getCapacity(); this.semaphore = new Semaphore(capacity); } /** * Main forever method */ @Override public void run() { try { // Register now its own thread myself = Thread.currentThread(); // FIXME : when there is an unregisterWorker, the thread ends only after one more step (as it is // blocking on the take), but interrupt on stopWorkerThreadManager could partially resolve the issue // Order of the blocking call : first see if we have capacity in this worker (acquire the semaphore // token), then see if there is work to process . // So in this way, we don't take a task if can not treat it right now while (toBeRunnable) { semaphore.acquire(); if (STEP_BLOCKINGQUEUE_MAP.get(queue) != null) { WorkerAsyncRequest workerAsyncRequest = STEP_BLOCKINGQUEUE_MAP.get(queue).take(); VitamThreadPoolExecutor.getDefaultExecutor() .execute(new WorkerThread(this, workerAsyncRequest)); } } } catch (InterruptedException e) { // NOSONAR already taken into account LOGGER.warn("Probably unregistring this Worker", e); } } /** * Stop the workerThreadManager, both using boolean and interruption */ public void stopWorkerThreadManager() { toBeRunnable = false; if (myself != null) { myself.interrupt(); } } public void waitingRunningJobsDone(long timeout) throws InterruptedException { long epoch = System.currentTimeMillis(); while (System.currentTimeMillis() < (epoch + timeout)) { if (toBeRunnable && (semaphore.availablePermits() == capacity)) { return; } Thread.sleep(1000); } } public WorkerBean getWorkerBean() { return workerBean; } public Semaphore getSemaphore() { return semaphore; } } /** * The Worker Thread manages the actions for one thread for a given Worker */ private static class WorkerThread implements Runnable { private WorkerThreadManager workerThreadManager; private WorkerAsyncRequest workerAsyncRequest; public WorkerThread(WorkerThreadManager workerThreadManager, WorkerAsyncRequest workerAsyncRequest) { this.workerThreadManager = workerThreadManager; this.workerAsyncRequest = workerAsyncRequest; } /** * */ @Override public void run() { ItemStatus actionsResponse = null; // As this thread is not a son of a request Rest but of the WorkerThread VitamThreadUtils.getVitamSession().setRequestId(workerAsyncRequest.getSession().getRequestId()); VitamThreadUtils.getVitamSession().setTenantId(workerAsyncRequest.getSession().getTenantId()); try { actionsResponse = new ItemStatus(workerAsyncRequest.getDescriptionStep().getStep().getStepName()); loadWorkerClient(workerThreadManager.getWorkerBean()); WorkerClientConfiguration configuration = new WorkerClientConfiguration( workerThreadManager.getWorkerBean().getConfiguration().getServerHost(), workerThreadManager.getWorkerBean().getConfiguration().getServerPort()); try (WorkerClient workerClient = WorkerClientFactory.getInstance(configuration).getClient()) { actionsResponse = workerClient.submitStep(workerAsyncRequest.getDescriptionStep()); // FIXME : à voir comment retraiter } catch (WorkerNotFoundClientException | WorkerServerClientException e) { // Maybe resubmit, not throwing any state // try { // submitJob(workerAsyncRequest); // and not setting actionsResponse (using a special boolean ?) // } catch (ProcessingInternalServerException | ProcessingBadRequestException | // InterruptedException e1) { // but only once // LOGGER.error(e); // actionsResponse.increment(StatusCode.FATAL); // } // Or maybe having a way to return back to the ProcessDistributor the not launched task, letting him // handling this one, but not removing it from the to be run // and not setting actionsResponse (using a special boolean ?) // Note: method to create // workerAsyncRequest.callCallbackCannotRun(workerAsyncRequest); // check status boolean checkStatus = false; int numberCallCheckStatus = 0; while (!checkStatus && numberCallCheckStatus < GlobalDataRest.STATUS_CHECK_RETRY) { checkStatus = checkStatusWorker(workerThreadManager.getWorkerBean().getConfiguration().getServerHost(), workerThreadManager.getWorkerBean().getConfiguration().getServerPort()); numberCallCheckStatus++; if (!checkStatus) { try { this.wait(1000); } catch (final InterruptedException e1) { LOGGER.error(e); } } } if (!checkStatus) { try { WorkerManager.unregisterWorker(workerThreadManager.getWorkerBean().getFamily(), workerThreadManager.getWorkerBean().getWorkerId()); } catch (WorkerFamilyNotFoundException | WorkerNotFoundException | InterruptedException e1) { LOGGER.error("Cannot unregister the worker."); } } LOGGER.error(e); actionsResponse.increment(StatusCode.FATAL); } } catch (RuntimeException | Error e) { if (actionsResponse != null) { actionsResponse.increment(StatusCode.FATAL); } LOGGER.error(e); } finally { // except using a special boolean for resubmit ? workerAsyncRequest .callCallback(new WorkerAsyncResponse(workerAsyncRequest, actionsResponse)); workerThreadManager.getSemaphore().release(); } } private void loadWorkerClient(WorkerBean workerBean) { final WorkerClientConfiguration workerClientConfiguration = new WorkerClientConfiguration(workerBean.getConfiguration().getServerHost(), workerBean.getConfiguration().getServerPort()); WorkerClientFactory.changeMode(workerClientConfiguration); } } }