package com.sequenceiq.cloudbreak.orchestrator.salt.poller;
import static com.sequenceiq.cloudbreak.orchestrator.salt.domain.JobId.jobId;
import java.util.Collection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Multimap;
import com.sequenceiq.cloudbreak.orchestrator.OrchestratorBootstrap;
import com.sequenceiq.cloudbreak.orchestrator.exception.CloudbreakOrchestratorFailedException;
import com.sequenceiq.cloudbreak.orchestrator.exception.CloudbreakOrchestratorTerminateException;
import com.sequenceiq.cloudbreak.orchestrator.salt.client.SaltConnector;
import com.sequenceiq.cloudbreak.orchestrator.salt.client.target.Compound;
import com.sequenceiq.cloudbreak.orchestrator.salt.domain.JobId;
import com.sequenceiq.cloudbreak.orchestrator.salt.domain.JobState;
import com.sequenceiq.cloudbreak.orchestrator.salt.states.SaltStates;
/**
 * Drives a single {@link SaltJobRunner} based on the {@link JobState} stored in that runner.
 *
 * <p>Each invocation of {@link #call()} either submits the job for the first time, checks on a job that was
 * submitted earlier, or re-submits a job that previously ended up failed or ambiguous. The outcome of every check
 * is written back into the runner (state, nodes with errors, remaining targets).
 */
public class SaltJobIdTracker implements OrchestratorBootstrap {

    private static final Logger LOGGER = LoggerFactory.getLogger(SaltJobIdTracker.class);

    private final SaltConnector saltConnector;

    private SaltJobRunner saltJobRunner;

    private final boolean retryOnFail;

    /**
     * Creates a tracker with {@code retryOnFail} enabled.
     *
     * @param saltConnector connector handed to the runner and to {@link SaltStates}
     * @param saltJobRunner runner holding the job id, state and targets of the tracked job
     */
    public SaltJobIdTracker(SaltConnector saltConnector, SaltJobRunner saltJobRunner) {
        this(saltConnector, saltJobRunner, true);
    }

    /**
     * @param saltConnector connector handed to the runner and to {@link SaltStates}
     * @param saltJobRunner runner holding the job id, state and targets of the tracked job
     * @param retryOnFail   when {@code false}, a runner found in the FAILED state makes {@link #call()} throw a
     *                      {@link CloudbreakOrchestratorTerminateException} instead of re-submitting the job
     */
    public SaltJobIdTracker(SaltConnector saltConnector, SaltJobRunner saltJobRunner, boolean retryOnFail) {
        this.saltConnector = saltConnector;
        this.saltJobRunner = saltJobRunner;
        this.retryOnFail = retryOnFail;
    }

    /**
     * Advances the tracked job by one step and reports whether it is done.
     *
     * @return {@code true} when, after this step, the runner is neither in progress nor failed/ambiguous
     * @throws CloudbreakOrchestratorTerminateException when the runner is FAILED and retrying is disabled
     * @throws CloudbreakOrchestratorFailedException    when the job is still running, or ended up failed or
     *                                                  ambiguous after the check performed in this step
     * @throws Exception                                anything raised while submitting the job
     */
    @Override
    public Boolean call() throws Exception {
        if (runnerIsIn(JobState.NOT_STARTED)) {
            submitForTheFirstTime();
        } else if (runnerIsIn(JobState.IN_PROGRESS)) {
            pollSubmittedJob();
        } else if (!retryOnFail && runnerIsIn(JobState.FAILED)) {
            String failedJid = currentJid();
            LOGGER.info("Job: {} failed. Terminate execution on these targets: {}", failedJid, saltJobRunner.getTarget());
            throw new CloudbreakOrchestratorTerminateException(buildErrorMessage());
        } else if (runnerFailedOrAmbiguous()) {
            resubmit();
            // The runner is IN_PROGRESS now, so the nested invocation takes the polling branch.
            return call();
        }
        return concludeOrFail();
    }

    /** Compares the state currently stored in the runner with {@code expected}; reads the state exactly once. */
    private boolean runnerIsIn(JobState expected) {
        return expected == saltJobRunner.getJobState();
    }

    /** Tells whether the runner is currently FAILED or AMBIGUOUS. */
    private boolean runnerFailedOrAmbiguous() {
        return runnerIsIn(JobState.FAILED) || runnerIsIn(JobState.AMBIGUOUS);
    }

    /** Returns the textual job id stored in the runner. */
    private String currentJid() {
        return saltJobRunner.getJid().getJobId();
    }

    /** Submits the job, stores the returned job id in the runner and immediately checks on the job. */
    private void submitForTheFirstTime() throws Exception {
        LOGGER.info("Job has not started in the cluster. Starting for first time.");
        JobId submitted = jobId(saltJobRunner.submit(saltConnector));
        String jid = submitted.getJobId();
        saltJobRunner.setJid(submitted);
        checkIsFinished(jid);
    }

    /** Checks on the job whose id is already stored in the runner. */
    private void pollSubmittedJob() {
        String jid = currentJid();
        LOGGER.info("Job: {} is running currently checking the current state.", jid);
        checkIsFinished(jid);
    }

    /** Submits the job again, stores the new job id and marks the runner as IN_PROGRESS. */
    private void resubmit() throws Exception {
        String previousJid = currentJid();
        LOGGER.info("Job: {} failed in the previous time. Trigger again with these targets: {}", previousJid, saltJobRunner.getTarget());
        JobId resubmitted = jobId(saltJobRunner.submit(saltConnector));
        saltJobRunner.setJid(resubmitted);
        saltJobRunner.setJobState(JobState.IN_PROGRESS);
    }

    /**
     * Turns the state left behind by the preceding check into the result of {@link #call()}: an exception while
     * the job is running, failed or ambiguous, {@code true} otherwise.
     */
    private Boolean concludeOrFail() throws CloudbreakOrchestratorFailedException {
        if (runnerIsIn(JobState.IN_PROGRESS)) {
            throw new CloudbreakOrchestratorFailedException(String.format("Job: %s is running currently", saltJobRunner.getJid()));
        }
        if (runnerFailedOrAmbiguous()) {
            throw new CloudbreakOrchestratorFailedException(buildErrorMessage());
        }
        LOGGER.info("Job (jid: {}) was finished. Triggering next salt event.", currentJid());
        return Boolean.TRUE;
    }

    /**
     * Asks {@link SaltStates} whether the job is still running. A running job only marks the runner as
     * IN_PROGRESS; otherwise the job result is evaluated.
     */
    private void checkIsFinished(String jobId) {
        if (!SaltStates.jobIsRunning(saltConnector, jobId)) {
            LOGGER.info("Job finished: {}. Collecting missing nodes", jobId);
            checkJobFinishedWithSuccess();
            return;
        }
        LOGGER.info("Job: {} is running currently, waiting for next polling attempt.", jobId);
        saltJobRunner.setJobState(JobState.IN_PROGRESS);
    }

    /**
     * Builds the failure description: one header line with the job id and the targets, followed by one line per
     * node listing the errors recorded for it in the runner (if any were recorded).
     */
    private String buildErrorMessage() {
        String jid = currentJid();
        StringBuilder message = new StringBuilder(
                String.format("There are missing nodes from job (jid: %s), target: %s", jid, saltJobRunner.getTarget()));
        Multimap<String, String> failuresByNode = saltJobRunner.getNodesWithError();
        if (failuresByNode == null) {
            return message.toString();
        }
        for (String node : failuresByNode.keySet()) {
            Collection<String> nodeErrors = failuresByNode.get(node);
            message.append("\n")
                    .append("Node: ")
                    .append(node)
                    .append(" Error(s): ")
                    .append(String.join(" | ", nodeErrors));
        }
        return message.toString();
    }

    /**
     * Evaluates the result of the completed job and records it in the runner: FINISHED when
     * {@link SaltStates#jidInfo} reports nothing, FAILED (plus the reported nodes as the new targets) when it
     * does, and AMBIGUOUS when the evaluation itself throws a {@link RuntimeException}.
     */
    private void checkJobFinishedWithSuccess() {
        String jid = currentJid();
        try {
            Compound target = new Compound(saltJobRunner.getTarget());
            Multimap<String, String> failedNodes = SaltStates.jidInfo(saltConnector, jid, target, saltJobRunner.stateType());
            if (failedNodes.isEmpty()) {
                LOGGER.info("The job (jid: {}) completed successfully on every node.", jid);
                saltJobRunner.setJobState(JobState.FINISHED);
                return;
            }
            LOGGER.info("There are missing nodes after the job (jid: {}) completion: {}", jid, String.join(",", failedNodes.keySet()));
            saltJobRunner.setJobState(JobState.FAILED);
            saltJobRunner.setNodesWithError(failedNodes);
            // Narrow the targets to the reported nodes so that a later re-submit only addresses those.
            saltJobRunner.setTarget(failedNodes.keySet());
        } catch (RuntimeException e) {
            LOGGER.warn("Fail while checking the result (jid: {}), this usually occurs due to concurrency", jid, e);
            saltJobRunner.setJobState(JobState.AMBIGUOUS);
        }
    }

    @Override
    public String toString() {
        return "SaltJobIdTracker{saltJobRunner=" + saltJobRunner + '}';
    }
}