/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.job.spawn;
import java.util.HashMap;
import java.util.Map;
import com.addthis.basis.util.JitterClock;
import com.addthis.hydra.job.Job;
import com.addthis.hydra.job.JobState;
import com.addthis.hydra.job.JobTask;
import com.addthis.hydra.job.mq.HostState;
/**
 * Collects a snapshot of cluster-wide job, task, slot and disk counters.
 *
 * <p>Each invocation of {@link #run()} walks every job (and a copy of each job's tasks) while
 * holding the spawn job lock, then walks the live hosts, and finally publishes the totals both
 * to {@code spawn.spawnFormattedLogger.periodicState(...)} and to the {@link SpawnMetrics}
 * gauges.
 */
class UpdateEventRunnable implements Runnable {

    private final Spawn spawn;
    /** Reused between runs; cleared and repopulated on every call to {@link #run()}. */
    private final Map<String, Long> events = new HashMap<>();

    public UpdateEventRunnable(Spawn spawn) {
        this.spawn = spawn;
    }

    /**
     * Recomputes all counters and publishes them.
     *
     * <p>Job and task counters are gathered under the spawn job lock; host counters are gathered
     * afterwards without that lock.
     */
    @Override
    public void run() {
        int slotsAvailable = 0;
        int slotsTotal = 0;
        int jobsTotal = 0;
        int jobsHung = 0;
        int jobsRunning = 0;
        int jobsScheduled = 0;
        int jobsErrored = 0;
        int tasksTotal = 0;
        int tasksProcessing = 0;
        int tasksReplicating = 0;
        int tasksBackingUp = 0;
        int tasksBusy = 0;
        int tasksRebalancing = 0;
        int tasksErrored = 0;
        int tasksQueued = 0;
        int taskQueuedNoSlot = 0;
        long files = 0;
        long bytes = 0;

        spawn.acquireJobLock();
        try {
            for (Job job : spawn.spawnState.jobs.values()) {
                jobsTotal++;
                for (JobTask jn : job.getCopyOfTasks()) {
                    tasksTotal++;
                    switch (jn.getState()) {
                        // States that only count towards the generic "busy" total.
                        case ALLOCATED:
                        case REVERT:
                        case SWAPPING:
                        case MIGRATING:
                            tasksBusy++;
                            break;
                        case BUSY:
                            tasksProcessing++;
                            tasksBusy++;
                            break;
                        case BACKUP:
                            tasksBackingUp++;
                            tasksBusy++;
                            break;
                        // Both replication flavours share one counter.
                        case REPLICATE:
                        case FULL_REPLICATE:
                            tasksReplicating++;
                            tasksBusy++;
                            break;
                        case REBALANCE:
                            tasksRebalancing++;
                            tasksBusy++;
                            break;
                        case ERROR:
                            tasksErrored++;
                            break;
                        case QUEUED:
                        case QUEUED_HOST_UNAVAIL:
                            tasksQueued++;
                            break;
                        case QUEUED_NO_SLOT:
                            tasksQueued++;
                            taskQueuedNoSlot++;
                            break;
                        case IDLE:
                        default:
                            // Idle and any other state contribute only to tasksTotal.
                            break;
                    }
                    files += jn.getFileCount();
                    bytes += jn.getByteCount();
                }
                // Read the job state once so every counter below is based on the same value.
                switch (job.getState()) {
                    case RUNNING:
                        jobsRunning++;
                        // A running job counts as hung once it has run for more than twice its
                        // configured maximum run time; jobs missing either value are skipped.
                        if (job.getStartTime() != null && job.getMaxRunTime() != null &&
                            (JitterClock.globalTime() - job.getStartTime() > job.getMaxRunTime() * 2)) {
                            jobsHung++;
                        }
                        break;
                    case SCHEDULED:
                        jobsScheduled++;
                        break;
                    case ERROR:
                        jobsErrored++;
                        break;
                    case IDLE:
                    default:
                        break;
                }
            }
        } finally {
            spawn.releaseJobLock();
        }

        long diskUsed = 0;
        long diskCapacity = 0;
        for (HostState host : spawn.hostManager.getLiveHosts(null)) {
            diskUsed += host.getUsed().getDisk();
            diskCapacity += host.getMax().getDisk();
            slotsAvailable += host.getAvailableTaskSlots();
            slotsTotal += host.getMaxTaskSlots();
        }
        // With no live hosts (or hosts reporting zero capacity) the division would yield NaN or
        // Infinity, which would then be pushed into the metrics gauge; report 0 instead.
        // NOTE(review): despite the name, this is used / capacity, not the free fraction --
        // kept as-is because consumers of the metric may depend on it; confirm intent.
        float diskAvailable = (diskCapacity == 0)
                              ? 0.0f
                              : ((float) diskUsed) / ((float) diskCapacity);

        events.clear();
        events.put("time", System.currentTimeMillis());
        events.put("hosts", (long) spawn.hostManager.monitored.size());
        events.put("commands", (long) spawn.getJobCommandManager().size());
        events.put("macros", (long) spawn.getJobMacroManager().size());
        events.put("jobs", (long) spawn.spawnState.jobs.size());
        events.put("jobs_running", (long) jobsRunning);
        events.put("jobs_scheduled", (long) jobsScheduled);
        events.put("jobs_errored", (long) jobsErrored);
        events.put("jobs_hung", (long) jobsHung);
        events.put("jobs_total", (long) jobsTotal);
        events.put("tasks_busy", (long) tasksBusy);
        events.put("tasks_queued", (long) tasksQueued);
        events.put("tasks_queued_no_slot", (long) taskQueuedNoSlot);
        events.put("tasks_errored", (long) tasksErrored);
        events.put("tasks_total", (long) tasksTotal);
        events.put("slots_available", (long) slotsAvailable);
        events.put("slots_total", (long) slotsTotal);
        events.put("files", files);
        events.put("bytes", bytes);
        events.put("disk_used", diskUsed);
        events.put("disk_capacity", diskCapacity);
        spawn.spawnFormattedLogger.periodicState(events);

        SpawnMetrics.totalTaskCount.set(tasksTotal);
        SpawnMetrics.runningTaskCount.set(tasksBusy);
        SpawnMetrics.queuedTaskCount.set(tasksQueued);
        SpawnMetrics.queuedTaskNoSlotCount.set(taskQueuedNoSlot);
        SpawnMetrics.failTaskCount.set(tasksErrored);
        SpawnMetrics.totalJobCount.set(jobsTotal);
        SpawnMetrics.processingTaskCount.set(tasksProcessing);
        SpawnMetrics.replicatingTaskCount.set(tasksReplicating);
        SpawnMetrics.backingUpTaskCount.set(tasksBackingUp);
        SpawnMetrics.runningJobCount.set(jobsRunning);
        SpawnMetrics.rebalancingTaskCount.set(tasksRebalancing);
        SpawnMetrics.queuedJobCount.set(jobsScheduled);
        SpawnMetrics.failJobCount.set(jobsErrored);
        SpawnMetrics.hungJobCount.set(jobsHung);
        SpawnMetrics.diskAvailablePercent.set(diskAvailable);
        SpawnMetrics.availableSlotCount.set(slotsAvailable);
        SpawnMetrics.totalSlotCount.set(slotsTotal);
    }
}