package se.chalmers.gdcn.replica;

import net.tomp2p.peers.Number160;
import se.chalmers.gdcn.compare.EqualityControl;
import se.chalmers.gdcn.compare.QualityControl;
import se.chalmers.gdcn.compare.Trust;
import se.chalmers.gdcn.compare.TrustQuality;
import se.chalmers.gdcn.control.TaskRunner;
import se.chalmers.gdcn.control.WorkerReputationManager;
import se.chalmers.gdcn.control.WorkerTimeoutManager;
import se.chalmers.gdcn.demo.WorkerNames;
import se.chalmers.gdcn.files.FileManagementUtils;
import se.chalmers.gdcn.files.SelfWorker;
import se.chalmers.gdcn.files.TaskMeta;
import se.chalmers.gdcn.files.TaskMetaDataException;
import se.chalmers.gdcn.network.WorkerID;
import se.chalmers.gdcn.taskbuilder.Task;
import se.chalmers.gdcn.taskbuilder.communicationToClient.TaskListener;
import se.chalmers.gdcn.taskbuilder.utils.FormatString;
import se.chalmers.gdcn.utils.ByteArray;
import se.chalmers.gdcn.utils.Identifier;
import se.chalmers.gdcn.utils.SerializableTimer;
import se.chalmers.gdcn.utils.Time;

import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;

/**
 * Created by Leif on 2014-03-31.
 *
 * Hands out replicas of tasks to workers, records whether each replica finished, failed or
 * timed out, and validates the returned results once enough of them have come back. The
 * accepted result of a task is stored in the {@link Archive}.
 *
 * Most public methods are synchronized on this instance.
 *
 * //TODO reader-writer synchronization instead of common mutex?
 */
public class ReplicaManager implements Serializable, Cloneable{

    private final int REPLICAS;
    private final int EXPECTED_REPUTATION;

    private final Time TIME_UNIT;
    private final int CALENDAR_VALUE;

    /** Not serialized; may be null after deserialization or in tests, see {@link #resumeTimer()}. */
    private transient TaskRunner runner;

    private final Archive archive;

    private final WorkerReputationManager workerReputationManager;
    private final WorkerTimeoutManager workerTimeoutManager;
    private final SerializableReplicaTimer replicaTimer;

    /** Every replica that has been handed out and not yet validated. */
    private final Map<ReplicaID, Replica> replicaMap = new HashMap<>();
    /** The task each handed-out replica belongs to. */
    private final Map<ReplicaID, TaskData> taskDataMap = new HashMap<>();
    /** Per-task bookkeeping of replica states. */
    private final Map<TaskID, TaskResultData> resultDataMap = new HashMap<>();
    /** Tasks each worker has already received a replica of. */
    private final Map<WorkerID, Set<TaskData>> assignedTasks = new HashMap<>();

    // Used for decision making based on reputation
    private final TreeSet<TaskCompare> taskDatas = new TreeSet<>(new TaskComparator());

    //For testing:
    private boolean workSelfIfRequired = true;
    private transient PropertyChangeListener validationListener = null;

    public static class ReplicaID extends Identifier{
        public ReplicaID(String id) {
            super(id);
        }
    }

    public static class TaskID extends Identifier{
        public TaskID(String id) {
            super(id);
        }
    }

    /**
     * Contains information about the status of a task
     */
    private static class TaskResultData implements Serializable{
        final Set<ReplicaID> failedReplicas = new HashSet<>();
        final Set<ReplicaID> outdatedReplicas = new HashSet<>();
        final Set<ReplicaID> pendingReplicas = new HashSet<>();
        final Set<ReplicaID> excessPendingReplicas = new HashSet<>();
        final Map<ReplicaID, byte[]> returnedReplicas = new HashMap<>();
    }

    /**
     * Please use {@link se.chalmers.gdcn.replica.ReplicaManagerBuilder} for constructing this class
     *
     * @param workerReputationManager source of worker reputations, also receives promote/report calls
     * @param runner executor for the timers and for self-work; may be null (see {@link #resumeTimer()})
     * @param timeUnit unit of {@code calendarValue}
     * @param updateInterval interval passed to the replica timer; the worker timeout manager gets twice this value
     * @param replicas passed on to every {@link TaskData} created by this manager
     * @param expectedReputation passed on to every {@link TaskData}; also the threshold for working self
     * @param calendarValue amount of {@code timeUnit} until a handed-out replica is considered outdated
     */
    ReplicaManager(WorkerReputationManager workerReputationManager, TaskRunner runner, Time timeUnit,
                   long updateInterval, int replicas, int expectedReputation, int calendarValue){
        REPLICAS = replicas;
        EXPECTED_REPUTATION = expectedReputation;
        TIME_UNIT = timeUnit;
        CALENDAR_VALUE = calendarValue;

        replicaTimer = new SerializableReplicaTimer(updateInterval);
        this.workerReputationManager = workerReputationManager;

        //TODO calibrate: are these acceptable values?
        workerTimeoutManager = new WorkerTimeoutManager(updateInterval*2, timeUnit, calendarValue*3);
        archive = new Archive();

        this.runner = runner;
        resumeTimer();
    }

    /**
     * Replaces the runner used for timers and self-work.
     * @param taskManager new runner
     */
    public synchronized void setTaskManager(TaskRunner taskManager) {
        this.runner = taskManager;
    }

    /**
     * Only for testing!!!
     * @param validationListener validation listener
     */
    public void setValidationListener(PropertyChangeListener validationListener) {
        this.validationListener = validationListener;
    }

    /**
     * @return the reputation manager given at construction
     */
    public WorkerReputationManager getWorkerReputationManager() {
        return workerReputationManager;
    }

    /**
     * Mainly intended for testing
     * @param workSelfIfRequired true if allow JobOwner to work himself if there are too few active workers
     */
    public synchronized void setWorkSelfIfRequired(boolean workSelfIfRequired) {
        this.workSelfIfRequired = workSelfIfRequired;
    }

    /**
     * Must be called after being deserialized for the timer to start running!
     * Is called in constructor.
     */
    public void resumeTimer(){
        if(runner != null){
            runner.submit(replicaTimer.createUpdater());
            runner.submit(workerTimeoutManager.timerRunner());
        } else {
            //In testing...
            SerializableTimer.resume(replicaTimer);
        }
    }

    /**
     * Load TaskMeta objects to make replicas of
     * @param jobName Name of the job the tasks belong to
     * @param tasks List of TaskMeta objects
     */
    public synchronized void loadTasksAndReplicate(String jobName, List<TaskMeta> tasks){
        for(TaskMeta task : tasks){
            taskDatas.add(new TaskData(task, jobName, REPLICAS, EXPECTED_REPUTATION));
        }
    }

    /**
     * Picks a task this worker has not been given before, creates a replica of it with a
     * previously unused ID, registers the replica with the timeout timer and marks it as
     * pending (or excess pending, if the task already has enough returned replicas).
     *
     * @param worker Worker node
     * @return Replica info if there are any. Returns null if queue is empty.
     */
    public synchronized ReplicaBox giveReplicaToWorker(WorkerID worker){
        workerTimeoutManager.activate(worker);

        Set<TaskData> alreadyGiven = assignedTasks.get(worker);
        if(alreadyGiven == null){
            alreadyGiven = new HashSet<>();
            assignedTasks.put(worker, alreadyGiven);
        }

        //Shallow copy intended. The SortedSet copy constructor keeps the comparator.
        TreeSet<TaskCompare> notGiven = new TreeSet<TaskCompare>(taskDatas);
        notGiven.removeAll(alreadyGiven);

        if(notGiven.isEmpty()){
            //No task left to work on for that worker
            //TODO remove this output
            System.out.println("ReplicaManager: tasks "+taskDatas.size());
            System.out.println("ReplicaManager: alreadyGiven "+alreadyGiven.size());
            return null;
        }

        final int workerReputation = workerReputationManager.getReputation(worker);

        //Probe element: lets the sorted set be searched by the worker's reputation
        TaskCompare reputationCompare = new TaskCompare() {
            @Override
            public float value() {
                return workerReputation;
            }

            @Override
            public String order() {
                return "";
            }
        };

        //Assign task in a smart manner
        TaskData taskData = (TaskData) notGiven.floor(reputationCompare);
        if(taskData == null){
            //Warning, might not fulfill reputation demand!
            taskData = (TaskData) notGiven.ceiling(reputationCompare);
        }

        //TaskData changes state internally which affects its sorted position! Remove and insert!
        taskDatas.remove(taskData);
        TaskMeta taskMeta = taskData.giveTask(worker, workerReputation);
        taskDatas.add(taskData);

        //ReplicaBox constructor randomizes a new ID. Must check for no reuse.
        ReplicaBox replicaBox = new ReplicaBox(taskMeta);
        while (replicaMap.containsKey(replicaBox.getReplicaID())){
            replicaBox = new ReplicaBox(taskMeta);
        }
        final ReplicaID replicaID = replicaBox.getReplicaID();

        //Update state:
        alreadyGiven.add(taskData);
        taskDataMap.put(replicaID, taskData);
        replicaMap.put(replicaID, new Replica(replicaBox, worker));

        Date deadline = Time.futureDate(this.TIME_UNIT, CALENDAR_VALUE);
        replicaTimer.add(replicaID, deadline);

        TaskResultData taskResultData = resultDataMap.get(taskData.taskID());
        if(taskResultData == null){
            taskResultData = new TaskResultData();
            resultDataMap.put(taskData.taskID(), taskResultData);
        }

        if(taskData.enoughReturned()){
            //Validation does not wait for excess replicas, see decideValidate
            taskResultData.excessPendingReplicas.add(replicaID);
        } else {
            taskResultData.pendingReplicas.add(replicaID);
        }

        return replicaBox;
    }

    /**
     *
     * @param replicaID ID of a replica
     * @return Key for the result file in DHT
     * @throws IllegalStateException if the replica is unknown
     */
    public synchronized Number160 getReplicaResultKey(ReplicaID replicaID){
        final Replica replica = replicaMap.get(replicaID);
        if(replica == null){
            throw new IllegalStateException("Error: Replica was not found!");
        }
        return replica.getReplicaBox().getResultKey();
    }

    /**
     * This method should only be used externally for testing!
     * Is called internally.
     *
     * This replica didn't get any answer within given time limit.
     * Doesn't have to report worker, he might still come up with an answer.
     * @param replicaID Replica that was outdated
     * @throws IllegalStateException if the replica is unknown or was not awaited
     */
    public synchronized void replicaOutdated(ReplicaID replicaID){
        TaskData taskData = taskDataMap.get(replicaID);
        if(taskData == null){
            throw new IllegalStateException("Couldn't find TaskData in taskDataMap!");
        }
        TaskResultData resultData = resultDataMap.get(taskData.taskID());
        awaitingReplica(resultData, replicaID);

        //Sorted position depends on TaskData state: remove, mutate, insert.
        //NOTE(review): this also inserts a task that validateResults removed earlier - confirm intended.
        taskDatas.remove(taskData);
        taskData.timedOut(replicaMap.get(replicaID).getWorker());
        taskDatas.add(taskData);

        resultData.outdatedReplicas.add(replicaID);
        decideValidate(replicaID, taskData, resultData);
    }

    /**
     * Registers that the worker reported this replica as failed.
     * @param replicaID Replica that failed
     * @throws IllegalStateException if the replica is unknown or was not awaited
     */
    public synchronized void replicaFailed(ReplicaID replicaID){
        TaskData taskData = taskDataMap.get(replicaID);
        if(taskData == null){
            throw new IllegalStateException("Couldn't find TaskData in taskDataMap!");
        }
        TaskResultData resultData = resultDataMap.get(taskData.taskID());
        awaitingReplica(resultData, replicaID);

        //Sorted position depends on TaskData state: remove, mutate, insert.
        taskDatas.remove(taskData);
        taskData.returned(replicaMap.get(replicaID).getWorker());
        taskDatas.add(taskData);

        resultData.failedReplicas.add(replicaID);
        decideValidate(replicaID, taskData, resultData);
    }

    /**
     * Registers the result a worker returned for this replica.
     * @param replicaID Replica that finished
     * @param result Result data, must not be null
     * @throws IllegalArgumentException if result is null
     * @throws IllegalStateException if the replica is unknown or was not awaited
     */
    public synchronized void replicaFinished(ReplicaID replicaID, byte[] result){
        if(result == null){
            throw new IllegalArgumentException("Error: don't give null result!");
        }
        TaskData taskData = taskDataMap.get(replicaID);
        if(taskData == null){
            throw new IllegalStateException("Couldn't find TaskData in taskDataMap!");
        }
        TaskResultData resultData = resultDataMap.get(taskData.taskID());
        awaitingReplica(resultData, replicaID);

        //Sorted position depends on TaskData state: remove, mutate, insert.
        taskDatas.remove(taskData);
        taskData.returned(replicaMap.get(replicaID).getWorker());
        taskDatas.add(taskData);

        resultData.returnedReplicas.put(replicaID, result);
        decideValidate(replicaID, taskData, resultData);
    }

    /**
     * Removes the replica from whichever "still waiting" set it is in.
     * @throws IllegalStateException if it is in none of them
     */
    private void awaitingReplica(TaskResultData resultData, ReplicaID replicaID){
        if(resultData.pendingReplicas.remove(replicaID)){
            return;
        }
        if(resultData.outdatedReplicas.remove(replicaID)){
            return;
        }
        if(resultData.excessPendingReplicas.remove(replicaID)){
            return;
        }
        throw new IllegalStateException("Expected replicaID to be in pendingReplicas or outdatedReplicas!");
    }

    /**
     * Called after every state change of a replica. Cancels the replica's timeout, lets the job
     * owner work himself if required, and validates the task once enough replicas have returned
     * and no regular pending replica remains.
     */
    private void decideValidate(ReplicaID replicaID, TaskData taskData, TaskResultData resultData){
        //Make sure timeout will not be called on this replicaID:
        replicaTimer.remove(replicaID);

        if(workSelfIfRequired
                && !isThereTaskWithEnoughReputationAlready()
                && sumActiveReputation() < EXPECTED_REPUTATION){
            //TODO loops here
            workSelf();
        }

        if(!taskData.enoughReturned()){
            //Ignore - cannot validate yet
            return;
        }
        //Can validate

        if(resultData.pendingReplicas.size() > 0){
            //Wait for some more replicas that was given previously
            //However don't wait on "excess" replicas
            return;
        }

        CanonicalResult archivedResult = archive.getArchivedResult(taskData.taskID());
        if(archivedResult == null){
            //First validation
            validateResults(taskData, resultData);
        } else {
            //Has validated before: compare previous result
            //NOTE(review): if this replica timed out or failed there is no entry in returnedReplicas,
            //so the lookup below yields null - confirm ByteArray and validateLatecomer cope with that.
            ByteArray byteArray = new ByteArray(resultData.returnedReplicas.get(replicaID));
            validateLatecomer(archivedResult, taskData, replicaID, byteArray);
        }
    }

    /**
     * @return sum of the reputation of all workers the timeout manager considers active
     */
    private float sumActiveReputation(){
        float sum = 0;
        for(WorkerID workerID : workerTimeoutManager.getActiveWorkers()){
            sum += workerReputationManager.getReputation(workerID);
        }
        return sum;
    }

    /**
     * Works on the most appropriate task himself. Reuses smart assignment.
     * Does nothing when there is no runner to execute the work on.
     */
    private void workSelf(){
        if(runner == null){
            //Transient field: null in tests and after deserialization until setTaskManager is called.
            //Bail out before a replica is allocated that could never be executed.
            return;
        }

        ReplicaBox replicaBox = giveReplicaToWorker(workerReputationManager.getMyWorkerID());
        if(replicaBox == null){
            //If job owner has already worked on all available tasks.
            return;
        }

        final ReplicaID replicaID = replicaBox.getReplicaID();
        final TaskData taskData = taskDataMap.get(replicaID);
        TaskMeta meta = taskData.getTaskMeta();

        try {
            SelfWorker selfWorker = new SelfWorker(meta, taskData.getJobName());
            final String resultPath = selfWorker.futureResultFilePath();

            Task taskRunner = selfWorker.workSelf(meta, new TaskListener() {
                @Override
                public void taskFinished(String taskName) {
                    System.out.println("ReplicaManager#workSelf: "+taskName+ " finished.");
                    try {
                        byte[] result = FileManagementUtils.fromFile(new File(resultPath));
                        replicaFinished(replicaID, result);

                        //TODO is this really necessary: to tell UI?
                        runner.getTaskListener().taskFinished(taskName);

                        //TODO put jobOwner result in special position?
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }

                @Override
                public void taskFailed(String taskName, String reason) {
                    System.out.println("ERROR "+taskName+": "+reason);
                    //Go through the regular, synchronized entry point so that the replica leaves
                    //the pending set; otherwise validation of this task would wait on it.
                    replicaFailed(replicaID);
                    runner.getTaskListener().taskFailed(taskName, reason);
                }
            });

            runner.submit(taskRunner);

        } catch (TaskMetaDataException e) {
            e.printStackTrace();
        }
    }

    /**
     * @return true if there is a task that has enough reputation worked on it,
     *         false if there is none or no task is queued at all
     */
    private boolean isThereTaskWithEnoughReputationAlready(){
        //first() throws NoSuchElementException on an empty set
        return !taskDatas.isEmpty() && taskDatas.first().value() < 0;
    }

    /**
     * First validation of a task: groups equal results, asks the quality control (or the test
     * listener) for a verdict on each distinct result, promotes or reports the workers and
     * archives the trustworthy result with the highest quality.
     *
     * @throws IllegalStateException if no result was found trustworthy
     */
    private void validateResults(TaskData taskData, TaskResultData resultData){
        String jobName = taskData.getJobName();

        Map<ByteArray, Set<ReplicaID>> resultMap = EqualityControl.compareData(resultData.returnedReplicas);
        if(resultMap.size() == 0){
            //Happens when all workers say a task failed
            //TODO handle
            return;
        }

        Set<WorkerID> correctWorkers = new HashSet<>();
        double bestQuality = 0;
        ByteArray bestResult = null;

        Map<ByteArray,TrustQuality> trustMap;

        //TODO Implement choice of automatic or manual result validation
        try {
            if (validationListener == null) {
                trustMap = QualityControl.compareQuality(jobName, taskData.getTaskMeta(), resultMap.keySet());
            } else {
                //For testing:
                validationListener.propertyChange(new PropertyChangeEvent(this, "Validate", taskData, resultMap));
                byte[] bytes = resultData.returnedReplicas.values().iterator().next();
                trustMap = new HashMap<>();
                trustMap.put(new ByteArray(bytes), TrustQuality.trustworthy(1));
            }

            System.out.println("\nValidate "+taskData.getTaskMeta().getTaskName()+":");

            for(Map.Entry<ByteArray, TrustQuality> entry : trustMap.entrySet()){
                ByteArray byteArray = entry.getKey();
                TrustQuality trust = entry.getValue();
                Set<ReplicaID> replicaIDs = resultMap.get(byteArray);

                //Keep the trustworthy result with the highest quality, not merely the last one seen.
                //'>=' so that equal quality still resolves to the later entry, as before.
                if(trust.getTrust().equals(Trust.TRUSTWORTHY)
                        && (bestResult == null || trust.getQuality() >= bestQuality)){
                    bestQuality = trust.getQuality();
                    bestResult = byteArray;
                }

                for(ReplicaID replicaID : replicaIDs){
                    WorkerID worker = replicaMap.get(replicaID).getWorker();

                    System.out.print("\t"+WorkerNames.getInstance().getName(worker) + " was found to be ");
                    switch (trust.getTrust()){
                        case TRUSTWORTHY:
                            System.out.printf(FormatString.colour(trust.getTrust().name(), FormatString.Colour.GREEN) + " with quality " + "%.3f\n", trust.getQuality());
                            workerReputationManager.promoteWorker(worker);
                            correctWorkers.add(worker);
                            break;
                        case DECEITFUL:
                            System.out.printf(FormatString.colour(trust.getTrust().name(), FormatString.Colour.YELLOW) + " with quality " + "%.3f\n", trust.getQuality());
                            workerReputationManager.reportWorker(worker);
                            break;
                        case UNKNOWN:
                            System.out.println(FormatString.colour(trust.getTrust().name(), FormatString.Colour.YELLOW));
                            //ignore
                            break;
                    }

                    //Clean for each ReplicaID
                    assignedTasks.get(worker).remove(taskData);
                    replicaMap.remove(replicaID);
                    taskDataMap.remove(replicaID);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        //Clean up and store data:
        if(bestResult != null){
            taskDatas.remove(taskData);

            System.out.println();

            //OBS currently, this happens even when there are some workers who say a replica failed
            archive.archiveResult(taskData, bestResult, bestQuality, correctWorkers);

            if(resultData.excessPendingReplicas.size()==0
                    && resultData.pendingReplicas.size()==0
                    && resultData.outdatedReplicas.size()==0){
                //Nobody can come late anymore
                resultDataMap.remove(taskData.taskID());
            } else {
                resultData.returnedReplicas.clear();
            }
        } else {
            //Notify
            throw new IllegalStateException("No data was acceptable, probably an error in quality function");
        }
    }

    /**
     * Handles a replica that arrives after its task has been archived: compares it with the
     * archived result and adjusts reputations, replacing the archived result if the late one
     * has strictly higher quality.
     */
    private void validateLatecomer(CanonicalResult archivedResult, TaskData taskData, ReplicaID replicaID, ByteArray byteArray){
        if(validationListener != null){
            //For testing:
            validationListener.propertyChange(new PropertyChangeEvent(this, "Late", taskData, replicaID));
            return;
        }

        try {
            WorkerID worker = replicaMap.get(replicaID).getWorker();
            boolean resultEqual = archivedResult.compareNewWorker(byteArray, worker);

            if(resultEqual){
                workerReputationManager.promoteWorker(worker);
                archivedResult.getAdvocatingWorkers().add(worker);
            } else {
                TrustQuality trustQuality = QualityControl.singleQualityTest(taskData.getJobName(), taskData.getTaskMeta(), byteArray);
                switch (trustQuality.getTrust()){
                    case TRUSTWORTHY:
                        //continue down
                        break;
                    case DECEITFUL:
                        workerReputationManager.reportWorker(worker);
                        return;
                    case UNKNOWN:
                        //Might be some error with test code.
                        System.out.println("Error when validating: "+trustQuality.getReason());
                        return;
                }

                double lateQuality = trustQuality.getQuality();
                if(archivedResult.getQuality() > lateQuality){
                    workerReputationManager.reportWorker(worker);
                } else if(archivedResult.getQuality() < lateQuality){
                    //Late result is better: previous advocates are reported and the archive is replaced
                    workerReputationManager.promoteWorker(worker);
                    Set<WorkerID> advocating = archivedResult.getAdvocatingWorkers();
                    for(WorkerID w : advocating){
                        workerReputationManager.reportWorker(w);
                    }
                    HashSet<WorkerID> workerIDs = new HashSet<>();
                    workerIDs.add(worker);
                    archive.archiveResult(taskData, byteArray, lateQuality, workerIDs);
                } else {
                    //Equal quality but different result
                    workerReputationManager.promoteWorker(worker);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     *
     * @param workerID Worker
     * @param replicaID ID of a replica
     * @return true only if worker was assigned this replica, otherwise false.
     */
    public synchronized boolean isWorkerAssignedReplica(WorkerID workerID, ReplicaID replicaID){
        if(workerID==null || replicaID == null){
            return false;
        }
        Replica replica = replicaMap.get(replicaID);
        return replica != null && replica.getWorker().equals(workerID);
    }

    /**
     * @return new set with the IDs of all regular pending replicas of all tasks
     *         (excess pending and outdated replicas are not included)
     */
    public synchronized Set<ReplicaID> pendingReplicaIDs(){
        Set<ReplicaID> replicaIDs = new HashSet<>();
        for(TaskResultData taskResultData : resultDataMap.values()){
            replicaIDs.addAll(taskResultData.pendingReplicas);
        }
        return replicaIDs;
    }

    /**
     * @return Map with pending replicas and respective location key. Results may or not be uploaded in DHT
     */
    public synchronized Map<ReplicaID, Number160> pendingResults(){
        Map<ReplicaID, Number160> pending = new HashMap<>();
        for(ReplicaID replicaID : pendingReplicaIDs()){
            Replica replica = replicaMap.get(replicaID);
            pending.put(replicaID, replica.getReplicaBox().getResultKey());
        }
        return pending;
    }

    /**
     * Timer that reports a replica as outdated to the enclosing manager when its deadline passes.
     */
    private class SerializableReplicaTimer extends SerializableTimer<ReplicaID>{

        /**
         * @param updateTime Number of Milliseconds between check queue
         */
        public SerializableReplicaTimer(long updateTime) {
            super(updateTime);
        }

        @Override
        protected void handleTimeout(ReplicaID element) {
            ReplicaManager.this.replicaOutdated(element);
            System.out.println("Replica outdated: "+element);
        }
    }
}