/*
* Licensed to Think Big Analytics, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Think Big Analytics, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright 2010 Think Big Analytics. All Rights Reserved.
*/
package tap.core;
import java.util.*;
import tap.Phase;
import tap.Pipe;
public class PipePlan {
private Map<Pipe, Phase> fileDeps = new LinkedHashMap<Pipe, Phase>();
private Map<Phase, Set<Pipe>> processDeps = new LinkedHashMap<Phase, Set<Pipe>>();
private Set<Pipe> prebuilt = new HashSet<Pipe>();
private Set<Phase> failed = new HashSet<Phase>();
private List<List<Phase>> waves = new ArrayList<List<Phase>>();
private Set<Phase> executing = new HashSet<Phase>();
public synchronized void fileCreateWith(Pipe file, Phase process) {
if (process == null) {
prebuilt.add(file);
} else {
fileDeps.put(file, process);
}
}
public synchronized void processReads(Phase process, Pipe file) {
Set<Pipe> procDep = processDeps.get(process);
if (procDep == null) {
procDep = new HashSet<Pipe>();
processDeps.put(process, procDep);
}
procDep.add(file);
}
public synchronized void failed(Phase process) {
failed.add(process);
executing.remove(process);
}
public synchronized void updated(Phase process) {
executing.remove(process);
// remove dependence ON this process from files that it generates - fileDeps
for (Pipe file : process.getOutputs()) {
prebuilt.add(file);
fileDeps.remove(file);
}
processDeps.remove(process);
// could incrementally update plan, but for now we just do it in batch
}
public synchronized List<List<Phase>> plan() {
Set<Pipe> toPlan = getFileDependencies();
waves.clear();
if (!failed.isEmpty())
return null; // don't run anything else in a failed job
// pull out all the leaf nodes (i.e., those with no unplanned dependencies) as another parallel wave that can execute
// this doesn't specify exact scheduling as processes finish but provides more parallelism than pure serial operation
while (!toPlan.isEmpty()) {
Set<Pipe> wave = new HashSet<Pipe>();
Iterator<Pipe> it = toPlan.iterator();
while (it.hasNext()) {
Pipe file = it.next();
Phase process = fileDeps.get(file);
if (!executing.contains(process)) {
boolean canRemove = true;
for (Pipe dependency : processDeps.get(process)) {
if (toPlan.contains(dependency)) {
canRemove = false;
break;
}
}
if (canRemove) {
wave.add(file);
}
} else {
it.remove(); // already being built...
}
}
if (wave.isEmpty() && executing.isEmpty()) {
StringBuilder cycle = new StringBuilder();
for (Pipe file : toPlan) {
cycle.append(' ').append(file.getPath());
}
throw new IllegalStateException("Cyclic dependency among files: "+cycle);
}
List<Phase> nextWave = new ArrayList<Phase>();
for (Pipe file : wave) {
toPlan.remove(file);
// we should really score these and order them by some kind of priority
// for now we submit everything that can be done in parallel
if (!nextWave.contains(file.getProducer()))
nextWave.add(file.getProducer());
}
waves.add(nextWave);
}
return waves;
}
/**
* Must call AFTER calling plan.
*/
public synchronized List<Phase> getNextProcesses() {
return (waves==null || waves.isEmpty()) ? null : Collections.unmodifiableList(waves.get(0));
}
public synchronized boolean executing(Phase process) {
return executing.add(process);
}
public synchronized boolean isComplete() {
return executing.isEmpty() && (!failed.isEmpty() || fileDeps.isEmpty());
}
public Set<Pipe> getFileDependencies() {
return new HashSet<Pipe>(fileDeps.keySet());
}
}