package uk.ac.imperial.lsds.seepworker.core; import java.net.InetAddress; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.CountDownLatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.ac.imperial.lsds.seep.api.ConnectionType; import uk.ac.imperial.lsds.seep.api.DataReference; import uk.ac.imperial.lsds.seep.api.DataReference.ServeMode; import uk.ac.imperial.lsds.seep.api.DataStore; import uk.ac.imperial.lsds.seep.api.DataStoreType; import uk.ac.imperial.lsds.seep.api.RuntimeEvent; import uk.ac.imperial.lsds.seep.api.SeepTask; import uk.ac.imperial.lsds.seep.api.StatefulSeepTask; import uk.ac.imperial.lsds.seep.api.data.Schema; import uk.ac.imperial.lsds.seep.api.operator.LogicalOperator; import uk.ac.imperial.lsds.seep.api.operator.SeepLogicalQuery; import uk.ac.imperial.lsds.seep.api.operator.UpstreamConnection; import uk.ac.imperial.lsds.seep.api.state.SeepState; import uk.ac.imperial.lsds.seep.comm.Comm; import uk.ac.imperial.lsds.seep.comm.Connection; import uk.ac.imperial.lsds.seep.comm.protocol.StageStatusCommand.Status; import uk.ac.imperial.lsds.seep.comm.serialization.KryoFactory; import uk.ac.imperial.lsds.seep.core.DataStoreSelector; import uk.ac.imperial.lsds.seep.core.OBuffer; import uk.ac.imperial.lsds.seep.infrastructure.ControlEndPoint; import uk.ac.imperial.lsds.seep.infrastructure.DataEndPoint; import uk.ac.imperial.lsds.seep.scheduler.ScheduleDescription; import uk.ac.imperial.lsds.seep.scheduler.Stage; import uk.ac.imperial.lsds.seep.scheduler.StageType; import uk.ac.imperial.lsds.seepworker.WorkerConfig; import uk.ac.imperial.lsds.seepworker.comm.ControlAPIImplementation; import uk.ac.imperial.lsds.seepworker.comm.NetworkSelector; import uk.ac.imperial.lsds.seepworker.core.input.CoreInput; import uk.ac.imperial.lsds.seepworker.core.input.CoreInputFactory; import uk.ac.imperial.lsds.seepworker.core.output.CoreOutput; import uk.ac.imperial.lsds.seepworker.core.output.CoreOutputFactory; import com.esotericsoftware.kryo.Kryo; public class Conductor { final private Logger LOG = LoggerFactory.getLogger(Conductor.class.getName()); private WorkerConfig wc; private InetAddress myIp; private ControlAPIImplementation masterApi; private Connection masterConn; private Comm comm; private Kryo k; private int id; private SeepLogicalQuery query; private Map<Integer, ControlEndPoint> mapping; // TODO: these two are only specific to materialise tasks private Map<Integer, Map<Integer, Set<DataReference>>> inputs; private Map<Integer, Map<Integer, Set<DataReference>>> outputs; private List<DataStoreSelector> dataStoreSelectors; private CoreInput coreInput; private CoreOutput coreOutput; private ProcessingEngine engine; private DataReferenceManager drm; private CountDownLatch registerFlag; // Keep stage - scheduleTask private Map<Stage, ScheduleTask> scheduleTasks; private ScheduleDescription sd; public Conductor(InetAddress myIp, ControlAPIImplementation masterApi, Connection masterConn, WorkerConfig wc, Comm comm, DataReferenceManager drm){ this.myIp = myIp; this.masterApi = masterApi; this.masterConn = masterConn; this.wc = wc; this.scheduleTasks = new HashMap<>(); this.drm = drm; this.comm = comm; this.k = KryoFactory.buildKryoForProtocolCommands(this.getClass().getClassLoader()); this.registerFlag = new CountDownLatch(1); } public void materializeAndConfigureTask(int id, SeepLogicalQuery q, Map<Integer, ControlEndPoint> mapping, Map<Integer, Map<Integer, Set<DataReference>>> inputs, Map<Integer, Map<Integer, Set<DataReference>>> outputs) { this.id = id; this.query = q; this.mapping = mapping; this.inputs = inputs; this.outputs = outputs; int opId = getOpIdLivingInThisEU(id); LogicalOperator o = query.getOperatorWithId(opId); LOG.info("Found LogicalOperator: {} mapped to this executionUnit: {} stateful: {}", o.getOperatorName(), id, o.isStateful()); SeepTask task = o.getSeepTask(); LOG.info("Configuring local task: {}", task.toString()); // set up state if any SeepState state = o.getState(); if (o.isStateful()) { LOG.info("Configuring state of local task: {}", state.toString()); ((StatefulSeepTask)task).setState(state); } // This creates one inputAdapter per upstream stream Id Map<Integer, Set<DataReference>> input = inputs.get(o.getOperatorId()); Map<Integer, Set<DataReference>> output = outputs.get(o.getOperatorId()); Map<Integer, ConnectionType> connTypeInformation = getInputConnectionType(o); coreInput = CoreInputFactory.buildCoreInputFor(wc, drm, input, connTypeInformation); coreOutput = CoreOutputFactory.buildCoreOutputFor(wc, drm, output); // Specialized data selectors dataStoreSelectors = DataStoreSelectorFactory.buildDataStoreSelector(coreInput, coreOutput, wc, o, myIp, wc.getInt(WorkerConfig.DATA_PORT)); // Share selectors with DRM so that it can serve data directly drm.setDataStoreSelectors(dataStoreSelectors); // Register in DRM all DataReferences managed for(Set<DataReference> drefs : output.values()) { for(DataReference dr : drefs) { drm.registerDataReferenceInCatalogue(dr); } } registerFlag.countDown(); int sid = o.getOperatorId(); engine = ProcessingEngineFactory.buildSingleTaskProcessingEngine(wc, sid, task, state, coreInput, coreOutput, makeContinuousConductorCallback()); // Initialize system LOG.info("Setting up task..."); task.setUp(); // setup method of task LOG.info("Setting up task...OK"); for(DataStoreSelector dss : dataStoreSelectors) { dss.initSelector(); } // Make sure selectors are initialised, then request connections coreInput.requestInputConnections(comm, k, myIp); } public void configureScheduleTasks(int id, ScheduleDescription sd) { this.id = id; // FIXME: check whether this is going to cause problems. // FIXME: in general remove the dependency of having query in this class //this.query = slq; this.sd = sd; LOG.info("Configuring environment for scheduled operation..."); // Create ScheduleTask for every stage Set<Stage> stages = sd.getStages(); LOG.info("Physical plan with {} stages", stages.size()); for(Stage s : stages) { ScheduleTask st = ScheduleTask.buildTaskFor(id, s, sd); st.setUp(); scheduleTasks.put(s, st); } } public void scheduleTask(int stageId, Map<Integer, Set<DataReference>> input, Map<Integer, Set<DataReference>> output, List<Integer> rankedDatasets) { Stage s = sd.getStageWithId(stageId); ScheduleTask task = this.scheduleTasks.get(s); LOG.info("Scheduling Stage:Task -> {}:{}", s.getStageId(), task.getEuId()); drm.updateRankedDatasets(rankedDatasets); // TODO: Decide when to trigger the enforcement policy (and how to do it in parallel) // TODO: fix this, how useful is to configure this? Map<Integer, ConnectionType> connTypeInformation = new HashMap<>(); for(Integer i : input.keySet()) { connTypeInformation.put(i, ConnectionType.ONE_AT_A_TIME); } coreInput = CoreInputFactory.buildCoreInputFor(wc, drm, input, connTypeInformation); if(output.size() == 0) { // FIXME: // FIXME: output should arrive from scheduler, that knows about downstream. // if 0 then it means we do not have output // FIXME: at the very least is should come with the indexed downstream ids // then DataReference can be created here if any difficulty of doing so at master Schema expectedSchema = input.entrySet().iterator().next().getValue().iterator().next().getDataStore().getSchema(); // FIXME: assumption, same schema as input -> will change once SINKs have also schemas output = createOutputForTask(s, expectedSchema); } coreOutput = CoreOutputFactory.buildCoreOutputFor(wc, drm, output); // Make sure that NetworkSelector is listening for input connections // FIXME: Note this is not reusable!! Can we make NetworkSelector a service rather than a // configure on-demand thing? if (coreInput.requiresConfigureSelectorOfType(DataStoreType.NETWORK)) { /** NetworkSelector ns = DataStoreSelectorFactory.configureNetworkSelector(coreInput, wc, stageId, myIp, wc.getInt(WorkerConfig.DATA_PORT)); ns.initSelector(); ns.startSelector(); **/ } if(coreInput.requiresConfigureSelectorOfType(DataStoreType.FILE) || coreOutput.requiresConfigureSelectorOfType(DataStoreType.FILE)) { FileSelector fsel = DataStoreSelectorFactory.maybeConfigureFileSelector(coreInput, coreOutput, wc, null, myIp, wc.getInt(WorkerConfig.DATA_PORT)); fsel.initSelector(); fsel.startSelector(); } // Request (possibly) remote chunks in case of scheduling a shuffled stage if(s.hasPartitionedState()) { // We pass our info---as the target EndPoint of the comm---and the // workers will push their data to us coreInput.requestInputConnections(comm, k, myIp); } SeepState state = null; for(Set<DataReference> inputdrset : input.values()) { for (DataReference inputdr : inputdrset) { LOG.info("Stage {} input Dataset {}", stageId, inputdr.getId()); } } for(Set<DataReference> outputdrset : output.values()) { for (DataReference outputdr : outputdrset) { LOG.info("Stage {} output Dataset {}", stageId, outputdr.getId()); } } // probably pass to the callback here all info to talk with master ProcessingEngine engine = ProcessingEngineFactory.buildComposedTaskProcessingEngine(wc, s.getStageId(), task, state, coreInput, coreOutput, makeConductorCallbackForScheduleStage(stageId, id, output)); engine.start(); } private Map<Integer, Set<DataReference>> createOutputForTask(Stage s, Schema schema) { // Master did not assign output, so we need to create it here // Althouth output is indexed on an integer, this is for compatibility with // downstream interfaces. It will always be the stageId Map<Integer, Set<DataReference>> output = new HashMap<>(); if(s.hasDependantWithPartitionedStage()) { // create a DR per partition, that are managed // TODO: how to get the number of partitions int numPartitions = wc.getInt(WorkerConfig.SHUFFLE_NUM_PARTITIONS); int outputId = s.getStageId(); Set<DataReference> drefs = new HashSet<>(); // TODO: create a DR per partition and assign the partitionSeqId for(int i = 0; i < numPartitions; i++) { DataStore dataStore = new DataStore(schema, DataStoreType.IN_MEMORY); ControlEndPoint cep = new ControlEndPoint(id, wc.getString(WorkerConfig.WORKER_IP), wc.getInt(WorkerConfig.CONTROL_PORT)); DataReference dr = null; int partitionId = i; dr = DataReference.makeManagedAndPartitionedDataReference(dataStore, cep, ServeMode.STORE, partitionId); drefs.add(dr); } output.put(outputId, drefs); } else { // create a single DR, that is managed int outputId = s.getStageId(); Set<DataReference> drefs = new HashSet<>(); DataStore dataStore = new DataStore(schema, DataStoreType.IN_MEMORY); ControlEndPoint cep = new ControlEndPoint(id, wc.getString(WorkerConfig.WORKER_IP), wc.getInt(WorkerConfig.CONTROL_PORT)); DataReference dr = null; // TODO: is this enough? if(s.getStageType().equals(StageType.SINK_STAGE)) { dr = DataReference.makeSinkExternalDataReference(dataStore); } else { dr = DataReference.makeManagedDataReference(dataStore, cep, ServeMode.STORE); } drefs.add(dr); output.put(outputId, drefs); } return output; } public void startProcessing(){ LOG.info("Starting processing engine..."); for(DataStoreSelector dss : dataStoreSelectors) { dss.startSelector(); } engine.start(); } public void stopProcessing(){ LOG.info("Stopping processing engine..."); engine.stop(); for(OBuffer output: coreOutput.getBuffers().values()) { output.flush(); } for(DataStoreSelector dss : dataStoreSelectors) { dss.stopSelector(); } LOG.info("Stopping processing engine...OK"); } private Map<Integer, ConnectionType> getInputConnectionType(LogicalOperator o) { Map<Integer, ConnectionType> ct = new HashMap<>(); for(UpstreamConnection uc : o.upstreamConnections()) { ct.put(uc.getStreamId(), uc.getConnectionType()); } return ct; } private int getOpIdLivingInThisEU(int id) { for(Entry<Integer, ControlEndPoint> entry : mapping.entrySet()) { if(entry.getValue().getId() == id) return entry.getKey(); } return -1; } public ConductorCallback makeContinuousConductorCallback() { return new ConductorCallback(true); } public ConductorCallback makeConductorCallbackForScheduleStage(int stageId, int euId, Map<Integer, Set<DataReference>> output) { return new ConductorCallback(false, stageId, euId, output); } class ConductorCallback { private boolean continuousTask; private int stageId; private int euId; private Map<Integer, Set<DataReference>> refToProducedOutput; private ConductorCallback(boolean continuousTask) { this.continuousTask = continuousTask; } private ConductorCallback(boolean continuousTask, int stageId, int euId, Map<Integer, Set<DataReference>> output) { this.continuousTask = continuousTask; this.stageId = stageId; this.euId = euId; this.refToProducedOutput = output; } public boolean isContinuousTask() { return continuousTask; } public void notifyOk(List<RuntimeEvent> runtimeEvents) { masterApi.scheduleTaskStatus(masterConn, stageId, euId, Status.OK, refToProducedOutput, runtimeEvents, drm.getManagedDatasetsMetadata(coreInput.getAllDataReferences())); } } // FIXME: refactor, check where to place this method, along with the entire communication with datarefmanager // FIXME: do we need a separate entity for this? public void serveData(int dataRefId, DataEndPoint ep) { try { registerFlag.await(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } // Make sure DRM manages this DataReferenceId DataReference dr = drm.doesManageDataReference(dataRefId); if (dr == null) { // FIXME: error LOG.error("DataRefernece is null!!!"); System.exit(-1); } drm.serveDataSet(coreOutput, dr, ep); } }