package uk.ac.imperial.lsds.seepmaster.query;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.imperial.lsds.seep.api.DataReference;
import uk.ac.imperial.lsds.seep.api.DataReference.ServeMode;
import uk.ac.imperial.lsds.seep.api.DataStore;
import uk.ac.imperial.lsds.seep.api.operator.DownstreamConnection;
import uk.ac.imperial.lsds.seep.api.operator.LogicalOperator;
import uk.ac.imperial.lsds.seep.api.operator.Operator;
import uk.ac.imperial.lsds.seep.api.operator.SeepLogicalQuery;
import uk.ac.imperial.lsds.seep.api.operator.UpstreamConnection;
import uk.ac.imperial.lsds.seep.api.operator.sinks.MarkerSink;
import uk.ac.imperial.lsds.seep.comm.Comm;
import uk.ac.imperial.lsds.seep.comm.Connection;
import uk.ac.imperial.lsds.seep.comm.protocol.MasterWorkerCommand;
import uk.ac.imperial.lsds.seep.comm.protocol.ProtocolCommandFactory;
import uk.ac.imperial.lsds.seep.comm.protocol.SeepCommand;
import uk.ac.imperial.lsds.seep.comm.serialization.KryoFactory;
import uk.ac.imperial.lsds.seep.infrastructure.ControlEndPoint;
import uk.ac.imperial.lsds.seep.infrastructure.DataEndPoint;
import uk.ac.imperial.lsds.seep.infrastructure.SeepEndPoint;
import uk.ac.imperial.lsds.seep.util.Utils;
import uk.ac.imperial.lsds.seepmaster.LifecycleManager;
import uk.ac.imperial.lsds.seepmaster.MasterConfig;
import uk.ac.imperial.lsds.seepmaster.infrastructure.master.ExecutionUnit;
import uk.ac.imperial.lsds.seepmaster.infrastructure.master.InfrastructureManager;
import com.esotericsoftware.kryo.Kryo;
public class MaterializedQueryManager implements QueryManager {
final private Logger LOG = LoggerFactory.getLogger(MaterializedQueryManager.class);
private MasterConfig mc;
private static MaterializedQueryManager qm;
private LifecycleManager lifeManager;
private SeepLogicalQuery slq;
private int executionUnitsRequiredToStart;
private InfrastructureManager inf;
private Map<Integer, ControlEndPoint> opToEndpointMapping;
private final Comm comm;
private final Kryo k;
// Query information
private String pathToQueryJar;
private String definitionClassName;
private String[] queryArgs;
private String composeMethodName;
private short queryType;
// convenience method for testing
public static MaterializedQueryManager buildTestMaterializedQueryManager(SeepLogicalQuery lsq,
InfrastructureManager inf, Map<Integer, ControlEndPoint> mapOpToEndPoint, Comm comm) {
return new MaterializedQueryManager(lsq, inf, mapOpToEndPoint, comm);
}
private MaterializedQueryManager(SeepLogicalQuery lsq, InfrastructureManager inf,
Map<Integer, ControlEndPoint> opToEndpointMapping, Comm comm) {
this.slq = lsq;
this.executionUnitsRequiredToStart = this.computeRequiredExecutionUnits(lsq);
this.inf = inf;
this.opToEndpointMapping = opToEndpointMapping;
this.comm = comm;
this.k = KryoFactory.buildKryoForProtocolCommands(this.getClass().getClassLoader());
}
private MaterializedQueryManager(InfrastructureManager inf, Map<Integer, ControlEndPoint> mapOpToEndPoint,
Comm comm, LifecycleManager lifeManager, MasterConfig mc) {
this.inf = inf;
this.opToEndpointMapping = mapOpToEndPoint;
this.comm = comm;
this.lifeManager = lifeManager;
this.k = KryoFactory.buildKryoForProtocolCommands(this.getClass().getClassLoader());
this.mc = mc;
}
public static MaterializedQueryManager getInstance(InfrastructureManager inf, Map<Integer, ControlEndPoint> mapOpToEndPoint,
Comm comm, LifecycleManager lifeManager, MasterConfig mc) {
if(qm == null){
return new MaterializedQueryManager(inf, mapOpToEndPoint, comm, lifeManager, mc);
}
else{
return qm;
}
}
private boolean canStartExecution() {
return inf.executionUnitsAvailable() >= executionUnitsRequiredToStart;
}
@Override
public boolean loadQueryFromParameter(short queryType, SeepLogicalQuery slq, String pathToQueryJar, String definitionClass,
String[] queryArgs, String composeMethod) {
boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_SUBMITTED);
if(!allowed){
LOG.error("Attempt to violate application lifecycle");
return false;
}
this.slq = slq;
this.queryType = queryType;
this.pathToQueryJar = pathToQueryJar;
this.definitionClassName = definitionClass;
this.queryArgs = queryArgs;
this.composeMethodName = composeMethod;
LOG.debug("Logical query loaded: {}", slq.toString());
this.executionUnitsRequiredToStart = this.computeRequiredExecutionUnits(slq);
LOG.info("New query requires: {} units to start execution", this.executionUnitsRequiredToStart);
lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_SUBMITTED);
return true;
}
@Override
public boolean loadQueryFromFile(short queryType, String pathToQueryJar, String definitionClass, String[] queryArgs, String composeMethod) {
boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_SUBMITTED);
if(!allowed){
LOG.error("Attempt to violate application lifecycle");
return false;
}
this.pathToQueryJar = pathToQueryJar;
// get logical query
this.slq = Utils.executeComposeFromQuery(pathToQueryJar, definitionClass, queryArgs, "compose");
LOG.debug("Logical query loaded: {}", slq.toString());
this.executionUnitsRequiredToStart = this.computeRequiredExecutionUnits(slq);
LOG.info("New query requires: {} units to start execution", this.executionUnitsRequiredToStart);
lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_SUBMITTED);
return true;
}
@Override
public boolean deployQueryToNodes() {
boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_DEPLOYED);
if(!allowed){
LOG.error("Attempt to violate application lifecycle");
return false;
}
// Check whether there are sufficient execution units to deploy query
if(!canStartExecution()){
LOG.warn("Cannot deploy query, not enough nodes. Required: {}, available: {}"
, executionUnitsRequiredToStart, inf.executionUnitsAvailable());
return false;
}
// Build mapping for logicalquery
if(this.opToEndpointMapping != null){
LOG.info("Using provided mapping for logicalQuery...");
// TODO: do this
}
else {
LOG.info("Building mapping for logicalQuery...");
this.opToEndpointMapping = createMappingOfOperatorWithEndPoint(slq);
}
// Materialize all DataReference once there exists a mapping
Map<Integer, Map<Integer, Set<DataReference>>> outputs = generateOutputDataReferences(slq, opToEndpointMapping);
Map<Integer, Map<Integer, Set<DataReference>>> inputs = generateInputDataReferences(slq, outputs);
LOG.debug("Mapping for logicalQuery...OK {}", Utils.printMap(opToEndpointMapping));
Set<Integer> involvedEUId = getInvolvedEuIdIn(opToEndpointMapping.values());
Set<Connection> connections = inf.getConnectionsTo(involvedEUId);
sendQueryToNodes(connections, definitionClassName, queryArgs, composeMethodName);
sendMaterializeTaskToNodes(connections, this.opToEndpointMapping, inputs, outputs);
lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_DEPLOYED);
return true;
}
private Set<Integer> getInvolvedEuIdIn(Collection<ControlEndPoint> values) {
Set<Integer> involvedEUs = new HashSet<>();
for(ControlEndPoint ep : values) {
involvedEUs.add(ep.getId());
}
return involvedEUs;
}
private Map<Integer, Map<Integer, Set<DataReference>>> generateInputDataReferences(SeepLogicalQuery slq, Map<Integer, Map<Integer, Set<DataReference>>> outputs) {
Map<Integer, Map<Integer, Set<DataReference>>> inputs = new HashMap<>();
for(LogicalOperator lo : slq.getAllOperators()) {
int opId = lo.getOperatorId();
Map<Integer, Set<DataReference>> input = new HashMap<>();
for(UpstreamConnection uc : lo.upstreamConnections()) {
int streamId = uc.getStreamId();
// Find all DataReferences that produce to this streamId filter by upstream operator
Operator upstreamOp = uc.getUpstreamOperator();
if(upstreamOp != null) {
int upstreamOpId = upstreamOp.getOperatorId();
for(Entry<Integer, Set<DataReference>> produces : outputs.get(upstreamOpId).entrySet()) {
if(produces.getKey() == streamId) {
if(! input.containsKey(streamId)) {
input.put(streamId, new HashSet<>());
}
input.get(streamId).addAll(produces.getValue());
}
}
}
else {
// This can occur when sources simply mark data origin. In this case we can create the
// DataReference directly
DataReference dRef = DataReference.makeExternalDataReference(uc.getDataStore());
// Then we add the DataReferences
if(! input.containsKey(streamId)) {
input.put(streamId, new HashSet<>());
}
input.get(streamId).add(dRef);
}
}
inputs.put(opId, input);
}
return inputs;
}
private Map<Integer, Map<Integer, Set<DataReference>>> generateOutputDataReferences(SeepLogicalQuery slq, Map<Integer, ControlEndPoint> mapping) {
Map<Integer, Map<Integer, Set<DataReference>>> outputs = new HashMap<>();
// Generate per operator the dataReferences it produces
for(LogicalOperator lo : slq.getAllOperators()) {
Map<Integer, Set<DataReference>> output = new HashMap<>();
int opId = lo.getOperatorId();
ControlEndPoint ep = mapping.get(opId);
// One dataReference per downstream, group by streamId
for(DownstreamConnection dc : lo.downstreamConnections()) {
DataStore dataStore = dc.getExpectedDataStoreOfDownstream();
DataReference dref = null;
if(dc.getDownstreamOperator() instanceof MarkerSink) {
dref = DataReference.makeSinkExternalDataReference(dataStore);
}
else {
dref = DataReference.makeManagedDataReferenceWithOwner(opId, dataStore, ep, ServeMode.STREAM);
}
int streamId = dc.getStreamId();
if(! output.containsKey(streamId)) {
output.put(streamId, new HashSet<>());
}
output.get(streamId).add(dref);
}
outputs.put(opId, output);
}
return outputs;
}
@Override
public boolean startQuery() {
boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_RUNNING);
if(!allowed){
LOG.error("Attempt to violate application lifecycle");
return false;
}
// TODO: take a look at the following two lines. Stateless is good to keep everything lean. Yet consider caching
Set<Integer> involvedEUId = getInvolvedEuIdIn(opToEndpointMapping.values());
Set<Connection> connections = inf.getConnectionsTo(involvedEUId);
// Send start query command
SeepCommand start = ProtocolCommandFactory.buildStartQueryCommand();
comm.send_object_sync(start, connections, k);
lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_RUNNING);
return true;
}
@Override
public boolean stopQuery() {
boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_STOPPED);
if(!allowed){
LOG.error("Attempt to violate application lifecycle");
return false;
}
// TODO: take a look at the following two lines. Stateless is good to keep everything lean. Yet consider caching
Set<Integer> involvedEUId = getInvolvedEuIdIn(opToEndpointMapping.values());
Set<Connection> connections = inf.getConnectionsTo(involvedEUId);
// Send start query command
SeepCommand stop = ProtocolCommandFactory.buildStopQueryCommand();
comm.send_object_sync(stop, connections, k);
lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_STOPPED);
return true;
}
public Map<Integer, ControlEndPoint> createMappingOfOperatorWithEndPoint(SeepLogicalQuery slq) {
Map<Integer, ControlEndPoint> mapping = new HashMap<>();
for(LogicalOperator lso : slq.getAllOperators()){
int opId = lso.getOperatorId();
ExecutionUnit eu = inf.getExecutionUnit();
ControlEndPoint ep = eu.getControlEndPoint();
LOG.debug("LogicalOperator: {} will run on: {} -> ({})", opId, ep.getId(), ep.getIp().toString());
mapping.put(opId, ep);
}
return mapping;
}
private int computeRequiredExecutionUnits(SeepLogicalQuery lsq) {
return lsq.getAllOperators().size();
}
private void sendQueryToNodes(Set<Connection> connections, String definitionClassName, String[] queryArgs, String composeMethodName) {
// Send data file to nodes
byte[] queryFile = Utils.readDataFromFile(pathToQueryJar);
LOG.info("Sending query file of size: {} bytes", queryFile.length);
SeepCommand code = ProtocolCommandFactory.buildCodeCommand(queryType, queryFile, definitionClassName, queryArgs, composeMethodName);
comm.send_object_sync(code, connections, k);
LOG.info("Sending query file...DONE!");
}
private void sendMaterializeTaskToNodes(
Set<Connection> connections,
Map<Integer, ControlEndPoint> mapping,
Map<Integer, Map<Integer, Set<DataReference>>> inputs,
Map<Integer, Map<Integer, Set<DataReference>>> outputs) {
LOG.info("Sending materialize task command to nodes...");
SeepCommand materializeCommand = ProtocolCommandFactory.buildMaterializeTaskCommand(mapping, inputs, outputs);
comm.send_object_sync(materializeCommand, connections, k);
LOG.info("Sending materialize task command to nodes...OK");
}
}