package edu.washington.escience.myria.parallel;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.net.BindException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.inject.Inject;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.text.StrSubstitutor;
import org.apache.reef.tang.Configuration;
import org.apache.reef.tang.Injector;
import org.apache.reef.tang.Tang;
import org.apache.reef.tang.annotations.Parameter;
import org.apache.reef.tang.exceptions.InjectionException;
import org.apache.reef.tang.formats.AvroConfigurationSerializer;
import org.apache.reef.tang.formats.ConfigurationSerializer;
import org.apache.reef.task.TaskMessage;
import org.apache.reef.task.TaskMessageSource;
import org.apache.reef.task.events.DriverMessage;
import org.apache.reef.util.Optional;
import org.apache.reef.wake.EventHandler;
import org.jboss.netty.channel.ChannelFactory;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.group.ChannelGroupFuture;
import org.jboss.netty.channel.group.ChannelGroupFutureListener;
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
import org.jboss.netty.handler.execution.OrderedMemoryAwareThreadPoolExecutor;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Striped;
import com.google.protobuf.InvalidProtocolBufferException;
import edu.washington.escience.myria.CsvTupleWriter;
import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.MyriaConstants;
import edu.washington.escience.myria.MyriaConstants.FunctionLanguage;
import edu.washington.escience.myria.PostgresBinaryTupleWriter;
import edu.washington.escience.myria.RelationKey;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.TupleWriter;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.accessmethod.AccessMethod.IndexRef;
import edu.washington.escience.myria.api.MyriaJsonMapperProvider;
import edu.washington.escience.myria.api.encoding.DatasetStatus;
import edu.washington.escience.myria.api.encoding.FunctionStatus;
import edu.washington.escience.myria.api.encoding.QueryEncoding;
import edu.washington.escience.myria.coordinator.CatalogException;
import edu.washington.escience.myria.coordinator.MasterCatalog;
import edu.washington.escience.myria.expression.Expression;
import edu.washington.escience.myria.expression.MinusExpression;
import edu.washington.escience.myria.expression.VariableExpression;
import edu.washington.escience.myria.expression.WorkerIdExpression;
import edu.washington.escience.myria.io.AmazonS3Source;
import edu.washington.escience.myria.io.ByteSink;
import edu.washington.escience.myria.io.DataSink;
import edu.washington.escience.myria.io.UriSink;
import edu.washington.escience.myria.operator.Apply;
import edu.washington.escience.myria.operator.CSVFileScanFragment;
import edu.washington.escience.myria.operator.DbCreateFunction;
import edu.washington.escience.myria.operator.DbCreateIndex;
import edu.washington.escience.myria.operator.DbCreateView;
import edu.washington.escience.myria.operator.DbDelete;
import edu.washington.escience.myria.operator.DbExecute;
import edu.washington.escience.myria.operator.DbInsert;
import edu.washington.escience.myria.operator.DbQueryScan;
import edu.washington.escience.myria.operator.DuplicateTBGenerator;
import edu.washington.escience.myria.operator.EOSSource;
import edu.washington.escience.myria.operator.EmptyRelation;
import edu.washington.escience.myria.operator.EmptySink;
import edu.washington.escience.myria.operator.Operator;
import edu.washington.escience.myria.operator.RootOperator;
import edu.washington.escience.myria.operator.TupleSink;
import edu.washington.escience.myria.operator.agg.Aggregate;
import edu.washington.escience.myria.operator.agg.PrimitiveAggregator.AggregationOp;
import edu.washington.escience.myria.operator.agg.PrimitiveAggregatorFactory;
import edu.washington.escience.myria.operator.network.CollectProducer;
import edu.washington.escience.myria.operator.network.Consumer;
import edu.washington.escience.myria.operator.network.GenericShuffleProducer;
import edu.washington.escience.myria.operator.network.distribute.BroadcastDistributeFunction;
import edu.washington.escience.myria.operator.network.distribute.DistributeFunction;
import edu.washington.escience.myria.operator.network.distribute.HowDistributed;
import edu.washington.escience.myria.parallel.ipc.IPCConnectionPool;
import edu.washington.escience.myria.parallel.ipc.IPCMessage;
import edu.washington.escience.myria.parallel.ipc.InJVMLoopbackChannelSink;
import edu.washington.escience.myria.parallel.ipc.QueueBasedShortMessageProcessor;
import edu.washington.escience.myria.perfenforce.PerfEnforceDriver;
import edu.washington.escience.myria.proto.ControlProto.ControlMessage;
import edu.washington.escience.myria.proto.QueryProto.QueryMessage;
import edu.washington.escience.myria.proto.QueryProto.QueryReport;
import edu.washington.escience.myria.proto.TransportProto.TransportMessage;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.storage.TupleBuffer;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.DefaultInstancePath;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.FlowControlWriteBufferHighMarkBytes;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.FlowControlWriteBufferLowMarkBytes;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.MasterHost;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.MasterRpcPort;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.OperatorInputBufferCapacity;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.OperatorInputBufferRecoverTrigger;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.PersistUri;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.StorageDbms;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.TcpConnectionTimeoutMillis;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.TcpReceiveBufferSizeBytes;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.TcpSendBufferSizeBytes;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.WorkerConf;
import edu.washington.escience.myria.tools.MyriaWorkerConfigurationModule;
import edu.washington.escience.myria.util.IPCUtils;
import edu.washington.escience.myria.util.concurrent.ErrorLoggingTimerTask;
import edu.washington.escience.myria.util.concurrent.RenamingThreadFactory;
/**
* The Myria master (coordinator): manages workers, the master catalog, and query execution.
*/
public final class Server implements TaskMessageSource, EventHandler<DriverMessage> {
/** Master message processor. */
private final class MessageProcessor implements Runnable {
/** Default constructor. */
public MessageProcessor() {
super();
}
@Override
public void run() {
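// A labeled loop lets the handlers below break out of message processing entirely on interrupt.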
TERMINATE_MESSAGE_PROCESSING:
while (true) {
try {
IPCMessage.Data<TransportMessage> mw = null;
try {
mw = messageQueue.take();
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
break TERMINATE_MESSAGE_PROCESSING;
}
final TransportMessage m = mw.getPayload();
final int senderID = mw.getRemoteID();
switch (m.getType()) {
case CONTROL:
final ControlMessage controlM = m.getControlMessage();
switch (controlM.getType()) {
case RESOURCE_STATS:
queryManager.updateResourceStats(senderID, controlM);
break;
default:
LOGGER.error("Unexpected control message received at master: {}", controlM);
break;
}
break;
case QUERY:
final QueryMessage qm = m.getQueryMessage();
final SubQueryId subQueryId = new SubQueryId(qm.getQueryId(), qm.getSubqueryId());
switch (qm.getType()) {
case QUERY_READY_TO_EXECUTE:
LOGGER.info("Worker #{} is ready to execute query #{}.", senderID, subQueryId);
queryManager.workerReady(subQueryId, senderID);
break;
case QUERY_COMPLETE:
QueryReport qr = qm.getQueryReport();
if (qr.getSuccess()) {
LOGGER.info(
"Worker #{} succeeded in executing query #{}.", senderID, subQueryId);
queryManager.workerComplete(subQueryId, senderID);
} else {
ObjectInputStream osis = null;
Throwable cause = null;
try {
osis =
new ObjectInputStream(
new ByteArrayInputStream(qr.getCause().toByteArray()));
cause = (Throwable) (osis.readObject());
} catch (IOException | ClassNotFoundException e) {
LOGGER.error("Error decoding failure cause", e);
}
LOGGER.error(
"Worker #{} failed in executing query #{}.", senderID, subQueryId, cause);
queryManager.workerFailed(subQueryId, senderID, cause);
}
break;
default:
LOGGER.error("Unexpected query message received at master: {}", qm);
break;
}
break;
default:
LOGGER.error("Unknown short message received at master: {}", m.getType());
break;
}
} catch (Throwable a) {
LOGGER.error("Error occured in master message processor.", a);
if (a instanceof Error) {
throw a;
}
if (a instanceof InterruptedException) {
Thread.currentThread().interrupt();
break TERMINATE_MESSAGE_PROCESSING;
}
}
}
}
}
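/** Messages queued for delivery to the REEF driver. */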
private final Queue<TaskMessage> pendingDriverMessages = new ConcurrentLinkedQueue<>();
private Optional<TaskMessage> dequeueDriverMessage() {
return Optional.ofNullable(pendingDriverMessages.poll());
}
private void enqueueDriverMessage(@Nonnull final TransportMessage msg) {
final TaskMessage driverMsg =
TaskMessage.from(MyriaConstants.MASTER_ID + "", msg.toByteArray());
pendingDriverMessages.add(driverMsg);
}
/**
* Poll the next message destined for the REEF driver, e.g., to instruct it to launch or abort workers.
*
* @see org.apache.reef.task.TaskMessageSource#getMessage()
*/
@Override
public Optional<TaskMessage> getMessage() {
// TODO: determine which messages should be sent to the driver
return dequeueDriverMessage();
}
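/** Striped locks serializing add/remove events for the same worker id. */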
private Striped<Lock> workerAddRemoveLock;
/** REEF event handler for driver messages indicating worker failure. */
@Override
public void onNext(final DriverMessage driverMessage) {
LOGGER.info("Driver message received");
TransportMessage m;
try {
m = TransportMessage.parseFrom(driverMessage.get().get());
} catch (InvalidProtocolBufferException e) {
LOGGER.warn("Could not parse TransportMessage from driver message", e);
return;
}
final ControlMessage controlM = m.getControlMessage();
LOGGER.info("Control message received: {}", controlM);
// The driver notified us of a worker membership change (worker removed or added).
final int workerId = controlM.getWorkerId();
Lock workerLock = workerAddRemoveLock.get(workerId);
workerLock.lock();
try {
switch (controlM.getType()) {
case REMOVE_WORKER:
{
LOGGER.info(
"Driver reported worker {} as dead, removing from alive workers.", workerId);
aliveWorkers.remove(workerId);
queryManager.workerDied(workerId);
connectionPool
.removeRemote(workerId)
.addListener(
new ChannelGroupFutureListener() {
@Override
public void operationComplete(final ChannelGroupFuture future) {
if (future.isCompleteSuccess()) {
LOGGER.info(
"removed connection for remote worker {} from connection pool",
workerId);
} else {
LOGGER.info(
"failed to remove connection for remote worker {} from connection pool",
workerId);
}
}
});
enqueueDriverMessage(IPCUtils.removeWorkerAckTM(workerId));
}
break;
case ADD_WORKER:
{
Preconditions.checkState(
!aliveWorkers.contains(workerId), "worker %s is already alive", workerId);
LOGGER.info("Driver wants to add worker {} to alive workers.", workerId);
connectionPool.putRemote(
workerId, SocketInfo.fromProtobuf(controlM.getRemoteAddress()));
queryManager.workerRestarted(
workerId, ImmutableSet.copyOf(controlM.getAckedWorkerIdsList()));
aliveWorkers.add(workerId);
enqueueDriverMessage(IPCUtils.addWorkerAckTM(workerId));
}
break;
default:
throw new IllegalStateException(
"Unexpected driver control message type: " + controlM.getType());
}
} finally {
workerLock.unlock();
}
}
/** The usage message for this server. */
static final String USAGE = "Usage: Server catalogFile [-explain] [-f queryFile]";
/** The logger for this class. */
private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(Server.class);
/** Initial worker list. */
private ImmutableMap<Integer, SocketInfo> workers = null;
/** Manages the queries executing in this instance of Myria. */
private QueryManager queryManager = null;
/** @return the query manager. */
public QueryManager getQueryManager() {
return queryManager;
}
/** Current alive worker set. */
private final Set<Integer> aliveWorkers;
/** Execution environment variables for operators. */
private final ConcurrentHashMap<String, Object> execEnvVars;
/**
* All message queue.
*
* TODO: remove this queue, as in {@link Worker}.
*/
private final LinkedBlockingQueue<IPCMessage.Data<TransportMessage>> messageQueue;
/** The IPC Connection Pool. */
private IPCConnectionPool connectionPool;
/** {@link ExecutorService} for message processing. */
private volatile ExecutorService messageProcessingExecutor;
/** The Catalog stores the metadata about the Myria instance. */
private MasterCatalog catalog;
/**
* The {@link OrderedMemoryAwareThreadPoolExecutor} that takes messages from the IPC worker executor and further
* processes them using application-specific message handlers, e.g. the master's short-message processor.
*/
private volatile OrderedMemoryAwareThreadPoolExecutor ipcPipelineExecutor;
/** The {@link ExecutorService} that executes the master-side subqueries. */
private volatile ExecutorService serverQueryExecutor;
/** Absolute path of the directory containing the master catalog files. */
private final String catalogPath;
/** The URI under which relations are persisted. */
private final String persistURI;
/** @return the query executor used by this master. */
ExecutorService getQueryExecutor() {
return serverQueryExecutor;
}
/** Maximum number of seconds allowed for graceful cleanup. */
public static final int NUM_SECONDS_FOR_ELEGANT_CLEANUP = 10;
/** @return my connection pool for IPC. */
IPCConnectionPool getIPCConnectionPool() {
return connectionPool;
}
/** @return my pipeline executor. */
OrderedMemoryAwareThreadPoolExecutor getPipelineExecutor() {
return ipcPipelineExecutor;
}
/** The socket info for the master. */
private final SocketInfo masterSocketInfo;
/** The PerfEnforce driver. */
private PerfEnforceDriver perfEnforceDriver;
/**
* @return my execution environment variables for init of operators.
*/
ConcurrentHashMap<String, Object> getExecEnvVars() {
return execEnvVars;
}
/** @return execution mode. */
QueryExecutionMode getExecutionMode() {
return QueryExecutionMode.NON_BLOCKING;
}
private final String instancePath;
private final int connectTimeoutMillis;
private final int sendBufferSize;
private final int receiveBufferSize;
private final int writeBufferLowWaterMark;
private final int writeBufferHighWaterMark;
private final int inputBufferCapacity;
private final int inputBufferRecoverTrigger;
private final Injector injector;
/**
* Construct a server object, with configuration stored in the specified catalog file.
*
* @param masterHost hostname of the master
* @param masterPort RPC port of the master
* @param instancePath absolute path of the directory containing the master catalog files
* @param databaseSystem name of the storage DB system
* @param connectTimeoutMillis connect timeout for worker IPC
* @param sendBufferSize send buffer size in bytes for worker IPC
* @param receiveBufferSize receive buffer size in bytes for worker IPC
* @param writeBufferLowWaterMark low watermark for write buffer overflow recovery
* @param writeBufferHighWaterMark high watermark for write buffer overflow recovery
* @param inputBufferCapacity size of the input buffer in bytes
* @param inputBufferRecoverTrigger number of bytes in the input buffer to trigger recovery after overflow
* @param persistURI the storage endpoint URI for persisting partitioned relations
* @param injector a Tang injector for instantiating objects from configuration
*/
@Inject
public Server(
@Parameter(MasterHost.class) final String masterHost,
@Parameter(MasterRpcPort.class) final int masterPort,
@Parameter(DefaultInstancePath.class) final String instancePath,
@Parameter(StorageDbms.class) final String databaseSystem,
@Parameter(TcpConnectionTimeoutMillis.class) final int connectTimeoutMillis,
@Parameter(TcpSendBufferSizeBytes.class) final int sendBufferSize,
@Parameter(TcpReceiveBufferSizeBytes.class) final int receiveBufferSize,
@Parameter(FlowControlWriteBufferLowMarkBytes.class) final int writeBufferLowWaterMark,
@Parameter(FlowControlWriteBufferHighMarkBytes.class) final int writeBufferHighWaterMark,
@Parameter(OperatorInputBufferCapacity.class) final int inputBufferCapacity,
@Parameter(OperatorInputBufferRecoverTrigger.class) final int inputBufferRecoverTrigger,
@Parameter(PersistUri.class) final String persistURI,
final Injector injector) {
this.instancePath = instancePath;
this.connectTimeoutMillis = connectTimeoutMillis;
this.sendBufferSize = sendBufferSize;
this.receiveBufferSize = receiveBufferSize;
this.writeBufferLowWaterMark = writeBufferLowWaterMark;
this.writeBufferHighWaterMark = writeBufferHighWaterMark;
this.inputBufferCapacity = inputBufferCapacity;
this.inputBufferRecoverTrigger = inputBufferRecoverTrigger;
this.persistURI = persistURI;
this.injector = injector;
masterSocketInfo = new SocketInfo(masterHost, masterPort);
this.catalogPath = instancePath;
execEnvVars = new ConcurrentHashMap<>();
execEnvVars.put(MyriaConstants.EXEC_ENV_VAR_NODE_ID, MyriaConstants.MASTER_ID);
execEnvVars.put(MyriaConstants.EXEC_ENV_VAR_EXECUTION_MODE, getExecutionMode());
execEnvVars.put(MyriaConstants.EXEC_ENV_VAR_DATABASE_SYSTEM, databaseSystem);
aliveWorkers = Sets.newConcurrentHashSet();
messageQueue = new LinkedBlockingQueue<>();
}
/** Timer task executor. */
private ScheduledExecutorService scheduledTaskExecutor;
/** This class exists solely for debugging; it has no other purpose. */
private class DebugHelper extends ErrorLoggingTimerTask {
/** Interval of execution. */
public static final int INTERVAL = MyriaConstants.WAITING_INTERVAL_1_SECOND_IN_MS;
@Override
public final synchronized void runInner() {
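// Intentionally almost a no-op: this periodic task exists only as a debugging hook.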
System.currentTimeMillis();
}
}
private ImmutableSet<Configuration> getWorkerConfs(final Injector injector)
throws InjectionException, BindException, IOException {
final ImmutableSet.Builder<Configuration> workerConfsBuilder = new ImmutableSet.Builder<>();
final Set<String> serializedWorkerConfs = injector.getNamedInstance(WorkerConf.class);
final ConfigurationSerializer serializer = new AvroConfigurationSerializer();
for (final String serializedWorkerConf : serializedWorkerConfs) {
final Configuration workerConf = serializer.fromString(serializedWorkerConf);
workerConfsBuilder.add(workerConf);
}
return workerConfsBuilder.build();
}
private static Integer getIdFromWorkerConf(final Configuration workerConf)
throws InjectionException {
final Injector injector = Tang.Factory.getTang().newInjector(workerConf);
return injector.getNamedInstance(MyriaWorkerConfigurationModule.WorkerId.class);
}
private static String getHostFromWorkerConf(final Configuration workerConf)
throws InjectionException {
final Injector injector = Tang.Factory.getTang().newInjector(workerConf);
return injector.getNamedInstance(MyriaWorkerConfigurationModule.WorkerHost.class);
}
private static Integer getPortFromWorkerConf(final Configuration workerConf)
throws InjectionException {
final Injector injector = Tang.Factory.getTang().newInjector(workerConf);
return injector.getNamedInstance(MyriaWorkerConfigurationModule.WorkerPort.class);
}
/** Master cleanup. */
private void cleanup() {
LOGGER.info("{} is going to shutdown", MyriaConstants.SYSTEM_NAME);
queryManager.killAll();
if (messageProcessingExecutor != null && !messageProcessingExecutor.isShutdown()) {
messageProcessingExecutor.shutdownNow();
}
if (scheduledTaskExecutor != null && !scheduledTaskExecutor.isShutdown()) {
scheduledTaskExecutor.shutdownNow();
}
/* Close the catalog before shutting down the IPC because there may be Catalog jobs pending that were triggered by
* IPC events. */
catalog.close();
connectionPool.shutdown();
connectionPool.releaseExternalResources();
if (ipcPipelineExecutor != null && !ipcPipelineExecutor.isShutdown()) {
ipcPipelineExecutor.shutdown();
}
LOGGER.info("Master connection pool shutdown complete.");
LOGGER.info("Master finishes cleanup.");
}
/** Shutdown the master. */
public void shutdown() {
cleanup();
}
/**
* Start all the threads that do work for the server.
*
* @throws Exception if any error occurs.
*/
public void start() throws Exception {
LOGGER.info("Server starting on {}", masterSocketInfo);
final ImmutableSet<Configuration> workerConfs = getWorkerConfs(injector);
final ImmutableMap.Builder<Integer, SocketInfo> workersBuilder = ImmutableMap.builder();
for (Configuration workerConf : workerConfs) {
workersBuilder.put(
getIdFromWorkerConf(workerConf),
new SocketInfo(getHostFromWorkerConf(workerConf), getPortFromWorkerConf(workerConf)));
}
workers = workersBuilder.build();
// aliveWorkers.addAll(workers.keySet());
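/* One lock stripe per configured worker serializes concurrent add/remove events for the same worker id. */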
workerAddRemoveLock = Striped.lock(workers.size());
final Map<Integer, SocketInfo> computingUnits = new HashMap<>(workers);
computingUnits.put(MyriaConstants.MASTER_ID, masterSocketInfo);
try {
LOGGER.info("Attempting to open master catalog file under {}...", catalogPath);
catalog = MasterCatalog.open(catalogPath);
} catch (FileNotFoundException e) {
LOGGER.info(
"Failed to open master catalog file under {}, attempting to create it...\n({})",
catalogPath,
e.getMessage());
catalog = MasterCatalog.create(catalogPath);
}
queryManager = new QueryManager(catalog, this);
connectionPool =
new IPCConnectionPool(
MyriaConstants.MASTER_ID,
computingUnits,
IPCConfigurations.createMasterIPCServerBootstrap(
connectTimeoutMillis,
sendBufferSize,
receiveBufferSize,
writeBufferLowWaterMark,
writeBufferHighWaterMark),
IPCConfigurations.createMasterIPCClientBootstrap(
connectTimeoutMillis,
sendBufferSize,
receiveBufferSize,
writeBufferLowWaterMark,
writeBufferHighWaterMark),
new TransportMessageSerializer(),
new QueueBasedShortMessageProcessor<TransportMessage>(messageQueue),
inputBufferCapacity,
inputBufferRecoverTrigger);
scheduledTaskExecutor =
Executors.newSingleThreadScheduledExecutor(
new RenamingThreadFactory("Master global timer"));
scheduledTaskExecutor.scheduleAtFixedRate(
new DebugHelper(), DebugHelper.INTERVAL, DebugHelper.INTERVAL, TimeUnit.MILLISECONDS);
messageProcessingExecutor =
Executors.newCachedThreadPool(new RenamingThreadFactory("Master message processor"));
serverQueryExecutor =
Executors.newCachedThreadPool(new RenamingThreadFactory("Master query executor"));
/** The {@link Executor} that handles IPC connection setup/cleanup. */
ExecutorService ipcBossExecutor =
Executors.newCachedThreadPool(new RenamingThreadFactory("Master IPC boss"));
/** The {@link Executor} that handles IPC message delivery and transformation. */
ExecutorService ipcWorkerExecutor =
Executors.newCachedThreadPool(new RenamingThreadFactory("Master IPC worker"));
ipcPipelineExecutor = null; // Remove the pipeline executor.
// new
// OrderedMemoryAwareThreadPoolExecutor(Runtime.getRuntime().availableProcessors()
// * 2 + 1,
// 5 * MyriaConstants.MB, 0,
// MyriaConstants.THREAD_POOL_KEEP_ALIVE_TIME_IN_MS,
// TimeUnit.MILLISECONDS,
// new RenamingThreadFactory("Master Pipeline executor"));
/** The {@link ChannelFactory} for creating client side connections. */
ChannelFactory clientChannelFactory =
new NioClientSocketChannelFactory(
ipcBossExecutor, ipcWorkerExecutor, Runtime.getRuntime().availableProcessors() * 2 + 1);
/** The {@link ChannelFactory} for creating server side accepted connections. */
ChannelFactory serverChannelFactory =
new NioServerSocketChannelFactory(
ipcBossExecutor, ipcWorkerExecutor, Runtime.getRuntime().availableProcessors() * 2 + 1);
// Cap the number of active I/O threads at 2 * (number of CPUs) + 1.
ChannelPipelineFactory serverPipelineFactory =
new IPCPipelineFactories.MasterServerPipelineFactory(connectionPool, getPipelineExecutor());
ChannelPipelineFactory clientPipelineFactory =
new IPCPipelineFactories.MasterClientPipelineFactory(connectionPool, getPipelineExecutor());
ChannelPipelineFactory masterInJVMPipelineFactory =
new IPCPipelineFactories.MasterInJVMPipelineFactory(connectionPool);
connectionPool.start(
serverChannelFactory,
serverPipelineFactory,
clientChannelFactory,
clientPipelineFactory,
masterInJVMPipelineFactory,
new InJVMLoopbackChannelSink());
messageProcessingExecutor.submit(new MessageProcessor());
LOGGER.info("Server started on {}", masterSocketInfo);
if (getDBMS().equals(MyriaConstants.STORAGE_SYSTEM_POSTGRESQL)) {
final List<Integer> workerIds = ImmutableList.copyOf(workers.keySet());
addRelationToCatalog(
MyriaConstants.EVENT_PROFILING_RELATION,
MyriaConstants.EVENT_PROFILING_SCHEMA,
workerIds,
false);
addRelationToCatalog(
MyriaConstants.SENT_PROFILING_RELATION,
MyriaConstants.SENT_PROFILING_SCHEMA,
workerIds,
false);
addRelationToCatalog(
MyriaConstants.RESOURCE_PROFILING_RELATION,
MyriaConstants.RESOURCE_PROFILING_SCHEMA,
workerIds,
false);
addRelationToCatalog(
MyriaConstants.PYUDF_RELATION, MyriaConstants.PYUDF_SCHEMA, workerIds, false);
}
perfEnforceDriver = new PerfEnforceDriver(this, instancePath);
}
/**
* Manually add a relation to the catalog.
*
* @param relationKey the relation to add
* @param schema the schema of the relation to add
* @param workers the workers that have the relation
* @param force force add the relation; will replace an existing entry.
* @throws DbException if the catalog cannot be accessed
*/
private void addRelationToCatalog(
final RelationKey relationKey,
final Schema schema,
final List<Integer> workers,
final boolean force)
throws DbException {
try {
if (!force && getSchema(relationKey) != null) {
return;
}
QueryEncoding query = new QueryEncoding();
query.rawQuery = String.format("Add %s to catalog", relationKey);
query.logicalRa = query.rawQuery;
query.fragments = ImmutableList.of();
long queryId = catalog.newQuery(query);
final Query queryState =
new Query(
queryId,
query,
new SubQuery(
new SubQueryPlan(new EmptySink(new EOSSource())),
new HashMap<Integer, SubQueryPlan>()),
this);
queryState.markSuccess();
catalog.queryFinished(queryState);
Map<RelationKey, RelationWriteMetadata> relation = new HashMap<>();
RelationWriteMetadata meta = new RelationWriteMetadata(relationKey, schema, true, false);
for (Integer worker : workers) {
meta.addWorker(worker);
}
relation.put(relationKey, meta);
catalog.updateRelationMetadata(relation, new SubQueryId(queryId, 0));
} catch (CatalogException e) {
throw new DbException(e);
}
}
/** @return the dbms from {@link #execEnvVars}. */
public String getDBMS() {
return (String) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_DATABASE_SYSTEM);
}
/**
* Submit a query plan directly. Can only be used in tests.
*
* @param masterRoot the root operator of the master plan
* @param workerRoots the roots of the worker part of the plan, {workerID -> RootOperator[]}
* @return the future of the submitted query, which is accepted and scheduled for execution.
* @throws DbException if any error occurs.
* @throws CatalogException on catalog errors.
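*
* <p>A minimal usage sketch (for tests only; assumes a running {@code server} with worker 1 alive,
* and that {@code masterRoot} and {@code workerRoot} are already-constructed root operators; all
* three names are hypothetical):
*
* <pre>{@code
* QueryFuture f = server.submitQueryPlan(masterRoot, ImmutableMap.of(1, new RootOperator[] {workerRoot}));
* }</pre>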
*/
public QueryFuture submitQueryPlan(
final RootOperator masterRoot, final Map<Integer, RootOperator[]> workerRoots)
throws DbException, CatalogException {
String catalogInfoPlaceHolder = "MasterPlan: " + masterRoot + "; WorkerPlan: " + workerRoots;
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Entry<Integer, RootOperator[]> entry : workerRoots.entrySet()) {
workerPlans.put(entry.getKey(), new SubQueryPlan(entry.getValue()));
}
return queryManager.submitQuery(
catalogInfoPlaceHolder,
catalogInfoPlaceHolder,
catalogInfoPlaceHolder,
new SubQueryPlan(masterRoot),
workerPlans);
}
/** @return the set of workers that are currently alive. */
public Set<Integer> getAliveWorkers() {
return ImmutableSet.copyOf(aliveWorkers);
}
/**
* Return a random subset of workers.
*
* @param number the number of alive workers returned
* @return a subset of workers that are currently alive.
*/
public Set<Integer> getRandomWorkers(final int number) {
Preconditions.checkArgument(
number <= getAliveWorkers().size(),
"The number of workers requested cannot exceed the number of alive workers.");
if (number == getAliveWorkers().size()) {
return getAliveWorkers();
}
List<Integer> workerList = new ArrayList<>(getAliveWorkers());
Collections.shuffle(workerList);
return ImmutableSet.copyOf(workerList.subList(0, number));
}
/** @return a map from worker id to {@link SocketInfo} for all workers known to this master. */
public Map<Integer, SocketInfo> getWorkers() {
return workers;
}
/**
* Ingest the given dataset.
*
* @param relationKey the name of the dataset.
* @param workersToIngest restrict the workers to ingest data (null for all)
* @param indexes the indexes to create.
* @param source the source of tuples to be ingested.
* @param df the distribute function.
* @return the status of the ingested dataset.
* @throws InterruptedException interrupted
* @throws DbException if there is an error
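*
* <p>A minimal usage sketch (assumes {@code key}, {@code source}, and {@code df} are an
* already-built relation key, tuple source operator, and distribute function; all hypothetical
* names; passing {@code null} for the worker list uses all alive workers):
*
* <pre>{@code
* DatasetStatus status = server.ingestDataset(key, null, ImmutableList.of(), source, df);
* }</pre>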
*/
public DatasetStatus ingestDataset(
final RelationKey relationKey,
List<Integer> workersToIngest,
final List<List<IndexRef>> indexes,
final Operator source,
final DistributeFunction df)
throws InterruptedException, DbException {
/* Figure out the workers we will use. If workersToIngest is null, use all active workers. */
if (workersToIngest == null) {
workersToIngest = ImmutableList.copyOf(getAliveWorkers());
}
int[] workersArray = Ints.toArray(workersToIngest);
Preconditions.checkArgument(workersArray.length > 0, "Must use > 0 workers");
/* The master plan: send the tuples out. */
ExchangePairID scatterId = ExchangePairID.newID();
df.setDestinations(workersArray.length, 1);
GenericShuffleProducer scatter =
new GenericShuffleProducer(source, new ExchangePairID[] {scatterId}, workersArray, df);
/* The workers' plan */
Consumer gather =
new Consumer(source.getSchema(), scatterId, ImmutableSet.of(MyriaConstants.MASTER_ID));
DbInsert insert = new DbInsert(gather, relationKey, true, indexes);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : workersArray) {
workerPlans.put(workerId, new SubQueryPlan(insert));
}
ListenableFuture<Query> qf;
try {
qf =
queryManager.submitQuery(
"ingest " + relationKey.toString(),
"ingest " + relationKey.toString(),
"ingest " + relationKey.toString(getDBMS()),
new SubQueryPlan(scatter),
workerPlans);
} catch (CatalogException e) {
throw new DbException("Error submitting query", e);
}
try {
qf.get();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
// Update how the relation is distributed only after it has been successfully ingested.
updateHowDistributed(relationKey, new HowDistributed(df, workersArray));
return getDatasetStatus(relationKey);
}
/**
* Parallel ingest: each selected worker directly reads its own partition of the source file.
*
* @param relationKey the name of the dataset.
* @param schema the schema of the dataset.
* @param delimiter the field delimiter (may be null).
* @param quote the quote character (may be null).
* @param escape the escape character (may be null).
* @param numberOfSkippedLines the number of leading lines to skip (may be null).
* @param s3Source the S3 source of the data.
* @param workersToIngest restrict the workers to ingest data (null for all)
* @param distributeFunction the distribute function.
* @return the status of the ingested dataset.
* @throws URIException if the source URI is malformed
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public DatasetStatus parallelIngestDataset(
final RelationKey relationKey,
final Schema schema,
@Nullable final Character delimiter,
@Nullable final Character quote,
@Nullable final Character escape,
@Nullable final Integer numberOfSkippedLines,
final AmazonS3Source s3Source,
final Set<Integer> workersToIngest,
final DistributeFunction distributeFunction)
throws URIException, DbException, InterruptedException {
long fileSize = s3Source.getFileSize();
Set<Integer> potentialWorkers = MoreObjects.firstNonNull(workersToIngest, getAliveWorkers());
/* Select a subset of workers */
int[] workersArray = parallelIngestComputeNumWorkers(fileSize, potentialWorkers);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (int i = 0; i < workersArray.length; i++) {
CSVFileScanFragment scanFragment =
new CSVFileScanFragment(
s3Source, schema, workersArray, delimiter, quote, escape, numberOfSkippedLines);
workerPlans.put(
workersArray[i], new SubQueryPlan(new DbInsert(scanFragment, relationKey, true)));
}
ListenableFuture<Query> qf;
try {
qf =
queryManager.submitQuery(
"ingest " + relationKey.toString(),
"ingest " + relationKey.toString(),
"ingest " + relationKey.toString(getDBMS()),
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
} catch (CatalogException e) {
throw new DbException("Error submitting query", e);
}
try {
qf.get();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
updateHowDistributed(relationKey, new HowDistributed(distributeFunction, workersArray));
return getDatasetStatus(relationKey);
}
/**
* Helper method for parallel ingest: choose how many workers to use so that each partition exceeds
* the minimum partition size, then return that many worker ids.
*
* @param fileSize the size of the file to ingest
* @param allWorkers all workers considered for ingest
* @return the sorted ids of the workers selected for ingest
*/
public int[] parallelIngestComputeNumWorkers(long fileSize, Set<Integer> allWorkers) {
/* Determine the number of workers to ingest based on partition size */
int totalNumberOfWorkersToIngest = 0;
for (int i = allWorkers.size(); i >= 1; i--) {
totalNumberOfWorkersToIngest = i;
long currentPartitionSize = fileSize / i;
if (currentPartitionSize > MyriaConstants.PARALLEL_INGEST_WORKER_MINIMUM_PARTITION_SIZE) {
break;
}
}
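/* Sort all candidate worker ids and keep the first totalNumberOfWorkersToIngest of them. */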
int[] workersArray = new int[allWorkers.size()];
int wCounter = 0;
for (Integer w : allWorkers) {
workersArray[wCounter] = w;
wCounter++;
}
Arrays.sort(workersArray);
workersArray = Arrays.copyOfRange(workersArray, 0, totalNumberOfWorkersToIngest);
return workersArray;
}
/**
* Add the metadata of an existing dataset to the catalog.
*
* @param relationKey the relationKey of the dataset to import
* @param schema the schema of the dataset to import
* @param workersToImportFrom the set of workers (null for all known workers)
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public void addDatasetToCatalog(
final RelationKey relationKey, final Schema schema, final List<Integer> workersToImportFrom)
throws DbException, InterruptedException {
/* Figure out the workers we will use. If workersToImportFrom is null, use all known workers. */
List<Integer> actualWorkers = workersToImportFrom;
if (workersToImportFrom == null) {
actualWorkers = ImmutableList.copyOf(getWorkers().keySet());
}
addRelationToCatalog(relationKey, schema, actualWorkers, true);
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : actualWorkers) {
workerPlans.put(
workerId, new SubQueryPlan(new DbInsert(EmptyRelation.of(schema), relationKey, false)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"add to catalog " + relationKey.toString(),
"add to catalog " + relationKey.toString(),
"add to catalog " + relationKey.toString(getDBMS()),
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
qf.get();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Delete a dataset: mark it deleted in the catalog, drop it on each worker, and then remove it
* from the catalog.
*
* @param relationKey the relationKey of the dataset to delete
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public void deleteDataset(final RelationKey relationKey)
throws DbException, InterruptedException {
/* Mark the relation as is_deleted */
try {
catalog.markRelationDeleted(relationKey);
} catch (CatalogException e) {
throw new DbException(e);
}
/* Delete from postgres at each worker by calling the DbDelete operator */
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : getWorkersForRelation(relationKey)) {
workerPlans.put(
workerId,
new SubQueryPlan(
new DbDelete(EmptyRelation.of(catalog.getSchema(relationKey)), relationKey, null)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"delete " + relationKey.toString(),
"delete " + relationKey.toString(),
"deleting from " + relationKey.toString(getDBMS()),
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
qf.get();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
/* Deleting from the catalog */
try {
catalog.deleteRelationFromCatalog(relationKey);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Create indexes on a relation and record them in the catalog.
*
* @param relationKey the relation to index
* @param schema the schema of the relation
* @param indexes the indexes to create
* @return the id of the index-creation query
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public long addIndexesToRelation(
final RelationKey relationKey, final Schema schema, final List<IndexRef> indexes)
throws DbException, InterruptedException {
long queryID;
/* Add indexes to relations */
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : getWorkersForRelation(relationKey)) {
workerPlans.put(
workerId,
new SubQueryPlan(
new DbCreateIndex(
EmptyRelation.of(catalog.getSchema(relationKey)),
relationKey,
schema,
indexes,
null)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"add indexes to " + relationKey.toString(),
"add indexes to " + relationKey.toString(),
"add indexes to " + relationKey.toString(getDBMS()),
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
queryID = qf.get().getQueryId();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
/* Add index to catalog */
try {
catalog.markIndexesInCatalog(relationKey, indexes);
} catch (CatalogException e) {
throw new DbException(e);
}
return queryID;
}
/**
* Create a view on the underlying database.
*
* @param viewName the name of the view
* @param viewDefinition the SQL text of the view
* @param workers the workers creating the view (null for all)
* @return the id of the view-creation query
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public long createView(
final String viewName, final String viewDefinition, final Set<Integer> workers)
throws DbException, InterruptedException {
long queryID;
Set<Integer> actualWorkers = workers;
if (workers == null) {
actualWorkers = getWorkers().keySet();
}
/* Create the view */
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : actualWorkers) {
workerPlans.put(
workerId,
new SubQueryPlan(
new DbCreateView(
EmptyRelation.of(Schema.EMPTY_SCHEMA), viewName, viewDefinition, false, null)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"create view",
"create view",
"create view",
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
queryID = qf.get().getQueryId();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
return queryID;
}
/**
* Create a materialized view.
*
* @param viewName the name of the view
* @param viewDefinition the SQL text of the view
* @param workers the workers creating the view (null for all)
* @return the queryID for the view creation query
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public long createMaterializedView(
final String viewName, final String viewDefinition, final Set<Integer> workers)
throws DbException, InterruptedException {
long queryID;
Set<Integer> actualWorkers = workers;
if (workers == null) {
actualWorkers = getWorkers().keySet();
}
/* Create the view */
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : actualWorkers) {
workerPlans.put(
workerId,
new SubQueryPlan(
new DbCreateView(
EmptyRelation.of(Schema.EMPTY_SCHEMA), viewName, viewDefinition, true, null)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"create materialized view",
"create materialized view",
"create materialized view",
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
queryID = qf.get().getQueryId();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
return queryID;
}
/**
* Create a function and register it in the catalog.
*
* @param name the name of the function
* @param definition the function definition: SQL text for PostgreSQL, function source text for Python.
* @param outputType the output schema of the function
* @param isMultiValued whether the function returns multiple tuples.
* @param lang the language of the function.
* @param binary an optional base64-encoded binary payload for the function.
* @param workers the workers on which the function is registered (null for all).
* @return the id of the function-creation query
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public long createFunction(
final String name,
final String definition,
final String outputType,
final Boolean isMultiValued,
final FunctionLanguage lang,
final String binary,
final Set<Integer> workers)
throws DbException, InterruptedException {
long queryID = 0;
Set<Integer> actualWorkers = workers;
if (workers == null) {
actualWorkers = getWorkers().keySet();
}
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : actualWorkers) {
workerPlans.put(
workerId,
new SubQueryPlan(
new DbCreateFunction(
EmptyRelation.of(Schema.EMPTY_SCHEMA),
name,
definition,
outputType,
isMultiValued,
lang,
binary)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"create function",
"create function",
"create function",
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
queryID = qf.get().getQueryId();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e);
}
} catch (CatalogException e) {
throw new DbException(e);
}
/* Register the function in the catalog; the binary is not stored there. */
try {
catalog.registerFunction(name, definition, outputType, isMultiValued, lang);
} catch (CatalogException e) {
throw new DbException(e);
}
return queryID;
}
/**
* @return list of functions from the catalog
* @throws DbException in case of error.
*/
public List<String> getFunctions() throws DbException {
try {
return catalog.getFunctions();
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param functionName the name of the function to retrieve.
* @return the {@link FunctionStatus} of the function.
* @throws DbException in case of error.
*/
public FunctionStatus getFunctionDetails(final String functionName) throws DbException {
try {
return catalog.getFunctionStatus(functionName);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param relationKey the relationKey of the dataset to persist
* @return the queryID
* @throws DbException if there is an error
* @throws InterruptedException interrupted
* @throws URISyntaxException if the persist URI is malformed
*/
public long persistDataset(final RelationKey relationKey)
throws DbException, InterruptedException, URISyntaxException {
long queryID;
/* Mark the relation as is_persistent */
try {
catalog.markRelationPersistent(relationKey);
} catch (CatalogException e) {
throw new DbException(e);
}
/* Create the query plan for persist */
try {
ImmutableMap.Builder<Integer, SubQueryPlan> workerPlans =
new ImmutableMap.Builder<Integer, SubQueryPlan>();
for (Integer workerId : getWorkersForRelation(relationKey)) {
String partitionName =
String.format(
persistURI + "/myria-system/partition-%s/%s/%s/%s",
workerId,
relationKey.getUserName(),
relationKey.getProgramName(),
relationKey.getRelationName());
DataSink workerSink = new UriSink(partitionName);
workerPlans.put(
workerId,
new SubQueryPlan(
new TupleSink(
new DbQueryScan(relationKey, getSchema(relationKey)),
new PostgresBinaryTupleWriter(),
workerSink)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"persist " + relationKey.toString(),
"persist " + relationKey.toString(),
"persisting from " + relationKey.toString(getDBMS()),
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans.build());
try {
queryID = qf.get().getQueryId();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
return queryID;
}
/**
* Directly runs a SQL command on the underlying database of the selected workers.
*
* @param sqlString command to run on the database
* @param workers the workers that will run the command
* @throws DbException if there is an error
* @throws InterruptedException interrupted
*/
public void executeSQLStatement(final String sqlString, final Set<Integer> workers)
throws DbException, InterruptedException {
/* Execute the SQL command on the set of workers */
try {
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer workerId : workers) {
workerPlans.put(
workerId,
new SubQueryPlan(
new DbExecute(EmptyRelation.of(Schema.EMPTY_SCHEMA), sqlString, null)));
}
ListenableFuture<Query> qf =
queryManager.submitQuery(
"sql execute " + sqlString,
"sql execute " + sqlString,
"sql execute " + sqlString,
new SubQueryPlan(new EmptySink(new EOSSource())),
workerPlans);
try {
qf.get();
} catch (ExecutionException e) {
throw new DbException("Error executing query", e.getCause());
}
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Directly runs a SQL command on the underlying database of the selected workers and returns the
* resulting tuples as an array of CSV-encoded strings.
*
* @param sqlString command to run on the database
* @param outputSchema the schema of the output result
* @param workers the workers that will run the command
* @return the resulting tuples from the SQL statement
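*
* <p>A minimal usage sketch (assumes a running cluster; the relation name {@code foo} and the set
* {@code workers} of alive worker ids are hypothetical):
*
* <pre>{@code
* Schema s = Schema.ofFields("cnt", Type.LONG_TYPE);
* String[] rows = server.executeSQLStatement("SELECT COUNT(*) AS cnt FROM foo", s, workers);
* }</pre>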
*/
public String[] executeSQLStatement(
final String sqlString, final Schema outputSchema, final Set<Integer> workers)
throws DbException {
ByteSink byteSink = new ByteSink();
TupleWriter writer = new CsvTupleWriter();
DbQueryScan scan = new DbQueryScan(sqlString, outputSchema);
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
for (Integer w : workers) {
workerPlans.put(w, workerPlan);
}
final Consumer consumer = new Consumer(outputSchema, operatorId, workers);
TupleSink output = new TupleSink(consumer, writer, byteSink, false);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
String planString = "execute sql statement : " + sqlString;
try {
queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans).get();
} catch (Exception e) {
throw new DbException("Error executing SQL statement", e);
}
byte[] responseBytes;
try {
responseBytes = ((ByteArrayOutputStream) byteSink.getOutputStream()).toByteArray();
} catch (IOException e) {
throw new DbException("Error reading SQL statement result", e);
}
String response = new String(responseBytes, StandardCharsets.UTF_8);
String[] tuples = response.split("\r\n");
return tuples;
}
/**
* @param relationKey the key of the desired relation.
* @return the schema of the specified relation, or null if not found.
* @throws CatalogException if there is an error getting the Schema out of the catalog.
*/
public Schema getSchema(final RelationKey relationKey) throws CatalogException {
if (relationKey.isTemp()) {
return queryManager.getQuery(relationKey.tempRelationQueryId()).getTempSchema(relationKey);
}
return catalog.getSchema(relationKey);
}
/**
* @param key the relation key.
* @param howDistributed how the dataset is distributed.
* @throws DbException if there is an error updating the catalog.
*/
public void updateHowDistributed(final RelationKey key, final HowDistributed howDistributed)
throws DbException {
try {
catalog.updateHowDistributed(key, howDistributed);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param relationKey the key of the desired relation, temporary or persistent.
* @throws CatalogException if there is an error accessing the catalog.
* @return the set of workers that store the specified relation.
*/
public @Nonnull Set<Integer> getWorkersForRelation(@Nonnull final RelationKey relationKey)
throws CatalogException {
if (relationKey.isTemp()) {
return queryManager
.getQuery(relationKey.tempRelationQueryId())
.getWorkersForTempRelation(relationKey);
} else {
return catalog.getWorkersForRelationKey(relationKey);
}
}
/** @return the socket info for the master. */
protected SocketInfo getSocketInfo() {
return masterSocketInfo;
}
/**
* @return A list of datasets in the system.
* @throws DbException if there is an error accessing the desired Schema.
*/
public List<DatasetStatus> getDatasets() throws DbException {
try {
return catalog.getDatasets();
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Get the metadata about a relation.
*
* @param relationKey specified which relation to get the metadata about.
* @return the metadata of the specified relation.
* @throws DbException if there is an error getting the status.
*/
public DatasetStatus getDatasetStatus(final RelationKey relationKey) throws DbException {
try {
return catalog.getDatasetStatus(relationKey);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param searchTerm the search term
* @return the relations that match the search term
* @throws DbException if there is an error getting the relation keys.
*/
public List<RelationKey> getMatchingRelationKeys(final String searchTerm) throws DbException {
try {
return catalog.getMatchingRelationKeys(searchTerm);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param userName the user whose datasets we want to access.
* @return a list of datasets belonging to the specified user.
* @throws DbException if there is an error accessing the Catalog.
*/
public List<DatasetStatus> getDatasetsForUser(final String userName) throws DbException {
try {
return catalog.getDatasetsForUser(userName);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param userName the user whose datasets we want to access.
* @param programName the program by that user whose datasets we want to access.
* @return a list of datasets belonging to the specified program.
* @throws DbException if there is an error accessing the Catalog.
*/
public List<DatasetStatus> getDatasetsForProgram(final String userName, final String programName)
throws DbException {
try {
return catalog.getDatasetsForProgram(userName, programName);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param queryId the id of the query.
* @return a list of datasets created by the specified query.
* @throws DbException if there is an error accessing the Catalog.
*/
public List<DatasetStatus> getDatasetsForQuery(final int queryId) throws DbException {
try {
return catalog.getDatasetsForQuery(queryId);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @return the maximum query id that matches the search.
* @param searchTerm a token to match against the raw queries. If null, all queries match.
* @throws CatalogException if an error occurs
*/
public long getMaxQuery(final String searchTerm) throws CatalogException {
return catalog.getMaxQuery(searchTerm);
}
/**
* @return the minimum query id that matches the search.
* @param searchTerm a token to match against the raw queries. If null, all queries match.
* @throws CatalogException if an error occurs
*/
public long getMinQuery(final String searchTerm) throws CatalogException {
return catalog.getMinQuery(searchTerm);
}
/**
* Start a query that streams tuples from the specified relation to the specified {@link TupleWriter}.
*
* @param relationKey the relation to be downloaded.
* @param writer the {@link TupleWriter} which will serialize the tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the query future from which the query status can be looked up.
* @throws DbException if there is an error in the system.
*/
public ListenableFuture<Query> startDataStream(
final RelationKey relationKey, final TupleWriter writer, final DataSink dataSink)
throws DbException {
/* Get the relation's schema, to make sure it exists. */
final Schema schema;
try {
schema = catalog.getSchema(relationKey);
} catch (CatalogException e) {
throw new DbException(e);
}
Preconditions.checkArgument(schema != null, "relation %s was not found", relationKey);
/* Get the workers that store it. */
Set<Integer> scanWorkers;
try {
scanWorkers = getWorkersForRelation(relationKey);
} catch (CatalogException e) {
throw new DbException(e);
}
/* If relation is broadcast, pick random worker to scan. */
DistributeFunction df = getDatasetStatus(relationKey).getHowDistributed().getDf();
if (df instanceof BroadcastDistributeFunction) {
scanWorkers = ImmutableSet.of(scanWorkers.iterator().next());
}
/* Construct the operators that run on the workers. */
DbQueryScan scan = new DbQueryScan(relationKey, schema);
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(scanWorkers.size());
for (Integer worker : scanWorkers) {
workerPlans.put(worker, workerPlan);
}
/* Construct the master plan. */
final Consumer consumer = new Consumer(schema, operatorId, ImmutableSet.copyOf(scanWorkers));
TupleSink output = new TupleSink(consumer, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString = "download " + relationKey.toString();
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Start a query that streams synthetic test tuples to the specified {@link TupleWriter}.
*
* @param numTB the number of {@link TupleBatch}es to download from each worker.
* @param writer the {@link TupleWriter} which will serialize the tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the query future from which the query status can be looked up.
* @throws DbException if there is an error in the system.
*/
public ListenableFuture<Query> startTestDataStream(
final int numTB, final TupleWriter writer, final DataSink dataSink) throws DbException {
final Schema schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.STRING_TYPE), ImmutableList.of("id", "name"));
Random r = new Random();
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
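// Fill exactly one batch with random ids and current-timestamp strings.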
for (int i = 0; i < tbb.getBatchSize(); i++) {
tbb.putLong(0, r.nextLong());
tbb.putString(1, new java.util.Date().toString());
}
TupleBatch tb = tbb.popAny();
final DuplicateTBGenerator scanTable = new DuplicateTBGenerator(tb, numTB);
/* Generate the test data on all alive workers. */
Set<Integer> scanWorkers = getAliveWorkers();
/* Construct the operators that run on the workers. */
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scanTable, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(scanWorkers.size());
for (Integer worker : scanWorkers) {
workerPlans.put(worker, workerPlan);
}
/* Construct the master plan. */
final Consumer consumer = new Consumer(schema, operatorId, ImmutableSet.copyOf(scanWorkers));
TupleSink output = new TupleSink(consumer, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString = "download test";
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param subqueryId the subquery id.
* @param fragmentId the fragment id to return data for. All fragments, if < 0.
* @param writer writer to get data.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that downloads the profiling logs.
* @throws DbException if there is an error when accessing profiling logs.
*/
public ListenableFuture<Query> startSentLogDataStream(
final SubQueryId subqueryId,
final long fragmentId,
final TupleWriter writer,
final DataSink dataSink)
throws DbException {
Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId);
String fragmentWhere = "";
if (fragmentId >= 0) {
fragmentWhere = "AND \"fragmentId\" = " + fragmentId;
}
final Schema schema =
Schema.ofFields(
"fragmentId", Type.INT_TYPE, "destWorker", Type.INT_TYPE, "numTuples", Type.LONG_TYPE);
String sentQueryString =
Joiner.on(' ')
.join(
"SELECT \"fragmentId\", \"destWorkerId\", sum(\"numTuples\") as \"numTuples\" FROM",
MyriaConstants.SENT_PROFILING_RELATION.toString(getDBMS()),
"WHERE \"queryId\" =",
subqueryId.getQueryId(),
"AND \"subQueryId\" =",
subqueryId.getSubqueryId(),
fragmentWhere,
"GROUP BY \"fragmentId\", \"destWorkerId\"");
DbQueryScan scan = new DbQueryScan(sentQueryString, schema);
final ExchangePairID operatorId = ExchangePairID.newID();
ImmutableList.Builder<Expression> emitExpressions = ImmutableList.builder();
emitExpressions.add(new Expression("workerId", new WorkerIdExpression()));
for (int column = 0; column < schema.numColumns(); column++) {
VariableExpression copy = new VariableExpression(column);
emitExpressions.add(new Expression(schema.getColumnName(column), copy));
}
Apply addWorkerId = new Apply(scan, emitExpressions.build());
CollectProducer producer =
new CollectProducer(addWorkerId, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
final Consumer consumer =
new Consumer(addWorkerId.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
final Aggregate aggregate =
new Aggregate(
consumer, new int[] {0, 1, 2}, new PrimitiveAggregatorFactory(3, AggregationOp.SUM));
// rename columns
ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder();
renameExpressions.add(new Expression("src", new VariableExpression(0)));
renameExpressions.add(new Expression("fragmentId", new VariableExpression(1)));
renameExpressions.add(new Expression("dest", new VariableExpression(2)));
renameExpressions.add(new Expression("numTuples", new VariableExpression(3)));
final Apply rename = new Apply(aggregate, renameExpressions.build());
TupleSink output = new TupleSink(rename, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString =
Joiner.on("")
.join(
"download profiling sent data for (query=",
subqueryId.getQueryId(),
", subquery=",
subqueryId.getSubqueryId(),
", fragment=",
fragmentId,
")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Extracts the set of workers used in a saved, encoded physical plan.
*
* @param plan the JSON-encoded physical plan (a serialized {@code List<PlanFragmentEncoding>}), cached during execution.
* @return the set of workers used during the execution of this subquery.
*/
@Nonnull
private Set<Integer> getWorkersFromSubqueryPlan(final String plan) {
/* We need to accumulate the workers used in the plan. We could deserialize the plan as a
* List<PlanFragmentEncoding>... which it is, but for forwards and backwards compatibility let's deserialize it as a
* List<Map<String,Object>>... which it also is. */
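/* For illustration, the expected (simplified) shape is something like
 *   [{"operators": [...], "workers": [1, 2]}, {"operators": [...], "workers": [2, 3]}]
 * where the worker ids are hypothetical; only the "workers" entry of each fragment map is read here. */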
ObjectMapper mapper = MyriaJsonMapperProvider.getMapper();
List<Map<String, Object>> fragments;
Set<Integer> actualWorkers = Sets.newHashSet();
try {
fragments = mapper.readValue(plan, new TypeReference<List<Map<String, Object>>>() {});
int fragIdx = 0;
for (Map<String, Object> m : fragments) {
Object fragWorkers = m.get("workers");
Preconditions.checkNotNull(fragWorkers, "No workers recorded for fragment %s", fragIdx);
Preconditions.checkState(
fragWorkers instanceof Collection<?>,
"Expected fragWorkers to be a collection, instead found %s",
fragWorkers.getClass());
try {
@SuppressWarnings("unchecked")
Collection<Integer> curWorkers = (Collection<Integer>) fragWorkers;
actualWorkers.addAll(curWorkers);
} catch (ClassCastException e) {
throw new IllegalStateException(
"Expected fragWorkers to be a collection of ints, instead found " + fragWorkers);
}
}
} catch (IOException e) {
throw new IllegalArgumentException(
"Error deserializing workers from encoded plan " + plan, e);
}
/* Remove the MASTER from the set. */
actualWorkers.remove(MyriaConstants.MASTER_ID);
return actualWorkers;
}
/**
* Returns the set of workers that executed a particular subquery.
*
* @param subQueryId the subquery.
* @return the set of workers that executed a particular subquery.
* @throws DbException if there is an error in the catalog.
*/
private Set<Integer> getWorkersForSubQuery(final SubQueryId subQueryId) throws DbException {
String serializedPlan;
try {
serializedPlan = catalog.getQueryPlan(subQueryId);
} catch (CatalogException e) {
throw new DbException(e);
}
Preconditions.checkArgument(
serializedPlan != null, "No cached query plan for subquery %s", subQueryId);
return getWorkersFromSubqueryPlan(serializedPlan);
}
/**
* @param subqueryId the subquery id.
* @param writer the {@link TupleWriter} that serializes the profiling tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that streams the aggregated profiling logs.
* @throws DbException if there is an error when accessing profiling logs.
*/
public ListenableFuture<Query> startAggregatedSentLogDataStream(
final SubQueryId subqueryId, final TupleWriter writer, final DataSink dataSink)
throws DbException {
Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId);
final Schema schema =
Schema.ofFields(
"fragmentId",
Type.INT_TYPE,
"numTuples",
Type.LONG_TYPE,
"minTime",
Type.LONG_TYPE,
"maxTime",
Type.LONG_TYPE);
String sentQueryString =
Joiner.on(' ')
.join(
"SELECT \"fragmentId\", sum(\"numTuples\") as \"numTuples\", min(\"nanoTime\") as \"minTime\", max(\"nanoTime\") as \"maxTime\" FROM",
MyriaConstants.SENT_PROFILING_RELATION.toString(getDBMS()),
"WHERE \"queryId\" =",
subqueryId.getQueryId(),
"AND \"subQueryId\" =",
subqueryId.getSubqueryId(),
"GROUP BY \"fragmentId\"");
DbQueryScan scan = new DbQueryScan(sentQueryString, schema);
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
final Consumer consumer =
new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
final Aggregate aggregate =
new Aggregate(
consumer,
new int[] {0},
new PrimitiveAggregatorFactory(1, AggregationOp.SUM),
new PrimitiveAggregatorFactory(2, AggregationOp.MIN),
new PrimitiveAggregatorFactory(3, AggregationOp.MAX));
// rename columns
ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder();
renameExpressions.add(new Expression("fragmentId", new VariableExpression(0)));
renameExpressions.add(new Expression("numTuples", new VariableExpression(1)));
renameExpressions.add(
new Expression(
"duration", new MinusExpression(new VariableExpression(3), new VariableExpression(2))));
final Apply rename = new Apply(aggregate, renameExpressions.build());
TupleSink output = new TupleSink(rename, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString =
Joiner.on("")
.join(
"download profiling aggregated sent data for (query=",
subqueryId.getQueryId(),
", subquery=",
subqueryId.getSubqueryId(),
")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param subqueryId the desired subquery.
* @param fragmentId the fragment id to return data for. All fragments, if < 0.
* @param start the earliest time where we need data
* @param end the latest time
* @param minSpanLength minimum length of a span to be returned
* @param onlyRootOperator only return data for root operator
* @param writer the {@link TupleWriter} that serializes the profiling tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that streams the profiling data.
* @throws DbException if there is an error when accessing profiling logs.
*/
public QueryFuture startLogDataStream(
final SubQueryId subqueryId,
final long fragmentId,
final long start,
final long end,
final long minSpanLength,
final boolean onlyRootOperator,
final TupleWriter writer,
final DataSink dataSink)
throws DbException {
Preconditions.checkArgument(start < end, "start must be before end");
final Schema schema =
Schema.ofFields(
"opId",
Type.INT_TYPE,
"startTime",
Type.LONG_TYPE,
"endTime",
Type.LONG_TYPE,
"numTuples",
Type.LONG_TYPE);
Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId);
String opCondition = "";
if (onlyRootOperator) {
opCondition =
Joiner.on(' ')
.join(
"AND \"opId\" = (SELECT \"opId\" FROM",
MyriaConstants.EVENT_PROFILING_RELATION.toString(getDBMS()),
"WHERE \"fragmentId\" =",
fragmentId,
" AND \"queryId\"=",
subqueryId.getQueryId(),
"AND \"subQueryId\" =",
subqueryId.getSubqueryId(),
"ORDER BY \"startTime\" ASC LIMIT 1)");
}
String spanCondition = "";
if (minSpanLength > 0) {
spanCondition = Joiner.on(' ').join("AND \"endTime\" - \"startTime\" >", minSpanLength);
}
String queryString =
Joiner.on(' ')
.join(
"SELECT \"opId\", \"startTime\", \"endTime\", \"numTuples\" FROM",
MyriaConstants.EVENT_PROFILING_RELATION.toString(getDBMS()),
"WHERE \"fragmentId\" =",
fragmentId,
"AND \"queryId\" =",
subqueryId.getQueryId(),
"AND \"subQueryId\" =",
subqueryId.getSubqueryId(),
"AND \"endTime\" >",
start,
"AND \"startTime\" <",
end,
opCondition,
spanCondition,
"ORDER BY \"startTime\" ASC");
DbQueryScan scan = new DbQueryScan(queryString, schema);
ImmutableList.Builder<Expression> emitExpressions = ImmutableList.builder();
emitExpressions.add(new Expression("workerId", new WorkerIdExpression()));
for (int column = 0; column < schema.numColumns(); column++) {
VariableExpression copy = new VariableExpression(column);
emitExpressions.add(new Expression(schema.getColumnName(column), copy));
}
Apply addWorkerId = new Apply(scan, emitExpressions.build());
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer =
new CollectProducer(addWorkerId, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
final Consumer consumer =
new Consumer(addWorkerId.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
TupleSink output = new TupleSink(consumer, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString =
Joiner.on("")
.join(
"download profiling data (query=",
subqueryId.getQueryId(),
", subquery=",
subqueryId.getSubqueryId(),
", fragment=",
fragmentId,
", range=[",
Joiner.on(", ").join(start, end),
"]",
")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/** Upper bound on the number of bins a profiler can ask for. */
private static final long MAX_BINS = 10000;
/**
* @param subqueryId subquery id.
* @param fragmentId the fragment id to return data for. All fragments, if < 0.
* @param start start of the histogram
* @param end the end of the histogram
* @param step the step size between min and max
* @param onlyRootOp return histogram only for root operator
* @param writer the {@link TupleWriter} that serializes the histogram tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that computes the histogram.
* @throws DbException if there is an error when accessing profiling logs.
*/
public QueryFuture startHistogramDataStream(
final SubQueryId subqueryId,
final long fragmentId,
final long start,
final long end,
final long step,
final boolean onlyRootOp,
final TupleWriter writer,
final DataSink dataSink)
throws DbException {
Preconditions.checkArgument(start < end, "start must be before end");
Preconditions.checkArgument(step > 0, "step has to be greater than 0");
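/* Number of bins covering [start, end]; e.g. start=0, end=999, step=100 gives (999 - 0 + 1) / 100 = 10 bins. */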
long bins = (end - start + 1) / step;
Preconditions.checkArgument(
bins > 0 && bins <= MAX_BINS, "bins must be in the range [1, %s]", MAX_BINS);
Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId);
final Schema schema = Schema.ofFields("opId", Type.INT_TYPE, "nanoTime", Type.LONG_TYPE);
final RelationKey relationKey = MyriaConstants.EVENT_PROFILING_RELATION;
Map<String, Object> queryArgs = new HashMap<>();
queryArgs.put("QUERY", subqueryId.getQueryId());
queryArgs.put("SUBQUERY", subqueryId.getSubqueryId());
queryArgs.put("FRAGMENT", fragmentId);
queryArgs.put("START", start);
queryArgs.put("END", end);
queryArgs.put("STEP", step);
queryArgs.put("BINS", bins);
queryArgs.put("PROF_TABLE", relationKey.toString(getDBMS()));
StrSubstitutor sub;
String filterOpnameQueryString = "";
if (onlyRootOp) {
sub = new StrSubstitutor(queryArgs);
filterOpnameQueryString =
sub.replace(
"AND p.\"opId\"=(SELECT \"opId\" FROM ${PROF_TABLE} WHERE \"fragmentId\"=${FRAGMENT} AND \"queryId\"=${QUERY} AND \"subQueryId\"=${SUBQUERY} ORDER BY \"startTime\" ASC LIMIT 1)");
}
// Reinitialize the substitutor after including the opname filter.
queryArgs.put("OPNAME_FILTER", filterOpnameQueryString);
sub = new StrSubstitutor(queryArgs);
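/* The query below maps each profiling event of this fragment to the histogram bins it overlaps and uses
 * generate_series to emit one (opId, nanoTime) row per overlapped bin, where nanoTime is the bin's start time.
 * The master then counts these rows per (opId, bin). */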
String histogramWorkerQueryString =
sub.replace(
Joiner.on("\n")
.join(
"SELECT \"opId\", ${START}::bigint+${STEP}::bigint*s.bin as \"nanoTime\"",
"FROM (",
"SELECT p.\"opId\", greatest((p.\"startTime\"-1-${START}::bigint)/${STEP}::bigint, -1) as \"startBin\", least((p.\"endTime\"+1-${START}::bigint)/${STEP}::bigint, ${BINS}) AS \"endBin\"",
"FROM ${PROF_TABLE} p",
"WHERE p.\"queryId\" = ${QUERY} and p.\"subQueryId\" = ${SUBQUERY} and p.\"fragmentId\" = ${FRAGMENT}",
"${OPNAME_FILTER}",
"AND greatest((p.\"startTime\"-${START}::bigint)/${STEP}::bigint, -1) < least((p.\"endTime\"-${START}::bigint)/${STEP}::bigint, ${BINS}) AND p.\"startTime\" < ${END}::bigint AND p.\"endTime\" >= ${START}::bigint",
") times,",
"generate_series(0, ${BINS}) AS s(bin)",
"WHERE s.bin > times.\"startBin\" and s.bin <= times.\"endBin\";"));
DbQueryScan scan = new DbQueryScan(histogramWorkerQueryString, schema);
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
/* Aggregate histogram on master */
final Consumer consumer =
new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
// count, per (opId, bin), how many worker rows reported activity (reported as numWorkers below)
final Aggregate sumAggregate =
new Aggregate(
consumer, new int[] {0, 1}, new PrimitiveAggregatorFactory(1, AggregationOp.COUNT));
// rename columns
ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder();
renameExpressions.add(new Expression("opId", new VariableExpression(0)));
renameExpressions.add(new Expression("nanoTime", new VariableExpression(1)));
renameExpressions.add(new Expression("numWorkers", new VariableExpression(2)));
final Apply rename = new Apply(sumAggregate, renameExpressions.build());
TupleSink output = new TupleSink(rename, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString =
Joiner.on("")
.join(
"download profiling histogram (query=",
subqueryId.getQueryId(),
", subquery=",
subqueryId.getSubqueryId(),
", fragment=",
fragmentId,
", range=[",
Joiner.on(", ").join(start, end, step),
"]",
")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param subqueryId the subquery id.
* @param fragmentId the fragment id
* @param writer the {@link TupleWriter} that serializes the result tuples
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that streams the time range.
* @throws DbException if there is an error when accessing profiling logs.
*/
public QueryFuture startRangeDataStream(
final SubQueryId subqueryId,
final long fragmentId,
final TupleWriter writer,
final DataSink dataSink)
throws DbException {
final Schema schema = Schema.ofFields("startTime", Type.LONG_TYPE, "endTime", Type.LONG_TYPE);
final RelationKey relationKey = MyriaConstants.EVENT_PROFILING_RELATION;
Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId);
String opnameQueryString =
Joiner.on(' ')
.join(
"SELECT min(\"startTime\"), max(\"endTime\") FROM",
relationKey.toString(getDBMS()),
"WHERE \"queryId\"=",
subqueryId.getQueryId(),
"AND \"subQueryId\"=",
subqueryId.getSubqueryId(),
"AND \"fragmentId\"=",
fragmentId);
DbQueryScan scan = new DbQueryScan(opnameQueryString, schema);
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
/* Construct the master plan. */
final Consumer consumer =
new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
// Aggregate range on master
final Aggregate sumAggregate =
new Aggregate(
consumer,
new int[] {},
new PrimitiveAggregatorFactory(0, AggregationOp.MIN),
new PrimitiveAggregatorFactory(1, AggregationOp.MAX));
TupleSink output = new TupleSink(sumAggregate, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString =
Joiner.on("")
.join(
"download time range (query=",
subqueryId.getQueryId(),
", subquery=",
subqueryId.getSubqueryId(),
", fragment=",
fragmentId,
")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param subqueryId subquery id.
* @param fragmentId the fragment id to return data for. All fragments, if < 0.
* @param writer the {@link TupleWriter} that serializes the contribution tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that streams the per-operator contributions.
* @throws DbException if there is an error when accessing profiling logs.
*/
public QueryFuture startContributionsStream(
final SubQueryId subqueryId,
final long fragmentId,
final TupleWriter writer,
final DataSink dataSink)
throws DbException {
final Schema schema = Schema.ofFields("opId", Type.INT_TYPE, "nanoTime", Type.LONG_TYPE);
final RelationKey relationKey = MyriaConstants.EVENT_PROFILING_RELATION;
Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId);
String fragIdCondition = "";
if (fragmentId >= 0) {
fragIdCondition = "AND \"fragmentId\"=" + fragmentId;
}
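/* Per worker: total time each operator spent executing (sum of endTime - startTime); the master then averages
 * these per-worker totals for each operator. */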
String opContributionsQueryString =
Joiner.on(' ')
.join(
"SELECT \"opId\", sum(\"endTime\" - \"startTime\") FROM ",
relationKey.toString(getDBMS()),
"WHERE \"queryId\"=",
subqueryId.getQueryId(),
"AND \"subQueryId\"=",
subqueryId.getSubqueryId(),
fragIdCondition,
"GROUP BY \"opId\"");
DbQueryScan scan = new DbQueryScan(opContributionsQueryString, schema);
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
/* Aggregate on master */
final Consumer consumer =
new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
// average the per-worker totals for each operator
final Aggregate sumAggregate =
new Aggregate(
consumer, new int[] {0}, new PrimitiveAggregatorFactory(1, AggregationOp.AVG));
// rename columns
ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder();
renameExpressions.add(new Expression("opId", new VariableExpression(0)));
renameExpressions.add(new Expression("nanoTime", new VariableExpression(1)));
final Apply rename = new Apply(sumAggregate, renameExpressions.build());
TupleSink output = new TupleSink(rename, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString =
Joiner.on("")
.join(
"download operator contributions (query=",
subqueryId.getQueryId(),
", subquery=",
subqueryId.getSubqueryId(),
", fragment=",
fragmentId,
")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Update the {@link MasterCatalog} so that the specified relation has the specified tuple count.
*
* @param relation the relation to update
* @param count the number of tuples in that relation
* @throws DbException if there is an error in the catalog
*/
public void updateRelationTupleCount(final RelationKey relation, final long count)
throws DbException {
try {
catalog.updateRelationTupleCount(relation, count);
} catch (CatalogException e) {
throw new DbException("updating the number of tuples in the catalog", e);
}
}
/**
* Set the global variable owned by the specified query and named by the specified key to the specified value.
*
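* <p>For example, a hypothetical caller holding a reference to this {@code Server} might stash a value and
* read it back later (key and value are illustrative):
*
* <pre>{@code
* server.setQueryGlobal(queryId, "numIterations", 10);
* Object n = server.getQueryGlobal(queryId, "numIterations"); // 10, or null if never set
* }</pre>
*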
* @param queryId the query to whom the variable belongs.
* @param key the name of the variable
* @param value the new value for the variable
*/
public void setQueryGlobal(
final long queryId, @Nonnull final String key, @Nonnull final Object value) {
Preconditions.checkNotNull(key, "key");
Preconditions.checkNotNull(value, "value");
queryManager.getQuery(queryId).setGlobal(key, value);
}
/**
* Get the value of global variable owned by the specified query and named by the specified key.
*
* @param queryId the query to whom the variable belongs.
* @param key the name of the variable
* @return the value of the variable
*/
@Nullable
public Object getQueryGlobal(final long queryId, @Nonnull final String key) {
Preconditions.checkNotNull(key, "key");
return queryManager.getQuery(queryId).getGlobal(key);
}
/**
* @param queryId the id of the query whose resource usage to fetch
* @param dataSink the {@link DataSink} to which the resource usage is written.
* @throws DbException if there is an error in the database.
*/
public void getResourceUsage(final long queryId, final DataSink dataSink) throws DbException {
Schema schema =
Schema.appendColumn(MyriaConstants.RESOURCE_PROFILING_SCHEMA, Type.INT_TYPE, "workerId");
try {
TupleWriter writer = new CsvTupleWriter();
TupleBuffer tb = queryManager.getResourceUsage(queryId);
if (tb != null) {
writer.open(dataSink.getOutputStream());
writer.writeColumnHeaders(schema.getColumnNames());
writer.writeTuples(tb);
writer.done();
return;
}
getResourceLog(queryId, writer, dataSink);
} catch (IOException e) {
throw new DbException(e);
}
}
/**
* @param queryId query id.
* @param writer the {@link TupleWriter} that serializes the resource log tuples.
* @param dataSink the {@link DataSink} for the tuple destination
* @return the future of the query that streams the resource logs.
* @throws DbException if there is an error when accessing profiling logs.
*/
public ListenableFuture<Query> getResourceLog(
final long queryId, final TupleWriter writer, final DataSink dataSink) throws DbException {
SubQueryId sqId = new SubQueryId(queryId, 0);
String serializedPlan;
try {
serializedPlan = catalog.getQueryPlan(sqId);
} catch (CatalogException e) {
throw new DbException(e);
}
Preconditions.checkArgument(
serializedPlan != null, "No cached query plan for subquery %s", sqId);
Set<Integer> actualWorkers = getWorkersFromSubqueryPlan(serializedPlan);
final Schema schema = MyriaConstants.RESOURCE_PROFILING_SCHEMA;
String resourceQueryString =
Joiner.on(' ')
.join(
"SELECT * from",
MyriaConstants.RESOURCE_PROFILING_RELATION.toString(getDBMS()),
"WHERE \"queryId\" =",
queryId);
DbQueryScan scan = new DbQueryScan(resourceQueryString, schema);
ImmutableList.Builder<Expression> emitExpressions = ImmutableList.builder();
for (int column = 0; column < schema.numColumns(); column++) {
VariableExpression copy = new VariableExpression(column);
emitExpressions.add(new Expression(schema.getColumnName(column), copy));
}
emitExpressions.add(new Expression("workerId", new WorkerIdExpression()));
Apply addWorkerId = new Apply(scan, emitExpressions.build());
final ExchangePairID operatorId = ExchangePairID.newID();
CollectProducer producer =
new CollectProducer(addWorkerId, operatorId, MyriaConstants.MASTER_ID);
SubQueryPlan workerPlan = new SubQueryPlan(producer);
Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size());
for (Integer worker : actualWorkers) {
workerPlans.put(worker, workerPlan);
}
final Consumer consumer =
new Consumer(addWorkerId.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers));
TupleSink output = new TupleSink(consumer, writer, dataSink);
final SubQueryPlan masterPlan = new SubQueryPlan(output);
/* Submit the plan for the download. */
String planString = Joiner.on("").join("download resource log for (query=", queryId, ")");
try {
return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* Record in the catalog the encoded plan that this subquery executed.
*
* @param subQueryId the id of the subquery.
* @param encodedPlan the plan.
* @throws DbException if there is an error in the catalog.
*/
public void setQueryPlan(final SubQueryId subQueryId, @Nonnull final String encodedPlan)
throws DbException {
try {
catalog.setQueryPlan(subQueryId, encodedPlan);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/**
* @param subQueryId the query whose plan to look up.
* @return the cached execution plan for this subquery, or {@code null} if none is stored.
* @throws DbException if there is an error getting the query status.
*/
@Nullable
public String getQueryPlan(@Nonnull final SubQueryId subQueryId) throws DbException {
try {
return catalog.getQueryPlan(subQueryId);
} catch (CatalogException e) {
throw new DbException(e);
}
}
/** @return the master catalog. */
public MasterCatalog getCatalog() {
return catalog;
}
/**
* @return the perfenforce driver
*/
public PerfEnforceDriver getPerfEnforceDriver() {
return perfEnforceDriver;
}
}