package edu.washington.escience.myria.parallel; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.ObjectInputStream; import java.net.BindException; import java.net.URISyntaxException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Queue; import java.util.Random; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.inject.Inject; import org.apache.commons.httpclient.URIException; import org.apache.commons.lang.text.StrSubstitutor; import org.apache.reef.tang.Configuration; import org.apache.reef.tang.Injector; import org.apache.reef.tang.Tang; import org.apache.reef.tang.annotations.Parameter; import org.apache.reef.tang.exceptions.InjectionException; import org.apache.reef.tang.formats.AvroConfigurationSerializer; import org.apache.reef.tang.formats.ConfigurationSerializer; import org.apache.reef.task.TaskMessage; import org.apache.reef.task.TaskMessageSource; import org.apache.reef.task.events.DriverMessage; import org.apache.reef.util.Optional; import org.apache.reef.wake.EventHandler; import org.jboss.netty.channel.ChannelFactory; import org.jboss.netty.channel.ChannelPipelineFactory; import org.jboss.netty.channel.group.ChannelGroupFuture; import org.jboss.netty.channel.group.ChannelGroupFutureListener; import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory; import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; import org.jboss.netty.handler.execution.OrderedMemoryAwareThreadPoolExecutor; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.Striped; import com.google.protobuf.InvalidProtocolBufferException; import edu.washington.escience.myria.CsvTupleWriter; import edu.washington.escience.myria.DbException; import edu.washington.escience.myria.MyriaConstants; import edu.washington.escience.myria.MyriaConstants.FunctionLanguage; import edu.washington.escience.myria.PostgresBinaryTupleWriter; import edu.washington.escience.myria.RelationKey; import edu.washington.escience.myria.Schema; import edu.washington.escience.myria.TupleWriter; import edu.washington.escience.myria.Type; import edu.washington.escience.myria.accessmethod.AccessMethod.IndexRef; import edu.washington.escience.myria.api.MyriaJsonMapperProvider; import 
edu.washington.escience.myria.api.encoding.DatasetStatus; import edu.washington.escience.myria.api.encoding.FunctionStatus; import edu.washington.escience.myria.api.encoding.QueryEncoding; import edu.washington.escience.myria.coordinator.CatalogException; import edu.washington.escience.myria.coordinator.MasterCatalog; import edu.washington.escience.myria.expression.Expression; import edu.washington.escience.myria.expression.MinusExpression; import edu.washington.escience.myria.expression.VariableExpression; import edu.washington.escience.myria.expression.WorkerIdExpression; import edu.washington.escience.myria.io.AmazonS3Source; import edu.washington.escience.myria.io.ByteSink; import edu.washington.escience.myria.io.DataSink; import edu.washington.escience.myria.io.UriSink; import edu.washington.escience.myria.operator.Apply; import edu.washington.escience.myria.operator.CSVFileScanFragment; import edu.washington.escience.myria.operator.DbCreateFunction; import edu.washington.escience.myria.operator.DbCreateIndex; import edu.washington.escience.myria.operator.DbCreateView; import edu.washington.escience.myria.operator.DbDelete; import edu.washington.escience.myria.operator.DbExecute; import edu.washington.escience.myria.operator.DbInsert; import edu.washington.escience.myria.operator.DbQueryScan; import edu.washington.escience.myria.operator.DuplicateTBGenerator; import edu.washington.escience.myria.operator.EOSSource; import edu.washington.escience.myria.operator.EmptyRelation; import edu.washington.escience.myria.operator.EmptySink; import edu.washington.escience.myria.operator.Operator; import edu.washington.escience.myria.operator.RootOperator; import edu.washington.escience.myria.operator.TupleSink; import edu.washington.escience.myria.operator.agg.Aggregate; import edu.washington.escience.myria.operator.agg.PrimitiveAggregator.AggregationOp; import edu.washington.escience.myria.operator.agg.PrimitiveAggregatorFactory; import edu.washington.escience.myria.operator.network.CollectProducer; import edu.washington.escience.myria.operator.network.Consumer; import edu.washington.escience.myria.operator.network.GenericShuffleProducer; import edu.washington.escience.myria.operator.network.distribute.BroadcastDistributeFunction; import edu.washington.escience.myria.operator.network.distribute.DistributeFunction; import edu.washington.escience.myria.operator.network.distribute.HowDistributed; import edu.washington.escience.myria.parallel.ipc.IPCConnectionPool; import edu.washington.escience.myria.parallel.ipc.IPCMessage; import edu.washington.escience.myria.parallel.ipc.InJVMLoopbackChannelSink; import edu.washington.escience.myria.parallel.ipc.QueueBasedShortMessageProcessor; import edu.washington.escience.myria.perfenforce.PerfEnforceDriver; import edu.washington.escience.myria.proto.ControlProto.ControlMessage; import edu.washington.escience.myria.proto.QueryProto.QueryMessage; import edu.washington.escience.myria.proto.QueryProto.QueryReport; import edu.washington.escience.myria.proto.TransportProto.TransportMessage; import edu.washington.escience.myria.storage.TupleBatch; import edu.washington.escience.myria.storage.TupleBatchBuffer; import edu.washington.escience.myria.storage.TupleBuffer; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.DefaultInstancePath; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.FlowControlWriteBufferHighMarkBytes; import 
edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.FlowControlWriteBufferLowMarkBytes; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.MasterHost; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.MasterRpcPort; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.OperatorInputBufferCapacity; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.OperatorInputBufferRecoverTrigger; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.PersistUri; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.StorageDbms; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.TcpConnectionTimeoutMillis; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.TcpReceiveBufferSizeBytes; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.TcpSendBufferSizeBytes; import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule.WorkerConf; import edu.washington.escience.myria.tools.MyriaWorkerConfigurationModule; import edu.washington.escience.myria.util.IPCUtils; import edu.washington.escience.myria.util.concurrent.ErrorLoggingTimerTask; import edu.washington.escience.myria.util.concurrent.RenamingThreadFactory; /** * The master entrance. */ public final class Server implements TaskMessageSource, EventHandler<DriverMessage> { /** Master message processor. */ private final class MessageProcessor implements Runnable { /** Constructor, set the thread name. */ public MessageProcessor() { super(); } @Override public void run() { TERMINATE_MESSAGE_PROCESSING: while (true) { try { IPCMessage.Data<TransportMessage> mw = null; try { mw = messageQueue.take(); } catch (final InterruptedException e) { Thread.currentThread().interrupt(); break TERMINATE_MESSAGE_PROCESSING; } final TransportMessage m = mw.getPayload(); final int senderID = mw.getRemoteID(); switch (m.getType()) { case CONTROL: final ControlMessage controlM = m.getControlMessage(); switch (controlM.getType()) { case RESOURCE_STATS: queryManager.updateResourceStats(senderID, controlM); break; default: LOGGER.error("Unexpected control message received at master: {}", controlM); break; } break; case QUERY: final QueryMessage qm = m.getQueryMessage(); final SubQueryId subQueryId = new SubQueryId(qm.getQueryId(), qm.getSubqueryId()); switch (qm.getType()) { case QUERY_READY_TO_EXECUTE: LOGGER.info("Worker #{} is ready to execute query #{}.", senderID, subQueryId); queryManager.workerReady(subQueryId, senderID); break; case QUERY_COMPLETE: QueryReport qr = qm.getQueryReport(); if (qr.getSuccess()) { LOGGER.info( "Worker #{} succeeded in executing query #{}.", senderID, subQueryId); queryManager.workerComplete(subQueryId, senderID); } else { ObjectInputStream osis = null; Throwable cause = null; try { osis = new ObjectInputStream( new ByteArrayInputStream(qr.getCause().toByteArray())); cause = (Throwable) (osis.readObject()); } catch (IOException | ClassNotFoundException e) { LOGGER.error("Error decoding failure cause", e); } LOGGER.error( "Worker #{} failed in executing query #{}.", senderID, subQueryId, cause); queryManager.workerFailed(subQueryId, senderID, cause); } break; default: LOGGER.error("Unexpected query message received at master: {}", qm); break; } break; default: LOGGER.error("Unknown short message received at master: {}", m.getType()); break; } } catch (Throwable a) { LOGGER.error("Error occured in master message processor.", a); if 
(a instanceof Error) { throw a; } if (a instanceof InterruptedException) { Thread.currentThread().interrupt(); break TERMINATE_MESSAGE_PROCESSING; } } } } } private final Queue<TaskMessage> pendingDriverMessages = new ConcurrentLinkedQueue<>(); private Optional<TaskMessage> dequeueDriverMessage() { return Optional.ofNullable(pendingDriverMessages.poll()); } private void enqueueDriverMessage(@Nonnull final TransportMessage msg) { final TaskMessage driverMsg = TaskMessage.from(MyriaConstants.MASTER_ID + "", msg.toByteArray()); pendingDriverMessages.add(driverMsg); } /* (non-Javadoc) * @see org.apache.reef.task.TaskMessageSource#getMessage() To be used to instruct the driver to launch or abort * workers. */ @Override public Optional<TaskMessage> getMessage() { // TODO: determine which messages should be sent to the driver return dequeueDriverMessage(); } private Striped<Lock> workerAddRemoveLock; /** REEF event handler for driver messages indicating worker failure. */ @Override public void onNext(final DriverMessage driverMessage) { LOGGER.info("Driver message received"); TransportMessage m; try { m = TransportMessage.parseFrom(driverMessage.get().get()); } catch (InvalidProtocolBufferException e) { LOGGER.warn("Could not parse TransportMessage from driver message", e); return; } final ControlMessage controlM = m.getControlMessage(); LOGGER.info("Control message received: {}", controlM); // We received a failed worker message from the driver. final int workerId = controlM.getWorkerId(); Lock workerLock = workerAddRemoveLock.get(workerId); workerLock.lock(); try { switch (controlM.getType()) { case REMOVE_WORKER: { LOGGER.info( "Driver reported worker {} as dead, removing from alive workers.", workerId); aliveWorkers.remove(workerId); queryManager.workerDied(workerId); connectionPool .removeRemote(workerId) .addListener( new ChannelGroupFutureListener() { @Override public void operationComplete(final ChannelGroupFuture future) { if (future.isCompleteSuccess()) { LOGGER.info( "removed connection for remote worker {} from connection pool", workerId); } else { LOGGER.info( "failed to remove connection for remote worker {} from connection pool", workerId); } } }); enqueueDriverMessage(IPCUtils.removeWorkerAckTM(workerId)); } break; case ADD_WORKER: { Preconditions.checkState(!aliveWorkers.contains(workerId)); LOGGER.info("Driver wants to add worker {} to alive workers.", workerId); connectionPool.putRemote( workerId, SocketInfo.fromProtobuf(controlM.getRemoteAddress())); queryManager.workerRestarted( workerId, ImmutableSet.copyOf(controlM.getAckedWorkerIdsList())); aliveWorkers.add(workerId); enqueueDriverMessage(IPCUtils.addWorkerAckTM(workerId)); } break; default: throw new IllegalStateException( "Unexpected driver control message type: " + controlM.getType()); } } finally { workerLock.unlock(); } } /** The usage message for this server. */ static final String USAGE = "Usage: Server catalogFile [-explain] [-f queryFile]"; /** The logger for this class. */ private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(Server.class); /** Initial worker list. */ private ImmutableMap<Integer, SocketInfo> workers = null; /** Manages the queries executing in this instance of Myria. */ private QueryManager queryManager = null; /** @return the query manager. */ public QueryManager getQueryManager() { return queryManager; } /** Current alive worker set. */ private final Set<Integer> aliveWorkers; /** Execution environment variables for operators. 
*/ private final ConcurrentHashMap<String, Object> execEnvVars; /** * All message queue. * * @TODO remove this queue as in {@link Worker}s. */ private final LinkedBlockingQueue<IPCMessage.Data<TransportMessage>> messageQueue; /** The IPC Connection Pool. */ private IPCConnectionPool connectionPool; /** {@link ExecutorService} for message processing. */ private volatile ExecutorService messageProcessingExecutor; /** The Catalog stores the metadata about the Myria instance. */ private MasterCatalog catalog; /** * The {@link OrderedMemoryAwareThreadPoolExecutor} who gets messages from {@link workerExecutor} and further process * them using application specific message handlers, e.g. {@link MasterShortMessageProcessor}. */ private volatile OrderedMemoryAwareThreadPoolExecutor ipcPipelineExecutor; /** The {@link ExecutorService} who executes the master-side subqueries. */ private volatile ExecutorService serverQueryExecutor; /** Absolute path of the directory containing the master catalog files */ private final String catalogPath; /** The URI to persist relations */ private final String persistURI; /** @return the query executor used in this worker. */ ExecutorService getQueryExecutor() { return serverQueryExecutor; } /** max number of seconds for elegant cleanup. */ public static final int NUM_SECONDS_FOR_ELEGANT_CLEANUP = 10; /** @return my connection pool for IPC. */ IPCConnectionPool getIPCConnectionPool() { return connectionPool; } /** @return my pipeline executor. */ OrderedMemoryAwareThreadPoolExecutor getPipelineExecutor() { return ipcPipelineExecutor; } /** The socket info for the master. */ private final SocketInfo masterSocketInfo; /** The PerfEnforceDriver */ private PerfEnforceDriver perfEnforceDriver; /** * @return my execution environment variables for init of operators. */ ConcurrentHashMap<String, Object> getExecEnvVars() { return execEnvVars; } /** @return execution mode. */ QueryExecutionMode getExecutionMode() { return QueryExecutionMode.NON_BLOCKING; } private final String instancePath; private final int connectTimeoutMillis; private final int sendBufferSize; private final int receiveBufferSize; private final int writeBufferLowWaterMark; private final int writeBufferHighWaterMark; private final int inputBufferCapacity; private final int inputBufferRecoverTrigger; private final Injector injector; /** * Construct a server object, with configuration stored in the specified catalog file. 
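   * <p>All constructor parameters are injected by Tang (REEF) from the global configuration; see
   * {@code MyriaGlobalConfigurationModule} for the corresponding named parameters.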
* * @param masterHost hostname of the master * @param masterPort RPC port of the master * @param instancePath absolute path of the directory containing the master catalog files * @param databaseSystem name of the storage DB system * @param connectTimeoutMillis connect timeout for worker IPC * @param sendBufferSize send buffer size in bytes for worker IPC * @param receiveBufferSize receive buffer size in bytes for worker IPC * @param writeBufferLowWaterMark low watermark for write buffer overflow recovery * @param writeBufferHighWaterMark high watermark for write buffer overflow recovery * @param inputBufferCapacity size of the input buffer in bytes * @param inputBufferRecoverTrigger number of bytes in the input buffer to trigger recovery after overflow * @param persistURI the storage endpoint URI for persisting partitioned relations * @param injector a Tang injector for instantiating objects from configuration */ @Inject public Server( @Parameter(MasterHost.class) final String masterHost, @Parameter(MasterRpcPort.class) final int masterPort, @Parameter(DefaultInstancePath.class) final String instancePath, @Parameter(StorageDbms.class) final String databaseSystem, @Parameter(TcpConnectionTimeoutMillis.class) final int connectTimeoutMillis, @Parameter(TcpSendBufferSizeBytes.class) final int sendBufferSize, @Parameter(TcpReceiveBufferSizeBytes.class) final int receiveBufferSize, @Parameter(FlowControlWriteBufferLowMarkBytes.class) final int writeBufferLowWaterMark, @Parameter(FlowControlWriteBufferHighMarkBytes.class) final int writeBufferHighWaterMark, @Parameter(OperatorInputBufferCapacity.class) final int inputBufferCapacity, @Parameter(OperatorInputBufferRecoverTrigger.class) final int inputBufferRecoverTrigger, @Parameter(PersistUri.class) final String persistURI, final Injector injector) { this.instancePath = instancePath; this.connectTimeoutMillis = connectTimeoutMillis; this.sendBufferSize = sendBufferSize; this.receiveBufferSize = receiveBufferSize; this.writeBufferLowWaterMark = writeBufferLowWaterMark; this.writeBufferHighWaterMark = writeBufferHighWaterMark; this.inputBufferCapacity = inputBufferCapacity; this.inputBufferRecoverTrigger = inputBufferRecoverTrigger; this.persistURI = persistURI; this.injector = injector; masterSocketInfo = new SocketInfo(masterHost, masterPort); this.catalogPath = instancePath; execEnvVars = new ConcurrentHashMap<>(); execEnvVars.put(MyriaConstants.EXEC_ENV_VAR_NODE_ID, MyriaConstants.MASTER_ID); execEnvVars.put(MyriaConstants.EXEC_ENV_VAR_EXECUTION_MODE, getExecutionMode()); execEnvVars.put(MyriaConstants.EXEC_ENV_VAR_DATABASE_SYSTEM, databaseSystem); aliveWorkers = Sets.newConcurrentHashSet(); messageQueue = new LinkedBlockingQueue<>(); } /** timer task executor. */ private ScheduledExecutorService scheduledTaskExecutor; /** This class presents only for the purpose of debugging. No other usage. */ private class DebugHelper extends ErrorLoggingTimerTask { /** Interval of execution. 
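   * (The value is in milliseconds.)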
*/ public static final int INTERVAL = MyriaConstants.WAITING_INTERVAL_1_SECOND_IN_MS; @Override public final synchronized void runInner() { System.currentTimeMillis(); } } private ImmutableSet<Configuration> getWorkerConfs(final Injector injector) throws InjectionException, BindException, IOException { final ImmutableSet.Builder<Configuration> workerConfsBuilder = new ImmutableSet.Builder<>(); final Set<String> serializedWorkerConfs = injector.getNamedInstance(WorkerConf.class); final ConfigurationSerializer serializer = new AvroConfigurationSerializer(); for (final String serializedWorkerConf : serializedWorkerConfs) { final Configuration workerConf = serializer.fromString(serializedWorkerConf); workerConfsBuilder.add(workerConf); } return workerConfsBuilder.build(); } private static Integer getIdFromWorkerConf(final Configuration workerConf) throws InjectionException { final Injector injector = Tang.Factory.getTang().newInjector(workerConf); return injector.getNamedInstance(MyriaWorkerConfigurationModule.WorkerId.class); } private static String getHostFromWorkerConf(final Configuration workerConf) throws InjectionException { final Injector injector = Tang.Factory.getTang().newInjector(workerConf); return injector.getNamedInstance(MyriaWorkerConfigurationModule.WorkerHost.class); } private static Integer getPortFromWorkerConf(final Configuration workerConf) throws InjectionException { final Injector injector = Tang.Factory.getTang().newInjector(workerConf); return injector.getNamedInstance(MyriaWorkerConfigurationModule.WorkerPort.class); } /** Master cleanup. */ private void cleanup() { LOGGER.info("{} is going to shutdown", MyriaConstants.SYSTEM_NAME); queryManager.killAll(); if (messageProcessingExecutor != null && !messageProcessingExecutor.isShutdown()) { messageProcessingExecutor.shutdownNow(); } if (scheduledTaskExecutor != null && !scheduledTaskExecutor.isShutdown()) { scheduledTaskExecutor.shutdownNow(); } /* Close the catalog before shutting down the IPC because there may be Catalog jobs pending that were triggered by * IPC events. */ catalog.close(); connectionPool.shutdown(); connectionPool.releaseExternalResources(); if (ipcPipelineExecutor != null && !ipcPipelineExecutor.isShutdown()) { ipcPipelineExecutor.shutdown(); } LOGGER.info("Master connection pool shutdown complete."); LOGGER.info("Master finishes cleanup."); } /** Shutdown the master. */ public void shutdown() { cleanup(); } /** * Start all the threads that do work for the server. * * @throws Exception if any error occurs. 
*/ public void start() throws Exception { LOGGER.info("Server starting on {}", masterSocketInfo); final ImmutableSet<Configuration> workerConfs = getWorkerConfs(injector); final ImmutableMap.Builder<Integer, SocketInfo> workersBuilder = ImmutableMap.builder(); for (Configuration workerConf : workerConfs) { workersBuilder.put( getIdFromWorkerConf(workerConf), new SocketInfo(getHostFromWorkerConf(workerConf), getPortFromWorkerConf(workerConf))); } workers = workersBuilder.build(); // aliveWorkers.addAll(workers.keySet()); workerAddRemoveLock = Striped.lock(workers.size()); final Map<Integer, SocketInfo> computingUnits = new HashMap<>(workers); computingUnits.put(MyriaConstants.MASTER_ID, masterSocketInfo); try { LOGGER.info("Attempting to open master catalog file under {}...", catalogPath); catalog = MasterCatalog.open(catalogPath); } catch (FileNotFoundException e) { LOGGER.info( "Failed to open master catalog file under {}, attempting to create it...\n({})", catalogPath, e.getMessage()); catalog = MasterCatalog.create(catalogPath); } queryManager = new QueryManager(catalog, this); connectionPool = new IPCConnectionPool( MyriaConstants.MASTER_ID, computingUnits, IPCConfigurations.createMasterIPCServerBootstrap( connectTimeoutMillis, sendBufferSize, receiveBufferSize, writeBufferLowWaterMark, writeBufferHighWaterMark), IPCConfigurations.createMasterIPCClientBootstrap( connectTimeoutMillis, sendBufferSize, receiveBufferSize, writeBufferLowWaterMark, writeBufferHighWaterMark), new TransportMessageSerializer(), new QueueBasedShortMessageProcessor<TransportMessage>(messageQueue), inputBufferCapacity, inputBufferRecoverTrigger); scheduledTaskExecutor = Executors.newSingleThreadScheduledExecutor( new RenamingThreadFactory("Master global timer")); scheduledTaskExecutor.scheduleAtFixedRate( new DebugHelper(), DebugHelper.INTERVAL, DebugHelper.INTERVAL, TimeUnit.MILLISECONDS); messageProcessingExecutor = Executors.newCachedThreadPool(new RenamingThreadFactory("Master message processor")); serverQueryExecutor = Executors.newCachedThreadPool(new RenamingThreadFactory("Master query executor")); /** The {@link Executor} who deals with IPC connection setup/cleanup. */ ExecutorService ipcBossExecutor = Executors.newCachedThreadPool(new RenamingThreadFactory("Master IPC boss")); /** The {@link Executor} who deals with IPC message delivering and transformation. */ ExecutorService ipcWorkerExecutor = Executors.newCachedThreadPool(new RenamingThreadFactory("Master IPC worker")); ipcPipelineExecutor = null; // Remove the pipeline executor. // new // OrderedMemoryAwareThreadPoolExecutor(Runtime.getRuntime().availableProcessors() // * 2 + 1, // 5 * MyriaConstants.MB, 0, // MyriaConstants.THREAD_POOL_KEEP_ALIVE_TIME_IN_MS, // TimeUnit.MILLISECONDS, // new RenamingThreadFactory("Master Pipeline executor")); /** The {@link ChannelFactory} for creating client side connections. */ ChannelFactory clientChannelFactory = new NioClientSocketChannelFactory( ipcBossExecutor, ipcWorkerExecutor, Runtime.getRuntime().availableProcessors() * 2 + 1); /** The {@link ChannelFactory} for creating server side accepted connections. */ ChannelFactory serverChannelFactory = new NioServerSocketChannelFactory( ipcBossExecutor, ipcWorkerExecutor, Runtime.getRuntime().availableProcessors() * 2 + 1); // Start server with Nb of active threads = 2*NB CPU + 1 as maximum. 
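    // The three pipeline factories below configure the Netty pipelines for, respectively, accepted
    // (server-side) connections, outgoing (client-side) connections, and the in-JVM loopback channel.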
    ChannelPipelineFactory serverPipelineFactory =
        new IPCPipelineFactories.MasterServerPipelineFactory(connectionPool, getPipelineExecutor());
    ChannelPipelineFactory clientPipelineFactory =
        new IPCPipelineFactories.MasterClientPipelineFactory(connectionPool, getPipelineExecutor());
    ChannelPipelineFactory masterInJVMPipelineFactory =
        new IPCPipelineFactories.MasterInJVMPipelineFactory(connectionPool);
    connectionPool.start(
        serverChannelFactory,
        serverPipelineFactory,
        clientChannelFactory,
        clientPipelineFactory,
        masterInJVMPipelineFactory,
        new InJVMLoopbackChannelSink());
    messageProcessingExecutor.submit(new MessageProcessor());
    LOGGER.info("Server started on {}", masterSocketInfo);
    if (getDBMS().equals(MyriaConstants.STORAGE_SYSTEM_POSTGRESQL)) {
      final List<Integer> workerIds = ImmutableList.copyOf(workers.keySet());
      addRelationToCatalog(
          MyriaConstants.EVENT_PROFILING_RELATION,
          MyriaConstants.EVENT_PROFILING_SCHEMA,
          workerIds,
          false);
      addRelationToCatalog(
          MyriaConstants.SENT_PROFILING_RELATION,
          MyriaConstants.SENT_PROFILING_SCHEMA,
          workerIds,
          false);
      addRelationToCatalog(
          MyriaConstants.RESOURCE_PROFILING_RELATION,
          MyriaConstants.RESOURCE_PROFILING_SCHEMA,
          workerIds,
          false);
      addRelationToCatalog(
          MyriaConstants.PYUDF_RELATION, MyriaConstants.PYUDF_SCHEMA, workerIds, false);
    }
    perfEnforceDriver = new PerfEnforceDriver(this, instancePath);
  }
  /**
   * Manually add a relation to the catalog.
   *
   * @param relationKey the relation to add
   * @param schema the schema of the relation to add
   * @param workers the workers that have the relation
   * @param force force add the relation; will replace an existing entry.
   * @throws DbException if the catalog cannot be accessed
   */
  private void addRelationToCatalog(
      final RelationKey relationKey,
      final Schema schema,
      final List<Integer> workers,
      final boolean force)
      throws DbException {
    try {
      if (!force && getSchema(relationKey) != null) {
        return;
      }
      QueryEncoding query = new QueryEncoding();
      query.rawQuery = String.format("Add %s to catalog", relationKey);
      query.logicalRa = query.rawQuery;
      query.fragments = ImmutableList.of();
      long queryId = catalog.newQuery(query);
      final Query queryState =
          new Query(
              queryId,
              query,
              new SubQuery(
                  new SubQueryPlan(new EmptySink(new EOSSource())),
                  new HashMap<Integer, SubQueryPlan>()),
              this);
      queryState.markSuccess();
      catalog.queryFinished(queryState);
      Map<RelationKey, RelationWriteMetadata> relation = new HashMap<>();
      RelationWriteMetadata meta = new RelationWriteMetadata(relationKey, schema, true, false);
      for (Integer worker : workers) {
        meta.addWorker(worker);
      }
      relation.put(relationKey, meta);
      catalog.updateRelationMetadata(relation, new SubQueryId(queryId, 0));
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /** @return the dbms from {@link #execEnvVars}. */
  public String getDBMS() {
    return (String) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_DATABASE_SYSTEM);
  }
  /**
   * For use in tests only.
   *
   * @param masterRoot the root operator of the master plan
   * @param workerRoots the roots of the worker part of the plan, {workerID -> RootOperator[]}
   * @return a future for the submitted query plan, once it is accepted and scheduled for execution.
   * @throws DbException if any error occurs.
   * @throws CatalogException catalog errors.
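   * <p>Illustrative use from a test; the worker id and the {@code EmptySink}/{@code EOSSource}
   * operators below are placeholders, not a prescribed plan:
   * <pre>{@code
   * RootOperator masterRoot = new EmptySink(new EOSSource());
   * Map<Integer, RootOperator[]> workerRoots = new HashMap<>();
   * workerRoots.put(1, new RootOperator[] {new EmptySink(new EOSSource())});
   * QueryFuture queryFuture = server.submitQueryPlan(masterRoot, workerRoots);
   * }</pre>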
*/ public QueryFuture submitQueryPlan( final RootOperator masterRoot, final Map<Integer, RootOperator[]> workerRoots) throws DbException, CatalogException { String catalogInfoPlaceHolder = "MasterPlan: " + masterRoot + "; WorkerPlan: " + workerRoots; Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(); for (Entry<Integer, RootOperator[]> entry : workerRoots.entrySet()) { workerPlans.put(entry.getKey(), new SubQueryPlan(entry.getValue())); } return queryManager.submitQuery( catalogInfoPlaceHolder, catalogInfoPlaceHolder, catalogInfoPlaceHolder, new SubQueryPlan(masterRoot), workerPlans); } /** @return the set of workers that are currently alive. */ public Set<Integer> getAliveWorkers() { return ImmutableSet.copyOf(aliveWorkers); } /** * Return a random subset of workers. * * @param number the number of alive workers returned * @return a subset of workers that are currently alive. */ public Set<Integer> getRandomWorkers(final int number) { Preconditions.checkArgument( number <= getAliveWorkers().size(), "The number of workers requested cannot exceed the number of alive workers."); if (number == getAliveWorkers().size()) { return getAliveWorkers(); } List<Integer> workerList = new ArrayList<>(getAliveWorkers()); Collections.shuffle(workerList); return ImmutableSet.copyOf(workerList.subList(0, number)); } /** @return the set of known workers in this Master. */ public Map<Integer, SocketInfo> getWorkers() { return workers; } /** * Ingest the given dataset. * * @param relationKey the name of the dataset. * @param workersToIngest restrict the workers to ingest data (null for all) * @param indexes the indexes created. * @param source the source of tuples to be ingested. * @param df the distribute function. * @return the status of the ingested dataset. * @throws InterruptedException interrupted * @throws DbException if there is an error */ public DatasetStatus ingestDataset( final RelationKey relationKey, List<Integer> workersToIngest, final List<List<IndexRef>> indexes, final Operator source, final DistributeFunction df) throws InterruptedException, DbException { /* Figure out the workers we will use. If workersToIngest is null, use all active workers. */ if (workersToIngest == null) { workersToIngest = ImmutableList.copyOf(getAliveWorkers()); } int[] workersArray = Ints.toArray(workersToIngest); Preconditions.checkArgument(workersArray.length > 0, "Must use > 0 workers"); /* The master plan: send the tuples out. */ ExchangePairID scatterId = ExchangePairID.newID(); df.setDestinations(workersArray.length, 1); GenericShuffleProducer scatter = new GenericShuffleProducer(source, new ExchangePairID[] {scatterId}, workersArray, df); /* The workers' plan */ Consumer gather = new Consumer(source.getSchema(), scatterId, ImmutableSet.of(MyriaConstants.MASTER_ID)); DbInsert insert = new DbInsert(gather, relationKey, true, indexes); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(); for (Integer workerId : workersArray) { workerPlans.put(workerId, new SubQueryPlan(insert)); } ListenableFuture<Query> qf; try { qf = queryManager.submitQuery( "ingest " + relationKey.toString(), "ingest " + relationKey.toString(), "ingest " + relationKey.toString(getDBMS()), new SubQueryPlan(scatter), workerPlans); } catch (CatalogException e) { throw new DbException("Error submitting query", e); } try { qf.get(); } catch (ExecutionException e) { throw new DbException("Error executing query", e.getCause()); } // updating the partition function only after it's successfully ingested. 
updateHowDistributed(relationKey, new HowDistributed(df, workersArray)); return getDatasetStatus(relationKey); } /** * Parallel Ingest * * @param relationKey the name of the dataset. * @param workersToIngest restrict the workers to ingest data (null for all) * @throws URIException * @throws DbException * @throws InterruptedException */ public DatasetStatus parallelIngestDataset( final RelationKey relationKey, final Schema schema, @Nullable final Character delimiter, @Nullable final Character quote, @Nullable final Character escape, @Nullable final Integer numberOfSkippedLines, final AmazonS3Source s3Source, final Set<Integer> workersToIngest, final DistributeFunction distributeFunction) throws URIException, DbException, InterruptedException { long fileSize = s3Source.getFileSize(); Set<Integer> potentialWorkers = MoreObjects.firstNonNull(workersToIngest, getAliveWorkers()); /* Select a subset of workers */ int[] workersArray = parallelIngestComputeNumWorkers(fileSize, potentialWorkers); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(); for (int workerID = 1; workerID <= workersArray.length; workerID++) { CSVFileScanFragment scanFragment = new CSVFileScanFragment( s3Source, schema, workersArray, delimiter, quote, escape, numberOfSkippedLines); workerPlans.put( workersArray[workerID - 1], new SubQueryPlan(new DbInsert(scanFragment, relationKey, true))); } ListenableFuture<Query> qf; try { qf = queryManager.submitQuery( "ingest " + relationKey.toString(), "ingest " + relationKey.toString(), "ingest " + relationKey.toString(getDBMS()), new SubQueryPlan(new EmptySink(new EOSSource())), workerPlans); } catch (CatalogException e) { throw new DbException("Error submitting query", e); } try { qf.get(); } catch (ExecutionException e) { throw new DbException("Error executing query", e.getCause()); } updateHowDistributed(relationKey, new HowDistributed(distributeFunction, workersArray)); return getDatasetStatus(relationKey); } /** * Helper method for parallel ingest. * * @param fileSize the size of the file to ingest * @param allWorkers all workers considered for ingest */ public int[] parallelIngestComputeNumWorkers(long fileSize, Set<Integer> allWorkers) { /* Determine the number of workers to ingest based on partition size */ int totalNumberOfWorkersToIngest = 0; for (int i = allWorkers.size(); i >= 1; i--) { totalNumberOfWorkersToIngest = i; long currentPartitionSize = fileSize / i; if (currentPartitionSize > MyriaConstants.PARALLEL_INGEST_WORKER_MINIMUM_PARTITION_SIZE) { break; } } int[] workersArray = new int[allWorkers.size()]; int wCounter = 0; for (Integer w : allWorkers) { workersArray[wCounter] = w; wCounter++; } Arrays.sort(workersArray); workersArray = Arrays.copyOfRange(workersArray, 0, totalNumberOfWorkersToIngest); return workersArray; } /** * @param relationKey the relationalKey of the dataset to import * @param schema the schema of the dataset to import * @param workersToImportFrom the set of workers * @throws DbException if there is an error * @throws InterruptedException interrupted */ public void addDatasetToCatalog( final RelationKey relationKey, final Schema schema, final List<Integer> workersToImportFrom) throws DbException, InterruptedException { /* Figure out the workers we will use. If workersToImportFrom is null, use all active workers. 
     */
    List<Integer> actualWorkers = workersToImportFrom;
    if (workersToImportFrom == null) {
      actualWorkers = ImmutableList.copyOf(getWorkers().keySet());
    }
    // Pass actualWorkers (never null) so a null workersToImportFrom defaults to all known workers.
    addRelationToCatalog(relationKey, schema, actualWorkers, true);
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : actualWorkers) {
        workerPlans.put(
            workerId, new SubQueryPlan(new DbInsert(EmptyRelation.of(schema), relationKey, false)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "add to catalog " + relationKey.toString(),
              "add to catalog " + relationKey.toString(),
              "add to catalog " + relationKey.toString(getDBMS()),
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        qf.get();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /**
   * @param relationKey the relationKey of the dataset to delete
   * @throws DbException if there is an error
   * @throws InterruptedException interrupted
   */
  public void deleteDataset(final RelationKey relationKey)
      throws DbException, InterruptedException {
    /* Mark the relation as is_deleted */
    try {
      catalog.markRelationDeleted(relationKey);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    /* Delete from postgres at each worker by calling the DbDelete operator */
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : getWorkersForRelation(relationKey)) {
        workerPlans.put(
            workerId,
            new SubQueryPlan(
                new DbDelete(EmptyRelation.of(catalog.getSchema(relationKey)), relationKey, null)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "delete " + relationKey.toString(),
              "delete " + relationKey.toString(),
              "deleting from " + relationKey.toString(getDBMS()),
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        qf.get();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    /* Delete from the catalog */
    try {
      catalog.deleteRelationFromCatalog(relationKey);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /** Create indexes and add their metadata to the catalog. */
  public long addIndexesToRelation(
      final RelationKey relationKey, final Schema schema, final List<IndexRef> indexes)
      throws DbException, InterruptedException {
    long queryID;
    /* Add indexes to relations */
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : getWorkersForRelation(relationKey)) {
        workerPlans.put(
            workerId,
            new SubQueryPlan(
                new DbCreateIndex(
                    EmptyRelation.of(catalog.getSchema(relationKey)),
                    relationKey,
                    schema,
                    indexes,
                    null)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "add indexes to " + relationKey.toString(),
              "add indexes to " + relationKey.toString(),
              "add indexes to " + relationKey.toString(getDBMS()),
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        queryID = qf.get().getQueryId();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    /* Add the indexes to the catalog */
    try {
      catalog.markIndexesInCatalog(relationKey, indexes);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    return queryID;
  }
  /** Create a view. */
  public long createView(
      final String viewName, final String viewDefinition, final Set<Integer> workers)
      throws DbException, InterruptedException {
    long queryID;
    Set<Integer> actualWorkers = workers;
    if (workers == null) {
      actualWorkers = getWorkers().keySet();
    }
    /* Create the view */
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : actualWorkers) {
        workerPlans.put(
            workerId,
            new SubQueryPlan(
                new DbCreateView(
                    EmptyRelation.of(Schema.EMPTY_SCHEMA), viewName, viewDefinition, false, null)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "create view",
              "create view",
              "create view",
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        queryID = qf.get().getQueryId();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    return queryID;
  }
  /**
   * Create a materialized view.
   *
   * @param viewName the name of the view
   * @param viewDefinition the SQL text for the view
   * @param workers the workers creating the view
   * @return the queryID for the view creation query
   */
  public long createMaterializedView(
      final String viewName, final String viewDefinition, final Set<Integer> workers)
      throws DbException, InterruptedException {
    long queryID;
    Set<Integer> actualWorkers = workers;
    if (workers == null) {
      actualWorkers = getWorkers().keySet();
    }
    /* Create the materialized view */
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : actualWorkers) {
        workerPlans.put(
            workerId,
            new SubQueryPlan(
                new DbCreateView(
                    EmptyRelation.of(Schema.EMPTY_SCHEMA), viewName, viewDefinition, true, null)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "create materialized view",
              "create materialized view",
              "create materialized view",
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        queryID = qf.get().getQueryId();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    return queryID;
  }
  /**
   * Create a function and register it in the catalog.
   *
   * @param name the name of the function
   * @param definition the function definition: SQL text for Postgres functions, or the function source
   *     text for Python functions
   * @param outputType the output schema of the function
   * @param isMultiValued indicates whether the function returns multiple tuples
   * @param lang the language of the function
   * @param binary an optional base64-encoded binary for the function
   * @param workers the workers on which the function is registered; defaults to all workers
   * @return the id of the function-creation query
   */
  public long createFunction(
      final String name,
      final String definition,
      final String outputType,
      final Boolean isMultiValued,
      final FunctionLanguage lang,
      final String binary,
      final Set<Integer> workers)
      throws DbException, InterruptedException {
    long queryID = 0;
    Set<Integer> actualWorkers = workers;
    if (workers == null) {
      actualWorkers = getWorkers().keySet();
    }
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : actualWorkers) {
        workerPlans.put(
            workerId,
            new SubQueryPlan(
                new DbCreateFunction(
                    EmptyRelation.of(Schema.EMPTY_SCHEMA),
                    name,
                    definition,
                    outputType,
                    isMultiValued,
                    lang,
                    binary)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "create function",
              "create function",
              "create function",
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        queryID = qf.get().getQueryId();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e);
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    /* Register the function in the catalog; the binary is not stored there. */
    try {
      catalog.registerFunction(name, definition, outputType, isMultiValued, lang);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    return queryID;
  }
  /**
   * @return the list of functions from the catalog
   * @throws DbException in case of error.
   */
  public List<String> getFunctions() throws DbException {
    try {
      return catalog.getFunctions();
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /**
   * @param functionName the name of the function to retrieve
   * @return the {@link FunctionStatus} for the function
   * @throws DbException in case of error.
   */
  public FunctionStatus getFunctionDetails(final String functionName) throws DbException {
    try {
      return catalog.getFunctionStatus(functionName);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /**
   * @param relationKey the relationKey of the dataset to persist
   * @return the queryID
   * @throws DbException if there is an error
   * @throws InterruptedException interrupted
   * @throws URISyntaxException if the persist URI is not a valid URI
   */
  public long persistDataset(final RelationKey relationKey)
      throws DbException, InterruptedException, URISyntaxException {
    long queryID;
    /* Mark the relation as is_persistent */
    try {
      catalog.markRelationPersistent(relationKey);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    /* Create the query plan for persist */
    try {
      ImmutableMap.Builder<Integer, SubQueryPlan> workerPlans =
          new ImmutableMap.Builder<Integer, SubQueryPlan>();
      for (Integer workerId : getWorkersForRelation(relationKey)) {
        String partitionName =
            String.format(
                persistURI + "/myria-system/partition-%s/%s/%s/%s",
                workerId,
                relationKey.getUserName(),
                relationKey.getProgramName(),
                relationKey.getRelationName());
        DataSink workerSink = new UriSink(partitionName);
        workerPlans.put(
            workerId,
            new SubQueryPlan(
                new TupleSink(
                    new DbQueryScan(relationKey, getSchema(relationKey)),
                    new PostgresBinaryTupleWriter(),
                    workerSink)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "persist " + relationKey.toString(),
              "persist " + relationKey.toString(),
              "persisting from " + relationKey.toString(getDBMS()),
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans.build());
      try {
        queryID = qf.get().getQueryId();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
    return queryID;
  }
  /**
   * Directly runs a command on the underlying database on the selected workers.
   *
   * @param sqlString the command to run on the database
   * @param workers the workers that will run the command
   */
  public void executeSQLStatement(final String sqlString, final Set<Integer> workers)
      throws DbException, InterruptedException {
    /* Execute the SQL command on the set of workers */
    try {
      Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
      for (Integer workerId : workers) {
        workerPlans.put(
            workerId,
            new SubQueryPlan(new DbExecute(EmptyRelation.of(Schema.EMPTY_SCHEMA), sqlString, null)));
      }
      ListenableFuture<Query> qf =
          queryManager.submitQuery(
              "sql execute " + sqlString,
              "sql execute " + sqlString,
              "sql execute " + sqlString,
              new SubQueryPlan(new EmptySink(new EOSSource())),
              workerPlans);
      try {
        qf.get();
      } catch (ExecutionException e) {
        throw new DbException("Error executing query", e.getCause());
      }
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /**
   * Directly runs a command on the underlying database on the selected workers and returns the
   * resulting tuples as an array of strings.
   *
   * @param sqlString the command to run on the database
   * @param outputSchema the schema of the output result
   * @param workers the workers that will run the command
   * @return the resulting tuples from the SQL statement
   */
  public String[] executeSQLStatement(
      final String sqlString, final Schema outputSchema, final Set<Integer> workers)
      throws DbException {
    ByteSink byteSink = new ByteSink();
    TupleWriter writer = new CsvTupleWriter();
    DbQueryScan scan = new DbQueryScan(sqlString, outputSchema);
    final ExchangePairID operatorId = ExchangePairID.newID();
    CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID);
    SubQueryPlan workerPlan = new SubQueryPlan(producer);
    Map<Integer, SubQueryPlan> workerPlans = new HashMap<>();
    for (Integer w : workers) {
      workerPlans.put(w, workerPlan);
    }
    final Consumer consumer = new Consumer(outputSchema, operatorId, workers);
    TupleSink output = new TupleSink(consumer, writer, byteSink, false);
    final SubQueryPlan masterPlan = new SubQueryPlan(output);
    String planString = "execute sql statement : " + sqlString;
    try {
      queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans).get();
    } catch (Exception e) {
      /* Preserve the failure cause rather than discarding it. */
      throw new DbException(e);
    }
    byte[] responseBytes;
    try {
      responseBytes = ((ByteArrayOutputStream) byteSink.getOutputStream()).toByteArray();
    } catch (IOException e) {
      throw new DbException(e);
    }
    String response = new String(responseBytes, Charset.forName("UTF-8"));
    String[] tuples = response.split("\r\n");
    return tuples;
  }
  /**
   * @param relationKey the key of the desired relation.
   * @return the schema of the specified relation, or null if not found.
   * @throws CatalogException if there is an error getting the Schema out of the catalog.
   */
  public Schema getSchema(final RelationKey relationKey) throws CatalogException {
    if (relationKey.isTemp()) {
      return queryManager.getQuery(relationKey.tempRelationQueryId()).getTempSchema(relationKey);
    }
    return catalog.getSchema(relationKey);
  }
  /**
   * @param key the relation key.
   * @param howDistributed how the dataset is distributed.
   */
  public void updateHowDistributed(final RelationKey key, final HowDistributed howDistributed)
      throws DbException {
    try {
      catalog.updateHowDistributed(key, howDistributed);
    } catch (CatalogException e) {
      throw new DbException(e);
    }
  }
  /**
   * @param relationKey the key of the desired relation (temp or persistent).
   * @throws CatalogException if there is an error accessing the catalog.
   * @return the set of workers that store the specified relation.
*/ public @Nonnull Set<Integer> getWorkersForRelation(@Nonnull final RelationKey relationKey) throws CatalogException { if (relationKey.isTemp()) { return queryManager .getQuery(relationKey.tempRelationQueryId()) .getWorkersForTempRelation(relationKey); } else { return catalog.getWorkersForRelationKey(relationKey); } } /** @return the socket info for the master. */ protected SocketInfo getSocketInfo() { return masterSocketInfo; } /** * @return A list of datasets in the system. * @throws DbException if there is an error accessing the desired Schema. */ public List<DatasetStatus> getDatasets() throws DbException { try { return catalog.getDatasets(); } catch (CatalogException e) { throw new DbException(e); } } /** * Get the metadata about a relation. * * @param relationKey specified which relation to get the metadata about. * @return the metadata of the specified relation. * @throws DbException if there is an error getting the status. */ public DatasetStatus getDatasetStatus(final RelationKey relationKey) throws DbException { try { return catalog.getDatasetStatus(relationKey); } catch (CatalogException e) { throw new DbException(e); } } /** * @param searchTerm the search term * @return the relations that match the search term * @throws DbException if there is an error getting the relation keys. */ public List<RelationKey> getMatchingRelationKeys(final String searchTerm) throws DbException { try { return catalog.getMatchingRelationKeys(searchTerm); } catch (CatalogException e) { throw new DbException(e); } } /** * @param userName the user whose datasets we want to access. * @return a list of datasets belonging to the specified user. * @throws DbException if there is an error accessing the Catalog. */ public List<DatasetStatus> getDatasetsForUser(final String userName) throws DbException { try { return catalog.getDatasetsForUser(userName); } catch (CatalogException e) { throw new DbException(e); } } /** * @param userName the user whose datasets we want to access. * @param programName the program by that user whose datasets we want to access. * @return a list of datasets belonging to the specified program. * @throws DbException if there is an error accessing the Catalog. */ public List<DatasetStatus> getDatasetsForProgram(final String userName, final String programName) throws DbException { try { return catalog.getDatasetsForProgram(userName, programName); } catch (CatalogException e) { throw new DbException(e); } } /** * @param queryId the id of the query. * @return a list of datasets belonging to the specified program. * @throws DbException if there is an error accessing the Catalog. */ public List<DatasetStatus> getDatasetsForQuery(final int queryId) throws DbException { try { return catalog.getDatasetsForQuery(queryId); } catch (CatalogException e) { throw new DbException(e); } } /** * @return the maximum query id that matches the search. * @param searchTerm a token to match against the raw queries. If null, all queries match. * @throws CatalogException if an error occurs */ public long getMaxQuery(final String searchTerm) throws CatalogException { return catalog.getMaxQuery(searchTerm); } /** * @return the minimum query id that matches the search. * @param searchTerm a token to match against the raw queries. If null, all queries match. 
* @throws CatalogException if an error occurs */ public long getMinQuery(final String searchTerm) throws CatalogException { return catalog.getMinQuery(searchTerm); } /** * Start a query that streams tuples from the specified relation to the specified {@link TupleWriter}. * * @param relationKey the relation to be downloaded. * @param writer the {@link TupleWriter} which will serialize the tuples. * @param dataSink the {@link DataSink} for the tuple destination * @return the query future from which the query status can be looked up. * @throws DbException if there is an error in the system. */ public ListenableFuture<Query> startDataStream( final RelationKey relationKey, final TupleWriter writer, final DataSink dataSink) throws DbException { /* Get the relation's schema, to make sure it exists. */ final Schema schema; try { schema = catalog.getSchema(relationKey); } catch (CatalogException e) { throw new DbException(e); } Preconditions.checkArgument(schema != null, "relation %s was not found", relationKey); /* Get the workers that store it. */ Set<Integer> scanWorkers; try { scanWorkers = getWorkersForRelation(relationKey); } catch (CatalogException e) { throw new DbException(e); } /* If relation is broadcast, pick random worker to scan. */ DistributeFunction df = getDatasetStatus(relationKey).getHowDistributed().getDf(); if (df instanceof BroadcastDistributeFunction) { scanWorkers = ImmutableSet.of(scanWorkers.iterator().next()); } /* Construct the operators that go elsewhere. */ DbQueryScan scan = new DbQueryScan(relationKey, schema); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(scanWorkers.size()); for (Integer worker : scanWorkers) { workerPlans.put(worker, workerPlan); } /* Construct the master plan. */ final Consumer consumer = new Consumer(schema, operatorId, ImmutableSet.copyOf(scanWorkers)); TupleSink output = new TupleSink(consumer, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = "download " + relationKey.toString(); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * Start a query that streams tuples from the specified relation to the specified {@link TupleWriter}. * * @param numTB the number of {@link TupleBatch}es to download from each worker. * @param writer the {@link TupleWriter} which will serialize the tuples. * @param dataSink the {@link DataSink} for the tuple destination * @return the query future from which the query status can be looked up. * @throws DbException if there is an error in the system. */ public ListenableFuture<Query> startTestDataStream( final int numTB, final TupleWriter writer, final DataSink dataSink) throws DbException { final Schema schema = new Schema( ImmutableList.of(Type.LONG_TYPE, Type.STRING_TYPE), ImmutableList.of("id", "name")); Random r = new Random(); final TupleBatchBuffer tbb = new TupleBatchBuffer(schema); for (int i = 0; i < tbb.getBatchSize(); i++) { tbb.putLong(0, r.nextLong()); tbb.putString(1, new java.util.Date().toString()); } TupleBatch tb = tbb.popAny(); final DuplicateTBGenerator scanTable = new DuplicateTBGenerator(tb, numTB); /* Get the workers that store it. 
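       (This synthetic test stream is not backed by a stored relation, so all currently alive workers are used.)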
*/ Set<Integer> scanWorkers = getAliveWorkers(); /* Construct the operators that go elsewhere. */ final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(scanTable, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(scanWorkers.size()); for (Integer worker : scanWorkers) { workerPlans.put(worker, workerPlan); } /* Construct the master plan. */ final Consumer consumer = new Consumer(schema, operatorId, ImmutableSet.copyOf(scanWorkers)); TupleSink output = new TupleSink(consumer, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = "download test"; try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * @param subqueryId the subquery id. * @param fragmentId the fragment id to return data for. All fragments, if < 0. * @param writer writer to get data. * @param dataSink the {@link DataSink} for the tuple destination * @return profiling logs for the query. * @throws DbException if there is an error when accessing profiling logs. */ public ListenableFuture<Query> startSentLogDataStream( final SubQueryId subqueryId, final long fragmentId, final TupleWriter writer, final DataSink dataSink) throws DbException { Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId); String fragmentWhere = ""; if (fragmentId >= 0) { fragmentWhere = "AND \"fragmentId\" = " + fragmentId; } final Schema schema = Schema.ofFields( "fragmentId", Type.INT_TYPE, "destWorker", Type.INT_TYPE, "numTuples", Type.LONG_TYPE); String sentQueryString = Joiner.on(' ') .join( "SELECT \"fragmentId\", \"destWorkerId\", sum(\"numTuples\") as \"numTuples\" FROM", MyriaConstants.SENT_PROFILING_RELATION.toString(getDBMS()), "WHERE \"queryId\" =", subqueryId.getQueryId(), "AND \"subQueryId\" =", subqueryId.getSubqueryId(), fragmentWhere, "GROUP BY \"fragmentId\", \"destWorkerId\""); DbQueryScan scan = new DbQueryScan(sentQueryString, schema); final ExchangePairID operatorId = ExchangePairID.newID(); ImmutableList.Builder<Expression> emitExpressions = ImmutableList.builder(); emitExpressions.add(new Expression("workerId", new WorkerIdExpression())); for (int column = 0; column < schema.numColumns(); column++) { VariableExpression copy = new VariableExpression(column); emitExpressions.add(new Expression(schema.getColumnName(column), copy)); } Apply addWorkerId = new Apply(scan, emitExpressions.build()); CollectProducer producer = new CollectProducer(addWorkerId, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } final Consumer consumer = new Consumer(addWorkerId.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); final Aggregate aggregate = new Aggregate( consumer, new int[] {0, 1, 2}, new PrimitiveAggregatorFactory(3, AggregationOp.SUM)); // rename columns ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder(); renameExpressions.add(new Expression("src", new VariableExpression(0))); renameExpressions.add(new Expression("fragmentId", new VariableExpression(1))); renameExpressions.add(new Expression("dest", new VariableExpression(2))); renameExpressions.add(new 
Expression("numTuples", new VariableExpression(3))); final Apply rename = new Apply(aggregate, renameExpressions.build()); TupleSink output = new TupleSink(rename, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("") .join( "download profiling sent data for (query=", subqueryId.getQueryId(), ", subquery=", subqueryId.getSubqueryId(), ", fragment=", fragmentId, ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * Extracts the set of workers used in a saved, encoded physical plan. * * @param plan a {@link List<PlanFragmentEncoding>}, cached during execution. * @return the set of workers used during the execution of this subquery. */ @Nonnull private Set<Integer> getWorkersFromSubqueryPlan(final String plan) { /* We need to accumulate the workers used in the plan. We could deserialize the plan as a * List<PlanFragmentEncoding>... which it is, but for forwards and backwards compatiblity let's deserialize it as a * List<Map<String,Object>>... which it also is. */ ObjectMapper mapper = MyriaJsonMapperProvider.getMapper(); List<Map<String, Object>> fragments; Set<Integer> actualWorkers = Sets.newHashSet(); try { fragments = mapper.readValue(plan, new TypeReference<List<Map<String, Object>>>() {}); int fragIdx = 0; for (Map<String, Object> m : fragments) { Object fragWorkers = m.get("workers"); Preconditions.checkNotNull(fragWorkers, "No workers recorded for fragment %s", fragIdx); Preconditions.checkState( fragWorkers instanceof Collection<?>, "Expected fragWorkers to be a collection, instead found %s", fragWorkers.getClass()); try { @SuppressWarnings("unchecked") Collection<Integer> curWorkers = (Collection<Integer>) fragWorkers; actualWorkers.addAll(curWorkers); } catch (ClassCastException e) { throw new IllegalStateException( "Expected fragWorkers to be a collection of ints, instead found " + fragWorkers); } } } catch (IOException e) { throw new IllegalArgumentException( "Error deserializing workers from encoded plan " + plan, e); } /* Remove the MASTER from the set. */ actualWorkers.remove(MyriaConstants.MASTER_ID); return actualWorkers; } /** * Returns the set of workers that executed a particular subquery. * * @param subQueryId the subquery. * @return the set of workers that executed a particular subquery. * @throws DbException if there is an error in the catalog. */ private Set<Integer> getWorkersForSubQuery(final SubQueryId subQueryId) throws DbException { String serializedPlan; try { serializedPlan = catalog.getQueryPlan(subQueryId); } catch (CatalogException e) { throw new DbException(e); } Preconditions.checkArgument( serializedPlan != null, "No cached query plan for subquery %s", subQueryId); return getWorkersFromSubqueryPlan(serializedPlan); } /** * @param subqueryId the subquery id. * @param writer writer to get data. * @param dataSink the {@link DataSink} for the tuple destination * @return profiling logs for the query. * @throws DbException if there is an error when accessing profiling logs. 
*/ public ListenableFuture<Query> startAggregatedSentLogDataStream( final SubQueryId subqueryId, final TupleWriter writer, final DataSink dataSink) throws DbException { Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId); final Schema schema = Schema.ofFields( "fragmentId", Type.INT_TYPE, "numTuples", Type.LONG_TYPE, "minTime", Type.LONG_TYPE, "maxTime", Type.LONG_TYPE); String sentQueryString = Joiner.on(' ') .join( "SELECT \"fragmentId\", sum(\"numTuples\") as \"numTuples\", min(\"nanoTime\") as \"minTime\", max(\"nanoTime\") as \"maxTime\" FROM", MyriaConstants.SENT_PROFILING_RELATION.toString(getDBMS()), "WHERE \"queryId\" =", subqueryId.getQueryId(), "AND \"subQueryId\" =", subqueryId.getSubqueryId(), "GROUP BY \"fragmentId\""); DbQueryScan scan = new DbQueryScan(sentQueryString, schema); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } final Consumer consumer = new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); final Aggregate aggregate = new Aggregate( consumer, new int[] {0}, new PrimitiveAggregatorFactory(1, AggregationOp.SUM), new PrimitiveAggregatorFactory(2, AggregationOp.MIN), new PrimitiveAggregatorFactory(3, AggregationOp.MAX)); // rename columns ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder(); renameExpressions.add(new Expression("fragmentId", new VariableExpression(0))); renameExpressions.add(new Expression("numTuples", new VariableExpression(1))); renameExpressions.add( new Expression( "duration", new MinusExpression(new VariableExpression(3), new VariableExpression(2)))); final Apply rename = new Apply(aggregate, renameExpressions.build()); TupleSink output = new TupleSink(rename, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("") .join( "download profiling aggregated sent data for (query=", subqueryId.getQueryId(), ", subquery=", subqueryId.getSubqueryId(), ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * @param subqueryId the desired subquery. * @param fragmentId the fragment id to return data for. All fragments, if < 0. * @param start the earliest time where we need data * @param end the latest time * @param minSpanLength minimum length of a span to be returned * @param onlyRootOperator only return data for root operator * @param writer writer to get data. * @param dataSink the {@link DataSink} for the tuple destination * @return profiling logs for the query. * @throws DbException if there is an error when accessing profiling logs. 
*/ public QueryFuture startLogDataStream( final SubQueryId subqueryId, final long fragmentId, final long start, final long end, final long minSpanLength, final boolean onlyRootOperator, final TupleWriter writer, final DataSink dataSink) throws DbException { Preconditions.checkArgument(start < end, "range cannot be negative"); final Schema schema = Schema.ofFields( "opId", Type.INT_TYPE, "startTime", Type.LONG_TYPE, "endTime", Type.LONG_TYPE, "numTuples", Type.LONG_TYPE); Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId); String opCondition = ""; if (onlyRootOperator) { opCondition = Joiner.on(' ') .join( "AND \"opId\" = (SELECT \"opId\" FROM", MyriaConstants.EVENT_PROFILING_RELATION.toString(getDBMS()), "WHERE \"fragmentId\" =", fragmentId, " AND \"queryId\"=", subqueryId.getQueryId(), "AND \"subQueryId\" =", subqueryId.getSubqueryId(), "ORDER BY \"startTime\" ASC LIMIT 1)"); } String spanCondition = ""; if (minSpanLength > 0) { spanCondition = Joiner.on(' ').join("AND \"endTime\" - \"startTime\" >", minSpanLength); } String queryString = Joiner.on(' ') .join( "SELECT \"opId\", \"startTime\", \"endTime\", \"numTuples\" FROM", MyriaConstants.EVENT_PROFILING_RELATION.toString(getDBMS()), "WHERE \"fragmentId\" =", fragmentId, "AND \"queryId\" =", subqueryId.getQueryId(), "AND \"subQueryId\" =", subqueryId.getSubqueryId(), "AND \"endTime\" >", start, "AND \"startTime\" <", end, opCondition, spanCondition, "ORDER BY \"startTime\" ASC"); DbQueryScan scan = new DbQueryScan(queryString, schema); ImmutableList.Builder<Expression> emitExpressions = ImmutableList.builder(); emitExpressions.add(new Expression("workerId", new WorkerIdExpression())); for (int column = 0; column < schema.numColumns(); column++) { VariableExpression copy = new VariableExpression(column); emitExpressions.add(new Expression(schema.getColumnName(column), copy)); } Apply addWorkerId = new Apply(scan, emitExpressions.build()); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(addWorkerId, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } final Consumer consumer = new Consumer(addWorkerId.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); TupleSink output = new TupleSink(consumer, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("") .join( "download profiling data (query=", subqueryId.getQueryId(), ", subquery=", subqueryId.getSubqueryId(), ", fragment=", fragmentId, ", range=[", Joiner.on(", ").join(start, end), "]", ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** Upper bound on the number of bins a profiler can ask for. */ private static final long MAX_BINS = 10000; /** * @param subqueryId subquery id. * @param fragmentId the fragment id to return data for. All fragments, if < 0. * @param start start of the histogram * @param end the end of the histogram * @param step the step size between min and max * @param onlyRootOp return histogram only for root operator * @param writer writer to get data. * @param dataSink the {@link DataSink} for the tuple destination * @return profiling logs for the query. 
* @throws DbException if there is an error when accessing profiling logs. */ public QueryFuture startHistogramDataStream( final SubQueryId subqueryId, final long fragmentId, final long start, final long end, final long step, final boolean onlyRootOp, final TupleWriter writer, final DataSink dataSink) throws DbException { Preconditions.checkArgument(start < end, "range cannot be negative"); Preconditions.checkArgument(step > 0, "step has to be greater than 0"); long bins = (end - start + 1) / step; Preconditions.checkArgument( bins > 0 && bins <= MAX_BINS, "bins must be in the range [1, %s]", MAX_BINS); Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId); final Schema schema = Schema.ofFields("opId", Type.INT_TYPE, "nanoTime", Type.LONG_TYPE); final RelationKey relationKey = MyriaConstants.EVENT_PROFILING_RELATION; Map<String, Object> queryArgs = new HashMap<>(); queryArgs.put("QUERY", subqueryId.getQueryId()); queryArgs.put("SUBQUERY", subqueryId.getSubqueryId()); queryArgs.put("FRAGMENT", fragmentId); queryArgs.put("START", start); queryArgs.put("END", end); queryArgs.put("STEP", step); queryArgs.put("BINS", bins); queryArgs.put("PROF_TABLE", relationKey.toString(getDBMS())); StrSubstitutor sub; String filterOpnameQueryString = ""; if (onlyRootOp) { sub = new StrSubstitutor(queryArgs); filterOpnameQueryString = sub.replace( "AND p.\"opId\"=(SELECT \"opId\" FROM ${PROF_TABLE} WHERE \"fragmentId\"=${FRAGMENT} AND \"queryId\"=${QUERY} AND \"subQueryId\"=${SUBQUERY} ORDER BY \"startTime\" ASC LIMIT 1)"); } // Reinitialize the substitutor after including the opname filter. queryArgs.put("OPNAME_FILTER", filterOpnameQueryString); sub = new StrSubstitutor(queryArgs); String histogramWorkerQueryString = sub.replace( Joiner.on("\n") .join( "SELECT \"opId\", ${START}::bigint+${STEP}::bigint*s.bin as \"nanoTime\"", "FROM (", "SELECT p.\"opId\", greatest((p.\"startTime\"-1-${START}::bigint)/${STEP}::bigint, -1) as \"startBin\", least((p.\"endTime\"+1-${START}::bigint)/${STEP}::bigint, ${BINS}) AS \"endBin\"", "FROM ${PROF_TABLE} p", "WHERE p.\"queryId\" = ${QUERY} and p.\"subQueryId\" = ${SUBQUERY} and p.\"fragmentId\" = ${FRAGMENT}", "${OPNAME_FILTER}", "AND greatest((p.\"startTime\"-${START}::bigint)/${STEP}::bigint, -1) < least((p.\"endTime\"-${START}::bigint)/${STEP}::bigint, ${BINS}) AND p.\"startTime\" < ${END}::bigint AND p.\"endTime\" >= ${START}::bigint", ") times,", "generate_series(0, ${BINS}) AS s(bin)", "WHERE s.bin > times.\"startBin\" and s.bin <= times.\"endBin\";")); DbQueryScan scan = new DbQueryScan(histogramWorkerQueryString, schema); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } /* Aggregate histogram on master */ final Consumer consumer = new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); // sum up the number of workers working final Aggregate sumAggregate = new Aggregate( consumer, new int[] {0, 1}, new PrimitiveAggregatorFactory(1, AggregationOp.COUNT)); // rename columns ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder(); renameExpressions.add(new Expression("opId", new VariableExpression(0))); renameExpressions.add(new Expression("nanoTime", new VariableExpression(1))); renameExpressions.add(new 
Expression("numWorkers", new VariableExpression(2))); final Apply rename = new Apply(sumAggregate, renameExpressions.build()); TupleSink output = new TupleSink(rename, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("") .join( "download profiling histogram (query=", subqueryId.getQueryId(), ", subquery=", subqueryId.getSubqueryId(), ", fragment=", fragmentId, ", range=[", Joiner.on(", ").join(start, end, step), "]", ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * @param subqueryId the subquery id. * @param fragmentId the fragment id * @param writer writer to get data * @param dataSink the {@link DataSink} for the tuple destination * @return profiling logs for the query. * @throws DbException if there is an error when accessing profiling logs. */ public QueryFuture startRangeDataStream( final SubQueryId subqueryId, final long fragmentId, final TupleWriter writer, final DataSink dataSink) throws DbException { final Schema schema = Schema.ofFields("startTime", Type.LONG_TYPE, "endTime", Type.LONG_TYPE); final RelationKey relationKey = MyriaConstants.EVENT_PROFILING_RELATION; Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId); String opnameQueryString = Joiner.on(' ') .join( "SELECT min(\"startTime\"), max(\"endTime\") FROM", relationKey.toString(getDBMS()), "WHERE \"queryId\"=", subqueryId.getQueryId(), "AND \"subQueryId\"=", subqueryId.getSubqueryId(), "AND \"fragmentId\"=", fragmentId); DbQueryScan scan = new DbQueryScan(opnameQueryString, schema); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } /* Construct the master plan. */ final Consumer consumer = new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); // Aggregate range on master final Aggregate sumAggregate = new Aggregate( consumer, new int[] {}, new PrimitiveAggregatorFactory(0, AggregationOp.MIN), new PrimitiveAggregatorFactory(1, AggregationOp.MAX)); TupleSink output = new TupleSink(sumAggregate, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("") .join( "download time range (query=", subqueryId.getQueryId(), ", subquery=", subqueryId.getSubqueryId(), ", fragment=", fragmentId, ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * @param subqueryId subquery id. * @param fragmentId the fragment id to return data for. All fragments, if < 0. * @param writer writer to get data. * @param dataSink the {@link DataSink} for the tuple destination * @return contributions for operator. * @throws DbException if there is an error when accessing profiling logs. 
*/ public QueryFuture startContributionsStream( final SubQueryId subqueryId, final long fragmentId, final TupleWriter writer, final DataSink dataSink) throws DbException { final Schema schema = Schema.ofFields("opId", Type.INT_TYPE, "nanoTime", Type.LONG_TYPE); final RelationKey relationKey = MyriaConstants.EVENT_PROFILING_RELATION; Set<Integer> actualWorkers = getWorkersForSubQuery(subqueryId); String fragIdCondition = ""; if (fragmentId >= 0) { fragIdCondition = "AND \"fragmentId\"=" + fragmentId; } String opContributionsQueryString = Joiner.on(' ') .join( "SELECT \"opId\", sum(\"endTime\" - \"startTime\") FROM ", relationKey.toString(getDBMS()), "WHERE \"queryId\"=", subqueryId.getQueryId(), "AND \"subQueryId\"=", subqueryId.getSubqueryId(), fragIdCondition, "GROUP BY \"opId\""); DbQueryScan scan = new DbQueryScan(opContributionsQueryString, schema); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(scan, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } /* Aggregate on master */ final Consumer consumer = new Consumer(scan.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); // average the per-worker contribution of each operator final Aggregate sumAggregate = new Aggregate( consumer, new int[] {0}, new PrimitiveAggregatorFactory(1, AggregationOp.AVG)); // rename columns ImmutableList.Builder<Expression> renameExpressions = ImmutableList.builder(); renameExpressions.add(new Expression("opId", new VariableExpression(0))); renameExpressions.add(new Expression("nanoTime", new VariableExpression(1))); final Apply rename = new Apply(sumAggregate, renameExpressions.build()); TupleSink output = new TupleSink(rename, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("") .join( "download operator contributions (query=", subqueryId.getQueryId(), ", subquery=", subqueryId.getSubqueryId(), ", fragment=", fragmentId, ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * Update the {@link MasterCatalog} so that the specified relation has the specified tuple count. * * @param relation the relation to update * @param count the number of tuples in that relation * @throws DbException if there is an error in the catalog */ public void updateRelationTupleCount(final RelationKey relation, final long count) throws DbException { try { catalog.updateRelationTupleCount(relation, count); } catch (CatalogException e) { throw new DbException("updating the number of tuples in the catalog", e); } } /** * Set the global variable owned by the specified query and named by the specified key to the specified value. * * @param queryId the query to which the variable belongs. * @param key the name of the variable * @param value the new value for the variable */ public void setQueryGlobal( final long queryId, @Nonnull final String key, @Nonnull final Object value) { Preconditions.checkNotNull(key, "key"); Preconditions.checkNotNull(value, "value"); queryManager.getQuery(queryId).setGlobal(key, value); } /** * Get the value of the global variable owned by the specified query and named by the specified key. * * @param queryId the query to which the variable belongs. 
* @param key the name of the variable * @return the value of the variable */ @Nullable public Object getQueryGlobal(final long queryId, @Nonnull final String key) { Preconditions.checkNotNull(key, "key"); return queryManager.getQuery(queryId).getGlobal(key); } /** * @param queryId the query id to fetch * @param dataSink the {@link DataSink} to write results to. * @throws DbException if there is an error in the database. */ public void getResourceUsage(final long queryId, final DataSink dataSink) throws DbException { Schema schema = Schema.appendColumn(MyriaConstants.RESOURCE_PROFILING_SCHEMA, Type.INT_TYPE, "workerId"); try { TupleWriter writer = new CsvTupleWriter(); TupleBuffer tb = queryManager.getResourceUsage(queryId); if (tb != null) { writer.open(dataSink.getOutputStream()); writer.writeColumnHeaders(schema.getColumnNames()); writer.writeTuples(tb); writer.done(); return; } getResourceLog(queryId, writer, dataSink); } catch (IOException e) { throw new DbException(e); } } /** * @param queryId query id. * @param writer writer to get data. * @param dataSink the {@link DataSink} for the tuple destination * @return resource logs for the query. * @throws DbException if there is an error when accessing profiling logs. */ public ListenableFuture<Query> getResourceLog( final long queryId, final TupleWriter writer, final DataSink dataSink) throws DbException { SubQueryId sqId = new SubQueryId(queryId, 0); String serializedPlan; try { serializedPlan = catalog.getQueryPlan(sqId); } catch (CatalogException e) { throw new DbException(e); } Preconditions.checkArgument( serializedPlan != null, "No cached query plan for subquery %s", sqId); Set<Integer> actualWorkers = getWorkersFromSubqueryPlan(serializedPlan); final Schema schema = MyriaConstants.RESOURCE_PROFILING_SCHEMA; String resourceQueryString = Joiner.on(' ') .join( "SELECT * from", MyriaConstants.RESOURCE_PROFILING_RELATION.toString(getDBMS()), "WHERE \"queryId\" =", queryId); DbQueryScan scan = new DbQueryScan(resourceQueryString, schema); ImmutableList.Builder<Expression> emitExpressions = ImmutableList.builder(); for (int column = 0; column < schema.numColumns(); column++) { VariableExpression copy = new VariableExpression(column); emitExpressions.add(new Expression(schema.getColumnName(column), copy)); } emitExpressions.add(new Expression("workerId", new WorkerIdExpression())); Apply addWorkerId = new Apply(scan, emitExpressions.build()); final ExchangePairID operatorId = ExchangePairID.newID(); CollectProducer producer = new CollectProducer(addWorkerId, operatorId, MyriaConstants.MASTER_ID); SubQueryPlan workerPlan = new SubQueryPlan(producer); Map<Integer, SubQueryPlan> workerPlans = new HashMap<>(actualWorkers.size()); for (Integer worker : actualWorkers) { workerPlans.put(worker, workerPlan); } final Consumer consumer = new Consumer(addWorkerId.getSchema(), operatorId, ImmutableSet.copyOf(actualWorkers)); TupleSink output = new TupleSink(consumer, writer, dataSink); final SubQueryPlan masterPlan = new SubQueryPlan(output); /* Submit the plan for the download. */ String planString = Joiner.on("").join("download resource log for (query=", queryId, ")"); try { return queryManager.submitQuery(planString, planString, planString, masterPlan, workerPlans); } catch (CatalogException e) { throw new DbException(e); } } /** * Record in the catalog the encoded plan that this subquery executed. * * @param subQueryId the id of the subquery. * @param encodedPlan the plan. * @throws DbException if there is an error in the catalog. 
*/ public void setQueryPlan(final SubQueryId subQueryId, @Nonnull final String encodedPlan) throws DbException { try { catalog.setQueryPlan(subQueryId, encodedPlan); } catch (CatalogException e) { throw new DbException(e); } } /** * @param subQueryId the query whose plan to look up. * @return the execution plan for this query. * @throws DbException if there is an error getting the query status. */ @Nullable public String getQueryPlan(@Nonnull final SubQueryId subQueryId) throws DbException { try { return catalog.getQueryPlan(subQueryId); } catch (CatalogException e) { throw new DbException(e); } } /** @return the master catalog. */ public MasterCatalog getCatalog() { return catalog; } /** * @return the perfenforce driver */ public PerfEnforceDriver getPerfEnforceDriver() { return perfEnforceDriver; } }
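/*
 * Usage sketch (not part of the original source): shows how a caller might combine startDataStream(...)
 * with a CsvTupleWriter and a caller-supplied DataSink to export a stored relation. It assumes the
 * enclosing class is the Myria master class Server; the helper class and method names below are
 * invented for illustration only.
 */
final class ServerDataStreamUsageSketch {
  private ServerDataStreamUsageSketch() {}

  /**
   * Kicks off a CSV export of the given relation into the supplied sink.
   *
   * @return the future of the download query; callers can block on it or poll the query status.
   */
  static ListenableFuture<Query> exportRelationAsCsv(
      final Server server, final RelationKey relation, final DataSink sink) throws DbException {
    // The TupleWriter controls the serialization format; the DataSink controls the destination.
    final TupleWriter writer = new CsvTupleWriter();
    return server.startDataStream(relation, writer, sink);
  }
}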
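/*
 * Usage sketch (not part of the original source): shows how the profiling-log streams above might be
 * consumed, blocking on the returned future until the download query has written everything to the sink.
 * SubQueryId construction mirrors getResourceLog; the enclosing class is assumed to be the Myria master
 * class Server, and the helper class and method names are invented for illustration only.
 */
final class ServerProfilingStreamUsageSketch {
  private ServerProfilingStreamUsageSketch() {}

  /** Streams the sent-tuple profiling log of subquery 0 of the given query, for all fragments, as CSV. */
  static void downloadSentLog(final Server server, final long queryId, final DataSink sink)
      throws DbException, InterruptedException, ExecutionException {
    final SubQueryId subQueryId = new SubQueryId(queryId, 0);
    // A negative fragmentId requests data for all fragments (see startSentLogDataStream's javadoc).
    final ListenableFuture<Query> future =
        server.startSentLogDataStream(subQueryId, -1, new CsvTupleWriter(), sink);
    future.get(); // wait for the profiling download query to finish
  }
}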
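/*
 * Usage sketch (not part of the original source): demonstrates the per-query global variables managed by
 * setQueryGlobal/getQueryGlobal. The enclosing class is assumed to be the Myria master class Server; the
 * helper class, method, and key names below are invented for illustration only.
 */
final class ServerQueryGlobalUsageSketch {
  private ServerQueryGlobalUsageSketch() {}

  /** Stores a value under a well-known key for the given query, then reads it back. */
  static Object roundTripGlobal(final Server server, final long queryId, final long watermark) {
    server.setQueryGlobal(queryId, "watermark", watermark); // the long value is autoboxed to a Long
    return server.getQueryGlobal(queryId, "watermark"); // @Nullable: may be null if never set
  }
}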