/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.query;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.io.File;
import java.io.IOException;

import java.net.InetSocketAddress;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import com.addthis.basis.util.LessFiles;
import com.addthis.basis.util.LessStreams;
import com.addthis.basis.util.Parameter;

import com.addthis.hydra.data.query.Query;
import com.addthis.hydra.data.query.QueryException;
import com.addthis.hydra.query.aggregate.BalancedAllocator;
import com.addthis.hydra.query.aggregate.DefaultTaskAllocators;
import com.addthis.hydra.query.aggregate.MeshSourceAggregator;
import com.addthis.hydra.query.aggregate.QueryTaskSource;
import com.addthis.hydra.query.aggregate.QueryTaskSourceOption;
import com.addthis.hydra.query.loadbalance.WorkerData;
import com.addthis.hydra.query.loadbalance.WorkerTracker;
import com.addthis.hydra.query.spawndatastore.SpawnDataStoreHandler;
import com.addthis.hydra.query.tracker.QueryTracker;
import com.addthis.hydra.query.tracker.TrackerHandler;
import com.addthis.meshy.MeshyServer;
import com.addthis.meshy.service.file.FileReference;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;

import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelOutboundHandlerAdapter;
import io.netty.channel.ChannelPromise;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ChannelHandler.Sharable
public class MeshQueryMaster extends ChannelOutboundHandlerAdapter implements AutoCloseable {

    private static final Logger log = LoggerFactory.getLogger(MeshQueryMaster.class);

    private static final String tempDir = Parameter.value("query.tmpdir", "query.tmpdir");
    private static final int meshPort = Parameter.intValue("qmaster.mesh.port", 5100);
    private static final String meshRoot = Parameter.value("qmaster.mesh.root", "/home/hydra");
    private static final String meshPeers = Parameter.value("qmaster.mesh.peers", "localhost");
    private static final int meshPeerPort = Parameter.intValue("qmaster.mesh.peer.port", 5101);
    private static final boolean enableZooKeeper = Parameter.boolValue("qmaster.enableZooKeeper", true);

    private static final QueryTaskSource EMPTY_TASK_SOURCE = new QueryTaskSource(new QueryTaskSourceOption[0]);
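    /* The settings above are read once, at class load, from system properties via
     * Parameter. A minimal sketch of overriding the defaults at launch, assuming a
     * plain java invocation (YourMainClass stands in for whatever entry point
     * constructs this handler; it is not part of this file):
     *
     *     java -Dqmaster.mesh.port=5100 \
     *          -Dqmaster.mesh.root=/home/hydra \
     *          -Dqmaster.mesh.peers=host1,host2 \
     *          -Dqmaster.mesh.peer.port=5101 \
     *          -Dqmaster.enableZooKeeper=false \
     *          YourMainClass
     */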
    /**
     * Tracks metrics and other interesting things about queries that we have run.
     * Provides insight into currently running queries and the ability to cancel a query before it completes.
     */
    private final QueryTracker tracker;

    /** Primary mesh server. */
    private final MeshyServer meshy;

    /** Abstracts away spawndatastore-reliant functions. */
    private final SpawnDataStoreHandler spawnDataStoreHandler;

    /** Mesh FileRef cache -- backed by a loading cache. */
    private final MeshFileRefCache cachey;

    private final WorkerTracker worky;
    private final DefaultTaskAllocators allocators;

    public MeshQueryMaster(QueryTracker tracker) throws Exception {
        this.tracker = tracker;
        meshy = new MeshyServer(meshPort, new File(meshRoot));
        cachey = new MeshFileRefCache(meshy);
        worky = new WorkerTracker();
        allocators = new DefaultTaskAllocators(new BalancedAllocator(worky));
        connectToMeshPeers();
        try {
            // delete the tmp directory (disk sort directory)
            File tempDirFile = new File(tempDir).getCanonicalFile();
            LessFiles.deleteDir(tempDirFile);
            LessFiles.initDirectory(tempDirFile);
        } catch (Exception e) {
            log.warn("Error while cleaning / locating the temp directory (for disk sorts).", e);
        }
        if (enableZooKeeper) {
            spawnDataStoreHandler = new SpawnDataStoreHandler();
        } else {
            spawnDataStoreHandler = null;
        }
    }

    public SpawnDataStoreHandler getSpawnDataStoreHandler() {
        return spawnDataStoreHandler;
    }

    public DefaultTaskAllocators allocators() {
        return allocators;
    }

    public WorkerTracker worky() {
        return worky;
    }

    public QueryTracker getQueryTracker() {
        return tracker;
    }

    @Override
    public void close() {
        try {
            if (spawnDataStoreHandler != null) {
                spawnDataStoreHandler.close();
            }
            meshy.close();
        } catch (Exception e) {
            log.error("Exception during MeshQueryMaster shutdown", e);
        }
    }

    private void connectToMeshPeers() {
        if (meshPeers != null) {
            String[] peers = meshPeers.split(",");
            for (String peer : peers) {
                meshy.connectPeer(new InetSocketAddress(peer, meshPeerPort));
            }
        }
    }

    public void handleError(Query query) {
        String job = query.getJob();
        if (job != null) {
            cachey.invalidate(job);
        }
    }

    @Override
    public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws Exception {
        if (msg instanceof Query) {
            writeQuery(ctx, (Query) msg, promise);
        } else {
            super.write(ctx, msg, promise);
        }
    }

    private static String getJobSubdirectory(String combinedJob) {
        int dirIndex = combinedJob.indexOf('/');
        if (dirIndex > -1) {
            return combinedJob.substring(dirIndex + 1);
        } else {
            return "";
        }
    }

    private static String getJobWithoutSubdirectory(String combinedJob) {
        int dirIndex = combinedJob.indexOf('/');
        if (dirIndex > -1) {
            return combinedJob.substring(0, dirIndex);
        } else {
            return combinedJob;
        }
    }

    private static final Splitter JOB_SPLITTER = Splitter.on(',');
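    /* Illustrative sketch of the job specification handled by writeQuery below
     * (the job names are made up): the query's "job" string is a comma-separated
     * list whose elements may carry an optional subdirectory after the first '/',
     * so
     *
     *     "jobA,jobB/gold"
     *
     * queries jobA (default directory) plus the "gold" subdirectory of jobB. Each
     * element is alias-expanded first, and an explicitly supplied subdirectory
     * takes precedence over one contributed by the alias expansion.
     */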
    protected void writeQuery(ChannelHandlerContext ctx, Query query, ChannelPromise promise) throws Exception {
        // log rops prior to mutating the query
        String[] opsLog = query.getOps();
        // creates the query for the worker and updates the local query ops (!mutates query!).
        // query and remoteQuery are exact copies except that query's ops contains the first element
        // of the original and remoteQuery's ops contains the rest (i.e. query contains ops, and
        // remoteQuery contains rops)
        Query remoteQuery = query.createPipelinedQuery();
        boolean allowPartial = Boolean.parseBoolean(query.getParameter("allowPartial"));
        Set<Integer> tasks = parseTasks(query.getParameter("tasks"));
        List<QueryTaskSource[]> sourcesPerDir = new ArrayList<>(2);
        for (String combinedUnresolved : JOB_SPLITTER.split(query.getJob())) {
            String jobIdOrAlias = getJobWithoutSubdirectory(combinedUnresolved);
            String subdirectory = getJobSubdirectory(combinedUnresolved);
            for (String resolved : expandAlias(jobIdOrAlias)) {
                String resolvedJobId = getJobWithoutSubdirectory(resolved);
                String resolvedSubdirectory;
                if (!subdirectory.isEmpty()) {
                    resolvedSubdirectory = subdirectory;
                } else {
                    resolvedSubdirectory = getJobSubdirectory(resolved);
                }
                sourcesPerDir.add(getSourcesById(resolvedJobId, resolvedSubdirectory, allowPartial, tasks));
            }
        }
        QueryTaskSource[] sourcesByTaskID;
        if (sourcesPerDir.size() > 1) {
            sourcesByTaskID = sourcesPerDir.stream().flatMap(Arrays::stream).toArray(QueryTaskSource[]::new);
        } else {
            sourcesByTaskID = sourcesPerDir.get(0);
        }
        MeshSourceAggregator aggregator = new MeshSourceAggregator(sourcesByTaskID, meshy, this, remoteQuery);
        ctx.pipeline().addLast(ctx.executor(), "query aggregator", aggregator);
        TrackerHandler trackerHandler = new TrackerHandler(tracker, opsLog, aggregator);
        ctx.pipeline().addLast(ctx.executor(), "query tracker", trackerHandler);
        ctx.pipeline().remove(this);
        ctx.pipeline().write(query, promise);
    }

    private static final Splitter TASKS_SPLITTER = Splitter.on(',').trimResults();

    @Nonnull private static Set<Integer> parseTasks(@Nullable String tasks) {
        if (Strings.isNullOrEmpty(tasks)) {
            return Collections.emptySet();
        } else {
            return LessStreams.stream(TASKS_SPLITTER.split(tasks))
                              .map(Ints::tryParse)
                              .filter(i -> i != null)
                              .collect(Collectors.toSet());
        }
    }

    private List<String> expandAlias(String jobId) {
        if (spawnDataStoreHandler != null) {
            return spawnDataStoreHandler.expandAlias(jobId);
        } else {
            return Collections.singletonList(jobId);
        }
    }
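    /* Sketch of how the "tasks" parameter is interpreted by parseTasks above (the
     * values are made up): parseTasks("0, 3,17") yields {0, 3, 17}. Ints.tryParse
     * returns null for unparseable entries, so those are silently dropped, and a
     * null or empty parameter yields the empty set, which getSourcesById below
     * treats as "query every task".
     */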
    /**
     * @param requestedTasks only query these task ids. empty means query all tasks.
     */
    private QueryTaskSource[] getSourcesById(String jobId, String subdirectory,
                                             boolean allowPartial, Set<Integer> requestedTasks) {
        if (spawnDataStoreHandler != null) {
            spawnDataStoreHandler.validateJobForQuery(jobId);
        }
        String combinedJob;
        if (!subdirectory.isEmpty()) {
            combinedJob = jobId + '/' + subdirectory;
        } else {
            combinedJob = jobId;
        }
        Multimap<Integer, FileReference> fileReferenceMap;
        try {
            fileReferenceMap = cachey.get(combinedJob);
        } catch (ExecutionException e) {
            log.warn("Exception getting file references for job: {}", combinedJob, e);
            throw new QueryException("Exception getting file references: " + e.getMessage());
        }
        if ((fileReferenceMap == null) || fileReferenceMap.isEmpty()) {
            cachey.invalidate(combinedJob);
            throw new QueryException("[MeshQueryMaster] No file references found for job: " + combinedJob);
        }
        int canonicalTaskCount;
        try {
            canonicalTaskCount = validateRequestedTasks(jobId, fileReferenceMap.keySet(), requestedTasks, allowPartial);
        } catch (Exception ex) {
            cachey.invalidate(combinedJob);
            throw ex;
        }
        QueryTaskSource[] sourcesByTaskID = new QueryTaskSource[canonicalTaskCount];
        for (int taskId = 0; taskId < canonicalTaskCount; taskId++) {
            Collection<FileReference> sourceOptions = fileReferenceMap.get(taskId);
            if (!sourceOptions.isEmpty() && (requestedTasks.isEmpty() || requestedTasks.contains(taskId))) {
                QueryTaskSourceOption[] taskSourceOptions = new QueryTaskSourceOption[sourceOptions.size()];
                int taskSourceOptionsIndex = 0;
                for (FileReference queryReference : sourceOptions) {
                    WorkerData workerData = worky.get(queryReference.getHostUUID());
                    taskSourceOptions[taskSourceOptionsIndex] =
                            new QueryTaskSourceOption(queryReference, workerData.queryLeases);
                    taskSourceOptionsIndex += 1;
                }
                sourcesByTaskID[taskId] = new QueryTaskSource(taskSourceOptions);
            } else {
                sourcesByTaskID[taskId] = EMPTY_TASK_SOURCE;
            }
        }
        return sourcesByTaskID;
    }

    @VisibleForTesting
    protected int validateRequestedTasks(String jobId, Set<Integer> availableTasks,
                                         Set<Integer> requestedTasks, boolean allowPartial) {
        int canonicalTasks;
        if (spawnDataStoreHandler != null) {
            canonicalTasks = spawnDataStoreHandler.getCononicalTaskCount(jobId);
        } else {
            // the best guess is that there are at least max_available_task_id + 1 tasks
            canonicalTasks = Collections.max(availableTasks) + 1;
        }
        validateRequestedTasks(canonicalTasks, availableTasks, requestedTasks, allowPartial);
        return canonicalTasks;
    }
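    /* Worked example of the checks in validateRequestedTasks below (the numbers
     * are made up): with canonicalTaskCount = 8, availableTasks = {0,1,2,3,4,5},
     * and requestedTasks = {4, 6, 7}, the missing set is {6, 7}. With
     * allowPartial = false the query fails because at least one requested task is
     * missing; with allowPartial = true it proceeds because task 4 is still
     * available, and it would only fail if every requested task were missing
     * (e.g. requestedTasks = {6, 7}).
     */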
    /**
     * Validates that all requested tasks are available.
     *
     * @param canonicalTaskCount total number of tasks.
     * @param availableTasks     available task ids.
     * @param tasks              requested task ids. If empty, all tasks are requested, i.e. 0 to
     *                           {@code canonicalTaskCount - 1}.
     */
    private void validateRequestedTasks(int canonicalTaskCount, Set<Integer> availableTasks,
                                        Set<Integer> tasks, boolean allowPartial) {
        if (availableTasks.size() != canonicalTaskCount) {
            Set<Integer> requestedTasks = expandRequestedTasks(tasks, canonicalTaskCount);
            Set<Integer> missingTasks = new TreeSet<>(Sets.difference(requestedTasks, availableTasks));
            if (!allowPartial && !missingTasks.isEmpty()) {
                // if allowPartial = false, fail if any requested task is unavailable
                throw new QueryException(
                        "Did not find data for all " + requestedTasks.size() +
                        " requested tasks (and allowPartial is off): " + availableTasks.size() +
                        " available out of " + canonicalTaskCount + " total. Missing the following " +
                        missingTasks.size() + " tasks: " + missingTasks);
            } else if (allowPartial && (requestedTasks.size() == missingTasks.size())) {
                // if allowPartial = true, fail only if all requested tasks are unavailable
                throw new QueryException(
                        "Did not find data for any of the " + requestedTasks.size() +
                        " requested tasks (and allowPartial is on): " + availableTasks.size() +
                        " available out of " + canonicalTaskCount + " total. Missing the following " +
                        missingTasks.size() + " tasks: " + missingTasks);
            }
        }
    }

    /** Returns the requested tasks as is if not empty, or expands to all known tasks if it is empty. */
    private Set<Integer> expandRequestedTasks(Set<Integer> tasks, int canonicalTaskCount) {
        if (tasks.isEmpty()) {
            return ContiguousSet.create(Range.closedOpen(0, canonicalTaskCount), DiscreteDomain.integers());
        } else {
            return tasks;
        }
    }

    /**
     * Called after MeshSourceAggregator detects that one of the FileReferences in the cache is
     * invalid/out of date. Looks for an alternate FileReferenceWrapper in the cache. If none
     * exists, fetches a replacement via a mesh lookup.
     *
     * @param failedReference the FileReference that threw the exception
     * @return a replacement FileReference, which is also placed into the cache if it was newly generated
     * @throws IOException if there is a problem fetching a replacement FileReference
     */
    public QueryTaskSourceOption getReplacementQueryTaskOption(FileReference failedReference)
            throws IOException, ExecutionException, InterruptedException {
        List<String> pathTokens = tokenizePath(failedReference.name);
        String job = getJobFromPath(pathTokens);
        int task = getTaskFromPath(pathTokens);
        Set<FileReference> oldReferences = cachey.getTaskReferencesIfPresent(job, task);
        Set<FileReference> newReferences = new HashSet<>(oldReferences);
        newReferences.remove(failedReference);
        if (newReferences.isEmpty()) {
            // there was no replacement fileReference in the cache, so we need to fetch a new one
            FileReference freshFileReference = cachey.getFileReferenceForSingleTask(job, task);
            newReferences.add(freshFileReference);
        }
        cachey.updateFileReferenceForTask(job, task, newReferences);
        FileReference cachedReplacement = newReferences.iterator().next();
        WorkerData workerData = worky.get(cachedReplacement.getHostUUID());
        return new QueryTaskSourceOption(cachedReplacement, workerData.queryLeases);
    }

    // omit empty strings so that we don't have to worry about random "//" instead of "/" or leading "/"s
    private static final Splitter FILEREF_PATH_SPLITTER = Splitter.on('/').omitEmptyStrings().limit(5);

    @VisibleForTesting static List<String> tokenizePath(String path) {
        return FILEREF_PATH_SPLITTER.splitToList(path);
    }

    @VisibleForTesting static String getJobFromPath(List<String> pathTokens) {
        String jobId = pathTokens.get(1);
        String jobDirWithSuffix = pathTokens.get(4);
        // drop the trailing 6-character suffix from the final token, leaving the job directory name
        String jobDir = jobDirWithSuffix.substring(0, jobDirWithSuffix.length() - 6);
        return jobId + '/' + jobDir;
    }

    @VisibleForTesting static int getTaskFromPath(List<String> pathTokens) {
        return Integer.parseInt(pathTokens.get(2));
    }
}
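/* Illustrative walk-through of the path helpers above, assuming a mesh FileReference
 * name of the form "/job/{jobId}/{taskId}/gold/{jobDir}/query" (the exact layout is
 * determined by the mesh file server, so treat the sample path as an assumption):
 *
 *     tokenizePath("/job/abc123/7/gold/data/query")
 *         yields ["job", "abc123", "7", "gold", "data/query"]   // limit(5) leaves the tail unsplit
 *     getJobFromPath(tokens)  yields "abc123/data"              // jobId + '/' + jobDir
 *     getTaskFromPath(tokens) yields 7
 */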