/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.query;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.io.File;
import java.io.IOException;

import java.net.InetSocketAddress;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import com.addthis.basis.util.LessFiles;
import com.addthis.basis.util.LessStreams;
import com.addthis.basis.util.Parameter;

import com.addthis.hydra.data.query.Query;
import com.addthis.hydra.data.query.QueryException;
import com.addthis.hydra.query.aggregate.BalancedAllocator;
import com.addthis.hydra.query.aggregate.DefaultTaskAllocators;
import com.addthis.hydra.query.aggregate.MeshSourceAggregator;
import com.addthis.hydra.query.aggregate.QueryTaskSource;
import com.addthis.hydra.query.aggregate.QueryTaskSourceOption;
import com.addthis.hydra.query.loadbalance.WorkerData;
import com.addthis.hydra.query.loadbalance.WorkerTracker;
import com.addthis.hydra.query.spawndatastore.SpawnDataStoreHandler;
import com.addthis.hydra.query.tracker.QueryTracker;
import com.addthis.hydra.query.tracker.TrackerHandler;
import com.addthis.meshy.MeshyServer;
import com.addthis.meshy.service.file.FileReference;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;

import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelOutboundHandlerAdapter;
import io.netty.channel.ChannelPromise;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ChannelHandler.Sharable
public class MeshQueryMaster extends ChannelOutboundHandlerAdapter implements AutoCloseable {

    private static final Logger log = LoggerFactory.getLogger(MeshQueryMaster.class);

    private static final String tempDir = Parameter.value("query.tmpdir", "query.tmpdir");
    private static final int meshPort = Parameter.intValue("qmaster.mesh.port", 5100);
    private static final String meshRoot = Parameter.value("qmaster.mesh.root", "/home/hydra");
    private static final String meshPeers = Parameter.value("qmaster.mesh.peers", "localhost");
    private static final int meshPeerPort = Parameter.intValue("qmaster.mesh.peer.port", 5101);
    private static final boolean enableZooKeeper = Parameter.boolValue("qmaster.enableZooKeeper", true);

    private static final QueryTaskSource EMPTY_TASK_SOURCE = new QueryTaskSource(new QueryTaskSourceOption[0]);
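    /* The settings above are read once, at class load, from system properties via
     * Parameter. A minimal sketch of overriding the defaults at launch, assuming a
     * plain java invocation (YourMainClass stands in for whatever entry point
     * constructs this handler; it is not part of this file):
     *
     *     java -Dqmaster.mesh.port=5100 \
     *          -Dqmaster.mesh.root=/home/hydra \
     *          -Dqmaster.mesh.peers=host1,host2 \
     *          -Dqmaster.mesh.peer.port=5101 \
     *          -Dqmaster.enableZooKeeper=false \
     *          YourMainClass
     */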
    /**
     * Tracks metrics and other interesting things about queries that we have run.
     * Provides insight into currently running queries and the ability to cancel a query before it completes.
     */
    private final QueryTracker tracker;

    /** Primary mesh server. */
    private final MeshyServer meshy;

    /** Abstracts away spawndatastore-reliant functions. */
    private final SpawnDataStoreHandler spawnDataStoreHandler;

    /** Mesh FileRef cache -- backed by a loading cache. */
    private final MeshFileRefCache cachey;

    private final WorkerTracker worky;
    private final DefaultTaskAllocators allocators;

    public MeshQueryMaster(QueryTracker tracker) throws Exception {
        this.tracker = tracker;
        meshy = new MeshyServer(meshPort, new File(meshRoot));
        cachey = new MeshFileRefCache(meshy);
        worky = new WorkerTracker();
        allocators = new DefaultTaskAllocators(new BalancedAllocator(worky));
        connectToMeshPeers();
        try {
            // delete the tmp directory (disk sort directory)
            File tempDirFile = new File(tempDir).getCanonicalFile();
            LessFiles.deleteDir(tempDirFile);
            LessFiles.initDirectory(tempDirFile);
        } catch (Exception e) {
            log.warn("Error while cleaning / locating the temp directory (for disk sorts).", e);
        }
        if (enableZooKeeper) {
            spawnDataStoreHandler = new SpawnDataStoreHandler();
        } else {
            spawnDataStoreHandler = null;
        }
    }

    public SpawnDataStoreHandler getSpawnDataStoreHandler() {
        return spawnDataStoreHandler;
    }

    public DefaultTaskAllocators allocators() {
        return allocators;
    }

    public WorkerTracker worky() {
        return worky;
    }

    public QueryTracker getQueryTracker() {
        return tracker;
    }

    @Override
    public void close() {
        try {
            if (spawnDataStoreHandler != null) {
                spawnDataStoreHandler.close();
            }
            meshy.close();
        } catch (Exception e) {
            log.error("Exception during MeshQueryMaster shutdown", e);
        }
    }

    private void connectToMeshPeers() {
        if (meshPeers != null) {
            String[] peers = meshPeers.split(",");
            for (String peer : peers) {
                meshy.connectPeer(new InetSocketAddress(peer, meshPeerPort));
            }
        }
    }

    public void handleError(Query query) {
        String job = query.getJob();
        if (job != null) {
            cachey.invalidate(job);
        }
    }

    @Override
    public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws Exception {
        if (msg instanceof Query) {
            writeQuery(ctx, (Query) msg, promise);
        } else {
            super.write(ctx, msg, promise);
        }
    }

    private static String getJobSubdirectory(String combinedJob) {
        int dirIndex = combinedJob.indexOf('/');
        if (dirIndex > -1) {
            return combinedJob.substring(dirIndex + 1);
        } else {
            return "";
        }
    }

    private static String getJobWithoutSubdirectory(String combinedJob) {
        int dirIndex = combinedJob.indexOf('/');
        if (dirIndex > -1) {
            return combinedJob.substring(0, dirIndex);
        } else {
            return combinedJob;
        }
    }

    private static final Splitter JOB_SPLITTER = Splitter.on(',');
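    /* Illustrative sketch of the job specification handled by writeQuery below
     * (the job names are made up): the query's "job" string is a comma-separated
     * list whose elements may carry an optional subdirectory after the first '/',
     * so
     *
     *     "jobA,jobB/gold"
     *
     * queries jobA (default directory) plus the "gold" subdirectory of jobB. Each
     * element is alias-expanded first, and an explicitly supplied subdirectory
     * takes precedence over one contributed by the alias expansion.
     */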
    protected void writeQuery(ChannelHandlerContext ctx, Query query, ChannelPromise promise) throws Exception {
        // log rops prior to mutating the query
        String[] opsLog = query.getOps();
        // creates the query for the worker and updates the local query ops (!mutates query!).
        // query and remoteQuery are exact copies except that query's ops contains the first element
        // of the original and remoteQuery's ops contains the rest (i.e. query contains ops, and
        // remoteQuery contains rops)
        Query remoteQuery = query.createPipelinedQuery();
        boolean allowPartial = Boolean.parseBoolean(query.getParameter("allowPartial"));
        Set<Integer> tasks = parseTasks(query.getParameter("tasks"));
        List<QueryTaskSource[]> sourcesPerDir = new ArrayList<>(2);
        for (String combinedUnresolved : JOB_SPLITTER.split(query.getJob())) {
            String jobIdOrAlias = getJobWithoutSubdirectory(combinedUnresolved);
            String subdirectory = getJobSubdirectory(combinedUnresolved);
            for (String resolved : expandAlias(jobIdOrAlias)) {
                String resolvedJobId = getJobWithoutSubdirectory(resolved);
                String resolvedSubdirectory;
                if (!subdirectory.isEmpty()) {
                    resolvedSubdirectory = subdirectory;
                } else {
                    resolvedSubdirectory = getJobSubdirectory(resolved);
                }
                sourcesPerDir.add(getSourcesById(resolvedJobId, resolvedSubdirectory, allowPartial, tasks));
            }
        }
        QueryTaskSource[] sourcesByTaskID;
        if (sourcesPerDir.size() > 1) {
            sourcesByTaskID = sourcesPerDir.stream().flatMap(Arrays::stream).toArray(QueryTaskSource[]::new);
        } else {
            sourcesByTaskID = sourcesPerDir.get(0);
        }
        MeshSourceAggregator aggregator = new MeshSourceAggregator(sourcesByTaskID, meshy, this, remoteQuery);
        ctx.pipeline().addLast(ctx.executor(), "query aggregator", aggregator);
        TrackerHandler trackerHandler = new TrackerHandler(tracker, opsLog, aggregator);
        ctx.pipeline().addLast(ctx.executor(), "query tracker", trackerHandler);
        ctx.pipeline().remove(this);
        ctx.pipeline().write(query, promise);
    }

    private static final Splitter TASKS_SPLITTER = Splitter.on(',').trimResults();

    @Nonnull private static Set<Integer> parseTasks(@Nullable String tasks) {
        if (Strings.isNullOrEmpty(tasks)) {
            return Collections.emptySet();
        } else {
            return LessStreams.stream(TASKS_SPLITTER.split(tasks))
                              .map(Ints::tryParse)
                              .filter(i -> i != null)
                              .collect(Collectors.toSet());
        }
    }

    private List<String> expandAlias(String jobId) {
        if (spawnDataStoreHandler != null) {
            return spawnDataStoreHandler.expandAlias(jobId);
        } else {
            return Collections.singletonList(jobId);
        }
    }
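    /* Sketch of how the "tasks" parameter is interpreted by parseTasks above (the
     * values are made up): parseTasks("0, 3,17") yields {0, 3, 17}. Ints.tryParse
     * returns null for unparseable entries, so those are silently dropped, and a
     * null or empty parameter yields the empty set, which getSourcesById below
     * treats as "query every task".
     */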
    /**
     * @param requestedTasks only query these task ids. empty means query all tasks.
     */
    private QueryTaskSource[] getSourcesById(String jobId, String subdirectory,
                                             boolean allowPartial, Set<Integer> requestedTasks) {
        if (spawnDataStoreHandler != null) {
            spawnDataStoreHandler.validateJobForQuery(jobId);
        }
        String combinedJob;
        if (!subdirectory.isEmpty()) {
            combinedJob = jobId + '/' + subdirectory;
        } else {
            combinedJob = jobId;
        }
        Multimap<Integer, FileReference> fileReferenceMap;
        try {
            fileReferenceMap = cachey.get(combinedJob);
        } catch (ExecutionException e) {
            log.warn("Exception getting file references for job: {}", combinedJob, e);
            throw new QueryException("Exception getting file references: " + e.getMessage());
        }
        if ((fileReferenceMap == null) || fileReferenceMap.isEmpty()) {
            cachey.invalidate(combinedJob);
            throw new QueryException("[MeshQueryMaster] No file references found for job: " + combinedJob);
        }
        int canonicalTaskCount;
        try {
            canonicalTaskCount = validateRequestedTasks(jobId, fileReferenceMap.keySet(), requestedTasks, allowPartial);
        } catch (Exception ex) {
            cachey.invalidate(combinedJob);
            throw ex;
        }
        QueryTaskSource[] sourcesByTaskID = new QueryTaskSource[canonicalTaskCount];
        for (int taskId = 0; taskId < canonicalTaskCount; taskId++) {
            Collection<FileReference> sourceOptions = fileReferenceMap.get(taskId);
            if (!sourceOptions.isEmpty() && (requestedTasks.isEmpty() || requestedTasks.contains(taskId))) {
                QueryTaskSourceOption[] taskSourceOptions = new QueryTaskSourceOption[sourceOptions.size()];
                int taskSourceOptionsIndex = 0;
                for (FileReference queryReference : sourceOptions) {
                    WorkerData workerData = worky.get(queryReference.getHostUUID());
                    taskSourceOptions[taskSourceOptionsIndex] =
                            new QueryTaskSourceOption(queryReference, workerData.queryLeases);
                    taskSourceOptionsIndex += 1;
                }
                sourcesByTaskID[taskId] = new QueryTaskSource(taskSourceOptions);
            } else {
                sourcesByTaskID[taskId] = EMPTY_TASK_SOURCE;
            }
        }
        return sourcesByTaskID;
    }

    @VisibleForTesting
    protected int validateRequestedTasks(String jobId, Set<Integer> availableTasks,
                                         Set<Integer> requestedTasks, boolean allowPartial) {
        int canonicalTasks;
        if (spawnDataStoreHandler != null) {
            canonicalTasks = spawnDataStoreHandler.getCononicalTaskCount(jobId);
        } else {
            // the best guess is that there are at least max_available_task_id + 1 tasks
            canonicalTasks = Collections.max(availableTasks) + 1;
        }
        validateRequestedTasks(canonicalTasks, availableTasks, requestedTasks, allowPartial);
        return canonicalTasks;
    }
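    /* Worked example of the checks in validateRequestedTasks below (the numbers
     * are made up): with canonicalTaskCount = 8, availableTasks = {0,1,2,3,4,5},
     * and requestedTasks = {4, 6, 7}, the missing set is {6, 7}. With
     * allowPartial = false the query fails because at least one requested task is
     * missing; with allowPartial = true it proceeds because task 4 is still
     * available, and it would only fail if every requested task were missing
     * (e.g. requestedTasks = {6, 7}).
     */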
    /**
     * Validates that all requested tasks are available.
     *
     * @param canonicalTaskCount total number of tasks.
     * @param availableTasks     available task ids.
     * @param tasks              requested task ids. If empty, all tasks are requested, i.e. 0 to
     *                           {@code canonicalTaskCount - 1}.
     */
    private void validateRequestedTasks(int canonicalTaskCount, Set<Integer> availableTasks,
                                        Set<Integer> tasks, boolean allowPartial) {
        if (availableTasks.size() != canonicalTaskCount) {
            Set<Integer> requestedTasks = expandRequestedTasks(tasks, canonicalTaskCount);
            Set<Integer> missingTasks = new TreeSet<>(Sets.difference(requestedTasks, availableTasks));
            if (!allowPartial && !missingTasks.isEmpty()) {
                // if allowPartial = false, fail if any requested task is unavailable
                throw new QueryException(
                        "Did not find data for all " + requestedTasks.size() +
                        " requested tasks (and allowPartial is off): " + availableTasks.size() +
                        " available out of " + canonicalTaskCount + " total. Missing the following " +
                        missingTasks.size() + " tasks: " + missingTasks);
            } else if (allowPartial && (requestedTasks.size() == missingTasks.size())) {
                // if allowPartial = true, fail only if all requested tasks are unavailable
                throw new QueryException(
                        "Did not find data for any of the " + requestedTasks.size() +
                        " requested tasks (and allowPartial is on): " + availableTasks.size() +
                        " available out of " + canonicalTaskCount + " total. Missing the following " +
                        missingTasks.size() + " tasks: " + missingTasks);
            }
        }
    }

    /** Returns the requested tasks as is if not empty, or expands to all known tasks if it is empty. */
    private Set<Integer> expandRequestedTasks(Set<Integer> tasks, int canonicalTaskCount) {
        if (tasks.isEmpty()) {
            return ContiguousSet.create(Range.closedOpen(0, canonicalTaskCount), DiscreteDomain.integers());
        } else {
            return tasks;
        }
    }

    /**
     * Called after MeshSourceAggregator detects that one of the FileReferences in the cache is
     * invalid/out of date. Looks for an alternate FileReferenceWrapper in the cache. If none
     * exists, fetches a replacement via a mesh lookup.
     *
     * @param failedReference the FileReference that threw the exception
     * @return a replacement FileReference, which is also placed into the cache if it was newly generated
     * @throws IOException if there is a problem fetching a replacement FileReference
     */
    public QueryTaskSourceOption getReplacementQueryTaskOption(FileReference failedReference)
            throws IOException, ExecutionException, InterruptedException {
        List<String> pathTokens = tokenizePath(failedReference.name);
        String job = getJobFromPath(pathTokens);
        int task = getTaskFromPath(pathTokens);
        Set<FileReference> oldReferences = cachey.getTaskReferencesIfPresent(job, task);
        Set<FileReference> newReferences = new HashSet<>(oldReferences);
        newReferences.remove(failedReference);
        if (newReferences.isEmpty()) {
            // there was no replacement fileReference in the cache, so we need to fetch a new one
            FileReference freshFileReference = cachey.getFileReferenceForSingleTask(job, task);
            newReferences.add(freshFileReference);
        }
        cachey.updateFileReferenceForTask(job, task, newReferences);
        FileReference cachedReplacement = newReferences.iterator().next();
        WorkerData workerData = worky.get(cachedReplacement.getHostUUID());
        return new QueryTaskSourceOption(cachedReplacement, workerData.queryLeases);
    }

    // omit empty strings so that we don't have to worry about random "//" instead of "/" or leading "/"s
    private static final Splitter FILEREF_PATH_SPLITTER = Splitter.on('/').omitEmptyStrings().limit(5);

    @VisibleForTesting static List<String> tokenizePath(String path) {
        return FILEREF_PATH_SPLITTER.splitToList(path);
    }

    @VisibleForTesting static String getJobFromPath(List<String> pathTokens) {
        String jobId = pathTokens.get(1);
        String jobDirWithSuffix = pathTokens.get(4);
        // drop the trailing 6-character suffix from the final token, leaving the job directory name
        String jobDir = jobDirWithSuffix.substring(0, jobDirWithSuffix.length() - 6);
        return jobId + '/' + jobDir;
    }

    @VisibleForTesting static int getTaskFromPath(List<String> pathTokens) {
        return Integer.parseInt(pathTokens.get(2));
    }
}
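/* Illustrative walk-through of the path helpers above, assuming a mesh FileReference
 * name of the form "/job/{jobId}/{taskId}/gold/{jobDir}/query" (the exact layout is
 * determined by the mesh file server, so treat the sample path as an assumption):
 *
 *     tokenizePath("/job/abc123/7/gold/data/query")
 *         yields ["job", "abc123", "7", "gold", "data/query"]   // limit(5) leaves the tail unsplit
 *     getJobFromPath(tokens)  yields "abc123/data"              // jobId + '/' + jobDir
 *     getTaskFromPath(tokens) yields 7
 */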