/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.imhotep.client; import com.google.common.base.Predicates; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import com.google.common.primitives.Longs; import com.indeed.util.core.Pair; import com.indeed.imhotep.DatasetInfo; import com.indeed.imhotep.RemoteImhotepMultiSession; import com.indeed.imhotep.ImhotepRemoteSession; import com.indeed.imhotep.ImhotepStatusDump; import com.indeed.imhotep.ShardInfo; import com.indeed.imhotep.api.ImhotepSession; import org.apache.log4j.Logger; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; /** * @author jsgroth */ public class ImhotepClient implements Closeable { private static final Logger log = Logger.getLogger(ImhotepClient.class); private final HostsReloader hostsSource; private final ExecutorService rpcExecutor; private final ScheduledExecutorService reloader; private final ImhotepClientShardListReloader shardListReloader; /** * create an imhotep client that will periodically reload its list of hosts from a text file * @param hostsFile hosts file */ public ImhotepClient(String hostsFile) { this(new FileHostsReloader(hostsFile)); } /** * create an imhotep client with a static list of hosts * @param hosts list of hosts */ public ImhotepClient(List<Host> hosts) { this(new DummyHostsReloader(hosts)); } public ImhotepClient(String zkNodes, boolean readHostsBeforeReturning) { this(new ZkHostsReloader(zkNodes, readHostsBeforeReturning)); } public ImhotepClient(String zkNodes, String zkPath, boolean readHostsBeforeReturning) { this(new ZkHostsReloader(zkNodes, zkPath, readHostsBeforeReturning)); } public ImhotepClient(HostsReloader hostsSource) { this.hostsSource = hostsSource; rpcExecutor = Executors.newCachedThreadPool(new ThreadFactory() { @Override public Thread newThread(Runnable r) { final Thread t = new Thread(r, "ImhotepClient.RPCThread"); t.setDaemon(true); return t; } }); reloader = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() { @Override public Thread newThread(Runnable r) { final Thread t = new Thread(r, "ImhotepClient.Reloader"); t.setDaemon(true); return t; } }); reloader.scheduleAtFixedRate(hostsSource, 60L, 60L, TimeUnit.SECONDS); shardListReloader = new ImhotepClientShardListReloader(hostsSource, rpcExecutor); shardListReloader.run(); reloader.scheduleAtFixedRate(shardListReloader, 60L, 60L, TimeUnit.SECONDS); } public Map<Host, List<DatasetInfo>> getShardList() { return shardListReloader.getShardList(); } // convenience methods public Map<String, DatasetInfo> getDatasetToShardList() { final Map<Host, List<DatasetInfo>> shardListMap = getShardList(); final Map<String, DatasetInfo> ret = Maps.newHashMap(); for (final List<DatasetInfo> datasetList : shardListMap.values()) { for (final DatasetInfo dataset : datasetList) { DatasetInfo current = ret.get(dataset.getDataset()); if (current == null) { ret.put(dataset.getDataset(), current = new DatasetInfo(dataset.getDataset(), new HashSet<ShardInfo>(), new HashSet<String>(), new HashSet<String>(), new HashSet<String>())); } current.getShardList().addAll(dataset.getShardList()); current.getIntFields().addAll(dataset.getIntFields()); current.getStringFields().addAll(dataset.getStringFields()); current.getMetrics().addAll(dataset.getMetrics()); } } return ret; } public List<String> getShardList(final String dataset) { return getShardList(dataset, new AcceptAllShardFilter()); } public List<String> getShardList(final String dataset, final ShardFilter filterFunc) { final Map<Host, List<DatasetInfo>> shardListMap = getShardList(); final SortedSet<String> set = new TreeSet<String>(); for (final List<DatasetInfo> datasetList : shardListMap.values()) { for (final DatasetInfo datasetInfo : datasetList) { for (final ShardInfo shard : datasetInfo.getShardList()) { if (dataset.equals(shard.dataset) && filterFunc.accept(shard)) { set.add(shard.shardId); } } } } return new ArrayList<String>(set); } public List<ShardIdWithVersion> getShardListWithVersion(final String dataset, final ShardFilter filterFunc) { final Map<Host, List<DatasetInfo>> shardListMap = getShardList(); final Map<String,Long> latestVersionMap = new HashMap<String, Long>(); for (final List<DatasetInfo> datasetList : shardListMap.values()) { for (final DatasetInfo datasetInfo : datasetList) { for (final ShardInfo shard : datasetInfo.getShardList()) { if (dataset.equals(shard.dataset) && filterFunc.accept(shard)) { //is in time range, check version if(!latestVersionMap.containsKey(shard.shardId) || latestVersionMap.get(shard.shardId) < shard.version) { latestVersionMap.put(shard.shardId, shard.version); } } } } } final List<ShardIdWithVersion> ret = Lists.newArrayListWithCapacity(latestVersionMap.size()); for (final Map.Entry<String, Long> e : latestVersionMap.entrySet()) { ret.add(new ShardIdWithVersion(e.getKey(), e.getValue())); } Collections.sort(ret); return ret; } /** * Returns a list of non-overlapping Imhotep shards for the specified dataset and time range. * Shards in the list are sorted chronologically. */ public List<ShardIdWithVersion> findShardsForTimeRange(String dataset, final DateTime start, final DateTime end) { // get shards intersecting with (start,end) time range final List<ShardIdWithVersion> shardsForTime = getShardListWithVersion(dataset, new DateRangeShardFilter(start, end)); return removeIntersectingShards(shardsForTime, dataset, start); } // we are truncating the shard start point as part of removeIntersectingShards so we make a wrapper for the ShardIdWithVersion private static class ShardTruncatedStart { private final ShardIdWithVersion shard; private final DateTime start; private final DateTime end; private final long version; private ShardTruncatedStart(ShardIdWithVersion shard, DateTime start) { this.shard = shard; this.start = start; this.end = shard.getEnd(); this.version = shard.getVersion(); } } /** * Returns a non-intersecting list of shard ids and versions chosen from the shardsForTime list. * Shards in the list are sorted chronologically. */ static List<ShardIdWithVersion> removeIntersectingShards(List<ShardIdWithVersion> shardsForTime, String dataset, final DateTime start) { // we have to limit shard start times to the requested start time to avoid // longer shards with the earlier start time taking precedence over newer smaller shards final List<ShardTruncatedStart> shardsForTimeTruncated = new ArrayList<ShardTruncatedStart>(shardsForTime.size()); for(ShardIdWithVersion shard : shardsForTime) { ShardInfo.DateTimeRange range = shard.getRange(); if(range == null) { log.warn("Unparseable shard id encountered in dataset '" + dataset + "': " + shard.getShardId()); continue; } DateTime shardStart = range.start; if(start.isAfter(range.start)) { shardStart = start; } shardsForTimeTruncated.add(new ShardTruncatedStart(shard, shardStart)); } // now we need to resolve potential time overlaps in shards // sort by: start date asc, version desc Collections.sort(shardsForTimeTruncated, new Comparator<ShardTruncatedStart>() { @Override public int compare(ShardTruncatedStart o1, ShardTruncatedStart o2) { final int c = o1.start.compareTo(o2.start); if(c != 0) return c; return -Longs.compare(o1.version, o2.version); } }); final List<ShardIdWithVersion> chosenShards = Lists.newArrayList(); DateTime processedUpTo = new DateTime(-2000000,1,1,0,0); // 2M BC for(ShardTruncatedStart shard : shardsForTimeTruncated) { if(!shard.start.isBefore(processedUpTo)) { chosenShards.add(shard.shard); processedUpTo = shard.end; } } return chosenShards; } /** * Returns a builder that can be used to initialize an {@link ImhotepSession} instance. * @param dataset dataset/index name for the session */ public SessionBuilder sessionBuilder(final String dataset, final DateTime start, final DateTime end) { return new SessionBuilder(dataset, start, end); } /** * Constructs {@link ImhotepSession} instances. * Set optional parameters and call {@link #build}() to get an instance. */ public class SessionBuilder { private final String dataset; private final DateTime start; private final DateTime end; private Collection<String> requestedMetrics = Collections.emptyList(); private int mergeThreadLimit = ImhotepRemoteSession.DEFAULT_MERGE_THREAD_LIMIT; private String username; private boolean optimizeGroupZeroLookups = false; private int socketTimeout = -1; private long localTempFileSizeLimit = -1; private long daemonTempFileSizeLimit = -1; private List<ShardIdWithVersion> chosenShards = null; private List<String> shardsOverride = null; public SessionBuilder(final String dataset, final DateTime start, final DateTime end) { this.dataset = dataset; this.start = start; this.end = end; } public SessionBuilder requestedMetrics(Collection<String> requestedMetrics) { this.requestedMetrics = Lists.newArrayList(requestedMetrics); return this; } public SessionBuilder mergeThreadLimit(int mergeThreadLimit) { this.mergeThreadLimit = mergeThreadLimit; return this; } @Deprecated public SessionBuilder priority(int priority) { return this; } public SessionBuilder socketTimeout(int socketTimeout) { this.socketTimeout = socketTimeout; return this; } public SessionBuilder username(String username) { this.username = username; return this; } public SessionBuilder optimizeGroupZeroLookups(boolean optimizeGroupZeroLookups) { this.optimizeGroupZeroLookups = optimizeGroupZeroLookups; return this; } public SessionBuilder localTempFileSizeLimit(long localTempFileSizeLimit) { this.localTempFileSizeLimit = localTempFileSizeLimit; return this; } public SessionBuilder daemonTempFileSizeLimit(long daemonTempFileSizeLimit) { this.daemonTempFileSizeLimit = daemonTempFileSizeLimit; return this; } public SessionBuilder shardsOverride(List<String> requiredShards) { this.shardsOverride = Lists.newArrayList(requiredShards); return this; } /** * Returns shards that were selected for the time range requested in the constructor. * Shards in the list are sorted chronologically. */ public List<ShardIdWithVersion> getChosenShards() { if(chosenShards == null) { if(start == null || end == null) { throw new IllegalArgumentException("start and end times can't be null"); } if(!end.isAfter(start)) { throw new IllegalArgumentException("Illegal time range requested: " + start.toString() + " to " + end.toString()); } this.chosenShards = findShardsForTimeRange(dataset, start, end); } return Lists.newArrayList(chosenShards); } /** * Returns a list of time intervals within the requested [start, end) range that are not covered by available shards. * Intervals in the list are sorted chronologically. */ public List<Interval> getTimeIntervalsMissingShards() { // expects the returned shards to be sorted by start time final List<ShardIdWithVersion> chosenShards = getChosenShards(); final List<Interval> timeIntervalsMissingShards = Lists.newArrayList(); DateTime processedUpTo = start; for(ShardIdWithVersion shard : chosenShards) { if(processedUpTo.isBefore(shard.getStart())) { timeIntervalsMissingShards.add(new Interval(processedUpTo, shard.getStart())); } processedUpTo = shard.getEnd(); } if(processedUpTo.isBefore(end)) { timeIntervalsMissingShards.add(new Interval(processedUpTo, end)); } return timeIntervalsMissingShards; } /** * Constructs an {@link ImhotepSession} instance. */ public ImhotepSession build() { if(username == null) { username = ImhotepRemoteSession.getUsername(); } List<String> chosenShardIDs = shardsOverride != null ? shardsOverride : ShardIdWithVersion.keepShardIds(getChosenShards()); return getSessionForShards(dataset, chosenShardIDs, requestedMetrics, mergeThreadLimit, username, optimizeGroupZeroLookups, socketTimeout, localTempFileSizeLimit, daemonTempFileSizeLimit); } } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards) { return getSession(dataset, requestedShards, Collections.<String>emptyList(), ImhotepRemoteSession.DEFAULT_MERGE_THREAD_LIMIT, -1); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final int socketTimeout) { return getSession(dataset, requestedShards, Collections.<String>emptyList(), ImhotepRemoteSession.DEFAULT_MERGE_THREAD_LIMIT, socketTimeout); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics) { return getSession(dataset, requestedShards, requestedMetrics, ImhotepRemoteSession.DEFAULT_MERGE_THREAD_LIMIT, -1); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit) { return getSession(dataset, requestedShards, requestedMetrics, mergeThreadLimit, -1); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit, final int priority) { return getSession(dataset, requestedShards, requestedMetrics, mergeThreadLimit, priority, ImhotepRemoteSession.getUsername(), false, -1); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit, final int priority, final int socketTimeout) { return getSession(dataset, requestedShards, requestedMetrics, mergeThreadLimit, priority, ImhotepRemoteSession.getUsername(), false, socketTimeout); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit, final int priority, final String username) { return getSession(dataset, requestedShards, requestedMetrics, mergeThreadLimit, priority, username, false, -1); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit, final int priority, final String username, final boolean optimizeGroupZeroLookups) { return getSession(dataset, requestedShards, requestedMetrics, mergeThreadLimit, priority, username, optimizeGroupZeroLookups, -1); } /** * @deprecated replaced by {@link #sessionBuilder}().build() */ @Deprecated public ImhotepSession getSession(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit, final int priority, final String username, final boolean optimizeGroupZeroLookups, final int socketTimeout) { return getSessionForShards(dataset, requestedShards, requestedMetrics, mergeThreadLimit, username, optimizeGroupZeroLookups, socketTimeout, -1, -1); } private ImhotepSession getSessionForShards(final String dataset, final Collection<String> requestedShards, final Collection<String> requestedMetrics, final int mergeThreadLimit, final String username, final boolean optimizeGroupZeroLookups, final int socketTimeout, long localTempFileSizeLimit, long daemonTempFileSizeLimit) { if(requestedShards == null || requestedShards.size() == 0) { throw new IllegalArgumentException("No shards"); } int retries = 3; final AtomicLong localTempFileSizeBytesLeft = localTempFileSizeLimit > 0 ? new AtomicLong(localTempFileSizeLimit) : null; while (retries > 0) { final String sessionId = UUID.randomUUID().toString(); final ImhotepRemoteSession[] remoteSessions = internalGetSession(dataset, requestedShards, requestedMetrics, mergeThreadLimit, username, optimizeGroupZeroLookups, socketTimeout, sessionId, daemonTempFileSizeLimit, localTempFileSizeBytesLeft); if (remoteSessions == null) { --retries; if (retries > 0) { shardListReloader.run(); } continue; } final InetSocketAddress[] nodes = new InetSocketAddress[remoteSessions.length]; for (int i = 0; i < remoteSessions.length; i++) { nodes[i] = remoteSessions[i].getInetSocketAddress(); } return new RemoteImhotepMultiSession(remoteSessions, sessionId, nodes, localTempFileSizeLimit, localTempFileSizeBytesLeft); } throw new RuntimeException("unable to open session"); } private static class IncrementalEvaluationState { private final Map<String, ShardData> unprocessedShards; private final Multimap<Host, String> unprocessedShardsByHost; public IncrementalEvaluationState(Map<String, ShardData> shards) { unprocessedShards = shards; unprocessedShardsByHost = HashMultimap.create(); for (Map.Entry<String, ShardData> entry : shards.entrySet()) { String shardId = entry.getKey(); for (Pair<Host, Integer> pair : entry.getValue().hostToLoadedMetrics) { Host host = pair.getFirst(); unprocessedShardsByHost.put(host, shardId); } if (entry.getValue().hostToLoadedMetrics.isEmpty()) { throw new IllegalStateException("no shards for host " + entry.getKey()); }; } } public synchronized List<String> getBatch(Host host, long maxDocs) { List<String> result = new ArrayList<String>(); int docCount = 0; for (String shard : unprocessedShardsByHost.get(host)) { if (docCount >= maxDocs) break; ShardData data = unprocessedShards.get(shard); assert data != null; result.add(shard); docCount += data.numDocs; } for (String shard : result) { ShardData data = unprocessedShards.remove(shard); for (Pair<Host, Integer> pair : data.hostToLoadedMetrics) { unprocessedShardsByHost.remove(pair.getFirst(), shard); } } return result; } } public void evaluateOnSessions(final SessionCallback callback, final String dataset, Collection<String> requestedShards, final long maxDocsPerSession) { // construct Map<String, ShardData> shardMap = constructPotentialShardMap(dataset, Collections.<String>emptySet()); shardMap = Maps.newHashMap( Maps.filterKeys(shardMap, Predicates.in(ImmutableSet.copyOf( requestedShards)))); Set<Host> hosts = Sets.newTreeSet(); for (ShardData data : shardMap.values()) { for (Pair<Host, Integer> pair : data.hostToLoadedMetrics) { hosts.add(pair.getFirst()); } } final IncrementalEvaluationState state = new IncrementalEvaluationState(shardMap); final ExecutorService executor = Executors.newCachedThreadPool(); final ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<Void>(executor); final List<Callable<Void>> callables = Lists.newArrayList(); final String sessionId = UUID.randomUUID().toString(); for (final Host host : hosts) { callables.add(new Callable<Void>() { @Override public Void call() throws Exception { try { while (true) { if (Thread.interrupted()) { throw new InterruptedException(); } List<String> shards = state.getBatch(host, maxDocsPerSession); if (shards.isEmpty()) break; log.info("Processing " + shards.size() + " for " + host); ImhotepRemoteSession session = ImhotepRemoteSession.openSession(host.getHostname(), host.getPort(), dataset, shards, sessionId); callback.handle(session); } return null; } catch (Exception e) { throw new Exception("failed to get results for host " + host, e); } } }); } try { for (Callable<Void> callable : callables) { completionService.submit(callable); } for (int i = 0; i < callables.size(); i++) { Future<?> future = completionService.take(); // to wait for completion future.get(); // to propagate exceptions } } catch (ExecutionException e) { throw new RuntimeException("exception while executing operation", e); } catch (InterruptedException e) { throw new RuntimeException("interrupted while waiting for operation", e); } finally { executor.shutdownNow(); } } private static class ShardData { final int numDocs; final long highestVersion; final List<Pair<Host, Integer>> hostToLoadedMetrics; private ShardData(int numDocs, long highestVersion, List<Pair<Host, Integer>> hostToLoadedMetrics) { this.numDocs = numDocs; this.highestVersion = highestVersion; this.hostToLoadedMetrics = hostToLoadedMetrics; } } // returns null on error private ImhotepRemoteSession[] internalGetSession(final String dataset, Collection<String> requestedShards, Collection<String> requestedMetrics, final int mergeThreadLimit, final String username, final boolean optimizeGroupZeroLookups, final int socketTimeout, @Nullable final String sessionId, final long tempFileSizeLimit, @Nullable final AtomicLong tempFileSizeBytesLeft) { final Map<Host, List<String>> shardRequestMap = buildShardRequestMap(dataset, requestedShards, requestedMetrics); if (shardRequestMap.isEmpty()) { log.error("unable to find all of the requested shards in dataset " + dataset + " (shard list = " + requestedShards + ")"); return null; } final ExecutorService executor = Executors.newCachedThreadPool(); final List<Future<ImhotepRemoteSession>> futures = new ArrayList<Future<ImhotepRemoteSession>>(shardRequestMap.size()); try { for (final Map.Entry<Host, List<String>> entry : shardRequestMap.entrySet()) { final Host host = entry.getKey(); final List<String> shardList = entry.getValue(); futures.add(executor.submit(new Callable<ImhotepRemoteSession>() { @Override public ImhotepRemoteSession call() throws Exception { return ImhotepRemoteSession.openSession(host.hostname, host.port, dataset, shardList, mergeThreadLimit, username, optimizeGroupZeroLookups, socketTimeout, sessionId, tempFileSizeLimit, tempFileSizeBytesLeft); } })); } } finally { executor.shutdown(); } final ImhotepRemoteSession[] remoteSessions = new ImhotepRemoteSession[shardRequestMap.size()]; boolean error = false; for (int i = 0; i < futures.size(); ++i) { try { remoteSessions[i] = futures.get(i).get(); } catch (ExecutionException e) { log.error("exception while opening session", e); error = true; } catch (InterruptedException e) { log.error("interrupted while opening session", e); error = true; } } if (error) { for (final ImhotepRemoteSession session : remoteSessions) { if (session != null) { try { session.close(); } catch (RuntimeException e) { log.error("exception while closing session", e); } } } return null; } return remoteSessions; } private Map<Host, List<String>> buildShardRequestMap(String dataset, Collection<String> requestedShards, Collection<String> requestedMetrics) { final Set<String> requestedMetricsSet = new HashSet<String>(requestedMetrics); final Map<String, ShardData> shardMap = constructPotentialShardMap(dataset, requestedMetricsSet); boolean error = false; for (final String shard : requestedShards) { if (!shardMap.containsKey(shard)) { log.error("shard " + shard + " not found"); error = true; } } if (error) { return Maps.newHashMap(); } final List<String> sortedShards = new ArrayList<String>(requestedShards); Collections.sort(sortedShards, new Comparator<String>() { @Override public int compare(String o1, String o2) { final int c1 = shardMap.get(o1).numDocs; final int c2 = shardMap.get(o2).numDocs; return -(c1 < c2 ? -1 : c1 > c2 ? 1 : 0); } }); final Map<Host, Integer> hostDocCounts = new HashMap<Host, Integer>(); final Map<Host, List<String>> shardRequestMap = new TreeMap<Host, List<String>>(); for (final String shard : sortedShards) { final List<Pair<Host, Integer>> potentialHosts = shardMap.get(shard).hostToLoadedMetrics; int minHostDocCount = Integer.MAX_VALUE; int minHostLoadedMetricCount = 0; Host minHost = null; for (final Pair<Host, Integer> p : potentialHosts) { final Host host = p.getFirst(); final int loadedMetricCount = p.getSecond(); if (!hostDocCounts.containsKey(host)) hostDocCounts.put(host, 0); if (loadedMetricCount > minHostLoadedMetricCount || hostDocCounts.get(host) < minHostDocCount) { minHostDocCount = hostDocCounts.get(host); minHostLoadedMetricCount = loadedMetricCount; minHost = host; } } if (minHost == null) throw new RuntimeException("something has gone horribly wrong"); if (!shardRequestMap.containsKey(minHost)) { shardRequestMap.put(minHost, new ArrayList<String>()); } shardRequestMap.get(minHost).add(shard); hostDocCounts.put(minHost, hostDocCounts.get(minHost) + shardMap.get(shard).numDocs); } return shardRequestMap; } /** * Given a dataset and a list of requested metrics, compute a map from shard IDs to lists of * (host, # of loaded metrics) pairs. * * @param dataset The dataset name * @param requestedMetricsSet The set of metrics whose loaded status should be counted * @return The resulting map */ private Map<String, ShardData> constructPotentialShardMap(String dataset, Set<String> requestedMetricsSet) { final Map<String, ShardData> shardMap = Maps.newHashMap(); final Map<Host, List<DatasetInfo>> shardListMap = getShardList(); for (final Map.Entry<Host, List<DatasetInfo>> e : shardListMap.entrySet()) { final Host host = e.getKey(); final List<DatasetInfo> shardList = e.getValue(); for (final DatasetInfo datasetInfo : shardList) { if (!dataset.equals(datasetInfo.getDataset())) continue; for (final ShardInfo shard : datasetInfo.getShardList()) { if (!shardMap.containsKey(shard.shardId)) { shardMap.put(shard.shardId, new ShardData(shard.numDocs, shard.version, new ArrayList<Pair<Host, Integer>>())); } else { final ShardData shardData = shardMap.get(shard.shardId); final long highestKnownVersion = shardData.highestVersion; if (highestKnownVersion < shard.version) { // a newer version was found and all the previously encountered data for this shard should be removed shardMap.put(shard.shardId, new ShardData(shard.numDocs, shard.version, new ArrayList<Pair<Host, Integer>>())); } else if (highestKnownVersion > shard.version) { continue; // this shard has an outdated version and should be skipped } // else if (highestKnownVersion == shard.version) // just continue } final int loadedMetricsCount = Sets.intersection(requestedMetricsSet, new HashSet<String>(shard.loadedMetrics)).size(); shardMap.get(shard.shardId).hostToLoadedMetrics.add(Pair.of(host, loadedMetricsCount)); } } } return shardMap; } public Map<Host, ImhotepStatusDump> getStatusDumps() { final List<Host> hosts = hostsSource.getHosts(); final Map<Host, Future<ImhotepStatusDump>> futures = Maps.newHashMap(); for (final Host host : hosts) { final Future<ImhotepStatusDump> future = rpcExecutor.submit(new Callable<ImhotepStatusDump>() { @Override public ImhotepStatusDump call() throws Exception { return ImhotepRemoteSession.getStatusDump(host.hostname, host.port); } }); futures.put(host, future); } final Map<Host, ImhotepStatusDump> ret = new HashMap<Host, ImhotepStatusDump>(); for (final Host host : hosts) { try { final ImhotepStatusDump statusDump = futures.get(host).get(); ret.put(host, statusDump); } catch (ExecutionException e) { log.error("error getting status dump from " + host, e); } catch (InterruptedException e) { log.error("error getting status dump from " + host, e); } } return ret; } @Override public void close() throws IOException { rpcExecutor.shutdownNow(); reloader.shutdown(); hostsSource.shutdown(); try { if (!rpcExecutor.awaitTermination(10, TimeUnit.SECONDS)) { throw new IOException("RPC executor failed to terminate in time"); } if (!reloader.awaitTermination(10, TimeUnit.SECONDS)) { throw new IOException("reloader failed to terminate in time"); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } public boolean isConnectionHealthy() { return hostsSource.isLoadedDataSuccessfullyRecently() && shardListReloader.isLoadedDataSuccessfullyRecently(); } }