/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.query; import javax.annotation.Nonnull; import java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import com.addthis.hydra.data.query.QueryException; import com.addthis.meshy.ChannelCloseListener; import com.addthis.meshy.MeshyServer; import com.addthis.meshy.service.file.FileReference; import com.google.common.base.Predicate; import com.google.common.cache.CacheBuilder; import com.google.common.cache.LoadingCache; import com.google.common.collect.Collections2; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableSetMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.SetMultimap; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.yammer.metrics.Metrics; import com.yammer.metrics.core.Counter; import com.yammer.metrics.core.Timer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.netty.channel.Channel; public class MeshFileRefCache implements ChannelCloseListener { static final Logger log = LoggerFactory.getLogger(MeshFileRefCache.class); // metrics static final Timer fileReferenceFetchTimes = Metrics.newTimer(MeshFileRefCache.class, "fileReferenceFetchTimes", TimeUnit.MILLISECONDS, TimeUnit.SECONDS); static final Counter fileReferenceFetches = Metrics.newCounter(MeshFileRefCache.class, "fileReferenceFetches"); private final FileRefCacheLoader loader; /** * Maintains a LRU cache of {@code FileReference} objects for a given job id. If a Job * has 32 tasks then there should be 32 references in the set to indicate a correctly functioning job */ @Nonnull private final LoadingCache<String, SetMultimap<Integer, FileReference>> fileReferenceCache; public MeshFileRefCache(MeshyServer meshy) throws Exception { this.loader = new FileRefCacheLoader(meshy); this.fileReferenceCache = createLoadingCache(loader); meshy.addChannelCloseListener(this); startCacheMaintainer(); } public Multimap<Integer, FileReference> get(String job) throws ExecutionException { return fileReferenceCache.get(job); } @Nonnull public Set<FileReference> getTaskReferencesIfPresent(String job, int taskId) { SetMultimap<Integer, FileReference> refMap = fileReferenceCache.getIfPresent(job); if (refMap != null) { return (Set<FileReference>) refMap.asMap().get(taskId); } return Collections.emptySet(); } public void invalidate(String job) { fileReferenceCache.invalidate(job); } private LoadingCache<String, SetMultimap<Integer, FileReference>> createLoadingCache(FileRefCacheLoader loader) { return CacheBuilder.newBuilder() .maximumSize(200) .refreshAfterWrite(2, TimeUnit.MINUTES) .build(loader); } private void startCacheMaintainer() { ScheduledExecutorService mqmFileRefCacheMaintainer = new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("mqmFileRefCacheMaintainer=%d").build()); mqmFileRefCacheMaintainer.scheduleAtFixedRate(fileReferenceCache::cleanUp, 30, 30, TimeUnit.SECONDS); } /** * This method filters the file references to ensure that only valid file references are returned. * <p/> * The filter checks for two things. * <p/> * <ol> * <li>the last modified date for each file for the same task should be the same, if not it will take the * newest file</li> * <li>the size of the files should be equal, if not, take the files with the largest known size</li> * </ol> * * @param fileRefDataSet - the original unfiltered file reference set * @return - filtered file reference map containing only valid file references */ @Nonnull protected static SetMultimap<Integer, FileReference> filterFileReferences( @Nonnull SetMultimap<Integer, FileReference> fileRefDataSet) { if (fileRefDataSet.isEmpty()) { return fileRefDataSet; } int baseKeySetSize = fileRefDataSet.keySet().size(); SetMultimap<Integer, FileReference> filteredFileReferenceSet = HashMultimap.create(baseKeySetSize, fileRefDataSet.size() / baseKeySetSize); for (Map.Entry<Integer, Collection<FileReference>> entry : fileRefDataSet.asMap().entrySet()) { int key = entry.getKey(); final Collection<FileReference> fileReferences = entry.getValue(); long mostRecentTime = -1; for (FileReference fileReference : fileReferences) { if ((mostRecentTime < 0) || (fileReference.lastModified > mostRecentTime)) { mostRecentTime = fileReference.lastModified; } } final long mostRecentTimeF = mostRecentTime; Predicate<FileReference> isMostRecent = input -> (input != null) && (input.lastModified == mostRecentTimeF); Collection<FileReference> filteredFileReferences = Collections2.filter(fileReferences, isMostRecent); filteredFileReferenceSet.putAll(key, filteredFileReferences); } return filteredFileReferenceSet; } public FileReference getFileReferenceForSingleTask(String job, int taskId) throws InterruptedException { Collection<FileReference> refSet = loader.getFileReferences(job, Integer.toString(taskId)).get(taskId); if ((refSet == null) || refSet.isEmpty()) { throw new QueryException("Could not find task reference for " + job + "/" + taskId); } else { return refSet.iterator().next(); } } @Override public void channelClosed(Channel channel) { // if a channel is closed then we need to invalidate the fileReferenceCache so // that we get new references the next time a query is run log.debug("[MeshQueryMaster] channel: {} has been closed", channel); invalidateFileReferenceCache(); } public void invalidateFileReferenceCache() { fileReferenceCache.invalidateAll(); } public void updateFileReferenceForTask(String job, int task, Iterable<FileReference> baseSet) { SetMultimap<Integer, FileReference> existing = fileReferenceCache.getIfPresent(job); if (existing != null) { ImmutableSetMultimap.Builder<Integer, FileReference> withReplacement = ImmutableSetMultimap.<Integer, FileReference>builder(); for (Map.Entry<Integer, Collection<FileReference>> entry : existing.asMap().entrySet()) { if (entry.getKey() != task) { withReplacement.putAll(entry.getKey(), entry.getValue()); } } withReplacement.putAll(task, baseSet); fileReferenceCache.put(job, withReplacement.build()); } } }