/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.metadata; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Set; import com.google.common.base.Preconditions; import gobblin.metadata.types.GlobalMetadata; /** * This class collects metadata records, optionally merging them with a set of default metadata. It also * keeps track of all of the merged records so they can be published at a later date. */ public class GlobalMetadataCollector { public static final int UNLIMITED_SIZE = -1; private final LinkedHashSet<GlobalMetadata> metadataRecords; private final GlobalMetadata defaultMetadata; private final int cacheSize; private String lastSeenMetadataId; /** * Initialize a MetdataCollector with the given cache size. * @param cacheSize You can pass the value -1 to have an unlimited cache size. */ public GlobalMetadataCollector(int cacheSize) { this(null, cacheSize); } /** * Initialize a MetadataCollector with some default metadata to merge incoming records with. * (Eg: a dataset-URN or a set of Transfer-Encodings). */ public GlobalMetadataCollector(GlobalMetadata defaultMetadata, int cacheSize) { Preconditions.checkArgument(cacheSize == -1 || cacheSize > 0, "cacheSize must be -1 or greater than 0"); this.defaultMetadata = defaultMetadata; this.cacheSize = cacheSize; this.lastSeenMetadataId = ""; this.metadataRecords = new LinkedHashSet<>(); } /** * Process a metadata record, merging it with default metadata. * <p> * If the combined (metadata + defaultMetadata) record is not present in the Collector's cache, * then the new metadata record will be stored in cache and returned. The oldest record in the cache will be evicted * if necessary. * <p>> * If the new record already exists in the cache, then the LRU time will be updated but this method will return null. */ public synchronized GlobalMetadata processMetadata(GlobalMetadata metadata) { GlobalMetadata recordToAdd = getRecordToAdd(metadata); if (recordToAdd != null) { boolean isNew = addRecordAndEvictIfNecessary(recordToAdd); return isNew ? recordToAdd : null; } return null; } /** * Return a Set of all merged metadata records in the cache. The set is immutable. */ public Set<GlobalMetadata> getMetadataRecords() { return Collections.unmodifiableSet(metadataRecords); } private boolean addRecordAndEvictIfNecessary(GlobalMetadata recordToAdd) { // First remove the element from the HashSet if it's already in there to reset // the 'LRU' piece; then add it back in boolean isNew = !metadataRecords.remove(recordToAdd); metadataRecords.add(recordToAdd); // Now remove the first element (which should be the oldest) from the list // if we've exceeded the cache size if (cacheSize != -1 && metadataRecords.size() > cacheSize) { Iterator<GlobalMetadata> recordIt = metadataRecords.iterator(); recordIt.next(); // Remove the oldest element - don't care what it is recordIt.remove(); } return isNew; } private GlobalMetadata getRecordToAdd(GlobalMetadata metadata) { if (metadata == null) { return defaultMetadata; } // Optimization - we know this record already has been seen, so don't // merge with defaults if (metadata.getId().equals(lastSeenMetadataId)) { return null; } lastSeenMetadataId = metadata.getId(); if (defaultMetadata != null) { metadata.mergeWithDefaults(defaultMetadata); } return metadata; } }