/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.data.util;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.Codable;

/**
 * Class that helps maintain a top N list for any String Map, where every key
 * is associated with a {@link ReplaceableFeaturesBucket} and ranked by the
 * bucket's hit count.
 * <p>
 * {@link #init()} must be called before any other method that touches the
 * map when the instance is created through the no-arg constructor; the
 * constructor itself leaves the map unset.
 * <p>
 * TODO should move into basis libraries
 */
public final class FeaturesKeyTopper implements Codable {

    /** Key to bucket mapping that forms the current top list. */
    @FieldConfig(codable = true, required = true)
    private HashMap<String, ReplaceableFeaturesBucket> featuresMap;

    /** Cached hit count of the entry currently considered the minimum. */
    @FieldConfig(codable = true)
    private long minVal;

    /** Cached key of the entry currently considered the minimum. */
    @FieldConfig(codable = true)
    private String minKey;

    /**
     * When set, a new key offered to a full list starts at
     * {@code max(minVal - 1, 0)} hits instead of 0 (see
     * {@link #increment(String, int, List)}).
     */
    @FieldConfig(codable = true)
    private boolean lossy;

    public FeaturesKeyTopper() {
    }

    @Override
    public String toString() {
        // Plain concatenation instead of featuresMap.toString() so that an
        // instance that has not been init()-ed prints "null" rather than
        // throwing a NullPointerException.
        return "topper(min:" + minKey + "=" + minVal + "->" + featuresMap + ",lossy:" + lossy + ")";
    }

    /**
     * Replaces the backing map with a fresh, empty one.
     *
     * @return this instance, for chaining
     */
    public FeaturesKeyTopper init() {
        featuresMap = new HashMap<>();
        return this;
    }

    /**
     * Sets the lossy flag.
     *
     * @param isLossy new value of the flag
     * @return this instance, for chaining
     */
    public FeaturesKeyTopper setLossy(boolean isLossy) {
        lossy = isLossy;
        return this;
    }

    /** @return the current value of the lossy flag */
    public boolean isLossy() {
        return lossy;
    }

    /** @return number of keys currently held */
    public int size() {
        return featuresMap.size();
    }

    /**
     * @param key key to look up
     * @return the bucket stored for {@code key}, or null if there is none
     */
    public ReplaceableFeaturesBucket get(String key) {
        return featuresMap.get(key);
    }

    /**
     * Returns the list sorted by greatest to least count.
     * <p>
     * Each returned entry's key is {@code id + ">" + bucket.toString()} and
     * its value is the bucket's hit count.
     *
     * @return a newly built array of summary entries, highest hit count first
     */
    public Map.Entry<String, Long>[] getSortedEntries() {
        Map<String, Long> summaryMap = new HashMap<>();
        for (Map.Entry<String, ReplaceableFeaturesBucket> entry : featuresMap.entrySet()) {
            ReplaceableFeaturesBucket bucket = entry.getValue();
            summaryMap.put(entry.getKey() + ">" + bucket.toString(), Long.valueOf(bucket.getHits()));
        }
        // Generic arrays cannot be created directly; the raw array only ever
        // receives entries of summaryMap, so the conversion is safe.
        @SuppressWarnings("unchecked")
        Map.Entry<String, Long>[] sorted = summaryMap.entrySet().toArray(new Map.Entry[summaryMap.size()]);
        Arrays.sort(sorted, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Map.Entry<String, Long> left, Map.Entry<String, Long> right) {
                // Long.compare rather than casting the difference to int:
                // the cast truncates and can flip the sign for large counts.
                return Long.compare(right.getValue(), left.getValue());
            }
        });
        return sorted;
    }

    /**
     * Rescans the map for the entry with the fewest hits and caches it in
     * {@code minKey}/{@code minVal}. The scan only runs when no minimum is
     * cached yet, when a new entry was offered to a full list, or when the
     * existing minimum entry itself was updated.
     *
     * @param maxed    whether the list was full when the entry was offered
     * @param newentry whether the offered id was absent from the list
     * @param id       the id that was offered
     */
    private void recalcMin(boolean maxed, boolean newentry, String id) {
        // minKey is non-null whenever the last clause is evaluated, so
        // calling equals on it cannot throw even for a null id.
        if (minKey == null || (maxed && newentry) || (!newentry && minKey.equals(id))) {
            minVal = 0;
            // Explicit "first entry" flag instead of using minVal == 0 as the
            // sentinel: a bucket that legitimately has 0 hits would otherwise
            // reset the scan and let a later, larger entry become the "min".
            boolean first = true;
            for (Map.Entry<String, ReplaceableFeaturesBucket> e : this.featuresMap.entrySet()) {
                long hits = e.getValue().getHits();
                if (first || hits < minVal) {
                    minVal = hits;
                    minKey = e.getKey();
                    first = false;
                }
            }
        }
    }

    /**
     * Varargs convenience form of {@link #increment(String, int, List)}.
     *
     * @param id       key to credit
     * @param maxsize  maximum number of keys to keep
     * @param features features to record for the key
     * @return element dropped from top or null if accepted into top with no
     *         drops
     */
    public String increment(String id, int maxsize, String... features) {
        return increment(id, maxsize, Arrays.asList(features));
    }

    /**
     * Adds 'ID' the top N if: 1) there are more empty slots or 2) count >
     * smallest top count in the list.
     * <p>
     * An existing bucket for {@code id} is reused; otherwise a new one is
     * created whose starting hit count is {@code max(minVal - 1, 0)} when the
     * topper is lossy and already holds {@code maxsize} keys, and 0 in every
     * other case. The features are then added to the bucket, its hits are
     * incremented by the number of features, and the bucket is offered to
     * {@link #update(String, int, ReplaceableFeaturesBucket)}.
     *
     * @param id       key to credit
     * @param maxsize  maximum number of keys to keep
     * @param features features to record; null is treated as "no features"
     *                 and adds zero hits
     * @return element dropped from top or null if accepted into top with no
     *         drops
     */
    public String increment(String id, int maxsize, List<String> features) {
        ReplaceableFeaturesBucket featuresBucket = featuresMap.get(id);
        if (featuresBucket == null) {
            // Always create the bucket for an unseen id; previously a null
            // feature list left it null and the call below threw.
            featuresBucket = new ReplaceableFeaturesBucket();
            boolean full = featuresMap.size() >= maxsize;
            featuresBucket.setHits(Math.max(lossy && full ? minVal - 1 : 0L, 0L));
        }
        if (features != null) {
            featuresBucket.addFeatures(features);
        }
        // increment hits regardless of features != null (unique use case)
        featuresBucket.incrementHits(features == null ? 0 : features.size());
        return update(id, maxsize, featuresBucket);
    }

    /**
     * Passes {@code original} and {@code values} to
     * {@code augmentExistingFeatures} on every bucket currently held. Does
     * nothing when {@code values} is null or empty.
     *
     * @param original first argument forwarded to each bucket
     * @param values   second argument forwarded to each bucket
     */
    public void augmentFeatures(String original, List<String> values) {
        if (values == null || values.isEmpty()) {
            return;
        }
        for (ReplaceableFeaturesBucket featuresBucket : featuresMap.values()) {
            featuresBucket.augmentExistingFeatures(original, values);
        }
    }

    /**
     * Adds 'ID' the top N if: 1) there are more empty slots or 2) count >
     * smallest top count in the list
     *
     * @param id             key being offered
     * @param maxsize        maximum number of keys to keep
     * @param featuresBucket bucket to store under {@code id}
     * @return element dropped from top or null if accepted into top with no
     *         drops. returns the offered key if it was rejected for update
     *         or inclusion in the top.
     */
    public String update(String id, int maxsize, ReplaceableFeaturesBucket featuresBucket) {
        String removed = null;
        if (featuresBucket.getHits() >= minVal) {
            // at least as many hits as the cached minimum: goes into top
            boolean newentry = featuresMap.get(id) == null;
            boolean maxed = featuresMap.size() >= maxsize;
            // only remove if topN is full and we're not updating an existing entry
            if (maxed && newentry) {
                if (minKey == null && minVal == 0) {
                    recalcMin(maxed, newentry, id);
                }
                featuresMap.remove(minKey);
                removed = minKey;
            }
            // update or add entry
            featuresMap.put(id, featuresBucket);
            // recalc min *only* if the min entry was removed or updated
            // checking for null minkey is critical check for empty topN as it
            // sets first min
            if (featuresBucket.getHits() == minVal) {
                minKey = id;
            } else {
                recalcMin(maxed, newentry, id);
            }
        } else if (featuresMap.size() < maxsize) {
            // fewer hits than the cached minimum, but there is still room:
            // store it and make it the new minimum
            featuresMap.put(id, featuresBucket);
            if (minKey == null || featuresBucket.getHits() < minVal) {
                minKey = id;
                minVal = featuresBucket.getHits();
            }
        } else {
            // not eligible for top
            removed = id;
        }
        return removed;
    }

    /** Small manual demo: increments a few keys and prints the sorted summary. */
    public static void main(String[] args) {
        FeaturesKeyTopper keyTopper = new FeaturesKeyTopper();
        keyTopper.init().setLossy(true);
        keyTopper.increment("1", 10000, "foo", "bar");
        keyTopper.increment("1", 10000, "bar");
        keyTopper.increment("2", 10000, "one");
        keyTopper.increment("3", 10000, "bar");
        keyTopper.increment("3", 10000, "foo", "bar", "foo");
        for (Map.Entry<String, Long> s : keyTopper.getSortedEntries()) {
            System.out.println(s.getKey() + "=" + s.getValue());
        }
    }
}