/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.util; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.text.ParsePosition; import java.text.SimpleDateFormat; import com.addthis.codec.codables.Codable; /** * Class that will assign a score to a Url based on its trend over time */ public class TimeSeriesScore implements Codable { double alpha = 0.3; public TimeSeriesScore() { } /** * returns the list of urls sorted by trending score from largest to smallest count. */ @SuppressWarnings({"unchecked", "rawtypes"}) public Map.Entry<String, Long>[] getSortedEntries(Map<String, KeyTopper> timeSeriesMap) { //timeSeriesMap = getSortedTimeSeriesMap(timeSeriesMap); Object[] dhArr = timeSeriesMap.keySet().toArray(); String firstStr = (String) dhArr[0]; String lastStr = (String) dhArr[dhArr.length - 1]; String pattern = "yyMMddHH"; SimpleDateFormat format = new SimpleDateFormat(pattern); Date firstTS = format.parse(firstStr, new ParsePosition(0)); Date lastTS = format.parse(lastStr, new ParsePosition(0)); int totalBins = (int) ((lastTS.getTime() - firstTS.getTime()) / 3600000) + 1; Map<String, Integer> timeBinLookup = new HashMap<>(); timeBinLookup.put(firstStr, 1); timeBinLookup.put(lastStr, totalBins); Date currenthour = firstTS; Calendar c = Calendar.getInstance(); for (int i = 2; i < totalBins; i++) { c.setTime(currenthour); c.add(Calendar.HOUR_OF_DAY, 1); currenthour = c.getTime(); timeBinLookup.put(format.format(currenthour), i); } List<KeyTopper> arrKT = new ArrayList<>(); Map.Entry<String, Long>[] KTmap; Set<String> urls = new HashSet<>(); Iterator<Entry<String, KeyTopper>> it = timeSeriesMap.entrySet().iterator(); while (it.hasNext()) { Entry<String, KeyTopper> pairs = it.next(); arrKT.add(pairs.getValue()); KTmap = pairs.getValue().getSortedEntries(); for (Entry<String, Long> entry : KTmap) { urls.add(entry.getKey()); } } Map<String, Long> map = new HashMap<>(); Iterator<String> itr = urls.iterator(); while (itr.hasNext()) { String urlItr = itr.next(); if (urlItr != null) { double meanSoFar = 0.0; double normalizedCount; double trendingScore = 0.0; int lastUpdated = 0; for (int i = 0; i < arrKT.size(); i++) { int binNo = timeBinLookup.get((String) dhArr[i]); Long count = arrKT.get(i).get(urlItr); Long currCount = 0L; while (lastUpdated < binNo) { lastUpdated = lastUpdated + 1; if (lastUpdated != binNo) { currCount = 0L; } else { currCount = (count == null) ? 0L : count; } meanSoFar = UpdateMeanSoFar(lastUpdated, currCount, meanSoFar); normalizedCount = currCount - meanSoFar; trendingScore = alpha * normalizedCount + (1 - alpha) * trendingScore; } } map.put(urlItr, (long) (1000 * trendingScore)); } } Map.Entry[] e = new Map.Entry[map.size()]; e = map.entrySet().toArray(e); Arrays.sort(e, new Comparator() { public int compare(Object arg0, Object arg1) { if (((Long) ((Map.Entry) arg1).getValue()) > ((Long) ((Map.Entry) arg0).getValue())) { return 1; } else if (((Long) ((Map.Entry) arg1).getValue()) < ((Long) ((Map.Entry) arg0).getValue())) { return -1; } else { return 0; } } }); return e; } /** * Update the Mean */ public double UpdateMeanSoFar(int BinNo, Long BinCount, double MeanSoFar) { return ((MeanSoFar * (BinNo - 1) + BinCount) / BinNo); } /** * Get SortedTimeSeriesMap sorted by the key * * @param timeSeriesMap * @return */ public Map<String, KeyTopper> getSortedTimeSeriesMap(Map<String, KeyTopper> timeSeriesMap) { Map<String, KeyTopper> sortedTimeSeriesMap = new LinkedHashMap<>(); TreeSet<String> keys = new TreeSet<>(timeSeriesMap.keySet()); for (String key : keys) { KeyTopper value = timeSeriesMap.get(key); if (value != null) { sortedTimeSeriesMap.put(key, value); } } return sortedTimeSeriesMap; } }