/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.util;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import com.addthis.codec.codables.Codable;
/**
 * A trending algorithm that scores each URL by folding its per-timestamp hit counts into an
 * exponential moving average (see {@link #ema(double, double)}), weights that score by a
 * per-granularity factor, and ranks the resulting URL tree branches from highest to lowest value.
 */
public class TrendingScore implements Codable {

    // 15 mins (not referenced anywhere in this class)
    private static final long PERIOD_BETWEEN_RECALCULATION = 900000;
    /** Smoothing factor of the moving average: the weight given to the newest observation. */
    private double alpha = 0.3;
    /** Number of distinct urls a time series must yield to be considered sufficient on its own. */
    private int minResults = 50;
    private static final String DEFAULT_URL_SEP = "/";
    /** Protocol prefix removed from urls before they are added to the score tree. */
    private static final String HTTP_PREFIX = "http://";

    /** Creates a scorer with the default smoothing factor (0.3) and result threshold (50). */
    public TrendingScore() {
    }

    /**
     * @param alpha      smoothing factor used by {@link #ema(double, double)}
     * @param minResults number of distinct urls a single time series must produce for the
     *                   coarser-grained series to be skipped
     */
    public TrendingScore(double alpha, int minResults) {
        this.minResults = minResults;
        this.alpha = alpha;
    }

    /**
     * Scores urls from the supplied time series and returns the tree branches sorted by
     * descending value.
     * <p>
     * The series are consulted in order (hourly, daily, monthly) and evaluation stops at the
     * first one that yields at least {@code minResults} urls; coarser series only act as a
     * fallback. Scores of every series that was evaluated accumulate in the same tree.
     *
     * @param hourlyTrends  timestamp to topper map, weighted by 5; may be null or empty
     * @param dailyTrends   timestamp to topper map, weighted by 3; may be null or empty
     * @param monthlyTrends timestamp to topper map, weighted by 2; may be null or empty
     * @return the sorted branches, or {@code null} when no series was sufficient and the score
     *         tree is empty
     */
    public List<UrlTreeObject.TreeValue> getTrends(TreeMap<String, KeyTopper> hourlyTrends, TreeMap<String, KeyTopper> dailyTrends, TreeMap<String, KeyTopper> monthlyTrends) {
        URLTree scores = new URLTree();
        // Short-circuit on purpose: a finer-grained series with enough results wins outright.
        boolean sufficient = calculateTrends(scores, hourlyTrends, 5)
                || calculateTrends(scores, dailyTrends, 3)
                || calculateTrends(scores, monthlyTrends, 2);
        if (sufficient || scores.size() > 0) {
            return sortByValue(scores);
        }
        // Kept as null (not an empty list) so existing callers that null-check still work.
        return null;
    }

    /**
     * Computes a moving-average score for every url seen in {@code trends} and adds it, scaled
     * by {@code timeNormalizingFactor}, to {@code scores}.
     *
     * @return {@code true} when at least {@code minResults} distinct urls were scored
     */
    private boolean calculateTrends(URLTree scores, TreeMap<String, KeyTopper> trends, long timeNormalizingFactor) {
        if (trends == null || trends.isEmpty()) {
            return false;
        }
        // create a list of unique urls
        Set<String> urls = new HashSet<>();
        // create a map of time --> (url, hits)
        Map<String, Map<String, Long>> trendingMap = buildTrendingMap(trends, urls);
        for (String url : urls) {
            double score = 0;
            // buckets are visited in the TreeMap's key order, i.e. timestamps sorted as strings
            for (Map<String, Long> hitsAtTime : trendingMap.values()) {
                Long count = hitsAtTime.get(url);
                if (count != null) {
                    score = ema(score, count);
                }
            }
            scores.addURLPath(stripHttpPrefix(url), score * timeNormalizingFactor);
        }
        // every url in the set was scored exactly once above
        return urls.size() >= minResults;
    }

    /**
     * Removes a leading {@code http://} from {@code url}; any other url is returned unchanged.
     * Equivalent to {@code url.replaceAll("^http://", "")} without compiling a regex per call.
     */
    private static String stripHttpPrefix(String url) {
        return url.startsWith(HTTP_PREFIX) ? url.substring(HTTP_PREFIX.length()) : url;
    }

    /**
     * Splits a url (minus a leading {@code http://}) into its {@code /}-separated segments,
     * keeping trailing empty segments. Currently not called from within this class.
     *
     * @return the segments, or {@code null} when {@code url} is null
     */
    private List<String> branched(String url) {
        if (url == null) {
            return null;
        }
        return Arrays.asList(stripHttpPrefix(url).split(DEFAULT_URL_SEP, -1));
    }

    /**
     * One step of an exponential moving average.
     *
     * @param prevValue the running average so far; a non-positive value means "no history yet"
     * @param currValue the newest observation
     * @return {@code currValue} when there is no history, otherwise
     *         {@code prevValue + alpha * (currValue - prevValue)}
     */
    public double ema(double prevValue, double currValue) {
        if (prevValue > 0) {
            return prevValue + alpha * (currValue - prevValue);
        }
        return currValue;
    }

    /**
     * Absolute percentage difference of {@code currentCount} relative to {@code prevCount}.
     * Computed in floating point so the ratio is not truncated and the multiplication cannot
     * overflow. Currently not called from within this class.
     *
     * @return 0 when either count is 0, otherwise {@code |100 - 100 * currentCount / prevCount|}
     */
    private double percentageChangeBetween(long prevCount, long currentCount) {
        if (prevCount == 0 || currentCount == 0) {
            return 0;
        }
        return Math.abs(100.0 - (100.0 * currentCount) / prevCount);
    }

    /**
     * Flattens the topper time series into a sorted map of timestamp to (url, hits) and
     * collects every url encountered into {@code urls}.
     *
     * @param timeSeriesMap timestamp to topper map
     * @param urls          output set that receives each distinct url key
     * @return a {@link TreeMap} keyed by timestamp (natural string order)
     */
    private Map<String, Map<String, Long>> buildTrendingMap(Map<String, KeyTopper> timeSeriesMap, Set<String> urls) {
        Map<String, Map<String, Long>> trendingMap = new TreeMap<>();
        for (Map.Entry<String, KeyTopper> series : timeSeriesMap.entrySet()) {
            Map<String, Long> urlCount = new HashMap<>();
            trendingMap.put(series.getKey(), urlCount);
            for (Map.Entry<String, Long> hit : series.getValue().getSortedEntries()) {
                urls.add(hit.getKey());
                urlCount.put(hit.getKey(), hit.getValue());
            }
        }
        return trendingMap;
    }

    /**
     * Returns the branches obtained from {@code tree.getBranches("/")} as a new list sorted by
     * descending value.
     */
    public static List<UrlTreeObject.TreeValue> sortByValue(URLTree tree) {
        List<UrlTreeObject.TreeValue> list = new LinkedList<>(tree.getBranches(DEFAULT_URL_SEP));
        Collections.sort(list, new Comparator<UrlTreeObject.TreeValue>() {
            @Override
            public int compare(UrlTreeObject.TreeValue t1, UrlTreeObject.TreeValue t2) {
                // operands swapped for descending order; negating a compareTo result is unsafe
                return t2.getValue().compareTo(t1.getValue());
            }
        });
        return list;
    }
}