/*
* Seldon -- open source prediction engine
* =======================================
*
* Copyright 2011-2015 Seldon Technologies Ltd and Rummble Ltd (http://www.seldon.io/)
*
* ********************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ********************************************************************************************
*/
package io.seldon.recommendation;
import io.seldon.sv.SemanticVectorsManager;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
/**
 * Mutable configuration holder describing how recommendations are produced:
 * which recommenders, sorters and item comparators to run, the strategy used
 * to combine them, and a set of tuning parameters.
 *
 * <p>The class performs no synchronization of its own. {@link #clone()}
 * returns a copy whose lists and date are independent of the original.
 *
 * <p>Note on equality: {@link #equals(Object)} and {@link #hashCode()} only
 * consider a subset of the fields (see {@link #equals(Object)}).
 *
 * @author rummble
 */
public class CFAlgorithm implements Cloneable, Serializable {

    /** Identifiers of the recommenders that can be listed in {@link #getRecommenders()}. */
    public enum CF_RECOMMENDER {
        RELEVANCE,
        TRUST_ITEMBASED,
        BEST_PREDICTION,
        MOST_POPULAR,
        MOST_POPULAR_ITEM_CATEGORY,
        CLUSTER_COUNTS,
        CLUSTER_COUNTS_SIGNIFICANT,
        CLUSTER_COUNTS_DYNAMIC,
        CLUSTER_COUNTS_GLOBAL,
        CLUSTER_COUNTS_ITEM_CATEGORY,
        SEMANTIC_VECTORS_SORT,
        SEMANTIC_VECTORS_RECENT_TAGS,
        CLUSTER_COUNTS_FOR_ITEM,
        CLUSTER_COUNTS_FOR_ITEM_SIGNIFICANT,
        RECENT_ITEMS,
        SIMILAR_ITEMS,
        RECENT_SIMILAR_ITEMS,
        // NOTE: the misspelling is part of the public constant name and of any
        // stored configuration that refers to it; do not "fix" it.
        ITEM_SIMILARITY_RECOMENDER,
        TAG_CLUSTER_COUNTS,
        R_RECENT_ITEMS,
        MATRIX_FACTOR,
        RECENT_MATRIX_FACTOR,
        TOPIC_MODEL,
        RECENT_TOPIC_MODEL,
        SEMANTIC_VECTORS
    }

    /** Identifiers of the sorters that can be listed in {@link #getSorters()}. */
    public enum CF_SORTER {
        RELEVANCE,
        TAG_SIMILARITY,
        DEMOGRAPHICS,
        MAHOUT_FPGROWTH,
        SEMANTIC_VECTORS,
        CLUSTER_COUNTS,
        CLUSTER_COUNTS_DYNAMIC,
        NOOP,
        MOST_POPULAR_MEMBASED,
        MOST_POPULAR_WEIGHTED_MEMBASED,
        MOST_RECENT_MEMBASED,
        MOST_POP_RECENT_MEMBASED,
        WEB_SIMILARITY,
        COOCCURRENCE,
        STORM_TRUST
    }

    /** Identifiers of the item comparators that can be listed in {@link #getItemComparators()}. */
    public enum CF_ITEM_COMPARATOR {
        SEMANTIC_VECTORS,
        TRUST_ITEM
    }

    /** Strategy names used for {@link #getSorterStrategy()} and {@link #getItemComparatorStrategy()}. */
    public enum CF_STRATEGY {
        FIRST_SUCCESSFUL,
        ORDERED,
        WEIGHTED,
        RANK_SUM,
        ADD_MISSING
    }

    /** Post-processing step identifiers. Not referenced by any field of this class. */
    public enum CF_POSTPROCESSING {
        NONE,
        REORDER_BY_POPULARITY,
        TIME_HITS,
        HITS,
        HITS_WEIGHTED,
        WEB_SIMILARITY,
        ADD_MISSING
    }

    /** Cluster algorithm identifiers, see {@link #getClusterAlgorithm()}. */
    public enum CF_CLUSTER_ALGORITHM {
        NONE,
        LDA_USER,
        LDA_ITEM,
        DIMENSION
    }

    private static final Logger logger = Logger.getLogger(CFAlgorithm.class.getName());

    private List<CF_ITEM_COMPARATOR> itemComparators = new ArrayList<>();
    private CF_STRATEGY itemComparatorStrategy = CF_STRATEGY.FIRST_SUCCESSFUL;
    private List<CF_RECOMMENDER> recommenders = new ArrayList<>();
    private List<CF_SORTER> sorters = new ArrayList<>();
    private CF_STRATEGY sorterStrategy = CF_STRATEGY.FIRST_SUCCESSFUL;
    private Date date; // base algorithm to run as if from this date

    // ~ Fields #2: originally in RummbleLabsClient ~
    private String name;
    private String recTag;
    private int userCFLimit = 50;
    private int transactionActionType = 0; // the action_type that defines transactions (purchases, page views)

    // Recommendation parameters
    private int recommendationCachingTimeSecs = 0;

    // Semantic Vector parameters
    private int minNumTxsForSV = 5;
    private int txHistorySizeForSV = 1;
    private int recentArticlesForSV = 1000;
    private boolean ignorePerfectSVMatches = true;
    private String svPrefix = SemanticVectorsManager.SV_TEXT_NEW_LOC_PATTERN;

    // Topic models
    int minNumTagsForTopicWeights = 4;

    // Cluster parameters
    double longTermWeight = 1.0D;
    double shortTermWeight = 1.0D;
    double decayRateSecs = 3600;
    String categoryDim = "category";
    int numRecentActions = 0;
    int tagAttrId = 9;
    String tagTable = "varchar";

    // Recent items parameter
    private int numRecentItems = 200;

    // Ranking parameters
    private boolean rankingRemoveHistory = true; // remove recent items for a user from items to rank

    // Recommendation remove ignore recommendations
    boolean removeIgnoredRecommendations = false;
    float recommendationDiversity = 1.0f; // should be a value >= 1.0 A value of 1.0 is no diversity.
    CF_CLUSTER_ALGORITHM clusterAlgorithm = CF_CLUSTER_ALGORITHM.NONE;
    int minNumberItemsForValidClusterResult = 0;
    boolean useBucketCluster = false; // add cluster counts for users not in any cluster to a single "bucket" cluster.
    String abTestingKey = null;
    // ~ END field ~

    /** Creates an algorithm configuration with every parameter at its default value. */
    public CFAlgorithm() {
    }

    // ------------------------------------------------------------------
    // Accessors
    // ------------------------------------------------------------------

    /** @return the live (not copied) list of configured recommenders; may be null if set so */
    public List<CF_RECOMMENDER> getRecommenders() {
        return recommenders;
    }

    public void setRecommenders(List<CF_RECOMMENDER> recommenders) {
        this.recommenders = recommenders;
    }

    public int getTagAttrId() {
        return tagAttrId;
    }

    public void setTagAttrId(int tagAttrId) {
        this.tagAttrId = tagAttrId;
    }

    public String getTagTable() {
        return tagTable;
    }

    public void setTagTable(String tagTable) {
        this.tagTable = tagTable;
    }

    public String getCategoryDim() {
        return categoryDim;
    }

    public void setCategoryDim(String categoryDim) {
        this.categoryDim = categoryDim;
    }

    public boolean isUseBucketCluster() {
        return useBucketCluster;
    }

    public void setUseBucketCluster(boolean useBucketCluster) {
        this.useBucketCluster = useBucketCluster;
    }

    public String getAbTestingKey() {
        return abTestingKey;
    }

    public void setAbTestingKey(String abTestingKey) {
        this.abTestingKey = abTestingKey;
    }

    public boolean isIgnorePerfectSVMatches() {
        return ignorePerfectSVMatches;
    }

    public void setIgnorePerfectSVMatches(boolean ignorePerfectSVMatches) {
        this.ignorePerfectSVMatches = ignorePerfectSVMatches;
    }

    public int getMinNumberItemsForValidClusterResult() {
        return minNumberItemsForValidClusterResult;
    }

    public void setMinNumberItemsForValidClusterResult(
            int minNumberItemsForValidClusterResult) {
        this.minNumberItemsForValidClusterResult = minNumberItemsForValidClusterResult;
    }

    public CF_CLUSTER_ALGORITHM getClusterAlgorithm() {
        return clusterAlgorithm;
    }

    public void setClusterAlgorithm(CF_CLUSTER_ALGORITHM clusterAlgorithm) {
        this.clusterAlgorithm = clusterAlgorithm;
    }

    public int getRecentArticlesForSV() {
        return recentArticlesForSV;
    }

    public void setRecentArticlesForSV(int recentArticlesForSV) {
        this.recentArticlesForSV = recentArticlesForSV;
    }

    public boolean isRemoveIgnoredRecommendations() {
        return removeIgnoredRecommendations;
    }

    public void setRemoveIgnoredRecommendations(boolean removeIgnoredRecommendations) {
        this.removeIgnoredRecommendations = removeIgnoredRecommendations;
    }

    /** @return the live (not copied) list of configured sorters; may be null if set so */
    public List<CF_SORTER> getSorters() {
        return sorters;
    }

    public void setSorters(List<CF_SORTER> sorters) {
        this.sorters = sorters;
    }

    public CF_STRATEGY getSorterStrategy() {
        return sorterStrategy;
    }

    public void setSorterStrategy(CF_STRATEGY sorterStrategy) {
        this.sorterStrategy = sorterStrategy;
    }

    /** @return the live (not copied) list of configured item comparators; may be null if set so */
    public List<CF_ITEM_COMPARATOR> getItemComparators() {
        return itemComparators;
    }

    public void setItemComparators(List<CF_ITEM_COMPARATOR> itemComparators) {
        this.itemComparators = itemComparators;
    }

    public CF_STRATEGY getItemComparatorStrategy() {
        return itemComparatorStrategy;
    }

    public void setItemComparatorStrategy(CF_STRATEGY itemComparatorStrategy) {
        this.itemComparatorStrategy = itemComparatorStrategy;
    }

    /** @return the date the algorithm should be run "as if from", or null if unset */
    public Date getDate() {
        return date;
    }

    public void setDate(Date date) {
        this.date = date;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getRecTag() {
        return recTag;
    }

    public void setRecTag(String recTag) {
        this.recTag = recTag;
    }

    public int getRecommendationCachingTimeSecs() {
        return recommendationCachingTimeSecs;
    }

    public void setRecommendationCachingTimeSecs(int recommendationCachingTimeSecs) {
        this.recommendationCachingTimeSecs = recommendationCachingTimeSecs;
    }

    /** @return the diversity factor; per the field comment it should be &gt;= 1.0, where 1.0 means no diversity */
    public float getRecommendationDiversity() {
        return recommendationDiversity;
    }

    public void setRecommendationDiversity(float recommendationDiversity) {
        this.recommendationDiversity = recommendationDiversity;
    }

    public boolean isRankingRemoveHistory() {
        return rankingRemoveHistory;
    }

    public void setRankingRemoveHistory(boolean rankingRemoveHistory) {
        this.rankingRemoveHistory = rankingRemoveHistory;
    }

    public int getMinNumTxsForSV() {
        return minNumTxsForSV;
    }

    public void setMinNumTxsForSV(int minNumTxsForSV) {
        this.minNumTxsForSV = minNumTxsForSV;
    }

    public int getTxHistorySizeForSV() {
        return txHistorySizeForSV;
    }

    public void setTxHistorySizeForSV(int txHistorySizeForSV) {
        this.txHistorySizeForSV = txHistorySizeForSV;
    }

    public double getLongTermWeight() {
        return longTermWeight;
    }

    public void setLongTermWeight(double longTermWeight) {
        this.longTermWeight = longTermWeight;
    }

    public double getShortTermWeight() {
        return shortTermWeight;
    }

    public void setShortTermWeight(double shortTermWeight) {
        this.shortTermWeight = shortTermWeight;
    }

    public double getDecayRateSecs() {
        return decayRateSecs;
    }

    public void setDecayRateSecs(double decayRateSecs) {
        this.decayRateSecs = decayRateSecs;
    }

    public String getSvPrefix() {
        return svPrefix;
    }

    public void setSvPrefix(String svPrefix) {
        this.svPrefix = svPrefix;
    }

    public int getNumRecentActions() {
        return numRecentActions;
    }

    public void setNumRecentActions(int numRecentActions) {
        this.numRecentActions = numRecentActions;
    }

    public int getNumRecentItems() {
        return numRecentItems;
    }

    public void setNumRecentItems(int numRecentItems) {
        this.numRecentItems = numRecentItems;
    }

    public int getMinNumTagsForTopicWeights() {
        return minNumTagsForTopicWeights;
    }

    public void setMinNumTagsForTopicWeights(int minNumTagsForTopicWeights) {
        this.minNumTagsForTopicWeights = minNumTagsForTopicWeights;
    }

    // ------------------------------------------------------------------
    // toString, equals, clone, hashCode
    // ------------------------------------------------------------------

    /**
     * Builds a one-line summary: the rec tag (or {@code DEFAULT} when none is
     * set) followed by the numbered recommenders, sorters and item comparators.
     * Lists that have been set to {@code null} are skipped.
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        buf.append(" RecTag:").append(recTag != null ? recTag : "DEFAULT");

        int position = 1;
        if (recommenders != null) {
            for (CF_RECOMMENDER recommender : recommenders) {
                buf.append(" Recommender").append(position++).append(":").append(recommender.name());
            }
        }

        position = 1;
        if (sorters != null) {
            for (CF_SORTER sorter : sorters) {
                buf.append(" Sorter").append(position++).append(":").append(sorter.name());
            }
        }

        position = 1;
        if (itemComparators != null) {
            for (CF_ITEM_COMPARATOR comparator : itemComparators) {
                buf.append(" Item Comparator").append(position++).append(":").append(comparator.name());
            }
        }
        return buf.toString();
    }

    /**
     * Returns a copy of this configuration. The recommender, sorter and item
     * comparator lists and the date are copied, so modifying them on the
     * clone does not affect this instance (and vice versa).
     *
     * @return an independent copy of this object
     * @throws CloneNotSupportedException declared for signature compatibility;
     *         propagated from {@link Object#clone()}
     */
    @Override
    public CFAlgorithm clone() throws CloneNotSupportedException {
        CFAlgorithm copy = (CFAlgorithm) super.clone();
        // Object.clone() is shallow: without these copies the clone would
        // share its mutable lists and Date with the original.
        copy.itemComparators = copyOf(itemComparators);
        copy.recommenders = copyOf(recommenders);
        copy.sorters = copyOf(sorters);
        copy.date = (date != null) ? (Date) date.clone() : null;
        return copy;
    }

    /** Null-safe copy of a list into a new {@link ArrayList}. */
    private static <T> List<T> copyOf(List<T> source) {
        return (source != null) ? new ArrayList<T>(source) : null;
    }

    /**
     * Compares the fields that identify an algorithm configuration:
     * transactionActionType, userCFLimit, date, item comparators and their
     * strategy, name, recommenders, sorters and their strategy, the long and
     * short term weights, and numRecentItems. Other fields are not compared.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        CFAlgorithm that = (CFAlgorithm) o;
        return transactionActionType == that.transactionActionType
                && userCFLimit == that.userCFLimit
                && same(date, that.date)
                && itemComparatorStrategy == that.itemComparatorStrategy
                && same(itemComparators, that.itemComparators)
                && same(name, that.name)
                && same(recommenders, that.recommenders)
                && sorterStrategy == that.sorterStrategy
                && same(sorters, that.sorters)
                // primitive comparison kept on purpose (0.0 == -0.0), matching doubleHash below
                && longTermWeight == that.longTermWeight
                && shortTermWeight == that.shortTermWeight
                && numRecentItems == that.numRecentItems;
    }

    /** Null-safe {@code equals}. */
    private static boolean same(Object a, Object b) {
        return (a != null) ? a.equals(b) : b == null;
    }

    /**
     * Hash code over the same fields that {@link #equals(Object)} compares
     * (apart from being a function of them only), so equal objects always
     * produce equal hash codes.
     */
    @Override
    public int hashCode() {
        int result = itemComparators != null ? itemComparators.hashCode() : 0;
        result = 31 * result + (itemComparatorStrategy != null ? itemComparatorStrategy.hashCode() : 0);
        result = 31 * result + (recommenders != null ? recommenders.hashCode() : 0);
        result = 31 * result + (sorters != null ? sorters.hashCode() : 0);
        result = 31 * result + (sorterStrategy != null ? sorterStrategy.hashCode() : 0);
        result = 31 * result + (date != null ? date.hashCode() : 0);
        result = 31 * result + (name != null ? name.hashCode() : 0);
        result = 31 * result + userCFLimit;
        result = 31 * result + transactionActionType;
        result = 31 * result + doubleHash(longTermWeight);
        result = 31 * result + doubleHash(shortTermWeight);
        // Must be numRecentItems (compared by equals), not numRecentActions
        // (ignored by equals); otherwise equal objects could hash differently.
        result = 31 * result + numRecentItems;
        return result;
    }

    /** Hash of a double in which 0.0 and -0.0 both map to 0, consistent with {@code ==}. */
    private static int doubleHash(double value) {
        long bits = value != +0.0d ? Double.doubleToLongBits(value) : 0L;
        return (int) (bits ^ (bits >>> 32));
    }

    // ------------------------------------------------------------------
    // Parameter parsing / logging helpers
    // ------------------------------------------------------------------

    /**
     * Sets one configuration parameter from its textual form.
     *
     * <p>Recognised field names: {@code item_comparators}, {@code recommenders}
     * and {@code sorters} (all values are used, each parsed as the matching
     * enum constant name); {@code item_comparator_strategy},
     * {@code sorter_strategy}, {@code long_term_cluster_weight},
     * {@code short_term_cluster_weight}, {@code recent_articles_sv},
     * {@code tx_history_sv}, {@code tag_attr_id}, {@code sv_prefix} and
     * {@code num_recent_actions} (only the first value is used).
     *
     * <p>Nothing is thrown to the caller: an unrecognised field or a value
     * that cannot be parsed is logged as an error and the configuration is
     * left unchanged for that field. A null or empty {@code values} list is
     * ignored silently.
     *
     * @param field  the parameter name
     * @param values the parameter value(s)
     */
    public void setParameter(String field, List<String> values) {
        try {
            if (values == null || values.isEmpty()) {
                return;
            }
            String value = values.get(0); // single-valued fields use the first entry

            if ("item_comparators".equals(field)) {
                setItemComparators(parseEnums(CF_ITEM_COMPARATOR.class, values));
            } else if ("item_comparator_strategy".equals(field)) {
                setItemComparatorStrategy(CF_STRATEGY.valueOf(value));
            } else if ("recommenders".equals(field)) {
                setRecommenders(parseEnums(CF_RECOMMENDER.class, values));
            } else if ("sorters".equals(field)) {
                setSorters(parseEnums(CF_SORTER.class, values));
            } else if ("sorter_strategy".equals(field)) {
                setSorterStrategy(CF_STRATEGY.valueOf(value));
            } else if ("long_term_cluster_weight".equals(field)) {
                setLongTermWeight(Double.parseDouble(value));
            } else if ("short_term_cluster_weight".equals(field)) {
                setShortTermWeight(Double.parseDouble(value));
            } else if ("recent_articles_sv".equals(field)) {
                setRecentArticlesForSV(Integer.parseInt(value));
            } else if ("tx_history_sv".equals(field)) {
                setTxHistorySizeForSV(Integer.parseInt(value));
            } else if ("tag_attr_id".equals(field)) {
                setTagAttrId(Integer.parseInt(value));
            } else if ("sv_prefix".equals(field)) {
                setSvPrefix(value);
            } else if ("num_recent_actions".equals(field)) {
                setNumRecentActions(Integer.parseInt(value));
            } else {
                final String message = "Field : " + field + " not recognized";
                // the Exception is only created to get a stack trace into the log
                logger.error(message, new Exception(message));
            }
        } catch (Exception e) {
            // best effort by design: a bad value must not abort configuration loading
            logger.error("Not able to process the field : " + field + " with value: " + values, e);
        }
    }

    /**
     * Converts constant names to enum constants of the given type.
     *
     * @throws IllegalArgumentException if a name matches no constant
     * @throws NullPointerException     if a name is null
     */
    private static <E extends Enum<E>> List<E> parseEnums(Class<E> type, List<String> names) {
        List<E> parsed = new ArrayList<E>(names.size());
        for (String constantName : names) {
            parsed.add(Enum.valueOf(type, constantName));
        }
        return parsed;
    }

    /**
     * Renders the sorter configuration for logging: the sorter names joined
     * with {@code |}, followed by {@code ;} and the sorter strategy name when
     * a strategy is set, e.g. {@code RELEVANCE|NOOP;FIRST_SUCCESSFUL}.
     *
     * @return the log representation; never null
     */
    public String toLogSorter() {
        StringBuilder res = new StringBuilder();
        // CF_SORTER
        if (sorters != null) {
            for (CF_SORTER s : sorters) {
                if (res.length() > 0) {
                    res.append('|');
                }
                res.append(s.name());
            }
        }
        // CF_STRATEGY
        if (sorterStrategy != null) {
            res.append(';').append(sorterStrategy.name());
        }
        return res.toString();
    }
}