/* * Seldon -- open source prediction engine * ======================================= * Copyright 2011-2015 Seldon Technologies Ltd and Rummble Ltd (http://www.seldon.io/) * ********************************************************************************************** * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ********************************************************************************************** */ package io.seldon.recommendation.filters.tag; import io.seldon.recommendation.model.ModelManager; import io.seldon.resources.external.ExternalResourceStreamer; import io.seldon.resources.external.NewResourceNotifier; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Collections; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.log4j.Logger; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import com.fasterxml.jackson.databind.ObjectMapper; @Component public class TagAffinityFilterModelManager extends ModelManager<TagAffinityFilterModelManager.TagAffinityFilterModel> { private static Logger logger = Logger.getLogger(TagAffinityFilterModelManager.class.getName()); private final ExternalResourceStreamer featuresFileHandler; public static final String FILTER_NEW_LOC_PATTERN = "tagcluster"; @Autowired public TagAffinityFilterModelManager(ExternalResourceStreamer featuresFileHandler,NewResourceNotifier notifier) { super(notifier, Collections.singleton(FILTER_NEW_LOC_PATTERN)); this.featuresFileHandler = featuresFileHandler; } private Map<Long,Set<Integer>> loadUserTagClusters(BufferedReader reader) throws IOException { String line; ObjectMapper mapper = new ObjectMapper(); Map<Long,Set<Integer>> userToClustersMap = new ConcurrentHashMap<Long,Set<Integer>>(); while ((line = reader.readLine()) != null) { UserTagCluster userCluster = mapper.readValue(line.getBytes(),UserTagCluster.class); Set<Integer> clusters = userToClustersMap.get(userCluster.user); if (clusters == null) { clusters = new HashSet<Integer>(); userToClustersMap.put(userCluster.user, clusters); } clusters.add(userCluster.cluster); } return userToClustersMap; } private TagAffinityFilterModel loadClusters(String client,Map<Long,Set<Integer>> userToClustersMap,BufferedReader reader) throws IOException, NumberFormatException { String line; ObjectMapper mapper = new ObjectMapper(); Map<Integer,Set<String>> clusterToTagsMap = new ConcurrentHashMap<Integer,Set<String>>(); Map<Integer,Set<Integer>> groupToClustersMap = new ConcurrentHashMap<Integer,Set<Integer>>(); int numTags = 0; while ((line = reader.readLine()) != null) { String[] parts = line.split(","); String tag = parts[0]; int group = Integer.parseInt(parts[1]); int cluster = Integer.parseInt(parts[2]); Set<String> tags = clusterToTagsMap.get(cluster); if (tags == null) { tags = new HashSet<String>(); clusterToTagsMap.put(cluster, tags); } tags.add(tag); numTags++; Set<Integer> clusters = groupToClustersMap.get(group); if (clusters == null) { clusters = new HashSet<Integer>(); groupToClustersMap.put(group, clusters); } clusters.add(cluster); } logger.info("Loaded "+numTags+" tags for "+client); return new TagAffinityFilterModel(groupToClustersMap, clusterToTagsMap, userToClustersMap); } @Override protected TagAffinityFilterModel loadModel(String location, String client) { logger.info("Reloading user tag clusters for client: " + client); try { BufferedReader reader = new BufferedReader(new InputStreamReader(featuresFileHandler.getResourceStream(location + "/part-00000"))); Map<Long,Set<Integer>> userClusters = loadUserTagClusters(reader); reader.close(); reader = new BufferedReader(new InputStreamReader(featuresFileHandler.getResourceStream(location + "/tags.csv"))); TagAffinityFilterModel model = loadClusters(client,userClusters, reader); logger.info("Loaded tag clusters for "+client+" userClusters:"+model.userToClustersMap.size()+" clusters:"+model.clusterToTagsMap.size()+" groups:"+model.groupToClustersMap.size()); return model; } catch (IOException e) { logger.error("Couldn't reloadFeatures for client " + client, e); return null; } catch (Exception e) { logger.error("Couldn't reloadFeatures for client " + client, e); return null; } } public static class TagAffinityFilterModel { final Map<Integer,Set<Integer>> groupToClustersMap; final Map<Integer,Set<String>> clusterToTagsMap; final Map<Long,Set<Integer>> userToClustersMap; public TagAffinityFilterModel( Map<Integer, Set<Integer>> groupToClustersMap, Map<Integer, Set<String>> clusterToTagsMap, Map<Long, Set<Integer>> userToClustersMap) { super(); this.groupToClustersMap = groupToClustersMap; this.clusterToTagsMap = clusterToTagsMap; this.userToClustersMap = userToClustersMap; } } }