package edu.umn.cs.recsys;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import edu.umn.cs.recsys.dao.ItemTagDAO;
import org.grouplens.lenskit.core.Transient;
import org.grouplens.lenskit.vectors.MutableSparseVector;

import javax.inject.Inject;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * A vocabulary of tags.  This is a recommender component that provides access to the set of
 * tags and makes tag vector operations easier.  It normalizes tags to be case-insensitive.
 *
 * <p>Case normalization uses {@link Locale#ROOT}, so the mapping from tag text to tag ID does
 * not depend on the default locale of the JVM that builds or queries the vocabulary.
 *
 * @author <a href="http://www.grouplens.org">GroupLens Research</a>
 */
public class TagVocabulary {
    /** Maps each normalized (lower-cased) tag to its numeric ID. */
    private final Map<String, Long> tagMap;

    /**
     * Build the vocabulary from the tags reported by a DAO.  Each distinct normalized tag is
     * assigned an ID, starting at 1 and increasing in the order the tags are first encountered.
     *
     * @param tagDAO The DAO whose {@code getTagVocabulary()} result supplies the tags.
     */
    @Inject
    public TagVocabulary(@Transient ItemTagDAO tagDAO) {
        long nextId = 1;
        ImmutableMap.Builder<String, Long> bld = ImmutableMap.builder();
        // ImmutableMap.Builder rejects duplicate keys, so tags that collapse to the same
        // normalized form must be filtered out before they reach the builder.
        Set<String> seen = Sets.newHashSet();
        for (String tag : tagDAO.getTagVocabulary()) {
            String normed = normalize(tag);
            // Set.add returns true only the first time a value is inserted.
            if (seen.add(normed)) {
                bld.put(normed, nextId);
                nextId += 1;
            }
        }
        tagMap = bld.build();
    }

    /**
     * Create a new mutable sparse vector from the IDs of all tags in this vocabulary.
     *
     * @return The vector produced by {@code MutableSparseVector.create} for the tag IDs.
     */
    public MutableSparseVector newTagVector() {
        return MutableSparseVector.create(tagMap.values());
    }

    /**
     * Query whether the vocabulary has a tag.
     * @param tag The tag to query for (matched case-insensitively).
     * @return {@code true} if the vocabulary has the tag.
     */
    public boolean hasTag(String tag) {
        return tagMap.containsKey(normalize(tag));
    }

    /**
     * Get the ID for a tag.
     * @param tag The tag to query for (matched case-insensitively).
     * @return The id for the tag.
     * @throws IllegalArgumentException if the tag is not found.
     */
    public long getTagId(String tag) {
        Long id = tagMap.get(normalize(tag));
        if (id == null) {
            throw new IllegalArgumentException("tag not found: " + tag);
        }
        return id;
    }

    /**
     * Normalize a tag to the form used for map keys.  Lower-casing is done with
     * {@link Locale#ROOT} so that the result is the same regardless of the default locale
     * (for example, "I" never becomes a dotless "ı" under a Turkish locale).
     */
    private static String normalize(String tag) {
        return tag.toLowerCase(Locale.ROOT);
    }
}