package org.styloot.hobo.itemfinders; import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.styloot.hobo.*; import org.styloot.hobo.itemfinders.*; import org.styloot.hobo.iterators.*; public class SubsetIndexItemFinder implements ItemFinder { private static final Logger log = LoggerFactory.getLogger(SubsetIndexItemFinder.class); private static int MIN_SUBSET_ITEM_COUNT = 50; public SubsetIndexItemFinder(Collection<Item> myItems, String cat) { items = new Vector<Item>(myItems); Collections.sort(items); category = cat; //First cook up map of FeatureSet to Items Map<FeatureSet,Vector<Item>> featuresToItemsMapTemp = new HashMap<FeatureSet,Vector<Item>>(); for (Item i : items) { FeatureSet f = new FeatureSet(i.features); if (!featuresToItemsMapTemp.containsKey(f)) { featuresToItemsMapTemp.put(f, new Vector<Item>()); } featuresToItemsMapTemp.get(f).add(i); } //Filter off small subsets Vector<Item> oddItems = new Vector<Item>(); for (FeatureSet f : featuresToItemsMapTemp.keySet()) { int size = featuresToItemsMapTemp.get(f).size(); if (size < MIN_SUBSET_ITEM_COUNT) { oddItems.addAll(featuresToItemsMapTemp.get(f)); } else { featuresToItemsMap.put(f, featuresToItemsMapTemp.get(f)); } } //Now featuresToItemsMap is created. //Create oddItemsIndex oddItemsFinder = new ShallowIndexItemFinder(oddItems, cat); //Now build index of Feature->FeatureSets for (FeatureSet f : featuresToItemsMap.keySet()) { for (Feature feature : f.features) { if (!featureToFeatureSetsMap.containsKey(feature)) { featureToFeatureSetsMap.put(feature, new HashSet<FeatureSet>()); } featureToFeatureSetsMap.get(feature).add(f); } } for (Feature feature : featureToFeatureSetsMap.keySet()) { //Now convert from HashSets to vectors Collection<FeatureSet> featureSets = featureToFeatureSetsMap.get(feature); featureToFeatureSetsMap.put(feature, new Vector<FeatureSet>(featureSets)); } log.info("Created SubsetIndexItemFinder for " + category + " with " + numSubsets() + " subsets, " + size() + " items and " + oddItemsFinder.size() + " odd items."); } private Vector<Item> items; private String category; private final ShallowIndexItemFinder oddItemsFinder; public int size() { return items.size(); } public int numSubsets() { return featuresToItemsMap.size(); } public Iterator<Item> getItems() { return items.iterator(); } public Iterator<Item> find(Collection<String> featuresAsStrings, CIELabColor color, double distance, int minPrice, int maxPrice) { if (featuresAsStrings == null || featuresAsStrings.size() == 0) { return findNoFeatures(color, distance, minPrice, maxPrice); } Vector<Iterator<Item>> iterators = new Vector<Iterator<Item>>(); //Make sure iterators contains oddItems iterators.add( oddItemsFinder.find(featuresAsStrings, color, distance, minPrice, maxPrice) ); Feature[] features = Feature.getFeatures(featuresAsStrings); for (FeatureSet f : getFeatureSetsContainingFeature(features)) { Iterator<Item> iterator = featuresToItemsMap.get(f).iterator(); iterators.add( filterCostColor(iterator, color, distance, minPrice, maxPrice) ); } if (iterators.size() > 1) { return new CombinedIterator(iterators); } if (iterators.size() == 1) { //Avoid overhead of CombinedIterator if there is only 1 return iterators.get(0); } return new Iterator<Item>() { //An Anonymous null iterator class - has no items in it. public boolean hasNext() { return false; } public Item next() { throw new NoSuchElementException(); } public void remove() { throw new UnsupportedOperationException("Remove not implemented"); } }; }; private Iterator<Item> filterCostColor(Iterator<Item> iterator, CIELabColor color, double distance, int minPrice, int maxPrice) { iterator = CostFilterIterator.wrap(iterator, minPrice, maxPrice); iterator = ColorFilterIterator.wrap(iterator, color, distance); return iterator; } private Iterator<Item> findNoFeatures(CIELabColor color, double distance, int minPrice, int maxPrice) { return filterCostColor(getItems(), color, distance, minPrice, maxPrice); } private Collection<FeatureSet> getFeatureSetsContainingFeature(Feature[] features) { //Precondition - features are *assumed* to be sorted //This will not work if they aren't. Collection<FeatureSet> smallestSetOfPossibleFeatureSets = null; int maxSize = Integer.MAX_VALUE; for (Feature feature : features) { Collection<FeatureSet> featureCollection = featureToFeatureSetsMap.get(feature); if (featureCollection == null) { //In this case, at least one of our features is not contained in any feature set. return new Vector<FeatureSet>(); //SmallestSetOfPossibleFeatureSets should be null here. } if (featureCollection.size() < maxSize) { maxSize = featureCollection.size(); smallestSetOfPossibleFeatureSets = featureCollection; } } if (smallestSetOfPossibleFeatureSets == null) { return new Vector<FeatureSet>(); } //We now have the minimal set of feature sets, i.e. every possible superset of features is contained in smallestSetOfPossibleFeatureSets. //Lets throw away the items that don't contain all features. Vector<FeatureSet> result = new Vector<FeatureSet>(); for (FeatureSet f : smallestSetOfPossibleFeatureSets) { if (f.containsSorted(features)) { result.add(f); } } return result; } private final Map<FeatureSet,Vector<Item>> featuresToItemsMap = new HashMap<FeatureSet,Vector<Item>>(); private final Map<Feature,Collection<FeatureSet>> featureToFeatureSetsMap = new HashMap<Feature,Collection<FeatureSet>>(); private static class FeatureSet { public FeatureSet(Feature[] features) { this.features = features; Arrays.sort(this.features); } public boolean containsSorted(Feature[] feats) { return Util.isSubsetSorted(feats, features); } @Override public int hashCode() { int result = 0; for (Feature f : features) { result += f.getId(); } return result; } @Override public boolean equals(Object otherObj) { FeatureSet other = (FeatureSet) otherObj; if (features.length != other.features.length) { return false; } for (int i=0;i<features.length;i++) { if (features[i] != other.features[i]) { return false; } } return true; } private Feature[] features; } }