/* * SlopeOne regression algorithm * */ package weka.classifiers.functions; import weka.classifiers.Classifier; import weka.core.Capabilities; import weka.core.Instance; import weka.core.Instances; import weka.core.Utils; import weka.core.WeightedInstancesHandler; import weka.core.Option; import weka.core.OptionHandler; import weka.core.Capabilities.Capability; import weka.filters.Filter; import weka.filters.supervised.attribute.NominalToBinary; import java.util.*; import java.io.*; public class SlopeOne extends Classifier implements WeightedInstancesHandler, OptionHandler { /** Array for storing coefficients of linear regression. */ private double[] m_Coefficients; /** Array for storing the average coefficients of linear regression. */ private double[] m_CoefficientsAverage; /**Number of examples for each couple of attributes */ private double[] m_Frequency; /** Which attributes are relevant? */ private boolean[] m_SelectedAttributes; /** Variable for storing transformed training data. */ private Instances m_TransformedData; /** The filter storing the transformation from nominal to binary attributes. */ private NominalToBinary m_TransformFilter; /** The index of the class attribute */ private int m_ClassIndex; /** checks true of we are dealing woth movielens-like data*/ private boolean movieLensData = false; /**The data stored for easy user-based searches*/ HashMap<Double, HashMap<Double, Double>> dataMovies; /** * Returns a string describing this classifier * @return a description of the classifier suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Slope one regresion algorithm"; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(4); newVector.addElement(new Option("\tSpecifies whether movielens dataset is present.\n" + "\t(default no)", "X", 0, "-X")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Produce debugging output. * (default no debugging output)</pre> * * <pre> -S <number of selection method> * Set the attribute selection method to use. 1 = None, 2 = Greedy. * (default 0 = M5' method)</pre> * * <pre> -C * Do not try to eliminate colinear attributes. * </pre> * * <pre> -R <double> * Set ridge parameter (default 1.0e-8). * </pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String selectionString = Utils.getOption('X', options); if (selectionString.length() != 0) { movieLensData = true; } else { movieLensData = false; } } /** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { String[] options = new String[1]; int current = 0; if (movieLensData) { options[current++] = "-X"; options[current++] = "" + movieLensData; } else { options[current++] = "-X false"; } return options; } public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NUMERIC_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Builds a SlopeOne regression model given the supplied training data. * * @param insts the training data. * @throws Exception if an error occurs */ public void buildClassifier(Instances data) throws Exception { if (movieLensData == false) { getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); // Preprocess instances m_TransformFilter = new NominalToBinary(); m_TransformFilter.setInputFormat(data); data = Filter.useFilter(data, m_TransformFilter); m_TransformedData = data; m_ClassIndex = data.classIndex(); // Turn all attributes on or off m_SelectedAttributes = new boolean[data.numAttributes()]; int j=0; for (int i = 0; i < data.numAttributes(); i++) { if (i != m_ClassIndex /*&& j<newData.numAttributes()&& data.attribute(i).equals(newData.attribute(j))*/) { m_SelectedAttributes[i] = true; ++j; } else m_SelectedAttributes[i] = false; } m_Coefficients = null; m_Frequency = new double[data.numAttributes()]; m_Coefficients = new double[data.numAttributes()]; m_CoefficientsAverage = new double[data.numAttributes()]; Enumeration enu = data.enumerateInstances(); Instance instance; while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); //if(!instance.isMissing(m_ClassIndex)) for (int i = 0; i < instance.numAttributes(); ++i) { if (!instance.isMissing(i) && i != m_ClassIndex&& m_SelectedAttributes[i]) { m_Frequency[i] += instance.weight(); m_Coefficients[i] += (instance.value(m_ClassIndex) - instance.value(i)) * instance.weight(); } } } for (int i = 0; i < data.numAttributes(); ++i) { if(m_Frequency[i]!=0) m_CoefficientsAverage[i] = m_Coefficients[i] / m_Frequency[i]; } } else { m_TransformedData = data; buildClassifierMovieLens(m_TransformedData); } // Save memory //m_TransformedData = new Instances(data, 0); } private void buildClassifierMovieLens(Instances data) { dataMovies = new HashMap<Double,HashMap<Double, Double>>(); //dataMoviesMov = new HashMap<Double,HashMap<Double, Double>>(); Enumeration enu = data.enumerateInstances(); Instance instance; while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); for (int i = 0; i < 3; ++i) { if (!dataMovies.containsKey(instance.value(0))) { dataMovies.put(instance.value(0), new HashMap<Double, Double>()); } //TODO dataMoviesMov dataMovies.get(instance.value(0)).put(instance.value(1), instance.value(2)); } } } public double classifyInstance(Instance instance) throws Exception { if (movieLensData) { return regressionPredictionMovieLens(instance); } // Transform the input instance Instance transformedInstance = instance; m_TransformFilter.input(transformedInstance); m_TransformFilter.batchFinished(); transformedInstance = m_TransformFilter.output(); // Calculate the dependent variable from the regression model return regressionPrediction(transformedInstance); } private double regressionPrediction(Instance transformedInstance) throws Exception { double result = 0; int column = 0; for (int j = 0; j < transformedInstance.numAttributes(); j++) { if ((m_ClassIndex != j) && (m_SelectedAttributes[j]) && !transformedInstance.isMissing(j)) { result = result + (m_CoefficientsAverage[j] + transformedInstance.value(j)) * m_Frequency[j]; column += m_Frequency[j]; } } result /= column; return result; } private double regressionPredictionMovieLens(Instance input) { HashMap<Double, Double> ratingsMade = new HashMap<Double,Double>(dataMovies.get(input.value(0))); //HashMap<Double, Double> usersRating = new HashMap<Double,Double>(dataMovies.get(input.value(0))); /* if (ratingsMade.containsKey(input.value(1))) { return ratingsMade.get(input.value(1)); } */ double result = 0; double goalMovie = input.value(1); double myUser = input.value(0); LinkedList<Double> closeUsers=new LinkedList<Double>(); Enumeration enumer = m_TransformedData.enumerateInstances(); Instance instance; /*Mean difference and Freaquency in movies-ratings data*/ HashMap<Double, Double> diffArr=new HashMap<Double, Double>(); HashMap<Double, Double> diffArrNum=new HashMap<Double, Double>(); while (enumer.hasMoreElements()) { instance = (Instance) enumer.nextElement(); for (int i = 0; i < 3; ++i) { if (ratingsMade.containsKey(instance.value(1)) && instance.value(1) != goalMovie && instance.value(0)!=myUser) { if (!closeUsers.contains(instance.value(0)) && dataMovies.get(instance.value(0)).containsKey(goalMovie)) { closeUsers.add(instance.value(0)); } } } } HashMap<Double, Double> ratings; while (!closeUsers.isEmpty()) { ratings = dataMovies.get(closeUsers.pop()); for (Double count : ratings.keySet()) { if (ratingsMade.containsKey(count) && count != goalMovie) { if (diffArr.containsKey(count)) { double tmp = diffArr.remove(count); diffArr.put(count,tmp + ratings.get(count) - ratings.get(goalMovie)); tmp = diffArrNum.remove(count); diffArrNum.put(count, tmp + 1); } else { diffArr.put(count, ratings.get(count) - ratings.get(goalMovie)); diffArrNum.put(count, 1.0); } } } } int count=0; for (Double cnt : ratingsMade.keySet()) { if(diffArr.containsKey(cnt)) { result += ratingsMade.get(cnt) - diffArr.get(cnt) / diffArrNum.get(cnt); ++count; } } return result/count; } public String toString() { if (m_TransformedData == null) { return "SlopeOne: No model built yet."; } try { StringBuffer text = new StringBuffer(); text.append("\nSlopeOne Regression Model\n\n"); if(!movieLensData) { boolean first = true; int num_Att = 0; text.append(m_TransformedData.classAttribute().name() + " = (\n\n"); for (int i = 0; i < m_TransformedData.numAttributes(); i++) { if ((i != m_ClassIndex) && (m_SelectedAttributes[i])) { if (!first) { text.append(" +\n\n"); } else { first = false; } text.append(m_TransformedData.attribute(i).name() + " "); if (m_CoefficientsAverage[i] > 0) { text.append("+"); } text.append(Utils.doubleToString(m_CoefficientsAverage[i], 4)); ++num_Att; } } text.append(" ) /" + num_Att + ";\n"); } else text.append("MovieLens Data Detected."); return text.toString(); } catch (Exception e) { return "Can't print SlopeOne Regression!"; } } public static void main(String argv[]) { runClassifier(new SlopeOne(), argv); } }