/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    PrunedSets.java
 *    Copyright (C) 2009-2010 Aristotle University of Thessaloniki, Thessaloniki, Greece
 */
package mulan.classifier.transformation;

import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import mulan.data.LabelSet;
import mulan.data.DataUtils;
import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

/**
 * Class that implements the PrunedSets algorithm.
 * <p>
 * Labelsets that are rejected by the pruning step are not simply discarded:
 * their instances are re-introduced with frequent subsets of the rejected
 * labelset, selected according to a {@link Strategy}.
 *
 * @author Grigorios Tsoumakas
 * @version June 4, 2010
 */
public class PrunedSets extends LabelsetPruning {

    /** strategies for processing infrequent labelsets */
    public enum Strategy {

        /**
         * Strategy A: rank subsets firstly by the number of labels they
         * contain and secondly by the times they occur, then keep top b ranked
         */
        A,
        /**
         * Strategy B: keep all subsets of size greater than b
         */
        B;
    }

    /** strategy for processing infrequent labelsets */
    private Strategy strategy;

    /** parameter of strategy for processing infrequent labelsets */
    private int b;

    /**
     * Constructor that initializes learner with base algorithm, parameter p
     * and strategy for processing infrequent labelsets
     *
     * @param classifier base single-label classification algorithm
     * @param aP number of instances required for a labelset to be included.
     * @param aStrategy strategy for processing infrequent labelsets
     * @param aB parameter of the strategy for processing infrequent labelsets
     */
    public PrunedSets(Classifier classifier, int aP, Strategy aStrategy, int aB) {
        super(classifier, aP);
        b = aB;
        strategy = aStrategy;
        setConfidenceCalculationMethod(2);
        setMakePredictionsBasedOnConfidences(false);
    }

    /**
     * Returns an instance of a TechnicalInformation object, containing
     * detailed information about the technical background of this class,
     * e.g., paper reference or book this class is based on.
     *
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);
        result.setValue(Field.AUTHOR, "Read, Jesse and Pfahringer, Bernhard and Holmes, Geoff");
        result.setValue(Field.TITLE, "Multi-Label Classification using Ensembles of Pruned Sets");
        result.setValue(Field.PAGES, "995-1000");
        result.setValue(Field.BOOKTITLE, "Proc. 8th IEEE International Conference on Data Mining (ICDM 2008)");
        result.setValue(Field.YEAR, "2008");
        return result;
    }

    /**
     * Builds replacement instances for a rejected labelset: every instance
     * stored for {@code ls} is copied once per selected subset of {@code ls},
     * with its label attributes overwritten by that subset.
     * <p>
     * Only subsets that are present in the training set and occur more than
     * p times are considered. Strategy A keeps the top b of them, ranked by
     * size and then by frequency; strategy B keeps those with more than b
     * labels. If the subsets of {@code ls} cannot be enumerated, the failure
     * is logged and an empty list is returned.
     *
     * @param ls the rejected labelset
     * @return the newly created instances (possibly empty); {@code null} only
     *         for a strategy other than A or B
     */
    @Override
    ArrayList<Instance> processRejected(LabelSet ls) {
        switch (strategy) {
            case A:
                return createRelabeledCopies(ls, selectTopRankedSubsets(ls));
            case B:
                return createRelabeledCopies(ls, selectLargeSubsets(ls));
            default:
                // not reachable with the two strategies declared above;
                // the return value is kept as it was for such a case
                return null;
        }
    }

    /**
     * Collects the subsets of a labelset that exist in the training set and
     * occur more than p times, in the order {@code getSubsets()} yields them.
     *
     * @param ls the labelset to split into smaller ones
     * @return the frequent subsets; empty when none qualify or when the
     *         subsets could not be enumerated
     */
    private ArrayList<LabelSet> collectFrequentSubsets(LabelSet ls) {
        ArrayList<LabelSet> frequent = new ArrayList<LabelSet>();
        ArrayList<LabelSet> subsets = null;
        try {
            subsets = ls.getSubsets();
        } catch (Exception ex) {
            Logger.getLogger(PrunedSets.class.getName()).log(Level.SEVERE,
                    "Could not enumerate the subsets of labelset " + ls, ex);
        }
        if (subsets == null) {
            // enumeration failed: there is nothing to re-introduce, so do
            // not fall through to iterating a null list
            return frequent;
        }
        for (LabelSet candidate : subsets) {
            // check if it exists in the training set
            if (!ListInstancePerLabel.containsKey(candidate)) {
                continue;
            }
            // check if it occurs more than p times
            if (ListInstancePerLabel.get(candidate).size() <= p) {
                continue;
            }
            frequent.add(candidate);
        }
        return frequent;
    }

    /**
     * Strategy A selection: ranks the frequent subsets by size and then by
     * number of occurrences (both descending) and keeps the first b.
     * A non-positive b, or a b not smaller than the number of ranked
     * subsets, keeps all of them.
     *
     * @param ls the rejected labelset
     * @return the selected subsets in rank order
     */
    private ArrayList<LabelSet> selectTopRankedSubsets(LabelSet ls) {
        ArrayList<LabelSet> ranked = new ArrayList<LabelSet>();
        for (LabelSet candidate : collectFrequentSubsets(ls)) {
            // insert before the first entry the candidate outranks; entries
            // with equal rank keep their enumeration order
            int position = 0;
            while (position < ranked.size() && !outranks(candidate, ranked.get(position))) {
                position++;
            }
            ranked.add(position, candidate);
        }
        if (b <= 0 || ranked.size() <= b) {
            return ranked;
        }
        // take the top b
        return new ArrayList<LabelSet>(ranked.subList(0, b));
    }

    /**
     * Tells whether one labelset ranks strictly higher than another: it has
     * more labels, or the same number of labels and more stored instances.
     *
     * @param first the labelset being placed
     * @param second the labelset it is compared against
     * @return {@code true} if {@code first} ranks strictly higher
     */
    private boolean outranks(LabelSet first, LabelSet second) {
        if (first.size() > second.size()) {
            return true;
        }
        return first.size() == second.size()
                && ListInstancePerLabel.get(first).size() > ListInstancePerLabel.get(second).size();
    }

    /**
     * Strategy B selection: keeps the frequent subsets that have more than
     * b labels.
     *
     * @param ls the rejected labelset
     * @return the selected subsets in enumeration order
     */
    private ArrayList<LabelSet> selectLargeSubsets(LabelSet ls) {
        ArrayList<LabelSet> selected = new ArrayList<LabelSet>();
        for (LabelSet candidate : collectFrequentSubsets(ls)) {
            // check if it has more than b elements
            if (candidate.size() <= b) {
                continue;
            }
            selected.add(candidate);
        }
        return selected;
    }

    /**
     * Creates, for every instance stored for the original labelset, one copy
     * per given subset, with the label attributes set from that subset.
     * Copies are grouped by source instance and follow the subset order.
     *
     * @param ls the original (rejected) labelset whose instances are copied
     * @param subsets the labelsets to assign to the copies
     * @return the newly created instances
     */
    private ArrayList<Instance> createRelabeledCopies(LabelSet ls, ArrayList<LabelSet> subsets) {
        ArrayList<Instance> newInstances = new ArrayList<Instance>();
        ArrayList<Instance> instances = ListInstancePerLabel.get(ls);
        for (Instance source : instances) {
            for (LabelSet subset : subsets) {
                double[] values = source.toDoubleArray();
                double[] labels = subset.toDoubleArray();
                for (int i = 0; i < numLabels; i++) {
                    // the stored value is flipped when the attribute's first
                    // nominal value is not "0" (presumably because the value
                    // index and the 0/1 label are then reversed -- confirm)
                    if (format.attribute(labelIndices[i]).value(0).equals("0")) {
                        values[labelIndices[i]] = labels[i];
                    } else {
                        values[labelIndices[i]] = 1 - labels[i];
                    }
                }
                // NOTE(review): the literal 1 is presumably the weight of the
                // new instance -- confirm against DataUtils.createInstance
                newInstances.add(DataUtils.createInstance(source, 1, values));
            }
        }
        return newInstances;
    }
}