/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * HOMER.java * Copyright (C) 2009-2010 Aristotle University of Thessaloniki, Thessaloniki, Greece */ package mulan.classifier.meta; import java.util.Set; import mulan.classifier.MultiLabelLearner; import mulan.classifier.MultiLabelOutput; import mulan.data.MultiLabelInstances; import mulan.data.DataUtils; import mulan.classifier.meta.HierarchyBuilder.Method; import mulan.data.LabelsMetaData; import weka.core.Instance; import weka.core.Instances; import weka.core.TechnicalInformation; import weka.core.TechnicalInformation.*; /** * <!-- globalinfo-start --> * * <pre> * Class implementing the HOMER algorithm * </pre> * * For more information: * * <pre> * G. Tsoumakas, I. Katakis, I. Vlahavas, "Effective and Efficient Multilabel * Classification in Domains with Large Number of Labels", Proc. ECML/PKDD 2008 * Workshop on Mining Multidimensional Data (MMD'08), Antwerp, Belgium, 2008. * </pre> *f * <!-- globalinfo-end --> * * <!-- technical-bibtex-start --> BibTeX: * * <!-- technical-bibtex-end --> * * @author Grigorios Tsoumakas */ public class HOMER extends MultiLabelMetaLearner { private final int numClusters; private HMC hmc; private HierarchyBuilder hb; private Instances header; private Method method; private MultiLabelInstances m; private int numMetaLabels; public HOMER(MultiLabelLearner mll, int clusters, Method method) { super(mll); this.method = method; numClusters = clusters; } @Override protected void buildInternal(MultiLabelInstances trainingSet) throws Exception { debug("Learning the hierarchy of models"); hb = new HierarchyBuilder(numClusters, method); LabelsMetaData labelHierarchy = hb.buildLabelHierarchy(trainingSet); debug("Constructing the hierarchical multilabel dataset"); MultiLabelInstances meta = HierarchyBuilder.createHierarchicalDataset(trainingSet, labelHierarchy); header = new Instances(meta.getDataSet(), 0); debug("Training the hierarchical classifier"); hmc = new HMC(baseLearner); hmc.setDebug(getDebug()); hmc.build(meta); Set<String> leafLabels = trainingSet.getLabelsMetaData().getLabelNames(); Set<String> metaLabels = labelHierarchy.getLabelNames(); for (String string : leafLabels) { metaLabels.remove(string); } numMetaLabels = metaLabels.size(); } protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception { Instance transformed = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray()); for (int i = 0; i < numMetaLabels; i++) { transformed.insertAttributeAt(transformed.numAttributes()); } transformed.setDataset(header); MultiLabelOutput mlo = hmc.makePrediction(transformed); boolean[] oldBipartition = mlo.getBipartition(); //System.out.println("old:" + Arrays.toString(oldBipartition)); boolean[] newBipartition = new boolean[numLabels]; System.arraycopy(oldBipartition, 0, newBipartition, 0, numLabels); //System.out.println("new:" + Arrays.toString(newBipartition)); double[] oldConfidences = mlo.getConfidences(); double[] newConfidences = new double[numLabels]; System.arraycopy(oldConfidences, 0, newConfidences, 0, numLabels); MultiLabelOutput newMLO = new MultiLabelOutput(newBipartition, newConfidences); return newMLO; } @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Grigorios Tsoumakas and Ioannis Katakis and Ioannis Vlahavas"); result.setValue(Field.TITLE, "Effective and Efficient Multilabel Classification in Domains with Large Number of Labels"); result.setValue(Field.BOOKTITLE, "Proc. ECML/PKDD 2008 Workshop on Mining Multidimensional Data (MMD'08)"); result.setValue(Field.LOCATION, "Antwerp, Belgium"); result.setValue(Field.YEAR, "2008"); return result; } //spark temporary edit for complexity measures public long getNoNodes() { return hmc.getNoNodes(); } public long getNoClassifierEvals() { return hmc.getNoClassifierEvals(); } public long getTotalUsedTrainInsts() { return hmc.getTotalUsedTrainInsts(); } }