/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * Seeder.java * Copyright (C) 2001 Sugato Basu, Mikhail Bilenko * */ package weka.clusterers; import weka.core.*; import java.util.*; public class Seeder extends ArrayList { /** Stores the mapping between all possible seeds and their cluster assignments */ protected HashMap m_TotalSeedHash; /** Stores the current instances which are set as seeds */ protected ArrayList m_CurrentSeedInstances; /** Verbose? */ protected boolean m_Verbose = false; /* Constructor */ public Seeder(HashMap totalSeedHash) { m_TotalSeedHash = totalSeedHash; } /** * set the verbosity level of the clusterer * @param verbose messages on(true) or off (false) */ public void setVerbose (boolean verbose) { m_Verbose = verbose; } /** Constructor * @param dataWithClass: Data set which has class information in it * @param dataWithoutClass: dataWithClass dataset with class information removed */ public Seeder(Instances dataWithoutClass, Instances dataWithClass) throws Exception{ int hashSize = (int) (dataWithClass.numInstances()/0.75 + 10); // Java API recommendations int classIndex = dataWithClass.classIndex(); m_TotalSeedHash = new HashMap (hashSize); if (classIndex < 0) { throw new WekaException ("Need class information in data set"); } if (dataWithClass.numInstances() != dataWithoutClass.numInstances()) { throw new WekaException ("Both datasets should have same size"); } if (m_Verbose) { System.out.println("Total seed hash table ...\n"); } for (int i = 0; i < dataWithoutClass.numInstances(); i++) { Instance instWithClass = dataWithClass.instance(i); Instance instWithoutClass = dataWithoutClass.instance(i); m_TotalSeedHash.put(instWithoutClass, new Integer((int) instWithClass.classValue())); if (m_Verbose) { System.out.println("Inserting key: " + instWithoutClass + " and value: " + instWithClass.classValue()); } } } /** Set the current seeds */ public void createSeeds (ArrayList seed_data) { m_CurrentSeedInstances = seed_data; } /** Returns the total hashMap, with the instance to cluster assignment mapping for all the seeds * * @return the total hashMap */ public HashMap getAllSeeds() throws Exception { return m_TotalSeedHash; } /** Returns a hashMap with the instance to cluster assignment mapping for the current seeds * * @return the seed hashMap */ public HashMap getSeeds() throws Exception { int hashSize = (int) (m_CurrentSeedInstances.size()/0.75 + 10); // Java API recommendations HashMap returnHash = new HashMap(hashSize); for (int i=0; i<m_CurrentSeedInstances.size(); i++) { Instance seed = (Instance) m_CurrentSeedInstances.get(i); if(!m_TotalSeedHash.containsKey(seed)) throw new Exception("Seed does not have an entry in the totalSeedHash"); returnHash.put(seed, m_TotalSeedHash.get(seed)); } return returnHash; } }