/***********************************************************************

    This file is part of KEEL-software, the Data Mining tool for regression,
    classification, clustering, pattern mining and so on.

    Copyright (C) 2004-2010

    F. Herrera (herrera@decsai.ugr.es)
    L. Sánchez (luciano@uniovi.es)
    J. Alcalá-Fdez (jalcala@decsai.ugr.es)
    S. García (sglopez@ujaen.es)
    A. Fernández (alberto.fernandez@ujaen.es)
    J. Luengo (julianlm@decsai.ugr.es)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/

**********************************************************************/

package keel.Algorithms.Rule_Learning.C45Rules;

import java.util.Enumeration;
import java.util.Vector;
import keel.Dataset.*;

/**
 * <p>
 * Class to implement the dataset: it keeps the attributes read from the
 * header, the itemsets (one per instance) and the index of the class attribute.
 * </p>
 *
 * @author Written by Antonio Alejandro Tortosa (University of Granada) 01/07/2008
 * @author Modified by Xavi Solé (La Salle, Ramón Llull University - Barcelona) 12/12/2008
 * @version 1.1
 * @since JDK1.2
 */
public class MyDataset{

  /** The name of the dataset. */
  protected String name = "";

  /** The attributes (a vector of MyAttribute; the class attribute is stored too). */
  protected Vector attributes;

  /** The itemsets (a vector of Itemset). */
  protected Vector itemsets;

  /** The index of the class attribute inside {@link #attributes}. */
  protected int classIndex;

  /** Keel dataset InstanceSet the itemsets are read from. */
  protected InstanceSet IS;

  private double totalCond; // Number of possible conditions

  /** Function to read the .dat file that contains the information of the dataset.
*
   * @param name The name of the file, handed to InstanceSet.readSet.
   * @param train The flag if the file is for training.
   */
  public MyDataset( String name, boolean train )
  {
    try
    {
      // Create the set of instances and read the itemsets.
      IS = new InstanceSet();
      IS.readSet( name, train );
    }
    catch ( DatasetException e )
    {
      System.out.println( "Error loading dataset instances" );
      e.printStackTrace();
      System.exit( -1 );
    }
    catch ( HeaderFormatException e )
    {
      System.out.println( "Error loading dataset instances" );
      e.printStackTrace();
      System.exit( -1 );
    }

    // Store the attributes declared in the dataset file.
    readHeader();

    itemsets = new Vector( IS.getNumInstances() );

    // Read all the itemsets.
    getItemsetFull();

    totalCond = this.numAllConditions();
  }

  /** Constructor that copies another dataset.
   *
   * @param dataset The dataset to be copied.
   */
  public MyDataset( MyDataset dataset )
  {
    this( dataset, dataset.numItemsets() );
    dataset.copyItemsets( 0, this, dataset.numItemsets() );
    totalCond = dataset.totalCond;
  }

  /** Constructor to copy all the attributes of another dataset but the itemsets.
   * The attribute vector is shared with the source dataset, not cloned.
   *
   * @param dataset The dataset to be copied.
   * @param capacity The initial capacity for the itemsets (negative values are treated as 0).
   */
  public MyDataset( MyDataset dataset, int capacity )
  {
    if ( capacity < 0 )
      capacity = 0;

    classIndex = dataset.classIndex;
    name = dataset.getName();
    attributes = dataset.attributes;
    itemsets = new Vector( capacity );
    totalCond = dataset.totalCond;
  }

  /** Function to store the header of a data file: the relation name, every
   * input attribute (in header order) and, last, the first output attribute,
   * which becomes the class attribute.
   */
  private void readHeader()
  {
    name = Attributes.getRelationName();
    attributes = new Vector();

    // Input attributes keep the position they have among the inputs.
    for ( int j = 0; j < Attributes.getInputNumAttributes(); j++ )
      attributes.addElement( buildAttribute( Attributes.getInputAttribute( j ), j ) );

    // The output attribute is appended after the inputs and is the class.
    classIndex = Attributes.getNumAttributes() - 1;
    attributes.addElement( buildAttribute( Attributes.getOutputAttribute( 0 ), classIndex ) );
  }

  /** Builds and activates the MyAttribute that mirrors a Keel attribute.
   * Numeric attributes get the range [min, max] declared in the header
   * (the upper bound is read with getMaxAttribute; it used to be read with
   * getMinAttribute, which made every range collapse to [min, min]).
   *
   * @param at The Keel attribute read from the header.
   * @param index The index given to the new attribute.
   * @return The new, activated attribute.
   */
  private MyAttribute buildAttribute( Attribute at, int index )
  {
    MyAttribute att;

    if ( at.getType() == 2 ) // real
    {
      float min = (float) at.getMinAttribute();
      float max = (float) at.getMaxAttribute();
      att = new MyAttribute( at.getName(), index );
      att.setRange( min, max );
    }
    else if ( at.getType() == 1 ) // integer
    {
      int min = (int) at.getMinAttribute();
      int max = (int) at.getMaxAttribute();
      att = new MyAttribute( at.getName(), index );
      att.setRange( min, max );
    }
    else // nominal
    {
      Vector attributeValues = new Vector();
      for ( int k = 0; k < at.getNumNominalValues(); k++ )
        attributeValues.addElement( at.getNominalValue( k ) );
      att = new MyAttribute( at.getName(), attributeValues, index );
    }

    att.activate();
    return att;
  }

  /** Function to read every instance of the InstanceSet and append it to the
   * dataset as an itemset. Nominal values are stored as the index of the
   * value inside its attribute; missing values are stored as
   * Itemset.getMissingValue().
   *
   * @return True if the itemsets were read successfully.
   */
  private boolean getItemsetFull()
  {
    int numInputs = Attributes.getInputNumAttributes();

    for ( int j = 0; j < IS.getNumInstances(); j++ )
    {
      double[] itemset = new double[Attributes.getNumAttributes()];

      // Get values for all input attributes.
      for ( int i = 0; i < numInputs; i++ )
      {
        if ( IS.getInstance( j ).getInputMissingValues( i ) )
          itemset[i] = Itemset.getMissingValue();
        else if ( Attributes.getInputAttribute( i ).getType() == 0 ) // nominal
        {
          for ( int k = 0; k < Attributes.getInputAttribute( i ).getNumNominalValues(); k++ )
            if ( Attributes.getInputAttribute( i ).getNominalValue( k ).equals(
                   IS.getInstance( j ).getInputNominalValues( i ) ) )
              itemset[i] = (double) k;
        }
        else // real and integer
          itemset[i] = IS.getInstance( j ).getInputRealValues( i );
      }

      // Get the value for the output attribute, stored right after the inputs.
      int out = numInputs;

      if ( IS.getInstance( j ).getOutputMissingValues( 0 ) )
        itemset[out] = Itemset.getMissingValue();
      else if ( Attributes.getOutputAttribute( 0 ).getType() == 0 ) // nominal
      {
        for ( int k = 0; k < Attributes.getOutputAttribute( 0 ).getNumNominalValues(); k++ )
          if ( Attributes.getOutputAttribute( 0 ).getNominalValue( k ).equals(
                 IS.getInstance( j ).getOutputNominalValues( 0 ) ) )
            itemset[out] = (double) k;
      }
      else // real and integer
        itemset[out] = IS.getInstance( j ).getOutputRealValues( 0 );

      // Add itemset to dataset (with weight 1).
      addItemset( new Itemset( 1, itemset ) );
    }

    return true;
  }

  /** Function to add one itemset.
   *
   * @param itemset The itemset to add to the dataset.
*/ public final void addItemset( Itemset itemset ) { Itemset newItemset = (Itemset)itemset.copy(); newItemset.setDataset( this ); itemsets.addElement( newItemset ); } /** Returns the name of the dataset. * @return the name of the dataset. */ public String getName() { return name; } /** Returns the attribute that has the index. * * @param index int The index of the attribute. * @return the attribute that has the index. */ public final MyAttribute getAttribute( int index ) { return (MyAttribute) attributes.elementAt( index ); } /** Returns the attribute that has the name. * * @param name String The name of the attribute. * @return the attribute that has the name. */ public final MyAttribute getAttribute( String name ) { for ( int i = 0; i < attributes.size(); i++ ) if ( ( (MyAttribute)attributes.elementAt( i ) ).name().equalsIgnoreCase( name ) ) return (MyAttribute) attributes.elementAt( i ); return null; } /** Returns class attribute. * @return class attribute */ public final MyAttribute getClassAttribute() { if ( classIndex < 0 ) { System.err.println("Class index wrong:"+classIndex); return null; } return getAttribute( classIndex ); } /** Returns the index of the class attribute. * @return the index of the class attribute. */ public final int getClassIndex() { return classIndex; } /** Returns the number of attributes. * @return the number of attributes. */ public final int numAttributes() { return attributes.size(); } /** Returns the number of possible values of the class attribute. * @return the number of possible values of the class attribute. */ public final int numClasses() { if ( classIndex < 0 ) { System.err.println("Class index wrong:"+classIndex); return -1; } return getClassAttribute().numValues(); } /** Returns the number of itemsets. * @return the number of itemsets. */ public final int numItemsets() { return itemsets.size(); } /** Function to remove an itemset at the given position. * * @param index The index of the itemset to be deleted. 
*/
  public final void delete( int index )
  {
    itemsets.removeElementAt( index );
  }

  /** Function to remove all the itemsets with missing value in the given attribute.
   *
   * @param attIndex The index of the attribute.
   */
  public final void deleteWithMissing( int attIndex )
  {
    Vector kept = new Vector( numItemsets() );

    for ( int i = 0; i < numItemsets(); i++ )
    {
      Itemset current = itemset( i );
      if ( !current.isMissing( attIndex ) )
        kept.addElement( current );
    }

    itemsets = kept;
  }

  /** Enumerates all the attributes but the class attribute.
   *
   * @return An enumeration that contains all the attributes except the class.
   */
  public Enumeration enumerateAttributes()
  {
    // The capacity is only a hint; clamp it so a dataset without attributes
    // does not make the Vector constructor throw IllegalArgumentException.
    Vector help = new Vector( Math.max( attributes.size() - 1, 0 ) );

    for ( int i = 0; i < attributes.size(); i++ )
      if ( i != classIndex )
        help.addElement( attributes.elementAt( i ) );

    return help.elements();
  }

  /** Enumerates all the itemsets.
   *
   * @return An enumeration that contains all the itemsets.
   */
  public final Enumeration enumerateItemsets()
  {
    return itemsets.elements();
  }

  /** Returns the itemset at the given position.
   *
   * @param index The index of the itemset.
   * @return the itemset at the given position.
   */
  public final Itemset itemset( int index )
  {
    return (Itemset) itemsets.elementAt( index );
  }

  /** Returns the last itemset.
   *
   * @return the last itemset
   */
  public final Itemset lastItemset()
  {
    return (Itemset) itemsets.lastElement();
  }

  /** Function to add the instances of one set to the end of another.
   *
   * @param from The index of the first that is going to be copied.
   * @param dest The dataset where the itemsets are going to be copied.
   * @param num The number of itemsets to copy.
   */
  private void copyItemsets( int from, MyDataset dest, int num )
  {
    for ( int i = 0; i < num; i++ )
      dest.addItemset( itemset( from + i ) );
  }

  /** Function to compute the sum of all the weights of the itemsets.
   *
   * @return The weight of all the itemsets.
*/
  public final double sumOfWeights()
  {
    double sum = 0;

    for ( int i = 0; i < numItemsets(); i++ )
      sum += itemset( i ).getWeight();

    return sum;
  }

  /** Function to sort the dataset based on an attribute.
   * The itemsets whose value for the attribute is missing are first moved to
   * the end of the dataset; the remaining ones are then sorted in ascending
   * order of that value.
   *
   * @param attIndex The index of the attribute.
   */
  public final void sort( int attIndex )
  {
    int i, j;

    // move all dataset with missing values to end
    // (i scans from the left, j marks the last position not yet known to be missing)
    j = numItemsets() - 1;
    i = 0;

    while ( i <= j )
    {
      if ( itemset( j ).isMissing( attIndex ) )
        j--;
      else
      {
        if ( itemset( i ).isMissing( attIndex ) )
        {
          // j holds a known value here, so after the swap position i is settled
          swap( i, j );
          j--;
        }

        i++;
      }
    }

    // only the range [0, j], which holds no missing values, is sorted
    quickSort( attIndex, 0, j );
  }

  /** Function to implement the quicksort method over a range of itemsets.
   * Values closer than 1e-6 to the partition element are treated as equal to it.
   *
   * @param attIndex The index of the attribute used to sort the itemsets.
   * @param lo0 Index of the first itemset of the range.
   * @param hi0 Index of the last itemset of the range.
   */
  private void quickSort( int attIndex, int lo0, int hi0 )
  {
    int lo = lo0, hi = hi0;
    double mid, midPlus, midMinus;

    if ( hi0 > lo0 )
    {
      // Arbitrarily establishing partition element as the
      // midpoint of the array.
      mid = itemset( ( lo0 + hi0 ) / 2 ).getValue( attIndex );
      // tolerance band around the partition element
      midPlus = mid + 1e-6;
      midMinus = mid - 1e-6;

      // loop through the array until indices cross
      while( lo <= hi )
      {
        // find the first element that is greater than or equal to
        // the partition element starting from the left Index.
        while ( ( itemset( lo ).getValue( attIndex ) < midMinus ) && ( lo < hi0 ) )
          ++lo;

        // find an element that is smaller than or equal to
        // the partition element starting from the right Index.
        while ( ( itemset( hi ).getValue( attIndex ) > midPlus ) && ( hi > lo0 ) )
          --hi;

        // if the indexes have not crossed, swap
        if( lo <= hi )
        {
          swap( lo,hi );
          ++lo;
          --hi;
        }
      }

      // If the right index has not reached the left side of array
      // must now sort the left partition.
      if( lo0 < hi )
        quickSort( attIndex, lo0, hi );

      // If the left index has not reached the right side of array
      // must now sort the right partition.
      if( lo < hi0 )
        quickSort( attIndex, lo, hi0 );
    }
  }

  /** Function to swap two itemsets.
   *
   * @param i The first itemset.
   * @param j The second itemset.
*/ private void swap( int i, int j ) { Object help = itemsets.elementAt( i ); itemsets.insertElementAt( itemsets.elementAt( j ), i ); itemsets.removeElementAt( i + 1 ); itemsets.insertElementAt( help, j ); itemsets.removeElementAt( j + 1 ); } /*******************NEW METHODS*************************************/ /** * Compute the number of all possible conditions that could * appear in a rule of a given data. For nominal attributes, * it's the number of values that could appear; for numeric * attributes, it's the number of values * 2, i.e. <= and >= * are counted as different possible conditions. * * @return number of all conditions of the data */ public double numAllConditions(){ double total = 0; for(int i=0;i<Attributes.getInputNumAttributes();i++){ Attribute att= Attributes.getInputAttribute(i); if(att.getType()==Attribute.NOMINAL) total += (double)att.getNumNominalValues(); else total += 2.0 * (double) numDistinctValues(i); } return total; } /** * Compute the number of all possible conditions that could * appear in a rule of a given data. For nominal attributes, * it's the number of values that could appear; for numeric * attributes, it's the number of values * 2, i.e. <= and >= * are counted as different possible conditions. * * @param attIndex the attribute' index * @return number of all conditions of the data */ public double numAllConditions(int attIndex){ Attribute att= Attributes.getInputAttribute(attIndex); if(att.getType()==Attribute.NOMINAL) return (double)att.getNumNominalValues(); else return 2.0 * (double) numDistinctValues(attIndex); } /** * Returns the number of distinct values of a given attribute. * Returns the number of instances if the attribute is a * string attribute. The value 'missing' is not counted. 
* * @param attIndex the attribute (index starts with 0) * @return the number of distinct values of a given attribute */ public int numDistinctValues(int attIndex) { MyAttribute att= getAttribute(attIndex); if (att.isContinuous()) { double [] attVals = attributeToDoubleArray(attIndex); Utilities.mergeSort(attVals,attVals.length); double prev = 0; int counter = 0; for (int i = 0; i < size(); i++) { if (attVals[i]==Double.NaN) { break; } if ((i == 0) || (attVals[i] > prev)) { prev = attVals[i]; counter++; } } return counter; } else { return att.numValues(); } } /** * Returns the number of possible conditions. * @return the number of possible conditions. */ public double getNumCond(){ return totalCond; } /** * Gets the value of all instances in this dataset for a particular * attribute. Useful in conjunction with Utils.sort to allow iterating * through the dataset in sorted order for some attribute. * * @param index the index of the attribute. * @return an array containing the value of the desired attribute for * each instance in the dataset. */ public double [] attributeToDoubleArray(int index) { double [] result = new double[this.size()]; for (int i = 0; i < result.length; i++) { result[i] = itemset(i).getValue(index); } return result; } /** * It filters the instances covered by a simple rule from this dataset; * i.e., it deactivates the instances not covered by that rule. * @param mask Mask the mask with the active entries of the dataset * @param A int attribute's id * @param V double attribute's value * @param operator int rule operator. 
It could be: Rule.EQUAL(for discret attributes), * Rule.GREATER (>) or Rule.LOWER(<=) */ public void filter(Mask mask,int A,double V,int operator){ mask.resetIndex(); while(mask.next()){ //if (X[mask.getIndex()][A]!=V) if (((Itemset)itemsets.elementAt(mask.getIndex())).isMissing(A)){ mask.reset(); } else{ if (operator == Rule.EQUAL && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) != V) mask.reset(); if (operator == Rule.GREATER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) <= V) mask.reset(); if (operator == Rule.LOWER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) > V) mask.reset(); } } } /** * It filters the instances covered by a simple rule from this dataset; * i.e., it deactivates the instances not covered by that rule. * @param mask Mask the mask with the actives entries of the dataset * @param sr SimpleRule the rule */ public void filter(Mask mask,SimpleRule sr){ int A=sr.getAttribute(); double V=sr.getValue(); int operator=sr.getOperator(); mask.resetIndex(); while(mask.next()){ //if (X[mask.getIndex()][sr.getAttribute()]!=sr.getValue()) if (((Itemset)itemsets.elementAt(mask.getIndex())).isMissing(A)){ mask.reset(); } else{ if (operator == Rule.EQUAL && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) != V) mask.reset(); if (operator == Rule.GREATER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) <= V) mask.reset(); if (operator == Rule.LOWER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) > V) mask.reset(); } } } /** * It filters the instances covered by a rule from this dataset; * i.e., it deactivates the instances not covered by that rule. 
* @param mask Mask the mask with the active entries of the dataset * @param rule Rule the rule */ public void filter(Mask mask,Rule rule){ for (int i=0;i<rule.size();i++) this.filter(mask, rule.getSimpleRule(i)); } /** * It filters the instances covered by a set of rule from this dataset; * i.e., it deactivates the instances not covered by that ruleset. * @param mask Mask the mask with the active entries of the dataset * @param rules Ruleset the ruleset */ public void filter(Mask mask,Ruleset rules){ Mask previous=new Mask(this.itemsets.size(),false); for (int i=0;i<rules.size();i++){ Mask current=mask.copy(); filter(current,rules.getRule(i)); previous=previous.or(current); } previous.copyTo(mask); } /** * It filters the instances covered by a set of rule from this dataset; * i.e., it deactivates the instances not covered by that ruleset. * @param mask Mask the mask with the active entries of the dataset * @param rules Ruleset the ruleset * @param ignore int the algorithm ignores the i-th rule of the ruleset */ public void filter(Mask mask,Ruleset rules,int ignore){ Mask previous=new Mask(this.itemsets.size(),false); for (int i=0;i<rules.size();i++){ if (i!=ignore){ Mask current = mask.copy(); filter(current, rules.getRule(i)); previous = previous.or(current); } } previous.copyTo(mask); } /** * It filters the instances of a given class from this dataset; * i.e., it deactivates the instances from the other class. 
* @param mask Mask the mask whit the active entries of the dataset * @param class_name String the name of the class */ public void filterByClass(Mask mask,String class_name){ double class_id=this.getAttribute(this.classIndex).valueIndex(class_name); mask.resetIndex(); while (mask.next()) { //if (!output[mask.getIndex()].equals(value)) if (((Itemset) itemsets.elementAt(mask.getIndex())).getClassValue()!=class_id) mask.reset(); } } /** * It substracts the instances covered by a simple rule from this dataset; * i.e., it deactivates the instances covered by that rule. * @param mask Mask the mask with the active entries of the dataset * @param A int attribute's id * @param V double attribute's value * @param operator int rule operator. It could be: Rule.EQUAL(for discret attributes), * Rule.GREATER (>) or Rule.LOWER(<=) */ public void substract(Mask mask,int A,double V,int operator){ mask.resetIndex(); while(mask.next()){ //if (X[mask.getIndex()][A]==V) if (!((Itemset)itemsets.elementAt(mask.getIndex())).isMissing(A)){ if (operator == Rule.EQUAL && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) == V) mask.reset(); if (operator == Rule.GREATER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) > V) mask.reset(); if (operator == Rule.LOWER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) <= V) mask.reset(); } } } /** * It substracts the instances covered by a simple rule from this dataset; * i.e., it deactivates the instances covered by that rule. 
* @param mask Mask the mask with the active entries of the dataset * @param sr SimpleRule the rule */ public void substract(Mask mask,SimpleRule sr){ mask.resetIndex(); int A=sr.getAttribute(); double V=sr.getValue(); int operator=sr.getOperator(); while(mask.next()){ //if (X[mask.getIndex()][sr.getAttribute()]==sr.getValue()) if (!((Itemset)itemsets.elementAt(mask.getIndex())).isMissing(A)){ if (operator == Rule.EQUAL && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) == V) mask.reset(); if (operator == Rule.GREATER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) > V) mask.reset(); if (operator == Rule.LOWER && ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) <= V) mask.reset(); } } } /** * It substracts the instances covered by a rule from this dataset; * i.e., it deactivates the instances covered by that rule. * @param mask Mask the mask with the active entries of the dataset * @param rule Rule the rule */ public void substract(Mask mask,Rule rule){ mask.resetIndex(); while(mask.next()){ boolean seguir=true; for (int i = 0; i < rule.size() && seguir; i++){ int A=rule.getSimpleRule(i).getAttribute(); double V=rule.getSimpleRule(i).getValue(); int operator=rule.getSimpleRule(i).getOperator(); if (((Itemset)itemsets.elementAt(mask.getIndex())).isMissing(A)){ seguir=false; } else{ if (operator == Rule.EQUAL) seguir = ( ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) == V); if (operator == Rule.GREATER) seguir = ( ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) > V); if (operator == Rule.LOWER) seguir = ( ( (Itemset) itemsets.elementAt(mask.getIndex())).getValue(A) <= V); } } if (seguir) mask.reset(); } } /** * It substracts the instances covered by a set of rule from this dataset; * i.e., it deactivates the instances covered by that ruleset. 
* @param mask Mask the mask with the active entries of the dataset
   * @param rules Ruleset the set of rules
   */
  public void substract(Mask mask,Ruleset rules){
    // Removing the coverage of each rule in turn removes the coverage of the set.
    int position = 0;
    while ( position < rules.size() ) {
      substract( mask, rules.getRule( position ) );
      position++;
    }
  }

  /**
   * Removes from the mask the entries covered by a set of rules,
   * leaving one rule of the set out of the computation.
   * @param mask Mask the mask with the active entries of the dataset
   * @param rules Ruleset the set of rules
   * @param ignore int number of the rule to ignore
   */
  public void substract(Mask mask,Ruleset rules,int ignore){
    for ( int position = 0; position < rules.size(); position++ ) {
      if ( position == ignore )
        continue;

      substract( mask, rules.getRule( position ) );
    }
  }

  /**
   * Classifies the entries' classes according to several sets of rules.
   * @param actives Mask active entries of the dataset
   * @param rulesets Ruleset[] the rulesets
   * @param length int the number of rulesets
   * @return a vector of the length of this dataset with the class name for each entry.
*/
  public String[] classify(Mask actives,Ruleset[] rulesets,int length){
    String[] labels = new String[this.itemsets.size()];

    // Every active entry starts with the type of the last ruleset.
    for ( int entry = 0; entry < labels.length; entry++ ) {
      if ( actives.isActive( entry ) )
        labels[entry] = rulesets[length-1].getType();
    }

    // Each earlier ruleset labels the entries it covers and then takes them
    // out of the given mask, so the mask is modified by this method.
    for ( int r = 0; r < length-1; r++ ) {
      Ruleset current = rulesets[r];

      Mask matched = actives.copy();
      this.filter( matched, current );

      matched.resetIndex();
      while ( matched.next() )
        labels[matched.getIndex()] = current.getType();

      substract( actives, current );
    }

    return labels;
  }

  /**
   * Tells whether an example has no value for an attribute.
   * @param exemple int index of the example in the dataset
   * @param attribute int index of the attribute
   * @return true if the value for the attribute in the given example is missing
   */
  public boolean isMissing(int exemple,int attribute){
    Itemset entry = (Itemset) itemsets.elementAt( exemple );
    return entry.isMissing( attribute );
  }

  /**
   * Tells whether the example the mask currently points at has no value
   * for an attribute.
   * @param mask Mask the index of the mask marks the given example
   * @param attribute int index of the attribute
   * @return true if the value for the attribute in the given example is missing
   */
  public boolean isMissing(Mask mask,int attribute){
    Itemset entry = (Itemset) itemsets.elementAt( mask.getIndex() );
    return entry.isMissing( attribute );
  }

  /**
   * Gives the number of examples of the dataset.
   * @return the number of examples of the dataset
   */
  public int size(){
    return itemsets.size();
  }

  /**
   * Classifies the entries' classes according to several sets of rules.
   * @param rulesets Ruleset[] the rulesets
   * @param length int the number of rulesets
   * @return a vector of the length of this dataset with the class name for each entry.
*/
  public String[] classify(Ruleset[] rulesets, int length){
    // Uses a fresh mask sized to the dataset as the set of active entries.
    Mask everyEntry = new Mask( itemsets.size() );
    return classify( everyEntry, rulesets, length );
  }

  /**
   * Gives the values of a specific example.
   * @param pos int position (id) of the example in the data-set
   * @return double[] the attributes of the given example (the array kept by
   *         the itemset itself, not a copy)
   */
  public double[] getExample(int pos) {
    Itemset entry = (Itemset) this.itemsets.elementAt( pos );
    return entry.values;
  }

  /**
   * Gives the values of the example the mask currently points at.
   * @param mask Mask with the position (id) of the example in the data-set
   * @return double[] the attributes of the given example (the array kept by
   *         the itemset itself, not a copy)
   */
  public double[] getExample(Mask mask) {
    Itemset entry = (Itemset) this.itemsets.elementAt( mask.getIndex() );
    return entry.values;
  }

  /**
   * Counts how many instances of the dataset belong to each class.
   * @return the frequency (number of instances) of each class, indexed by
   *         the class value.
   */
  public int[] getClassFequency(){
    // A new int array is already filled with zeros.
    int[] counts = new int[this.numClasses()];

    for ( int row = 0; row < this.size(); row++ ) {
      Itemset entry = (Itemset) itemsets.elementAt( row );
      double classValue = entry.getValue( this.getClassIndex() );
      counts[(int) classValue]++;
    }

    return counts;
  }

  /**
   * Returns the frequency (number of instances) of each class.
   * @param filter Mask filter
   * @return the frequency (number of instances) of each class.
*/ public int[] getClassFequency(Mask filter){ int[] frequency = new int[this.numClasses()]; for (int i=0;i<frequency.length;i++) frequency[i]=0; filter.resetIndex(); while (filter.next()) { double class_value=((Itemset)itemsets.elementAt(filter.getIndex())).getValue(this.getClassIndex()); frequency[(int)class_value]++; } return frequency; } /** * It copies the header of the dataset * @return String A string containing all the data-set information */ public String copyHeader() { String p = new String(""); p = "@relation " + Attributes.getRelationName() + "\n"; p += Attributes.getInputAttributesHeader(); p += Attributes.getOutputAttributesHeader(); p += Attributes.getInputHeader() + "\n"; p += Attributes.getOutputHeader() + "\n"; p += "@data\n"; return p; } /** * Returns a string representation of the entries of this MyDataset. * @return a string representation of the entries of this MyDataset. */ public String toString(){ String salida=""; for (int i=0;i<this.itemsets.size();i++){ //First Attribute double V=((Itemset)itemsets.elementAt(i)).getValue(0); if (((MyAttribute) attributes.elementAt(0)).isDiscret()) salida+=i+".- ("+getAttribute(0).value((int)V); else salida+=i+".- ("+V; //Attributes for (int j=1;j<this.numAttributes();j++){ if (j!=this.getClassIndex()){ V = ( (Itemset) itemsets.elementAt(i)).getValue(j); if ( ( (MyAttribute) attributes.elementAt(j)).isDiscret()) salida += "," + getAttribute(j).value( (int) V); else salida += "," + V; } } //CLASS V=((Itemset)itemsets.elementAt(i)).getValue(this.getClassIndex()); if (((MyAttribute) attributes.elementAt(this.getClassIndex())).isDiscret()) salida+=")-> "+getClassAttribute().value((int)V)+"\n"; else salida+=")-> "+V; } return salida; } /** * Returns a string representation of the active entries of this MyDataset. * @param mask Mask active entries * @return a string representation of the active entries of this MyDataset. 
*/
  public String toString(Mask mask){
    StringBuffer salida = new StringBuffer();

    mask.resetIndex();
    while ( mask.next() )
      appendEntryText( salida, mask.getIndex() );

    return salida.toString();
  }

  /**
   * Returns a string representation of the active entries of this MyDataset.
   * @param mask IncrementalMask active entries
   * @return a string representation of the active entries of this MyDataset.
   */
  public String toString(IncrementalMask mask){
    StringBuffer salida = new StringBuffer();

    mask.resetIndex();
    while ( mask.next() )
      appendEntryText( salida, mask.getIndex() );

    return salida.toString();
  }

  /**
   * Appends the line "index.- (v0,v1,...)-> class" that describes one entry.
   * Discrete values are printed by name, the other ones as numbers. The
   * line break is written for numeric classes too; it used to be missing
   * there, which glued consecutive entries together.
   * @param salida StringBuffer the text built so far
   * @param i int index of the entry in the dataset
   */
  private void appendEntryText(StringBuffer salida,int i){
    Itemset entry = (Itemset) itemsets.elementAt( i );

    // First attribute
    double V = entry.getValue( 0 );
    salida.append( i ).append( ".- (" );
    if ( ( (MyAttribute) attributes.elementAt( 0 ) ).isDiscret() )
      salida.append( getAttribute( 0 ).value( (int) V ) );
    else
      salida.append( V );

    // Remaining attributes, the class excluded
    for ( int j = 1; j < this.numAttributes(); j++ ) {
      if ( j != this.getClassIndex() ) {
        V = entry.getValue( j );
        salida.append( "," );
        if ( ( (MyAttribute) attributes.elementAt( j ) ).isDiscret() )
          salida.append( getAttribute( j ).value( (int) V ) );
        else
          salida.append( V );
      }
    }

    // Class
    V = entry.getValue( this.getClassIndex() );
    salida.append( ")-> " );
    if ( ( (MyAttribute) attributes.elementAt( this.getClassIndex() ) ).isDiscret() )
      salida.append( getClassAttribute().value( (int) V ) );
    else
      salida.append( V );
    salida.append( "\n" );
  }

}