Java Examples for weka.estimators.KernelEstimator

The following java examples will help you to understand the usage of weka.estimators.KernelEstimator. These source code samples are taken from different open source projects.

Example 1
Project: android-ml-weka-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 2
Project: LPmade-master  File: Evaluation.java View source code
/**
	 * Sets up the priors for numeric class attributes from the
	 * training class values that have been seen so far.
	 */
protected void setNumericPriorsFromBuffer() {
    // Default value
    double numPrecision = 0.01;
    if (m_NumTrainClassVals > 1) {
        double[] temp = new double[m_NumTrainClassVals];
        System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals);
        int[] index = Utils.sort(temp);
        double lastVal = temp[index[0]];
        double deltaSum = 0;
        int distinct = 0;
        for (int i = 1; i < temp.length; i++) {
            double current = temp[index[i]];
            if (current != lastVal) {
                deltaSum += current - lastVal;
                lastVal = current;
                distinct++;
            }
        }
        if (distinct > 0) {
            numPrecision = deltaSum / distinct;
        }
    }
    m_PriorErrorEstimator = new KernelEstimator(numPrecision);
    m_ErrorEstimator = new KernelEstimator(numPrecision);
    m_ClassPriors[0] = m_ClassPriorsSum = 0;
    for (int i = 0; i < m_NumTrainClassVals; i++) {
        m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i];
        m_ClassPriorsSum += m_TrainClassWeights[i];
        m_PriorErrorEstimator.addValue(m_TrainClassVals[i], m_TrainClassWeights[i]);
    }
}
Example 3
Project: sad-analyzer-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 4
Project: extweka-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 5
Project: TimeSeriesClassification-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 6
Project: jDenetX-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 7
Project: wekax-master  File: Evaluation.java View source code
/**
   * Sets up the priors for numeric class attributes from the 
   * training class values that have been seen so far.
   */
protected void setNumericPriorsFromBuffer() {
    // Default value
    double numPrecision = 0.01;
    if (m_NumTrainClassVals > 1) {
        double[] temp = new double[m_NumTrainClassVals];
        System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals);
        int[] index = Utils.sort(temp);
        double lastVal = temp[index[0]];
        double deltaSum = 0;
        int distinct = 0;
        for (int i = 1; i < temp.length; i++) {
            double current = temp[index[i]];
            if (current != lastVal) {
                deltaSum += current - lastVal;
                lastVal = current;
                distinct++;
            }
        }
        if (distinct > 0) {
            numPrecision = deltaSum / distinct;
        }
    }
    m_PriorErrorEstimator = new KernelEstimator(numPrecision);
    m_ErrorEstimator = new KernelEstimator(numPrecision);
    m_ClassPriors[0] = m_ClassPriorsSum = 0;
    for (int i = 0; i < m_NumTrainClassVals; i++) {
        m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i];
        m_ClassPriorsSum += m_TrainClassWeights[i];
        m_PriorErrorEstimator.addValue(m_TrainClassVals[i], m_TrainClassWeights[i]);
    }
}
Example 8
Project: MGPWeka-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 9
Project: AIWekaProject-master  File: Evaluation.java View source code
/**
   * Sets up the priors for numeric class attributes from the 
   * training class values that have been seen so far.
   */
protected void setNumericPriorsFromBuffer() {
    // Default value
    double numPrecision = 0.01;
    if (m_NumTrainClassVals > 1) {
        double[] temp = new double[m_NumTrainClassVals];
        System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals);
        int[] index = Utils.sort(temp);
        double lastVal = temp[index[0]];
        double deltaSum = 0;
        int distinct = 0;
        for (int i = 1; i < temp.length; i++) {
            double current = temp[index[i]];
            if (current != lastVal) {
                deltaSum += current - lastVal;
                lastVal = current;
                distinct++;
            }
        }
        if (distinct > 0) {
            numPrecision = deltaSum / distinct;
        }
    }
    m_PriorErrorEstimator = new KernelEstimator(numPrecision);
    m_ErrorEstimator = new KernelEstimator(numPrecision);
    m_ClassPriors[0] = m_ClassPriorsSum = 0;
    for (int i = 0; i < m_NumTrainClassVals; i++) {
        m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i];
        m_ClassPriorsSum += m_TrainClassWeights[i];
        m_PriorErrorEstimator.addValue(m_TrainClassVals[i], m_TrainClassWeights[i]);
    }
}
Example 10
Project: Weka-for-Android-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 11
Project: mdrill-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}
Example 12
Project: weka-master  File: NaiveBayes.java View source code
/**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated 
   * successfully
   */
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);
    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();
    m_NumClasses = instances.numClasses();
    // Copy the instances
    m_Instances = new Instances(instances);
    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }
    // Reserve space for the distributions
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);
    int attIndex = 0;
    Enumeration enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = (Attribute) enu.nextElement();
        // If the attribute is numeric, determine the estimator 
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch(attribute.type()) {
                case Attribute.NUMERIC:
                    if (m_UseKernelEstimator) {
                        m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                    } else {
                        m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                    }
                    break;
                case Attribute.NOMINAL:
                    m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                    break;
                default:
                    throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }
    // Compute counts
    Enumeration enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = (Instance) enumInsts.nextElement();
        updateClassifier(instance);
    }
    // Save space
    m_Instances = new Instances(m_Instances, 0);
}