GaussianProcessesModel.java example

Explorer
ComplexRapidMiner-master
- operator
- src
package de.tud.inf.operator.learner.functions.weka.gaussianprocess;

import java.util.Iterator;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;

import com.rapidminer.example.set.HeaderExampleSet;
import com.rapidminer.example.set.ReplaceMissingExampleSet;
import com.rapidminer.example.table.NominalMapping;

import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;

import com.rapidminer.operator.learner.PredictionModel;

import com.rapidminer.operator.learner.functions.kernel.RVMModel;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.Kernel;


/**
 * Prediction model of a Gaussian process learner.
 *
 * <p>A prediction for an example is computed in {@link #classifyInstance(Example)}
 * as the sum over all training vectors of
 * {@code kernel(example, trainingVector[i]) * targetVector[i]}, plus the stored
 * average target value.
 *
 * <p>Before predicting, {@link #performPrediction(ExampleSet, Attribute)}
 * applies the optional nominal transformation and normalization models that
 * were handed to the constructor, in that order.
 */
public class GaussianProcessesModel extends PredictionModel {

	private static final long serialVersionUID = -4980181227681934936L;

	/** The number of training instances. */
	private int m_NumTrain = 0;

	/** Average target value; added to every kernel-weighted sum when predicting. */
	private double m_avg_target;

	/** Matrix that {@link #toString()} reports as the "Inverted Covariance Matrix". */
	private Jama.Matrix covarianceMatrix;

	/**
	 * Column vector that {@link #toString()} reports as
	 * "Inverted Covariance Matrix * Target-value Vector". Row {@code i} is the
	 * weight of training vector {@code i} in a prediction.
	 */
	private Jama.Matrix targetVector;

	/** Optional model applied to the example set after the nominal transformation; may be {@code null}. */
	private Model normalizationModel;

	/** Input vectors of the training examples, one row per training instance. */
	private double[][] trainingVectors;

	/** Optional model applied to the example set before normalization; may be {@code null}. */
	private Model nominalTransformationModel;

	/**
	 * Example set whose nominal attribute mappings are copied into prediction
	 * example sets by {@link #checkMapping(ExampleSet)}. Stays {@code null} when
	 * the model was created through the protected constructor.
	 */
	private ExampleSet originalHeader;

	/**
	 * Turn off all checks and conversions? Turning them off assumes that data
	 * is purely numeric, doesn't contain any missing values, and has a numeric
	 * class.
	 */
	private boolean m_checksTurnedOff = false;

	/** Kernel used to compare an example with each training vector. */
	private Kernel kernel;

	/**
	 * Creates a model that only knows the training header and the number of
	 * training instances. All other fields keep their defaults, so such a model
	 * cannot predict until a subclass fills them in.
	 *
	 * @param trainingExampleSet the example set the model was trained on
	 */
	protected GaussianProcessesModel(ExampleSet trainingExampleSet) {
		super(trainingExampleSet);

		m_NumTrain = trainingExampleSet.size();
	}

	/**
	 * Creates a fully initialized model.
	 *
	 * @param trainingExampleSet the (already transformed) training example set;
	 *            its size is taken as the number of training instances
	 * @param trainingVectors input vectors of the training examples
	 * @param kernel kernel used for predictions
	 * @param normalization model applied before prediction, or {@code null}
	 * @param avgTargetValues average target value added to every prediction
	 * @param covarianceMatrix matrix reported by {@link #toString()}
	 * @param targetVector per-training-instance weights used for predictions
	 * @param nominalToBinominal model applied before normalization, or {@code null}
	 * @param originalExampleSet example set providing the nominal mappings
	 *            that prediction example sets are aligned with
	 */
	public GaussianProcessesModel(
			ExampleSet trainingExampleSet,
			double[][] trainingVectors,
			Kernel kernel,
			Model normalization, double avgTargetValues,
			Jama.Matrix covarianceMatrix, Jama.Matrix targetVector,
			Model nominalToBinominal,
			ExampleSet originalExampleSet) {

		super(trainingExampleSet);

		m_NumTrain = trainingExampleSet.size();

		this.trainingVectors = trainingVectors;
		this.kernel = kernel;
		this.normalizationModel = normalization;
		this.m_avg_target = avgTargetValues;
		this.covarianceMatrix = covarianceMatrix;
		this.targetVector = targetVector;
		this.nominalTransformationModel = nominalToBinominal;
		this.originalHeader = originalExampleSet;
	}

	/**
	 * Transforms the given example set the same way the training data was
	 * transformed and writes a prediction into every example.
	 *
	 * @param exampleSet the example set to predict
	 * @param predictedLabel the attribute that receives the predictions
	 * @return the transformed example set carrying the predictions
	 * @throws OperatorException if a transformation fails, the attribute sets
	 *             are incompatible, or an example cannot be predicted
	 */
	@Override
	public ExampleSet performPrediction(ExampleSet exampleSet,
			Attribute predictedLabel) throws OperatorException {

		// Align nominal mappings first: the transformation models below are
		// applied to the set whose mappings were completed here.
		checkMapping(exampleSet);

		if (!m_checksTurnedOff) {
			exampleSet = new ReplaceMissingExampleSet(exampleSet);
		}

		if (nominalTransformationModel != null) {
			exampleSet = nominalTransformationModel.apply(exampleSet);
		}

		if (normalizationModel != null) {
			exampleSet = normalizationModel.apply(exampleSet);
		}

		checkExampleSetCompatibility(exampleSet);

		Iterator<Example> reader = exampleSet.iterator();
		while (reader.hasNext()) {
			Example example = reader.next();

			try {
				applyModelForExample(example, predictedLabel);
			} catch (Exception failure) {
				// applyModelForExample wraps classification errors, so prefer the
				// wrapped cause; fall back to the exception itself so the origin
				// of the failure is never reported as null.
				Throwable cause = failure.getCause() != null ? failure.getCause() : failure;
				throw new OperatorException("Couldn't apply ExampleSet to Model", cause);
			}
		}

		return exampleSet;
	}

	/**
	 * Adds every nominal value known from the original header to the mapping
	 * of the equally named attribute of the given example set, if that value
	 * is not mapped there yet. Does nothing when no original header is stored.
	 */
	private void checkMapping(ExampleSet exampleSet) {

		if (originalHeader == null) {
			// Model was created without an original header; nothing to align.
			return;
		}

		for (Attribute headerAttribute : originalHeader.getAttributes()) {

			if (!headerAttribute.isNominal()) {
				continue;
			}

			Attribute exampleAttribute = exampleSet.getAttributes().get(headerAttribute.getName());
			if (exampleAttribute == null) {
				continue;
			}

			NominalMapping headerMapping = headerAttribute.getMapping();
			NominalMapping predictionMapping = exampleAttribute.getMapping();

			for (String nominalValue : headerMapping.getValues()) {
				if (!predictionMapping.getValues().contains(nominalValue)) {
					predictionMapping.mapString(nominalValue);
				}
			}
		}
	}

	/**
	 * Computes the prediction for a single example.
	 *
	 * @param ex the example to predict; expected to be transformed like the
	 *            training data
	 * @return the kernel-weighted sum over all training vectors plus the
	 *         average target value
	 * @throws IllegalStateException if the model has no kernel, training
	 *             vectors or target vector
	 * @throws Exception if the kernel evaluation fails
	 */
	public double classifyInstance(Example ex) throws Exception {

		if (kernel == null || trainingVectors == null || targetVector == null) {
			throw new IllegalStateException("Gaussian Processes: No model built yet.");
		}

		double[] inputVector = RVMModel.makeInputVector(ex);

		// Dot product of the kernel vector k(x, x_i) with the target vector,
		// accumulated in index order without building intermediate matrices.
		double weightedSum = 0.0;
		for (int i = 0; i < m_NumTrain; i++) {
			weightedSum += kernel.eval(inputVector, trainingVectors[i]) * targetVector.get(i, 0);
		}

		return weightedSum + m_avg_target;
	}

	/**
	 * Predicts the given example and stores the result in the given attribute.
	 *
	 * @param e the example to predict
	 * @param predictedLabelAttribute the attribute that receives the prediction
	 * @throws Exception wrapping the original error if the prediction fails;
	 *             the error is logged before it is rethrown
	 */
	public void applyModelForExample(Example e, Attribute predictedLabelAttribute) throws Exception {

		double predictedLabel;

		try {
			predictedLabel = classifyInstance(e);
		} catch (Exception exc) {
			logError("Exception occured while classifying example:" + exc.getMessage() + " [" + exc.getClass() + "]");

			throw new Exception(exc);
		}

		e.setValue(predictedLabelAttribute, predictedLabel);
	}

	/**
	 * Returns a short textual summary: the kernel, the average target value
	 * and the lowest and highest entries of the covariance matrix and of the
	 * target vector.
	 *
	 * @return the summary, a "no model" notice when no target vector is set,
	 *         or a fallback message when the summary cannot be built
	 */
	@Override
	public String toString() {

		if (targetVector == null) {
			return "Gaussian Processes: No model built yet.";
		}

		StringBuilder text = new StringBuilder();

		try {

			text.append("Gaussian Processes\n\n");
			text.append("Kernel used:\n  ").append(kernel.toString()).append("\n\n");

			text.append("Average Target Value : ").append(m_avg_target).append("\n");

			text.append("Inverted Covariance Matrix:\n");
			double min = covarianceMatrix.get(0, 0);
			double max = min;
			for (int i = 0; i < m_NumTrain; i++) {
				for (int j = 0; j < m_NumTrain; j++) {
					double value = covarianceMatrix.get(i, j);
					if (value < min) {
						min = value;
					} else if (value > max) {
						max = value;
					}
				}
			}
			text.append("    Lowest Value = ").append(min).append("\n");
			text.append("    Highest Value = ").append(max).append("\n");

			text.append("Inverted Covariance Matrix * Target-value Vector:\n");
			min = targetVector.get(0, 0);
			max = min;
			for (int i = 0; i < m_NumTrain; i++) {
				double value = targetVector.get(i, 0);
				if (value < min) {
					min = value;
				} else if (value > max) {
					max = value;
				}
			}
			text.append("    Lowest Value = ").append(min).append("\n");
			text.append("    Highest Value = ").append(max).append("\n \n");

		} catch (Exception e) {
			// Covers missing kernel/matrix and empty matrices alike.
			return "Can't print the classifier.";
		}

		return text.toString();
	}

	/**
	 * Intentionally empty: PredictionModel checks the example set before the
	 * transformations of this model are applied. The real check is done by
	 * {@link #checkExampleSetCompatibility(ExampleSet)} after the transformation.
	 */
	@Override
	protected void checkCompatibility(ExampleSet exampleSet)
			throws OperatorException {
	}

	/**
	 * Verifies that the transformed example set has as many attributes as the
	 * training header. If it has more, the attributes not contained in the
	 * training header are removed first.
	 *
	 * @throws OperatorException (a UserError) if the attribute counts still
	 *             differ after the projection attempt
	 */
	private void checkExampleSetCompatibility(ExampleSet exampleSet) throws OperatorException {

		ExampleSet header = this.getTrainingHeader();

		if (header.getAttributes().size() != exampleSet.getAttributes().size()) {

			logWarning("Training and prediction ExampleSet differ in size");

			if (exampleSet.getAttributes().size() > header.getAttributes().size()) {

				logNote("Try a projection to the attributes used in training");

				Iterator<Attribute> predictionAttributes = exampleSet.getAttributes().iterator();

				while (predictionAttributes.hasNext()) {
					Attribute candidate = predictionAttributes.next();

					if (!header.getAttributes().contains(candidate)) {
						predictionAttributes.remove();
					}
				}
			}
		}

		// Check the size again to see whether the projection has helped.
		if (header.getAttributes().size() != exampleSet.getAttributes().size()) {
			// Stop the prediction and report the mismatch.
			throw new UserError(null, 925, "Different numbers of attributes");
		}

		//TODO: check if the order of the attributes is messed up
	}
}