/*
*
* YAQP - Yet Another QSAR Project:
* Machine Learning algorithms designed for the prediction of toxicological
* features of chemical compounds become available on the Web. Yaqp is developed
* under OpenTox (http://opentox.org) which is an FP7-funded EU research project.
* This project was developed at the Automatic Control Lab in the Chemical Engineering
* School of the National Technical University of Athens. Please read README for more
* information.
*
* Copyright (C) 2009-2010 Pantelis Sopasakis & Charalampos Chomenides
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Contact:
* Pantelis Sopasakis
* chvng@mail.ntua.gr
* Address: Iroon Politechniou St. 9, Zografou, Athens Greece
* tel. +30 210 7723236
*/
package org.opentox.qsar.processors.predictors;
import java.util.ArrayList;
import org.opentox.core.exceptions.Cause;
import org.opentox.core.exceptions.YaqpException;
import org.opentox.core.processors.Processor;
import org.opentox.db.handlers.ReaderHandler;
import org.opentox.db.util.Page;
import org.opentox.ontology.components.ComponentList;
import org.opentox.ontology.components.Feature;
import org.opentox.ontology.components.QSARModel;
import org.opentox.ontology.components.YaqpComponent;
import org.opentox.qsar.exceptions.QSARException;
import weka.core.Instances;
/**
 * Base class for predictors that apply a stored {@link QSARModel} to a Weka
 * dataset. This class validates the model and the incoming data (see
 * {@link #process(Instances)}) and delegates the actual computation to
 * {@link #predict(Instances)}, which concrete subclasses implement.
 *
 * @author Pantelis Sopasakis
 * @author Charalampos Chomenides
 */
abstract public class WekaPredictor extends Processor<Instances, Instances> {

    /** Line separator used when listing the missing features in error messages. */
    private static final String NEWLINE = "\n";

    /**
     * The QSAR model to be used by the predictor. It stays <code>null</code> when
     * the no-argument constructor is used, until a subclass assigns it.
     */
    protected QSARModel model;

    /**
     * Creates a predictor without a model. The {@link #model} field is left
     * <code>null</code>; {@link #process(Instances)} rejects such a predictor
     * with a {@link QSARException} until a model has been assigned.
     */
    protected WekaPredictor() {
    }

    /**
     * Construct a new QSAR predictor given a QSARModel. The main information needed
     * in the QSARModel are the code (i.e. a pointer to the filepath where the model is) and
     * the algorithm used to train the model.
     *
     * @param model
     *      Prototype for a model in the database.
     * @throws NullPointerException
     *      In case the provided model is <code>null</code>.
     * @throws QSARException
     *      In case the provided prototype has no code, or does not correspond
     *      to a database entry for a QSARModel.
     */
    public WekaPredictor(QSARModel model) throws QSARException {
        if (model == null) {
            throw new NullPointerException("Cannot predict from a null model - Provide a model first");
        }
        this.model = model;
        // CHECK IF THE PROVIDED MODEL CONTAINS THE CODE OF THE QSAR MODEL.
        if (model.getCode() == null) {
            throw new QSARException(Cause.XQPred630,
                    "Unknown Model - Probably the model was deleted or does not exist");
        }
        // CHECK IF THE PROVIDED MODEL PROTOTYPE CORRESPONDS TO A DATABASE ENTRY.
        if (!modelExists()) {
            throw new QSARException(Cause.XQPred631, "This model does not exist");
        }
    }

    /**
     * Checks whether the requested model exists in the database. If not, a status
     * code 404 (not found) should be returned to the client. When at least one
     * match is found, {@link #model} is replaced by the first search result.
     *
     * @return
     *      <code>true</code> if the model is found, <code>false</code> otherwise
     *      (including the case where the search itself fails).
     */
    private boolean modelExists() {
        // NOTE(review): the search uses the supplied model itself as the prototype.
        // A previous id-only prototype was built here but never passed to the
        // search - confirm whether an id-only lookup was intended.
        try {
            ComponentList<YaqpComponent> matches = ReaderHandler.search(model, new Page(), false);
            if (matches.size() > 0) {
                this.model = (QSARModel) matches.getFirst();
                return true;
            }
        } catch (YaqpException ignored) {
            // Deliberately ignored: a failed lookup is reported to the caller
            // as "model not found" by falling through to the return below.
        }
        return false;
    }

    /**
     * Collects the URIs of the model's independent features for which the given
     * dataset has no attribute of the same name.
     *
     * @param data
     *      Dataset to be checked against the model.
     * @return
     *      URIs of the independent features absent from <code>data</code>;
     *      empty if the dataset contains all of them.
     */
    private ArrayList<String> missingFeatures(Instances data) {
        ArrayList<String> missing = new ArrayList<String>();
        for (Feature feature : model.getIndependentFeatures()) {
            String uri = feature.getURI();
            // Attributes are looked up by name; the feature URI serves as that name.
            if (data.attribute(uri) == null) {
                missing.add(uri);
            }
        }
        return missing;
    }

    /**
     * Builds the error message reported when the dataset lacks features that the
     * model needs: a fixed introduction followed by one feature URI per line.
     *
     * @param missing
     *      URIs of the features that were not found in the dataset.
     * @return
     *      The message to be attached to the exception.
     */
    private static String incompatibleDatasetMessage(ArrayList<String> missing) {
        StringBuilder message = new StringBuilder(
                "The dataset you provided is incompatible with this model because it does not "
                + "contain the following features : ");
        message.append(NEWLINE);
        for (String uri : missing) {
            message.append(uri).append(NEWLINE);
        }
        return message.toString();
    }

    /**
     * Perform the prediction using the QSARModel provided upon construction of this object.
     * The model and the dataset are validated first; the prediction itself is
     * delegated to {@link #predict(Instances)}.
     *
     * @param data
     *      Data submitted to the predictor to produce predictions.
     * @return
     *      Instances containing the predicted values for every compound in the initial
     *      given set of data.
     * @throws YaqpException
     *      In case the prediction is infeasible, for example if no model is set,
     *      the algorithm of the model is unknown, or the independent
     *      features of the QSAR model are not included in the given dataset.
     */
    public Instances process(Instances data) throws YaqpException {
        // The model may be null when the predictor was created through the
        // no-argument constructor; report that as an unknown model instead of
        // failing with a NullPointerException.
        if (model == null
                || model.getAlgorithm() == null
                || model.getAlgorithm().getMeta() == null
                || model.getAlgorithm().getMeta().getName() == null) {
            throw new QSARException(Cause.XQPred631, "Unknown Model - Probably the model was deleted or does not exist");
        }
        ArrayList<String> missing = missingFeatures(data);
        if (!missing.isEmpty()) {
            throw new QSARException(Cause.XQPred632, incompatibleDatasetMessage(missing));
        }
        return predict(data);
    }

    /**
     * Performs the prediction.
     *
     * @param data
     *      Dataset for which the predictions are requested.
     * @return
     *      Dataset with the predictions.
     * @throws QSARException
     *      In case the prediction cannot be carried out.
     */
    public abstract Instances predict(Instances data) throws QSARException;
}