/* * * YAQP - Yet Another QSAR Project: * Machine Learning algorithms designed for the prediction of toxicological * features of chemical compounds become available on the Web. Yaqp is developed * under OpenTox (http://opentox.org) which is an FP7-funded EU research project. * This project was developed at the Automatic Control Lab in the Chemical Engineering * School of the National Technical University of Athens. Please read README for more * information. * * Copyright (C) 2009-2010 Pantelis Sopasakis & Charalampos Chomenides * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * Contact: * Pantelis Sopasakis * chvng@mail.ntua.gr * Address: Iroon Politechniou St. 9, Zografou, Athens Greece * tel. +30 210 7723236 */ package org.opentox.ontology.data; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.ontology.Individual; import com.hp.hpl.jena.vocabulary.DC; import java.util.Enumeration; import org.opentox.config.Configuration; import org.opentox.io.publishable.OntObject; import org.opentox.io.publishable.RDFObject; import org.opentox.ontology.namespaces.OTClass; import org.opentox.ontology.namespaces.OTDataTypeProperties; import org.opentox.ontology.namespaces.OTObjectProperties; import weka.core.Attribute; import weka.core.Instances; /** * * @author Pantelis Sopasakis * @author Charalampos Chomenides */ public class DatasetFactory { public static Dataset getDataset(final OntObject data) { return new Dataset(data); } /** * Creates a {@link Dataset dataset object} from a weka <code>Instances</code> one. * @param data * The data in a weka <code>Instances</code> object. * @return * Instance of {@link Dataset } * @see DatasetFactory#getDataset(org.opentox.io.publishable.OntObject) create form ontological object. */ public static Dataset getDataset(final Instances data) { final int NUM_COMPOUNDS = data.numInstances(); final int NUM_FEATURES = data.numAttributes(); Attribute compoundAttribute = data.attribute(0); Attribute predictionAttribute = data.attribute(1); OntObject oo = new RDFObject(); oo.includeOntClasses(OTClass.Compound, OTClass.Feature, OTClass.NominalFeature, OTClass.NumericFeature, OTClass.FeatureValue); Individual predictionIndiv = null; int targetType = predictionAttribute.type(); if (targetType == Attribute.NUMERIC) { predictionIndiv = oo.createIndividual(predictionAttribute.name(), OTClass.NumericFeature.getOntClass(oo)); } else if (targetType == Attribute.NOMINAL) { predictionIndiv = oo.createIndividual(predictionAttribute.name(), OTClass.NominalFeature.getOntClass(oo)); Enumeration nominalValues = predictionAttribute.enumerateValues(); while (nominalValues.hasMoreElements()) { predictionIndiv.addProperty(OTDataTypeProperties.acceptValue.createProperty(oo), oo.createTypedLiteral(nominalValues.nextElement().toString())); } } Individual dataset; dataset = oo.createIndividual(OTClass.Dataset.getOntClass(oo)); dataset.addProperty(oo.createAnnotationProperty(DC.title.getURI()), oo.createTypedLiteral("Predicted values for the feature " + data.attribute(1).name())); dataset.addProperty(oo.createAnnotationProperty(DC.creator.getURI()), oo.createTypedLiteral(Configuration.BASE_URI, XSDDatatype.XSDanyURI)); dataset.addProperty( oo.createAnnotationProperty(DC.description.getURI()), oo.createTypedLiteral("A dataset containing the predicted values for the feature :" + data.attribute(1).name())); dataset.addProperty(oo.createAnnotationProperty(DC.creator.getURI()), oo.createTypedLiteral(Configuration.BASE_URI)); Individual dataEntry = null; Individual compound = null; Individual featureValue = null; for (int i = 0; i < NUM_COMPOUNDS; i++) { dataEntry = oo.createIndividual(OTClass.DataEntry.getOntClass(oo)); compound = oo.createIndividual(data.instance(i).stringValue(compoundAttribute), OTClass.Compound.getOntClass(oo)); dataEntry.addProperty(OTObjectProperties.compound.createProperty(oo), compound); featureValue = oo.createIndividual(OTClass.FeatureValue.getOntClass(oo)); featureValue.addProperty(OTObjectProperties.feature.createProperty(oo), predictionIndiv); try { if (targetType == Attribute.NUMERIC) { double value = data.instance(i).value(predictionAttribute); featureValue.addLiteral(OTDataTypeProperties.value.createProperty(oo), oo.createTypedLiteral(value, XSDDatatype.XSDdouble)); } else if (targetType == Attribute.NOMINAL) { String value = predictionAttribute.value((int) data.instance(i).value(predictionAttribute)); featureValue.addLiteral(OTDataTypeProperties.value.createProperty(oo), oo.createTypedLiteral(value, XSDDatatype.XSDstring)); } } catch (Exception ex) { System.out.println(ex); } dataEntry.addProperty(OTObjectProperties.values.createProperty(oo), featureValue); dataset.addProperty(OTObjectProperties.dataEntry.createProperty(oo), dataEntry); } dataset.addProperty(OTObjectProperties.dataEntry.createProperty(oo), dataEntry); return getDataset(oo); } }