/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.test; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; import java.util.Random; import com.rapidminer.tools.Tools; /** * A helper class for testing purposes which is able to create sample example * tests for tests. * * @author Ingo Mierswa, Simon Fischer * @version $Id: ExampleGenerator.java,v 2.22 2006/03/21 15:35:53 ingomierswa * Exp $ */ public class ExampleGenerator { private static final String[][] NOMINAL_VALUES = { { "hund", "katze", "maus", "elefant" }, { "flasche", "strase", "batzen" }, { "ming", "vase", "line" } }; public static void main(String argv[]) { try { String filestem = argv[0]; int type = Integer.parseInt(argv[1]); int numOfExamples = Integer.parseInt(argv[2]); int numOfRealAttributes = Integer.parseInt(argv[3]); int numOfNominalAttributes = Integer.parseInt(argv[4]); double error = Double.parseDouble(argv[5]); boolean labelled = (argv.length <= 6) || (!argv[6].equals("nolabel")); int numOfAttributes = numOfRealAttributes + numOfNominalAttributes; boolean nominalLabel = false; PrintStream eout; eout = new PrintStream(new FileOutputStream(new File(filestem + ".dat"))); eout.println("# Generated by com.rapidminer.test.ExampleGenerator"); eout.println("# function type: " + type); eout.println("# error: " + error); eout.println("# #examples: " + numOfExamples); Random r = new Random(0); for (int i = 0; i < numOfExamples; i++) { double[] att = new double[numOfAttributes + 1]; for (int j = 0; j < numOfRealAttributes; j++) att[j] = r.nextDouble() * 20 - 10; // att[j] = r.nextDouble()*2000-1000; for (int j = 0; j < numOfNominalAttributes; j++) { int index = j + numOfRealAttributes; att[index] = r.nextInt(NOMINAL_VALUES[index % NOMINAL_VALUES.length].length); } for (int j = 0; j < numOfRealAttributes; j++) att[j] = r.nextDouble() * 20 - 10; // att[j] = r.nextDouble()*2000-1000; for (int j = 0; j < numOfNominalAttributes; j++) { int index = j + numOfRealAttributes; att[index] = r.nextInt(NOMINAL_VALUES[index % NOMINAL_VALUES.length].length); } double f = 1 + r.nextDouble() * 2 * error - error; switch (type) { case 1: att[numOfAttributes] = (att[0] + att[1] + att[2]) * f; break; case 2: att[numOfAttributes] = (att[0] + att[1] - 2 * att[2]) > 0 ? 1 : 0; break; case 3: att[numOfAttributes] = r.nextDouble(); break; case 4: att[numOfAttributes] = (att[0] * att[1] * att[2] + att[0] * att[1] + att[1] * att[1]) * f; break; case 5: att[numOfAttributes] = ((att[numOfAttributes - 3] == 1.0) || ((att[numOfAttributes - 2] == 1.0) && (att[numOfAttributes - 1] == 2.0))) ? 0.0 : 1.0; nominalLabel = true; break; case 6: att[numOfAttributes] = att[0] + att[1]; att[2] = (Math.random() < 0.25) ? Double.NaN : att[2]; break; case 7: att[numOfAttributes] = 2 * att[0] - att[1] - att[2] > 0 ? 0.0 : 1.0; nominalLabel = true; break; case 8: att[numOfAttributes] = Math.sin(att[0] * att[1]) + Math.sin(att[0] + att[1]) > 0 ? 0.0 : 1.0; nominalLabel = true; break; case 9: att[numOfAttributes] = Math.sin(att[0] * att[1]) + Math.sin(att[0] + att[1]) + att[0] * att[1] + att[0] + att[1] > 0 ? 0.0 : 1.0; nominalLabel = true; break; case 10: att[numOfAttributes] = Math.sin(att[0] * att[1]) + Math.sin(att[0] + att[1]) * f; break; case 11: att[numOfAttributes] = 50 * Math.sin(att[0] * att[1]) + 20 * Math.sin(att[0] + att[1]) + 5 * att[0] * att[1] + att[0] + 3 * att[1] * f; break; case 12: att[numOfAttributes] = 10 * Math.sin(3 * att[0]) + 12 * Math.sin(7 * att[0]) + 11 * Math.sin(5 * att[1]) + 9 * Math.sin(10 * att[1]) + 10 * Math.sin(8 * (att[0] + att[1])); break; case 13: att[numOfAttributes] = Math.sin(10 * att[0]) + Math.sin(15 * att[1]); break; case 14: att[numOfAttributes] = Math.sin(att[0]) > 0 ? 0 : 1; nominalLabel = true; break; } // if (labelled && nominalLabel && (att[numOfAttributes] == // 1.0)) { for (int j = 0; j < numOfAttributes; j++) { if (j < numOfRealAttributes) eout.print(att[j] + "\t"); else eout.print(NOMINAL_VALUES[j % NOMINAL_VALUES.length][(int) att[j]] + "\t"); } if (labelled) { if (nominalLabel) { eout.print((att[numOfAttributes] == 0.0) ? "positive" : "negative"); } else { eout.print(att[numOfAttributes]); } } eout.println(); // } } eout.close(); PrintStream aout = new PrintStream(new FileOutputStream(new File(filestem + ".att.xml"))); aout.println("<!--"); aout.println(" Generated by com.rapidminer.test.ExampleGenerator"); aout.println(" function type: " + type); aout.println(" error: " + error); aout.println("-->" + Tools.getLineSeparator()); aout.println("<attributeset default_source=\"" + filestem + ".dat\">"); for (int i = 0; i <= numOfAttributes; i++) { aout.println((i < numOfAttributes) ? " <attribute" : " <label"); aout.println(" name = \"" + ((i < numOfAttributes) ? "att" + (i + 1) + "" : "label") + "\""); String sourcecol = (i + 1) + ""; if ((i == numOfAttributes) && !labelled) sourcecol = "none"; aout.println(" sourcecol = \"" + sourcecol + "\""); aout.println(" valuetype = \"" + (((i >= numOfRealAttributes) && ((i != numOfAttributes) || nominalLabel)) ? "nominal\"" : "real\"")); if ((i >= numOfRealAttributes) && ((i != numOfAttributes) || nominalLabel)) { aout.print(" classes = \""); if (i == numOfAttributes) aout.print("negative positive"); else { for (int c = 0; c < NOMINAL_VALUES[i % NOMINAL_VALUES.length].length; c++) { aout.print((c > 0 ? " " : "") + NOMINAL_VALUES[i % NOMINAL_VALUES.length][c]); } } aout.println("\""); } aout.println(" blocktype = \"single_value\""); // aout.println(" blocknumber = \""+(i+1)+"\""); aout.println(" unit = \"\""); aout.println(" />"); } aout.println("</attributeset>"); aout.close(); PrintStream xout = new PrintStream(new FileOutputStream(new File("xp." + filestem + ".xml"))); xout.println("<!--"); xout.println(" Generated by com.rapidminer.test.ExampleGenerator"); xout.println(" function type: " + type); xout.println(" error: " + error); xout.println("-->"); xout.println(); xout.println("<operator name=\"Globalator\" class=\"OperatorChain\">"); xout.println(" <parameter key=\"logfile\" value=\"Log_" + filestem + ".txt\"/>"); xout.println(" <parameter key=\"logverbosity\" value=\"0\"/>"); xout.println(" <parameter key=\"resultfile\" value=\"Result_" + filestem + ".txt\"/>"); xout.println(" <parameter key=\"temp_dir\" value=\"./tmp\"/>"); xout.println(); xout.println(" <operator name=\"Initiator\" class=\"ExampleSource\">"); xout.println(" <parameter key=\"attributes\" value=\"./" + filestem + ".att.xml\"/>"); xout.println(" </operator>"); xout.println(); xout.println(" <!-- insert process definition here -->"); xout.println(); xout.println("</operator>"); xout.close(); } catch (FileNotFoundException e) { e.printStackTrace(); System.err.println("Usage: ExampleGenerator filestem functiontype #examples #realattributes #nominalattributes error [nolabel]"); } } }