/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package de.tud.inf.operator.io; import java.io.BufferedReader; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Random; import com.rapidminer.Process; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ProcessRootOperator; import com.rapidminer.operator.UserError; import com.rapidminer.operator.io.SimpleExampleSource; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeFile; import com.rapidminer.parameter.ParameterTypeString; /** * <p>This operator can read csv files. All values must be separated by * ",", by ";", or by white space like tabs. * The first line is used for attribute names as default.</p> * * <p>For other file formats or column separators you can * use in almost all cases the operator {@link SimpleExampleSource} * or, if this is not sufficient, the operator {@link ExampleSource}.</p> * * @rapidminer.index HTTP-CSV * @author Ingo Mierswa, Peter Benjamin Volk * @version $Id: CSVExampleSource.java,v 1.5 2008/07/07 07:06:38 ingomierswa Exp $ */ public class HTTPCSVExampleSource extends SimpleExampleSource { public static final String URL = "connectString"; public HTTPCSVExampleSource(OperatorDescription description) { super(description); } public Class<?>[] getInputClasses() { return new Class[0]; } public Class<?>[] getOutputClasses() { return new Class[] { ExampleSet.class }; } public IOObject[] apply() throws OperatorException{ //open HTTP connection and read data from proxy Random r = new Random(); File tmpFile; try { tmpFile = File.createTempFile("httpcvssource_"+r.nextInt(), null); URL page; page = new URL(getParameterAsString(URL)); // Process the URL far enough to find the right handler URLConnection urlc = page.openConnection(); urlc.setUseCaches(false); // Don't look at possibly cached data BufferedReader br = new BufferedReader(new InputStreamReader(urlc.getInputStream())); String tmp = null; FileWriter fw = new FileWriter(tmpFile); while ((tmp = br.readLine() ) != null) { fw.write(tmp+"\n"); } // Close file writer fw.close(); setParameter(super.PARAMETER_FILENAME, tmpFile.getCanonicalPath()); } catch (MalformedURLException e) { throw new UserError(this, e, 302, new Object[] { e.getMessage() }); } catch (IOException e1) { throw new UserError(this, e1, 302, new Object[] { e1.getMessage() }); } IOObject[] retValues = super.apply(); return retValues; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = new LinkedList<ParameterType>(); types.add(new ParameterTypeString(URL, "Defines the URL that the csv file is read from", false)); types.addAll(super.getParameterTypes()); Iterator<ParameterType> p = types.iterator(); while (p.hasNext()) { ParameterType type = p.next(); if (type.getKey().equals(PARAMETER_READ_ATTRIBUTE_NAMES)) { type.setDefaultValue(true); } else if (type.getKey().equals(PARAMETER_FILENAME)) { ((ParameterTypeFile)type).setExtension("csv"); type.setDefaultValue("c:/temp"); ((ParameterTypeFile)type).setOptional(true); type.setHidden(true); } else if (type.getKey().equals(PARAMETER_USE_QUOTES)) { type.setDefaultValue(true); } else if (type.getKey().equals(PARAMETER_COLUMN_SEPARATORS)) { type.setDefaultValue(",\\s*|;\\s*"); } } return types; } }