/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.nio.model;

import java.io.File;
import java.nio.charset.Charset;
import java.util.List;

import javax.swing.table.TableModel;

import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.nio.CSVExampleSource;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.io.Encoding;

/**
 * Holds the syntactical settings used for parsing a CSV file (file location,
 * separators, quoting, escaping, comment handling and character encoding) and
 * acts as a {@link DataResultSetFactory} that creates {@link CSVResultSet}s
 * from those settings.
 *
 * @author Simon Fischer
 */
public class CSVResultSetConfiguration implements DataResultSetFactory {

    /** Path of the CSV file; stays {@code null} until one is set. */
    private String csvFile;

    private boolean skipComments = true;
    private boolean useQuotes = true;
    private boolean skipUTF8BOM = false;
    private boolean trimLines = false;

    private String columnSeparators = ";";

    private char quoteCharacter = '"';
    private char escapeCharacter = '\\';
    private String commentCharacters = "#";

    private Charset encoding = Charset.defaultCharset();

    /** Assigned by {@link #makePreviewTableModel(ProgressListener)}; {@code null} before that. */
    private List<ParsingError> errors;

    /**
     * Creates a configuration that carries only the field defaults declared
     * in this class; no file is set.
     */
    public CSVResultSetConfiguration() {
    }

    /**
     * Creates a configuration whose settings are taken from the parameters of
     * the given operator. String and character parameters that are not set on
     * the operator keep the defaults declared in this class.
     *
     * @param csvExampleSource the operator to read the settings from
     * @throws OperatorException declared because the parameter accessors of
     *         the operator may fail
     */
    public CSVResultSetConfiguration(CSVExampleSource csvExampleSource) throws OperatorException {
        // The file is resolved through the operator's selected file; the raw
        // PARAMETER_CSV_FILE value is not read here.
        if (csvExampleSource.isFileSpecified()) {
            File selectedFile = csvExampleSource.getSelectedFile();
            setCsvFile(selectedFile.getAbsolutePath());
        }

        // Boolean switches are read unconditionally.
        // PARAMETER_USE_FIRST_ROW_AS_ATTRIBUTE_NAMES is not handled by this class.
        boolean skipCommentsValue = csvExampleSource.getParameterAsBoolean(CSVExampleSource.PARAMETER_SKIP_COMMENTS);
        setSkipComments(skipCommentsValue);
        boolean useQuotesValue = csvExampleSource.getParameterAsBoolean(CSVExampleSource.PARAMETER_USE_QUOTES);
        setUseQuotes(useQuotesValue);
        boolean trimLinesValue = csvExampleSource.getParameterAsBoolean(CSVExampleSource.PARAMETER_TRIM_LINES);
        setTrimLines(trimLinesValue);

        // The remaining settings are only overwritten when the parameter is set.
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_COLUMN_SEPARATORS)) {
            String separators = csvExampleSource.getParameterAsString(CSVExampleSource.PARAMETER_COLUMN_SEPARATORS);
            setColumnSeparators(separators);
        }
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_ESCAPE_CHARACTER)) {
            char escape = csvExampleSource.getParameterAsChar(CSVExampleSource.PARAMETER_ESCAPE_CHARACTER);
            setEscapeCharacter(escape);
        }
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_COMMENT_CHARS)) {
            String comments = csvExampleSource.getParameterAsString(CSVExampleSource.PARAMETER_COMMENT_CHARS);
            setCommentCharacters(comments);
        }
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_QUOTES_CHARACTER)) {
            char quote = csvExampleSource.getParameterAsChar(CSVExampleSource.PARAMETER_QUOTES_CHARACTER);
            setQuoteCharacter(quote);
        }

        this.encoding = Encoding.getEncoding(csvExampleSource);
    }

    /**
     * Writes the settings of this configuration into the parameters of the
     * given reader. Boolean and character values are passed as strings.
     * The UTF-8 BOM flag is not written.
     *
     * @param source the reader whose parameters are set
     */
    @Override
    public void setParameters(AbstractDataResultSetReader source) {
        source.setParameter(CSVExampleSource.PARAMETER_CSV_FILE, getCsvFile());
        source.setParameter(CSVExampleSource.PARAMETER_SKIP_COMMENTS, Boolean.toString(isSkipComments()));
        source.setParameter(CSVExampleSource.PARAMETER_USE_QUOTES, Boolean.toString(isUseQuotes()));
        // PARAMETER_USE_FIRST_ROW_AS_ATTRIBUTE_NAMES is not written by this class.
        source.setParameter(CSVExampleSource.PARAMETER_COLUMN_SEPARATORS, getColumnSeparators());
        source.setParameter(CSVExampleSource.PARAMETER_TRIM_LINES, Boolean.toString(isTrimLines()));
        source.setParameter(CSVExampleSource.PARAMETER_QUOTES_CHARACTER, Character.toString(getQuoteCharacter()));
        source.setParameter(CSVExampleSource.PARAMETER_ESCAPE_CHARACTER, Character.toString(getEscapeCharacter()));
        source.setParameter(CSVExampleSource.PARAMETER_COMMENT_CHARS, getCommentCharacters());
        source.setParameter(Encoding.PARAMETER_ENCODING, encoding.name());
    }

    /**
     * Creates a new {@link CSVResultSet} from this configuration.
     *
     * @param operator passed through to the {@link CSVResultSet} constructor;
     *        {@link #makePreviewTableModel(ProgressListener)} passes {@code null}
     * @return the newly created result set
     * @throws OperatorException if the result set cannot be created
     */
    @Override
    public DataResultSet makeDataResultSet(Operator operator) throws OperatorException {
        return new CSVResultSet(this, operator);
    }

    /**
     * Creates a preview table model on top of a freshly created result set
     * (created without an operator) and remembers that result set's error
     * list so it can be fetched through {@link #getErrors()}.
     *
     * @param listener passed through to the {@link DefaultPreview}
     * @return the preview table model
     * @throws OperatorException if the result set or the preview cannot be created
     * @throws ParseException if creating the preview fails on parsing
     */
    @Override
    public TableModel makePreviewTableModel(ProgressListener listener) throws OperatorException, ParseException {
        DataResultSet previewData = makeDataResultSet(null);
        // This class only ever builds CSVResultSets, hence the cast.
        CSVResultSet csvData = (CSVResultSet) previewData;
        this.errors = csvData.getErrors();
        return new DefaultPreview(previewData, listener);
    }

    /** Sets the path of the CSV file. */
    public void setCsvFile(String csvFile) {
        this.csvFile = csvFile;
    }

    /** Returns the path of the CSV file, or {@code null} if none has been set. */
    public String getCsvFile() {
        return csvFile;
    }

    /**
     * Returns the CSV file path wrapped in a {@link File}, or {@code null}
     * if no path has been set.
     */
    public File getCsvFileAsFile() {
        if (csvFile == null) {
            return null;
        }
        return new File(csvFile);
    }

    /** Sets the use-quotes flag. */
    public void setUseQuotes(boolean useQuotes) {
        this.useQuotes = useQuotes;
    }

    /** Returns the use-quotes flag (defaults to {@code true}). */
    public boolean isUseQuotes() {
        return useQuotes;
    }

    /** Sets the skip-comments flag. */
    public void setSkipComments(boolean skipComments) {
        this.skipComments = skipComments;
    }

    /** Returns the skip-comments flag (defaults to {@code true}). */
    public boolean isSkipComments() {
        return skipComments;
    }

    /** Sets the column separators string. */
    public void setColumnSeparators(String columnSeparators) {
        this.columnSeparators = columnSeparators;
    }

    /** Returns the column separators string (defaults to {@code ";"}). */
    public String getColumnSeparators() {
        return columnSeparators;
    }

    /** Sets the comment characters string. */
    public void setCommentCharacters(String commentCharacters) {
        this.commentCharacters = commentCharacters;
    }

    /** Returns the comment characters string (defaults to {@code "#"}). */
    public String getCommentCharacters() {
        return commentCharacters;
    }

    /** Sets the escape character. */
    public void setEscapeCharacter(char escapeCharacter) {
        this.escapeCharacter = escapeCharacter;
    }

    /** Returns the escape character (defaults to a backslash). */
    public char getEscapeCharacter() {
        return escapeCharacter;
    }

    /** Sets the quote character. */
    public void setQuoteCharacter(char quoteCharacter) {
        this.quoteCharacter = quoteCharacter;
    }

    /** Returns the quote character (defaults to a double quote). */
    public char getQuoteCharacter() {
        return quoteCharacter;
    }

    /** Sets the trim-lines flag. */
    public void setTrimLines(boolean trimLines) {
        this.trimLines = trimLines;
    }

    /** Returns the trim-lines flag (defaults to {@code false}). */
    public boolean isTrimLines() {
        return trimLines;
    }

    /** Sets the character encoding. */
    public void setEncoding(Charset encoding) {
        this.encoding = encoding;
    }

    /** Returns the character encoding (defaults to the platform default charset). */
    public Charset getEncoding() {
        return encoding;
    }

    /** Returns the skip-UTF-8-BOM flag (defaults to {@code false}). */
    public boolean isSkippingUTF8BOM() {
        return skipUTF8BOM;
    }

    /** Sets the skip-UTF-8-BOM flag. */
    public void setSkipUTF8BOM(boolean skipUTF8BOM) {
        this.skipUTF8BOM = skipUTF8BOM;
    }

    /** Returns the CSV file path as the resource name. */
    @Override
    public String getResourceName() {
        return getCsvFile();
    }

    /** Returns a newly constructed {@link ExampleSetMetaData} (no-argument constructor). */
    @Override
    public ExampleSetMetaData makeMetaData() {
        return new ExampleSetMetaData();
    }

    /**
     * Returns the error list captured by the last call to
     * {@link #makePreviewTableModel(ProgressListener)}, or {@code null} if
     * that method has not been called yet.
     */
    public List<ParsingError> getErrors() {
        return errors;
    }

    /** Does nothing. */
    @Override
    public void close() {
    }
}