/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.model;
import java.io.File;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.swing.table.TableModel;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.nio.CSVExampleSource;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.io.Encoding;
/**
 * Holds the syntactical configuration (separators, quoting, comments, encoding, header and
 * starting rows, ...) used for parsing a CSV file, and acts as a factory for the
 * {@link DataResultSet} that reads the file with these settings.
 *
 * @author Simon Fischer
 */
public class CSVResultSetConfiguration implements DataResultSetFactory {

    /** Path of the CSV file to parse; {@code null} until a file has been set. */
    private String csvFile;

    private boolean skipComments = true;
    private boolean useQuotes = true;
    private boolean skipUTF8BOM = false;
    private boolean trimLines = false;
    private boolean hasHeaderRow = true;

    private String columnSeparators = ";";
    private char quoteCharacter = '"';
    private char escapeCharacter = '\\';
    private char decimalCharacter = '.';
    private String commentCharacters = "#";

    private int startingRow = 0;
    private int headerRow = 0;

    /** Charset used to decode the file; kept non-null by {@link #setEncoding(Charset)}. */
    private Charset encoding = Charset.defaultCharset();

    /** Errors taken from the result set in {@link #makePreviewTableModel(ProgressListener)}. */
    private List<ParsingError> errors;

    /**
     * Creates a configuration in which every setting has its default value and no file is set.
     */
    public CSVResultSetConfiguration() {}

    /**
     * Creates a configuration from the parameters of the given operator.
     * <p>
     * The file, comment skipping, quote usage, line trimming, column separators, escape
     * character, comment characters, quote character and encoding are read from the operator.
     * Optional parameters that are not set on the operator keep their default value. The header
     * row, starting row, decimal character and UTF-8 BOM settings are not read here and keep
     * their defaults.
     *
     * @param csvExampleSource
     *            the operator whose parameters are read
     * @throws OperatorException
     *             if reading one of the operator's parameters fails
     */
    public CSVResultSetConfiguration(CSVExampleSource csvExampleSource) throws OperatorException {
        if (csvExampleSource.isFileSpecified()) {
            setCsvFile(csvExampleSource.getSelectedFile().getAbsolutePath());
        }
        setSkipComments(csvExampleSource.getParameterAsBoolean(CSVExampleSource.PARAMETER_SKIP_COMMENTS));
        setUseQuotes(csvExampleSource.getParameterAsBoolean(CSVExampleSource.PARAMETER_USE_QUOTES));
        setTrimLines(csvExampleSource.getParameterAsBoolean(CSVExampleSource.PARAMETER_TRIM_LINES));
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_COLUMN_SEPARATORS)) {
            setColumnSeparators(csvExampleSource.getParameterAsString(CSVExampleSource.PARAMETER_COLUMN_SEPARATORS));
        }
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_ESCAPE_CHARACTER)) {
            setEscapeCharacter(csvExampleSource.getParameterAsChar(CSVExampleSource.PARAMETER_ESCAPE_CHARACTER));
        }
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_COMMENT_CHARS)) {
            setCommentCharacters(csvExampleSource.getParameterAsString(CSVExampleSource.PARAMETER_COMMENT_CHARS));
        }
        if (csvExampleSource.isParameterSet(CSVExampleSource.PARAMETER_QUOTES_CHARACTER)) {
            setQuoteCharacter(csvExampleSource.getParameterAsChar(CSVExampleSource.PARAMETER_QUOTES_CHARACTER));
        }
        // Routed through the setter so the "encoding is never null" guarantee also holds here.
        setEncoding(Encoding.getEncoding(csvExampleSource));
    }

    /**
     * Writes this configuration into the parameters of the given reader.
     * <p>
     * Only the file, comment skipping, quote usage, column separators, line trimming, quote
     * character, escape character, comment characters and encoding are written; the remaining
     * settings of this class are not transferred by this method.
     *
     * @param source
     *            the reader whose parameters are set
     */
    @Override
    public void setParameters(AbstractDataResultSetReader source) {
        source.setParameter(CSVExampleSource.PARAMETER_CSV_FILE, getCsvFile());
        source.setParameter(CSVExampleSource.PARAMETER_SKIP_COMMENTS, String.valueOf(isSkipComments()));
        source.setParameter(CSVExampleSource.PARAMETER_USE_QUOTES, String.valueOf(isUseQuotes()));
        source.setParameter(CSVExampleSource.PARAMETER_COLUMN_SEPARATORS, getColumnSeparators());
        source.setParameter(CSVExampleSource.PARAMETER_TRIM_LINES, String.valueOf(isTrimLines()));
        source.setParameter(CSVExampleSource.PARAMETER_QUOTES_CHARACTER, String.valueOf(getQuoteCharacter()));
        source.setParameter(CSVExampleSource.PARAMETER_ESCAPE_CHARACTER, String.valueOf(getEscapeCharacter()));
        source.setParameter(CSVExampleSource.PARAMETER_COMMENT_CHARS, getCommentCharacters());
        // Safe to dereference: setEncoding(Charset) never stores null.
        source.setParameter(Encoding.PARAMETER_ENCODING, encoding.name());
    }

    /**
     * Creates a {@link CSVResultSet} that reads with this configuration.
     *
     * @param operator
     *            the operator handed to the result set; {@link #makePreviewTableModel} passes
     *            {@code null}
     * @return a new result set backed by this configuration
     * @throws OperatorException
     *             if the result set cannot be created
     */
    @Override
    public DataResultSet makeDataResultSet(Operator operator) throws OperatorException {
        return new CSVResultSet(this, operator);
    }

    /**
     * Builds a preview table model for the configured file.
     * <p>
     * A result set is created without an operator, its error list is stored in this
     * configuration (see {@link #getErrors()}), and a {@link DefaultPreview} is built from it.
     * The result set is closed afterwards, whether or not building the preview succeeded.
     *
     * @param listener
     *            the progress listener handed to the preview
     * @return the preview table model
     * @throws OperatorException
     *             if creating or closing the result set fails
     * @throws ParseException
     *             if building the preview fails with a parse error
     */
    @Override
    public TableModel makePreviewTableModel(ProgressListener listener) throws OperatorException, ParseException {
        final DataResultSet resultSet = makeDataResultSet(null);
        try {
            // Grab the error list before the preview is built, exactly as the statement order
            // has always been; the preview is constructed from the same result set afterwards.
            this.errors = ((CSVResultSet) resultSet).getErrors();
            return new DefaultPreview(resultSet, listener);
        } finally {
            resultSet.close();
        }
    }

    /**
     * @param csvFile
     *            the path of the CSV file to parse
     */
    public void setCsvFile(String csvFile) {
        this.csvFile = csvFile;
    }

    /**
     * @return the path of the CSV file, or {@code null} if none has been set
     */
    public String getCsvFile() {
        return csvFile;
    }

    /**
     * @return the configured CSV file as a {@link File}, or {@code null} if no file has been set
     */
    public File getCsvFileAsFile() {
        if (csvFile == null) {
            return null;
        }
        return new File(csvFile);
    }

    /**
     * @param useQuotes
     *            the new value of the "use quotes" flag
     */
    public void setUseQuotes(boolean useQuotes) {
        this.useQuotes = useQuotes;
    }

    /**
     * @return the "use quotes" flag (default {@code true})
     */
    public boolean isUseQuotes() {
        return useQuotes;
    }

    /**
     * @return the "has header row" flag (default {@code true})
     */
    public boolean hasHeaderRow() {
        return hasHeaderRow;
    }

    /**
     * @param hasHeaderRow
     *            the new value of the "has header row" flag
     */
    public void setHasHeaderRow(boolean hasHeaderRow) {
        this.hasHeaderRow = hasHeaderRow;
    }

    /**
     * @param skipComments
     *            the new value of the "skip comments" flag
     */
    public void setSkipComments(boolean skipComments) {
        this.skipComments = skipComments;
    }

    /**
     * @return the "skip comments" flag (default {@code true})
     */
    public boolean isSkipComments() {
        return skipComments;
    }

    /**
     * @param columnSeparators
     *            the column separators setting
     */
    public void setColumnSeparators(String columnSeparators) {
        this.columnSeparators = columnSeparators;
    }

    /**
     * @return the column separators setting (default {@code ";"})
     */
    public String getColumnSeparators() {
        return columnSeparators;
    }

    /**
     * @param commentCharacters
     *            the comment characters setting
     */
    public void setCommentCharacters(String commentCharacters) {
        this.commentCharacters = commentCharacters;
    }

    /**
     * @return the comment characters setting (default {@code "#"})
     */
    public String getCommentCharacters() {
        return commentCharacters;
    }

    /**
     * @param escapeCharacter
     *            the escape character
     */
    public void setEscapeCharacter(char escapeCharacter) {
        this.escapeCharacter = escapeCharacter;
    }

    /**
     * @return the escape character (default backslash)
     */
    public char getEscapeCharacter() {
        return escapeCharacter;
    }

    /**
     * @param quoteCharacter
     *            the quote character
     */
    public void setQuoteCharacter(char quoteCharacter) {
        this.quoteCharacter = quoteCharacter;
    }

    /**
     * @return the quote character (default double quote)
     */
    public char getQuoteCharacter() {
        return quoteCharacter;
    }

    /**
     * @param decimalCharacter
     *            the decimal character
     */
    public void setDecimalCharacter(char decimalCharacter) {
        this.decimalCharacter = decimalCharacter;
    }

    /**
     * @return the decimal character (default {@code '.'})
     */
    public char getDecimalCharacter() {
        return decimalCharacter;
    }

    /**
     * @param trimLines
     *            the new value of the "trim lines" flag
     */
    public void setTrimLines(boolean trimLines) {
        this.trimLines = trimLines;
    }

    /**
     * @return the "trim lines" flag (default {@code false})
     */
    public boolean isTrimLines() {
        return trimLines;
    }

    /**
     * Sets the charset used for the file.
     *
     * @param encoding
     *            the charset to use; {@code null} selects the platform default charset, which
     *            is also the initial value of this setting
     */
    public void setEncoding(Charset encoding) {
        // Never store null: setParameters(...) dereferences the charset unconditionally.
        this.encoding = encoding != null ? encoding : Charset.defaultCharset();
    }

    /**
     * @return the charset used for the file; never {@code null}
     */
    public Charset getEncoding() {
        return encoding;
    }

    /**
     * @return the "skip UTF-8 BOM" flag (default {@code false})
     */
    public boolean isSkippingUTF8BOM() {
        return skipUTF8BOM;
    }

    /**
     * @param skipUTF8BOM
     *            the new value of the "skip UTF-8 BOM" flag
     */
    public void setSkipUTF8BOM(boolean skipUTF8BOM) {
        this.skipUTF8BOM = skipUTF8BOM;
    }

    /**
     * @return the starting row setting (default {@code 0})
     */
    public int getStartingRow() {
        return startingRow;
    }

    /**
     * @param startingRow
     *            the starting row setting
     */
    public void setStartingRow(int startingRow) {
        this.startingRow = startingRow;
    }

    /**
     * @return the header row setting (default {@code 0})
     */
    public int getHeaderRow() {
        return headerRow;
    }

    /**
     * @param headerRow
     *            the header row setting
     */
    public void setHeaderRow(int headerRow) {
        this.headerRow = headerRow;
    }

    /**
     * @return the path of the configured CSV file, same as {@link #getCsvFile()}
     */
    @Override
    public String getResourceName() {
        return getCsvFile();
    }

    /**
     * @return a new, empty {@link ExampleSetMetaData}
     */
    @Override
    public ExampleSetMetaData makeMetaData() {
        return new ExampleSetMetaData();
    }

    /**
     * @return the error list taken from the result set by the most recent
     *         {@link #makePreviewTableModel(ProgressListener)} call; {@code null} until that
     *         method has obtained a result set
     */
    public List<ParsingError> getErrors() {
        return errors;
    }

    /**
     * Does nothing; this configuration holds no resources of its own.
     */
    @Override
    public void close() {}

    /**
     * Collects all settings of this configuration as strings, keyed by field name.
     *
     * @return a new map containing all field names and their values
     */
    public Map<String, String> getParameterMap() {
        Map<String, String> parameterMap = new HashMap<>();
        parameterMap.put("csvFile", getCsvFile());
        parameterMap.put("useQuotes", String.valueOf(isUseQuotes()));
        parameterMap.put("hasHeaderRow", String.valueOf(hasHeaderRow()));
        parameterMap.put("headerRow", String.valueOf(getHeaderRow()));
        parameterMap.put("decimalCharacter", String.valueOf(getDecimalCharacter()));
        parameterMap.put("startingRow", String.valueOf(getStartingRow()));
        parameterMap.put("skipComments", String.valueOf(isSkipComments()));
        parameterMap.put("columnSeparators", getColumnSeparators());
        parameterMap.put("commentCharacters", getCommentCharacters());
        parameterMap.put("escapeCharacter", String.valueOf(getEscapeCharacter()));
        parameterMap.put("quoteCharacter", String.valueOf(getQuoteCharacter()));
        parameterMap.put("trimLines", String.valueOf(isTrimLines()));
        parameterMap.put("encoding", String.valueOf(getEncoding()));
        parameterMap.put("skipUTF8BOM", String.valueOf(isSkippingUTF8BOM()));
        return parameterMap;
    }
}