/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringWriter;
/**
* The format specification of a CSV file.
*
* This class is immutable.
*/
@SuppressWarnings("serial")
public class CSVFormat implements Serializable {
/** According to RFC 4180, line breaks are delimited by CRLF */
public static final String CRLF = "\r\n";
private final char delimiter;
private final char encapsulator;
private final char commentStart;
private final char escape;
private final boolean surroundingSpacesIgnored; // Should leading/trailing spaces be ignored around values?
private final boolean emptyLinesIgnored;
private final String lineSeparator; // for outputs
private final String[] header;
private final boolean isEscaping;
private final boolean isCommentingEnabled;
private final boolean isEncapsulating;
/**
* Constant char to be used for disabling comments, escapes and encapsulation.
* The value -2 is used because it won't be confused with an EOF signal (-1),
* and because the unicode value FFFE would be encoded as two chars (using surrogates)
* and thus there should never be a collision with a real text char.
*/
static final char DISABLED = '\ufffe';
/**
* Starting format with no settings defined; used for creating other formats from scratch.
*/
static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null);
/**
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
* <ul>
* <li>withDelimiter(',')</li>
* <li>withEncapsulator('"')</li>
* <li>withEmptyLinesIgnored(true)</li>
* <li>withLineSeparator(CRLF)</li>
* </ul>
*/
public static final CSVFormat DEFAULT =
PRISTINE.
withDelimiter(',')
.withEncapsulator('"')
.withEmptyLinesIgnored(true)
.withLineSeparator(CRLF);
/**
* Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
* <ul>
* <li>withDelimiter(',')</li>
* <li>withEncapsulator('"')</li>
* <li>withLineSeparator(CRLF)</li>
* <li></li>
* </ul>
*/
public static final CSVFormat RFC4180 =
PRISTINE.
withDelimiter(',')
.withEncapsulator('"')
.withLineSeparator(CRLF);
/**
* Excel file format (using a comma as the value delimiter).
* Note that the actual value delimiter used by Excel is locale dependent,
* it might be necessary to customize this format to accomodate to your
* regional settings.
* <p/>
* For example for parsing or generating a CSV file on a French system
* the following format will be used:
*
* <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre>
*/
public static final CSVFormat EXCEL =
PRISTINE
.withDelimiter(',')
.withEncapsulator('"')
.withLineSeparator(CRLF);
/** Tab-delimited format, with quote; leading and trailing spaces ignored. */
public static final CSVFormat TDF =
PRISTINE
.withDelimiter('\t')
.withEncapsulator('"')
.withSurroundingSpacesIgnored(true)
.withEmptyLinesIgnored(true)
.withLineSeparator(CRLF);
/**
* Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and
* <tt>LOAD DATA INFILE</tt> operations. This is a tab-delimited
* format with a LF character as the line separator. Values are not quoted
* and special characters are escaped with '\'.
*
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
*/
public static final CSVFormat MYSQL =
PRISTINE
.withDelimiter('\t')
.withEscape('\\')
.withLineSeparator("\n");
/**
* Creates a customized CSV format.
*
* @param delimiter the char used for value separation
* @param encapsulator the char used as value encapsulation marker
* @param commentStart the char used for comment identification
* @param escape the char used to escape special characters in values
* @param surroundingSpacesIgnored <tt>true</tt> when whitespaces enclosing values should be ignored
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip emtpy lines
* @param lineSeparator the line separator to use for output
* @param header the header
*/
CSVFormat(
char delimiter,
char encapsulator,
char commentStart,
char escape,
boolean surroundingSpacesIgnored,
boolean emptyLinesIgnored,
String lineSeparator,
String[] header) {
this.delimiter = delimiter;
this.encapsulator = encapsulator;
this.commentStart = commentStart;
this.escape = escape;
this.surroundingSpacesIgnored = surroundingSpacesIgnored;
this.emptyLinesIgnored = emptyLinesIgnored;
this.lineSeparator = lineSeparator;
this.header = header;
this.isEncapsulating = encapsulator != DISABLED;
this.isCommentingEnabled = commentStart != DISABLED;
this.isEscaping = escape != DISABLED;
}
/**
* Returns true if the given character is a line break character.
*
* @param c the character to check
*
* @return true if <code>c</code> is a line break character
*/
private static boolean isLineBreak(char c) {
return c == '\n' || c == '\r';
}
/**
* Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
*/
void validate() throws IllegalArgumentException {
if (delimiter == encapsulator) {
throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\"" + encapsulator + "\")");
}
if (delimiter == escape) {
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\"" + escape + "\")");
}
if (delimiter == commentStart) {
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\"" + commentStart + "\")");
}
if (encapsulator != DISABLED && encapsulator == commentStart) {
throw new IllegalArgumentException("The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
}
if (escape != DISABLED && escape == commentStart) {
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" + commentStart + "\")");
}
}
/**
* Returns the character delimiting the values (typically ';', ',' or '\t').
*
* @return the delimiter character
*/
public char getDelimiter() {
return delimiter;
}
/**
* Returns a copy of this format using the specified delimiter character.
*
* @param delimiter the delimiter character
* @return A copy of this format using the specified delimiter character
* @throws IllegalArgumentException thrown if the specified character is a line break
*/
public CSVFormat withDelimiter(char delimiter) {
if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
/**
* Returns the character used to encapsulate values containing special characters.
*
* @return the encapsulator character
*/
public char getEncapsulator() {
return encapsulator;
}
/**
* Returns a copy of this format using the specified encapsulator character.
*
* @param encapsulator the encapsulator character
* @return A copy of this format using the specified encapsulator character
* @throws IllegalArgumentException thrown if the specified character is a line break
*/
public CSVFormat withEncapsulator(char encapsulator) {
if (isLineBreak(encapsulator)) {
throw new IllegalArgumentException("The encapsulator cannot be a line break");
}
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
boolean isEncapsulating() {
return isEncapsulating;
}
/**
* Returns the character marking the start of a line comment.
*
* @return the comment start marker.
*/
public char getCommentStart() {
return commentStart;
}
/**
* Returns a copy of this format using the specified character as the comment start marker.
*
* Note that the comment introducer character is only recognised
* at the start of a line.
*
* @param commentStart the comment start marker
* @return A copy of this format using the specified character as the comment start marker
* @throws IllegalArgumentException thrown if the specified character is a line break
*/
public CSVFormat withCommentStart(char commentStart) {
if (isLineBreak(commentStart)) {
throw new IllegalArgumentException("The comment start character cannot be a line break");
}
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
/**
* Specifies whether comments are supported by this format.
*
* Note that the comment introducer character is only recognised
* at the start of a line.
*
* @return <tt>true</tt> is comments are supported, <tt>false</tt> otherwise
*/
public boolean isCommentingEnabled() {
return isCommentingEnabled;
}
/**
* Returns the escape character.
*
* @return the escape character
*/
public char getEscape() {
return escape;
}
/**
* Returns a copy of this format using the specified escape character.
*
* @param escape the escape character
* @return A copy of this format using the specified escape character
* @throws IllegalArgumentException thrown if the specified character is a line break
*/
public CSVFormat withEscape(char escape) {
if (isLineBreak(escape)) {
throw new IllegalArgumentException("The escape character cannot be a line break");
}
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
boolean isEscaping() {
return isEscaping;
}
/**
* Specifies whether spaces around values are ignored when parsing input.
*
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the value.
*/
public boolean isSurroundingSpacesIgnored() {
return surroundingSpacesIgnored;
}
/**
* Returns a copy of this format with the specified trimming behavior.
*
* @param surroundingSpacesIgnored the trimming behavior, <tt>true</tt> to remove the surrounding spaces,
* <tt>false</tt> to leave the spaces as is.
* @return A copy of this format with the specified trimming behavior.
*/
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
/**
* Specifies whether empty lines between records are ignored when parsing input.
*
* @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty records.
*/
public boolean isEmptyLinesIgnored() {
return emptyLinesIgnored;
}
/**
* Returns a copy of this format with the specified empty line skipping behavior.
*
* @param emptyLinesIgnored the empty line skipping behavior, <tt>true</tt> to ignore the empty lines
* between the records, <tt>false</tt> to translate empty lines to empty records.
* @return A copy of this format with the specified empty line skipping behavior.
*/
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
/**
* Returns the line separator delimiting output records.
*
* @return the line separator
*/
public String getLineSeparator() {
return lineSeparator;
}
/**
* Returns a copy of this format using the specified output line separator.
*
* @param lineSeparator the line separator to be used for output.
*
* @return A copy of this format using the specified output line separator
*/
public CSVFormat withLineSeparator(String lineSeparator) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
String[] getHeader() {
return header;
}
/**
* Returns a copy of this format using the specified header. The header can
* either be parsed automatically from the input file with:
*
* <pre>CSVFormat format = aformat.withHeader();</pre>
*
* or specified manually with:
*
* <pre>CSVFormat format = aformat.withHeader("name", "email", "phone");</pre>
*
* @param header the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
*
* @return A copy of this format using the specified header
*/
public CSVFormat withHeader(String... header) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
}
/**
* Parses the specified content.
*
* @param in the input stream
*/
public Iterable<CSVRecord> parse(Reader in) throws IOException {
return new CSVParser(in, this);
}
/**
* Format the specified values.
*
* @param values the values to format
*/
public String format(String... values) {
StringWriter out = new StringWriter();
try {
new CSVPrinter(out, this).println(values);
} catch (IOException e) {
// should not happen
}
return out.toString().trim();
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Delimiter=<").append(delimiter).append('>');
if (isEscaping()) {
sb.append(' ');
sb.append("Escape=<").append(escape).append('>');
}
if (isEncapsulating()) {
sb.append(' ');
sb.append("Encapsulator=<").append(encapsulator).append('>');
}
if (isCommentingEnabled()) {
sb.append(' ');
sb.append("CommentStart=<").append(commentStart).append('>');
}
if (isEmptyLinesIgnored()) {
sb.append(" EmptyLines:ignored");
}
if (isSurroundingSpacesIgnored()) {
sb.append(" SurroundingSpaces:ignored");
}
return sb.toString();
}
}