package org.andork.io;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * A comma-separated value formatter and parser.
 * <p>
 * Fields are joined by a configurable {@linkplain #separator() separator}
 * character and may be wrapped in a configurable {@linkplain #quote() quote}
 * character. Inside a quoted section a literal quote is written as two
 * consecutive quote characters. When {@linkplain #trimWhitespace() whitespace
 * trimming} is enabled, leading and trailing whitespace that is <em>outside</em>
 * of quotes is discarded by the parser, and the formatter quotes any field
 * that begins or ends with whitespace so that it survives a round trip.
 *
 * @author James
 */
public class CSVFormat {
    /**
     * Rejects the NUL character, which the setters of this class document as
     * an illegal separator/quote value.
     *
     * @param c
     *            the character to check.
     * @throws IllegalArgumentException
     *             if {@code c} is {@code '\0'}.
     */
    private static void requireNotNullTerminator(char c) {
        if (c == '\0') {
            throw new IllegalArgumentException("you may not use the null terminator character");
        }
    }

    private char separator = ',';
    private char quote = '"';
    private boolean trimWhitespace = true;

    public CSVFormat() {
    }

    /**
     * Formats a list of fields into a line of CSV data.
     * <p>
     * A separator is written between every pair of adjacent fields, including
     * when a field is the empty string.
     *
     * @param fields
     *            the fields to format. Neither the collection nor any of its
     *            elements may be {@code null}.
     * @return a line of CSV containing the {@code fields} formatted according
     *         to the settings of this {@link CSVFormat}.
     * @throws NullPointerException
     *             if {@code fields} or one of its elements is {@code null}.
     */
    public String formatLine(Collection<String> fields) {
        StringBuilder sb = new StringBuilder();
        // Track position explicitly: testing sb.length() would skip the
        // separator after a leading empty field.
        boolean first = true;
        for (String field : fields) {
            if (!first) {
                sb.append(separator);
            }
            first = false;

            if (needToQuoteField(field)) {
                appendQuoted(sb, field);
            } else {
                sb.append(field);
            }
        }
        return sb.toString();
    }

    /**
     * Appends {@code field} to {@code sb} wrapped in quote characters,
     * doubling every quote character that occurs inside the field.
     */
    private void appendQuoted(StringBuilder sb, String field) {
        sb.append(quote);
        for (int i = 0; i < field.length(); i++) {
            char c = field.charAt(i);
            if (c == quote) {
                // together with the append below this emits two quotes in a
                // row, which is the escaped form of a literal quote.
                sb.append(quote);
            }
            sb.append(c);
        }
        sb.append(quote);
    }

    /**
     * Determines whether a field has to be quoted to be parsed back intact:
     * it contains the separator or the quote character, or whitespace
     * trimming is enabled and the field begins or ends with whitespace.
     */
    private boolean needToQuoteField(String field) {
        if (field.indexOf(separator) >= 0 || field.indexOf(quote) >= 0) {
            return true;
        }
        // unquoted leading/trailing whitespace would be trimmed by the parser
        return trimWhitespace
                && field.length() > 0
                && (Character.isWhitespace(field.charAt(0))
                        || Character.isWhitespace(field.charAt(field.length() - 1)));
    }

    /**
     * Parses a line of CSV data.
     *
     * @param line
     *            the line to parse.
     * @return a list of fields parsed from {@code line}. The list always
     *         contains at least one field; an empty line yields a single
     *         empty field.
     */
    public List<String> parseLine(String line) {
        List<String> result = new ArrayList<>();
        parseLine(line, result);
        return result;
    }

    /**
     * Parses a line of CSV data.
     * <p>
     * Separators inside a quoted section are part of the field. Two
     * consecutive quote characters inside a quoted section produce one
     * literal quote character. If the line ends inside a quoted section, the
     * text read so far is still emitted as the final field. When whitespace
     * trimming is enabled, only whitespace outside of quoted sections is
     * removed from the start and end of each field.
     *
     * @param line
     *            the line to parse.
     * @param result
     *            the {@link Collection} to place the fields parsed from
     *            {@code line} into.
     */
    public void parseLine(String line, Collection<String> result) {
        StringBuilder sb = new StringBuilder();
        boolean inQuote = false;
        // Bounds (as indices into sb) of the text that came from quoted
        // sections of the current field; -1 while the field has none.
        int quotedStart = -1;
        int quotedEnd = -1;

        final int length = line.length();
        for (int i = 0; i < length; i++) {
            char c = line.charAt(i);
            if (c == quote) {
                if (!inQuote) {
                    inQuote = true;
                    if (quotedStart < 0) {
                        quotedStart = sb.length();
                    }
                } else if (i + 1 < length && line.charAt(i + 1) == quote) {
                    // escaped quote: consume both characters, keep one
                    sb.append(quote);
                    i++;
                } else {
                    inQuote = false;
                    quotedEnd = sb.length();
                }
            } else if (c == separator && !inQuote) {
                result.add(finishField(sb, quotedStart, quotedEnd));
                sb.setLength(0);
                quotedStart = -1;
                quotedEnd = -1;
            } else {
                // ordinary character, or a separator inside quotes
                sb.append(c);
            }
        }

        if (inQuote) {
            // unterminated quote: treat everything up to the end as quoted
            quotedEnd = sb.length();
        }
        // the end of the line always terminates the last field
        result.add(finishField(sb, quotedStart, quotedEnd));
    }

    /**
     * Converts the accumulated characters of one field into its final value,
     * trimming leading and trailing whitespace if trimming is enabled but
     * never removing characters inside {@code [quotedStart, quotedEnd)}.
     *
     * @param sb
     *            the characters of the field with quoting already removed.
     * @param quotedStart
     *            index in {@code sb} where quoted text begins, or {@code -1}
     *            if the field had no quoted section.
     * @param quotedEnd
     *            index in {@code sb} just past the last quoted text; only
     *            meaningful when {@code quotedStart >= 0}.
     */
    private String finishField(StringBuilder sb, int quotedStart, int quotedEnd) {
        if (!trimWhitespace) {
            return sb.toString();
        }
        boolean hasQuoted = quotedStart >= 0;

        int end = sb.length();
        int minEnd = hasQuoted ? quotedEnd : 0;
        while (end > minEnd && Character.isWhitespace(sb.charAt(end - 1))) {
            end--;
        }

        int start = 0;
        int maxStart = hasQuoted ? quotedStart : end;
        while (start < maxStart && Character.isWhitespace(sb.charAt(start))) {
            start++;
        }
        return sb.substring(start, end);
    }

    /**
     * @return the quote character. The default is a double quote ("), but you
     *         may change it.
     */
    public char quote() {
        return quote;
    }

    /**
     * Sets the quote character.
     *
     * @param quote
     *            the new quote character.
     * @return this {@link CSVFormat}, for chaining.
     * @throws IllegalArgumentException
     *             if {@code quote} is the null terminator or the same as
     *             {@link #separator()}.
     */
    public CSVFormat quote(char quote) {
        if (quote == separator) {
            throw new IllegalArgumentException("the separator and quote characters must not be the same");
        }
        requireNotNullTerminator(quote);
        this.quote = quote;
        return this;
    }

    /**
     * @return the separator character. The default is a comma, but you may
     *         change it.
     */
    public char separator() {
        return separator;
    }

    /**
     * Sets the separator character.
     *
     * @param separator
     *            the new separator character.
     * @return this {@link CSVFormat}, for chaining.
     * @throws IllegalArgumentException
     *             if {@code separator} is the null terminator or the same as
     *             {@link #quote()}.
     */
    public CSVFormat separator(char separator) {
        if (separator == quote) {
            throw new IllegalArgumentException("the separator and quote characters must not be the same");
        }
        requireNotNullTerminator(separator);
        this.separator = separator;
        return this;
    }

    /**
     * @return whether leading and trailing whitespace in fields will be
     *         trimmed.
     */
    public boolean trimWhitespace() {
        return trimWhitespace;
    }

    /**
     * Sets whether leading and trailing whitespace in fields will be trimmed.
     *
     * @param trimWhitespace
     *            if {@code true}, leading and trailing whitespace in fields
     *            will be trimmed.
     * @return this {@link CSVFormat}, for chaining.
     */
    public CSVFormat trimWhitespace(boolean trimWhitespace) {
        this.trimWhitespace = trimWhitespace;
        return this;
    }
}