/**
* VMware Continuent Tungsten Replicator
* Copyright (C) 2015 VMware, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial developer(s): Robert Hodges
* Contributor(s): Linas Virbalas
*/
package com.continuent.tungsten.common.csv;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Writes CSV output. This class implements CSV formatting roughly as described
* in RFC4180 (http://tools.ietf.org/html/rfc4180) with practical alterations to
* match specify DBMS implementations.
*
* @author <a href="mailto:robert.hodges@continuent.com">Robert Hodges</a>
* @version 1.0
*/
public class CsvWriter
{
// Properties.
private String fieldSeparator = ",";
private String recordSeparator = "\n";
private boolean writeHeaders = true;
private boolean quoted = false;
private NullPolicy nullPolicy = NullPolicy.skip;
private String nullValue = null;
private boolean nullAutofill = false;
private char quoteChar = '\"';
private char escapeChar = '\\';
private String escapedChars = "";
private String suppressedChars = "";
private String rowId = null;
// State.
private Map<String, Integer> names = new HashMap<String, Integer>();
private List<String> row;
private BufferedWriter writer;
private int rowCount = 0;
private int colCount = 0;
// Enum and table to describe disposition of specific characters.
enum Disposition
{
escape, suppress
}
private Map<Character, Disposition> disposition;
/**
* Instantiate a new instance with output to provided writer.
*/
public CsvWriter(Writer writer)
{
this(new BufferedWriter(writer));
}
/**
* Instantiate a new instance with output to provided buffered writer. This
* call allows clients to set buffering parameters themselves.
*/
public CsvWriter(BufferedWriter writer)
{
this.writer = writer;
}
/**
* Sets the field separator characters.
*/
public void setFieldSeparator(String fieldSeparators)
{
this.fieldSeparator = fieldSeparators;
}
/**
* Returns field separator character.
*/
public String getFieldSeparator()
{
return this.fieldSeparator;
}
/**
* Sets the record separator characters.
*/
public void setRecordSeparator(String recordSeparator)
{
this.recordSeparator = recordSeparator;
}
/**
* Returns record separator character.
*/
public String getRecordSeparator()
{
return this.recordSeparator;
}
/** Returns true if values will be enclosed by a quote character. */
public synchronized boolean isQuoted()
{
return quoted;
}
/** Set to true to enable quoting. */
public synchronized void setQuoted(boolean quoted)
{
this.quoted = quoted;
}
/** Returns the policy for handling null values. */
public synchronized NullPolicy getNullPolicy()
{
return nullPolicy;
}
/** Sets the policy for handling null values. */
public synchronized void setNullPolicy(NullPolicy nullPolicy)
{
this.nullPolicy = nullPolicy;
}
/** Gets the null value identifier string. */
public synchronized String getNullValue()
{
return nullValue;
}
/**
* Sets the null value identifier string. This applies only when null policy
* is NullPolicy.nullValue.
*/
public synchronized void setNullValue(String nullValue)
{
this.nullValue = nullValue;
}
/** Returns true to fill nulls automatically. */
public synchronized boolean isNullAutofill()
{
return nullAutofill;
}
/**
* Sets the null autofill policy for columns that have no value (partial
* rows). If true, unwritten columns are filled with the prevailing null
* value. If false, partial rows prompt an exception.
*/
public synchronized void setNullAutofill(boolean nullAutofill)
{
this.nullAutofill = nullAutofill;
}
/** Returns the quote character. */
public synchronized char getQuoteChar()
{
return quoteChar;
}
/** Sets the quote character. */
public synchronized void setQuoteChar(char quoteChar)
{
this.quoteChar = quoteChar;
}
/**
* Sets the quote character from string input.
*/
public synchronized void setQuoteChar(String quoteString)
{
if (quoteString != null && quoteString.length() > 0)
this.quoteChar = quoteString.charAt(0);
}
/**
* Sets character used to escape quotes and other escaped characters.
*/
public synchronized void setEscapeChar(char quoteEscapeChar)
{
this.escapeChar = quoteEscapeChar;
}
/**
* Sets the escape character from string input.
*/
public synchronized void setEscapeChar(String escapeString)
{
if (escapeString != null && escapeString.length() > 0)
this.escapeChar = escapeString.charAt(0);
}
/** Returns the escape character. */
public synchronized char getEscapeChar()
{
return escapeChar;
}
/**
* Returns a string of characters that must be preceded by escape character.
*/
public synchronized String getEscapedChars()
{
return escapedChars;
}
/**
* Defines zero or more characters that must be preceded by escape
* character.
*/
public synchronized void setEscapedChars(String escapedChars)
{
if (escapedChars == null)
this.escapedChars = "";
else
this.escapedChars = escapedChars;
}
/**
* Returns a string of characters that are suppressed in CSV output.
*/
public synchronized String getSuppressedChars()
{
return suppressedChars;
}
/**
* Sets characters to be suppressed in CSV output.
*/
public synchronized void setSuppressedChars(String suppressedChars)
{
if (suppressedChars == null)
this.suppressedChars = "";
else
this.suppressedChars = suppressedChars;
}
/**
* Returns the current count of rows written.
*/
public int getRowCount()
{
return rowCount;
}
/**
* Get the underlying writer.
*/
public Writer getWriter()
{
return writer;
}
/** If true, write headers. */
public synchronized boolean isWriteHeaders()
{
return writeHeaders;
}
/** Set to true to write headers. */
public synchronized void setWriteHeaders(boolean writeHeaders)
{
this.writeHeaders = writeHeaders;
}
/**
* Add a column name. Columns are indexed 1,2,3,...,N in the order added.
* You must add all names before writing the first row.
*
* @param name Column name
* @throws CsvException Thrown
*/
public void addColumnName(String name) throws CsvException
{
if (rowCount > 0)
{
throw new CsvException(
"Attempt to add column after writing one or more rows");
}
int index = names.size() + 1;
names.put(name, index);
}
/**
* Add a row id name. Row IDs are a numeric counter that can be inserted in
* any column. By defining the row id name, the matching column always has
* the batch row number automatically added to it.
*
* @param name Row ID name
* @throws CsvException Thrown if the row ID has already been set.
*/
public void addRowIdName(String name) throws CsvException
{
if (rowCount > 0)
{
throw new CsvException(
"Attempt to add row ID after writing one or more rows");
}
else if (rowId != null)
{
throw new CsvException("Attempt to add row ID twice");
}
this.rowId = name;
addColumnName(rowId);
}
/**
* Return names in column order.
*/
public List<String> getNames()
{
// Create null-filled array. The array differs by one according
// to whether we use row IDs or not.
int size = names.size();
List<String> nameList = new ArrayList<String>(names.size());
for (int i = 0; i < size; i++)
nameList.add(null);
// Add names to correct positions in array.
for (String name : names.keySet())
{
int index = names.get(name);
nameList.set(index - 1, name);
}
return nameList;
}
/**
* Return the number of columns.
*/
public int getWidth()
{
return names.size();
}
/**
* Writes current row, including headers if we are on the first row.
*
* @throws CsvException Thrown if there is an inconsistency like too many
* columns
* @throws IOException Thrown due to a write error
*/
public CsvWriter write() throws CsvException, IOException
{
// At the top of the file optionally write headers and set the row
// ID name.
if (rowCount == 0 && writeHeaders)
{
if (writeHeaders)
{
writeRow(getNames());
rowCount++;
}
}
// If we have a pending row, write it now.
if (row != null)
{
// Add the row count value if row IDs are enabled.
if (rowId != null)
{
put(rowId, new Integer(rowCount + 1).toString());
}
// Check for writing too few columns.
if (!nullAutofill && colCount < names.size())
{
throw new CsvException("Attempt to write partial row: row="
+ (rowCount + 1) + " columns required=" + names.size()
+ " columns written=" + colCount);
}
// Write the row.
writeRow(row);
row = null;
colCount = 0;
rowCount++;
}
return this;
}
/**
* Forces a write of any pending row(s) and flushes data on writer.
*
* @throws CsvException Thrown on an I/O failure
*/
public CsvWriter flush() throws IOException, CsvException
{
write();
writer.flush();
return this;
}
/**
* Writes value to current row. This is the base value.
*
* @param index Column index where indexes are numbered 1,2,3,...,N with N
* being the width of the row in columns
* @param value String value to write, already escaped if necessary
* @throws CsvException Thrown if client attempts to write same column value
* twice or the row is not wide enough
*/
public CsvWriter put(int index, String value) throws CsvException
{
// Initialize the character disposition table if necessary.
if (disposition == null)
{
disposition = new HashMap<Character, Disposition>(256);
for (char c : escapedChars.toCharArray())
{
disposition.put(c, Disposition.escape);
}
for (char c : suppressedChars.toCharArray())
{
disposition.put(c, Disposition.suppress);
}
}
// Start a new row if required and fill columns with null values.
if (row == null)
{
int size = getWidth();
row = new ArrayList<String>(size);
for (int i = 0; i < size; i++)
row.add(null);
colCount = 0;
}
// Check for invalid index.
if (index < 1 || index > row.size())
{
throw new CsvException(
"Attempt to write to invalid column index: index=" + index
+ " value=" + value + " row size=" + row.size());
}
// Check for a double write to same column. This is a safety violation.
int arrayIndex = index - 1;
if (row.get(arrayIndex) != null)
{
throw new CsvException(
"Attempt to write value twice to same row: index="
+ index
+ " old value="
+ row.get(arrayIndex)
+ " new value="
+ value
+ " (does table have a PK and is it single-column?)");
}
// Set the column value.
if (value == null)
{
// Nulls are handled according to the null value policy.
if (this.nullPolicy == NullPolicy.emptyString)
value = processString("");
else if (nullPolicy == NullPolicy.skip)
value = null;
else
value = nullValue;
}
else
{
value = processString(value);
}
row.set(arrayIndex, value);
colCount++;
return this;
}
/**
* Writes value to key in current row.
*/
public CsvWriter put(String key, String value) throws CsvException
{
int index = names.get(key);
return put(index, value);
}
// Utility routine to escape characters and enclose string in
// quotes if so desired.
private String processString(String base)
{
StringBuffer sb = new StringBuffer();
if (quoted)
sb.append(quoteChar);
for (int i = 0; i < base.length(); i++)
{
// Fetch character and look up its disposition.
char next = base.charAt(i);
Disposition disp = disposition.get(next);
// Emit the character according to CSV formatting rules.
if (next == quoteChar && quoted)
{
// Escape any quote character.
sb.append(escapeChar).append(quoteChar);
}
else if (disp == Disposition.escape)
{
// Prefix an escape character.
sb.append(escapeChar).append(next);
}
else if (disp == Disposition.suppress)
{
// Drop the character.
continue;
}
else
{
// If all else fails, emit the character as is.
sb.append(next);
}
}
if (quoted)
sb.append(quoteChar);
return sb.toString();
}
/**
* Write contents of a single row, including separator.
*
* @param row
* @throws IOException
*/
private void writeRow(List<String> row) throws IOException
{
for (int i = 0; i < row.size(); i++)
{
if (i > 0)
writer.append(fieldSeparator);
String value = row.get(i);
if (value == null)
{
// Nulls are handled according to the null value policy.
if (this.nullPolicy == NullPolicy.emptyString)
writer.append(processString(""));
else if (nullPolicy == NullPolicy.skip)
writer.append(null);
else
writer.append(nullValue);
}
else
writer.append(row.get(i));
}
writer.append(recordSeparator);
}
}