/**
* VMware Continuent Tungsten Replicator
* Copyright (C) 2015 VMware, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial developer(s): Robert Hodges
* Contributor(s): Linas Virbalas
*/
package com.continuent.tungsten.common.csv;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Reader;
import java.io.Writer;
import org.apache.commons.lang.StringEscapeUtils;
/**
 * Holds specification of properties for CSV input and output from which it
 * generates {@link CsvWriter} and {@link CsvReader} instances.
 * <p/>
 * Every property accessor, as well as the factory methods that read the
 * properties, synchronizes on this instance so that all properties are guarded
 * by the same lock.
 */
public class CsvSpecification
{
    // Properties.
    private String     fieldSeparator          = ",";
    private String     recordSeparator         = "\n";
    private boolean    collapseFieldSeparators = false;
    private boolean    useHeaders              = false;
    private boolean    useQuotes               = false;
    private String     quote                   = "\"";
    private String     escape                  = "\\";
    private String     escapedChars            = "";
    private String     suppressedChars         = "";
    private NullPolicy nullPolicy              = NullPolicy.nullValue;
    private String     nullValue               = null;
    private boolean    nullAutofill            = false;

    /**
     * Returns a specification suitable for a particular DBMS store type.
     * Supported types include the following:
     * <p/>
     * <ul>
     * <li>default - Default settings</li>
     * <li>hive - Standard settings for Hadoop Hive external table</li>
     * <li>mysql - Comma-separated, quoted, backslash escape, nulls written as
     * a backslash followed by N</li>
     * <li>oracle - Same settings as mysql</li>
     * <li>vertica - Comma-separated, quoted, backslash escape, null policy
     * "skip", newlines suppressed</li>
     * <li>redshift - Comma-separated, quoted, quote character used as the
     * escape, null policy "skip", newlines suppressed</li>
     * </ul>
     * (Other settings will be added in due time.)
     *
     * @param type Name of the store type; matching is case-sensitive
     * @return The specified type or a null if the type is unknown (including
     *         when type is null)
     */
    public static CsvSpecification getSpecification(String type)
    {
        if ("default".equals(type))
        {
            return new CsvSpecification();
        }

        if ("hive".equals(type))
        {
            CsvSpecification hive = new CsvSpecification();
            // Hive fields are separated by the U+0001 control character.
            hive.setFieldSeparator("\u0001");
            hive.setRecordSeparator("\n");
            hive.setEscape("\\");
            hive.setEscapedChars("\u0001\\");
            hive.setNullPolicy(NullPolicy.nullValue);
            hive.setNullValue("\\N");
            hive.setUseHeaders(false);
            hive.setUseQuotes(false);
            hive.setSuppressedChars("\n\r");
            return hive;
        }

        // MySQL and Oracle share exactly the same settings.
        if ("mysql".equals(type) || "oracle".equals(type))
        {
            CsvSpecification quoted = newQuotedSpecification("\\", "\\");
            quoted.setNullPolicy(NullPolicy.nullValue);
            quoted.setNullValue("\\N");
            return quoted;
        }

        if ("vertica".equals(type))
        {
            CsvSpecification vertica = newQuotedSpecification("\\", "\\");
            vertica.setNullPolicy(NullPolicy.skip);
            vertica.setSuppressedChars("\n");
            return vertica;
        }

        if ("redshift".equals(type))
        {
            // Redshift escapes a quote with a quote; nothing else needs
            // escaping.
            CsvSpecification redshift = newQuotedSpecification("\"", "");
            redshift.setNullPolicy(NullPolicy.skip);
            redshift.setSuppressedChars("\n");
            return redshift;
        }

        // Unknown (or null) type.
        return null;
    }

    /**
     * Builds the settings shared by all comma-separated, double-quoted
     * formats: "," field separator, newline record separator, no headers and
     * quoting enabled with a double quote character.
     *
     * @param escape Escape character to use
     * @param escapedChars Characters that must be preceded by the escape
     * @return A new specification; null handling is left at its defaults for
     *         the caller to adjust
     */
    private static CsvSpecification newQuotedSpecification(String escape,
            String escapedChars)
    {
        CsvSpecification spec = new CsvSpecification();
        spec.setFieldSeparator(",");
        spec.setRecordSeparator("\n");
        spec.setEscape(escape);
        spec.setEscapedChars(escapedChars);
        spec.setUseHeaders(false);
        spec.setUseQuotes(true);
        spec.setQuote("\"");
        return spec;
    }

    /**
     * Sets the field separator character. Java-style escape sequences in the
     * argument are translated with StringEscapeUtils.unescapeJava() before the
     * value is stored.
     */
    public synchronized void setFieldSeparator(String fieldSeparator)
    {
        this.fieldSeparator = StringEscapeUtils.unescapeJava(fieldSeparator);
    }

    /**
     * Returns field separator character.
     */
    public synchronized String getFieldSeparator()
    {
        return this.fieldSeparator;
    }

    /**
     * Returns true if successive input separators should be treated as a single
     * separator.
     */
    public synchronized boolean isCollapseFieldSeparators()
    {
        return collapseFieldSeparators;
    }

    /**
     * If set to true treat successive input separators as a single separator.
     */
    public synchronized void setCollapseFieldSeparators(
            boolean collapseFieldSeparators)
    {
        this.collapseFieldSeparators = collapseFieldSeparators;
    }

    /**
     * Sets the record separator character. Java-style escape sequences in the
     * argument are translated with StringEscapeUtils.unescapeJava() before the
     * value is stored.
     */
    public synchronized void setRecordSeparator(String recordSeparator)
    {
        this.recordSeparator = StringEscapeUtils.unescapeJava(recordSeparator);
    }

    /**
     * Returns record separator character.
     */
    public synchronized String getRecordSeparator()
    {
        return this.recordSeparator;
    }

    /**
     * Returns true if CSV contains column headers in first row.
     */
    public synchronized boolean isUseHeaders()
    {
        return useHeaders;
    }

    /**
     * If set to true first row must contain column headers.
     */
    public synchronized void setUseHeaders(boolean useHeaders)
    {
        this.useHeaders = useHeaders;
    }

    /** Returns true if values will be enclosed by a quote character. */
    public synchronized boolean isUseQuotes()
    {
        return useQuotes;
    }

    /** Set to true to enable quoting. */
    public synchronized void setUseQuotes(boolean quoted)
    {
        this.useQuotes = quoted;
    }

    /** Returns the policy for handling null values. */
    public synchronized NullPolicy getNullPolicy()
    {
        return nullPolicy;
    }

    /** Sets the policy for handling null values. */
    public synchronized void setNullPolicy(NullPolicy nullPolicy)
    {
        this.nullPolicy = nullPolicy;
    }

    /** Gets the null value identifier string. */
    public synchronized String getNullValue()
    {
        return nullValue;
    }

    /**
     * Sets the null value identifier string. This applies only when null policy
     * is NullPolicy.nullValue.
     */
    public synchronized void setNullValue(String nullValue)
    {
        this.nullValue = nullValue;
    }

    /** Returns true to fill nulls automatically. */
    public synchronized boolean isNullAutofill()
    {
        return nullAutofill;
    }

    /**
     * Sets the null autofill policy for columns that have no value (partial
     * rows). If true, unwritten columns are filled with the prevailing null
     * value. If false, partial rows prompt an exception.
     */
    public synchronized void setNullAutofill(boolean nullAutofill)
    {
        this.nullAutofill = nullAutofill;
    }

    /** Returns the quote character. */
    public synchronized String getQuote()
    {
        return this.quote;
    }

    /**
     * Sets the quote character. Unlike the separators and the escape
     * character, the value is stored as given without unescaping.
     */
    public synchronized void setQuote(String quoteChar)
    {
        this.quote = quoteChar;
    }

    /**
     * Sets character used to escape quotes and other escaped characters.
     * Java-style escape sequences in the argument are translated with
     * StringEscapeUtils.unescapeJava() before the value is stored.
     */
    public synchronized void setEscape(String quoteEscapeChar)
    {
        this.escape = StringEscapeUtils.unescapeJava(quoteEscapeChar);
    }

    /** Returns the escape character. */
    public synchronized String getEscape()
    {
        return escape;
    }

    /**
     * Returns a string of characters that must be preceded by escape character.
     * Never null.
     */
    public synchronized String getEscapedChars()
    {
        return escapedChars;
    }

    /**
     * Defines zero or more characters that must be preceded by escape
     * character. A null argument is stored as the empty string.
     */
    public synchronized void setEscapedChars(String escapedChars)
    {
        this.escapedChars = (escapedChars == null) ? "" : escapedChars;
    }

    /**
     * Returns a string of characters that are suppressed in CSV output. Never
     * null.
     */
    public synchronized String getSuppressedChars()
    {
        return suppressedChars;
    }

    /**
     * Sets characters to be suppressed in CSV output. A null argument is
     * stored as the empty string.
     */
    public synchronized void setSuppressedChars(String suppressedChars)
    {
        this.suppressedChars = (suppressedChars == null)
                ? ""
                : suppressedChars;
    }

    /**
     * Instantiate a new CsvWriter with output to provided writer. The writer
     * is wrapped in a BufferedWriter with default buffering.
     */
    public CsvWriter createCsvWriter(Writer writer)
    {
        return createCsvWriter(new BufferedWriter(writer));
    }

    /**
     * Instantiate a new CsvWriter with output to provided buffered writer. This
     * call allows clients to set buffering parameters themselves.
     * <p/>
     * The method holds this instance's lock while copying the properties so
     * that the writer is configured from one consistent set of values.
     */
    public synchronized CsvWriter createCsvWriter(BufferedWriter writer)
    {
        CsvWriter csvWriter = new CsvWriter(writer);
        csvWriter.setEscapeChar(escape);
        csvWriter.setEscapedChars(escapedChars);
        csvWriter.setNullAutofill(nullAutofill);
        csvWriter.setNullPolicy(nullPolicy);
        csvWriter.setNullValue(nullValue);
        csvWriter.setQuoteChar(quote);
        csvWriter.setQuoted(useQuotes);
        csvWriter.setFieldSeparator(fieldSeparator);
        csvWriter.setRecordSeparator(recordSeparator);
        csvWriter.setSuppressedChars(suppressedChars);
        csvWriter.setWriteHeaders(useHeaders);
        return csvWriter;
    }

    /**
     * Instantiate a new CsvReader with input from provided reader. The reader
     * is wrapped in a BufferedReader with default buffering.
     */
    public CsvReader createCsvReader(Reader reader)
    {
        return createCsvReader(new BufferedReader(reader));
    }

    /**
     * Instantiate a new CsvReader with input from provided buffered reader.
     * This call allows clients to set buffering parameters themselves.
     * <p/>
     * Only the field separator, record separator and header setting are
     * applied to the reader. The method holds this instance's lock while
     * copying them.
     */
    public synchronized CsvReader createCsvReader(BufferedReader reader)
    {
        CsvReader csvReader = new CsvReader(reader);
        csvReader.setFieldSeparator(fieldSeparator);
        csvReader.setRecordSeparator(recordSeparator);
        // NOTE(review): collapseFieldSeparators is stored by this class but is
        // not passed on to the reader here -- confirm whether CsvReader offers
        // a matching setter and whether it should be applied.
        csvReader.setUseHeaders(useHeaders);
        return csvReader;
    }
}