/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi.filesystem; import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; import org.apache.commons.lang.StringEscapeUtils; import org.kitesdk.data.DatasetDescriptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @Immutable public class CSVProperties { private static final Logger LOG = LoggerFactory .getLogger(CSVProperties.class); public static final String CHARSET_PROPERTY = "kite.csv.charset"; public static final String DELIMITER_PROPERTY = "kite.csv.delimiter"; public static final String QUOTE_CHAR_PROPERTY = "kite.csv.quote-char"; public static final String ESCAPE_CHAR_PROPERTY = "kite.csv.escape-char"; public static final String HEADER_PROPERTY = "kite.csv.header"; public static final String HAS_HEADER_PROPERTY = "kite.csv.has-header"; public static final String LINES_TO_SKIP_PROPERTY = "kite.csv.lines-to-skip"; // old properties public static final String OLD_CHARSET_PROPERTY = "cdk.csv.charset"; public static final String OLD_DELIMITER_PROPERTY = "cdk.csv.delimiter"; public static final String OLD_QUOTE_CHAR_PROPERTY = "cdk.csv.quote-char"; public static final String OLD_ESCAPE_CHAR_PROPERTY = "cdk.csv.escape-char"; public static final String OLD_LINES_TO_SKIP_PROPERTY = "cdk.csv.lines-to-skip"; public static final String DEFAULT_CHARSET = "utf8"; public static final String DEFAULT_DELIMITER = ","; public static final String DEFAULT_QUOTE = "\""; public static final String DEFAULT_ESCAPE = "\\"; public static final String DEFAULT_HAS_HEADER = "false"; public static final int DEFAULT_LINES_TO_SKIP = 0; // configuration public final String charset; public final String delimiter; public final String quote; public final String escape; public final String header; public final boolean useHeader; public final int linesToSkip; private CSVProperties(String charset, String delimiter, String quote, String escape, String header, boolean useHeader, int linesToSkip) { this.charset = charset; this.delimiter = delimiter; this.quote = quote; this.escape = escape; this.header = header; this.useHeader = useHeader; this.linesToSkip = linesToSkip; } private CSVProperties(DatasetDescriptor descriptor) { this.charset = coalesce( descriptor.getProperty(CHARSET_PROPERTY), descriptor.getProperty(OLD_CHARSET_PROPERTY), DEFAULT_CHARSET); this.delimiter= coalesce( descriptor.getProperty(DELIMITER_PROPERTY), descriptor.getProperty(OLD_DELIMITER_PROPERTY), DEFAULT_DELIMITER); this.quote = coalesce( descriptor.getProperty(QUOTE_CHAR_PROPERTY), descriptor.getProperty(OLD_QUOTE_CHAR_PROPERTY), DEFAULT_QUOTE); this.escape = coalesce( descriptor.getProperty(ESCAPE_CHAR_PROPERTY), descriptor.getProperty(OLD_ESCAPE_CHAR_PROPERTY), DEFAULT_ESCAPE); this.header = descriptor.getProperty(HEADER_PROPERTY); this.useHeader = Boolean.parseBoolean(coalesce( descriptor.getProperty(HAS_HEADER_PROPERTY), DEFAULT_HAS_HEADER)); final String linesToSkipString = coalesce( descriptor.getProperty(LINES_TO_SKIP_PROPERTY), descriptor.getProperty(OLD_LINES_TO_SKIP_PROPERTY)); int lines = DEFAULT_LINES_TO_SKIP; if (linesToSkipString != null) { try { lines = Integer.parseInt(linesToSkipString); } catch (NumberFormatException ex) { LOG.debug("Defaulting lines to skip, failed to parse: {}", linesToSkipString); // lines remains set to the default } } this.linesToSkip = lines; } /** * Returns the first non-null value from the sequence or null if there is no * non-null value. */ private static <T> T coalesce(T... values) { for (T value : values) { if (value != null) { return value; } } return null; } public DatasetDescriptor addToDescriptor(DatasetDescriptor descriptor) { DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder(descriptor) .property(CHARSET_PROPERTY, charset) .property(DELIMITER_PROPERTY, delimiter) .property(ESCAPE_CHAR_PROPERTY, escape) .property(QUOTE_CHAR_PROPERTY, quote) .property(HAS_HEADER_PROPERTY, Boolean.toString(useHeader)) .property(LINES_TO_SKIP_PROPERTY, Integer.toString(linesToSkip)); if (header != null) { builder.property(HEADER_PROPERTY, header); } return builder.build(); } public static CSVProperties fromDescriptor(DatasetDescriptor descriptor) { return new CSVProperties(descriptor); } public static class Builder { private String charset = DEFAULT_CHARSET; private String delimiter = DEFAULT_DELIMITER; private String quote = DEFAULT_QUOTE; private String escape = DEFAULT_ESCAPE; private boolean useHeader = Boolean.valueOf(DEFAULT_HAS_HEADER); private int linesToSkip = DEFAULT_LINES_TO_SKIP; private String header = null; public Builder charset(String charset) { this.charset = charset; return this; } public Builder delimiter(String delimiter) { this.delimiter = StringEscapeUtils.unescapeJava(delimiter); return this; } public Builder quote(String quote) { this.quote = StringEscapeUtils.unescapeJava(quote); return this; } public Builder escape(String escape) { this.escape = StringEscapeUtils.unescapeJava(escape); return this; } public Builder header(@Nullable String header) { this.header = header; return this; } public Builder hasHeader() { this.useHeader = true; return this; } public Builder hasHeader(boolean hasHeader) { this.useHeader = hasHeader; return this; } public Builder linesToSkip(int linesToSkip) { this.linesToSkip = linesToSkip; return this; } public CSVProperties build() { return new CSVProperties( charset, delimiter, quote, escape, header, useHeader, linesToSkip); } } }