/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.morphline.shaded.com.googlecode.jcsv.fastreader; import java.io.BufferedReader; import java.io.IOException; import java.util.List; import org.kitesdk.morphline.api.Record; /** * A very simple csv tokenizer implementation. * If you do not need field quotations or multi line columns, this * will serve your purposes. * */ public final class SimpleCSVTokenizer implements CSVTokenizer { private final char separatorChar; private final boolean trim; private final List<String> columnNames; private final boolean addEmptyStrings; public SimpleCSVTokenizer(char separatorChar, boolean trim, boolean addEmptyStrings, List<String> columnNames) { this.separatorChar = separatorChar; this.trim = trim; this.addEmptyStrings = addEmptyStrings; this.columnNames = columnNames; } /** Splits the given input line into parts, using the given delimiter. */ @Override public boolean tokenizeLine(String line, BufferedReader reader, Record record) throws IOException { char separator = separatorChar; int len = line.length(); int start = 0; int j = 0; for (int i = 0; i < len; i++) { if (line.charAt(i) == separator) { put(line, start, i, j, record); start = i+1; j++; } } put(line, start, len, j, record); return true; } private void put(String line, int start, int i, int j, Record record) { if (j >= columnNames.size()) { columnNames.add("column" + j); } String columnName = columnNames.get(j); if (columnName.length() != 0) { // empty column name indicates omit this field on output String col = line.substring(start, i); col = trim ? col.trim() : col; if (col.length() > 0 || addEmptyStrings) { record.put(columnName, col); } } } }