/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.data.spi.filesystem; import au.com.bytecode.opencsv.CSVParser; import com.google.common.collect.Lists; import java.io.IOException; import java.util.List; import javax.annotation.Nullable; import org.apache.avro.Schema; import org.kitesdk.data.DatasetIOException; import org.kitesdk.data.View; import org.kitesdk.data.spi.DataModelUtil; public class CSVRecordParser<E> { private final CSVParser parser; private final CSVRecordBuilder<E> builder; public CSVRecordParser(CSVProperties props, View<E> view, @Nullable List<String> header) { this(props, view.getDataset().getDescriptor().getSchema(), view.getType(), header); } public CSVRecordParser(CSVProperties props, Schema schema, Class<E> type, @Nullable List<String> header) { this.parser = CSVUtil.newParser(props); this.builder = new CSVRecordBuilder<E>( DataModelUtil.getReaderSchema(type, schema), type, getHeader(props, header)); } public E read(String line) { return read(line, null); } public E read(String line, @Nullable E reuse) { try { return builder.makeRecord(parser.parseLine(line), reuse); } catch (IOException e) { throw new DatasetIOException("Cannot parse line: " + line, e); } } public static List<String> getHeader(CSVProperties props, @Nullable List<String> header) { if (header != null) { return header; } else if (props.header != null) { try { return Lists.newArrayList( CSVUtil.newParser(props).parseLine(props.header)); } catch (IOException e) { throw new DatasetIOException( "Failed to parse header from properties: " + props.header, e); } } return null; } }