/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.csv;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Date;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.TimeZone;
import org.apache.commons.csv.CSVFormat;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
public class TestCSVRecordReader {
private final DataType doubleDataType = RecordFieldType.DOUBLE.getDataType();
private final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().withQuote('"');
private List<RecordField> getDefaultFields() {
final List<RecordField> fields = new ArrayList<>();
for (final String fieldName : new String[] {"id", "name", "balance", "address", "city", "state", "zipCode", "country"}) {
fields.add(new RecordField(fieldName, RecordFieldType.STRING.getDataType()));
}
return fields;
}
@Test
public void testDate() throws IOException, MalformedRecordException {
final String text = "date\n11/30/1983";
final List<RecordField> fields = new ArrayList<>();
fields.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream bais = new ByteArrayInputStream(text.getBytes());
final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format,
"MM/dd/yyyy", RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat())) {
final Record record = reader.nextRecord();
final java.sql.Date date = (Date) record.getValue("date");
final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("gmt"));
calendar.setTimeInMillis(date.getTime());
assertEquals(1983, calendar.get(Calendar.YEAR));
assertEquals(10, calendar.get(Calendar.MONTH));
assertEquals(30, calendar.get(Calendar.DAY_OF_MONTH));
}
}
@Test
public void testSimpleParse() throws IOException, MalformedRecordException {
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/single-bank-account.csv"))) {
final CSVRecordReader reader = new CSVRecordReader(fis, Mockito.mock(ComponentLog.class), schema, format,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
final Object[] record = reader.nextRecord().getValues();
final Object[] expectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
Assert.assertArrayEquals(expectedValues, record);
assertNull(reader.nextRecord());
}
}
@Test
public void testMultipleRecords() throws IOException, MalformedRecordException {
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/multi-bank-account.csv"))) {
final CSVRecordReader reader = new CSVRecordReader(fis, Mockito.mock(ComponentLog.class), schema, format,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
final Object[] firstRecord = reader.nextRecord().getValues();
final Object[] firstExpectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
Assert.assertArrayEquals(firstExpectedValues, firstRecord);
final Object[] secondRecord = reader.nextRecord().getValues();
final Object[] secondExpectedValues = new Object[] {"2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA"};
Assert.assertArrayEquals(secondExpectedValues, secondRecord);
assertNull(reader.nextRecord());
}
}
@Test
public void testExtraWhiteSpace() throws IOException, MalformedRecordException {
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/extra-white-space.csv"))) {
final CSVRecordReader reader = new CSVRecordReader(fis, Mockito.mock(ComponentLog.class), schema, format,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
final Object[] firstRecord = reader.nextRecord().getValues();
final Object[] firstExpectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
Assert.assertArrayEquals(firstExpectedValues, firstRecord);
final Object[] secondRecord = reader.nextRecord().getValues();
final Object[] secondExpectedValues = new Object[] {"2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA"};
Assert.assertArrayEquals(secondExpectedValues, secondRecord);
assertNull(reader.nextRecord());
}
}
@Test
public void testMissingField() throws IOException, MalformedRecordException {
final List<RecordField> fields = getDefaultFields();
fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
final RecordSchema schema = new SimpleRecordSchema(fields);
final String headerLine = "id, name, balance, address, city, state, zipCode, country";
final String inputRecord = "1, John, 40.80, 123 My Street, My City, MS, 11111";
final String csvData = headerLine + "\n" + inputRecord;
final byte[] inputData = csvData.getBytes();
try (final InputStream baos = new ByteArrayInputStream(inputData)) {
final CSVRecordReader reader = new CSVRecordReader(baos, Mockito.mock(ComponentLog.class), schema, format,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
final Record record = reader.nextRecord();
assertNotNull(record);
assertEquals("1", record.getValue("id"));
assertEquals("John", record.getValue("name"));
assertEquals(40.8D, record.getValue("balance"));
assertEquals("123 My Street", record.getValue("address"));
assertEquals("My City", record.getValue("city"));
assertEquals("MS", record.getValue("state"));
assertEquals("11111", record.getValue("zipCode"));
assertNull(record.getValue("country"));
assertNull(reader.nextRecord());
}
}
}