/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.csv;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.schema.access.SchemaAccessStrategy;
import org.apache.nifi.schema.access.SchemaField;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
public class CSVHeaderSchemaStrategy implements SchemaAccessStrategy {
private static final Set<SchemaField> schemaFields = EnumSet.noneOf(SchemaField.class);
private final ConfigurationContext context;
public CSVHeaderSchemaStrategy(final ConfigurationContext context) {
this.context = context;
}
public CSVHeaderSchemaStrategy(final ValidationContext context) {
this.context = null;
}
@Override
public RecordSchema getSchema(final FlowFile flowFile, final InputStream contentStream) throws SchemaNotFoundException {
if (this.context == null) {
throw new SchemaNotFoundException("Schema Access Strategy intended only for validation purposes and cannot obtain schema");
}
try {
final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader();
try (final Reader reader = new InputStreamReader(new BOMInputStream(contentStream));
final CSVParser csvParser = new CSVParser(reader, csvFormat)) {
final List<RecordField> fields = new ArrayList<>();
for (final String columnName : csvParser.getHeaderMap().keySet()) {
fields.add(new RecordField(columnName, RecordFieldType.STRING.getDataType()));
}
return new SimpleRecordSchema(fields);
}
} catch (final Exception e) {
throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
}
}
@Override
public Set<SchemaField> getSuppliedSchemaFields() {
return schemaFields;
}
}