/* * Copyright (c) 2017 wetransform GmbH * * All rights reserved. This program and the accompanying materials are made * available under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of the License, * or (at your option) any later version. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution. If not, see <http://www.gnu.org/licenses/>. * * Contributors: * wetransform GmbH <http://www.wetransform.to> */ package eu.esdihumboldt.hale.io.csv.reader.internal; import java.io.IOException; import java.text.MessageFormat; import java.util.Collections; import java.util.Map; import java.util.NoSuchElementException; import javax.xml.namespace.QName; import org.springframework.core.convert.ConversionService; import au.com.bytecode.opencsv.CSVReader; import de.fhg.igd.slf4jplus.ALogger; import de.fhg.igd.slf4jplus.ALoggerFactory; import eu.esdihumboldt.hale.common.core.HalePlatform; import eu.esdihumboldt.hale.common.instance.model.Filter; import eu.esdihumboldt.hale.common.instance.model.Instance; import eu.esdihumboldt.hale.common.instance.model.InstanceCollection; import eu.esdihumboldt.hale.common.instance.model.InstanceReference; import eu.esdihumboldt.hale.common.instance.model.MutableInstance; import eu.esdihumboldt.hale.common.instance.model.ResourceIterator; import eu.esdihumboldt.hale.common.instance.model.ext.InstanceCollection2; import eu.esdihumboldt.hale.common.instance.model.impl.DefaultInstance; import eu.esdihumboldt.hale.common.instance.model.impl.FilteredInstanceCollection; import eu.esdihumboldt.hale.common.instance.model.impl.PseudoInstanceReference; import eu.esdihumboldt.hale.common.schema.model.PropertyDefinition; import eu.esdihumboldt.hale.common.schema.model.TypeDefinition; import eu.esdihumboldt.hale.common.schema.model.constraint.type.Binding; import eu.esdihumboldt.hale.io.csv.reader.CommonSchemaConstants; /** * Instance collection based on a CSV file/stream. * * @author Simon Templer */ public class CSVInstanceCollection implements InstanceCollection, InstanceCollection2 { private static final ALogger log = ALoggerFactory.getLogger(CSVInstanceCollection.class); /** * CSV instance iterator. */ public class CSVIterator implements ResourceIterator<Instance> { @SuppressWarnings("unused") private int currentLine = -1; private boolean closed = false; private CSVReader csvReader; private String[] nextItem = null; @Override public boolean hasNext() { if (closed) { return false; } proceedToNext(); return nextItem != null; } private void proceedToNext() { if (closed) { return; } // initialize reader if necessary if (csvReader == null) { boolean skipFirst = reader.getParameter(CommonSchemaConstants.PARAM_SKIP_FIRST_LINE) .as(Boolean.class, false); try { csvReader = CSVUtil.readFirst(reader); } catch (IOException e) { log.error("Could not open CSV source", e); closed = true; } if (skipFirst) { try { csvReader.readNext(); } catch (IOException e) { // close on error close(e); } currentLine++; } } if (nextItem == null) { // item was consumed or first item try { nextItem = csvReader.readNext(); } catch (IOException e) { // close on error close(e); } currentLine++; } } private void close(IOException e) { closed = true; log.error("Error accessing CSV source", e); } @Override public Instance next() { proceedToNext(); if (nextItem == null) { throw new NoSuchElementException(); } MutableInstance instance = new DefaultInstance(type, null); try { // build instance PropertyDefinition[] propAr = type.getChildren() .toArray(new PropertyDefinition[type.getChildren().size()]); int index = 0; for (String part : nextItem) { if (index >= propAr.length) { // break if line has more columns than the specified // type log.warn("More data columns encountered than defined in the schema"); break; } PropertyDefinition property = propAr[index]; Object value = convertValue(part, property); instance.addProperty(property.getName(), value); index++; } } finally { nextItem = null; } return instance; } private Object convertValue(String part, PropertyDefinition property) { if (part == null || part.isEmpty()) { // FIXME make this configurable? return null; } Binding binding = property.getPropertyType().getConstraint(Binding.class); try { if (!binding.getBinding().equals(String.class)) { if (Number.class.isAssignableFrom(binding.getBinding()) && decimalPoint != '.') { // number binding and we don't have the // default decimal point // TODO more sophisticated behavior? // what about thousands separator char? part = part.replace(decimalPoint, '.'); } ConversionService conversionService = HalePlatform .getService(ConversionService.class); if (conversionService.canConvert(String.class, binding.getBinding())) { return conversionService.convert(part, binding.getBinding()); } else { throw new IllegalStateException("Conversion not possible!"); } } } catch (Exception e) { log.error(MessageFormat.format("Cannot convert property value to {0}", binding.getBinding().getSimpleName()), e); } return part; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public void close() { closed = true; if (csvReader != null) { try { csvReader.close(); } catch (IOException e) { log.debug("Error closing CSV reader", e); } } } } /** * The original CSV instance reader. */ protected final CSVInstanceReader reader; /** * The schema type of instances read. */ protected final TypeDefinition type; /** * The character used as a decimal point. */ protected final char decimalPoint; private Boolean empty; /** * Create a CSV instance collection based on the given CSV instance reader * (because we make use of its configuration). * * @param csvInstanceReader the CSV instance reader */ public CSVInstanceCollection(CSVInstanceReader csvInstanceReader) { this.reader = csvInstanceReader; // Decimal point decimalPoint = CSVUtil.getDecimal(reader); // Schema type type = reader.getSourceSchema().getType(QName.valueOf( reader.getParameter(CommonSchemaConstants.PARAM_TYPENAME).as(String.class))); } @Override public InstanceReference getReference(Instance instance) { // TODO reference by line? return new PseudoInstanceReference(instance); } @Override public Instance getInstance(InstanceReference reference) { if (reference instanceof PseudoInstanceReference) { return ((PseudoInstanceReference) reference).getInstance(); } return null; } @Override public ResourceIterator<Instance> iterator() { return new CSVIterator(); } @Override public boolean hasSize() { return false; } @Override public int size() { return UNKNOWN_SIZE; } @Override public boolean isEmpty() { if (empty != null) { return empty; } try (ResourceIterator<Instance> it = iterator()) { empty = !it.hasNext(); } return empty; } @Override public InstanceCollection select(Filter filter) { return FilteredInstanceCollection.applyFilter(this, filter); } @Override public boolean supportsFanout() { return true; } @Override public Map<TypeDefinition, InstanceCollection> fanout() { return Collections.<TypeDefinition, InstanceCollection> singletonMap(type, this); } }