package org.gbif.tabular; import org.gbif.dwc.terms.Term; import org.gbif.dwc.terms.TermFactory; import org.gbif.dwca.io.ArchiveField; import org.gbif.utils.file.tabular.TabularDataFileReader; import java.io.IOException; import java.util.List; import java.util.Map; import com.google.common.base.Strings; import com.google.common.collect.Maps; /** * Specialized TermTabularDataFileReader for DarwinCore archive file. * In addition of reading the underlying csv file this class will handle id field and default values that can * be provided in DarwinCore. */ public class DwcTabularDataFileReader extends TermTabularDataFileReader { //refers to a column in the archive representing the id of the line that is not mapped to a real term public static Term DEFAULT_ID_TERM = TermFactory.instance().findTerm("dwcaId"); private Map<Term, String> defaultValues = Maps.newHashMap(); public DwcTabularDataFileReader(TabularDataFileReader<List<String>> tabularDataFileReader, ArchiveField idField, List<ArchiveField> fields) { super(tabularDataFileReader, buildColumnMapping(idField, fields)); for(ArchiveField af : fields) { if(!Strings.isNullOrEmpty(af.getDefaultValue())){ defaultValues.put(af.getTerm(), af.getDefaultValue()); } } } private static Term[] buildColumnMapping(ArchiveField idField, List<ArchiveField> dwcTerms){ Map<Integer, Term> columnMapping = Maps.newTreeMap(); columnMapping.put(idField.getIndex(), idField.getTerm() != null ? idField.getTerm() : DEFAULT_ID_TERM); for(ArchiveField af : dwcTerms) { if(af.getIndex() != null && af.getIndex() >= 0){ columnMapping.put(af.getIndex(), af.getTerm()); } } return columnMapping.values().toArray(new Term[0]); } @Override public TermTabularDataLine read() throws IOException { TermTabularDataLine line = super.read(); if(line == null){ return null; } if(defaultValues.isEmpty()){ return line; } Map<Term, String> mappedData = Maps.newHashMap(line.getMappedData()); int numberOfColumn = line.getNumberOfColumn(); for(Term term : defaultValues.keySet()) { if(mappedData.containsKey(term)){ if(Strings.isNullOrEmpty(mappedData.get(term))){ mappedData.put(term, defaultValues.get(term)); } } else { mappedData.put(term, defaultValues.get(term)); numberOfColumn++; } } return new TermTabularDataLine(line.getLineNumber(), mappedData, numberOfColumn); } }