package net.sourceforge.seqware.pipeline.plugins.filelinker; import au.com.bytecode.opencsv.CSVReader; import au.com.bytecode.opencsv.bean.CsvToBean; import au.com.bytecode.opencsv.bean.HeaderColumnNameTranslateMappingStrategy; import com.google.common.annotations.VisibleForTesting; import static com.google.common.base.Preconditions.checkElementIndex; import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Set; import net.sourceforge.seqware.common.module.FileMetadata; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class FileLinkerParser { private static final String UTF8 = "UTF8"; private static final Logger LOG = LoggerFactory.getLogger(FileLinkerParser.class); public static Map<Integer, List<FileMetadata>> parse(String filename, char separator) throws FileNotFoundException, UnsupportedEncodingException { checkNotNull(filename); checkElementIndex(0, filename.length()); checkNotNull(separator); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), UTF8)); List<FileLinkerLine> lines = getFileInfo(br, separator); requiredValuesPresent(lines); return fileMetadataFromFileInfo(lines); } @VisibleForTesting static List<FileLinkerLine> getFileInfo(Reader reader) { return getFileInfo(reader, '\t'); } @VisibleForTesting static List<FileLinkerLine> getFileInfo(Reader reader, char separator) { CSVReader csvReader = new CSVReader(reader, separator); HeaderColumnNameTranslateMappingStrategy<FileLinkerLine> strat = new HeaderColumnNameTranslateMappingStrategy<>(); strat.setType(FileLinkerLine.class); Map<String, String> map = Maps.newHashMap(); map.put("sequencer_run", "sequencerRun"); map.put("sample", "sample"); map.put("lane", "laneString"); map.put("ius_sw_accession", "seqwareAccessionString"); map.put("file_status", "fileStatus"); map.put("mime_type", "mimeType"); map.put("size", "sizeString"); map.put("md5sum", "md5sum"); map.put("file", "filename"); strat.setColumnMapping(map); CsvToBean<FileLinkerLine> csvToBean = new CsvToBean<>(); List<FileLinkerLine> defaultUsers = csvToBean.parse(strat, csvReader); return defaultUsers; } private static Map<Integer, List<FileMetadata>> fileMetadataFromFileInfo(List<FileLinkerLine> lines) { Map<Integer, List<FileMetadata>> result = Maps.newHashMap(); for (FileLinkerLine line : lines) { FileMetadata fileMetadata = new FileMetadata(); fileMetadata.setMetaType(line.getMimeType()); fileMetadata.setFilePath(line.getFilename()); if (!StringUtils.isBlank(line.getMd5sum())) { fileMetadata.setMd5sum(line.getMd5sum()); } if (line.getSize() != null) { fileMetadata.setSize(line.getSize()); } if (result.containsKey(line.getSeqwareAccession())) { result.get(line.getSeqwareAccession()).add(fileMetadata); } else { List<FileMetadata> fileMetadataList = Lists.newArrayList(); fileMetadataList.add(fileMetadata); result.put(line.getSeqwareAccession(), fileMetadataList); } } return result; } static void requiredValuesPresent(List<FileLinkerLine> lines) { boolean hasRequiredValues = true; Set<Integer> badLines = Sets.newHashSet(); for (int i = 0; i < lines.size(); i++) { FileLinkerLine line = lines.get(i); if (!line.hasRequiredValues()) { hasRequiredValues = false; badLines.add(i + 2); LOG.error( "Required value(s) missing in csv file at line number [{}]. ius_sw_accession, seqwareAccession, mime_type and file are required. {}", i + 2, line); } } if (!hasRequiredValues) { throw new FileLinkerLineException("Csv file missing required values on the following lines: " + Arrays.toString(badLines.toArray())); } } }