/*
 * Copyright (C) 2013 SeqWare
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package net.sourceforge.seqware.pipeline.plugins.batchmetadatainjection;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.sourceforge.seqware.common.metadata.Metadata;
import net.sourceforge.seqware.common.util.Log;
import org.apache.commons.lang3.StringUtils;

/**
 * Parses a comma-separated Miseq sample sheet: a block of {@code key,value}
 * header lines, a line starting with {@code [Data]}, a column header row and
 * then one row per sample.
 *
 * @author mtaschuk
 */
public class ParseMiseqFile extends BatchMetadataParser {

    /** Line prefix that ends the key/value header and starts the sample table. */
    private static final String DATA_SECTION = "[Data]";
    /** Name of the column holding the dash-separated sample identifier. */
    private static final String SAMPLE_ID_COLUMN = "Sample_ID";
    /** Name of the column holding the barcode. */
    private static final String INDEX_COLUMN = "index";

    /**
     * Passes all arguments straight through to {@link BatchMetadataParser}.
     *
     * @param metadata    forwarded to the superclass
     * @param fields      forwarded to the superclass
     * @param interactive forwarded to the superclass
     */
    public ParseMiseqFile(Metadata metadata, Map<String, String> fields, boolean interactive) {
        super(metadata, fields, interactive);
    }

    /**
     * Reads the sample sheet at {@code filepath}, confirms the run, study and
     * experiment names through {@code promptString}, and attaches the parsed
     * lanes to the returned run.
     *
     * @param filepath path of the sample sheet to read
     * @return the populated run description
     * @throws RuntimeException wrapping any {@link IOException} (including
     *                          {@link FileNotFoundException}) raised while
     *                          opening, reading or closing the file; the
     *                          failure is logged against {@code filepath} first
     * @throws Exception        anything else raised by the prompting or
     *                          parsing helpers
     */
    public RunInfo parseMiseqFile(String filepath) throws Exception {
        File file = new File(filepath);
        try (BufferedReader freader = new BufferedReader(new FileReader(file))) {
            RunInfo run = parseMiseqHeader(freader, file);

            String runName = promptString("Sequencer run name", run.getRunName(), Field.sequencer_run_name);
            String studyName = promptString("Study name", run.getStudyTitle(), Field.study_name);
            String expName = promptString("Experiment name", run.getExperimentName(), Field.experiment_name);
            run.setRunName(runName);
            run.setStudyTitle(studyName);
            run.setExperimentName(expName);

            // The reader is now positioned just after the [Data] line.
            run.setLanes(parseMiseqData(freader));
            return run;
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a single handler covers both.
            Log.error(filepath, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses the sample table. The first line read must be the column header
     * row and has to contain the columns {@code Sample_ID} and {@code index};
     * every following non-empty row becomes one sample in a single lane.
     * <p>
     * {@code Sample_ID} is split on {@code '-'}: field 0 is used as the project
     * name, field 1 as the individual number and field 2 is inspected for the
     * tissue markers {@code BLD}, {@code BIO} and {@code ARC}. If field 2 starts
     * with a digit, that digit is stored as the {@code geo_tissue_region}
     * sample attribute.
     *
     * @param freader reader positioned at the column header row
     * @return a set containing the one lane that holds all parsed samples
     * @throws IOException if the header row or a required column is missing,
     *                     or if a row or its Sample_ID has too few fields
     * @throws Exception   anything raised by the sample/lane factory methods
     */
    public Set<LaneInfo> parseMiseqData(BufferedReader freader) throws IOException, Exception {
        Set<SampleInfo> samples = new HashSet<>();
        // there is only one lane in Miseq
        // NOTE(review): the meaning of the second argument (4) is defined by generateLaneInfo.
        LaneInfo laneInfo = generateLaneInfo("1", 4);
        laneInfo.setSamples(samples);

        String headerLine = freader.readLine();
        if (headerLine == null) {
            throw new IOException("Miseq sample sheet ends before the column header row");
        }
        List<String> header = Arrays.asList(headerLine.split(","));
        // Resolve the column positions once instead of once per row.
        int sampleIdColumn = requireColumn(header, SAMPLE_ID_COLUMN);
        int indexColumn = requireColumn(header, INDEX_COLUMN);
        int lastNeededColumn = Math.max(sampleIdColumn, indexColumn);

        String line;
        while ((line = freader.readLine()) != null) {
            String[] args = line.split(",");
            // Blank rows and rows made only of commas carry no sample.
            if (line.trim().isEmpty() || args.length == 0) {
                continue;
            }
            if (args.length <= lastNeededColumn) {
                throw new IOException("Miseq sample row has too few columns: " + line);
            }

            String prettyName = args[sampleIdColumn];
            String[] sampleInfo = prettyName.split("-");
            if (sampleInfo.length < 3) {
                throw new IOException("Cannot parse Sample_ID '" + prettyName
                        + "': expected at least three '-' separated fields");
            }
            String projectName = sampleInfo[0];
            String individualNumber = sampleInfo[1];
            String tissueField = sampleInfo[2];

            // These values are not derivable from the sample sheet and stay null.
            String librarySourceTemplateType = null;
            String tissueOrigin = null;
            String libraryType = null;
            String librarySizeCode = null;
            String targetedResequencing = null;

            String tissueType = null;
            String tissuePreparation = null;
            int organismId = 1;
            String barcode = args[indexColumn];

            if (tissueField.contains("BLD")) {
                tissueType = "R";
                tissuePreparation = "Blood";
            } else if (tissueField.contains("BIO") || tissueField.contains("ARC")) {
                tissueType = "P";
            } else {
                Log.stdout("Cannot parse tissue type from " + prettyName);
            }

            SampleInfo sample = generateSampleInfo(prettyName, projectName, individualNumber,
                    librarySourceTemplateType, tissueOrigin, tissueType, libraryType, librarySizeCode,
                    barcode, organismId, targetedResequencing, tissuePreparation, "", barcode, barcode);

            // An empty third field (e.g. "A-B--C") has no first character to inspect.
            if (!tissueField.isEmpty()) {
                String tissueRegion = tissueField.substring(0, 1);
                if (StringUtils.isNumeric(tissueRegion)) {
                    sample.setSampleAttribute("geo_tissue_region", tissueRegion);
                }
            }
            samples.add(sample);
        }

        Set<LaneInfo> lanes = new HashSet<>();
        lanes.add(laneInfo);
        return lanes;
    }

    /**
     * Consumes the header part of the sample sheet, up to and including the
     * line that starts with {@code [Data]}, and builds the run description.
     * <p>
     * Lines starting with {@code '['} are skipped. Every other line with at
     * least two comma-separated fields contributes a trimmed key/value pair.
     * The run name is the name of the directory containing {@code file}; the
     * study title and experiment name are the text before the first
     * {@code '_'} of the {@code Project Name} and {@code Experiment Name}
     * header values.
     *
     * @param freader reader positioned at the start of the sample sheet
     * @param file    the sample sheet being read; only its path is used
     * @return the run description produced by {@code generateRunInfo}
     * @throws IOException if reading fails, the {@code [Data]} line or a
     *                     required header entry is missing, or {@code file}
     *                     has no parent directory
     */
    public RunInfo parseMiseqHeader(BufferedReader freader, File file) throws IOException {
        Map<String, String> headerInfo = new HashMap<>();
        boolean dataSectionFound = false;
        String line;
        while ((line = freader.readLine()) != null) {
            if (line.startsWith(DATA_SECTION)) {
                dataSectionFound = true;
                break;
            }
            if (line.startsWith("[")) {
                continue;
            }
            String[] args = line.split(",");
            if (args.length >= 2) {
                headerInfo.put(args[0].trim(), args[1].trim());
            }
        }
        if (!dataSectionFound) {
            throw new IOException("No " + DATA_SECTION + " section found in " + file.getAbsolutePath());
        }

        // Use the File API rather than String.split(File.separator): the separator is
        // treated as a regex there, and "\" (Windows) is not a valid pattern.
        File runDirectory = file.getAbsoluteFile().getParentFile();
        if (runDirectory == null) {
            throw new IOException("Cannot determine the run directory of " + file.getAbsolutePath());
        }
        String runName = runDirectory.getName();
        String studyTitle = requireHeader(headerInfo, "Project Name", file).split("_")[0];
        String experimentName = requireHeader(headerInfo, "Experiment Name", file).split("_")[0];

        // NOTE(review): the meaning of the literals 26, -1 and true is defined by generateRunInfo.
        RunInfo runInfo = super.generateRunInfo(runName, runName, studyTitle, studyTitle,
                "Ontario Institute for Cancer Research", studyTitle.replace(" ", ""),
                experimentName, experimentName, runDirectory.getAbsolutePath(),
                26, -1, true, headerInfo.get("Workflow"), headerInfo.get("Assay"));
        return runInfo;
    }

    /** Returns the position of {@code column} in {@code header}, or fails if it is absent. */
    private static int requireColumn(List<String> header, String column) throws IOException {
        int position = header.indexOf(column);
        if (position < 0) {
            throw new IOException("Miseq sample sheet has no '" + column + "' column; found " + header);
        }
        return position;
    }

    /** Returns the header value stored under {@code key}, or fails if it is absent. */
    private static String requireHeader(Map<String, String> headerInfo, String key, File file)
            throws IOException {
        String value = headerInfo.get(key);
        if (value == null) {
            throw new IOException("Missing '" + key + "' header entry in " + file.getAbsolutePath());
        }
        return value;
    }
}