/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.data.rnai;
//~--- non-JDK imports --------------------------------------------------------
import org.apache.log4j.Logger;
import org.broad.igv.feature.FeatureDB;
import org.broad.igv.feature.NamedFeature;
import org.broad.igv.feature.genome.Genome;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
/**
* @author jrobinso
*/
public class RNAIGeneScoreParser {
public enum Type {
GENE_SCORE, POOLED
}
private static Logger log = Logger.getLogger(RNAIGeneScoreParser.class);
private String filename;
private int maxColumn = -1;
private int batchColumn = -1;
private int conditionColumn = -1;
private int geneColumn = -1;
private int hairpinColumn = -1;
private int scoreColumn = -1;
private int confidenceColumn = -1;
private Genome genome;
public RNAIGeneScoreParser(String filename, Type type, Genome genome) {
this.genome = genome;
this.filename = filename;
if (type == Type.GENE_SCORE) {
batchColumn = 0;
conditionColumn = 2;
geneColumn = 5;
hairpinColumn = 12;
scoreColumn = 8;
confidenceColumn = 9;
maxColumn = 12;
} else {
batchColumn = -1;
conditionColumn = -1;
geneColumn = 0;
hairpinColumn = 1;
scoreColumn = 3;
confidenceColumn = 2;
maxColumn = 3;
}
}
/**
* Method description
*
* @return
*/
public Collection<RNAIDataSource> parse() {
BufferedReader reader = null;
try {
log.debug("Loading data for: " + filename);
reader = new BufferedReader(new FileReader(filename));
// Parse comments
parseAttributes(reader);
// Parse header
parseHeaderRow(reader);
// Parse data
String nextLine = reader.readLine();
// Skip empty and comment lines
while ((nextLine = reader.readLine()) != null
&& (nextLine.startsWith("#") || (nextLine.length() == 0))) {
}
Map<String, RNAIDataSource> dataSources = new HashMap();
while ((nextLine = reader.readLine()) != null) {
String[] tokens = nextLine.split("\t");
if (tokens.length > maxColumn) {
try {
String batchId = (batchColumn < 0) ? "" : tokens[batchColumn].trim();
String geneName = tokens[geneColumn].trim().toUpperCase();
NamedFeature gene = FeatureDB.getFeature(geneName);
if (gene != null) {
float geneScore = Float.NaN;
try {
geneScore = Float.parseFloat(tokens[scoreColumn]);
}
catch (NumberFormatException numberFormatException) {
// Nothing to do -- expected condition. Indicates no score for this
// gene
}
int confidence = 0;
try {
confidence = Integer.parseInt(tokens[confidenceColumn]);
}
catch (NumberFormatException numberFormatException) {
// Nothing to do -- expected condition. This will occur when the
// score is blank.
}
int numberOfHairpins = 0;
try {
numberOfHairpins = Integer.parseInt(tokens[hairpinColumn]);
}
catch (NumberFormatException numberFormatException) {
// Nothing to do -- expected condition. This will coincide with
// a blank gene score
}
// Make batch_conditon key
// Rules from Jessee -- ignore conditions starting with *. None, standard,
// and blank are all equivalent.
String cond = (conditionColumn < 0)
? "" : tokens[conditionColumn].trim();
if (!cond.startsWith("*")) {
if (cond.equals("None") || cond.equals("Standard")) {
cond = "";
}
String batchCond = batchId + "_" + cond;
RNAIDataSource ds = dataSources.get(batchCond);
// List<RNAIGeneScore> dataPoints = dataPointsMap.get(batchCond);
if (ds == null) {
ds = new RNAIDataSource(batchId, cond, genome);
dataSources.put(batchCond, ds);
}
ds.addGeneScore(new RNAIGeneScore(batchCond, gene, geneScore,
confidence, numberOfHairpins));
}
} else {
// todo -- handle unknown gene
log.info("Unknown gene: " + geneName);
}
}
catch (Exception ex) {
log.error("Skipping line: " + nextLine, ex);
}
}
}
return dataSources.values();
}
catch (FileNotFoundException e) {
log.error("RNAI file not found: " + filename);
throw new RuntimeException("File not found");
}
catch (IOException e) {
log.error(filename, e);
throw new RuntimeException("Error parsing file.", e);
}
finally {
if (reader != null) {
try {
reader.close();
}
catch (IOException iOException) {
}
}
}
}
/**
* Parse the attributes from the comment section and annotate the
*/
private void parseAttributes(BufferedReader reader) throws IOException {
// TODO -- parse comments
}
private void parseHeaderRow(BufferedReader reader) throws IOException {
// Nothing to do here. Column positions are fixed.
}
}