/*
* The MIT License (MIT)
*
* Copyright (c) 2016 University of California San Diego
* Author: Jim Robinson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.feature.bionano;
import htsjdk.tribble.Feature;
import org.broad.igv.Globals;
import org.broad.igv.feature.FeatureUtils;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.util.ParsingUtils;
import org.broad.igv.util.ResourceLocator;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.*;
/**
 * Static parser that reads an SMAP file and converts its rows into features.
 * <p>
 * For the time being we bypass tribble and just use a parser.
 */
public class SMAPParser {

    /** Utility class; not instantiable. */
    private SMAPParser() {
    }

    /**
     * Reads all rows from the resource and builds the feature list.
     * <p>
     * Expected columns (names are taken from the "#h" header line, order is not significant):
     * SmapEntryID QryContigID RefcontigID1 RefcontigID2 QryStartPos QryEndPos RefStartPos RefEndPos
     * Confidence Type XmapID1 XmapID2 LinkID QryStartIdx QryEndIdx RefStartIdx RefEndIdx
     * <p>
     * SmapEntryID, LinkID, RefcontigID1, RefcontigID2, RefStartPos and RefEndPos are required once a
     * data row is encountered; Confidence and Type are optional and default to 0 and "".
     * <ul>
     * <li>Rows whose type ends with "_partial" are attached to the feature whose id equals their link id.</li>
     * <li>Rows whose two reference contigs differ are split into two 1-base features, one per contig.</li>
     * <li>Features whose type ends with "_paired" are combined with their linked feature into a
     * {@link SMAPPairedFeature}; if the linked feature is not present the feature is emitted on its own.</li>
     * </ul>
     *
     * @param locator resource to read
     * @param genome  used to canonicalize contig names; may be null, in which case names are used as-is
     * @return the parsed features, sorted with {@link FeatureUtils#sortFeatureList}
     * @throws IOException           if reading or closing the resource fails
     * @throws RuntimeException      if a data row appears before any "#h" line, or a required column is
     *                               missing from the "#h" line
     * @throws NumberFormatException if a numeric column cannot be parsed
     */
    public static List<Feature> parseFeatures(ResourceLocator locator, Genome genome) throws IOException {

        ArrayList<Feature> features = new ArrayList<Feature>(1000);
        ArrayList<SMAPFeature> partialFeatures = new ArrayList<SMAPFeature>(1000);
        Map<Integer, SMAPFeature> featureMap = new HashMap<Integer, SMAPFeature>(1000);

        BufferedReader br = null;
        try {
            br = ParsingUtils.openBufferedReader(locator);

            String nextLine;
            String[] headers = null;
            // Column indices, resolved from the "#h" line; -1 means "not present".
            int refContig1 = -1, refContig2 = -1, refStart = -1, refEnd = -1,
                    confidence = -1, type = -1, idf = -1, linkIdf = -1;

            while ((nextLine = br.readLine()) != null) {
                if (nextLine.startsWith("#h")) {
                    // Skip the 3-character "#h" prefix and its separator before splitting the column names.
                    headers = Globals.tabPattern.split(nextLine.substring(3));
                    for (int i = 0; i < headers.length; i++) {
                        String h = headers[i];
                        if (h.equals("RefcontigID1")) {
                            refContig1 = i;
                        } else if (h.equals("RefcontigID2")) {
                            refContig2 = i;
                        } else if (h.equals("RefStartPos")) {
                            refStart = i;
                        } else if (h.equals("RefEndPos")) {
                            refEnd = i;
                        } else if (h.equals("Confidence")) {
                            confidence = i;
                        } else if (h.equals("Type")) {
                            type = i;
                        } else if (h.equals("SmapEntryID")) {
                            idf = i;
                        } else if (h.equals("LinkID")) {
                            linkIdf = i;
                        }
                    }
                } else if (nextLine.startsWith("#")) {
                    // Any other comment / directive line is ignored.
                    continue;
                } else if (nextLine.trim().length() == 0) {
                    // Blank lines (e.g. a trailing newline) carry no record; skip rather than fail on parseInt("").
                    continue;
                } else {
                    if (headers == null) {
                        throw new RuntimeException("Never saw #h line");
                    }
                    // Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException on tokens[-1].
                    requireColumn(idf, "SmapEntryID");
                    requireColumn(linkIdf, "LinkID");
                    requireColumn(refContig1, "RefcontigID1");
                    requireColumn(refContig2, "RefcontigID2");
                    requireColumn(refStart, "RefStartPos");
                    requireColumn(refEnd, "RefEndPos");

                    String[] tokens = Globals.tabPattern.split(nextLine);
                    int id = Integer.parseInt(tokens[idf]);
                    int linkId = Integer.parseInt(tokens[linkIdf]);
                    String c1 = tokens[refContig1];
                    String c2 = tokens[refContig2];
                    // Positions are parsed as doubles and truncated to int.
                    int start = (int) Double.parseDouble(tokens[refStart]);
                    int end = (int) Double.parseDouble(tokens[refEnd]);
                    double conf = confidence >= 0 ? Double.parseDouble(tokens[confidence]) : 0;
                    String t = type >= 0 ? tokens[type] : "";
                    String chr1 = genome == null ? c1 : genome.getCanonicalChrName(c1);
                    String chr2 = genome == null ? c2 : genome.getCanonicalChrName(c2);

                    if (t.endsWith("_partial")) {
                        partialFeatures.add(new SMAPFeature(chr1, start, end, conf, t, headers, tokens, linkId));
                    } else if (c1.equals(c2)) {
                        featureMap.put(id, new SMAPFeature(chr1, start, end, conf, t, headers, tokens, linkId));
                    } else {
                        // Don't know how to treat these inter-chromosomal features. Split into 2 for now.
                        SMAPFeature feature1 = new SMAPFeature(chr1, start, start + 1, conf, t, headers, tokens);
                        SMAPFeature feature2 = new SMAPFeature(chr2, end, end + 1, conf, t, headers, tokens);
                        features.add(feature1);
                        features.add(feature2);
                    }
                }
            }
        } finally {
            // Always release the underlying stream, including when parsing throws.
            if (br != null) {
                br.close();
            }
        }

        // Link all partial features to the feature their link id points at (if it exists).
        for (SMAPFeature partialFeature : partialFeatures) {
            SMAPFeature parent = featureMap.get(partialFeature.getLinkId());
            if (parent != null) {
                parent.addPartialFeature(partialFeature);
            }
        }

        // Link paired features. Ids already consumed as part of a pair are skipped.
        Set<Integer> pairedIds = new HashSet<Integer>();
        for (Map.Entry<Integer, SMAPFeature> entry : featureMap.entrySet()) {
            if (pairedIds.contains(entry.getKey())) {
                continue;
            }
            SMAPFeature f1 = entry.getValue();
            if (f1.getType().endsWith("_paired")) {
                SMAPFeature f2 = featureMap.get(f1.getLinkId());
                if (f2 == null) {
                    // The mate is not among the same-contig features (missing, partial or inter-chromosomal);
                    // keep this feature on its own rather than building a pair around a null mate.
                    features.add(f1);
                    continue;
                }
                pairedIds.add(entry.getKey());
                pairedIds.add(f1.getLinkId());
                features.add(new SMAPPairedFeature(f1, f2));
            } else {
                features.add(f1);
            }
        }

        FeatureUtils.sortFeatureList(features);
        return features;
    }

    /**
     * Verifies that a required column was located in the "#h" line.
     *
     * @param index resolved column index, or -1 if the column was not found
     * @param name  column name, used in the error message
     * @throws RuntimeException if {@code index} is negative
     */
    private static void requireColumn(int index, String name) {
        if (index < 0) {
            throw new RuntimeException("Required column '" + name + "' not found in #h line");
        }
    }
}