package org.nextprot.api.isoform.mapper.domain.impl;
import com.google.common.base.Preconditions;
import org.nextprot.api.commons.bio.variation.prot.SequenceVariationFormat;
import org.nextprot.api.commons.bio.variation.prot.impl.format.SequenceVariantHGVSFormat;
import org.nextprot.api.core.domain.Isoform;
import java.text.ParseException;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A sequence feature whose textual form is a gene (optionally followed by an
 * isoform part) and a protein variation separated by {@code "-p."}.
 *
 * <p>The variation part is parsed with a {@link SequenceVariantHGVSFormat}.
 */
public class SequenceVariant extends SequenceFeatureBase {

    // These constants are static on purpose: static state is initialized before
    // any instance is constructed, so the overrides below remain usable even if
    // the base class constructor happens to invoke them.

    /** Marker separating the gene/isoform part from the variation part. */
    private static final String VARIATION_SEPARATOR = "-p.";

    /** Mandatory prefix of the isoform part of a feature string. */
    private static final String ISOFORM_PREFIX = "iso";

    /** Matches names made of digits only (e.g. the "5" of "iso5"). */
    private static final Pattern DIGITS_ONLY = Pattern.compile("\\d+");

    /**
     * @param feature the feature string, passed as is to the base class
     * @throws ParseException declared by the base class constructor
     */
    public SequenceVariant(String feature) throws ParseException {
        super(feature);
    }

    /**
     * Locates the separator between the gene/isoform part and the variation.
     *
     * @param feature the feature string to inspect
     * @return the index of the first occurrence of {@code "-p."} in {@code feature}
     * @throws ParseException if {@code feature} does not contain {@code "-p."}
     */
    @Override
    protected int getPivotPoint(String feature) throws ParseException {
        int index = feature.indexOf(VARIATION_SEPARATOR);
        if (index < 0) {
            throw new ParseException("Cannot separate gene name from variation (missing '-p.')", 0);
        }
        return index;
    }

    /**
     * @return a new HGVS parser for the variation part
     */
    @Override
    public SequenceVariationFormat newParser() {
        return new SequenceVariantHGVSFormat();
    }

    /**
     * Converts the isoform part of a feature string back to an isoform name.
     * This is the inverse of {@link #formatIsoformFeatureName(Isoform)}:
     *
     * <pre>
     * iso5          -> Iso 5
     * isoShort      -> Short
     * isoDelta_6    -> Delta 6
     * isoGTBP-N     -> GTBP-N
     * isoChain_XP32 -> Chain XP32
     * </pre>
     *
     * Names without underscore are returned unchanged once the prefix is
     * removed (e.g. {@code isoDelta6 -> Delta6}); no letter case conversion
     * is applied.
     *
     * @param geneAndIso the gene name optionally followed by '-' and the isoform part
     * @return the isoform name, or null if canonical (no isoform part)
     * @throws ParseException if the isoform part does not start with "iso"
     */
    @Override
    protected String parseIsoformName(String geneAndIso) throws ParseException {
        String featureIsoname = extractIsoName(geneAndIso);

        // canonical
        if (featureIsoname == null) {
            return null;
        }
        if (!featureIsoname.startsWith(ISOFORM_PREFIX)) {
            throw new ParseException("invalid isoform name: "+featureIsoname+" (isoform name should starts with prefix 'iso')", 0);
        }

        String name = featureIsoname.substring(ISOFORM_PREFIX.length());

        Matcher digits = DIGITS_ONLY.matcher(name);
        if (digits.matches()) {
            return "Iso " + name;
        }

        // Replace back space from underscore: some isoform names contain spaces
        // that were replaced by underscore, see formatIsoformFeatureName().
        // The restored name is returned as is. The former extra step searched for
        // "whitespace followed by digits" and rebuilt the result from these two
        // groups only, which dropped the rest of the name
        // (e.g. "isoDelta_6" was parsed as "  6" instead of "Delta 6").
        return name.replace("_", " ");
    }

    /**
     * Extracts everything located after the first '-' of the given string.
     *
     * NOTE(review): the split is done on the first dash, so the gene part is
     * presumably expected not to contain any '-' - confirm against callers.
     *
     * @return the isoform part from feature string (null if canonical)
     */
    private String extractIsoName(String feature) {
        Preconditions.checkNotNull(feature);

        int indexOfDash = feature.indexOf("-");
        if (indexOfDash < 0) {
            return null;
        }
        return feature.substring(indexOfDash + 1);
    }

    /**
     * Formats the main name of an isoform as the isoform part of a feature string:
     *
     * <pre>
     * Short      -> isoShort
     * Long       -> isoLong
     * Iso 5      -> iso5
     * Delta 6    -> isoDelta_6
     * GTBP-N     -> isoGTBP-N
     * Chain XP32 -> isoChain_XP32
     * </pre>
     *
     * NOTE(review): an earlier comment announced "Delta 6 -> isoDelta6" but
     * spaces have always been replaced by underscores for names that do not
     * start with "Iso"; the table above reflects the actual output.
     *
     * @param isoform the isoform to get the main entity name from
     * @return the formatted isoform name, always starting with "iso"
     */
    @Override
    protected String formatIsoformFeatureName(Isoform isoform) {
        String name = isoform.getMainEntityName().getName();

        if (name.startsWith("Iso")) {
            // Locale.ROOT keeps the result independent from the default locale
            // (a Turkish default locale would lower-case 'I' to a dotless 'ı').
            return name.toLowerCase(Locale.ROOT).replace(" ", "");
        }
        return ISOFORM_PREFIX + name.replace(" ", "_");
    }
}