package org.nextprot.api.commons.bio.variation.prot.impl.seqchange.format;

import org.nextprot.api.commons.bio.AminoAcidCode;
import org.nextprot.api.commons.bio.variation.prot.ParsingMode;
import org.nextprot.api.commons.bio.variation.prot.SequenceVariation;
import org.nextprot.api.commons.bio.variation.prot.SequenceVariationBuilder;
import org.nextprot.api.commons.bio.variation.prot.impl.seqchange.Frameshift;
import org.nextprot.api.commons.bio.variation.prot.seqchange.SequenceChangeHGVSFormat;
import org.nextprot.api.commons.utils.StringUtils;

import java.text.ParseException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses and formats protein frameshift changes written in HGVS notation,
 * e.g. {@code p.Arg97Profs*23} or {@code p.R97PfsTer23}.
 *
 * <p>The recognised syntax is
 * {@code p.<affected AA><position><new AA>fs(*|Ter)<number>} where each amino
 * acid is written as one uppercase letter optionally followed by two lowercase
 * letters.
 */
public class FrameshiftHGVSFormat implements SequenceChangeHGVSFormat<Frameshift> {

    /*
     * Capturing groups:
     *   1 + 2 : affected amino acid (uppercase letter + optional two lowercase letters)
     *   3     : position of the affected amino acid
     *   4 + 5 : new amino acid (same layout as groups 1 + 2)
     *   6     : number following "fs*" / "fsTer"
     */
    private static final Pattern PATTERN = Pattern.compile("^p\\.([A-Z])([a-z]{2})?(\\d+)([A-Z])([a-z]{2})?fs(?:\\*|Ter)(\\d+)$");

    /**
     * Parses {@code source} as an HGVS frameshift description and feeds the
     * result into {@code builder}.
     *
     * @param source  the text to parse
     * @param builder the builder receiving the affected amino acid and the frameshift
     * @param mode    the parsing mode (not consulted by this implementation)
     * @return the built variation, or {@code null} if {@code source} does not
     *         match the frameshift syntax
     * @throws ParseException if the number after {@code fs*}/{@code fsTer} is
     *         lower than 2, or if a numeric field does not fit in an {@code int}
     */
    @Override
    public SequenceVariation parseWithMode(String source, SequenceVariationBuilder.FluentBuilding builder, ParsingMode mode) throws ParseException {

        Matcher m = PATTERN.matcher(source);

        if (!m.matches()) {
            return null;
        }

        AminoAcidCode affectedAA = AminoAcidCode.parseAminoAcidCode(StringUtils.concat(m.group(1), m.group(2)));
        int affectedAAPos = parseNumber(m, 3);

        AminoAcidCode newAA = AminoAcidCode.parseAminoAcidCode(StringUtils.concat(m.group(4), m.group(5)));
        int shift = parseNumber(m, 6);

        if (shift <= 1) {
            throw new ParseException("the description of a frame shift variant can not contain "
                    + "“fsTer1”, such a variant is a nonsense variant (see Substitution). The shortest frame shift variant "
                    + "possible contains 'fsTer2' (see http://varnomen.hgvs.org/recommendations/protein/variant/frameshift/)", 0);
        }

        return builder.selectAminoAcid(affectedAA, affectedAAPos).thenFrameshift(newAA, shift).build();
    }

    /**
     * Tells whether {@code source} has the HGVS frameshift syntax.
     *
     * @param source the text to test
     * @param mode   the parsing mode (not consulted by this implementation)
     * @return {@code true} if the whole of {@code source} matches the frameshift pattern
     */
    @Override
    public boolean matchesWithMode(String source, ParsingMode mode) {

        // Reuse the precompiled pattern: String.matches(regex) would recompile it on every call.
        return PATTERN.matcher(source).matches();
    }

    /**
     * Appends the frameshift part of an HGVS description to {@code sb}: the
     * changed amino acid, the literal {@code fs}, the stop code and the new
     * termination position. Amino acid codes are rendered by
     * {@link AminoAcidCode#formatAminoAcidCode} according to {@code type}.
     *
     * @param sb     the buffer to append to
     * @param change the frameshift to format
     * @param type   the amino acid code type used for rendering
     */
    @Override
    public void format(StringBuilder sb, Frameshift change, AminoAcidCode.CodeType type) {

        sb
                .append(AminoAcidCode.formatAminoAcidCode(type, change.getValue().getChangedAminoAcid()))
                .append("fs")
                .append(AminoAcidCode.formatAminoAcidCode(type, AminoAcidCode.STOP))
                .append(change.getValue().getNewTerminationPosition());
    }

    /**
     * Converts the digits captured by {@code group} to an {@code int}, reporting
     * out-of-range values as a {@link ParseException} instead of letting an
     * unchecked {@link NumberFormatException} escape.
     */
    private static int parseNumber(Matcher m, int group) throws ParseException {

        String digits = m.group(group);

        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            ParseException pe = new ParseException("number '" + digits + "' is out of range", m.start(group));
            pe.initCause(e);
            throw pe;
        }
    }
}