package org.nextprot.api.commons.bio.variation.prot.impl.seqchange.format; import org.nextprot.api.commons.bio.AminoAcidCode; import org.nextprot.api.commons.bio.variation.prot.SequenceVariation; import org.nextprot.api.commons.bio.variation.prot.SequenceVariationBuilder; import org.nextprot.api.commons.bio.variation.prot.impl.seqchange.AminoAcidModification; import org.nextprot.api.commons.bio.variation.prot.seqchange.SequenceChangeFormat; import java.text.ParseException; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Parse single PTM with the format: * MOD-AApos (example: P-Tyr223 represents a phosphorylation of tyrosine at position 223) */ public class SingleModificationBEDFormat implements SequenceChangeFormat<AminoAcidModification> { private static final Pattern PATTERN = Pattern.compile("^(\\w+)-([A-Z])([a-z]{2})?(\\d+)$"); @Override public SequenceVariation parse(String source, SequenceVariationBuilder.FluentBuilding builder) throws ParseException { Matcher m = PATTERN.matcher(source); if (m.matches()) { AminoAcidModification aaChange = AminoAcidModification.valueOfAminoAcidModification(m.group(1)); AminoAcidCode affectedAA = AminoAcidCode.parseAminoAcidCode(m.group(2) + ((m.group(3) != null) ? m.group(3) : "")); int affectedAAPos = Integer.parseInt(m.group(4)); return builder.selectAminoAcid(affectedAA, affectedAAPos).thenAddModification(aaChange).build(); } return null; } @Override public boolean matches(String source) { return source.matches(PATTERN.pattern()); } @Override public void format(StringBuilder sb, AminoAcidModification change, AminoAcidCode.CodeType type) { sb .append(change.getName()) .append("-"); } }