package org.nextprot.api.commons.constants;

import org.nextprot.api.commons.exception.NextProtException;
import org.nextprot.api.commons.utils.StringFormatter;
import org.nextprot.api.commons.utils.StringUtils;

import java.io.Serializable;
import java.util.*;

/**
 * Hierarchy of annotation categories. Every category except {@link #ROOT} points to a single parent
 * category, which makes the enum a tree that can be walked in both directions
 * ({@link #getParent()}, {@link #getChildren()}).
 *
 * <p>NOTE: WHEN ADDING NEW ENUM DO NOT FORGET TO UPDATE "annotation-category" element of
 * nextprot-export-vx.xsd
 *
 * @author Pam inspired from Oliv's OWLAnnotationCategoryOld version
 */
public enum AnnotationCategory implements Serializable {

    // Special node for the root
    ROOT(0, "Root", "Root", "", null),

    /*
     * ENUMs with a negative dbId are virtual annotation types. Virtual means that there is NO annotation in our data of this type
     * ENUMs with a positive dbId are annotation types attached to at least one annotation in our data
     */

    // names
    NAME(-100, "Name", "name", "Name", ROOT),
    // ENZYME_CLASSIFICATION and FAMILY_NAME temporarily appear in the entry overview via another mechanism
    FAMILY_NAME(1059, "family name", "familyName", "family name", NAME),

    // generic categories for annotations
    POSITIONAL_ANNOTATION(-3, "PositionalAnnotation", "positionalAnnotation", "Positional annotation", ROOT),
    PROCESSING_PRODUCT(-4, "ProcessingProduct", "processingProduct", "Processing product", POSITIONAL_ANNOTATION),
    TOPOLOGY(-5, "Topology", "topology", "Topology", POSITIONAL_ANNOTATION),
    REGION(-6, "Region", "region", "Region", POSITIONAL_ANNOTATION),
    GENERIC_SITE(-7, "GenericSite", "site", "Site", POSITIONAL_ANNOTATION),
    GENERIC_PTM(-8, "GenericPtm", "ptm", "PTM", POSITIONAL_ANNOTATION),
    SECONDARY_STRUCTURE(-9, "secondary structure", "secondaryStructure", "Secondary structure", POSITIONAL_ANNOTATION),
    MAPPING(-91, "GenericMapping", "mapping", "Mapping", POSITIONAL_ANNOTATION),
    GENERAL_ANNOTATION(-2, "GeneralAnnotation", "generalAnnotation", "General Annotation", ROOT),
    GENERIC_FUNCTION(-10, "GenericFunction", "function", "Function", GENERAL_ANNOTATION),
    GENERIC_INTERACTION(-11, "GenericInteraction", "interaction", "Interaction", GENERAL_ANNOTATION),
    CELLULAR_COMPONENT(-12, "GenericLocation", "cellularComponent", "Cellular component", GENERAL_ANNOTATION),
    GENERIC_EXPRESSION(-15, "GenericExpression", "expression", "Expression", GENERAL_ANNOTATION),
    MEDICAL(-13, "Medical", "medical", "Medical", GENERAL_ANNOTATION),
    KEYWORD(-14, "Keyword", "keyword", "Keywords", GENERAL_ANNOTATION),

    // ENZYME_CLASSIFICATION and FAMILY_NAME temporarily appear in the entry overview via another mechanism
    ENZYME_CLASSIFICATION(1065, "enzyme classification", "enzymeClassification", "Enzyme classification", GENERAL_ANNOTATION),

    // instantiated annotation categories with real cv_term id and data existing for them

    // instances of positional annotations
    /*
     * Transformations done on loading annotations from db:
     * OK - 1/ dbId=1002 "transit peptide": split annotations into 2 new types "mitochondrial transit peptide" and "peroxisome transit peptide"
     * OK - 2/ dbId=1005 "transmembrane region": move annotations to new type "intramembrane region" if annotation.cv_term_id=51748 "In membrane"
     * OK - 3/ dbId=1050 "biotechnology": move all annotations to existing type dbId=1052 "Miscellaneous"
     */
    PDB_MAPPING(116892, "3D structure", "pdbMapping", "PDB mapping", MAPPING),
    PEPTIDE_MAPPING(-116892, "peptide mapping", "peptideMapping", "Peptide mapping", MAPPING),
    SRM_PEPTIDE_MAPPING(-116893, "SRM peptide mapping", "srmPeptideMapping", "SRM Peptide mapping", MAPPING),
    ANTIBODY_MAPPING(-116894, "antibody mapping", "antibodyMapping", "Antibody mapping", MAPPING),

    NON_CONSECUTIVE_RESIDUE(1031, "non-consecutive residues", "nonConsecutiveResidue", "Non-consecutive residue", POSITIONAL_ANNOTATION),
    NON_TERMINAL_RESIDUE(1032, "non-terminal residue", "nonTerminalResidue", "Non-terminal residue", POSITIONAL_ANNOTATION),
    DOMAIN_INFO(1043, "domain information", "domainInfo", "Domain information", POSITIONAL_ANNOTATION),

    INITIATOR_METHIONINE(1000, "initiator methionine", "initiatorMethionine", "Initiator methionine", PROCESSING_PRODUCT),
    SIGNAL_PEPTIDE(1001, "signal peptide", "signalPeptide", "Signal peptide", PROCESSING_PRODUCT),
    // split into mitochondrial & peroxisome db annotation NORMALLY split into types of 2 next lines
    TRANSIT_PEPTIDE(1002, "transit peptide", "transitPeptide", "Transit peptide", PROCESSING_PRODUCT),
    PEROXISOME_TRANSIT_PEPTIDE(-10021, "peroxisome transit peptide", "peroxisomeTransitPeptide", "Peroxisome transit peptide", PROCESSING_PRODUCT),
    MITOCHONDRIAL_TRANSIT_PEPTIDE(-10022, "mitochondrial transit peptide", "mitochondrialTransitPeptide", "Mitochondrial transit peptide", PROCESSING_PRODUCT),
    MATURATION_PEPTIDE(1003, "maturation peptide", "propeptide", "Maturation peptide", PROCESSING_PRODUCT),
    MATURE_PROTEIN(1004, "mature protein", "matureProtein", "Mature protein", PROCESSING_PRODUCT),

    TRANSMEMBRANE_REGION(1005, "transmembrane region", "transmembraneRegion", "Transmembrane region", TOPOLOGY),
    // Note: this annotation type does not exist in db, it is considered a transmembrane region
    // but is linked to the cv_term = "In membrane"
    INTRAMEMBRANE_REGION(-10051, "intramembrane region", "intramembraneRegion", "Intramembrane region", TOPOLOGY),
    TOPOLOGICAL_DOMAIN(1015, "topological domain", "topologicalDomain", "Topological domain", TOPOLOGY),

    DOMAIN(1006, "domain", "domain", "Domain", REGION),
    REPEAT(1007, "repeat", "repeat", "Repeat", REGION),
    CALCIUM_BINDING_REGION(1008, "calcium-binding region", "calciumBindingRegion", "Calcium-binding region", REGION),
    ZINC_FINGER_REGION(1009, "zinc finger region", "zincFingerRegion", "Zinc finger region", REGION),
    DNA_BINDING_REGION(1010, "DNA-binding region", "dnaBindingRegion", "DNA-binding region", REGION),
    NUCLEOTIDE_PHOSPHATE_BINDING_REGION(1011, "nucleotide phosphate-binding region", "nucleotidePhosphateBindingRegion", "Nucleotide phosphate-binding region", REGION),
    COILED_COIL_REGION(1012, "coiled-coil region", "coiledCoilRegion", "Coiled-coil region", REGION),
    SHORT_SEQUENCE_MOTIF(1013, "short sequence motif", "shortSequenceMotif", "Short sequence motif", REGION),
    COMPOSITIONALLY_BIASED_REGION(1014, "compositionally biased region", "compositionallyBiasedRegion", "Compositionally biased region", REGION),
    MISCELLANEOUS_REGION(11, "region of interest", "miscellaneousRegion", "Miscellaneous region", REGION),
    INTERACTING_REGION(1068, "interacting region", "interactingRegion", "Interacting region", REGION),

    ACTIVE_SITE(1016, "active site", "activeSite", "Active site", GENERIC_SITE),
    METAL_BINDING_SITE(1017, "metal ion-binding site", "metalBindingSite", "Metal binding site", GENERIC_SITE),
    BINDING_SITE(1018, "binding site", "bindingSite", "Binding site", GENERIC_SITE),
    CLEAVAGE_SITE(1067, "cleavage site", "cleavageSite", "Cleavage site", GENERIC_SITE),
    MISCELLANEOUS_SITE(12, "site", "miscellaneousSite", "Miscellaneous site", GENERIC_SITE),

    SELENOCYSTEINE(1019, "non-standard amino acid", "selenocysteine", "Selenocysteine", GENERIC_PTM),
    LIPIDATION_SITE(1020, "lipid moiety-binding region", "lipidationSite", "Lipid moiety-binding region", GENERIC_PTM),
    GLYCOSYLATION_SITE(1021, "glycosylation site", "glycosylationSite", "Glycosylation site", GENERIC_PTM),
    CROSS_LINK(1023, "cross-link", "crossLink", "Cross-link", GENERIC_PTM),
    DISULFIDE_BOND(1022, "disulfide bond", "disulfideBond", "Disulfide bond", GENERIC_PTM),
    MODIFIED_RESIDUE(13, "amino acid modification", "modifiedResidue", "Modified residue", GENERIC_PTM),
    PTM_INFO(1044, "PTM", "ptmInfo", "PTM info", GENERIC_PTM),

    HELIX(1024, "helix", "helix", "Helix", SECONDARY_STRUCTURE),
    TURN(1025, "turn", "turn", "Turn", SECONDARY_STRUCTURE),
    BETA_STRAND(1026, "beta strand", "betaStrand", "Beta strand", SECONDARY_STRUCTURE),

    VARIANT(1027, "sequence variant", "variant", "Variant", POSITIONAL_ANNOTATION),
    MUTAGENESIS(1028, "mutagenesis site", "mutagenesis", "Mutagenesis", POSITIONAL_ANNOTATION),
    SEQUENCE_CONFLICT(1029, "sequence conflict", "sequenceConflict", "Sequence conflict", POSITIONAL_ANNOTATION),

    // instances of general annotations
    VARIANT_INFO(1045, "polymorphism", "variantInfo", "Variant info", GENERAL_ANNOTATION),
    INDUCTION(1042, "induction", "induction", "Induction", GENERAL_ANNOTATION),
    // No BIOTECHNOLOGY (dbId=1050) category - OK: only 5 annotations exist, so moved to miscellaneous
    MISCELLANEOUS(1052, "miscellaneous", "miscellaneous", "Miscellaneous", GENERAL_ANNOTATION),
    CAUTION(1054, "caution", "caution", "Caution", GENERAL_ANNOTATION),
    SEQUENCE_CAUTION(1056, "sequence caution", "sequenceCaution", "Sequence caution", GENERAL_ANNOTATION),

    UNIPROT_KEYWORD(1064, "uniprot keyword", "uniprotKeyword", "Uniprot keyword", KEYWORD),

    FUNCTION_INFO(1033, "function", "functionInfo", "Function info", GENERIC_FUNCTION),
    CATALYTIC_ACTIVITY(1034, "catalytic activity", "catalyticActivity", "Catalytic activity", GENERIC_FUNCTION),
    COFACTOR(1035, "cofactor", "cofactor", "Cofactor", GENERIC_INTERACTION),
    COFACTOR_INFO(226874, "cofactor information", "cofactorInfo", "Cofactor information", GENERIC_INTERACTION),
    ENZYME_REGULATION(1036, "enzyme regulation", "enzymeRegulation", "Enzyme regulation", GENERIC_INTERACTION),
    TRANSPORT_ACTIVITY(-25, "transport activity", "transportActivity", "Transport activity", GENERIC_FUNCTION),
    PATHWAY(1038, "pathway", "pathway", "Pathway", GENERIC_FUNCTION),
    GO_MOLECULAR_FUNCTION(1061, "go molecular function", "goMolecularFunction", "GO molecular function", GENERIC_FUNCTION),
    GO_BIOLOGICAL_PROCESS(1062, "go biological process", "goBiologicalProcess", "GO biological process", GENERIC_FUNCTION),

    SMALL_MOLECULE_INTERACTION(-112, "SmallMoleculeInteraction", "smallMoleculeInteraction", "Small molecule interaction", GENERIC_INTERACTION),
    INTERACTION_INFO(1037, "subunit", "interactionInfo", "Interaction info", GENERIC_INTERACTION),
    // placeholder for data coming from intact in table db partnership
    BINARY_INTERACTION(-111, "BinaryInteraction", "binaryInteraction", "Binary interaction", GENERIC_INTERACTION),

    SUBCELLULAR_LOCATION(1039, "subcellular location", "subcellularLocation", "Subcellular location", CELLULAR_COMPONENT),
    SUBCELLULAR_LOCATION_NOTE(63868, "subcellular location info", "subcellularLocationNote", "Subcellular location info", CELLULAR_COMPONENT),
    GO_CELLULAR_COMPONENT(1063, "go cellular component", "goCellularComponent", "GO cellular component", CELLULAR_COMPONENT),

    DEVELOPMENTAL_STAGE(1041, "developmental stage", "developmentalStageInfo", "Developmental stage", GENERIC_EXPRESSION),
    EXPRESSION_INFO(1055, "expression info", "expressionInfo", "Expression info", GENERIC_EXPRESSION),
    EXPRESSION_PROFILE(1040, "tissue specificity", "expressionProfile", "Expression profile", GENERIC_EXPRESSION),

    DISEASE(1046, "disease", "disease", "Disease", MEDICAL),
    ALLERGEN(1048, "allergen", "allergen", "Allergen", MEDICAL),
    PHARMACEUTICAL(1051, "pharmaceutical", "pharmaceutical", "Pharmaceutical", MEDICAL),

    BIOPHYSICOCHEMICAL_PROPERTY(-16, "Biophysicochemical property", "biophysicochemicalProperty", "Biophysicochemical property", GENERAL_ANNOTATION),
    ABSORPTION_MAX(-17, "absorption max", "absorptionMax", "Absorption max", BIOPHYSICOCHEMICAL_PROPERTY),
    ABSORPTION_NOTE(-18, "absorption note", "absorptionNote", "Absorption note", BIOPHYSICOCHEMICAL_PROPERTY),
    KINETIC_KM(-19, "kinetic KM", "kineticKM", "Kinetic KM", BIOPHYSICOCHEMICAL_PROPERTY),
    KINETIC_VMAX(-20, "kinetic Vmax", "kineticVmax", "Kinetic Vmax", BIOPHYSICOCHEMICAL_PROPERTY),
    KINETIC_NOTE(-21, "kinetic note", "kineticNote", "Kinetic note", BIOPHYSICOCHEMICAL_PROPERTY),
    PH_DEPENDENCE(-22, "pH dependence", "phDependence", "pH dependence", BIOPHYSICOCHEMICAL_PROPERTY),
    REDOX_POTENTIAL(-23, "redox potential", "redoxPotential", "redox potential", BIOPHYSICOCHEMICAL_PROPERTY),
    TEMPERATURE_DEPENDENCE(-24, "temperature dependence", "temperatureDependence", "Temperature dependence", BIOPHYSICOCHEMICAL_PROPERTY),

    // New categories added with BioEditor integration
    ELECTROPHYSIOLOGICAL_PARAMETER(-9993, "electrophysiological-parameter", "electrophysiologicalParameter", "Electrophysiological parameter", GENERAL_ANNOTATION),
    PROTEIN_PROPERTY(-9992, "protein-property", "proteinProperty", "Protein property", GENERAL_ANNOTATION), // to represent stable form
    GENERIC_PHENOTYPE(-9999, "generic-phenotype", "genericPhenotype", "Generic phenotype", GENERAL_ANNOTATION),
    PHENOTYPIC_VARIATION(-9990, "phenotypic-variation", "phenotypicVariation", "Phenotypic variation", GENERIC_PHENOTYPE),
    MAMMALIAN_PHENOTYPE(-9991, "mammalian-phenotype", "mammalianPhenotype", "Mammalian phenotype", GENERIC_PHENOTYPE),

    // Virtual annotations (for pepx)
    VIRTUAL_ANNOTATION(-70000, "virtual-annotation", "virtualAnnotation", "Virtual annotation", ROOT), // to represent stable form
    PEPX_VIRTUAL_ANNOTATION(-70001, "pepx-virtual-annotation", "pepxVirtualAnnotation", "Pepx virtual annotation", VIRTUAL_ANNOTATION);

    /** If positive, identifies a real record of the table nextprot.cv_terms (category annotation_type). */
    private final Integer dbId;
    /** If dbId is positive, an exact match of the corresponding record in nextprot.cv_terms. */
    private final String dbAnnotationTypeName;
    /** A string from which an rdf predicate and an rdfs:type name is derived. */
    private final String apiName;
    /** A human readable label for the rdf:type. */
    private final String label;
    /** May be set later from reading values in the db. */
    private String description = null;
    /** Direct parent in the category tree; {@code null} only for {@link #ROOT}. */
    private final AnnotationCategory parent;

    /**
     * Category of control vocabulary that may be used to define the annotation.
     *
     * @param dbId                 database id (negative for virtual categories)
     * @param dbAnnotationTypeName annotation type name as known by the database
     * @param apiName              camel-case name from which the API / RDF names are derived
     * @param rdfLabel             human readable label
     * @param parent               parent category, {@code null} for the root
     */
    AnnotationCategory(final Integer dbId, final String dbAnnotationTypeName, final String apiName,
                       final String rdfLabel, final AnnotationCategory parent) {
        this.dbId = dbId;
        this.dbAnnotationTypeName = dbAnnotationTypeName;
        this.apiName = apiName;
        this.label = rdfLabel;
        this.parent = parent;
    }

    // *************** STATIC PRIVATE FINAL CONSTANTS initialized for performance reasons ***************

    /** Cache: database annotation type name -> category. */
    private static final Map<String, AnnotationCategory> MAP_TYPES = new HashMap<>();

    static {
        for (AnnotationCategory category : AnnotationCategory.values()) {
            MAP_TYPES.put(category.getDbAnnotationTypeName(), category);
        }
    }

    /** Cache: kebab-cased API type name -> category. */
    private static final Map<String, AnnotationCategory> MAP_DECAMELIZED_TYPES = new HashMap<>();

    static {
        for (AnnotationCategory category : AnnotationCategory.values()) {
            MAP_DECAMELIZED_TYPES.put(StringUtils.camelToKebabCase(category.getApiTypeName()), category);
        }
    }

    /** Textual dump of the category tree, listed in "unknown category" error messages. */
    private static final String HIERARCHY_STRING;

    static {
        StringBuilder sb = new StringBuilder();
        getAnnotationHierarchy(AnnotationCategory.ROOT, sb, 0);
        HIERARCHY_STRING = sb.toString();
    }

    /**
     * Appends one line per descendant of {@code a} (depth first) to {@code sb}. Each line is made of
     * {@code inc} dashes, the kebab-cased API type name, a space and the hierarchy path. The node at
     * depth 0 (the starting node) is not printed.
     */
    private static void getAnnotationHierarchy(AnnotationCategory a, StringBuilder sb, int inc) {
        if (inc > 0) {
            for (int i = 0; i < inc; i++) {
                sb.append('-');
            }
            sb.append(StringUtils.camelToKebabCase(a.getApiTypeName()))
              .append(' ')
              .append(a.getHierarchy())
              .append('\n');
        }
        for (AnnotationCategory child : a.getChildren()) {
            getAnnotationHierarchy(child, sb, inc + 1);
        }
    }

    /** Cache of the sorted leaf categories; never handed out directly (see {@link #getSortedCategories()}). */
    private static final List<AnnotationCategory> SORTED_CATEGORIES = sortAnnotationCategories();

    /**
     * Sort categories (generic parent > direct parent annotation > annotation category name
     *
     * @return the list of LEAF annotation categories except family-name
     */
    private static List<AnnotationCategory> sortAnnotationCategories() {
        List<AnnotationCategory> sortedAnnotations = new ArrayList<>();
        for (AnnotationCategory category : AnnotationCategory.values()) {
            if (category.isLeaf() && category != AnnotationCategory.FAMILY_NAME) {
                sortedAnnotations.add(category);
            }
        }
        // Primary key: hierarchy path; tie-breaker: API name.
        sortedAnnotations.sort(
                Comparator.comparing(AnnotationCategory::getHierarchy).thenComparing(c -> c.apiName));
        return sortedAnnotations;
    }

    /**
     * @return a new list of the leaf categories (family-name excluded) sorted by hierarchy path then
     *         API name. A copy is returned so that callers cannot alter the shared cache.
     */
    public static List<AnnotationCategory> getSortedCategories() {
        return new ArrayList<>(SORTED_CATEGORIES);
    }

    /**
     * @return a new set holding every category for which {@link #isInstantiated()} is true
     */
    public static Set<AnnotationCategory> getInstantiatedCategories() {
        Set<AnnotationCategory> set = EnumSet.noneOf(AnnotationCategory.class);
        for (AnnotationCategory cat : AnnotationCategory.values()) {
            if (cat.isInstantiated()) {
                set.add(cat);
            }
        }
        return set;
    }

    /**
     * Finds a category from its database annotation type name (exact, case-sensitive match).
     *
     * @param typeName the database annotation type name
     * @return the matching category
     * @throws NextProtException if no category has this database annotation type name
     */
    public static AnnotationCategory getByDbAnnotationTypeName(String typeName) {
        AnnotationCategory category = MAP_TYPES.get(typeName);
        if (category == null) {
            throw new NextProtException("\nCould not find annotation category for: " + typeName
                    + "\nPossible types: \n" + MAP_TYPES.keySet());
        }
        return category;
    }

    /**
     * Finds a category from its kebab-cased API type name, ignoring case.
     *
     * @param typeName the kebab-cased API type name
     * @return the matching category
     * @throws NextProtException if {@code typeName} is null or matches no category
     */
    public static AnnotationCategory getDecamelizedAnnotationTypeName(String typeName) {
        // Locale.ROOT: the lookup must not depend on the JVM default locale (e.g. Turkish dotless i).
        AnnotationCategory category =
                (typeName == null) ? null : MAP_DECAMELIZED_TYPES.get(typeName.toLowerCase(Locale.ROOT));
        if (category == null) {
            throw new NextProtException("\nCould not find annotation category for: " + typeName
                    + "\nPossible types: \n" + HIERARCHY_STRING);
        }
        return category;
    }

    /**
     * Tells if a category exists for the given kebab-cased API type name, ignoring case.
     *
     * @param typeName the kebab-cased API type name, may be null
     * @return true if a category matches, false otherwise (including for a null name)
     */
    public static boolean hasAnnotationByApiName(String typeName) {
        return typeName != null && MAP_DECAMELIZED_TYPES.containsKey(typeName.toLowerCase(Locale.ROOT));
    }

    /**
     * Tells if this annotation category is used in the field cv_annotation_type_id of an annotations record
     *
     * @return true if at least one annotation record has this dbAnnotationTypeName otherwise false
     */
    public boolean isInstantiated() {
        return dbId > 0;
    }

    public Integer getDbId() {
        return dbId;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String descr) {
        this.description = descr;
    }

    public String getDbAnnotationTypeName() {
        return dbAnnotationTypeName;
    }

    /** @return the API name with its first character lower-cased */
    public String getRdfPredicate() {
        return StringUtils.lowerFirstChar(this.apiName);
    }

    /** @return the API name with its first character upper-cased */
    public String getApiTypeName() {
        return StringUtils.upperFirstChar(this.apiName);
    }

    /**
     * @deprecated use getLabel() instead
     */
    @Deprecated
    public String getRdfLabel() {
        return getLabel();
    }

    /**
     * For display in feature viewer, etc. as well as rdf:label object!
     *
     * @return the label with its first character upper-cased
     */
    public String getLabel() {
        return StringUtils.upperFirstChar(this.label);
    }

    public AnnotationCategory getParent() {
        return parent;
    }

    /**
     * @return a new set of the categories whose direct parent is this category, iterated in
     *         declaration order
     */
    public Set<AnnotationCategory> getChildren() {
        // EnumSet gives a stable (declaration) iteration order; a HashSet of enums is ordered by
        // identity hash codes and therefore differs between JVM runs.
        Set<AnnotationCategory> children = EnumSet.noneOf(AnnotationCategory.class);
        for (AnnotationCategory cat : AnnotationCategory.values()) {
            if (cat.parent == this) {
                children.add(cat);
            }
        }
        return children;
    }

    /**
     * @return a new set of all the descendants (children, grand-children, ...) of this category
     */
    public Set<AnnotationCategory> getAllChildren() {
        Set<AnnotationCategory> direct = getChildren();
        Set<AnnotationCategory> all = EnumSet.copyOf(direct.isEmpty()
                ? EnumSet.noneOf(AnnotationCategory.class) : direct);
        for (AnnotationCategory child : direct) {
            all.addAll(child.getAllChildren());
        }
        return all;
    }

    /**
     * @return a new set of all the ancestors of this category, root included
     */
    public Set<AnnotationCategory> getAllParents() {
        Set<AnnotationCategory> all = EnumSet.noneOf(AnnotationCategory.class);
        for (AnnotationCategory ancestor = parent; ancestor != null; ancestor = ancestor.parent) {
            all.add(ancestor);
        }
        return all;
    }

    /**
     * @return a new set of all the ancestors of this category, root excluded
     */
    public Set<AnnotationCategory> getAllParentsButRoot() {
        Set<AnnotationCategory> all = getAllParents();
        all.remove(AnnotationCategory.ROOT);
        return all;
    }

    /**
     * @param aam the candidate ancestor
     * @return true if this category is a descendant (at any depth) of {@code aam}
     */
    public boolean isChildOf(AnnotationCategory aam) {
        return aam.getAllChildren().contains(this);
    }

    /** @return the path to root (see {@link #getPathToRoot(char)}) delimited with ':' */
    public String getHierarchy() {
        return getPathToRoot(':');
    }

    /**
     * Builds the list of ancestors of this category, outermost first, root and this category
     * excluded. Each ancestor is rendered from its database annotation type name formatted through
     * {@code StringFormatter.camel().kebab()}.
     *
     * @param delimitor the separator put between two ancestors
     * @return the delimited path, empty if the parent is the root or is absent
     */
    public String getPathToRoot(char delimitor) {
        StringBuilder sb = new StringBuilder();
        getPathToRoot(sb, delimitor);
        // Every ancestor was appended followed by a delimiter: drop the trailing one.
        if (sb.length() > 0) {
            sb.setLength(sb.length() - 1);
        }
        return sb.toString();
    }

    /** Recursive worker of {@link #getPathToRoot(char)}: appends "ancestor + delimiter" for each ancestor. */
    void getPathToRoot(StringBuilder sb, char delimitor) {
        if (parent != null && parent != ROOT) {
            parent.getPathToRoot(sb, delimitor);
            sb.append(new StringFormatter(parent.getDbAnnotationTypeName()).camel().kebab().format());
            sb.append(delimitor);
        }
    }

    /** @return true if no category has this category as parent */
    public boolean isLeaf() {
        return getChildren().isEmpty();
    }

    /** @return the database annotation type name */
    @Override
    public String toString() {
        return this.getDbAnnotationTypeName();
    }

    // used by velocity
    public String getAnnotationCategoryHierarchyForXML() {
        return getPathToRoot(';');
    }

    // used by velocity
    public String getAnnotationCategoryNameForXML() {
        return StringUtils.camelToKebabCase(getApiTypeName());
    }
}