EmblLoader.java example

Explorer

GeneDB-master
- Jogra
  - src
    - org
      - genedb
        jogra
        controller
        ImageFormBean.java
        ImageUtils.java
        domain
        BasicGene.java
        ExpressionZoneEditor.java
        FeatureCvTerm.java
        Gene.java
        GeneDBMessage.java
        Term.java
        drawing
        ApplicationClosingEvent.java
        ApplicationMode.java
        BaseForm.java
        ExtensionFileFilter.java
        ImagePanel.java
        Jogra.java
        JograBusiness.java
        JograBusinessImpl.java
        JograLogin.java
        JograLoginService.java
        JograPlugin.java
        JograProgressBar.java
        OpenWindowEvent.java
        Pair.java
        SplashWindow.java
        plugins
        ButtonChangeDocumentListener.java
        CCFrame.java
        CCPanel.java
        ConflictComponentFactory.java
        CvEditor.java
        GeneEditor.java
        GeneList.java
        GeneView.java
        GeneViewModel.java
        GeneViewTest.java
        GoFrame.java
        GoPanel.java
        NameNotePanel.java
        Notes.java
        OrganismEditor.java
        OrganismPanel.java
        OrganismTree.java
        TermRationaliser.java
        services
        BasicGeneService.java
        DatabaseLogin.java
        ExtendedOrganism.java
        ExtendedOrganismManager.java
        FilteringJList.java
        GeneService.java
        LockAndNotificationService.java
        LockStatus.java
        Message.java
        MessageService.java
        MethodResult.java
        NamedVector.java
        RationaliserJList.java
        RationaliserResult.java
        SqlGeneService.java
        SqlTermService.java
        TermService.java
- ng
  - src
    - org
      - genedb
        anttasks
        Password.java
        aop
        HazelcastAspect.java
        db
        adhoc
        PanGenomeManager.java
        PhylonodeManager.java
        Query.java
        analyzers
        AllNamesAnalyzer.java
        AlphaNumericAnalyzer.java
        AlphaNumericTokenizer.java
        audit
        ChangeSet.java
        ChangeTracker.java
        HibernateChangeSet.java
        HibernateChangeTracker.java
        dao
        AuditDao.java
        BaseDao.java
        CvDao.java
        GeneralDao.java
        OrganismDao.java
        PhylogenyDao.java
        PubDao.java
        SequenceDao.java
        domain
        hibernateImpls
        BasicGeneServiceImpl.java
        GeneServiceImpl.java
        LockAndNotificationServiceImpl.java
        MessageServiceImpl.java
        ProductServiceImpl.java
        luceneImpls
        BasicGeneServiceImpl.java
        misc
        GeneDBMessage.java
        GeneListReservations.java
        Message.java
        MethodResult.java
        SemanticLog.java
        objects
        BasicGene.java
        Chromosome.java
        CompoundLocatedFeature.java
        DatabasePolypeptideRegion.java
        Exon.java
        ExtendedOrganism.java
        ExtendedOrganismManager.java
        Gap.java
        Gene.java
        InterProHit.java
        LocatedFeature.java
        PolypeptideRegion.java
        PolypeptideRegionGroup.java
        Product.java
        SimplePolypeptideRegion.java
        SimpleRegionGroup.java
        Transcript.java
        TranscriptComponent.java
        UTR.java
        services
        BasicGeneService.java
        GeneService.java
        LockAndNotificationService.java
        LockStatus.java
        MessageService.java
        ProductService.java
        fixup
        FixResidues.java
        TypeCodes.java
        helpers
        LocationBridge.java
        NameLookup.java
        loading
        AGPLoader.java
        EmblFile.java
        EmblLoader.java
        EmblLocation.java
        FastaFile.java
        FastaLoader.java
        FeatureTable.java
        FeatureUtils.java
        FileProcessor.java
        GoEvidenceCode.java
        GoInstance.java
        LoadAGP.java
        LoadEmbl.java
        LoadFasta.java
        LoadOrthologues.java
        LoadPileups.java
        LoadSNPs.java
        LoadVulgar.java
        LoggingDataSource.java
        ParsingException.java
        RfamLoader.java
        Similarity.java
        SkipRetryAbort.java
        SynonymManager.java
        VulgarLoader.java
        auxiliary
        AddFeatureProperty.java
        Clear.java
        ClearDGPI.java
        ClearDomains.java
        ClearHTH.java
        ClearOPIReferences.java
        ClearPlasmoAP.java
        ClearRfam.java
        ClearSignalP.java
        ClearTMHMM.java
        ClonedGenomicInsertLoader.java
        DGPILoader.java
        DeleteRedundantGOTerms.java
        DomainFile.java
        DomainLoader.java
        GOALoader.java
        HTHLoader.java
        Load.java
        Loader.java
        LocationLoader.java
        OPIReferenceLoader.java
        PhosphopeptideLoader.java
        PlasmoAPLoader.java
        RenameFeature.java
        RfamLoader.java
        SignalPLoader.java
        SystemPropertyOverridingDataSource.java
        TMHMMLoader.java
        taxon
        TaxonNameType.java
        TaxonNode.java
        TaxonNodeArrayPropertyEditor.java
        TaxonNodeList.java
        TaxonNodeListFormatter.java
        TaxonNodeManager.java
        TaxonUtils.java
        io
        xstream
        MultiMapConvertor.java
        query
        AbstractQuery.java
        BasicQueryI.java
        Detailer.java
        NumberedQueryI.java
        Param.java
        QueryConstants.java
        QueryI.java
        QueryPlaceHolder.java
        QueryStringParser.java
        Result.java
        ResultCompatibility.java
        SimpleListResult.java
        StorageMethod.java
        bool
        BooleanOp.java
        BooleanQuery.java
        BooleanQueryNode.java
        compatability
        AlwaysTrueResultDataSetCompatibility.java
        IdentityResultCompatibility.java
        NullResultCompatibility.java
        ResultCompatibilityChain.java
        custom
        GenesByURI.java
        generation
        QueryAnnotationProcessor.java
        history
        History.java
        SimpleHistory.java
        hql
        HqlQueryTemplate.java
        jdbc
        JdbcQuery.java
        JdbcQueryTemplate.java
        params
        AbstractParam.java
        BooleanParam.java
        FloatParam.java
        IntParam.java
        ListConstraint.java
        SimpleListConstraintDelegate.java
        StringParam.java
        StringParamFromList.java
        parsing
        BasicQueryStringParser.java
        sql
        DataSourceAware.java
        SimpleJdbcTemplateAware.java
        SqlListConstraintDelegate.java
        SqlQuery.java
        querying
        core
        BooleanQuery.java
        BooleanQueryMode.java
        CachedParamDetails.java
        CopyOfLuceneQueryModsNotCompleted.java
        HqlQuery.java
        HtmlFormDetails.java
        LuceneIndex.java
        LuceneIndexFactory.java
        LuceneQuery.java
        NumericQueryVisibility.java
        PagedQuery.java
        Parameter.java
        Query.java
        QueryBeanGenerator.java
        QueryClass.java
        QueryDetails.java
        QueryException.java
        QueryFactory.java
        QueryParam.java
        QueryTemplate.java
        QueryUtils.java
        QueryVisibility.java
        history
        HistoryItem.java
        HistoryManager.java
        HistoryType.java
        QueryHistoryItem.java
        parsing
        ExprLexer.java
        ExprParser.java
        QueryLineParser.java
        tmpquery
        AdvancedQuery.java
        AdvancedSearchCategory.java
        AnnotationStatusQuery.java
        BrowseCategory.java
        ChangedGeneFeaturesQuery.java
        ControlledCurationQuery.java
        CurationQuery.java
        DateAndTypeQuery.java
        DateCountQuery.java
        DateQuery.java
        DbxrefQuery.java
        EcQuery.java
        GeneDetail.java
        GeneLocationQuery.java
        GeneSummary.java
        GeneSummaryMotif.java
        GeneTypeQuery.java
        GenesByDbQuery.java
        GoQuery.java
        IdsToGeneDetailQuery.java
        IdsToGeneSummaryQuery.java
        MotifQuery.java
        NameProductQuery.java
        OrganismHqlQuery.java
        OrganismLuceneQuery.java
        PfamQuery.java
        ProductQuery.java
        ProteinLengthQuery.java
        ProteinMassQuery.java
        ProteinMatchClusterOrthologueQuery.java
        ProteinNumTMQuery.java
        ProteinTargetingSeqQuery.java
        QuickSearchQuery.java
        SimpleNameQuery.java
        SuggestQuery.java
        TaxonQuery.java
        TopLevelFeaturesQuery.java
        smallapps
        WebImageGenerator.java
        util
        ColorUtils.java
        Counters.java
        DumpObject.java
        FontFactoryBean.java
        FontSize.java
        GeneDBFormattingConversionServiceFactoryBean.java
        IterableArray.java
        MD5Util.java
        MutableInteger.java
        Pair.java
        SequenceUtils.java
        SynchronizedTwoKeyMap.java
        TranslationException.java
        Translator.java
        Triplet.java
        TwoKeyMap.java
        web
        applications
        motifsearch
        FastaEntry.java
        FastaLoader.java
        filters
        UriSessionIdFilter.java
        gui
        AllocatedCompoundFeature.java
        ArtemisColours.java
        BdbDiagramCache.java
        ContextMapDiagram.java
        ContextMapWindowController.java
        ContextMapWindowServlet.java
        DiagramCache.java
        DiagramLayout.java
        ImageCreationException.java
        ImageMapSummary.java
        InitHeadlessToolkitServlet.java
        MakeTransparentPixel.java
        ProteinMapDiagram.java
        RenderedContextMap.java
        RenderedDiagram.java
        RenderedDiagramFactory.java
        RenderedProteinMap.java
        TrackedDiagram.java
        UnmodifiableBitSet.java
        filters
        ComboFeatureFilter.java
        NamedStrandedFeatureFilter.java
        RNAFilter.java
        menu
        CompositeMenu.java
        Menu.java
        SimpleMenu.java
        mvc
        controller
        AnnotationChangesRssController.java
        AnnotationChangesRssViewer.java
        ArtemisLaunchController.java
        BaseController.java
        BasketController.java
        BrowseBean.java
        BrowseBeanName.java
        BrowseCategoryController.java
        BrowseTermController.java
        ClassicCompatabilityController.java
        CommonUrlController.java
        ComplexQueryController.java
        ContextMapController.java
        DbController.java
        DbLinkRedirectController.java
        DbXRefListener.java
        FastaFile.java
        FeatureSequenceController.java
        FeedbackController.java
        GeneDBSessionListener.java
        GeneDBWebUtils.java
        GeneSection.java
        GeneUtils.java
        GmodRestControllerV1.java
        GoAssociationUtils.java
        GoLookup.java
        HazelCastServletContextListener.java
        HistoryController.java
        HistoryManagerFactory.java
        HomepageController.java
        HtmlUtils.java
        HttpSessionHistoryManager.java
        HttpSessionHistoryManagerFactory.java
        IdListController.java
        ImageController.java
        Message.java
        MiscPageController.java
        ModelBuilder.java
        NameLookup.java
        NamedFeatureController.java
        NewsItem.java
        NumberNameConverter.java
        OrganismChooserController.java
        OrganismTypeEditor.java
        OrthologsController.java
        PfamLookup.java
        QueryForm.java
        RegionCommand.java
        RestController.java
        ResultBean.java
        ResultCacheSessionListener.java
        ResultHit.java
        Results.java
        ResultsHolder.java
        RootController.java
        SearchHit.java
        SequenceDestination.java
        SequenceDistributorController.java
        SequenceType.java
        Strand.java
        TaxonManagerListener.java
        TracerController.java
        TranscriptFeatureController.java
        WebConstants.java
        WsQueryController.java
        analysis
        MotifSearchController.java
        MotifSearcher.java
        cgview
        CachedFile.java
        CachedFileFactory.java
        download
        AbstractGeneDBFormController.java
        BaseCachingController.java
        DownloadBean.java
        DownloadController.java
        DownloadProcess.java
        DownloadProcessUtil.java
        FeatureDTOAdaptor.java
        FormatBase.java
        FormatCSV.java
        FormatExcel.java
        FormatFASTA.java
        FormatHTML.java
        GeneDetailFieldValueExctractor.java
        OutputContent.java
        OutputDestination.java
        OutputFormat.java
        OutputManager.java
        OutputOption.java
        QueryController.java
        QueryListController.java
        QuickSearchQueryController.java
        ResultEntry.java
        ResultsController.java
        ResultsNavigatorController.java
        SequenceType.java
        WebUtils.java
        model
        AnnotationChangesRssContent.java
        BerkeleyMapFactory.java
        BmfComparison.java
        CacheDBHelper.java
        CacheSynchroniser.java
        ConfigurableGeneDBSessionFactoryBean.java
        DTOFactory.java
        DbXRefDTO.java
        DtoDb.java
        FeatureCvTermDTO.java
        FeatureDTO.java
        GeneDTO.java
        IndexSynchroniser.java
        IndexUpdater.java
        LuceneIndexBuilder.java
        LuceneIndicesComparison.java
        MembraneStructureComponentDTO.java
        MergeBerkeleyIndices.java
        MergeLuceneIndices.java
        PeriodicUpdater.java
        PolypeptideDTO.java
        PopulateCaches.java
        PopulateLuceneDictionary.java
        PopulateLuceneIndices.java
        ResultsCacheFactory.java
        SmallPopulateLuceneIndices.java
        TranscriptDTO.java
        TranscriptDTOAnalyzer.java
        TranscriptDTOAnalyzer2.java
        TranscriptDTOFactory.java
        load
        AbstractTranscriptLoader.java
        ClusterIdAndOrthologueNamesMapper.java
        DbxRefMapper.java
        FeatureCVTermPropMapper.java
        FeatureCvtermMapper.java
        FeatureMapper.java
        FeaturePropMapper.java
        FeatureRelationshipMapper.java
        FeatureTypeMapper.java
        GeneMapper.java
        OrganismMapper.java
        PolypeptideMapper.java
        PolypeptidePropertiesHelper.java
        PubNameMapper.java
        SynonymTypeMapper.java
        TimerHelper.java
        TopLevelFeatureMapper.java
        TranscriptFeatureCVTermLoader.java
        TranscriptFeaturePropLoader.java
        TranscriptLoader.java
        TranscriptMapper.java
        TranscriptRegionMapper.java
        TranscriptUpdater.java
        simple
        SimpleFeature.java
        SimpleFeatureMapper.java
        SimpleGene.java
        SimpleGeneMapper.java
        SimplePolypeptide.java
        SimplePolypeptideMapper.java
        SimpleTopLevelFeatureMapper.java
        SimpleTranscript.java
        SimpleTranscriptLoader.java
        SimpleTranscriptMapper.java
        types
        DBXRefType.java
        DtoObjectArrayField.java
        DtoStringArrayField.java
        FeatureCVTPropType.java
        FeatureCvtermType.java
        FeaturePropType.java
        PepRegionGroupType.java
        PeptidePropertiesType.java
        SynonymType.java
        TranscriptRegionType.java
        view
        FileCheckingFreemarkerViewResolver.java
        FileCheckingInternalResourceViewResolver.java
        PrefixViewResolver.java
        ServiceView.java
        UriSuffixServiceViewResolver.java
        tags
        bool
        QueryTreeWalker.java
        db
        AbstractHomepageTag.java
        BreadcrumbTag.java
        DbNameTag.java
        DbXRefLinkTag.java
        DisplayPhylogeny.java
        DisplaySimilarity.java
        FilteredPropertyLoopTag.java
        HomepageSelectTag.java
        HomepageTreeTag.java
        HyperlinkDbsInText.java
        ListStringTag.java
        OrganismNameTag.java
        PhylonodeHomePageListTag.java
        PropertyFilterTag.java
        QuickSearchMessageTag.java
        QuickSearchTaxonomicGraphTag.java
        SimpleSelectTag.java
        TableWrapper.java
        TaxonNameTag.java
        misc
        DebugTag.java
        DisplayDateTag.java
        FormatSequenceTag.java
        HistoryTag.java
        ListItemsTag.java
        SystematicNameFormatTag.java
        UrlTag.java
        utils
        DownloadUtils.java
        Gff3Utils.java
        Grep.java
      - gmod
        schema
        bulk
        AbstractIterator.java
        BulkProcessor.java
        DataIntegrityViolation.java
        DatabaseException.java
        ProcessingException.java
        TranscriptHandler.java
        TranscriptInfo.java
        cfg
        ChadoAnnotationConfiguration.java
        ChadoAnnotationException.java
        ChadoAnnotationSettingsFactory.java
        ChadoSessionFactoryBean.java
        FeatureType.java
        FeatureTypeUtils.java
        FilteringSessionFactory.java
        OrganismHeirachy.java
        feature
        AbstractExon.java
        AbstractGene.java
        AminoAcid.java
        ApicoplastChromosome.java
        BACEnd.java
        Centromere.java
        Chromosome.java
        CloneInsert.java
        ClonedGenomicInsert.java
        ConjugativeTransposon.java
        Contig.java
        CytoplasmicRegion.java
        DNATransposon.java
        Deletion.java
        DinucleotideRepeatMicrosatelliteFeature.java
        DirectRepeatRegion.java
        EST.java
        ESTMatch.java
        Exon.java
        ExtramembraneRegion.java
        FivePrimeUTR.java
        GPIAnchorCleavageSite.java
        Gap.java
        Gene.java
        GenomicIsland.java
        HelixTurnHelix.java
        Insertion.java
        IntegratedMobileGeneticElement.java
        IntegratedPlasmid.java
        IntramembraneRegion.java
        Intron.java
        InvertedRepeatRegion.java
        Junction.java
        LinearDoubleStrandedDNAChromosome.java
        LongTerminalRepeat.java
        MRNA.java
        Match.java
        MatchPart.java
        MembraneStructure.java
        MembraneStructureComponent.java
        Microsatellite.java
        MitochondrialChromosome.java
        MobileGeneticElement.java
        ModifiedAminoAcidFeature.java
        NcRNA.java
        NonCytoplasmicRegion.java
        NucleotideMatch.java
        PCRProduct.java
        Plasmid.java
        PolycistronicTranscript.java
        Polypeptide.java
        PolypeptideDomain.java
        PolypeptideMotif.java
        PolypeptideRegion.java
        PolypeptideStructuralMotif.java
        PrimaryTranscript.java
        ProductiveTranscript.java
        Promoter.java
        Prophage.java
        ProteinMatch.java
        Pseudogene.java
        PseudogenicExon.java
        PseudogenicTranscript.java
        RNApol_I_Promoter.java
        RRNA.java
        Read.java
        Reagent.java
        Region.java
        Remark.java
        RepeatRegion.java
        RepeatUnit.java
        SECISElement.java
        SNP.java
        ScRNA.java
        SequenceAlteration.java
        SequenceDifference.java
        SequenceVariant.java
        SignalPeptide.java
        SnRNA.java
        SnoRNA.java
        SpliceSite.java
        SplicedLeaderRNA.java
        Supercontig.java
        TRNA.java
        TandemRepeat.java
        TetraNucleotideRepeatMicrosatelliteFeature.java
        ThreePrimeUTR.java
        TopLevelFeature.java
        Transcript.java
        TranscriptRegion.java
        TransmembraneRegion.java
        TransposableElement.java
        TrinucleotideRepeatMicrosatelliteFeature.java
        UORF.java
        UTR.java
        mapped
        Analysis.java
        AnalysisFeature.java
        AnalysisProp.java
        Cv.java
        CvTerm.java
        CvTermDbXRef.java
        CvTermPath.java
        CvTermProp.java
        CvTermRelationship.java
        CvTermSynonym.java
        Db.java
        DbXRef.java
        DbXRefProp.java
        Feature.java
        FeatureCvTerm.java
        FeatureCvTermDbXRef.java
        FeatureCvTermProp.java
        FeatureCvTermPub.java
        FeatureDbXRef.java
        FeatureLoc.java
        FeatureLocPub.java
        FeatureProp.java
        FeaturePropPub.java
        FeaturePub.java
        FeatureRelationship.java
        FeatureRelationshipProp.java
        FeatureRelationshipPropPub.java
        FeatureRelationshipPub.java
        FeatureSynonym.java
        HasPubsAndDbXRefs.java
        Organism.java
        OrganismDbXRef.java
        OrganismProp.java
        Phylonode.java
        PhylonodeDbXRef.java
        PhylonodeOrganism.java
        PhylonodeProp.java
        PhylonodePub.java
        PhylonodeRelationship.java
        Phylotree.java
        PhylotreePub.java
        Project.java
        Pub.java
        PubAuthor.java
        PubDbXRef.java
        PubProp.java
        PubRelationship.java
        Synonym.java
        utils
        CollectionUtils.java
        CompoundLocation.java
        CountedName.java
        CvTermUtils.java
        GeneNameOrganism.java
        LocationUtils.java
        ObjectManager.java
        PeptideProperties.java
        Rankable.java
        RankableUtils.java
        SimilarityI.java
        SingleLocation.java
        Strand.java
        StrandedLocation.java
        propinterface
        PropertyI.java
  - test
    - org
      - genedb
        db
        audit
        HibernateChangeTrackerTest.java
        MockChangeSetImpl.java
        MockChangeTrackerImpl.java
        dao
        PhylogenyDaoTest.java
        domain
        test
        BasicGeneHelper.java
        MockBasicGeneService.java
        loading
        EmblFileTest.java
        EmblLoaderBergheiTest.java
        EmblLoaderMansoniTest.java
        EmblLoaderReloadTest.java
        EmblLoaderSyntheticTest.java
        EmblLoaderTestHelper.java
        EmblLocationTest.java
        FastaFileTest.java
        FastaLoaderTest.java
        FeatureTester.java
        OrthologueLoaderClusteredTest.java
        OrthologueLoaderImplicitClusterTest.java
        OrthologueLoaderUnclusteredTest.java
        OrthologueTester.java
        TestLogger.java
        auxiliary
        DomainLoaderTest.java
        HTHLoaderTest.java
        RfamLoaderTest.java
        test
        tools
        BuildTestDatabase.java
        querying
        tmpquery
        HibernateTest.java
        MockProteinLengthQuery.java
        ProteinLengthQueryTest.java
        QuickSearchQueryTest.java
        SimpleNameQueryTest.java
        util
        CountersTest.java
        IterableArrayTest.java
        web
        gui
        ContextMapDiagramTest.java
        DiagramLayoutTest.java
        mvc
        controller
        download
        DownloadTest.java
        model
        CacheSynchTestDelegate.java
        TestAbstractUpdater.java
      - gmod
        schema
        mapped
        MockChromosome.java
        MockTranscript.java
        test
        HibernateTest.java

package org.genedb.db.loading;

import org.genedb.db.dao.CvDao;
import org.genedb.db.dao.GeneralDao;
import org.genedb.db.dao.OrganismDao;
import org.genedb.db.dao.PubDao;
import org.genedb.util.Counters;
import org.genedb.util.IterableArray;

import org.gmod.schema.cfg.FeatureTypeUtils;
import org.gmod.schema.feature.AbstractExon;
import org.gmod.schema.feature.AbstractGene;
import org.gmod.schema.feature.Centromere;
import org.gmod.schema.feature.Contig;
import org.gmod.schema.feature.DirectRepeatRegion;
import org.gmod.schema.feature.FivePrimeUTR;
import org.gmod.schema.feature.Gap;
import org.gmod.schema.feature.Gene;
import org.gmod.schema.feature.InvertedRepeatRegion;
import org.gmod.schema.feature.MRNA;
import org.gmod.schema.feature.NcRNA;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.feature.PolypeptideMotif;
import org.gmod.schema.feature.ProductiveTranscript;
import org.gmod.schema.feature.Pseudogene;
import org.gmod.schema.feature.PseudogenicTranscript;
import org.gmod.schema.feature.RRNA;
import org.gmod.schema.feature.Region;
import org.gmod.schema.feature.RepeatRegion;
import org.gmod.schema.feature.RepeatUnit;
import org.gmod.schema.feature.SECISElement;
import org.gmod.schema.feature.SnRNA;
import org.gmod.schema.feature.SnoRNA;
import org.gmod.schema.feature.Supercontig;
import org.gmod.schema.feature.TRNA;
import org.gmod.schema.feature.ThreePrimeUTR;
import org.gmod.schema.feature.TopLevelFeature;
import org.gmod.schema.feature.Transcript;
import org.gmod.schema.feature.UTR;
import org.gmod.schema.mapped.Analysis;
import org.gmod.schema.mapped.DbXRef;
import org.gmod.schema.mapped.Feature;
import org.gmod.schema.mapped.FeatureCvTerm;
import org.gmod.schema.mapped.HasPubsAndDbXRefs;
import org.gmod.schema.mapped.Organism;
import org.gmod.schema.mapped.Pub;
import org.gmod.schema.mapped.PubDbXRef;
import org.gmod.schema.mapped.Synonym;
import org.gmod.schema.utils.ObjectManager;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.criterion.Restrictions;
import org.springframework.orm.hibernate3.SessionFactoryUtils;
import org.springframework.transaction.annotation.Transactional;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Deals with loading an organism from an EMBL file into a Chado database.
 * It's expected to be configured as a singleton Spring bean. The main
 * calling point (and only public method, apart from the property setters)
 * is {@link #load(EmblFile)}.
 *
 * @author rh11
 *
 */
class EmblLoader {
    private static final Logger logger = Logger.getLogger(EmblLoader.class);

    // Constants

    /**
     * A unique number should be appended to the uniquename of the singly
     * spliced transcript features in the database. We must do this because
     * Artemis requires each feature to have a globally unique name, and does
     * not work correctly if a transcript has the same uniquename as its gene.
     * Similarly the GFF3 feature format requires feature names to be
     * globally unique, and we want to be able to export our data in GFF3 format.
     *
     * For alternatively-spliced genes, on the other hand, there is no need to
     * append the transcript type, because the transcript will have an assigned
     * uniquename (the /systematic_id of the CDS) that is different from the
     * uniquename of the gene (the /shared_id of the CDS).
     */
    //private enum AppendType { ALWAYS, NEVER, SINGLY_SPLICED_ONLY };
    //private static final AppendType APPEND_TYPE_TO_TRANSCRIPT_UNIQUENAME = AppendType.SINGLY_SPLICED_ONLY;

    // Injected beans
    private CvDao cvDao;
    private GeneralDao generalDao;
    private OrganismDao organismDao;         // Used by setOrganismCommonName() to look up the organism
    private PubDao pubDao;
    private ObjectManager objectManager;     // See #afterPropertiesSet()
    private SessionFactory sessionFactory;   // Supplies the Hibernate session used while loading
    private FeatureUtils featureUtils;

    // Started with the current Hibernate session at the beginning of doLoad()
    private SynonymManager synonymManager = new SynonymManager();

    // Configurable parameters
    private Organism organism;               // Resolved from its common name by setOrganismCommonName()
    private Class<? extends TopLevelFeature> topLevelFeatureClass = Supercontig.class;
    private boolean continueOnError = false;

    // What to do when a top-level feature of the same name already exists; see setOverwriteExisting()
    public enum OverwriteExisting {YES, NO, MERGE}
    private OverwriteExisting overwriteExisting = OverwriteExisting.NO;

    private boolean sloppyControlledCuration = false;
    private boolean reportUnusedQualifiers = true;
    private boolean goTermErrorsAreNotFatal = false;

    // Ignore lists; copied onto each file's FeatureTable by propagateIgnoreFeaturesAndQualifiers()
    private Collection<String> ignoredFeatures = new HashSet<String>();
    private Collection<String> ignoredQualifiers = new HashSet<String>();
    // Keyed by feature type; each value is the set of qualifier names ignored on that type
    private Map<String,Collection<String>> ignoredQualifiersByFeatureType = new HashMap<String,Collection<String>>();

    /**
     * Set the organism into which to load data.
     *
     * @param organismCommonName the common name of the organism
     */
    public void setOrganismCommonName(String organismCommonName) {
        // Resolve into a local first, so that a failed lookup does not
        // replace a previously configured organism with null.
        Organism resolvedOrganism = organismDao.getOrganismByCommonName(organismCommonName);
        if (resolvedOrganism == null) {
            throw new IllegalArgumentException(String.format("Organism '%s' not found", organismCommonName));
        }
        this.organism = resolvedOrganism;
    }

    /**
     * Set the class of top-level feature that this EMBL file represents.
     * The default, if this method is not called, is <code>Supercontig</code>.
     *
     * @param topLevelFeatureClass
     */
    public void setTopLevelFeatureClass(Class<? extends TopLevelFeature> topLevelFeatureClass) {
        // Fail at configuration time rather than later, when doLoad() tries
        // to instantiate the top-level feature from this class.
        if (topLevelFeatureClass == null) {
            throw new IllegalArgumentException("topLevelFeatureClass cannot be null");
        }
        this.topLevelFeatureClass = topLevelFeatureClass;
    }

    /**
     * Whether we should overwrite an existing top-level feature if it has
     * the same name as the one specified in this file. The default, if this
     * method is not called, is <code>NO</code>.
     *
     * If overwriteExisting is <code>NO</code>, the file will be skipped on the
     * grounds that it's already loaded. If it's <code>YES</code>, the previously
     * existing top-level feature, and features located on it, will
     * be deleted first. If it's <code>MERGE</code>, the existing top-level feature
     * and all features located on it will be retained, and any features
     * specified in this file will be loaded in addition.
     *
     * @param overwriteExisting <code>YES</code> if we should overwrite an
     * existing top-level feature, <code>NO</code> if not, or <code>MERGE</code>
     * if we should merge the contents of this file with an existing feature.
     */
    public void setOverwriteExisting(OverwriteExisting overwriteExisting) {
        // getTopLevelFeature() switches on this value, which would fail with a
        // NullPointerException in the middle of a load; reject null up front instead.
        if (overwriteExisting == null) {
            throw new IllegalArgumentException("overwriteExisting cannot be null");
        }
        this.overwriteExisting = overwriteExisting;
    }

    /**
     * Get the policy applied when a top-level feature with the same name
     * as the one in the file already exists.
     *
     * @return the current policy; <code>NO</code> unless
     *          {@link #setOverwriteExisting(OverwriteExisting)} has been called
     */
    public OverwriteExisting getOverwriteExisting() {
        return this.overwriteExisting;
    }

    /**
     * Whether to deal with controlled_curation qualifiers that don't have the expected
     * format. The default, if this method is not called, is <code>false</code>.
     *
     * If set to true, we simply parse any dbxref from the /controlled_curation qualifier,
     * and add the complete text as a /curation qualifier.
     *
     * @param sloppyControlledCuration
     */
    public void setSloppyControlledCuration(boolean sloppyControlledCuration) {
        // Plain property assignment; nothing else is updated here.
        this.sloppyControlledCuration = sloppyControlledCuration;
    }

    /**
     * Whether GO term errors - in particular the case where the database does not contain
     * a term with the specified accession number - should be logged and ignored rather than
     * fatal.
     *
     * @param goTermErrorsAreNotFatal
     */
    public void setGoTermErrorsAreNotFatal(boolean goTermErrorsAreNotFatal) {
        // Plain property assignment; the default (field initialiser) is false.
        this.goTermErrorsAreNotFatal = goTermErrorsAreNotFatal;
    }

    /**
     * Whether we should continue if we encounter an error while loading a feature.
     * You should not usually set this option; it can be useful if you need to load
     * a file quickly and don't mind if some features are missing from the result.
     * <p>
     * In particular, you should <b>not</b> use this option when loading production
     * data!
     *
     * @param continueOnError
     */
    public void setContinueOnError(boolean continueOnError) {
        this.continueOnError = continueOnError;
        if (!continueOnError) {
            return;
        }
        // Make it obvious in the log that errors will no longer abort the load.
        logger.warn("We will continue if an error is encountered loading a feature");
    }

    /**
     * Whether we should log a list of unused qualifiers once the file has been loaded.
     * If set to true, this list is logged as a series of WARN messages, one for each
     * type of feature encountered in the file that has unused qualifiers. The default
     * value is <code>true</code>.
     *
     * @param reportUnusedQualifiers
     */
    public void setReportUnusedQualifiers(boolean reportUnusedQualifiers) {
        // Consulted by load() once the file has been loaded.
        this.reportUnusedQualifiers = reportUnusedQualifiers;
    }

    /**
     * Ignore features of the named type.
     *
     * @param featureType the name of the feature type to ignore
     */
    public void ignoreFeature(String featureType) {
        // Recorded here and forwarded to the file's FeatureTable by
        // propagateIgnoreFeaturesAndQualifiers() when load() is called.
        ignoredFeatures.add(featureType);
    }

    /**
     * Ignore the named qualifier.
     *
     * @param qualifier the name of the qualifier to ignore
     */
    public void ignoreQualifier(String qualifier) {
        // Recorded here and forwarded to the file's FeatureTable by
        // propagateIgnoreFeaturesAndQualifiers() when load() is called.
        ignoredQualifiers.add(qualifier);
    }

    /**
     * Ignore the named qualifier when it appears on a feature of the specified type.
     *
     * @param qualifier the name of the qualifier to ignore
     * @param featureType the type of feature on which to ignore the named qualifier
     */
    public void ignoreQualifier(String qualifier, String featureType) {
        synchronized(ignoredQualifiersByFeatureType) {
            // Fetch the per-type set, creating it the first time this feature type is seen.
            Collection<String> qualifiersForType = ignoredQualifiersByFeatureType.get(featureType);
            if (qualifiersForType == null) {
                qualifiersForType = new HashSet<String>();
                ignoredQualifiersByFeatureType.put(featureType, qualifiersForType);
            }
            qualifiersForType.add(qualifier);
        }
    }

    /**
     * Copy the configured ignore lists (feature types, qualifiers, and
     * per-feature-type qualifiers) onto the feature table of the file
     * that is about to be loaded.
     *
     * @param featureTable the file's feature table; may be <code>null</code>
     *          for a file that has no features, in which case nothing is done
     */
    private void propagateIgnoreFeaturesAndQualifiers(FeatureTable featureTable) {
        if (featureTable == null) {
            // Some EMBL files have no feature table at all (load() and
            // loadFeatures() both allow for that), so there is nothing to configure.
            return;
        }
        for (String featureType: ignoredFeatures) {
            featureTable.ignoreFeature(featureType);
        }
        for (String qualifier: ignoredQualifiers) {
            featureTable.ignoreQualifier(qualifier);
        }
        for (Map.Entry<String,Collection<String>> entry: ignoredQualifiersByFeatureType.entrySet()) {
            String featureType = entry.getKey();
            for (String qualifier: entry.getValue()) {
                featureTable.ignoreQualifier(qualifier, featureType);
            }
        }
    }

    // Hibernate session for the load in progress; assigned at the start of doLoad().
    private Session session;

    /**
     * The main calling point for this class. Takes a parsed EMBL file, and loads
     * it into the database. Each call to this method constitutes a separate
     * Hibernate transaction. Even though the EMBL file has been parsed before
     * this method is called, its internal consistency has not been verified.
     * If we encounter a problem, a <code>DataError</code> is thrown and the
     * transaction is rolled back.
     *
     * @param emblFile the parsed EMBL file
     * @throws DataError if a data problem is discovered
     */
    public void load(EmblFile emblFile) throws DataError {
        propagateIgnoreFeaturesAndQualifiers(emblFile.getFeatureTable());

        // Decide, according to the overwrite policy, whether we are creating a
        // new top-level feature (null) or adding to an existing one. A policy
        // violation is logged and the file is skipped.
        TopLevelFeature existingOrNull;
        try {
            existingOrNull = getTopLevelFeature(emblFile.getAccession());
        } catch (TopLevelFeatureException e) {
            logger.error(e.getMessage());
            return;
        }

        doLoad(emblFile, existingOrNull);

        /* Unused qualifiers can only be reported if there is a feature table.
         * In some of our EMBL files there are no features (e.g. Etenella
         * contigs), and looking in a missing feature table for unused
         * qualifiers would make the loader fail. Hence we also check that
         * the feature table is not null (reportUnusedQualifiers is true by
         * default).
         * nds 26 august 2010
         */
        FeatureTable tableToReport = reportUnusedQualifiers ? emblFile.getFeatureTable() : null;
        if (tableToReport == null) {
            logger.debug("Not reporting on unused qualifiers");
            return;
        }
        reportUnusedQualifiers(tableToReport);
    }

    /**
     * Does the actual loading work for one file: creates or re-attaches the
     * top-level feature, resets the per-file state, then loads contigs/gaps
     * and the feature table.
     *
     * @param emblFile the parsed EMBL file
     * @param topLevelFeature the existing top-level feature to add to, or
     *          <code>null</code> if a new one should be created from the
     *          file's accession and sequence
     * @throws DataError if a data problem is discovered
     */
    @Transactional(rollbackFor=DataError.class) // Will also rollback for runtime exceptions, by default
    private void doLoad(EmblFile emblFile, TopLevelFeature topLevelFeature) throws DataError {
        /*
         * Thanks to the @Transactional annotation,
         * Spring will automatically initiate a transaction when
         * we're called, which will be committed on successful
         * return or rolled back if we throw an exception.
         */
        this.session = SessionFactoryUtils.doGetSession(sessionFactory, false);

        synonymManager.startSession(session);
        taxonomicDivision = emblFile.getTaxonomicDivision();
        logger.trace("taxonomicDivision = " + taxonomicDivision);

        if (topLevelFeature == null) {
            // No existing feature to add to: build one from the file itself.
            logger.info("Creating topLevelFeature: " + emblFile.getAccession());
            topLevelFeature = TopLevelFeature.make(topLevelFeatureClass, emblFile.getAccession(), organism);
            topLevelFeature.markAsTopLevelFeature();
            topLevelFeature.setResidues(emblFile.getSequence());
            session.persist(topLevelFeature);

            // Re-attach the organism to this session before checking/marking it as populated.
            organism = (Organism) session.merge(organism);
            if (!organism.isPopulated()) {
                logger.info(String.format("Marking organism '%s' as populated", organism));
                session.persist(organism.addProperty("genedb_misc", "populated"));
            }
        } else {
            // The feature was fetched by getTopLevelFeature(); merge it into this session.
            topLevelFeature = (TopLevelFeature) session.merge(topLevelFeature);
        }

        init(topLevelFeature);
        // Contig locations are optional; when present they are loaded before the features.
        EmblLocation.Join contigLocations = emblFile.getContigLocations();
        if (contigLocations != null) {
            loadContigsAndGaps(contigLocations);
        }
        loadFeatures(emblFile.getFeatureTable());
    }

    /**
     * This exception is thrown by the <code>getTopLevelFeature</code> method
     *
     * @author rh11
     *
     */
    private static class TopLevelFeatureException extends Exception {
        // Exception is Serializable, so declare the serial version explicitly
        // rather than relying on a compiler-generated one.
        private static final long serialVersionUID = 1L;

        /**
         * @param message a description of the problem; <code>load</code> logs it as an error
         */
        public TopLevelFeatureException(String message) {
            super(message);
        }
    }

    /**
     * Get the top-level feature onto which we should add our features, based on
     * the <code>overwriteExisting</code> policy. In the commonest case, overwriteExisting
     * will have its default value of <code>NO</code>, and this method will either return
     * <code>null</code> if the feature doesn't already exist or throw an exception if it
     * does.
     *
     * @param uniqueName the unique name of the top-level feature we are loading
     * @return the existing top-level feature to use, or <code>null</code> if we should create a new one.
     * @throws TopLevelFeatureException if there is a problem; i.e. the feature exists when it shouldn't,
     *                  or fails to exist (or isn't a top-level feature) when it should.
     */
    @Transactional(rollbackFor=TopLevelFeatureException.class) // Will also rollback for runtime exceptions, by default
    private TopLevelFeature getTopLevelFeature(String uniqueName)
            throws TopLevelFeatureException {

        Session lookupSession = SessionFactoryUtils.doGetSession(sessionFactory, false);
        Feature found = (Feature) lookupSession.createCriteria(Feature.class)
            .add(Restrictions.eq("organism", organism))
            .add(Restrictions.eq("uniqueName", uniqueName))
            .uniqueResult();

        if (found == null) {
            // Nothing exists yet: fine for YES and NO, but MERGE needs a target.
            if (overwriteExisting == OverwriteExisting.MERGE) {
                throw new TopLevelFeatureException(String.format("Cannot MERGE because feature '%s' does not exist", uniqueName));
            }
            lookupSession.flush();
            return null;
        }

        final boolean foundIsTopLevel = found instanceof TopLevelFeature;
        switch (overwriteExisting) {
        case NO:
            throw new TopLevelFeatureException(String.format("The organism '%s' already has feature '%s'",
                organism.getCommonName(), uniqueName));
        case MERGE:
            if (!foundIsTopLevel) {
                throw new TopLevelFeatureException(String.format("We can't merge onto the feature '%s', because it's not a top-level feature",
                    found.getUniqueName()));
            }
            return (TopLevelFeature) found;
        case YES:
            logger.trace(String.format("Deleting existing feature '%s' (ID=%d)",
                found.getUniqueName(), found.getFeatureId()));

            // A non-top-level feature is still deleted; we only warn about it.
            if (!foundIsTopLevel) {
                logger.warn(String.format("Existing feature is %s, not a top-level feature",
                    found.getClass()));
            }
            found.delete();
            break;
        }

        // Reached only after a deletion: flush, then tell the caller to create a new feature.
        lookupSession.flush();
        return null;
    }

    // Per-file state. Apart from taxonomicDivision (assigned in doLoad()),
    // everything below is reset by init() before each file is loaded.
    private String taxonomicDivision;               // Taken from the EMBL file in doLoad()
    private TopLevelFeature topLevelFeature;        // The feature onto which everything in the current file is located
    private Map<String,AbstractGene> genesByUniqueName = new HashMap<String,AbstractGene>();
    private Map<String,Transcript> transcriptsByUniqueName = new HashMap<String,Transcript>();
    // Keyed by the contig's start position on the top-level feature; filled by
    // loadContigsAndGaps() and searched by locate()
    private NavigableMap<Integer,Contig> contigsByStart = new TreeMap<Integer,Contig>();
    // Unique names of repeat regions already created from this file, used to skip duplicates
    private Set<String> repeatRegionUniqueNames = new HashSet<String>();
    private Set<String> repeatUnitUniqueNames = new HashSet<String>();
    private Map<String,Integer> syntheticNcRNAIndexByType = new HashMap<String,Integer>();

    /**
     * We want to create a single Analysis object/row for each distinct analysis program
     * referenced in /similarity qualifiers in this file. These are stored in this map.
     */
    private Map<String,Analysis> similarityAnalysisByProgram = new HashMap<String,Analysis>();

    /**
     * Reset all our local state: necessary if the user retries after an error,
     * or if the same EmblLoader object is used more than once (to load more than one file).
     *
     * @param topLevelFeature
     */
    private void init(TopLevelFeature topLevelFeature) {
        if (null == topLevelFeature) {
            throw new IllegalArgumentException("topLevelFeature cannot be null");
        }
        this.topLevelFeature = topLevelFeature;

        // Lookup tables built up while loading a file
        genesByUniqueName.clear();
        transcriptsByUniqueName.clear();
        contigsByStart.clear();
        repeatRegionUniqueNames.clear();
        repeatUnitUniqueNames.clear();
        similarityAnalysisByProgram.clear();

        // Counters used when generating unique names
        syntheticNcRNAIndexByType.clear();
        archivedFeatureIndexes.clear();
        motifIndex = 1;

        objectManager.flush();
    }

    /**
     * Walks the contig locations in order, creating a contig for each external
     * reference (plain or complemented) and a gap for each gap entry, laid
     * end-to-end along the top-level feature starting at position 0.
     *
     * @param locations the joined contig locations of the file
     * @throws DataError if an entry is neither an external reference,
     *          a complemented external reference, nor a gap
     */
    private void loadContigsAndGaps(EmblLocation.Join locations) throws DataError {
        int offset = 0; // Position (interbase) on topLevelFeature
        for (EmblLocation entry: locations.locations) {
            final int entryLength;

            if (entry instanceof EmblLocation.External) {
                EmblLocation.External external = (EmblLocation.External) entry;
                entryLength = external.simple.getLength();
                contigsByStart.put(offset, createContig(offset, entryLength, external.accession));
            } else if (entry instanceof EmblLocation.Complement) {
                EmblLocation inner = ((EmblLocation.Complement) entry).location;
                if (!(inner instanceof EmblLocation.External)) {
                    throw new DataError("The CO section should contain only external references and gaps");
                }
                EmblLocation.External external = (EmblLocation.External) inner;
                entryLength = external.simple.getLength();
                // Complemented contigs get a distinguishing suffix on their unique name
                contigsByStart.put(offset, createContig(offset, entryLength, external.accession + "_reversed"));
            } else if (entry instanceof EmblLocation.Gap) {
                entryLength = ((EmblLocation.Gap) entry).getLength();
                logger.debug(String.format("Creating gap at %d-%d", offset, offset + entryLength));
                session.persist(topLevelFeature.addGap(offset, offset + entryLength));
            } else {
                throw new DataError("The CO section should contain only external references and gaps");
            }

            offset += entryLength;
        }
    }

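    /**
     * Creates a contig covering part of the top-level feature, gives it the
     * corresponding residues, persists it, and locates it on the top-level feature.
     *
     * @param pos the (interbase) start position of the contig on the top-level feature
     * @param contigLength the length of the contig
     * @param contigUniqueName the unique name to give the contig
     * @return the newly created contig
     */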
    private Contig createContig(int pos, int contigLength, String contigUniqueName) {
        final int contigEnd = pos + contigLength;
        logger.debug(String.format("Creating contig '%s' at %d-%d", contigUniqueName, pos, contigEnd));

        Contig created = TopLevelFeature.make(Contig.class, contigUniqueName, organism);
        // The contig carries its own copy of the matching stretch of sequence
        created.setResidues(topLevelFeature.getResidues(pos, contigEnd));
        session.persist(created);

        topLevelFeature.addLocatedChild(created, pos, contigEnd, (short) 0, 0);
        return created;
    }

    /**
     * Locate a feature using the fmin, fmax and strand of an EMBL location,
     * with no phase.
     *
     * @param feature the feature to locate
     * @param location the EMBL location to take the coordinates and strand from
     */
    private void locate(Feature feature, EmblLocation location) {
        locate(feature, location.getFmin(), location.getFmax(), (short) location.getStrand(), null);
    }

    /**
     * Locate a feature on the top-level feature and, if it lies within a
     * single contig, on that contig as well (in contig-relative coordinates).
     *
     * @param feature the feature to locate
     * @param fmin the start of the feature on the top-level feature
     * @param fmax the end of the feature on the top-level feature
     * @param strand the strand
     * @param phase the phase, or <code>null</code> if there is none
     */
    private void locate(Feature feature, int fmin, int fmax, short strand, Integer phase) {
        topLevelFeature.addLocatedChild(feature, fmin, fmax, strand, phase);

        // floorEntry returns null both when there are no contigs at all and when
        // no contig starts at or before fmin (e.g. the sequence begins with a gap),
        // so the entry must be checked before it is dereferenced.
        Map.Entry<Integer,Contig> floor = contigsByStart.floorEntry(fmin);
        Contig contig = (floor == null) ? null : floor.getValue();
        if (contig == null || fmax > contig.getFmax()) {
            logger.debug(String.format("The feature '%s' (%s) is not contained in a contig",
                feature.getUniqueName(), feature.getName()));
            return;
        }
        logger.debug(String.format("The feature '%s' lies on contig '%s'", feature.getUniqueName(), contig.getUniqueName()));
        contig.addLocatedChild(feature, fmin - contig.getFmin(), fmax - contig.getFmin(), strand, phase, 1, 1);
    }

    /**
     * Load every feature in the feature table. UTR features are set aside by
     * {@link #loadFeature} and loaded in a second pass, after all the other
     * features.
     * <p>
     * A <code>DataError</code> raised by a feature is tagged with that
     * feature's line number. If <code>continueOnError</code> is set the error
     * is logged and loading continues with the next feature (in both passes);
     * otherwise it is rethrown.
     *
     * @param featureTable the feature table, or <code>null</code> if the file has none
     * @throws DataError if a feature cannot be loaded and <code>continueOnError</code> is not set
     */
    private void loadFeatures(FeatureTable featureTable) throws DataError {
        if (featureTable == null) {
            logger.error("No feature table found!");
            return;
        }

        List<FeatureTable.Feature> utrs = new ArrayList<FeatureTable.Feature>();

        for (FeatureTable.Feature feature: featureTable.getFeatures()) {
            try {
                loadFeature(utrs, feature);
            }
            catch (DataError e) {
                e.setLineNumber(feature.lineNumber);
                if (!continueOnError) {
                    throw e;
                }
                logger.error("Continuing after error", e);
            }
        }

        for (FeatureTable.Feature utr: utrs) {
            try {
                loadUTR(utr);
            }
            catch (DataError e) {
                logger.debug("Caught DataError while loading UTR. Setting line number to " + utr.lineNumber);
                e.setLineNumber(utr.lineNumber);
                // Honour continueOnError here too, consistently with the first pass
                if (!continueOnError) {
                    throw e;
                }
                logger.error("Continuing after error", e);
            }
        }
    }

    /**
     * Load a single feature-table entry by dispatching on its type.
     * UTR features are not loaded here; they are appended to <code>utrs</code>
     * for the caller to load later. Feature types with no dedicated loader are
     * archived. When a loader returns a feature, the qualifiers it did not
     * use are archived onto that feature.
     *
     * @param utrs the list to which 3'UTR and 5'UTR features are added
     * @param feature the feature-table entry to load
     * @throws DataError if the location has fmax before fmin, if the feature
     *          is an LTR (not yet supported), or if the type-specific loader fails
     */
    private void loadFeature(List<FeatureTable.Feature> utrs,
            FeatureTable.Feature feature) throws DataError {
        String featureType = feature.type;

        if (feature.location.getFmax() < feature.location.getFmin()) {
            throw new DataError("Location has fmax before fmin");
        }

        Feature focalFeature = null;
        if (featureType.equals("repeat_region")) {
            focalFeature = loadRepeatRegion(feature);
        }
        else if (featureType.equals("repeat_unit")) {
            focalFeature = loadRepeatUnit(feature);
        }
        else if (featureType.equals("CDS")) {
            focalFeature = loadCDS((FeatureTable.CDSFeature) feature);
        }
        else if (featureType.equals("tRNA")) {
            focalFeature = loadNcRNA(TRNA.class, featureType, feature);
        }
        else if (featureType.equals("rRNA")) {
            focalFeature = loadNcRNA(RRNA.class, featureType, feature);
        }
        else if (featureType.equals("snRNA")) {
            focalFeature = loadNcRNA(SnRNA.class, featureType, feature);
        }
        else if (featureType.equals("snoRNA")) {
            focalFeature = loadNcRNA(SnoRNA.class, featureType, feature);
        }
        else if (featureType.equals("misc_RNA") || featureType.equals("ncRNA")) {
            focalFeature = loadNcRNA(NcRNA.class, featureType, feature);
        }
        else if (featureType.equals("3'UTR") || featureType.equals("5'UTR")) {
            // Deferred: the caller loads these after all other features
            utrs.add(feature);
        }
        else if (featureType.equals("gap")) {
            focalFeature = loadGap(feature);
        }
        else if (featureType.equals("CDS_motif")) {
            focalFeature = loadMotif(feature);
        }
        else if (featureType.equals("LTR")) {
            throw new DataError("Tell Robin he needs to write code for loading LTR features!");
            // TODO
        }
        else if (featureType.equals("fasta_record")) {
            // These are often used to identify individual contigs within a bin chromosome.
            // loadFastaRecord archives the unused qualifiers itself.
            loadFastaRecord(feature);
        }
        else if (featureType.equals("misc_feature") && feature.getQualifierValues("note").contains("centromere")) {
            /* Centromeres are pulled out as misc_features with note="centromere" with writedb_entry.
             * Here we check for this note and load the centromere. Rest of the misc features are archived.
             * nds, 30 Nov 2010
             */
            focalFeature = loadCentromere(feature);
        }
        else if (featureType.equals("misc_feature") && feature.getQualifierValues("note").contains("SECIS_element")) {
            /* SECIS_elements are pulled out with a note='SECIS_element' */
            focalFeature = loadSECISElement(feature);
        }
        else {
            logger.info(String.format("Archiving %s feature", featureType));
            archiveFeature(feature);
        }

        if (focalFeature != null) {
            archiveUnusedQualifiers(feature, focalFeature);
        }
    }

    /**
     * Store a fasta_record feature as an obsolete <code>Region</code> named
     * after the feature's first /label value. A comment recording where it was
     * archived from is added first, then one comment per /note and one colour
     * property per /colour, sharing a single running rank. Remaining
     * qualifiers are archived onto the new feature.
     *
     * @param feature the fasta_record feature-table entry
     */
    private void loadFastaRecord(FeatureTable.Feature feature) {
        final String recordName = feature.getQualifierValues("label").get(0);
        logger.warn(String.format("Creating archived contig feature '%s' from '%s' feature on line %d",
            recordName, feature.type, feature.lineNumber));

        final Timestamp createdAt = new Timestamp(System.currentTimeMillis());
        Feature archivedRecord = new Region(organism, recordName,
            /*analysis:*/false, /*obsolete:*/true, createdAt);
        locate(archivedRecord, feature.location);

        String provenance = String.format("Archived from %s feature %s with location %s; file '%s', line %d",
            feature.type, recordName, feature.location, feature.getFilePath(), feature.lineNumber);
        archivedRecord.addFeatureProp(provenance, "feature_property", "comment", 0);

        // The provenance comment took rank 0; notes and colours continue from 1
        int nextRank = 1;
        for (String noteText: feature.getQualifierValues("note")) {
            archivedRecord.addFeatureProp(noteText, "feature_property", "comment", nextRank);
            nextRank++;
        }
        for (String colourValue: feature.getQualifierValues("colour")) {
            archivedRecord.addFeatureProp(colourValue, "genedb_misc", "colour", nextRank);
            nextRank++;
        }

        session.persist(archivedRecord);
        archiveUnusedQualifiers(feature, archivedRecord);
    }
    // Centromeres
    /**
     * Matches a /literature or /citation qualifier value: an optional
     * "PMID:" prefix and whitespace, the numeric identifier (group 1), and
     * optionally a semicolon followed by arbitrary text. Compiled once
     * rather than on every call.
     */
    private static final Pattern CENTROMERE_LITERATURE_PATTERN =
        Pattern.compile("(?:PMID:)?\\s*(\\d+)(?:;.*)?");

    /**
     * Create and persist a centromere on the top-level feature at the location
     * of the given feature, and attach a publication for each
     * /literature or /citation qualifier.
     *
     * @param feature the misc_feature entry describing the centromere
     * @return the newly created centromere
     * @throws DataError if a /literature or /citation qualifier cannot be parsed
     */
    private Centromere loadCentromere(FeatureTable.Feature feature) throws DataError {
        EmblLocation centromereLocation = feature.location;
        String centromereName = feature.getUniqueName();

        logger.info(String.format("Adding a centromere %s at %d-%d on %s", 
                   centromereName, centromereLocation.getFmin(), centromereLocation.getFmax(), topLevelFeature.getUniqueName() ));
        Centromere centromere = Centromere.make(topLevelFeature, centromereName, centromereLocation.getFmin(), centromereLocation.getFmax());
        session.persist(centromere);

        // Add any literature (duplicated effort here since the processLiterature() method is within the geneLoader). Fix later.
        for (String pmid: feature.getQualifierValues("literature", "citation")) {
            Matcher matcher = CENTROMERE_LITERATURE_PATTERN.matcher(pmid);
            if (!matcher.matches()) {
                throw new DataError("Failed to parse literature/citation qualifier: " + pmid);
            }
            String accession = matcher.group(1);
            DbXRef dbXRef = objectManager.getDbXRef("PMID", accession);
            Pub pub = objectManager.getPub(String.format("PMID:%s", accession), "unfetched");
            session.persist(pub.addDbXRef(dbXRef, true));
            session.persist(centromere.addPub(pub));
        }

        return centromere;
    }

    //SECIS_elements
    /**
     * Create, locate and persist a SECIS element for the given feature, then
     * copy its /note values onto it as comments, leaving out the
     * "SECIS_element" marker note and any "false" note.
     *
     * @param feature the misc_feature entry describing the SECIS element
     * @return the newly created SECIS element
     * @throws DataError declared for consistency with the other loaders
     */
    private SECISElement loadSECISElement(FeatureTable.Feature feature) throws DataError {
        final EmblLocation where = feature.location;
        final String elementName = feature.getUniqueName();

        logger.info(String.format("Adding a SECIS_element %s at %d-%d on %s", 
                   elementName, where.getFmin(), where.getFmax(), topLevelFeature.getUniqueName() ));

        SECISElement element = new SECISElement(organism, elementName);
        locate(element, where);
        session.persist(element);

        int commentRank = 0;
        for (String noteText: feature.getQualifierValues("note")) {
            // The note=false just means it is not obsolete
            boolean isMarkerNote = noteText.equalsIgnoreCase("SECIS_element")
                || noteText.equalsIgnoreCase("false");
            if (isMarkerNote) {
                continue;
            }
            element.addFeatureProp(noteText, "feature_property", "comment", commentRank);
            commentRank++;
        }

        return element;
    }
    
    
    // Per-feature-type counters used to number archived features; cleared by init()
    private Counters archivedFeatureIndexes = new Counters();

    /**
     * Store a feature we have no dedicated loader for as an obsolete
     * <code>Region</code>, with a comment recording where it came from and
     * with its unused qualifiers archived onto it.
     *
     * @param feature the feature-table entry to archive
     */
    private void archiveFeature(FeatureTable.Feature feature) {
        final String archivedName = String.format("%s:archived:%s:%d",
            topLevelFeature.getUniqueName(), feature.type,
            archivedFeatureIndexes.nextval(feature.type));
        logger.warn(String.format("Archiving '%s' feature on line %d as '%s'",
            feature.type, feature.lineNumber, archivedName));

        final Timestamp createdAt = new Timestamp(System.currentTimeMillis());
        Feature archived = new Region(organism, archivedName,
            /*analysis:*/false, /*obsolete:*/true, createdAt);
        locate(archived, feature.location);

        String provenance = String.format("Archived from %s feature with location %s; file '%s', line %d",
            feature.type, feature.location, feature.getFilePath(), feature.lineNumber);
        archived.addFeatureProp(provenance, "feature_property", "comment", 0);

        session.persist(archived);
        archiveUnusedQualifiers(feature, archived);
    }

    /**
     * Attach each qualifier the loaders did not use to the database feature
     * as an EMBL_qualifier property, ranked in iteration order from 0.
     *
     * @param feature the feature-table entry whose unused qualifiers are archived
     * @param focalFeature the database feature to attach them to
     */
    private void archiveUnusedQualifiers(FeatureTable.Feature feature, Feature focalFeature) {
        int nextRank = 0;
        for (String leftover: feature.getUnusedQualifiers()) {
            logger.trace(String.format("Archiving qualifier on '%s': %s",
                focalFeature.getUniqueName(), leftover));
            focalFeature.addFeatureProp(leftover, "genedb_misc", "EMBL_qualifier", nextRank);
            nextRank++;
        }
    }

    /**
     * Log the names of the qualifiers that were not used, grouped by feature
     * type: one WARN message per feature type that has any, or a single INFO
     * message if there are none at all.
     *
     * @param featureTable the feature table of the file that was loaded
     */
    private void reportUnusedQualifiers(FeatureTable featureTable) {
        Map<String,Set<String>> namesByFeatureType = new HashMap<String, Set<String>>();

        // Gather the distinct unused qualifier names for each feature type
        for (FeatureTable.Feature feature: featureTable.getFeatures()) {
            for (String qualifierName: feature.getUnusedQualifierNames()) {
                Set<String> namesForType = namesByFeatureType.get(feature.type);
                if (namesForType == null) {
                    namesForType = new HashSet<String>();
                    namesByFeatureType.put(feature.type, namesForType);
                }
                namesForType.add(qualifierName);
            }
        }

        if (namesByFeatureType.isEmpty()) {
            logger.info("No unused qualifiers to report");
            return;
        }

        // One multi-line warning per feature type
        for (String featureType: namesByFeatureType.keySet()) {
            StringBuilder report = new StringBuilder();
            report.append(String.format("Unused qualifiers for %s features:%n", featureType));
            for (String qualifierName: namesByFeatureType.get(featureType)) {
                report.append(String.format("\t/%s%n", qualifierName));
            }
            logger.warn(report);
        }
    }

    /**
     * Create and persist a gap on the top-level feature at the location of
     * the given feature, copying its /note values onto the gap as comments.
     *
     * @param gapFeature the gap feature-table entry
     * @return the newly created gap
     */
    private Gap loadGap(FeatureTable.Feature gapFeature) {
        final int gapStart = gapFeature.location.getFmin();
        final int gapEnd = gapFeature.location.getFmax();

        logger.debug(String.format("Creating gap at %d-%d", gapStart, gapEnd));
        Gap created = topLevelFeature.addGap(gapStart, gapEnd);
        session.persist(created);

        int commentRank = 0;
        for (String noteText: gapFeature.getQualifierValues("note")) {
            created.addFeatureProp(noteText, "feature_property", "comment", commentRank);
            commentRank++;
        }

        return created;
    }

    // Number given to the next motif created from the current file; reset to 1 by init()
    private int motifIndex = 1;

    /**
     * Create, persist and locate a polypeptide motif for the given feature,
     * named after the top-level feature and a running motif number, copying
     * its /note values onto the motif as comments.
     *
     * @param motifFeature the CDS_motif feature-table entry
     * @return the newly created motif
     */
    private PolypeptideMotif loadMotif(FeatureTable.Feature motifFeature) {
        final int thisMotifNumber = motifIndex++;
        final String uniqueName = String.format("%s:motif:%d", topLevelFeature.getUniqueName(), thisMotifNumber);

        PolypeptideMotif created = new PolypeptideMotif(organism, uniqueName);
        session.persist(created);
        locate(created, motifFeature.location);

        int commentRank = 0;
        for (String noteText: motifFeature.getQualifierValues("note")) {
            created.addFeatureProp(noteText, "feature_property", "comment", commentRank);
            commentRank++;
        }

        return created;
    }

    /**
     * Load a repeat region from the given EMBL feature.
     * <p>
     * The concrete class is chosen from the <code>/rpt_type</code> qualifier
     * (case-insensitively): <code>direct</code>, <code>inverted</code>, or a
     * plain {@link RepeatRegion} if the qualifier is absent. The unique name
     * is <code>&lt;top-level unique name&gt;:repeat:&lt;fmin&gt;-&lt;fmax&gt;</code>;
     * if a repeat region with that name has already been loaded, a warning
     * is logged and nothing is created.
     * <p>
     * The <code>/label</code>, <code>/note</code> and <code>/rpt_family</code>
     * qualifiers are stored, in that order, as
     * <code>feature_property:comment</code> properties.
     *
     * @param repeatRegionFeature the repeat-region entry from the EMBL feature table
     * @return the new repeat region, or <code>null</code> if it duplicates one already loaded
     * @throws DataError if <code>/rpt_type</code> has a value other than direct or inverted
     */
    private Feature loadRepeatRegion(FeatureTable.Feature repeatRegionFeature) throws DataError {
        String repeatRegionName = repeatRegionFeature.getQualifierValue("FEAT_NAME");
        EmblLocation repeatRegionLocation = repeatRegionFeature.location;
        int fmin = repeatRegionLocation.getFmin();
        int fmax = repeatRegionLocation.getFmax();

        String repeatType = repeatRegionFeature.getQualifierValue("rpt_type");
        final Class<? extends RepeatRegion> repeatRegionClass;
        if (repeatType == null) {
            repeatRegionClass = RepeatRegion.class;
        } else {
            // Locale-independent lower-casing, so that e.g. "DIRECT" is still
            // recognised when the default locale has special casing rules.
            repeatType = repeatType.toLowerCase(java.util.Locale.ROOT);
            if (repeatType.equals("direct")) {
                repeatRegionClass = DirectRepeatRegion.class;
            } else if (repeatType.equals("inverted")) {
                repeatRegionClass = InvertedRepeatRegion.class;
            } else {
                throw new DataError(String.format("Unknown repeat type '%s'", repeatType));
            }
        }

        String repeatRegionUniqueName = String.format("%s:repeat:%d-%d", topLevelFeature.getUniqueName(), fmin, fmax);
        if (repeatRegionUniqueNames.contains(repeatRegionUniqueName)) {
            logger.warn(String.format("The repeat region '%s' already exists. " +
                    "Ignoring second (or subsequent) occurrence at line %d",
                repeatRegionUniqueName, repeatRegionFeature.lineNumber));
            return null;
        }
        repeatRegionUniqueNames.add(repeatRegionUniqueName);

        logger.debug(String.format("Creating repeat region '%s' (%s) of type '%s' at %d-%d",
            repeatRegionUniqueName, repeatRegionName, repeatRegionClass.getSimpleName(), fmin, fmax));
        RepeatRegion repeatRegion = RepeatRegion.make(repeatRegionClass,
            organism, repeatRegionUniqueName, repeatRegionName);

        // All three kinds of comment share one rank sequence
        int rank = 0;
        String label = repeatRegionFeature.getQualifierValue("label");
        if (label != null) {
            repeatRegion.addFeatureProp(String.format("/label=%s", label), "feature_property", "comment", rank++);
        }
        for (String note : repeatRegionFeature.getQualifierValues("note")) {
            repeatRegion.addFeatureProp(note, "feature_property", "comment", rank++);
        }

        // Add a comment for the /rpt_family, if present
        String rptFamily = repeatRegionFeature.getQualifierValue("rpt_family");
        if (rptFamily != null) {
            repeatRegion.addFeatureProp(String.format("/rpt_family=%s", rptFamily),
                "feature_property", "comment", rank++);
        }

        session.persist(repeatRegion);
        locate(repeatRegion, fmin, fmax, (short)0, null);

        return repeatRegion;
    }

    // TODO loadRepeatUnit is very similar to loadRepeatRegion: unify?

    /**
     * Load a repeat unit from the given EMBL feature.
     * <p>
     * The unique name is
     * <code>&lt;top-level unique name&gt;:repeat_unit:&lt;fmin&gt;-&lt;fmax&gt;</code>;
     * if a repeat unit with that name has already been loaded, a warning is
     * logged and nothing is created.
     * <p>
     * A <code>/colour</code> qualifier is stored as a
     * <code>genedb_misc:colour</code> property; <code>/label</code> and
     * <code>/note</code> qualifiers are stored, in that order, as
     * <code>feature_property:comment</code> properties.
     *
     * @param repeatUnitFeature the repeat-unit entry from the EMBL feature table
     * @return the new repeat unit, or <code>null</code> if it duplicates one already loaded
     * @throws DataError declared for symmetry with the other feature loaders
     */
    private Feature loadRepeatUnit(FeatureTable.Feature repeatUnitFeature) throws DataError {
        EmblLocation repeatUnitLocation = repeatUnitFeature.location;
        int fmin = repeatUnitLocation.getFmin();
        int fmax = repeatUnitLocation.getFmax();
        String repeatUnitUniqueName = String.format("%s:repeat_unit:%d-%d", topLevelFeature.getUniqueName(), fmin, fmax);

        if (repeatUnitUniqueNames.contains(repeatUnitUniqueName)) {
            logger.warn(String.format("The repeat unit '%s' already exists. " +
                    "Ignoring second (or subsequent) occurrence at line %d",
                repeatUnitUniqueName, repeatUnitFeature.lineNumber));
            return null;
        }
        repeatUnitUniqueNames.add(repeatUnitUniqueName);

        logger.debug(String.format("Creating repeat unit '%s' at %d-%d",
            repeatUnitUniqueName, fmin, fmax));
        RepeatUnit repeatUnit = RepeatUnit.make(RepeatUnit.class,
            organism, repeatUnitUniqueName, null);

        String colour = repeatUnitFeature.getQualifierValue("colour");
        if (colour != null) {
            repeatUnit.addFeatureProp(colour, "genedb_misc", "colour", 0);
        }

        // Comments have their own rank sequence, independent of the colour property
        int rank = 0;
        String label = repeatUnitFeature.getQualifierValue("label");
        if (label != null) {
            repeatUnit.addFeatureProp(String.format("/label=%s", label), "feature_property", "comment", rank++);
        }
        for (String note: repeatUnitFeature.getQualifierValues("note")) {
            repeatUnit.addFeatureProp(note, "feature_property", "comment", rank++);
        }

        session.persist(repeatUnit);
        locate(repeatUnit, fmin, fmax, (short)0, null);

        return repeatUnit;
    }

    // The sub-qualifier keys that are accepted inside a /GO="..." qualifier value.
    // Declared here rather than in GeneLoader because we can't define static
    // fields in inner classes, grr.
    // "residue" is included because GeneLoader.processGO has a branch that
    // handles it; without it here, that key would be rejected as unknown.
    private static final Set<String> goQualifiers = new HashSet<String>();
    static {
        Collections.addAll(goQualifiers, "aspect", "GOid", "term", "qualifier",
            "evidence", "db_xref", "with", "from", "date", "autocomment", "attribution",
            "residue");
    }

    /**
     * Abstract superclass for gene loaders.
     * <p>
     * It is the responsibility of each implementing class to set at least
     * the fields <code>geneUniqueName</code>, <code>transcriptUniqueName</code>
     * and <code>geneName</code> in its constructor.
     *
     * @author rh11
     *
     */
    abstract class GeneLoader {
        /** The EMBL feature-table entry whose qualifiers this loader reads. */
        protected FeatureTable.Feature feature;
        /** The location of {@link #feature}; copied from <code>feature.location</code> by the constructor. */
        protected EmblLocation location;
        /** If true, {@link #getGeneClass()} selects <code>Pseudogene</code> rather than <code>Gene</code>. */
        protected boolean isPseudo = false;
        /**
         * If true, a new gene is created for this transcript; if false, a gene
         * already registered under {@link #geneUniqueName} is reused when there is one.
         */
        protected boolean singlySpliced = true;
        /** Passed to <code>setObsolete</code> on the gene, transcript, polypeptide and exons this loader creates. */
        protected boolean isObsolete = false;
        /** Unique name of the gene. Must be non-null by the time {@link #load()} is called. */
        protected String geneUniqueName = null;
        /** Unique name of the transcript. Must be non-null by the time {@link #load()} is called. */
        protected String transcriptUniqueName = null;
        /** The gene name passed to <code>AbstractGene.make</code> when the gene is created. */
        protected String geneName;
        /** The transcript created by this loader; assigned while {@link #load()} runs. */
        protected Transcript transcript;

        /**
         * The focal feature is the one to which annotation is added.
         * This is the polypeptide where possible, or the transcript if
         * there is no polypeptide.
         */
        protected Feature focalFeature;
        /** The phase passed to <code>createExon</code> for every exon of the transcript; null unless a subclass sets it. */
        protected Integer phase = null;

        public GeneLoader(FeatureTable.Feature feature) {
            this.feature  = feature;
            this.location = feature.location;
        }

        /**
         * The class of gene to create.
         *
         * @return <code>Pseudogene.class</code> if {@link #isPseudo} is set,
         *         otherwise <code>Gene.class</code>
         */
        protected Class<? extends AbstractGene> getGeneClass() {
            if (isPseudo) {
                return Pseudogene.class;
            }
            return Gene.class;
        }

        /**
         * The class of transcript that this loader creates. It is passed to
         * <code>makeTranscript</code> when the transcript is created, and
         * {@link #getTranscriptType()} derives the transcript type from it.
         *
         * @return the transcript class
         */
        protected abstract Class<? extends Transcript> getTranscriptClass();

        /**
         * The feature-type term for the class returned by
         * {@link #getTranscriptClass()}.
         *
         * @return the term of the transcript class's feature type
         */
        protected String getTranscriptType() {
            final Class<? extends Transcript> transcriptClass = getTranscriptClass();

            /*
             * This assumes that transcript feature classes are annotated with term
             * rather than accession, which is true at the time of writing.
             */
            return FeatureTypeUtils.getFeatureTypeForClass(transcriptClass).term();
        }

        /**
         * The main entry point to a gene loader: obtains the gene (creating
         * it if necessary), then creates the transcript and everything that
         * hangs off it.
         *
         * @return the focal feature, i.e. the feature to which annotation is added
         * @throws DataError if the feature's data cannot be loaded
         * @throws RuntimeException if the gene or transcript unique name has not been set
         */
        public Feature load() throws DataError {
            if (null == geneUniqueName) {
                throw new RuntimeException("Cannot load a gene with no uniqueName");
            }
            if (null == transcriptUniqueName) {
                throw new RuntimeException("Cannot load a transcript with no uniqueName");
            }

            final AbstractGene owningGene = loadOrFetchGene();
            loadTranscript(owningGene);

            return focalFeature;
        }

        /**
         * Find the gene to which the transcript belongs, creating it if
         * necessary. Any gene created here is registered in
         * <code>genesByUniqueName</code> under {@link #geneUniqueName}.
         *
         * @return the top-level feature if that is itself a gene; otherwise a
         *         new gene, or (for genes that are not singly spliced) the gene
         *         already registered under this unique name
         */
        private AbstractGene loadOrFetchGene() {
            /* If the gene is acting as the topLevelFeature, don't create a
             * new gene feature: hand back the existing top-level gene. */
            if (Gene.class.equals(topLevelFeature.getClass())) {
                logger.debug("The toplevel feature is a gene");
                return (AbstractGene) topLevelFeature;
            }

            // A later transcript of an alternately-spliced gene reuses the gene
            if (!singlySpliced && genesByUniqueName.containsKey(geneUniqueName)) {
                logger.debug(String.format("Gene for shared ID '%s' already exists", geneUniqueName));
                return genesByUniqueName.get(geneUniqueName);
            }

            // Either singly spliced, or the first transcript of its gene
            final AbstractGene newGene = singlySpliced ? createSinglySplicedGene() : createGene();
            genesByUniqueName.put(geneUniqueName, newGene);
            return newGene;
        }

        /**
         * Create the gene for a singly-spliced transcript, warning first if
         * the transcript's unique name contains a dot.
         *
         * @return the newly-created gene
         */
        private AbstractGene createSinglySplicedGene() {
            final boolean nameHasDot = transcriptUniqueName.indexOf('.') != -1;
            if (nameHasDot) {
                final String warning = String.format(
                    "The transcript '%s' is not alternately spliced, yet its systematic name contains a dot",
                    transcriptUniqueName);
                logger.warn(warning);
            }
            return createGene();
        }

        /**
         * Create a gene of the class given by {@link #getGeneClass()}, set
         * its obsolete flag, locate it at this loader's location and
         * persist it.
         *
         * @return the newly-created gene
         */
        private AbstractGene createGene() {
            logger.debug(String.format("Creating gene '%s' (%s)", geneUniqueName, geneName));

            final AbstractGene created = AbstractGene.make(getGeneClass(), organism, geneUniqueName, geneName);
            created.setObsolete(isObsolete);

            final String statusMessage = String.format("Setting gene %s 's obsolete status to %s",
                created.getUniqueName(), isObsolete);
            logger.info(statusMessage);

            locate(created, location);
            session.persist(created);
            return created;
        }

        /**
         * Create and persist the transcript for the given gene, choose the
         * focal feature, register the transcript in
         * <code>transcriptsByUniqueName</code>, then load its exons and
         * process its qualifiers.
         *
         * @param gene the gene to which the transcript belongs
         * @throws DataError if the exons or qualifiers cannot be loaded
         */
        private void loadTranscript(AbstractGene gene) throws DataError {
            final String ownerName = gene.getUniqueName();
            logger.debug(String.format("Creating transcript '%s' for gene '%s'", transcriptUniqueName, ownerName));

            /*
             * When the transcript would otherwise have the same uniquename as
             * its gene (as happens for singly-spliced genes), ".1" is appended
             * to the transcript's uniquename. We must do this because Artemis
             * requires each feature to have a globally unique name, and does
             * not work correctly if a transcript has the same uniquename as
             * its gene. Similarly the GFF3 feature format requires feature
             * names to be globally unique, and we want to be able to export
             * our data in GFF3 format.
             *
             * For alternatively-spliced genes, on the other hand, no suffix is
             * needed, because the transcript will have an assigned uniquename
             * (the /systematic_id of the CDS) that is different from the
             * uniquename of the gene (the /shared_id of the CDS).
             */
            final String storedName = transcriptUniqueName.equals(ownerName)
                ? String.format("%s.1", transcriptUniqueName)
                : transcriptUniqueName;

            this.transcript = gene.makeTranscript(getTranscriptClass(), storedName,
                location.getFmin(), location.getFmax(), gene, location);
            transcript.setObsolete(isObsolete);
            session.persist(transcript);

            // Annotation goes on the polypeptide if there is one, else on the transcript
            focalFeature = transcript;
            if (transcript instanceof ProductiveTranscript) {
                final Polypeptide protein = ((ProductiveTranscript) transcript).getProtein();
                if (protein != null) {
                    protein.setObsolete(isObsolete);
                    focalFeature = protein;
                }
            }

            // Registered under the name actually stored, which may carry the ".1" suffix
            transcriptsByUniqueName.put(storedName, transcript);
            loadExons(storedName);
            processTranscriptQualifiers();
        }

        /**
         * For each <code>/&lt;qualifierName&gt;</code> qualifier, add a synonym of type
         * <code>synonymType</code> to the transcript. A value that is repeated
         * on the feature is reported as an error and added only once.
         *
         * @param qualifierName the name of the qualifier
         * @param synonymType the type of synonym. Should be a term in the <code>genedb_synonym_type</code> CV
         * @param isCurrent whether the synonym is current or not
         */
        protected void addTranscriptSynonymsFromQualifier(String qualifierName, String synonymType, boolean isCurrent) {
            final Set<String> alreadyAdded = new HashSet<String>();
            for (String name : feature.getQualifierValues(qualifierName)) {
                // Set.add returns false if the value was already present
                if (!alreadyAdded.add(name)) {
                    logger.error(String.format("The qualifier /%s=\"%s\" is repeated on transcript '%s'",
                        qualifierName, name, transcriptUniqueName));
                    continue;
                }

                logger.debug(String.format("Adding %s '%s' for transcript", synonymType, name));
                final Synonym synonym = synonymManager.getSynonym(synonymType, name);
                session.persist(transcript.addSynonym(synonym, isCurrent, /*isInternal:*/ false));
            }
        }

        /**
         * For each <code>/&lt;qualifierName&gt;</code> qualifier, add a property of
         * the specified type to the focal feature, i.e. the polypeptide if
         * there is one, or else the transcript. Values are first passed through
         * the parser registered for this qualifier name in
         * <code>qualifierParsers</code>, if there is one.
         *
         * @param qualifierName the qualifier name
         * @param propertyCvName the name of the CV to which the property term belongs.
         *          Should be either <code>feature_property</code> for built-in Chado
         *          properties, or <code>genedb_misc</code> for local additions.
         * @param propertyTermName the term name corresponding to the property to add.
         *          If it belongs to the <code>genedb_misc</code> CV, it should be a child of
         *          the term <code>genedb_misc:feature_props</code>.
         * @param isUnique whether this qualifier may appear only once.
         * @return the number of properties that were added
         * @throws DataError if <code>isUnique</code> is true and more than one distinct value is present
         */
        protected int processPropertyQualifier(String qualifierName, String propertyCvName, String propertyTermName, boolean isUnique) throws DataError {
            final TermParser registeredParser = qualifierParsers.get(qualifierName);
            return processPropertyQualifier(qualifierName, propertyCvName, propertyTermName,
                registeredParser, isUnique);
        }

        /**
         * As the four-argument form, for a qualifier that is allowed to
         * appear more than once (<code>isUnique</code> is false).
         *
         * @param qualifierName the qualifier name
         * @param propertyCvName the name of the CV to which the property term belongs
         * @param propertyTermName the term name corresponding to the property to add
         * @return the number of properties that were added
         * @throws DataError if a qualifier value cannot be processed
         */
        protected int processPropertyQualifier(String qualifierName, String propertyCvName, String propertyTermName) throws DataError {
            final TermParser registeredParser = qualifierParsers.get(qualifierName);
            final boolean mayRepeat = true;
            return processPropertyQualifier(qualifierName, propertyCvName, propertyTermName,
                registeredParser, !mayRepeat);
        }

        /**
         * Worker for the public forms: adds a property for each value of the
         * qualifier, expanding each raw value through <code>parser</code>
         * when one is supplied.
         *
         * @param qualifierName the qualifier name
         * @param propertyCvName the name of the CV to which the property term belongs
         * @param propertyTermName the term name corresponding to the property to add
         * @param parser used to normalise each raw value, or null to use the raw value as-is
         * @param isUnique whether this qualifier may appear only once
         * @return the number of properties that were added
         * @throws DataError if <code>isUnique</code> is true and more than one distinct value is present
         */
        private int processPropertyQualifier(String qualifierName, String propertyCvName, String propertyTermName,
                TermParser parser, boolean isUnique) throws DataError {
            final Set<String> seenValues = new HashSet<String>();
            int nextRank = 0;

            for (String rawValue : feature.getQualifierValues(qualifierName)) {
                if (parser == null) {
                    nextRank = processNormalisedProperty(qualifierName, propertyCvName, propertyTermName,
                        isUnique, seenValues, nextRank, rawValue);
                    continue;
                }
                for (String parsedValue : parser.parse(rawValue)) {
                    nextRank = processNormalisedProperty(qualifierName, propertyCvName, propertyTermName,
                        isUnique, seenValues, nextRank, parsedValue);
                }
            }

            return nextRank;
        }

        /**
         * Add a single, already-normalised property value to the focal
         * feature, unless the same value has been seen before (in which case
         * a warning is logged and the value is ignored).
         *
         * @param qualifierName the qualifier name, used in messages
         * @param propertyCvName the name of the CV to which the property term belongs
         * @param propertyTermName the term name corresponding to the property to add
         * @param isUnique whether this qualifier may appear only once
         * @param values the values added so far; the new value is added to it
         * @param rank the rank to give the new property
         * @param normalisedValue the value to add
         * @return the rank to use for the next property: <code>rank + 1</code> if
         *         a property was added, otherwise <code>rank</code>
         * @throws DataError if <code>isUnique</code> is true and a different value has already been added
         */
        private int processNormalisedProperty(String qualifierName, String propertyCvName,
                String propertyTermName, boolean isUnique, Set<String> values, int rank,
                String normalisedValue) throws DataError {
            if (values.contains(normalisedValue)) {
                logger.warn(String.format("Qualifier /%s=\"%s\" appears more than once on feature at line %d. Ignoring subsequent occurrences.",
                    qualifierName, normalisedValue, this.feature.lineNumber));
                return rank;
            }

            final boolean anotherValueExists = !values.isEmpty();
            if (isUnique && anotherValueExists) {
                throw new DataError(String.format("More than one /%s qualifier found", qualifierName));
            }

            logger.debug(String.format("Adding %s:%s '%s' for transcript",
                            propertyCvName, propertyTermName, normalisedValue));
            values.add(normalisedValue);
            focalFeature.addFeatureProp(normalisedValue, propertyCvName, propertyTermName, rank);
            return rank + 1;
        }

        /**
         * Add a CV term to the focal feature for each value of the given
         * qualifier, using the parser registered for the qualifier name in
         * <code>qualifierParsers</code>, if there is one.
         *
         * @param qualifierName the qualifier name
         * @param cvName the name of the CV to which the terms belong
         * @param dbName the database name passed through to <code>addCvTerm</code>
         * @param createTerms passed through to <code>addCvTerm</code>
         * @throws DataError if a term cannot be found or added
         */
        protected void processCvTermQualifier(String qualifierName, String cvName, String dbName, boolean createTerms)
                throws DataError {
            final TermParser registeredParser = qualifierParsers.get(qualifierName);
            processCvTermQualifier(qualifierName, cvName, dbName, createTerms, registeredParser);
        }

        /**
         * Add a CV term to the focal feature for each value of the given
         * qualifier. When a parser is supplied, each raw value is expanded
         * through it and every resulting term is trimmed before use;
         * otherwise the raw value is used unchanged.
         *
         * @param qualifierName the qualifier name
         * @param cvName the name of the CV to which the terms belong
         * @param dbName the database name passed through to <code>addCvTerm</code>
         * @param createTerms passed through to <code>addCvTerm</code>
         * @param termParser used to normalise each raw value, or null for none
         * @throws DataError if a term cannot be found or added
         */
        protected void processCvTermQualifier(String qualifierName, String cvName, String dbName,
                boolean createTerms, TermParser termParser)
                throws DataError {

            final Set<String> seenTerms = new HashSet<String>();
            for (String rawTerm : feature.getQualifierValues(qualifierName)) {
                if (termParser == null) {
                    processNormalisedCvTermQualifier(qualifierName, cvName, dbName, createTerms,
                        seenTerms, rawTerm, rawTerm);
                    continue;
                }
                for (String parsedTerm : termParser.parse(rawTerm)) {
                    final String trimmedTerm = parsedTerm.trim();
                    processNormalisedCvTermQualifier(qualifierName, cvName, dbName, createTerms,
                        seenTerms, rawTerm, trimmedTerm);
                }
            }
        }

        /**
         * Add a single, already-normalised CV term to the focal feature and
         * persist the resulting association. Terms are compared
         * case-insensitively; a term that has been seen before is ignored
         * with a warning.
         *
         * @param qualifierName the qualifier name, used in messages
         * @param cvName the name of the CV to which the term belongs
         * @param dbName the database name passed through to <code>addCvTerm</code>
         * @param createTerms passed through to <code>addCvTerm</code>
         * @param terms the lower-cased terms seen so far; the new term is added to it
         * @param term the raw qualifier value, used in messages
         * @param normalisedTerm the term to add
         * @throws DataError if <code>addCvTerm</code> returns null
         */
        private void processNormalisedCvTermQualifier(String qualifierName, String cvName,
                String dbName, boolean createTerms, Set<String> terms, String term, String normalisedTerm)
                throws DataError {
            final String dedupKey = normalisedTerm.toLowerCase();

            // Set.add returns false if the key was already present
            if (!terms.add(dedupKey)) {
                logger.warn(
                    String.format("The qualifier /%s=\"%s\" appears more than once. Ignoring subsequent copies.",
                        qualifierName, term));
                return;
            }

            final FeatureCvTerm association = focalFeature.addCvTerm(cvName, normalisedTerm, dbName, createTerms);
            if (association == null) {
                throw new DataError(
                    String.format("Failed to find term '%s' in CV '%s'", normalisedTerm, cvName));
            }
            session.persist(association);
        }

        /**
         * Load the exons, naming them after the transcript unique name taken
         * from the EMBL file (i.e. {@link #transcriptUniqueName} as it stands).
         *
         * @throws DataError if an exon cannot be loaded
         */
        private void loadExons() throws DataError {
            final String nameFromFile = transcriptUniqueName;
            loadExons(nameFromFile);
        }

        /**
         * Create and persist one exon for each part of this loader's location.
         * Exons are named
         * <code>&lt;actualTranscriptUniqueName&gt;:exon:&lt;n&gt;</code>, with
         * <code>n</code> counting from 1.
         * <p>
         * Specifying the transcript unique name here allows the modified name
         * (with appended .1) to be used in the exon names of singly spliced genes.
         *
         * @param actualTranscriptUniqueName the transcript name to use as the prefix of each exon name
         * @throws DataError if any part of the location is external (trans-splicing)
         */
        private void loadExons(String actualTranscriptUniqueName) throws DataError {
            int exonNumber = 1;
            for (EmblLocation part : location.getParts()) {
                if (part instanceof EmblLocation.External) {
                    throw new DataError("Found an external exon (trans-splicing). We can't handle that yet.");
                }

                final int fmin = part.getFmin();
                final int fmax = part.getFmax();
                final String exonUniqueName = String.format("%s:exon:%d", actualTranscriptUniqueName, exonNumber);
                exonNumber++;

                logger.debug(String.format("Creating exon '%s' at %d-%d", exonUniqueName, fmin, fmax));
                final AbstractExon newExon = transcript.createExon(exonUniqueName, fmin, fmax, phase);
                newExon.setObsolete(isObsolete);
                session.persist(newExon);
            }
        }

        /**
         * Process every <code>/GO</code> qualifier on the feature, creating GO
         * entries on the focal feature.
         * <p>
         * Each qualifier value is a list of <code>key=value</code>
         * sub-qualifiers separated by semicolons. Every key must be listed in
         * <code>goQualifiers</code>. The comment attached to the GO entries is
         * the value of the <code>autocomment</code> sub-qualifier of that
         * same <code>/GO</code> qualifier, or "From EMBL file" if it has none.
         * <p>
         * If <code>goTermErrorsAreNotFatal</code> is set, a {@link DataError}
         * raised while creating the GO entries is logged as a warning and the
         * remaining qualifiers are still processed; otherwise it propagates.
         *
         * @throws DataError if a qualifier cannot be parsed, has an unknown key
         *          or evidence code, or (when GO term errors are fatal) if
         *          the GO entries cannot be created
         */
        protected void processGO() throws DataError {

            /* TODO: Temp fix to avoid duplicate pubdbxref entries,
             * fix properly later using the object manager: nds
             * (Compiled once per call rather than once per db_xref sub-qualifier.) */
            final Pattern dbxrefPattern = Pattern.compile("(\\w+):(\\w+)");

            for (String go: feature.getQualifierValues("GO")) {
                // Reset for each /GO qualifier, so that an autocomment on one
                // qualifier does not leak into later ones that have none.
                String comment = "From EMBL file"; //default value for autocomment

                GoInstance goInstance = new GoInstance();
                for (String subqualifier: go.split("; ?")) {
                    subqualifier = subqualifier.trim();
                    if (subqualifier.length() == 0) {
                        continue;
                    }
                    int equalsIndex = subqualifier.indexOf('=');
                    if (equalsIndex == -1) {
                        throw new DataError(String.format("Failed to parse /GO=\"%s\"", go));
                    }

                    /* nds (24.6.2010):It is rare but sometimes the key here
                     * has acquired an unnecessary space either by data errors
                     * in Chado or in the EMBL file.
                     * Example: au tocomment="From EMBL file"
                     * Removing all whitespace from the key deals with this problem.
                     */
                    String key = subqualifier.substring(0, equalsIndex).replaceAll("\\s", "");

                    /* nds (25.6.2010): Sometimes the values have the same
                     * problem as above, but whitespace cannot be stripped
                     * from all the values, so they are left untouched. It's
                     * possible this was a problem specific to Plasmodium
                     * reichenowi.
                     */
                    String value = subqualifier.substring(equalsIndex + 1);

                    if (!goQualifiers.contains(key)) {
                        throw new DataError(String.format("Failed to parse /GO=\"%s\"; don't know what to do with %s=%s", go, key, value));
                    }

                    // Keys that are accepted but have no branch below (e.g. "term") are ignored
                    if (key.equals("GOid")) {
                        goInstance.setId(value);
                    } else if (key.equals("date")) {
                        goInstance.setDate(value);
                    } else if (key.equals("evidence")) {
                        GoEvidenceCode evidenceCode = GoEvidenceCode.parse(value);
                        if (evidenceCode == null) {
                            throw new DataError(String.format("Failed to parse GO evidence code '%s'", value));
                        }
                        goInstance.setEvidence(evidenceCode);
                    } else if (key.equals("qualifier")) {
                        goInstance.addQualifier(value);
                    } else if (key.equals("with") || key.equals("from")) {
                        goInstance.setWithFrom(value);
                    } else if (key.equals("aspect")) {
                        goInstance.setSubtype(value);
                    } else if (key.equals("attribution")) {
                        goInstance.setAttribution(value);
                    } else if (key.equals("residue")) {
                        // Only reachable if "residue" is listed in goQualifiers
                        goInstance.setResidue(value);
                    } else if (key.equals("db_xref")) {
                        goInstance.setRef(value);
                        // Remember PubMed accessions that have been seen (see TODO above)
                        Matcher matcher = dbxrefPattern.matcher(value);
                        if (matcher.matches() && (matcher.group(1).equalsIgnoreCase("PMID") || matcher.group(1).equalsIgnoreCase("PUBMED"))) {
                            seenPubAccessions.add(matcher.group(2));
                        }
                    } else if (key.equals("autocomment")) {
                        comment = value;
                    }
                }

                try {
                    featureUtils.createGoEntries(focalFeature, goInstance, comment, (DbXRef) null);
                } catch (DataError e) {
                    if (!goTermErrorsAreNotFatal) {
                        throw e;
                    }
                    // Best-effort mode: carry on, but leave a trace of what was skipped
                    logger.warn(String.format("Ignoring error while processing /GO=\"%s\": %s", go, e.getMessage()));
                }
            }
        }


        /* Here are some examples of /similarity qualifiers from chromosome 1 of Trypanosoma brucei:

            FT                   /similarity="blastp; SWALL:Q26723 (EMBL:M20871);
            FT                   Trypanosoma brucei brucei; variant-specific antigen;
            FT                   ESAG3; ; id=70%; ; E()=2e-42; score=438; ; ;"

            FT                   /similarity="fasta; SWALL:P26328 (EMBL:56768); Trypanosoma
            FT                   brucei brucei; variant surface glycoprotein ILTat 1.23
            FT                   precursor; ; length 532 aa; id=30.35%; ungapped id=32.34%;
            FT                   E()=1.2e-34; ; 537 aa overlap; query 9-528 aa; subject
            FT                   10-530 aa"

            FT                   /similarity="fasta; SWALL:O97352 (EMBL:AJ012199);
            FT                   Trypanosoma brucei; ILTat 1.61 metacyclic VSG protein; ;
            FT                   length 518 aa; id=29.83%; ungapped id=32.99%; E()=3.6e-31;
            FT                   ; 543 aa overlap; query 2-528 aa; subject 10-516 aa"

            FT                   /similarity="blastp; SWALL:Q8WPR3 (EMBL:AL671259);
            FT                   Trypanosoma brucei; ESAG3; H25N7.29; ; id=74%; ;
            FT                   E()=4e-28; score=310; ; ;"

          And here are some examples from Schistosoma mansoni Smp_scaff000604, to show how minimal
          the provided data can sometimes be:

            FT                   /similarity="blastp; RF:XP_970827.1; ; ; ; ; id=61.0%; ;
            FT                   E()=3.9e-21; ; ; ;"

            FT                   /similarity="blastp; RF:NP_956088.1; ; ; ; ; id=58.4%; ;
            FT                   E()=1.6e-17; ; ; ;"

            FT                   /similarity="blastp; GB:BAD74067.1; ; ; ; ; id=54.4%; ;
            FT                   E()=2.6e-17; ; ; ;"

        And from Leishmania major chromosome 32, to show the use of multiple secondary cross-references:

            FT                   /similarity="fasta; SWALL:Q9BUG7 (EMBL:BC002634,
            FT                   AAH02634); Homo sapiens; hypothetical protein; ; length
            FT                   322 aa; id=40.063%; ungapped id=46.691%; E()=1.2e-32; ;
            FT                   301 aa overlap; query 23-324 aa; subject 12-298 aa"

       I don't know how normal this is, but there's at least one with a line-break in the
       middle of the E() value! (Again from Leishmania major, chromosome 32.)

            FT                   /similarity="fasta; SWALL:EAA26969 (EMBL:AABX01000759,
            FT                   EAA26969); Neurospora crassa; hypothetical protein; ;
            FT                   length 335 aa; id=38.462%; ungapped id=47.273%; E()=6.4e-
            FT                   17; ; 303 aa overlap; query 3-306 aa; subject 13-321 aa"

       And here is an example from Eimeria tenella where the algorithm, program and version are
       all specified in the first field.

            FT                   /similarity="ComparativeBlastX_uni blastall v2.2.6;
            FT                   SWALL:A6WB28.1;  ;  ;  ; ; ; ; E()=19.0063; ; 58 aa overlap;
            FT                   query 24-81 aa; subject 875-1042 aa"


         */
        
        /*
         * Pattern for the whole value of a /similarity qualifier, a list of
         * fields separated by semicolons (see the examples above). It is
         * applied with Matcher.matches(), so the entire value must conform.
         * The algorithm, primary dbxref and E() fields are required by the
         * pattern; the capturing groups of the other fields are null when
         * the field is empty.
         *
         * Edited regex below to understand decimal points in the raw score, and
         * forward slashes & hyphens in db names and accessions. Matching this
         * using one long regex like the one below is probably not sustainable.
         * TODO: Find better alternative
         * nds, 26 Oct 2011
         */
        
        private final Pattern similarityPattern = Pattern.compile(   
        "(\\w+|\\w+ +\\w+ +v[\\d.]+);" +                                                        // 1.     Algorithm, e.g. fasta, blastp
        "\\s*([\\w+\\-/]+):([\\-\\w.]+)" +                                                      // 2,3.   Primary dbxref, e.g. SWALL:Q26723
        "(?:\\s+\\(([\\w+\\-/]+):([\\-\\w.]+(?:,\\s*(?:[\\w+\\-/]+:)?[\\-\\w.]+)*)\\))?;" +     // 4,5.   Optional secondary dbxrefs, e.g. "EMBL:M20871", "EMBL:BC002634, AAH02634"
        "\\s*([^;]+)?;" +                                                                       // 6.     Organism name
        "\\s*([^;]+)?;" +                                                                       // 7.     Product name
        "\\s*([^;]+)?;" +                                                                       // 8.     Gene name
        "\\s*(?:length\\s+(\\d+)\\s+aa)?;" +                                                    // 9.     Optional match length
        "\\s*(?:id=(\\d{1,3}(?:\\.\\d{1,3})?)%)?;" +                                            // 10.    Optional degree of identity (percentage)
        "\\s*(?:ungapped\\s+id=(\\d{1,3}(?:\\.\\d{1,3})?)%)?;" +                                // 11.    Optional ungapped identity (percentage)
        "\\s*E\\(\\)=(\\d*(?:\\.\\d+)?(?:e[+-]? ?\\d+)?);" +                                    // 12.    E-value (a space is tolerated inside the exponent, e.g. after a line-break)
        "\\s*(?:score=(\\d+\\.*\\d*))?;" +                                                      // 13.    Optional score
        "\\s*(?:(\\d+)\\s+aa\\s+overlap)?;" +                                                   // 14.    Optional overlap length (integer)
        "\\s*(?:query\\s+(\\d+)-\\s*(\\d+) aa)?;" +                                             // 15,16. Optional query location
       "\\s*(?:subject\\s+(\\d+)-\\s*?(\\d+) aa)?");                                            // 17,18. Optional subject location

        /**
         * Process each <code>/similarity</code> qualifier on the feature in
         * turn.
         *
         * @throws DataError if any of the qualifier values cannot be processed
         */
        protected void processSimilarityQualifiers() throws DataError {
            for (String rawSimilarity : feature.getQualifierValues("similarity")) {
                processSimilarityQualifier(rawSimilarity);
            }
        }

        /**
         * The number of /similarity qualifiers processed so far for each primary DbXRef,
         * keyed by the string form of the DbXRef. Used to build unique similarity identifiers.
         */
        private Map<String,Integer> numberOfSimilaritiesByPrimaryDbXRef = new HashMap<String,Integer>();

        /**
         * Parse the value of a single <code>/similarity</code> qualifier and add the
         * resulting {@link Similarity} to the focal feature.
         * <p>
         * The value is matched against <code>similarityPattern</code>, whose groups are used
         * as follows: 1 = program, 2/3 = primary database and accession, 4/5 = secondary
         * database and comma-separated accessions, 6 = organism, 7 = product, 8 = gene name,
         * 9 = length, 10 = id, 11 = ungapped id, 12 = E-value, 13 = score, 14 = overlap,
         * 15-16 = query range, 17-18 = subject (target) range.
         *
         * @param similarityString the raw value of the qualifier
         * @throws DataError if the value cannot be parsed, a numeric field is malformed,
         *          or a referenced database cannot be found
         */
        private void processSimilarityQualifier(String similarityString) throws DataError {
            Matcher matcher = similarityPattern.matcher(similarityString);
            if (!matcher.matches()) {
                throw new DataError(String.format("Failed to parse /similarity=\"%s\"", similarityString));
            }

            Similarity similarity = new Similarity();
            String program = matcher.group(1);
            if (!similarityAnalysisByProgram.containsKey(program)) {
                logger.trace(String.format("Creating Analysis object for program '%s'", program));
                Analysis analysis = new Analysis();
                if (program.indexOf(' ') > 0) {
                    // Program string contains spaces, so it's of the form "algorithm program version"
                    String[] splitProgram = program.split(" +");
                    if (splitProgram.length != 3) {
                        throw new DataError("Unexpected problem parsing similarity program: " + program);
                    }
                    analysis.setAlgorithm(splitProgram[0]);
                    analysis.setProgram  (splitProgram[1]);
                    analysis.setProgramVersion(splitProgram[2]);
                } else {
                    analysis.setProgram(program);
                    analysis.setProgramVersion("unknown");
                }
                similarityAnalysisByProgram.put(program, analysis);
            }
            Analysis analysis = similarityAnalysisByProgram.get(program);
            session.saveOrUpdate(analysis);

            similarity.setAnalysis(analysis);
            DbXRef primaryDbXRef = objectManager.getDbXRef(matcher.group(2), matcher.group(3));
            if (primaryDbXRef == null) {
                throw new DataError(String.format("Could not find database '%s' for primary dbxref of /similarity", matcher.group(2)));
            }
            similarity.setPrimaryDbXRef(primaryDbXRef);

            {
                // Set the unique identifier to something unique: the transcript name, the
                // primary dbxref, and a per-dbxref running count.
                String primaryDbXRefString = primaryDbXRef.toString();
                Integer previousCount = numberOfSimilaritiesByPrimaryDbXRef.get(primaryDbXRefString);
                int numberOfSimilarities = (previousCount == null) ? 1 : previousCount + 1;
                numberOfSimilaritiesByPrimaryDbXRef.put(primaryDbXRefString, numberOfSimilarities);
                similarity.setUniqueIdentifier(String.format("%s_%s_%d", transcriptUniqueName, primaryDbXRefString, numberOfSimilarities));
            }

            if (matcher.group(4) != null) {
                String dbName = matcher.group(4);
                for (String accession: matcher.group(5).split(",\\s*")) {
                    // An accession may carry its own "db:" prefix, which then also
                    // applies to the accessions that follow it.
                    int colonIndex = accession.indexOf(':');
                    if (colonIndex >= 0) {
                        dbName = accession.substring(0, colonIndex);
                        accession = accession.substring(colonIndex + 1);
                    }
                    DbXRef secondaryDbXRef = objectManager.getDbXRef(dbName, accession);
                    if (secondaryDbXRef == null) {
                        // Report the database that was actually looked up, which may differ from group 4
                        throw new DataError(String.format("Could not find database '%s' for secondary dbxref of /similarity", dbName));
                    }
                    similarity.addDbXRef(secondaryDbXRef);
                }
            }

            // These three may be null, which is okay
            similarity.setOrganismName(matcher.group(6));
            similarity.setProduct(matcher.group(7));
            similarity.setGeneName(matcher.group(8));

            if (matcher.group(9) != null) {
                try {
                    similarity.setLength(Integer.parseInt(matcher.group(9)));
                } catch (NumberFormatException e) {
                    throw new DataError("Failed to parse length field of /similarity: " + matcher.group(9));
                }
            }

            if (matcher.group(10) != null) {
                try {
                    similarity.setId(Double.parseDouble(matcher.group(10)));
                } catch (NumberFormatException e) {
                    throw new DataError("Failed to parse id field of /similarity: " + matcher.group(10));
                }
            }

            if (matcher.group(11) != null) {
                try {
                    similarity.setUngappedId(Double.parseDouble(matcher.group(11)));
                } catch (NumberFormatException e) {
                    throw new DataError("Failed to parse ungapped id field of /similarity: " + matcher.group(11));
                }
            }

            // An E-value written as a bare exponent (e.g. "e-10") is read as "1e-10"
            String eValueString = matcher.group(12);
            if (eValueString.startsWith("e") || eValueString.startsWith("E")) {
                eValueString = "1" + eValueString;
            }
            try {
                similarity.setEValue(Double.parseDouble(eValueString.replaceAll("\\s+", "")));
            } catch (NumberFormatException e) {
                throw new DataError("Failed to parse E() field of /similarity: " + eValueString);
            }

            if (matcher.group(13) != null) {
                try {
                    similarity.setRawScore(Double.parseDouble(matcher.group(13)));
                } catch (NumberFormatException e) {
                    throw new DataError("Failed to parse score field of /similarity: " + matcher.group(13));
                }
            }

            if (matcher.group(14) != null) {
                try {
                    similarity.setOverlap(Integer.parseInt(matcher.group(14)));
                } catch (NumberFormatException e) {
                    throw new DataError("Failed to parse overlap field of /similarity: " + matcher.group(14));
                }
            }

            if (matcher.group(15) != null) {
                try {
                    similarity.setQueryStart(Integer.parseInt(matcher.group(15)));
                    similarity.setQueryEnd(Integer.parseInt(matcher.group(16)));
                } catch (NumberFormatException e) {
                    throw new DataError(String.format("Failed to parse query location of /similarity: %s-%s", matcher.group(15), matcher.group(16)));
                }
            }

            if (matcher.group(17) != null) {
                try {
                    similarity.setTargetStart(Integer.parseInt(matcher.group(17)));
                    similarity.setTargetEnd(Integer.parseInt(matcher.group(18)));
                } catch (NumberFormatException e) {
                    throw new DataError(String.format("Failed to parse subject location of /similarity: %s-%s", matcher.group(17), matcher.group(18)));
                }
            }

            focalFeature.addSimilarity(similarity);
        }


        /*
         * Matches one "key=value" subqualifier of a /controlled_curation value, starting
         * where the previous match ended (\G). Group 1 is the key (everything up to the
         * first '='), group 2 is the value (everything up to the next ';'). Each
         * subqualifier is terminated by ';' or by the end of the input.
         */
        private final Pattern subqualifierPattern = Pattern.compile("\\G\\s*([^=]+)=\\s*([^;]*)\\s*(?:;|\\z)");

        /**
         * Process the <code>/curation</code> qualifier as a property, then process the
         * <code>/controlled_curation</code> qualifiers in either sloppy or strict mode,
         * depending on <code>sloppyControlledCuration</code>.
         *
         * @throws DataError if any of the qualifiers cannot be processed
         */
        protected void processCuration() throws DataError {
            processPropertyQualifier("curation", "genedb_misc", "curation");

            if (!sloppyControlledCuration) {
                processControlledCurationStrict();
                return;
            }
            processControlledCurationSloppy();
        }
        /**
         * Sloppy handling of <code>/controlled_curation</code>: each value is stored
         * verbatim (wrapped in square brackets) as an additional <code>curation</code>
         * property, ranked after the existing <code>/curation</code> values. Any non-empty
         * <code>db_xref</code> subqualifier is also added as a dbxref of the focal feature;
         * a value without a colon is taken to be a PMID.
         *
         * @throws DataError if a dbxref cannot be added
         */
        private void processControlledCurationSloppy() throws DataError {
            // Continue the ranks where the ordinary /curation values left off
            int nextRank = feature.getQualifierValues("curation").size();
            for (String controlledCuration: feature.getQualifierValues("controlled_curation")) {
                String curation = String.format("[%s]", controlledCuration);
                logger.trace(String.format("Sloppy curation: adding /curation=\"%s\" with rank %d", curation, nextRank));
                focalFeature.addFeatureProp(curation, "genedb_misc", "curation", nextRank);
                nextRank++;

                Matcher fields = subqualifierPattern.matcher(controlledCuration);
                while (fields.find()) {
                    String name = fields.group(1).toLowerCase();
                    String content = fields.group(2);

                    if (!name.equals("db_xref") || content.length() == 0) {
                        continue;
                    }

                    String dbxref = (content.indexOf(":") < 0) ? "PMID:" + content : content;
                    logger.trace(String.format("Sloppy controlled_curation: adding dbxref for '%s'", dbxref));
                    addDbXRefs(dbxref);
                }
            }
        }

        private Set<String> subqualifiers = new HashSet<String>() {{
            Collections.addAll(this,
                "term", "cv", "qualifier", "evidence", "db_xref", "residue", "attribution", "date");
        }};

        private void processControlledCurationStrict() throws DataError {
            Set<String> seenQualifiedTerms = new HashSet<String>();
            for (String controlledCuration: feature.getQualifierValues("controlled_curation")) {
                Matcher matcher = subqualifierPattern.matcher(controlledCuration);
                Map<String, String> valuesByKey = new HashMap<String, String>();
                while (matcher.find()) {
                    String key = matcher.group(1).toLowerCase();
                    String value = matcher.group(2);

                    if (subqualifiers.contains(key)) {
                        valuesByKey.put(key, value);
                    }
                }

                if (!valuesByKey.containsKey("term")) {
                    throw new DataError("/controlled_curation has no 'term' field");
                }
                String term = valuesByKey.get("term");
                String cv   = valuesByKey.containsKey("cv") ? valuesByKey.get("cv") : "CC_genedb_controlledcuration";

                String qualifiedTerm = String.format("%s:%s", cv, term);
                if (seenQualifiedTerms.contains(qualifiedTerm)) {
                    logger.warn(String.format(
                        "There is more than one /controlled_curation qualifier with term '%s' in %s feature on line %d." +
                        "Ignoring subsequent occurences.",
                        qualifiedTerm, feature.type, feature.lineNumber));
                    continue;
                }
                seenQualifiedTerms.add(qualifiedTerm);

                logger.trace(String.format("/controlled_curation: adding term '%s:%s' to %s",
                    cv, term, focalFeature));
                FeatureCvTerm featureCvTerm = focalFeature.addCvTerm(cv, term);

                featureCvTerm.addPropIfNotNull("feature_property", "date",   valuesByKey.get("date"));
                featureCvTerm.addPropIfNotNull("genedb_misc", "attribution", valuesByKey.get("attribution"));
                featureCvTerm.addPropIfNotNull("genedb_misc", "evidence",    valuesByKey.get("evidence"));
                featureCvTerm.addPropIfNotNull("genedb_misc", "qualifier",   valuesByKey.get("qualifier"));

                if (valuesByKey.containsKey("db_xref")) {
                    addDbXRefs(featureCvTerm, valuesByKey.get("db_xref"));
                }

                session.persist(featureCvTerm);
            }
        }

        /*
         * Matches a dbxref of the form "db:accession". Group 1 is the database name
         * (everything before the first colon), group 2 is the accession (the rest,
         * which may be empty or contain further colons).
         */
        private Pattern dbxrefPattern = Pattern.compile("([^:]+):(.*)");
        /**
         * Attach each of the given database cross-references to the focal feature.
         * @param dbxrefs one or more <code>db:accession</code> references, separated by pipe characters
         * @throws DataError if a reference cannot be parsed or its database does not exist
         */
        protected void addDbXRefs(String dbxrefs) throws DataError {
            String[] references = dbxrefs.split("\\|");
            for (int i = 0; i < references.length; i++) {
                addDbXRef(focalFeature, references[i]);
            }
        }

        /**
         * Parse a <code>db:accession</code> string and attach the reference to the given object.
         * @param target the object that should receive the reference
         * @param dbxref the reference, in the form <code>db:accession</code>
         * @throws DataError if the string is not of that form or the database does not exist
         */
        private void addDbXRef(HasPubsAndDbXRefs target, String dbxref) throws DataError {
            Matcher parsed = dbxrefPattern.matcher(dbxref);
            if (parsed.matches()) {
                addDbXRef(target, parsed.group(1), parsed.group(2));
                return;
            }
            throw new DataError(String.format("db_xref '%s' is not of the form database:accession", dbxref));
        }

        /**
         * Attach a database cross-reference, given as separate database name and accession,
         * to the given object. References to the <code>PMID</code> database are added as
         * publications; all others are added as dbxrefs and persisted.
         * @param target the object that should receive the reference
         * @param dbName the name of the database
         * @param accession the accession within that database
         * @throws DataError if the object manager has no DbXRef for this database
         */
        private void addDbXRef(HasPubsAndDbXRefs target, String dbName, String accession) throws DataError {
            DbXRef dbXRef = objectManager.getDbXRef(dbName, accession);
            if (dbXRef == null) {
                String message = String.format("Database '%1$s' does not exist (for dbxref '%1$s:%2$s')",
                    dbName, accession);
                throw new DataError(message);
            }

            if (!dbName.equals("PMID")) {
                session.persist(target.addDbXRef(dbXRef));
                return;
            }

            // PMID is a special case; these are stored as FeaturePubs
            addPub(target, accession, dbXRef);
        }

        /**
         * Attach a PubMed publication to the given object. The publication is obtained
         * from the object manager under the name <code>PMID:&lt;accession&gt;</code>, the
         * DbXRef is added to it, and both new links are persisted.
         * @param target the object that should receive the publication
         * @param accession the PubMed accession
         * @param dbXRef the DbXRef for this accession, to be added to the publication
         */
        private void addPub(HasPubsAndDbXRefs target, String accession, DbXRef dbXRef) {
            logger.trace(String.format("Adding publication id '%s' to %s", accession, target.toString()));

            String pubName = String.format("PMID:%s", accession);
            Pub publication = objectManager.getPub(pubName, "unfetched");
            session.persist(publication.addDbXRef(dbXRef, true));
            session.persist(target.addPub(publication));
        }

        /** PubMed accessions already added to the focal feature by {@link #addPub(String)}. */
        private Set<String> seenPubAccessions = new HashSet<String>();

        /**
         * Attach a PubMed publication to the focal feature, unless the same accession
         * has already been added through this method, in which case it is ignored
         * and an informational message is logged.
         * NOTE(review): unlike addDbXRef, the DbXRef returned by the object manager is
         * not null-checked here before use.
         * @param accession the PubMed accession
         */
        private void addPub(String accession) {
            if (!seenPubAccessions.contains(accession)) {
                DbXRef pmidXRef = objectManager.getDbXRef("PMID", accession);
                addPub(focalFeature, accession, pmidXRef);
                seenPubAccessions.add(accession);
                return;
            }

            logger.info(String.format(
                "Ignoring duplicate publication with accession '%s' on %s feature at line %d",
                accession, feature.type, feature.lineNumber));
        }

        /*
         * A /literature or /citation value: an optional "PMID:" prefix, the numeric
         * PubMed id (group 1), and optionally a semicolon followed by arbitrary text.
         */
        private Pattern literaturePattern = Pattern.compile("(?:PMID:)?\\s*(\\d+)(?:;.*)?");

        /**
         * Add a publication to the focal feature for each <code>/literature</code> and
         * <code>/citation</code> qualifier.
         * @throws DataError if a value is not of the expected form
         */
        protected void processLiterature() throws DataError {
            for (String citation: feature.getQualifierValues("literature", "citation")) {
                Matcher parsed = literaturePattern.matcher(citation);
                if (parsed.matches()) {
                    addPub(parsed.group(1));
                } else {
                    throw new DataError("Failed to parse literature/citation qualifier: " + citation);
                }
            }
        }

        /**
         * Attach each of the given database cross-references to a FeatureCvTerm.
         * @param featureCvTerm the FeatureCvTerm that should receive the references
         * @param dbxrefs one or more <code>db:accession</code> references, separated by pipe characters
         * @throws DataError if a reference cannot be parsed or its database does not exist
         */
        protected void addDbXRefs(FeatureCvTerm featureCvTerm, String dbxrefs) throws DataError {
            String[] references = dbxrefs.split("\\|");
            for (int i = 0; i < references.length; i++) {
                addDbXRef(featureCvTerm, references[i]);
            }
        }

        /**
         * Use the qualifiers of the CDS feature to add various bits of annotation
         * to the transcript (or to the polypeptide, if there is one). Specifically,
         * add synonyms, properties and products, followed by dbxrefs, GO terms,
         * similarities, curation and literature references.
         * <p>
         * The order of the steps matters: the comment rank obtained from the
         * <code>/note</code> qualifiers is incremented by each later step that adds
         * a further comment.
         *
         * @throws DataError if any qualifier cannot be processed
         */
        protected void processTranscriptQualifiers() throws DataError {

            // Fail early with a clear message, before the synonyms are added
            checkForPreviousSystematicIdEqualToSystematicId();

            addTranscriptSynonymsFromQualifier("synonym", "synonym", true);
            addTranscriptSynonymsFromQualifier("previous_systematic_id", "previous_systematic_id", true);

            // commentRank is the rank used for each further comment added below
            // (presumably the next free rank after the /note values -- confirm against processPropertyQualifier)
            int commentRank = processPropertyQualifier("note", "feature_property", "comment");
            for (String name: qualifierProperties) {
                processPropertyQualifier(name, "genedb_misc", name, uniqueQualifiers.contains(name));
            }

            // A /translation qualifier supplies the residues directly: strip whitespace,
            // upper-case, and record as a comment that the translation came from the file.
            for (String translation: feature.getQualifierValues("translation")) {
                translation = translation.replaceAll("\\s+", "");
                translation = translation.toUpperCase();
                logger.debug(String.format("Setting translation to sequence from EMBL file : %s", translation));
                focalFeature.setResidues(translation);
                focalFeature.addFeatureProp("Translation loaded from EMBL", "feature_property", "comment", commentRank++);
            }

            addColourToExons();

            processCvTermQualifier("class", "RILEY", "RILEY", false, normaliseRileyNumber);
            processCvTermQualifier("product", "genedb_products", "PRODUCT", true);

            // The /label qualifier is preserved as a comment
            String label = feature.getQualifierValue("label");
            if (label != null) {
                focalFeature.addFeatureProp(String.format("/label=%s", label), "feature_property", "comment", commentRank++);
            }

            if (taxonomicDivision.equals("PRO")) {
                // Bacteria don't have splicing, so a CDS feature is a gene and
                // a transcript and that is the end of it. One or more /gene
                // or /synonym qualifiers may be used to indicate synonyms.
                addTranscriptSynonymsFromQualifier("gene", "synonym", true);
            }

            // A /partial qualifier is recorded as a "partial" comment
            if (feature.hasQualifier("partial")) {
                logger.trace(String.format("Marking feature '%s' as partial", focalFeature.getUniqueName()));
                focalFeature.addFeatureProp("partial", "feature_property", "comment", commentRank++);
            }

            // Each /db_xref value may itself be a pipe-separated list
            for (String dbxrefs: feature.getQualifierValues("db_xref")) {
                addDbXRefs(dbxrefs);
            }

            processGO();
            processSimilarityQualifiers();
            processCuration();
            processLiterature();
        }

        /**
         * Copy the <code>/colour</code> qualifier of the feature, if it has one, onto
         * every exon of the transcript as a <code>genedb_misc:colour</code> property.
         * If a parser is registered for <code>colour</code>, the value is normalised first.
         *
         * @throws DataError if the colour value cannot be normalised
         */
        protected void addColourToExons() throws DataError {
            String rawColour = feature.getQualifierValue("colour");
            if (rawColour == null) {
                return;
            }

            TermParser parser = qualifierParsers.get("colour");
            String exonColour;
            if (parser == null) {
                // No parser registered: use the value as written
                exonColour = rawColour;
            } else if (parser instanceof TermNormaliser) {
                exonColour = ((TermNormaliser) parser).normalise(rawColour);
            } else {
                throw new RuntimeException("The /colour parser is not a TermNormaliser?!");
            }

            logger.trace(String.format("Adding /colour=\"%s\" to exons of '%s'", exonColour, transcript.getUniqueName()));

            for (AbstractExon exon: transcript.getExons()) {
                logger.trace(String.format("Adding /colour=\"%s\" to exon '%s'", exonColour, exon.getUniqueName()));
                exon.addFeatureProp(exonColour, "genedb_misc", "colour", 0);
            }
        }

        /**
         * Reject a feature whose /previous_systematic_id repeats its own /systematic_id.
         * Checking for this explicitly gives a clear error; otherwise the resulting
         * constraint violation is difficult to understand and track down.
         * Features with no /systematic_id are not checked.
         * @throws DataError if any /previous_systematic_id equals the /systematic_id
         */
        private void checkForPreviousSystematicIdEqualToSystematicId() throws DataError {
            String systematicId = feature.getQualifierValue("systematic_id");
            if (systematicId != null) {
                for (String previousId: feature.getQualifierValues("previous_systematic_id")) {
                    boolean sameAsCurrent = systematicId.equals(previousId);
                    if (sameAsCurrent) {
                        throw new DataError("Feature has /previous_systematic_id with the same value as /systematic_id");
                    }
                }
            }
        }
    }


    /**
     * Parse a property value. What this means will depend on the specific parser used.
     * The parser may return multiple values, e.g. for semicolon-separated product terms.
     */
    private static interface TermParser {
        /**
         * Parse a raw qualifier value into one or more terms.
         * @param term the raw value
         * @return the parsed term or terms
         * @throws DataError if the value is not acceptable to this parser
         */
        public Iterable<String> parse(String term) throws DataError;
    }

    /**
     * A TermParser for the common case where parsing yields exactly one string.
     * Subclasses implement {@link #normalise}; what normalisation means depends on
     * the specific normaliser.
     */
    private static abstract class TermNormaliser implements TermParser {
        /**
         * Normalise a single value.
         * @param term the raw value
         * @return the normalised value
         * @throws DataError if the value is not acceptable to this normaliser
         */
        public abstract String normalise(String term) throws DataError;

        /**
         * Parse by normalising: the result holds just the normalised value.
         */
        public final Iterable<String> parse(String term) throws DataError {
            String normalised = normalise(term);
            return Collections.singleton(normalised);
        }
    }

    /**
     * A term parser for products, which splits the value wherever a semicolon
     * is followed by whitespace.
     */
    private static final TermParser productParser = new TermParser() {
        public Iterable<String> parse(String term) throws DataError {
            String[] products = term.split(";\\s+");
            return IterableArray.fromArray(products);
        }
    };

    /**
     * A term normaliser for Riley classification numbers: three dot-separated
     * numbers of one or two digits each. Leading zeroes are removed, so that
     * for example "2.2.07" becomes "2.2.7".
     */
    private static final TermNormaliser normaliseRileyNumber = new TermNormaliser() {
        private final Pattern RILEY_PATTERN = Pattern.compile("(\\d{1,2})\\.(\\d{1,2})\\.(\\d{1,2})");
        @Override
        public String normalise(String term) throws DataError {
            Matcher riley = RILEY_PATTERN.matcher(term);
            if (riley.matches()) {
                // Parsing each component as an integer drops any leading zero
                int first  = Integer.parseInt(riley.group(1));
                int second = Integer.parseInt(riley.group(2));
                int third  = Integer.parseInt(riley.group(3));
                return String.format("%d.%d.%d", first, second, third);
            }
            throw new DataError(String.format("Failed to parse Riley number '%s'", term));
        }
    };

    /**
     * A term normaliser (and format validator) for integers: the value must be
     * parseable by {@link Integer#parseInt(String)}, and is returned in canonical
     * decimal form.
     */
    private static final TermNormaliser normaliseInteger = new TermNormaliser() {
        @Override
        public String normalise(String term) throws DataError {
            int value;
            try {
                value = Integer.parseInt(term);
            } catch (NumberFormatException e) {
                throw new DataError(String.format("Failed to parse integer '%s'", term));
            }
            return Integer.toString(value);
        }
    };

    /**
     * A list of the qualifiers that correspond directly to similarly-named
     * properties in the <code>genedb_misc</code> CV.
     */
    private static final List<String> qualifierProperties = new ArrayList<String>();
    /** Parsers to apply to the values of particular qualifiers, keyed by qualifier name. */
    private static final Map<String,TermParser> qualifierParsers = new HashMap<String,TermParser>();
    /**
     * Names of the qualifiers for which the "unique" flag is passed to processPropertyQualifier
     * (presumably meaning the qualifier may occur at most once -- confirm against that method).
     */
    private static final Set<String> uniqueQualifiers = new HashSet<String>();
    static {
        Collections.addAll(qualifierProperties,
            "method", "colour", "status",
            "blast_file", "blastn_file", "blastp+go_file", "blastp_file",
            "blastx_file", "fasta_file", "fastx_file", "tblastn_file",
            "tblastx_file", "clustalx_file", "sigcleave_file", "pepstats_file",
            "EC_number", "private");

        Collections.addAll(uniqueQualifiers, "colour", "status");

        // Colours must be integers; products are split into separate terms
        qualifierParsers.put("colour", normaliseInteger);
        qualifierParsers.put("product", productParser);

        // Some files (e.g. Streptococcus_pneumoniae_D39.embl) have things other than integers in /status.
        // qualifierParsers.put("status", normaliseInteger);
    }

    /**
     * Gene loader for CDS features. The gene and transcript names, gene name,
     * pseudo/obsolete flags and phase are all taken from the CDS feature.
     */
    class CDSLoader extends GeneLoader {
        /**
         * @param cdsFeature the CDS feature to load
         * @throws DataError if the /codon_start qualifier is not an integer between 1 and 3
         */
        public CDSLoader(FeatureTable.CDSFeature cdsFeature) throws DataError {
            super(cdsFeature);

            isPseudo = cdsFeature.isPseudo();
            isObsolete = cdsFeature.isObsolete();
            geneUniqueName = cdsFeature.getSharedId();
            transcriptUniqueName = cdsFeature.getUniqueName();

            if (!taxonomicDivision.equals("PRO")) {
                geneName = cdsFeature.getGeneName();
            } else {
                // Bacteria don't have splicing, so a CDS feature is a gene and
                // a transcript and that is the end of it. One or more /gene
                // qualifiers may be used to indicate synonyms. The primary_name
                // is optional, as usual.
                geneName = cdsFeature.getQualifierValue("primary_name");
            }

            // /codon_start is 1-based; the phase is stored 0-based
            String codonStart = cdsFeature.getQualifierValue("codon_start");
            if (codonStart != null) {
                try {
                    phase = Integer.parseInt(codonStart) - 1;
                } catch (NumberFormatException e) {
                    String message = String.format("Could not parse value of /codon_start qualifier ('%s')", codonStart);
                    throw new DataError(message);
                }
                boolean phaseInRange = (0 <= phase && phase <= 2);
                if (!phaseInRange) {
                    String message = String.format("Value of /codon_start qualifier out of range (%d)", phase+1);
                    throw new DataError(message);
                }
            }

            // With no shared id, the gene takes the name of its only transcript
            singlySpliced = (geneUniqueName == null);
            if (singlySpliced) {
                geneUniqueName = transcriptUniqueName;
            }
        }


        /**
         * @return the pseudogenic transcript class for a pseudo CDS, otherwise the mRNA class
         */
        @Override
        protected Class<? extends Transcript> getTranscriptClass() {
            if (isPseudo) {
                return PseudogenicTranscript.class;
            }
            return MRNA.class;
        }
    }

    /**
     * Load a CDS feature using a {@link CDSLoader}.
     * @param cdsFeature the CDS feature to load
     * @return the result of the loader's <code>load()</code> call
     * @throws DataError if the feature cannot be loaded
     */
    private Feature loadCDS(FeatureTable.CDSFeature cdsFeature) throws DataError {
        CDSLoader cdsLoader = new CDSLoader(cdsFeature);
        return cdsLoader.load();
    }


    /*
     * Loader for non-coding RNA features. The gene and the transcript share one
     * unique name, which is synthesised when the feature does not supply one.
     */
    private class NcRNALoader extends GeneLoader {
        private Class<? extends NcRNA> transcriptClass;
        private String type;

        /**
         * @param transcriptClass the transcript class to create for this feature
         * @param type the RNA type, used when a name has to be synthesised
         * @param feature the feature to load
         */
        public NcRNALoader(Class<? extends NcRNA> transcriptClass, String type,
                FeatureTable.Feature feature)
            throws DataError
        {
            super(feature);
            this.transcriptClass = transcriptClass;
            this.type = type;

            String sharedName = feature.getUniqueName(false);
            if (sharedName == null) {
                sharedName = makeSyntheticName();
            }
            geneUniqueName = sharedName;
            transcriptUniqueName = sharedName;
        }

        /**
         * Build a name from the top-level feature's name, the RNA type and a running
         * per-type index held in <code>syntheticNcRNAIndexByType</code>.
         */
        private String makeSyntheticName() {
            int index = syntheticNcRNAIndexByType.containsKey(type)
                ? 1 + syntheticNcRNAIndexByType.get(type)
                : 1;
            syntheticNcRNAIndexByType.put(type, index);
            return String.format("%s_%s%d", topLevelFeature.getUniqueName(), type, index);
        }

        /**
         * Add notes, anticodon (for tRNA classes only), colour, product, label, synonyms,
         * dbxrefs, curation and literature from the qualifiers of the feature.
         */
        @Override
        protected void processTranscriptQualifiers() throws DataError {
            int commentRank = processPropertyQualifier("note",  "feature_property", "comment");

            boolean isTRNA = TRNA.class.isAssignableFrom(transcriptClass);
            if (isTRNA) {
                processPropertyQualifier("anticodon", "feature_property", "anticodon", true);
            }

            processPropertyQualifier("colour", "genedb_misc", "colour", true);
            processCvTermQualifier("product", "genedb_products", "PRODUCT", true);
            addColourToExons();

            // The /label qualifier is preserved as a comment
            String label = feature.getQualifierValue("label");
            if (label != null) {
                logger.trace(String.format("Adding /label=\"%s\" as comment on '%s'",
                    label, focalFeature.getUniqueName()));
                String labelComment = String.format("/label=%s", label);
                focalFeature.addFeatureProp(labelComment, "feature_property", "comment", commentRank++);
            }

            if (taxonomicDivision.equals("PRO")) {
                // Bacteria don't have splicing, so the feature is a gene and
                // a transcript and that is the end of it. One or more /gene
                // or /synonym qualifiers may be used to indicate synonyms.
                addTranscriptSynonymsFromQualifier("gene",    "synonym", true);
                addTranscriptSynonymsFromQualifier("synonym", "synonym", true);
            }

            // Each /db_xref value may itself be a pipe-separated list
            for (String dbxrefList: feature.getQualifierValues("db_xref")) {
                addDbXRefs(dbxrefList);
            }

            processCuration();
            processLiterature();
        }

        @Override
        protected Class<? extends Transcript> getTranscriptClass() {
            return transcriptClass;
        }
    }

    /**
     * Load a non-coding RNA feature using an {@link NcRNALoader}.
     * @param rnaClass the transcript class to create
     * @param rnaType the RNA type, used if a name must be synthesised
     * @param feature the feature to load
     * @return the result of the loader's <code>load()</code> call
     * @throws DataError if the feature cannot be loaded
     */
    private Feature loadNcRNA(Class<? extends NcRNA> rnaClass, String rnaType,
            FeatureTable.Feature feature) throws DataError {
        NcRNALoader ncRNALoader = new NcRNALoader(rnaClass, rnaType, feature);
        return ncRNALoader.load();
    }

    /* UTR */
    /**
     * Create the UTR feature(s) described by a <code>3'UTR</code> or <code>5'UTR</code>
     * feature and attach them to the corresponding transcript. One UTR is created for
     * each part of the feature's location.
     * <p>
     * The transcript is looked up by the unique name of the UTR feature. If there is no
     * transcript of that name, the gene name is guessed by stripping a trailing
     * <code>.N:mRNA</code> or <code>:mRNA</code>, and the already-loaded transcripts named
     * <code>gene.N</code> or <code>gene.N:mRNA</code> (N a single digit) are considered.
     * Exactly one such transcript must exist.
     *
     * @param utrFeature the UTR feature to load
     * @return the UTR features created, in location order
     * @throws DataError if no transcript, or more than one candidate transcript, is found
     */
    private List<UTR> loadUTR(FeatureTable.Feature utrFeature) throws DataError {

        String utrType = utrFeature.type;
        EmblLocation utrLocation = utrFeature.location;
        String uniqueName = utrFeature.getUniqueName();

        logger.debug(String.format("Loading %s for '%s' at %s", utrType, uniqueName, utrLocation));

        Transcript transcript = transcriptsByUniqueName.get(uniqueName); // Straightforward case; transcript found by name

        /* Due to the various forms of old-fashioned transcript names (:mRNA, .\d:mRNA etc) it is not always
         * straightforward figuring out what the transcript should be for a UTR. We do a little guesswork here
         * to find the gene name and then look for a corresponding transcript from the list we have seen already.
         * This does not work for alternatively spliced genes where several transcripts match up to the gene name.
         * Revisit this after Smansoni has been loaded.
         * nds, 16th Nov 2010 & 26th Sep 2011
         */
        if (transcript == null) {
            // Try to figure out what the gene name is
            String possibleGeneName;
            Matcher withNumberAndMrna = Pattern.compile("(\\S+)\\.\\d+:mRNA").matcher(uniqueName);
            Matcher withOnlyMrna      = Pattern.compile("(\\S+):mRNA").matcher(uniqueName);
            if (withNumberAndMrna.matches()) {
                possibleGeneName = withNumberAndMrna.group(1);
            } else if (withOnlyMrna.matches()) {
                possibleGeneName = withOnlyMrna.group(1);
            } else {
                possibleGeneName = uniqueName;
            }
            logger.debug(String.format("The possible gene name for %s '%s' is '%s'",
                    utrType, uniqueName, possibleGeneName));

            // The gene name is data, not a regular expression, so it must be quoted;
            // the separating dot must be escaped so that it only matches a literal dot.
            Pattern candidateNamePattern = Pattern.compile(
                    Pattern.quote(possibleGeneName) + "\\.\\d(?::mRNA)?");

            List<String> possibleTranscriptNames = new ArrayList<String>();
            for (String candidateName: transcriptsByUniqueName.keySet()) {
                logger.trace(String.format("Considering transcript '%s'", candidateName));
                if (candidateNamePattern.matcher(candidateName).matches()) {
                    possibleTranscriptNames.add(candidateName);
                }
            }

            if (possibleTranscriptNames.isEmpty()) {
                throw new DataError(String.format("Could not find a transcript '%s' for %s", uniqueName, utrType));
            }
            if (possibleTranscriptNames.size() > 1) {
                // Alternative splicing: we cannot tell which transcript is meant
                throw new DataError(String.format("Multiple transcripts possible for %s '%s': %s",
                        utrType, uniqueName, possibleTranscriptNames));
            }

            // No alternative splicing
            String assumedTranscriptName = possibleTranscriptNames.get(0);
            transcript = transcriptsByUniqueName.get(assumedTranscriptName);
            logger.warn(String.format("Assuming %s is the transcript for this UTR for %s", assumedTranscriptName, uniqueName));
        }

        List<UTR> utrs = new ArrayList<UTR>();

        Class<? extends UTR> utrClass;
        if (utrType.equals("3'UTR")) {
            utrClass = ThreePrimeUTR.class;
        } else if (utrType.equals("5'UTR")) {
            utrClass = FivePrimeUTR.class;
        } else {
            throw new RuntimeException(String.format("Unrecognised UTR feature type '%s'", utrType));
        }

        // UTRs are named after the transcript: "<transcript>:3utr" or "<transcript>:5utr",
        // with ":<part>" appended when the location has more than one part.
        String utrBaseName = String.format("%s:%dutr", transcript.getUniqueName(), utrClass == ThreePrimeUTR.class ? 3 : 5);

        int part = 1;
        List<EmblLocation> utrParts = utrLocation.getParts();
        for (EmblLocation utrPartLocation: utrParts) {
            String utrUniqueName = utrBaseName;
            if (utrParts.size() > 1) {
                utrUniqueName += ":" + part;
            }

            logger.debug(String.format("Creating %s feature '%s' at %d-%d",
                    utrType, utrUniqueName, utrPartLocation.getFmin(), utrPartLocation.getFmax()));

            UTR utr = transcript.createUTR(utrClass, utrUniqueName, utrPartLocation.getFmin(), utrPartLocation.getFmax());
            utrs.add(utr);
            session.persist(utr);
            ++ part;
        }

        return utrs;
    }

    /* Setters and Spring stuff */
    /**
     * Set the OrganismDao.
     * @param organismDao the DAO to store in this loader
     */
    public void setOrganismDao(OrganismDao organismDao) {
        this.organismDao = organismDao;
    }

    /**
     * Set the ObjectManager. This is expected to be called by Spring.
     * We will inject the GeneralDao, CvDao and PubDao objects into the ObjectManager
     * ourselves from {@link #afterPropertiesSet}, so this ObjectManager need not have
     * them injected yet. This avoids circularity.
     *
     * @param objectManager the ObjectManager to store in this loader
     */
    public void setObjectManager(ObjectManager objectManager) {
        this.objectManager = objectManager;
    }

    /**
     * Set the SessionFactory.
     * @param sessionFactory the session factory to store in this loader
     */
    public void setSessionFactory(SessionFactory sessionFactory) {
        this.sessionFactory = sessionFactory;
    }

    /**
     * Set the GeneralDao, which {@link #afterPropertiesSet} passes on to the ObjectManager.
     * @param generalDao the DAO to store in this loader
     */
    public void setGeneralDao(GeneralDao generalDao) {
        this.generalDao = generalDao;
    }

    /**
     * Set the CvDao, which {@link #afterPropertiesSet} passes on to the ObjectManager.
     * @param cvDao the DAO to store in this loader
     */
    public void setCvDao(CvDao cvDao) {
        this.cvDao = cvDao;
    }

    /**
     * Set the PubDao, which {@link #afterPropertiesSet} passes on to the ObjectManager.
     * @param pubDao the DAO to store in this loader
     */
    public void setPubDao(PubDao pubDao) {
        this.pubDao = pubDao;
    }

    /**
     * Set the FeatureUtils.
     * @param featureUtils the FeatureUtils instance to store in this loader
     */
    public void setFeatureUtils(FeatureUtils featureUtils) {
        this.featureUtils = featureUtils;
    }

    /**
     * Finish wiring once all properties have been set: give the synonym manager
     * the ObjectManager, and give the ObjectManager the DAOs that were injected
     * into this loader.
     */
    public void afterPropertiesSet() {
        synonymManager.setObjectManager(objectManager);

        /*
         * We cannot set the DAOs of the objectManager
         * directly in Load.xml, because that creates a circular
         * reference that (understandably) causes Spring to
         * throw a tantrum. Thus we inject them into
         * here, and pass them to the ObjectManager after Spring
         * configuration.
         */
        objectManager.setGeneralDao(generalDao);
        objectManager.setCvDao(cvDao);
        objectManager.setPubDao(pubDao);
    }

}