NubBuilder.java example

Explorer

checklistbank-master
- checklistbank-cli
  - src
    - main
      - java
        org
        gbif
        checklistbank
        cli
        IndexerApp.java
        admin
        AdminCommand.java
        AdminConfiguration.java
        AdminOperation.java
        analysis
        AnalysisCommand.java
        AnalysisConfiguration.java
        AnalysisService.java
        DatasetIndexUpdater.java
        common
        Metrics.java
        NeoConfiguration.java
        NoneMatchingService.java
        RabbitBaseService.java
        RabbitDatasetService.java
        ZooKeeperConfiguration.java
        ZookeeperUtils.java
        crawler
        CrawlerCommand.java
        CrawlerConfiguration.java
        CrawlerService.java
        importer
        EmptyImportException.java
        Importer.java
        ImporterCommand.java
        ImporterConfiguration.java
        ImporterService.java
        matcher
        MatcherCommand.java
        MatcherConfiguration.java
        MatcherService.java
        model
        GraphFormat.java
        NameUsageNode.java
        RankedName.java
        UsageFacts.java
        normalizer
        ExtensionInterpreter.java
        IgnoreNameUsageException.java
        InsertMetadata.java
        NormalizationFailedException.java
        Normalizer.java
        NormalizerCommand.java
        NormalizerConfiguration.java
        NormalizerConstants.java
        NormalizerService.java
        NormalizerStats.java
        nubbuild
        NubBuildCommand.java
        NubConfiguration.java
        nubchanged
        BackboneDatasetUpdater.java
        NubChangedCommand.java
        NubChangedConfiguration.java
        NubChangedService.java
        registry
        RegistryCommand.java
        RegistryConfiguration.java
        RegistryService.java
        shell
        ShellCommand.java
        ShellConfiguration.java
        show
        ShowCommand.java
        ShowConfiguration.java
        kryo
        CliKryoFactory.java
        EnumSetSerializer.java
        ImmutableListSerializer.java
        NullSerializer.java
        TermSerializer.java
        URISerializer.java
        UUIDSerializer.java
        neo
        ImportDb.java
        Labels.java
        NeoInserter.java
        NeoProperties.java
        NotUniqueException.java
        NotUniqueRuntimeException.java
        RelType.java
        UsageDao.java
        printer
        DotPrinter.java
        GmlPrinter.java
        ListPrinter.java
        TabPrinter.java
        TreePrinter.java
        TxtPrinter.java
        XmlPrinter.java
        traverse
        AcceptedOnlyEvaluator.java
        ChunkingEvaluator.java
        LinneanRankEvaluator.java
        MultiRooIterator.java
        MultiRootNodeIterator.java
        MultiRootPathIterator.java
        NubMatchHandler.java
        RankEvaluator.java
        StartEndHandler.java
        TaxonomicOrder.java
        TaxonomicOrderExpander.java
        Traversals.java
        TreeIterables.java
        TreeIterablesSorted.java
        TreeWalker.java
        UsageMetricsHandler.java
        nub
        HomonymException.java
        IdGenerator.java
        IgnoreSourceUsageException.java
        NubBuilder.java
        NubDb.java
        ParentStack.java
        lookup
        NubMatchService.java
        model
        NubTags.java
        NubUsage.java
        NubUsageMatch.java
        SrcUsage.java
        source
        ClbSource.java
        ClbSourceList.java
        DwcaSource.java
        LoadSource.java
        NubSource.java
        NubSourceList.java
        validation
        AssertionEngine.java
        NeoAssertionEngine.java
        NubAssertions.java
        NubTreeValidation.java
        NubValidation.java
        PgAssertionEngine.java
        TreeRankValidation.java
    - test
      - java
        org
        gbif
        checklistbank
        cli
        BaseTest.java
        BaseTestIT.java
        JarTest.java
        MultiThreadingCliTest.java
        admin
        AdminCommandTest.java
        importer
        ClbConfigurationTest.java
        ImporterIT.java
        ImporterServiceIT.java
        ManualImport.java
        normalizer
        ExtensionInterpreterTest.java
        NormalizerStatsTest.java
        NormalizerTest.java
        nubchanged
        BackboneDatasetUpdaterTest.java
        kryo
        CliKryoFactoryTest.java
        neo
        ImportDbTest.java
        MultiThreadingNeoInserterTest.java
        NeoInserterTest.java
        TransactionTest.java
        UsageDaoTest.java
        traverse
        ChunkingEvaluatorTest.java
        NubMatchHandlerTest.java
        TaxonomicOrderTest.java
        TraversalsTest.java
        TreeWalkerTest.java
        UsageMetricsHandlerTest.java
        nub
        IdGeneratorTest.java
        ManualDbDump.java
        ManualNubBuild.java
        NubAssertionsTest.java
        NubBuilderIT.java
        NubDbTest.java
        NubNode.java
        NubTree.java
        NubTreeTest.java
        ParentStackTest.java
        source
        ClasspathSource.java
        ClasspathSourceList.java
        ClasspathSourceListTest.java
        ClasspathSourceTest.java
        ClbSourceListTest.java
        ClbSourceTest.java
        DwcaSourceTest.java
        RandomSource.java
        RandomSourceTest.java
        utils
        OccMetricsColAppender.java
        RunnableAdapter.java
- checklistbank-col
  - src
    - main
      - java
        org
        gbif
        checklistbank
        col
        ColAnnotationImport.java
    - test
      - java
        org
        gbif
        checklistbank
        col
        ColAnnotationImportTest.java
- checklistbank-common
  - src
    - main
      - java
        org
        gbif
        checklistbank
        authorship
        AuthorComparator.java
        BasionymGroup.java
        BasionymSorter.java
        LongestCommonSubstring.java
        SuffixArray.java
        YearComparator.java
        config
        ClbConfiguration.java
        GangliaConfiguration.java
        MetricModule.java
        RegistryServiceConfiguration.java
        iterable
        CloseableIterable.java
        CloseableIterator.java
        ColumnExtractor.java
        FutureIterator.java
        Ints.java
        StreamUtils.java
        logging
        LogContext.java
        model
        Citation.java
        Classification.java
        ColAnnotation.java
        DatasetCore.java
        Equality.java
        NameUsageWritable.java
        ParsedNameUsage.java
        RawUsage.java
        ScientificName.java
        TocEntry.java
        TreeContainer.java
        Usage.java
        UsageCount.java
        UsageExtensions.java
        UsageForeignKeys.java
        UsageRelated.java
        postgres
        TabMapperBase.java
        registry
        DatasetServiceFileImpl.java
        EmptyNetworkEntityService.java
        FileRegistryModule.java
        InstallationServiceEmptyImpl.java
        NetworkServiceEmptyImpl.java
        NodeServiceEmptyImpl.java
        OrganizationServiceEmptyImpl.java
        service
        CitationService.java
        ColAnnotationService.java
        DatasetAnalysisService.java
        DatasetImportService.java
        ImporterCallback.java
        ParsedNameService.java
        UsageService.java
        UsageSyncService.java
        utils
        CleanupUtils.java
        CloseableUtils.java
        MediaTypeUtils.java
        PropertiesUtils.java
        RankUtils.java
        ResourcesMonitor.java
        SciNameNormalizer.java
        SymmetricIdentityMatrix.java
    - test
      - java
        org
        gbif
        checklistbank
        authorship
        AuthorBucketerTest.java
        AuthorComparatorTest.java
        BasionymSorterTest.java
        LongestCommonSubstringTest.java
        YearComparatorTest.java
        iterable
        IntsTest.java
        model
        EqualityTest.java
        registry
        DatasetServiceFileImplTest.java
        FileRegistryModuleTest.java
        utils
        RankUtilsTest.java
        SciNameNormalizerTest.java
        SerdeTestUtils.java
        SymmetricIdentityMatrixTest.java
- checklistbank-mybatis-service
  - src
    - main
      - java
        org
        gbif
        checklistbank
        service
        mybatis
        CitationServiceMyBatis.java
        ColAnnotationServiceMyBatis.java
        DatasetAnalysisServiceMyBatis.java
        DatasetImportServiceMyBatis.java
        DatasetMetricsServiceMyBatis.java
        DescriptionServiceMyBatis.java
        DistributionServiceMyBatis.java
        IdentifierServiceMyBatis.java
        MultimediaServiceMyBatis.java
        NameUsageComponentServiceMyBatis.java
        NameUsageServiceMyBatis.java
        ParsedNameServiceMyBatis.java
        ReferenceServiceMyBatis.java
        SpeciesProfileServiceMyBatis.java
        TypeSpecimenServiceMyBatis.java
        UsageServiceMyBatis.java
        UsageSyncServiceMyBatis.java
        VernacularNameServiceMyBatis.java
        export
        Exporter.java
        RowHandler.java
        guice
        ChecklistBankServiceMyBatisModule.java
        InternalChecklistBankServiceMyBatisModule.java
        Mybatis.java
        liquibase
        DbSchemaUpdater.java
        mapper
        CitationMapper.java
        ColAnnotationMapper.java
        Common.java
        DatasetMapper.java
        DatasetMetricsMapper.java
        DescriptionMapper.java
        DistributionMapper.java
        IdentifierMapper.java
        MultimediaMapper.java
        NameUsageComponentMapper.java
        NameUsageMapper.java
        NameUsageMetricsMapper.java
        NubRelMapper.java
        ParsedNameMapper.java
        RawUsageMapper.java
        ReferenceMapper.java
        SpeciesProfileMapper.java
        TypeSpecimenMapper.java
        UsageCountMapper.java
        UsageMapper.java
        VerbatimNameUsageMapperJson.java
        VernacularNameMapper.java
        package-info.java
        postgres
        ArraySetIssueTypeHandler.java
        ArraySetNomenclaturalStatusTypeHandler.java
        ArraySetTypeHandler.java
        HstoreCountTypeHandler.java
        HstoreExtensionCountTypeHandler.java
        HstoreIssueCountCountTypeHandler.java
        HstoreKingdomCountCountTypeHandler.java
        HstoreLanguageCountCountTypeHandler.java
        HstoreOriginCountCountTypeHandler.java
        HstoreRankCountCountTypeHandler.java
        HstoreStringCountTypeHandler.java
        HstoreUUIDCountCountTypeHandler.java
        IntArrayPgWriter.java
        IntegerArrayTypeHandler.java
        tmp
        NameUsageReparser.java
    - test
      - java
        org
        gbif
        checklistbank
        service
        mybatis
        CitationServiceMyBatisIT.java
        ClbBatchServiceMyBatisIT.java
        ConcurrentCreateOrGetIT.java
        DatasetAnalysisServiceMyBatisIT.java
        DatasetImportServiceMyBatisIT.java
        DatasetImportServiceMyBatisTest.java
        DatasetMetricsServiceMyBatisIT.java
        DatasetMetricsServiceMyBatisTest.java
        DescriptionServiceMyBatisIT.java
        DistributionServiceMyBatisIT.java
        IdentifierServiceMyBatisIT.java
        MultimediaServiceMyBatisIT.java
        MyBatisServiceITBase.java
        NameUsageServiceMyBatisIT.java
        ParsedNameServiceMyBatisIT.java
        ReferenceServiceMyBatisIT.java
        SpeciesProfileServiceMyBatisIT.java
        TypeSpecimenServiceMyBatisTestIT.java
        UsageServiceMyBatisTest.java
        UsageSyncServiceMyBatisIT.java
        VernacularNameServiceMyBatisIT.java
        export
        ExporterIT.java
        ExporterTest.java
        mapper
        DatasetMapperTest.java
        DescriptionMapperTest.java
        DistributionMapperTest.java
        DistributionMapperWithDataIT.java
        MapperITBase.java
        MultimediaMapperTest.java
        MultimediaMapperWithDataIT.java
        NameUsageMapperIT.java
        NameUsageMapperWithDataIT.java
        NameUsageMetricsMapperIT.java
        ParsedNameMapperIT.java
        RawUsageMapperTest.java
        ReferenceMapperTest.java
        ReferenceMapperWithDataIT.java
        SpeciesProfileMapperTest.java
        TypeSpecimenMapperTest.java
        UsageCountMapperTest.java
        VerbatimNameUsageMapperJsonTest.java
        VernacularNameMapperTest.java
        VernacularNameMapperWithDataIT.java
        model
        RawUsageTest.java
        postgres
        ArraySetIssueTypeHandlerTest.java
        ClbDbTestRule.java
        DbLoader.java
        DbLoaderTest.java
        HstoreCountTypeHandlerTest.java
        IntArrayPgWriterTest.java
        utils
        ClbConfigurationUtils.java
        utils
        file
        ResourceUtilsTest.java
- checklistbank-nub
  - src
    - main
      - java
        org
        gbif
        nub
        lookup
        NubMatchingModule.java
        fuzzy
        HigherTaxaComparator.java
        IndexBuildHandler.java
        NubIndex.java
        NubMatchingServiceImpl.java
        similarity
        DamerauLevenshtein.java
        DistanceUtils.java
        JaroWinkler.java
        LevenshteinDistance.java
        ModifiedDamerauLevenshtein.java
        ModifiedJaroWinkler.java
        ScientificNameSimilarity.java
        StringSimilarity.java
        straight
        DatasetMatchFailed.java
        IdLookup.java
        IdLookupImpl.java
        IdLookupPassThru.java
        IdLookupWs.java
        LookupKryoFactory.java
        LookupUsage.java
        LookupUsageMatch.java
        mapdb
        ImmutableListSerializer.java
        MapDbObjectSerializer.java
        utils
        RsGbifOrg.java
    - test
      - java
        org
        gbif
        nub
        lookup
        NubMatchingTestModule.java
        fuzzy
        DownloadJsonUtil.java
        HigherTaxaComparatorTest.java
        NubIndexTest.java
        NubMatchingServiceImplIT.java
        NubMatchingServiceImplLegacyIT.java
        NubMatchingServiceImplStrictIT.java
        NubMatchingServiceImplTest.java
        NubMatchingServiceTestManual.java
        SynonymMerger.java
        similarity
        DistanceComparisonTest.java
        DistanceUtilsTest.java
        ScientificNameSimilarityTest.java
        StringSimilarityComparisonTest.java
        straight
        IdLookupImplIT.java
        IdLookupImplTest.java
        LookupUsageMatchTest.java
        LookupUsageTest.java
        mapdb
        MapDbObjectSerializerTest.java
- checklistbank-nub-ws
  - src
    - main
      - java
        org
        gbif
        checklistbank
        ws
        nub
        NubResource.java
        NubWsListener.java
    - test
      - java
        org
        gbif
        checklistbank
        ws
        nub
        DeployedNubLookupCorrectnessTest.java
- checklistbank-solr
  - src
    - main
      - java
        org
        gbif
        checklistbank
        index
        NameUsageDocConverter.java
        NameUsageIndexServicePassThru.java
        NameUsageIndexServiceSolr.java
        backfill
        AvroExportJob.java
        AvroExporter.java
        IndexingConfigKeys.java
        NameUsageAvroConverter.java
        NameUsageBatchProcessor.java
        NameUsageIndexingJob.java
        SolrBackfill.java
        ThreadPoolRunner.java
        guice
        AvroIndexingModule.java
        AvroIndexingModulePrivate.java
        EmbeddedSolrReference.java
        RealTimeModule.java
        SearchModule.java
        Solr.java
        SolrIndexingModule.java
        SolrIndexingModulePrivate.java
        model
        NameUsageAvro.java
        SolrUsage.java
        service
        NameUsageSearchServiceImpl.java
        ResponseBuilder.java
        SolrMapping.java
        SolrQueryBuilder.java
    - test
      - java
        org
        gbif
        checklistbank
        index
        AvroIndexingTestModule.java
        HdfsTestUtil.java
        NameUsageDocConverterTest.java
        SolrIndexingTestModule.java
        backfill
        AvroExporterIT.java
        NameUsageIndexingJobIT.java
        SolrBackfillIT.java
        SolrTestSetup.java
        guice
        SearchTestModule.java
        service
        NameUsageSearchServiceIT.java
        NameUsageSearchServiceSuggestIT.java
        ResponseBuilderTest.java
        SolrMappingTest.java
- checklistbank-solr-plugins
  - src
    - main
      - java
        org
        gbif
        checklistbank
        lucene
        LuceneUtils.java
        ScientificNameAnalyzer.java
        ScientificNameNormalizerFilter.java
        ScientificNameNormalizerFilterFactory.java
        ScientificNameSoundAlikeFilter.java
    - test
      - java
        org
        gbif
        checklistbank
        lucene
        ScientificNameAnalyzerTest.java
- checklistbank-ws
  - src
    - main
      - java
        org
        gbif
        checklistbank
        ws
        guice
        ChecklistBankWsModule.java
        resources
        DatasetMetricsResource.java
        DescriptionResource.java
        NameParserResource.java
        SpeciesResource.java
        util
        LineReader.java
    - test
      - java
        org
        gbif
        checklistbank
        ws
        resources
        SpeciesResourceTest.java
- checklistbank-ws-client
  - src
    - main
      - java
        org
        gbif
        checklistbank
        ws
        client
        DatasetMetricsWsClient.java
        DescriptionWsClient.java
        DistributionWsClient.java
        IdentifierWsClient.java
        MultimediaWsClient.java
        NameUsageComponentBaseWsClient.java
        NameUsageMatchWsClient.java
        NameUsageSearchWsClient.java
        NameUsageWsClient.java
        ReferenceWsClient.java
        SpeciesProfileWsClient.java
        TypeSpecimenWsClient.java
        VernacularNameWsClient.java
        guice
        ChecklistBankWs.java
        ChecklistBankWsClientModule.java
        ChecklistBankWsMatchClientModule.java
        ChecklistBankWsServiceClientModule.java
        NameUsageMatchWs.java
        util
        Constants.java
        SimpleParameterMap.java
    - test
      - java
        org
        gbif
        checklistbank
        ws
        client
        DatasetMetricsWsClientIT.java
        DescriptionWsClientIT.java
        DescriptionWsClientTest.java
        DistributionWsClientIT.java
        DistributionWsClientTest.java
        IdentifierWsClientIT.java
        JunitExecutionOrder.java
        JunitExecutionOrderSuite.java
        MultimediaWsClientIT.java
        MultimediaWsClientTest.java
        NameUsageComponentWsClientITBase.java
        NameUsageMatchWsClientTest.java
        NameUsageSearchWsClientIT.java
        NameUsageSearchWsClientTest.java
        NameUsageWsClientIT.java
        NameUsageWsClientTest.java
        ReferenceWsClientIT.java
        ReferenceWsClientTest.java
        SpeciesProfileWsClientIT.java
        SpeciesProfileWsClientTest.java
        TypeSpecimenWsClientIT.java
        TypeSpecimenWsClientTest.java
        VernacularNameWsClientIT.java
        VernacularNameWsClientTest.java
        WsClientBaseTest.java
        WsClientNameUsageComponentTest.java
        WsClientSuite.java
        guice
        ChecklistBankSearchWsTestModule.java
        ChecklistBankWsClientModuleTest.java
        ClbServer.java
        ClbServerTest.java
        UrlBindingModule.java
        util
        SimpleParameterMapTest.java

package org.gbif.checklistbank.nub;

import org.apache.commons.lang3.StringUtils;
import org.gbif.api.exception.UnparsableException;
import org.gbif.api.model.Constants;
import org.gbif.api.model.checklistbank.ParsedName;
import org.gbif.api.service.checklistbank.NameParser;
import org.gbif.api.vocabulary.Kingdom;
import org.gbif.api.vocabulary.NameType;
import org.gbif.api.vocabulary.NameUsageIssue;
import org.gbif.api.vocabulary.Origin;
import org.gbif.api.vocabulary.Rank;
import org.gbif.api.vocabulary.TaxonomicStatus;
import org.gbif.checklistbank.authorship.AuthorComparator;
import org.gbif.checklistbank.authorship.BasionymGroup;
import org.gbif.checklistbank.authorship.BasionymSorter;
import org.gbif.checklistbank.cli.normalizer.NormalizerStats;
import org.gbif.checklistbank.cli.nubbuild.NubConfiguration;
import org.gbif.checklistbank.iterable.CloseableIterator;
import org.gbif.checklistbank.model.Equality;
import org.gbif.checklistbank.neo.Labels;
import org.gbif.checklistbank.neo.NeoProperties;
import org.gbif.checklistbank.neo.RelType;
import org.gbif.checklistbank.neo.UsageDao;
import org.gbif.checklistbank.neo.traverse.Traversals;
import org.gbif.checklistbank.neo.traverse.TreeWalker;
import org.gbif.checklistbank.neo.traverse.UsageMetricsHandler;
import org.gbif.checklistbank.nub.model.NubUsage;
import org.gbif.checklistbank.nub.model.NubUsageMatch;
import org.gbif.checklistbank.nub.model.SrcUsage;
import org.gbif.checklistbank.nub.source.ClbSource;
import org.gbif.checklistbank.nub.source.ClbSourceList;
import org.gbif.checklistbank.nub.source.NubSource;
import org.gbif.checklistbank.nub.source.NubSourceList;
import org.gbif.checklistbank.nub.validation.NubAssertions;
import org.gbif.checklistbank.nub.validation.NubTreeValidation;
import org.gbif.checklistbank.nub.validation.NubValidation;
import org.gbif.checklistbank.utils.SciNameNormalizer;
import org.gbif.nameparser.GBIFNameParser;
import org.gbif.nub.lookup.straight.IdLookup;
import org.gbif.nub.lookup.straight.IdLookupImpl;
import org.gbif.utils.collection.MapUtils;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.collect.UnmodifiableIterator;
import it.unimi.dsi.fastutil.ints.Int2LongMap;
import it.unimi.dsi.fastutil.ints.Int2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.Long2IntMap;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.objects.Object2LongMap;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.NotFoundException;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterable;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.traversal.Evaluators;

import org.neo4j.helpers.collection.Iterators;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class NubBuilder implements Runnable {
  private static final Logger LOG = LoggerFactory.getLogger(NubBuilder.class);
  private static final Joiner SEMICOLON_JOIN = Joiner.on("; ").skipNulls();
  public static final Set<Rank> NUB_RANKS;

  static {
    List<Rank> ranks = Lists.newArrayList(Rank.LINNEAN_RANKS);
    ranks.add(Rank.SUBSPECIES);
    ranks.add(Rank.VARIETY);
    ranks.add(Rank.FORM);
    ranks.remove(Rank.KINGDOM); // we only use kingdoms from our enum
    NUB_RANKS = ImmutableSet.copyOf(ranks);
  }

  private final Set<NameType> ignoredNameTypes = Sets.newHashSet(
      NameType.CANDIDATUS, NameType.CULTIVAR, NameType.INFORMAL, NameType.NO_NAME, NameType.PLACEHOLDER, NameType.NO_NAME
  );
  private final static ImmutableMap<TaxonomicStatus, Integer> STATUS_ORDER = ImmutableMap.of(
      TaxonomicStatus.HOMOTYPIC_SYNONYM, 1,
      TaxonomicStatus.HETEROTYPIC_SYNONYM, 2,
      TaxonomicStatus.SYNONYM, 3,
      TaxonomicStatus.ACCEPTED, 4,
      TaxonomicStatus.DOUBTFUL, 5
      );
  private static final Pattern EX_AUTHOR = Pattern.compile("^(.+) ex ", Pattern.CASE_INSENSITIVE);

  private final Set<Rank> allowedRanks = Sets.newHashSet();
  private final NubDb db;
  private final boolean closeDao;
  private final NubSourceList sources;
  private final NameParser parser;
  private final NubConfiguration cfg;
  private NubSource currSrc;
  private ParentStack parents;
  private int sourceUsageCounter = 0;
  private final AuthorComparator authorComparator;
  private final IdGenerator idGen;
  private final Int2LongMap src2NubKey = new Int2LongOpenHashMap();
  private final Long2IntMap basionymRels = new Long2IntOpenHashMap(); // node.id -> src.usageKey
  private final Map<UUID, Integer> priorities = Maps.newHashMap();
  private Integer maxPriority = 0;
  private int datasetCounter = 1;

  private NubBuilder(UsageDao dao, NubSourceList sources, IdLookup idLookup, AuthorComparator authorComparator, int newIdStart,
                     boolean closeDao, NubConfiguration cfg) {
    db = NubDb.create(dao, authorComparator);
    this.sources = sources;
    this.authorComparator = authorComparator;
    idGen = new IdGenerator(idLookup, newIdStart);
    this.closeDao = closeDao;
    this.cfg = cfg;
    this.parser = new GBIFNameParser(cfg.parserTimeout);
  }

  public static NubBuilder create(NubConfiguration cfg) {
    UsageDao dao = UsageDao.persistentDao(cfg.neo, Constants.NUB_DATASET_KEY, false, null, true);
    try {
      IdLookupImpl idLookup = IdLookupImpl.temp().load(cfg.clb, true);
      return new NubBuilder(dao, ClbSourceList.create(cfg), idLookup, idLookup.getAuthorComparator(), idLookup.getKeyMax() + 1, true, cfg);
    } catch (Exception e) {
      throw new IllegalStateException("Failed to load existing backbone ids", e);
    }
  }

  /**
   * @param dao the dao to persistent the nub. Will be left open after run() is called.
   */
  public static NubBuilder create(UsageDao dao, NubSourceList sources, IdLookup idLookup, int newIdStart, int parserTimeout) {
    NubConfiguration cfg = new NubConfiguration();
    cfg.groupBasionyms = true;
    cfg.validate = true;
    cfg.runAssertions = false;
    cfg.autoImport = false;
    cfg.neo.batchSize = 5000;
    cfg.parserTimeout = parserTimeout;
    return new NubBuilder(dao, sources, idLookup, idLookup.getAuthorComparator(), newIdStart, false, cfg);
  }

  /**
   * Builds a new neo4j based backbone with metrics and stable ids already mapped.
   * The DAO is kept open if you provided it explicitly, otherwise its being closed.
   */
  @Override
  public void run() {
    try {
      addKingdoms();
      parents = new ParentStack(db.kingdom(Kingdom.INCERTAE_SEDIS));

      // main work importing all source checklists
      addDatasets();

      // change current datasource to nub algorithm, avoiding using the last source for algorithmically generated usages
      currSrc = new ClbSource(null, Constants.NUB_DATASET_KEY, "Backbone algorithm");

      // detect and group basionyms
      groupByBasionym();

      // extract synonyms from ex authors
      synonymizeExAuthors();

      // flagging of suspicous usages
      flagParentMismatch();
      flagEmptyGenera();
      cleanImplicitTaxa();
      flagDuplicateAcceptedNames();
      flagSimilarNames();
      flagDoubtfulOriginalNames();

      // persist missing autonyms
      fixInfraspeciesHierarchy();
      manageAutonyms();

      // basic neo tree checks, fail fast
      if (cfg.validate) {
        validate(new NubTreeValidation(db));
      }

      // add extra data
      addPublicationDois();
      addExtensionData();

      // match to old nub and assign (stable) usage keys for postgres
      assignUsageKeys();

      // final validation with often reported issues
      if (cfg.runAssertions) {
        validate(new NubAssertions(db));
      }

      // convert usages for the importer and build metrics
      db.dao().convertNubUsages();
      builtUsageMetrics();
      LOG.info("New backbone built successfully!");

    } catch (AssertionError e){
      LOG.error("Backbone invalid, build failed!", e);
      throw e;

    } catch (RuntimeException e){
      LOG.error("Fatal error. Backbone build failed!", e);
      db.dao().consistencyNubReport();
      throw e;

    } finally {
      sources.close();
      if (closeDao) {
        db.dao().close();
        LOG.info("Backbone dao closed orderly");
      } else {
        LOG.warn("Backbone dao not closed!");
      }
    }
  }

  /**
   * Goes through all names with ex authors (99% botanical) and creates homotypic synonyms with the ex authorship.
   * Ex authors are publications which published a name earlier than the regular author, but which are illegitimate according to the code, for example a nomen nudum.
   *
   * See http://dev.gbif.org/issues/browse/POR-3147
   */
  private void synonymizeExAuthors() {
    LOG.info("Extract ex author species synonyms");
    try (Transaction tx = db.beginTx()) {
      synonymizeExAuthors(db.dao().allSpecies());
      tx.success();
    }

    LOG.info("Extract ex author infraspecies synonyms");
    try (Transaction tx = db.beginTx()) {
      synonymizeExAuthors(db.dao().allInfraSpecies());
      tx.success();
    }
  }

  private void synonymizeExAuthors(ResourceIterator<Node> iter) {
    for (Node n : Iterators.loop(iter)) {
      NubUsage nub = read(n);
      if (!StringUtils.isBlank(nub.parsedName.getAuthorship())) {
        Matcher m = EX_AUTHOR.matcher(nub.parsedName.getAuthorship());
        if (m.find()) {
          try {
            // create synonym if not already existing
            SrcUsage syn = new SrcUsage();
            syn.parsedName = nub.parsedName;
            syn.parsedName.setAuthorship(m.group(1));
            syn.parsedName.setYear(null);
            syn.parsedName.setScientificName(syn.parsedName.canonicalNameComplete());
            syn.rank = nub.rank;
            syn.status = TaxonomicStatus.HOMOTYPIC_SYNONYM;

            // the parent nub does not matter as we always do a qualified author based matching
            NubUsageMatch match = db.findNubUsage(nub.datasetKey, syn, nub.kingdom, null);
            if (!match.isMatch() || !match.usage.parsedName.hasAuthorship()) {
              // create a new synonym
              NubUsage accepted = nub.status.isAccepted() ? nub : db.parent(nub);
              LOG.debug("Create ex author synonym {}", syn.parsedName.fullName());
              db.addUsage(accepted, syn, Origin.EX_AUTHOR_SYNONYM, currSrc.key);
            }
          } catch (IgnoreSourceUsageException e) {
            // swallow
          }
        }
      }
    }
  }

  /**
   * If there are several accepted infraspecific ranks for a given species
   * this method makes sure the species subtree makes sense.
   *
   * The same epithet can be used again within a species, at whatever level, only if the names with the re-used epithet
   * are attached to the same type. Thus there can be a form called Poa secunda f. juncifolia as well as
   * the subspecies Poa secunda subsp. juncifolia if, and only if, the type specimen of Poa secunda f. juncifolia
   * is the same as the type specimen of Poa secunda subsp. juncifolia.
   * In other words, if there is a single type specimen whose classification is Poa secunda subsp. juncifolia f. juncifolia.
   */
  private void fixInfraspeciesHierarchy() {
    //TODO
  }

  /**
   * Writes a file based report about deleted, resurrected and newly added taxa.
   */
  public void report(File reportingDir) {
    try {
      idGen.writeReports(reportingDir);
    } catch (IOException e) {
      LOG.warn("Failed to write ID report", e);
    }
  }

  private void flagDuplicateAcceptedNames() {
    for (Kingdom k : Kingdom.values()) {
      try (Transaction tx = db.beginTx()) {
        LOG.info("Start flagging doubtful duplicate names in {}", k);
        NubUsage ku = db.kingdom(k);
        markDuplicatesRedundant(Traversals.ACCEPTED_TREE.traverse(ku.node).nodes());
        tx.success();
      }
    }
  }

  /**
   * http://dev.gbif.org/issues/browse/POR-2815
   */
  private void flagDoubtfulOriginalNames() {
    LOG.info("Start flagging doubtful original names");
    try (Transaction tx = db.beginTx()) {
      for (Node gn : Iterators.loop(db.dao().allGenera())) {
        NubUsage genus = read(gn);
        Integer gYear = genus.parsedName.getYearInt();
        if (gYear != null) {
          // all accepted included taxa should have been described after the genus
          // flag the ones that have an earlier publication date!
          for (Node n : Traversals.SORTED_ACCEPTED_TREE.traverse(gn).nodes()) {
            NubUsage u = read(n);
            Integer year = u.parsedName.getYearInt();
            if (year != null && year < gYear) {
              u.issues.add(NameUsageIssue.PUBLISHED_BEFORE_GENUS);
              db.store(u);
            }
          }
        }
      }
      tx.success();
    }
  }

  /**
   * Goes through all accepted species and infraspcecies and flags suspicous similar names.
   * Adds a NameUsageIssue.ORTHOGRAPHIC_VARIANT to all similar names.
   */
  private void flagSimilarNames() {
    LOG.info("Start flagging similar species");
    try (Transaction tx = db.beginTx()) {
      flagSimilarNames(db.dao().allSpecies());
      tx.success();
    }

    LOG.info("Start flagging similar infraspecies");
    try (Transaction tx = db.beginTx()) {
      flagSimilarNames(db.dao().allInfraSpecies());
      tx.success();
    }
  }

  /**
   * Small null safe wrapper around our DAO that logs missing nub usages for existing nodes.
   */
  private NubUsage read(Node n) {
    NubUsage u = db.dao().readNub(n);
    if (u == null) {
      LOG.error("Missing kvp nub usage for node {} {}", n.getId(), NeoProperties.getScientificName(n));
      throw new IllegalStateException("Missing kvp nub usage for node " + n.getId());
    }
    return u;
  }

  private void flagSimilarNames(ResourceIterator<Node> iter) {
    Map<String, String> names = Maps.newHashMap();
    for (Node n : Iterators.loop(iter)) {
      if (!n.hasLabel(Labels.SYNONYM)) {
        NubUsage u = read(n);
        String normedName = db.dao().canonicalOrScientificName(u.parsedName, false);
        if (!StringUtils.isBlank(normedName)) {
          if (names.containsKey(normedName)) {
            u.issues.add(NameUsageIssue.ORTHOGRAPHIC_VARIANT);
            u.addRemark("Possible variant of "+names.get(normedName));
            db.store(u);
          } else {
            names.put(normedName, u.parsedName.canonicalNameComplete());
          }
        }
      }
    }
  }

  /**
   * Assigns a doubtful status to accepted names that only differ in authorship
   *
   * @param nodes any node iterable to check for names
   */
  private void markDuplicatesRedundant(ResourceIterable<Node> nodes) {
    Object2LongMap<String> names = new Object2LongOpenHashMap<>();
    for (Node n : nodes) {
      if (!n.hasLabel(Labels.SYNONYM)) {
        NubUsage u = read(n);
        String name = u.parsedName.canonicalName();
        if (u.status == TaxonomicStatus.ACCEPTED && !StringUtils.isBlank(name)) {
          // prefix with rank ordinal to become unique across ranks (ordinal is shorter than full name to save mem)
          String indexedName = u.rank.ordinal() + name;
          if (names.containsKey(indexedName)) {
            // duplicate accepted canonical name. Check which has priority
            Node n1 = db.getNode(names.get(indexedName));
            NubUsage u1 = read(n1);

            int p1 = priorities.get(u1.datasetKey);
            int p2 = priorities.get(u.datasetKey);

            if (p2 < p1) {
              // the old usage is from a less trusted source
              u1.status = TaxonomicStatus.DOUBTFUL;
              db.store(u1);
              names.put(indexedName, n.getId());
            } else {
              // the old usage is from a higher trusted source, keep it
              u.status = TaxonomicStatus.DOUBTFUL;
              db.store(u);
            }
          } else {
            names.put(indexedName, n.getId());
          }
        }
      }
    }
  }

  /**
   * Incorporate Rod Pages IPNI name DOIs from https://github.com/rdmpage/ipni-names
   */
  private void addPublicationDois() {

  }


  private void validate(NubValidation validator) throws AssertionError {
    try (Transaction tx = db.beginTx()) {
      boolean valid = validator.validate();
      if (valid) {
        LOG.info("{} passed", validator.getClass().getSimpleName());
      } else {
        LOG.error("Backbone is not valid! {} failed", validator.getClass().getSimpleName());
        throw new AssertionError("Backbone is not valid!");
      }
    }
  }

  /**
   * Goes through all accepted infraspecies and checks if a matching autonym exists,
   * creating missing autonyms where needed.
   * An autonym is an infraspecific taxon that has the same species and infraspecific epithet.
   * We do this last to not persistent autonyms that we dont need after basionyms are grouped or status has changed for some other reason.
   */
  private void manageAutonyms() {
    if (!cfg.keepLonelyAutonyms) {
      LOG.info("Delete lonely autonyms");
      try (Transaction tx = db.beginTx()) {
        for (Node n : Iterators.loop(db.dao().allAutonyms())) {
          Rank rank = NeoProperties.getRank(n, Rank.UNRANKED);
          if (!n.hasLabel(Labels.SYNONYM)) {
            Node p = db.parent(n);
            // count all childs of same rank
            int count = 0;
            for (Node c : Traversals.CHILDREN.traverse(p).nodes()) {
              if (NeoProperties.getRank(c, Rank.UNRANKED) == rank) {
                count++;
              }
            }
            if (count == 1) {
              // only this accepted autonym, try to remove!!!
              LOG.info("Removing lonely {} autonym {} {}", rank, n, NeoProperties.getScientificName(n));
              removeTaxonIfEmpty(db.dao().readNub(n));
            }
          }
        }
        tx.success();
      }
    }

    LOG.info("Start creating missing autonyms");
    try (Transaction tx = db.beginTx()) {
      int counter = 0;
      for (Node n : Iterators.loop(db.dao().allInfraSpecies())) {
        if (!n.hasLabel(Labels.SYNONYM)) {
          NubUsage u = read(n);
          // check for autonyms
          if (!u.parsedName.isAutonym()) {
            ParsedName pn = new ParsedName();
            pn.setType(NameType.SCIENTIFIC);
            pn.setGenusOrAbove(u.parsedName.getGenusOrAbove());
            pn.setSpecificEpithet(u.parsedName.getSpecificEpithet());
            pn.setInfraSpecificEpithet(u.parsedName.getSpecificEpithet());
            pn.setScientificName(pn.canonicalName());
            pn.setRank(u.rank);

            try {
              NubUsageMatch autoMatch = db.findNubUsage(pn.canonicalName(), u.rank, u.kingdom, true);
              if (!autoMatch.isMatch()) {
                NubUsage parent = db.parent(u);

                SrcUsage autonym = new SrcUsage();
                autonym.rank = u.rank;
                autonym.scientificName = pn.canonicalName();
                autonym.parsedName = pn;
                autonym.status = TaxonomicStatus.ACCEPTED;
                try {
                  createNubUsage(autonym, Origin.AUTONYM, parent);
                  counter++;
                } catch (IgnoreSourceUsageException e) {
                  LOG.warn("Fail to persistent missing autonym {}", pn.canonicalName());
                }
              }

            } catch (HomonymException e) {
              LOG.error("Homonym autonym found: {}", e.getName());
            }
          }
        }
      }
      tx.success();
      LOG.info("Created {} missing autonyms", counter);
    }
  }

  private List<Node> listFamilies() {
    List<Node> families;
    try (Transaction tx = db.beginTx()) {
      families = Iterators.asList(db.dao().allFamilies());
    }
    return families;
  }

  /**
   * Goes through all usages and tries to discover basionyms by comparing the specific or infraspecific epithet and the authorships within a family.
   * As we often see missing brackets from author names we must code defensively and allow several original names in the data for a single epithet.
   */
  private void detectBasionyms() {
    try {
      LOG.info("Discover basionyms");
      int newBasionyms = 0;
      int newRelations = 0;
      final BasionymSorter basSorter = new BasionymSorter(authorComparator);

      // load all family nodes into list so we can process them seach in a separate transaction later on
      List<Node> families = listFamilies();
      for (Node n : families) {
        try (Transaction tx = db.beginTx()) {
          NubUsage fam = read(n);
          if (!fam.status.isSynonym()) {
            Map<String, List<NubUsage>> epithets = Maps.newHashMap();
            Map<String, Set<String>> epithetBridges = Maps.newHashMap();
            LOG.debug("Discover basionyms in family {}", fam.parsedName.canonicalNameComplete());
            // key all names by their terminal epithet
            for (Node c : Traversals.DESCENDANTS.traverse(n).nodes()) {
              NubUsage nub = read(c);
              // ignore all supra specific names and autonyms
              if (nub.rank.isSpeciesOrBelow() && !c.hasLabel(Labels.AUTONYM)) {
                String epithet = SciNameNormalizer.stemEpithet(nub.parsedName.getTerminalEpithet());
                if (!epithets.containsKey(epithet)) {
                  epithets.put(epithet, Lists.newArrayList(nub));
                } else {
                  epithets.get(epithet).add(nub);
                }
                // now check if a basionym relation exists already that reaches out to some other epithet, e.g. due to gender changes
                for (Node bg : Traversals.BASIONYM_GROUP.evaluator(Evaluators.excludeStartPosition()).traverse(c).nodes()) {
                  NubUsage bgu = read(bg);
                  String epithet2 = SciNameNormalizer.stemEpithet(bgu.parsedName.getTerminalEpithet());
                  if (epithet2 != null && !epithet2.equals(epithet)) {
                    if (!epithetBridges.containsKey(epithet)) {
                      epithetBridges.put(epithet, Sets.newHashSet(epithet2));
                    } else {
                      epithetBridges.get(epithet).add(epithet2);
                    }
                  }
                }
              }
            }
            LOG.debug("{} distinct epithets found in family {}", epithets.size(), fam.parsedName.canonicalNameComplete());

            // merge epithet groups based on existing basionym relations, catching some gender changes
            LOG.debug("{} epithets are connected with explicit basionym relations", epithetBridges.size());
            for (Map.Entry<String, Set<String>> bridge : epithetBridges.entrySet()) {
              if (epithets.containsKey(bridge.getKey())) {
                List<NubUsage> usages = epithets.get(bridge.getKey());
                for (String epi2 : bridge.getValue()) {
                  if (epithets.containsKey(epi2)) {
                    LOG.debug("Merging {} usages of epithet {} into epithet group {}", epithets.get(epi2).size(), epi2, bridge.getKey());
                    usages.addAll(epithets.remove(epi2));
                  }
                }
              }
            }

            // now compare authorships for each epithet group
            for (Map.Entry<String, List<NubUsage>> epithetGroup : epithets.entrySet()) {

              Collection<BasionymGroup<NubUsage>> groups = basSorter.groupBasionyms(epithetGroup.getValue(), new Function<NubUsage, ParsedName>() {
                @Override
                public ParsedName apply(NubUsage nub) {
                  return nub.parsedName;
                }
              });
              // go through groups and persistent basionym relations where needed
              for (BasionymGroup<NubUsage> group : groups) {
                // we only need to process groups that contain recombinations
                if (group.hasRecombinations()) {
                  // if we have a basionym creating relations is straight forward
                  NubUsage basionym = null;
                  if (group.hasBasionym()) {
                    basionym = group.getBasionym();

                  } else if (group.getRecombinations().size() > 1) {
                    // we need to persistent a placeholder basionym to group the 2 or more recombinations
                    newBasionyms++;
                    basionym = createBasionymPlaceholder(fam, group);
                  }
                  // persistent basionym relations
                  if (basionym != null) {
                    for (NubUsage u : group.getRecombinations()) {
                      if (createBasionymRelationIfNotExisting(basionym.node, u.node)) {
                        newRelations++;
                        u.issues.add(NameUsageIssue.ORIGINAL_NAME_DERIVED);
                        db.store(u);
                      }
                    }
                  }
                }
              }
            }
          }
          tx.success();
        } catch (Exception e) {
          LOG.error("Error detecting basionyms for family {}", n.getProperty(NeoProperties.SCIENTIFIC_NAME, "no name"), e);
        }
      }
      LOG.info("Discovered {} new basionym relations and created {} basionym placeholders", newRelations, newBasionyms);

    } catch (Throwable e) {
      LOG.error("Error detecting basionyms", e);
    }
  }

  private NubUsage createBasionymPlaceholder(NubUsage family, BasionymGroup group) {
    NubUsage basionym = new NubUsage();
    basionym.datasetKey = null;
    basionym.origin = Origin.BASIONYM_PLACEHOLDER;
    basionym.rank = Rank.UNRANKED;
    basionym.status = TaxonomicStatus.DOUBTFUL;
    basionym.parsedName = new ParsedName();
    basionym.parsedName.setGenusOrAbove("?");
    basionym.parsedName.setSpecificEpithet(group.getEpithet());
    basionym.parsedName.setAuthorship(group.getAuthorship());
    basionym.parsedName.setYear(group.getYear());
    basionym.parsedName.setType(NameType.PLACEHOLDER);
    basionym.parsedName.setScientificName(basionym.parsedName.fullName());
    LOG.debug("creating basionym placeholder {} in family {}", basionym.parsedName.canonicalNameComplete(), family.parsedName.canonicalName());
    return db.addUsage(family, basionym);
  }

  private void addKingdoms() {
    try (Transaction tx = db.beginTx()) {
      LOG.info("Adding kingdom");
      currSrc = new ClbSource(null, Constants.NUB_DATASET_KEY, "Backbone kingdoms");
      for (Kingdom k : Kingdom.values()) {
        NubUsage ku = new NubUsage();
        ku.usageKey = idGen.reissue(k.nubUsageKey());
        ku.kingdom = k;
        ku.datasetKey = Constants.NUB_DATASET_KEY;
        ku.origin = Origin.SOURCE;
        ku.rank = Rank.KINGDOM;
        ku.status = TaxonomicStatus.ACCEPTED;
        ku.parsedName = new ParsedName();
        ku.parsedName.setType(NameType.SCIENTIFIC);
        ku.parsedName.setGenusOrAbove(k.scientificName());
        ku.parsedName.setScientificName(k.scientificName());
        // treat incertae sedis placeholder kingdom different
        if (k == Kingdom.INCERTAE_SEDIS) {
          ku.status = TaxonomicStatus.DOUBTFUL;
          ku.parsedName.setType(NameType.PLACEHOLDER);
        }
        db.addRoot(ku);
      }
      tx.success();
    }
  }


  /**
   * TODO: to be implemented.
   * Now clb still dynamically retrieves extension data from all checklists, but in the future we like to control
   * which extension record is attached to a backbone usage.
   * Adds all extension data, e.g. vernacular names, to the backbone directly.
   * TODO:
   * - build map from source usage key to nub node id
   * - stream (jdbc copy) through all extension data in postgres and attach to relevant nub node
   */
  private void addExtensionData() {
    LOG.warn("NOT IMPLEMENTED: Copy extension data to backbone");
    //Joiner commaJoin = Joiner.on(", ").skipNulls();
    //for (Node n : Iterators.loop(db.allTaxa())) {
    //    NubUsage nub = read(n);
    //    if (!nub.sourceIds.isEmpty()) {
    //        LOG.debug("Add extension data from source ids {}", commaJoin.join(nub.sourceIds));
    //    }
    //}
  }

  /**
   * Flags emtpy genera or removes them if they have an IMPLICIT origin
   */
  private void flagEmptyGenera() {
    LOG.info("flag empty genera as doubtful");
    try (Transaction tx = db.beginTx()) {
      for (Node gen : Iterators.loop(db.dao().allGenera())) {
        if (!gen.hasRelationship(RelType.PARENT_OF, Direction.OUTGOING)) {
          NubUsage nub = read(gen);
          if (nub.origin == Origin.IMPLICIT_NAME) {
            // remove this genus as it was created by the nub builder as an implicit genus name for a species we seem to have moved or deleted since
            if (removeTaxonIfEmpty(nub)) {
              continue;
            }
          }
          if (!nub.status.isSynonym()) {
            nub.issues.add(NameUsageIssue.NO_SPECIES);
            if (TaxonomicStatus.ACCEPTED == nub.status) {
              nub.status = TaxonomicStatus.DOUBTFUL;
            }
            db.store(nub);
          }
        }
      }
      tx.success();
    }
  }

  /**
   * Updates implicit names to be accepted (not doubtful) and removes implicit taxa with no children if configured to do so.
   */
  private void cleanImplicitTaxa() {
    LOG.info("Clean implicit taxa");
    try (Transaction tx = db.beginTx()) {
      for (Node n : Iterators.loop(db.dao().allImplicitNames())) {
        NubUsage nub = read(n);
        if (!cfg.keepEmptyImplicitNames) {
          if (removeTaxonIfEmpty(nub)) {
            nub = null;
          }
        }
        // update status if still existing
        if (nub != null) {
          if (nub.status == TaxonomicStatus.DOUBTFUL) {
            nub.status = TaxonomicStatus.ACCEPTED;
            db.store(nub);
          }
        }
      }
      tx.success();
    }
  }

  /**
   * Goes through all accepted species and infraspecies and makes sure the name matches the genus, species classification.
   * For example an accepted species Picea alba with a parent genus of Abies is taxonomic nonsense.
   * Badly classified names are assigned the doubtful status and an NameUsageIssue.NAME_PARENT_MISMATCH is flagged
   */
  private void flagParentMismatch() {
    LOG.info("flag classification name mismatches");
    try (Transaction tx = db.beginTx()) {
      for (Node gn : Iterators.loop(db.dao().allGenera())) {
        if (!gn.hasLabel(Labels.SYNONYM)) {
          NubUsage gen = read(gn);
          if (gen.kingdom == Kingdom.VIRUSES) {
            // virus names are unparsable...
            continue;
          }
          if (gen.parsedName == null || gen.parsedName.getGenusOrAbove() == null) {
            LOG.warn("Genus {} without genus name part: {} {}", gn, gen.rank, NeoProperties.getScientificName(gn));
            continue;
          }
          String genus = gen.parsedName.getGenusOrAbove();

          // flag non matching names
          for (Node spn : Traversals.CHILDREN.traverse(gn).nodes()) {
            NubUsage sp = db.dao().readNub(spn);
            if (sp.rank != Rank.SPECIES) {
              LOG.warn("Genus child {} is not a species: {} {}", spn, sp.rank, NeoProperties.getScientificName(spn));
              continue;
            }
            if (sp.parsedName == null || sp.parsedName.getGenusOrAbove() == null) {
              LOG.warn("Genus child {} without genus name part: {} {}", spn, sp.rank, NeoProperties.getScientificName(spn));
              continue;
            }
            if (!genus.equals(sp.parsedName.getGenusOrAbove())){
              sp.issues.add(NameUsageIssue.NAME_PARENT_MISMATCH);
            }
            db.store(sp);

            // check infraspecific names
            String species = sp.parsedName.getSpecificEpithet();
            for (Node ispn : Traversals.CHILDREN.traverse(spn).nodes()) {
              NubUsage isp = db.dao().readNub(ispn);
              if (isp.parsedName.getInfraSpecificEpithet() == null) {
                LOG.warn("Species child {} without an infraspecific epithet: {} {}", ispn, isp.rank, NeoProperties.getScientificName(ispn));
                continue;
              }
              if (!genus.equals(isp.parsedName.getGenusOrAbove()) || !species.equals(isp.parsedName.getInfraSpecificEpithet())){
                isp.issues.add(NameUsageIssue.NAME_PARENT_MISMATCH);
                db.store(isp);
              }
            }
          }
        }
      }
    }
  }

  private void addDatasets() {
    LOG.info("Start adding backbone sources");
    for (NubSource src : sources) {
      try {
        addDataset(src);
      } catch (Exception e) {
        LOG.error("Error processing source {}", src.name, e);
      } finally {
        Stopwatch sw = Stopwatch.createStarted();
        src.close();
        LOG.debug("Closing source {} took {}ms", src.name, sw.elapsed(TimeUnit.MILLISECONDS));
      }
    }
  }

  private void addDataset(NubSource source) {
    LOG.info("Adding {}th source {}", datasetCounter++, source.name);
    currSrc = source;
    priorities.put(source.key, ++maxPriority);
    // clear dataset wide caches
    parents.clear();
    basionymRels.clear();
    src2NubKey.clear();
    allowedRanks.clear();
    // prepare set of allowed ranks for this source
    for (Rank r : Rank.values()) {
      if (NUB_RANKS.contains(r) && r.ordinal() >= source.ignoreRanksAbove.ordinal()) {
        allowedRanks.add(r);
      }
    }
    int start = sourceUsageCounter;

    // do transactions in batches to dont slow down neo too much
    int batchCounter = 1;
    // makes sure to close the iterator - important for releasing neo resources, slows down considerably otherwise!
    try (CloseableIterator<SrcUsage> iter = source.iterator()) {
      UnmodifiableIterator<List<SrcUsage>> batchIter = com.google.common.collect.Iterators.partition(iter, cfg.neo.batchSize);
      while (batchIter.hasNext()) {
        try (Transaction tx = db.beginTx()) {
          List<SrcUsage> batch = batchIter.next();
          LOG.debug("process batch {} with {} usages", batchCounter++, batch.size());
          for (SrcUsage u : batch) {
            // catch errors processing individual records too
            try {
              LOG.debug("process {} {} {}", u.status, u.rank, u.scientificName);
              sourceUsageCounter++;
              parents.add(u);

              NubUsage parent = parents.nubParent();
              // replace accepted taxa with doubtful ones for all nomenclators and for synonym parents
              // http://dev.gbif.org/issues/browse/POR-2780
              if (TaxonomicStatus.ACCEPTED == u.status && (currSrc.nomenclator || parent.status.isSynonym())) {
                u.status = TaxonomicStatus.DOUBTFUL;
              }
              if (parent.status.isSynonym()) {
                // use accepted instead
                parent = db.parent(parent);
              }
              NubUsage nub = processSourceUsage(u, Origin.SOURCE, parent);
              if (nub != null) {
                parents.put(nub);
              }
            } catch (IgnoreSourceUsageException e) {
              LOG.debug("Ignore usage {} >{}< {}", u.key, u.scientificName, e.getMessage());

            } catch (StackOverflowError e) {
              // if this happens its time to fix some code!
              LOG.error("CODE BUG: StackOverflowError processing {} from source {}", u.scientificName, source.name, e);
              LOG.error("CAUSE: {}", u.parsedName);

            } catch (RuntimeException e) {
              LOG.error("RuntimeException processing {} from source {}", u.scientificName, source.name, e);
            }
          }
          tx.success();
        }
      }
    } catch (Exception e) {
      Throwables.propagate(e);
    }

    // process explicit basionym relations
    processExplicitBasionymRels();

    LOG.info("Processed {} source usages for {}", sourceUsageCounter - start, source.name);
  }

  private void processExplicitBasionymRels() {
    try (Transaction tx = db.beginTx()) {
      LOG.info("Processing {} explicit basionym relations from {}", basionymRels.size(), currSrc.name);
      for (Map.Entry<Long, Integer> entry : basionymRels.entrySet()) {
        Node n = db.getNode(entry.getKey());
        Node bas = db.getNode(src2NubKey.get(entry.getValue()));
        // find basionym node by sourceKey
        if (n != null && bas != null) {
          // basionym has not been verified yet, make sure its of rank <= genus and its name type is no placeholder
          NubUsage basUsage = read(bas);
          if (!basUsage.rank.isSpeciesOrBelow()) {
            LOG.warn("Ignore explicit basionym {} of rank {}", basUsage.parsedName.getScientificName(), basUsage.rank);
            continue;
          }
          if (!basUsage.parsedName.getType().isBackboneType()) {
            LOG.warn("Ignore explicit basionym {} with name type {}", basUsage.parsedName.getScientificName(), basUsage.parsedName.getType());
            continue;
          }
          if (!createBasionymRelationIfNotExisting(bas, n)) {
            LOG.warn("Nub usage {} already contains a contradicting basionym relation. Ignore basionym {} from source {}", n.getProperty(NeoProperties.SCIENTIFIC_NAME, n.getId()), bas.getProperty(NeoProperties.SCIENTIFIC_NAME, bas.getId()), currSrc.name);
          }
        } else {
          LOG.warn("Could not resolve basionym relation for nub {} to source usage {}", entry.getKey(), entry.getValue());
        }
      }
      tx.success();
    }
  }

  /**
   * Looks for all implicit names and tries to find a match with author to replace them.
   * This is done after we added an entire dataset so we can prefer accepted names over doubtful ones.
   * If we instead would do this on the fly as we add new names we might prefer a doubtful name.
   */
  private void replaceImplicitNames() {
    LOG.info("Replace implicit names for dataset {}", currSrc.name);
    try (Transaction tx = db.beginTx()) {
      for (Node n : Iterators.loop(db.dao().allImplicitNames())) {
        NubUsage nub = read(n);
        try {
          NubUsageMatch match = db.findNubUsage(nub.datasetKey, nub.parsedName, nub.rank, nub.status, nub.kingdom, db.parent(nub));
          if (match.isMatch()) {
            // replace implicit name with match
            LOG.debug("Replace implicit name {} with {}", nub.parsedName.fullName(), match.usage.parsedName.fullName());
            //db.store(nub);
          }
        } catch (IgnoreSourceUsageException e) {
          LOG.warn(e.name, e);
        }
      }
      tx.success();
    }
  }

  private void swapName(NubUsage target, NubUsage source) {
    db.transferChildren(target, source);
  }


  /**
   * @return true if basionym relationship was created
   */
  private boolean createBasionymRelationIfNotExisting(Node basionym, Node n) {
    if (!basionym.equals(n) && !n.hasRelationship(RelType.BASIONYM_OF, Direction.BOTH)) {
      basionym.createRelationshipTo(n, RelType.BASIONYM_OF);
      basionym.addLabel(Labels.BASIONYM);
      return true;
    }
    return false;
  }

  private NubUsage processSourceUsage(SrcUsage u, Origin origin, NubUsage parent) throws IgnoreSourceUsageException {
    Preconditions.checkNotNull(u.status);
    Preconditions.checkArgument(parent.status.isAccepted());
    // try to parse name
    addParsedNameIfNull(u);
    // match to existing usages
    NubUsageMatch match = db.findNubUsage(currSrc.key, u, parents.nubKingdom(), parent);

    // process only usages not to be ignored and with desired ranks
    if (!match.ignore && u.rank != null && allowedRanks.contains(u.rank)) {
      // from now on a rank is guaranteed!

      if (!match.isMatch()) {

        // remember if we had a doubtful match
        NubUsage doubtful = match.doubtfulUsage;
        // persistent new nub usage if there wasnt any yet
        match = createNubUsage(u, origin, parent);
        // check if we had a doubtful or implicit & accepted name match
        if (doubtful != null && u.status == TaxonomicStatus.ACCEPTED) {
          db.transferChildren(match.usage, doubtful);
        }

      } else {
        if (origin == Origin.IMPLICIT_NAME) {
          // do not update or change usages with implicit names
          return match.usage;
        }

        Equality authorEq = authorComparator.compare(match.usage.parsedName, u.parsedName);

        if (match.usage.status.isSynonym() == u.status.isSynonym()) {
          // update nub usage if status matches
          updateNub(match.usage, u, origin, parent);

        } else if (Equality.DIFFERENT == authorEq) {
          // persistent new nub usage with different status and authorship as before
          match = createNubUsage(u, origin, parent);

        } else if (fromCurrentSource(match.usage) && !u.status.isSynonym()) {
          // prefer accepted over synonym if from the same source
          LOG.debug("prefer accepted {} over synonym usage from the same source", u.scientificName);
          delete(match.usage);
          match = createNubUsage(u, origin, parent);

        } else if (fromCurrentSource(match.usage) && u.parsedName.hasAuthorship() && Equality.EQUAL != authorEq) {
          // allow new synonyms with non equal authorship to be created
          match = createNubUsage(u, origin, parent);

        } else if (currSrc.nomenclator) {
          updateNomenclature(match.usage, u);

        } else {
          LOG.debug("Ignore source usage. Status {} is different from nub ({}): {}", u.status, match.usage.status, u.scientificName);
        }
      }

      if (match.isMatch()) {
        if (u.key != null) {
          // remember all original source usage key to nub id mappings per dataset
          src2NubKey.put((int)u.key, match.usage.node.getId());
        }
        if (u.originalNameKey != null) {
          // remember basionym relation.
          // Basionyms do not follow the taxnomic hierarchy, so we might not have seen some source keys yet
          // we will process all basionyms at the end of each source dataset
          basionymRels.put(match.usage.node.getId(), (int) u.originalNameKey);
        }
      }
    } else {
      LOG.debug("Ignore {} source usage: {}", u.rank, u.scientificName);
    }
    return match.usage;
  }

  private void delete(NubUsage nub) {
    for (int sourceId : nub.sourceIds) {
      src2NubKey.remove(sourceId);
    }
    basionymRels.remove(nub.node.getId());
    db.dao().delete(nub);
  }

  /**
   * Removes a taxon if it has no accepted children or synonyms
   * @return true if usage was deleted
   */
  private boolean removeTaxonIfEmpty(NubUsage u) {
    if (u != null &&
        !u.node.hasRelationship(Direction.INCOMING, RelType.SYNONYM_OF, RelType.PROPARTE_SYNONYM_OF) &&
        !u.node.hasRelationship(Direction.OUTGOING, RelType.PARENT_OF)
        ) {
      delete(u);
      return true;
    }
    return false;
  }

  private NubUsageMatch createNubUsage(SrcUsage u, Origin origin, NubUsage p) throws IgnoreSourceUsageException {
    addParsedNameIfNull(u);
    // if this is a synonym but the parent is not part of the nub (e.g. cause its a placeholder name) ignore it!
    // http://dev.gbif.org/issues/browse/POR-2990
    if (u.status.isSynonym() && !parents.parentInNub()) {
      throw new IgnoreSourceUsageException("Ignoring synonym as accepted parent is not part of the nub", u.scientificName);
    }
    // ignore synonyms of low rank for higher taxa
    // http://dev.gbif.org/issues/browse/POR-3169
    if (u.status.isSynonym() && !u.rank.higherThan(Rank.GENUS) && p.rank.higherThan(Rank.FAMILY)) {
      String message = String.format("Ignoring %s synonym %s for %s %s", u.rank, u.scientificName, p.rank, p.parsedName.fullName());
      throw new IgnoreSourceUsageException(message, u.scientificName);
    }
    // make sure parent is accepted
    if (p.status.isSynonym()) {
      LOG.warn("Parent {} of {} is a synonym", p.parsedName.canonicalNameComplete(), u.parsedName.canonicalNameComplete());
      throw new IllegalStateException("Parent is a synonym"+u.scientificName);
    }

    // make sure we have a parsed genus to deal with implicit names and the kingdom is not viruses as these have no structured name
    if (p.kingdom != Kingdom.VIRUSES) {

      if (u.status.isAccepted()) {
        // skip badly organized rank hierarchies
        if (!p.rank.higherThan(u.rank)) {
          LOG.warn("Source {} {} with inversed parent {} {}", u.rank, u.scientificName, p.rank, p.parsedName.canonicalNameComplete());
          throw new IgnoreSourceUsageException("Ignoring source with inverted rank order", u.scientificName);
        }

        // we want the parent of any infraspecies ranks to be the species
        if (p.rank.isInfraspecific()) {
          p = findParentSpecies(p);
        }

        // check if implicit species or genus parents are needed
        SrcUsage implicit = new SrcUsage();
        NubUsage implicitParent = null;
        try {
          if (u.parsedName.getGenusOrAbove() != null) {
            if (u.rank == Rank.SPECIES && p.rank != Rank.GENUS) {
              implicit.rank = Rank.GENUS;
              implicit.scientificName = u.parsedName.getGenusOrAbove();
              implicit.status = TaxonomicStatus.DOUBTFUL;
              implicitParent = processSourceUsage(implicit, Origin.IMPLICIT_NAME, p);

            } else if (u.rank.isInfraspecific() && p.rank != Rank.SPECIES) {
              implicit.rank = Rank.SPECIES;
              implicit.scientificName = u.parsedName.canonicalSpeciesName();
              implicit.status = TaxonomicStatus.DOUBTFUL;
              implicitParent = processSourceUsage(implicit, Origin.IMPLICIT_NAME, p);
            }
          } else {
            LOG.warn("Missing genus in parsed name for {}", u.scientificName);
          }

        } catch (IgnoreSourceUsageException e) {
          LOG.warn("Ignore implicit {} {}", implicit.rank, implicit.scientificName);

        } catch (Exception e) {
          LOG.error("Failed to persistent implicit {} {}", implicit.rank, implicit.scientificName, e);
        }

        if (implicitParent != null) {
          // in case the implicit parent species is a synonym turn the infraspecies also into a synonym
          if (implicitParent.status.isSynonym() && implicitParent.rank == Rank.SPECIES) {
            // http://dev.gbif.org/issues/browse/POR-2780
            u.status = TaxonomicStatus.SYNONYM;
            p = db.parent(implicitParent);
          } else {
            // use the implicit parent
            p = implicitParent;
          }
        }

      } else {
        // a synonym
        // avoid cases where synonyms for a binnomial are monomials of rank genus or even higher!
        if (p.parsedName.isBinomial() && u.rank.ordinal() < Rank.INFRAGENERIC_NAME.ordinal()) {
          LOG.warn("Source synonym {} {} with accepted binomial name {} {}", u.rank, u.scientificName, p.rank, p.parsedName.canonicalNameComplete());
          throw new IgnoreSourceUsageException("Ignoring source with inverted rank order", u.scientificName);
        }

      }
    }
    // add to nub db
    return NubUsageMatch.match(db.addUsage(p, u, origin, currSrc.key));
  }

  /**
   * moves up the parent_of rels to the species or first taxon above.
   * Returns original usage in case rank was at species level or above already
   */
  private NubUsage findParentSpecies(NubUsage p) {
    while (p.rank.isInfraspecific()) {
      p = db.parent(p);
    }
    return p;
  }

  private void addParsedNameIfNull(SrcUsage u) throws IgnoreSourceUsageException {
    if (u.parsedName == null) {
      try {
        u.parsedName = parser.parse(u.scientificName, u.rank);
        // avoid indet names
        if (ignoredNameTypes.contains(u.parsedName.getType())) {
          throw new IgnoreSourceUsageException("Ignore " + u.parsedName.getType() + " name", u.scientificName);
        }
        // avoid incomplete names
        if ((!StringUtils.isBlank(u.parsedName.getInfraSpecificEpithet()) && StringUtils.isBlank(u.parsedName.getSpecificEpithet()))
            || !StringUtils.isBlank(u.parsedName.getSpecificEpithet()) && StringUtils.isBlank(u.parsedName.getGenusOrAbove())) {
          throw new IgnoreSourceUsageException("Ignore incomplete name", u.scientificName);
        }
        // avoid taxon concept names
        if (!StringUtils.isBlank(u.parsedName.getSensu())) {
          throw new IgnoreSourceUsageException("Ignore taxon concept names", u.scientificName);
        }
        // avoid names with nulls in epithets
        if ("null".equals(u.parsedName.getSpecificEpithet()) || "null".equals(u.parsedName.getInfraSpecificEpithet())) {
          throw new IgnoreSourceUsageException("Ignore names with null epithets", u.scientificName);
        }
        // consider infraspecific names subspecies
        if (u.parsedName.getRank() == Rank.INFRASPECIFIC_NAME && u.parsedName.isBinomial() && u.parsedName.getInfraSpecificEpithet() != null) {
          u.parsedName.setRank(Rank.SUBSPECIES);
        }
        // consider parsed rank only for bi/trinomials
        Rank pRank = u.parsedName.isBinomial() ? u.parsedName.getRank() : null;
        if (pRank != null && pRank != u.rank && !pRank.isUncomparable()) {
          if (u.rank == null) {
            LOG.debug("Use parsed rank {}", pRank);
            u.rank = pRank;
          } else if (u.rank.isUncomparable()) {
            LOG.debug("Prefer parsed rank {} over {}", pRank, u.rank);
            u.rank = pRank;
          } else {
            LOG.debug("Rank {} does not match parsed rank {}. Ignore {}", u.rank, pRank, u.scientificName);
            throw new IgnoreSourceUsageException("Parsed rank mismatch", u.scientificName);
          }

        } else if (Rank.INFRAGENERIC_NAME ==  u.rank && u.parsedName.isBinomial()) {
          // this is an aggregate species rank as we have a binomial & rank=INFRAGENERIC - treat as a species!
          u.rank = Rank.SPECIES;
          LOG.debug("Treat infrageneric name {} as species", u.scientificName);
        }

        // strip author names from higher taxa
        if (u.rank != null && u.rank.higherThan(Rank.GENUS)) {
          clearAuthorship(u.parsedName);
        }

        //TODO: rebuild name in canonical form - e.g. removes subgenus references
        //u.parsedName.setScientificName(u.parsedName.canonicalNameComplete());


      } catch (UnparsableException e) {
        // allow virus names in the nub
        if (e.type == NameType.VIRUS) {
          u.parsedName = new ParsedName();
          u.parsedName.setScientificName(u.scientificName);
          u.parsedName.setType(e.type);
        } else {
          throw new IgnoreSourceUsageException("Unparsable " + e.type, u.scientificName);
        }
      }
    }
  }

  private void clearAuthorship(ParsedName pn) {
    pn.setAuthorship(null);
    pn.setYear(null);
    pn.setBracketAuthorship(null);
    pn.setBracketYear(null);
  }

  private void updateNomenclature(NubUsage nub, SrcUsage u) {
    LOG.debug("Updating nomenclature for {} from source {}", nub.parsedName.getScientificName(), u.parsedName.getScientificName());
    // authorship
    if (!u.parsedName.authorshipComplete().isEmpty() && (nub.parsedName.authorshipComplete().isEmpty() || currSrc.nomenclator)) {
      nub.parsedName.setAuthorship(u.parsedName.getAuthorship());
      nub.parsedName.setYear(u.parsedName.getYear());
      nub.parsedName.setBracketAuthorship(u.parsedName.getBracketAuthorship());
      nub.parsedName.setBracketYear(u.parsedName.getBracketYear());
      nub.parsedName.setAuthorsParsed(true);
      nub.parsedName.setScientificName(u.parsedName.canonicalNameComplete());
    }

    // publishedIn
    if (u.publishedIn != null && (nub.publishedIn == null || currSrc.nomenclator)) {
      nub.publishedIn = u.publishedIn;
    }

    // nom status
    if (u.nomStatus != null && u.nomStatus.length > 0 && (nub.nomStatus.isEmpty() || currSrc.nomenclator)) {
      nub.nomStatus = Sets.newHashSet(u.nomStatus);
    }
  }

  private void updateNub(NubUsage nub, SrcUsage u, Origin origin, NubUsage parent) {
    LOG.debug("Updating {} from source {}", nub.parsedName.getScientificName(), u.parsedName.getScientificName());
    NubUsage currNubParent = db.parent(nub);

    // update nomenclature and status only from source usages
    if (u.key != null) {
      nub.sourceIds.add(u.key);
      // update author, publication and nom status
      updateNomenclature(nub, u);
      // prefer accepted version over doubtful if its coming from the same dataset!
      if (nub.status == TaxonomicStatus.DOUBTFUL && u.status == TaxonomicStatus.ACCEPTED && fromCurrentSource(nub)) {
        nub.status = u.status;
        if (isNewParentApplicable(nub, currNubParent, parent) && !db.existsInClassification(currNubParent.node, parent.node, false)) {
          // current classification doesnt have that parent yet, lets apply it
          LOG.debug("Update doubtful {} classification with new parent {} {}", nub.parsedName.getScientificName(), parent.rank, parent.parsedName.getScientificName());
          db.createParentRelation(nub, parent);
        }
      }
      if (origin == Origin.SOURCE) {
        // only override original origin value if we update from a true source
        nub.origin = Origin.SOURCE;
      }
    }

    if (nub.status.isSynonym()) {
      // maybe we have a proparte synonym from the same dataset?
      if (fromCurrentSource(nub) && !parent.node.equals(currNubParent.node)) {
        nub.status = TaxonomicStatus.PROPARTE_SYNONYM;
        // persistent new pro parte relation
        LOG.debug("New accepted name {} found for pro parte synonym {}", parent.parsedName.getScientificName(), nub.parsedName.getScientificName());
        db.setSingleFromRelationship(nub.node, parent.node, RelType.PROPARTE_SYNONYM_OF);

      } else {
        // this might be a more exact kind of synonym status
        if (nub.status == TaxonomicStatus.SYNONYM) {
          nub.status = u.status;
        }
      }

    } else {
      // ACCEPTED
      if (isNewParentApplicable(nub, currNubParent, parent) &&
          (currNubParent.kingdom == Kingdom.INCERTAE_SEDIS
            || db.existsInClassification(parent.node, currNubParent.node, false)  && currNubParent.rank != parent.rank
          )
        ) {
        LOG.debug("Update {} classification with new parent {} {}", nub.parsedName.getScientificName(), parent.rank, parent.parsedName.getScientificName());
        db.createParentRelation(nub, parent);
      }
    }
    db.store(nub);
  }

  private boolean isNewParentApplicable(NubUsage nub, NubUsage currParent, NubUsage newParent) {
    return newParent != null
        && !currParent.equals(newParent)
        && (currParent.rank.higherThan(newParent.rank) || currParent.rank == newParent.rank)
        && newParent.rank.higherThan(nub.rank);
  }

  private boolean fromCurrentSource(NubUsage nub) {
    return nub.datasetKey.equals(currSrc.key);
  }

  private void groupByBasionym() {
    if (cfg.groupBasionyms) {
      LOG.info("Start basionym consolidation");
      verifyBasionyms();
      detectBasionyms();
      consolidateBasionymGroups();
    } else {
      LOG.info("Skip basionym consolidation");
    }
  }

  /**
   * Verifies existing basionyms by checking that the basionym does not have an original author
   */
  private void verifyBasionyms() {
    LOG.info("Verify existing basionyms - TO BE IMPLEMENTED!");
  }

  /**
   * Make sure we only have at most one accepted name for each homotypical basionym group!
   * An entire group can consist of synonyms without a problem, but they must all refer to the same accepted name.
   * If a previously accepted name needs to be turned into a synonym it will be of type homotypical_synonym.
   *
   * As we merge names from different taxonomies it is possible there are multiple accepted names (maybe via a synonym relation) in such a group.
   * We always stick to the first combination with the highest priority and make all others
   * a) synonyms of this if it is accepted
   * b) synonyms of the primary's accepted name if it was a synonym itself
   *
   * In case of conflicting accepted names we also flag these names with CONFLICTING_BASIONYM_COMBINATION
   */
  private void consolidateBasionymGroups() {
    int counter = 0;
    int counterModified = 0;
    // first load all basionym node ids into a set so we can process them individually in separate transactions
    LongSet basIds = new LongOpenHashSet();
    try (Transaction tx = db.beginTx()) {
      for (Node bas : Iterators.loop(db.dao().allBasionyms())) {
        basIds.add(bas.getId());
      }
      LOG.info("Found {} basionyms to consolidate", basIds.size());
    }
    // now consolidate each basionym group in its own transaction
    for (long basId : basIds) {
      try (Transaction tx = db.beginTx()) {
        Node bas = db.getNode(basId);

        counter++;
        // sort all usage by source dataset priority, placing doubtful names last
        List<NubUsage> group = db.listBasionymGroup(bas);
        if (group.size() > 1) {
          // we stick to the first combination with the highest priority and make all others
          //  a) synonyms of this if it is accepted
          //  b) synonyms of the primary's accepted name if it was a synonym itself
          // if there are several usages with the same priority select one according to some defined rules
          final NubUsage primary = findPrimaryUsage(group);
          // get the accepted usage in case of synonyms
          final NubUsage accepted = primary.status.isSynonym() ? db.parent(primary) : primary;
          final TaxonomicStatus synStatus = primary.status.isSynonym() ? primary.status : TaxonomicStatus.HOMOTYPIC_SYNONYM;
          Set<Node> parents = ImmutableSet.copyOf(db.parents(accepted.node));

          LOG.debug("Consolidating basionym group with {} primary usage {}: {}", primary.status, primary.parsedName.canonicalNameComplete(), names(group));
          int modified = 0;
          for (NubUsage u : group) {
            if (u.equals(primary)) continue;
            if (parents.contains(u.node)) {
              LOG.debug("Exclude parent {} from basionym consolidation of {}", u.parsedName.canonicalNameComplete(), primary.parsedName.canonicalNameComplete());

            } else if (!hasAccepted(u, accepted)) {
              modified++;
              NubUsage previousParent = db.parent(u);
              if (previousParent != null) {
                u.addRemark(String.format("Originally found in sources as %s %s %s", u.status.toString().toLowerCase().replaceAll("_", " "),
                    u.status.isSynonym() ? "of" : "taxon within", previousParent.parsedName.canonicalNameComplete())
                );
              }
              db.convertToSynonym(u, accepted, synStatus, NameUsageIssue.CONFLICTING_BASIONYM_COMBINATION);
            }
          }
          counterModified = counterModified + modified;
        }
        tx.success();

      } catch (NotFoundException e) {
        LOG.info("Basionym {} was removed. Ignore for consolidation", basId, e);
      }
    }
    LOG.info("Consolidated {} usages from {} basionyms in total", counterModified, counter);
  }

  private int priority(NubUsage usage) {
    return priorities.containsKey(usage.datasetKey) ? priorities.get(usage.datasetKey) : maxPriority + 1;
  }

  /**
   * From a list of equally trusted usages of a basionym group select one primary usage to trust most.
   * Selection follows the order:
   * 1) the accepted usage which most usages link to (from a synonym)
   * 2) status synonym > accepted > doubtful
   * 3) highest rank
   */
  private NubUsage findPrimaryUsage(List<NubUsage> basionymGroup) {
    if (basionymGroup == null || basionymGroup.isEmpty()) {
      return null;
    }
    // a single usage only
    if (basionymGroup.size() == 1) {
      return basionymGroup.get(0);
    }
    // keep shringing this list until we get one!
    List<NubUsage> candidates = Lists.newArrayList();

    // 1. by dataset priority
    int highestPriority = Integer.MAX_VALUE;
    for (NubUsage u : basionymGroup) {
      int datasetPriority = priority(u);
      if (datasetPriority < highestPriority) {
        highestPriority = datasetPriority;
      }
    }
    for (NubUsage u : basionymGroup) {
      if (priority(u) == highestPriority) {
        candidates.add(u);
      }
    }

    if (candidates.size() > 1) {
      // 2. accepted with most usages
      Map<NubUsage, NubUsage> u2acc= Maps.newHashMap();
      Map<NubUsage, Integer> accCounts = Maps.newHashMap();
      for (NubUsage u : candidates) {
        final NubUsage accepted = u.status.isSynonym() ? db.parent(u) : u;
        u2acc.put(u, accepted);
        if (!accCounts.containsKey(accepted)) {
          accCounts.put(accepted, 1);
        } else {
          accCounts.put(accepted, accCounts.get(accepted)+1);
        }
      }
      int maxCount = MapUtils.sortByValue(accCounts, Ordering.<Integer>natural().reverse()).values().iterator().next();
      Iterator<NubUsage> iter = candidates.iterator();
      while (iter.hasNext()) {
        NubUsage u = iter.next();
        if (accCounts.get(u2acc.get(u)) != maxCount) {
          iter.remove();
        }
      }

      if (candidates.size() > 1) {
        // 3. by status: syn > acc > doubtful
        Map<NubUsage, Integer> score = Maps.newHashMap();
        for (NubUsage u : candidates) {
          score.put(u, MapUtils.getOrDefault(STATUS_ORDER, u.status, 10));
        }
        int maxScore = MapUtils.sortByValue(score).values().iterator().next();
        iter = candidates.iterator();
        while (iter.hasNext()) {
          NubUsage u = iter.next();
          if (score.get(u) != maxScore) {
            iter.remove();
          }
        }

        if (candidates.size() > 1) {
          // 4. by higher rank
          List<Rank> ranks = Lists.newArrayList();
          for (NubUsage u : candidates) {
            ranks.add(u.rank);
          }
          Collections.sort(ranks);
          Rank minRank = ranks.get(0);
          iter = candidates.iterator();
          while (iter.hasNext()) {
            NubUsage u = iter.next();
            if (minRank != u.rank) {
              iter.remove();
            }
          }
        }
      }
    }

    return candidates.get(0);
  }

  /**
   * @return true of the given usage u has a SYNONYM_OF relation to the given acc usage
   */
  private boolean hasAccepted(NubUsage u, NubUsage acc) {
    if (u.node.equals(acc.node)) return true;
    try (ResourceIterator<Node> iter = Traversals.ACCEPTED.traverse(u.node).nodes().iterator()) {
      while (iter.hasNext()) {
        if (iter.next().equals(acc.node)) {
          return true;
        }
      }
    }
    return false;
  }

  private String names(Collection<NubUsage> usages) {
    return SEMICOLON_JOIN.join(usages.stream().map(u -> u.parsedName.fullName()).iterator());
  }

  /**
   * Assigns a unique usageKey to all nodes by matching a usage to the previous backbone to keep stable identifiers.
   */
  private void assignUsageKeys() {
    LOG.info("Assigning final clb ids to all nub usages...");
    for (Map.Entry<Long, NubUsage> entry : db.dao().nubUsages()) {
      NubUsage u = entry.getValue();
      if (u.rank != Rank.KINGDOM) {
        u.usageKey = idGen.issue(u.parsedName.canonicalName(), u.parsedName.getAuthorship(), u.parsedName.getYear(), u.rank, u.kingdom);
        db.dao().update(entry.getKey(), u);
      }
    }

    // for pro parte synonyms we need to assign extra keys, one per relation!
    // http://dev.gbif.org/issues/browse/POR-2872
    try (Transaction tx = db.beginTx()) {
      try (ResourceIterator<Relationship> rels = db.dao().listAllRelationships(RelType.PROPARTE_SYNONYM_OF)) {
        while (rels.hasNext()) {
          Relationship rel = rels.next();
          NubUsage u = db.dao().readNub(rel.getStartNode());
          NubUsage acc = db.dao().readNub(rel.getEndNode());
          if (acc.usageKey <= 0) {
            LOG.warn("No usage key assigned to {}", acc);
          }
          int ppKey = idGen.issue(u.parsedName.canonicalName(), u.parsedName.getAuthorship(), u.parsedName.getYear(), u.rank, u.kingdom, acc.usageKey);
          LOG.debug("Assign id {} for pro parte relation of primary usage {} {}", ppKey, u.usageKey, u.parsedName.canonicalNameComplete());
          rel.setProperty(NeoProperties.USAGE_KEY, ppKey);
        }
      }
      tx.success();
    }
  }

  private void builtUsageMetrics() {
    LOG.info("Walk all accepted taxa and build usage metrics");
    UsageMetricsHandler metricsHandler = new UsageMetricsHandler(db.dao());
    // TaxonWalker deals with transactions
    TreeWalker.walkAcceptedTree(db.dao().getNeo(), metricsHandler);
    NormalizerStats normalizerStats = metricsHandler.getStats(0, null);
    LOG.info("Walked all taxa (root={}, total={}, synonyms={}) and built usage metrics", normalizerStats.getRoots(), normalizerStats.getCount(), normalizerStats.getSynonyms());
  }

}