package org.gbif.occurrence.persistence; import org.gbif.api.vocabulary.EndpointType; import org.gbif.api.vocabulary.OccurrenceSchemaType; import org.gbif.occurrence.common.config.OccHBaseConfiguration; import org.gbif.occurrence.common.identifier.PublisherProvidedUniqueIdentifier; import org.gbif.occurrence.common.identifier.UniqueIdentifier; import org.gbif.occurrence.persistence.api.Fragment; import org.gbif.occurrence.persistence.api.FragmentPersistenceService; import org.gbif.occurrence.persistence.keygen.HBaseLockingKeyService; import java.io.IOException; import java.util.Date; import java.util.List; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import com.google.common.base.Charsets; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; /** * Note not a real JUnit test, but an extremely expensive performance test that should use the real cluster. */ public class FragmentPersistenceImplThroughputTest { private static final OccHBaseConfiguration CFG = new OccHBaseConfiguration(); static { CFG.setEnvironment("keygen_test"); } private final FragmentPersistenceServiceImpl fragService; private static final AtomicInteger fragsPersisted = new AtomicInteger(0); public FragmentPersistenceImplThroughputTest(int hbasePoolSize) throws IOException { Configuration hBaseConfiguration = HBaseConfiguration.create(); hBaseConfiguration.set("hbase.hconnection.threads.max", Integer.toString(hbasePoolSize)); Connection connection = ConnectionFactory.createConnection(hBaseConfiguration); HBaseLockingKeyService keyService = new HBaseLockingKeyService(CFG, connection); fragService = new FragmentPersistenceServiceImpl(CFG, connection, new OccurrenceKeyPersistenceServiceImpl(keyService)); } public void testNoContention(int threadCount) throws InterruptedException { // test generating ids as fast as possible in the ideal case of no waiting for contention (all ids are globally // unique) int genPerThread = 10000; List<Thread> threads = Lists.newArrayList(); for (int i = 0; i < threadCount; i++) { Thread thread = new Thread(new FragmentPersister(fragService, UUID.randomUUID(), genPerThread)); thread.start(); threads.add(thread); } Thread rateReporter = new Thread(new RateReporter(threadCount)); rateReporter.start(); for (Thread thread : threads) { thread.join(); } rateReporter.interrupt(); rateReporter.join(); } private static class RateReporter implements Runnable { private final int threadCount; private RateReporter(int threadCount) { this.threadCount = threadCount; } @Override public void run() { int periods = 0; int runningAvg = 0; int buildAverageAfter = 15; int lastCount = 0; boolean interrupted = false; while (!interrupted) { try { TimeUnit.SECONDS.sleep(1); } catch (InterruptedException e) { interrupted = true; } int generated = fragsPersisted.intValue() - lastCount; if (periods > buildAverageAfter) { if (runningAvg == 0) { runningAvg = generated; } else { int netPeriods = periods - buildAverageAfter; runningAvg = (netPeriods * runningAvg + generated) / (netPeriods + 1); } System.out.println("Frags persisted at [" + generated + " frags/s] for running avg of [" + runningAvg + " frags/s] and per thread [" + (runningAvg / threadCount) + " frags/sec] with frag persist time of [" + (threadCount * 1000 / runningAvg) + " ms/frag]"); } else { System.out.println("Stats in [" + (buildAverageAfter - periods) + "] seconds."); } periods++; lastCount = fragsPersisted.intValue(); } } } private static class FragmentPersister implements Runnable { private final FragmentPersistenceService fragService; private final UUID datasetKey; private final int genCount; private final Fragment fragment; private FragmentPersister(FragmentPersistenceService fragService, UUID datasetKey, int genCount) { this.fragService = fragService; this.datasetKey = datasetKey; this.genCount = genCount; String xml = "<DarwinRecord>\n" + " <GlobalUniqueIdentifier>ZMA:Entomology:Diptera_Tipulidae_NL_TEMP_09183</GlobalUniqueIdentifier>\n" + " <DateLastModified>2007-05-02</DateLastModified>\n" + " <BasisOfRecord>Museum specimen</BasisOfRecord>\n" + " <InstitutionCode>ZMA</InstitutionCode>\n" + " <CollectionCode>Tipulidae</CollectionCode>\n" + " <CatalogNumber>TEMP_09183</CatalogNumber>\n" + " <ScientificName>Ctenophora (Cnemoncosis) festiva Meigen, 1804</ScientificName>\n" + " <Kingdom>Animalia</Kingdom>\n" + " <Phylum nil=\"true\"/>\n" + " <Class>Insecta</Class>\n" + " <Order nil=\"true\"/>\n" + " <Family>Tipulidae</Family>\n" + " <Genus>Ctenophora</Genus>\n" + " <SpecificEpithet>festiva</SpecificEpithet>\n" + " <InfraspecificEpithet nil=\"true\"/>\n" + " <Continent nil=\"true\"/>\n" + " <WaterBody nil=\"true\"/>\n" + " <Country>Netherlands</Country>\n" + " <StateProvince>Utrecht</StateProvince>\n" + " <Locality>Leusden</Locality>\n" + " <MinimumElevationInMeters nil=\"true\"/>\n" + " <MaximumElevationInMeters nil=\"true\"/>\n" + " <MinimumDepthInMeters nil=\"true\"/>\n" + " <MaximumDepthInMeters nil=\"true\"/>\n" + " <CollectingMethod>unrecorded</CollectingMethod>\n" + " <EarliestDateCollected>1985-06-08 00:00:00</EarliestDateCollected>\n" + " <LatestDateCollected>1985-06-08 00:00:00</LatestDateCollected>\n" + " <DayOfYear nil=\"true\"/>\n" + " <Collector>Zeegers, T.</Collector>\n" + " <DecimalLatitude nil=\"true\"/>\n" + " <DecimalLongitude nil=\"true\"/>\n" + " <CoordinateUncertaintyInMeters nil=\"true\"/>\n" + " <Preparations>dry pinned</Preparations>\n" + " <TypeStatus nil=\"true\"/>\n" + "</DarwinRecord>\n"; byte[] hash = DigestUtils.md5(xml); fragment = new Fragment(datasetKey, xml.getBytes(Charsets.UTF_8), hash, Fragment.FragmentType.XML, EndpointType.DIGIR, new Date(), 1, OccurrenceSchemaType.DWC_MANIS, null, System.currentTimeMillis()); } @Override public void run() { for (int i = 0; i < genCount; i++) { UniqueIdentifier unique = new PublisherProvidedUniqueIdentifier(datasetKey, String.valueOf(i)); fragService.insert(fragment, ImmutableSet.of(unique)); // fragment.setKey(i); // fragService.update(fragment); fragsPersisted.incrementAndGet(); } } } public static void main(String[] args) throws InterruptedException, IOException { // following stats from single regionserver, single region // on pure insert, raw rate is ~3k/sec // on pure update where some fields null, raw rate is ~8k/sec // on pure update where no fields null, raw rate is ~15k/sec int hbasePoolSize = 100; int persistingThreads = 100; if (args.length == 2) { hbasePoolSize = Integer.valueOf(args[0]); persistingThreads = Integer.valueOf(args[1]); } System.out .println("Running test with hbasePool [" + hbasePoolSize + "] and persistingThreads [" + persistingThreads + "]"); FragmentPersistenceImplThroughputTest instance = new FragmentPersistenceImplThroughputTest(hbasePoolSize); instance.testNoContention(persistingThreads); } }