/** * Copyright (C) 2009-2013 FoundationDB, LLC * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.foundationdb.server.test.it.store; import com.foundationdb.ais.model.GroupIndex; import com.foundationdb.ais.model.Index; import com.foundationdb.ais.model.TableName; import com.foundationdb.server.service.transaction.TransactionService.CloseableTransaction; import com.foundationdb.server.store.statistics.*; import com.foundationdb.server.store.statistics.histograms.Sampler; import com.foundationdb.server.test.it.ITBase; import com.foundationdb.util.AssertUtils; import org.junit.Before; import org.junit.Test; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import static org.junit.Assert.assertEquals; public final class IndexHistogramsIT extends ITBase { @Test public void customersPk() { validateHistogram("customers", PK, 1, entry("{\"0000\"}", 1, 0, 0), entry("{\"0010\"}", 1, 9, 9), entry("{\"0020\"}", 1, 9, 9), entry("{\"0030\"}", 1, 9, 9), entry("{\"0041\"}", 1, 10, 10), entry("{\"0051\"}", 1, 9, 9), entry("{\"0061\"}", 1, 9, 9), entry("{\"0072\"}", 1, 10, 10), entry("{\"0082\"}", 1, 9, 9), entry("{\"0092\"}", 1, 9, 9), entry("{\"0103\"}", 1, 10, 10), entry("{\"0113\"}", 1, 9, 9), entry("{\"0123\"}", 1, 9, 9), entry("{\"0134\"}", 1, 10, 10), entry("{\"0144\"}", 1, 9, 9), entry("{\"0154\"}", 1, 9, 9), entry("{\"0165\"}", 1, 10, 10), entry("{\"0175\"}", 1, 9, 9), entry("{\"0185\"}", 1, 9, 9), entry("{\"0196\"}", 1, 10, 10), entry("{\"0206\"}", 1, 9, 9), entry("{\"0216\"}", 1, 9, 9), entry("{\"0227\"}", 1, 10, 10), entry("{\"0237\"}", 1, 9, 9), entry("{\"0247\"}", 1, 9, 9), entry("{\"0258\"}", 1, 10, 10), entry("{\"0268\"}", 1, 9, 9), entry("{\"0278\"}", 1, 9, 9), entry("{\"0289\"}", 1, 10, 10), entry("{\"0299\"}", 1, 9, 9), entry("{\"0309\"}", 1, 9, 9), entry("{\"0319\"}", 1, 9, 9)); } @Test public void customersNameDob_1() { // 320 customers with 4 names, evenly distributed, is 80 customers per name validateHistogram("customers", "name_dob", 1, entry("{null}", 80, 0, 0), entry("{\"Bob\"}", 80, 0, 0), entry("{\"Carla\"}", 80, 0, 0), entry("{\"Dot\"}", 80, 0, 0) ); } @Test public void customersNameDob_2() { // 320 customers with 4 names, evenly distributed, is 80 customers per name // dob is 1983 if cid is divisible by 4, otherwise it's 1900 + (cid / 2). // Since both patterns have 4, that means all 80 null-names will have 1983 (they're all with cids divisible by // 4), and the rest will be striped evenly. Each bucket will represent 7 or 8 entries. // This algorithm is a bit tricky to pin down, so I'm putting the results in manually HistogramEntryDescription[] expected = new HistogramEntryDescription[32]; int i = 0; expected[i++] = entry("{null,\"1983\"}", 80, 0, 0); expected[i++] = entry("{\"Bob\",\"1914\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"1930\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"1946\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"1960\"}", 1, 6, 6); expected[i++] = entry("{\"Bob\",\"1976\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"1992\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"2008\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"2022\"}", 1, 6, 6); expected[i++] = entry("{\"Bob\",\"2038\"}", 1, 7, 7); expected[i++] = entry("{\"Bob\",\"2054\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"1911\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"1925\"}", 1, 6, 6); expected[i++] = entry("{\"Carla\",\"1941\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"1957\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"1973\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"1987\"}", 1, 6, 6); expected[i++] = entry("{\"Carla\",\"2003\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"2019\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"2035\"}", 1, 7, 7); expected[i++] = entry("{\"Carla\",\"2049\"}", 1, 6, 6); expected[i++] = entry("{\"Dot\",\"1905\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"1921\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"1937\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"1951\"}", 1, 6, 6); expected[i++] = entry("{\"Dot\",\"1967\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"1983\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"1999\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"2013\"}", 1, 6, 6); expected[i++] = entry("{\"Dot\",\"2029\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"2045\"}", 1, 7, 7); expected[i++] = entry("{\"Dot\",\"2059\"}", 1, 6, 6); validateHistogram("customers", "name_dob", 2, expected); } @Test public void ordersPk() { validateHistogram("orders", PK, 1, entry("{(long)0}", 1, 0, 0), entry("{(long)30}", 1, 29, 29), entry("{(long)61}", 1, 30, 30), entry("{(long)92}", 1, 30, 30), entry("{(long)123}", 1, 30, 30), entry("{(long)154}", 1, 30, 30), entry("{(long)185}", 1, 30, 30), entry("{(long)216}", 1, 30, 30), entry("{(long)247}", 1, 30, 30), entry("{(long)278}", 1, 30, 30), entry("{(long)309}", 1, 30, 30), entry("{(long)340}", 1, 30, 30), entry("{(long)371}", 1, 30, 30), entry("{(long)402}", 1, 30, 30), entry("{(long)433}", 1, 30, 30), entry("{(long)464}", 1, 30, 30), entry("{(long)495}", 1, 30, 30), entry("{(long)526}", 1, 30, 30), entry("{(long)557}", 1, 30, 30), entry("{(long)588}", 1, 30, 30), entry("{(long)619}", 1, 30, 30), entry("{(long)650}", 1, 30, 30), entry("{(long)681}", 1, 30, 30), entry("{(long)712}", 1, 30, 30), entry("{(long)743}", 1, 30, 30), entry("{(long)774}", 1, 30, 30), entry("{(long)805}", 1, 30, 30), entry("{(long)836}", 1, 30, 30), entry("{(long)867}", 1, 30, 30), entry("{(long)898}", 1, 30, 30), entry("{(long)929}", 1, 30, 30), entry("{(long)959}", 1, 29, 29)); } @Test public void ordersPlaced() { List<HistogramEntryDescription> expected = new ArrayList<>(); // div-by-5s first expected.add(entry("{\"0000\"}", 120, 0, 0)); // build up every 20 entries, plus an entry for the end int lessThans = 0; int lessThansOffset = 0; int max = CUSTOMERS_COUNT * ORDERS_COUNT; for(int oid=0; oid < max; ++oid) { if (oid == 5) { expected.add(entry("{\"0005\"}", 121, lessThans, lessThans)); lessThansOffset = lessThans; continue; } if (divisibleBy(oid, 4)) continue; boolean hasEntry = (oid == 959) || lessThans == 23; // last value is 559, but it's divisible by 7 if (hasEntry) { int lessCount = lessThans - lessThansOffset; expected.add(entry(String.format("{\"%04d\"}", oid), 1, lessCount, lessCount)); lessThansOffset = 0; lessThans = 0; } else { ++lessThans; } } validateHistogram("orders", "placed", 1, expected.toArray(new HistogramEntryDescription[expected.size()])); } @Test public void namePlacedGI_1() { // 320*3=960 customers with 4 names, evenly distributed, is 240 customers per name validateHistogram(namePlacedGi, 1, entry("{null}", 240, 0, 0), entry("{\"Bob\"}", 240, 0, 0), entry("{\"Carla\"}", 240, 0, 0), entry("{\"Dot\"}", 240, 0, 0) ); } @Test public void namePlacedGI_2() { // I ran the tested, eyeballed the results, and copied them here. validateHistogram(namePlacedGi, 2, entry("{null,\"0000\"}", 40, 0, 0), entry("{null,\"0005\"}", 40, 2, 2), entry("{null,\"0158\"}", 1, 25, 25), entry("{null,\"0326\"}", 1, 27, 27), entry("{null,\"0493\"}", 1, 26, 26), entry("{null,\"0661\"}", 1, 27, 27), entry("{null,\"0829\"}", 1, 27, 27), entry("{\"Bob\",\"0000\"}", 40, 21, 21), entry("{\"Bob\",\"0005\"}", 41, 1, 1), entry("{\"Bob\",\"0039\"}", 1, 4, 4), entry("{\"Bob\",\"0207\"}", 1, 27, 27), entry("{\"Bob\",\"0375\"}", 1, 27, 27), entry("{\"Bob\",\"0533\"}", 1, 26, 26), entry("{\"Bob\",\"0701\"}", 1, 27, 27), entry("{\"Bob\",\"0869\"}", 1, 27, 27), entry("{\"Carla\",\"0000\"}", 40, 14, 14), entry("{\"Carla\",\"0005\"}", 40, 0, 0), entry("{\"Carla\",\"0078\"}", 1, 12, 12), entry("{\"Carla\",\"0246\"}", 1, 27, 27), entry("{\"Carla\",\"0414\"}", 1, 27, 27), entry("{\"Carla\",\"0571\"}", 1, 26, 26), entry("{\"Carla\",\"0739\"}", 1, 27, 27), entry("{\"Carla\",\"0907\"}", 1, 27, 27), entry("{\"Dot\",\"0081\"}", 1, 26, 26), entry("{\"Dot\",\"0190\"}", 1, 27, 27), entry("{\"Dot\",\"0299\"}", 1, 27, 27), entry("{\"Dot\",\"0407\"}", 1, 26, 26), entry("{\"Dot\",\"0525\"}", 1, 27, 27), entry("{\"Dot\",\"0634\"}", 1, 27, 27), entry("{\"Dot\",\"0742\"}", 1, 26, 26), entry("{\"Dot\",\"0851\"}", 1, 27, 27), entry("{\"Dot\",\"0959\"}", 1, 26, 26) ); } @Test public void edgeAnalysis() { int cTable = getTable(SCHEMA, "customers").getTableId(); int oTable = getTable(SCHEMA, "orders").getTableId(); int maxCid = bucketCount() * Sampler.OVERSAMPLE_FACTOR; insertRows(cTable, oTable, CUSTOMERS_COUNT, maxCid); validateHistogram("customers", PK, 1, entry("{\"0000\"}", 1, 0, 0), entry("{\"0051\"}", 1, 50, 50), entry("{\"0103\"}", 1, 51, 51), entry("{\"0154\"}", 1, 50, 50), entry("{\"0206\"}", 1, 51, 51), entry("{\"0258\"}", 1, 51, 51), entry("{\"0309\"}", 1, 50, 50), entry("{\"0361\"}", 1, 51, 51), entry("{\"0412\"}", 1, 50, 50), entry("{\"0464\"}", 1, 51, 51), entry("{\"0516\"}", 1, 51, 51), entry("{\"0567\"}", 1, 50, 50), entry("{\"0619\"}", 1, 51, 51), entry("{\"0670\"}", 1, 50, 50), entry("{\"0722\"}", 1, 51, 51), entry("{\"0774\"}", 1, 51, 51), entry("{\"0825\"}", 1, 50, 50), entry("{\"0877\"}", 1, 51, 51), entry("{\"0929\"}", 1, 51, 51), entry("{\"0980\"}", 1, 50, 50), entry("{\"1032\"}", 1, 51, 51), entry("{\"1083\"}", 1, 50, 50), entry("{\"1135\"}", 1, 51, 51), entry("{\"1187\"}", 1, 51, 51), entry("{\"1238\"}", 1, 50, 50), entry("{\"1290\"}", 1, 51, 51), entry("{\"1341\"}", 1, 50, 50), entry("{\"1393\"}", 1, 51, 51), entry("{\"1445\"}", 1, 51, 51), entry("{\"1496\"}", 1, 50, 50), entry("{\"1548\"}", 1, 51, 51), entry("{\"1599\"}", 1, 50, 50)); } @Test public void largeAnalysis() { int cTable = getTable(SCHEMA, "customers").getTableId(); int oTable = getTable(SCHEMA, "orders").getTableId(); int maxCid = bucketCount() * Sampler.OVERSAMPLE_FACTOR+1; insertRows(cTable, oTable, CUSTOMERS_COUNT, maxCid); validateHistogram("customers", PK, 1, entry("{\"0000\"}", 1, 0, 0), entry("{\"0051\"}", 1, 50,50), entry("{\"0103\"}", 1, 51,51), entry("{\"0154\"}", 1, 50,50), entry("{\"0206\"}", 1, 51,51), entry("{\"0258\"}", 1, 51,51), entry("{\"0309\"}", 1, 50,50), entry("{\"0361\"}", 1, 51,51), entry("{\"0413\"}", 1, 51,51), entry("{\"0464\"}", 1, 50,50), entry("{\"0516\"}", 1, 51,51), entry("{\"0568\"}", 1, 51,51), entry("{\"0619\"}", 1, 50,50), entry("{\"0671\"}", 1, 51,51), entry("{\"0723\"}", 1, 51,51), entry("{\"0774\"}", 1, 50,50), entry("{\"0826\"}", 1, 51,51), entry("{\"0877\"}", 1, 50,50), entry("{\"0929\"}", 1, 51,51), entry("{\"0981\"}", 1, 51,51), entry("{\"1032\"}", 1, 50,50), entry("{\"1084\"}", 1, 51,51), entry("{\"1136\"}", 1, 51,51), entry("{\"1187\"}", 1, 50,50), entry("{\"1239\"}", 1, 51,51), entry("{\"1291\"}", 1, 51,51), entry("{\"1342\"}", 1, 50,50), entry("{\"1394\"}", 1, 51,51), entry("{\"1446\"}", 1, 51,51), entry("{\"1497\"}", 1, 50,50), entry("{\"1549\"}", 1, 51,51), entry("{\"1600\"}", 1, 50,50)); } @Test public void oversampleNotEvenlyDistributed() { int cTable = getTable(SCHEMA, "customers").getTableId(); int oTable = getTable(SCHEMA, "orders").getTableId(); double interval = 2.02; double oversamples = bucketCount() * Sampler.OVERSAMPLE_FACTOR; int maxCid = (int) Math.round(interval * oversamples); insertRows(cTable, oTable, CUSTOMERS_COUNT, maxCid); validateHistogram("customers", PK, 1, (HistogramEntryDescription[])null); } /** * <p>Initializes and populates the database.</p> * * <p>Customers are created as: * <ul> * <li><b>cid</b> is monotonically increasing</li> * <li><b>name</b> is one of null, "Bob", "Carla" or "Dot", rotated by cid</li> * <li><b>dob_year</b> is 1983 if cid is divisible by 4, otherwise it's 1900 + (cid / 2)</li> * </ul> * </p> * * <p>There are three orders per customer, and they are created as: * <ul> * <li><b>oid</b> is monotonically increasing</li> * <li><b>cid</b> corresponds to the parent cid</li> * <li><b>placed</b> is * <ul> * <li>0 if oid is divisible by 5</li> * <li>50 if oid is divisible by 7 (and not by 5)</li> * <li>oid otherwise</li> * </ul> * </li> * </ul> * </p> */ @Before public void createDatabase() { // schema: tables int cTable = createTable(SCHEMA, "customers", "cid varchar(4) not null primary key, name varchar(64), dob_year varchar(4)"); createIndex(SCHEMA, "customers", "name_dob", "name", "dob_year"); int oTable = createTable(SCHEMA, "orders", "oid int not null primary key, cid varchar(4), placed varchar(64)", akibanFK("cid", "customers", "cid")); createIndex(SCHEMA, "orders", "placed", "placed"); // schema: GIs TableName groupName = getTable(SCHEMA, "customers").getGroup().getName(); namePlacedGi = createLeftGroupIndex(groupName, "namePlaced", "customers.name", "orders.placed"); // insert data int startingCid = 0; int endingCid = CUSTOMERS_COUNT; insertRows(cTable, oTable, startingCid, endingCid); } private void insertRows(int cTable, int oTable, int startingCid, int endingCid) { try(CloseableTransaction txn = txnService().beginCloseableTransaction(session())) { insertRowsInternal(cTable, oTable, startingCid, endingCid); txn.commit(); } } private void insertRowsInternal(int cTable, int oTable, int startingCid, int endingCid) { String[] names = {null, "Bob", "Carla", "Dot"}; for (int cid=startingCid; cid < endingCid; ++cid) { // customer String name = names[ cid % names.length ]; int dobYear = (divisibleBy(cid, 4)) ? 1983 : (1900 + cid/2); writeRow(cTable, s(cid), name, s(dobYear)); // customer's orders for (int orderNum=0; orderNum < ORDERS_COUNT; ++orderNum) { long oid = oidCounter++; long placed; if (divisibleBy(oid, 8)) placed = 0; else if (divisibleBy(oid, 4)) placed = 5; else placed = oid; writeRow(oTable, oid, s(cid), s(placed)); } txnService().periodicallyCommit(session()); } } private boolean divisibleBy(long var, long denominator) { return var % denominator == 0; } private IndexStatisticsService statsService() { return serviceManager().getServiceByClass(IndexStatisticsService.class); } private String s(long intValue) { assert intValue >= 0 && intValue < 10000 : intValue; return String.format("%04d", intValue); } private void validateHistogram( String tableName, String indexName, int expectedColumns, HistogramEntryDescription... entries ) { Index index = PK.equals(indexName) ? getTable(SCHEMA, tableName).getPrimaryKey().getIndex() : getTable(SCHEMA, tableName).getIndex(indexName); validateHistogram( index, expectedColumns, entries ); } private void validateHistogram( final Index index, int expectedColumns, HistogramEntryDescription... entries ) { final IndexStatisticsService statsService = statsService(); if (analyzedIndexes.add(index)) { ddl().updateTableStatistics( session(), index.leafMostTable().getName(), Collections.singleton(index.getIndexName().getName()) ); } if (entries != null) { IndexStatistics stats = statsService.getIndexStatistics(session(), index); Histogram histogram = stats.getHistogram(0, expectedColumns); assertEquals("histogram column count", expectedColumns, histogram.getColumnCount()); List<HistogramEntry> actualEntries = histogram.getEntries(); List<HistogramEntryDescription> expectedList = Arrays.asList(entries); AssertUtils.assertCollectionEquals("entries", expectedList, actualEntries); } } private HistogramEntryDescription entry(String keyString, long equalCount, long lessCount, long distinctCount) { return new HistogramEntryDescription(keyString, equalCount, lessCount, distinctCount); } private int bucketCount() { return serviceManager().getServiceByClass(IndexStatisticsService.class).bucketCount(); } private GroupIndex namePlacedGi; private Set<Index> analyzedIndexes = new HashSet<>(); private int oidCounter = 0; private static final String SCHEMA = "indexes"; private static final String PK = "PK"; private static final int CUSTOMERS_COUNT = 320; private static final int ORDERS_COUNT = 3; }