/* * Copyright 2014 Red Hat, Inc. and/or its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.dashbuilder.dataset; import org.dashbuilder.DataSetCore; import org.dashbuilder.dataset.engine.SharedDataSetOpEngine; import org.dashbuilder.dataset.engine.index.DataSetIndex; import org.dashbuilder.dataset.engine.index.stats.DataSetIndexStats; import org.dashbuilder.dataset.group.AggregateFunctionType; import org.junit.Before; import org.junit.Test; import static org.dashbuilder.dataset.ExpenseReportsData.*; import static org.fest.assertions.api.Assertions.*; import static org.dashbuilder.dataset.filter.FilterFactory.*; public class DataSetIndexTest { public static final String EXPENSE_REPORTS = "expense_reports_dataset"; /** * Group by department and count occurrences */ DataSetLookup groupByDeptAndCount = DataSetFactory.newDataSetLookupBuilder() .dataset(EXPENSE_REPORTS) .group(COLUMN_DEPARTMENT, "Department") .column(AggregateFunctionType.COUNT, "occurrences") .buildLookup(); /** * Group by department and sum the amount */ DataSetLookup groupByDeptAndSum = DataSetFactory.newDataSetLookupBuilder() .dataset(EXPENSE_REPORTS) .group(COLUMN_DEPARTMENT, "Department") .column(COLUMN_AMOUNT, AggregateFunctionType.AVERAGE) .buildLookup(); /** * Filter by city & department */ DataSetLookup filterByCityAndDept = DataSetFactory.newDataSetLookupBuilder() .dataset(EXPENSE_REPORTS) .filter(COLUMN_CITY, equalsTo("Barcelona")) .filter(COLUMN_DEPARTMENT, equalsTo("Engineering")) .buildLookup(); /** * Sort by amount in ascending order */ DataSetLookup sortByAmountAsc = DataSetFactory.newDataSetLookupBuilder() .dataset(EXPENSE_REPORTS) .sort(COLUMN_AMOUNT, "asc") .buildLookup(); /** * Sort by amount in descending order */ DataSetLookup sortByAmountDesc = DataSetFactory.newDataSetLookupBuilder() .dataset(EXPENSE_REPORTS) .sort(COLUMN_AMOUNT, "desc") .buildLookup(); SharedDataSetOpEngine dataSetOpEngine = DataSetCore.get().getSharedDataSetOpEngine(); @Before public void setUp() throws Exception { DataSet dataSet = ExpenseReportsData.INSTANCE.toDataSet(); dataSet.setUUID(EXPENSE_REPORTS); dataSetOpEngine.getIndexRegistry().put(dataSet); } @Test public void testGroupPerformance() throws Exception { // Apply two different group operations and measure the elapsed time. long begin = System.nanoTime(); int lookupTimes = 1000; for (int i = 0; i < lookupTimes; i++) { dataSetOpEngine.execute(EXPENSE_REPORTS, groupByDeptAndCount.getOperationList()); dataSetOpEngine.execute(EXPENSE_REPORTS, groupByDeptAndSum.getOperationList()); } long time = System.nanoTime()-begin; // Check out the resulting stats DataSetIndex dataSetIndex = dataSetOpEngine.getIndexRegistry().get(EXPENSE_REPORTS); DataSetIndexStats stats = dataSetIndex.getStats(); DataSet dataSet = dataSetIndex.getDataSet(); System.out.println(stats.toString("\n")); // Assert the reuse of group operations and aggregate calculations is working. assertThat(stats.getNumberOfGroupOps()).isEqualTo(1); assertThat(stats.getNumberOfAggFunctions()).isEqualTo(10); // The build time should be shorter than the overall lookup time. assertThat(stats.getBuildTime()).isLessThan(time); // The reuse rate must reflect the number of times the lookups are being reused. assertThat(stats.getReuseRate()).isGreaterThanOrEqualTo(lookupTimes-1); // The index size must not be greater than the 20% of the dataset's size assertThat(stats.getIndexSize()).isLessThan(dataSet.getEstimatedSize()/5); } @Test public void testFilterPerformance() throws Exception { // Apply a filter operation and measure the elapsed time. long begin = System.nanoTime(); int lookupTimes = 1000; for (int i = 0; i < lookupTimes; i++) { dataSetOpEngine.execute(EXPENSE_REPORTS, filterByCityAndDept.getOperationList()); } long time = System.nanoTime()-begin; // Check out the resulting stats DataSetIndex dataSetIndex = dataSetOpEngine.getIndexRegistry().get(EXPENSE_REPORTS); DataSetIndexStats stats = dataSetIndex.getStats(); DataSet dataSet = dataSetIndex.getDataSet(); System.out.println(stats.toString("\n")); // Assert reuse is working. assertThat(stats.getNumberOfFilterOps()).isEqualTo(2); // The build time should be shorter than the overall lookup time. assertThat(stats.getBuildTime()).isLessThan(time); // The reuse rate must reflect the number of times the lookups are being reused. assertThat(stats.getReuseRate()).isGreaterThanOrEqualTo(lookupTimes-1); // The index size must not be greater than the 20% of the dataset's size assertThat(stats.getIndexSize()).isLessThan(dataSet.getEstimatedSize()/5); } @Test public void testSortPerformance() throws Exception { // Apply the same sort operation several times and measure the elapsed time. long begin = System.nanoTime(); int lookupTimes = 1000; for (int i = 0; i < lookupTimes; i++) { dataSetOpEngine.execute(EXPENSE_REPORTS, sortByAmountAsc.getOperationList()); dataSetOpEngine.execute(EXPENSE_REPORTS, sortByAmountDesc.getOperationList()); } long time = System.nanoTime()-begin; // Check out the resulting stats DataSetIndex dataSetIndex = dataSetOpEngine.getIndexRegistry().get(EXPENSE_REPORTS); DataSetIndexStats stats = dataSetIndex.getStats(); DataSet dataSet = dataSetIndex.getDataSet(); System.out.println(stats.toString("\n")); // Assert the reuse of sort operations is working. assertThat(stats.getNumberOfSortOps()).isEqualTo(2); // The build time should be shorter than the overall lookup time. assertThat(stats.getBuildTime()).isLessThan(time); // The reuse rate must reflect the number of times the lookups are being reused. assertThat(stats.getReuseRate()).isGreaterThanOrEqualTo(lookupTimes - 1); // The index size must not be greater than the 20% of the dataset's size assertThat(stats.getIndexSize()).isLessThan(dataSet.getEstimatedSize()/5); } }