/*
* Copyright (C) 2015 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.cube;
import io.datakernel.aggregation.AggregationChunk;
import io.datakernel.aggregation.AggregationChunkStorage;
import io.datakernel.aggregation.LocalFsChunkStorage;
import io.datakernel.aggregation.fieldtype.FieldTypes;
import io.datakernel.async.IgnoreCompletionCallback;
import io.datakernel.async.ResultCallbackFuture;
import io.datakernel.codegen.DefiningClassLoader;
import io.datakernel.eventloop.Eventloop;
import io.datakernel.logfs.LogManager;
import io.datakernel.logfs.LogToCubeMetadataStorage;
import io.datakernel.logfs.LogToCubeRunner;
import io.datakernel.stream.StreamConsumers;
import io.datakernel.stream.StreamProducers;
import org.jooq.Configuration;
import org.jooq.SQLDialect;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static io.datakernel.aggregation.AggregationPredicates.alwaysTrue;
import static io.datakernel.aggregation.fieldtype.FieldTypes.ofDouble;
import static io.datakernel.aggregation.fieldtype.FieldTypes.ofLong;
import static io.datakernel.aggregation.measure.Measures.sum;
import static io.datakernel.cube.Cube.AggregationConfig.id;
import static io.datakernel.cube.CubeTestUtils.*;
import static io.datakernel.eventloop.FatalErrorHandlers.rethrowOnAnyError;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
/**
 * Integration test for a {@link Cube} whose single aggregation ("date") is
 * partitioned by the "date" dimension.
 *
 * <p>The test writes two batches of random {@link LogItem}s (100 and 300 items)
 * through the log manager, aggregates them into the cube, and then verifies that:
 * <ul>
 * <li>the number of chunks before and after consolidation matches the expected counts;</li>
 * <li>the per-date "clicks" totals returned by the cube equal totals computed in memory;</li>
 * <li>after consolidation, the first primary-key component is the same for the minimum
 * and maximum key of every chunk, i.e. each chunk holds a single date.</li>
 * </ul>
 *
 * <p>NOTE(review): the test builds a jOOQ configuration from {@code test.properties}
 * with the MySQL dialect, so it presumably needs a reachable database - confirm
 * against the helper that builds the configuration.
 */
@SuppressWarnings("ArraysAsListWithZeroOrOneArgument")
public class CubePartitioningTest {
    @Rule
    public TemporaryFolder temporaryFolder = new TemporaryFolder();

    private static final String DATABASE_PROPERTIES_PATH = "test.properties";
    private static final SQLDialect DATABASE_DIALECT = SQLDialect.MYSQL;
    private static final String LOG_PARTITION_NAME = "partitionA";
    private static final List<String> LOG_PARTITIONS = asList(LOG_PARTITION_NAME);
    private static final String LOG_NAME = "testlog";

    /**
     * Runs the whole partitioning scenario: ingest, query, consolidate, query again,
     * and finally inspect the resulting chunks.
     *
     * @throws Exception if any of the cube, log or storage operations fails
     */
    @Test
    public void test() throws Exception {
        ExecutorService executor = Executors.newCachedThreadPool();
        try {
            DefiningClassLoader classLoader = DefiningClassLoader.create();
            Eventloop eventloop = Eventloop.create().withFatalErrorHandler(rethrowOnAnyError());
            Path aggregationsDir = temporaryFolder.newFolder().toPath();
            Path logsDir = temporaryFolder.newFolder().toPath();

            Configuration jooqConfiguration = getJooqConfiguration(DATABASE_PROPERTIES_PATH, DATABASE_DIALECT);
            AggregationChunkStorage aggregationChunkStorage =
                    LocalFsChunkStorage.create(eventloop, executor, aggregationsDir);
            CubeMetadataStorageSql cubeMetadataStorageSql =
                    CubeMetadataStorageSql.create(eventloop, executor, jooqConfiguration, "processId");
            LogToCubeMetadataStorage logToCubeMetadataStorage =
                    getLogToCubeMetadataStorage(eventloop, executor, jooqConfiguration, cubeMetadataStorageSql);

            Cube cube = Cube.create(eventloop, executor, classLoader, cubeMetadataStorageSql, aggregationChunkStorage)
                    .withDimension("date", FieldTypes.ofLocalDate())
                    .withDimension("advertiser", FieldTypes.ofInt())
                    .withDimension("campaign", FieldTypes.ofInt())
                    .withDimension("banner", FieldTypes.ofInt())
                    .withMeasure("impressions", sum(ofLong()))
                    .withMeasure("clicks", sum(ofLong()))
                    .withMeasure("conversions", sum(ofLong()))
                    .withMeasure("revenue", sum(ofDouble()))
                    .withRelation("campaign", "advertiser")
                    .withRelation("banner", "campaign")
                    .withAggregation(id("date")
                            .withDimensions("date")
                            .withMeasures(asList("impressions", "clicks", "conversions", "revenue"))
                            .withPartitioningKey("date"));

            LogManager<LogItem> logManager = getLogManager(LogItem.class, eventloop, executor, classLoader, logsDir);
            LogToCubeRunner<LogItem> logToCubeRunner = LogToCubeRunner.create(eventloop, cube, logManager,
                    LogItemSplitter.factory(), LOG_NAME, LOG_PARTITIONS, logToCubeMetadataStorage);

            // Save and aggregate logs in two separate batches
            List<LogItem> listOfRandomLogItems = LogItem.getListOfRandomLogItems(100);
            saveAndAggregate(eventloop, logManager, logToCubeRunner, listOfRandomLogItems);
            List<LogItem> listOfRandomLogItems2 = LogItem.getListOfRandomLogItems(300);
            saveAndAggregate(eventloop, logManager, logToCubeRunner, listOfRandomLogItems2);

            // Load metadata
            cube.loadChunks(IgnoreCompletionCallback.create());
            eventloop.run();

            Map<Long, AggregationChunk> chunks = cube.getAggregation("date").getMetadata().getChunks();
            assertEquals(22, chunks.size());

            // Aggregate manually to obtain the reference totals
            Map<Integer, Long> map = new HashMap<>();
            aggregateToMap(map, listOfRandomLogItems);
            aggregateToMap(map, listOfRandomLogItems2);

            // Check query results before consolidation
            assertClicksMatch(eventloop, cube, classLoader, map);

            assertEquals(6, consolidateUntilDone(eventloop, cube));

            // Load metadata
            cube.loadChunks(IgnoreCompletionCallback.create());
            eventloop.run();

            // Check query results after consolidation
            assertClicksMatch(eventloop, cube, classLoader, map);

            // Check that every chunk contains only one date
            chunks = cube.getAggregation("date").getMetadata().getChunks();
            assertEquals(11, chunks.size());
            for (AggregationChunk chunk : chunks.values()) {
                assertEquals(chunk.getMinPrimaryKey().get(0), chunk.getMaxPrimaryKey().get(0));
            }
        } finally {
            // Release the pool threads even when an assertion or a cube operation fails.
            executor.shutdown();
        }
    }

    /**
     * Streams the given items into the log partition and then asks the runner to
     * process the log, running the event loop after each step.
     *
     * @param eventloop       event loop that drives the streams and callbacks
     * @param logManager      log manager that provides the partition consumer
     * @param logToCubeRunner runner whose {@code processLog} is invoked after the write
     * @param logItems        items to write
     * @throws Exception if writing or processing the log fails
     */
    private void saveAndAggregate(Eventloop eventloop, LogManager<LogItem> logManager,
                                  LogToCubeRunner<LogItem> logToCubeRunner,
                                  List<LogItem> logItems) throws Exception {
        StreamProducers.OfIterator<LogItem> producer =
                new StreamProducers.OfIterator<>(eventloop, logItems.iterator());
        producer.streamTo(logManager.consumer(LOG_PARTITION_NAME));
        eventloop.run();
        logToCubeRunner.processLog(IgnoreCompletionCallback.create());
        eventloop.run();
    }

    /**
     * Queries "clicks" by "date" from the cube and compares every returned row with
     * the expected per-date totals.
     *
     * @param eventloop   event loop that drives the query stream
     * @param cube        cube to query
     * @param classLoader parent class loader for the loader created for this query
     * @param expected    expected clicks keyed by date
     * @throws Exception if the query fails
     */
    private void assertClicksMatch(Eventloop eventloop, Cube cube, DefiningClassLoader classLoader,
                                   Map<Integer, Long> expected) throws Exception {
        StreamConsumers.ToList<LogItem> queryResultConsumer = new StreamConsumers.ToList<>(eventloop);
        cube.queryRawStream(asList("date"), asList("clicks"), alwaysTrue(),
                LogItem.class, DefiningClassLoader.create(classLoader)).streamTo(queryResultConsumer);
        eventloop.run();

        List<LogItem> results = queryResultConsumer.getList();
        // Without this guard the loop below would pass vacuously on an empty result.
        assertFalse("query returned no rows", results.isEmpty());
        for (LogItem logItem : results) {
            // Compared as boxed values so that a date missing from the expected map
            // fails as an assertion naming the date instead of a NullPointerException.
            assertEquals("clicks for date " + logItem.date,
                    expected.get(logItem.date), Long.valueOf(logItem.clicks));
        }
    }

    /**
     * Reloads chunks and consolidates the cube repeatedly until a round either does
     * not complete or reports {@code false}.
     *
     * @param eventloop event loop that drives the callbacks
     * @param cube      cube to consolidate
     * @return the number of rounds that reported {@code true}
     * @throws Exception if obtaining a consolidation result fails
     */
    @SuppressWarnings("ConstantConditions")
    private int consolidateUntilDone(Eventloop eventloop, Cube cube) throws Exception {
        int consolidations = 0;
        while (true) {
            cube.loadChunks(IgnoreCompletionCallback.create());
            eventloop.run();
            ResultCallbackFuture<Boolean> callback = ResultCallbackFuture.create();
            cube.consolidate(callback);
            eventloop.run();
            // A callback that never completed is treated like "nothing consolidated".
            if (!callback.isDone() || !callback.get()) {
                return consolidations;
            }
            ++consolidations;
        }
    }

    /**
     * Adds the clicks of every log item to the running total for its date.
     *
     * @param map      accumulator of clicks keyed by date; updated in place
     * @param logItems items whose clicks are added
     */
    private void aggregateToMap(Map<Integer, Long> map, List<LogItem> logItems) {
        for (LogItem logItem : logItems) {
            int date = logItem.date;
            long clicks = logItem.clicks;
            Long clicksForDate = map.get(date);
            map.put(date, clicksForDate == null ? clicks : clicksForDate + clicks);
        }
    }
}