/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.flatten;
import java.util.Collection;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import javax.inject.Inject;
import org.diqube.context.AutoInstatiate;
import org.diqube.data.column.AdjustableStandardColumnShard;
import org.diqube.data.flatten.FlattenedTable;
import org.diqube.data.table.Table;
import org.diqube.data.table.TableShard;
import org.diqube.executionenv.FlattenedTableInstanceManager;
import org.diqube.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Manages providing "freshly flattened" tables.
*
* <p>
* "Freshly flattened" tables are instances of {@link FlattenedTable} whose table shards have firstRowIds that align
* with the ones returned by {@link FlattenedTable#getOriginalFirstRowIdsOfShards()} and that can be adjusted freely
* without interfering with any other instance of {@link FlattenedTable}. Note that the returned
* {@link FlattenedTable} will not be usable right away, as its row IDs will overlap - it needs to be adjusted first.
* This is described in
* {@link Flattener#flattenTable(org.diqube.data.table.Table, java.util.Collection, String, java.util.UUID)}, too.
*
* <p>
* The flattened tables that are provided by this class might originate from different sources:
*
* <ul>
* <li>The table might be already flattened, valid (see {@link FlattenedTable#getOriginalFirstRowIdsOfShards()}) and
* available through {@link FlattenedTableInstanceManager} in which case
* {@link FlattenedTableUtil#facadeWithDefaultRowIds(FlattenedTable, String, String, java.util.UUID)} will be used to
* return a non-interfering instance of that {@link FlattenedTable}.
* <li>A valid version of the flattened table might have been flattened before and be available on disk in the
* {@link FlattenedTableDiskCache}. In this case that version will be loaded from there.
* <li>If none of the above, the table will be flattened newly with the {@link Flattener}.
* </ul>
*
* @author Bastian Gloeckle
*/
@AutoInstatiate
public class FlattenManager {
  private static final Logger logger = LoggerFactory.getLogger(FlattenManager.class);

  /** Queried first: holds the newest flattened version of a table that is registered in memory. */
  @Inject
  private FlattenedTableInstanceManager flattenedTableInstanceManager;

  /** Wraps an existing {@link FlattenedTable} into a facade carrying default row IDs and a new flatten ID. */
  @Inject
  private FlattenedTableUtil flattenedTableUtil;

  /** Queried second: may hold a previously flattened version on disk; newly flattened tables are offered to it. */
  @Inject
  private FlattenedTableDiskCache flattenedTableDiskCache;

  /** Last resort: computes a new flattening of the source table. */
  @Inject
  private Flattener flattener;

  /**
   * Provides a fresh {@link FlattenedTable} instance for the given source table and flatten-by field.
   *
   * <p>
   * Calling {@link AdjustableStandardColumnShard#adjustToFirstRowId(long)} on the result is safe: such an adjustment
   * does not interfere with any other instance of the same flattened table, even though this method re-uses already
   * flattened data wherever it can.
   *
   * <p>
   * The sources are tried in this order:
   * <ol>
   * <li>the newest version known to the {@link FlattenedTableInstanceManager}, if its original firstRowIds equal
   * the lowest row IDs of the shards to flatten,
   * <li>a matching version from the {@link FlattenedTableDiskCache},
   * <li>a new flattening computed by the {@link Flattener}, which is then offered to the disk cache.
   * </ol>
   * In the first two cases the found table is not returned directly, but wrapped using
   * {@link FlattenedTableUtil#facadeWithDefaultRowIds(FlattenedTable, String, String, UUID)}.
   *
   * <p>
   * The returned {@link FlattenedTable} is not ready to be used yet: its shards carry the same firstRowIds as the
   * shards of the original table, but typically each TableShard of a flattened table contains more rows than its
   * counterpart in the not-flattened table, so the rowIds will most likely overlap. See also the result of
   * {@link Flattener#flattenTable(Table, Collection, String, UUID)}.
   *
   * <p>
   * This method does NOT call
   * {@link FlattenedTableInstanceManager#registerFlattenedTableVersion(UUID, FlattenedTable, String, String)}; that
   * is to be done after the rowIds have been adjusted.
   *
   * @param sourceTable
   *          The table to get a flattened version of.
   * @param sourceTableShards
   *          The table shards to be flattened. If null, the shards of the sourceTable are used.
   * @param flattenBy
   *          Which field to flatten by.
   * @param flattenId
   *          The ID of the resulting flattening.
   * @return A fresh instance of {@link FlattenedTable}.
   */
  public FlattenedTable createFlattenedTable(Table sourceTable, Collection<TableShard> sourceTableShards,
      String flattenBy, UUID flattenId) {
    Collection<TableShard> shardsToFlatten =
        (sourceTableShards != null) ? sourceTableShards : sourceTable.getShards();

    Set<Long> expectedFirstRowIds =
        shardsToFlatten.stream().map(TableShard::getLowestRowId).collect(Collectors.toSet());

    FlattenedTable reusable = findValidInMemoryFlattening(sourceTable, flattenBy, flattenId, expectedFirstRowIds);
    if (reusable == null) {
      reusable = findDiskCachedFlattening(sourceTable, flattenBy, flattenId, expectedFirstRowIds);
    }

    if (reusable != null) {
      // Never hand out the shared instance itself, callers are going to adjust the row IDs of what they receive.
      return flattenedTableUtil.facadeWithDefaultRowIds(reusable, sourceTable.getName(), flattenBy, flattenId);
    }

    return flattenAnew(sourceTable, shardsToFlatten, flattenBy, flattenId);
  }

  /**
   * Asks the {@link FlattenedTableInstanceManager} for the newest flattening and validates it.
   *
   * @return the in-memory flattening if its original firstRowIds equal the expected ones, otherwise null.
   */
  private FlattenedTable findValidInMemoryFlattening(Table sourceTable, String flattenBy, UUID flattenId,
      Set<Long> expectedFirstRowIds) {
    Pair<UUID, FlattenedTable> newestVersion =
        flattenedTableInstanceManager.getNewestFlattenedTableVersion(sourceTable.getName(), flattenBy);
    if (newestVersion == null) {
      return null;
    }

    FlattenedTable candidate = newestVersion.getRight();
    if (!expectedFirstRowIds.equals(candidate.getOriginalFirstRowIdsOfShards())) {
      // The newest version was created for different shards, it cannot be re-used.
      return null;
    }

    logger.info("Will re-use the flattening for '{}' by '{}' from ID {} for new ID {}", sourceTable.getName(),
        flattenBy, newestVersion.getLeft(), flattenId);
    return candidate;
  }

  /**
   * Tries to load a flattening for the expected firstRowIds from the {@link FlattenedTableDiskCache}.
   *
   * @return the flattening loaded from the disk cache, or null if the cache did not provide one.
   */
  private FlattenedTable findDiskCachedFlattening(Table sourceTable, String flattenBy, UUID flattenId,
      Set<Long> expectedFirstRowIds) {
    FlattenedTable cached = flattenedTableDiskCache.load(sourceTable.getName(), flattenBy, expectedFirstRowIds);
    if (cached == null) {
      return null;
    }

    logger.info("Will re-use the disk-cached version for '{}' by '{}' for new ID {}", sourceTable.getName(),
        flattenBy, flattenId);
    return cached;
  }

  /**
   * Flattens the given shards with the {@link Flattener} and offers the result to the disk cache.
   *
   * @return the newly flattened table, exactly as returned by the {@link Flattener}.
   */
  private FlattenedTable flattenAnew(Table sourceTable, Collection<TableShard> shardsToFlatten, String flattenBy,
      UUID flattenId) {
    logger.info("No valid flattened table for '{}' by '{}' available, will therefore flatten table now.",
        sourceTable.getName(), flattenBy);

    FlattenedTable flattened = flattener.flattenTable(sourceTable, shardsToFlatten, flattenBy, flattenId);
    flattenedTableDiskCache.offer(flattened, sourceTable.getName(), flattenBy);
    return flattened;
  }
}