/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.loader; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel.MapMode; import java.util.Collection; import java.util.stream.Collectors; import javax.inject.Inject; import org.diqube.context.AutoInstatiate; import org.diqube.data.column.AdjustableStandardColumnShard; import org.diqube.data.column.StandardColumnShard; import org.diqube.data.serialize.DeserializationException; import org.diqube.data.table.DefaultTableShard; import org.diqube.data.table.TableShard; import org.diqube.file.DiqubeFileFactory; import org.diqube.file.DiqubeFileReader; import org.diqube.util.BigByteBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Loads data from .diqube files, which contain serialized data of classes from diqube-data (built with diqube-file) - * these simply have to be deserialized. * * <p> * This loader will return as many {@link TableShard}s as contained in the .diqube file. * * <p> * The corresponding files can be created using diqube-tool (transpose) or diqube-hadoop. * * <p> * This {@link Loader} ignores the {@link LoaderColumnInfo} that is provided completely, but rather fully relies on the * serialized data. * * @author Bastian Gloeckle */ @AutoInstatiate public class DiqubeLoader implements Loader { private static final Logger logger = LoggerFactory.getLogger(DiqubeLoader.class); @Inject private DiqubeFileFactory fileFactory; @Override public Collection<TableShard> load(long firstRowId, String filename, String tableName, LoaderColumnInfo columnInfo) throws LoadException { logger.info("Reading data for new table '{}' from '{}'.", new Object[] { tableName, filename }); try (RandomAccessFile f = new RandomAccessFile(filename, "r")) { BigByteBuffer buf = new BigByteBuffer(f.getChannel(), MapMode.READ_ONLY, b -> b.load()); return load(firstRowId, buf, tableName, columnInfo); } catch (IOException e) { throw new LoadException("Could not load " + filename, e); } } @Override public Collection<TableShard> load(long firstRowId, BigByteBuffer buffer, String tableName, LoaderColumnInfo columnInfo) throws LoadException { Collection<DefaultTableShard> defaultTableShards; try { DiqubeFileReader reader = fileFactory.createDiqubeFileReader(buffer); logger.info("Loading data for table '{}' by deserializing it.", tableName); defaultTableShards = reader.loadAllTableShards(); } catch (DeserializationException | IOException e) { throw new LoadException("Could not deserialize data", e); } long nextFirstRowId = firstRowId; // adjust some data. for (DefaultTableShard shard : defaultTableShards) { shard.setTableName(tableName); for (StandardColumnShard colShard : shard.getColumns().values()) ((AdjustableStandardColumnShard) colShard).adjustToFirstRowId(nextFirstRowId); nextFirstRowId += shard.getNumberOfRowsInShard(); } logger.info("Successfully loaded data for table '{}', rowIds {}-{}.", tableName, firstRowId, nextFirstRowId - 1); return defaultTableShards.stream().map(s -> (TableShard) s).collect(Collectors.toList()); } }