/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.loader.util; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; import org.diqube.loader.LoadException; import org.diqube.loader.LoaderColumnInfo; import org.diqube.loader.columnshard.ColumnShardBuilderManager; import org.diqube.threads.ExecutorManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Helper for transposing row-wise data into columnar format and applying the transformation functions on the columnar * values (see {@link LoaderColumnInfo}) in a multi-threaded way. * * @author Bastian Gloeckle */ public class ParallelLoadAndTransposeHelper { private static final Logger logger = LoggerFactory.getLogger(ParallelLoadAndTransposeHelper.class); private LoaderColumnInfo columnInfo; private ColumnShardBuilderManager columnBuilderManager; private String[] colNames; private String tableName; private ExecutorManager executorManager; /** * * @param columnInfo * Column Info about the columns that are being created. * @param columnBuilderManager * The target {@link ColumnShardBuilderManager} where the values of the columns should be put into. * @param colNames * Names of the columns, in the same ordering as the row-wise data will later provide the column values. * @param tableName * The name of the table to be created. */ public ParallelLoadAndTransposeHelper(ExecutorManager executorManager, LoaderColumnInfo columnInfo, ColumnShardBuilderManager columnBuilderManager, String[] colNames, String tableName) { this.executorManager = executorManager; this.columnInfo = columnInfo; this.columnBuilderManager = columnBuilderManager; this.colNames = colNames; this.tableName = tableName; } /** * Start a thread that will transpose the row-wise data which will be read by a custom rowWiseLoader and apply the * {@link LoaderColumnInfo#getFinalTransformFunc(String)} on those columnar values. * * <p> * This method will take care of shutting down any created threads before returning. This method will return as soon * as the rowWiseLoader has returned and all of that data has been processed. * * @param firstRowId * the first row ID that should be given to the table shard being created. * @param rowWiseLoader * This consumer will be called right after setting up the thread which will transpose the row-wise data. * This consumer then needs to load row-wise data and store it into the {@link ConcurrentLinkedDeque} that is * provided to the consumer as parameter. Each entry that is stored into the Deque can contain the data of * multiple rows, each row containing the values for all columns (as defined by the columnNames parameter in * the constructor!). The first index to the two-dimensional array is the row, the second is the value: * String[row][column] = value. * @throws LoadException * If anything goes wrong. */ public void transpose(long firstRowId, Consumer<ConcurrentLinkedDeque<String[][]>> rowWiseLoader) throws LoadException { final ConcurrentLinkedDeque<String[][]> rowWiseData = new ConcurrentLinkedDeque<>(); // Prepare a source of RowIDs. AtomicLong nextRowId = new AtomicLong(firstRowId); // This thread will continuously look at the rowWiseData deque and, if it finds new data, transform that to columnar // format and send the column values to the column builder manager. TransposeThread transposeThread = new TransposeThread( // rowWiseData, // read from this input l -> nextRowId.getAndAdd(l), // retrieve a set of new, unique rowIds (col, values, rowId) -> { // add results to the col Builders Object[] finalValues = columnInfo.getFinalTransformFunc(col).apply(values); columnBuilderManager.addValues(col, finalValues, rowId); } , colNames, tableName, executorManager); try { transposeThread.start(); rowWiseLoader.accept(rowWiseData); } finally { // try to gracefully shut down the thread and wait for it. logger.trace("Read all row-wise data, waiting for transposing to be done..."); Object notifyObject = new Object(); transposeThread.inputDequeIsFilledNotifyWhenTransposed(notifyObject); synchronized (notifyObject) { while (!transposeThread.isTransposeDone()) { logger.trace("Still waiting for transpose to be done.."); try { notifyObject.wait(1000); } catch (InterruptedException e) { throw new LoadException("Interrupted while waiting for transposing to be done."); } } } // If the thread did not finish successfully, make sure to not continue processing the input. if (!transposeThread.wasGoodShutdown()) if (transposeThread.getShutdownExceptionMessage() != null) throw new LoadException(transposeThread.getShutdownExceptionMessage()); else throw new LoadException("TransposeThread did not exit successfully. Was it interrupted?"); } } }