/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.loader.columnshard; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import org.diqube.data.column.ColumnPageFactory; import org.diqube.data.column.ColumnShardFactory; import org.diqube.data.column.StandardColumnShard; import org.diqube.data.table.TableShard; import org.diqube.loader.LoaderColumnInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Iterables; /** * Manages all {@link ColumnShardBuilder}s for loading one {@link TableShard}. * * <p> * This class is a convenience class for the loaders to use, so the loaders do not need to take care of generics * themselves. * * @author Bastian Gloeckle */ public class ColumnShardBuilderManager { private static final Logger logger = LoggerFactory.getLogger(ColumnShardBuilderManager.class); private volatile Map<String, ColumnShardBuilder<String>> stringBuilders = new ConcurrentHashMap<>(); private volatile Map<String, ColumnShardBuilder<Long>> longBuilders = new ConcurrentHashMap<>(); private volatile Map<String, ColumnShardBuilder<Double>> doubleBuilders = new ConcurrentHashMap<>(); private ColumnShardFactory columnShardFactory; private ColumnPageFactory columnPageFactory; private LoaderColumnInfo columnInfo; private long firstRowIdInShard; private AtomicLong maxRow = new AtomicLong(-1L); public ColumnShardBuilderManager(ColumnShardFactory columnShardFactory, ColumnPageFactory columnPageFactory, LoaderColumnInfo columnInfo, long firstRowIdInShard) { this.columnShardFactory = columnShardFactory; this.columnPageFactory = columnPageFactory; this.columnInfo = columnInfo; this.firstRowIdInShard = firstRowIdInShard; } /** * @return Column Names of all columns that received some data with {@link #addValues(String, Object[], long)}. */ public Set<String> getAllColumnsWithValues() { Set<String> res = new HashSet<>(stringBuilders.keySet()); res.addAll(longBuilders.keySet()); res.addAll(doubleBuilders.keySet()); return res; } /** * Walks along all rows that have been added for the given column and sets the given default value into those rows * that do not have a value set. This is done or all rows up to the maximum row added to this * {@link ColumnShardBuilderManager} for any row. */ public void fillEmptyRowsWithValue(String colName, Object value) { logger.trace("Filling col {} with fallback value {} up to row {}", colName, value, maxRow.get()); switch (columnInfo.getFinalColumnType(colName)) { case STRING: stringBuilders.get(colName).fillEmptyRowsWithValue((String) value, maxRow.get()); break; case LONG: longBuilders.get(colName).fillEmptyRowsWithValue((Long) value, maxRow.get()); break; case DOUBLE: doubleBuilders.get(colName).fillEmptyRowsWithValue((Double) value, maxRow.get()); break; } } /** * Calls {@link ColumnShardBuilder#addValues(Object[], Long)} for the column accordingly. * * @param colName * @param values * @param firstRowId */ public void addValues(String colName, Object[] values, long firstRowId) { switch (columnInfo.getFinalColumnType(colName)) { case STRING: if (!stringBuilders.containsKey(colName)) { synchronized (stringBuilders) { if (!stringBuilders.containsKey(colName)) { stringBuilders.put(colName, new ColumnShardBuilder<String>(columnShardFactory, columnPageFactory, colName, firstRowIdInShard)); } } } stringBuilders.get(colName).addValues((String[]) values, firstRowId); break; case LONG: if (!longBuilders.containsKey(colName)) { synchronized (longBuilders) { if (!longBuilders.containsKey(colName)) { longBuilders.put(colName, new ColumnShardBuilder<Long>(columnShardFactory, columnPageFactory, colName, firstRowIdInShard)); } } } longBuilders.get(colName).addValues((Long[]) values, firstRowId); break; case DOUBLE: if (!doubleBuilders.containsKey(colName)) { synchronized (doubleBuilders) { if (!doubleBuilders.containsKey(colName)) { doubleBuilders.put(colName, new ColumnShardBuilder<Double>(columnShardFactory, columnPageFactory, colName, firstRowIdInShard)); } } } doubleBuilders.get(colName).addValues((Double[]) values, firstRowId); break; } maxRow.getAndUpdate(oldVal -> Math.max(oldVal, firstRowId + values.length - 1)); } /** * Make this manager expect values up to the specified row (including). * * When the {@link #buildAndFree(String)} method is called, empty rows of columns are filled with default values. * * If this method is called, it is ensured on a call to {@link #buildAndFree(String)} that all rows up to and * including the given rowId are filled with default values. */ public void expectToFillDataUpToRow(long rowId) { maxRow.getAndUpdate(oldVal -> Math.max(oldVal, rowId)); } /** * Executes {@link ColumnShardBuilder#build()} and frees up the memory of the {@link ColumnShardBuilder} after that. * * If this method is called after adding values for /all/ columns, the columns returned by this method will all have * the same length (= same number of rows). Any empty rows of the column will be filled with default values. This is * needed to make the column compatible e.g. with ColumnAggregationStep which resolves values before checking the * length of repeated fields. * * If a caller wants to make sure that all columns contain values up to a specific rowId, call * {@link #expectToFillDataUpToRow(long)} before calling this method. */ public StandardColumnShard buildAndFree(String colName) { ColumnShardBuilder<?> colBuilder = null; switch (columnInfo.getFinalColumnType(colName)) { case STRING: fillEmptyRowsWithValue(colName, LoaderColumnInfo.DEFAULT_STRING); colBuilder = stringBuilders.get(colName); stringBuilders.remove(colName); break; case LONG: fillEmptyRowsWithValue(colName, LoaderColumnInfo.DEFAULT_LONG); colBuilder = longBuilders.get(colName); longBuilders.remove(colName); break; case DOUBLE: fillEmptyRowsWithValue(colName, LoaderColumnInfo.DEFAULT_DOUBLE); colBuilder = doubleBuilders.get(colName); doubleBuilders.remove(colName); break; } if (colBuilder == null) return null; return colBuilder.build(); } /** * Returns an approximation of the memory consumption by all ColumnShardBuilders. * * @see ColumnShardBuilder#calculateApproximateMemoryConsumption(). */ public long calculateApproximateSizeInBytes() { long res = 0; for (ColumnShardBuilder<?> builder : Iterables.concat(stringBuilders.values(), longBuilders.values(), doubleBuilders.values())) { res += builder.calculateApproximateSizeInBytes(); } return res; } public long getFirstRowIdInShard() { return firstRowIdInShard; } }