/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.loader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import org.diqube.data.column.ColumnType;
/**
* Contains information about each column for the loader.
*
* <p>
* This contains both, a specific {@link ColumnType} for each column and even custom transformation functions. These
* transformation functions could e.g. be used for a 'date' column: The function parses the date and returns a Long,
* with the column actually being a Long column internally.
*
* @author Bastian Gloeckle
*/
public class LoaderColumnInfo {
// TODO #14 support optional columns
public static final Long DEFAULT_LONG = -1L;
public static final String DEFAULT_STRING = "";
public static final Double DEFAULT_DOUBLE = 0.;
private static final Function<String[], Object[]> STRING_COL_FN =
sa -> Arrays.asList(sa).stream().sequential().map(s -> {
if (s == null)
return DEFAULT_STRING;
return s;
}).toArray(len -> new String[len]);
private static final Function<String[], Object[]> LONG_COL_FN = sa -> Arrays.asList(sa).stream().sequential()
.map(s -> LoaderColumnInfo.parseLong(s)).toArray(len -> new Long[len]);
private static final Function<String[], Object[]> DOUBLE_COL_FN = sa -> Arrays.asList(sa).stream().sequential()
.map(s -> LoaderColumnInfo.parseDouble(s)).toArray(len -> new Double[len]);
private Map<String, ColumnType> columnType = new HashMap<>();
private Map<String, Function<String[], Object[]>> customTransformationFunction = new HashMap<>();
private ColumnType defaultColumnType;
/**
* Create new {@link LoaderColumnInfo}.
*
* @param defaultColumnType
* The column type that will be assumed for columns that have not been registered explicitly.
*/
public LoaderColumnInfo(ColumnType defaultColumnType) {
this.defaultColumnType = defaultColumnType;
}
/**
* Register a specific column type for a column without specifying a custom transformation function.
*
* @param colName
* Name of the column.
* @param columnType
* The type of the column.
*/
public void registerColumnType(String colName, ColumnType columnType) {
this.columnType.put(colName, columnType);
}
/**
* @return The registered {@link ColumnType} for the given column or <code>null</code> if column not yet known.
*/
public ColumnType getRegisteredColumnType(String colName) {
return this.columnType.get(colName);
}
/**
* Register a specific column type and a specific transformation function for a column.
*
* @param colName
* The name of the column.
* @param columnType
* Internal type of the column.
* @param transformFunc
* A transformation function that will be applied to arrays of input values for the column (all these values
* are strings). This function is expected to transform each value of the array and output a resulting array,
* preserving the elements order. The resulting arrays items must be matching the {@link ColumnType}
* specified (that is, it must be a String[] in case {@link ColumnType#STRING}, a Long[] in case
* {@link ColumnType#LONG} and a Double[] in case {@link ColumnType#DOUBLE}). This function must be
* thread-safe.
*/
public void registerCustomTransformationFunc(String colName, ColumnType columnType,
Function<String[], Object[]> transformFunc) {
this.columnType.put(colName, columnType);
this.customTransformationFunction.put(colName, transformFunc);
}
/**
* Return the transformation function for a given column, transforming each input string into a result object
* according to the columns {@link ColumnType}.
*
* This will return either pre-defined functions or custom defined functions accordingly.
*
* @param column
* Name of the column.
* @return The transformation function. This function is thread-safe.
*/
public Function<String[], Object[]> getFinalTransformFunc(String column) {
Function<String[], Object[]> res = customTransformationFunction.get(column);
if (res != null) {
return res;
}
switch (getFinalColumnType(column)) {
case STRING:
return STRING_COL_FN;
case LONG:
return LONG_COL_FN;
case DOUBLE:
return DOUBLE_COL_FN;
}
// never happens
return null;
}
/**
* Find out if a specific column has a custom data type set or the default is used.
*/
public boolean isDefaultDataType(String colName) {
return !columnType.containsKey(colName);
}
/**
* Return {@link ColumnType} to be used for the given column.
*
* Will apply the default column type accordingly.
*
* @param column
* Name of the column.
* @return The {@link ColumnType} that the loader should assume for the values of this column.
*/
public ColumnType getFinalColumnType(String column) {
ColumnType res = columnType.get(column);
if (res == null)
return defaultColumnType;
return res;
}
public static Long parseLong(String s) {
if (s == null || "".equals(s))
// TODO #14 optional columns
return DEFAULT_LONG;
return Long.parseLong(s);
}
public static Double parseDouble(String s) {
if (s == null || "".equals(s))
return DEFAULT_DOUBLE;
return Double.parseDouble(s);
}
}