/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.hadoop; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.Sets; /** * Data prepared for a .diqube file representing one row in the resulting diqube table. * * @author Bastian Gloeckle */ public class DiqubeRow implements Serializable { private static final long serialVersionUID = 1L; private DiqubeData data; /** * Return a {@link DiqubeData} object that can be filled with the data of one row. * * The returned object contains the hierarchical data that will be written into one row of a diqube table. */ public DiqubeData withData() { data = new DiqubeData(null); return data; } public DiqubeData getData() { return data; } /** * Hierarchical data that is to be stored in the .diqube file. * * This class accepts the data in a fluent interface: * * <pre> * DiqubeRow row = ...; * row.withData().withData("columnA", "value1") * .withData("columnB", 25.1) * .withData("columnC", 1L) * .withNewDiqubeData("columnD") * .withData("d_a", "hello w") * .done() * .addNewRepeatedDiqubeData("columnE") * .withData("e_a", 1L) * .done() * .addNewRepeatedDiqubeData("columnE") * .withData("e_a", 2L) * .done() * .done(); * </pre> * * This would end up in the following row: * * <pre> * columnA | columnB | columnC | columnD.d_a | columnE[0].e_a | columnE[1].e_a | columnE[length] * --------|---------|---------|-------------|----------------|----------------|---------------- * value1 | 25.1 | 1 | hello w | 1 | 2 | 2 * </pre> */ public class DiqubeData implements Serializable { private static final long serialVersionUID = 1L; private Map<String, Object> data = new HashMap<>(); private Map<String, List<Object>> repeatedData = new HashMap<>(); private DiqubeData parent; private DiqubeData(DiqubeData parent) { this.parent = parent; } public DiqubeData withData(String fieldName, Long data) { return withDataInternal(fieldName, data); } public DiqubeData withData(String fieldName, Double data) { return withDataInternal(fieldName, data); } public DiqubeData withData(String fieldName, String data) { return withDataInternal(fieldName, data); } public DiqubeData withData(String fieldName, Object data) throws IllegalArgumentException { if (!(data instanceof String) && !(data instanceof Long) && !(data instanceof Double) && !(data instanceof DiqubeData)) throw new IllegalArgumentException(data.getClass().getSimpleName() + " not supported."); return withDataInternal(fieldName, data); } public DiqubeData withNewDiqubeData(String fieldName) { DiqubeData res = new DiqubeData(this); withDataInternal(fieldName, res); return res; } private DiqubeData withDataInternal(String fieldName, Object data) { this.data.put(fieldName, data); return this; } public DiqubeData addRepeatedData(String fieldName, String data) { return addRepeatedDataInternal(fieldName, data); } public DiqubeData addRepeatedData(String fieldName, Long data) { return addRepeatedDataInternal(fieldName, data); } public DiqubeData addRepeatedData(String fieldName, Double data) { return addRepeatedDataInternal(fieldName, data); } public DiqubeData addRepeatedData(String fieldName, Object data) throws IllegalArgumentException { if (!(data instanceof String) && !(data instanceof Long) && !(data instanceof Double) && !(data instanceof DiqubeData)) throw new IllegalArgumentException(data.getClass().getSimpleName() + " not supported."); return addRepeatedDataInternal(fieldName, data); } public DiqubeData addNewRepeatedDiqubeData(String fieldName) { DiqubeData res = new DiqubeData(this); addRepeatedDataInternal(fieldName, res); return res; } private DiqubeData addRepeatedDataInternal(String fieldName, Object data) { if (!this.repeatedData.containsKey(fieldName)) this.repeatedData.put(fieldName, new ArrayList<>()); this.repeatedData.get(fieldName).add(data); return this; } public DiqubeData done() { return parent; } public boolean isEmpty() { return data.isEmpty() && repeatedData.isEmpty(); } /** * Validates the data in this {@link DiqubeData} and in all its transitive children. Automatically called by * {@link DiqubeRecordWriter}. * * @throws IllegalStateException * If anything is wrong. */ /* package */ void validate() throws IllegalStateException { if (isEmpty()) // accept empty data object, DiqubeRecordWriter will handle that correctly. return; Set<String> multipleMappedKeys = Sets.intersection(data.keySet(), repeatedData.keySet()); if (!multipleMappedKeys.isEmpty()) throw new IllegalStateException( "The following fieldNames have been used for normal data and repeated data which is not allowed: " + multipleMappedKeys); for (Entry<String, List<Object>> repeatedEntry : repeatedData.entrySet()) { List<Object> values = repeatedEntry.getValue(); String fieldName = repeatedEntry.getKey(); List<Class<?>> valueClasses = values.stream().map(d -> d.getClass()).distinct().collect(Collectors.toList()); if (valueClasses.size() != 1) { throw new IllegalStateException( "Field '" + fieldName + "' has multiple types of data as values, which is not allowed: " + valueClasses); } } // validate children Collection<DiqubeData> children = new ArrayList<>(); data.values().stream().filter(d -> d instanceof DiqubeData).forEach(d -> children.add((DiqubeData) d)); repeatedData.values().stream().flatMap(l -> l.stream()).filter(d -> d instanceof DiqubeData) .forEach(d -> children.add((DiqubeData) d)); children.forEach(d -> d.validate()); } /* package */Map<String, Object> getData() { return data; } /* package */Map<String, List<Object>> getRepeatedData() { return repeatedData; } } }