/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.loader.compression;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.SortedSet;
import java.util.TreeMap;
import org.diqube.data.types.dbl.dict.DoubleDictionary;
import org.diqube.data.types.dbl.dict.FpcDoubleDictionary;
import org.diqube.data.types.dbl.dict.FpcPage;
import org.diqube.data.types.dbl.dict.FpcPage.State;
import org.diqube.util.Pair;
/**
 * Builds a compressed double dictionary out of a map that contains values and temporary ids.
 *
 * TODO #83: Extract super-interface.
 *
 * @author Bastian Gloeckle
 */
public class CompressedDoubleDictionaryBuilder {
  /** Maximum number of values that are put into a single {@link FpcPage}. */
  public static final int PAGE_SIZE = 5_000;

  private NavigableMap<Double, Long> entityMap;

  /**
   * @param entityMap
   *          From decompressed double value to temporary Column Value IDs that have been assigned already.
   * @return this builder, to allow chaining.
   */
  public CompressedDoubleDictionaryBuilder fromEntityMap(NavigableMap<Double, Long> entityMap) {
    this.entityMap = entityMap;
    return this;
  }

  /**
   * Build the dictionary.
   *
   * <p>
   * Final IDs are assigned in ascending order of the values, starting at 0. The values are then split into pages of
   * at most {@link #PAGE_SIZE} values each; every page is keyed by the final ID of its first value.
   *
   * @return {@link Pair} containing the new {@link DoubleDictionary} and an ID change map (maps from temporary ID that
   *         was provided in {@link #fromEntityMap(NavigableMap)} to the final ID assigned in the resulting dict). The
   *         ID change map only contains entries for IDs that actually changed.
   * @throws java.util.NoSuchElementException
   *           if the entity map is empty, as there is neither a first nor a last value then.
   */
  public Pair<DoubleDictionary<?>, Map<Long, Long>> build() {
    // navigableKeySet() is guaranteed to be sorted, so no unchecked downcast of keySet() is needed.
    SortedSet<Double> keys = entityMap.navigableKeySet();

    // Assign final IDs in key order and remember each temporary ID that differs from its final ID.
    Map<Long, Long> idMap = new HashMap<>();
    long newId = 0;
    for (Map.Entry<Double, Long> entry : entityMap.entrySet()) {
      long thisId = newId++;
      long tempId = entry.getValue();
      if (tempId != thisId)
        idMap.put(tempId, thisId);
    }

    NavigableMap<Long, FpcPage> pages = new TreeMap<>();
    long valuesLeft = newId;
    long firstId = 0L;
    Iterator<Double> keyIt = keys.iterator();
    State lastState = null;
    while (keyIt.hasNext()) {
      // Every page is full except possibly the last one, which takes whatever is left.
      double[] valueArray = new double[(int) Math.min(PAGE_SIZE, valuesLeft)];
      for (int i = 0; i < valueArray.length; i++)
        valueArray[i] = keyIt.next();

      // The first page has no predecessor; each later page is handed the state that compressing the previous page
      // returned.
      FpcPage newPage;
      if (lastState == null)
        newPage = new FpcPage(firstId);
      else
        newPage = new FpcPage(firstId, lastState);
      lastState = newPage.compress(valueArray);

      pages.put(firstId, newPage);
      valuesLeft -= valueArray.length;
      firstId += valueArray.length;
    }

    DoubleDictionary<?> res = new FpcDoubleDictionary(pages, keys.first(), keys.last());
    return new Pair<>(res, idMap);
  }
}