/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2013 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.indexStructures.rtrees;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import xxl.core.collections.MapEntry;
import xxl.core.collections.containers.Container;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.mappers.Mapper;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
import xxl.core.functions.Functional.UnaryFunction;
import xxl.core.indexStructures.ORTree;
import xxl.core.indexStructures.ORTree.IndexEntry;
import xxl.core.indexStructures.ORTree.Node;
import xxl.core.indexStructures.RTree;
import xxl.core.indexStructures.rtrees.GenericPartitioner.Bucket;
import xxl.core.indexStructures.rtrees.GenericPartitioner.CostFunctionArrayProcessor;
import xxl.core.io.converters.ConvertableConverter;
import xxl.core.io.converters.Converter;
import xxl.core.io.converters.LongConverter;
import xxl.core.spatial.rectangles.DoublePointRectangle;
import xxl.core.spatial.rectangles.Rectangle;
import xxl.core.spatial.rectangles.Rectangles;
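/**
* This class provides basic functionality for level-by-level bulk loading of R-trees.
* First the leaf level is built; the index entries produced for each level are stored in an
* auxiliary data structure (for example a file, a list or a container) and serve as the
* input for building the next level.
*
* @param <T> type of the data objects stored in the leaf nodes
*/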
public abstract class AbstractIterativeRtreeBulkloader<T> implements IterativeBulkLoaderInterface<T>{
/**
* Partitioning strategies that can be selected for distributing the entries of one level
* among the nodes of that level.
*/
public static enum ProcessingType{
GOPT, // cost-based optimal partitioning with linear run time; also used as the fallback by computeDistribution
SOPT_F, // quadratic run time; used if a desired storage utilization should be achieved
SIMPLE, // naive partitioning into chunks of equal size
}
/**
* Stores the tree meta data in the file {@code path + "treeTemp.dat"}.
* <p>
* The file contains, in this order: the parent level of the root entry (int), the id of
* the root node (long) and the serialized root descriptor.
*
* @param path prefix of the meta data file name; {@code "treeTemp.dat"} is appended to it
* @param rootNodeId id of the root node
* @param rootEntry index entry of the root node; only its parent level is stored
* @param rootDescriptor descriptor of the root node
* @throws IOException if the file cannot be created or written
*/
public static void storeMetaData(String path, Long rootNodeId, IndexEntry rootEntry, Rectangle rootDescriptor) throws IOException{
    // try-with-resources closes the stream even if one of the writes fails
    try (DataOutputStream dOut = new DataOutputStream(new FileOutputStream(path + "treeTemp.dat"))) {
        dOut.writeInt(rootEntry.parentLevel());
        dOut.writeLong(rootNodeId);
        rootDescriptor.write(dOut);
    }
}
/**
* Drains the given iterator into a newly allocated array.
*
* @param iterator source of the rectangles
* @param size length of the returned array; the iterator must not deliver more than
*        {@code size} elements
* @return array of length {@code size}; positions behind the last delivered element
*         stay {@code null}
*/
protected static DoublePointRectangle[] toArray(Iterator<DoublePointRectangle> iterator, int size){
    final DoublePointRectangle[] result = new DoublePointRectangle[size];
    for (int pos = 0; iterator.hasNext(); pos++) {
        result[pos] = iterator.next();
    }
    return result;
}
/**
* Computes a one-dimensional distribution of {@code size} rectangles among nodes.
* <p>
* The processor is reset first. Depending on {@code pType} the distribution is then
* computed as follows:
* <ul>
* <li>{@code SOPT_F}: {@code GenericPartitioner.computeOPTF} is asked for
*     {@code n = ceil(size / (storageUtil * B))} buckets and the distribution of the last
*     bucket is returned;</li>
* <li>{@code SIMPLE}: the input is cut into chunks of {@code (int) (storageUtil * B)}
*     entries, the last chunk takes the remainder; the iterator is not consumed;</li>
* <li>any other type (GOPT): {@code GenericPartitioner.computeGOPT} is used and the
*     distribution of its last bucket is returned.</li>
* </ul>
*
* @param iterator rectangles to distribute, in the order in which they are to be packed
* @param level level of the nodes to create; not used by this implementation
* @param size number of rectangles delivered by {@code iterator}
* @param arrayProcessor cost function processor handed to the partitioner
* @param B maximal number of entries per node
* @param b minimal number of entries per node
* @param storageUtil desired average storage utilization per node, e.g. 0.5 for 50%
* @param pType partitioning strategy
* @return the number of entries of each node, in input order
* @throws IllegalArgumentException if {@code pType} is {@code SIMPLE}, {@code size} is
*         positive and {@code (int) (storageUtil * B)} is smaller than 1
*/
public static int[] computeDistribution(Iterator<DoublePointRectangle> iterator, int level, int size, CostFunctionArrayProcessor<? extends Rectangle> arrayProcessor,
        int B, int b, double storageUtil, ProcessingType pType){
    arrayProcessor.reset();
    switch(pType){
        case SOPT_F : {
            // number of buckets needed to reach the requested storage utilization
            int n = (int) (Math.ceil(size/(storageUtil * B)));
            Bucket[][] buckets = GenericPartitioner.computeOPTF(toArray(iterator, size), b, B, n, arrayProcessor);
            // take last bucket
            return GenericPartitioner.getDistribution(buckets[n-1][size-1]);
        }
        case SIMPLE : {
            return equalSizeDistribution(size, (int) (storageUtil * B));
        }
        default : {
            // GOPT is default
            Bucket[] buckets = GenericPartitioner.computeGOPT(toArray(iterator, size), b, B, arrayProcessor);
            return GenericPartitioner.getDistribution(buckets[buckets.length-1]);
        }
    }
}

/**
* Cuts {@code size} entries into chunks of {@code chunkSize} entries; the last chunk takes
* the remainder.
*
* @param size number of entries to distribute; a non-positive value yields an empty array
* @param chunkSize number of entries per chunk
* @return sizes of the chunks in order
* @throws IllegalArgumentException if {@code size} is positive and {@code chunkSize} is
*         smaller than 1 (the chunking could never terminate)
*/
private static int[] equalSizeDistribution(int size, int chunkSize){
    if (size <= 0){
        return new int[0];
    }
    if (chunkSize < 1){
        throw new IllegalArgumentException(
            "storage utilization times node capacity must be at least 1, but is " + chunkSize);
    }
    final int fullChunks = size / chunkSize;
    final int remainder = size % chunkSize;
    final int[] distribution = new int[(remainder == 0) ? fullChunks : fullChunks + 1];
    for (int i = 0; i < fullChunks; i++){
        distribution[i] = chunkSize;
    }
    if (remainder != 0){
        distribution[fullChunks] = remainder;
    }
    return distribution;
}
/**
* Creates a cost function that rates a rectangle by its extended volume (area in 2d):
* every delta of the rectangle is enlarged by the average query side length of the same
* dimension and the enlarged values are multiplied.
*
* @param normalizedQuerySideLength average query side length per dimension; the data space
*        is assumed to be the unit cube
* @return cost function computing the volume of the rectangle extended by the average
*         query side lengths
*/
public static UnaryFunction<DoublePointRectangle, Double> generateDefaultFunction(
        final double[] normalizedQuerySideLength){
    final UnaryFunction<DoublePointRectangle, Double> extendedVolume =
            new UnaryFunction<DoublePointRectangle, Double>() {
        @Override
        public Double invoke(DoublePointRectangle arg) {
            // the deltas are taken from a copy of the argument
            final double[] extents = new DoublePointRectangle(arg).deltas();
            double volume = 1d;
            int dim = 0;
            while (dim < extents.length){
                volume *= (extents[dim] + normalizedQuerySideLength[dim]);
                dim++;
            }
            return volume;
        }
    };
    return extendedVolume;
}
/**
* Creates a cost function that rates a rectangle by the value returned by its
* {@code area()} method.
*
* @return cost function delegating to {@code DoublePointRectangle.area()}
*/
public static UnaryFunction<DoublePointRectangle, Double> generateDefaultFunctionVolume(){
    final UnaryFunction<DoublePointRectangle, Double> plainVolume =
            new UnaryFunction<DoublePointRectangle, Double>() {
        @Override
        public Double invoke(DoublePointRectangle arg) {
            return arg.area();
        }
    };
    return plainVolume;
}
/**
* Data converter, used to serialize objects in leaf nodes.
*/
protected Converter<T> dataConverter;
/**
* Size of one data object in bytes.
*/
protected int dataSize;
/**
* Converter for the internal representation of the index entries computed in one iteration step.
* The key of an entry is the address (id) of a node, the value is the DoublePointRectangle
* describing that node.
*/
protected Converter<MapEntry<Long, DoublePointRectangle>> mapEntryConverter;
/**
* Number of dimensions.
*/
protected int dimension;
/**
* R-tree to bulk load.
* The R-tree is initialized at the end of buildRTree and is ready to use after bulk loading is finished.
*/
protected RTree rtree;
/**
* Block size in bytes; used for the computation of the minimal and maximal number of objects per node.
*/
protected int blockSize;
/**
* Factor applied to the usable block size to compute the minimal number of objects per node.
*/
protected double ratio;
/**
* Maximal number of entries that writeLevel reads into one in-memory chunk before a
* distribution is computed for that chunk. For ProcessingType.SIMPLE writeLevel replaces
* this value by one derived from the node capacity and the storage utilization.
*/
protected int partitionSize;
/**
* Maximal number of elements per leaf node;
* computed in init as (blockSize - 6) / dataSize.
*/
protected int B_Leaf;
/**
* Maximal number of elements per index node;
* computed in init as (blockSize - 6) / (dimension * 16 + 8): an index entry consists of a
* DoublePointRectangle descriptor and, additionally, a long for the node address.
*/
protected int B_Index;
/**
* Minimal number of elements per index node;
* computed in init as ((blockSize - 6) * ratio) / (dimension * 16 + 8): an index entry consists of a
* DoublePointRectangle descriptor and, additionally, a long for the node address.
*/
protected int b_Index;
/**
* Minimal number of elements per leaf node;
* computed in init as ((blockSize - 6) * ratio) / dataSize.
*/
protected int b_Leaf;
/**
* Target container of the R-tree; the nodes created during bulk loading are written to it.
*/
protected Container treeContainer;
/**
* Cost function processor; used for the computation of the optimal partitioning.
*/
protected CostFunctionArrayProcessor<? extends Rectangle> arrayProcessor;
/**
* Optional value for the overall number of objects to load.
* Note that buildRTree overwrites it with the number of nodes written for the level built last.
*/
protected int numberOfDataObjects;
/**
* Partitioning type used to distribute the entries of a level among nodes.
*/
protected ProcessingType pType;
/**
* Average storage utilization per node, e.g. the value 0.5 represents an average space utilization of 50% per node.
*/
protected double storageUtil;
/**
* Function for mapping the data objects to DoublePointRectangles.
*/
UnaryFunction<T, DoublePointRectangle> toRectangle;
/**
* Internal state: the level that is built next; 0 is the leaf level.
* buildRTree increments it after each written level.
*/
protected int level = 0;
/**
* (Re)initializes the auxiliary storage that receives the entries produced for one level.
* buildRTree calls this method before each level is written.
*
* @throws IOException if the auxiliary storage cannot be initialized
*/
protected abstract void reinitTempLevelStorage() throws IOException;
/**
* Stores an auxiliary entry in the auxiliary level storage. It is called once for every
* node written by writeLevel and writePartition.
*
* @param entry id of the written node (key) together with the descriptor of that node (value)
* @throws IOException if the entry cannot be stored
*/
protected abstract void storeTempIndexEntry(MapEntry<Long,DoublePointRectangle> entry) throws IOException;
/**
* Returns a cursor over the auxiliary entries of the level written last; buildRTree uses it
* as the input for the computation of the next level.
*
* @return cursor whose elements are consumed as {@code MapEntry<Long, DoublePointRectangle>}
*         by buildRTree and writeLevel
*/
protected abstract Cursor getLevelIterator();
/**
* Creates a bulk loader for the given R-tree. The loader is not usable before
* {@code init} has been called.
*
* @param tree R-tree to bulk load; the target container is obtained by invoking its
*        {@code determineContainer} function
* @param dimension number of dimensions
* @param blockSize block size in bytes
* @param ratio factor used to compute the minimal number of entries per node
* @param storageUtil average storage utilization per node, e.g. 0.5 for 50%
* @param partitionSize maximal number of entries processed as one chunk by writeLevel
*/
public AbstractIterativeRtreeBulkloader(RTree tree,
        int dimension,
        int blockSize,
        double ratio,
        double storageUtil,
        int partitionSize) {
    this.rtree = tree;
    // geometry and block layout
    this.dimension = dimension;
    this.blockSize = blockSize;
    // tuning parameters
    this.ratio = ratio;
    this.storageUtil = storageUtil;
    this.partitionSize = partitionSize;
    // redirect container: nodes are written to the container determined by the tree
    this.treeContainer = (Container) tree.determineContainer.invoke();
}
/**
* Completes the configuration of this bulk loader: computes the node capacities from the
* block size and creates the converter for the auxiliary level entries.
*
* @param arrayProcessor used for computing the optimal one-dimensional partitioning
* @param pType partitioning type
* @param dataSize size of one data object in bytes
* @param dataConverter converter for the data objects stored in leaf nodes
* @param toRectangle maps a data object to its DoublePointRectangle
* @return this bulk loader
*/
public AbstractIterativeRtreeBulkloader<T> init(
        CostFunctionArrayProcessor<? extends DoublePointRectangle> arrayProcessor,
        ProcessingType pType,
        int dataSize,
        final Converter<T> dataConverter,
        UnaryFunction<T, DoublePointRectangle> toRectangle){
    this.arrayProcessor = arrayProcessor;
    // 6 bytes of every block are not available for entries
    // NOTE(review): presumably the node header - confirm against the node converter
    final int usableBytes = blockSize - 6;
    // an index entry takes 16 bytes per dimension for the descriptor plus 8 bytes for the node address
    final int indexEntryBytes = dimension * 16 + 8;
    b_Leaf = (int) ((usableBytes * ratio) / dataSize);
    B_Leaf = usableBytes / dataSize;
    b_Index = (int) ((usableBytes * ratio) / indexEntryBytes);
    B_Index = usableBytes / indexEntryBytes;
    this.pType = pType;
    this.dataSize = dataSize;
    this.dataConverter = dataConverter;
    final Converter<DoublePointRectangle> rectangleConverter =
            new ConvertableConverter<DoublePointRectangle>(
                    Rectangles.factoryFunctionDoublePointRectangle(dimension));
    this.toRectangle = toRectangle;
    // an auxiliary entry is serialized as the node id followed by the node descriptor
    mapEntryConverter = new Converter<MapEntry<Long, DoublePointRectangle>>(){
        @Override
        public MapEntry<Long, DoublePointRectangle> read(DataInput input,
                MapEntry<Long,DoublePointRectangle> ignored) throws IOException {
            final long nodeId = LongConverter.DEFAULT_INSTANCE.readLong(input);
            final DoublePointRectangle descriptor = rectangleConverter.read(input);
            return new MapEntry<Long, DoublePointRectangle>(nodeId, descriptor);
        }
        @Override
        public void write(DataOutput output,
                MapEntry<Long, DoublePointRectangle> entry) throws IOException {
            LongConverter.DEFAULT_INSTANCE.write(output, entry.getKey());
            entry.getValue().write(output);
        }
    };
    return this;
}
/**
* Builds the R-tree level by level. Starting with the data objects, each pass writes the
* nodes of one level with {@code writeLevel} and uses the auxiliary entries produced for
* these nodes as the input of the next pass. The loop ends as soon as a pass has written at
* most one node; the entry of that node becomes the root entry with which the R-tree is
* initialized.
*
* @param rectangles data objects to load; must deliver at least one element
* @throws IOException if the auxiliary level storage cannot be accessed
* @throws java.util.NoSuchElementException if {@code rectangles} delivers no element
* @see xxl.core.indexStructures.rtrees.IterativeBulkLoaderInterface#buildRTree(java.util.Iterator)
*/
@Override
public void buildRTree(Iterator<T> rectangles) throws IOException {
    // without any input no level is written and there is no root entry to read below;
    // fail with an explicit message instead of a bare exception from next()
    if (!rectangles.hasNext()){
        throw new java.util.NoSuchElementException("cannot bulk load an R-tree from an empty input");
    }
    Iterator<?> tempIterator = rectangles;
    while(tempIterator.hasNext()){
        reinitTempLevelStorage();
        int B = (level > 0) ? B_Index : B_Leaf;
        int written = writeLevel(tempIterator, level, partitionSize, rtree, treeContainer, B);
        level++;
        // the entries of the nodes just written are the input of the next level
        tempIterator = getLevelIterator();
        numberOfDataObjects = written;
        if (written <= 1){
            // a single node was written: it is the root
            break;
        }
    }
    // create rtree
    MapEntry<Long, DoublePointRectangle> entry = (MapEntry<Long, DoublePointRectangle>)(tempIterator.next());
    DoublePointRectangle rootDescriptor = entry.getValue();
    IndexEntry indexEntry = (IndexEntry) rtree.createIndexEntry(level);
    IndexEntry rootEntry = (IndexEntry) ((ORTree.IndexEntry)indexEntry.initialize(entry.getKey())).initialize(rootDescriptor);
    // init tree set rootEntry and root descriptor
    rtree.initialize(rootEntry, rootDescriptor, new AbstractFunction() {
            @Override
            public Object invoke(Object argument) {
                return toRectangle.invoke((T)argument);
            }
        },
        blockSize,
        treeContainer, dataSize, dimension * 8 *2, ratio);
}
/**
* Default processing of one level. Reads the level data in chunks of at most
* {@code partitionSize} entries. A chunk with more than {@code B} entries is distributed
* among several nodes according to the computed distribution; a smaller chunk is written as
* a single node. For every written node an auxiliary entry is stored for the generation of
* the next level.
* <p>
* For {@code ProcessingType.SIMPLE} the given {@code partitionSize} is replaced by the
* square of {@code (int) (storageUtil * capacity)}, where capacity is {@code B_Index} above
* the leaf level and {@code B_Leaf} on the leaf level.
*
* @param data entries of the level: data objects for level 0, otherwise
*        {@code MapEntry<Long, DoublePointRectangle>} elements (node id and descriptor)
* @param level level of the nodes to write; 0 is the leaf level
* @param partitionSize maximal number of entries per chunk
* @param rtree R-tree used to create nodes and index entries
* @param treeContainer container the nodes are written to
* @param B maximal number of entries per node
* @return number of nodes written
* @throws IOException if an auxiliary entry cannot be stored
* @throws IllegalArgumentException if there is data to process but the effective partition
*         size is not positive (the data could never be consumed)
*/
@SuppressWarnings({ "unchecked", "serial" })
public int writeLevel(Iterator data,
        final int level,
        int partitionSize,
        RTree rtree,
        Container treeContainer,
        int B) throws IOException{
    if (pType == ProcessingType.SIMPLE){
        int P = (level > 0 ) ? B_Index : B_Leaf;
        P = (int) (storageUtil * P);
        // multiply as long and clamp, so that a large capacity cannot overflow into a negative chunk size
        partitionSize = (int) Math.min((long) P * P, (long) Integer.MAX_VALUE);
    }
    if (data.hasNext() && partitionSize <= 0){
        // a non-positive chunk size would never consume the input and write empty nodes forever
        throw new IllegalArgumentException("partition size must be positive, but is " + partitionSize);
    }
    // maps an element of a chunk to its rectangle; independent of the chunk, hence created once
    final Function mapping = new AbstractFunction() {
        @Override
        public Object invoke(Object obj ){
            return (level == 0 )?
                (toRectangle.invoke((T)obj) ) : (DoublePointRectangle)((IndexEntry)obj).descriptor();
        }
    };
    int counter = 0;
    while(data.hasNext()){
        // read the next chunk of at most partitionSize entries into memory
        final List partition = new ArrayList();
        for(int i = 0; data.hasNext() && i < partitionSize; i++ ){
            if (level > 0 ){
                MapEntry<Long, DoublePointRectangle> mapEntry = (MapEntry<Long, DoublePointRectangle>) data.next();
                DoublePointRectangle rec = mapEntry.getValue();
                // create index entry
                IndexEntry indexEntry = (IndexEntry) rtree.createIndexEntry(level);
                ((ORTree.IndexEntry)indexEntry.initialize(mapEntry.getKey())).initialize(rec);
                partition.add(indexEntry);
            }else{
                partition.add(data.next());
            }
        }
        if (partition.size() > B ){
            // the chunk does not fit into one node: compute a distribution and write one node per bucket
            final int[] distribution = computeDistribution(
                (Iterator<DoublePointRectangle>)new Mapper(mapping, partition.iterator() ), level, partition.size());
            counter += writePartition(distribution, partition.iterator(), level, B, rtree, treeContainer);
        }else{
            // just allocate one node
            MapEntry<Long, DoublePointRectangle> entry = writeNode(partition, level, rtree, treeContainer);
            storeTempIndexEntry(entry);
            counter++;
        }
    }
    return counter;
}
/**
* Default method for processing one chunk of level data after the partitioning has been
* computed: for each value of the distribution the next entries are taken from
* {@code data}, written as one node, and the auxiliary entry of that node is stored.
*
* @param distribution number of entries of each node to write, in order
* @param data entries of the chunk; index entries for levels above 0, data objects otherwise
* @param level level of the nodes to write; 0 is the leaf level
* @param B maximal number of entries per node
* @param rtree R-tree used to create the nodes
* @param treeContainer container the nodes are written to
* @return number of nodes written, i.e. the length of {@code distribution}
* @throws IOException if an auxiliary entry cannot be stored
* @throws RuntimeException if a value of the distribution exceeds {@code B}
*/
public int writePartition(int[] distribution,
        Iterator data,
        int level,
        int B, RTree rtree, Container treeContainer) throws IOException{
    final boolean aboveLeafLevel = (level != 0);
    for (int pos = 0; pos < distribution.length; pos++){
        final int nodeSize = distribution[pos];
        final List entries = new ArrayList(nodeSize);
        int taken = 0;
        while (data.hasNext() && taken < nodeSize){
            final Object next = data.next();
            // above the leaf level every element has to be an index entry; the cast checks that
            entries.add(aboveLeafLevel ? (IndexEntry) next : next);
            taken++;
        }
        if (nodeSize > B ){
            throw new RuntimeException("too many entries per block");
        }
        storeTempIndexEntry(writeNode(entries, level, rtree, treeContainer));
    }
    return distribution.length;
}
/**
* Writes one node with the given entries to the R-tree container and computes the
* descriptor of the node as the union of the rectangles of its entries.
*
* @param entries entries of the node: data objects for level 0, index entries otherwise
* @param level level of the node; 0 is the leaf level
* @param rtree R-tree used to create the node
* @param treeContainer container the node is written to
* @return id of the written node together with its descriptor; the descriptor is
*         {@code null} if {@code entries} is empty
*/
public MapEntry<Long,DoublePointRectangle> writeNode(List<?> entries, int level, RTree rtree, Container treeContainer) {
    final boolean leafLevel = (level == 0);
    DoublePointRectangle covering = null;
    for (final Object element : entries ){
        final DoublePointRectangle current;
        if (leafLevel){
            current = (DoublePointRectangle) (toRectangle.invoke((T) element));
        }else{
            current = (DoublePointRectangle) ((IndexEntry) element).descriptor();
        }
        if (covering != null){
            // the result of union is not used: the receiver is relied upon to be enlarged
            covering.union(current);
        }else{
            // start from a copy so that the rectangle of the first entry is not modified
            covering = new DoublePointRectangle(current);
        }
    }
    final Node node = (Node) rtree.createNode(level);
    // reserve an id first, then fill the node and write it
    final Long nodeId = (Long) treeContainer.reserve(new Constant<Node>(node));
    node.initialize(level, entries);
    treeContainer.update(nodeId, node); // I/O
    return new MapEntry<Long, DoublePointRectangle>(nodeId, covering);
}
/**
* Computes the distribution for a chunk of the given level with the settings of this bulk
* loader (cost function processor, storage utilization and partitioning type). The
* capacities of index nodes are used for levels above 0, those of leaf nodes for level 0.
*
* @param iterator rectangles of the chunk, in the order in which they are to be packed
* @param level level of the nodes to create; 0 is the leaf level
* @param size number of rectangles delivered by {@code iterator}
* @return the number of entries of each node, in input order
*/
public int[] computeDistribution(Iterator<DoublePointRectangle> iterator, int level, int size){
    final boolean indexLevel = (level > 0);
    final int maxEntries = indexLevel ? B_Index : B_Leaf;
    final int minEntries = indexLevel ? b_Index : b_Leaf;
    return computeDistribution(iterator, level, size, arrayProcessor,
            maxEntries, minEntries, storageUtil, pType);
}
/**
* Returns the R-tree handled by this bulk loader.
*
* @return the R-tree passed to the constructor; buildRTree initializes it at the end of
*         bulk loading
*/
public RTree getRTree(){
    return rtree;
}
}