package xxl.core.indexStructures.rtrees;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import xxl.core.collections.MapEntry;
import xxl.core.collections.containers.Container;
import xxl.core.collections.containers.io.BlockFileContainer;
import xxl.core.collections.queues.Queue;
import xxl.core.collections.queues.io.BlockBasedQueue;
import xxl.core.cursors.Cursor;
import xxl.core.cursors.sorters.MergeSorter;
import xxl.core.cursors.sources.io.FileInputCursor;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
import xxl.core.functions.Functional.BinaryFunction;
import xxl.core.functions.Functional.UnaryFunction;
import xxl.core.functions.Identity;
import xxl.core.indexStructures.ORTree;
import xxl.core.indexStructures.ORTree.IndexEntry;
import xxl.core.indexStructures.ORTree.Node;
import xxl.core.indexStructures.RTree;
import xxl.core.io.converters.Converter;
import xxl.core.spatial.points.DoublePoint;
import xxl.core.spatial.rectangles.DoublePointRectangle;
/**
* Bulk loader implementing the TGS (Top-down Greedy Split) R-tree loading approach described in:
*
* Yvan J. Garcia R, Mario A. Lopez and Scott T. Leutenegger: "A greedy algorithm for bulk loading R-trees".
*
*
* Note: This is an experimental version. The loading is conducted in main memory, i.e. all
* entries of a tree level are read into a list before they are partitioned into nodes.
*
* @author achakeye
*
*/
public class TGSBulkLoader<T> extends AbstractIterativeRtreeBulkloader<T>{
/**
 * Path prefix for auxiliary files. The constructor appends "_queues.dat" to it,
 * the external sort appends "tmpsortqueue.tmp", and buildRTree passes it on to
 * storeMetaData.
 */
protected String path = null;
/**
 * Temporary file holding the index entries of the level currently being written;
 * (re)created by reinitTempLevelStorage and read back by getLevelIterator.
 */
protected File file = null;
/**
 * Output stream onto {@link #file}; storeTempIndexEntry serializes entries into it.
 */
protected DataOutputStream out = null;
/**
 * Block file container created by the constructor at {@code path + "_queues.dat"}.
 * It is not read anywhere else in this class.
 */
protected Container queueContainer;
/**
 * Not read or written within this class.
 * NOTE(review): possibly used by subclasses or left over from an earlier version - confirm.
 */
protected int numberOfRectangles;
/**
 * Memory budget handed to the MergeSorter (both memory-size arguments) when sorting
 * externally. Defaults to 10*1024*1024 (presumably bytes - TODO confirm against
 * MergeSorter) and is overwritten by init.
 */
protected int SORT_BUFFER_SIZE = 10*1024*1024;
/**
 * Switch for external sorting: when true, tileData sorts lists with more than
 * B_Leaf*B_Leaf*20 elements through the MergeSorter instead of Collections.sort.
 */
protected static boolean ESORT = true;
/**
 * Rectangle against which bounding rectangles are normalized before their cost
 * is computed.
 */
protected DoublePointRectangle universe = null;
/**
 * Cost function evaluated by tileData for every candidate split. It receives the
 * bounding rectangles of the two sides of the split; the split with the smallest
 * value wins. Built in init.
 */
protected BinaryFunction<DoublePointRectangle, DoublePointRectangle , Double> costFunction = null;
/**
 * Maximum number of records per node for the level currently being built:
 * buildRTree sets it to B_Leaf for level 0 and to B_Index for all higher levels.
 */
protected int maxRecordsProNode = 0 ;
/**
 * Average query side length per dimension. When non-null, the cost function adds
 * a[i] to the i-th normalized side length of each rectangle; when null, the plain
 * normalized volume is used.
 */
protected double[] a = null;
/**
 * Creates a loader without average query side lengths; delegates to the
 * eight-argument constructor with {@code null} as the last argument, so the
 * cost function built by init uses the plain normalized volume.
 *
 * @param rtree the tree to be loaded
 * @param path path prefix for auxiliary files
 * @param dimension dimensionality of the data
 * @param blockSize size of a block
 * @param ratio forwarded unchanged to the superclass constructor
 * @param nodeUtil forwarded unchanged to the superclass constructor
 * @param universe rectangle used to normalize bounding rectangles in the cost function
 */
public TGSBulkLoader(
        RTree rtree,
        String path,
        int dimension,
        int blockSize,
        double ratio,
        double nodeUtil,
        DoublePointRectangle universe) {
    this(rtree, path, dimension, blockSize, ratio, nodeUtil, universe, null);
}
/**
 * Creates a loader and opens the auxiliary block file container at
 * {@code path + "_queues.dat"}.
 *
 * @param rtree the tree to be loaded
 * @param path path prefix for auxiliary files
 * @param dimension dimensionality of the data
 * @param blockSize size of a block; also used as block size of the auxiliary container
 * @param ratio forwarded unchanged to the superclass constructor
 * @param nodeUtil forwarded unchanged to the superclass constructor
 * @param universe rectangle used to normalize bounding rectangles in the cost function
 * @param averageQuerySideLength per-dimension values added to the normalized side
 *        lengths in the cost function, or {@code null} to use the plain volume;
 *        the array is stored by reference, not copied
 */
public TGSBulkLoader(
        RTree rtree,
        String path,
        int dimension,
        int blockSize,
        double ratio,
        double nodeUtil,
        DoublePointRectangle universe,
        double[] averageQuerySideLength) {
    // NOTE(review): the meaning of the constant 20_000 is defined by the superclass - confirm there.
    super(rtree, dimension, blockSize, ratio, nodeUtil, 20_000);
    this.path = path;
    this.universe = universe;
    this.a = averageQuerySideLength;
    //check if it right
    this.queueContainer = new BlockFileContainer(path + "_queues.dat", blockSize);
}
/**
 * Prepares the loader: initializes the superclass, derives the node capacities
 * B_Leaf and B_Index from the block size, and installs the cost function.
 * <p>
 * The cost of a split is the sum of the costs of its two sides. The cost of one
 * side is the product of the side lengths of its bounding rectangle after
 * normalization against {@link #universe}; if {@link #a} is non-null at the time
 * init is called, a[i] is added to the i-th side length first.
 *
 * @param number number of data objects; stored in numberOfDataObjects
 * @param sortMemoryBufferSize memory budget for external sorting; stored in SORT_BUFFER_SIZE
 * @param dataSize size of one serialized data object, used to derive B_Leaf
 * @param dataConverter converter for the data objects, forwarded to the superclass
 * @param toRectangle mapping from data object to rectangle, forwarded to the superclass
 * @return this loader
 */
public IterativeBulkLoaderInterface<T> init(int number, int sortMemoryBufferSize, int dataSize,
        final Converter<T> dataConverter,
        UnaryFunction<T, DoublePointRectangle> toRectangle){
    super.init(null, ProcessingType.SIMPLE, dataSize, dataConverter, toRectangle);
    // NOTE(review): 6 bytes are subtracted from the block, presumably for a node header - confirm.
    final int payload = blockSize - 6;
    final int leafSlots = payload / dataSize;
    final int indexSlots = payload / (dimension * 16 + 8);
    B_Leaf = (int) ((double) leafSlots * storageUtil);
    B_Index = (int) ((double) indexSlots * storageUtil);
    this.numberOfDataObjects = number;
    this.SORT_BUFFER_SIZE = sortMemoryBufferSize;
    // The variant (with or without query side lengths) is fixed here, at init time.
    final boolean addQuerySides = (a != null);
    costFunction = new BinaryFunction<DoublePointRectangle, DoublePointRectangle , Double>() {
        @Override
        public Double invoke(DoublePointRectangle arg, DoublePointRectangle arg1) {
            double leftCost = normalizedVolume(arg, addQuerySides);
            double rightCost = normalizedVolume(arg1, addQuerySides);
            return leftCost + rightCost;
        }
    };
    return this;
}

/**
 * Computes the product of the side lengths of a copy of the given rectangle after
 * normalizing the copy against {@link #universe}; optionally a[i] is added to the
 * i-th side length before multiplying.
 */
private double normalizedVolume(DoublePointRectangle rectangle, boolean addQuerySides) {
    DoublePointRectangle normalized = new DoublePointRectangle(rectangle);
    normalized.normalize(universe);
    double[] sides = normalized.deltas();
    double volume = 1d;
    for (int i = 0; i < sides.length; i++) {
        if (addQuerySides) {
            volume *= (sides[i] + a[i]);
        } else {
            volume *= (sides[i]);
        }
    }
    return volume;
}
/**
 * Builds the tree level by level. For every level all entries are read into
 * memory, partitioned into nodes by {@link #tileData}, and the resulting
 * (node id, bounding rectangle) pairs are written to a temporary file that serves
 * as input of the next level. The loop ends as soon as a level consists of a
 * single node; that node becomes the root and the tree is initialized with it.
 * <p>
 * The per-level output stream is closed before the file is read back, the cursor
 * over a level file is closed once it has been consumed, and every level file
 * (including the last one) is registered for deletion on JVM exit.
 * <p>
 * An empty input is not handled specially: the call to {@code next()} after the
 * loop then fails in the input iterator.
 *
 * @param rectangles the leaf-level data
 * @throws IOException if a temporary level file cannot be created or written
 * @see xxl.core.indexStructures.rtrees.AbstractIterativeRtreeBulkloader#buildRTree(java.util.Iterator)
 */
public void buildRTree(Iterator<T> rectangles) throws IOException{
    Iterator levelInput = rectangles;
    // cursor over the temp file of the previous level, if any; must be closed after use
    Cursor levelCursor = null;
    int level = 0;
    MapEntry<Long, DoublePointRectangle> entry;
    try {
        while (levelInput.hasNext()) {
            File levelFile = File.createTempFile("levelRecs_", "dat");
            // register right away so the file of the final level is cleaned up as well
            levelFile.deleteOnExit();
            // read the whole level into memory
            List data = new ArrayList<Object>();
            while (levelInput.hasNext()) {
                data.add(levelInput.next());
            }
            // the previous level file has been consumed completely; release its handle
            if (levelCursor != null) {
                levelCursor.close();
                levelCursor = null;
            }
            // main call
            maxRecordsProNode = (level > 0) ? B_Index : B_Leaf;
            int written;
            try (DataOutputStream levelOut = new DataOutputStream(new FileOutputStream(levelFile))) {
                written = tileData(data, data.size(), level, levelOut);
            }
            level++;
            levelCursor = new FileInputCursor<MapEntry<Long,DoublePointRectangle>>(mapEntryConverter, levelFile);
            levelInput = levelCursor;
            if (written <= 1) {
                break;
            }
        }
        // the single remaining entry describes the root node
        entry = (MapEntry<Long, DoublePointRectangle>) (levelInput.next());
    } finally {
        if (levelCursor != null) {
            levelCursor.close();
        }
    }
    // create rtree
    DoublePointRectangle rootDescriptor = entry.getValue();
    IndexEntry indexEntry = (IndexEntry) rtree.createIndexEntry(level);
    IndexEntry rootEntry = (IndexEntry) ((ORTree.IndexEntry) indexEntry.initialize(entry.getKey())).initialize(rootDescriptor);
    storeMetaData(path, entry.getKey(), rootEntry, rootDescriptor);
    // init tree
    rtree.initialize(rootEntry, rootDescriptor, Identity.DEFAULT_INSTANCE, blockSize,
            treeContainer, dimension * 8 * 2, dimension * 8 * 2, ratio);
}
/**
 * Stores the given entries as one node of the given level in the tree container
 * and returns the id of the new node together with the union of the entries'
 * rectangles.
 *
 * @param entries the node content: rectangles for level 0, index entries otherwise
 * @param level level of the node to create
 * @return the container id of the node mapped to its bounding rectangle; the
 *         rectangle is {@code null} if {@code entries} is empty
 */
public MapEntry<Long, DoublePointRectangle> writeNode(List entries, int level) {
    final boolean leafLevel = (level == 0);
    DoublePointRectangle boundingBox = null;
    for (Iterator it = entries.iterator(); it.hasNext(); ) {
        Object element = it.next();
        DoublePointRectangle current;
        if (leafLevel) {
            current = (DoublePointRectangle) element;
        } else {
            current = (DoublePointRectangle) ((IndexEntry) element).descriptor();
        }
        if (boundingBox != null) {
            boundingBox.union(current);
        } else {
            // copy, so that the first entry's rectangle is not modified by later unions
            boundingBox = new DoublePointRectangle(current);
        }
    }
    final Node node = (Node) rtree.createNode(level);
    // reserve an id first, then fill the node and store it under that id
    Long nodeId = (Long) treeContainer.reserve(new Constant<Node>(node));
    node.initialize(level, entries);
    treeContainer.update(nodeId, node);
    return new MapEntry<Long, DoublePointRectangle>(nodeId, boundingBox);
}
/**
 * Packs the given elements, in iteration order, into nodes of at most B_Leaf
 * (level 0) or B_Index (higher levels) entries, stores each node via
 * {@link #writeNode(List, int)} and appends the resulting
 * (node id, bounding rectangle) pair to {@code out}.
 *
 * @param data rectangles for level 0, otherwise map entries (node id, rectangle)
 *        of the level below
 * @param number not used by this method
 * @param level level of the nodes to create
 * @param out sink for the serialized map entries of the created nodes
 * @return number of nodes written
 * @throws IOException if writing to {@code out} fails
 */
@SuppressWarnings("unchecked")
public int writeNode(Iterator data, int number, int level, DataOutput out) throws IOException{
    final int capacity = (level > 0) ? B_Index : B_Leaf;
    int nodeCount = 0;
    while (data.hasNext()) {
        List nodeEntries = new ArrayList(capacity);
        int filled = 0;
        while (filled < capacity && data.hasNext()) {
            if (level == 0) {
                // leaf level: the rectangles themselves are the node entries
                DoublePointRectangle rectangle = (DoublePointRectangle) data.next();
                nodeEntries.add(rectangle);
            } else {
                MapEntry<Long, DoublePointRectangle> child = (MapEntry<Long, DoublePointRectangle>) data.next();
                DoublePointRectangle rectangle = child.getValue();
                // create index entry pointing to the child node
                IndexEntry indexEntry = (IndexEntry) rtree.createIndexEntry(level);
                ((ORTree.IndexEntry) indexEntry.initialize(child.getKey())).initialize(rectangle);
                nodeEntries.add(indexEntry);
            }
            filled++;
        }
        MapEntry<Long, DoublePointRectangle> stored = writeNode(nodeEntries, level);
        nodeCount++;
        mapEntryConverter.write(out, stored);
    }
    return nodeCount;
}
/**
 * Recursively partitions the given elements with a greedy binary split and
 * writes the resulting nodes.
 * <p>
 * If the elements fit into one node they are written directly. Otherwise the
 * elements are sorted by rectangle center in every dimension; for every
 * dimension all split positions that are multiples of M are evaluated with
 * {@link #costFunction} on the bounding rectangles of the two sides, where M is
 * the largest power of {@link #maxRecordsProNode} that is smaller than
 * {@code number}. The cheapest split is applied and both halves are processed
 * recursively.
 * <p>
 * M is computed with integer arithmetic. Deriving it from a quotient of
 * floating-point logarithms overshoots for exact powers (for example
 * log(125)/log(5) evaluates to 3.0000000000000004), which yields M == number,
 * leaves no split position to evaluate and cuts off a single-element node.
 * <p>
 * Side effect: the list that ends up holding the chosen sort order is cleared;
 * without external sorting this is {@code dataF} itself.
 *
 * @param dataF elements to partition: rectangles for level 0, map entries otherwise
 * @param number number of elements; callers in this class pass {@code dataF.size()}
 * @param level level of the nodes to create
 * @param out sink for the (node id, bounding rectangle) pairs of the created nodes
 * @return number of nodes written
 * @throws IOException if sorting or writing fails
 * @throws IllegalStateException if the elements do not fit into one node and
 *         {@link #maxRecordsProNode} is smaller than 1, because the recursion
 *         could never terminate
 */
@SuppressWarnings("unchecked")
public int tileData(List dataF, int number, int level, DataOutput out) throws IOException{
    if (number <= maxRecordsProNode){
        return writeNode(dataF.iterator(), number, level, out);
    }
    if (maxRecordsProNode < 1){
        throw new IllegalStateException(
                "node capacity must be at least 1 but is " + maxRecordsProNode);
    }
    // M = largest power of the node capacity that is strictly smaller than number
    long subtreeCapacity = 1;
    if (maxRecordsProNode > 1){
        while (subtreeCapacity * maxRecordsProNode < number){
            subtreeCapacity *= maxRecordsProNode;
        }
    }
    final int M = (int) subtreeCapacity;
    final boolean externalSort = ESORT && (dataF.size() > B_Leaf*B_Leaf*20);
    List data = dataF;
    double costs = Double.MAX_VALUE;
    int argMin = 0;
    int argDim = 0;
    List[] sortedLists = new List[dimension];
    for (int i = 0; i < dimension; i++){
        // sort by center coordinate of dimension i
        if (externalSort){
            sortedLists[i] = sort(dataF, getDimensionComparator(i, level), level);
            data = sortedLists[i];
        }
        else{
            Collections.sort(data, getDimensionComparator(i, level));
        }
        // bounding rectangles of all prefixes and all suffixes of the sorted order
        List<DoublePointRectangle> forwardList = computeCosts(data, true, level);
        List<DoublePointRectangle> backwardList = computeCosts(data, false, level);
        int splits = number / M;
        for (int k = 1; k <= splits; k++){
            int splitIndex = k * M;
            if (splitIndex < number){
                DoublePointRectangle costLeft = forwardList.get(splitIndex - 1);
                DoublePointRectangle costRight = backwardList.get(splitIndex);
                double fcost = costFunction.invoke(costLeft, costRight);
                if (fcost < costs){
                    costs = fcost;
                    argMin = splitIndex - 1;
                    argDim = i;
                }
            }
        }
    }
    if (externalSort){
        // the order of the winning dimension was already materialized above;
        // sorting it a second time would only repeat an expensive external sort
        data = sortedLists[argDim];
    }
    else{
        Collections.sort(data, getDimensionComparator(argDim, level));
    }
    List left = new ArrayList(data.subList(0, argMin + 1));
    List right = new ArrayList(data.subList(argMin + 1, data.size()));
    data.clear();
    // drop the per-dimension copies so they are not kept alive during the recursion
    Arrays.fill(sortedLists, null);
    return tileData(left, left.size(), level, out)
            + tileData(right, right.size(), level, out);
}
/**
 * Computes running bounding rectangles over the given list. In forward direction
 * position i of the result holds the union of the rectangles at positions 0..i;
 * in backward direction it holds the union of the rectangles at positions
 * i..size-1. Every position holds its own copy of the running union.
 *
 * @param data rectangles for level 0, map entries (node id, rectangle) otherwise
 * @param forward {@code true} to accumulate from the first element, {@code false}
 *        to accumulate from the last element
 * @param level level the elements belong to; decides how the rectangle is extracted
 * @return fixed-size list with one bounding rectangle per position of {@code data}
 */
protected List<DoublePointRectangle> computeCosts(List data, boolean forward, int level){
    final int size = data.size();
    final DoublePointRectangle[] boxes = new DoublePointRectangle[size];
    final int step = forward ? 1 : -1;
    int position = forward ? 0 : size - 1;
    DoublePointRectangle running = null;
    for (int visited = 0; visited < size; visited++, position += step){
        Object element = data.get(position);
        DoublePointRectangle rec;
        if (level > 0){
            rec = ((MapEntry<Long,DoublePointRectangle>) element).getValue();
        } else {
            rec = (DoublePointRectangle) element;
        }
        if (running != null){
            running.union(rec);
        } else {
            running = new DoublePointRectangle(rec);
        }
        // snapshot, because the running union keeps growing
        boxes[position] = new DoublePointRectangle(running);
    }
    return Arrays.asList(boxes);
}
/**
 * Returns a comparator that orders elements by the center coordinate of their
 * rectangle in the given dimension.
 * <p>
 * The coordinates are compared with {@link Double#compare(double, double)}, which
 * defines a total order (NaN is ordered after all other values). A comparison
 * built from {@code ==} and {@code <} reports NaN as greater in both directions
 * and thereby violates the comparator contract.
 *
 * @param dim dimension whose center coordinate is compared
 * @param level level of the compared elements: rectangles for level 0, map
 *        entries (node id, rectangle) otherwise
 * @return the comparator
 */
@SuppressWarnings("unchecked")
protected Comparator getDimensionComparator(final int dim, final int level){
    return new Comparator() {
        @Override
        public int compare(Object o1, Object o2) {
            return Double.compare(centerCoordinate(o1), centerCoordinate(o2));
        }

        /** Extracts the rectangle of the element and returns its center coordinate in dimension dim. */
        private double centerCoordinate(Object element) {
            DoublePointRectangle rec = (level > 0)
                    ? ((MapEntry<Long,DoublePointRectangle>) element).getValue()
                    : (DoublePointRectangle) element;
            DoublePoint center = rec.getCenter();
            return center.getValue(dim);
        }
    };
}
/**
 * Sorts the given list through {@link #sort(Iterator, int, Comparator)} and
 * collects the sorted elements into a new list; the given list itself is left
 * untouched.
 *
 * @param data elements to sort
 * @param comp order to establish
 * @param level level the elements belong to
 * @return a new list containing the elements in sorted order
 * @throws IOException if the underlying sort fails
 */
protected List sort(List data, Comparator comp, int level) throws IOException{
    final Iterator sorted = sort(data.iterator(), level, comp);
    final List result = new ArrayList<>(data.size());
    for (; sorted.hasNext(); ){
        result.add(sorted.next());
    }
    return result;
}
/**
 * Sorts the given elements with a MergeSorter whose runs are kept in
 * block-based queues on a block file container at
 * {@code path + "tmpsortqueue.tmp"}.
 * <p>
 * The object size handed to the sorter matches the element type of the level:
 * elements of level 0 are rectangles ({@code dimension * 16} bytes), elements of
 * higher levels are map entries that additionally carry an 8-byte node id. This
 * is the same entry size init uses to derive B_Index.
 * <p>
 * NOTE(review): the container created here is not closed by this method; the
 * returned iterator still depends on it while it is being consumed.
 *
 * @param data elements to sort
 * @param level level the elements belong to; selects converter and object size
 * @param comp order to establish
 * @return iterator delivering the elements in sorted order
 * @throws IOException declared for callers; not thrown by the visible statements
 */
protected Iterator sort(Iterator data, int level, Comparator comp ) throws IOException{
    final boolean indexLevel = level > 0;
    final Converter converter = indexLevel ? mapEntryConverter : dataConverter;
    final int rectangleSize = dimension * 16;
    final int objectSize = indexLevel ? rectangleSize + 8 : rectangleSize;
    final Container sortContainer = new BlockFileContainer(path + "tmpsortqueue.tmp", blockSize);
    final Function<Function<?, Integer>, Queue<?>> queueFunction =
            new AbstractFunction<Function<?, Integer>, Queue<?>>() {
                public Queue<?> invoke(Function<?, Integer> function1, Function<?, Integer> function2) {
                    return new BlockBasedQueue(sortContainer, blockSize, converter,
                            function1, function2);
                }
            };
    // the same memory budget is used for the run-creation and the final merge phase
    return new MergeSorter(data,
            comp, objectSize, SORT_BUFFER_SIZE, SORT_BUFFER_SIZE, queueFunction, false);
}
/**
 * Starts a fresh temporary file for the entries of the next level. The stream
 * onto the previous level file, if any, is closed first so its file handle is
 * not leaked; the previous file itself is kept because a cursor returned by
 * {@link #getLevelIterator()} may still be reading it. The new file is
 * registered for deletion on JVM exit.
 *
 * @throws IOException if the previous stream cannot be closed or the new file
 *         cannot be created or opened
 * @see xxl.core.indexStructures.rtrees.AbstractIterativeRtreeBulkloader#reinitTempLevelStorage()
 */
protected void reinitTempLevelStorage() throws IOException{
    if (out != null){
        out.close();
        out = null;
    }
    file = File.createTempFile("levelRecs_", "dat");
    file.deleteOnExit();
    out = new DataOutputStream(new FileOutputStream(file));
}
/**
 * Appends the given (node id, bounding rectangle) pair to the temporary storage
 * of the current level by serializing it with the map entry converter.
 *
 * @param entry the entry to store
 * @throws IOException if writing to the level stream fails
 * @see xxl.core.indexStructures.rtrees.AbstractIterativeRtreeBulkloader#storeTempIndexEntry(xxl.core.collections.MapEntry)
 */
protected void storeTempIndexEntry(MapEntry<Long,DoublePointRectangle> entry) throws IOException{
    final DataOutputStream levelStream = this.out;
    mapEntryConverter.write(levelStream, entry);
}
/**
 * Returns a cursor that reads the (node id, bounding rectangle) pairs back from
 * the current temporary level file, deserializing them with the map entry
 * converter.
 *
 * @return a new cursor over the current level file
 * @see xxl.core.indexStructures.rtrees.AbstractIterativeRtreeBulkloader#getLevelIterator()
 */
protected Cursor getLevelIterator(){
    final File levelFile = this.file;
    return new FileInputCursor<MapEntry<Long,DoublePointRectangle>>(mapEntryConverter, levelFile);
}
}