/* XXL: The eXtensible and fleXible Library for data processing Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger Head of the Database Research Group Department of Mathematics and Computer Science University of Marburg Germany This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; If not, see <http://www.gnu.org/licenses/>. http://code.google.com/p/xxl/ */ package xxl.core.indexStructures; import java.io.DataInput; import java.io.DataInputStream; import java.io.DataOutput; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; import xxl.core.collections.containers.Container; import xxl.core.collections.containers.io.BlockFileContainer; import xxl.core.collections.containers.io.BufferedContainer; import xxl.core.collections.containers.io.ConverterContainer; import xxl.core.cursors.Cursor; import xxl.core.functions.AbstractFunction; import xxl.core.functions.Function; import xxl.core.indexStructures.keyRanges.StringKeyRange; import xxl.core.indexStructures.separators.StringSeparator; import xxl.core.indexStructures.testData.Student; import xxl.core.indexStructures.vLengthBPlusTree.VariableLengthBPlusTree; import xxl.core.indexStructures.vLengthBPlusTree.VariableLengthBPlusTree.IndexEntry; import xxl.core.indexStructures.vLengthBPlusTree.splitStrategy.SimplePrefixBPlusTreeSplit; import xxl.core.indexStructures.vLengthBPlusTree.underflowHandlers.StandardUnderflowHandler; import xxl.core.io.LRUBuffer; import xxl.core.io.converters.Converters; import xxl.core.io.converters.IntegerConverter; import xxl.core.io.converters.LongConverter; import xxl.core.io.converters.MeasuredConverter; import xxl.core.io.converters.StringConverter; /** * This class shows how to use VariableLengthBPlusTree. * First we load Student data, which is indexed on string value ( @see {@link Student}). * And store Index meta data i order to reuse the index structure. * Second we show how to remove and update date. * Also in the last step it will described how to run queiries */ public class VariableLengthBPlusTreeUseCase { // block size of the underlined container // nodes are mapped to the blocks(pages) public static final int BLOCK_SIZE = 2048; // minimal capacity as a fraction of bytes // needed to compute appropriate split public static final double MIN_RATIO = 0.4; // size of LRU Buffer public static final int BUFFER_SIZE = 20; // number of elements which we are want to insert public static final int NUMBER_OF_ELEMENTS = 100000; // public static final String path ="vlBplus"; /** * In order to initialize a tree we need to provide converter for the data with a miximal size in bytes; * We assume that both values name and info of the student is bounded by 50 Bytes * * */ public static final MeasuredConverter<Student> dataMeasuredConverter = new MeasuredConverter<Student>(){ @Override public int getMaxObjectSize() { // 50 bytes for string name // 50 bytes for info return 50 + 50 +4; } @Override public Student read(DataInput dataInput, Student object) throws IOException { return Student.DEFAULT_CONVERTER.read(dataInput, object); } @Override public void write(DataOutput dataOutput, Student object) throws IOException { Student.DEFAULT_CONVERTER.write(dataOutput, object); } }; /** * We want to index student on their name values. That means we have also variable length keys. * */ public static final MeasuredConverter<String> keyConverter = new MeasuredConverter<String>(){ @Override public int getMaxObjectSize() { // 50 bytes for string return 50; } @Override public String read(DataInput dataInput, String object) throws IOException { return StringConverter.DEFAULT_INSTANCE.read(dataInput, object); } @Override public void write(DataOutput dataOutput, String object) throws IOException { StringConverter.DEFAULT_INSTANCE.write(dataOutput, object); } }; /** * this function is used to compute actual serialized size of the data */ public static final Function<Object, Integer> getDataSize = new AbstractFunction<Object , Integer>() { public Integer invoke(Object arg){ //cast to student Student std = (Student)arg; int nameSize = Converters.sizeOf(StringConverter.DEFAULT_INSTANCE, std.getName()); int infoSize = Converters.sizeOf(StringConverter.DEFAULT_INSTANCE, std.getInfo()); return nameSize +infoSize + 4; } }; /** * this function is used to compute actual serialized size of the key */ public static final Function<Object, Integer> getKeySize = new AbstractFunction<Object , Integer>() { public Integer invoke(Object arg){ //cast to string String std = (String)arg; int nameSize = Converters.sizeOf(StringConverter.DEFAULT_INSTANCE, std); return nameSize; } }; /** * this function is used for mapping the student to its string key */ public static final Function<Student, String> getKeyFunction = new AbstractFunction<Student, String>() { public String invoke(Student st){ return st.getName(); } }; /** * method saves meta info about the tree. * this information we need for restoring the tree * @param btree * @param path * @throws IOException */ protected static void saveTree(VariableLengthBPlusTree btree, String path) throws IOException{ DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(path))); IndexEntry entry = (IndexEntry) btree.rootEntry(); StringKeyRange range = (StringKeyRange) btree.rootDescriptor(); // store root entry // 1. id -> longs LongConverter.DEFAULT_INSTANCE.write(out, (Long)entry.id()); // 2. level IntegerConverter.DEFAULT_INSTANCE.write(out, entry.parentLevel()); // 3. key of the root StringConverter.DEFAULT_INSTANCE.write(out, (String)entry.separator().sepValue()); // store root descriptor which is StringKeyRange StringConverter.DEFAULT_INSTANCE.write(out, (String)range.minBound()); StringConverter.DEFAULT_INSTANCE.write(out, (String)range.maxBound()); out.close(); } /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { /** * prepare test data */ List<Integer> stds = new ArrayList<Integer>(NUMBER_OF_ELEMENTS); for(int i = 0; i < NUMBER_OF_ELEMENTS; i++){ stds.add(i); } Collections.shuffle(stds, new Random(42)); /** * 1. initialize tree and load test data */ // at this step the blockfileContainer is initialized // the state of the container is lost if call BlockFileContainer(path, BLOCK_SIZE) // and provide BLOCK_SIZE information VariableLengthBPlusTree tree = new VariableLengthBPlusTree(BLOCK_SIZE, MIN_RATIO , false); Container fileContainer = new BlockFileContainer(path, BLOCK_SIZE); Container bufferContainer = new BufferedContainer(fileContainer, new LRUBuffer(BUFFER_SIZE)); // here we need to provide a converter for the nodes Container converterContainer = new ConverterContainer(bufferContainer, tree.nodeConverter()); // now we initialize tree // so we use standard overflow handler and simple prefix split for string keys // the split index is searched in the interval [0.4B , (1-0.4)B] B is in bytes tree.initialize(null, // <- null because we have no meta data null, // <- getKeyFunction, converterContainer, keyConverter, dataMeasuredConverter, StringSeparator.FACTORY_FUNCTION, StringKeyRange.FACTORY_FUNCTION, getKeySize, getDataSize, new SimplePrefixBPlusTreeSplit(), new StandardUnderflowHandler() ); System.out.println("insert data\n"); // load data tuple by tuple int k = 0; for(Integer i : stds){ tree.insert(new Student("name_" + i, i , "info_"+i)); k++; if (k % 5000 == 0) System.out.print("."); } System.out.println(); // flush containers before we can store meta info converterContainer.flush(); converterContainer.close(); // save tree state info // we need to store the id of the root entry its level and key // also we store KeyRange of the tree saveTree(tree, path+"_metadata.dat"); /** * 2. reload tree */ System.out.println("reload tree"); tree = new VariableLengthBPlusTree(BLOCK_SIZE, MIN_RATIO , false); // now reeds the blockfilecontainer its state fileContainer = new BlockFileContainer(path); bufferContainer = new BufferedContainer(fileContainer, new LRUBuffer(BUFFER_SIZE)); converterContainer = new ConverterContainer(bufferContainer, tree.nodeConverter()); // now we read state information about the root entry and DataInputStream in = new DataInputStream( new FileInputStream(new File(path+"_metadata.dat"))); // read and initialize Long id = LongConverter.DEFAULT_INSTANCE.read(in); int level = IntegerConverter.DEFAULT_INSTANCE.readInt(in); String key = StringConverter.DEFAULT_INSTANCE.read(in); String minKey = StringConverter.DEFAULT_INSTANCE.read(in); String maxKey = StringConverter.DEFAULT_INSTANCE.read(in); IndexEntry rootEntry = ((IndexEntry)tree.createIndexEntry(level)).initialize(id, new StringSeparator(key)); StringKeyRange rootDescriptor = new StringKeyRange(minKey, maxKey); in.close(); tree.initialize(rootEntry, // <- the restored rootEntry rootDescriptor, // <- restored key range getKeyFunction, converterContainer, // <- pass file container keyConverter, dataMeasuredConverter, StringSeparator.FACTORY_FUNCTION, StringKeyRange.FACTORY_FUNCTION, getKeySize, getDataSize, new SimplePrefixBPlusTreeSplit(), new StandardUnderflowHandler() ); // we can now remove some entries Student st = (Student)tree.remove(new Student("name_0", 0, "info_0")); System.out.println("Object deleted: " + st); // or we can update data tree.update(new Student("name_99", 99, "info_99"), new Student("name_99", -10, "info_123456789")); // /** * query tree */ // exact match query st = (Student)tree.exactMatchQuery("name_99"); System.out.println("query with key name_99 :" + st); // range query System.out.println("query with keys [name_100, name_2] :"); Cursor c = tree.rangeQuery("name_19", "name_2"); while(c.hasNext()){ Student student = (Student)c.next(); System.out.println(student); } // in order to get all students data // just call tree.query() method } }