/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.io;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import xxl.core.functions.Constant;
import xxl.core.functions.Function;
/**
 * This class provides a buffer for buffering I/Os in order to increase
 * performance.
 *
 * <p>It does not define any displacement strategy. When implementing a buffer
 * by extending this class, the user only has to implement the
 * {@link #victim() victim} method, which determines the <i>next</i> slot to
 * displace in the buffer. Objects in the buffer are identified by their owner
 * and an id that is unique at the owner. Additionally, the buffer stores a map
 * that contains the owners and their slots in this buffer. The owners are
 * mapped to maps that contain the owner's slots. These slot maps identify the
 * slots by their ids (for further detail see field
 * {@link Slot#members members} in <code>Buffer.Slot</code>).</p>
 *
 * <p>Important: owners and identifiers are used as keys of hash maps, so
 * every class used as owner or identifier has to implement the hashCode (and
 * the corresponding equals) method of {@link java.lang.Object}.</p>
 *
 * <p>The buffer supports the access, update and removal of slots (the objects
 * contained by the slots) that are identified by their id and owner, and the
 * removal of all slots owned by a specified owner. Slots of the buffer can
 * also be fixed (in order to avoid the removal of them) and unfixed. In order
 * to guarantee highest flexibility, the way of flushing an object in the
 * buffer must be implemented by a function. Therefore an owner can specify an
 * own flush function for every object in the buffer.</p>
 *
 * <p>Objects can be inserted into the buffer by calling the fix or get method
 * with an id that is new at the owner and a function that returns the object
 * to insert when it is invoked with the specified id. The update method can
 * also be used for inserting an object, when it is called with a new id, the
 * object to insert and a flush function for it.</p>
 *
 * <p>Besides the number of slots, the buffer can be limited by a number of
 * bytes. Byte accounting is switched on by passing a byte capacity different
 * from <code>Integer.MAX_VALUE</code> to the constructor; in this case every
 * buffered object is cast to <code>SizeAware</code> in order to determine its
 * size.</p>
 *
 * @param <O> the type of the objects specifying the owner of a buffer's slot.
 * @param <I> the type of the identifiers used for identifying the buffer's
 *        slots.
 * @param <E> the type of the elements stored by this buffer.
 * @see Constant
 * @see Function
 * @see HashMap
 * @see Iterator
 * @see Map
 * @see java.util.Map.Entry
 */
public abstract class Buffer<O, I, E> {

    /**
     * This class provides a single slot in a buffer.
     *
     * <p>The slots are indexed according to their position in the buffer
     * beginning at index <code>0</code>. Every slot is able to contain a
     * single object. In addition to the object itself, the slot stores an id
     * for the object, the owner of the object and a map of all slots that are
     * owned by the owner of the object. The id AND the owner of an object are
     * stored in order to ease the identification of elements in the buffer.
     * Therefore the id of an object must only be unique at the owner, it need
     * not be globally unique. A slot can be fixed, so that its object cannot
     * be removed from it. In order to guarantee highest flexibility, the way
     * of flushing a slot must be implemented by a function. Therefore an owner
     * can specify an own flush function for every object in the buffer.</p>
     *
     * @see Function
     * @see HashMap
     * @see Map
     */
    protected class Slot {

        /**
         * The index of the slot in the buffer. The slots are indexed according
         * to their position in the buffer beginning at index <code>0</code>.
         */
        protected int index;

        /**
         * A map that contains all slots that are owned by the owner of this
         * slot. The slots are identified by their ids. Therefore the id must
         * only be unique at the owner, it need not be globally unique. Every
         * class of identifiers has to implement the hashCode method. The field
         * is <code>null</code> while the slot is empty.
         */
        protected Map<I, Slot> members;

        /**
         * The owner of this slot (of the object contained by this slot).
         */
        protected O owner;

        /**
         * The id that identifies this slot (the object contained by this
         * slot) at its owner.
         */
        protected I id;

        /**
         * The object that is contained by this slot.
         */
        protected E object = null;

        /**
         * A flag that determines whether this slot is fixed or not. The object
         * contained by a fixed slot and the information belonging to it cannot
         * be removed.
         */
        protected boolean isFixed = false;

        /**
         * A function that implements the functionality of flushing a slot.
         * When this slot should be flushed, the function is called with its
         * id and object. Therefore the function must implement all the
         * functionality that is needed to flush a slot. A value of
         * <code>null</code> marks the slot as clean.
         */
        protected Function<Object, ?> flush = null;

        /**
         * Number of bytes which are inside the slot. The value is only
         * maintained when the buffer has a byte capacity different from
         * <code>Integer.MAX_VALUE</code>; otherwise it stays <code>0</code>.
         */
        protected int sizeBytes = 0;

        /**
         * Constructs a new empty slot with the specified index. The new slot
         * contains no object and is not fixed.
         *
         * @param index the index of the new slot.
         */
        protected Slot(int index) {
            this.index = index;
        }

        /**
         * Returns the id of this slot.
         *
         * @return the id of this slot.
         */
        protected I id() {
            return id;
        }

        /**
         * Returns the object that is contained by this slot.
         *
         * @return the object of this slot.
         */
        protected E get() {
            return object;
        }

        /**
         * Returns whether this slot is dirty or not. In other words, returns
         * <code>true</code> if <code>flush != null</code> (the object is not
         * yet flushed), else returns <code>false</code>.
         *
         * @return <code>true</code> if this slot is dirty, else returns
         *         <code>false</code>.
         */
        protected boolean isDirty() {
            return flush != null;
        }

        /**
         * Returns whether this slot is fixed or not.
         *
         * @return <code>true</code> if this slot is fixed, else returns
         *         <code>false</code>.
         */
        protected boolean isFixed() {
            return isFixed;
        }

        /**
         * Fixes this slot so that the object contained by it cannot be removed
         * out of the buffer. The buffer's counter of fixed slots is only
         * increased when the slot was not fixed before.
         */
        protected void fix() {
            if (!isFixed()) {
                fixedSlots++;
            }
            isFixed = true;
        }

        /**
         * Unfixes this slot so that the object contained by it can be removed
         * out of the buffer. The buffer's counter of fixed slots is only
         * decreased when the slot was fixed before.
         */
        protected void unfix() {
            if (isFixed()) {
                fixedSlots--;
            }
            isFixed = false;
        }

        /**
         * Flushes this slot by calling its flush function with its id and
         * object. This implementation checks first whether the slot is dirty.
         * When it is dirty, the flush function is called and set to
         * <code>null</code>, i.e. the slot is not any longer dirty after
         * calling this method.
         */
        protected void flush() {
            if (isDirty()) {
                flush.invoke(id, object);
                flush = null;
            }
        }

        /**
         * Updates the object and flush function of this slot. The object and
         * flush function are replaced by the specified object and function,
         * i.e. the slot is dirty after calling this method (unless the given
         * flush function is <code>null</code>). When byte accounting is
         * switched on, the size of this slot is set to the size reported by
         * the new object.
         *
         * @param object the new object of this slot.
         * @param flush the new flush function of this slot.
         */
        protected void update(E object, Function<Object, ?> flush) {
            // Measure first: when the object cannot be measured the slot is
            // left completely untouched instead of being half updated.
            if (isByteLimited()) {
                this.sizeBytes = ((SizeAware) object).getMemSize();
            }
            this.object = object;
            this.flush = flush;
        }

        /**
         * Inserts the specified object with the specified id and owner in this
         * slot. Sets the object, id and owner of this slot to the specified
         * objects, registers this slot in the member map of the owner (which
         * is created and added to the owner map of the buffer when the owner
         * is new) and increases the number of occupied slots. When byte
         * accounting is switched on, the size of this slot is set to the size
         * reported by the object.
         *
         * @param owner the new owner of this slot.
         * @param id the new id of this slot.
         * @param object the new object of this slot.
         */
        protected void insert(O owner, I id, E object) {
            // Measure first so that a failing measurement does not leave
            // stale references in an otherwise empty slot.
            if (isByteLimited()) {
                this.sizeBytes = ((SizeAware) object).getMemSize();
            }
            this.owner = owner;
            this.id = id;
            this.object = object;
            members = owners.get(owner);
            if (members == null) {
                members = new HashMap<I, Slot>();
                owners.put(owner, members);
            }
            members.put(id, this);
            size++;
        }

        /**
         * Removes the object and any information belonging to it from this
         * slot so that it is empty thereafter. This implementation swaps this
         * slot and the occupied slot with the highest index. Then their
         * indices, this slot's member map and the owner map of the buffer are
         * updated. At last, the attributes of this slot are reset. Calling
         * this method on an empty slot has no effect.
         *
         * <p>The slot is not flushed and <code>sizeBytes</code> is left
         * untouched; callers subtract it from the buffer's byte count before
         * invoking this method and it is overwritten by the next insertion
         * when byte accounting is switched on.</p>
         */
        protected void remove() {
            if (index >= size) {
                return; // the slot is not occupied
            }
            // Keep the occupied slots dense: move the last occupied slot to
            // this position and this slot to the first free position.
            Slot last = slots.get(--size);
            slots.set(last.index = index, last);
            slots.set(index = size, this);
            if (members != null) {
                // The entry may already have been removed through an iterator
                // (see removeAll); removing an absent key is harmless and is
                // not a structural modification of the map.
                members.remove(id);
                if (members.isEmpty()) {
                    owners.remove(owner);
                }
                members = null;
            }
            owner = null;
            id = null;
            if (isFixed()) {
                fixedSlots--;
            }
            isFixed = false;
            flush = null;
            object = null;
        }

        /**
         * Displaces this slot by flushing it and removing the object and any
         * information belonging to it from it.
         */
        protected void displace() {
            flush();
            remove();
        }
    }

    /**
     * The number of fixed slots in this buffer.
     */
    protected int fixedSlots = 0;

    /**
     * The number of slots in this buffer that contain an object.
     */
    protected int size = 0;

    /**
     * The number of bytes currently inside the buffer. The value is only
     * maintained when byte accounting is switched on.
     */
    protected int sizeBytes = 0;

    /**
     * The maximum number of bytes that will be buffered. The value
     * <code>Integer.MAX_VALUE</code> switches byte accounting off.
     */
    protected int capacityBytes;

    /**
     * A list containing all the slots of this buffer. The occupied slots are
     * stored at the indices <code>0</code> to <code>size-1</code>.
     */
    protected List<Slot> slots;

    /**
     * A map that contains the owners and their slots in this buffer. The
     * owners are mapped to maps that contain the owner's slots. These slot
     * maps identify the slots by their ids (for further detail see field
     * {@link Slot#members members} in <code>Buffer.Slot</code>).
     */
    protected Map<O, Map<I, Slot>> owners = new HashMap<O, Map<I, Slot>>();

    /**
     * Constructs a new empty buffer with a number of slots specified by the
     * given capacity. The slots are created by calling
     * {@link #newSlot(int) newSlot}; note that this happens before the
     * constructor of a subclass has been executed.
     *
     * @param capacity the number of slots in the new buffer.
     * @param capacityBytes the capacity of the buffer in bytes. The value
     *        <code>Integer.MAX_VALUE</code> means that the number of bytes is
     *        not limited. For every other value the buffered objects have to
     *        (efficiently) implement the interface SizeAware, so that the
     *        buffer can determine the correct number of bytes used.
     */
    public Buffer(int capacity, int capacityBytes) {
        this.capacityBytes = capacityBytes;
        this.slots = new ArrayList<Slot>(capacity);
        for (int i = 0; i < capacity; i++) {
            slots.add(newSlot(i));
        }
    }

    /**
     * Constructs a new empty buffer with a number of slots specified by the
     * given capacity. The number of buffered bytes is not limited.
     *
     * @param capacity the number of slots in the new buffer.
     */
    public Buffer(int capacity) {
        this(capacity, Integer.MAX_VALUE);
    }

    /**
     * Returns the <i>next</i> slot to displace in this buffer. This method is
     * called every time a slot should be displaced and must implement the
     * displacement strategy of this buffer.
     *
     * @return the <i>next</i> slot to displace in this buffer.
     */
    protected abstract Slot victim();

    /**
     * Creates a new empty slot with the specified index. This factory method
     * simply calls the constructor of <code>Slot</code>. Every subclass of
     * <code>Buffer</code> that extends the inner class <code>Slot</code> must
     * overwrite this method by defining the method
     * <pre>
     *     protected Buffer.Slot newSlot(int index) {
     *         return new Slot(index);
     *     }
     * </pre>
     * This guarantees that every call of the newSlot method creates the
     * correct corresponding <code>Slot</code> object of the subclass.
     *
     * @param index the index of the new slot.
     * @return a new empty slot with the specified index.
     */
    protected Slot newSlot(int index) {
        return new Slot(index);
    }

    /**
     * Returns whether this buffer accounts for the bytes of the buffered
     * objects, i.e. whether its byte capacity differs from
     * <code>Integer.MAX_VALUE</code>.
     *
     * @return <code>true</code> if byte accounting is switched on, else
     *         returns <code>false</code>.
     */
    private boolean isByteLimited() {
        return capacityBytes != Integer.MAX_VALUE;
    }

    /**
     * Determines the <i>next</i> slot to displace by calling the victim
     * method, subtracts its bytes from the byte count of this buffer and
     * displaces it.
     */
    private void displaceVictim() {
        Slot vic = victim();
        // The bytes have to be subtracted before the slot is displaced.
        sizeBytes -= vic.sizeBytes;
        vic.displace();
    }

    /**
     * Returns the number of slots in this buffer that contain an object.
     *
     * @return the number of occupied slots in this buffer.
     */
    public int size() {
        return size;
    }

    /**
     * Returns the capacity of this buffer.
     *
     * @return the maximal number of slots this buffer can contain.
     */
    public int capacity() {
        return slots.size();
    }

    /**
     * Returns the number of bytes used in this buffer.
     *
     * @return the occupied space of this buffer in bytes.
     */
    public int bytesUsed() {
        return sizeBytes;
    }

    /**
     * Returns the slot with the given id owned by the specified owner. This
     * implementation maps owner to the map that contains the owner's slots by
     * using the map <code>owners</code>. Thereafter the slot map is used for
     * mapping the id to the slot identified by the id. When there is no such
     * slot <code>null</code> is returned.
     *
     * @param owner the owner of the slot to return.
     * @param id the id of the slot to return.
     * @return the slot with the given id owned by the specified owner or
     *         <code>null</code> if no such slot exists.
     */
    protected Slot lookUp(O owner, I id) {
        Map<I, Slot> members = owners.get(owner);
        return members == null ? null : members.get(id);
    }

    /**
     * Handles the situation that the buffer contains more bytes than its byte
     * capacity allows. As long as this is the case, the <i>next</i> slot to
     * displace is determined by calling the victim method and displaced.
     *
     * @throws IllegalStateException when the buffer still contains too many
     *         bytes but all of its occupied slots are fixed.
     */
    protected final void handleSizeOverflow() {
        // The biggest element has to fit into the buffer (alone)!
        while (sizeBytes > capacityBytes) {
            if (fixedSlots == size()) {
                throw new IllegalStateException("Buffer overflow. Too many slots fixed.");
            }
            displaceVictim();
        }
    }

    /**
     * Fixes the slot with the given id owned by the specified owner and
     * returns it. When no such slot exists, a new object is created by calling
     * the given function obtain with the specified id and this object is
     * inserted into the buffer. When the buffer overflows (it is full and all
     * slots are fixed), an <code>IllegalStateException</code> will be thrown.
     * Otherwise, when all slots are occupied, the <i>next</i> slot to displace
     * will be determined by calling the victim method and its object will be
     * replaced by the new object. After the insertion further slots are
     * displaced as long as the byte capacity of the buffer is exceeded.
     *
     * @param owner the owner of the slot to fix.
     * @param id the id of the slot to fix.
     * @param obtain a function for getting the object, when there is no slot
     *        with the given id owned by the specified owner.
     * @return the fixed slot with the given id owned by the specified owner.
     * @throws IllegalStateException when the buffer overflows.
     */
    protected Slot fix(O owner, I id, Function<? super I, ? extends E> obtain) throws IllegalStateException {
        Slot slot = lookUp(owner, id);
        if (slot != null) {
            slot.fix();
            return slot;
        }
        if (fixedSlots == slots.size()) {
            throw new IllegalStateException("Buffer overflow. Too many slots fixed.");
        }
        // Make space for one new object.
        if (size() == slots.size()) {
            displaceVictim();
        }
        // The first free slot is located directly behind the occupied ones.
        slot = slots.get(size());
        slot.insert(owner, id, obtain.invoke(id));
        // The new slot is fixed before the byte overflow is handled.
        slot.fix();
        sizeBytes += slot.sizeBytes;
        handleSizeOverflow();
        return slot;
    }

    /**
     * Unfixes the slot with the given id owned by the specified owner. The
     * desired slot is determined by calling the lookUp method. When such a
     * slot exists, its unfix method is called.
     *
     * @param owner the owner of the slot to unfix.
     * @param id the id of the slot to unfix.
     */
    public void unfix(O owner, I id) {
        Slot slot = lookUp(owner, id);
        if (slot != null) {
            slot.unfix();
        }
    }

    /**
     * Returns whether this buffer contains a slot with the given id owned by
     * the specified owner. In other words, returns <code>true</code> if
     * <code>lookUp(owner,id)&nbsp;!=&nbsp;null</code>, else returns
     * <code>false</code>.
     *
     * @param owner the owner of the desired slot.
     * @param id the id of the desired slot.
     * @return <code>true</code> if this buffer contains a slot with the given
     *         id owned by the specified owner, else returns
     *         <code>false</code>.
     */
    public boolean contains(O owner, I id) {
        return lookUp(owner, id) != null;
    }

    /**
     * Returns whether the slot with the given id owned by the specified owner
     * is fixed or not. This implementation checks whether such a slot exists
     * and, when it exists, whether it is fixed.
     *
     * @param owner the owner of the desired slot.
     * @param id the id of the desired slot.
     * @return <code>true</code> if the slot with the given id owned by the
     *         specified owner exists and is fixed, else returns
     *         <code>false</code>.
     */
    public boolean isFixed(O owner, I id) {
        Slot slot = lookUp(owner, id);
        return slot != null && slot.isFixed();
    }

    /**
     * Flushes the slot with the given id owned by the specified owner. This
     * implementation checks whether such a slot exists and flushes it, when it
     * exists.
     *
     * @param owner the owner of the slot to flush.
     * @param id the id of the slot to flush.
     */
    public void flush(O owner, I id) {
        Slot slot = lookUp(owner, id);
        if (slot != null) {
            slot.flush();
        }
    }

    /**
     * Flushes all slots in this buffer that are owned by the specified owner.
     * This implementation gets all slots owned by the specified owner by using
     * the map <code>owners</code> and flushes these slots thereafter.
     *
     * @param owner the owner of the slots to flush.
     */
    public void flushAll(O owner) {
        Map<I, Slot> members = owners.get(owner);
        if (members == null) {
            return;
        }
        for (Slot slot : members.values()) {
            slot.flush();
        }
    }

    /**
     * Returns the object contained by the slot with the given id owned by the
     * specified owner. When no such slot exists, a new object is created by
     * calling the given function obtain with the specified id and this object
     * is inserted into the buffer. When the buffer overflows (it is full and
     * all slots are fixed), an <code>IllegalStateException</code> will be
     * thrown. Otherwise the <i>next</i> slot to displace will be determined by
     * calling the victim method and its object will be replaced by the new
     * object. When <code>unfix == true</code> the slot containing the desired
     * object is unfixed at last.
     *
     * <p>This implementation fixes the desired slot by calling this buffer's
     * fix method with the specified owner, id and obtain function and calls
     * the slot's get method thereafter. When <code>unfix == true</code>, its
     * unfix method is called at last.</p>
     *
     * @param owner the owner of the slot containing the object to get.
     * @param id the id of the slot containing the object to get.
     * @param obtain a function for creating a new object, when there is no
     *        slot with the given id owned by the specified owner.
     * @param unfix a flag that determines whether the desired slot should be
     *        unfixed after getting its object or not.
     * @return the object contained by the slot with the given id owned by the
     *         specified owner.
     * @throws IllegalStateException when the buffer overflows.
     */
    public E get(O owner, I id, Function<? super I, ? extends E> obtain, boolean unfix) throws IllegalStateException {
        Slot slot = fix(owner, id, obtain);
        E object = slot.get();
        if (unfix) {
            slot.unfix();
        }
        return object;
    }

    /**
     * Updates the slot with the given id owned by the specified owner with the
     * specified object and flush function. When no such slot exists, the given
     * object is inserted into the buffer. When the buffer overflows (it is
     * full and all slots are fixed), an <code>IllegalStateException</code>
     * will be thrown. Otherwise the <i>next</i> slot to displace will be
     * determined by calling the victim method and its object and flush
     * function will be replaced by the given object and flush function. When
     * <code>unfix == true</code> the slot containing the desired object is
     * unfixed at last.
     *
     * <p>This implementation fixes the desired slot by calling this buffer's
     * fix method with the specified owner, id and a constant function that
     * always returns the given object, when it is invoked. Thereafter the
     * slot's update method is called with the specified object and flush
     * function, the byte count of this buffer is adjusted by the change of
     * the slot's size and a possible byte overflow is handled. When
     * <code>unfix == true</code>, the slot's unfix method is called at
     * last.</p>
     *
     * @param owner the owner of the slot to update.
     * @param id the id of the slot to update.
     * @param object the object that replaces the object contained by the
     *        desired slot.
     * @param flush the function that replaces the flush function of the
     *        desired slot.
     * @param unfix a flag that determines whether the desired slot should be
     *        unfixed after updating it or not.
     * @throws IllegalStateException when the buffer overflows.
     */
    public void update(O owner, I id, E object, Function<Object, ?> flush, boolean unfix) throws IllegalStateException {
        Slot slot = fix(owner, id, new Constant<E>(object));
        // Remark: the object need not already be inside the slot, but it can
        // be. The byte count is adjusted by the difference of the slot's size
        // only after the slot has been updated successfully, so a failing
        // update cannot corrupt it. Without byte accounting the slot's size
        // never changes and the difference is 0.
        int previousBytes = slot.sizeBytes;
        slot.update(object, flush);
        sizeBytes += slot.sizeBytes - previousBytes;
        handleSizeOverflow();
        if (unfix) {
            slot.unfix();
        }
    }

    /**
     * Removes the object and any information belonging to it from the slot
     * with the given id owned by the specified owner. This implementation
     * checks whether such a slot exists and calls its remove method, when it
     * exists. The slot is not flushed.
     *
     * @param owner the owner of the slot to remove.
     * @param id the id of the slot to remove.
     */
    public void remove(O owner, I id) {
        Slot slot = lookUp(owner, id);
        if (slot != null) {
            sizeBytes -= slot.sizeBytes;
            slot.remove();
        }
    }

    /**
     * Removes the objects and any information belonging to them from all slots
     * in this buffer that are owned by the specified owner. This
     * implementation gets all slots owned by the specified owner by using the
     * map <code>owners</code> and calls the remove methods of these slots
     * thereafter. At last the owner itself is removed from the map
     * <code>owners</code>. The slots are not flushed.
     *
     * @param owner the owner of the slots to remove.
     */
    public void removeAll(O owner) {
        Map<I, Slot> members = owners.get(owner);
        if (members == null) {
            return;
        }
        for (Iterator<Entry<I, Slot>> entries = members.entrySet().iterator(); entries.hasNext();) {
            Slot slot = entries.next().getValue();
            sizeBytes -= slot.sizeBytes;
            // The entry has to be removed through the iterator before the
            // slot is reset, otherwise the iteration would fail.
            entries.remove();
            slot.remove();
        }
        // Do not keep an empty member map (and the owner) alive.
        owners.remove(owner);
    }

    /**
     * Checks whether the aggregated byte count of this buffer is still
     * consistent with the sizes stored in its occupied slots.
     *
     * @throws RuntimeException when the sum of the slots' sizes differs from
     *         the byte count of this buffer.
     */
    public void checkBuffer() {
        int currentSize = 0;
        for (Map<I, Slot> map : owners.values()) {
            for (Slot slot : map.values()) {
                currentSize += slot.sizeBytes;
            }
        }
        if (currentSize != this.sizeBytes) {
            throw new RuntimeException("The size was not counted correctly (" + currentSize + " instead " + this.sizeBytes + ")");
        }
    }

    /**
     * Returns the number of fixed slots in this buffer.
     *
     * @return the number of fixed slots in this buffer.
     */
    public int fixedSlots() {
        return fixedSlots;
    }
}