/*******************************************************************************
 * Copyright (c) 2011 Arapiki Solutions Inc.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    "Peter Smith <psmith@arapiki.com>" - initial API and
 *        implementation and/or initial documentation
 *******************************************************************************/

package com.buildml.utils.types;

import java.util.Iterator;
import java.util.NoSuchElementException;

import com.buildml.utils.errors.ErrorCode;
import com.buildml.utils.errors.FatalError;

/**
 * This is an abstract class for implementing a tree-like set of integer keys, and their
 * respective values. That is, the elements in the set are arranged in a tree structure
 * (with parents and children), although an element may or may not exist in the set. The
 * main use of this data structure is to select sub-sets of an overall tree structure.
 * <p>
 * Each entry is keyed by an integer, and is either in the set or not in the set.
 * <p>
 * This class is sub-classed by FileSet and ActionSet, both of which are Integer-based sets.
 * <p>
 * Sub-classes supply the tree shape ({@link #getParent(int)}, {@link #getChildren(int)}),
 * the validity test ({@link #isValid(int)}) and the exclusive upper bound on IDs
 * ({@link #getMaxIdNumber()}). This class performs no synchronization of its own.
 *
 * @author "Peter Smith <psmith@arapiki.com>"
 */
public abstract class IntegerTreeSet implements Iterable<Integer>, Cloneable {

	/*=====================================================================================*
	 * TYPES/FIELDS
	 *=====================================================================================*/

	/*
	 * In order to store a bitmap of entries, we use a bucket-like system that's similar
	 * to how Unix file systems work. That is, we keep a top-level "bucket array" where
	 * each entry points to a second-level bitmap. If one of the bucket array's pointers
	 * is null, then that portion of the set is considered to be completely empty. This
	 * permits the allocation of a small amount of memory for small sets, and a larger amount
	 * for larger sets. It also allows the creation of a sparse bit-map, which is necessary
	 * to represent sets resulting from a database query (where the query returns a small
	 * number of sparsely distributed results).
	 */

	/**
	 * The number of bits represented in each bucket. Must be a power of two (the code
	 * masks with {@code BUCKET_SIZE - 1}) and a multiple of 8.
	 */
	private static final int BUCKET_SIZE = 2048;

	/** The number of bytes needed to hold the BUCKET_SIZE bits of a single bucket. */
	private static final int BUCKET_BYTES = BUCKET_SIZE >> 3;

	/** The initial number of buckets that a newly-created set will start with. */
	private static final int INITIAL_NUM_BUCKETS = 1;

	/**
	 * When the bucket array needs to grow, we'll increase it by this many new buckets.
	 * Note that each time we grow the bucket array, we also increase this value. Therefore,
	 * our first increase will add 1 new bucket, the second increase will add 2, the third
	 * increase will add 3 new buckets, etc. Therefore, for small sets we'll only add
	 * small increments, but for large sets we'll pre-allocate a larger number of new
	 * buckets.
	 */
	private int currentBucketIncrease = 1;

	/**
	 * Each entry in the bucket array has the following content. It tracks the set
	 * membership for BUCKET_SIZE consecutive IDs, one bit per ID.
	 */
	private static class IntegerTreeSetBucket {

		/** the number of bits set in this bucket */
		public int size;

		/** the actual bit map of set members (bits numbered 0 -> BUCKET_SIZE - 1) */
		public byte[] content;

		/** create a new bucket, with a newly allocated (all zero) bitmap */
		public IntegerTreeSetBucket() {
			size = 0;
			/* one bit per member, so only BUCKET_SIZE / 8 bytes are required */
			content = new byte[BUCKET_BYTES];
		}
	}

	/**
	 * The top-level array of buckets. This starts out having INITIAL_NUM_BUCKETS members,
	 * and can grow to at most the number of buckets needed to cover getMaxIdNumber() IDs.
	 */
	private IntegerTreeSetBucket[] bucketArray = null;

	/**
	 * The current size of bucketArray[].
	 */
	private int currentBucketArraySize;

	/**
	 * The current number of members in this set.
	 */
	private int totalMembers;

	/*=====================================================================================*
	 * NESTED CLASS - IntegerTreeSetIterator
	 *=====================================================================================*/

	/**
	 * An Iterator for traversing the content of an IntegerTreeSet. Members are reported
	 * in ascending ID order. The iterator reads the live bucket array of the enclosing
	 * set each time it searches for the next member.
	 */
	private class IntegerTreeSetIterator implements Iterator<Integer> {

		/**
		 * The most recent element of the TreeSet that we reported to the user, or
		 * -1 if nothing has been reported yet or the iteration is exhausted.
		 */
		private int currentId = -1;

		/**
		 * Have we identified a "next" element to report when the user calls next()?
		 */
		private boolean nextAvailable = false;

		/*---------------------------------------------------------------------------------*/

		/**
		 * Search through the set to find the next member with an ID greater than
		 * currentId. On success currentId is set to that ID; if there are no more
		 * members, currentId is set to -1.
		 */
		private void searchForNext() {

			int startId = currentId + 1;
			int bucketIndex = startId / BUCKET_SIZE;

			/* the first bit (within its bucket) that is still eligible to be reported */
			int firstBit = startId & (BUCKET_SIZE - 1);

			while (bucketIndex < currentBucketArraySize) {

				/* a null bucket is completely empty, so there's nothing to scan */
				IntegerTreeSetBucket bucket = bucketArray[bucketIndex];
				if (bucket != null) {
					int firstByte = firstBit >> 3;
					for (int byteIndex = firstByte; byteIndex < BUCKET_BYTES; byteIndex++) {
						int bits = bucket.content[byteIndex] & 0xFF;

						/* in the starting byte, ignore the bits we've already passed */
						if (byteIndex == firstByte) {
							bits &= (0xFF << (firstBit & 7));
						}

						/* the lowest remaining bit in this byte is the next member */
						if (bits != 0) {
							currentId = (bucketIndex * BUCKET_SIZE) + (byteIndex << 3)
									+ Integer.numberOfTrailingZeros(bits);
							return;
						}
					}
				}

				/* nothing (more) in this bucket, continue at the start of the next one */
				bucketIndex++;
				firstBit = 0;
			}

			/* reached the end of the whole bucketArray */
			currentId = -1;
		}

		/*---------------------------------------------------------------------------------*/

		@Override
		public boolean hasNext() {
			if (!nextAvailable) {
				searchForNext();
				nextAvailable = true;
			}
			return (currentId != -1);
		}

		/*---------------------------------------------------------------------------------*/

		@Override
		public Integer next() {
			if (!hasNext()) {
				throw new NoSuchElementException();
			}
			nextAvailable = false;
			return currentId;
		}

		/*---------------------------------------------------------------------------------*/

		@Override
		public void remove() {
			throw new UnsupportedOperationException();
		}

		/*---------------------------------------------------------------------------------*/
	}

	/*=====================================================================================*
	 * CONSTRUCTORS
	 *=====================================================================================*/

	/**
	 * Create a new IntegerTreeSet object.
	 */
	public IntegerTreeSet() {

		/* create an initial bucket array, with nothing in it */
		currentBucketArraySize = INITIAL_NUM_BUCKETS;
		bucketArray = new IntegerTreeSetBucket[currentBucketArraySize];
		totalMembers = 0;
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Create a new IntegerTreeSet and initialize it from an array of integer values.
	 * <p>
	 * Note that this constructor calls {@link #add(int)}, which in turn calls the
	 * sub-class's {@link #getMaxIdNumber()} before the sub-class constructor body has run.
	 *
	 * @param initValues The initial values to be added to the set.
	 */
	public IntegerTreeSet(Integer[] initValues) {
		this();
		for (int i = 0; i < initValues.length; i++) {
			add(initValues[i]);
		}
	}

	/*=====================================================================================*
	 * PUBLIC METHODS
	 *=====================================================================================*/

	/**
	 * Add a new member into the IntegerTreeSet.
	 *
	 * @param id The ID to be added to the set. If the ID already exists in the set, the
	 * result set will be unchanged.
	 * @throws FatalError if the ID is negative, or not less than getMaxIdNumber().
	 */
	public void add(int id) {

		/*
		 * There are several cases to consider here:
		 *   1) Is the 'id' within the range of 0 -> getMaxIdNumber() - 1? If not, throw
		 *      an exception. This is a programming error.
		 *   2) Is the 'id' within the range covered by the current bucketArray?
		 *      If not, we'll need to grow the bucketArray to allow for this new element.
		 *   3) Is the 'id' within a bucket that currently exists, or is the bucketArray
		 *      pointer currently null?
		 *   4) If the bucket already exists, is the bit already set?
		 */

		/* case 1 - ID completely out of range - programming error */
		if ((id < 0) || (id >= getMaxIdNumber())) {
			throw new FatalError(
					"New entry to set: " + id + " is beyond maximum allowed value: " + getMaxIdNumber());
		}

		/* case 2 - grow the bucket array to contain the new ID. */
		int idBucket = id / BUCKET_SIZE;
		if (idBucket >= currentBucketArraySize) {
			growBucketArray(idBucket);
		}

		/* case 3 - if the bucket for this 'id' is null, allocate it on demand */
		IntegerTreeSetBucket bucket = bucketArray[idBucket];
		if (bucket == null) {
			bucket = new IntegerTreeSetBucket();
			bucketArray[idBucket] = bucket;
		}

		/* case 4 - only set the bit (and bump the counts) if it's not already set */
		int bucketOffset = (id & (BUCKET_SIZE - 1)) >> 3;
		int bitMask = 1 << (id & 0x7);
		if ((bucket.content[bucketOffset] & bitMask) == 0) {
			bucket.content[bucketOffset] |= bitMask;
			bucket.size++;
			totalMembers++;
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Test whether a particular element is in the set.
	 *
	 * @param id The ID of the element we're testing for.
	 * @return True or False to indicate the member's presence. Negative IDs, and IDs
	 * beyond the currently allocated range, are never members.
	 */
	public boolean isMember(int id) {

		/*
		 * Cases to consider:
		 *   1) Is the 'id' beyond the end of the bucketArray? Return false.
		 *   2) Is the 'id' in a bucket that has a null pointer? Return false.
		 *   3) Is the 'id' present in the bucket's bitmap?
		 */

		/* case 1 - compare bucket numbers (rather than IDs) so nothing can overflow */
		if (id < 0) {
			return false;
		}
		int idBucket = id / BUCKET_SIZE;
		if (idBucket >= currentBucketArraySize) {
			return false;
		}

		/* case 2 */
		IntegerTreeSetBucket bucket = bucketArray[idBucket];
		if (bucket == null) {
			return false;
		}

		/* case 3 */
		int bucketOffset = (id & (BUCKET_SIZE - 1)) >> 3;
		int bitMask = 1 << (id & 0x7);
		return (bucket.content[bucketOffset] & bitMask) != 0;
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Remove the specified element from the set. If the record isn't
	 * in the set, the set is left unchanged.
	 *
	 * @param id The ID of the element we're removing.
	 */
	public void remove(int id) {

		/*
		 * Cases to consider:
		 *   1) Is the 'id' beyond the end of the bucketArray? Do nothing.
		 *   2) Is the 'id' in a bucket that has a null pointer? Do nothing.
		 *   3) Is the 'id' bit in the bucket's bit map set? If not, do nothing.
		 *   4) Remove the bit from the set, decrement the size, and possibly
		 *      remove the bucket (setting it to null in the array) if size is now 0.
		 */

		/* case 1 - compare bucket numbers (rather than IDs) so nothing can overflow */
		if (id < 0) {
			return;
		}
		int idBucket = id / BUCKET_SIZE;
		if (idBucket >= currentBucketArraySize) {
			return;
		}

		/* case 2 */
		IntegerTreeSetBucket bucket = bucketArray[idBucket];
		if (bucket == null) {
			return;
		}

		/* case 3 */
		int bucketOffset = (id & (BUCKET_SIZE - 1)) >> 3;
		int bitMask = 1 << (id & 0x7);
		if ((bucket.content[bucketOffset] & bitMask) == 0) {
			return;
		}

		/* case 4 - remove the element, decrement the size, release an emptied bucket */
		bucket.content[bucketOffset] &= ~bitMask;
		bucket.size--;
		totalMembers--;
		if (bucket.size == 0) {
			bucketArray[idBucket] = null;
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Return the number of members in the set.
	 *
	 * @return the number of members in the set.
	 */
	public int size() {
		return totalMembers;
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * An iterator for traversing the keys in the set.
	 * @return An iterator for traversing the keys in the set. The order the keys are visited
	 * is not specified.
	 */
	@Override
	public Iterator<Integer> iterator() {
		return new IntegerTreeSetIterator();
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Implement the standard Object clone() method for IntegerTreeSet, but perform a deep
	 * copy, rather than a shallow copy. The bucket array and every bucket's bitmap are
	 * duplicated, so the clone and the original can be modified independently.
	 *
	 * @return The newly created copy of this set.
	 * @throws CloneNotSupportedException if Object.clone() rejects the request.
	 */
	@Override
	public Object clone() throws CloneNotSupportedException {

		/* retrieve the new object */
		IntegerTreeSet newSet = (IntegerTreeSet) super.clone();

		/* clone the top-level fields */
		newSet.currentBucketArraySize = this.currentBucketArraySize;
		newSet.currentBucketIncrease = this.currentBucketIncrease;
		newSet.totalMembers = this.totalMembers;
		newSet.bucketArray = new IntegerTreeSetBucket[newSet.currentBucketArraySize];

		/* clone each element of the bucket array */
		for (int i = 0; i != newSet.currentBucketArraySize; i++) {
			IntegerTreeSetBucket oldBucket = this.bucketArray[i];
			if (oldBucket != null) {
				IntegerTreeSetBucket newBucket = new IntegerTreeSetBucket();
				newBucket.size = oldBucket.size;
				newBucket.content = oldBucket.content.clone();
				newSet.bucketArray[i] = newBucket;
			}
		}
		return newSet;
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Abstract method for fetching the parent of a particular element. This method
	 * must be overridden by sub-classes that actually know what the parent-child relations
	 * should be.
	 *
	 * @param id The ID of the element we want to find the parent of.
	 * @return The ID of this element's parent, or ErrorCode.NOT_FOUND.
	 */
	public abstract int getParent(int id);

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Abstract method for determining whether a particular element is valid (exists in
	 * the database and isn't trashed).
	 *
	 * @param id The ID of the element to determine the valid status of.
	 * @return True if the ID is valid, else false.
	 */
	public abstract boolean isValid(int id);

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Abstract method for fetching the array of children of a particular element. This
	 * method must be overridden by sub-classes that actually know what the parent-child
	 * relations should be.
	 * @param id The ID of the element we want to find the children of.
	 * @return An Integer[] of IDs of this element's children. If there are no children,
	 * return Integer[0].
	 */
	public abstract Integer[] getChildren(int id);

	/*-------------------------------------------------------------------------------------*/

	/**
	 * For all the IDs already present in the set, ensure that each of its
	 * parent IDs are also in the set. This is useful for when displaying the
	 * report in the full tree hierarchy, in which case we must also know which parent
	 * elements are to be shown. For example, in the case of FileSet, if "/a/b/c.c" is in
	 * the set, then "/a/b", "/a" and "/" will also be added.
	 * <p>
	 * Members for which {@link #isValid(int)} returns false are skipped. Climbing stops
	 * as soon as an ancestor is already in the set, or {@link #getParent(int)} reports
	 * ErrorCode.NOT_FOUND.
	 */
	public void populateWithParents() {

		/*
		 * Fetch the list of IDs already in the IntegerTreeSet. We'll be modifying
		 * ourselves, so we first need to make a copy of ourselves, as a stable
		 * base for iteration.
		 */
		IntegerTreeSet copy;
		try {
			copy = (IntegerTreeSet) this.clone();
		} catch (CloneNotSupportedException e) {
			throw new FatalError("clone() not support for IntegerTreeSet.");
		}

		/*
		 * For each ID, add all of its parent IDs, all the way up to the root. However,
		 * if any of this ID's ancestors have already been added, there's no need to
		 * add it again.
		 */
		for (Integer memberId : copy) {
			int elementId = memberId;
			if (!isValid(elementId)) {
				continue;
			}

			while (true) {
				/*
				 * Get the parent of this ID - note that the parent of the root element
				 * must be itself (getParent(id) == id). Since the element we're asking
				 * about is always in the set by now, that terminates the loop.
				 */
				int parentId = getParent(elementId);

				/* stop if there's no parent, or if the parent (and its ancestors) is present */
				if ((parentId == ErrorCode.NOT_FOUND) || isMember(parentId)) {
					break;
				}

				/* otherwise add the parent, and continue upwards from there */
				add(parentId);
				elementId = parentId;
			}
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Given a second set, mask off any files from this set that
	 * don't appear in the second set. This is essentially a bitwise "and".
	 *
	 * @param mask The second set that acts as a mask value.
	 * @throws NullPointerException if mask is null.
	 */
	public void maskSet(IntegerTreeSet mask) {

		if (mask == null) {
			throw new NullPointerException("mask must not be null");
		}

		/* work bucket-by-bucket, since both sets share the same bitmap layout */
		for (int i = 0; i < currentBucketArraySize; i++) {
			IntegerTreeSetBucket bucket = bucketArray[i];
			if (bucket == null) {
				continue;
			}

			/* a missing mask bucket means nothing in this range survives */
			IntegerTreeSetBucket maskBucket =
					(i < mask.currentBucketArraySize) ? mask.bucketArray[i] : null;
			if (maskBucket == null) {
				totalMembers -= bucket.size;
				bucketArray[i] = null;
				continue;
			}

			/* "and" each byte, keeping the member counts in step with the bits removed */
			for (int j = 0; j < BUCKET_BYTES; j++) {
				int before = bucket.content[j] & 0xFF;
				int after = before & maskBucket.content[j];
				if (after != before) {
					int removed = Integer.bitCount(before ^ after);
					bucket.content[j] = (byte) after;
					bucket.size -= removed;
					totalMembers -= removed;
				}
			}

			/* as with remove(), an emptied bucket is released */
			if (bucket.size == 0) {
				bucketArray[i] = null;
			}
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Given a second set, remove any files from this set that appear in the second set.
	 *
	 * @param second The second set containing the values to be removed.
	 */
	public void extractSet(IntegerTreeSet second) {

		/* remove() ignores IDs that aren't present, so no membership test is needed */
		for (Integer elementId : second) {
			remove(elementId);
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Given a second set, merge all the files from that second set into this set. This is
	 * essentially a bitwise "or". IDs that are already present in "this" set are left
	 * untouched.
	 *
	 * @param second The second set to merge into this set
	 */
	public void mergeSet(IntegerTreeSet second) {

		/* only add what's missing, so IDs we already hold never reach add()'s range check */
		for (Integer elementId : second) {
			if (!isMember(elementId)) {
				add(elementId);
			}
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Add all the elements in the sub-tree which is rooted at "id". If necessary, the parents
	 * of "id" will also be added so that all newly added elements are reachable from
	 * the root.
	 *
	 * @param id The tree element whose sub-tree should be added to the set.
	 */
	public void addSubTree(int id) {

		/* first, add this path and all its descendants. This is done recursively */
		addSubTreeHelper(id);

		/* now progress upwards, ensuring that all parents are added too */
		while (true) {
			int parentId = getParent(id);

			/* stop on error, or if we hit the root (/) */
			if ((parentId == ErrorCode.NOT_FOUND) || (parentId == id)) {
				break;
			}
			add(parentId);
			id = parentId;
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Remove all the elements in the sub-tree which is rooted at "id".
	 * @param id The tree element whose sub-tree should be removed from the set.
	 */
	public void removeSubTree(int id) {
		remove(id);
		Integer[] children = getChildren(id);
		for (int i = 0; i < children.length; i++) {
			removeSubTree(children[i]);
		}
	}

	/*=====================================================================================*
	 * PROTECTED METHODS
	 *=====================================================================================*/

	/**
	 * Returns the maximum allowable ID number for this set. Each child class must
	 * implement this method to return the appropriate maximum for their purpose.
	 * As used by add(), this is an exclusive bound: valid IDs are 0 -> getMaxIdNumber() - 1.
	 *
	 * @return The maximum allowable ID number for this set.
	 */
	protected abstract int getMaxIdNumber();

	/*=====================================================================================*
	 * PRIVATE METHODS
	 *=====================================================================================*/

	/**
	 * A helper method for addSubTree() that traverses downwards through the children
	 * of the given path.
	 * @param id The path whose sub-tree should be added to the set.
	 */
	private void addSubTreeHelper(int id) {
		add(id);
		Integer[] children = getChildren(id);
		for (int i = 0; i < children.length; i++) {
			addSubTreeHelper(children[i]);
		}
	}

	/*-------------------------------------------------------------------------------------*/

	/**
	 * Grow the current bucket array so that it's large enough to contain the specified
	 * ID number. In fact, we may grow by more than the minimum necessary size, to make
	 * sure we have room for growth. We already know that 'id' is less than our absolute
	 * maximum value, so that doesn't need to be checked twice.
	 *
	 * @param idBucket The new bucket number that must be accommodated in the bucket array.
	 */
	private void growBucketArray(int idBucket) {

		int newBucketArraySize = idBucket + currentBucketIncrease;

		/*
		 * But never grow larger than the maximum ID allows. The bucket count is rounded
		 * up, so that a maximum that isn't a multiple of BUCKET_SIZE still has room for
		 * its last (partial) bucket.
		 */
		int maxId = getMaxIdNumber();
		int maxBuckets = maxId / BUCKET_SIZE;
		if ((maxId % BUCKET_SIZE) != 0) {
			maxBuckets++;
		}
		if (newBucketArraySize > maxBuckets) {
			newBucketArraySize = maxBuckets;
		}

		/* allocate a new bucketArray, copy over the old content, then discard the old array */
		IntegerTreeSetBucket[] newArray = new IntegerTreeSetBucket[newBucketArraySize];
		System.arraycopy(bucketArray, 0, newArray, 0, currentBucketArraySize);
		bucketArray = newArray;
		currentBucketArraySize = newBucketArraySize;

		/* the more often we grow the array, the larger we should grow each time */
		currentBucketIncrease++;
	}

	/*-------------------------------------------------------------------------------------*/
}