/*
*   EuroCarbDB, a framework for carbohydrate bioinformatics
*
*   Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
*   indicated by the @author tags or express copyright attribution
*   statements applied by the authors.
*
*   This copyrighted material is made available to anyone wishing to use, modify,
*   copy, or redistribute it subject to the terms and conditions of the GNU
*   Lesser General Public License, as published by the Free Software Foundation.
*   A copy of this license accompanies this distribution in the file LICENSE.txt.
*
*   This program is distributed in the hope that it will be useful,
*   but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
*   for more details.
*
*   Last commit: $Rev: 1429 $ by $Author: glycoslave $ on $Date:: 2009-07-05 #$
*/

package org.eurocarbdb.util;

import java.math.BigInteger;
import java.io.Serializable;

import java.util.Set;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Collection;
import java.util.RandomAccess;
import java.util.ArrayList;

/**
*<p>
*   Implementation of the {@link Set} interface for {@link Integer}s backed by
*   a <code>boolean[]</code>, supporting both Set-oriented and
*   bitmask/bitstring-oriented usage. When working with bitstrings, note that
*   the 0th index is on the right, and that iteration over a BitSet
*   effectively traverses from right to left (ie: lowest to highest index).
*   Lastly, BitSets silently grow as needed to accommodate large indexes.
*   {@link #iterator Regular iteration} over the Set of integers (whose
*   indexes are set to true), and removal of values during iteration, is
*   also supported.
*</p>
*<p>
*   The backing array is only ever as long as the highest index that has been
*   required so far (the original documentation describes the footprint as
*   "the actual size of the bitset in bits + 2"). Because the array is resized
*   to exactly the requested size on demand, it is worthwhile to pre-size a
*   bitset to or near its expected final size prior to use, via
*   {@link #ensureCapacity} or the {@link #BitSet(int)} constructor.
*</p>
*<p>
*   According to the original author's notes, this class is slightly slower
*   than {@link java.util.BitSet} but uses less memory, and is much faster and
*   smaller than a {@link java.util.HashSet} of integers (benchmarks are said
*   to live in <code>test.eurocarbdb.util.BitSetTest</code>).
*</p>
*<p>
*   Negative integers can never be members of this set: query methods report
*   them as absent, while mutating methods throw
*   {@link ArrayIndexOutOfBoundsException}.
*</p>
*
*   @see java.util.BitSet
*   @see java.util.Set
*   @author mjh
*/
public class BitSet implements Set<Integer>, Iterable<Integer>, RandomAccess, Cloneable, Serializable {

    /** The bitset, resized as needed; never null. */
    private boolean[] bits;

    /**
    *   If true, any operation that requires growing this bitset via
    *   {@link #ensureCapacity} throws an exception. Nothing in this class
    *   currently sets this flag to true; it is only copied by {@link #clone()}.
    */
    private boolean fixedSize = false;

    /** Reserved flag (not currently consulted by any method of this class). */
    private boolean unmodifiable = false;


    //~~~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~~~

    /** Creates a bitset of zero size. */
    public BitSet() {
        this( 0 );
    }

    /**
    *   Creates a bitset of the given size, with all bits false.
    *
    *   @throws NegativeArraySizeException if size is negative.
    */
    public BitSet( int size ) {
        bits = new boolean[ size ];
    }

    /** Creates a new bitset initialised (copied) from the passed bit array. */
    public BitSet( boolean[] bits ) {
        boolean[] copy = new boolean[ bits.length ];
        System.arraycopy( bits, 0, copy, 0, bits.length );
        this.bits = copy;
    }

    /**
    *   Creates a new bitset initialised (copied) from the passed bit array
    *   from the given 'from' index (inclusive), up to the given 'to' index
    *   (exclusive). The {@link #length()} of the bitset is thus:
    *   <code>to_index - from_index</code>.
    *
    *   @throws IllegalArgumentException if to_index is less than from_index.
    *   @throws IndexOutOfBoundsException if the range lies outside the array.
    */
    public BitSet( boolean[] bits, int from_index, int to_index ) {
        int length = to_index - from_index;
        if ( length < 0 ) {
            throw new IllegalArgumentException(
                "to_index must be greater than from_index");
        }

        boolean[] copy = new boolean[ length ];
        System.arraycopy( bits, from_index, copy, 0, length );
        this.bits = copy;
    }


    //~~~~~~~~~~~~~~~~~~~~~~~~ STATIC METHODS ~~~~~~~~~~~~~~~~~~~~~~~

    /**
    *   Creates a bitset initialised to the given number converted to binary.
    *   Eg:
    *<pre>
    *       BitSet.forNumber( 7 ).toBitString() == "111"
    *       BitSet.forNumber( 8 ).toBitString() == "1000"
    *       BitSet.forNumber( 19 ).toBitString() == "10011"
    *</pre>
    *   {@link BigInteger}s are converted via {@link BigInteger#toString(int)};
    *   every other {@link Number} is converted via its <code>longValue()</code>
    *   and {@link Long#toBinaryString}.
    *
    *   @throws IllegalArgumentException
    *   if the binary rendering contains anything but ones and zeroes (eg:
    *   the leading minus sign produced for a negative BigInteger).
    */
    public static BitSet forNumber( Number n ) {
        String bitstring = ( n instanceof BigInteger )
            ? ((BigInteger) n).toString( 2 )
            : Long.toBinaryString( n.longValue() );

        return forString( bitstring );
    }

    /**
    *   Creates a bitset initialised to the given bitstring; the rightmost
    *   character of the string becomes index 0.
    *
    *   @throws IllegalArgumentException if the given String contains
    *   anything but ones and zeroes.
    */
    public static BitSet forString( String bitstring ) {
        BitSet b = new BitSet( bitstring.length() );
        char[] chars = bitstring.toCharArray();

        // walk the string right-to-left so that index 0 is the last character
        int i = 0;
        for ( int j = chars.length - 1; j >= 0; j-- ) {
            switch ( chars[j] ) {
                case '1':
                    b.bits[i++] = true;
                    break;

                case '0':
                    b.bits[i++] = false;
                    break;

                default:
                    throw new IllegalArgumentException(
                        "invalid char '"
                        + chars[j]
                        + "' for bitset string at index="
                        + j
                        + ", only 1's and 0's allowed"
                    );
            }
        }

        return b;
    }

    /** Creates a bitset consisting of the given integers set to true. */
    public static BitSet of( int... ints ) {
        BitSet b = new BitSet();
        b.addAll( ints );
        return b;
    }


    //~~~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~

    /**
    *   Adds the given integer to the set (ie: sets that bit to true),
    *   growing the bitset if needed.
    *
    *   @return true if the bit was previously false.
    *   @throws ArrayIndexOutOfBoundsException if the integer is negative.
    */
    public boolean add( Integer i ) {
        int index = i;
        ensureCapacity( index + 1 );

        if ( bits[index] ) {
            return false;
        }

        bits[index] = true;
        return true;
    }

    /**
    *   Adds the given collection of integers to the set (ie: sets each of
    *   the integer positions to true). The bitset is grown once, to at least
    *   the highest index given plus one (and to at least length 1).
    *
    *   @return true if at least one bit changed from false to true.
    *   @throws ArrayIndexOutOfBoundsException if any integer is negative.
    */
    public boolean addAll( Collection<? extends Integer> indexes ) {
        int highest = 0;
        for ( Integer i : indexes ) {
            if ( i > highest ) {
                highest = i;
            }
        }

        ensureCapacity( highest + 1 );

        boolean changed = false;
        for ( int i : indexes ) {
            if ( ! bits[i] ) {
                bits[i] = true;
                changed = true;
            }
        }

        return changed;
    }

    /**
    *   Adds the given list of integers to the set (ie: sets each of
    *   the integer positions to true). The bitset is grown once, to at least
    *   the highest index given plus one (and to at least length 1).
    *
    *   @throws ArrayIndexOutOfBoundsException if any integer is negative.
    */
    public void addAll( int... ints ) {
        int highest = 0;
        for ( int i : ints ) {
            if ( i > highest ) {
                highest = i;
            }
        }

        ensureCapacity( highest + 1 );

        for ( int i : ints ) {
            bits[i] = true;
        }
    }

    /**
    *   Appends the given BitSets to this BitSet end-to-end, as if
    *   each argument given were serialised to a bitstring and appended
    *   in reverse order.
    *   eg:
    *<pre>
    *       BitSet b1 = BitSet.forString("1111");
    *       BitSet b2 = BitSet.forString("000");
    *       BitSet b3 = BitSet.forString("11");
    *
    *       b1.append( b2, b3 );
    *       b1.toBitString();
    *
    *       // returns "110001111"
    *</pre>
    *   Note that zeroes in high bits (ie: left padding
    *   with zeroes in the string example given) are preserved.
    */
    public void append( BitSet... bitsets ) {
        // Capture the source arrays before resizing, so that appending a
        // bitset to itself copies its contents as they were on entry.
        boolean[][] sources = new boolean[ bitsets.length ][];
        int offset = this.bits.length;
        int total = offset;
        for ( int k = 0; k < bitsets.length; k++ ) {
            sources[k] = bitsets[k].bits;
            total += sources[k].length;
        }

        ensureCapacity( total );

        for ( boolean[] source : sources ) {
            System.arraycopy( source, 0, this.bits, offset, source.length );
            offset += source.length;
        }
    }

    /**
    *   Returns the allocated length of this bitset; note this is not
    *   the same thing as {@link #size()}. capacity() and size() will only
    *   be equal if all bits in this bitset are set to true.
    *
    *   @see #length()
    */
    public int capacity() {
        return bits.length;
    }

    /** Empties this set (sets all bits to false); the length is unchanged. */
    public void clear() {
        Arrays.fill( bits, false );
    }

    /**
    *   Sets the given bit index to false, removing it from the set.
    *
    *   @throws ArrayIndexOutOfBoundsException if the index is negative,
    *   or equal to/greater than {@link #length()}.
    */
    public void clear( int i ) {
        bits[i] = false;
    }

    /**
    *   Sets the bits from from_index (inclusive) to to_index (exclusive) to false.
    *   The number of bits cleared is thus: <code>to_index - from_index</code>.
    *   Nothing happens if to_index is not greater than from_index.
    *
    *   @throws ArrayIndexOutOfBoundsException if a touched index lies
    *   outside this bitset.
    */
    public void clear( int from_index, int to_index ) {
        for ( int i = from_index; i < to_index; i++ ) {
            bits[i] = false;
        }
    }

    /** Clones this set; the returned bitset has its own copy of the bits. */
    @Override
    public BitSet clone() {
        BitSet copy = new BitSet( bits.length );
        System.arraycopy( bits, 0, copy.bits, 0, bits.length );
        copy.fixedSize = this.fixedSize;
        return copy;
    }

    /**
    *   Returns true if the integer value of the passed {@link Number}
    *   is a member of this set.
    *
    *   @throws ClassCastException if the passed Object is not a {@link Number}
    */
    public boolean contains( Object x ) {
        return isSet( ((Number) x).intValue() );
    }

    /**
    *   Returns true if the passed integer index is true in this bitset.
    *   Indexes outside the allocated length (including negative ones)
    *   are reported as false.
    */
    public boolean contains( Integer i ) {
        return isSet( i );
    }

    /**
    *   Returns true if every number in the passed collection is a member
    *   of this set.
    *
    *   @throws ClassCastException unless the passed Collection contains
    *   only {@link Number}s.
    */
    public boolean containsAll( Collection<?> indexes ) {
        for ( Object x : indexes ) {
            if ( ! isSet( ((Number) x).intValue() ) ) {
                return false;
            }
        }

        return true;
    }

    /**
    *   Returns true if this bitset and the given bitset have exactly the
    *   same bits set to true; the allocated length of each bitset is
    *   *not* considered. Returns false for a null argument.
    */
    public boolean equals( BitSet b ) {
        if ( b == null ) {
            return false;
        }
        if ( this == b ) {
            return true;
        }

        boolean[] shorter = bits;
        boolean[] longer = b.bits;
        if ( shorter.length > longer.length ) {
            shorter = b.bits;
            longer = bits;
        }

        for ( int i = 0; i < shorter.length; i++ ) {
            if ( shorter[i] != longer[i] ) {
                return false;
            }
        }

        // any bit beyond the shorter array must be false to count as equal
        for ( int i = shorter.length; i < longer.length; i++ ) {
            if ( longer[i] ) {
                return false;
            }
        }

        return true;
    }

    /**
    *   Compares this set to an arbitrary object as required by the
    *   {@link Set} contract: another BitSet is compared via
    *   {@link #equals(BitSet)}; any other {@link Set} is equal if it has the
    *   same {@link #size()} and contains only {@link Integer}s that are
    *   members of this set. Anything else is unequal.
    */
    @Override
    public boolean equals( Object o ) {
        if ( o == this ) {
            return true;
        }
        if ( o instanceof BitSet ) {
            return equals( (BitSet) o );
        }
        if ( ! (o instanceof Set) ) {
            return false;
        }

        Set<?> other = (Set<?>) o;
        if ( other.size() != size() ) {
            return false;
        }

        for ( Object x : other ) {
            if ( ! (x instanceof Integer) || ! isSet( (Integer) x ) ) {
                return false;
            }
        }

        return true;
    }

    /**
    *   Returns the sum of the indexes that are set to true, which is the
    *   hash code the {@link Set} contract prescribes for a set of Integers.
    *   The allocated length does not influence the result, keeping this
    *   method consistent with {@link #equals(Object)}.
    */
    @Override
    public int hashCode() {
        int hash = 0;
        for ( int i = 0; i < bits.length; i++ ) {
            if ( bits[i] ) {
                hash += i;
            }
        }
        return hash;
    }

    /**
    *   Lengthens this BitSet to accommodate at least the size given. Does
    *   nothing if the bitset is already at least that long (this includes
    *   a size of zero or less); otherwise the bitset is resized to exactly
    *   the given size.
    *
    *   @throws UnsupportedOperationException
    *   if growth is required but this bitset is of fixed size.
    */
    public void ensureCapacity( int size ) {
        if ( size <= bits.length ) {
            return;
        }

        if ( fixedSize ) {
            throw new UnsupportedOperationException(
                "Cannot modify allocated size of fixed size BitSet");
        }

        boolean[] grown = new boolean[size];
        System.arraycopy( bits, 0, grown, 0, bits.length );
        bits = grown;
    }

    /**
    *   Sets the value of the bit at the given index to its complement,
    *   growing the bitset if needed.
    */
    public void flip( int i ) {
        ensureCapacity( i + 1 );
        bits[i] ^= true;
    }

    /**
    *   Returns the value of the bit at the given position.
    *   @throws ArrayIndexOutOfBoundsException if given index is
    *   negative, or equal to/greater than {@link #length()}.
    */
    public boolean get( int i ) throws ArrayIndexOutOfBoundsException {
        return bits[i];
    }

    /**
    *   Returns true if none of the bits in this bitset are set
    *   (ie: it is an empty set).
    */
    public boolean isEmpty() {
        for ( boolean b : bits ) {
            if ( b ) {
                return false;
            }
        }
        return true;
    }

    /**
    *   Returns true if all of the bits in this bitset are set
    *   (trivially true for a bitset of zero length).
    */
    public boolean isFull() {
        for ( boolean b : bits ) {
            if ( ! b ) {
                return false;
            }
        }
        return true;
    }

    /**
    *   Returns an iterator over all integers in this bitset whose position
    *   in the bitset is set to true, in sorted (ascending) order (lowest
    *   to highest index). Calling {@link Iterator#remove} sets the bit of
    *   the index most recently returned by <code>next()</code> to false.
    */
    public Iterator<Integer> iterator() {
        return new Iterator<Integer>() {

            /** index at which the search for the next set bit resumes */
            private int cursor = 0;

            /** index returned by the last next() call, or -1 if not removable */
            private int lastReturned = -1;

            public boolean hasNext() {
                while ( cursor < bits.length ) {
                    if ( bits[cursor] ) {
                        return true;
                    }
                    cursor++;
                }
                return false;
            }

            public Integer next() {
                if ( ! hasNext() ) {
                    throw new java.util.NoSuchElementException(
                        "no more set bits in this BitSet");
                }

                // hasNext() left the cursor on a set bit; step past it
                lastReturned = cursor++;
                return lastReturned;
            }

            public void remove() {
                if ( lastReturned < 0 ) {
                    throw new IllegalStateException(
                        "next not called or remove called twice in a row");
                }

                // the backing array may have been swapped for a shorter one
                // (eg: by bitShift) since next() was called
                if ( lastReturned < bits.length ) {
                    bits[lastReturned] = false;
                }
                lastReturned = -1;
            }
        };
    }

    /** Same as {@link #capacity()}. */
    public int length() {
        return bits.length;
    }

    /**
    *   Removes the integer value of the given {@link Number} from this set
    *   (ie: sets that bit to false).
    *
    *   @return true if the bit was previously true.
    *   @throws ClassCastException if argument cannot be cast to a {@link Number}
    *   @see java.lang.Number#intValue()
    */
    public boolean remove( Object x ) {
        int i = ((Number) x).intValue();
        if ( ! isSet( i ) ) {
            return false;
        }

        bits[i] = false;
        return true;
    }

    /**
    *   Removes all numeric values in the passed collection from this set
    *   (ie: sets all those bits to false).
    *
    *   @return true if at least one bit changed from true to false.
    *   @throws ClassCastException
    *   if any element of collection cannot be cast to a {@link Number}
    */
    public boolean removeAll( Collection<?> indexes ) {
        boolean changed = false;
        for ( Object x : indexes ) {
            // throwing a ClassCastException is part of the method spec
            int i = ((Number) x).intValue();
            if ( isSet( i ) ) {
                bits[i] = false;
                changed = true;
            }
        }

        return changed;
    }

    /**
    *   Retains only those members of this set that are also contained in
    *   the passed collection; implemented as a {@link #bitwiseAnd} against
    *   the passed BitSet, or against a temporary BitSet built from the
    *   passed collection.
    *
    *   @return true if this set changed as a result.
    *   @throws ClassCastException
    *   if any element of collection cannot be cast to an {@link Integer}
    */
    @SuppressWarnings("unchecked")
    public boolean retainAll( Collection<?> indexes ) {
        BitSet mask;
        if ( indexes instanceof BitSet ) {
            mask = (BitSet) indexes;
        }
        else {
            mask = new BitSet( indexes.size() );
            // ClassCastException is allowed to propagate from here
            mask.addAll( (Collection<Integer>) indexes );
        }

        return bitwiseAnd( mask );
    }

    /** Sets the bit at the given index to true, growing the bitset if needed. */
    public void set( int i ) {
        ensureCapacity( i + 1 );
        bits[i] = true;
    }

    /**
    *   Sets the bit at the given index to the given boolean value,
    *   growing the bitset if needed.
    */
    public void set( int i, boolean value ) {
        ensureCapacity( i + 1 );
        bits[i] = value;
    }

    /**
    *   Sets the bits from from_index (inclusive) to to_index (exclusive) to true,
    *   growing the bitset if needed.
    *   The number of bits set is thus: <code>to_index - from_index</code>.
    */
    public void set( int from_index, int to_index ) {
        ensureCapacity( to_index );
        for ( int i = from_index; i < to_index; i++ ) {
            bits[i] = true;
        }
    }

    /**
    *   Returns the current size of this bitset, that is, the number of
    *   integers whose position in the bitset is true. Note: this is not
    *   the same thing as the {@link #length()} of the bitset.
    */
    public int size() {
        int count = 0;
        for ( boolean b : bits ) {
            if ( b ) {
                count++;
            }
        }
        return count;
    }

    /**
    *   Returns this bitset as an array of {@link Integer}s in ascending
    *   order. Note that the passed array is never used: the array returned
    *   is always a newly allocated <code>Integer[]</code> of length
    *   {@link #size()}, so callers must request an element type that an
    *   <code>Integer[]</code> can be cast to.
    */
    @SuppressWarnings("unchecked")
    public <T> T[] toArray( T[] a ) {
        Integer[] ints = new Integer[ size() ];

        int next = 0;
        for ( int i = 0; i < bits.length; i++ ) {
            if ( bits[i] ) {
                ints[next++] = i;
            }
        }

        return (T[]) ints;
    }

    /** Returns this bitset as an array of {@link Integer}s in ascending order. */
    public Object[] toArray() {
        return toArray( new Integer[0] );
    }


    //  bit ops

    /**
    *   Sets this bitset to <tt>this AND b</tt> (this &amp; b). Bits of this
    *   bitset that lie beyond the length of b are cleared, since absent
    *   high bits are false by definition. The length of this bitset is
    *   unchanged.
    *
    *   @return true if at least one bit of this bitset changed.
    */
    public boolean bitwiseAnd( BitSet b ) {
        boolean[] other = b.bits;
        int common = Math.min( bits.length, other.length );
        boolean changed = false;

        // process common bits
        for ( int i = 0; i < common; i++ ) {
            if ( bits[i] && ! other[i] ) {
                bits[i] = false;
                changed = true;
            }
        }

        // process remaining bits; only non-empty if we are the longer one
        for ( int i = common; i < bits.length; i++ ) {
            if ( bits[i] ) {
                bits[i] = false;
                changed = true;
            }
        }

        return changed;
    }

    /** Returns a new bitset that is equal to <tt>this AND b</tt> (this &amp; b). */
    public BitSet bitwiseAndEquals( BitSet b ) {
        BitSet copy = clone();
        copy.bitwiseAnd( b );
        return copy;
    }

    /** Sets this bitset to the complement of the current bitset. */
    public void bitComplement() {
        for ( int i = 0; i < bits.length; i++ ) {
            bits[i] = ! bits[i];
        }
    }

    /** Returns a bitset equal to the complement of the current bitset. */
    public BitSet bitComplementEquals() {
        BitSet copy = clone();
        copy.bitComplement();
        return copy;
    }

    /**
    *   Shifts this bitset in the given direction - positive values
    *   are equivalent to a right (<code>this &gt;&gt; direction</code>)
    *   shift, negative values are equivalent to a left shift
    *   (<code>this &lt;&lt; direction</code>). A right shift discards the
    *   lowest bits and shortens the bitset accordingly (down to zero
    *   length); a left shift inserts false bits at the low end and
    *   lengthens the bitset accordingly.
    */
    public void bitShift( int direction ) {
        bits = shift( bits, direction );
    }

    /**
    *   Returns a new bitset that is equal to this bitset shifted in the
    *   given direction (see {@link #bitShift}); this bitset is not modified.
    */
    public BitSet bitShiftEquals( int direction ) {
        // the BitSet(boolean[]) constructor copies, so a zero shift
        // still yields an independent bitset
        return new BitSet( shift( bits, direction ) );
    }

    /**
    *   Returns the given bits shifted by the given amount: right for
    *   positive values, left for negative values. Returns the source array
    *   itself for a shift of zero, a new array otherwise.
    */
    private static boolean[] shift( boolean[] source, int direction ) {
        if ( direction == 0 ) {
            return source;
        }

        if ( direction > 0 ) {
            // right shift: drop the lowest 'direction' bits
            if ( direction >= source.length ) {
                return new boolean[0];
            }

            int length = source.length - direction;
            boolean[] shifted = new boolean[ length ];
            System.arraycopy( source, direction, shifted, 0, length );
            return shifted;
        }

        // left shift: prepend -direction false bits at the low end
        boolean[] shifted = new boolean[ source.length - direction ];
        System.arraycopy( source, 0, shifted, -direction, source.length );
        return shifted;
    }

    /**
    *   Inserts the given BitSet at the given (bitstring) index, shifting
    *   all bits at or above this index to the left by the length() of the
    *   given BitSet.
    *
    *   @throws IndexOutOfBoundsException if index is negative or
    *   greater than {@link #length()}.
    */
    public void bitShiftInsert( int index, BitSet b ) {
        boolean[] inserted = b.bits;
        boolean[] combined = new boolean[ bits.length + inserted.length ];

        System.arraycopy( bits, 0, combined, 0, index );
        System.arraycopy( inserted, 0, combined, index, inserted.length );
        System.arraycopy( bits, index, combined, index + inserted.length, bits.length - index );

        this.bits = combined;
    }

    /**
    *   Returns a new BitSet consisting of bits copied from this BitSet,
    *   in the range <tt>from_index</tt> (inclusive), to <tt>to_index</tt>
    *   (exclusive). The length of the returned BitSet is given by
    *   <code>to_index - from_index</code>. Note that index 0 of
    *   a bitstring is on the right hand side.
    *   Eg:
    *<pre>
    *       BitSet b = BitSet.forString("01100110011");
    *
    *       //  prints "1100"
    *       out.println( b.bitSlice( 2, 6 ).toBitString() );
    *
    *       //  prints "0011"
    *       out.println( b.bitSlice( 0, 4 ).toBitString() );
    *
    *</pre>
    */
    public BitSet bitSlice( int from_index, int to_index ) {
        return new BitSet( bits, from_index, to_index );
    }

    /**
    *   Returns a new BitSet built by reading this BitSet at each position
    *   that is set to true in the passed BitSet, lowest position first;
    *   positions that are false in the passed BitSet are skipped.
    *   The {@link #length()} of the bitset returned is
    *   equal to the {@link #size()} of the given bitset (ie:
    *   <code>indexes.size()</code>). Similarly, slicing a bitset with
    *   itself returns a bitset of length equal to its own {@link #size()},
    *   in which all bits are set to true.
    *   Eg:
    *<pre>
    *       BitSet b = BitSet.forString("01100110011");
    *
    *       //  prints "011"
    *       BitSet indexes_to_slice1 = BitSet.forString("111");
    *       out.println( b.bitSlice( indexes_to_slice1 ).toBitString() );
    *
    *       //  prints "101"
    *       BitSet indexes_to_slice2 = BitSet.forString("10101");
    *       out.println( b.bitSlice( indexes_to_slice2 ).toBitString() );
    *
    *       //  prints "111111"
    *       out.println( b.bitSlice( b ).toBitString() );
    *</pre>
    *
    *   @throws ArrayIndexOutOfBoundsException if the passed BitSet has a
    *   bit set at an index equal to/greater than the {@link #length()}
    *   of this BitSet.
    */
    public BitSet bitSlice( BitSet indexes ) {
        BitSet slice = new BitSet( indexes.size() );

        int i = 0;
        for ( int index : indexes ) {
            slice.bits[i++] = this.bits[index];
        }

        return slice;
    }

    /**
    *   Sets this bitset to <tt>this OR b</tt> (this | b), growing this
    *   bitset to the length of b if b is longer.
    *
    *   @return true if at least one bit of this bitset changed from
    *   false to true.
    */
    public boolean bitwiseOr( BitSet b ) {
        boolean[] other = b.bits;
        ensureCapacity( other.length );

        boolean changed = false;
        for ( int i = 0; i < other.length; i++ ) {
            if ( other[i] && ! bits[i] ) {
                bits[i] = true;
                changed = true;
            }
        }

        return changed;
    }

    /** Returns a new bitset that is equal to <tt>this OR b</tt> (this | b). */
    public BitSet bitwiseOrEquals( BitSet b ) {
        BitSet copy = clone();
        copy.bitwiseOr( b );
        return copy;
    }

    /**
    *   Sets this bitset to <tt>this XOR b</tt> (this ^ b), growing this
    *   bitset to the length of b if b is longer. Bits of this bitset that
    *   lie beyond the length of b are left unchanged, since XOR with an
    *   absent (false) bit is the identity.
    */
    public void bitwiseXor( BitSet b ) {
        boolean[] other = b.bits;
        ensureCapacity( other.length );

        for ( int i = 0; i < other.length; i++ ) {
            bits[i] ^= other[i];
        }
    }

    /** Returns a new bitset that is equal to <tt>this XOR b</tt> (this ^ b). */
    public BitSet bitwiseXorEquals( BitSet b ) {
        BitSet copy = clone();
        copy.bitwiseXor( b );
        return copy;
    }

    /**
    *   Returns the index of the highest set bit in this BitSet, or -1 if
    *   no bits are set.
    */
    public int highestSetBit() {
        for ( int i = bits.length - 1; i >= 0; i-- ) {
            if ( bits[i] ) {
                return i;
            }
        }
        return -1;
    }

    /**
    *   Returns the value of this BitSet as an integer, (ie: converts binary
    *   bitstring to base10) ignoring bits at indexes equal to or greater
    *   than {@link Integer#SIZE}.
    */
    public int intValue() {
        int value = 0;
        int length = Math.min( bits.length, Integer.SIZE );

        for ( int i = 0; i < length; i++ ) {
            if ( bits[i] ) {
                value |= (1 << i);
            }
        }

        return value;
    }

    /**
    *   Returns the index of the lowest set bit in this BitSet, or -1 if
    *   no bits are set.
    */
    public int lowestSetBit() {
        for ( int i = 0; i < bits.length; i++ ) {
            if ( bits[i] ) {
                return i;
            }
        }
        return -1;
    }

    /**
    *   Returns a string representation of this bitset of form
    *   <tt>"{1, 3, 5}"</tt>, containing only those integers set
    *   to true.
    */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder( bits.length * 2 );
        sb.append('{');

        boolean first = true;
        for ( int i = 0; i < bits.length; i++ ) {
            if ( ! bits[i] ) {
                continue;
            }
            if ( ! first ) {
                sb.append( ", " );
            }
            sb.append( i );
            first = false;
        }

        sb.append('}');
        return sb.toString();
    }

    /**
    *   Returns this BitSet as an array of booleans. Modifications to
    *   this array are not reflected in this bitset.
    */
    public boolean[] toBitArray() {
        return clone().bits;
    }

    /** Returns this bitset as a bitstring of length {@link #length()}. */
    public String toBitString() {
        char[] string = new char[ bits.length ];

        // index 0 goes to the rightmost character
        int pos = string.length - 1;
        for ( int i = 0; i < bits.length; i++ ) {
            string[pos--] = bits[i] ? '1' : '0';
        }

        return new String( string );
    }

    /**
    *   Returns true if the given index lies within the allocated length
    *   and its bit is true; negative and too-large indexes yield false.
    */
    private boolean isSet( int index ) {
        return index >= 0 && index < bits.length && bits[index];
    }
}