/* [The "BSD licence"] Copyright (c) 2005-2006 Terence Parr All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.misc; import org.antlr.analysis.Label; import org.antlr.tool.Grammar; import java.util.*; /** A set of integers that relies on ranges being common to do * "run-length-encoded" like compression (if you view an IntSet like * a BitSet with runs of 0s and 1s). Only ranges are recorded so that * a few ints up near value 1000 don't cause massive bitsets, just two * integer intervals. * * element values may be negative. Useful for sets of EPSILON and EOF. * * 0..9 char range is index pair ['\u0030','\u0039']. * Multiple ranges are encoded with multiple index pairs. Isolated * elements are encoded with an index pair where both intervals are the same. * * The ranges are ordered and disjoint so that 2..6 appears before 101..103. */ public class IntervalSet implements IntSet { public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE); /** The list of sorted, disjoint intervals. */ protected List<Interval> intervals; /** Create a set with no elements */ public IntervalSet() { intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements } public IntervalSet(List<Interval> intervals) { this.intervals = intervals; } /** Create a set with a single element, el. */ public static IntervalSet of(int a) { IntervalSet s = new IntervalSet(); s.add(a); return s; } /** Create a set with all ints within range [a..b] (inclusive) */ public static IntervalSet of(int a, int b) { IntervalSet s = new IntervalSet(); s.add(a,b); return s; } /** Add a single element to the set. An isolated element is stored * as a range el..el. */ public void add(int el) { add(el,el); } /** Add interval; i.e., add all integers from a to b to set. * If b<a, do nothing. * Keep list in sorted order (by left range value). * If overlap, combine ranges. For example, * If this is {1..5, 10..20}, adding 6..7 yields * {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. */ public void add(int a, int b) { add(Interval.create(a,b)); } // copy on write so we can cache a..a intervals and sets of that protected void add(Interval addition) { //System.out.println("add "+addition+" to "+intervals.toString()); if ( addition.b<addition.a ) { return; } // find position in list // Use iterators as we modify list in place for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { Interval r = (Interval) iter.next(); if ( addition.equals(r) ) { return; } if ( addition.adjacent(r) || !addition.disjoint(r) ) { // next to each other, make a single larger interval Interval bigger = addition.union(r); iter.set(bigger); // make sure we didn't just create an interval that // should be merged with next interval in list if ( iter.hasNext() ) { Interval next = (Interval) iter.next(); if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { // if we bump up against or overlap next, merge iter.remove(); // remove this one iter.previous(); // move backwards to what we just set iter.set(bigger.union(next)); // set to 3 merged ones } } return; } if ( addition.startsBeforeDisjoint(r) ) { // insert before r iter.previous(); iter.add(addition); return; } // if disjoint and after r, a future iteration will handle it } // ok, must be after last interval (and disjoint from last interval) // just add it intervals.add(addition); } /* protected void add(Interval addition) { //System.out.println("add "+addition+" to "+intervals.toString()); if ( addition.b<addition.a ) { return; } // find position in list //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { int n = intervals.size(); for (int i=0; i<n; i++) { Interval r = (Interval)intervals.get(i); if ( addition.equals(r) ) { return; } if ( addition.adjacent(r) || !addition.disjoint(r) ) { // next to each other, make a single larger interval Interval bigger = addition.union(r); intervals.set(i, bigger); // make sure we didn't just create an interval that // should be merged with next interval in list if ( (i+1)<n ) { i++; Interval next = (Interval)intervals.get(i); if ( bigger.adjacent(next)||!bigger.disjoint(next) ) { // if we bump up against or overlap next, merge intervals.remove(i); // remove next one i--; intervals.set(i, bigger.union(next)); // set to 3 merged ones } } return; } if ( addition.startsBeforeDisjoint(r) ) { // insert before r intervals.add(i, addition); return; } // if disjoint and after r, a future iteration will handle it } // ok, must be after last interval (and disjoint from last interval) // just add it intervals.add(addition); } */ public void addAll(IntSet set) { if ( set==null ) { return; } if ( !(set instanceof IntervalSet) ) { throw new IllegalArgumentException("can't add non IntSet ("+ set.getClass().getName()+ ") to IntervalSet"); } IntervalSet other = (IntervalSet)set; // walk set and add each interval int n = other.intervals.size(); for (int i = 0; i < n; i++) { Interval I = (Interval) other.intervals.get(i); this.add(I.a,I.b); } } public IntSet complement(int minElement, int maxElement) { return this.complement(IntervalSet.of(minElement,maxElement)); } /** Given the set of possible values (rather than, say UNICODE or MAXINT), * return a new set containing all elements in vocabulary, but not in * this. The computation is (vocabulary - this). * * 'this' is assumed to be either a subset or equal to vocabulary. */ public IntSet complement(IntSet vocabulary) { if ( vocabulary==null ) { return null; // nothing in common with null set } if ( !(vocabulary instanceof IntervalSet ) ) { throw new IllegalArgumentException("can't complement with non IntervalSet ("+ vocabulary.getClass().getName()+")"); } IntervalSet vocabularyIS = ((IntervalSet)vocabulary); int maxElement = vocabularyIS.getMaxElement(); IntervalSet compl = new IntervalSet(); int n = intervals.size(); if ( n ==0 ) { return compl; } Interval first = (Interval)intervals.get(0); // add a range from 0 to first.a constrained to vocab if ( first.a > 0 ) { IntervalSet s = IntervalSet.of(0, first.a-1); IntervalSet a = (IntervalSet)s.and(vocabularyIS); compl.addAll(a); } for (int i=1; i<n; i++) { // from 2nd interval .. nth Interval previous = (Interval)intervals.get(i-1); Interval current = (Interval)intervals.get(i); IntervalSet s = IntervalSet.of(previous.b+1, current.a-1); IntervalSet a = (IntervalSet)s.and(vocabularyIS); compl.addAll(a); } Interval last = (Interval)intervals.get(n -1); // add a range from last.b to maxElement constrained to vocab if ( last.b < maxElement ) { IntervalSet s = IntervalSet.of(last.b+1, maxElement); IntervalSet a = (IntervalSet)s.and(vocabularyIS); compl.addAll(a); } return compl; } /** Compute this-other via this&~other. * Return a new set containing all elements in this but not in other. * other is assumed to be a subset of this; * anything that is in other but not in this will be ignored. */ public IntSet subtract(IntSet other) { // assume the whole unicode range here for the complement // because it doesn't matter. Anything beyond the max of this' set // will be ignored since we are doing this & ~other. The intersection // will be empty. The only problem would be when this' set max value // goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE // will prevent this. return this.and(((IntervalSet)other).complement(COMPLETE_SET)); } /** return a new set containing all elements in this but not in other. * Intervals may have to be broken up when ranges in this overlap * with ranges in other. other is assumed to be a subset of this; * anything that is in other but not in this will be ignored. * * Keep around, but 10-20-2005, I decided to make complement work w/o * subtract and so then subtract can simply be a&~b * public IntSet subtract(IntSet other) { if ( other==null || !(other instanceof IntervalSet) ) { return null; // nothing in common with null set } IntervalSet diff = new IntervalSet(); // iterate down both interval lists ListIterator thisIter = this.intervals.listIterator(); ListIterator otherIter = ((IntervalSet)other).intervals.listIterator(); Interval mine=null; Interval theirs=null; if ( thisIter.hasNext() ) { mine = (Interval)thisIter.next(); } if ( otherIter.hasNext() ) { theirs = (Interval)otherIter.next(); } while ( mine!=null ) { //System.out.println("mine="+mine+", theirs="+theirs); // CASE 1: nothing in theirs removes a chunk from mine if ( theirs==null || mine.disjoint(theirs) ) { // SUBCASE 1a: finished traversing theirs; keep adding mine now if ( theirs==null ) { // add everything in mine to difference since theirs done diff.add(mine); mine = null; if ( thisIter.hasNext() ) { mine = (Interval)thisIter.next(); } } else { // SUBCASE 1b: mine is completely to the left of theirs // so we can add to difference; move mine, but not theirs if ( mine.startsBeforeDisjoint(theirs) ) { diff.add(mine); mine = null; if ( thisIter.hasNext() ) { mine = (Interval)thisIter.next(); } } // SUBCASE 1c: theirs is completely to the left of mine else { // keep looking in theirs theirs = null; if ( otherIter.hasNext() ) { theirs = (Interval)otherIter.next(); } } } } else { // CASE 2: theirs breaks mine into two chunks if ( mine.properlyContains(theirs) ) { // must add two intervals: stuff to left and stuff to right diff.add(mine.a, theirs.a-1); // don't actually add stuff to right yet as next 'theirs' // might overlap with it // The stuff to the right might overlap with next "theirs". // so it is considered next Interval right = new Interval(theirs.b+1, mine.b); mine = right; // move theirs forward theirs = null; if ( otherIter.hasNext() ) { theirs = (Interval)otherIter.next(); } } // CASE 3: theirs covers mine; nothing to add to diff else if ( theirs.properlyContains(mine) ) { // nothing to add, theirs forces removal totally of mine // just move mine looking for an overlapping interval mine = null; if ( thisIter.hasNext() ) { mine = (Interval)thisIter.next(); } } // CASE 4: non proper overlap else { // overlap, but not properly contained diff.add(mine.differenceNotProperlyContained(theirs)); // update iterators boolean moveTheirs = true; if ( mine.startsBeforeNonDisjoint(theirs) || theirs.b > mine.b ) { // uh oh, right of theirs extends past right of mine // therefore could overlap with next of mine so don't // move theirs iterator yet moveTheirs = false; } // always move mine mine = null; if ( thisIter.hasNext() ) { mine = (Interval)thisIter.next(); } if ( moveTheirs ) { theirs = null; if ( otherIter.hasNext() ) { theirs = (Interval)otherIter.next(); } } } } } return diff; } */ /** TODO: implement this! */ public IntSet or(IntSet a) { IntervalSet o = new IntervalSet(); o.addAll(this); o.addAll(a); //throw new NoSuchMethodError(); return o; } /** Return a new set with the intersection of this set with other. Because * the intervals are sorted, we can use an iterator for each list and * just walk them together. This is roughly O(min(n,m)) for interval * list lengths n and m. */ public IntSet and(IntSet other) { if ( other==null ) { //|| !(other instanceof IntervalSet) ) { return null; // nothing in common with null set } ArrayList myIntervals = (ArrayList)this.intervals; ArrayList theirIntervals = (ArrayList)((IntervalSet)other).intervals; IntervalSet intersection = null; int mySize = myIntervals.size(); int theirSize = theirIntervals.size(); int i = 0; int j = 0; // iterate down both interval lists looking for nondisjoint intervals while ( i<mySize && j<theirSize ) { Interval mine = (Interval)myIntervals.get(i); Interval theirs = (Interval)theirIntervals.get(j); //System.out.println("mine="+mine+" and theirs="+theirs); if ( mine.startsBeforeDisjoint(theirs) ) { // move this iterator looking for interval that might overlap i++; } else if ( theirs.startsBeforeDisjoint(mine) ) { // move other iterator looking for interval that might overlap j++; } else if ( mine.properlyContains(theirs) ) { // overlap, add intersection, get next theirs if ( intersection==null ) { intersection = new IntervalSet(); } intersection.add(mine.intersection(theirs)); j++; } else if ( theirs.properlyContains(mine) ) { // overlap, add intersection, get next mine if ( intersection==null ) { intersection = new IntervalSet(); } intersection.add(mine.intersection(theirs)); i++; } else if ( !mine.disjoint(theirs) ) { // overlap, add intersection if ( intersection==null ) { intersection = new IntervalSet(); } intersection.add(mine.intersection(theirs)); // Move the iterator of lower range [a..b], but not // the upper range as it may contain elements that will collide // with the next iterator. So, if mine=[0..115] and // theirs=[115..200], then intersection is 115 and move mine // but not theirs as theirs may collide with the next range // in thisIter. // move both iterators to next ranges if ( mine.startsAfterNonDisjoint(theirs) ) { j++; } else if ( theirs.startsAfterNonDisjoint(mine) ) { i++; } } } if ( intersection==null ) { return new IntervalSet(); } return intersection; } /** Is el in any range of this set? */ public boolean member(int el) { int n = intervals.size(); for (int i = 0; i < n; i++) { Interval I = (Interval) intervals.get(i); int a = I.a; int b = I.b; if ( el<a ) { break; // list is sorted and el is before this interval; not here } if ( el>=a && el<=b ) { return true; // found in this interval } } return false; /* for (ListIterator iter = intervals.listIterator(); iter.hasNext();) { Interval I = (Interval) iter.next(); if ( el<I.a ) { break; // list is sorted and el is before this interval; not here } if ( el>=I.a && el<=I.b ) { return true; // found in this interval } } return false; */ } /** return true if this set has no members */ public boolean isNil() { return intervals==null || intervals.size()==0; } /** If this set is a single integer, return it otherwise Label.INVALID */ public int getSingleElement() { if ( intervals!=null && intervals.size()==1 ) { Interval I = (Interval)intervals.get(0); if ( I.a == I.b ) { return I.a; } } return Label.INVALID; } public int getMaxElement() { if ( isNil() ) { return Label.INVALID; } Interval last = (Interval)intervals.get(intervals.size()-1); return last.b; } /** Return minimum element >= 0 */ public int getMinElement() { if ( isNil() ) { return Label.INVALID; } int n = intervals.size(); for (int i = 0; i < n; i++) { Interval I = (Interval) intervals.get(i); int a = I.a; int b = I.b; for (int v=a; v<=b; v++) { if ( v>=0 ) return v; } } return Label.INVALID; } /** Return a list of Interval objects. */ public List<Interval> getIntervals() { return intervals; } /** Are two IntervalSets equal? Because all intervals are sorted * and disjoint, equals is a simple linear walk over both lists * to make sure they are the same. Interval.equals() is used * by the List.equals() method to check the ranges. */ public boolean equals(Object obj) { if ( obj==null || !(obj instanceof IntervalSet) ) { return false; } IntervalSet other = (IntervalSet)obj; return this.intervals.equals(other.intervals); } public String toString() { return toString(null); } public String toString(Grammar g) { StringBuffer buf = new StringBuffer(); if ( this.intervals==null || this.intervals.size()==0 ) { return "{}"; } if ( this.intervals.size()>1 ) { buf.append("{"); } Iterator iter = this.intervals.iterator(); while (iter.hasNext()) { Interval I = (Interval) iter.next(); int a = I.a; int b = I.b; if ( a==b ) { if ( g!=null ) { buf.append(g.getTokenDisplayName(a)); } else { buf.append(a); } } else { if ( g!=null ) { buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b)); } else { buf.append(a+".."+b); } } if ( iter.hasNext() ) { buf.append(", "); } } if ( this.intervals.size()>1 ) { buf.append("}"); } return buf.toString(); } public int size() { int n = 0; int numIntervals = intervals.size(); if ( numIntervals==1 ) { Interval firstInterval = this.intervals.get(0); return firstInterval.b-firstInterval.a+1; } for (int i = 0; i < numIntervals; i++) { Interval I = (Interval) intervals.get(i); n += (I.b-I.a+1); } return n; } public List toList() { List values = new ArrayList(); int n = intervals.size(); for (int i = 0; i < n; i++) { Interval I = (Interval) intervals.get(i); int a = I.a; int b = I.b; for (int v=a; v<=b; v++) { values.add(Utils.integer(v)); } } return values; } /** Get the ith element of ordered set. Used only by RandomPhrase so * don't bother to implement if you're not doing that for a new * ANTLR code gen target. */ public int get(int i) { int n = intervals.size(); int index = 0; for (int j = 0; j < n; j++) { Interval I = (Interval) intervals.get(j); int a = I.a; int b = I.b; for (int v=a; v<=b; v++) { if ( index==i ) { return v; } index++; } } return -1; } public int[] toArray() { int[] values = new int[size()]; int n = intervals.size(); int j = 0; for (int i = 0; i < n; i++) { Interval I = (Interval) intervals.get(i); int a = I.a; int b = I.b; for (int v=a; v<=b; v++) { values[j] = v; j++; } } return values; } public org.antlr.runtime.BitSet toRuntimeBitSet() { org.antlr.runtime.BitSet s = new org.antlr.runtime.BitSet(getMaxElement()+1); int n = intervals.size(); for (int i = 0; i < n; i++) { Interval I = (Interval) intervals.get(i); int a = I.a; int b = I.b; for (int v=a; v<=b; v++) { s.add(v); } } return s; } public void remove(int el) { throw new NoSuchMethodError("IntervalSet.remove() unimplemented"); } /* protected void finalize() throws Throwable { super.finalize(); System.out.println("size "+intervals.size()+" "+size()); } */ }