/*
* #!
* Ontopia Engine
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.utils;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
/**
 * INTERNAL: Represents a set of Unicode characters, and provides a
 * method to quickly determine whether or not a particular character
 * is in the set. Useful for large, complex sets like "the set of XML
 * name start characters". Characters outside the BMP (ie: above
 * U+FFFF) are not supported.
 *
 * <p>Usage follows a two-phase life cycle: first add intervals with
 * {@link #addInterval(char, char)}, then call {@link #close()} once,
 * and only after that query the set with {@link #contains(char)}.
 */
public class CharacterSet {
  // Intervals collected while the set is being built; null once close() has run.
  private List<CharacterInterval> tempset;
  // Intervals sorted by lower bound; null until close() has run.
  private CharacterInterval[] set;

  /**
   * Creates an empty, open set to which intervals can be added.
   */
  public CharacterSet() {
    tempset = new ArrayList<CharacterInterval>();
  }

  /**
   * Adds the interval of characters to the set. To add a single
   * character make low and high the same value. Cannot be called
   * after close() has been called. An interval whose low bound is
   * greater than its high bound matches no character.
   *
   * @param low the first character of the interval (inclusive)
   * @param high the last character of the interval (inclusive)
   * @throws IllegalStateException if close() has already been called
   */
  public void addInterval(char low, char high) {
    if (tempset == null) {
      throw new IllegalStateException(
          "Cannot add intervals after close() has been called");
    }
    tempset.add(new CharacterInterval(low, high));
  }

  /**
   * Called after the last interval has been added. Compiles the
   * internal, efficient representation of the set. No more additions
   * can be made after this method has been called. Calling this
   * method again on an already closed set has no effect.
   */
  public void close() {
    if (tempset == null) {
      return; // already closed; the compiled array is kept as it is
    }
    CharacterInterval[] compiled =
        tempset.toArray(new CharacterInterval[tempset.size()]);
    Arrays.sort(compiled, new IntervalComparator());
    set = compiled;
    tempset = null; // marks the set as closed and releases the build list
  }

  /**
   * Used to determine whether or not the character is a member of the
   * set.
   *
   * @param ch the character to look up
   * @return true if some added interval covers ch, false otherwise
   * @throws IllegalStateException if close() has not been called yet
   */
  public boolean contains(char ch) {
    if (set == null) {
      throw new IllegalStateException(
          "close() must be called before contains() can be used");
    }
    for (int ix = 0; ix < set.length; ix++) {
      if (ch >= set[ix].low && ch <= set[ix].high) {
        return true;
      }
    }
    return false;
  }

  // It's tempting to turn the scan in contains() into a binary search,
  // but performance testing seems to indicate that there is no point.
  // Results below from running ExportSpeed on opera.xtm
  //   --- BEFORE ID CHECK
  //   Average export time in seconds: 0.27578
  //   Object count: 5223
  //   Obj/sec: 18939.01
  //   --- AFTER ID CHECK
  //   Average export time in seconds: 0.27407
  //   Object count: 5223
  //   Obj/sec: 19057.176
  // Conclusion: no measurable difference, therefore the complexity of
  // binary search is not warranted.

  // --- IntervalComparator

  /**
   * Compares character intervals for sorting, ordering them by their
   * lower bound only.
   */
  static class IntervalComparator implements java.util.Comparator<CharacterInterval> {
    public int compare(CharacterInterval c1, CharacterInterval c2) {
      // Both operands are chars promoted to int (0..65535), so the
      // subtraction cannot overflow.
      return c1.low - c2.low;
    }
  }

  // --- CharacterInterval

  /**
   * The set is made up from intervals represented using this class.
   * Single, isolated characters in the set are represented as an
   * interval one character wide.
   */
  static class CharacterInterval {
    /** First character of the interval (inclusive). */
    public char low;
    /** Last character of the interval (inclusive). */
    public char high;

    public CharacterInterval(char low, char high) {
      this.low = low;
      this.high = high;
    }
  }
}