/*
* @(#)StringTable.java 1.14 06/10/10
*
* Copyright 1990-2008 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 only, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is
* included at /legal/license.txt).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 or visit www.sun.com if you need additional
* information or have any questions.
*
*/
package vm;
import jcc.Str2ID;
import components.StringConstant;
import components.UnicodeConstant;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
/**
*
* There are two string-like data types in today's JDK:
* 1) zero-terminated, C-language, ASCII strings
* 2) Java Strings.
*
* The former arise from:
* (a) UTF encoding of Java class, method, field names and type
* signatures, used for linkage
* (b) UTF encoded forms of Java String constants, used as keys
* for the intern-ing of said constants.
* See the class AsciiStrings where these are manipulated
* to achieve some sharing of runtime data structures.
*
* In this, the StringTable class, we keep track of Java Strings, in
* the form of StringConstant's.
* We enter them in a Str2ID structure, which will be wanted
* at runtime. And we assign layout of the runtime char[] data.
* Much aliasing of data is possible, since this is read-only and
* not usually zero-terminated. We won't do any of that, for now.
* There is much potential here.
*/
public class StringTable implements Comparator {
    public Str2ID stringHash = new Str2ID();

    /*
     * This hash table is organized by string lengths. For each string length
     * the lookup finds another hash table of all strings of that length.
     */
    private Hashtable htable = new Hashtable();

    /*
     * For every interned string, keep track of all its "copies" or "aliases".
     * We are going to have to update the unicodeIndex for all these.
     *
     * Maps the interned StringConstant to an IdentityHashMap whose keys are
     * the alias StringConstants (each alias is stored as both key and value).
     *
     * NOTE: the alias collection must distinguish elements by reference
     * identity, because every element we put in compares as equal to every
     * other. An ordering based on System.identityHashCode() is not sufficient
     * for that: identity hash codes are not guaranteed to be unique, and a
     * sorted set would silently drop an alias whose hash collides with one
     * already present, leaving its unicodeIndex unset.
     */
    private Hashtable aliasTable = new Hashtable();

    // Concatenated character data of all interned strings; only assigned
    // by arrangeStringData().
    private StringBuffer data;

    // Sum of the lengths of all unique interned strings.
    private int aggregateSize;

    private int numUniqueStrings = 0;

    // Cached result of allStrings(); reset to null whenever intern() is called.
    private StringConstant[] stringTable;

    //
    // Comparison for sorting in ascending order of string length.
    // Returns 1, 0 or -1 as the first string is longer than, the same
    // length as, or shorter than the second.
    //
    public int compare(Object o1, Object o2) {
        StringConstant obj1 = (StringConstant) o1;
        StringConstant obj2 = (StringConstant) o2;
        int len1 = obj1.str.string.length();
        int len2 = obj2.str.string.length();
        if (len1 > len2) {
            return 1;
        } else if (len1 == len2) {
            return 0;
        }
        return -1;
    }

    /*
     * Comparison for ordering Objects by their identity hash code.
     *
     * This class is no longer used by StringTable itself; it is retained
     * because it is visible to the rest of the package.
     */
    static class identityHashComparator implements Comparator {
        /*
         * Note: this comparator imposes orderings that
         * are inconsistent with equals. It also returns 0 for two distinct
         * objects whose identity hash codes happen to collide, so it must
         * not be relied on to tell arbitrary objects apart.
         */
        public int compare(Object o1, Object o2) {
            int h1 = System.identityHashCode(o1);
            int h2 = System.identityHashCode(o2);
            // Compare explicitly: subtracting the two hashes can overflow
            // and yield a result with the wrong sign.
            if (h1 < h2) {
                return -1;
            } else if (h1 > h2) {
                return 1;
            }
            return 0;
        }
    }

    //
    // This is a two level interning scheme, where strings with
    // the same length are grouped together.
    //
    // The top hashtable maps string lengths to the hashtables holding
    // the strings of that length.
    //
    // If no matching string has been interned yet, 's' becomes the interned
    // copy: it is entered in the per-length table and in stringHash, and the
    // size and count totals are updated. Otherwise 's' is recorded as an
    // alias of the previously interned copy.
    //
    public void intern(StringConstant s) {
        int len = s.str.string.length();
        Integer lenObj = new Integer(len);
        Hashtable lenTab = (Hashtable) htable.get(lenObj);
        StringConstant entry;

        this.stringTable = null; // New string coming in

        if (lenTab == null) {
            /* New hashtable for this new length */
            lenTab = new Hashtable();
            htable.put(lenObj, lenTab);
            entry = null;
        } else {
            entry = (StringConstant) lenTab.get(s);
        }

        if (entry == null) {
            // We will be tracking all those strings that intern to be 's'.
            // We need to remember those, so that we can update their
            // unicodeIndex fields. The collection is identity-based so that
            // distinct-but-equal aliases are all retained.
            aliasTable.put(s, new IdentityHashMap());
            //
            // Intern
            //
            lenTab.put(s, s);
            stringHash.getID(s.str, s);
            aggregateSize += len;
            numUniqueStrings++;
        } else {
            // This string was already interned.
            // Add this instance to the aliases of the interned copy.
            IdentityHashMap aliases = (IdentityHashMap) aliasTable.get(entry);
            aliases.put(s, s);
        }
    }

    //
    // Copy characters [srcBegin, srcEnd) of the arranged string data into
    // dst, starting at dstBegin. The data buffer is only created by
    // arrangeStringData().
    //
    protected void fillInChars(int srcBegin, int srcEnd,
                               char dst[], int dstBegin) {
        data.getChars(srcBegin, srcEnd, dst, dstBegin);
    }

    //
    // Traverse the multiple hashtables holding strings, and extract
    // them to an array of StringConstants. The strings in the resulting
    // array are grouped with like length strings, and ordered in ascending
    // string length.
    //
    // The array is cached until the next call to intern(); the cached
    // array itself is returned, not a copy.
    //
    public StringConstant[] allStrings() {
        if (this.stringTable != null) {
            return this.stringTable;
        }
        this.stringTable = new StringConstant[numUniqueStrings];
        /*
         * Now go through the hierarchical hash tables, and extract
         * all strings. Adjacent strings have equal lengths in this
         * table.
         */
        Enumeration lengths = htable.keys();
        int idx = 0;
        while (lengths.hasMoreElements()) {
            Integer len = (Integer) lengths.nextElement();
            Hashtable stringsHtab = (Hashtable) htable.get(len);
            Enumeration strings = stringsHtab.keys();
            while (strings.hasMoreElements()) {
                StringConstant str = (StringConstant) strings.nextElement();
                this.stringTable[idx++] = str;
            }
        }
        if (idx != numUniqueStrings) {
            throw new InternalError("String count mismatch");
        }
        //
        // Now sort this string table in increasing order of length.
        // This is for aesthetic value alone, so the output is more
        // readable.
        //
        Arrays.sort(this.stringTable, this);
        return this.stringTable;
    }

    // Number of unique strings interned so far.
    public int internedStringCount() {
        return numUniqueStrings;
    }

    /*
     * Arrange for the "data" buffer to hold all the string bodies
     * in some form.
     * Arrange for the unicodeOffset field of each interned StringConstant
     * to be set to the index of the beginning of the representation
     * of its data in this array, and for the unicodeIndex field of each
     * interned StringConstant and of all its aliases to be set to its
     * position in the allStrings() array.
     *
     * Returns the total number of characters placed in the data buffer.
     * Throws InternalError if an alias already has a unicodeIndex other
     * than -1, or if the total size disagrees with the size accumulated
     * by intern().
     */
    public int arrangeStringData() {
        /*
         * Our initial guess is simply to concatenate all the data.
         * Later, we can try to be cleverer.
         */
        data = new StringBuffer(aggregateSize);
        int curOffset = 0;
        StringConstant[] sTab = allStrings();
        for (int i = 0; i < sTab.length; i++) {
            StringConstant t = sTab[i];
            // Update the unicodeIndex of all the aliases of this string
            IdentityHashMap aliases = (IdentityHashMap) aliasTable.get(t);
            Iterator aliasEnum = aliases.keySet().iterator();
            while (aliasEnum.hasNext()) {
                StringConstant a = (StringConstant) aliasEnum.next();
                // Make sure that we have not seen this constant before
                if (a.unicodeIndex != -1) {
                    throw new InternalError("String aliases messed up");
                }
                a.unicodeIndex = i;
            }
            // And finally update the unicodeIndex of this string
            t.unicodeIndex = i;
            t.unicodeOffset = curOffset;
            data.append(t.str.string);
            curOffset += t.str.string.length();
        }
        if (curOffset != aggregateSize) {
            throw new InternalError("String size mismatch");
        }
        return curOffset;
    }
}