/*******************************************************************************
 * Copyright (c) 2002 - 2006 IBM Corporation.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package com.ibm.wala.util.strings;

import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;

import com.ibm.wala.util.collections.HashMapFactory;

/**
 * A UTF-8 encoded byte string.
 *
 * Atoms are interned (canonicalized) so they may be compared for equality using the "==" operator.
 *
 * Atoms are used to represent names, descriptors, and string literals appearing in a class's constant pool.
 *
 * All interning goes through static methods synchronized on this class, which guard the shared dictionary.
 */
public final class Atom implements Serializable {

  /* Serial version */
  private static final long serialVersionUID = -3256390509887654329L;

  /**
   * Used to canonicalize Atoms, a mapping from AtomKey to Atom. AtomKeys are not canonical, but Atoms are.
   */
  private static final HashMap<AtomKey, Atom> dictionary = HashMapFactory.make();

  /**
   * The utf8 value this atom represents. Shared with the AtomKey stored in the dictionary, so it must never be modified.
   */
  private final byte[] val;

  /**
   * Cached hash code for this atom (copied from its AtomKey).
   */
  private final int hash;

  /**
   * Find or create an atom.
   *
   * @param str atom value, as string literal whose characters are unicode
   * @return atom
   * @throws IllegalArgumentException if str is null
   */
  public static Atom findOrCreateUnicodeAtom(String str) {
    if (str == null) {
      throw new IllegalArgumentException("str is null");
    }
    byte[] utf8 = UTF8Convert.toUTF8(str);
    return findOrCreate(utf8);
  }

  /**
   * Find or create an atom.
   *
   * The string is encoded as UTF-8 rather than with the platform default charset. For the documented ASCII inputs the resulting
   * bytes are the same; for anything else the result no longer depends on the JVM's locale settings.
   *
   * @param str atom value, as string literal whose characters are from ascii subset of unicode (not including null)
   * @return atom
   * @throws IllegalArgumentException if str is null
   */
  public static Atom findOrCreateAsciiAtom(String str) {
    if (str == null) {
      throw new IllegalArgumentException("str is null");
    }
    // getBytes(Charset) returns a freshly allocated array, so no defensive copy is needed
    return internOwned(str.getBytes(StandardCharsets.UTF_8));
  }

  /**
   * Find or create an atom.
   *
   * @param utf8 atom value, as utf8 encoded bytes
   * @return atom
   * @throws IllegalArgumentException if utf8 is null
   */
  public static Atom findOrCreateUtf8Atom(byte[] utf8) {
    if (utf8 == null) {
      throw new IllegalArgumentException("utf8 is null");
    }
    return findOrCreate(utf8);
  }

  /**
   * Find or create an Atom from the len bytes of utf8 starting at index off.
   *
   * @param utf8 source bytes
   * @param off index of the first byte to use
   * @param len number of bytes to use
   * @return atom
   * @throws IllegalArgumentException if utf8 is null, if off or len is negative, or if off + len overflows or exceeds utf8.length
   */
  public static Atom findOrCreate(byte utf8[], int off, int len) throws IllegalArgumentException {
    if (utf8 == null) {
      throw new IllegalArgumentException("utf8 == null");
    }
    if (len < 0) {
      throw new IllegalArgumentException("len must be >= 0, " + len);
    }
    if (off < 0) {
      throw new IllegalArgumentException("off must be >= 0, " + off);
    }
    // off and len are both non-negative here; add them as long so the sum cannot wrap around
    long end = (long) off + len;
    if (end > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("off + len is too big: " + off + " + " + len);
    }
    if (utf8.length < end) {
      throw new IllegalArgumentException("utf8.length < off + len");
    }
    return internOwned(Arrays.copyOfRange(utf8, off, off + len));
  }

  /**
   * Find or create the canonical Atom for the given bytes.
   *
   * The array is only read: when a new Atom has to be created it keeps a private copy, so later changes to bytes by the caller
   * cannot corrupt the dictionary.
   *
   * @param bytes atom value, as utf8 encoded bytes
   * @return atom
   * @throws IllegalArgumentException if bytes is null
   */
  public static synchronized Atom findOrCreate(byte[] bytes) {
    if (bytes == null) {
      throw new IllegalArgumentException("bytes is null");
    }
    Atom existing = dictionary.get(new AtomKey(bytes));
    if (existing != null) {
      return existing;
    }
    // copy only on a miss; lookups of already-interned values stay allocation-free apart from the probe key
    return insert(new AtomKey(bytes.clone()));
  }

  /**
   * Find or create the canonical Atom for the bytes of b.
   *
   * @throws IllegalArgumentException if b is null
   */
  public static synchronized Atom findOrCreate(ImmutableByteArray b) {
    if (b == null) {
      throw new IllegalArgumentException("b is null");
    }
    return findOrCreate(b.b);
  }

  /**
   * Find or create the canonical Atom for length bytes of b, beginning at index start.
   *
   * @throws IllegalArgumentException if b is null or the range is invalid; see {@link #findOrCreate(byte[], int, int)}
   */
  public static synchronized Atom findOrCreate(ImmutableByteArray b, int start, int length) {
    if (b == null) {
      throw new IllegalArgumentException("b is null");
    }
    return findOrCreate(b.b, start, length);
  }

  /**
   * Intern an array that was allocated by this class and is not reachable from anywhere else, so it can be stored without
   * copying.
   */
  private static synchronized Atom internOwned(byte[] owned) {
    AtomKey key = new AtomKey(owned);
    Atom existing = dictionary.get(key);
    return existing != null ? existing : insert(key);
  }

  /**
   * Create the Atom for key and record it in the dictionary. Callers must hold the class lock and must already have checked that
   * key is absent.
   */
  private static Atom insert(AtomKey key) {
    Atom created = new Atom(key);
    dictionary.put(key, created);
    return created;
  }

  /**
   * Return printable representation of "this" atom: the bytes decoded as standard UTF-8. This does not go through UTF8Convert,
   * so it may differ from {@link #toUnicodeString()} for values that are not plain UTF-8.
   */
  @Override
  public String toString() {
    return new String(val, StandardCharsets.UTF_8);
  }

  /**
   * Return printable representation of "this" atom, decoded with UTF8Convert.
   */
  public String toUnicodeString() throws java.io.UTFDataFormatException {
    return UTF8Convert.fromUTF8(val);
  }

  /**
   * New Atom containing first count bytes
   *
   * @throws IllegalArgumentException if count is negative or larger than {@link #length()}
   */
  public Atom left(int count) {
    return findOrCreate(val, 0, count);
  }

  /**
   * New Atom containing last count bytes
   *
   * @throws IllegalArgumentException if count is negative or larger than {@link #length()}
   */
  public Atom right(int count) {
    return findOrCreate(val, val.length - count, count);
  }

  /**
   * Does "this" atom begin with the bytes of start?
   *
   * @return true iff start is no longer than this atom and every byte of start matches the byte at the same index in this atom;
   *         an empty start is a prefix of every atom
   * @throws IllegalArgumentException if start is null
   */
  public boolean startsWith(Atom start) {
    if (start == null) {
      throw new IllegalArgumentException("start is null");
    }
    byte[] prefix = start.val;
    if (prefix.length > val.length) {
      // a longer atom can never be a prefix; without this guard the loop would index past the end of val
      return false;
    }
    for (int i = 0; i < prefix.length; ++i) {
      if (val[i] != prefix[i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Return array descriptor corresponding to "this" array-element descriptor. this: array-element descriptor - something like "I"
   * or "Ljava/lang/Object;"
   *
   * @return array descriptor - something like "[I" or "[Ljava/lang/Object;"
   */
  public Atom arrayDescriptorFromElementDescriptor() {
    return concat((byte) '[', val);
  }

  /**
   * Is "this" atom a reserved member name? Note: Sun has reserved all member names starting with '&lt;' for future use. At
   * present, only {@code <init>} and {@code <clinit>} are used.
   */
  public boolean isReservedMemberName() {
    return length() > 0 && val[0] == '<';
  }

  /**
   * Is "this" atom a class descriptor? (Only the first byte is examined: it must be 'L'.)
   */
  public boolean isClassDescriptor() {
    return length() > 0 && val[0] == 'L';
  }

  /**
   * Is "this" atom an array descriptor? (Only the first byte is examined: it must be '['.)
   */
  public boolean isArrayDescriptor() {
    return length() > 0 && val[0] == '[';
  }

  /**
   * Is "this" atom a method descriptor? (Only the first byte is examined: it must be '('.)
   */
  public boolean isMethodDescriptor() {
    return length() > 0 && val[0] == '(';
  }

  /**
   * @return the number of bytes in this atom
   */
  public int length() {
    return val.length;
  }

  /**
   * Create atom from given key, sharing the key's byte array and cached hash.
   */
  private Atom(AtomKey key) {
    this.val = key.val;
    this.hash = key.hash;
  }

  /**
   * Parse "this" array descriptor to obtain descriptor for array's element type. this: array descriptor - something like "[I"
   *
   * Note that the first byte is dropped without checking that it is '['.
   *
   * @return array element descriptor - something like "I"
   * @throws IllegalArgumentException if this atom is empty
   */
  public Atom parseForArrayElementDescriptor() throws IllegalArgumentException {
    if (val.length == 0) {
      throw new IllegalArgumentException("empty atom is not an array");
    }
    return findOrCreate(val, 1, val.length - 1);
  }

  /**
   * Parse "this" array descriptor to obtain number of dimensions in corresponding array type. this: descriptor - something like
   * "[Ljava/lang/String;" or "[[I"
   *
   * @return dimensionality - something like "1" or "2"; 0 if this atom does not start with '['
   * @throws IllegalArgumentException if this atom is empty
   * @throws IllegalStateException if this atom consists only of '[' bytes, i.e. has no element type
   */
  public int parseForArrayDimensionality() throws IllegalArgumentException {
    if (val.length == 0) {
      throw new IllegalArgumentException("empty atom is not an array");
    }
    int dims = countLeadingBrackets();
    if (dims == val.length) {
      throw new IllegalStateException("not an array: " + this);
    }
    return dims;
  }

  /**
   * Return the innermost element type reference for an array, i.e. this atom with all leading '[' bytes removed. An atom that
   * does not start with '[' is returned unchanged.
   *
   * @throws IllegalArgumentException if this atom is empty
   * @throws IllegalStateException if this atom consists only of '[' bytes, i.e. has no element type
   */
  public Atom parseForInnermostArrayElementDescriptor() throws IllegalArgumentException {
    if (val.length == 0) {
      throw new IllegalArgumentException("empty atom is not an array");
    }
    int dims = countLeadingBrackets();
    if (dims == val.length) {
      throw new IllegalStateException("not an array: " + this);
    }
    return findOrCreate(val, dims, val.length - dims);
  }

  /**
   * @return the number of consecutive '[' bytes at the start of this atom
   */
  private int countLeadingBrackets() {
    int n = 0;
    while (n < val.length && val[n] == '[') {
      n++;
    }
    return n;
  }

  /**
   * key for the dictionary.
   */
  private static final class AtomKey {
    /**
     * The utf8 value this atom key represents
     */
    private final byte[] val;

    /**
     * Cached hash code for this atom key.
     */
    private final int hash;

    /**
     * Create atom key from given utf8 sequence. The array is stored as-is, not copied.
     */
    private AtomKey(byte utf8[]) {
      // the hash is folded from the last byte to the first; Atom.hashCode() exposes this value, so keep it stable
      int tmp = 99989;
      for (int i = utf8.length; --i >= 0;) {
        tmp = 99991 * tmp + utf8[i];
      }
      this.val = utf8;
      this.hash = tmp;
    }

    /**
     * Two keys are equal iff they hold the same byte sequence. Returns false for null and for objects of any other type.
     *
     * @see java.lang.Object#equals(Object)
     */
    @Override
    public boolean equals(Object other) {
      if (this == other) {
        return true;
      }
      if (!(other instanceof AtomKey)) {
        return false;
      }
      AtomKey that = (AtomKey) other;
      // compare the cached hashes first: a cheap way to reject most unequal keys
      return hash == that.hash && Arrays.equals(val, that.val);
    }

    /**
     * Return printable representation of "this" atom key: the bytes decoded as standard UTF-8.
     */
    @Override
    public String toString() {
      return new String(val, StandardCharsets.UTF_8);
    }

    /**
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
      return hash;
    }
  }

  /**
   * @see java.lang.Object#hashCode()
   */
  @Override
  public int hashCode() {
    return hash;
  }

  /*
   * These are canonical
   *
   * @see java.lang.Object#equals(java.lang.Object)
   */
  @Override
  public boolean equals(Object obj) {
    return this == obj;
  }

  /**
   * return an array of bytes representing the utf8 characters in this; the result is a fresh copy that the caller may modify
   */
  public byte[] getValArray() {
    return val.clone();
  }

  /**
   * @return the byte at index i
   * @throws IllegalArgumentException if i is negative or not less than {@link #length()}
   */
  public byte getVal(int i) throws IllegalArgumentException {
    if (i < 0 || i >= val.length) {
      throw new IllegalArgumentException("Illegal index: " + i + " length is " + val.length);
    }
    return val[i];
  }

  /**
   * @return true iff this atom contains the specified byte
   */
  public boolean contains(byte b) {
    for (byte current : val) {
      if (current == b) {
        return true;
      }
    }
    return false;
  }

  /**
   * Find the last occurrence of b, counted from the end of this atom.
   *
   * @return the 1-based distance from the end of the last occurrence of b (1 if b is the final byte, {@link #length()} if the only
   *         occurrence is the first byte), or -1 if b does not occur
   */
  public int rIndex(byte b) {
    for (int i = val.length - 1; i >= 0; --i) {
      if (val[i] == b) {
        return val.length - i;
      }
    }
    return -1;
  }

  /**
   * @return the atom whose value is c followed by the bytes of bs
   */
  private static Atom concat(byte c, byte[] bs) {
    byte[] joined = new byte[bs.length + 1];
    joined[0] = c;
    System.arraycopy(bs, 0, joined, 1, bs.length);
    return internOwned(joined);
  }

  /**
   * @return the atom whose value is c followed by the bytes of b
   * @throws IllegalArgumentException if b is null
   */
  public static Atom concat(byte c, ImmutableByteArray b) {
    if (b == null) {
      throw new IllegalArgumentException("b is null");
    }
    return concat(c, b.b);
  }

  /**
   * @return the atom whose value is the bytes of ma followed by the bytes of mb
   * @throws IllegalArgumentException if either argument is null
   */
  public static Atom concat(Atom ma, Atom mb) {
    if ((ma == null) || (mb == null)) {
      throw new IllegalArgumentException("argument may not be null!");
    }
    byte[] joined = new byte[ma.val.length + mb.val.length];
    System.arraycopy(ma.val, 0, joined, 0, ma.val.length);
    System.arraycopy(mb.val, 0, joined, ma.val.length, mb.val.length);
    return internOwned(joined);
  }

  /**
   * Does b start with '['?
   *
   * @throws IllegalArgumentException if b is null
   */
  public static boolean isArrayDescriptor(ImmutableByteArray b) {
    if (b == null) {
      throw new IllegalArgumentException("b is null");
    }
    if (b.length() == 0) {
      return false;
    }
    return b.get(0) == '[';
  }

  /**
   * Special method that is called by Java deserialization process. Any HashCons'ed object should implement it, in order to make
   * sure that all equal objects are consolidated.
   *
   * @return the canonical Atom holding the same bytes as this freshly deserialized instance
   */
  private Object readResolve() {
    return findOrCreate(this.val);
  }
}