package edu.stanford.nlp.util; import java.io.IOException; import java.io.ObjectOutputStream; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Consumer; import edu.stanford.nlp.util.logging.PrettyLogger; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.util.logging.Redwood.RedwoodChannels; /** * <p> * Base implementation of {@link CoreMap} backed by two Java arrays. * </p> * * <p> * Reasonable care has been put into ensuring that this class is both fast and * has a light memory footprint. * </p> * * <p> * Note that like the base classes in the Collections API, this implementation * is <em>not thread-safe</em>. For speed reasons, these methods are not * synchronized. A synchronized wrapper could be developed by anyone so * inclined. * </p> * * <p> * Equality is defined over the complete set of keys and values currently * stored in the map. Because this class is mutable, it should not be used * as a key in a Map. * </p> * * @author dramage * @author rafferty */ public class ArrayCoreMap implements CoreMap /*, Serializable */ { /** * A listener for when a key is retrieved by the CoreMap. * This should only be used for testing. */ public static Consumer<Class<? extends Key<?>>> listener; // = null; /** Initial capacity of the array */ private static final int INITIAL_CAPACITY = 4; /** Array of keys */ private Class<? extends Key<?>>[] keys; /** Array of values */ private Object[] values; /** Total number of elements actually in keys,values */ private int size; // = 0; /** * Default constructor - initializes with default initial annotation * capacity of 4. */ public ArrayCoreMap() { this(INITIAL_CAPACITY); } /** * Initializes this ArrayCoreMap, pre-allocating arrays to hold * up to capacity key,value pairs. This array will grow if necessary. * * @param capacity Initial capacity of object in key,value pairs */ public ArrayCoreMap(int capacity) { keys = ErasureUtils.uncheckedCast(new Class[capacity]); values = new Object[capacity]; // size starts at 0 } /** * Copy constructor. * @param other The ArrayCoreMap to copy. It may not be null. */ public ArrayCoreMap(ArrayCoreMap other) { size = other.size; keys = Arrays.copyOf(other.keys, size); values = Arrays.copyOf(other.values, size); } /** * Copy constructor. * @param other The ArrayCoreMap to copy. It may not be null. */ @SuppressWarnings("unchecked") public ArrayCoreMap(CoreMap other) { Set<Class<?>> otherKeys = other.keySet(); size = otherKeys.size(); keys = new Class[size]; values = new Object[size]; int i = 0; for (Class key : otherKeys) { this.keys[i] = key; this.values[i] = other.get(key); i++; } } /** * {@inheritDoc} */ @Override @SuppressWarnings("unchecked") public <VALUE> VALUE get(Class<? extends Key<VALUE>> key) { for (int i = 0; i < size; i++) { if (key == keys[i]) { if (listener != null) { listener.accept(key); // For tracking which entities were returned by the CoreMap } return (VALUE)values[i]; } } return null; } /** * {@inheritDoc} */ @Override @SuppressWarnings("unchecked") public <VALUE> VALUE set(Class<? extends Key<VALUE>> key, VALUE value) { // search array for existing value to replace for (int i = 0; i < size; i++) { if (keys[i] == key) { VALUE rv = (VALUE)values[i]; values[i] = value; return rv; } } // not found in arrays, add to end ... // increment capacity of arrays if necessary if (size >= keys.length) { int capacity = keys.length + (keys.length < 16 ? 4: 8); Class[] newKeys = new Class[capacity]; Object[] newValues = new Object[capacity]; System.arraycopy(keys, 0, newKeys, 0, size); System.arraycopy(values, 0, newValues, 0, size); keys = newKeys; values = newValues; } // store value keys[size] = key; values[size] = value; size++; return null; } /** * {@inheritDoc} */ @Override public Set<Class<?>> keySet() { return new AbstractSet<Class<?>>() { @Override public Iterator<Class<?>> iterator() { return new Iterator<Class<?>>() { private int i; // = 0; @Override public boolean hasNext() { return i < size; } @Override public Class<?> next() { try { return keys[i++]; } catch (ArrayIndexOutOfBoundsException aioobe) { throw new NoSuchElementException("ArrayCoreMap keySet iterator exhausted"); } } @Override @SuppressWarnings("unchecked") public void remove() { ArrayCoreMap.this.remove((Class)keys[i]); } }; } @Override public int size() { return size; } }; } /** * Return a set of keys such that the value of that key is not null. * * @return A hash set such that each element of the set is a key in this CoreMap that has a * non-null value. */ public Set<Class<?>> keySetNotNull() { Set<Class<?>> mapKeys = new IdentityHashSet<>(); for (int i = 0; i < size(); ++i) { if (values[i] != null) { mapKeys.add(this.keys[i]); } } return mapKeys; } /** * {@inheritDoc} */ @Override @SuppressWarnings("unchecked") public <VALUE> VALUE remove(Class<? extends Key<VALUE>> key) { Object rv = null; for (int i = 0; i < size; i++) { if (keys[i] == key) { rv = values[i]; if (i < size - 1) { System.arraycopy(keys, i+1, keys, i, size-(i+1)); System.arraycopy(values, i+1, values, i, size-(i+1)); } size--; break; } } return (VALUE)rv; } /** * {@inheritDoc} */ @Override public <VALUE> boolean containsKey(Class<? extends Key<VALUE>> key) { for (int i = 0; i < size; i++) { if (keys[i] == key) { return true; } } return false; } /** * Reduces memory consumption to the minimum for representing the values * currently stored stored in this object. */ public void compact() { if (keys.length > size) { Class[] newKeys = new Class[size]; Object[] newValues = new Object[size]; System.arraycopy(keys, 0, newKeys, 0, size); System.arraycopy(values, 0, newValues, 0, size); keys = ErasureUtils.uncheckedCast(newKeys); values = newValues; } } public void setCapacity(int newSize) { if (size > newSize) { throw new RuntimeException("You cannot set capacity to smaller than the current size."); } Class[] newKeys = new Class[newSize]; Object[] newValues = new Object[newSize]; System.arraycopy(keys, 0, newKeys, 0, size); System.arraycopy(values, 0, newValues, 0, size); keys = ErasureUtils.uncheckedCast(newKeys); values = newValues; } /** * Returns the number of elements in this map. * @return The number of elements in this map. */ @Override public int size() { return size; } /** * Keeps track of which ArrayCoreMaps have had toString called on * them. We do not want to loop forever when there are cycles in * the annotation graph. This is kept on a per-thread basis so that * each thread where toString gets called can keep track of its own * state. When a call to toString is about to return, this is reset * to null for that particular thread. */ private static final ThreadLocal<IdentityHashSet<CoreMap>> toStringCalled = new ThreadLocal<IdentityHashSet<CoreMap>>() { @Override protected IdentityHashSet<CoreMap> initialValue() { return new IdentityHashSet<>(); } }; /** Prints a full dump of a CoreMap. This method is robust to * circularity in the CoreMap. * * @return A String representation of the CoreMap */ @Override public String toString() { IdentityHashSet<CoreMap> calledSet = toStringCalled.get(); boolean createdCalledSet = calledSet.isEmpty(); if (calledSet.contains(this)) { return "[...]"; } calledSet.add(this); StringBuilder s = new StringBuilder("["); for (int i = 0; i < size; i++) { s.append(keys[i].getSimpleName()); s.append('='); s.append(values[i]); if (i < size-1) { s.append(' '); } } s.append(']'); if (createdCalledSet) { toStringCalled.remove(); } else { // Remove the object from the already called set so that // potential later calls in this object graph have something // more description than [...] calledSet.remove(this); } return s.toString(); } // support caching of String form of keys for speedier printing private static final ConcurrentHashMap<Class, String> shortNames = new ConcurrentHashMap<>(12, 0.75f, 1); private static final int SHORTER_STRING_CHARSTRING_START_SIZE = 64; private static final int SHORTER_STRING_MAX_SIZE_BEFORE_HASHING = 5; /** * {@inheritDoc} */ @Override public String toShorterString(String... what) { StringBuilder s = new StringBuilder(SHORTER_STRING_CHARSTRING_START_SIZE); s.append('['); Set<String> whatSet = null; if (size > SHORTER_STRING_MAX_SIZE_BEFORE_HASHING && what.length > SHORTER_STRING_MAX_SIZE_BEFORE_HASHING) { // if there's a lot of stuff, hash. whatSet = new HashSet<>(Arrays.asList(what)); } for (int i = 0; i < size; i++) { Class klass = keys[i]; String name = shortNames.get(klass); if (name == null) { name = klass.getSimpleName(); int annoIdx = name.lastIndexOf("Annotation"); if (annoIdx >= 0) { name = name.substring(0, annoIdx); } shortNames.put(klass, name); } boolean include; if (what.length == 0) { include = true; } else if (whatSet != null) { include = whatSet.contains(name); } else { include = false; for (String item : what) { if (item.equals(name)) { include = true; break; } } } if (include) { if (s.length() > 1) { s.append(' '); } s.append(name); s.append('='); s.append(values[i]); } } s.append(']'); return s.toString(); } /** This gives a very short String representation of a CoreMap * by leaving it to the content to reveal what field is being printed. * * @param what An array (varargs) of Strings that say what annotation keys * to print. These need to be provided in a shortened form where you * are just giving the part of the class name without package and up to * "Annotation". That is, * edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation * -> PartOfSpeech . As a special case, an empty array means * to print everything, not nothing. * @return Brief string where the field values are just separated by a * character. If the string contains spaces, it is wrapped in "{...}". */ public String toShortString(String... what) { return toShortString('/', what); } /** This gives a very short String representation of a CoreMap * by leaving it to the content to reveal what field is being printed. * * @param separator Character placed between fields in output * @param what An array (varargs) of Strings that say what annotation keys * to print. These need to be provided in a shortened form where you * are just giving the part of the class name without package and up to * "Annotation". That is, * edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation * -> PartOfSpeech . As a special case, an empty array means * to print everything, not nothing. * @return Brief string where the field values are just separated by a * character. If the string contains spaces, it is wrapped in "{...}". */ public String toShortString(char separator, String... what) { StringBuilder s = new StringBuilder(); for (int i = 0; i < size; i++) { boolean include; if (what.length > 0) { String name = keys[i].getSimpleName(); int annoIdx = name.lastIndexOf("Annotation"); if (annoIdx >= 0) { name = name.substring(0, annoIdx); } include = false; for (String item : what) { if (item.equals(name)) { include = true; break; } } } else { include = true; } if (include) { if (s.length() > 0) { s.append(separator); } s.append(values[i]); } } String answer = s.toString(); if (answer.indexOf(' ') < 0) { return answer; } else { return '{' + answer + '}'; } } /** * Keeps track of which pairs of ArrayCoreMaps have had equals * called on them. We do not want to loop forever when there are * cycles in the annotation graph. This is kept on a per-thread * basis so that each thread where equals gets called can keep * track of its own state. When a call to toString is about to * return, this is reset to null for that particular thread. */ private static final ThreadLocal<TwoDimensionalMap<CoreMap, CoreMap, Boolean>> equalsCalled = new ThreadLocal<>(); /** * Two CoreMaps are equal iff all keys and values are .equal. */ @SuppressWarnings("unchecked") @Override public boolean equals(Object obj) { if (!(obj instanceof CoreMap)) { return false; } if (obj instanceof HashableCoreMap) { // overridden behavior for HashableCoreMap return obj.equals(this); } if (obj instanceof ArrayCoreMap) { // specialized equals for ArrayCoreMap return equals((ArrayCoreMap)obj); } // TODO: make the general equality work in the situation of loops // in the object graph // general equality CoreMap other = (CoreMap)obj; if ( ! this.keySet().equals(other.keySet())) { return false; } for (Class key : this.keySet()) { if (!other.containsKey(key)) { return false; } Object thisV = this.get(key), otherV = other.get(key); if (thisV == otherV) { continue; } // the two values must be unequal, so if either is null, the other isn't if (thisV == null || otherV == null) { return false; } if ( ! thisV.equals(otherV)) { return false; } } return true; } private boolean equals(ArrayCoreMap other) { TwoDimensionalMap<CoreMap, CoreMap, Boolean> calledMap = equalsCalled.get(); boolean createdCalledMap = (calledMap == null); if (createdCalledMap) { calledMap = TwoDimensionalMap.identityHashMap(); equalsCalled.set(calledMap); } // Note that for the purposes of recursion, we assume the two maps // are equals. The two maps will therefore be equal if they // encounter each other again during the recursion unless there is // some other key that causes the equality to fail. // We do not need to later put false, as the entire call to equals // will unwind with false if any one equality check returns false. // TODO: since we only ever keep "true", we would rather use a // TwoDimensionalSet, but no such thing exists if (calledMap.contains(this, other)) { return true; } boolean result = true; calledMap.put(this, other, true); calledMap.put(other, this, true); if (this.size != other.size) { result = false; } else { for (int i = 0; i < this.size; i++) { // test if other contains this key,value pair boolean matched = false; for (int j = 0; j < other.size; j++) { if (this.keys[i] == other.keys[j]) { if ((this.values[i] == null && other.values[j] != null) || (this.values[i] != null && other.values[j] == null)) { matched = false; break; } if ((this.values[i] == null && other.values[j] == null) || (this.values[i].equals(other.values[j]))) { matched = true; break; } } } if (!matched) { result = false; break; } } } if (createdCalledMap) { equalsCalled.set(null); } return result; } /** * Keeps track of which ArrayCoreMaps have had hashCode called on * them. We do not want to loop forever when there are cycles in * the annotation graph. This is kept on a per-thread basis so that * each thread where hashCode gets called can keep track of its own * state. When a call to toString is about to return, this is reset * to null for that particular thread. */ private static final ThreadLocal<IdentityHashSet<CoreMap>> hashCodeCalled = new ThreadLocal<>(); /** * Returns a composite hashCode over all the keys and values currently * stored in the map. Because they may change over time, this class * is not appropriate for use as map keys. */ @Override public int hashCode() { IdentityHashSet<CoreMap> calledSet = hashCodeCalled.get(); boolean createdCalledSet = (calledSet == null); if (createdCalledSet) { calledSet = new IdentityHashSet<>(); hashCodeCalled.set(calledSet); } if (calledSet.contains(this)) { return 0; } calledSet.add(this); int keysCode = 0; int valuesCode = 0; for (int i = 0; i < size; i++) { keysCode += (i < keys.length && values[i] != null ? keys[i].hashCode() : 0); valuesCode += (i < values.length && values[i] != null ? values[i].hashCode() : 0); } if (createdCalledSet) { hashCodeCalled.set(null); } else { // Remove the object after processing is complete so that if // there are multiple instances of this CoreMap in the overall // object graph, they each have their hash code calculated. // TODO: can we cache this for later? calledSet.remove(this); } return keysCode * 37 + valuesCode; } // // serialization magic // /** Serialization version id */ private static final long serialVersionUID = 1L; /** * Overridden serialization method: compacts our map before writing. * * @param out Stream to write to * @throws IOException If IO error */ private void writeObject(ObjectOutputStream out) throws IOException { compact(); out.defaultWriteObject(); } // TODO: make prettyLog work in the situation of loops // in the object graph /** * {@inheritDoc} */ @Override @SuppressWarnings("unchecked") public void prettyLog(RedwoodChannels channels, String description) { Redwood.startTrack(description); // sort keys by class name List<Class> sortedKeys = new ArrayList<>(this.keySet()); Collections.sort(sortedKeys, (a, b) -> a.getCanonicalName().compareTo(b.getCanonicalName())); // log key/value pairs for (Class key : sortedKeys) { String keyName = key.getCanonicalName().replace("class ", ""); Object value = this.get(key); if (PrettyLogger.dispatchable(value)) { PrettyLogger.log(channels, keyName, value); } else { channels.logf("%s = %s", keyName, value); } } Redwood.endTrack(description); } }