/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.llap; import com.google.common.collect.Lists; import com.google.protobuf.UnknownFieldSet; import java.lang.reflect.AccessibleObject; import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; import org.apache.hadoop.hive.ql.util.JavaDataModel; /** * Creates size estimators for java objects. The estimators attempt to do most of the reflection * work at initialization time, and also take some shortcuts, to minimize the amount of work done * during the actual estimation. TODO: clean up */ public class IncrementalObjectSizeEstimator { public static final JavaDataModel memoryModel = JavaDataModel.get(); private enum FieldType { PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER }; public static HashMap<Class<?>, ObjectEstimator> createEstimators(Object rootObj) { HashMap<Class<?>, ObjectEstimator> byType = new HashMap<>(); addHardcodedEstimators(byType); createEstimators(rootObj, byType); return byType; } public static void createEstimators(Object rootObj, HashMap<Class<?>, ObjectEstimator> byType) { // Code initially inspired by Google ObjectExplorer. // TODO: roll in the direct-only estimators from fields. Various other optimizations possible. Deque<Object> stack = createWorkStack(rootObj, byType); while (!stack.isEmpty()) { Object obj = stack.pop(); Class<?> clazz; if (obj instanceof Class<?>) { clazz = (Class<?>)obj; obj = null; } else { clazz = obj.getClass(); } ObjectEstimator estimator = byType.get(clazz); assert estimator != null; if (!estimator.isFromClass && obj == null) { // The object was added later for the same class; see addToProcessing. continue; } if (estimator.isProcessed()) continue; estimator.init(); for (Field field : getAllFields(clazz, LlapCacheableBuffer.class)) { Class<?> fieldClass = field.getType(); if (Modifier.isStatic(field.getModifiers())) continue; if (Class.class.isAssignableFrom(fieldClass)) continue; if (fieldClass.isPrimitive()) { estimator.addPrimitive(fieldClass); continue; } if (Enum.class.isAssignableFrom(fieldClass)) { estimator.addEnum(); continue; } boolean isArray = fieldClass.isArray(); if (isArray && fieldClass.getComponentType().isPrimitive()) { estimator.addField(FieldType.PRIMITIVE_ARRAY, field); continue; } Object fieldObj = null; if (obj != null) { fieldObj = extractFieldObj(obj, field); fieldClass = determineRealClass(byType, stack, field, fieldClass, fieldObj); } if (isArray) { estimator.addField(FieldType.OBJECT_ARRAY, field); addArrayEstimator(byType, stack, field, fieldObj); } else if (Collection.class.isAssignableFrom(fieldClass)) { estimator.addField(FieldType.COLLECTION, field); addCollectionEstimator(byType, stack, field, fieldClass, fieldObj); } else if (Map.class.isAssignableFrom(fieldClass)) { estimator.addField(FieldType.MAP, field); addMapEstimator(byType, stack, field, fieldClass, fieldObj); } else { estimator.addField(FieldType.OTHER, field); addToProcessing(byType, stack, fieldObj, fieldClass); } } estimator.directSize = (int) JavaDataModel.alignUp( estimator.directSize, memoryModel.memoryAlign()); } } private static Deque<Object> createWorkStack(Object rootObj, HashMap<Class<?>, ObjectEstimator> byType) { Deque<Object> stack = new ArrayDeque<Object>(32); Class<?> rootClass = rootObj.getClass(); if (Class.class.equals(rootClass)) { rootClass = (Class<?>)rootObj; rootObj = null; } else { // If root object is an array, map or collection, add estimators as for fields if (rootClass.isArray() && !rootClass.getComponentType().isPrimitive()) { addArrayEstimator(byType, stack, null, rootObj); } else if (Collection.class.isAssignableFrom(rootClass)) { addCollectionEstimator(byType, stack, null, rootClass, rootObj); } else if (Map.class.isAssignableFrom(rootClass)) { addMapEstimator(byType, stack, null, rootClass, rootObj); } } addToProcessing(byType, stack, rootObj, rootClass); return stack; } private static void addHardcodedEstimators( HashMap<Class<?>, ObjectEstimator> byType) { // Add hacks for well-known collections and maps to avoid estimating them. byType.put(ArrayList.class, new CollectionEstimator( memoryModel.arrayList(), memoryModel.ref())); byType.put(LinkedList.class, new CollectionEstimator( memoryModel.linkedListBase(), memoryModel.linkedListEntry())); byType.put(HashSet.class, new CollectionEstimator( memoryModel.hashSetBase(), memoryModel.hashSetEntry())); byType.put(HashMap.class, new CollectionEstimator( memoryModel.hashMapBase(), memoryModel.hashMapEntry())); // Add a hack for UnknownFieldSet because we assume it will never have anything (TODO: clear?) ObjectEstimator ufsEstimator = new ObjectEstimator(false); ufsEstimator.directSize = memoryModel.object() * 2 + memoryModel.ref(); byType.put(UnknownFieldSet.class, ufsEstimator); // TODO: 1-field hack for UnmodifiableCollection for protobuf too } private static Object extractFieldObj(Object obj, Field field) { try { return field.get(obj); } catch (IllegalAccessException e) { throw new AssertionError("IAE: " + field + "; " + e.getMessage()); } } private static Class<?> determineRealClass(HashMap<Class<?>, ObjectEstimator> byType, Deque<Object> stack, Field field, Class<?> fieldClass, Object fieldObj) { if (fieldObj == null) return fieldClass; Class<?> realFieldClass = fieldObj.getClass(); if (!fieldClass.equals(realFieldClass)) { addToProcessing(byType, stack, null, fieldClass); return realFieldClass; } return fieldClass; } private static void addCollectionEstimator(HashMap<Class<?>, ObjectEstimator> byType, Deque<Object> stack, Field field, Class<?> fieldClass, Object fieldObj) { Collection<?> fieldCol = null; if (fieldObj != null) { fieldCol = (Collection<?>)fieldObj; if (fieldCol.size() == 0) { fieldCol = null; LlapIoImpl.LOG.trace("Empty collection {}", field); } } if (fieldCol != null) { for (Object element : fieldCol) { if (element != null) { addToProcessing(byType, stack, element, element.getClass()); } } } if (field != null) { Class<?> collectionArg = getCollectionArg(field); if (collectionArg != null) { addToProcessing(byType, stack, null, collectionArg); } // TODO: there was code here to create guess-estimate for collection wrt how usage changes // when removing elements. However it's too error-prone for anything involving // pre-allocated capacity, so it was discarded. // We will estimate collection as an object (only if it's a field). addToProcessing(byType, stack, fieldObj, fieldClass); } } private static void addMapEstimator(HashMap<Class<?>, ObjectEstimator> byType, Deque<Object> stack, Field field, Class<?> fieldClass, Object fieldObj) { Map<?, ?> fieldCol = null; if (fieldObj != null) { fieldCol = (Map<?, ?>)fieldObj; if (fieldCol.size() == 0) { fieldCol = null; LlapIoImpl.LOG.trace("Empty map {}", field); } } if (fieldCol != null) { for (Map.Entry<?, ?> element : fieldCol.entrySet()) { Object k = element.getKey(), v = element.getValue(); if (k != null) { addToProcessing(byType, stack, k, k.getClass()); } if (v != null) { addToProcessing(byType, stack, v, v.getClass()); } } } if (field != null) { Class<?>[] mapArgs = getMapArgs(field); if (mapArgs != null) { for (Class<?> mapArg : mapArgs) { addToProcessing(byType, stack, null, mapArg); } } // We will estimate map as an object (only if it's a field). addToProcessing(byType, stack, fieldObj, fieldClass); } } private static Class<?>[] getMapArgs(Field field) { // TODO: this makes many assumptions, e.g. on how generic args are done Type genericType = field.getGenericType(); if (genericType instanceof ParameterizedType) { Type[] types = ((ParameterizedType)genericType).getActualTypeArguments(); if (types.length == 2 && types[0] instanceof Class<?> && types[1] instanceof Class<?>) { return new Class<?>[] { (Class<?>)types[0], (Class<?>)types[1] }; } else { // TODO: we could try to get the declaring object and infer argument... stupid Java. LlapIoImpl.LOG.trace("Cannot determine map type: {}", field); } } else { // TODO: we could try to get superclass or generic interfaces. LlapIoImpl.LOG.trace("Non-parametrized map type: {}", field); } return null; } private static Class<?> getCollectionArg(Field field) { // TODO: this makes many assumptions, e.g. on how generic args are done Type genericType = field.getGenericType(); if (genericType instanceof ParameterizedType) { Type type = ((ParameterizedType)genericType).getActualTypeArguments()[0]; if (type instanceof Class<?>) { return (Class<?>)type; } else { // TODO: we could try to get the declaring object and infer argument... stupid Java. LlapIoImpl.LOG.trace("Cannot determine collection type: {}", field); } } else { // TODO: we could try to get superclass or generic interfaces. LlapIoImpl.LOG.trace("Non-parametrized collection type: {}", field); } return null; } private static void addArrayEstimator( HashMap<Class<?>, ObjectEstimator> byType, Deque<Object> stack, Field field, Object fieldObj) { if (fieldObj == null) return; int arrayLen = Array.getLength(fieldObj); LlapIoImpl.LOG.trace("Empty array {}", field); for (int i = 0; i < arrayLen; ++i) { Object element = Array.get(fieldObj, i); if (element != null) { addToProcessing(byType, stack, element, element.getClass()); } } Class<?> elementClass = fieldObj.getClass().getComponentType(); addToProcessing(byType, stack, null, elementClass); } private static void addToProcessing(HashMap<Class<?>, ObjectEstimator> byType, Deque<Object> stack, Object element, Class<?> elementClass) { ObjectEstimator existing = byType.get(elementClass); if (existing != null && (!existing.isFromClass || (element == null))) return; if (elementClass.isInterface()) { if (element == null) return; elementClass = element.getClass(); } byType.put(elementClass, new ObjectEstimator(element == null)); stack.push(element == null ? elementClass : element); } private static int getPrimitiveSize(Class<?> fieldClass) { if (fieldClass == long.class || fieldClass == double.class) return 8; if (fieldClass == int.class || fieldClass == float.class) return 4; if (fieldClass == short.class || fieldClass == char.class) return 2; if (fieldClass == byte.class || fieldClass == boolean.class) return 1; throw new AssertionError("Unrecognized primitive " + fieldClass.getName()); } private static Iterable<Field> getAllFields(Class<?> clazz, Class<?> topClass) { List<Field> fields = Lists.newArrayListWithCapacity(8); while (clazz != null) { fields.addAll(Arrays.asList(clazz.getDeclaredFields())); clazz = clazz.getSuperclass(); if (clazz == topClass) break; } //all together so there is only one security check AccessibleObject.setAccessible(fields.toArray(new AccessibleObject[fields.size()]), true); return fields; } private static class FieldAndType { public FieldAndType(Field field, FieldType type) { this.field = field; this.type = type; } public Field field; public FieldType type; } public static class ObjectEstimator { private List<FieldAndType> fields; protected int directSize = -1; private boolean isFromClass; public ObjectEstimator(boolean isFromClass) { this.isFromClass = isFromClass; } boolean isProcessed() { return directSize >= 0; } private void init() { assert directSize == -1; directSize = memoryModel.object(); } private void addPrimitive(Class<?> clazz) { directSize += getPrimitiveSize(clazz); } private void addEnum() { directSize += memoryModel.ref(); } private void addField(FieldType type, Field field) { if (fields == null) { fields = new ArrayList<>(); } directSize += memoryModel.ref(); fields.add(new FieldAndType(field, type)); } public int estimate( Object obj, HashMap<Class<?>, ObjectEstimator> parent) { IdentityHashMap<Object, Boolean> uniqueObjects = new IdentityHashMap<>(); uniqueObjects.put(obj, Boolean.TRUE); return estimate(obj, parent, uniqueObjects); } protected int estimate(Object obj, HashMap<Class<?>, ObjectEstimator> parent, IdentityHashMap<Object, Boolean> uniqueObjects) { // TODO: maybe use stack of est+obj pairs instead of recursion. if (fields == null) return directSize; int referencedSize = 0; for (FieldAndType e : fields) { Object fieldObj; try { fieldObj = e.field.get(obj); } catch (IllegalAccessException ex) { throw new AssertionError("IAE: " + ex.getMessage()); } // reference is already accounted for in the directSize. if (fieldObj == null) continue; if (null != uniqueObjects.put(fieldObj, Boolean.TRUE)) continue; switch (e.type) { case COLLECTION: { Collection<?> c = (Collection<?>)fieldObj; ObjectEstimator collEstimator = parent.get(fieldObj.getClass()); if (collEstimator == null) { // We have no estimator for this type... assume low overhead and hope for the best. LlapIoImpl.LOG.trace("Approximate estimation for collection {} from {}", e.field, fieldObj.getClass().getName()); referencedSize += memoryModel.object(); referencedSize += estimateCollectionElements(parent, c, e.field, uniqueObjects); referencedSize += memoryModel.array() + c.size() * memoryModel.ref(); } else if (collEstimator instanceof CollectionEstimator) { referencedSize += memoryModel.object(); referencedSize += estimateCollectionElements(parent, c, e.field, uniqueObjects); referencedSize += ((CollectionEstimator)collEstimator).estimateOverhead(c.size()); } else { // We decided to treat this collection as regular object. LlapIoImpl.LOG.trace("Verbose estimation for collection {} from {}", fieldObj.getClass().getName(), e.field); referencedSize += collEstimator.estimate(c, parent, uniqueObjects); } break; } case MAP: { Map<?, ?> m = (Map<?, ?>)fieldObj; ObjectEstimator collEstimator = parent.get(fieldObj.getClass()); if (collEstimator == null) { // We have no estimator for this type... assume low overhead and hope for the best. LlapIoImpl.LOG.trace("Approximate estimation for map {} from {}", fieldObj.getClass().getName(), e.field); referencedSize += memoryModel.object(); referencedSize += estimateMapElements(parent, m, e.field, uniqueObjects); referencedSize += memoryModel.array() + m.size() * (memoryModel.ref() * 2 + memoryModel.object()); } else if (collEstimator instanceof CollectionEstimator) { referencedSize += memoryModel.object(); referencedSize += estimateMapElements(parent, m, e.field, uniqueObjects); referencedSize += ((CollectionEstimator)collEstimator).estimateOverhead(m.size()); } else { // We decided to treat this map as regular object. LlapIoImpl.LOG.trace("Verbose estimation for map {} from {}", fieldObj.getClass().getName(), e.field); referencedSize += collEstimator.estimate(m, parent, uniqueObjects); } break; } case OBJECT_ARRAY: { int len = Array.getLength(fieldObj); referencedSize += JavaDataModel.alignUp( memoryModel.array() + len * memoryModel.ref(), memoryModel.memoryAlign()); if (len == 0) continue; referencedSize += estimateArrayElements(parent, e, fieldObj, len, uniqueObjects); break; } case PRIMITIVE_ARRAY: { int arraySize = memoryModel.array(); int len = Array.getLength(fieldObj); if (len != 0) { int elementSize = getPrimitiveSize(e.field.getType().getComponentType()); arraySize += elementSize * len; arraySize = (int) JavaDataModel.alignUp(arraySize, memoryModel.memoryAlign()); } referencedSize += arraySize; break; } case OTHER: { ObjectEstimator fieldEstimator = parent.get(fieldObj.getClass()); if (fieldEstimator == null) { // TODO: use reflection? throw new AssertionError("Don't know how to measure " + fieldObj.getClass().getName() + " from " + e.field); } referencedSize += fieldEstimator.estimate(fieldObj, parent, uniqueObjects); break; } default: throw new AssertionError("Unknown type " + e.type); } } return directSize + referencedSize; } private int estimateArrayElements(HashMap<Class<?>, ObjectEstimator> parent, FieldAndType e, Object fieldObj, int len, IdentityHashMap<Object, Boolean> uniqueObjects) { int result = 0; Class<?> lastClass = e.field.getType().getComponentType(); ObjectEstimator lastEstimator = parent.get(lastClass); for (int i = 0; i < len; ++i) { Object element = Array.get(fieldObj, i); if (element == null) continue; if (null != uniqueObjects.put(element, Boolean.TRUE)) continue; Class<?> elementClass = element.getClass(); if (lastClass != elementClass) { lastClass = elementClass; lastEstimator = parent.get(lastClass); if (lastEstimator == null) { // TODO: use reflection? throw new AssertionError("Don't know how to measure element " + lastClass.getName() + " from " + e.field); } } result += lastEstimator.estimate(element, parent, uniqueObjects); } return result; } protected int estimateCollectionElements(HashMap<Class<?>, ObjectEstimator> parent, Collection<?> c, Field field, IdentityHashMap<Object, Boolean> uniqueObjects) { ObjectEstimator lastEstimator = null; Class<?> lastClass = null; int result = 0; for (Object element : c) { if (element == null) continue; if (null != uniqueObjects.put(element, Boolean.TRUE)) continue; Class<?> elementClass = element.getClass(); if (lastClass != elementClass) { lastClass = elementClass; lastEstimator = parent.get(lastClass); if (lastEstimator == null) { // TODO: use reflection? throw new AssertionError("Don't know how to measure element " + lastClass.getName() + " from " + field); } } result += lastEstimator.estimate(element, parent, uniqueObjects); } return result; } protected int estimateMapElements(HashMap<Class<?>, ObjectEstimator> parent, Map<?, ?> m, Field field, IdentityHashMap<Object, Boolean> uniqueObjects) { ObjectEstimator keyEstimator = null, valueEstimator = null; Class<?> lastKeyClass = null, lastValueClass = null; int result = 0; for (Map.Entry<?, ?> element : m.entrySet()) { Object key = element.getKey(), value = element.getValue(); if (null != uniqueObjects.put(key, Boolean.TRUE)) continue; Class<?> keyClass = key.getClass(); if (lastKeyClass != keyClass) { lastKeyClass = keyClass; keyEstimator = parent.get(lastKeyClass); if (keyEstimator == null) { // TODO: use reflection? throw new AssertionError("Don't know how to measure key " + lastKeyClass.getName() + " from " + field); } } result += keyEstimator.estimate(element, parent, uniqueObjects); if (value == null) continue; if (null != uniqueObjects.put(value, Boolean.TRUE)) continue; Class<?> valueClass = value.getClass(); if (lastValueClass != valueClass) { lastValueClass = valueClass; valueEstimator = parent.get(lastValueClass); if (valueEstimator == null) { // TODO: use reflection? throw new AssertionError("Don't know how to measure value " + lastValueClass.getName() + " from " + field); } } result += valueEstimator.estimate(element, parent, uniqueObjects); } return result; } } private static class CollectionEstimator extends ObjectEstimator { private int perEntryOverhead; public CollectionEstimator(int base, int perElement) { super(false); directSize = base; perEntryOverhead = perElement; } @Override protected int estimate(Object obj, HashMap<Class<?>, ObjectEstimator> parent, IdentityHashMap<Object, Boolean> uniqueObjects) { if (obj instanceof Collection<?>) { Collection<?> c = (Collection<?>)obj; int overhead = estimateOverhead(c.size()), elements = estimateCollectionElements( parent, c, null, uniqueObjects); return overhead + elements + memoryModel.object(); } else if (obj instanceof Map<?, ?>) { Map<?, ?> m = (Map<?, ?>)obj; int overhead = estimateOverhead(m.size()), elements = estimateMapElements( parent, m, null, uniqueObjects); return overhead + elements + memoryModel.object(); } throw new AssertionError(obj.getClass().getName()); } int estimateOverhead(int size) { return directSize + perEntryOverhead * size; } } public static void addEstimator(String className, HashMap<Class<?>, ObjectEstimator> sizeEstimators) { Class<?> clazz = null; try { clazz = Class.forName(className); } catch (ClassNotFoundException e) { // Ignore and hope for the best. LlapIoImpl.LOG.warn("Cannot find " + className); return; } IncrementalObjectSizeEstimator.createEstimators(clazz, sizeEstimators); } }