/*
* Copyright 2014 Edward Aftandilian. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.tufts.eaftan.hprofparser.parser;
import com.google.common.base.Preconditions;
import edu.tufts.eaftan.hprofparser.handler.RecordHandler;
import edu.tufts.eaftan.hprofparser.parser.datastructures.AllocSite;
import edu.tufts.eaftan.hprofparser.parser.datastructures.CPUSample;
import edu.tufts.eaftan.hprofparser.parser.datastructures.ClassInfo;
import edu.tufts.eaftan.hprofparser.parser.datastructures.Constant;
import edu.tufts.eaftan.hprofparser.parser.datastructures.Instance;
import edu.tufts.eaftan.hprofparser.parser.datastructures.InstanceField;
import edu.tufts.eaftan.hprofparser.parser.datastructures.Static;
import edu.tufts.eaftan.hprofparser.parser.datastructures.Type;
import edu.tufts.eaftan.hprofparser.parser.datastructures.Value;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
/**
* Parses an hprof heap dump file in binary format. The hprof dump file format is documented in
* the hprof_b_spec.h file in the hprof source, which is open-source and available from Oracle.
*/
public class HprofParser {
private RecordHandler handler;
private HashMap<Long, ClassInfo> classMap;
private ArrayList<Instance> instanceList;
public HprofParser(RecordHandler handler) {
this.handler = handler;
classMap = new HashMap<Long, ClassInfo>();
instanceList = new ArrayList<Instance>();
}
public void parse(DataInput in) throws IOException {
/* The file format looks like this:
*
* header:
* [u1]* - a null-terminated sequence of bytes representing the format
* name and version
* u4 - size of identifiers/pointers
* u4 - high number of word of number of milliseconds since 0:00 GMT,
* 1/1/70
* u4 - low number of word of number of milliseconds since 0:00 GMT,
* 1/1/70
*
* records:
* u1 - tag denoting the type of record
* u4 - number of microseconds since timestamp in header
* u4 - number of bytes that follow this field in this record
* [u1]* - body
*/
// header
String format = readUntilNull(in);
int idSize = in.readInt();
long startTime = in.readLong();
handler.header(format, idSize, startTime);
// records
boolean done;
do {
done = parseRecord(in, idSize);
} while (!done);
handler.finished();
}
public static String readUntilNull(DataInput in) throws IOException {
int bytesRead = 0;
byte[] bytes = new byte[25];
while ((bytes[bytesRead] = in.readByte()) != 0) {
bytesRead++;
if (bytesRead >= bytes.length) {
byte[] newBytes = new byte[bytesRead + 20];
for (int i=0; i<bytes.length; i++) {
newBytes[i] = bytes[i];
}
bytes = newBytes;
}
}
return new String(bytes, 0, bytesRead);
}
/**
* @return true if there are no more records to parse
*/
private boolean parseRecord(DataInput in, int idSize) throws IOException {
/* format:
* u1 - tag
* u4 - time
* u4 - length
* [u1]* - body
*/
// if we get an EOF on this read, it just means we're done
byte tag;
try {
tag = in.readByte();
} catch (EOFException e) {
return true;
}
// otherwise propagate the EOFException
int time = in.readInt(); // TODO(eaftan): we might want time passed to handler fns
int bytesLeft = in.readInt();
long l1, l2, l3, l4;
int i1, i2, i3, i4, i5, i6, i7, i8, i9;
short s1;
byte b1;
float f1;
byte[] bArr1;
long[] lArr1;
switch (tag) {
case 0x1:
// String in UTF-8
l1 = readId(idSize, in);
bytesLeft -= idSize;
bArr1 = new byte[bytesLeft];
in.readFully(bArr1);
handler.stringInUTF8(l1, new String(bArr1));
break;
case 0x2:
// Load class
i1 = in.readInt();
l1 = readId(idSize, in);
i2 = in.readInt();
l2 = readId(idSize, in);
handler.loadClass(i1, l1, i2, l2);
break;
case 0x3:
// Unload class
i1 = in.readInt();
handler.unloadClass(i1);
break;
case 0x4:
// Stack frame
l1 = readId(idSize, in);
l2 = readId(idSize, in);
l3 = readId(idSize, in);
l4 = readId(idSize, in);
i1 = in.readInt();
i2 = in.readInt();
handler.stackFrame(l1, l2, l3, l4, i1, i2);
break;
case 0x5:
// Stack trace
i1 = in.readInt();
i2 = in.readInt();
i3 = in.readInt();
bytesLeft -= 12;
lArr1 = new long[bytesLeft/idSize];
for (int i=0; i<lArr1.length; i++) {
lArr1[i] = readId(idSize, in);
}
handler.stackTrace(i1, i2, i3, lArr1);
break;
case 0x6:
// Alloc sites
s1 = in.readShort();
f1 = in.readFloat();
i1 = in.readInt();
i2 = in.readInt();
l1 = in.readLong();
l2 = in.readLong();
i3 = in.readInt(); // num of sites that follow
AllocSite[] allocSites = new AllocSite[i3];
for (int i=0; i<allocSites.length; i++) {
b1 = in.readByte();
i4 = in.readInt();
i5 = in.readInt();
i6 = in.readInt();
i7 = in.readInt();
i8 = in.readInt();
i9 = in.readInt();
allocSites[i] = new AllocSite(b1, i4, i5, i6, i7, i8, i9);
}
handler.allocSites(s1, f1, i1, i2, l1, l2, allocSites);
break;
case 0x7:
// Heap summary
i1 = in.readInt();
i2 = in.readInt();
l1 = in.readLong();
l2 = in.readLong();
handler.heapSummary(i1, i2, l1, l2);
break;
case 0xa:
// Start thread
i1 = in.readInt();
l1 = readId(idSize, in);
i2 = in.readInt();
l2 = readId(idSize, in);
l3 = readId(idSize, in);
l4 = readId(idSize, in);
handler.startThread(i1, l1, i2, l2, l3, l4);
break;
case 0xb:
// End thread
i1 = in.readInt();
handler.endThread(i1);
break;
case 0xc:
// Heap dump
handler.heapDump();
while (bytesLeft > 0) {
bytesLeft -= parseHeapDump(in, idSize);
}
processInstances(idSize);
handler.heapDumpEnd();
break;
case 0x1c:
// Heap dump segment
handler.heapDumpSegment();
while (bytesLeft > 0) {
bytesLeft -= parseHeapDump(in, idSize);
}
break;
case 0x2c:
// Heap dump end (of segments)
processInstances(idSize);
handler.heapDumpEnd();
break;
case 0xd:
// CPU samples
i1 = in.readInt();
i2 = in.readInt(); // num samples that follow
CPUSample[] samples = new CPUSample[i2];
for (int i=0; i<samples.length; i++) {
i3 = in.readInt();
i4 = in.readInt();
samples[i] = new CPUSample(i3, i4);
}
handler.cpuSamples(i1, samples);
break;
case 0xe:
// Control settings
i1 = in.readInt();
s1 = in.readShort();
handler.controlSettings(i1, s1);
break;
default:
throw new HprofParserException("Unexpected top-level record type: " + tag);
}
return false;
}
// returns number of bytes parsed
private int parseHeapDump(DataInput in, int idSize) throws IOException {
byte tag = in.readByte();
int bytesRead = 1;
long l1, l2, l3, l4, l5, l6, l7;
int i1, i2;
short s1, s2, s3;
byte b1;
byte[] bArr1;
long [] lArr1;
switch (tag) {
case -1: // 0xFF
// Root unknown
l1 = readId(idSize, in);
handler.rootUnknown(l1);
bytesRead += idSize;
break;
case 0x01:
// Root JNI global
l1 = readId(idSize, in);
l2 = readId(idSize, in);
handler.rootJNIGlobal(l1, l2);
bytesRead += 2 * idSize;
break;
case 0x02:
// Root JNI local
l1 = readId(idSize, in);
i1 = in.readInt();
i2 = in.readInt();
handler.rootJNILocal(l1, i1, i2);
bytesRead += idSize + 8;
break;
case 0x03:
// Root Java frame
l1 = readId(idSize, in);
i1 = in.readInt();
i2 = in.readInt();
handler.rootJavaFrame(l1, i1, i2);
bytesRead += idSize + 8;
break;
case 0x04:
// Root native stack
l1 = readId(idSize, in);
i1 = in.readInt();
handler.rootNativeStack(l1, i1);
bytesRead += idSize + 4;
break;
case 0x05:
// Root sticky class
l1 = readId(idSize, in);
handler.rootStickyClass(l1);
bytesRead += idSize;
break;
case 0x06:
// Root thread block
l1 = readId(idSize, in);
i1 = in.readInt();
handler.rootThreadBlock(l1, i1);
bytesRead += idSize + 4;
break;
case 0x07:
// Root monitor used
l1 = readId(idSize, in);
handler.rootMonitorUsed(l1);
bytesRead += idSize;
break;
case 0x08:
// Root thread object
l1 = readId(idSize, in);
i1 = in.readInt();
i2 = in.readInt();
handler.rootThreadObj(l1, i1, i2);
bytesRead += idSize + 8;
break;
case 0x20:
// Class dump
l1 = readId(idSize, in);
i1 = in.readInt();
l2 = readId(idSize, in);
l3 = readId(idSize, in);
l4 = readId(idSize, in);
l5 = readId(idSize, in);
l6 = readId(idSize, in);
l7 = readId(idSize, in);
i2 = in.readInt();
bytesRead += idSize * 7 + 8;
/* Constants */
s1 = in.readShort(); // number of constants
bytesRead += 2;
Preconditions.checkState(s1 >= 0);
Constant[] constants = new Constant[s1];
for (int i=0; i<s1; i++) {
short constantPoolIndex = in.readShort();
byte btype = in.readByte();
bytesRead += 3;
Type type = Type.hprofTypeToEnum(btype);
Value<?> v = null;
switch (type) {
case OBJ:
long vid = readId(idSize, in);
bytesRead += idSize;
v = new Value<>(type, vid);
break;
case BOOL:
boolean vbool = in.readBoolean();
bytesRead += 1;
v = new Value<>(type, vbool);
break;
case CHAR:
char vc = in.readChar();
bytesRead += 2;
v = new Value<>(type, vc);
break;
case FLOAT:
float vf = in.readFloat();
bytesRead += 4;
v = new Value<>(type, vf);
break;
case DOUBLE:
double vd = in.readDouble();
bytesRead += 8;
v = new Value<>(type, vd);
break;
case BYTE:
byte vbyte = in.readByte();
bytesRead += 1;
v = new Value<>(type, vbyte);
break;
case SHORT:
short vs = in.readShort();
bytesRead += 2;
v = new Value<>(type, vs);
break;
case INT:
int vi = in.readInt();
bytesRead += 4;
v = new Value<>(type, vi);
break;
case LONG:
long vl = in.readLong();
bytesRead += 8;
v = new Value<>(type, vl);
break;
}
constants[i] = new Constant(constantPoolIndex, v);
}
/* Statics */
s2 = in.readShort(); // number of static fields
bytesRead += 2;
Preconditions.checkState(s2 >= 0);
Static[] statics = new Static[s2];
for (int i=0; i<s2; i++) {
long staticFieldNameStringId = readId(idSize, in);
byte btype = in.readByte();
bytesRead += idSize + 1;
Type type = Type.hprofTypeToEnum(btype);
Value<?> v = null;
switch (type) {
case OBJ: // object
long vid = readId(idSize, in);
bytesRead += idSize;
v = new Value<>(type, vid);
break;
case BOOL: // boolean
boolean vbool = in.readBoolean();
bytesRead += 1;
v = new Value<>(type, vbool);
break;
case CHAR: // char
char vc = in.readChar();
bytesRead += 2;
v = new Value<>(type, vc);
break;
case FLOAT: // float
float vf = in.readFloat();
bytesRead += 4;
v = new Value<>(type, vf);
break;
case DOUBLE: // double
double vd = in.readDouble();
bytesRead += 8;
v = new Value<>(type, vd);
break;
case BYTE: // byte
byte vbyte = in.readByte();
bytesRead += 1;
v = new Value<>(type, vbyte);
break;
case SHORT: // short
short vs = in.readShort();
bytesRead += 2;
v = new Value<>(type, vs);
break;
case INT: // int
int vi = in.readInt();
bytesRead += 4;
v = new Value<>(type, vi);
break;
case LONG: // long
long vl = in.readLong();
bytesRead += 8;
v = new Value<>(type, vl);
break;
}
statics[i] = new Static(staticFieldNameStringId, v);
}
/* Instance fields */
s3 = in.readShort(); // number of instance fields
bytesRead += 2;
Preconditions.checkState(s3 >= 0);
InstanceField[] instanceFields = new InstanceField[s3];
for (int i=0; i<s3; i++) {
long fieldNameStringId = readId(idSize, in);
byte btype = in.readByte();
bytesRead += idSize + 1;
Type type = Type.hprofTypeToEnum(btype);
instanceFields[i] = new InstanceField(fieldNameStringId, type);
}
/**
* We need to know the types of the values in an instance record when
* we parse that record. To do that we need to look up the class and
* its superclasses. So we need to store class records in a hash
* table.
*/
classMap.put(l1, new ClassInfo(l1, l2, i2, instanceFields));
handler.classDump(l1, i1, l2, l3, l4, l5, l6, l7, i2, constants,
statics, instanceFields);
break;
case 0x21:
// Instance dump
l1 = readId(idSize, in);
i1 = in.readInt();
l2 = readId(idSize, in); // class obj id
i2 = in.readInt(); // num of bytes that follow
Preconditions.checkState(i2 >= 0);
bArr1 = new byte[i2];
in.readFully(bArr1);
/**
* because class dump records come *after* instance dump records,
* we don't know how to interpret the values yet. we have to
* record the instances and process them at the end.
*/
instanceList.add(new Instance(l1, i1, l2, bArr1));
bytesRead += idSize * 2 + 8 + i2;
break;
case 0x22:
// Object array dump
l1 = readId(idSize, in);
i1 = in.readInt();
i2 = in.readInt(); // number of elements
l2 = readId(idSize, in);
Preconditions.checkState(i2 >= 0);
lArr1 = new long[i2];
for (int i=0; i<i2; i++) {
lArr1[i] = readId(idSize, in);
}
handler.objArrayDump(l1, i1, l2, lArr1);
bytesRead += (2 + i2) * idSize + 8;
break;
case 0x23:
// Primitive array dump
l1 = readId(idSize, in);
i1 = in.readInt();
i2 = in.readInt(); // number of elements
b1 = in.readByte();
bytesRead += idSize + 9;
Preconditions.checkState(i2 >= 0);
Value<?>[] vs = new Value[i2];
Type t = Type.hprofTypeToEnum(b1);
for (int i=0; i<vs.length; i++) {
switch (t) {
case OBJ:
long vobj = readId(idSize, in);
vs[i] = new Value<>(t, vobj);
bytesRead += idSize;
break;
case BOOL:
boolean vbool = in.readBoolean();
vs[i] = new Value<>(t, vbool);
bytesRead += 1;
break;
case CHAR:
char vc = in.readChar();
vs[i] = new Value<>(t, vc);
bytesRead += 2;
break;
case FLOAT:
float vf = in.readFloat();
vs[i] = new Value<>(t, vf);
bytesRead += 4;
break;
case DOUBLE:
double vd = in.readDouble();
vs[i] = new Value<>(t, vd);
bytesRead += 8;
break;
case BYTE:
byte vbyte = in.readByte();
vs[i] = new Value<>(t, vbyte);
bytesRead += 1;
break;
case SHORT:
short vshort = in.readShort();
vs[i] = new Value<>(t, vshort);
bytesRead += 2;
break;
case INT:
int vi = in.readInt();
vs[i] = new Value<>(t, vi);
bytesRead += 4;
break;
case LONG:
long vlong = in.readLong();
vs[i] = new Value<>(t, vlong);
bytesRead += 8;
break;
}
}
handler.primArrayDump(l1, i1, b1, vs);
break;
default:
throw new HprofParserException("Unexpected heap dump sub-record type: " + tag);
}
return bytesRead;
}
private void processInstances(int idSize) throws IOException {
for (Instance i: instanceList) {
ByteArrayInputStream bs = new ByteArrayInputStream(i.packedValues);
DataInputStream input = new DataInputStream(bs);
ArrayList<Value<?>> values = new ArrayList<>();
// superclass of Object is 0
long nextClass = i.classObjId;
while (nextClass != 0) {
ClassInfo ci = classMap.get(nextClass);
nextClass = ci.superClassObjId;
for (InstanceField field: ci.instanceFields) {
Value<?> v = null;
switch (field.type) {
case OBJ: // object
long vid = readId(idSize, input);
v = new Value<>(field.type, vid);
break;
case BOOL: // boolean
boolean vbool = input.readBoolean();
v = new Value<>(field.type, vbool);
break;
case CHAR: // char
char vc = input.readChar();
v = new Value<>(field.type, vc);
break;
case FLOAT: // float
float vf = input.readFloat();
v = new Value<>(field.type, vf);
break;
case DOUBLE: // double
double vd = input.readDouble();
v = new Value<>(field.type, vd);
break;
case BYTE: // byte
byte vbyte = input.readByte();
v = new Value<>(field.type, vbyte);
break;
case SHORT: // short
short vs = input.readShort();
v = new Value<>(field.type, vs);
break;
case INT: // int
int vi = input.readInt();
v = new Value<>(field.type, vi);
break;
case LONG: // long
long vl = input.readLong();
v = new Value<>(field.type, vl);
break;
}
values.add(v);
}
}
Value<?>[] valuesArr = new Value[values.size()];
valuesArr = values.toArray(valuesArr);
handler.instanceDump(i.objId, i.stackTraceSerialNum, i.classObjId,
valuesArr);
}
}
private static long readId(int idSize, DataInput in) throws IOException {
long id = -1;
if (idSize == 4) {
id = in.readInt();
id &= 0x00000000ffffffff; // undo sign extension
} else if (idSize == 8) {
id = in.readLong();
} else {
throw new IllegalArgumentException("Invalid identifier size " + idSize);
}
return id;
}
/* Utility */
private int mySkipBytes(int n, DataInput in) throws IOException {
int bytesRead = 0;
try {
while (bytesRead < n) {
in.readByte();
bytesRead++;
}
} catch (EOFException e) {
// expected
}
return bytesRead;
}
}