// NOTE(review): the package name and the project-local import paths below were not legible in the
// reviewed copy; they are reconstructed from the simple type names this class uses. Confirm them
// against the project layout before merging.
package com.ibm.nmon.parser;

import org.slf4j.Logger;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;

import java.text.SimpleDateFormat;
import java.text.ParseException;

import java.util.List;
import java.util.Map;
import java.util.TimeZone;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

import com.ibm.nmon.data.DataRecord;
import com.ibm.nmon.data.DataType;
import com.ibm.nmon.data.PerfmonDataSet;
import com.ibm.nmon.data.SubDataType;
import com.ibm.nmon.data.ProcessDataType;
import com.ibm.nmon.data.Process;

import com.ibm.nmon.data.transform.WindowsBytesTransform;
import com.ibm.nmon.data.transform.WindowsNetworkPostProcessor;
import com.ibm.nmon.data.transform.WindowsProcessPostProcessor;

import com.ibm.nmon.util.DataHelper;

/**
 * Parses a Perfmon style CSV file into a {@link PerfmonDataSet}.
 * <p>
 * The first line of the file is treated as the header: column 0 carries the version / time zone
 * information and every other column is expected to look like
 * <code>\\hostname\category(optional subcategory)\metric</code>. Every following line is a data
 * row whose first column is a timestamp.
 * </p>
 * <p>
 * Instances keep per-file state in fields while {@link #parse(String, boolean)} runs, and the
 * timestamp format and header matcher are static and mutated on every parse, so neither a single
 * instance nor several instances may be used from more than one thread at a time.
 * </p>
 */
public final class PerfmonParser {
    private static final Logger LOGGER = org.slf4j.LoggerFactory.getLogger(PerfmonParser.class);

    // shared by all instances; its time zone is replaced for every file in configureTimeZone()
    private static final SimpleDateFormat TIMESTAMP_FORMAT = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");

    // Issue #26 \D to prevent splitting on something like
    // \SRVXYZ\Processor Information(2,10)\% DPC Time seen in Win2012
    private static final Pattern DATA_SPLITTER = Pattern.compile("\"?,\\D\"?");
    private static final Pattern SUBCATEGORY_SPLITTER = Pattern.compile(":");

    // "\\hostname\category (optional subcategory)\metric"
    // note storing a matcher vs a pattern is _NOT_ thread safe
    // first group is non-greedy (.*?) to allow proper parsing of strings like
    // \\SYSTEM\Paging File(\??\D:\pagefile.sys)\% Usage
    private static final Matcher METRIC_MATCHER = Pattern.compile("\\\\\\\\(.*?)\\\\(.*)\\\\(.*)\"?").matcher("");

    // reader and data set for the file currently being parsed; both are null between parses
    private LineNumberReader in = null;
    private PerfmonDataSet data = null;

    private final WindowsBytesTransform bytesTransform = new WindowsBytesTransform();

    // builders for each column
    private DataTypeBuilder[] buildersByColumn;
    // builders by type id
    private Map<String, DataTypeBuilder> buildersById = new java.util.HashMap<String, DataTypeBuilder>();

    /**
     * Parses the given file; equivalent to calling {@link #parse(String, boolean)} with the
     * file's absolute path.
     *
     * @param file the Perfmon CSV file to read
     * @param scaleProcessesByCPU passed through unchanged to {@link #parse(String, boolean)}
     * @return the populated data set
     * @throws IOException if the file cannot be opened or read, or if it is empty
     * @throws ParseException declared for callers; see {@link #parse(String, boolean)}
     */
    public PerfmonDataSet parse(File file, boolean scaleProcessesByCPU) throws IOException, ParseException {
        return parse(file.getAbsolutePath(), scaleProcessesByCPU);
    }

    /**
     * Parses the named file into a new {@link PerfmonDataSet}.
     * <p>
     * The header line is parsed first, then every remaining line is parsed as a data row. Once
     * all rows are read, each record is run through a {@link WindowsNetworkPostProcessor} and,
     * when requested, a {@link WindowsProcessPostProcessor}; finally
     * <code>DataHelper.aggregateProcessData</code> is applied. All per-file parser state is
     * reset before this method returns or throws.
     * </p>
     *
     * @param filename path of the Perfmon CSV file to read
     * @param scaleProcessesByCPU when <code>true</code>, a {@link WindowsProcessPostProcessor} is
     *            also applied to the data set and to every record
     * @return the populated data set
     * @throws IOException if the file cannot be opened or read, or if it contains no header line
     * @throws ParseException declared for callers; malformed timestamps are logged and the row
     *             is skipped rather than reported through this exception
     * @throws IllegalArgumentException if the header has fewer than two columns or its first
     *             metric column does not contain a hostname
     */
    public PerfmonDataSet parse(String filename, boolean scaleProcessesByCPU) throws IOException, ParseException {
        long start = System.nanoTime();

        data = new PerfmonDataSet(filename);
        data.setMetadata("OS", "Perfmon");

        try {
            in = new LineNumberReader(new FileReader(filename));

            String line = in.readLine();

            if (line == null) {
                // previously surfaced as a NullPointerException from the splitter
                throw new IOException("'" + filename + "' is empty; no Perfmon header line to parse");
            }

            parseHeader(DATA_SPLITTER.split(line));

            while ((line = in.readLine()) != null) {
                parseData(DATA_SPLITTER.split(line));
            }

            long postProcessStart = System.nanoTime();

            // post process after parsing all the data since DataTypes are built lazily
            WindowsNetworkPostProcessor networkPostProcessor = new WindowsNetworkPostProcessor();
            WindowsProcessPostProcessor processPostProcessor = null;

            networkPostProcessor.addDataTypes(data);

            if (scaleProcessesByCPU) {
                processPostProcessor = new WindowsProcessPostProcessor();
                processPostProcessor.addDataTypes(data);
            }

            for (DataRecord record : data.getRecords()) {
                networkPostProcessor.postProcess(data, record);

                if (scaleProcessesByCPU) {
                    processPostProcessor.postProcess(data, record);
                }
            }

            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Post processing complete for {} in {}ms", data.getSourceFile(),
                        (System.nanoTime() - postProcessStart) / 1000000.0d);
            }

            DataHelper.aggregateProcessData(data, LOGGER);

            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Parse complete for {} in {}ms", data.getSourceFile(),
                        (System.nanoTime() - start) / 1000000.0d);
            }

            return data;
        } finally {
            try {
                // in is still null when the file could not be opened; closing it unguarded would
                // replace the real IOException with a NullPointerException
                if (in != null) {
                    in.close();
                }
            } finally {
                // always reset, even if close() itself fails
                in = null;
                data = null;
                buildersById.clear();
                buildersByColumn = null;
                bytesTransform.reset();
            }
        }
    }

    /**
     * Parses the header row: sets the timestamp time zone from column 0, reads the hostname from
     * column 1 and assigns a {@link DataTypeBuilder} (or <code>null</code> for skipped columns)
     * to every column.
     *
     * @param header the header line already split into columns; the last entry is trimmed in
     *            place
     * @throws IllegalArgumentException if there are fewer than two columns or column 1 does not
     *             match the <code>\\hostname\category\metric</code> layout
     */
    private void parseHeader(String[] header) {
        if (header.length < 2) {
            throw new IllegalArgumentException("header has " + header.length
                    + " column(s); expected a timestamp column followed by at least one metric column");
        }

        buildersByColumn = new DataTypeBuilder[header.length];

        stripTrailingDelimiter(header);

        // parse out the timezone in a format like (PDH-CSV 4.0) (GMT Daylight Time)(-60)
        configureTimeZone(header[0]);

        // timestamp does not belong to a category
        buildersByColumn[0] = null;

        // read the first column to get the hostname
        METRIC_MATCHER.reset(header[1]);

        if (METRIC_MATCHER.matches()) {
            // assume hostname does not change
            data.setHostname(METRIC_MATCHER.group(1));
        } else {
            throw new IllegalArgumentException("hostname not found in '" + header[1] + "'");
        }

        for (int i = 1; i < header.length; i++) {
            METRIC_MATCHER.reset(header[i]);

            if (!METRIC_MATCHER.matches()) {
                LOGGER.warn("'{}' is not a valid header column", header[i]);
                buildersByColumn[i] = null;
                continue;
            }

            // looking for type id (sub type id)
            String toParse = METRIC_MATCHER.group(2);

            String uniqueId = null;
            String id = null;
            String subId = null;

            int idx = toParse.indexOf('(');

            if (idx != -1) { // has sub type
                int endIdx = toParse.indexOf(')', idx + 1);

                if (endIdx == -1) {
                    LOGGER.warn("no end parentheses found in header column '{}'", toParse);
                    buildersByColumn[i] = null;
                    continue;
                }

                id = DataHelper.newString(toParse.substring(0, idx));
                subId = DataHelper.newString(parseSubId(id, toParse.substring(idx + 1, endIdx)));
                uniqueId = SubDataType.buildId(id, subId);
            } else {
                id = uniqueId = DataHelper.newString(toParse);
            }

            String field = parseField(id, METRIC_MATCHER.group(3));

            DataTypeBuilder builder = buildersById.get(uniqueId);

            if (builder == null) {
                builder = new DataTypeBuilder(uniqueId, id, subId);
                buildersById.put(uniqueId, builder);
            }

            if (data.getTypeIdPrefix().equals(id)) { // Process
                if ("Idle".equals(subId) || "Total".equals(subId)) {
                    // skip Total and Idle processes
                    buildersByColumn[i] = null;
                } else if ("ID Process".equals(field)) {
                    // skip ID Process field but use it as the process id
                    buildersByColumn[i] = null;
                    builder.setProcessIdColumn(i);
                } else {
                    buildersByColumn[i] = builder;
                    builder.addField(field);
                }
            } else {
                buildersByColumn[i] = builder;
                builder.addField(field);
            }
        }
    }

    /**
     * Sets the time zone of the shared timestamp format from the version column, whose last
     * parenthesised group holds the offset in negative minutes from UTC. Falls back to UTC, with
     * a warning, when that group is missing or not an integer.
     *
     * @param versionHeader column 0 of the header row
     */
    private void configureTimeZone(String versionHeader) {
        TimeZone zone = null;
        int open = versionHeader.lastIndexOf('(');

        // the second check keeps substring() in range when the header ends with '('
        if (open != -1 && open + 1 < versionHeader.length()) {
            String minutes = versionHeader.substring(open + 1, versionHeader.length() - 1);

            try {
                // timezone format is negative minutes from UTC; stay in integer arithmetic so
                // the millisecond offset is exact for every minute value
                zone = new java.util.SimpleTimeZone(Integer.parseInt(minutes) * -60000, minutes);
            } catch (NumberFormatException nfe) {
                // not a number; reported and defaulted below
                zone = null;
            }
        }

        if (zone == null) {
            LOGGER.warn("version header '{}' is not in the right format, the time zone will default to UTC",
                    versionHeader);
            zone = TimeZone.getTimeZone("UTC");
        }

        TIMESTAMP_FORMAT.setTimeZone(zone);
    }

    /**
     * Removes the delimiter residue the splitter leaves on the last column: one trailing quote,
     * or the last two characters when the column ends with a comma.
     *
     * @param columns the split line; only the last entry is modified, in place
     */
    private static void stripTrailingDelimiter(String[] columns) {
        int last = columns.length - 1;

        if (last < 0) {
            return;
        }

        String value = columns[last];

        if (value.endsWith("\"")) {
            columns[last] = value.substring(0, value.length() - 1);
        } else if (value.endsWith(",")) {
            // NOTE(review): two characters are dropped, presumably because such lines end in
            // quote + comma -- confirm; clamped so a lone "," cannot throw
            columns[last] = value.substring(0, Math.max(0, value.length() - 2));
        }
    }

    /**
     * Parses one data row and adds the resulting {@link DataRecord} to the data set. Rows with
     * the wrong number of columns or an unparseable timestamp are logged and skipped.
     *
     * @param rawData the data line already split into columns; the last entry is trimmed in
     *            place
     */
    private void parseData(String[] rawData) {
        if (rawData.length != buildersByColumn.length) {
            LOGGER.warn("invalid number of data columns at line {}, this data will be skipped", in.getLineNumber());
            return;
        }

        stripTrailingDelimiter(rawData);

        // remove leading " on timestamp
        String timestamp = DataHelper.newString(rawData[0].substring(1));
        long time = 0;

        try {
            time = TIMESTAMP_FORMAT.parse(timestamp).getTime();
        } catch (ParseException pe) {
            LOGGER.warn("invalid timestamp format at line {}, this data will be skipped", in.getLineNumber());
            return;
        }

        // columns of one type may be interleaved with other types, so collect values per type
        Map<String, DataHolder> dataByType = new java.util.HashMap<String, DataHolder>();

        for (int i = 1; i < rawData.length; i++) {
            DataTypeBuilder builder = buildersByColumn[i];

            if (builder == null) {
                continue;
            }

            DataHolder holder = dataByType.get(builder.unique);

            if (holder == null) {
                holder = new DataHolder(builder.fields.size());
                dataByType.put(builder.unique, holder);
            }

            holder.add(parseDouble(rawData[i]));
        }

        DataRecord record = new DataRecord(time, timestamp);

        for (Map.Entry<String, DataHolder> entry : dataByType.entrySet()) {
            DataTypeBuilder builder = buildersById.get(entry.getKey());

            DataType type = builder.build(time, rawData);
            double[] values = entry.getValue().data;

            if (bytesTransform.isValidFor(builder.id, builder.subId)) {
                if (type.hasField("% Used Space")) {
                    // the column was read as % free (see DataTypeBuilder.build); invert it
                    int idx = type.getFieldIndex("% Used Space");
                    values[idx] = 100 - values[idx];
                }

                values = bytesTransform.transform(type, values);
            }

            record.addData(type, values);
        }

        data.addRecord(record);
    }

    /**
     * Normalises the text found between the parentheses of a header column.
     *
     * @param id the type id that precedes the parentheses
     * @param toParse the text between the parentheses
     * @return the sub id to use; <code>toParse</code> itself when no special rule applies or the
     *         expected <code>:</code> separated part is missing
     */
    private String parseSubId(String id, String toParse) {
        // some ESXTop data need special handling
        if ("Interrupt Vector".equals(id)) {
            // interrupt id
            String[] split = SUBCATEGORY_SPLITTER.split(toParse);
            return split.length > 0 ? split[0] : toParse;
        } else if (id.startsWith("Group") || "Vcpu".equals(id)) {
            // remove leading process id
            String[] split = SUBCATEGORY_SPLITTER.split(toParse);
            return split.length > 1 ? split[1] : toParse;
        } else if (toParse.length() > 0 && toParse.charAt(0) == '_') {
            // _Total = Total
            return toParse.substring(1);
        } else {
            return toParse;
        }
    }

    /**
     * Converts one data cell to a double.
     *
     * @param value the raw cell text
     * @return {@link Double#NaN} when the cell is empty or starts with a space, otherwise the
     *         parsed value
     * @throws NumberFormatException if the cell is neither blank nor a valid double
     */
    private double parseDouble(String value) {
        // assume start with space, whole string is space (i.e. empty)
        if (value.length() == 0 || value.charAt(0) == ' ') {
            return Double.NaN;
        } else {
            return Double.parseDouble(value);
        }
    }

    /**
     * Normalises the metric name of a header column.
     *
     * @param id the type id of the column
     * @param toParse the metric text of the column
     * @return for <code>Interrupt Vector</code> columns, the part after the first <code>:</code>
     *         when there is one; otherwise <code>toParse</code> unchanged
     */
    private String parseField(String id, String toParse) {
        if ("Interrupt Vector".equals(id)) {
            String[] split = SUBCATEGORY_SPLITTER.split(toParse);

            // total stats for interrupt
            if (split.length > 1) {
                return DataHelper.newString(split[1]);
            }
        }

        return toParse;
    }

    // builder class for DataTypes
    // needed due to Perfmon interleaving Process data columns
    // Processes also need to be created with a start time and pid, which are unknown until data
    // is parsed
    private final class DataTypeBuilder {
        // id + subId, used for hashCode and equals
        private final String unique;

        private final String id;
        private final String subId;

        // possible column mapping for ID Process column
        private int processIdColumn = -1;

        private final List<String> fields = new java.util.ArrayList<String>();

        // created on the first call to build() and reused afterwards
        private DataType type;

        DataTypeBuilder(String unique, String id, String subId) {
            this.unique = unique;
            this.id = id;
            this.subId = subId;
        }

        void addField(String field) {
            // assume no duplicates will happen
            fields.add(field);
        }

        void setProcessIdColumn(int processIdColumn) {
            this.processIdColumn = processIdColumn;
        }

        @Override
        public int hashCode() {
            return unique.hashCode();
        }

        @Override
        public boolean equals(Object o) {
            // compare builder to builder; comparing the String id to the other object made this
            // false even for the same instance
            if (this == o) {
                return true;
            }

            if (!(o instanceof DataTypeBuilder)) {
                return false;
            }

            return unique.equals(((DataTypeBuilder) o).unique);
        }

        /**
         * Returns the {@link DataType} for this builder, creating it and registering it (and,
         * for process columns, its {@link Process}) with the data set on the first call.
         *
         * @param startTime timestamp of the row being parsed; used as the process start time
         * @param rawData the row being parsed; read only for the process id column
         * @return the cached or newly created type
         */
        DataType build(long startTime, String[] rawData) {
            if (type != null) {
                return type;
            }

            String[] fieldsArray = fields.toArray(new String[fields.size()]);

            if (data.getTypeIdPrefix().equals(id)) { // Process
                int pid = (int) (processIdColumn != -1 ? parseDouble(rawData[processIdColumn]) : 0);

                String processName = subId;

                // store processes with full name
                // parse out pid, if available via
                // HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\PerfProc\Performance
                // ProcessNameFormat=2
                int idx = processName.indexOf('_');

                if (idx != -1) {
                    String temp = processName.substring(idx + 1, processName.length());

                    try {
                        pid = Integer.parseInt(temp);
                        processName = DataHelper.newString(processName.substring(0, idx));
                    } catch (NumberFormatException nfe) {
                        LOGGER.warn("invalid pid {} at line {}; using {} instead", temp, in.getLineNumber(), pid);
                    }
                } else {
                    idx = processName.indexOf('#');

                    if (idx != -1) {
                        processName = DataHelper.newString(processName.substring(0, idx));
                    }
                }

                if (pid == 0) {
                    // artificial process id
                    pid = data.getProcessCount() + 1;
                }

                Process process = new Process(pid, startTime, processName, data.getTypeIdPrefix());
                data.addProcess(process);

                type = new ProcessDataType(process, fieldsArray);
            } else {
                String name = SubDataType.buildId(id, subId);

                if (bytesTransform.isValidFor(id, subId)) {
                    // cannot use a DataTransform for disk free -> disk used since disks also need
                    // to have WindowsBytesTransform applied
                    if ("LogicalDisk".equals(id) || "PhysicalDisk".equals(id)) {
                        for (int i = 0; i < fieldsArray.length; i++) {
                            if ("% Free Space".equals(fieldsArray[i])) {
                                fieldsArray[i] = "% Used Space";
                            }
                        }
                    }

                    type = bytesTransform.buildDataType(id, subId, name, fieldsArray);
                } else if (subId == null) {
                    type = new DataType(id, name, fieldsArray);
                } else {
                    type = new SubDataType(id, subId, name, fieldsArray);
                }
            }

            data.addType(type);

            return type;
        }
    }

    // simple holder for field data as it is being read
    private final class DataHolder {
        private int nextIdx = 0;
        private final double[] data;

        DataHolder(int size) {
            data = new double[size];
        }

        void add(double d) {
            data[nextIdx++] = d;
        }
    }
}