/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. */ package org.apache.shindig.gadgets.parse; import org.apache.commons.io.IOUtils; import org.apache.shindig.gadgets.GadgetException; import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser; import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser; import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser; import org.w3c.dom.DOMImplementation; import org.w3c.dom.bootstrap.DOMImplementationRegistry; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.File; import java.io.FileInputStream; import java.io.StringWriter; /** * Benchmarks for HTML parsing and serialization */ public class ParseTreeSerializerBenchmark { private DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance(); private int numRuns; private String content; private GadgetHtmlParser cajaParser = new CajaHtmlParser( DOCUMENT_PROVIDER); private GadgetHtmlParser nekoParser = new NekoHtmlParser( DOCUMENT_PROVIDER); private GadgetHtmlParser nekoSimpleParser = new NekoSimplifiedHtmlParser( DOCUMENT_PROVIDER); private boolean warmup; private static final DOMImplementation DOCUMENT_PROVIDER = new ParseModule.DOMImplementationProvider().get(); private ParseTreeSerializerBenchmark(String file, int numRuns) throws Exception { File inputFile = new File(file); if (!inputFile.exists() || !inputFile.canRead()) { System.err.println("Input file: " + file + " not found or can't be read."); System.exit(1); } content = new String(IOUtils.toByteArray(new FileInputStream(file))); this.numRuns = 10; warmup = true; //runCaja(); runNeko(); runNekoSimple(); //Sleep to let JIT kick in Thread.sleep(10000L); this.numRuns = numRuns; warmup = false; //runCaja(); runNeko(); runNekoSimple(); } private void runCaja() throws Exception { output("Caja-----------------"); // Some warmup runs with wait. Enough iterations to trigger the JIT // Wait to allow it to swap execution paths etc... timeParseDom(cajaParser); timeParseDomSerialize(cajaParser); } private void runNeko() throws Exception { output("Neko-----------------"); timeParseDom(nekoParser); timeParseDomSerialize(nekoParser); } private void runNekoSimple() throws Exception { output("NekoSimple-----------------"); timeParseDom(nekoSimpleParser); timeParseDomSerialize(nekoSimpleParser); } private void output(String string) { if (!warmup) { System.out.println(string); } } private void timeParseDom(GadgetHtmlParser parser) throws GadgetException { long parseStart = System.currentTimeMillis(); for (int i = 0; i < numRuns; ++i) { parser.parseDom(content); } long parseMillis = System.currentTimeMillis() - parseStart; output("Parsing W3C DOM [" + parseMillis + " ms total: " + ((double)parseMillis)/numRuns + "ms/run]"); } private void timeParseDomSerialize(GadgetHtmlParser parser) throws GadgetException { org.w3c.dom.Document document = parser.parseDom(content); try { long parseStart = System.currentTimeMillis(); for (int i = 0; i < numRuns; ++i) { HtmlSerializer.serialize(document); } long parseMillis = System.currentTimeMillis() - parseStart; output("Serializing [" + parseMillis + " ms total: " + ((double) parseMillis) / numRuns + "ms/run]"); } catch (Exception e) { throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e); } try { // Create an "identity" transformer - copies input to output Transformer t = TransformerFactory.newInstance().newTransformer(); t.setOutputProperty(OutputKeys.METHOD, "html"); long parseStart = System.currentTimeMillis(); for (int i = 0; i < numRuns; ++i) { StringWriter sw = new StringWriter((content.length() * 11) / 10); t.transform(new DOMSource(document), new StreamResult(sw)); sw.toString(); } long parseMillis = System.currentTimeMillis() - parseStart; output("Serializing DOM Transformer [" + parseMillis + " ms total: " + ((double) parseMillis) / numRuns + "ms/run]"); } catch (Exception e) { throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e); } } public static void main(String[] args) { // Test can be run as standalone program to test out serialization and parsing // performance numbers, using Caja as a parser. if (args.length != 2) { System.err.println("Args: <input-file> <num-runs>"); System.exit(1); } String fileArg = args[0]; String runsArg = args[1]; int numRuns = -1; try { numRuns = Integer.parseInt(runsArg); } catch (Exception e) { System.err.println("Invalid num-runs argument: " + runsArg + ", reason: " + e); } try { new ParseTreeSerializerBenchmark(fileArg, numRuns); } catch (Exception e) { e.printStackTrace(); } } }