// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java,v $
// $Author: derrickoswald $
// $Date: 2006/05/27 17:06:28 $
// $Revision: 1.17 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//

package org.htmlparser.tests.lexerTests;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;

import org.htmlparser.lexer.Stream;
import org.htmlparser.tests.ParserTestCase;

/**
 * Tests for the {@link Stream} class.
 * The tests that open a URL (<code>testSameBytes</code>,
 * <code>testThreaded</code>, <code>testMarkReset</code> and
 * <code>testMarkResetThreaded</code>) fetch pages from
 * htmlparser.sourceforge.net and so need network access.
 */
public class StreamTests extends ParserTestCase
{
    static
    {
        System.setProperty ("org.htmlparser.tests.lexerTests.StreamTests", "StreamTests");
    }

    /**
     * Test the first level stream class.
     * @param name The name of the test case, passed to the superclass.
     */
    public StreamTests (String name)
    {
        super (name);
    }

    /**
     * Test initialization with a null value.
     * A stream built on <code>null</code> must report end of input at once.
     * @exception IOException If the stream read fails.
     */
    public void testNull () throws IOException
    {
        Stream stream;

        stream = new Stream (null);
        assertTrue ("erroneous character", -1 == stream.read ());
    }

    /**
     * Test initialization with an empty input stream.
     * @exception IOException If the stream read fails.
     */
    public void testEmpty () throws IOException
    {
        Stream stream;

        stream = new Stream (new ByteArrayInputStream (new byte[0]));
        assertTrue ("erroneous character", -1 == stream.read ());
    }

    /**
     * Test initialization with an input stream having only one byte.
     * The byte must be returned once, followed by end of input.
     * @exception IOException If the stream read fails.
     */
    public void testOneByte () throws IOException
    {
        Stream stream;

        stream = new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42 }));
        assertTrue ("erroneous character", 0x42 == stream.read ());
        assertTrue ("erroneous character", -1 == stream.read ());
    }

    /**
     * Test that the same bytes are returned as with a naked input stream.
     * Two connections are opened to the same URL; one is read through a
     * <code>BufferedInputStream</code> and the other through a
     * <code>Stream</code>, and the two are compared byte for byte.
     * Both streams are closed even if the comparison fails.
     * @exception IOException If a connection or read fails.
     */
    public void testSameBytes () throws IOException
    {
        String link;
        URL url;
        URLConnection connection1;
        URLConnection connection2;
        BufferedInputStream in;
        int b1;
        int b2;
        Stream stream;
        int index;

        link = "http://htmlparser.sourceforge.net";
        try
        {
            url = new URL (link);
            connection1 = url.openConnection ();
            connection1.connect ();
            in = new BufferedInputStream (connection1.getInputStream ());
            try
            {
                connection2 = url.openConnection ();
                connection2.connect ();
                stream = new Stream (connection2.getInputStream ());
                try
                {
                    index = 0;
                    while (-1 != (b1 = in.read ()))
                    {
                        b2 = stream.read ();
                        if (b1 != b2)
                            fail ("bytes differ at position " + index + ", expected " + b1 + ", actual " + b2);
                        index++;
                    }
                    // the naked stream is exhausted, so the Stream must be too
                    b2 = stream.read ();
                    assertTrue ("extra bytes", b2 == -1);
                }
                finally
                {
                    stream.close ();
                }
            }
            finally
            {
                in.close ();
            }
        }
        catch (MalformedURLException murle)
        {
            fail ("bad url " + link);
        }
    }

    /**
     * Test that threading works and is faster than a naked input stream.
     * This, admittedly contrived, test illustrates the following principles:
     * <ul>
     * <li>the underlying network code is already multi-threaded, so there may
     * not be a need to use application level threading in most cases</li>
     * <li>results may vary based on network connection speed, JVM, and
     * especially application usage patterns</li>
     * <li>issues only show up with large files, in my case greater than
     * about 72,400 bytes, since the underlying network code reads that far
     * into the socket before throttling back and waiting</li>
     * <li>this is only applicable to TCP/IP usage, disk access would not
     * have this problem, since the cost of reading disk is much less than
     * the round-trip cost of a TCP/IP handshake</li>
     * </ul>
     * So, what does it do? It sets up to read a URL two ways, once with a
     * naked input stream, and then with the Stream class. In each case, before
     * reading, it delays about 2 seconds (for me anyway) to allow the java.net
     * implementation to read ahead and then throttle back. The threaded Stream
     * though keeps reading while this delay is going on and hence gets a big
     * chunk of the file in memory. This advantage translates to a faster
     * spin through the bytes after the delay.
     * @exception IOException If a connection or read fails.
     */
    public void testThreaded () throws IOException
    {
        String link;
        URL url;
        URLConnection connection;
        BufferedInputStream in;
        int index;
        long begin;
        long elapsed;
        double bytes_per_second;
        int delay;
        Stream stream;
        long time1;
        long time2;
        Thread thread;
        long available1;
        long available2;

        // pick a big file
        link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";
        try
        {
            url = new URL (link);

            // estimate the connection speed
            System.gc ();
            index = 0;
            connection = url.openConnection ();
            connection.connect ();
            in = new BufferedInputStream (connection.getInputStream ());
            try
            {
                begin = System.currentTimeMillis ();
                while (-1 != in.read ())
                    index++;
                // clamp to 1ms: a zero elapsed time would make the rate
                // infinite (or NaN) and the delay below Integer.MAX_VALUE
                elapsed = Math.max (1L, System.currentTimeMillis () - begin);
                bytes_per_second = 1000.0 * index / elapsed;
            }
            finally
            {
                in.close ();
            }
            // NOTE(review): the delay grows with the measured speed - confirm this is intended
            delay = (int)(1.5 * 1000 * bytes_per_second / 72400); // 72400 is the throttle limit on my machine

            // try the naked input stream
            System.gc ();
            index = 0;
            available1 = 0;
            connection = url.openConnection ();
            connection.connect ();
            in = new BufferedInputStream (connection.getInputStream ());
            try
            {
                pauseMillis (delay);
                begin = System.currentTimeMillis ();
                do
                {
                    index++;
                    // sample the readable byte count every 1000 iterations
                    if (0 == index % 1000)
                        available1 += in.available ();
                }
                while (-1 != in.read ());
                time1 = System.currentTimeMillis () - begin;
            }
            finally
            {
                in.close ();
            }

            // try a threaded stream
            System.gc ();
            index = 0;
            available2 = 0;
            connection = url.openConnection ();
            connection.connect ();
            int length = connection.getContentLength ();
            stream = new Stream (connection.getInputStream (), length);
            try
            {
                thread = new Thread (stream);
                thread.setPriority (Thread.NORM_PRIORITY - 1);
                thread.start ();
                pauseMillis (delay);
                begin = System.currentTimeMillis ();
                do
                {
                    index++;
                    if (0 == index % 1000)
                        available2 += stream.available ();
                }
                while (-1 != stream.read ());
                time2 = System.currentTimeMillis () - begin;
//                System.out.println ("fills: " + stream.fills);
//                System.out.println ("reallocations: " + stream.reallocations);
//                System.out.println ("synchronous: " + stream.synchronous);
//                System.out.println ("buffer size: " + stream.mBuffer.length);
//                System.out.println ("bytes: " + stream.mLevel);
            }
            finally
            {
                stream.close ();
            }

//            System.out.println ("time (" + time2 + ") vs. (" + time1 + ") for " + index + " bytes");
            // integer division on purpose: one sample was taken per 1000 iterations
            double samples = index / 1000;
//            System.out.println ("average available bytes (" + available2/samples + ") vs. (" + available1/samples + ")");

            assertTrue ("slower (" + time2 + ") vs. (" + time1 + ")", time2 < time1);
            assertTrue ("average available bytes not greater (" + available2/samples + ") vs. (" + available1/samples + ")", available2 > available1);
        }
        catch (MalformedURLException murle)
        {
            fail ("bad url " + link);
        }
    }

    /**
     * Test that mark and reset work as per the contract.
     * @exception IOException If the connection or a read fails.
     */
    public void testMarkReset () throws IOException
    {
        doMarkResetTest (false);
    }

    /**
     * Test that mark and reset work as per the contract when threaded.
     * @exception IOException If the connection or a read fails.
     */
    public void testMarkResetThreaded () throws IOException
    {
        doMarkResetTest (true);
    }

    /**
     * Test close.
     * After <code>close()</code> a read must report end of input, even
     * though one byte of the underlying data has not been consumed.
     * @exception IOException If the stream read or close fails.
     */
    public void testClose () throws IOException
    {
        Stream stream;

        stream = new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42, (byte)0x78 }));
        assertTrue ("erroneous character", 0x42 == stream.read ());
        stream.close ();
        assertTrue ("not closed", -1 == stream.read ());
    }

    /**
     * Shared body of the mark/reset tests.
     * Reads 1000 bytes, resets (no mark has been set) and reads 1000 bytes
     * again, expecting identical data; then marks, reads 1000 bytes, resets
     * and reads again, once more expecting identical data.
     * The stream is closed even if an assertion or a read fails.
     * @param threaded If <code>true</code>, the stream is started on its own
     * thread before reading.
     * @exception IOException If the connection or a read fails.
     */
    private void doMarkResetTest (boolean threaded) throws IOException
    {
        String link;
        ArrayList bytes1;
        ArrayList bytes2;
        URL url;
        URLConnection connection;
        Stream stream;

        // pick a small file > 2000 bytes
        link = "http://htmlparser.sourceforge.net/javadoc_1_3/overview-summary.html";
        try
        {
            url = new URL (link);
            connection = url.openConnection ();
            connection.connect ();
            stream = new Stream (connection.getInputStream ());
            try
            {
                if (threaded)
                    (new Thread (stream)).start ();
                assertTrue ("mark not supported", stream.markSupported ());

                bytes1 = readByteList (stream, 1000);
                stream.reset ();
                bytes2 = readByteList (stream, 1000);
                assertByteListsEqual (bytes1, bytes2, 0);

                stream.mark (1000); // the 1000 is ignored
                bytes1 = readByteList (stream, 1000);
                stream.reset ();
                bytes2 = readByteList (stream, 1000);
            }
            finally
            {
                stream.close ();
            }
            assertByteListsEqual (bytes1, bytes2, 1000);
        }
        catch (MalformedURLException murle)
        {
            fail ("bad url " + link);
        }
    }

    /**
     * Read a fixed number of bytes from a stream.
     * Each value returned by <code>read()</code> is narrowed to a byte, so an
     * end of input marker (-1) is stored as the byte 0xFF rather than stopping
     * the loop.
     * @param stream The stream to read from.
     * @param count The number of reads to perform.
     * @return A list of <code>count</code> <code>Byte</code> objects.
     * @exception IOException If a read fails.
     */
    private static ArrayList readByteList (Stream stream, int count) throws IOException
    {
        ArrayList ret;
        int b;

        ret = new ArrayList (count);
        for (int i = 0; i < count; i++)
        {
            b = stream.read ();
            ret.add (new Byte ((byte)b));
        }

        return (ret);
    }

    /**
     * Assert that two lists hold equal elements, position by position.
     * @param expected The reference list; its size determines how many
     * positions are compared.
     * @param actual The list to check.
     * @param offset The value added to the list index when reporting the
     * position of a mismatch.
     */
    private static void assertByteListsEqual (ArrayList expected, ArrayList actual, int offset)
    {
        for (int index = 0; index < expected.size (); index++)
            assertEquals ("bytes differ at position " + (index + offset), expected.get (index), actual.get (index));
    }

    /**
     * Sleep for the given time.
     * If the sleep is interrupted it returns early and the interrupt
     * status of the current thread is restored, rather than being swallowed.
     * @param millis The time to sleep in milliseconds.
     */
    private static void pauseMillis (int millis)
    {
        try
        {
            Thread.sleep (millis);
        }
        catch (InterruptedException ie)
        {
            Thread.currentThread ().interrupt ();
        }
    }
}