StreamTests.java example

Explorer
NewsSpeakServer-master
- libs
- src
  - com
    - vn
      - newsspeak
        ArticleParser.java
        ArticleParserFactory.java
        ArticleServlet.java
        Email.java
        ExtractTextXMLHandler.java
        FeaturedSourcesServlet.java
        FeedDataStorePopulator.java
        FeedServlet.java
        MailHandlerServlet.java
        NewsSource.java
        PMF.java
        parsers
        CNNParser.java
        DailyBeastParser.java
        EconomicTimesParser.java
        EngadgetParser.java
        HuffPostParser.java
        IndiaTodayParser.java
        LATimesParser.java
        MashableParser.java
        NYDailyNewsParser.java
        NYTimesParser.java
        ReadWriteWebParser.java
        TOIParser.java
        TechCrunchParser.java
        TheHinduParser.java
        USATodayParser.java
        WSJParser.java
        WashPostParser.java
// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java,v $
// $Author: derrickoswald $
// $Date: 2006/05/27 17:06:28 $
// $Revision: 1.17 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.tests.lexerTests;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;

import org.htmlparser.lexer.Stream;
import org.htmlparser.tests.ParserTestCase;

/**
 * Tests for the lexer's {@link Stream} class: construction from null, empty
 * and tiny inputs, byte-for-byte fidelity against a plain input stream,
 * threaded read-ahead, mark/reset and close.
 * Several of the tests fetch pages from the HTMLParser web site and so
 * require network access.
 */
public class StreamTests extends ParserTestCase
{
    /**
     * Number of bytes in each run read by the mark/reset tests.
     */
    private static final int CHUNK = 1000;

    /**
     * Divisor used when scaling the read-ahead delay; the original author
     * observed the network code throttling after about this many bytes.
     */
    private static final int THROTTLE_LIMIT = 72400;

    /**
     * Upper bound, in milliseconds, on the read-ahead delay. The delay grows
     * with the measured connection speed, so without a bound a fast link
     * would stall the test for minutes or longer.
     */
    private static final int MAX_DELAY = 10000;

    // Publishes the class name under a system property named after the class.
    // NOTE(review): presumably consumed by the test suite tooling - confirm.
    static
    {
        System.setProperty ("org.htmlparser.tests.lexerTests.StreamTests", "StreamTests");
    }

    /**
     * Test the first level stream class.
     * @param name The name of the test case, passed to the superclass.
     */
    public StreamTests (String name)
    {
        super (name);
    }

    /**
     * Test initialization with a null value.
     * The stream is expected to report end of input immediately.
     * @exception IOException If the stream read fails.
     */
    public void testNull () throws IOException
    {
        Stream stream;

        stream = new Stream (null);
        assertEquals ("erroneous character", -1, stream.read ());
    }

    /**
     * Test initialization with an empty input stream.
     * The stream is expected to report end of input immediately.
     * @exception IOException If the stream read fails.
     */
    public void testEmpty () throws IOException
    {
        Stream stream;

        stream = new Stream (new ByteArrayInputStream (new byte[0]));
        assertEquals ("erroneous character", -1, stream.read ());
    }

    /**
     * Test initialization with an input stream having only one byte.
     * The byte must come back unchanged, followed by end of input.
     * @exception IOException If the stream read fails.
     */
    public void testOneByte () throws IOException
    {
        Stream stream;

        stream = new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42 }));
        assertEquals ("erroneous character", 0x42, stream.read ());
        assertEquals ("erroneous character", -1, stream.read ());
    }

    /**
     * Test that the same bytes are returned as with a naked input stream.
     * The same URL is opened twice, once wrapped in a plain buffered stream
     * and once in a Stream, and the two are compared byte by byte.
     * @exception IOException If the page cannot be fetched or read.
     */
    public void testSameBytes () throws IOException
    {
        String link;
        URL url;
        BufferedInputStream in;
        Stream stream;
        int expected;
        int actual;
        int index;

        link = "http://htmlparser.sourceforge.net";
        try
        {
            url = new URL (link);
            in = new BufferedInputStream (openConnected (url).getInputStream ());
            try
            {
                stream = new Stream (openConnected (url).getInputStream ());
                try
                {
                    index = 0;
                    while (-1 != (expected = in.read ()))
                    {
                        actual = stream.read ();
                        if (expected != actual)
                            fail ("bytes differ at position " + index + ", expected " + expected + ", actual " + actual);
                        index++;
                    }
                    // the reference stream is exhausted, so the Stream must be too
                    assertEquals ("extra bytes", -1, stream.read ());
                }
                finally
                {
                    // release the connection even when an assertion fails
                    stream.close ();
                }
            }
            finally
            {
                in.close ();
            }
        }
        catch (MalformedURLException murle)
        {
            fail ("bad url " + link);
        }
    }

    /**
     * Test that threading works and is faster than a naked input stream.
     * This, admittedly contrived, test illustrates the following principles:
     * <ul>
     * <li>the underlying network code is already multi-threaded, so there may
     * not be a need to use application level threading in most cases</li>
     * <li>results may vary based on network connection speed, JVM, and
     * especially application usage patterns</li>
     * <li>issues only show up with large files, in my case greater than
     * about 72,400 bytes, since the underlying network code reads that far
     * into the socket before throttling back and waiting</li>
     * <li>this is only applicable to TCP/IP usage, disk access would not
     * have this problem, since the cost of reading disk is much less than
     * the round-trip cost of a TCP/IP handshake</li>
     * </ul>
     * So, what does it do? It sets up to read a URL two ways, once with a
     * naked input stream, and then with the Stream class. In each case, before
     * reading, it delays about 2 seconds (for me anyway) to allow the java.net
     * implementation to read ahead and then throttle back. The threaded Stream
     * though keeps reading while this delay is going on and hence gets a big
     * chunk of the file in memory. This advantage translates to a faster
     * spin through the bytes after the delay.
     * @exception IOException If the page cannot be fetched or read.
     */
    public void testThreaded () throws IOException
    {
        String link;
        URL url;
        URLConnection connection;
        BufferedInputStream in;
        Stream stream;
        Thread thread;
        int index;
        int length;
        int delay;
        long begin;
        long elapsed;
        long time1;
        long time2;
        long available1;
        long available2;
        double samples;

        // pick a big file
        link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";
        try
        {
            url = new URL (link);

            // estimate the connection speed
            System.gc ();
            index = 0;
            in = new BufferedInputStream (openConnected (url).getInputStream ());
            try
            {
                begin = System.currentTimeMillis ();
                while (-1 != in.read ())
                    index++;
                elapsed = System.currentTimeMillis () - begin;
            }
            finally
            {
                in.close ();
            }
            delay = readAheadDelay (index, elapsed);

            // try the naked input stream
            System.gc ();
            index = 0;
            available1 = 0;
            in = new BufferedInputStream (openConnected (url).getInputStream ());
            try
            {
                sleepQuietly (delay);
                begin = System.currentTimeMillis ();
                do
                {
                    index++;
                    // sample the buffered byte count once every 1000 reads
                    if (0 == index % 1000)
                        available1 += in.available ();
                }
                while (-1 != in.read ());
                time1 = System.currentTimeMillis () - begin;
            }
            finally
            {
                in.close ();
            }

            // try a threaded stream
            System.gc ();
            index = 0;
            available2 = 0;
            connection = openConnected (url);
            length = connection.getContentLength ();
            stream = new Stream (connection.getInputStream (), length);
            try
            {
                // run the stream in the background, just below normal
                // priority, so it keeps filling during the delay
                thread = new Thread (stream);
                thread.setPriority (Thread.NORM_PRIORITY - 1);
                thread.start ();
                sleepQuietly (delay);
                begin = System.currentTimeMillis ();
                do
                {
                    index++;
                    if (0 == index % 1000)
                        available2 += stream.available ();
                }
                while (-1 != stream.read ());
                time2 = System.currentTimeMillis () - begin;
            }
            finally
            {
                stream.close ();
            }

            // the integer division is deliberate: available() was sampled
            // exactly once per 1000 loop iterations
            samples = index / 1000;

            assertTrue ("slower (" + time2 + ") vs. (" + time1 + ")", time2 < time1);
            assertTrue ("average available bytes not greater (" + available2/samples + ") vs. (" + available1/samples + ")", available2 > available1);
        }
        catch (MalformedURLException murle)
        {
            fail ("bad url " + link);
        }
    }

    /**
     * Test that mark and reset work as per the contract.
     * @exception IOException If the page cannot be fetched or read.
     */
    public void testMarkReset () throws IOException
    {
        checkMarkReset (false);
    }

    /**
     * Test that mark and reset work as per the contract when threaded.
     * @exception IOException If the page cannot be fetched or read.
     */
    public void testMarkResetThreaded () throws IOException
    {
        checkMarkReset (true);
    }

    /**
     * Test close.
     * After closing, the stream must report end of input even though an
     * unread byte remains in the underlying data.
     * @exception IOException If the stream read or close fails.
     */
    public void testClose () throws IOException
    {
        Stream stream;

        stream = new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42, (byte)0x78 }));
        assertEquals ("erroneous character", 0x42, stream.read ());
        stream.close ();
        assertEquals ("not closed", -1, stream.read ());
    }

    /**
     * Shared body of the mark/reset tests.
     * Reads a run of bytes, resets and reads again, first without any mark
     * having been set (the test expects the stream to rewind to its first
     * byte) and then after an explicit mark at the current position.
     * Bytes are compared as the int values returned by read() rather than
     * being narrowed to byte, so an unexpected end of stream (-1) cannot be
     * mistaken for the data byte 0xFF.
     * @param threaded If <code>true</code>, the stream is also run on a
     * background thread while the test reads from it.
     * @exception IOException If the page cannot be fetched or read.
     */
    private void checkMarkReset (boolean threaded) throws IOException
    {
        String link;
        URL url;
        Stream stream;
        int[] first;
        int[] second;

        // pick a small file > 2000 bytes
        link = "http://htmlparser.sourceforge.net/javadoc_1_3/overview-summary.html";
        try
        {
            url = new URL (link);
            stream = new Stream (openConnected (url).getInputStream ());
            try
            {
                if (threaded)
                    (new Thread (stream)).start ();
                assertTrue ("mark not supported", stream.markSupported ());

                // reset with no mark set
                first = readChunk (stream, CHUNK);
                stream.reset ();
                second = readChunk (stream, CHUNK);
                assertChunksEqual (first, second, 0);

                // reset to an explicit mark part way into the stream
                stream.mark (1000); // the 1000 is ignored
                first = readChunk (stream, CHUNK);
                stream.reset ();
                second = readChunk (stream, CHUNK);
                assertChunksEqual (first, second, CHUNK);
            }
            finally
            {
                // release the connection even when an assertion fails
                stream.close ();
            }
        }
        catch (MalformedURLException murle)
        {
            fail ("bad url " + link);
        }
    }

    /**
     * Read a fixed number of values from the stream.
     * @param stream The stream to read from.
     * @param count The number of read() calls to make.
     * @return The values returned by read(), in order; -1 entries mean the
     * stream ended early.
     * @exception IOException If the stream read fails.
     */
    private int[] readChunk (Stream stream, int count) throws IOException
    {
        int[] ret;

        ret = new int[count];
        for (int i = 0; i < count; i++)
            ret[i] = stream.read ();

        return (ret);
    }

    /**
     * Assert that two runs of bytes are identical.
     * @param expected The bytes from the first pass.
     * @param actual The bytes from the pass after the reset.
     * @param offset The stream position of the first byte, used only to
     * report the position of a mismatch.
     */
    private void assertChunksEqual (int[] expected, int[] actual, int offset)
    {
        for (int i = 0; i < expected.length; i++)
            assertEquals ("bytes differ at position " + (offset + i), expected[i], actual[i]);
    }

    /**
     * Open a connection to the URL and connect it.
     * @param url The location to connect to.
     * @return The connected connection.
     * @exception IOException If the connection cannot be established.
     */
    private static URLConnection openConnected (URL url) throws IOException
    {
        URLConnection ret;

        ret = url.openConnection ();
        ret.connect ();

        return (ret);
    }

    /**
     * Compute how long to wait before reading, from a measured transfer.
     * The formula is the original author's (1.5 seconds scaled by the ratio
     * of the measured bytes per second to the throttle limit), made safe at
     * the extremes: a transfer that completes within one clock tick would
     * otherwise divide by zero and yield a delay of Integer.MAX_VALUE
     * milliseconds, and any fast link would yield a needlessly long one.
     * @param bytes The number of bytes transferred.
     * @param millis The time the transfer took, in milliseconds.
     * @return The delay in milliseconds, between 0 and MAX_DELAY inclusive.
     */
    private static int readAheadDelay (int bytes, long millis)
    {
        double bytes_per_second;

        bytes_per_second = 1000.0 * bytes / Math.max (1L, millis);

        return ((int)Math.min (MAX_DELAY, 1.5 * 1000 * bytes_per_second / THROTTLE_LIMIT));
    }

    /**
     * Sleep for the given time, returning early if interrupted.
     * The interrupt flag is restored so callers further up can still see it.
     * @param millis The time to sleep, in milliseconds.
     */
    private static void sleepQuietly (int millis)
    {
        try
        {
            Thread.sleep (millis);
        }
        catch (InterruptedException ie)
        {
            Thread.currentThread ().interrupt ();
        }
    }
}