BeanTest.java example

Explorer
NewsSpeakServer-master
- libs
- src
  - com
    - vn
      - newsspeak
        ArticleParser.java
        ArticleParserFactory.java
        ArticleServlet.java
        Email.java
        ExtractTextXMLHandler.java
        FeaturedSourcesServlet.java
        FeedDataStorePopulator.java
        FeedServlet.java
        MailHandlerServlet.java
        NewsSource.java
        PMF.java
        parsers
        CNNParser.java
        DailyBeastParser.java
        EconomicTimesParser.java
        EngadgetParser.java
        HuffPostParser.java
        IndiaTodayParser.java
        LATimesParser.java
        MashableParser.java
        NYDailyNewsParser.java
        NYTimesParser.java
        ReadWriteWebParser.java
        TOIParser.java
        TechCrunchParser.java
        TheHinduParser.java
        USATodayParser.java
        WSJParser.java
        WashPostParser.java
// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/BeanTest.java,v $
// $Author: derrickoswald $
// $Date: 2006/05/30 01:07:15 $
// $Revision: 1.51 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.tests.utilTests;

import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Vector;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.beans.LinkBean;
import org.htmlparser.beans.StringBean;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.tests.*;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.ParserException;

public class BeanTest extends ParserTestCase
{
    static
    {
        System.setProperty ("org.htmlparser.tests.utilTests.BeanTest", "BeanTest");
    }

    public BeanTest (String name)
    {
        super (name);
    }

    protected byte[] pickle (Object object)
        throws
            IOException
    {
        ByteArrayOutputStream bos;
        ObjectOutputStream oos;
        byte[] ret;

        bos = new ByteArrayOutputStream ();
        oos = new ObjectOutputStream (bos);
        oos.writeObject (object);
        oos.close ();
        ret = bos.toByteArray ();

        return (ret);
    }

    protected Object unpickle (byte[] data)
        throws
            IOException,
            ClassNotFoundException
    {
        ByteArrayInputStream bis;
        ObjectInputStream ois;
        Object ret;

        bis = new ByteArrayInputStream (data);
        ois = new ObjectInputStream (bis);
        ret = ois.readObject ();
        ois.close ();

        return (ret);
    }

    /**
     * Makes sure that the bean returns text when passed the html.
     */
    protected void check (StringBean bean, String html, String text)
    {
        String path;
        File file;
        PrintWriter out;
        String string;

        path = System.getProperty ("user.dir");
        if (!path.endsWith (File.separator))
            path += File.separator;
        file = new File (path + "delete_me.html");
        try
        {
            out = new PrintWriter (new FileWriter (file));
            out.print (html);
            out.close ();
            bean.setURL (file.getAbsolutePath ());
            string = bean.getStrings ();
        }
        catch (Exception e)
        {
            fail (e.toString ());
            string = null; // never reached
        }
        finally
        {
            file.delete ();
        }
        assertStringEquals ("stringbean text differs", text, string);
    }

    public void testZeroArgPageConstructor ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        Page page;
        byte[] data;

        page = new Page ();
        data = pickle (page);
        page = (Page)unpickle (data);
    }

    public void testZeroArgLexerConstructor ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        Lexer lexer;
        byte[] data;

        lexer = new Lexer ();
        data = pickle (lexer);
        lexer = (Lexer)unpickle (data);
    }

    public void testZeroArgParserConstructor ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        Parser parser;
        byte[] data;

        parser = new Parser ();
        data = pickle (parser);
        parser = (Parser)unpickle (data);
    }

    public void testSerializable ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        Parser parser;
        Vector vector;
        NodeIterator enumeration;
        byte[] data;

        parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html");
        enumeration = parser.elements ();
        vector = new Vector (50);
        while (enumeration.hasMoreNodes ())
            vector.addElement (enumeration.nextNode ());

        data = pickle (parser);
        parser = (Parser)unpickle (data);

        enumeration = parser.elements ();
        while (enumeration.hasMoreNodes ())
            assertEquals (
                "Nodes before and after serialization differ",
                ((Node)vector.remove (0)).toHtml (),
                enumeration.nextNode ().toHtml ());
    }

    public void testSerializableScanners ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        Parser parser;
        Vector vector;
        NodeIterator enumeration;
        byte[] data;

        parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html");
        enumeration = parser.elements ();
        vector = new Vector (50);
        while (enumeration.hasMoreNodes ())
            vector.addElement (enumeration.nextNode ());

        data = pickle (parser);
        parser = (Parser)unpickle (data);

        enumeration = parser.elements ();
        while (enumeration.hasMoreNodes ())
            assertEquals (
                "Nodes before and after serialization differ",
                ((Node)vector.remove (0)).toHtml (),
                enumeration.nextNode ().toHtml ());
    }

    public void testSerializableStringBean ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        StringBean sb;
        String text;
        byte[] data;

        sb = new StringBean ();
        sb.setURL ("http://htmlparser.sourceforge.net/test/example.html");
        text = sb.getStrings ();

        data = pickle (sb);
        sb = (StringBean)unpickle (data);

        assertEquals (
            "Strings before and after serialization differ",
            text,
            sb.getStrings ());
    }

    public void testSerializableLinkBean ()
        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        LinkBean lb;
        URL[] links;
        byte[] data;
        URL[] links2;

        lb = new LinkBean ();
        lb.setURL ("http://htmlparser.sourceforge.net/test/example.html");
        links = lb.getLinks ();

        data = pickle (lb);
        lb = (LinkBean)unpickle (data);

        links2 = lb.getLinks ();
        assertEquals ("Number of links after serialization differs", links.length, links2.length);
        for (int i = 0; i < links.length; i++)
        {
            assertEquals (
                "Links before and after serialization differ",
                links[i],
                links2[i]);
        }
    }

    public void testStringBeanListener ()
    {
        final StringBean sb;
        final Boolean hit[] = new Boolean[1];

        sb = new StringBean ();
        hit[0] = Boolean.FALSE;
        sb.addPropertyChangeListener (
            new PropertyChangeListener ()
            {
                public void propertyChange (PropertyChangeEvent event)
                {
                    if (event.getSource ().equals (sb))
                        if (event.getPropertyName ().equals (StringBean.PROP_STRINGS_PROPERTY))
                            hit[0] = Boolean.TRUE;
                }
            });

        hit[0] = Boolean.FALSE;
        sb.setURL ("http://htmlparser.sourceforge.net/test/example.html");
        assertTrue (
            "Strings property change not fired for URL change",
            hit[0].booleanValue ());

        hit[0] = Boolean.FALSE;
        sb.setLinks (true);
        assertTrue (
            "Strings property change not fired for links change",
            hit[0].booleanValue ());
    }

    public void testLinkBeanListener ()
    {
        final LinkBean lb;
        final Boolean hit[] = new Boolean[1];

        lb = new LinkBean ();
        hit[0] = Boolean.FALSE;
        lb.addPropertyChangeListener (
            new PropertyChangeListener ()
            {
                public void propertyChange (PropertyChangeEvent event)
                {
                    if (event.getSource ().equals (lb))
                        if (event.getPropertyName ().equals (LinkBean.PROP_LINKS_PROPERTY))
                            hit[0] = Boolean.TRUE;
                }
            });

        hit[0] = Boolean.FALSE;
        lb.setURL ("http://htmlparser.sourceforge.net/test/example.html");
        assertTrue (
            "Links property change not fired for URL change",
            hit[0].booleanValue ());
    }

    /**
     * Test no text returns empty string.
     */
    public void testCollapsed1 ()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (false);
        check (sb, "<html><head></head><body></body></html>", "");
        check (sb, "<html><head></head><body> </body></html>", " ");
        check (sb, "<html><head></head><body>\t</body></html>", "\t");
        sb.setCollapse (true);
        check (sb, "<html><head></head><body></body></html>", "");
        check (sb, "<html><head></head><body> </body></html>", "");
        check (sb, "<html><head></head><body>\t</body></html>", "");
    }

    /**
     * Test multiple whitespace returns empty string.
     */
    public void testCollapsed2 ()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (false);
        check (sb, "<html><head></head><body>  </body></html>", "  ");
        check (sb, "<html><head></head><body>\t\t</body></html>", "\t\t");
        check (sb, "<html><head></head><body> \t\t</body></html>", " \t\t");
        check (sb, "<html><head></head><body>\t \t</body></html>", "\t \t");
        check (sb, "<html><head></head><body>\t\t </body></html>", "\t\t ");
        sb.setCollapse (true);
        check (sb, "<html><head></head><body>  </body></html>", "");
        check (sb, "<html><head></head><body>\t\t</body></html>", "");
        check (sb, "<html><head></head><body> \t\t</body></html>", "");
        check (sb, "<html><head></head><body>\t \t</body></html>", "");
        check (sb, "<html><head></head><body>\t\t </body></html>", "");
    }

    /**
     * Test text preceded or followed by whitespace returns just text.
     */
    public void testCollapsed3 ()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (false);
        check (sb, "<html><head></head><body>x  </body></html>", "x  ");
        check (sb, "<html><head></head><body>x\t\t</body></html>", "x\t\t");
        check (sb, "<html><head></head><body>x \t\t</body></html>", "x \t\t");
        check (sb, "<html><head></head><body>x\t \t</body></html>", "x\t \t");
        check (sb, "<html><head></head><body>x\t\t </body></html>", "x\t\t ");
        sb.setCollapse (true);
        check (sb, "<html><head></head><body>x  </body></html>", "x");
        check (sb, "<html><head></head><body>x\t\t</body></html>", "x");
        check (sb, "<html><head></head><body>x \t\t</body></html>", "x");
        check (sb, "<html><head></head><body>x\t \t</body></html>", "x");
        check (sb, "<html><head></head><body>x\t\t </body></html>", "x");
        check (sb, "<html><head></head><body>  x</body></html>", "x");
        check (sb, "<html><head></head><body>\t\tx</body></html>", "x");
        check (sb, "<html><head></head><body> \t\tx</body></html>", "x");
        check (sb, "<html><head></head><body>\t \tx</body></html>", "x");
        check (sb, "<html><head></head><body>\t\t x</body></html>", "x");
    }

    /**
     * Test text including a "pre" tag
     */
    public void testOutputWithPreTags() {
        StringBean sb;
        sb = new StringBean ();
        String sampleCode = "public class Product {}";
        check (sb, "<body><pre>"+sampleCode+"</pre></body>", sampleCode);
    }

    /**
     * Test text including a "script" tag
     */
    public void testOutputWithScriptTags() {
        StringBean sb;
        sb = new StringBean ();

        String sampleScript =
          "<script language=\"javascript\">\r\n"
        + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n"
        + " document.write ('xxx');\r\n"
        + "else\r\n"
        + " document.write ('yyy');\r\n"
        + "</script>\r\n";

        check (sb, "<body>"+sampleScript+"</body>", "");
    }

    /*
     * Test output with pre and any tag.
     */
    public void testOutputWithPreAndAnyTag()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (false);
        check (sb, "<html><head></head><body><pre><hello></pre></body></html>", "");
    }

    /*
     * Test output with pre and any tag and text.
     */
    public void testOutputWithPreAndAnyTagPlusText()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (false);
        check (sb, "<html><head></head><body><pre><hello>dogfood</hello></pre></body></html>", "dogfood");
    }

    /*
     * Test output with pre and any tag and text.
     */
    public void testOutputWithPreAndAnyTagPlusTextWithWhitespace()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (true);
        check (sb, "<html><head></head><body><pre><hello>dog  food</hello></pre></body></html>", "dog  food");
    }

    /*
     * Test output without pre and any tag and text.
     */
    public void testOutputWithoutPreAndAnyTagPlusTextWithWhitespace()
    {
        StringBean sb;

        sb = new StringBean ();
        sb.setLinks (false);
        sb.setReplaceNonBreakingSpaces (true);
        sb.setCollapse (true);
        check (sb, "<html><head></head><body><hello>dog  food</hello></body></html>", "dog food");
    }

    /**
     * Test output with pre and script tags
     */
    public void xtestOutputWithPreAndScriptTags ()
    {
        StringBean sb;
        sb = new StringBean ();

        String sampleScript =
          "<script language=\"javascript\">\r\n"
        + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n"
        + " document.write ('xxx');\r\n"
        + "else\r\n"
        + " document.write ('yyy');\r\n"
        + "</script>\r\n";

        check (sb, "<body><pre>"+sampleScript+"</pre></body>", sampleScript);
    }

    /**
     * Test output with non-breaking tag within text.
     */
    public void testTagWhitespace ()
    {
        StringBean sb;
        sb = new StringBean ();

        String pre = "AAAAA BBBBB AAA";
        String mid = "AA";
        String post = " BBBBB";
        String html =
          "<HTML>\r\n"
        + "<body>\r\n"
        + "<p>" + pre + "<font color='red'>" + mid + "</font>" + post + "</p>\r\n"
        + "</body>\r\n"
        + "</HTML>\r\n";

        check (sb, html, pre + mid + post);
    }
}