BadTagIdentifier.java example

Explorer

NewsSpeakServer-master
- libs
- src
  - com
    - vn
      - newsspeak
        ArticleParser.java
        ArticleParserFactory.java
        ArticleServlet.java
        Email.java
        ExtractTextXMLHandler.java
        FeaturedSourcesServlet.java
        FeedDataStorePopulator.java
        FeedServlet.java
        MailHandlerServlet.java
        NewsSource.java
        PMF.java
        parsers
        CNNParser.java
        DailyBeastParser.java
        EconomicTimesParser.java
        EngadgetParser.java
        HuffPostParser.java
        IndiaTodayParser.java
        LATimesParser.java
        MashableParser.java
        NYDailyNewsParser.java
        NYTimesParser.java
        ReadWriteWebParser.java
        TOIParser.java
        TechCrunchParser.java
        TheHinduParser.java
        USATodayParser.java
        WSJParser.java
        WashPostParser.java

// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/BadTagIdentifier.java,v $
// $Author: derrickoswald $
// $Date: 2004/01/02 16:24:55 $
// $Revision: 1.17 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.tests;

import org.htmlparser.Parser;
import org.htmlparser.visitors.TagFindingVisitor;

public class BadTagIdentifier {

    public BadTagIdentifier() {
        super();
    }

    public static void main(String[] args)
        throws Exception {
        BadTagIdentifier badTags =
            new BadTagIdentifier();
        badTags.identify("http://www.amazon.com");
    }

    private void identify(String url)
        throws Exception{
        String [] tagsBeingChecked =
        {"TABLE","DIV","SPAN"};

        Parser parser =
            new Parser(url);
        TagFindingVisitor tagFinder =
            new TagFindingVisitor(tagsBeingChecked, true);
        parser.visitAllNodesWith(tagFinder);
        for (int i=0;i<tagsBeingChecked.length;i++) {
            System.out.println(
                "Number of "+tagsBeingChecked[i]+" begin tags = "+
            tagFinder.getTagCount(i));
            System.out.println(
                "Number of "+tagsBeingChecked[i]+" end tags = "+
                tagFinder.getEndTagCount(i));
        }

    }
}