/*
 * @copyright 2010 Evan Leybourn
 * @license GNU General Public License
 *
 * This file is part of Book Catalogue.
 *
 * Book Catalogue is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Book Catalogue is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Book Catalogue. If not, see <http://www.gnu.org/licenses/>.
 */

package com.eleybourn.bookcatalogue;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler for the HTML of a Wikipedia search results page.
 *
 * <p>While the parser is inside a {@code <ul class="mw-search-results">}
 * element, the handler records the {@code href} of the first two anchors
 * whose target contains {@code "/wiki"}. All other markup is ignored.
 */
public class SearchWikipediaHandler extends DefaultHandler {
    /** Element name that opens and closes the search results list. */
    public static final String UL = "ul";
    /** Element name of an anchor; compared case-insensitively. */
    public static final String LINK = "A";

    /** Maximum number of links recorded; matches the length of {@link #link}. */
    private static final int MAX_LINKS = 2;

    /** Not read or written by this handler. */
    public String id = "";
    /** Number of links recorded so far (0 to 2). */
    public int count = 0;
    /** Recorded hrefs in document order; unfilled slots hold the empty string. */
    public String[] link = {"", ""};

    /** True while the parser is inside the search results list. */
    private boolean entry = false;

    /**
     * Returns the recorded links.
     *
     * @return the handler's own two-element array (not a copy); slots that
     *         were never filled contain the empty string
     */
    public String[] getLinks() {
        return link;
    }

    /**
     * Returns how many links have been recorded.
     *
     * @return the number of filled slots in {@link #getLinks()}
     */
    public int getCount() {
        return count;
    }

    /**
     * Stops link collection when a {@code ul} element closes.
     *
     * @param uri       namespace URI reported by the parser
     * @param localName local element name; compared against {@link #UL}
     * @param name      qualified element name
     * @throws SAXException if the superclass implementation throws
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        super.endElement(uri, localName, name);
        // Any closing </ul> ends collection, including one nested in the results list.
        if (localName.equalsIgnoreCase(UL)) {
            entry = false;
        }
    }

    /**
     * Starts link collection on the results list and records qualifying anchors.
     *
     * @param uri        namespace URI reported by the parser
     * @param localName  local element name; compared against {@link #UL} and {@link #LINK}
     * @param name       qualified element name
     * @param attributes attributes of the element; {@code class} and {@code href} are read
     * @throws SAXException if the superclass implementation throws
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        super.startElement(uri, localName, name, attributes);

        if (localName.equalsIgnoreCase(UL)) {
            String className = attributes.getValue("class");
            if ("mw-search-results".equals(className)) {
                entry = true;
            }
        }

        if (!entry || !localName.equalsIgnoreCase(LINK)) {
            return;
        }

        String href = attributes.getValue("href");
        // getValue returns null for an anchor without an href (e.g. a named
        // anchor); skip it rather than failing the whole parse.
        if (href == null || !href.contains("/wiki")) {
            // only use /wiki/... urls
            return;
        }

        // we only want the first 2 links
        if (count < MAX_LINKS) {
            link[count] = href;
            count++;
        } else {
            entry = false;
        }
    }
}