/* * @copyright 2010 Evan Leybourn * @license GNU General Public License * * This file is part of Book Catalogue. * * Book Catalogue is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Book Catalogue is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Book Catalogue. If not, see <http://www.gnu.org/licenses/>. */ package com.eleybourn.bookcatalogue; import java.util.ArrayList; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /** * An XML handler for the Wikipedia entry return */ public class SearchWikipediaEntryHandler extends DefaultHandler { private StringBuilder builder; private boolean entry1 = false; private boolean entry2 = false; private boolean entry3 = false; private boolean intoc = false; private boolean in_parent_ul = false; private boolean ready_to_close_parent_ul = false; private int div = 0; private int entrydiv = 0; private String this_title = ""; public ArrayList<String> titles = new ArrayList<String>(); public static String DIV = "div"; public static String LIST1 = "ul"; public static String LIST2 = "ol"; public static String ENTRY = "li"; public static String LINK1 = "a"; //optional public static String LINK2 = "i"; //optional public static String LINK3 = "b"; //optional public static String TOC_TABLE = "table"; public ArrayList<String> getList(){ return titles; } @Override public void characters(char[] ch, int start, int length) throws SAXException { super.characters(ch, start, length); builder.append(ch, start, length); } @Override public void endElement(String uri, String localName, String name) throws SAXException { super.endElement(uri, localName, name); // don't do anything if we are in the table of contents if (intoc == false) { if (localName.equalsIgnoreCase(ENTRY)){ if (entry1 == true && entry2 == true) { String title = this_title + builder.toString(); title = title.replace("\"", "").trim(); if (title != null && title != "") { titles.add(title); } this_title = ""; entry3 = false; } } else if (localName.equalsIgnoreCase(LINK1) || localName.equalsIgnoreCase(LINK2) || localName.equalsIgnoreCase(LINK3)){ if (entry1 == true && entry2 == true && entry3 == true) { this_title += builder.toString(); } } else if (localName.equalsIgnoreCase(LIST1) || localName.equalsIgnoreCase(LIST2)){ if (in_parent_ul == true && ready_to_close_parent_ul == false) { // inner ul (if exists) in_parent_ul = false; entry3 = false; } else if (entry1 == true && entry2 == true) { entry1 = false; entry2 = false; entry3 = false; in_parent_ul = false; } } } if (localName.equalsIgnoreCase(DIV)){ if (entry1 == true && div==entrydiv) { entry1 = false; entry2 = false; entry3 = false; } div--; } if (localName.equalsIgnoreCase(TOC_TABLE)){ if (intoc == true) { intoc = false; } } builder.setLength(0); } @Override public void startDocument() throws SAXException { super.startDocument(); // Clear all variables (in case they were set in a previous instantiation) builder = new StringBuilder(); titles = new ArrayList<String>(); entry1 = false; entry2 = false; entry3 = false; intoc = false; in_parent_ul = false; ready_to_close_parent_ul = false; div = 0; entrydiv = 0; this_title = ""; } @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { super.startElement(uri, localName, name, attributes); if (localName.equalsIgnoreCase(DIV)) { div++; String idName = attributes.getValue("id"); if (idName != null && idName.equals("bodyContent")) { entrydiv = div; entry1 = true; } } if (entry1 == true && localName.equalsIgnoreCase(TOC_TABLE)) { String idName = attributes.getValue("id"); if (idName != null && idName.equals("toc")) { intoc = true; } } if (intoc == false) { // This is a parent ul. Not the list ul if (entry1 == true && entry2 == true && (localName.equalsIgnoreCase(LIST1) || localName.equalsIgnoreCase(LIST2))) { // inner ul (if exists) in_parent_ul = true; this_title = ""; ready_to_close_parent_ul = false; } else if (entry1 == true && (localName.equalsIgnoreCase(LIST1) || localName.equalsIgnoreCase(LIST2))) { entry2 = true; ready_to_close_parent_ul = true; } if (entry1 == true && entry2 == true && localName.equalsIgnoreCase(ENTRY)) { entry3 = true; } } } }