/** * Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.jetwick.wikipedia; import de.jetwick.util.Helper; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** * * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net */ public class Wikipedia { private static Logger logger = LoggerFactory.getLogger(Wikipedia.class); public Collection<WikiEntry> query(String query, String language) { return query(query, language, 3); } private boolean empty(String str) { return str == null || str.trim().isEmpty(); } /** * http://en.wikipedia.org/w/api.php * * @return url, title, text */ public Collection<WikiEntry> query(String query, String language, int hits) { List<WikiEntry> result = new ArrayList<WikiEntry>(); if (language == null || language.length() != 2) language = "en"; String wikiUrl = "http://" + language + ".wikipedia.org/w/api.php?action=query&list=search&srsearch=" + Helper.urlEncode(query) + "&srinfo=totalhits&format=xml&srlimit=" + hits; try { Document doc = Helper.getUrlAsDocument(wikiUrl, 1000); NodeList list = doc.getElementsByTagName("p"); for (int ii = 0; ii < list.getLength(); ii++) { Node node = list.item(ii); if (node.getNodeType() != Node.ELEMENT_NODE) continue; WikiEntry entry = new WikiEntry(); entry.text = ((Element) node).getAttribute("snippet"); entry.title = ((Element) node).getAttribute("title"); if (!empty(entry.title)) { entry.url = "http://" + language + ".wikipedia.org/wiki/" + Helper.urlEncode(entry.title.replaceAll(" ", "_")); result.add(entry); } } } catch (Exception ex) { logger.error("Cannot query wikipedia " + ex.getLocalizedMessage() + " URL was:" + wikiUrl); } return result; } }