package org.wikibrain.parser.wiki; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import org.apache.commons.io.IOUtils; import org.wikibrain.core.dao.DaoException; import org.wikibrain.core.dao.live.QueryParser; import org.wikibrain.core.lang.Language; import java.io.InputStream; import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * @author Shilad Sen */ public class WikitextRenderer { public String renderPage(Language language, String title) throws DaoException { String url = makeUrl(language, "page", title, "prop", "text"); JsonObject obj = fetchURL(url); return obj.get("text").getAsJsonObject().get("*").getAsString(); } public List<String> extractExternalLinks(Language language, String title) throws DaoException { String url = makeUrl(language, "page", title, "prop", "externallinks"); JsonObject obj = fetchURL(url); List<String> links = new ArrayList<String>(); for (JsonElement e : obj.get("externallinks").getAsJsonArray()) { links.add(e.getAsString()); } return links; } public String makeUrl(Language language, String ... props) { String url = "http://" + language.getDomain() + "/w/api.php?action=parse"; for (int i = 0; i < props.length; i += 2) { String key = props[i]; String val = props[i+1]; url += "&" + URLEncoder.encode(key) + "=" + URLEncoder.encode(val); } url += "&format=json"; return url; } /** * queries the wikipedia server for text output that can be parsed to create a wikibrain data object * sets the class attribute queryResult to the value of this raw output * @return * @throws org.wikibrain.core.dao.DaoException */ private JsonObject fetchURL(String url) throws DaoException { InputStream inputStr; try{ inputStr = new URL(url).openStream(); try { String body = IOUtils.toString(inputStr); QueryParser parser = new QueryParser(); return parser.parseQueryObject(body, "parse"); } catch(Exception e){ throw new DaoException("Error parsing LiveDao query URL"); } finally { IOUtils.closeQuietly(inputStr); } } catch(Exception e){ throw new DaoException("Error getting page from the Wikipedia Server (Check your internet connection) "); } } public static void main(String args[]) throws DaoException { WikitextRenderer renderer = new WikitextRenderer(); System.out.println(renderer.extractExternalLinks(Language.EN, "Barack Obama")); // System.out.println(renderer.renderPage(Language.EN, "Barack Obama")); } }