package com.quiltplayer.external.wiki;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.springframework.stereotype.Component;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
/**
 * Fetches article content from the English Wikipedia {@code api.php} endpoint
 * ({@code action=parse}) and checks whether an article is available for a
 * given title.
 * <p>
 * The requested page and the most recent result are kept in mutable instance
 * fields without any synchronization, so concurrent calls on the same
 * instance may interfere with each other.
 *
 * @author Vlado Palczynski
 *
 */
@Component
public class WikipediaService implements Runnable {

    /** Request template; arguments are the URL-encoded page title and the response format. */
    private static final String QUERY = "http://en.wikipedia.org/w/api.php?action=parse&page=%s&format=%s";

    /** Marker text that, when present in a response, means there is no such article. */
    private static final String NO_ARTICLE = "Wikipedia does not have an article with this exact name";

    /** XPath selecting the text nodes of the parsed article body in the XML response. */
    private static final String ARTICLE_TEXT_XPATH = "//api/parse/text/text()";

    /** Charset used for URL encoding and for decoding the HTTP response. */
    private static final String CHARSET = "UTF-8";

    /** Content produced by the last {@link #run()}; empty if nothing was found or the call failed. */
    private String result = "";

    /** Title of the page that the next {@link #run()} will request. */
    private String page = "";

    /**
     * Downloads and parses the page currently stored in {@code page} and stores
     * the concatenated article text in {@code result}. Any failure is reported
     * on standard error and leaves {@code result} empty.
     *
     * @see java.lang.Runnable#run()
     */
    @Override
    public void run() {
        result = "";
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document document = builder.parse(buildQueryUrl(page));

            XPath xpath = XPathFactory.newInstance().newXPath();
            XPathExpression expression = xpath.compile(ARTICLE_TEXT_XPATH);
            NodeList textNodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);

            // The article body may be delivered as several text nodes; keep all
            // of them instead of only the last one.
            StringBuilder content = new StringBuilder();
            for (int i = 0; i < textNodes.getLength(); i++) {
                content.append(textNodes.item(i).getNodeValue());
            }
            result = content.toString();
        }
        catch (Exception e) {
            // Runnable.run() cannot propagate checked exceptions; report and
            // fall through with an empty result.
            e.printStackTrace();
        }
        System.out.println("DONE!");
    }

    /**
     * Fetches the parsed article text for the given page name.
     *
     * @param pageName
     *            the article title; spaces are converted to underscores
     * @return the article text, or an empty string if nothing could be
     *         retrieved
     * @throws MalformedURLException
     *             declared for backward compatibility; failures are currently
     *             handled inside {@link #run()}
     * @throws IOException
     *             declared for backward compatibility; see above
     * @throws UnsupportedEncodingException
     *             declared for backward compatibility; see above
     */
    public String getWikiContentForPageName(String pageName) throws MalformedURLException,
            IOException, UnsupportedEncodingException {
        page = pageName.replace(" ", "_");
        run();
        return result;
    }

    /**
     * Checks whether Wikipedia has an article for the given page name.
     * <p>
     * The response is scanned for the "no article" marker text. If the marker
     * is found the article does not exist; if the whole response is read
     * without seeing it, the article is considered to exist. A request that
     * fails is reported on standard error and treated as "does not exist".
     * <p>
     * NOTE(review): this relies on the marker text being part of the API
     * response for a missing page - confirm against the live endpoint.
     *
     * @param pageName
     *            the article title; spaces are converted to underscores
     * @return {@code true} if the response was read successfully and did not
     *         contain the "no article" marker, {@code false} otherwise
     */
    public boolean exists(String pageName) {
        final long startMillis = System.currentTimeMillis();
        BufferedReader in = null;
        try {
            // Plain read-only request: nothing is written to the connection, so
            // output mode is not enabled.
            URLConnection connection = new URL(buildQueryUrl(pageName)).openConnection();
            in = new BufferedReader(new InputStreamReader(connection.getInputStream(), CHARSET));

            String line;
            while ((line = in.readLine()) != null) {
                if (line.contains(NO_ARTICLE)) {
                    return false;
                }
            }
            return true;
        }
        catch (MalformedURLException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
            System.out.println("Time to ping wikipedia: "
                    + (System.currentTimeMillis() - startMillis));
        }
        return false;
    }

    /**
     * Builds the request URL for a page title: spaces become underscores and
     * the title is URL-encoded so that characters such as {@code &} or
     * non-ASCII letters cannot corrupt the query string.
     */
    private static String buildQueryUrl(String pageName) throws UnsupportedEncodingException {
        String title = pageName.replace(" ", "_");
        return String.format(QUERY, URLEncoder.encode(title, CHARSET), "xml");
    }
}