package ACE_MSNBC_AQUAINT_Evaluation; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.ByteArrayEntity; import org.apache.http.entity.ContentType; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicHeader; import org.apache.http.params.BasicHttpParams; import org.apache.http.params.HttpConnectionParams; import org.apache.http.params.HttpParams; import org.apache.http.util.EntityUtils; import org.apache.log4j.Logger; import org.rdfhdt.hdt.hdt.HDT; import org.rdfhdt.hdt.hdt.HDTManager; import org.rdfhdt.hdtjena.HDTGraph; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLReaderFactory; import com.google.gson.Gson; import com.hp.hpl.jena.query.QueryException; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import DisambiguationApproachDPO.DisambiguatedEntity; import DisambiguationApproachDPO.DisambiguationRequest; import DisambiguationApproachDPO.DisambiguationResponse; import DisambiguationApproachDPO.EntityDisambiguationDPO; import DisambiguationApproachDPO.Response; import doser.tools.indexcreation.WikiPediaUriConverter; public class MainEvaluation { public static final String DISAMBIGUATIONSERVICE = "http://theseus.dimis.fim.uni-passau.de:8080/doser-disambiguationserver/disambiguation/disambiguationWithoutCategories-collective"; private Model redirects; public MainEvaluation(String maindir) { File maind = new File(maindir); HDT redirectsHDT; try { redirectsHDT = HDTManager.mapIndexedHDT("/home/quh/dbpedia_redirects.hdt", null); final HDTGraph redirectsHDTgraph = new HDTGraph(redirectsHDT); this.redirects = ModelFactory.createModelForGraph(redirectsHDTgraph); } catch (IOException e) { e.printStackTrace(); } evaluate(maind); } private void evaluate(File mainDirectory) { String dirStr = mainDirectory.getAbsolutePath(); File solutions = new File(dirStr + "/Problems"); String[] sols = solutions.list(); int overall = 0; int correct = 0; int possibleCorrect = 0; for (int i = 0; i < sols.length; i++) { String text = extractMainText(dirStr + "/RawTexts/" + sols[i]); List<Problem> problem = createProblems(new File(dirStr + "/Problems/" + sols[i]), text); DisambiguationRequest req = new DisambiguationRequest(); List<EntityDisambiguationDPO> dpoList = new ArrayList<EntityDisambiguationDPO>(); List<String> groundtruth = new ArrayList<String>(); for (Problem p : problem) { dpoList.add(p.getDpo()); groundtruth.add(p.getGroundtruth()); } req.setSurfaceFormsToDisambiguate(dpoList); HttpParams my_httpParams = new BasicHttpParams(); HttpConnectionParams.setConnectionTimeout(my_httpParams, 3000); HttpConnectionParams.setSoTimeout(my_httpParams, 0); DefaultHttpClient httpclient = new DefaultHttpClient(my_httpParams); HttpPost httppost = new HttpPost(DISAMBIGUATIONSERVICE); Header[] headers = { new BasicHeader("Accept", "application/json"), new BasicHeader("content-type", "application/json") }; httppost.setHeaders(headers); Gson gson = new Gson(); String json = null; json = gson.toJson(req); // System.out.println(json); ByteArrayEntity ent = new ByteArrayEntity(json.getBytes(), ContentType.create("application/json")); httppost.setEntity(ent); HttpResponse response; StringBuffer buffer = new StringBuffer(); try { response = httpclient.execute(httppost); HttpEntity httpent = response.getEntity(); buffer.append(EntityUtils.toString(httpent)); } catch (ClientProtocolException e) { System.out.println(e); } catch (IOException e) { System.out.println(e); } finally { httpclient.getConnectionManager().shutdown(); } // System.out.println(buffer.toString()); DisambiguationResponse disResponse = gson.fromJson( buffer.toString(), DisambiguationResponse.class); List<Response> responses = disResponse.getTasks(); for (int j = 0; j < responses.size(); j++) { if (responses.get(j) != null) { DisambiguatedEntity disEntity = responses.get(j) .getDisEntities().get(0); if (disEntity != null) { String uri = disEntity.getEntityUri(); String gt = groundtruth.get(j).trim(); gt = gt.replaceAll("http://en.wikipedia.org/wiki/", ""); StringBuffer b = new StringBuffer(); gt = WikiPediaUriConverter.createConformDBpediaUrifromEncodedString(gt); String newUri = getRedirect(gt); if(newUri != null) { gt = newUri; } b.append("URI: "+uri+" GT: "+gt); if (gt.equalsIgnoreCase(uri)) { correct++; b.append(" true"); } else { b.append(" false"); System.out.println(b.toString()); } possibleCorrect++; // System.out.println(b.toString()); } } overall++; } } System.out.println("Zwischenstand: " + correct + " von " + possibleCorrect); } private List<Problem> createProblems(File problem, String text) { List<Problem> list = null; try { XMLReader xmlReader = XMLReaderFactory.createXMLReader(); FileReader reader = new FileReader(problem); InputSource inputSource = new InputSource(reader); ProblemHandler handler = new ProblemHandler(); xmlReader.setContentHandler(handler); xmlReader.parse(inputSource); list = handler.getList(); } catch (SAXException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return list; } private String extractMainText(String filename) { StringBuffer buffer = new StringBuffer(); File textfile = new File(filename); BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(textfile)); String line = null; while ((line = reader.readLine()) != null) { buffer.append(line); } reader.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } return buffer.toString(); } public String getRedirect(final String entityUri) { final String query = "SELECT ?types WHERE{ <" + entityUri + "> <http://dbpedia.org/ontology/wikiPageRedirects> ?types. }"; ResultSet results = null; QueryExecution qexec = null; try { final com.hp.hpl.jena.query.Query cquery = QueryFactory .create(query); qexec = QueryExecutionFactory.create(cquery, redirects); results = qexec.execSelect(); } catch (final QueryException e) { Logger.getRootLogger().error(e.getStackTrace()); } finally { if (results != null) { while (results.hasNext()) { final QuerySolution sol = results.nextSolution(); final String type = sol.getResource("types").toString(); return type; } } } return null; } class ProblemHandler implements ContentHandler { private List<Problem> dpoList; private EntityDisambiguationDPO dpo; private Problem p; private String currentValue; ProblemHandler() { super(); this.dpoList = new ArrayList<Problem>(); } @Override public void setDocumentLocator(Locator locator) { // TODO Auto-generated method stub } @Override public void startDocument() throws SAXException { // TODO Auto-generated method stub } @Override public void endDocument() throws SAXException { // TODO Auto-generated method stub } @Override public void startPrefixMapping(String prefix, String uri) throws SAXException { // TODO Auto-generated method stub } @Override public void endPrefixMapping(String prefix) throws SAXException { // TODO Auto-generated method stub } @Override public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { if (localName.equals("ReferenceInstance")) { this.dpo = new EntityDisambiguationDPO(); this.p = new Problem(); } if (localName.equals("SurfaceForm")) { this.currentValue = ""; } if (localName.equals("ChosenAnnotation")) { this.currentValue = ""; } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if (localName.equals("SurfaceForm")) { dpo.setSelectedText(currentValue.trim()); } if (localName.equals("ChosenAnnotation")) { p.setGroundtruth(currentValue); } if (localName.equals("ReferenceInstance")) { dpo.setContext(""); p.setDpo(dpo); dpoList.add(p); } } @Override public void characters(char[] ch, int start, int length) throws SAXException { currentValue += new String(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { // TODO Auto-generated method stub } @Override public void processingInstruction(String target, String data) throws SAXException { // TODO Auto-generated method stub } @Override public void skippedEntity(String name) throws SAXException { // TODO Auto-generated method stub } public List<Problem> getList() { return dpoList; } } class Problem { private EntityDisambiguationDPO dpo; private String groundtruth; public EntityDisambiguationDPO getDpo() { return dpo; } public void setDpo(EntityDisambiguationDPO dpo) { this.dpo = dpo; } public String getGroundtruth() { return groundtruth; } public void setGroundtruth(String groundtruth) { this.groundtruth = groundtruth; } } public static void main(String[] args) { new MainEvaluation( "/home/quh/Arbeitsfläche/WikificationACL2011Data/AQUAINT/"); } }