package AidaDatasetEvaluation;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.util.EntityUtils;
import com.google.gson.Gson;
import DisambiguationApproachDPO.DisambiguatedEntity;
import DisambiguationApproachDPO.DisambiguationRequest;
import DisambiguationApproachDPO.DisambiguationResponse;
import DisambiguationApproachDPO.EntityDisambiguationDPO;
import DisambiguationApproachDPO.Response;
/**
 * Command-line evaluation of a remote entity-disambiguation service against
 * the documents of a tab-separated dataset file whose {@code -DOCSTART-} line
 * contains {@code testa}.
 *
 * <p>For every such document the surface forms are posted as JSON to
 * {@link #DISAMBIGUATIONSERVICE}; the first entity returned for each surface
 * form is compared (case-insensitively) with the ground-truth DBpedia URI and
 * running totals are printed to standard output.
 */
public class AidaDataSetEvaluation {

    /** URL of the disambiguation REST endpoint that every document is posted to. */
    public static final String DISAMBIGUATIONSERVICE = "http://theseus.dimis.fim.uni-passau.de:8080/doser-disambiguationserver/disambiguation/disambiguationWithoutCategories-collective";

    /** Location of the dataset file read by {@link #action()}. */
    private static final String DATASET_PATH = "/home/quh/Arbeitsfläche/Disambiguation/Datasets/AIDA-YAGO2-dataset.tsv";

    /** Marker identifying a line that starts a new document. */
    private static final String DOC_START = "-DOCSTART-";

    /** Only documents whose start line contains this marker are evaluated. */
    private static final String SPLIT_MARKER = "testa";

    /** Prefix stripped from the dataset's Wikipedia URLs. */
    private static final String WIKIPEDIA_PREFIX = "http://en.wikipedia.org/wiki/";

    /** Prefix prepended to obtain the ground-truth DBpedia URI. */
    private static final String DBPEDIA_PREFIX = "http://dbpedia.org/resource/";

    /** Charset used for reading the dataset and encoding the JSON request. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /** Number of service answers that were compared with the ground truth. */
    private int overall;

    /** Number of compared answers whose URI matched the ground truth. */
    private int correct;

    /** Creates an evaluation with both counters set to zero. */
    public AidaDataSetEvaluation() {
        super();
        this.overall = 0;
        this.correct = 0;
    }

    /**
     * Runs the evaluation.
     *
     * @param args ignored
     * @throws IOException if the dataset file cannot be read
     */
    public static void main(String[] args) throws IOException {
        AidaDataSetEvaluation eval = new AidaDataSetEvaluation();
        eval.action();
    }

    /**
     * Reads the dataset, evaluates every selected document and finally prints
     * the number of evaluated documents and the overall/correct totals.
     *
     * <p>Every {@code -DOCSTART-} line is treated as a document boundary, so
     * mentions of documents outside the selected split are never attributed
     * to a selected document, and the last selected document is evaluated
     * even when no further selected document follows it.
     *
     * @throws IOException if the dataset file cannot be opened or read
     */
    public void action() throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File(DATASET_PATH)), UTF8));
        List<String> list = new ArrayList<String>();
        List<String> gt = new ArrayList<String>();
        int count = 0;
        boolean isOpen = false;
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains(DOC_START)) {
                    // A new document begins: finish the one collected so far.
                    if (isOpen && evaluateCollected(list, gt)) {
                        count++;
                    }
                    list.clear();
                    gt.clear();
                    isOpen = line.contains(SPLIT_MARKER);
                } else if (isOpen) {
                    collectMention(line, list, gt);
                }
            }
            // The final document has no following start line to trigger it.
            if (isOpen && evaluateCollected(list, gt)) {
                count++;
            }
        } finally {
            // Close the file even when reading or evaluating fails.
            reader.close();
        }
        System.out.println(count);
        System.out.println("Overall :" + overall + " Correct: " + correct);
    }

    /**
     * Evaluates the collected mentions as one document, if there are any.
     *
     * @return {@code true} if a document was evaluated, {@code false} if
     *         nothing had been collected
     */
    private boolean evaluateCollected(List<String> surfaceForms, List<String> groundTruth) {
        if (surfaceForms.isEmpty() || groundTruth.isEmpty()) {
            return false;
        }
        // Snapshot the lists: the caller clears and reuses them afterwards.
        evaluate(new Document(new ArrayList<String>(surfaceForms),
                new ArrayList<String>(groundTruth)));
        return true;
    }

    /**
     * Parses one dataset line and, when it has more than four tab-separated
     * columns and column 1 equals {@code B} (ignoring case), records column 2
     * as surface form and the DBpedia URI derived from column 4 as ground truth.
     */
    private void collectMention(String line, List<String> surfaceForms, List<String> groundTruth) {
        String[] columns = line.split("\\t");
        if (columns.length <= 4 || !columns[1].equalsIgnoreCase("B")) {
            return;
        }
        // Literal replacement: the prefix contains '.' which a regex-based
        // replaceAll would treat as a wildcard.
        String dbpediaUri = DBPEDIA_PREFIX + columns[4].replace(WIKIPEDIA_PREFIX, "");
        if (isAvaiableInKb(dbpediaUri)) {
            // NOTE(review): the result is not used; mentions are recorded
            // regardless. Confirm whether the additions below were meant to
            // be conditional on this check.
        }
        surfaceForms.add(columns[2]);
        groundTruth.add(dbpediaUri);
    }

    /**
     * Sends the document's surface forms to the disambiguation service and
     * updates the {@code overall}/{@code correct} counters from the answer.
     *
     * <p>If the request fails or the answer contains no task list, a message
     * is written to standard error and the counters are left untouched. A
     * task without any returned entity counts as an incorrect answer.
     *
     * @param doc surface forms and their ground-truth URIs, in matching order
     */
    public void evaluate(Document doc) {
        Gson gson = new Gson();
        String json = gson.toJson(buildRequest(doc.sfList));
        String body = post(json);
        System.out.println(body);

        // Gson returns null for an empty input, which happens after a failed request.
        DisambiguationResponse disResponse = gson.fromJson(body, DisambiguationResponse.class);
        List<Response> responses = disResponse == null ? null : disResponse.getTasks();
        if (responses == null) {
            System.err.println("No disambiguation result received; document skipped.");
            return;
        }

        List<String> gt = doc.gt;
        if (responses.size() != gt.size()) {
            System.err.println("Service returned " + responses.size() + " results for "
                    + gt.size() + " surface forms; comparing the common prefix only.");
        }
        int limit = Math.min(responses.size(), gt.size());
        for (int i = 0; i < limit; i++) {
            DisambiguatedEntity disEntity = firstEntity(responses.get(i));
            String uri = disEntity == null ? null : disEntity.getEntityUri();
            String expected = gt.get(i);
            System.out.println(uri + "\t" + expected);
            if (expected.equalsIgnoreCase(uri)) {
                correct++;
            }
            overall++;
        }
        System.out.println("Zwischenstand: " + correct + " von " + overall);
    }

    /** Builds the request object containing one entry per surface form. */
    private DisambiguationRequest buildRequest(List<String> surfaceForms) {
        DisambiguationRequest req = new DisambiguationRequest();
        req.setDocumentUri("Local Disambiguation");
        List<EntityDisambiguationDPO> dpoList = new ArrayList<EntityDisambiguationDPO>(surfaceForms.size());
        for (String surfaceForm : surfaceForms) {
            EntityDisambiguationDPO dpo = new EntityDisambiguationDPO();
            dpo.setDocumentId("Local Disambiguation");
            dpo.setContext("");
            dpo.setSelectedText(surfaceForm);
            dpoList.add(dpo);
        }
        req.setSurfaceFormsToDisambiguate(dpoList);
        return req;
    }

    /**
     * Posts the JSON payload to {@link #DISAMBIGUATIONSERVICE}.
     *
     * @return the response body, or an empty string if the request failed,
     *         the status was not 2xx or the response carried no body
     */
    private String post(String json) {
        HttpParams httpParams = new BasicHttpParams();
        HttpConnectionParams.setConnectionTimeout(httpParams, 3000);
        // A socket timeout of 0 means the read waits indefinitely for the answer.
        HttpConnectionParams.setSoTimeout(httpParams, 0);
        DefaultHttpClient httpclient = new DefaultHttpClient(httpParams);
        try {
            HttpPost httppost = new HttpPost(DISAMBIGUATIONSERVICE);
            Header[] headers = { new BasicHeader("Accept", "application/json"),
                    new BasicHeader("content-type", "application/json") };
            httppost.setHeaders(headers);
            // Encode explicitly as UTF-8 instead of the platform default charset.
            httppost.setEntity(new ByteArrayEntity(json.getBytes(UTF8),
                    ContentType.create("application/json")));

            HttpResponse response = httpclient.execute(httppost);
            HttpEntity httpent = response.getEntity();
            String body = httpent == null ? "" : EntityUtils.toString(httpent, "UTF-8");
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                System.err.println("Disambiguation service answered with HTTP " + status + ": " + body);
                return "";
            }
            return body;
        } catch (ClientProtocolException e) {
            System.err.println(e);
            return "";
        } catch (IOException e) {
            System.err.println(e);
            return "";
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /** Returns the first entity of a task, or {@code null} if there is none. */
    private DisambiguatedEntity firstEntity(Response task) {
        if (task == null || task.getDisEntities() == null || task.getDisEntities().isEmpty()) {
            return null;
        }
        return task.getDisEntities().get(0);
    }

    /**
     * Knowledge-base availability check for a ground-truth URI.
     *
     * @param gt the ground-truth URI
     * @return always {@code true} in this implementation
     */
    public boolean isAvaiableInKb(String gt) {
        return true;
    }

    /** Surface forms of one document together with their ground-truth URIs. */
    class Document {
        /** Surface forms in document order. */
        private final List<String> sfList;
        /** Ground-truth URIs; index i belongs to surface form i. */
        private final List<String> gt;

        Document(List<String> sfList, List<String> gt) {
            super();
            this.sfList = sfList;
            this.gt = gt;
        }
    }
}