package experiments.table.imdbAndMusicBrainz;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.util.EntityUtils;
import com.google.gson.Gson;
import DisambiguationApproachDPO.DisambiguatedEntity;
import DisambiguationApproachDPO.DisambiguationRequest;
import DisambiguationApproachDPO.DisambiguationResponse;
import DisambiguationApproachDPO.EntityDisambiguationDPO;
import DisambiguationApproachDPO.Response;
import experiments.table.limaye.corrected.Table;
import experiments.table.limaye.corrected.Table.Column;
import experiments.table.limaye.corrected.Table.Column.Cell;
public class StartEvaluationTableEntities {
public static final String DISAMBIGUATIONSERVICE = "http://theseus.dimis.fim.uni-passau.de:8080/doser-disambiguationserver/disambiguation/disambiguationWithoutCategories-collective";
public static int sum = 0;
public static int correct = 0;
public static int annotated = 0;
public static int haveoneresult = 0;
public static int disambiguationpages = 0;
public static void main(String[] args) {
StartEvaluationTableEntities evaluate = new StartEvaluationTableEntities();
evaluate.action();
}
public void action() {
int cellsOverall = 0;
int cellsAnnotated = 0;
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(new File("/home/quh/Arbeitsfläche/Table Disambiguation Data sets/musicbrainz_columns.txt")));
String line = null;
Table current = new Table();
List<String> lines = new ArrayList<String>();
while((line = reader.readLine()) != null) {
lines.add(line);
// New table
if(line.equals("")) {
readTable(current, lines);
if (current != null) {
int cols = current.getNumberofColumns();
for (int i = 0; i < cols; i++) {
Column col = current.getColumn(i);
List<Cell> cellL = col.getCellList();
cellsOverall++;
for (Cell c : cellL) {
cellsOverall++;
if (c.getGt() != null && !c.getGt().equalsIgnoreCase("")) {
cellsAnnotated++;
}
}
}
System.out.println("Zellen insgesamt: " + cellsOverall + " Zellen annotiert: " + cellsAnnotated);
for (int i = 0; i < current.getNumberofColumns(); i++) {
Column column = current.getColumn(i);
List<EntityDisambiguationDPO> request_dpo = transformInRequestFormat(column);
String topic = column.getHeader();
List<Response> l = queryService(request_dpo, topic);
setDisambiguatedColumn(current, i, l);
}
StartEvaluationTableEntities.evaluateResults(current);
}
lines = new ArrayList<String>();
current = new Table();
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(reader != null) {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
System.out.println("Insgesamt: " + sum + " davon richtig: " + correct);
}
private static List<Response> queryService(List<EntityDisambiguationDPO> dpos, String topic) {
DisambiguationRequest req = new DisambiguationRequest();
req.setDocsToReturn(1);
req.setDocumentUri("TestUrl");
req.setSurfaceFormsToDisambiguate(dpos);
// req.setMainTopic(topic);
HttpParams my_httpParams = new BasicHttpParams();
HttpConnectionParams.setConnectionTimeout(my_httpParams, 3000);
HttpConnectionParams.setSoTimeout(my_httpParams, 0);
DefaultHttpClient httpclient = new DefaultHttpClient(my_httpParams);
HttpPost httppost = new HttpPost(DISAMBIGUATIONSERVICE);
Header[] headers = { new BasicHeader("Accept", "application/json"),
new BasicHeader("content-type", "application/json") };
httppost.setHeaders(headers);
Gson gson = new Gson();
String json = null;
json = gson.toJson(req);
ByteArrayEntity ent = new ByteArrayEntity(json.getBytes(), ContentType.create("application/json"));
httppost.setEntity(ent);
HttpResponse response;
StringBuffer buffer = new StringBuffer();
try {
response = httpclient.execute(httppost);
HttpEntity httpent = response.getEntity();
buffer.append(EntityUtils.toString(httpent));
} catch (ClientProtocolException e) {
System.out.println(e);
} catch (IOException e) {
System.out.println(e);
} finally {
httpclient.getConnectionManager().shutdown();
}
// System.out.println(buffer.toString());
DisambiguationResponse disResponse = gson.fromJson(buffer.toString(), DisambiguationResponse.class);
List<Response> responses = disResponse.getTasks();
return responses;
}
private static List<EntityDisambiguationDPO> transformInRequestFormat(Column c) {
List<EntityDisambiguationDPO> list = new LinkedList<EntityDisambiguationDPO>();
List<Cell> cells = c.getCellList();
for (Cell cell : cells) {
EntityDisambiguationDPO dpo = new EntityDisambiguationDPO();
dpo.setDocumentId("");
dpo.setContext(cell.getCellContent());
dpo.setSelectedText(cell.getCellContent());
// System.out.println(cell.getCellContent());
dpo.setStartPosition(0);
list.add(dpo);
}
return list;
}
private void setDisambiguatedColumn(Table t, int columnNr, List<Response> list) {
Column col = t.getColumn(columnNr);
List<Cell> cellList = col.getCellList();
for (int i = 0; i < cellList.size(); i++) {
Response res = list.get(i);
Cell cell = cellList.get(i);
if (res == null) {
cell.setDisambigutedContentString("");
} else {
List<DisambiguatedEntity> disEntities = res.getDisEntities();
if (disEntities == null || disEntities.size() == 0) {
cell.setDisambigutedContentString("");
} else {
cell.setDisambigutedContentString(disEntities.get(0).getText());
cell.setDisambiguatedContent(disEntities.get(0).getEntityUri());
// System.out.println(cell.getCellContent());
// System.out.println(disEntities.get(0).getEntityUri());
}
}
}
}
public void readTable(Table t, List<String> lines) {
t.addColumn("");
Column col = t.getColumn(0);
for(int i = 1; i < lines.size() - 1; i++) {
String[] splitter = lines.get(i).split("\\t");
String sf = splitter[0];
col.addCell(sf);
if(splitter.length > 1) {
col.addLastCellGT(splitter[1]);
}
}
}
public static void evaluateResults(Table t) {
// System.out.println(t.getName());
int nrC = t.getNumberofColumns();
for (int i = 0; i < nrC; i++) {
Table.Column c = t.getColumn(i);
List<Cell> cList = c.getCellList();
for (int j = 0; j < cList.size(); j++) {
Cell cell = cList.get(j);
String gt = cell.getGt();
String val = cell.getDisambiguatedContent();
// System.out.println(val);
if (gt != null && !gt.equals("") && !gt.equalsIgnoreCase("http://dbpedia.org/resource/NULL") && !gt.equalsIgnoreCase("NULL")) {
if(gt.contains(val)) {
correct++;
} else {
System.out.println("Input: " + cell.getCellContent() + " Groundtruth: " + gt + " Value: " + val);
}
if (val != null && !val.equalsIgnoreCase("")) {
annotated++;
}
if(gt.contains("(disambiguation)")) {
disambiguationpages++;
}
sum++;
}
}
}
float prec = ((float) correct / (float) annotated);
float recall = ((float) correct / (float) sum);
float f1 = (2 * prec * recall) / (prec + recall);
float acc = ((float) correct / (float) sum);
System.out.println("Precision: " + prec + " Recall: " + recall + " F1: " + f1 + " Accuracy: " + acc + "DisambiguationPages: "+disambiguationpages);
}
}