package experiments.table.limaye;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import javax.swing.table.TableColumn;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.util.EntityUtils;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import com.google.gson.Gson;
import DisambiguationApproachDPO.DisambiguatedEntity;
import DisambiguationApproachDPO.DisambiguationRequest;
import DisambiguationApproachDPO.DisambiguationResponse;
import DisambiguationApproachDPO.EntityDisambiguationDPO;
import DisambiguationApproachDPO.Response;
import experiments.table.limaye.Table.Column;
import experiments.table.limaye.Table.Column.Cell;
public class StartEvaluationTableEntities {
/** URL of the remote disambiguation endpoint that {@code queryService} sends its POST requests to. */
public static final String DISAMBIGUATIONSERVICE = "http://theseus.dimis.fim.uni-passau.de:8080/doser-disambiguationserver/disambiguation/disambiguationWithoutCategories-collective";
/**
 * Running count of evaluated cells, i.e. cells whose ground truth is non-empty and is not the
 * DBpedia NULL resource. Incremented by {@code evaluateResults} and accumulated over all tables.
 */
public static int sum = 0;
/** Running count of evaluated cells whose disambiguated URI equals the ground truth (case-insensitive). */
public static int correct = 0;
/** Neither read nor written by any code visible in this class. */
public static int haveoneresult = 0;
/** Running count of evaluated cells that carry a non-empty disambiguated URI. */
public static int annotated = 0;
/**
 * Program entry point: creates an evaluator and runs the complete evaluation once.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    new StartEvaluationTableEntities().action();
}
/**
 * Evaluates every table file found in the hard-coded input directory.
 *
 * <p>For each file the table is parsed, the ground truth file with the same name is merged in,
 * running cell statistics are printed, every column is sent to the disambiguation service on its
 * own, the answers are written back into the cells and the table is scored with
 * {@link #evaluateResults(Table)}. After the last file the overall totals are printed.
 *
 * <p>A missing or unreadable input directory and tables that fail to parse are reported on
 * {@code System.err} and skipped instead of aborting the run with a
 * {@link NullPointerException}.
 */
public void action() {
    File tableDir = new File("/home/quh/Arbeitsfläche/Entpackung/Arbeitsfläche/To/wikilink/");
    File[] tableFiles = tableDir.listFiles();
    if (tableFiles == null) {
        // listFiles() yields null when the path is not a directory or an I/O error occurs.
        System.err.println("Cannot list table directory: " + tableDir.getAbsolutePath());
        return;
    }

    int cellsOverall = 0;
    int cellsAnnotated = 0;
    for (File tableFile : tableFiles) {
        Table t = readTable(tableFile.getAbsolutePath());
        if (t == null) {
            // readTable() returns null when the file cannot be opened or parsed.
            System.err.println("Skipping unreadable table: " + tableFile.getAbsolutePath());
            continue;
        }

        // The ground truth file carries the same file name as the table file.
        File gtf = new File(
                "/home/quh/Arbeitsfläche/Entpackung/Arbeitsfläche/gt/wikilink/" + tableFile.getName());
        addGT(t, gtf.getAbsolutePath());

        int cols = t.getNumberofColumns();
        for (int i = 0; i < cols; i++) {
            // One extra count per column is kept from the original statistics
            // (presumably the header cell -- TODO confirm).
            cellsOverall++;
            for (Cell c : t.getColumn(i).getCellList()) {
                cellsOverall++;
                String gt = c.getGt();
                if (gt != null && !gt.isEmpty()) {
                    cellsAnnotated++;
                }
            }
        }
        System.out.println("Zellen insgesamt: " + cellsOverall + " Zellen annotiert: " + cellsAnnotated);

        // Query each column separately.
        for (int i = 0; i < cols; i++) {
            Column column = t.getColumn(i);
            List<EntityDisambiguationDPO> request = transformInRequestFormat(column);
            List<Response> responses = queryService(request, column.getHeader());
            setDisambiguatedColumn(t, i, responses);
        }
        evaluateResults(t);
    }
    System.out.println("Insgesamt: " + sum + " davon richtig: " + correct);
}
/**
 * Sends the given surface forms to the disambiguation service as one JSON POST request and
 * returns the per-surface-form answers.
 *
 * <p>The request body is encoded as UTF-8, and UTF-8 is used to decode the answer when the
 * server does not declare a charset. Transport errors, non-2xx status codes, an empty body or
 * an answer without tasks are reported on {@code System.err} where applicable and result in an
 * empty list instead of a {@link NullPointerException}.
 *
 * @param dpos  surface forms to disambiguate
 * @param topic header of the queried column; currently not sent to the service
 * @return the tasks of the service response, or an empty list if no usable response was received
 */
private static List<Response> queryService(List<EntityDisambiguationDPO> dpos, String topic) {
    DisambiguationRequest req = new DisambiguationRequest();
    req.setDocsToReturn(1);
    req.setDocumentUri("TestUrl");
    req.setSurfaceFormsToDisambiguate(dpos);
    // req.setMainTopic(topic);

    // JSON is exchanged as UTF-8; never rely on the platform default charset.
    Charset utf8 = Charset.forName("UTF-8");
    Gson gson = new Gson();

    HttpPost httppost = new HttpPost(DISAMBIGUATIONSERVICE);
    httppost.setHeaders(new Header[] {
        new BasicHeader("Accept", "application/json"),
        new BasicHeader("content-type", "application/json") });
    httppost.setEntity(new ByteArrayEntity(
            gson.toJson(req).getBytes(utf8), ContentType.create("application/json", utf8)));

    HttpParams httpParams = new BasicHttpParams();
    HttpConnectionParams.setConnectionTimeout(httpParams, 3000);
    // A socket timeout of 0 means "wait indefinitely" for the service to answer.
    HttpConnectionParams.setSoTimeout(httpParams, 0);
    DefaultHttpClient httpclient = new DefaultHttpClient(httpParams);

    String body = "";
    try {
        HttpResponse response = httpclient.execute(httppost);
        int status = response.getStatusLine().getStatusCode();
        HttpEntity httpent = response.getEntity();
        if (status < 200 || status >= 300) {
            System.err.println("Disambiguation service answered with HTTP status " + status);
        } else if (httpent != null) {
            body = EntityUtils.toString(httpent, utf8);
        }
    } catch (IOException e) {
        // Also covers ClientProtocolException, which is an IOException.
        System.err.println("Disambiguation request failed: " + e);
    } finally {
        httpclient.getConnectionManager().shutdown();
    }

    // Gson returns null for an empty document, so both levels need a null check.
    DisambiguationResponse disResponse = gson.fromJson(body, DisambiguationResponse.class);
    List<Response> responses = (disResponse == null) ? null : disResponse.getTasks();
    if (responses == null) {
        return new LinkedList<Response>();
    }
    return responses;
}
/**
 * Builds one disambiguation request item per cell of the given column, in cell order.
 *
 * @param c column whose cells are converted
 * @return a new list holding one request item for every cell of {@code c}
 */
private List<EntityDisambiguationDPO> transformInRequestFormat(Column c) {
    List<EntityDisambiguationDPO> requestItems = new LinkedList<EntityDisambiguationDPO>();
    for (Cell current : c.getCellList()) {
        requestItems.add(toRequestItem(current));
    }
    return requestItems;
}

/**
 * Creates the request item for a single cell: the cell content serves as both the context and
 * the selected text, the document id is empty and the start position is 0.
 */
private EntityDisambiguationDPO toRequestItem(Cell cell) {
    EntityDisambiguationDPO item = new EntityDisambiguationDPO();
    item.setDocumentId("");
    item.setContext(cell.getCellContent());
    item.setSelectedText(cell.getCellContent());
    item.setStartPosition(0);
    return item;
}
/**
 * Writes the service answers back into the cells of one column; the i-th response belongs to
 * the i-th cell.
 *
 * <p>A cell whose response is missing (the list is {@code null} or shorter than the column),
 * is {@code null}, or contains no entities gets an empty disambiguated content string; its
 * disambiguated URI is left untouched. Otherwise the text and URI of the first returned entity
 * are stored.
 *
 * @param t        table that owns the column
 * @param columnNr index of the column to update
 * @param list     service responses in cell order; may be {@code null} or incomplete
 */
private void setDisambiguatedColumn(Table t, int columnNr, List<Response> list) {
    List<Cell> cellList = t.getColumn(columnNr).getCellList();
    int available = (list == null) ? 0 : list.size();
    for (int i = 0; i < cellList.size(); i++) {
        Cell cell = cellList.get(i);
        // A failed or truncated service answer must not abort the whole evaluation run.
        Response res = (i < available) ? list.get(i) : null;
        List<DisambiguatedEntity> disEntities = (res == null) ? null : res.getDisEntities();
        if (disEntities == null || disEntities.isEmpty()) {
            cell.setDisambigutedContentString("");
            continue;
        }
        DisambiguatedEntity best = disEntities.get(0);
        cell.setDisambigutedContentString(best.getText());
        cell.setDisambiguatedContent(best.getEntityUri());
    }
}
//
// private Table transformFromRequestFormat(Table t,
// TableDisambiguationResponse response) {
// List<ColumnResponseItem> resi = response.getColumns();
// if (resi != null) {
// for (int i = 0; i < resi.size(); i++) {
// ColumnResponseItem it = resi.get(i);
// List<CellResponse> cr = it.getCells();
// if (cr != null) {
// for (int j = 0; j < cr.size(); j++) {
// CellResponse res = cr.get(j);
// t.getColumn(i).getCellList().get(j)
// .setDisambigutedContentString(res.getText());
// t.getColumn(i).getCellList().get(j)
// .setDisambiguatedContent(res.getUri());
// }
// }
// }
// }
// return t;
// }
/**
 * Parses the XML table file at the given path with a {@code LimayeAnnotationParserWebTables}
 * content handler.
 *
 * <p>The file reader is always closed, including when parsing fails.
 *
 * @param uri file system path of the table file
 * @return the parsed table, or {@code null} if the file could not be opened or parsed (the
 *         exception's stack trace is printed in that case)
 */
public Table readTable(String uri) {
    Table t = null;
    FileReader reader = null;
    try {
        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
        reader = new FileReader(uri);
        LimayeAnnotationParserWebTables p = new LimayeAnnotationParserWebTables();
        xmlReader.setContentHandler(p);
        xmlReader.parse(new InputSource(reader));
        t = p.getTable();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } finally {
        // Release the file handle; many tables are read in a single run.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return t;
}
/**
 * Parses the ground truth XML file at the given path with a
 * {@code LimayeGroundtruthAnnotationParser} that was constructed for the given table.
 *
 * <p>The file reader is always closed, including when parsing fails. Failures to open or parse
 * the file are not propagated; the exception's stack trace is printed instead.
 *
 * @param table table handed to the ground truth parser
 * @param uri   file system path of the ground truth file
 */
public void addGT(Table table, String uri) {
    FileReader reader = null;
    try {
        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
        reader = new FileReader(uri);
        xmlReader.setContentHandler(new LimayeGroundtruthAnnotationParser(table));
        xmlReader.parse(new InputSource(reader));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } finally {
        // Release the file handle; one ground truth file is read per table.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Scores one table against its ground truth and prints the running metrics.
 *
 * <p>Only cells with a non-empty ground truth other than
 * {@code http://dbpedia.org/resource/NULL} are evaluated. For each of them {@code sum} is
 * incremented, {@code annotated} is incremented when a non-empty disambiguated URI is present,
 * and {@code correct} is incremented when that URI equals the ground truth (ignoring case);
 * every mismatch is printed. The counters are static, so the printed precision, recall, F1 and
 * accuracy cover all tables evaluated so far.
 *
 * <p>Cells without a disambiguated URI count as mismatches instead of causing a
 * {@link NullPointerException}, and a metric whose denominator is zero is reported as 0
 * instead of NaN.
 *
 * @param t table whose cells carry both ground truth and disambiguation results
 */
public static void evaluateResults(Table t) {
    int nrC = t.getNumberofColumns();
    for (int i = 0; i < nrC; i++) {
        for (Cell cell : t.getColumn(i).getCellList()) {
            String gt = cell.getGt();
            if (gt == null || gt.equals("") || gt.equalsIgnoreCase("http://dbpedia.org/resource/NULL")) {
                continue; // no usable ground truth for this cell
            }
            String val = cell.getDisambiguatedContent();
            // val is null when the service returned no entity for this cell.
            if (val != null && val.equalsIgnoreCase(gt)) {
                correct++;
            } else {
                System.out.println(
                        "Input: " + cell.getCellContent() + " Groundtruth: " + gt + " Value: " + val);
            }
            if (val != null && !val.isEmpty()) {
                annotated++;
            }
            sum++;
        }
    }
    float prec = (annotated == 0) ? 0f : ((float) correct / (float) annotated);
    float recall = (sum == 0) ? 0f : ((float) correct / (float) sum);
    float f1 = (prec + recall == 0f) ? 0f : (2 * prec * recall) / (prec + recall);
    float acc = (sum == 0) ? 0f : ((float) correct / (float) sum);
    System.out.println("Precision: "+prec +" Recall: "+recall+" F1: "+f1+ " Accuracy: " + acc);
}
}