/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.bat.annotator;
import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.HashSet;
import org.aksw.gerbil.bat.converter.DBpediaToWikiId;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * Annotator that POSTs a text to an AGDISTIS HTTP endpoint
 * ({@code http://host:port/AGDISTIS}) and converts the JSON array in the response into
 * BAT-framework {@link Annotation}s.
 *
 * <p>
 * NOTE(review): the shared {@link JSONParser} instance is reused across calls; whether this
 * class may be used from several threads at once is not established here - confirm before
 * sharing an instance.
 */
@Deprecated
public class AgdistisAnnotator extends it.unipi.di.acube.batframework.systemPlugins.AgdistisAnnotator {

    /** Host name of the AGDISTIS service. */
    protected String host;
    /** TCP port of the AGDISTIS service. */
    protected int port;
    /** Wikipedia API handle passed to {@link DBpediaToWikiId#getId} when resolving URLs. */
    protected WikipediaApiInterface wikiApi;
    /** Parser used to read the JSON response of the service. */
    protected JSONParser jsonParser = new JSONParser();

    /**
     * Creates an annotator that talks to the service at the given host and port.
     *
     * @param host
     *            host name of the AGDISTIS service
     * @param port
     *            port of the AGDISTIS service
     * @param wikiApi
     *            Wikipedia API handle used to map returned URLs to Wikipedia ids
     */
    public AgdistisAnnotator(String host, int port, WikipediaApiInterface wikiApi) {
        super(host, port, wikiApi);
        this.wikiApi = wikiApi;
        this.host = host;
        this.port = port;
    }

    /**
     * @return always {@code -1}; this implementation does not measure the annotation time
     */
    @Override
    public long getLastAnnotationTime() {
        return -1;
    }

    /**
     * Sends the given text as a form-encoded POST request to the AGDISTIS endpoint and parses
     * the JSON response.
     *
     * @param textWithMentions
     *            the text that is sent as the {@code text} form parameter
     * @return the annotations extracted from the response
     * @throws IOException
     *             if the request cannot be sent or the response cannot be read
     * @throws ParseException
     *             if the response is not valid JSON
     */
    public HashSet<Annotation> getAnnotations(String textWithMentions) throws IOException, ParseException {
        URL agdistisUrl = new URL("http://" + host + ":" + port + "/AGDISTIS");
        String parameters = "type=agdistis&text=" + URLEncoder.encode(textWithMentions, "UTF-8");
        // Encode once with an explicit charset so that the announced Content-Length and the
        // bytes that are actually written can never disagree.
        byte[] body = parameters.getBytes("UTF-8");

        HttpURLConnection slConnection = (HttpURLConnection) agdistisUrl.openConnection();
        try {
            slConnection.setDoOutput(true);
            slConnection.setDoInput(true);
            slConnection.setRequestMethod("POST");
            slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            slConnection.setRequestProperty("charset", "utf-8");
            slConnection.setRequestProperty("Content-Length", Integer.toString(body.length));
            slConnection.setUseCaches(false);

            // try-with-resources closes the request stream even if writing fails.
            try (DataOutputStream wr = new DataOutputStream(slConnection.getOutputStream())) {
                wr.write(body);
                wr.flush();
            }

            // Closing the response stream releases the connection for reuse.
            try (InputStream in = slConnection.getInputStream()) {
                return parseJsonStream(in);
            }
        } catch (IOException | ParseException | RuntimeException e) {
            // On failure the response may not have been consumed; drop the connection so
            // that no socket is left dangling.
            slConnection.disconnect();
            throw e;
        }
    }

    /**
     * Reads a JSON array of named-entity objects from the given stream and converts every
     * entry that has a {@code disambiguatedURL} into an {@link Annotation}. Entries without
     * such a URL are skipped. The stream is not closed by this method.
     *
     * @param in
     *            stream containing the UTF-8 encoded JSON response
     * @return the set of annotations built from the response
     * @throws IOException
     *             if reading from the stream fails
     * @throws ParseException
     *             if the content is not valid JSON
     */
    private HashSet<Annotation> parseJsonStream(InputStream in) throws IOException, ParseException {
        HashSet<Annotation> annotations = new HashSet<>();
        JSONArray namedEntities = (JSONArray) this.jsonParser.parse(new InputStreamReader(in, "UTF-8"));
        for (Object obj : namedEntities) {
            JSONObject namedEntity = (JSONObject) obj;
            // Read through Number so that any numeric JSON representation is accepted.
            int position = ((Number) namedEntity.get("start")).intValue();
            int length = ((Number) namedEntity.get("offset")).intValue();
            String url = (String) namedEntity.get("disambiguatedURL");
            if (url == null) {
                // The service returned no entity for this mention.
                continue;
            }
            String urlDecoded = URLDecoder.decode(url, "UTF-8");
            int wikiArticle = DBpediaToWikiId.getId(wikiApi, urlDecoded);
            if (wikiArticle == -1) {
                System.err.printf("Wiki title of url %s (decoded %s) could not be found.%n", url, urlDecoded);
            }
            // The annotation is added even when the id lookup failed (id -1), exactly as
            // before. NOTE(review): confirm that downstream consumers expect -1 ids.
            annotations.add(new Annotation(position, length, wikiArticle));
        }
        return annotations;
    }
}