package doser.gerbilwrapper; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.List; import org.aksw.gerbil.transfer.nif.Document; import org.aksw.gerbil.transfer.nif.Marking; import org.aksw.gerbil.transfer.nif.TurtleNIFDocumentCreator; import org.aksw.gerbil.transfer.nif.TurtleNIFDocumentParser; import org.aksw.gerbil.transfer.nif.data.NamedEntity; import org.aksw.gerbil.transfer.nif.data.SpanImpl; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.ByteArrayEntity; import org.apache.http.entity.ContentType; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicHeader; import org.apache.http.params.BasicHttpParams; import org.apache.http.params.HttpConnectionParams; import org.apache.http.params.HttpParams; import org.apache.http.util.EntityUtils; import org.restlet.representation.Representation; import org.restlet.resource.Post; import org.restlet.resource.ServerResource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.gson.Gson; public class DoserResource extends ServerResource { public static final String DISAMBIGUATIONSERVICE = "http://theseus.dimis.fim.uni-passau.de:8080/doser-disambiguationserver/disambiguation/disambiguationWithoutCategories-collective"; // We take the entire context // public static final int CONTEXTAREA = 200; private static final Logger LOGGER = LoggerFactory .getLogger(DoserResource.class); private TurtleNIFDocumentParser parser = new TurtleNIFDocumentParser(); private TurtleNIFDocumentCreator creator = new TurtleNIFDocumentCreator(); @Post public String accept(Representation request) { Reader inputReader; try { inputReader = request.getReader(); } catch (IOException e) { LOGGER.error("Exception while reading request.", e); return ""; } // ... this is only the parsing of an incoming document Document document; try { document = parser.getDocumentFromNIFReader(inputReader); } catch (Exception e) { LOGGER.error("Exception while reading request.", e); return ""; } // If your system is only for entity linking, the document object // should already contain a list of markings // Now we have the text and a list of markings (this could be // empty or contain Span objects which would mark the named // entities inside the text) and could call your system for // performing the entity linking task... List<Marking> markings = document.getMarkings(); for (Marking mark : markings) { SpanImpl span = (SpanImpl) mark; // System.out.println(document.getText().substring( // span.getStartPosition(), // span.getStartPosition() + span.getLength()) // + " " // + extractContext(span.getStartPosition(), // document.getText())); } List<Marking> entities = new ArrayList<Marking>(markings.size()); if (markings.size() > 0) { DisambiguationRequest req = new DisambiguationRequest(); req.setDocsToReturn(1); req.setDocumentUri(document.getDocumentURI()); List<EntityDisambiguationDPO> dpoList = new ArrayList<EntityDisambiguationDPO>(); for (int i = 0; i < markings.size(); ++i) { SpanImpl span = (SpanImpl) markings.get(i); String sf = document.getText().substring( span.getStartPosition(), span.getStartPosition() + span.getLength()); System.out.println("Surface Form: "+sf); System.out.println("------------------------------------------------------------------------------------"); EntityDisambiguationDPO dpo = new EntityDisambiguationDPO(); dpo.setDocumentId(document.getDocumentURI()); String context = document.getText(); // extractContext(span.getStartPosition(), // document.getText()); dpo.setContext(context); dpo.setSelectedText(sf); dpo.setStartPosition(span.getStartPosition()); dpoList.add(dpo); }; System.out.println(document.getText()); req.setSurfaceFormsToDisambiguate(dpoList); HttpParams my_httpParams = new BasicHttpParams(); HttpConnectionParams.setConnectionTimeout(my_httpParams, 3000); HttpConnectionParams.setSoTimeout(my_httpParams, 0); DefaultHttpClient httpclient = new DefaultHttpClient(my_httpParams); HttpPost httppost = new HttpPost(DISAMBIGUATIONSERVICE); Header[] headers = { new BasicHeader("Accept", "application/json"), new BasicHeader("content-type", "application/json") }; httppost.setHeaders(headers); Gson gson = new Gson(); String json = null; json = gson.toJson(req); ByteArrayEntity ent = new ByteArrayEntity(json.getBytes(), ContentType.create("application/json")); httppost.setEntity(ent); HttpResponse response; StringBuffer buffer = new StringBuffer(); try { response = httpclient.execute(httppost); HttpEntity httpent = response.getEntity(); buffer.append(EntityUtils.toString(httpent)); } catch (ClientProtocolException e) { System.out.println(e); } catch (IOException e) { System.out.println(e); } finally { httpclient.getConnectionManager().shutdown(); } System.out.println(buffer.toString()); DisambiguationResponse disResponse = gson.fromJson( buffer.toString(), DisambiguationResponse.class); List<Response> responses = disResponse.getTasks(); // System.out.println("Responses Size:" +responses.size()); // for(Response res : responses) { // System.out.println("Response: "+res.getSelectedText()+res.getDisEntities()); // } // ... as result a list of NamedEntity or ScoredNamedEntity objects // should be created for the A2W or Sa2W tasks respectively. For // C2W, Rc2W or Sc2W you should create a list of Annotations or // ScoredAnnotations for (int i = 0; i < markings.size(); ++i) { SpanImpl span = (SpanImpl) markings.get(i); Response res = responses.get(i); if (res != null) { List<DisambiguatedEntity> disEntities = res .getDisEntities(); // System.out.println("Surface form: "+(document.getText().substring(span.getStartPosition(), span.getStartPosition() + span.getLength())) + "context: "+extractContext(span.getStartPosition(), // document.getText())); // System.out.println(disEntities.get(0).getEntityUri()); entities.add(new NamedEntity(span.getStartPosition(), span .getLength(), disEntities.get(0).getEntityUri())); } } } // ... this new list is added to the document and the document is // send back to GERBIL document.setMarkings(entities); String nifDocument = creator.getDocumentAsNIFString(document); return nifDocument; } }