package doser.webclassify.annotation; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import org.apache.http.Header; import org.apache.http.NameValuePair; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.entity.ByteArrayEntity; import org.apache.http.entity.ContentType; import org.apache.http.message.BasicHeader; import org.apache.http.message.BasicNameValuePair; import org.apache.log4j.Logger; import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.map.JsonMappingException; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jettison.json.JSONArray; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; import doser.entitydisambiguation.dpo.DisambiguatedEntity; import doser.entitydisambiguation.table.logic.Type; import doser.general.HelpfulMethods; import doser.language.Languages; import doser.tools.RDFGraphOperations; import doser.tools.ServiceQueries; import doser.webclassify.dpo.WebSite; import doser.webclassify.dpo.WebTypeRequest_Deprecated; import doser.webclassify.dpo.WebTypeResponse_Deprecated; public class AnnotateCategories { public static final String TYPEQUERYURL = "http://theseus.dimis.fim.uni-passau.de:8080/doser-disambiguationserver/webclassify/types"; public AnnotateCategories() { super(); } public void annotateCategories(DisambiguatedEntity entity, Languages lang) { Set<Type> types = null; if (lang.equals(Languages.german)) { types = RDFGraphOperations.getDbpediaCategoriesFromEntity_GER(entity.getEntityUri()); } else { types = RDFGraphOperations.getDbpediaCategoriesFromEntity(entity.getEntityUri()); } entity.setCategories(types); } @SuppressWarnings("deprecation") public void annotateCategory(WebSite website) { Set<Type> types = queryWebsiteTypes(website); } @SuppressWarnings("deprecation") private Set<Type> queryWebsiteTypes(WebSite page) { Set<Type> pageTypes = new TreeSet<Type>(); ArrayList<NameValuePair> postParameters = new ArrayList<NameValuePair>(); postParameters.add(new BasicNameValuePair("text", page.getText())); postParameters.add(new BasicNameValuePair("confidence", "0.2")); postParameters.add(new BasicNameValuePair("support", "20")); UrlEncodedFormEntity ent = null; try { ent = new UrlEncodedFormEntity(postParameters); } catch (UnsupportedEncodingException e1) { Logger.getRootLogger().error("Error:", e1); } Header[] headers = { new BasicHeader("Accept", "application/json") }; if (ent != null) { String resStr = ServiceQueries.httpPostRequest("http://theseus.dimis.fim.uni-passau.de:8061/rest/annotate", ent, headers); JSONObject resultJSON = null; JSONArray entities = null; try { resultJSON = new JSONObject(resStr); entities = resultJSON.getJSONArray("Resources"); Set<String> entitySet = new HashSet<String>(); List<String> entityList = new LinkedList<String>(); for (int i = 0; i < entities.length(); i++) { JSONObject obj = entities.getJSONObject(i); String e = obj.getString("@URI"); entitySet.add(e); entityList.add(e); } List<String> testList = new LinkedList<String>(); testList.addAll(entitySet); WebTypeRequest_Deprecated req = new WebTypeRequest_Deprecated(); req.setEntities(entitySet); final ObjectMapper mapper = new ObjectMapper(); String json = null; byte[] jsonByteString = null; try { json = mapper.writeValueAsString(req); jsonByteString = json.getBytes("UTF-8"); } catch (final JsonParseException e) { Logger.getRootLogger().error("Error:", e); } catch (final JsonMappingException e1) { Logger.getRootLogger().error(e1.getStackTrace()); } catch (final IOException e2) { Logger.getRootLogger().error(e2.getStackTrace()); } Header[] headersTypeQuery = { new BasicHeader("Accept", "application/json"), new BasicHeader("content-type", "application/json") }; ByteArrayEntity bytes = new ByteArrayEntity(jsonByteString, ContentType.create("application/json")); resStr = ServiceQueries.httpPostRequest(TYPEQUERYURL, bytes, headersTypeQuery); WebTypeResponse_Deprecated response = null; try { response = mapper.readValue(resStr, WebTypeResponse_Deprecated.class); } catch (final JsonParseException e) { Logger.getRootLogger().error(e.getStackTrace()); } catch (final JsonMappingException e1) { Logger.getRootLogger().error(e1.getStackTrace()); } catch (final IOException e2) { Logger.getRootLogger().error(e2.getStackTrace()); } Map<String, Set<String>> map = response.getTypes(); createDistribution(map, testList); } catch (JSONException e) { e.printStackTrace(); } } return pageTypes; } private void createDistribution(final Map<String, Set<String>> map, List<String> entities) { Map<String, Integer> distribution = new HashMap<String, Integer>(); for (String s : entities) { Set<String> set = map.get(s); for (String str : set) { if (distribution.containsKey(str)) { int i = distribution.get(str); distribution.put(str, ++i); } else { distribution.put(str, 1); } } } List<Map.Entry<String, Integer>> entries = HelpfulMethods.sortByValue(distribution); int topK = 0; for (Map.Entry<String, Integer> entry : entries) { if (topK < 100) { System.out.println(entry.getKey() + "\t" + entry.getValue()); } topK++; } } }