/*
 * Copyright (c) 2013, University of Toronto.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package edu.toronto.cs.xml2rdf.freebase;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.sf.json.JSON;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import net.sf.json.JSONSerializer;
import edu.toronto.cs.xml2rdf.interlink.Interlinker;
import edu.toronto.cs.xml2rdf.string.StringMetric;
import edu.toronto.cs.xml2rdf.string.StringUtils;

/**
 * {@link Interlinker} implementation that queries the legacy Freebase search
 * service and maps the returned topics and types to
 * {@code http://rdf.freebase.com/rdf/...} URIs.
 *
 * <p>IMPORTANT (Eric): the search URL used here is no longer working and the
 * new URL requires an API key. See
 * https://developers.google.com/freebase/v1/getting-started#/service/search
 *
 * <p>FIXME: the query URL for Freebase needs to be updated with an API key.
 */
public class FreeBaseLinkerOld implements Interlinker {

  /** When true, I/O failures are printed to stderr before being swallowed. */
  boolean debug = true;

  final static String domainName = "www.freebase.com";
  final static String SearchURL = "/api/service/search";
  final static String queryParam = "query";
  final static String freebaseTypePrefix = "http://rdf.freebase.com/rdf/";

  /**
   * Finds the Freebase types of every search result whose name, or one of
   * whose aliases, is similar to the given text.
   *
   * @param str the text to search for
   * @param metric similarity metric used to compare {@code str} with result
   *     names and aliases
   * @param threshold minimum similarity (inclusive) for a result to count
   * @return the type URIs of all matching results (types ending in "topic"
   *     are skipped); {@code null} if the response is empty or not a JSON
   *     object, or if the request fails with an I/O error
   */
  @Override
  public Set<String> findTypesForResource(String str, StringMetric metric,
      double threshold) {
    try {
      JSONArray results = fetchSearchResults(str);
      if (results == null) {
        return null;
      }

      Set<String> types = new HashSet<String>();
      for (int i = 0; i < results.size(); i++) {
        JSONObject resultElement = results.optJSONObject(i);
        if (resultElement != null
            && matchesNameOrAlias(resultElement, str, metric, threshold)) {
          types.addAll(collectTypeUris(resultElement));
        }
      }
      return types;
    } catch (MalformedURLException e) {
      reportFailure(e);
    } catch (IOException e) {
      reportFailure(e);
    }
    return null;
  }

  /** Not implemented; always returns {@code null}. */
  @Override
  public String getLabelForResource(String uri) {
    // TODO Auto-generated method stub
    return null;
  }

  /**
   * Finds Freebase URIs of search results that are similar to the given text
   * and that have at least one type contained in {@code types2}.
   *
   * @param str the text to search for
   * @param metric similarity metric used to compare {@code str} with result
   *     names and aliases
   * @param threshold minimum similarity (inclusive) for a result to count
   * @param types2 type URIs a result must share at least one of
   * @return the URIs of all matching results; an empty set if the request
   *     fails with an I/O error; {@code null} if the response is empty or
   *     not a JSON object
   */
  @Override
  public Set<String> findSameAsForResource(String str, StringMetric metric,
      double threshold, Set<String> types2) {
    Set<String> ret = new HashSet<String>();
    try {
      JSONArray results = fetchSearchResults(str);
      if (results == null) {
        return null;
      }

      for (int i = 0; i < results.size(); i++) {
        JSONObject resultElement = results.optJSONObject(i);
        if (resultElement == null
            || !matchesNameOrAlias(resultElement, str, metric, threshold)) {
          continue;
        }

        boolean typeMatches = false;
        for (String typeUri : collectTypeUris(resultElement)) {
          if (types2.contains(typeUri)) {
            typeMatches = true;
            break;
          }
        }

        // Ids too short to strip the leading '/' from cannot be turned into a URI.
        String id = resultElement.optString("id", "");
        if (typeMatches && id.length() > 1) {
          ret.add(toFreebaseUri(id));
        }
      }
    } catch (MalformedURLException e) {
      reportFailure(e);
    } catch (IOException e) {
      reportFailure(e);
    }
    return ret;
  }

  /**
   * Builds the search URL for the given text.
   *
   * <p>Every whitespace character is treated as a single space and the
   * characters {@code %} and {@code "} are dropped; the remainder is
   * URL-encoded so that characters such as {@code +}, {@code &} or non-ASCII
   * letters reach the service intact instead of corrupting the query string.
   */
  private static URL buildSearchUrl(String str) throws IOException {
    String cleaned = str.replaceAll("\\s", " ").replace("%", "").replace("\"", "");
    return new URL("http://" + domainName + SearchURL + "?" + queryParam + "="
        + URLEncoder.encode(cleaned, "UTF-8"));
  }

  /**
   * Runs the search request and returns the "result" array of the response.
   *
   * @return the result array (empty if the response has no "result" array),
   *     or {@code null} if the response is empty or not a JSON object
   */
  private JSONArray fetchSearchResults(String str) throws IOException {
    String content;
    InputStream in = buildSearchUrl(str).openStream();
    try {
      content = StringUtils.getContent(in);
    } finally {
      // Always release the connection, even if reading fails.
      in.close();
    }

    JSON obj = JSONSerializer.toJSON(content);
    if (!(obj instanceof JSONObject) || obj.isEmpty()) {
      return null;
    }
    JSONArray results = ((JSONObject) obj).optJSONArray("result");
    return results != null ? results : new JSONArray();
  }

  /**
   * Tells whether the result's "name", or failing that any entry of its
   * "alias" array, reaches the similarity threshold against {@code str}.
   * A missing name is compared as the empty string; a missing alias array is
   * treated as having no aliases.
   */
  private static boolean matchesNameOrAlias(JSONObject resultElement, String str,
      StringMetric metric, double threshold) {
    String name = resultElement.optString("name", "");
    if (metric.getSimilarity(str, name) >= threshold) {
      return true;
    }

    JSONArray aliases = resultElement.optJSONArray("alias");
    if (aliases == null) {
      return false;
    }
    for (int j = 0; j < aliases.size(); j++) {
      if (metric.getSimilarity(str, aliases.getString(j)) >= threshold) {
        return true;
      }
    }
    return false;
  }

  /**
   * Collects the URIs of the entries of the result's "type" array, skipping
   * any whose URI ends with "topic" and any entry without a usable "id".
   */
  private static Set<String> collectTypeUris(JSONObject resultElement) {
    Set<String> uris = new HashSet<String>();
    JSONArray typeArray = resultElement.optJSONArray("type");
    if (typeArray == null) {
      return uris;
    }
    for (int j = 0; j < typeArray.size(); j++) {
      JSONObject type = typeArray.optJSONObject(j);
      String id = type == null ? "" : type.optString("id", "");
      if (id.length() <= 1) {
        continue;
      }
      String typeUri = toFreebaseUri(id);
      if (!typeUri.endsWith("topic")) {
        uris.add(typeUri);
      }
    }
    return uris;
  }

  /**
   * Converts an id such as "/music/release" into
   * "http://rdf.freebase.com/rdf/music.release": the first character is
   * dropped and every remaining '/' becomes '.'.
   */
  private static String toFreebaseUri(String id) {
    return freebaseTypePrefix + id.substring(1).replace('/', '.');
  }

  /** Prints the stack trace of a swallowed failure when {@link #debug} is set. */
  private void reportFailure(Exception e) {
    if (debug) {
      e.printStackTrace();
    }
  }

  public static void main(String[] args) {
    // NOTE(review): the metric passed here is null, so the similarity check
    // will throw as soon as the service returns a result to compare.
    System.out.println(new FreeBaseLinkerOld().findTypesForResource("united states", null, 0));
  }

  /** Not implemented; always returns {@code null}. */
  @Override
  public Map<String, Set<String>> findTypesForResources(List<String> str,
      StringMetric metric, double threshold) {
    // TODO Auto-generated method stub
    return null;
  }

  // Not referenced anywhere in this class yet.
  final private static int NUMBER_OF_CONCURRENT_THREADS = 6;

  /**
   * Not implemented; ignores its arguments and always returns a new, empty
   * map.
   */
  @Override
  public Map<String, Set<String>> findSameAsForResources(List<String> str,
      StringMetric metric, double threshold, Set<String> types) {
    Map<String, Set<String>> resultMap = new HashMap<String, Set<String>>();

    // TypeFetcher thread = new TypeFetcher(term, metric, threshold, resultMap);
    //
    // thread.

    return resultMap;
  }

  /**
   * Thread that looks up the types of a single term with
   * {@link FreeBaseLinkerOld#findTypesForResource} and stores the outcome
   * (possibly {@code null}) in the supplied map under that term. The map is
   * written without any synchronization by this class.
   */
  class TypeFetcher extends Thread {
    String term;
    private StringMetric metric;
    private double threshold;
    private Map<String, Set<String>> resultMap;

    public TypeFetcher(String term, StringMetric metric, double threshold,
        Map<String, Set<String>> resultMap) {
      this.term = term;
      this.threshold = threshold;
      this.metric = metric;
      this.resultMap = resultMap;
    }

    @Override
    public void run() {
      Set<String> types = findTypesForResource(term, metric, threshold);
      resultMap.put(term, types);
    }
  }
}