/*
* Copyright (c) 2013, University of Toronto.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package edu.toronto.cs.xml2rdf.freebase;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import edu.toronto.cs.xml2rdf.interlink.Interlinker;
import edu.toronto.cs.xml2rdf.string.StringMetric;
import edu.toronto.cs.xml2rdf.utils.LogUtils;
public class FreeBaseLinker implements Interlinker {
boolean debug = true;
final static String freebaseTypePrefix = "http://rdf.freebase.com/rdf/";
@Override
public Set<String> findTypesForResource(String str, StringMetric metric, double threshold) {
LogUtils.info(this.getClass(), "str=" + str);
Set<String> types = new HashSet<String>();
String query = str.replaceAll("\\s", "+").replaceAll("%", "").replaceAll("\"", "");
final JSONArray json = FreebaseUtil.search(query, "resources/freebase/type_query.json", 5);
if (json == null) {
return null;
}
// Iterate through all elements in the array whose key name is "result"
for (int i = 0; i < json.size(); i++) {
// Get the element
JSONObject resultElement = (JSONObject) json.get(i);
// The boolean value to check if a element with name
// similar to the provided text value has been found
boolean same = false;
// Get the "name" of the element
String name = "";
try {
name = resultElement.getString("name");
} catch (Exception e) {
}
// Check if the name and the provided text value is similar
if (metric.getSimilarity(str, name) >= threshold) {
same = true;
} // If the name of the element is not similar, try to find
// if a similar alias exists
else {
JSONArray aliases = resultElement.getJSONArray("/common/topic/alias");
for (int j = 0; j < aliases.size(); j++) {
String alias = aliases.getString(j);
if (metric.getSimilarity(str, alias) >= threshold) {
same = true;
break;
}
}
}
// If the current element has a name or an alias that is similar
// to the provided text value
if (same) {
// Find the array of types of the element
JSONArray typeArray = resultElement.getJSONArray("type");
// Iterate through each type
for (int j = 0; j < typeArray.size(); j++) {
// For each type, get its typeID, which looks something like "/music/release"
String typeId = (((JSONObject) typeArray.get(j)).getString("id"));
// Add the prefix to the typeID, which now looks something like
// "http://rdf.freebase.com/rdf/music.release"
typeId = freebaseTypePrefix + typeId.substring(1).replaceAll("/", ".");
// Skip the current iteration if the typeID ends with "topic"
if (typeId.endsWith("topic")) {
continue;
}
// Add the typeID to types
types.add(typeId);
}
}
}
return types;
}
@Override
public String getLabelForResource(String uri) {
// TODO Auto-generated method stub
return null;
}
@Override
public Set<String> findSameAsForResource(String str, StringMetric metric,
double threshold, Set<String> types2) {
Set<String> ret = new HashSet<String>();
String query = str.replaceAll("\\s", "+").replaceAll("%", "").replaceAll("\"", "");
final JSONArray json = FreebaseUtil.search(query, "resources/freebase/type_query.json", 5);
for (int i = 0; i < json.size(); i++) {
JSONObject resultElement = (JSONObject) json.get(i);
boolean same = false;
String name = "";
try {
name = resultElement.getString("name");
} catch (Exception e) {
}
if (metric.getSimilarity(str, name) >= threshold) {
same = true;
} else {
JSONArray aliases = resultElement.getJSONArray("alias");
for (int j = 0; j < aliases.size(); j++) {
String alias = aliases.getString(j);
if (metric.getSimilarity(str, alias) >= threshold) {
same = true;
break;
}
}
}
if (same) {
boolean typeMatches = false;
JSONArray typeArray = resultElement.getJSONArray("type");
for (int j = 0; j < typeArray.size(); j++) {
String typeId = ((JSONObject) typeArray.get(j)).getString("id");
typeId = freebaseTypePrefix + typeId.substring(1).replaceAll("/", ".");
if (typeId.endsWith("topic")) {
continue;
}
if (types2.contains(typeId)) {
typeMatches = true;
break;
}
}
if (typeMatches) {
ret.add(freebaseTypePrefix + resultElement.getString("id").substring(1).replace('/', '.'));
}
}
}
return ret;
}
public static void main(String[] args) {
System.out.println(new FreeBaseLinker().findTypesForResource("united states", null, 0));
}
@Override
public Map<String, Set<String>> findTypesForResources(List<String> str,
StringMetric metric, double threshold) {
// TODO Auto-generated method stub
return null;
}
final private static int NUMBER_OF_CONCURRENT_THREADS = 6;
@Override
public Map<String, Set<String>> findSameAsForResources(List<String> str,
StringMetric metric, double threshold, Set<String> types) {
Map<String, Set<String>> resultMap = new HashMap<String, Set<String>>();
// TypeFetcher thread = new TypeFetcher(term, metric, threshold, resultMap );
//
// thread.
return resultMap;
}
class TypeFetcher extends Thread {
String term;
private StringMetric metric;
private double threshold;
private Map<String, Set<String>> resultMap;
public TypeFetcher(String term, StringMetric metric, double threshold, Map<String, Set<String>> resultMap) {
this.term = term;
this.threshold = threshold;
this.metric = metric;
this.resultMap = resultMap;
}
@Override
public void run() {
Set<String> types = findTypesForResource(term, metric, threshold);
resultMap.put(term, types);
}
}
}