import com.freebase.api.Freebase;
import com.freebase.json.JSON;
import com.knowledgebooks.rdf.RdfServiceProxy;
import com.knowledgebooks.rdf.SesameEmbeddedProxy;
import java.io.PrintWriter;
import java.util.*;
/**
* Copyright Mark Watson 2008-2010. All Rights Reserved.
* License: LGPL version 3 (http://www.gnu.org/licenses/lgpl-3.0.txt)
*/
/**
 * Helpers that look up entity names (people and places) with the Freebase
 * search services and print RDF statements describing the matches to a
 * {@link PrintWriter}.
 *
 * <p>All state is static: a blank-node counter, a set of "noise" words that are
 * never used as extra search terms, and an {@link RdfServiceProxy} that is used
 * to format latitude/longitude values.
 */
public class EntityToRdfHelpersFreebase {

    /**
     * Delay between two Freebase requests, in milliseconds.
     * Freebase terms of service: no more than 100,000 web service calls per day.
     * That is a rate of 1.157 calls per second, i.e. one call every 0.864 seconds.
     */
    private static final long SEARCH_DELAY_MILLIS = 864;

    /** Relevance score a search hit must exceed before any RDF is written for it. */
    private static final double MIN_RELEVANCE = 1.5;

    /** Source of randomness used by {@link #take(List, int)}. */
    private static final Random RANDOM = new Random();

    /**
     * Searches Freebase for every name in {@code name_list} and, for the first
     * sufficiently relevant hit per name, prints statements linking {@code url}
     * to a new blank node that carries the name and the Freebase id.
     *
     * <p>Each query is the name followed by randomly chosen words from
     * {@code possible_search_terms}. The number of extra words starts at
     * {@code min(10, possible_search_terms.size())} and is reduced by one after
     * every five unsuccessful queries; no query is made with fewer than three
     * extra words requested. The method pauses after every search to respect the
     * Freebase request limit; if the calling thread is interrupted during a
     * pause, the interrupt flag is restored and the method returns early.
     *
     * @param out                   destination for the generated statements
     * @param url                   URI of the web page the text came from
     * @param text                  page contents, written as a literal (double
     *                              quotes are replaced by single quotes)
     * @param name_type             entity type; {@code "person"} selects the
     *                              regular search, anything else the geo search
     * @param name_list             entity names to look up
     * @param possible_search_terms candidate words used to narrow each query
     */
    public static void processPeople(PrintWriter out, String url, String text, String name_type, List<String> name_list, List<String> possible_search_terms) {
        Freebase freebase = Freebase.getFreebase();
        for (String name : name_list) {
            int num_to_take = Math.min(10, possible_search_terms.size());
            search:
            while (num_to_take > 2) {
                for (int cycle = 0; cycle < 5; cycle++) {
                    String query = name;
                    List<String> search_terms = take(possible_search_terms, num_to_take);
                    for (String key_word : search_terms) {
                        if (!query.contains(key_word)) {
                            query += " " + key_word;
                        }
                    }
                    System.out.println(" * query: " + query);
                    JSON results;
                    if (name_type.equals("person")) {
                        results = freebase.search(query, new HashMap<String, String>()).get("result");
                    } else {
                        results = freebase.geosearch(query, new HashMap<String, String>()).get("result");
                    }
                    System.out.println("Freebase search results: " + results);
                    // A response without a "result" entry is treated like an empty result list.
                    int num_results = (results == null) ? 0 : results.length(); // I added this API
                    boolean matched = false;
                    if (num_results > 0) {
                        System.out.println(" * next result: " + results.get(0));
                        System.out.println(" * next relevance score: " + results.get(0).get("relevance:score"));
                        System.out.println(" * * * result: " + results.get(0));
                        String id = "" + results.get(0).get("id");
                        System.out.println(" * * * id: " + id);
                        Object relevance = results.get(0).get("relevance:score");
                        if (relevance != null && Float.parseFloat("" + relevance) > MIN_RELEVANCE) {
                            writeEntityStatements(out, url, text, name_type, name, id);
                            matched = true;
                        }
                    }
                    // Pause after every request, including a successful one, so the
                    // search for the next name does not follow immediately.
                    if (!pauseBetweenRequests()) {
                        return; // interrupted: stop issuing further requests
                    }
                    if (matched) {
                        break search;
                    }
                }
                num_to_take--;
            }
        }
    }

    /**
     * Prints the statements for one matched entity: the page type and contents,
     * the link from the page to a fresh blank node, and the blank node's type,
     * name and Freebase id (the id is written without its first character).
     */
    private static void writeEntityStatements(PrintWriter out, String url, String text, String name_type, String name, String id) {
        String blank_node = blankNodeURI(name_type);
        out.println("<" + url + "> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://knowledgebooks.com/rdf/webpage> .");
        out.println("<" + url + "> <http://knowledgebooks.com/rdf/contents> \"" + escapeLiteral(text.replace('"', '\'')) + "\" .");
        out.println("<" + url + "> <http://knowledgebooks.com/rdf/discusses/" + name_type + "> " + blank_node + " .");
        out.println("\n\n" + blank_node +
                " <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://knowledgebooks.com/rdfs/entity/" + name_type + "> .");
        // NOTE(review): the FOAF namespace is usually http://xmlns.com/foaf/0.1/ - confirm
        // whether "0.97" is intentional; left unchanged because existing data may rely on it.
        out.println("\n\n" + blank_node +
                " <http://xmlns.com/foaf/0.97/name> \"" + escapeLiteral(name) + "\" .");
        out.println("\n\n" + blank_node +
                " <http://knowledgebooks.com/rdf/freebase/id> \"" + id.substring(1) + "\" .");
    }

    /**
     * Looks up the latitude/longitude of every name in {@code name_list} and,
     * for each name that has one, prints statements linking {@code url} to a new
     * blank node carrying the name and the location literal.
     *
     * <p>The method pauses after every lookup to respect the Freebase request
     * limit; if the calling thread is interrupted during a pause, the interrupt
     * flag is restored and the method returns early.
     *
     * @param out       destination for the generated statements
     * @param url       URI of the web page that mentions the places
     * @param name_type entity type, used in the predicate and blank-node label
     * @param name_list place names to look up
     * @throws NullPointerException if a location was found but the static
     *                              {@code rdfServiceProxy} failed to initialize
     */
    public static void processPlaces(PrintWriter out, String url, String name_type, List<String> name_list) {
        FreebaseToRdf freebase = new FreebaseToRdf();
        for (String name : name_list) {
            LatLon latlon = freebase.geoSearchGetLatLon(name);
            System.out.println("Freebase search " + name + ", lat/lon results: " + latlon);
            if (latlon != null) {
                if (rdfServiceProxy == null) {
                    throw new NullPointerException("rdfServiceProxy was not initialized; cannot format lat/lon literal");
                }
                // Build the literal before printing anything so that a failure here
                // does not leave a half-written entity in the output.
                String literal = "" + rdfServiceProxy.latLonToLiteral(latlon.lat, latlon.lon);
                String blank_node = blankNodeURI(name_type);
                out.println("<" + url + "> <http://knowledgebooks.com/rdf/discusses/" + name_type + "> " + blank_node + " .");
                out.println(blank_node + " <http://knowledgebooks.com/rdf/name/> \"" + escapeLiteral(name) + "\" .");
                out.println(blank_node + " <http://knowledgebooks.com/rdf/location/> \"" + literal + "\" .");
            }
            if (!pauseBetweenRequests()) {
                return; // interrupted: stop issuing further requests
            }
        }
    }

    /**
     * Sleeps for {@link #SEARCH_DELAY_MILLIS} between two search requests.
     *
     * @return {@code true} if the full pause elapsed, {@code false} if the
     *         thread was interrupted (the interrupt flag is set again)
     */
    private static boolean pauseBetweenRequests() {
        try {
            Thread.sleep(SEARCH_DELAY_MILLIS);
            return true;
        } catch (InterruptedException interrupted) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Escapes the characters that may not appear unescaped inside a
     * double-quoted literal: backslash, double quote, line feed and carriage
     * return. All other characters are copied unchanged.
     */
    private static String escapeLiteral(String value) {
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /** Returns a new blank-node label of the form {@code _:fb<name_type><counter>}. */
    private static String blankNodeURI(String name_type) {
        return "_:fb" + name_type + blank_node_count++;
    }

    /** Counter appended to blank-node labels; incremented on every label created. */
    static long blank_node_count = 1;

    /**
     * Picks up to {@code num_to_take} distinct, non-noise entries of
     * {@code names} at random. Each slot gets at most ten random attempts, so
     * the result may be shorter than requested.
     *
     * @param names       candidate words
     * @param num_to_take maximum number of words to return
     * @return the chosen words, in the order they were picked; empty when
     *         {@code names} is empty
     */
    private static List<String> take(List<String> names, int num_to_take) {
        int size = names.size();
        List<String> ret = new ArrayList<String>(size);
        if (size == 0) {
            return ret; // nothing to sample from
        }
        for (int i = 0; i < num_to_take; i++) {
            for (int attempt = 0; attempt < 10; attempt++) {
                // nextInt(size) is uniform over every index, including the last one.
                String candidate = names.get(RANDOM.nextInt(size));
                if (!noise.contains(candidate.toLowerCase(Locale.ENGLISH)) && !ret.contains(candidate)) {
                    ret.add(candidate);
                    break;
                }
            }
        }
        return ret;
    }

    /** Lower-case words that are never used as additional search terms. */
    static Set<String> noise = new HashSet<String>();

    static {
        noise.addAll(Arrays.asList(
                "document", "formats", "company", "text", "system", "product",
                "documents", "services", "technology", "technologies",
                "implementing", "implement", "language", "manage", "management",
                "research", "library", "libraries", "languages", "implements",
                "standard", "standards", "project", "projects"));
    }

    static RdfServiceProxy rdfServiceProxy = null; // this is to get Lat/Lon RDF value

    static {
        try {
            rdfServiceProxy = new SesameEmbeddedProxy();
            rdfServiceProxy.createRepository("test-repo1"); // must have a repository open
        } catch (Exception ex) {
            // Initialization failure is only reported; rdfServiceProxy may stay null.
            ex.printStackTrace();
        }
    }
}