package tathya.semantics.datasource; import java.util.ArrayList; import java.util.List; import org.json.simple.JSONObject; import senna.NounPhraseExtractor; import senna.RunSenna; import tathya.db.YahooBOSS; import com.freebase.api.Freebase; import com.freebase.json.JSON; import cs224n.util.Counter; import cs224n.util.PriorityQueue; public class FreebaseWrapper { private static FreebaseWrapper instance = null; private Freebase fb = null; private FreebaseWrapper() { fb = Freebase.getFreebase(); fb.sign_in("tathya", "tathya"); } public static FreebaseWrapper getInstance() { if (instance == null) { instance = new FreebaseWrapper(); } return instance; } public List<JSON> getTypes(String query, double relevance) { JSON json = this.fb.search(query); //System.out.println(json); if((json.get("result")).array().size()==0) return null; List<JSON> typeEntities = ((json.get("result").get(0)).get("type")).array(); double rScore = (Double) json.get("result").get(0).get("relevance:score").value(); if(rScore > relevance) { return typeEntities; } return null; } @SuppressWarnings("unchecked") public List<JSON> getTypes(JSON entity) { // if((entity.get("types")).array().size()==0) // return null; List<JSON> types = (entity.get("type")).array(); // double rScore = (Double) // entity.get("result").get(0).get("relevance:score").value(); // if(rScore > relevance) { return types; // } // return null; } public List<JSON> getEntities(String query, double relevance) { // JSON options = JSON.o("limit", "50"); JSON json = this.fb.search(query); System.out.println(json); if ((json.get("result")).array().size() == 0) return null; List<JSON> entities = new ArrayList<JSON>(); for (int i = 0; i < json.get("result").array().size(); i++) { String t = json.get("result").get(i).get("id").toString(); t = t.replace("\"", ""); t = t.replace("\\", ""); t = t.replace("_", " "); if (t.matches("^.user.*") || t.matches("^.base.*") || t.matches("^.common.*") || t.matches("^.m/.*") || t.matches("^.soft/.*")) { continue; } entities.add((JSON) json.get("result").get(i)); } double rScore = (Double) json.get("result").get(0) .get("relevance:score").value(); if (rScore > relevance) { return entities; } return null; } public List<JSON> getRankedEntities(String entityStr, double relevance, String context) { // List<String> contextPhrases = com.personalityextractor.entity.extractor.NounPhraseExtractor // .extract(context); List<String> contextPhrases = null; ArrayList<JSON> rankedEntities = new ArrayList<JSON>(); List<JSON> entities = getEntities(entityStr, relevance); PriorityQueue<JSON> queue = new PriorityQueue<JSON>(); StringBuffer contextQuery = new StringBuffer(); for (String c : contextPhrases) { contextQuery.append("\"" + c + "\"" + " "); } int entityCount = YahooBOSS.makeQuery('"' + entityStr + '"'); System.out.println(contextQuery.toString()); if (entities != null) { for (JSON entity : entities) { String t = (String) entity.get("id").value(); t = t.replace("\"", ""); t = t.replace("\\", ""); t = t.replace("_", " "); String[] tokens = t.split("/"); int count = YahooBOSS.makeQuery("\"" + tokens[tokens.length - 1] + "\"" + " \"" + (String) entity.get("name").value() + " " + contextQuery.toString()); queue.add(entity, ((double) count / (double) entityCount)); // System.out.println(t + "\t" + ((double) count/(double) // entityCount)); } } while (queue.hasNext()) { rankedEntities.add(queue.next()); } return rankedEntities; } public List<String> getRankedTypes(JSON entity) { return getRankedTypes(entity, null); } /* * Sort the freebase types in descending order of relevance */ public List<String> getRankedTypes(String entity, double relevance) { ArrayList<String> rankedTypes = new ArrayList<String>(); List<JSON> types = getTypes(entity, relevance); PriorityQueue<String> queue = new PriorityQueue<String>(); int entityCount = YahooBOSS.makeQuery('"' + entity + '"'); //System.out.println(entityCount); if(types != null) { for(JSON type : types) { String t = type.get("id").toString(); t = t.replace("\"", ""); t = t.replace("\\", ""); t = t.replace("_", " "); if(t.matches("^.user.*") || t.matches("^.base.*") || t.matches("^.common.*") || t.matches("^.m/.*")) { continue; } String[] tokens = t.split("/"); StringBuffer query = new StringBuffer(entity); String origQuery = query.toString(); for(String tok : tokens) { if(tok.equalsIgnoreCase("")) { continue; } query.append(" \"" + tok + "\""); } int count = YahooBOSS.makeQuery(query.toString()); queue.add(query.toString().substring(origQuery.length(), query.length()).trim(), ((double) count/(double) entityCount)); //System.out.println(t + "\t" + ((double) count/(double) entityCount)); } } while(queue.hasNext()){ rankedTypes.add(queue.next()); } return rankedTypes; } public List<String> getRankedTypes(JSON entity, String context) { // List<String> contextPhrases = com.personalityextractor.entity.extractor.NounPhraseExtractor.extract(context); List<String> contextPhrases = null; ArrayList<String> rankedTypes = new ArrayList<String>(); List<JSON> types = getTypes(entity); PriorityQueue<String> queue = new PriorityQueue<String>(); StringBuffer contextQuery = new StringBuffer(); for (String c : contextPhrases) { contextQuery.append("\"" + c + "\"" + " "); } int entityCount = YahooBOSS.makeQuery('"' + (String) entity.get("name") .value() + '"'); // System.out.println(contextQuery.toString()); if (types != null) { for (JSON type : types) { String t = type.get("id").toString(); t = t.replace("\"", ""); t = t.replace("\\", ""); t = t.replace("_", " "); if (t.matches("^.user.*") || t.matches("^.base.*") || t.matches("^.common.*") || t.matches("^.m/.*")) { continue; } String[] tokens = t.split("/"); StringBuffer query = new StringBuffer((String) entity.get( "name").value()); for (String tok : tokens) { if (tok.equalsIgnoreCase("")) { continue; } query.append(" \"" + tok + "\""); } int count = YahooBOSS.makeQuery(query.toString() + " " + contextQuery.toString()); queue.add(query.toString(), ((double) count / (double) entityCount)); // System.out.println(t + "\t" + ((double) count/(double) // entityCount)); } } while (queue.hasNext()) { rankedTypes.add(queue.next()); } return rankedTypes; } public static void main(String[] args) { FreebaseWrapper fb = FreebaseWrapper.getInstance(); String query = "giants"; String context1 = "giants are a great baseball team"; String context2 = "giants are a great football team"; List<JSON> entities = fb.getRankedEntities(query, 0, context1); for (int i = 0; i < entities.size(); i++) { JSON entity = entities.get(i); System.out.println("Entity:\t" + (String) entity.get("name").value() + "\t" + (String) entity.get("id").value()); List<String> types = fb.getRankedTypes(entity, context1); for(String type : types) { System.out.println(type); } if(i == 3) { break; } System.out.println("\n--------------------------------\n"); } System.out.println(" \n\n################################\n\n "); entities = fb.getRankedEntities(query, 0, context2); for (int i = 0; i < entities.size(); i++) { JSON entity = entities.get(i); System.out.println("Entity:\t" + (String) entity.get("name").value() + "\t" + (String) entity.get("id").value()); List<String> types = fb.getRankedTypes(entity, context2); for(String type : types) { System.out.println(type); } if(i == 3) { break; } System.out.println("\n--------------------------------\n"); } } }