package features;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import senna.NounPhraseExtractor;
import senna.RunSenna;
import tathya.semantics.datasource.FreebaseWrapper;
import com.freebase.json.JSON;
import cs224n.util.Counter;
public class FreebaseFeatures implements IFeatures {
NounPhraseExtractor nounPhraseExtractor = new NounPhraseExtractor();
public void getTopFeatures(){
Counter<String> fbClasses = new Counter<String>();
Counter<String> entitiesCounter = new Counter<String>();
RunSenna rs = new RunSenna();
HashMap<String, HashSet<String>> classToEntities = new HashMap<String, HashSet<String>>();
HashMap<String, HashSet<String>> entityToClasses = new HashMap<String, HashSet<String>>();
// ArrayList<String> entities = npExtractor.getNounPhrases(sennaOut);
// //ArrayList<String> features = new ArrayList<String>();
// for(String entity : entities){
// entitiesCounter.incrementCount(entity, 1.0);
// List<JSON> types = fb.getTypes(entity,70);
// if(types != null) {
// for(JSON type : types) {
// if(type.get("id")==null)
// continue;
// String ID = type.get("id").string().trim();
// for (String fbType : ID.split("/")) {
// if(!fb.domains.contains(fbType))
// continue;
// fbClasses.incrementCount(fbType, 1.0);
// if (classToEntities.containsKey(fbType)) {
// classToEntities.get(fbType).add(entity);
// } else {
// HashSet<String> entitiesInText = new HashSet<String>();
// entitiesInText.add(entity);
// classToEntities.put(fbType,
// entitiesInText);
// }
// if (entityToClasses.containsKey(entity)) {
// entityToClasses.get(entity).add(fbType);
// } else {
// HashSet<String> classesInText = new HashSet<String>();
// classesInText.add(fbType);
// entityToClasses.put(entity,
// classesInText);
// }
// }
// }
// }
// }
}
public List<String> getFeatures(String text){
ArrayList<String> freebaseFeatures = new ArrayList<String>();
FreebaseWrapper fb = FreebaseWrapper.getInstance();
//call senna
RunSenna rs = new RunSenna();
String sennaOut = rs.getSennaOutput(text.trim());
//get Noun Phrases
ArrayList<String> nounPhrases = nounPhraseExtractor.getNounPhrases(sennaOut);
for (String entity : nounPhrases) {
List<JSON> types = fb.getTypes(entity, 70);
if (types != null) {
for (JSON type : types) {
if (type.get("id") == null)
continue;
String ID = type.get("id").string().trim();
for (String fbType : ID.split("/")) {
if (!fb.domains.contains(fbType))
continue;
freebaseFeatures.add(fbType);
}
}
}
}
return freebaseFeatures;
}
}