package experiments.collective.entdoccentric;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Random;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Loads occurrence counts from a Lucene index and serves them as log-scaled priors.
 *
 * <p>For every document id visited, the stored field {@code "occurences"} is parsed as a
 * {@code ;;;}-separated list of {@code surfaceForm:::count} entries. Each count is passed
 * through {@link #generateNr(int)} (random down-sampling) before being stored, so the loaded
 * numbers differ between runs.
 *
 * <p>Two lookups are offered:
 * <ul>
 *   <li>{@link #getPriorOfDocument(int)} - log of the summed sampled counts of a document;</li>
 *   <li>{@link #getSensePriorOfDocument(int, String)} - log of the sampled count of one
 *       surface form within a document.</li>
 * </ul>
 *
 * <p>Surface forms are keyed by {@link String#hashCode()}, so two different forms with the
 * same hash code share one entry.
 */
public class PriorLoader {

    /**
     * Index location used by {@link #initializeDisambiguationFramework()}.
     * NOTE(review): machine-specific path; a removed commented-out line suggested reading it
     * from {@code Properties.getInstance().getEntityCentricKBLocation()} - confirm and switch.
     */
    private static final String DEFAULT_INDEX_PATH =
            "/home/quh/Arbeitsfläche/Entpackung/Arbeitsfläche/Code_Data/LuceneCorpora/Lucene 4.1/NoStemmingKnowledgeBaseCalbCSmallBackup/";

    /** Name of the stored field holding the occurrence list (spelling matches the index). */
    private static final String OCCURRENCES_FIELD = "occurences";

    /** Separator between two entries of the occurrence list. */
    private static final String ENTRY_SEPARATOR = ";;;";

    /** Separator between the surface form and its count inside one entry. */
    private static final String COUNT_SEPARATOR = ":::";

    /** Count assumed when an entry has no count or the count is not a valid integer. */
    private static final int FALLBACK_COUNT = 1;

    /** Each counted occurrence survives sampling with probability 1 / SAMPLING_DENOMINATOR. */
    private static final int SAMPLING_DENOMINATOR = 25;

    /** A progress line is printed every time this many document ids have been visited. */
    private static final int PROGRESS_INTERVAL = 50000;

    /** Shared generator; avoids allocating and seeding a new Random for every sampled count. */
    private static final Random RANDOM = new Random();

    // Both maps start empty so that lookups made before loading return 0 instead of failing.
    private static HashMap<Integer, Integer> priorHashMap = new HashMap<Integer, Integer>();
    private static HashMap<Integer, HashMap<Integer, Integer>> sensePriorHashMap =
            new HashMap<Integer, HashMap<Integer, Integer>>();

    /**
     * Loads the priors from the default index location.
     */
    public static void initializeDisambiguationFramework() {
        initializeDisambiguationFramework(DEFAULT_INDEX_PATH);
    }

    /**
     * Loads the priors from the Lucene index stored at the given directory.
     *
     * @param indexPath file-system path of the index directory
     * @throws IllegalArgumentException if {@code indexPath} is {@code null}
     */
    public static void initializeDisambiguationFramework(String indexPath) {
        if (indexPath == null) {
            throw new IllegalArgumentException("indexPath must not be null");
        }
        createPriorHashMap(indexPath);
    }

    /**
     * Replaces both prior maps with the contents of the index at {@code indexPath}.
     *
     * <p>An {@link IOException} is reported on standard error and loading stops; entries
     * read up to that point stay available. The reader and the directory are closed in
     * every case.
     */
    private static void createPriorHashMap(String indexPath) {
        priorHashMap = new HashMap<Integer, Integer>();
        sensePriorHashMap = new HashMap<Integer, HashMap<Integer, Integer>>();
        int overallAnnos = 0;
        try {
            Directory dir = FSDirectory.open(new File(indexPath));
            try {
                IndexReader iReader = DirectoryReader.open(dir);
                try {
                    // NOTE(review): numDocs() counts live documents only. If the index
                    // contains deletions, ids in [numDocs, maxDoc) are never visited -
                    // confirm the index is deletion-free or iterate live docs up to maxDoc().
                    int docCount = iReader.numDocs();
                    for (int docId = 0; docId < docCount; docId++) {
                        if (docId % PROGRESS_INTERVAL == 0) {
                            System.out.println("Loaded Entities: "+docId);
                        }
                        String val = iReader.document(docId).get(OCCURRENCES_FIELD);
                        if (val == null || val.isEmpty()) {
                            continue;
                        }
                        HashMap<Integer, Integer> senseCounts = new HashMap<Integer, Integer>();
                        int priorVal = parseOccurrences(val, senseCounts);
                        overallAnnos += priorVal;
                        priorHashMap.put(docId, priorVal);
                        sensePriorHashMap.put(docId, senseCounts);
                    }
                } finally {
                    iReader.close();
                }
            } finally {
                dir.close();
            }
        } catch (IOException e) {
            // Best effort: keep whatever was loaded so far, but make the failure visible.
            System.err.println("Could not load priors from index: " + indexPath);
            e.printStackTrace();
        }
        System.out.println("OverallAnnos: "+overallAnnos);
    }

    /**
     * Parses one occurrence list, stores the sampled count of every surface form in
     * {@code senseCounts} (keyed by the form's hash code) and returns their sum.
     */
    private static int parseOccurrences(String occurrences, HashMap<Integer, Integer> senseCounts) {
        int total = 0;
        for (String entry : occurrences.split(ENTRY_SEPARATOR)) {
            if (entry.isEmpty()) {
                // e.g. a leading separator; there is neither a surface form nor a count
                continue;
            }
            String[] parts = entry.split(COUNT_SEPARATOR);
            int sampled = generateNr(parseCount(parts));
            total += sampled;
            // NOTE(review): the form is stored as-is, while getSensePriorOfDocument looks up
            // the lower-cased keyword - presumably the index holds lower-case forms; verify.
            senseCounts.put(parts[0].hashCode(), sampled);
        }
        return total;
    }

    /**
     * Returns the count found at index 1 of a split entry, or {@link #FALLBACK_COUNT} when
     * the entry has no count part or the count is not a valid integer.
     */
    private static int parseCount(String[] parts) {
        if (parts.length < 2) {
            return FALLBACK_COUNT;
        }
        try {
            return Integer.parseInt(parts[1]);
        } catch (NumberFormatException ignored) {
            // deliberately tolerated: a malformed count is treated like a single occurrence
            return FALLBACK_COUNT;
        }
    }

    /**
     * Randomly down-samples a count: performs {@code basicNr} independent trials, each
     * succeeding with probability 1/25, and returns the number of successes.
     *
     * @param basicNr number of trials; values below 1 yield 0
     * @return number of successful trials, between 0 and {@code basicNr}
     */
    private static int generateNr(int basicNr) {
        int kept = 0;
        for (int trial = 0; trial < basicNr; trial++) {
            if (RANDOM.nextInt(SAMPLING_DENOMINATOR) == 0) {
                kept++;
            }
        }
        return kept;
    }

    /**
     * Returns the natural logarithm of the summed sampled counts of a document.
     *
     * @param docId Lucene document id
     * @return the log prior, or 0 if the document is unknown or its summed count is not
     *         positive (the logarithm would not be finite)
     */
    public static float getPriorOfDocument(int docId) {
        return logOrZero(priorHashMap.get(docId));
    }

    /**
     * Returns the natural logarithm of the sampled count of a surface form in a document.
     * The keyword is lower-cased before its hash code is looked up.
     *
     * @param docId   Lucene document id
     * @param keyword surface form to look up; {@code null} is treated as unknown
     * @return the log prior, or 0 if the document or keyword is unknown or the stored count
     *         is not positive (the logarithm would not be finite)
     */
    public static float getSensePriorOfDocument(int docId, String keyword) {
        if (keyword == null) {
            return 0f;
        }
        HashMap<Integer, Integer> senseCounts = sensePriorHashMap.get(docId);
        if (senseCounts == null) {
            return 0f;
        }
        // Locale.ROOT keeps the lower-casing independent of the machine's default locale.
        int key = keyword.toLowerCase(java.util.Locale.ROOT).hashCode();
        return logOrZero(senseCounts.get(key));
    }

    /**
     * Returns {@code ln(count)} as a float, or 0 when the count is absent or not positive.
     */
    private static float logOrZero(Integer count) {
        if (count == null || count <= 0) {
            return 0f;
        }
        return (float) Math.log(count);
    }
}