package lda.wikievidence.dataconstruction;
import hbase.operations.HBaseOperations;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
/**
 * Reads a tab-separated file of entities and their mentions and stores every
 * mention context as a record in the {@code LDADC_Context} HBase table.
 *
 * <p>Each input line is expected to look like
 * {@code <entity>\t|<surfaceForm>---<context>|<surfaceForm>---<context>...}:
 * the first column is the entity (used as row key), the second column is a
 * {@code |}-separated list of mentions, each mention being a surface form and
 * its context separated by {@code ---}.
 */
public class S3ConstructHBaseContext {

    /** Name of the HBase table the contexts are written to. */
    private static final String TABLE_NAME = "LDADC_Context";

    /** Column family the contexts are written to. */
    private static final String COLUMN_FAMILY = "data";

    /** Separator between a surface form and its context inside one mention. */
    private static final String MENTION_SEPARATOR = "---";

    /** A progress line is printed every time this many input lines were read. */
    private static final int PROGRESS_INTERVAL = 1000;

    /**
     * Reads the given file line by line and adds one HBase record per usable
     * mention. Progress (number of lines read so far) is printed to standard
     * output every {@value #PROGRESS_INTERVAL} lines and once more at the end.
     *
     * <p>I/O errors are reported via {@code printStackTrace()} and end the
     * import; they are not propagated to the caller.
     *
     * @param f path of the input file
     */
    public void createContextEntries(String f) {
        File file = new File(f);
        // try-with-resources guarantees the reader is closed even when reading
        // or the HBase call fails half-way through the file.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            int counter = 0;
            while ((line = reader.readLine()) != null) {
                counter++;
                storeLine(line);
                if (counter % PROGRESS_INTERVAL == 0) {
                    System.out.println(counter);
                }
            }
            System.out.println(counter);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }

    /**
     * Parses a single input line and writes its mention contexts to HBase.
     * Lines without a mention column and mentions without a context part are
     * skipped instead of aborting the whole import.
     *
     * @param line one raw line of the input file
     * @throws IOException declared so that an I/O failure raised while storing
     *         a record reaches the handler in {@link #createContextEntries}
     */
    private void storeLine(String line) throws IOException {
        String[] columns = line.split("\\t");
        if (columns.length < 2) {
            System.err.println("Skipping line without mention column: " + line);
            return;
        }

        // Strings are immutable: the stripped value has to be assigned.
        // String.replace works on the literal ".html" (no regex wildcard).
        String entity = columns[0].replace(".html", "");

        // Drop the first '|' so that splitting does not start with an empty mention.
        String[] mentions = columns[1].replaceFirst("\\|", "").split("\\|");
        for (String mention : mentions) {
            String[] parts = mention.split(MENTION_SEPARATOR);
            if (parts.length < 2) {
                // No "---" separated context available for this mention.
                continue;
            }
            String surfaceForm = parts[0].toLowerCase().trim();
            String context = parts[1];
            // Only surface forms longer than two characters are stored.
            if (surfaceForm.length() > 2) {
                // The context's hash code serves as the column qualifier.
                HBaseOperations.getInstance().addRecord(TABLE_NAME, entity, COLUMN_FAMILY,
                        String.valueOf(context.hashCode()), context);
            }
        }
    }

    /**
     * Command line entry point.
     *
     * @param args {@code args[0]} is the path of the input file
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: S3ConstructHBaseContext <input-file>");
            return;
        }
        S3ConstructHBaseContext s = new S3ConstructHBaseContext();
        s.createContextEntries(args[0]);
    }
}