package lda.wikievidence.dataconstruction;
import hbase.operations.HBaseOperations;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
/**
 * Loads entity / surface-form pairs from a tab-separated file into the two
 * HBase tables {@code LDADC_EntToSf} and {@code LDADC_SFToEnt}.
 *
 * <p>Each input line is expected to look like
 * {@code entity<TAB>|sf---context|sf---context|...}: the first column is the
 * entity name and the second column is a pipe-separated list of mentions,
 * each mention being a surface form and a context joined by {@code ---}.
 */
public class S3ConstructHBaseEntries {

    /** Table keyed by entity; values are surface forms. */
    private static final String ENTITY_TO_SF_TABLE = "LDADC_EntToSf";

    /** Table keyed by surface form; values are entities. */
    private static final String SF_TO_ENTITY_TABLE = "LDADC_SFToEnt";

    /** Column family used in both tables. */
    private static final String COLUMN_FAMILY = "data";

    /** Separator between the surface form and its context inside one mention. */
    private static final String MENTION_SEPARATOR = "---";

    /** Progress is printed every this many input lines. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Reads the given file line by line and writes one record per mention into
     * both HBase tables (entity to surface form, and surface form to entity).
     *
     * <p>Lines without a second tab-separated column and mentions without a
     * {@code ---} separator are skipped instead of aborting the whole import.
     * Surface forms are lower-cased and trimmed; those of length two or less
     * are ignored. I/O errors are reported on standard error and end the
     * import; they are not propagated to the caller.
     *
     * @param f path of the tab-separated input file
     */
    public void initializeSurfaceFormDirections(String f) {
        File file = new File(f);
        int counter = 0;
        // try-with-resources closes the reader even when reading or an HBase call fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                counter++;
                String[] columns = line.split("\\t");
                if (columns.length < 2) {
                    System.err.println("Skipping line " + counter
                            + ": expected two tab-separated columns");
                } else {
                    // Strings are immutable, so the stripped value must be kept;
                    // replace() treats ".html" literally rather than as a regex.
                    String entity = columns[0].replace(".html", "");
                    storeMentions(entity, columns[1]);
                }
                if (counter % PROGRESS_INTERVAL == 0) {
                    System.out.println("Output: " + counter);
                }
            }
            System.out.println(counter);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }

    /**
     * Writes every usable mention of one entity into both HBase tables.
     *
     * @param entity        row key / value identifying the entity
     * @param mentionColumn raw second column of an input line
     */
    private void storeMentions(String entity, String mentionColumn) {
        // Drop the first pipe (the leading one in the expected format) before splitting.
        String[] mentions = mentionColumn.replaceFirst("\\|", "").split("\\|");
        for (String mention : mentions) {
            String[] parts = mention.split(MENTION_SEPARATOR);
            if (parts.length < 2) {
                // No context part: nothing to derive the column qualifier from.
                continue;
            }
            String sf = parts[0].toLowerCase().trim();
            if (sf.length() <= 2) {
                continue;
            }
            // The context's hash code serves as the column qualifier.
            // NOTE(review): distinct contexts with equal hash codes would share a
            // qualifier; whether the later write replaces the earlier one depends
            // on HBaseOperations.addRecord, which is not visible here.
            String qualifier = String.valueOf(parts[1].hashCode());
            HBaseOperations.getInstance().addRecord(ENTITY_TO_SF_TABLE, entity, COLUMN_FAMILY, qualifier, sf);
            HBaseOperations.getInstance().addRecord(SF_TO_ENTITY_TABLE, sf, COLUMN_FAMILY, qualifier, entity);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the input file
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: S3ConstructHBaseEntries <input-file>");
            return;
        }
        S3ConstructHBaseEntries s = new S3ConstructHBaseEntries();
        s.initializeSurfaceFormDirections(args[0]);
    }
}