package lda.wikievidence.dataconstruction; import hbase.operations.HBaseOperations; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; public class S4CreateCircles { public static final int MAXDEPTH = 1; public static final int MAXSFTOENTS = 50; public void processMain(String f, String outputFile) { File file = new File(f); try { PrintWriter writer = new PrintWriter(new FileWriter(new File( outputFile), true)); BufferedReader reader = new BufferedReader(new FileReader(file)); String line = null; int processed = 0; while ((line = reader.readLine()) != null) { String splitter[] = line.split("\\t"); String entityName = splitter[0]; HashSet<String> circleEntities = new HashSet<String>(); circleEntities.add(entityName); discoverNewEntities(entityName, circleEntities, 0); List<String> list = new ArrayList<String>(circleEntities); sortList(list); StringBuffer buffer = new StringBuffer(); for (String s : list) { buffer.append(s + "|"); } String out = buffer.toString(); out = out.substring(0, out.length() - 1); writer.println(out); // if (list.size() > 1) { // System.out.println(out); // } processed++; if (processed % 150 == 0) { System.out.println("processed: " + processed); } } reader.close(); writer.close(); } catch (IOException e) { e.printStackTrace(); } } private static void sortList(List<String> aItems) { Collections.sort(aItems); } private void discoverNewEntities(String entity, HashSet<String> discoveredEntities, int depth) throws IOException { if (depth >= MAXDEPTH) { return; } boolean discovery = false; Set<String> sfs = new HashSet<String>(); HBaseOperations.getInstance().getRow("LDADC_EntToSf", entity, "data", sfs, -1); // ArrayList<String> sfs = HBaseOperations.getRow("input", entity, // "data"); for (String s : sfs) { Set<String> ents = new HashSet<String>(); HBaseOperations.getInstance().getRow("LDADC_SFToEnt", s, "data", ents, -1); // ArrayList<String> ents = HBaseOperations // .getRow("output", s, "data"); // for (String sf : ents) { // System.out.println("HBASE LIEFERT MIR DANACH: " + sf); // } if (ents.size() < MAXSFTOENTS) { for (String str : ents) { if (!discoveredEntities.contains(str)) { discoveredEntities.add(str); discovery = true; } if (discovery) { int i = depth + 1; discoverNewEntities(str, discoveredEntities, i); discovery = false; } } } } } public static void main(String[] args) { S4CreateCircles s4 = new S4CreateCircles(); s4.processMain(args[0], args[1]); } }