package focusedCrawler.tools; import java.io.PrintStream; import focusedCrawler.link.frontier.Frontier; import focusedCrawler.link.frontier.LinkRelevance; import focusedCrawler.util.CliTool; import focusedCrawler.util.persistence.TupleIterator; import io.airlift.airline.Command; import io.airlift.airline.Option; @Command(name="PrintFrontierLinksToFile") public class PrintFrontierLinksToFile extends CliTool { @Option(name="--input-data-path", description="Path to ACHE data target folder", required=true) private String inputPath; @Option(name="--output-file", description="The output file", required=false) private String outputFile; public static void main(String[] args) throws Exception { CliTool.run(args, new PrintFrontierLinksToFile()); } @Override public void execute() throws Exception { if(outputFile == null) { printLinks(System.out); } else { try(PrintStream out = new PrintStream(outputFile)) { printLinks(out); } } } private void printLinks(PrintStream out) throws Exception { Frontier frontier = new Frontier(inputPath, 1000); try (TupleIterator<LinkRelevance> it = frontier.iterator()) { while (it.hasNext()) { LinkRelevance link = it.next().getValue(); out.printf("%.5f %s\n", link.getRelevance(), link.getURL().toString()); } } frontier.close(); } }