package org.curiosity; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.apache.commons.cli.*; import org.curiosity.concept.Camera; import org.curiosity.concept.Image; import org.curiosity.crawl.ImageCrawler; import org.curiosity.publish.MySqlPublisher; import org.curiosity.publish.Publisher; import org.curiosity.publish.PublisherType; import org.curiosity.publish.SysOutPublisher; import org.curiosity.util.DatabaseInvariants; import java.sql.Connection; import java.sql.SQLException; import java.util.Arrays; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; /** * Starter class for Curiosity rover image crawling. * * See {@link #options} for command line options information. * * @author jherwitz */ public class CuriosityImageCrawlerStarter { // the latest sol for which any camera has images // hardcoded as pages with invalid sols still 200, and we don't want to hammer the site // we should be able to calculate or scrape for it though private static final int maxSol = 854; public static void main(String[] args) throws Throwable { CommandLineParser cli = new BasicParser(); Options options = options(); /** * Parse command line args. */ CommandLine cmd = cli.parse(options, args); if (cmd.hasOption("h")) { System.out.println("----------------------------------------------"); System.out.println("Curiosity image crawler options:\n"); System.out.println(options); System.out.println("----------------------------------------------"); System.exit(0); } /** * Parse the crawl range. */ SolDelta solDelta = null; if (cmd.hasOption("s")) { String[] parts = cmd.getOptionValue("s").split(","); if (parts.length != 2) { fail(options.getOption("s")); } int startSol = Integer.parseInt(parts[0]); int endSol = Integer.parseInt(parts[1]); solDelta = new SolDelta(startSol, endSol); } else { fail(options.getOption("s")); } /** * Parse the {@link Camera}s to crawl. */ Set<Camera> cameras = null; if (cmd.hasOption("c")) { if (cmd.getOptionValues("c").length > 1) { fail(options.getOption("c")); } String commafied = cmd.getOptionValue("c"); cameras = Arrays.asList(commafied.split(",")).stream().map(Camera::valueOf).collect(Collectors.toSet()); } else { fail(options.getOption("c")); } /** * Parse the {@link Publisher} to use. */ Publisher publisher = null; if (cmd.hasOption("p")) { if (cmd.getOptionValues("p").length > 1) { fail(options.getOption("p")); } PublisherType publisherType = PublisherType.valueOf(cmd.getOptionValue("p")); switch (publisherType) { case SysOut: publisher = SysOutPublisher.Instance; break; case MySql: if (!cmd.hasOption("user") || !cmd.hasOption("pass") || !cmd.hasOption("jdbc")) { System.err.println("Database values (user, pass, jdbc) not set"); fail(options); } /** * Initialize the database connection. */ String username = cmd.getOptionValue("user"); String password = cmd.getOptionValue("pass"); String jdbc = cmd.getOptionValue("jdbc"); Connection conn = DatabaseInvariants.newConnection(username, password, jdbc); Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { conn.close(); } catch (SQLException e) { throw new RuntimeException(e); } } }); publisher = new MySqlPublisher(conn); break; default: throw new IllegalArgumentException("Unrecognized publisher type: " + publisherType); } } else { fail(options.getOption("c")); } start(solDelta, cameras, publisher); } /** * Kick off the crawl and publish the results. */ private static void start(SolDelta solDelta, Set<Camera> cameras, Publisher publisher) { Preconditions.checkNotNull(solDelta, "solDelta not null"); Preconditions.checkArgument(cameras != null && !cameras.isEmpty(), "cameras not null or empty"); Preconditions.checkNotNull(publisher, "publisher not null"); ImageCrawler crawler = new ImageCrawler(); System.out.println("Starting image crawl..."); ImmutableList.Builder<Image> images = ImmutableList.builder(); cameras.parallelStream() .forEach(camera -> IntStream.range(solDelta.startSol(), solDelta.endSol()) .forEach(sol -> { System.out.printf("Crawling sol:%d camera:%s\n", sol, camera); try { publisher.publishImages(crawler.crawl(sol, camera)); } catch (Throwable t) { t.printStackTrace(); } })); System.out.println("Crawl completed!"); } /** * Command line arguments for {@link CuriosityImageCrawlerStarter}. */ private static Options options() { Options options = new Options(); options.addOption("s", "sols", true, "The interval of sols to pull images for. Syntax: \" -s <START_SOL>,<END_SOL> \". Must be less than " + maxSol + "."); options.addOption("c", "cameras", true, "Space-less commafied list of cameras to pull images for. Possible values: " + Arrays.toString(Camera.values())); options.addOption("p", "publisher", true, "The publisher to report pulled images to. Possible values: " + Arrays.toString(PublisherType.values())); options.addOption("h", "help", false, "Print help text. Supersedes other options."); // the following options are applicable to options.addOption("user", true, "Database username."); options.addOption("pass", true, "Database password."); options.addOption("jdbc", true, "JDBC connection url for the database"); return options; } /** * Fatal due to a malformed {@link Option}. */ private static void fail(Option o) { System.err.println("Required option not properly specified: " + o); System.exit(1); } /** * Fatal due to malformed {@link Options}. */ private static void fail(Options o) { System.err.println("Required options not properly specified: " + o); System.exit(1); } /** * A delta of two sols. Used as a range to crawl. */ private static class SolDelta { private final int startSol; private final int endSol; public SolDelta(int startSol, int endSol) { if (startSol > endSol || startSol < 0 || endSol < 0 || startSol > maxSol || endSol > maxSol) { throw new IllegalArgumentException(String.format( "Sol values invalid: startSol %d: endSol: %d. Must satisfy startSol <= endSol, where startSol >- 0 and endSol >= 0", startSol, endSol)); } this.startSol = startSol; this.endSol = endSol; } public int startSol() { return startSol; } public int endSol() { return endSol; } } }