package org.gbif.occurrence.download.hive; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.Map; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import freemarker.cache.ClassTemplateLoader; import freemarker.template.Configuration; import freemarker.template.Template; import freemarker.template.TemplateException; /** * Generates HQL scripts dynamically which are used to create the download HDFS tables, and querying when a user issues * a download request. * <p/> * Rather than generating HQL only at runtime, scripts are generated at build time using a maven * plugin, to aid testing, development and debugging. Freemarker is used as a templating language * to allow rapid development, but the sections which are verbose, and subject to easy typos are controlled * by enumerations in code. The same enumerations are used in many places in the codebase, including the * generation of HBase table columns themselves. */ public class GenerateHQL { private static final String CREATE_TABLES_DIR = "create-tables/hive-scripts"; private static final String DOWNLOAD_DIR = "download-workflow/dwca/hive-scripts"; private static final String SIMPLE_DOWNLOAD_DIR = "download-workflow/simple-csv/hive-scripts"; public static void main(String[] args) { try { Preconditions.checkState(1 == args.length, "Output path for HQL files is required"); File outDir = new File(args[0]); Preconditions.checkState(outDir.exists() && outDir.isDirectory(), "Output directory must exist"); // create the sub directories into which we will write File createTablesDir = new File(outDir, CREATE_TABLES_DIR); File downloadDir = new File(outDir, DOWNLOAD_DIR); File simpleDownloadDir = new File(outDir, SIMPLE_DOWNLOAD_DIR); createTablesDir.mkdirs(); downloadDir.mkdirs(); simpleDownloadDir.mkdirs(); Configuration cfg = new Configuration(); cfg.setTemplateLoader(new ClassTemplateLoader(GenerateHQL.class, "/templates")); // generates HQL for the coordinator jobs to create the tables to be queried generateHBaseTableHQL(cfg, createTablesDir); generateOccurrenceTableHQL(cfg, createTablesDir); // generates HQL executed at actual download time (tightly coupled to table definitions above, hence this is // co-located) generateQueryHQL(cfg, downloadDir); generateSimpleQueryHQL(cfg, simpleDownloadDir); } catch (Exception e) { // Hard exit for safety, and since this is used in build pipelines, any generation error could have // catastophic effects - e.g. partially complete scripts being run, and resulting in inconsistent // data. System.err.println("*** Aborting JVM ***"); System.err.println("Unexpected error building the templated HQL files. " + "Exiting JVM as a precaution, after dumping technical details."); e.printStackTrace(); System.exit(-1); } } /** * Generates HQL which create a Hive table on the HBase table. */ private static void generateHBaseTableHQL(Configuration cfg, File outDir) throws IOException, TemplateException { try (FileWriter out = new FileWriter(new File(outDir, "create-occurrence-hbase.q"))) { Template template = cfg.getTemplate("configure/create-occurrence-hbase.ftl"); Map<String, Object> data = ImmutableMap.<String, Object>of("fields", OccurrenceHBaseTableDefinition.definition()); template.process(data, out); } } /** * Generates HQL which is used to take snapshots of the HBase table, and creates an HDFS equivalent. */ private static void generateOccurrenceTableHQL(Configuration cfg, File outDir) throws IOException, TemplateException { try (FileWriter out = new FileWriter(new File(outDir, "create-occurrence-hdfs.q"))) { Template template = cfg.getTemplate("configure/create-occurrence-hdfs.ftl"); Map<String, Object> data = ImmutableMap.<String, Object>of("fields", OccurrenceHDFSTableDefinition.definition()); template.process(data, out); } } /** * Generates the Hive query file used for DwAc downloads. */ private static void generateQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException { try (FileWriter out = new FileWriter(new File(outDir, "execute-query.q"))) { Template template = cfg.getTemplate("download/execute-query.ftl"); Map<String, Object> data = ImmutableMap.<String, Object>of("verbatimFields", Queries.selectVerbatimFields(), "interpretedFields", Queries.selectInterpretedFields(false), "initializedInterpretedFields", Queries.selectInterpretedFields(true)); template.process(data, out); } } /** * Generates the Hive query file used for CSV downloads. */ private static void generateSimpleQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException { try (FileWriter out = new FileWriter(new File(outDir, "execute-simple-query.q"))) { Template template = cfg.getTemplate("simple-download/execute-simple-query.ftl"); Map<String, Object> data = ImmutableMap.<String, Object>of("fields", Queries.selectSimpleDownloadFields()); template.process(data, out); } } }