package io.lumify.palantir;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.google.common.base.Joiner;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import io.lumify.palantir.ontologyToOwl.OntologyToOwl;
import io.lumify.palantir.service.*;
import io.lumify.palantir.sqlrunner.SqlRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.securegraph.util.ConvertingIterable;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line tool that exports a Palantir Oracle database to Hadoop SequenceFiles
 * and converts the Palantir ontology to OWL.
 *
 * Example invocation (argument values are illustrative):
 *   java io.lumify.palantir.DataToSequenceFile \
 *     -n=PT -c=jdbc:oracle:thin:@dbhost:1521:orcl -u=user -p=pass \
 *     -d=hdfs://namenode/palantir --baseiri=http://lumify.io/palantir
 */
@Parameters(separators = "=", commandDescription = "Exports Palantir Oracle database to hadoop SequenceFiles")
public class DataToSequenceFile implements Exporter.ExporterSource {
    public static final String ONTOLOGY_XML_DIR_NAME = "OntologyXML";
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(DataToSequenceFile.class);

    @Parameter(names = {"-n", "--namespace"}, description = "Table namespace", required = true)
    private String tableNamespace;

    @Parameter(names = {"-c", "--connectionstring"}, description = "Database connection string", required = true)
    private String databaseConnectionString;

    @Parameter(names = {"-u", "--username"}, description = "Database username", required = true)
    private String databaseUsername;

    @Parameter(names = {"-p", "--password"}, description = "Database password", required = true)
    private String databasePassword;

    @Parameter(names = {"-d", "--dest"}, description = "Destination path hadoop url", required = true)
    private String destination;

    @Parameter(names = {"--baseiri"}, description = "base IRI for ontology", required = true)
    private String baseIri;

    @Parameter(names = {"--exporters"}, description = "comma separated list of exporters (default: all)")
    private String exporters = "all";

    private Path destinationPath;
    private FileSystem fs;
    private SqlRunner sqlRunner;
    private Configuration hadoopConfiguration;

    public static final List<Exporter> EXPORTERS = new ArrayList<>();

    static {
        EXPORTERS.add(new PtObjectTypeExporter());
        EXPORTERS.add(new PtPropertyTypeExporter());
        EXPORTERS.add(new PtLinkTypeExporter());
        EXPORTERS.add(new PtTypeGroupExporter());
        EXPORTERS.add(new PtNodeDisplayTypeExporter());
        EXPORTERS.add(new PtImageInfoExporter());
        EXPORTERS.add(new PtOntologyResourceExporter());
        EXPORTERS.add(new PtLinkRelationExporter());
        EXPORTERS.add(new PtUserExporter());
        EXPORTERS.add(new PtGraphExporter());
        EXPORTERS.add(new PtObjectExporter());
        EXPORTERS.add(new PtGraphObjectExporter());
        EXPORTERS.add(new PtObjectObjectExporter());
        EXPORTERS.add(new PtMediaAndValueExporter());
        EXPORTERS.add(new PtPropertyAndValueExporter());
        EXPORTERS.add(new PtNoteAndNoteValueExporter());
    }

    public DataToSequenceFile(String[] args) {
        new JCommander(this, args);

        // The ontology-to-OWL converter needs the base IRI, so it can only be
        // registered after the command-line arguments have been parsed.
        EXPORTERS.add(new OntologyToOwl(baseIri));

        if (exporters.equalsIgnoreCase("all")) {
            exporters = Joiner.on(',').join(new ConvertingIterable<Exporter, String>(EXPORTERS) {
                @Override
                protected String convert(Exporter o) {
                    return o.getObjectClass().getSimpleName();
                }
            });
        }
    }

    public static void main(String[] args) throws Exception {
        try {
            new DataToSequenceFile(args).run();
        } catch (Throwable ex) {
            LOGGER.error("Failed to export", ex);
        }
    }
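    /**
     * Resolves the requested exporters, opens the HDFS destination and the source
     * database connection, then runs each exporter in order, closing the database
     * connection when the last exporter finishes.
     */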
    private void run() throws Exception {
        LOGGER.info("begin export");

        List<Exporter> exportersToRun = getExportersToRun(exporters);
        for (Exporter exporter : exportersToRun) {
            LOGGER.info("Preparing to run: %s", exporter.getClass().getSimpleName());
        }

        hadoopConfiguration = new Configuration(true);
        destinationPath = new Path(destination);
        fs = getFileSystem(hadoopConfiguration);
        sqlRunner = createSqlRunner();
        sqlRunner.connect();
        try {
            for (Exporter exporter : exportersToRun) {
                LOGGER.info("Running: %s", exporter.getClass().getSimpleName());
                exporter.run(this);
            }
        } finally {
            sqlRunner.close();
        }

        LOGGER.info("export complete!");
    }

    private List<Exporter> getExportersToRun(String exporters) {
        List<Exporter> results = new ArrayList<>();
        for (String exporterString : exporters.split(",")) {
            Exporter e = null;
            for (Exporter exporter : EXPORTERS) {
                if (exporter.getObjectClass().getSimpleName().equalsIgnoreCase(exporterString)) {
                    e = exporter;
                    break;
                }
            }
            if (e == null) {
                throw new RuntimeException("invalid exporter: " + exporterString);
            }
            results.add(e);
        }
        return results;
    }

    private SqlRunner createSqlRunner() {
        return new SqlRunner(
                databaseConnectionString,
                databaseUsername,
                databasePassword,
                tableNamespace);
    }

    private FileSystem getFileSystem(Configuration hadoopConfiguration) throws IOException, URISyntaxException {
        FileSystem fs = FileSystem.get(new URI(destination), hadoopConfiguration);
        fs.mkdirs(new Path(destination));
        return fs;
    }

    public Path getDestinationPath() {
        return destinationPath;
    }

    public SqlRunner getSqlRunner() {
        return sqlRunner;
    }

    @Override
    public FileSystem getFileSystem() {
        return fs;
    }

    public Configuration getHadoopConfiguration() {
        return hadoopConfiguration;
    }

    public void writeFile(String fileName, String data) {
        Path path = new Path(getDestinationPath(), fileName);
        try {
            try (FSDataOutputStream out = getFileSystem().create(path, true)) {
                // Write with an explicit charset rather than the platform default.
                out.write(data.getBytes(StandardCharsets.UTF_8));
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not write file: " + path, e);
        }
    }
}
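/*
 * For reference, the shape of an Exporter implied by the calls above. This is a
 * sketch inferred from this file alone; the actual
 * io.lumify.palantir.service.Exporter interface may declare additional members:
 *
 *   public interface Exporter {
 *       // Used above to derive the exporter's command-line name via getSimpleName().
 *       Class<?> getObjectClass();
 *
 *       // Given access to the destination FileSystem, destination Path, Hadoop
 *       // Configuration, and SqlRunner through the ExporterSource callback.
 *       void run(ExporterSource source) throws Exception;
 *   }
 */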