package io.lumify.palantir.service; import io.lumify.core.util.LumifyLogger; import io.lumify.core.util.LumifyLoggerFactory; import io.lumify.palantir.DataToSequenceFile; import io.lumify.palantir.model.PtModelBase; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile; import java.io.IOException; public abstract class ExporterBase<T extends PtModelBase> implements Exporter { private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(ExporterBase.class); private final Class<T> ptClass; protected ExporterBase(Class<T> ptClass) { this.ptClass = ptClass; } @Override public Class getObjectClass() { return this.ptClass; } public void run(Exporter.ExporterSource exporterSource) throws IOException { try (SequenceFile.Writer writer = SequenceFile.createWriter( exporterSource.getHadoopConfiguration(), SequenceFile.Writer.file(new Path(exporterSource.getDestinationPath(), getFileName())), SequenceFile.Writer.keyClass(getKeyClass()), SequenceFile.Writer.valueClass(getPtClass()) )) { long startTime = System.currentTimeMillis(); LOGGER.info(this.getClass().getName()); LOGGER.debug("running sql: %s", getSql()); Iterable<T> rows = exporterSource.getSqlRunner().select(getSql(), ptClass); long count = run(exporterSource, rows, writer); long endTime = System.currentTimeMillis(); LOGGER.info("Wrote %d %s (time: %dms)", count, this.ptClass.getSimpleName(), endTime - startTime); } } protected void writeOntologyXmlFile(Exporter.ExporterSource exporterSource, String uri, String data) { String fileName = DataToSequenceFile.ONTOLOGY_XML_DIR_NAME + "/" + uri.replace('.', '/') + ".xml"; exporterSource.writeFile(fileName, data); } protected void writeFile(Exporter.ExporterSource exporterSource, String fileName, String data) { exporterSource.writeFile(fileName, data); } protected Class<?> getKeyClass() { return LongWritable.class; } public String getFileName() { return getPtClass().getSimpleName() + ".seq"; } protected abstract String getSql(); protected long run(Exporter.ExporterSource exporterSource, Iterable<T> rows, SequenceFile.Writer outputFile) { int count = 0; beforeProcessRows(exporterSource); for (T row : rows) { if (count % 1000 == 0) { LOGGER.debug("Exporting %s: %d", getPtClass().getSimpleName(), count); } try { processRow(exporterSource, row, outputFile); } catch (Throwable ex) { handleProcessRowError(row, ex); } count++; } afterProcessRows(exporterSource); return count; } protected void beforeProcessRows(Exporter.ExporterSource exporterSource) { } protected void afterProcessRows(Exporter.ExporterSource exporterSource) { } protected void processRow(Exporter.ExporterSource exporterSource, T row, SequenceFile.Writer outputFile) throws IOException { outputFile.append(row.getKey(), row); } protected void handleProcessRowError(T row, Throwable ex) { LOGGER.error("Could not process row: %s (type: %s)", row, getPtClass().getSimpleName(), ex); } public Class<T> getPtClass() { return ptClass; } }