package io.lumify.migrations;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import io.lumify.core.bootstrap.InjectHelper;
import io.lumify.core.bootstrap.LumifyBootstrap;
import io.lumify.core.config.Configuration;
import io.lumify.core.config.ConfigurationLoader;
import io.lumify.core.exception.LumifyException;
import io.lumify.core.model.ontology.OntologyRepository;
import io.lumify.core.model.termMention.TermMentionRepository;
import io.lumify.core.model.user.UserRepository;
import io.lumify.core.model.workspace.WorkspaceRepository;
import io.lumify.core.security.LumifyVisibility;
import io.lumify.core.util.GraphUtil;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.securegraph.Graph;
import org.securegraph.accumulo.AccumuloGraph;
import org.securegraph.accumulo.AccumuloGraphConfiguration;
import org.securegraph.accumulo.mapreduce.AccumuloEdgeInputFormat;
import org.securegraph.accumulo.mapreduce.AccumuloElementOutputFormat;
import org.securegraph.accumulo.mapreduce.AccumuloVertexInputFormat;
import org.securegraph.accumulo.mapreduce.ElementMapper;

import java.io.IOException;
import java.util.HashMap;

/**
 * Base class for graph migrations executed as Hadoop jobs.
 * <p>
 * A migration verifies the graph's current version, submits one map-only job over the vertices and
 * one over the edges (both read through the Accumulo input formats and write through
 * {@link AccumuloElementOutputFormat}), waits for both, logs their counters and finally records the
 * new graph version.
 * <p>
 * Command line options (parsed with JCommander in {@link #run(String[])}):
 * <ul>
 * <li>{@code -force}: continue even if the version check fails.</li>
 * <li>{@code -dryrun} / {@code -dry-run}: the flag is published to the jobs under the
 * {@code migration.dryrun} property (readable via {@link #isDryRun(Mapper.Context)}) and the new
 * version is not written.</li>
 * </ul>
 * Subclasses supply the source version and the mapper class(es), and normally start from their
 * {@code main} through {@link #run(Class, String[])}.
 */
public abstract class MigrationBase extends Configured implements Tool {
    /** Assigned by {@link #run(Class, String[])}; always read through {@link #logger()}. */
    private static LumifyLogger LOGGER;
    private static final String CONFIG_DRY_RUN = "migration.dryrun";
    /** Property prefix under which the graph settings live in the configuration. */
    private static final String GRAPH_PREFIX = "graph.";

    private final AccumuloGraph graph;
    private final Configuration configuration;

    @Parameter(names = {"-force"}, description = "Force the migration.")
    private boolean forceMigration = false;

    @Parameter(names = {"-dryrun", "-dry-run"}, description = "Print the modifications, don't perform them.")
    private boolean dryRun = false;

    /**
     * @param graph         the graph to migrate; it is cast to {@link AccumuloGraph}, so any other
     *                      implementation fails with a {@link ClassCastException}
     * @param configuration the Lumify configuration used to build the job configuration
     */
    protected MigrationBase(
            Graph graph,
            Configuration configuration
    ) {
        this.graph = (AccumuloGraph) graph;
        this.configuration = configuration;
    }

    /**
     * Loads the configuration, creates the migration through {@link InjectHelper} and runs it with
     * {@link ToolRunner}.
     *
     * @param migrationClass the concrete migration to instantiate
     * @param args           the command line arguments
     * @return the exit code returned by {@link ToolRunner#run}
     * @throws Exception if bootstrapping or the migration itself fails
     */
    protected static int run(Class<? extends MigrationBase> migrationClass, String[] args) throws Exception {
        Configuration config = ConfigurationLoader.load(new HashMap());
        // must initialize logger after configuration loaded
        LOGGER = LumifyLoggerFactory.getLogger(migrationClass);
        MigrationBase migrationBase = InjectHelper.getInstance(
                migrationClass,
                LumifyBootstrap.bootstrapModuleMaker(config),
                config
        );
        return ToolRunner.run(new org.apache.hadoop.conf.Configuration(), migrationBase, args);
    }

    /**
     * Returns the shared logger. When the migration was not started through
     * {@link #run(Class, String[])} the logger has never been assigned; it is then created on first
     * use so that logging does not fail with a {@link NullPointerException}.
     */
    private static LumifyLogger logger() {
        if (LOGGER == null) {
            // Only instance methods log, and an instance always carries a loaded Configuration.
            // NOTE(review): assumes that is sufficient for LumifyLoggerFactory - confirm.
            LOGGER = LumifyLoggerFactory.getLogger(MigrationBase.class);
        }
        return LOGGER;
    }

    /**
     * Parses the arguments, verifies the graph version, runs the vertex and edge jobs and writes
     * the new version.
     *
     * @param args the command line arguments ({@code -force}, {@code -dryrun})
     * @return {@code 0} on success
     * @throws LumifyException if the version check fails or either job is unsuccessful
     * @throws Exception       if a job cannot be configured, submitted or awaited
     */
    @Override
    public int run(String[] args) throws Exception {
        new JCommander(this, args);

        try {
            verifyVersion(forceMigration);
        } catch (LumifyException ex) {
            throw new LumifyException("Could not verify version. Run with -force option to force the migration.", ex);
        }

        AccumuloGraphConfiguration accumuloGraphConfiguration =
                new AccumuloGraphConfiguration(getConfiguration().toHadoopConfiguration(), GRAPH_PREFIX);

        // migrate(...) submits each job, so both are already running before the first wait.
        Job migrateVerticesJob = migrateVertices(accumuloGraphConfiguration);
        Job migrateEdgesJob = migrateEdges(accumuloGraphConfiguration);
        migrateVerticesJob.waitForCompletion(true);
        migrateEdgesJob.waitForCompletion(true);

        // Counters are logged before the success checks so they are visible for failed runs too.
        logger().info("Vertex Counters");
        printCounters(migrateVerticesJob);
        logger().info("Edge Counters");
        printCounters(migrateEdgesJob);

        if (!migrateVerticesJob.isSuccessful()) {
            throw new LumifyException("Failed to migrate vertices");
        }
        if (!migrateEdgesJob.isSuccessful()) {
            throw new LumifyException("Failed to migrate edges");
        }

        writeNewVersion();
        return 0;
    }

    /**
     * Submits the vertex migration job using {@link #getVertexMigrationMapperClass()}.
     *
     * @return the submitted job
     */
    protected Job migrateVertices(AccumuloGraphConfiguration accumuloGraphConfiguration) throws Exception {
        return migrate(accumuloGraphConfiguration, MigrationType.VERTICES, getVertexMigrationMapperClass());
    }

    /**
     * @return the mapper class applied to every vertex
     */
    protected abstract Class<? extends ElementMigrationMapperBase> getVertexMigrationMapperClass();

    /**
     * Submits the edge migration job using {@link #getEdgeMigrationMapperClass()}.
     *
     * @return the submitted job
     */
    protected Job migrateEdges(AccumuloGraphConfiguration accumuloGraphConfiguration) throws Exception {
        return migrate(accumuloGraphConfiguration, MigrationType.EDGES, getEdgeMigrationMapperClass());
    }

    /**
     * @return the mapper class applied to every edge; defaults to the vertex mapper class
     */
    protected Class<? extends ElementMigrationMapperBase> getEdgeMigrationMapperClass() {
        return getVertexMigrationMapperClass();
    }

    /**
     * Configures and submits a map-only job for one element type.
     *
     * @param accumuloGraphConfiguration source of the Accumulo instance name, ZooKeeper servers,
     *                                   user name and authentication token
     * @param migrationType              whether vertices or edges are read
     * @param mapperClass                the mapper to run over each element
     * @return the job, already submitted but not awaited
     * @throws LumifyException if {@code migrationType} is not a known type
     */
    protected Job migrate(
            AccumuloGraphConfiguration accumuloGraphConfiguration,
            MigrationType migrationType,
            Class<? extends ElementMigrationMapperBase> mapperClass
    ) throws IOException, AccumuloSecurityException, InterruptedException, ClassNotFoundException {
        // Validate the type first so an invalid value fails before any configuration is touched.
        String jobNameSuffix;
        Class<? extends InputFormat> inputFormatClass;
        switch (migrationType) {
            case VERTICES:
                jobNameSuffix = "migrateVertices";
                inputFormatClass = AccumuloVertexInputFormat.class;
                break;
            case EDGES:
                jobNameSuffix = "migrateEdges";
                inputFormatClass = AccumuloEdgeInputFormat.class;
                break;
            default:
                throw new LumifyException("Invalid migration type: " + migrationType);
        }

        JobConf jobConf = getJobConf(getConfiguration());
        String jobName = getClass().getSimpleName() + "-" + jobNameSuffix;
        Job job = Job.getInstance(jobConf, jobName);
        job.setJarByClass(getClass());

        String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
        String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
        String principal = accumuloGraphConfiguration.getAccumuloUsername();
        AuthenticationToken authorizationToken = accumuloGraphConfiguration.getAuthenticationToken();

        AccumuloElementOutputFormat.setOutputInfo(job, instanceName, zooKeepers, principal, authorizationToken);
        job.setOutputFormatClass(AccumuloElementOutputFormat.class);

        // migrationType was validated above, so these two cases are exhaustive here.
        switch (migrationType) {
            case VERTICES:
                AccumuloVertexInputFormat.setInputInfo(job, graph, instanceName, zooKeepers, principal, authorizationToken, getAuthorizations());
                break;
            case EDGES:
                AccumuloEdgeInputFormat.setInputInfo(job, graph, instanceName, zooKeepers, principal, authorizationToken, getAuthorizations());
                break;
        }
        job.setInputFormatClass(inputFormatClass);

        job.setMapperClass(mapperClass);
        job.setNumReduceTasks(0);

        job.submit();
        return job;
    }

    /**
     * @return the authorizations passed to the input formats: the super user, ontology, user,
     * workspace and term mention visibility strings
     */
    protected String[] getAuthorizations() {
        return new String[]{
                LumifyVisibility.SUPER_USER_VISIBILITY_STRING,
                OntologyRepository.VISIBILITY_STRING,
                UserRepository.VISIBILITY_STRING,
                WorkspaceRepository.VISIBILITY_STRING,
                TermMentionRepository.VISIBILITY_STRING
        };
    }

    /**
     * Builds the job configuration from the Lumify configuration layered on {@link #getConf()},
     * adding the graph configuration prefix and the dry-run flag. The result is also installed as
     * this tool's configuration via {@link #setConf}.
     *
     * @param lumifyConfig the Lumify configuration to convert
     * @return the job configuration
     */
    protected JobConf getJobConf(Configuration lumifyConfig) {
        org.apache.hadoop.conf.Configuration hadoopConfig = lumifyConfig.toHadoopConfiguration(getConf());
        hadoopConfig.set(ElementMapper.GRAPH_CONFIG_PREFIX, GRAPH_PREFIX);
        hadoopConfig.set(CONFIG_DRY_RUN, Boolean.toString(dryRun));
        JobConf result = new JobConf(hadoopConfig, this.getClass());
        setConf(result);
        logger().info("Using config:\n" + result);
        return result;
    }

    /**
     * Records {@link #getToVersion()} on the graph, or only logs it when running as a dry run.
     */
    protected void writeNewVersion() {
        if (dryRun) {
            logger().debug("dry-run: writing version: %d", getToVersion());
        } else {
            GraphUtil.writeVersion(getGraph(), getToVersion());
        }
    }

    /**
     * Checks that the graph is at {@link #getFromVersion()}.
     *
     * @param forceMigration when {@code true} a failed check is logged as a warning and ignored;
     *                       when {@code false} the {@link LumifyException} propagates
     */
    protected void verifyVersion(boolean forceMigration) {
        if (!forceMigration) {
            GraphUtil.verifyVersion(getGraph(), getFromVersion());
            return;
        }
        try {
            GraphUtil.verifyVersion(getGraph(), getFromVersion());
        } catch (LumifyException ex) {
            logger().warn("Version verification failed: %s (continuing because of force)", ex.getMessage());
        }
    }

    /**
     * @return the graph version this migration expects to find
     */
    protected abstract int getFromVersion();

    /**
     * @return the graph version written after a successful migration; defaults to
     * {@code getFromVersion() + 1}
     */
    protected int getToVersion() {
        return getFromVersion() + 1;
    }

    public Graph getGraph() {
        return graph;
    }

    public Configuration getConfiguration() {
        return configuration;
    }

    /**
     * Logs every counter of the job, grouped by counter group. Failure to read the counters is
     * logged and never propagated.
     *
     * @param job the job whose counters are logged
     */
    protected void printCounters(Job job) {
        try {
            // Fetch once: each getCounters() call asks the cluster again, and Hadoop documents
            // that it may return null (for example for a retired job).
            org.apache.hadoop.mapreduce.Counters counters = job.getCounters();
            if (counters == null) {
                logger().warn("Counters are not available for job %s", job.getJobName());
                return;
            }
            for (String groupName : counters.getGroupNames()) {
                CounterGroup groupCounters = counters.getGroup(groupName);
                logger().info(" " + groupCounters.getDisplayName());
                for (Counter counter : groupCounters) {
                    logger().info(" " + counter.getDisplayName() + ": " + counter.getValue());
                }
            }
        } catch (IOException ex) {
            logger().error("Could not print counters", ex);
        }
    }

    /**
     * Tells a mapper whether the migration runs as a dry run.
     *
     * @param context the mapper context whose configuration carries the {@code migration.dryrun}
     *                property
     * @return {@code true} only if the property is set to "true" (case-insensitive); {@code false}
     * when it is missing
     */
    public static boolean isDryRun(Mapper.Context context) {
        // Boolean.parseBoolean(null) is false, so a missing property needs no separate check.
        return Boolean.parseBoolean(context.getConfiguration().get(CONFIG_DRY_RUN));
    }

    /** The kind of graph element a migration job reads. */
    protected enum MigrationType {
        VERTICES,
        EDGES
    }
}