package io.lumify.palantir.mr;
import io.lumify.core.mapreduce.LumifyMRBase;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import io.lumify.palantir.model.*;
import io.lumify.palantir.mr.mappers.*;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.securegraph.accumulo.mapreduce.AccumuloElementOutputFormat;
import java.util.HashMap;
import java.util.Map;
/**
 * MapReduce driver that imports exported Palantir data into Lumify's Accumulo-backed graph.
 *
 * <p>Usage: {@code ImportMR <inputDir> <type> <baseIri>}, where {@code <type>} selects one of the
 * registered {@link #MAPPERS} by Palantir model class simple name (matched case-insensitively).
 * The input is expected to be a sequence file named {@code <inputDir>/<TypeName>.seq}.
 */
public class ImportMR extends LumifyMRBase {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(ImportMR.class);

    // Configuration keys read back by the mapper implementations.
    public static final String CONF_BASE_IRI = "baseIri";
    public static final String CONF_TYPE = "type";
    public static final String CONF_IN_DIR = "inDir";

    /**
     * Registry of importable Palantir model types mapped to the mapper class that handles each.
     * Keys are model class simple names, which also name the input files ({@code <key>.seq}).
     *
     * <p>NOTE(review): this is a public mutable map; left mutable in case external callers
     * register additional mappers — confirm before locking it down with an unmodifiable wrapper.
     */
    public static final Map<String, Class<? extends Mapper>> MAPPERS = new HashMap<>();

    // Registry entry matching the requested type; populated by parseArgs.
    private Map.Entry<String, Class<? extends Mapper>> workingMapper;
    // Resolved path of the input sequence file, e.g. <inDir>/PtUser.seq; populated by parseArgs.
    private Path inFilePath;

    static {
        MAPPERS.put(PtUser.class.getSimpleName(), PtUserMapper.class);
        MAPPERS.put(PtGraph.class.getSimpleName(), PtGraphMapper.class);
        MAPPERS.put(PtObject.class.getSimpleName(), PtObjectMapper.class);
        MAPPERS.put(PtGraphObject.class.getSimpleName(), PtGraphObjectMapper.class);
        MAPPERS.put(PtObjectObject.class.getSimpleName(), PtObjectObjectMapper.class);
        MAPPERS.put(PtPropertyAndValue.class.getSimpleName(), PtPropertyAndValueMapper.class);
        MAPPERS.put(PtMediaAndValue.class.getSimpleName(), PtMediaAndValueMapper.class);
        MAPPERS.put(PtNoteAndNoteValue.class.getSimpleName(), PtNoteAndNoteValueMapper.class);
    }

    /**
     * Configures the job as a map-only (zero reducers) pass that reads the selected sequence
     * file and writes {@link Mutation}s straight to Accumulo.
     *
     * @param job the job to configure; parseArgs must have run first to set the mapper and input path
     */
    @Override
    protected void setupJob(Job job) throws Exception {
        job.setMapperClass(workingMapper.getValue());
        job.setJarByClass(ImportMR.class);
        job.setNumReduceTasks(0);
        job.setMapOutputValueClass(Mutation.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(AccumuloElementOutputFormat.class);
        SequenceFileInputFormat.setInputPaths(job, inFilePath);
    }

    /** Returns the job name, suffixed with the canonical (registry) spelling of the import type. */
    @Override
    protected String getJobName() {
        return "palantirImport-" + workingMapper.getKey();
    }

    /**
     * Validates the command-line arguments, copies them into the job configuration, and resolves
     * the requested type to its registered mapper and input file.
     *
     * @param conf job configuration to populate with {@link #CONF_IN_DIR}, {@link #CONF_TYPE},
     *             and {@link #CONF_BASE_IRI}
     * @param args expected as {@code <inputDir> <type> <baseIri>}
     * @throws RuntimeException if the argument count is wrong or the type is not registered
     */
    @Override
    protected void parseArgs(JobConf conf, String[] args) {
        if (args.length != 3) {
            throw new RuntimeException("Required arguments <inputDir> <type> <baseIri>");
        }
        String inDir = args[0];
        LOGGER.info("inDir: %s", inDir);
        conf.set(CONF_IN_DIR, inDir);
        String type = args[1];
        LOGGER.info("type: %s", type);
        conf.set(CONF_TYPE, type);
        String baseIri = args[2];
        LOGGER.info("baseIri: %s", baseIri);
        conf.set(CONF_BASE_IRI, baseIri);
        Path inPath = new Path(inDir);
        workingMapper = null;
        // Linear scan is fine: the registry is small and the match must be case-insensitive,
        // which a plain HashMap lookup cannot provide.
        for (Map.Entry<String, Class<? extends Mapper>> mapper : MAPPERS.entrySet()) {
            if (mapper.getKey().equalsIgnoreCase(type)) {
                workingMapper = mapper;
                inFilePath = new Path(inPath, mapper.getKey() + ".seq");
                break;
            }
        }
        if (workingMapper == null) {
            // Include the offending value and the valid options so the operator can self-correct.
            throw new RuntimeException("Invalid import type: " + type
                    + " (expected one of " + MAPPERS.keySet() + ")");
        }
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new ImportMR(), args);
        System.exit(res);
    }
}