package io.lumify.friendster;

import com.google.inject.Inject;
import io.lumify.core.bootstrap.InjectHelper;
import io.lumify.core.bootstrap.LumifyBootstrap;
import io.lumify.core.config.ConfigurationLoader;
import io.lumify.core.model.ontology.Concept;
import io.lumify.core.model.ontology.OntologyRepository;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.util.TextUtil;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.securegraph.Graph;
import org.securegraph.Vertex;
import org.securegraph.accumulo.AccumuloGraph;
import org.securegraph.accumulo.AccumuloGraphConfiguration;
import org.securegraph.accumulo.mapreduce.AccumuloElementOutputFormat;
import org.securegraph.accumulo.mapreduce.ElementMapper;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
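
/**
 * MapReduce {@link Tool} that bulk imports Friendster users and friend relationships into the
 * SecureGraph/Accumulo store backing Lumify. The driver verifies the required ontology entries,
 * derives range-partitioner splits from the existing Accumulo tables, and submits the job.
 */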
public class ImportMR extends Configured implements Tool {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(ImportMR.class);

    public static final char KEY_SPLIT = '\u001f';
    public static final String MULTI_VALUE_KEY = ImportMR.class.getName();
    public static final String FRIENDSTER_SOURCE = "Friendster";

    private OntologyRepository ontologyRepository;
    private Graph graph;
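
    /**
     * Configures and submits the import job: loads the Lumify configuration, verifies the ontology,
     * wires up Accumulo output and range partitioning, runs the job, and prints the import counters.
     */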
    @Override
    public int run(String[] args) throws Exception {
        io.lumify.core.config.Configuration lumifyConfig = ConfigurationLoader.load();
        Configuration conf = getConfiguration(args, lumifyConfig);
        AccumuloGraphConfiguration accumuloGraphConfiguration = new AccumuloGraphConfiguration(conf, "graph.");
        InjectHelper.inject(this, LumifyBootstrap.bootstrapModuleMaker(lumifyConfig), lumifyConfig);

        // fail fast if the ontology is missing the Friendster user concept or friend relationship
        verifyFriendsterUserConcept(ontologyRepository);
        verifyFriendsterUserToUserRelationship(ontologyRepository);

        Job job = Job.getInstance(conf, "friendsterImport");

        // point the Accumulo output format at the same instance the graph is configured against
        String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
        String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
        String principal = accumuloGraphConfiguration.getAccumuloUsername();
        AuthenticationToken authenticationToken = accumuloGraphConfiguration.getAuthenticationToken();
        AccumuloElementOutputFormat.setOutputInfo(job, instanceName, zooKeepers, principal, authenticationToken);

        // partition reducer output along the existing Accumulo table splits; skipped under the
        // local job tracker ("local".equals(...) is null-safe if mapred.job.tracker is unset)
        List<Text> splits = getSplits((AccumuloGraph) graph);
        Path splitFile = writeSplitsFile(conf, splits);
        if ("local".equals(job.getConfiguration().get("mapred.job.tracker"))) {
            LOGGER.warn("!!!!!! Running in local mode !!!!!!");
        } else {
            job.setPartitionerClass(RangePartitioner.class);
            RangePartitioner.setSplitFile(job, splitFile.toString());
            job.setNumReduceTasks(splits.size() + 1);
        }

        job.setJarByClass(ImportMR.class);
        job.setMapperClass(ImportMRMapper.class);
        job.setMapOutputValueClass(Mutation.class);
        job.setReducerClass(ImportMRReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(AccumuloElementOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(conf.get("in")));

        int returnCode = job.waitForCompletion(true) ? 0 : 1;

        // report the job's import counters to stdout
        CounterGroup groupCounters = job.getCounters().getGroup(FriendsterImportCounters.class.getName());
        for (Counter counter : groupCounters) {
            System.out.println(counter.getDisplayName() + ": " + counter.getValue());
        }
        return returnCode;
    }
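
    /**
     * Fails fast if the Friendster friend relationship is not defined in the ontology.
     */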
    private void verifyFriendsterUserToUserRelationship(OntologyRepository ontologyRepository) {
        if (!ontologyRepository.hasRelationshipByIRI(FriendsterOntology.EDGE_LABEL_FRIEND)) {
            throw new RuntimeException(FriendsterOntology.EDGE_LABEL_FRIEND + " relationship not found");
        }
    }
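
    /**
     * Fails fast if the Friendster user concept is not defined in the ontology.
     */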
    private void verifyFriendsterUserConcept(OntologyRepository ontologyRepository) {
        Concept concept = ontologyRepository.getConceptByIRI(FriendsterOntology.CONCEPT_TYPE_USER);
        if (concept == null) {
            throw new RuntimeException(FriendsterOntology.CONCEPT_TYPE_USER + " concept not found");
        }
    }
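
    /**
     * Writes the Base64-encoded split points to an HDFS file in the format expected by
     * Accumulo's {@link RangePartitioner}.
     */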
    private Path writeSplitsFile(Configuration conf, List<Text> splits) throws IOException {
        Path splitFile = new Path("/tmp/friendsterImport_splits.txt");
        FileSystem fs = FileSystem.get(conf);
        // try-with-resources so the stream is closed even if a write fails
        try (PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(splitFile)))) {
            for (Text split : splits) {
                out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));
            }
        }
        return splitFile;
    }
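
    /**
     * Collects the split points of the vertices, edges, and data tables and returns them sorted,
     * so a single split file can cover output destined for all three tables.
     */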
    private List<Text> getSplits(AccumuloGraph graph) throws TableNotFoundException, AccumuloSecurityException, AccumuloException {
        List<Text> splits = new ArrayList<Text>();
        splits.addAll(getSplits(graph, graph.getVerticesTableName()));
        splits.addAll(getSplits(graph, graph.getEdgesTableName()));
        splits.addAll(getSplits(graph, graph.getDataTableName()));
        Collections.sort(splits);
        return splits;
    }
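
    /**
     * Fetches up to 100 split points for one table and prefixes each with the table name
     * (via {@link #getKey}) so splits from different tables do not collide.
     */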
    private Collection<Text> getSplits(AccumuloGraph graph, String tableName) throws TableNotFoundException, AccumuloSecurityException, AccumuloException {
        List<Text> tableNamePrefixedSplits = new ArrayList<Text>();
        Collection<Text> splits = graph.getConnector().tableOperations().listSplits(tableName, 100);
        if (splits.size() == 0) {
            return tableNamePrefixedSplits;
        }
        for (Text split : splits) {
            Text splitName = getKey(tableName, TextUtil.getBytes(split));
            tableNamePrefixedSplits.add(splitName);
        }
        return tableNamePrefixedSplits;
    }
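
    /**
     * Builds a key of the form {@code tableName + KEY_SPLIT + Base64(key)}.
     */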
    static Text getKey(String tableName, byte[] key) {
        return new Text(tableName + KEY_SPLIT + new String(Base64.encodeBase64(key)));
    }
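
    /**
     * Converts the Lumify configuration to a Hadoop configuration, sets the graph configuration
     * prefix, and records the input path and source file name for the mapper.
     */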
    private Configuration getConfiguration(String[] args, io.lumify.core.config.Configuration lumifyConfig) {
        if (args.length != 1) {
            throw new RuntimeException("Required argument: <inputFileName>");
        }
        String inFileName = args[0];
        LOGGER.info("Using config:\n" + lumifyConfig);
        Configuration hadoopConfig = lumifyConfig.toHadoopConfiguration();
        hadoopConfig.set(ElementMapper.GRAPH_CONFIG_PREFIX, "graph.");
        LOGGER.info("inFileName: %s", inFileName);
        hadoopConfig.set("in", inFileName);
        hadoopConfig.set(ImportMRMapper.CONFIG_SOURCE_FILE_NAME, new File(inFileName).getName());
        this.setConf(hadoopConfig);
        return hadoopConfig;
    }
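
    /**
     * Entry point; normally launched through ToolRunner, e.g. (jar name illustrative):
     * {@code hadoop jar lumify-friendster-mr.jar io.lumify.friendster.ImportMR <inputFileName>}
     */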
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new ImportMR(), args);
        System.exit(res);
    }
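
    // Collaborators injected by InjectHelper.inject() in run(): the ontology repository backs the
    // ontology pre-flight checks and the graph supplies the Accumulo connector and table names.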
    @Inject
    public void setOntologyRepository(OntologyRepository ontologyRepository) {
        this.ontologyRepository = ontologyRepository;
    }

    @Inject
    public void setGraph(Graph graph) {
        this.graph = graph;
    }
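
    /**
     * Deterministic vertex id for a Friendster user id, so the same user always maps to the same vertex id.
     */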
    public static String getUserVertexId(long userId) {
        return "FRIENDSTER_USER_" + userId;
    }
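
    /**
     * Deterministic edge id derived from the two endpoint vertex ids.
     */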
    public static String getFriendEdgeId(Vertex userVertex, Vertex friendVertex) {
        return "FRIENDSTER_FRIEND_" + userVertex.getId() + "_" + friendVertex.getId();
    }
}