package edu.isi.karma.mapreduce.tripleparser; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; //import org.slf4j.Logger; //import org.slf4j.LoggerFactory; public class Neo4jCSVGenerator { // private final static Logger logger = LoggerFactory.getLogger(Neo4jCSVGenerator.class); // private static void parseTriples(Configuration conf, Path input, Path output) throws IOException { // // FileSystem fs = FileSystem.get(conf); // // try { // // Job job = Job.getInstance(conf, "parse triples"); // // job.setJarByClass(Neo4jCSVGenerator.class); // job.setMapperClass(TripleMapper.class); // job.setReducerClass(TripleReducer.class); // job.setNumReduceTasks(6); // // job.setMapOutputKeyClass(Text.class); // job.setMapOutputValueClass(NullWritable.class); // // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(Text.class); // // if (fs.exists(output)) { // fs.delete(output, true); // } // job.setInputFormatClass(KeyValueTextInputFormat.class); // job.setOutputFormatClass(SequenceFileOutputFormat.class); // FileInputFormat.addInputPath(job, input); // FileOutputFormat.setOutputPath(job, output); // job.waitForCompletion(true); // } // catch (IOException e) { // logger.error("i/o exception loading data sources", e); // } // catch (InterruptedException e) { // logger.debug("Hadoop job interrupted", e); // } // catch (ClassNotFoundException e) { // logger.debug("Cannot find mapper/reducer class", e); // } // // } // // private static void generateRelationships(Configuration conf, Path input, Path output) throws IOException { // // FileSystem fs = FileSystem.get(conf); // // try { // // Job job = Job.getInstance(conf, "export triples to Neo4j relationships CSV file"); // // job.setJarByClass(Neo4jCSVGenerator.class); // job.setMapperClass(Mapper.class); // job.setReducerClass(RelationshipReducer.class); // job.setNumReduceTasks(1); // // job.setMapOutputKeyClass(Text.class); // job.setMapOutputValueClass(Text.class); // // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(NullWritable.class); // // if (fs.exists(output)) { // fs.delete(output, true); // } // job.setInputFormatClass(SequenceFileInputFormat.class); // job.setOutputFormatClass(TextOutputFormat.class); // FileInputFormat.addInputPath(job, input); // FileOutputFormat.setOutputPath(job, output); // job.waitForCompletion(true); // } // catch (IOException e) { // logger.error("i/o exception loading data sources", e); // } // catch (InterruptedException e) { // logger.debug("Hadoop job interrupted", e); // } // catch (ClassNotFoundException e) { // logger.debug("Cannot find mapper/reducer class", e); // } // // } // // private static void generateNodes(Configuration conf, Path input, Path output) throws IOException { // // FileSystem fs = FileSystem.get(conf); // // try { // // Job job = Job.getInstance(conf, "export triples to Neo4j nodes CSV file"); // // job.setJarByClass(Neo4jCSVGenerator.class); // job.setMapperClass(NodeMapper.class); // job.setReducerClass(NodeReducer.class); // job.setNumReduceTasks(1); // // job.setMapOutputKeyClass(Text.class); // job.setMapOutputValueClass(NullWritable.class); // // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(NullWritable.class); // // if (fs.exists(output)) { // fs.delete(output, true); // } // job.setInputFormatClass(SequenceFileInputFormat.class); // job.setOutputFormatClass(TextOutputFormat.class); // FileInputFormat.addInputPath(job, input); // FileOutputFormat.setOutputPath(job, output); // job.waitForCompletion(true); // } // catch (IOException e) { // logger.error("i/o exception loading data sources", e); // } // catch (InterruptedException e) { // logger.debug("Hadoop job interrupted", e); // } // catch (ClassNotFoundException e) { // logger.debug("Cannot find mapper/reducer class", e); // } // // } private static void printUsage() { List<String> parameters = new ArrayList<>(); List<String> descriptions = new ArrayList<>(); parameters.add("-base"); descriptions.add("path of the base folder, default is \".\""); parameters.add("-input"); descriptions.add("path of input, default value is base/input"); parameters.add("-output1"); descriptions.add("path of nodes output, default value is base/nodes"); parameters.add("-output2"); descriptions.add("path of relationships output, default value is base/relationships"); parameters.add("-parse"); descriptions.add("the flag can be set to false to ignore parsing step, default value is true"); parameters.add("-nodes"); descriptions.add("the flag can be set to false to ignore exporting nodes, default value is true"); parameters.add("-relationships"); descriptions.add("the flag can be set to false to ignore exporting relationships, default value is true"); parameters.add("-help"); descriptions.add("prints the usage of this class"); for (int i = 0; i < parameters.size(); i++) { String p = parameters.get(i); String d = descriptions.get(i); System.out.println("\t" + p + "\n\t\t" + d + "\n"); } } public static void main(String[] args) { // String text = "Bad A cid Jesus, (Adam Starr, 12\\"; // text = text.replace("\t", ""); // text = text.replace(",", ""); // text = text.replaceAll("\\\\", ""); // System.out.println(text); Configuration conf = new Configuration(); for (int i = 0; i < args.length; i++) { if (args[i].toLowerCase().trim().equalsIgnoreCase("-help")) { printUsage(); return; } } HashMap<String, String> arguments = new HashMap<>(); for (int i = 0; i < args.length; i+=2) { if ( i + 1 < args.length) { arguments.put(args[i].trim().toLowerCase(), args[i+1].trim()); } } String arg_base = "-base"; String arg_input = "-input"; String arg_output_nodes = "-output1"; String arg_output_relationships = "-output2"; String arg_parse = "-parse"; String arg_nodes = "-nodes"; String arg_relationships = "-relationships"; Path basePath = new Path("."); if (arguments.containsKey(arg_base)) basePath = new Path(arguments.get(arg_base)); Path input = new Path(basePath, "input/"); if (arguments.containsKey(arg_input)) input = new Path(arguments.get(arg_input)); Path output_nodes = new Path(basePath, "nodes/"); if (arguments.containsKey(arg_output_nodes)) output_nodes = new Path(arguments.get(arg_output_nodes)); Path output_relationships = new Path(basePath, "relationships/"); if (arguments.containsKey(arg_output_relationships)) output_relationships = new Path(arguments.get(arg_output_relationships)); boolean parse = true; if (arguments.containsKey(arg_parse)) { try { parse = Boolean.valueOf(arguments.get(arg_parse)); } catch (Exception e) { System.out.println("Illegal value for argument " + arg_parse); } } boolean exportNodes = true; if (arguments.containsKey(arg_nodes)) { try { exportNodes = Boolean.valueOf(arguments.get(arg_nodes)); } catch (Exception e) { System.out.println("Illegal value for argument " + arg_nodes); } } boolean exportRelationships = true; if (arguments.containsKey(arg_relationships)) { try { exportRelationships = Boolean.valueOf(arguments.get(arg_relationships)); } catch (Exception e) { System.out.println("Illegal value for argument " + arg_relationships); } } try { Path inputSeq = new Path(basePath, "seq/"); if (parse) { TripleProcessor.parseTriples(conf, input, inputSeq); } if (exportNodes) { Thread t1 = new Thread(new NodeProcessor(conf, inputSeq, output_nodes)); t1.start(); } if (exportRelationships) { Thread t2 = new Thread(new RelationshipProcessor(conf, inputSeq, output_relationships)); t2.start(); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }