package edu.isi.karma.rdf.bloom;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;

import edu.isi.karma.er.helper.BloomFilterTripleStoreUtil;
import edu.isi.karma.kr2rml.writer.KR2RMLBloomFilter;
import edu.isi.karma.rdf.CommandLineArgumentParser;
import edu.isi.karma.webserver.KarmaException;

/**
 * Reads sameAs-style triples from a Turtle file and augments the KR2RML bloom
 * filters stored in the triple store: whenever a filter already contains one
 * side of a pair, the other side is added to the same filter.
 */
public class AddSameAsToBloomFilters {

	static String filepath;
	static String triplestoreURL;
	static String predicate;

	public static void main(String[] args) throws KarmaException, IOException, ParseException {
		Options options = createCommandLineOptions();
		CommandLine cl = CommandLineArgumentParser.parse(args, options, AddSameAsToBloomFilters.class.getSimpleName());
		if (cl == null) {
			return;
		}

		filepath = cl.getOptionValue("filepath");
		triplestoreURL = cl.getOptionValue("triplestoreurl");
		predicate = cl.getOptionValue("predicate");

		BloomFilterTripleStoreUtil utilObj = new BloomFilterTripleStoreUtil();

		// Collect the referencing object maps and predicate object maps that use the given predicate.
		Set<String> predicates = new HashSet<>();
		predicates.add(predicate);
		List<String> predicateObjectMaps = new ArrayList<>();
		for (String t : utilObj.getPredicatesForParentTriplesMapsWithSameClass(triplestoreURL, null, predicates).get("refObjectMaps")) {
			predicateObjectMaps.addAll(Arrays.asList(t.split(",")));
		}
		for (String t : utilObj.getPredicatesForTriplesMapsWithSameClass(triplestoreURL, null, predicates).get("predicateObjectMaps")) {
			predicateObjectMaps.addAll(Arrays.asList(t.split(",")));
		}

		// Fetch the serialized bloom filters for those maps and deserialize them.
		Map<String, String> serializedmapping = utilObj.getBloomFiltersForMaps(triplestoreURL, null, predicateObjectMaps);
		Map<String, KR2RMLBloomFilter> mapping = new HashMap<>();
		for (Entry<String, String> entry : serializedmapping.entrySet()) {
			String key = entry.getKey();
			String value = entry.getValue();
			KR2RMLBloomFilter bf = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize, KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
			bf.populateFromCompressedAndBase64EncodedString(value);
			mapping.put(key, bf);
		}

		// Read the input file and, for every statement, add the subject to any bloom
		// filter that already contains the object, and vice versa.
		Model model = ModelFactory.createDefaultModel();
		try (InputStream s = new FileInputStream(new File(filepath))) {
			model.read(s, null, "TURTLE");
		}
		StmtIterator iterator = model.listStatements();
		while (iterator.hasNext()) {
			Statement st = iterator.next();
			String subject = "<" + st.getSubject().toString() + ">";
			String object = "<" + st.getObject().toString() + ">";
			for (Entry<String, KR2RMLBloomFilter> entry : mapping.entrySet()) {
				KR2RMLBloomFilter bf = entry.getValue();
				if (bf.membershipTest(new Key(subject.getBytes("UTF-8"))))
					bf.add(new Key(object.getBytes("UTF-8")));
				if (bf.membershipTest(new Key(object.getBytes("UTF-8"))))
					bf.add(new Key(subject.getBytes("UTF-8")));
			}
		}

		// Write the updated bloom filters back to the triple store.
		utilObj.updateTripleStoreWithBloomFilters(mapping, serializedmapping, triplestoreURL, null);
	}

	private static Options createCommandLineOptions() {
		Options options = new Options();
		options.addOption(new Option("filepath", "filepath", true, "location of the input file"));
		options.addOption(new Option("triplestoreurl", "triplestoreurl", true, "location of the triple store"));
		options.addOption(new Option("predicate", "predicate", true, "the uri of the predicate"));
		options.addOption(new Option("help", "help", false, "print this message"));
		return options;
	}
}