package edu.isi.karma.er.helper;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.hash.Hash;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.isi.karma.kr2rml.writer.KR2RMLBloomFilter;
import edu.isi.karma.modeling.Uris;
import edu.isi.karma.util.HTTPUtil;
import edu.isi.karma.webserver.KarmaException;
/**
 * Triple-store helper for the bloom filters attached to triple maps through the
 * {@code Uris.KM_HAS_BLOOMFILTER} predicate: it fetches the stored filters, merges
 * them with newly supplied ones, writes the result back, and verifies the write.
 */
public class BloomFilterTripleStoreUtil extends TripleStoreUtil {

    private static final Logger logger = LoggerFactory
            .getLogger(BloomFilterTripleStoreUtil.class);

    /**
     * Merges the filters carried by {@code obj} into the triple store, verifies the
     * stored result and, when verification fails, repeats the update once using the
     * filters read back during verification.
     *
     * @param modelContext       graph context used for reading and saving
     * @param modelRepoUrl       URL of the triple store repository
     * @param bloomfilterMapping receives the filters currently stored for the triple maps
     * @param obj                JSON object with a comma-separated {@code "ids"} entry and
     *                           one serialized filter per listed id
     * @return {@code true} only if every update reported success and no exception occurred
     */
    public boolean processBloomFilters(String modelContext,
            String modelRepoUrl,
            Map<String, String> bloomfilterMapping,
            JSONObject obj) {
        boolean result = true;
        try {
            result &= updateTripleStore(obj, bloomfilterMapping, modelRepoUrl, modelContext);
            Map<String, String> verification = new HashMap<>();
            Set<String> triplemaps = parseTripleMapIds(obj);
            if (!verify(verification, triplemaps, modelRepoUrl, modelContext, obj)) {
                result &= updateTripleStore(obj, verification, modelRepoUrl, modelContext);
            }
        } catch (Exception e) {
            // Pass the throwable so the stack trace is not lost.
            logger.error("Unable to process bloom filters", e);
            result = false;
        }
        return result;
    }

    /**
     * Checks that every filter stored for the given triple maps contains all bits of the
     * corresponding filter in {@code obj}.
     *
     * @param verification   receives the filters read from the store (subject URI to
     *                       serialized filter); existing entries are checked as well
     * @param triplemaps     triple map URIs to look up
     * @param modelRepoUrl   URL of the triple store repository
     * @param modelContext   graph context used for the lookup
     * @param obj            JSON object holding the expected serialized filter per URI
     * @return {@code false} as soon as one stored filter lacks a bit of its expected
     *         filter, {@code true} otherwise
     * @throws KarmaException if the store lookup fails with it
     * @throws IOException    if a serialized filter cannot be decoded
     */
    public boolean verify(Map<String, String> verification, Set<String> triplemaps, String modelRepoUrl, String modelContext, JSONObject obj) throws KarmaException, IOException {
        verification.putAll(getBloomFiltersForMaps(modelRepoUrl, modelContext, triplemaps));
        for (Entry<String, String> entry : verification.entrySet()) {
            String subjectUri = entry.getKey();
            // The lookup query returns every subject that shares a filter value with a
            // requested triple map, so a subject may be absent from obj; there is
            // nothing to compare it against.
            if (!obj.has(subjectUri)) {
                continue;
            }
            KR2RMLBloomFilter stored = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize, KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
            KR2RMLBloomFilter expected = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize, KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
            stored.populateFromCompressedAndBase64EncodedString(entry.getValue());
            expected.populateFromCompressedAndBase64EncodedString(obj.getString(subjectUri));
            // (stored AND expected) XOR expected leaves exactly the bits of expected
            // that are missing from stored.
            stored.and(expected);
            stored.xor(expected);
            try {
                // The bit vector is not exposed publicly, hence the reflective read.
                Field bitsField = BloomFilter.class.getDeclaredField("bits");
                bitsField.setAccessible(true);
                BitSet missingBits = (BitSet) bitsField.get(stored);
                if (missingBits.cardinality() != 0) {
                    return false;
                }
            } catch (Exception e) {
                // Best effort: an unreadable filter does not fail verification, but the
                // problem must be visible in the log.
                logger.warn("Unable to inspect bloom filter bits for " + subjectUri, e);
            }
        }
        return true;
    }

    /**
     * Decodes the filters listed in {@code obj} and merges them into the triple store.
     *
     * @param obj                JSON object with a comma-separated {@code "ids"} entry and
     *                           one serialized filter per listed id
     * @param bloomfilterMapping receives the filters currently stored for the triple maps
     * @param modelRepoUrl       URL of the triple store repository
     * @param modelContext       graph context used for reading and saving
     * @return the result of {@link #updateTripleStoreWithBloomFilters}
     * @throws KarmaException if a store operation fails with it
     * @throws IOException    if a serialized filter cannot be decoded
     */
    public boolean updateTripleStore(JSONObject obj, Map<String, String> bloomfilterMapping, String modelRepoUrl, String modelContext) throws KarmaException, IOException {
        Set<String> triplemaps = parseTripleMapIds(obj);
        bloomfilterMapping.putAll(getBloomFiltersForMaps(modelRepoUrl, modelContext, triplemaps));
        Map<String, KR2RMLBloomFilter> bfs = new HashMap<>();
        for (String tripleUri : triplemaps) {
            KR2RMLBloomFilter bf = new KR2RMLBloomFilter();
            bf.populateFromCompressedAndBase64EncodedString(obj.getString(tripleUri));
            bfs.put(tripleUri, bf);
        }
        return updateTripleStoreWithBloomFilters(bfs, bloomfilterMapping, modelRepoUrl, modelContext);
    }

    /**
     * Queries the store for the filters of the given triple maps.
     *
     * @param tripleStoreURL URL of the triple store repository
     * @param context        graph context injected into the query
     * @param maps           triple map URIs to look up; {@code null} or empty yields an
     *                       empty result without querying
     * @return subject URI to serialized filter; empty when nothing was found or the
     *         query failed (failures are logged, not thrown)
     * @throws KarmaException if the connection test fails with it
     */
    public Map<String, String> getBloomFiltersForMaps(String tripleStoreURL, String context, Collection<String> maps) throws KarmaException {
        tripleStoreURL = normalizeTripleStoreURL(tripleStoreURL);
        testTripleStoreConnection(tripleStoreURL);
        Map<String, String> bloomfilters = new HashMap<>();
        if (maps == null || maps.isEmpty()) {
            // Nothing to ask for; avoids sending a query with an empty WHERE clause.
            return bloomfilters;
        }
        try {
            StringBuilder query = new StringBuilder();
            query.append("PREFIX km-dev:<http://isi.edu/integration/karma/dev#>\n");
            query.append("PREFIX rr:<http://www.w3.org/ns/r2rml#>\n");
            query.append("SELECT ?bf ?s \n");
            injectContext(context, query);
            query.append("WHERE \n{\n");
            appendBloomFilterPatterns(query, maps);
            query.append("}\n");
            String queryString = query.toString();
            logger.debug("query: {}", queryString);

            Map<String, String> formparams = new HashMap<>();
            formparams.put("query", queryString);
            formparams.put("queryLn", "SPARQL");
            String responseString = HTTPUtil.executeHTTPPostRequest(
                    tripleStoreURL, null, "application/sparql-results+json",
                    formparams);
            if (responseString != null) {
                JSONArray bindings = new JSONObject(responseString)
                        .getJSONObject("results")
                        .getJSONArray("bindings");
                for (int i = 0; i < bindings.length(); i++) {
                    JSONObject binding = bindings.getJSONObject(i);
                    bloomfilters.put(binding.getJSONObject("s").getString("value"),
                            binding.getJSONObject("bf").getString("value"));
                }
            }
        } catch (Exception e) {
            logger.error("Unable to fetch bloom filters from " + tripleStoreURL, e);
        }
        return bloomfilters;
    }

    /**
     * Deletes the filter statements of the given triple maps from the store.
     *
     * @param tripleStoreURL URL of the triple store repository
     * @param context        graph to delete from; {@code null} or blank omits the
     *                       {@code WITH} clause
     * @param maps           triple map URIs whose filters are removed; {@code null} or
     *                       empty sends no request
     * @throws KarmaException if the connection test fails with it; request failures are
     *                        logged, not thrown
     */
    public void deleteBloomFiltersForMaps(String tripleStoreURL, String context, Collection<String> maps) throws KarmaException {
        testTripleStoreConnection(tripleStoreURL);
        tripleStoreURL = normalizeTripleStoreURL(tripleStoreURL) + "/statements";
        if (maps == null || maps.isEmpty()) {
            // Nothing to delete; avoids sending an update with an empty WHERE clause.
            return;
        }
        try {
            StringBuilder query = new StringBuilder();
            query.append("PREFIX km-dev:<http://isi.edu/integration/karma/dev#>\n");
            query.append("PREFIX rr:<http://www.w3.org/ns/r2rml#>\n");
            if (null != context && !context.trim().isEmpty()) {
                query.append("WITH ");
                formatURI(context, query);
                query.append("\n");
            }
            query.append("DELETE {?s km-dev:hasBloomFilter ?bf} \n");
            query.append("WHERE \n{\n");
            appendBloomFilterPatterns(query, maps);
            query.append("}\n");
            String queryString = query.toString();
            logger.debug("query: {}", queryString);

            Map<String, String> formparams = new HashMap<>();
            formparams.put("update", queryString);
            String responseString = HTTPUtil.executeHTTPPostRequest(
                    tripleStoreURL, null, mime_types.get(RDF_Types.N3.name()),
                    formparams);
            // Route the response through the logger instead of standard output.
            logger.debug("response: {}", responseString);
        } catch (Exception e) {
            logger.error("Unable to delete bloom filters at " + tripleStoreURL, e);
        }
    }

    /**
     * ORs each new filter with the previously stored one (if any), replaces the stored
     * filter statements, and saves the merged filters.
     *
     * @param bfs                new filters keyed by triple map URI; the filters are
     *                           modified in place by the merge
     * @param bloomfilterMapping previously stored serialized filters keyed by URI
     * @param modelurl           URL of the triple store repository
     * @param context            graph context used when saving
     * @return the result of {@code saveToStoreFromString}
     * @throws KarmaException if a store operation fails with it
     * @throws IOException    if a filter cannot be decoded or encoded
     */
    public boolean updateTripleStoreWithBloomFilters(Map<String, KR2RMLBloomFilter> bfs, Map<String, String> bloomfilterMapping, String modelurl, String context) throws KarmaException, IOException {
        for (Entry<String, KR2RMLBloomFilter> entry : bfs.entrySet()) {
            String previouslyStored = bloomfilterMapping.get(entry.getKey());
            if (previouslyStored != null) {
                KR2RMLBloomFilter previous = new KR2RMLBloomFilter();
                previous.populateFromCompressedAndBase64EncodedString(previouslyStored);
                // or() mutates the filter already held by the map; no re-insert needed.
                entry.getValue().or(previous);
            }
        }

        // NOTE(review): the delete runs without a graph context while the save below
        // uses `context` — confirm this asymmetry is intentional.
        deleteBloomFiltersForMaps(modelurl, null, bfs.keySet());

        StringBuilder triples = new StringBuilder();
        for (Entry<String, KR2RMLBloomFilter> entry : bfs.entrySet()) {
            triples.append("<").append(entry.getKey()).append("> ");
            triples.append("<").append(Uris.KM_HAS_BLOOMFILTER).append("> ");
            triples.append("\"").append(entry.getValue().compressAndBase64Encode()).append("\" . ");
            triples.append(System.lineSeparator());
        }
        return saveToStoreFromString(triples.toString(), modelurl, context, Boolean.FALSE, null);
    }

    /** Splits the comma-separated {@code "ids"} entry of {@code obj} into a set of URIs. */
    private static Set<String> parseTripleMapIds(JSONObject obj) {
        return new HashSet<>(Arrays.asList(obj.getString("ids").split(",")));
    }

    /**
     * Appends one group pattern per triple map, joined by {@code UNION}, that binds
     * {@code ?s} and {@code ?bf} to every subject sharing a filter value with that map.
     */
    private static void appendBloomFilterPatterns(StringBuilder query, Collection<String> maps) {
        Iterator<String> iterator = maps.iterator();
        while (iterator.hasNext()) {
            query.append("{");
            query.append("\n ?s <");
            query.append(Uris.KM_HAS_BLOOMFILTER);
            query.append("> ?bf . ");
            query.append("\n<");
            query.append(iterator.next());
            query.append("> <");
            query.append(Uris.KM_HAS_BLOOMFILTER);
            query.append("> ?bf . \n} ");
            query.append(iterator.hasNext() ? "UNION \n" : "\n");
        }
    }
}