/* * Copyright 2003-2010 Tufts University Licensed under the * Educational Community License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.osedu.org/licenses/ECL-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS IS" * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. */ package edu.tufts.vue.rdf; import java.util.*; import java.io.*; import java.net.*; import tufts.vue.DEBUG; import tufts.vue.LWComponent; import tufts.vue.LWPathway; import tufts.vue.LWMap; import tufts.vue.LWSlide; import tufts.vue.VueResources; import tufts.Util; import tufts.vue.VueUtil; import edu.tufts.vue.metadata.*; import edu.tufts.vue.ontology.*; import edu.tufts.vue.metadata.*; import com.hp.hpl.jena.rdf.model.impl.*; import com.hp.hpl.jena.sparql.core.*; import com.hp.hpl.jena.graph.*; /** "ARQ - A query engine for Jena, implementing SPARQL" */ import com.hp.hpl.jena.query.*; /** * * RDFIndex.java * * RDFIndex mainly makes use of Apache Jena for VUE map searching. This RDF approach is overkill for * VUE's current search needs, but originally began as an effort to create larger, peristent, multi-map * indicies. That effort was not completed. The resulting implementation that uses this * class creates a new RDFIndex for every new search (over one map or multiple maps), and then runs * the search on that fresh index. * * We do get one very nice feature for free -- the ability to write out a full .rdf file containing * a set of map data, although the current impl below tosses out the the key names from OSID meta-data * -- just the values are written out, all called "property". * * Makes use of Apache Jena / Jena ARQ (SPARQL query engine) * -- todo: update that lib to latest version... * * If, long-term, we really want to keep using the RDF indexing / searching, we could * go ahead and just build the VUE meta-data system itself out of RDF properties. * * -- Scott Fraize 2012-July * * @author akumar03 * @author Daniel J. Heller * */ // todo: could create domain for every repository based on class name impl. // (As the fields will in fact vary based on the impl) -- e.g: // for edu.tufts.osidimpl.repository.nytimes.Repository: // http://edu/tufts/osidimpl/repository/nytimes#Creator, etc. public class RDFIndex extends com.hp.hpl.jena.rdf.model.impl.ModelCom { private static final org.apache.log4j.Logger Log = org.apache.log4j.Logger.getLogger(RDFIndex.class); public static final int MAX_SIZE = VueResources.getInt("rdf.index.size"); //public static final boolean AUTO_INDEX= VueResources.getBool("rdf.index.auto"); // never implememnted public static final String INDEX_FILE = VueUtil.getDefaultUserFolder()+File.separator+VueResources.getString("rdf.index.file"); public static final String ONT_SEPARATOR = "#"; public static final char ONT_SEPARATOR_CHAR = '#'; /** name-space used for all VUE node fields (e.g., label,notes) as well as all "proper" meta-data (not tufts.vue.Resource meta-data) */ public static final String VUE_ONTOLOGY = Constants.ONTOLOGY_URL + ONT_SEPARATOR; /** Include actual keyword names from tufts.vue.Resource properties as opposed to just putting all values under keyword contentPropertyOf. * These will be put in the a VUE_OSID namespace -- either determined from the osid, or put in a general unknown-osid namespace. * Todo: not exactly the right thing to do for pure web/http URLResources or local file Resources. */ private static final boolean INDEX_VUE_RESOURCE_KEYWORDS = DEBUG.TEST; // The naming convetion here is that a "namespace" has the # at the end, but a "prefix" does not. // E.g., a namespace is ready to just have a label/keyword appended, and a prefix might have further // URL path depth added to it first. E.g.: http://vue.tufts.edu/vue.rdfs private static final String VUE_GENERAL_NAMESPACE = VueResources.getString("metadata.vue.url") + ONT_SEPARATOR; private static final String VUE_OSID_PREFIX = "http://vue.tufts.edu/osid/"; // is adding to the path this way a reasonable RDF convention? private static final String VUE_OSID_UNKNOWN_NAMESPACE = VUE_OSID_PREFIX + "#"; /** i.e.: http://vue.tufts.edu/vue.rdfs#none */ public static final String VueTermOntologyNone = VUE_GENERAL_NAMESPACE + "none"; static { // Log.info(Util.tags(VueResources.getString("metadata.dublincore.url")) + " <- metadata.dublincore.url"); // Yes, all url's are being munged. // Log.info(Util.tags(Constants.ONTOLOGY_URL) + " <- Constants.ONTOLOGY_URL"); Log.info(Util.tags(VUE_ONTOLOGY) + " <- VUE_ONTOLOGY"); Log.info(Util.tags(VUE_GENERAL_NAMESPACE) + " <- VUE_GENERAL_NAMESPACE"); Log.info(Util.tags(VueTermOntologyNone) + " <- ONTOLOGY_NONE " + Util.tag(VueTermOntologyNone)); } /** If true, index slides and slide content. Currently, all slide content is filtered out on the result side anyway. */ private static final boolean INDEX_SLIDES = false; final com.hp.hpl.jena.rdf.model.Property _propertyNone = createProperty(VUE_GENERAL_NAMESPACE, "none"); final com.hp.hpl.jena.rdf.model.Property idOf = createProperty(VUE_ONTOLOGY, Constants.ID); final com.hp.hpl.jena.rdf.model.Property labelOf = createProperty(VUE_ONTOLOGY, Constants.LABEL); final com.hp.hpl.jena.rdf.model.Property childOf = createProperty(VUE_ONTOLOGY, Constants.CHILD); final com.hp.hpl.jena.rdf.model.Property authorOf = createProperty(VUE_ONTOLOGY, Constants.AUTHOR); final com.hp.hpl.jena.rdf.model.Property colorOf = createProperty(VUE_ONTOLOGY, Constants.COLOR); final com.hp.hpl.jena.rdf.model.Property notesOf = createProperty(VUE_ONTOLOGY, Constants.NOTES); final com.hp.hpl.jena.rdf.model.Property contentPropertyOf = createProperty(VUE_ONTOLOGY,Constants.CONTENT_INFO_PROPERTY); // final com.hp.hpl.jena.rdf.model.Property hasTag = createProperty(VUE_ONTOLOGY,Constants.TAG); // Unused: shows up as Text /** will contain the URI's of everything indexed in the map, unless this is a global index, * in which case it will contain the merged URI's from all maps */ private final Map<URI,LWComponent> vueComponentMap = new HashMap(); /** a general default index -- available but unused feature up through summer 2012 */ private static RDFIndex defaultIndex; public RDFIndex(com.hp.hpl.jena.graph.Graph base) { super(base); Log.info("instanced from " + tufts.Util.tags(base)); // are we using this case? } public RDFIndex() { //super(com.hp.hpl.jena.graph.Factory.createDefaultGraph()); // createGraphMem for performance? super(com.hp.hpl.jena.graph.Factory.createGraphMem()); } /** transpose URI results to LWComponents using our internal mapping to vue components * Note that this will NOT remove duplicates from the input results: that should already have been done */ public Collection<LWComponent> decodeVueResults(Collection<URI> results) { final Collection<LWComponent> hits = new ArrayList<LWComponent>(results.size()+2); for (URI uri : results) { final LWComponent c = vueComponentMap.get(uri); if (c != null) hits.add(c); else Log.error("*** Internal error: couldn't find URI in results-index: " + uri + "; " + Util.tags(vueComponentMap)); } return hits; } /** for creating multi-map / global indicies */ public void addMapIndex(RDFIndex mapIndex) { vueComponentMap.putAll(mapIndex.vueComponentMap); super.add(mapIndex); } public void indexMap(LWMap map) { //this.index.remove(this.index); // I presume this is a means of just clearing the entire index? vueComponentMap.clear(); removeAll(); indexAdd(map); } public void indexAdd(LWMap map) { indexAdd(map, false, false); } // Todo: could change to take a LWComponent as the focal, tho creating and index for the tiny amount // of content on a slide is a bit overkill -- only do if easier than engaging our other DataTree // based search mechanism. public void indexAdd(final LWMap map, final boolean metadataOnly, final boolean searchEverything_IS_IGNORED) { if (DEBUG.Enabled && searchEverything_IS_IGNORED) Log.debug("Note: \"search-everything\" bit is now ignored."); // If we want slide content, change default index to always index everything -- will hardly make // a difference just adding slides, and they can be (are currently always) optionally filtered out later anyway. if (DEBUG.RDF) Log.debug("indexAdd: begin; freeMem=: "+Runtime.getRuntime().freeMemory()); final com.hp.hpl.jena.rdf.model.Resource mapRoot = this.createResource(map.getURI().toString()); if (DEBUG.RDF) Log.debug("index: create resource for map; freeMem=: "+Runtime.getRuntime().freeMemory()); try { addProperty(mapRoot, idOf, map.getID()); addProperty(mapRoot, authorOf, System.getProperty("user.name")); if (map.hasLabel()) addProperty(mapRoot, labelOf,map.getLabel()); if (DEBUG.RDF) Log.debug("index: added properties for map; freeMem="+Runtime.getRuntime().freeMemory()); final Collection<LWComponent> searchSet = map.getAllDescendents(); // final Collection<LWComponent> searchSet; // // We always filter out slide content anyway, so no point in allowing it un index // // Note that we really ought to search from the current viewer focal tho, so, // // if user was looking at a single slide with lots of content, the search // // bar would still do something meaninful. // if (searchEverything) { // // E.g., this will search everything incuding Slides, and even the MasterSlide (which is a bug) // // THIS IS A PROBLEM IN THAT A PARAMETERIZED INDEX IS NO LONGER CACHEABLE! // searchSet = map.getAllDescendents(LWComponent.ChildKind.ANY); // } else { // searchSet = map.getAllDescendents(); // } for (LWComponent c : searchSet) { if (c instanceof LWPathway || c instanceof LWMap.Layer) continue; if (!INDEX_SLIDES && c instanceof LWSlide) continue; try { load_VUE_component_to_RDF_index(c, mapRoot, !metadataOnly); } catch (Throwable t) { Log.warn("indexing VUE component " + c, t); } if (size() > MAX_SIZE) { Log.warn("Maximum fail-safe search capacity reached: not all nodes will be searchable. (See property rdf.index.size)"); break; } } if (DEBUG.RDF) Log.debug("index: after indexing all components; freeMem="+Runtime.getRuntime().freeMemory()); } catch(Exception ex) { Log.error("index", ex); } if(DEBUG.RDF) Log.debug("index: done -- size="+this.size()); } public Collection<URI> searchWithSPARQL(final String queryString) { //if (DEBUG.SEARCH) Log.debug("SEARCH; substring=" + Util.tags(substring) + " queryString:\n" + Util.tags(queryString)); if (DEBUG.SEARCH) Log.debug("searchWithSPARQL; queryString:\n" + Util.tags(queryString)); final Collection<URI> resultSet = new ArrayList<URI>(); final com.hp.hpl.jena.query.Query query = QueryFactory.create(queryString); if (DEBUG.SEARCH) Log.debug("QF created " + Util.tag(query) + "; memory=" + Runtime.getRuntime().freeMemory() + "\n" + query.toString().trim().replaceAll("\n\n", "\n")); final QueryExecution qe = QueryExecutionFactory.create(query, this); // 2nd arg is for Model or for FileManager? if (DEBUG.SEARCH) Log.debug("created QEF " + qe + "; memory=" + Runtime.getRuntime().freeMemory()); final ResultSet results = qe.execSelect(); if (DEBUG.SEARCH) Log.debug("execSelect returned; memory=" + Runtime.getRuntime().freeMemory()); while (results.hasNext()) { final QuerySolution qs = results.nextSolution(); if (DEBUG.SEARCH) { final String qss = qs.toString().replaceAll("<http://vue.tufts.edu", "..."); // shorten debug output Log.debug("qSol " + String.format("%.190s%s", qss, qss.length() > 190 ? ("...x"+qss.length()) : "")); } if (false) { // debug debug all vars from query //Util.dumpIterator(qs.varNames()); Iterator<String> vn = qs.varNames(); while (vn.hasNext()) { String v = vn.next(); Log.debug("\t" + Util.tags(v) + "=" + Util.tags(qs.get(v))); } } try { resultSet.add(new URI(qs.getResource("rid").getURI())); } catch (Throwable t) { Log.warn("handling QuerySolution " + qs, t); } } qe.close(); return resultSet; } // This was tried only by searchAllValues: It appears to originally have been to // ignore certain keys, or maybe even values, but I can't see how it ever worked. // ACTUALLY, it might have been to ignore jena Resouce id's in the results, as at one // time I can see that was a field added to the index, tho it would have eliminated // other text values that happened to have '#' in them. // try { // if (substring != NO_KEYWORD) { // // What the hell does this code do?? It's trying to examine the VALUE for // // what looks like KEYWORD transformations... // //final String fullKeyword = qs.getLiteral("keyword").toString(); // final String fullKeyword = qs.getLiteral("val").toString(); // // What cases is this code filtering out the result from? // final int slashLocation = fullKeyword.indexOf("#"); // final int keywordLocation = fullKeyword.toString().toLowerCase(). // lastIndexOf(substring.toString().toLowerCase()); // if (keywordLocation <= slashLocation) { // // ONE ACTUAL RESULT: IF WE ENCOUNTER ANY HIT VALUE STRINGS WITH '#' IN THEM, // // A HIT IN THE STRING *BEFORE* THE '#" IS IGNORED! Can't be right... // Log.info("MYSTERIOUS FILTERING HAS OCCURRED on \"keyword\" " + Util.tags(fullKeyword), new Throwable("HERE")); // continue; // } // // if (keywordLocation > slashLocation) // // resultSet.add(new URI(qs.getResource("resource").toString())); // } // //if (DEBUG.SEARCH) Log.debug("getURI: " + Util.tags(qs.getResource("rid").getURI())); // resultSet.add(new URI(qs.getResource("rid").getURI())); // //resultSet.add(new URI(qs.getResource("resource").toString())); // } catch (Throwable t) { // Log.warn("handling QuerySolution " + qs, t); // } //private static final String NO_KEYWORD = "<no-keyword>"; // private Collection<URI> searchWithQueryString(String queryString) { // return searchWithSPARQL(NO_KEYWORD, queryString); // } /** * Note this does a search using *our* simple query object: edu.tufts.vue.rdf.Query, NOT * a com.hp.hpl.jena.query.Query. Our Query object is really a SPARQL builder. */ public Collection<URI> search(edu.tufts.vue.rdf.Query sparql_builder) { if (DEBUG.SEARCH) Log.debug("search; query=" + Util.tags(sparql_builder)); final String sparqlQuery = sparql_builder.createSPARQLQuery(); return searchWithSPARQL(sparqlQuery); } // private static final String DefaultVueQuery = // "PREFIX vue: <"+VUE_ONTOLOGY+"> SELECT ?resource ?keyword WHERE { ?resource ?x ?keyword }"; /** General search: search all values, ignoring keywords (a.k.a: search the values for every and any key) */ public Collection<URI> searchAllValues(String substring) { if (DEBUG.SEARCH || DEBUG.RDF) Log.debug("searchAllValues: " + Util.tags(substring)); // Newlines for diagnostic readability // This would be a the same as what we'd get from a single-criteria edu.tufts.vue.rdf.Query that had // not specific property it was looking for (just a value), if Query supported that. final String genericSubstringQuery = "PREFIX vue: <"+VUE_ONTOLOGY+">\n SELECT ?rid ?key ?val WHERE {\n\t?rid ?key ?val FILTER regex(?val, \""+substring+ "\", \"i\")\n}"; // Note: the WHERE cause is what assigns local variable names to WHATEVER is in the RDF store // tuples -- e.g., we have the Jena Resource URI at position 0, the keyword/Jena Property at position 1, and // the value at position 2. I suspect that's how it's always ordered in the jena model. // Note that we can order the results in the QuerySolution by differing the order // in the SELECT from the order in the WHERE. //if (DEBUG.SEARCH) Log.debug("searchAllResources " + Util.tags(keyword) + " " + queryString); //return searchWithSPARQL(substring, genericSubstringQuery); return searchWithSPARQL(genericSubstringQuery); } // public void save() { } // public void read() { } private static final boolean RDFIZE_COLOR = VueResources.getBool("rdf.rdfize.color"); /** * Extract relevant data of interest from the given VUE component, loading it into a new * jena.rdf.model.Resource, and add that to the RDF property tree as a child of the given * mapRootResource */ private void load_VUE_component_to_RDF_index( final tufts.vue.LWComponent component, final com.hp.hpl.jena.rdf.model.Resource mapRootResource, final boolean includeNodeData) { final URI uri = component.getURI(); final com.hp.hpl.jena.rdf.model.Resource r = this.createResource(uri.toString()); // We need to be able to look up the RDF URI result later to get back to the LWComponent. // Note it might be faster to keep a master index of all LWComponents created by sequential // ID (as this is a runtime need only) and then we wouldn't have create this map each time, // and we could look them up via the perfect hash (an array index). vueComponentMap.put(uri, component); if (includeNodeData) { //------------------------------------------------------------------ // Load Node info (label, notes, etc) plus any tufts.vue.Resource // meta-data to the index. //------------------------------------------------------------------ if (component.hasLabel()) addProperty(r, labelOf, component.getLabel()); if (component.hasNotes()) addProperty(r, notesOf, component.getNotes()); if (RDFIZE_COLOR && component.getXMLfillColor() != null) addProperty(r, colorOf, component.getXMLfillColor()); final tufts.vue.Resource res = component.getResource(); if (res != null) { String osidNS = null; if (INDEX_VUE_RESOURCE_KEYWORDS) { final String osidProp = res.getProperty("@osid.impl"); String osid = osidProp; if (osid != null) { if (osid.endsWith(".Repository")) { osid = osid.substring(0, osid.length() - 11); osidNS = VUE_OSID_PREFIX + osid.substring(osid.lastIndexOf('.')+1) + "#"; } else { // note: could use provider id... probably even more unique than impl class Log.warn("Couldn't understand @osid.impl: " + Util.tags(osidProp)); } // Need to look into what'd be most appropriate here: // ?E.g., http://vue.tufts.edu/osid/nytimes#title // ?E.g., http://vue.tufts.edu/osid/nytimes/vue.rdfs#title } if (osidNS == null) { // Not exactly correct: these tufts.vue.Resources may be generic web URL's or // or local-file resources, and their data is unrelated to the OSID's osidNS = VUE_OSID_UNKNOWN_NAMESPACE; } } for (Map.Entry e : res.getProperties().entries()) { final Object key = e.getKey(); if (tufts.vue.Resource.isInternalPropertyKey(key.toString())) { // todo: would be better to keep these props in a differnet map // and not have to filter them out everywhere. continue; } final Object value = e.getValue(); if (value != null) { final String strValue = value.toString(); if (strValue == null || strValue.length() <= 0) { // as all of these properties will have the same key, there's // no point in allowing us to check for presence of an empty value continue; } if (INDEX_VUE_RESOURCE_KEYWORDS) { //addProperty(r, createProperty(VUE_ONTOLOGY, key.toString()), strValue); // This will make sure a separate property exists for every resource property // key found, which would enable us to search on specific resource properties. // (E.g., author, creator, etc). if (!tufts.vue.Resource.isHiddenPropertyKey(key.toString())) { addProperty(r, createProperty(osidNS, key.toString()), strValue); } } else { // Using contentPropertyOf means that all resource properties have the same keyword name, // and thus we can never do a searchs such as "author=bob" amongst resource properties... addProperty(r, contentPropertyOf, strValue); } } } } } if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("processing " + component); addStatement(createStatement(r, childOf, mapRootResource)); final List<VueMetadataElement> metadata = component.getMetadataList().getMetadata(); for (VueMetadataElement vme : metadata) { if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("scan " + vme); final String key = vme.getKey(); final String value = vme.getValue(); if (key == VueTermOntologyNone) { // also check #TAG so we don't have to encode that? // This is just an optimization, so checking object identity is okay (should be used that way) if (value != null && value.length() > 0) { // Optimization: this being a "none" term (no keyword), don't bother with empty values addStatement(createStatement(r, _propertyNone, value)); } // todo: add special property for #source as well, or refactor everything so // that such relationships are pre-established and need no detecting (e.g., smarter VME) } else if (key == null) { Log.warn(r + ": null key: " + vme); } else if (key.length() == 0) { Log.warn(r + ": empty key: " + vme); } else if (value == null) { Log.warn(r + ": null value: " + vme); } else { // if (value.length() == 0) { // // may want these if we allow searching just for the presence of key // if (DEBUG.Enabled) Log.debug(r + ": indexing empty value: " + vme); // } // Note that we do NOT want to skip empty values, as we have a non-empty key, and a search for // a key with an empty value might be a valid search type someday (if we had the UI to support // it). // todo: kind of waste to create/fetch these jena Property instances constantly: if we keep // this RDF indexing, someday we could just go ahead and put the RDF property right in the VME // object. (And pre-encode all keys and/or only allow encoded keys in all meta-data // data-structures) Note that in current superclass impl, createProperty will return the // existing Property object if the name matches. final String encodedKey = getEncodedKey(key); addStatement(createStatement(r, getPropertyFromKey(encodedKey), value)); } } } // final List<VueMetadataElement> metadata = component.getMetadataList().getMetadata(); // for (VueMetadataElement vme : metadata) { // if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("scan " + vme); // if (true || vme.getObject() != null) { // // BAD SEMANTICS: we check "getObject" then just go ahead and check key & value? // final String key = vme.getKey(); // final String strValue = vme.getValue(); // if (vme.getKey() == VueTermOntologyNone) { // also check #TAG so we don't have to encode that? // // This is just an optimization, so checking object identity is okay (should be used that way) // if (strValue != null && strValue.length() > 0) { // // Optimization: this being a "none" term (no keyword), don't bother with empty values // addStatement(createStatement(r, _propertyNone, strValue)); // } // } else { // // todo: kind of waste to create/fetch these constantly: if we keep this RDF indexing, // // someday we could just go ahead and put the RDF property right in the VME object. // // (And pre-encode all keys and/or only allow encoded keys in all meta-data // // data-structures) Note that in current superclass impl, createProperty will // // return the existing Property object if the name matches. // // Note that we do NOT want to skip empty values, as we have a non-empty key, and a // // search for a key with an empty value might be a valid search type someday (if we had // // the UI to support it). // final String encodedKey = getEncodedKey(key); // addStatement(createStatement(r, getPropertyFromKey(encodedKey), strValue)); // } // } else { // Log.warn(r + ": null object: " + vme); // used to warn "no statement" -- why ignore key/value if no object in VME? // } // //statement = this.createStatement(r,createPropertyFromKey(element.getKey()),element.getObject().toString()); // //addStatement(statement); // } public void addStatement(com.hp.hpl.jena.rdf.model.Statement statement) { if (size() < MAX_SIZE) { if (DEBUG.SEARCH && DEBUG.META) Log.debug("addStatement: " + statement); super.add(statement); } else { throw new RuntimeException("Size of index: "+size()+ " exceeds MAX_SIZE: "+MAX_SIZE); } } public void addProperty(com.hp.hpl.jena.rdf.model.Resource r, com.hp.hpl.jena.rdf.model.Property p,String value) { if (size() <MAX_SIZE) { r.addProperty(p, value); } else { throw new RuntimeException("Size of index: "+size()+ " exceeds MAX_SIZE: "+MAX_SIZE); } } public static String getUniqueId() { return Constants.RESOURCE_URL+edu.tufts.vue.util.GUID.generate(); // This might better be called something else, but leaving for now just in case of any // backward compatability issue. These are permanently persisted in save files, // tho at the moment there's really no point in doing so. // Constants.NODE_URL+edu.tufts.vue.util.GUID.generate(); } public static RDFIndex getDefaultIndex() { if(defaultIndex == null) { return createDefaultIndex(); } else { return defaultIndex; } } // A concept of auto-indexing was never implemented -- SMF 2012-06-25 // public void startAutoIndexing() { // isAutoIndexing = true; // } // public void stopAutoIndexing() { // isAutoIndexing = false; // } // don't see this called anywhere... public void regenerate() { Log.info("regenerate: before Indexing size: "+size()); List stmtList = listStatements().toList(); for(int i = 0;i<stmtList.size();i++) { com.hp.hpl.jena.rdf.model.Statement statementI = (com.hp.hpl.jena.rdf.model.Statement)stmtList.get(i); for(int j = i+1;j<stmtList.size();j++) { com.hp.hpl.jena.rdf.model.Statement statementJ = (com.hp.hpl.jena.rdf.model.Statement) stmtList.get(j); if(compareStatements(statementI,statementJ)) { remove(statementJ); } } } Log.info("regenerate: after Indexing size: "+size()); } // don't see this called anywhere except above... private boolean compareStatements(com.hp.hpl.jena.rdf.model.Statement stmt1,com.hp.hpl.jena.rdf.model.Statement stmt2) { if (!stmt1.getSubject().toString().equals(stmt2.getSubject().toString())) return false; else if (!stmt1.getObject().toString().equals(stmt2.getObject().toString())) return false; else if (!stmt1.getPredicate().toString().equals(stmt2.getPredicate().toString())) return false; else return true; } /** This is overriden to add debugging only */ @Override public com.hp.hpl.jena.rdf.model.Property createProperty(final String nameSpace, final String localName) { final com.hp.hpl.jena.rdf.model.Property property = super.createProperty(nameSpace, localName); if (DEBUG.SEARCH && DEBUG.RDF) { final String propName; if (property instanceof com.hp.hpl.jena.rdf.model.impl.PropertyImpl) propName = "PropertyImpl"; else propName = property.getClass().getName(); Log.debug("createProperty " + Util.tags(nameSpace) + String.format("+%-18s= %s@%08X[%s]", Util.tags(localName), // note need extra padding for escape codes here: propName, System.identityHashCode(property), property.toString())); } return property; } // /** Auto-create the property if not already there -- [not possible: super.createProperty already does this] */ // public com.hp.hpl.jena.rdf.model.Property findProperty(final String nameSpace, final String localName) { // final com.hp.hpl.jena.rdf.model.Property property = super.getProperty(nameSpace, localName); // if (property == null) // return createProperty(nameSpace, localName); // else // return property; // } /** Get/create a property. If the key isn't domain/ns qualified, default to the VUE_ONTOLOGY name space */ public com.hp.hpl.jena.rdf.model.Property getPropertyFromKey(String key) { //if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("createPropertyFromKey " + Util.tags(key)); final com.hp.hpl.jena.rdf.model.Property p; final String words[] = key.split(ONT_SEPARATOR); if (words.length == 1) { p = createProperty(VUE_ONTOLOGY, key); } else if (words.length < 1) { // Is this case even possible? Empty key? throw new RuntimeException("createPropertyFromKey: The key format is wrong. key - "+key); } else { // Note this means anything after a *second* '#' in the string will be entirely ignored final String nameSpace = words[0] + ONT_SEPARATOR; p = createProperty(nameSpace, words[1]); } //if (DEBUG.SEARCH) Log.debug("created jena property " + Util.tags(p)); return p; } /** Note that while VUE.java currently has code to call getDefaultIdex, it's never used. * This attemps to read a defeault index file. SMF 2012-06-25 */ private static RDFIndex createDefaultIndex() { defaultIndex = new RDFIndex(com.hp.hpl.jena.graph.Factory.createGraphMem()); try { File indexFile = new File(INDEX_FILE); if(indexFile.exists()) { defaultIndex.read(new FileReader(indexFile),Constants.RESOURCE_URL); } } catch(Throwable t) { t.printStackTrace(); } return defaultIndex; } /** * E.g., lets say a key has the name "Clubhouse Name" -- this will turn it into "Clubhouse+Name" * or "Total Time Online (mm)" to "Total+Time+Online+%28mm%29" * * NOTE: tho in the second case, jena will let us create a Property at runtime with that name, * but if we attempt to export it, we get com.hp.hpl.jena.shared.InvalidPropertyURIException at * com.hp.hpl.jena.xmloutput.impl.BaseXMLWriter.splitTag(BaseXMLWriter.java:345), so apparently * URLencoder isn't a full-proof encoding strategy. Oh, and also, "Clubhouse+Name" is converted * to prefix declaration that includes "...#Clubhouse+", and then just "Name" is used when * declaring the property value. * * Any qualifying namespace prefix, if present, is left untouched. */ public static String getEncodedKey(final String key) { String encodedKey = key; try { final int ontEnd = key.indexOf(ONT_SEPARATOR); if (ontEnd != -1) { final String prefix = key.substring(0, ontEnd + 1); final String name = key.substring(ontEnd + 1, key.length()); final String encodedName = java.net.URLEncoder.encode(name, "UTF-8"); if (name != encodedName) { encodedKey = prefix + encodedName; if (DEBUG.RDF) Log.debug("encoded " + Util.tags(key) + " to " + Util.tags(encodedKey)); } } else { encodedKey = java.net.URLEncoder.encode(key, "UTF-8"); if (DEBUG.RDF && encodedKey != key) Log.debug("encoded " + Util.tags(key) + " to " + Util.tags(encodedKey)); } } catch (java.io.UnsupportedEncodingException e) { Log.warn("encoding key " + Util.tags(key), e); } return encodedKey; } }