RDFIndex.java example

Explorer
VUE-master
- VUE2
/*
* Copyright 2003-2010 Tufts University  Licensed under the
 * Educational Community License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 * 
 * http://www.osedu.org/licenses/ECL-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package edu.tufts.vue.rdf;

import java.util.*;
import java.io.*;
import java.net.*;

import tufts.vue.DEBUG;
import tufts.vue.LWComponent;
import tufts.vue.LWPathway;
import tufts.vue.LWMap;
import tufts.vue.LWSlide;
import tufts.vue.VueResources;
import tufts.Util;
import tufts.vue.VueUtil;
import edu.tufts.vue.metadata.*;

import edu.tufts.vue.ontology.*;
import edu.tufts.vue.metadata.*;
import com.hp.hpl.jena.rdf.model.impl.*;
import com.hp.hpl.jena.sparql.core.*;
import com.hp.hpl.jena.graph.*;

/** "ARQ - A query engine for Jena, implementing SPARQL" */
import com.hp.hpl.jena.query.*;

/**
 *
 * RDFIndex.java
 *
 * RDFIndex mainly makes use of Apache Jena for VUE map searching.  This RDF approach is overkill for
 * VUE's current search needs, but originally began as an effort to create larger, persistent, multi-map
 * indices.  That effort was not completed.  The resulting implementation that uses this
 * class creates a new RDFIndex for every new search (over one map or multiple maps), and then runs
 * the search on that fresh index.
 *
 * We do get one very nice feature for free -- the ability to write out a full .rdf file containing
 * a set of map data, although the current impl below tosses out the key names from OSID meta-data
 * -- just the values are written out, all called "property".
 *
 * Makes use of Apache Jena / Jena ARQ (SPARQL query engine)
 * -- todo: update that lib to latest version...
 *
 * If, long-term, we really want to keep using the RDF indexing / searching, we could
 * go ahead and just build the VUE meta-data system itself out of RDF properties.
 *
 * -- Scott Fraize 2012-July
 *
 * @author akumar03
 * @author Daniel J. Heller
 *
 */

// todo: could create domain for every repository based on class name impl.
// (As the fields will in fact vary based on the impl) -- e.g:
// for edu.tufts.osidimpl.repository.nytimes.Repository:
// http://edu/tufts/osidimpl/repository/nytimes#Creator, etc.

public class RDFIndex extends com.hp.hpl.jena.rdf.model.impl.ModelCom
{
    /** log4j logger for this class */
    private static final org.apache.log4j.Logger Log = org.apache.log4j.Logger.getLogger(RDFIndex.class);

    /** Fail-safe cap, read from resource "rdf.index.size"; size() is compared against it before anything is added to the index */
    public static final int MAX_SIZE = VueResources.getInt("rdf.index.size");
    //public static final boolean AUTO_INDEX= VueResources.getBool("rdf.index.auto"); // never implemented
    /** Path made of the default user folder, the platform file separator, and the file name in resource "rdf.index.file" */
    public static final String INDEX_FILE = VueUtil.getDefaultUserFolder()+File.separator+VueResources.getString("rdf.index.file");
    /** Separator between a name-space and a local name in a property key */
    public static final String ONT_SEPARATOR = "#";
    /** Same separator as ONT_SEPARATOR, as a char */
    public static final char ONT_SEPARATOR_CHAR = '#';
    /** name-space used for all VUE node fields (e.g., label,notes) as well as all "proper" meta-data (not tufts.vue.Resource meta-data) */
    public static final String VUE_ONTOLOGY = Constants.ONTOLOGY_URL + ONT_SEPARATOR;

    /** Include actual keyword names from tufts.vue.Resource properties as opposed to just putting all values under keyword contentPropertyOf.
     * These will be put in a VUE_OSID namespace -- either determined from the osid, or put in a general unknown-osid namespace.
     * Todo: not exactly the right thing to do for pure web/http URLResources or local file Resources. */
    private static final boolean INDEX_VUE_RESOURCE_KEYWORDS = DEBUG.TEST;
    
    // The naming convention here is that a "namespace" has the # at the end, but a "prefix" does not.
    // E.g., a namespace is ready to just have a label/keyword appended, and a prefix might have further
    // URL path depth added to it first.  E.g.: http://vue.tufts.edu/vue.rdfs
    private static final String VUE_GENERAL_NAMESPACE = VueResources.getString("metadata.vue.url") + ONT_SEPARATOR;
    private static final String VUE_OSID_PREFIX = "http://vue.tufts.edu/osid/"; // is adding to the path this way a reasonable RDF convention?
    /** Namespace used for resource properties when no OSID-specific namespace could be derived */
    private static final String VUE_OSID_UNKNOWN_NAMESPACE = VUE_OSID_PREFIX + "#";

    /** i.e.: http://vue.tufts.edu/vue.rdfs#none */
    public static final String VueTermOntologyNone = VUE_GENERAL_NAMESPACE + "none";

    // Diagnostic only: logs the computed name-space constants once, at class initialization.
    static {
        // Log.info(Util.tags(VueResources.getString("metadata.dublincore.url")) + " <- metadata.dublincore.url"); // Yes, all url's are being munged.
        // Log.info(Util.tags(Constants.ONTOLOGY_URL) + " <- Constants.ONTOLOGY_URL");
        Log.info(Util.tags(VUE_ONTOLOGY) + " <- VUE_ONTOLOGY");
        Log.info(Util.tags(VUE_GENERAL_NAMESPACE) + " <- VUE_GENERAL_NAMESPACE");
        Log.info(Util.tags(VueTermOntologyNone) + " <- ONTOLOGY_NONE " + Util.tag(VueTermOntologyNone));
    }

    /** If true, index slides and slide content.  Currently, all slide content is filtered out on the result side anyway. */
    private static final boolean INDEX_SLIDES = false;
    
    // Note: the Property fields below are initialized through this class's createProperty
    // override, so they are created per RDFIndex instance during construction.

    /** Property for meta-data values that have no keyword: VUE_GENERAL_NAMESPACE + "none" */
    final com.hp.hpl.jena.rdf.model.Property _propertyNone = createProperty(VUE_GENERAL_NAMESPACE, "none");

    // Properties for the built-in VUE fields, all in the VUE_ONTOLOGY name-space
    final com.hp.hpl.jena.rdf.model.Property idOf       = createProperty(VUE_ONTOLOGY, Constants.ID);
    final com.hp.hpl.jena.rdf.model.Property labelOf    = createProperty(VUE_ONTOLOGY, Constants.LABEL);
    final com.hp.hpl.jena.rdf.model.Property childOf    = createProperty(VUE_ONTOLOGY, Constants.CHILD);
    final com.hp.hpl.jena.rdf.model.Property authorOf   = createProperty(VUE_ONTOLOGY, Constants.AUTHOR);
    final com.hp.hpl.jena.rdf.model.Property colorOf    = createProperty(VUE_ONTOLOGY, Constants.COLOR);
    final com.hp.hpl.jena.rdf.model.Property notesOf    = createProperty(VUE_ONTOLOGY, Constants.NOTES);

    /** Single shared property under which tufts.vue.Resource property values are indexed when their key names are not kept */
    final com.hp.hpl.jena.rdf.model.Property contentPropertyOf = createProperty(VUE_ONTOLOGY,Constants.CONTENT_INFO_PROPERTY);
    // final com.hp.hpl.jena.rdf.model.Property hasTag = createProperty(VUE_ONTOLOGY,Constants.TAG); // Unused: shows up as Text

    /** will contain the URI's of everything indexed in the map, unless this is a global index,
     * in which case it will contain the merged URI's from all maps.  Used to translate search
     * results (URIs) back into the LWComponents they were indexed from. */
    private final Map<URI,LWComponent> vueComponentMap = new HashMap<URI,LWComponent>();

    /** a general default index -- available but unused feature up through summer 2012 */
    private static RDFIndex defaultIndex;
    
    /**
     * Creates an index wrapping the supplied Jena graph, and logs where it was instanced from.
     *
     * @param base the graph handed to the ModelCom superclass
     */
    public RDFIndex(com.hp.hpl.jena.graph.Graph base) {
        super(base);
        // are we using this case?
        Log.info("instanced from " + Util.tags(base));
    }

    /**
     * Creates an empty index on a new graph obtained from Jena's Factory.createGraphMem()
     * (chosen over createDefaultGraph -- for performance?).
     */
    public RDFIndex() {
        super(com.hp.hpl.jena.graph.Factory.createGraphMem());
    }

    /**
     * Translates URI search results into the LWComponents they were indexed from, using this
     * index's internal URI-to-component map.  Duplicates in the input are NOT removed here:
     * that should already have been done.  A URI with no known component is logged as an
     * internal error and left out of the returned collection.
     *
     * @param results URIs produced by a search on this index
     * @return the matching components, in the iteration order of the input
     */
    public Collection<LWComponent> decodeVueResults(Collection<URI> results) {
        final Collection<LWComponent> decoded = new ArrayList<LWComponent>(results.size()+2);
        final Iterator<URI> cursor = results.iterator();
        while (cursor.hasNext()) {
            final URI resultURI = cursor.next();
            final LWComponent component = vueComponentMap.get(resultURI);
            if (component == null) {
                Log.error("*** Internal error: couldn't find URI in results-index: " + resultURI + "; " + Util.tags(vueComponentMap));
                continue;
            }
            decoded.add(component);
        }
        return decoded;
    }
                                              
    /**
     * For creating multi-map / global indices: merges another index into this one.
     * The other index's URI-to-component mappings are copied first, then its statements are added.
     *
     * @param mapIndex the (typically single-map) index to merge in
     */
    public void addMapIndex(RDFIndex mapIndex) {
        this.vueComponentMap.putAll(mapIndex.vueComponentMap);
        super.add(mapIndex);
    }
    
    /**
     * Re-indexes from scratch: empties the URI-to-component map and removes every statement
     * from this model, then indexes the given map.
     *
     * @param map the map whose content will become the sole content of this index
     */
    public void indexMap(LWMap map) {
        // Start from a clean slate -- both our own lookup table and the RDF model itself
        this.vueComponentMap.clear();
        this.removeAll();
        this.indexAdd(map);
    }
    
    /**
     * Adds the given map to this index without clearing existing content.
     * Same as the three-argument indexAdd with both flags false.
     *
     * @param map the map to add
     */
    public void indexAdd(LWMap map) {
        final boolean metadataOnly = false;
        final boolean searchEverything = false;
        indexAdd(map, metadataOnly, searchEverything);
    }
    
    // Todo: could change to take a LWComponent as the focal, tho creating an index for the tiny amount
    // of content on a slide is a bit overkill -- only do if easier than engaging our other DataTree
    // based search mechanism.
    /**
     * Adds the given map and its descendents to this index, without clearing existing content.
     *
     * A resource is created for the map itself (id, author -- taken from the "user.name" system
     * property -- and label if it has one), then each descendent is loaded into the index as a
     * child of that resource.  Pathways and layers are always skipped; slides are skipped unless
     * INDEX_SLIDES is set.  A failure while indexing one component is logged and does not stop
     * the remaining components from being indexed.
     *
     * Indexing stops, with a single warning, as soon as the index holds MAX_SIZE statements.
     * addStatement / addProperty refuse any addition once size() reaches MAX_SIZE, so at that
     * point nothing further could be indexed anyway.
     *
     * @param map the map to index
     * @param metadataOnly if true, node data (label, notes, resource properties) is left out and
     *        only the child-of relation and the component meta-data list are indexed
     * @param searchEverything_IS_IGNORED no longer has any effect (noted in the debug log if set)
     */
    public void indexAdd(final LWMap map, final boolean metadataOnly, final boolean searchEverything_IS_IGNORED)
    {
        if (DEBUG.Enabled && searchEverything_IS_IGNORED) Log.debug("Note: \"search-everything\" bit is now ignored.");
        // If we want slide content, change default index to always index everything -- will hardly make
        // a difference just adding slides, and they can be (are currently always) optionally filtered out later anyway.
        
        if (DEBUG.RDF) Log.debug("indexAdd: begin; freeMem=: "+Runtime.getRuntime().freeMemory());
        
        final com.hp.hpl.jena.rdf.model.Resource mapRoot = this.createResource(map.getURI().toString());
        
        if (DEBUG.RDF) Log.debug("index: create resource for map; freeMem=: "+Runtime.getRuntime().freeMemory());
        
        try {
            addProperty(mapRoot, idOf, map.getID());
            addProperty(mapRoot, authorOf, System.getProperty("user.name"));
            if (map.hasLabel())
                addProperty(mapRoot, labelOf,map.getLabel());

            if (DEBUG.RDF) Log.debug("index: added properties for map; freeMem="+Runtime.getRuntime().freeMemory());
            
            // Note: a parameterized search set (e.g., ChildKind.ANY to include slides) was tried
            // here in the past, but a parameterized index is no longer cacheable, and slide
            // content is filtered out of results anyway.
            final Collection<LWComponent> searchSet = map.getAllDescendents();

            for (LWComponent c : searchSet) {
                if (c instanceof LWPathway || c instanceof LWMap.Layer)
                    continue;
                if (!INDEX_SLIDES && c instanceof LWSlide)
                    continue;
                
                try {
                    load_VUE_component_to_RDF_index(c, mapRoot, !metadataOnly);
                } catch (Throwable t) {
                    // best-effort: one bad component should not abort the whole index
                    Log.warn("indexing VUE component " + c, t);
                }

                // Must be >=, not >: the add methods stop accepting statements at size() == MAX_SIZE,
                // so with > this check would not fire and every remaining component would instead
                // fail with a logged exception.
                if (size() >= MAX_SIZE) {
                    Log.warn("Maximum fail-safe search capacity reached: not all nodes will be searchable. (See property rdf.index.size)");
                    break;
                }
            }    
            
            if (DEBUG.RDF) Log.debug("index: after indexing all components; freeMem="+Runtime.getRuntime().freeMemory());
            
        } catch(Exception ex) {
            Log.error("index", ex);
        }
        if(DEBUG.RDF) Log.debug("index: done -- size="+this.size());
    }

    
    /**
     * Runs the given SPARQL SELECT query against this index and collects the URIs bound to
     * the query variable "rid".
     *
     * A solution whose "rid" cannot be turned into a URI is logged and skipped.  The query
     * execution is always closed, even if executing the query or reading its results throws.
     *
     * @param queryString a complete SPARQL query; it must bind a resource to ?rid
     * @return the URIs found, one per query solution (duplicates are not removed)
     */
    public Collection<URI> searchWithSPARQL(final String queryString)
    {
        //if (DEBUG.SEARCH) Log.debug("SEARCH;  substring=" + Util.tags(substring) + " queryString:\n" + Util.tags(queryString));
        if (DEBUG.SEARCH) Log.debug("searchWithSPARQL; queryString:\n" + Util.tags(queryString));
        
        final Collection<URI> resultSet = new ArrayList<URI>();
        final com.hp.hpl.jena.query.Query query = QueryFactory.create(queryString);
        
        if (DEBUG.SEARCH) Log.debug("QF created " + Util.tag(query)
                                    + "; memory=" + Runtime.getRuntime().freeMemory()
                                    + "\n" + query.toString().trim().replaceAll("\n\n", "\n"));

        final QueryExecution qe = QueryExecutionFactory.create(query, this); // 2nd arg is for Model or for FileManager?
        try {
            if (DEBUG.SEARCH) Log.debug("created QEF " + qe + "; memory=" + Runtime.getRuntime().freeMemory());

            final ResultSet results = qe.execSelect();
            if (DEBUG.SEARCH) Log.debug("execSelect returned; memory=" + Runtime.getRuntime().freeMemory());
        
            while (results.hasNext())  {
                final QuerySolution qs = results.nextSolution();
                if (DEBUG.SEARCH) {
                    final String qss = qs.toString().replaceAll("<http://vue.tufts.edu", "..."); // shorten debug output
                    Log.debug("qSol " + String.format("%.190s%s", qss, qss.length() > 190 ? ("...x"+qss.length()) : ""));
                }
                if (false) {
                    // debug: dump all vars from query (flip to true when needed)
                    //Util.dumpIterator(qs.varNames());
                    Iterator<String> vn = qs.varNames(); 
                    while (vn.hasNext()) {
                        String v = vn.next();
                        Log.debug("\t" + Util.tags(v) + "=" + Util.tags(qs.get(v)));
                    }
                }
                try {
                    resultSet.add(new URI(qs.getResource("rid").getURI()));
                } catch (Throwable t) {
                    // best-effort: skip this one solution, keep the rest
                    Log.warn("handling QuerySolution " + qs, t);
                }
            }
        } finally {
            // release the execution's resources no matter how we leave
            qe.close();
        }
        return resultSet;
    }
                
            // This was tried only by searchAllValues: It appears to originally have been to
            // ignore certain keys, or maybe even values, but I can't see how it ever worked.
            // ACTUALLY, it might have been to ignore jena Resouce id's in the results, as at one
            // time I can see that was a field added to the index, tho it would have eliminated
            // other text values that happened to have '#' in them.
            // try {
            //     if (substring != NO_KEYWORD) {
            //         // What the hell does this code do??  It's trying to examine the VALUE for
            //         // what looks like KEYWORD transformations...
            //         //final String fullKeyword = qs.getLiteral("keyword").toString();
            //         final String fullKeyword = qs.getLiteral("val").toString();
            //         // What cases is this code filtering out the result from?
            //         final int slashLocation = fullKeyword.indexOf("#");
            //         final int keywordLocation = fullKeyword.toString().toLowerCase().
            //             lastIndexOf(substring.toString().toLowerCase());
            //         if (keywordLocation <= slashLocation) {
            //             // ONE ACTUAL RESULT: IF WE ENCOUNTER ANY HIT VALUE STRINGS WITH '#' IN THEM,
            //             // A HIT IN THE STRING *BEFORE* THE '#" IS IGNORED!  Can't be right...
            //             Log.info("MYSTERIOUS FILTERING HAS OCCURRED on \"keyword\" " + Util.tags(fullKeyword), new Throwable("HERE"));
            //             continue;
            //         }
            //         // if (keywordLocation > slashLocation)
            //         //     resultSet.add(new URI(qs.getResource("resource").toString()));
            //     }
            //     //if (DEBUG.SEARCH) Log.debug("getURI: " + Util.tags(qs.getResource("rid").getURI()));
            //     resultSet.add(new URI(qs.getResource("rid").getURI()));
            //     //resultSet.add(new URI(qs.getResource("resource").toString()));
            // } catch (Throwable t) {
            //     Log.warn("handling QuerySolution " + qs, t);
            // }
    
    //private static final String NO_KEYWORD = "<no-keyword>";
    // private Collection<URI> searchWithQueryString(String queryString) {
    //     return searchWithSPARQL(NO_KEYWORD, queryString);
    // }
    
    /**
     * Searches using *our* simple query object, edu.tufts.vue.rdf.Query -- NOT a
     * com.hp.hpl.jena.query.Query.  Our Query object is really a SPARQL builder: the SPARQL
     * text it creates is handed to searchWithSPARQL.
     *
     * @param sparql_builder builder that produces the SPARQL query text
     * @return the URIs found by the generated query
     */
    public Collection<URI> search(edu.tufts.vue.rdf.Query sparql_builder) {
        if (DEBUG.SEARCH)
            Log.debug("search; query=" + Util.tags(sparql_builder));
        return searchWithSPARQL(sparql_builder.createSPARQLQuery());
    }
    
    // private static final String DefaultVueQuery =
    //       "PREFIX vue: <"+VUE_ONTOLOGY+"> SELECT ?resource ?keyword WHERE { ?resource ?x ?keyword }";
    
    /**
     * General search: search all values, ignoring keywords (a.k.a: search the values for every
     * and any key).  The substring is applied as a case-insensitive SPARQL regex over every
     * value in the index.
     *
     * The substring is escaped for use inside a SPARQL string literal, so a double-quote or
     * backslash in the user's text can no longer break out of the literal or produce a query
     * that fails to parse.  It is still interpreted as a regular expression.
     * NOTE(review): any caller that already escaped backslashes/quotes for SPARQL itself
     * should stop doing so -- confirm against callers.
     *
     * @param substring the text (regex) to look for in all indexed values
     * @return the URIs of all resources having a matching value
     */
    public Collection<URI> searchAllValues(String substring)
    {
        if (DEBUG.SEARCH || DEBUG.RDF) Log.debug("searchAllValues:   " + Util.tags(substring));

        // Newlines for diagnostic readability
        // This would be the same as what we'd get from a single-criteria edu.tufts.vue.rdf.Query that had
        // no specific property it was looking for (just a value), if Query supported that.
        final String genericSubstringQuery =
            "PREFIX vue: <"+VUE_ONTOLOGY+">\n SELECT ?rid ?key ?val WHERE {\n\t?rid ?key ?val FILTER regex(?val, \""
            + escapeForSparqlStringLiteral(substring)
            + "\", \"i\")\n}";

        // Note: the WHERE clause is what assigns local variable names to WHATEVER is in the RDF store
        // tuples -- e.g., we have the Jena Resource URI at position 0, the keyword/Jena Property at position 1, and
        // the value at position 2.  I suspect that's how it's always ordered in the jena model.
        // Note that we can order the results in the QuerySolution by differing the order
        // in the SELECT from the order in the WHERE.
        
        return searchWithSPARQL(genericSubstringQuery);
    }

    /**
     * Escapes text so it can sit between double-quotes in a SPARQL query: backslash,
     * double-quote, newline and carriage-return are replaced by their backslash escapes.
     * A null input yields "null", which is what plain string concatenation produced before.
     */
    private static String escapeForSparqlStringLiteral(final String raw) {
        if (raw == null)
            return "null";
        final StringBuilder escaped = new StringBuilder(raw.length() + 8);
        for (int i = 0; i < raw.length(); i++) {
            final char ch = raw.charAt(i);
            switch (ch) {
            case '\\': escaped.append("\\\\"); break;
            case '"':  escaped.append("\\\""); break;
            case '\n': escaped.append("\\n");  break;
            case '\r': escaped.append("\\r");  break;
            default:   escaped.append(ch);     break;
            }
        }
        return escaped.toString();
    }
    
    // public void save() { }
    // public void read() { }
    
    /** If true (resource "rdf.rdfize.color"), a component's XML fill color is indexed along with its other node data */
    private static final boolean RDFIZE_COLOR = VueResources.getBool("rdf.rdfize.color");
    
    /**
     * Extract relevant data of interest from the given VUE component, loading it into a new
     * jena.rdf.model.Resource, and add that to the RDF property tree as a child of the given
     * mapRootResource.
     *
     * The component is also recorded in vueComponentMap under its URI, so search results can
     * later be translated back to the component.
     *
     * Note the add calls made here go through addProperty / addStatement, which throw a
     * RuntimeException once the index has reached MAX_SIZE; that exception is not caught here.
     *
     * @param component the VUE component to index
     * @param mapRootResource the resource of the owning map; the component is made a child of it
     * @param includeNodeData if true, also index label, notes, (optionally) fill color, and the
     *        properties of any attached tufts.vue.Resource; if false, only the child-of relation
     *        and the component's meta-data list are indexed
     */
    private void load_VUE_component_to_RDF_index(
         final tufts.vue.LWComponent component,
         final com.hp.hpl.jena.rdf.model.Resource mapRootResource,
         final boolean includeNodeData)
    {
        final URI uri = component.getURI();
        final com.hp.hpl.jena.rdf.model.Resource r = this.createResource(uri.toString());

        // We need to be able to look up the RDF URI result later to get back to the LWComponent.
        // Note it might be faster to keep a master index of all LWComponents created by sequential
        // ID (as this is a runtime need only) and then we wouldn't have create this map each time,
        // and we could look them up via the perfect hash (an array index).
        vueComponentMap.put(uri, component);
        
        if (includeNodeData) {
            //------------------------------------------------------------------
            // Load Node info (label, notes, etc) plus any tufts.vue.Resource
            // meta-data to the index.
            //------------------------------------------------------------------
            if (component.hasLabel())
                addProperty(r, labelOf, component.getLabel());

            if (component.hasNotes())
                addProperty(r, notesOf, component.getNotes());

            if (RDFIZE_COLOR && component.getXMLfillColor() != null) 
                addProperty(r, colorOf, component.getXMLfillColor());

            final tufts.vue.Resource res = component.getResource();
            if (res != null) {
                // Name-space for this resource's property keys; only computed (and only used)
                // when INDEX_VUE_RESOURCE_KEYWORDS is on.
                String osidNS = null;
                
                if (INDEX_VUE_RESOURCE_KEYWORDS) {
                    final String osidProp = res.getProperty("@osid.impl");
                    String osid = osidProp;
                    if (osid != null) {
                        if (osid.endsWith(".Repository")) {
                            // Strip the trailing ".Repository" (11 chars), then use the last
                            // dot-separated segment of what remains as the name-space name.
                            osid = osid.substring(0, osid.length() - 11);
                            osidNS = VUE_OSID_PREFIX + osid.substring(osid.lastIndexOf('.')+1) + "#";
                        } else {
                            // note: could use provider id...  probably even more unique than impl class
                            Log.warn("Couldn't understand @osid.impl: " + Util.tags(osidProp));
                        }
                        // Need to look into what'd be most appropriate here:
                        // ?E.g., http://vue.tufts.edu/osid/nytimes#title
                        // ?E.g., http://vue.tufts.edu/osid/nytimes/vue.rdfs#title
                    }
                    if (osidNS == null) {
                        // Not exactly correct: these tufts.vue.Resources may be generic web URL's or
                        // or local-file resources, and their data is unrelated to the OSID's
                        osidNS = VUE_OSID_UNKNOWN_NAMESPACE;
                    }
                }
                
                for (Map.Entry e : res.getProperties().entries()) {
                    final Object key = e.getKey();

                    if (tufts.vue.Resource.isInternalPropertyKey(key.toString())) {
                        // todo: would be better to keep these props in a different map
                        // and not have to filter them out everywhere. 
                        continue;
                    }
                    
                    final Object value = e.getValue();
                    if (value != null) {
                        final String strValue = value.toString();
                        if (strValue == null || strValue.length() <= 0) {
                            // as all of these properties will have the same key, there's
                            // no point in allowing us to check for presence of an empty value
                            continue;
                        }
                        if (INDEX_VUE_RESOURCE_KEYWORDS) {
                            //addProperty(r, createProperty(VUE_ONTOLOGY, key.toString()), strValue);
                            // This will make sure a separate property exists for every resource property
                            // key found, which would enable us to search on specific resource properties.
                            // (E.g., author, creator, etc).
                            if (!tufts.vue.Resource.isHiddenPropertyKey(key.toString())) {
                                addProperty(r, createProperty(osidNS, key.toString()), strValue);
                            }
                        } else {
                            // Using contentPropertyOf means that all resource properties have the same keyword name,
                            // and thus we can never do a search such as "author=bob" amongst resource properties...
                            addProperty(r, contentPropertyOf, strValue);
                        }
                    }
                }
            }
        }

        if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("processing " + component);
            
        // Always recorded, whether or not node data was included: component is a child of the map
        addStatement(createStatement(r, childOf, mapRootResource));
            
        final List<VueMetadataElement> metadata = component.getMetadataList().getMetadata();
        for (VueMetadataElement vme : metadata) {
            if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("scan " + vme);
            final String key = vme.getKey();
            final String value = vme.getValue();

            if (key == VueTermOntologyNone) { // also check #TAG so we don't have to encode that?
                // This is just an optimization, so checking object identity is okay (should be used that way)
                if (value != null && value.length() > 0) {
                    // Optimization: this being a "none" term (no keyword), don't bother with empty values
                    addStatement(createStatement(r, _propertyNone, value));
                }
                // todo: add special property for #source as well, or refactor everything so
                // that such relationships are pre-established and need no detecting (e.g., smarter VME)
            }
            else if (key == null) {
                Log.warn(r + ": null key: " + vme);
            }
            else if (key.length() == 0) {
                Log.warn(r + ": empty key: " + vme);
            }
            else if (value == null) {
                Log.warn(r + ": null value: " + vme);
            }
            else {
                // if (value.length() == 0) {
                //     // may want these if we allow searching just for the presence of key
                //     if (DEBUG.Enabled) Log.debug(r + ": indexing empty value: " + vme);
                // }
                // Note that we do NOT want to skip empty values, as we have a non-empty key, and a search for
                // a key with an empty value might be a valid search type someday (if we had the UI to support
                // it).
                
                // todo: kind of waste to create/fetch these jena Property instances constantly: if we keep
                // this RDF indexing, someday we could just go ahead and put the RDF property right in the VME
                // object.  (And pre-encode all keys and/or only allow encoded keys in all meta-data
                // data-structures) Note that in current superclass impl, createProperty will return the
                // existing Property object if the name matches.
                
                final String encodedKey = getEncodedKey(key);
                addStatement(createStatement(r, getPropertyFromKey(encodedKey), value));
            }
        }
    }
    
        // final List<VueMetadataElement> metadata = component.getMetadataList().getMetadata();
        // for (VueMetadataElement vme : metadata) {
        //     if (DEBUG.SEARCH && DEBUG.RDF) Log.debug("scan " + vme);
        //     if (true || vme.getObject() != null) {
        //         // BAD SEMANTICS: we check "getObject" then just go ahead and check key & value?
        //         final String key = vme.getKey();
        //         final String strValue = vme.getValue();
        //         if (vme.getKey() == VueTermOntologyNone) { // also check #TAG so we don't have to encode that?
        //             // This is just an optimization, so checking object identity is okay (should be used that way)
        //             if (strValue != null && strValue.length() > 0) {
        //                 // Optimization: this being a "none" term (no keyword), don't bother with empty values
        //                 addStatement(createStatement(r, _propertyNone, strValue));
        //             }
        //         } else {
        //             // todo: kind of waste to create/fetch these constantly: if we keep this RDF indexing,
        //             // someday we could just go ahead and put the RDF property right in the VME object.
        //             // (And pre-encode all keys and/or only allow encoded keys in all meta-data
        //             // data-structures)  Note that in current superclass impl, createProperty will
        //             // return the existing Property object if the name matches.
                        
        //             // Note that we do NOT want to skip empty values, as we have a non-empty key, and a
        //             // search for a key with an empty value might be a valid search type someday (if we had
        //             // the UI to support it).
                        
        //             final String encodedKey = getEncodedKey(key);
        //             addStatement(createStatement(r, getPropertyFromKey(encodedKey), strValue));
        //         }
        //     } else {
        //         Log.warn(r + ": null object: " + vme); // used to warn "no statement" -- why ignore key/value if no object in VME?
        //     }
        //     //statement = this.createStatement(r,createPropertyFromKey(element.getKey()),element.getObject().toString());
        //     //addStatement(statement);
        // }
    
    /**
     * Adds a statement to this index, provided the index is still below MAX_SIZE.
     *
     * @param statement the statement to add
     * @throws RuntimeException if the index already holds MAX_SIZE (or more) statements
     */
    public void addStatement(com.hp.hpl.jena.rdf.model.Statement statement) {
        if (size() >= MAX_SIZE)
            throw new RuntimeException("Size of index: "+size()+ " exceeds MAX_SIZE: "+MAX_SIZE);

        if (DEBUG.SEARCH && DEBUG.META)
            Log.debug("addStatement: " + statement);
        super.add(statement);
    }
    
    /**
     * Adds the given property value to the given resource, provided the index is still below MAX_SIZE.
     *
     * @param r the resource receiving the property
     * @param p the property (keyword)
     * @param value the property value
     * @throws RuntimeException if the index already holds MAX_SIZE (or more) statements
     */
    public void addProperty(com.hp.hpl.jena.rdf.model.Resource r, com.hp.hpl.jena.rdf.model.Property p,String value)  {
        if (size() >= MAX_SIZE)
            throw new RuntimeException("Size of index: "+size()+ " exceeds MAX_SIZE: "+MAX_SIZE);

        r.addProperty(p, value);
    }
    /**
     * Returns a new id made of Constants.RESOURCE_URL followed by a freshly generated GUID.
     *
     * This might better be called something else, but leaving for now just in case of any
     * backward compatibility issue.  These are permanently persisted in save files,
     * tho at the moment there's really no point in doing so.
     */
    public static String getUniqueId() {
        // Was at one point: Constants.NODE_URL+edu.tufts.vue.util.GUID.generate();
        return Constants.RESOURCE_URL
            + edu.tufts.vue.util.GUID.generate();
    }
    
    /**
     * Returns the general default index if one is already set; otherwise returns whatever
     * createDefaultIndex() produces.
     */
    public static RDFIndex getDefaultIndex() {
        if (defaultIndex != null)
            return defaultIndex;
        return createDefaultIndex();
    }
    
    // A concept of auto-indexing was never implemented -- SMF 2012-06-25
    // public void startAutoIndexing() {
    //     isAutoIndexing = true;
    // }
    // public void stopAutoIndexing() {
    //     isAutoIndexing = false;
    // }
    
    // don't see this called anywhere...
    /**
     * Takes a snapshot of all statements, compares each against every later one in the
     * snapshot, and removes from the model any later statement that compareStatements
     * reports as equal.  Index size is logged before and after.
     */
    public void regenerate() {
        Log.info("regenerate: before Indexing size: "+size());
        final List snapshot = listStatements().toList();
        final int total = snapshot.size();
        int outer = 0;
        while (outer < total) {
            final com.hp.hpl.jena.rdf.model.Statement earlier =
                (com.hp.hpl.jena.rdf.model.Statement) snapshot.get(outer);
            // only look forward: pairs before this point were already compared
            int inner = outer + 1;
            while (inner < total) {
                final com.hp.hpl.jena.rdf.model.Statement later =
                    (com.hp.hpl.jena.rdf.model.Statement) snapshot.get(inner);
                if (compareStatements(earlier, later))
                    remove(later);
                inner++;
            }
            outer++;
        }
        Log.info("regenerate: after Indexing size: "+size());
    }
    // don't see this called anywhere except above...
    /**
     * Reports whether two statements have the same subject, object and predicate, judged by
     * comparing their string forms (checked in that order, stopping at the first difference).
     */
    private boolean compareStatements(com.hp.hpl.jena.rdf.model.Statement stmt1,com.hp.hpl.jena.rdf.model.Statement stmt2) {
        return stmt1.getSubject().toString().equals(stmt2.getSubject().toString())
            && stmt1.getObject().toString().equals(stmt2.getObject().toString())
            && stmt1.getPredicate().toString().equals(stmt2.getPredicate().toString());
    }

    /**
     * Overridden to add debugging only: delegates to the superclass and, when both the SEARCH
     * and RDF debug flags are on, logs the name-space, local name, implementation class and
     * identity hash of the returned property.
     */
    @Override public com.hp.hpl.jena.rdf.model.Property createProperty(final String nameSpace, final String localName)
    {
        final com.hp.hpl.jena.rdf.model.Property property = super.createProperty(nameSpace, localName);

        if (!(DEBUG.SEARCH && DEBUG.RDF))
            return property;

        // Short name for the common impl class, full class name for anything else
        final String propName =
            (property instanceof com.hp.hpl.jena.rdf.model.impl.PropertyImpl)
            ? "PropertyImpl"
            : property.getClass().getName();

        final String details = String.format("+%-18s= %s@%08X[%s]",
                                             Util.tags(localName), // note need extra padding for escape codes here:
                                             propName,
                                             System.identityHashCode(property),
                                             property.toString());
        Log.debug("createProperty " + Util.tags(nameSpace) + details);
        return property;
    }

    // /** Auto-create the property if not already there -- [not possible: super.createProperty already does this] */
    // public com.hp.hpl.jena.rdf.model.Property findProperty(final String nameSpace, final String localName) {
    //     final com.hp.hpl.jena.rdf.model.Property property = super.getProperty(nameSpace, localName);

    //     if (property == null)
    //         return createProperty(nameSpace, localName);
    //     else
    //         return property;
    // }
    
    
    /** Get/create a property.  If the key isn't domain/ns qualified, default to the VUE_ONTOLOGY name space */
    public com.hp.hpl.jena.rdf.model.Property getPropertyFromKey(String key) {
        final String[] parts = key.split(ONT_SEPARATOR);

        if (parts.length < 1) {
            // String.split drops trailing empty segments, so this is reached when the key consists
            // of nothing but separators.  An empty key still yields one (empty) segment.
            throw new RuntimeException("createPropertyFromKey: The key format is wrong. key - "+key);
        }

        if (parts.length == 1) {
            // No usable namespace part: the whole key becomes the local name in the VUE ontology.
            return createProperty(VUE_ONTOLOGY, key);
        }

        // Qualified key: the first segment plus the separator is the namespace and the second
        // segment is the local name.  Anything after a *second* separator is ignored entirely.
        return createProperty(parts[0] + ONT_SEPARATOR, parts[1]);
    }

    /** Note that while VUE.java currently has code to call getDefaultIndex, it's never used.
     * This attempts to read a default index file.  SMF 2012-06-25 */
    private static RDFIndex createDefaultIndex() {
        // Always install a fresh in-memory index; loading the file below is best-effort.
        defaultIndex = new RDFIndex(com.hp.hpl.jena.graph.Factory.createGraphMem());
        try {
            final File indexFile = new File(INDEX_FILE);
            if (indexFile.exists()) {
                final FileReader reader = new FileReader(indexFile);
                try {
                    defaultIndex.read(reader, Constants.RESOURCE_URL);
                } finally {
                    // Release the file handle whether or not the read succeeded.
                    reader.close();
                }
            }
        } catch (Throwable t) {
            // A missing or unreadable index file must not prevent returning the (empty) index.
            Log.warn("reading default index file " + INDEX_FILE, t);
        }
        return defaultIndex;
    }
    
    /**
     * E.g., lets say a key has the name "Clubhouse Name" -- this will turn it into "Clubhouse+Name"
     * or "Total Time Online (mm)" to "Total+Time+Online+%28mm%29"
     *
     * NOTE: though in the second case Jena will let us create a Property at runtime with that name,
     * if we attempt to export it we get com.hp.hpl.jena.shared.InvalidPropertyURIException at
     * com.hp.hpl.jena.xmloutput.impl.BaseXMLWriter.splitTag(BaseXMLWriter.java:345), so apparently
     * URLEncoder isn't a foolproof encoding strategy.  Oh, and also, "Clubhouse+Name" is converted
     * to a prefix declaration that includes "...#Clubhouse+", and then just "Name" is used when
     * declaring the property value.
     *
     * Any qualifying namespace prefix, if present, is left untouched.
     */
    public static String getEncodedKey(final String key)
    {
        // Fall back to the raw key if encoding fails.
        String encodedKey = key;

        try {
            final int ontEnd = key.indexOf(ONT_SEPARATOR);
            if (ontEnd != -1) {
                // Split just past the separator so the namespace prefix, separator included,
                // is carried over untouched and only the local name is encoded.
                final int nameStart = ontEnd + ONT_SEPARATOR.length();
                final String prefix = key.substring(0, nameStart);
                final String name = key.substring(nameStart);
                final String encodedName = java.net.URLEncoder.encode(name, "UTF-8");
                // Compare by value: whether encode() returns the same instance for an
                // unchanged string is an implementation detail, not a guarantee.
                if (!name.equals(encodedName)) {
                    encodedKey = prefix + encodedName;
                    if (DEBUG.RDF) Log.debug("encoded " + Util.tags(key) + " to " + Util.tags(encodedKey));
                }
            } else {
                // Unqualified key: encode the whole thing.
                encodedKey = java.net.URLEncoder.encode(key, "UTF-8");
                if (DEBUG.RDF && !encodedKey.equals(key)) Log.debug("encoded " + Util.tags(key) + " to " + Util.tags(encodedKey));
            }
        }
        catch (java.io.UnsupportedEncodingException e) {
            Log.warn("encoding key " + Util.tags(key), e);
        }

        return encodedKey;
    }
    
    
}