/* // // Licensed to Benedikt Kämpgen under one or more contributor license // agreements. See the NOTICE file distributed with this work for // additional information regarding copyright ownership. // // Benedikt Kämpgen licenses this file to you under the Apache License, // Version 2.0 (the "License"); you may not use this file except in // compliance with the License. You may obtain a copy of the License at: // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. */ package org.olap4j.driver.olap4ld.linkeddata; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.olap4j.OlapException; import org.olap4j.Position; import org.olap4j.driver.olap4ld.Olap4ldUtil; import org.olap4j.driver.olap4ld.helper.Olap4ldLinkedDataUtil; import org.olap4j.metadata.Cube; import org.olap4j.metadata.Level; import org.olap4j.metadata.Measure; import org.openrdf.query.BooleanQuery; import org.openrdf.query.GraphQuery; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.QueryLanguage; import org.openrdf.query.TupleQuery; import org.openrdf.query.TupleQueryResultHandlerException; import org.openrdf.query.Update; import org.openrdf.query.UpdateExecutionException; import org.openrdf.query.resultio.sparqlxml.SPARQLResultsXMLWriter; import org.openrdf.repository.Repository; import org.openrdf.repository.RepositoryConnection; import org.openrdf.repository.RepositoryException; import org.openrdf.repository.sail.SailRepository; import org.openrdf.rio.RDFFormat; import org.openrdf.rio.RDFHandlerException; import org.openrdf.rio.RDFParseException; import org.openrdf.rio.RDFWriter; import org.openrdf.rio.Rio; import org.openrdf.sail.memory.MemoryStore; import org.semanticweb.yars.nx.Literal; import org.semanticweb.yars.nx.Node; import org.semanticweb.yars.nx.Resource; import org.semanticweb.yars.nx.Variable; import org.semanticweb.yars.nx.parser.NxParser; /** * The EmbeddedSesameEngine manages an embedded Sesame repository (triple store) * while executing metadata or olap queries. * * @author b-kaempgen * */ public class EmbeddedSesameEngine implements LinkedDataCubesEngine { // Meta data attributes private static final String DATASOURCEDESCRIPTION = "OLAP data from the statistical Linked Data cloud."; private static final String PROVIDERNAME = "The community."; private static String URL; private static final String DATASOURCEINFO = "Data following the Linked Data principles."; private static final String TABLE_CAT = "LdCatalogSchema"; private static final String TABLE_SCHEM = "LdCatalogSchema"; public String DATASOURCENAME; public String DATASOURCEVERSION; // Each typical sparql query assumes the following prefixes. public String TYPICALPREFIXES = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> "; // Helper attributes /** * Map of locations that have been loaded into the embedded triple store. */ private HashMap<Integer, Boolean> loadedMap = new HashMap<Integer, Boolean>(); /** * The Sesame repository (triple store). Gets filled when asking for cubes. */ public Repository repo; private Integer MAX_LOAD_TRIPLE_SIZE = 1000000000; private Integer MAX_COMPLEX_CONSTRAINTS_TRIPLE_SIZE = 5000; private Integer LOADED_TRIPLE_SIZE = 0; private PhysicalOlapQueryPlan execplan; // Not needed any more since we use materialisation. // private List<List<Node>> equivalenceList; public EmbeddedSesameEngine(URL serverUrlObject, List<String> datastructuredefinitions, List<String> datasets, String databasename) throws OlapException { // We actually do not need that. URL = serverUrlObject.toString(); if (databasename.equals("EMBEDDEDSESAME")) { DATASOURCENAME = databasename; DATASOURCEVERSION = "1.0"; } initialize(); } private PhysicalOlapQueryPlan createExecplan(LogicalOlapQueryPlan queryplan) throws OlapException { /* * Currently, I distinguish: * * Drill-Across for queries with Drill-Across at the top, OLAP-to-SPARQL * queries below. * * DerivedDataset for queries with Convert-Cube or BaseCubeOp. * * OLAP-to-SPARQL for queries with OLAP-to-SPARQL queries * * I want to have Drill-Across for queries with Drill-Across at the top, * OLAP-to-SPARQL queries below, and Convert-Cube or Merge-Cubes at the * end. */ LogicalToPhysical logicaltophysical = new LogicalToPhysical(this); PhysicalOlapIterator newRoot; // Transform into physical query plan newRoot = (PhysicalOlapIterator) logicaltophysical .compile(queryplan._root); PhysicalOlapQueryPlan execplan = new PhysicalOlapQueryPlan(newRoot); return execplan; } public PhysicalOlapQueryPlan getExecplan() { return this.execplan; } private void initialize() { // This seems to hold up a lot. I hope garbage collector works. // TODO: Hopefully, we do not need to close the repo explicitly. if (this.repo != null) { try { this.repo.initialize(); // this.repo.shutDown(); } catch (RepositoryException e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { try { this.repo = new SailRepository(new MemoryStore()); // With inferencing // this.repo = new SailRepository(new // ForwardChainingRDFSInferencer(new MemoryStore())); // File dataDir = new // File("/media/84F01919F0191352/Projects/2014/paper/paper-macro-modelling/experiments/NativeSesameStore/"); // // Try another store /* * See * http://rivuli-development.com/further-reading/sesame-cookbook * /loading-large-file-in-sesame-native/ for chunk-loading of * data and the NativeStore. */ // this.repo = new SailRepository(new NativeStore(dataDir)); repo.initialize(); // do something interesting with the values here... // con.close(); } catch (RepositoryException e) { // TODO Auto-generated catch block e.printStackTrace(); } } // LoadedMap loadedMap.clear(); } /** * We now implement the pre-processing pipeline that shall result in a fully * integrated database (triple store, data warehouse). (Cal, A., Calvanese, * D., Giacomo, G. De, & Lenzerini, M. (2002). Data Integration under * Integrity Constraints, 262–279.) * * @throws * @throws OlapException */ private void preload() throws OlapException { try { // Load links loadInStore(new URL( "http://people.aifb.kit.edu/bka/Public/cube_additionalRDF.rdf")); // Seems not to work // loadInStore(new URL("http://pastebin.com/raw.php?i=e1K52uhc")); String triples = "<http://lod.gesis.org/lodpilot/ALLBUS/geo.rdf#list> <http://www.w3.org/2002/07/owl#sameAs> <http://rdfdata.eionet.europa.eu/ramon/ontology/NUTSRegion>. "; // triples += // "<http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable> <http://www.w3.org/2002/07/owl#sameAs> <http://ontologycentral.com/2009/01/eurostat/ns#indic_na>. "; // triples += // "<http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#list> <http://www.w3.org/2002/07/owl#sameAs> <http://estatwrap.ontologycentral.com/dsd/nama_aux_gph#cl_indic_na>. "; // triples += // "<http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#list> <http://www.w3.org/2002/07/owl#sameAs> <http://estatwrap.ontologycentral.com/dsd/nama_gdp_c#cl_indic_na>. "; triples += "<http://lod.gesis.org/lodpilot/ALLBUS/geo.rdf#00> <http://www.w3.org/2002/07/owl#sameAs> <http://estatwrap.ontologycentral.com/dic/geo#DE>."; insertTriples(triples); // First, we load everything that Data-Fu can create // loadInStore(new URL( // "http://127.0.0.1:8080/Data-Fu-Engine/data-fu/gdp_per_capita_experiment/triples")); // Then, we load everything that Data-Fu cannot create // load and validate dataset requires to load cube // URL dataset; // ---------------- // Load "GDP per capita - annual Data" ds and dsd // URL dataset = new URL( // "http://estatwrap.ontologycentral.com/id/nama_aux_gph#ds"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // Shortcut // loadInStore(new // URL("http://localhost:8080/Data-Fu-Engine/datasets/gdp_per_capita_experiment_load_cubes_nama_aux_gph_estatwrap.n3")); // # Gross Domestic Product (GDP) per capita in Purchasing Power // Standards (PPS) // dataset = new URL( // "http://olap4ld.googlecode.com/git/OLAP4LD-trunk/tests/estatwrap/tec00114_ds.rdf#ds"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // ---------------- // Load "GDP and main components - Current prices [nama_gdp_c]" ds // and dsd // dataset = new URL( // "http://estatwrap.ontologycentral.com/id/nama_gdp_c#ds"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // Shortcut // loadInStore(new // URL("http://localhost:8080/Data-Fu-Engine/datasets/gdp_per_capita_experiment_load_cubes_nama_gdp_c_estatwrap.n3")); // ---------------- // # Regional gross domestic product by NUTS 2 regions [tgs00003] // (Estatwrap) // <http://estatwrap.ontologycentral.com/id/tgs00003#ds> rdf:type // qb:DataSet. // XXX Needed? // dataset = new URL( // "http://estatwrap.ontologycentral.com/id/tgs00003#ds"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // ---------------- // # Regional gross domestic product by NUTS 2 regions [tgs00003] // (Eurostat LD) // <http://eurostat.linked-statistics.org/data/tgs00003> rdf:type // qb:DataSet. // Problem: Eurostat LD provides wrong link between dataset and dsd: // http://eurostat.linked-statistics.org/../dsd/tgs00003. Thus, dsd // and everything else cannot be crawled, properly. Solution: I // manually add the triple beforehand. // Problem: dcterms:date could not be resolved. // XXX Needed? // String triples2 = // "<http://eurostat.linked-statistics.org/data/tgs00003> <http://purl.org/linked-data/cube#structure> <http://eurostat.linked-statistics.org/dsd/tgs00003>. " // + // "<http://eurostat.linked-statistics.org/dsd/tgs00003> <http://purl.org/linked-data/cube#component> _:comp. " // + // "_:comp <http://purl.org/linked-data/cube#measure> <http://purl.org/linked-data/sdmx/2009/measure#obsValue>. " // + // "<http://purl.org/dc/terms/date> <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2000/01/rdf-schema#Literal>. "; // insertTriples(triples2); // XXX Needed? // dataset = new URL( // "http://eurostat.linked-statistics.org/data/tgs00003"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // Problem: Wrong dsd has to be removed // triples2 = // "<http://eurostat.linked-statistics.org/data/tgs00003> <http://purl.org/linked-data/cube#structure> <http://eurostat.linked-statistics.org/../dsd/tgs00003>. "; // deleteTriples(triples2); // triples2 = // "<http://eurostat.linked-statistics.org/dsd/tgs00003> <http://purl.org/linked-data/cube#component> ?comp. " // + // "?comp <http://purl.org/linked-data/cube#dimension> <http://purl.org/linked-data/sdmx/2009/measure#obsValue>. "; // String where = // "?comp <http://purl.org/linked-data/cube#dimension> <http://purl.org/linked-data/sdmx/2009/measure#obsValue>. "; // deleteTriplesWhere(triples2, where); // ---------------- // # Population on 1 January by age and sex [demo_pjan] (Estatwrap) // <http://estatwrap.ontologycentral.com/id/demo_pjan#ds> rdf:type // qb:DataSet. // Problem: demo_pjan contains errors // loadInStore(new // URL("http://localhost:8080/Data-Fu-Engine/datasets/demo_pjan_ds_v3.rdf")); // dataset = new URL( // "http://estatwrap.ontologycentral.com/id/demo_pjan#ds"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // Shortcut // loadInStore(new // URL("http://localhost:8080/Data-Fu-Engine/datasets/gdp_per_capita_experiment_load_cubes_demo_pjan_estatwrap.n3")); // ---------------- // # Population on 1 January by age and sex [demo_pjan] (Eurstat LD) // <http://eurostat.linked-statistics.org/data/demo_pjan> rdf:type // qb:DataSet. // dataset = new URL( // "http://eurostat.linked-statistics.org/data/demo_pjan"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // # Real GDP per Capita (real local currency units, various base // years) // <http://worldbank.270a.info/dataset/GDPPCKN> rdf:type qb:DataSet. // Problem: this dataset only is available in a GZIP file // loadInStore(new URL( // "http://localhost:8080/Data-Fu-Engine/datasets/GDPPCKN.rdf")); // loadInStore(new URL( // "http://worldbank.270a.info/dataset/world-bank-indicators/structure")); // dataset = new URL("http://worldbank.270a.info/dataset/GDPPCKN"); // Olap4ldUtil._log.info("Load dataset: " + dataset); // loadCube(dataset); // Olap4ldLinkedDataUtil.dumpRDF(repo, // "/media/84F01919F0191352/Projects/2014/paper/Link to paper-drill-across/Link to task-data-fu/drill-across-paper/gdp_per_capita_experiment_load_cubes.n3", // RDFFormat.NTRIPLES); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } private void insertTriples(String triples) { String query = "PREFIX olap4ld:<http://purl.org/olap4ld/> INSERT DATA { GRAPH <http://manually> { " + triples + " } }"; Olap4ldLinkedDataUtil.sparqlRepoUpdate(repo, query, false); } @SuppressWarnings("unused") private void deleteTriples(String triples) { String query = "PREFIX olap4ld:<http://purl.org/olap4ld/> DELETE DATA { " + triples + " }"; Olap4ldLinkedDataUtil.sparqlRepoUpdate(repo, query, false); } @SuppressWarnings("unused") private void deleteTriplesWhere(String triples, String where) { String query = "PREFIX olap4ld:<http://purl.org/olap4ld/> DELETE { " + triples + " } where { " + where + "}"; Olap4ldLinkedDataUtil.sparqlRepoUpdate(repo, query, false); } /** * Returns from String in order to retrieve information about these URIs * * * Properly access the triple store: For dsd and ds we query separate * graphs. * * @param uris * @return fromResult */ @Deprecated private String askForFrom(boolean isDsdQuery) { return ""; } public void executeSparqlConstructQuery(String constructquery) { // We assume one or two cubes, only. try { RepositoryConnection con = this.repo.getConnection(); GraphQuery graphquery = con.prepareGraphQuery( org.openrdf.query.QueryLanguage.SPARQL, constructquery); StringWriter stringout = new StringWriter(); RDFWriter w = Rio.createWriter(RDFFormat.RDFXML, stringout); graphquery.evaluate(w); String triples = stringout.toString(); if (Olap4ldUtil._isDebug) { Olap4ldUtil._log.config("Loaded triples: " + triples); } // Insert query to load triples // String insertquery = // "PREFIX olap4ld:<http://purl.org/olap4ld/> INSERT DATA { GRAPH <http://manually> { " // + triples + " } }"; // // Olap4ldUtil._log.config("SPARQL query: " + insertquery); // // Update updateQuery = con.prepareUpdate(QueryLanguage.SPARQL, // insertquery); // updateQuery.execute(); // Would not work: prolog error // ByteArrayInputStream inputstream = new // ByteArrayInputStream(w.toString().getBytes()); // UTF-8 encoding seems important InputStream stream = new ByteArrayInputStream( triples.getBytes("UTF-8")); // Add to triple store con.add(stream, "", RDFFormat.RDFXML); // Loaded really? if (Olap4ldUtil._isDebug) { String filename = "/media/84F01919F0191352/Projects/2014/paper/paper-macro-modelling/experiments/" + "dataset" + constructquery.hashCode() + "n3"; Olap4ldLinkedDataUtil.dumpRDF(repo, filename, RDFFormat.NTRIPLES); } con.close(); } catch (RepositoryException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MalformedQueryException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (QueryEvaluationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (RDFHandlerException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (RDFParseException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } /** * I think, caching some sparql results would be very useful. * * I create a map between hash value of sparql query and the Nodes. * * If the value is available, I return it. * * However, when to empty the cache? I empty the cache if I populate a new * cube. * * @param query * @param caching * (not used) * @return */ public List<Node[]> executeSparqlSelectQuery(String query, boolean caching) { Olap4ldUtil._log.config("SPARQL query: " + query); List<Node[]> myBindings = new ArrayList<Node[]>(); try { RepositoryConnection con = repo.getConnection(); ByteArrayOutputStream boas = new ByteArrayOutputStream(); // FileOutputStream fos = new // FileOutputStream("/home/benedikt/Workspaces/Git-Repositories/olap4ld/OLAP4LD-trunk/resources/result.srx"); SPARQLResultsXMLWriter sparqlWriter = new SPARQLResultsXMLWriter( boas); TupleQuery tupleQuery = con.prepareTupleQuery(QueryLanguage.SPARQL, query); tupleQuery.evaluate(sparqlWriter); ByteArrayInputStream bais = new ByteArrayInputStream( boas.toByteArray()); // String xmlwriterstreamString = // Olap4ldLinkedDataUtil.convertStreamToString(bais); // System.out.println(xmlwriterstreamString); // Transform sparql xml to nx InputStream nx = Olap4ldLinkedDataUtil.transformSparqlXmlToNx(bais); // Only if logging level accordingly if (Olap4ldUtil._isDebug) { String test2 = Olap4ldLinkedDataUtil.convertStreamToString(nx); Olap4ldUtil._log.config("NX output: " + test2); nx.reset(); } NxParser nxp = new NxParser(nx); Node[] nxx; while (nxp.hasNext()) { try { nxx = nxp.next(); myBindings.add(nxx); } catch (Exception e) { // Might happen often, therefore config only Olap4ldUtil._log .config("NxParser: Could not parse properly: " + e.getMessage()); } ; } boas.close(); con.close(); // do something interesting with the values here... // con.close(); } catch (RepositoryException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MalformedQueryException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (QueryEvaluationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (TupleQueryResultHandlerException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } return myBindings; } public boolean isLoaded(URL resource) { if (loadedMap.get(resource.toString().hashCode()) != null && loadedMap.get(resource.toString().hashCode()) == true) { Olap4ldUtil._log .info("Is loaded: "+resource.toString()+", Hash: "+resource.toString().hashCode()); return true; } else { Olap4ldUtil._log .info("Is not yet loaded: "+resource.toString()+", Hash: "+resource.toString().hashCode()); return false; } } public void setLoaded(URL resource) { Olap4ldUtil._log .info("Set loaded: "+resource.toString()+", Hash: "+resource.toString().hashCode()); loadedMap.put(resource.toString().hashCode(), true); } /** * Loads resource in store if 1) URI and location of resource not already * loaded 2) number of triples has not reached maximum. * * @param location * @throws OlapException */ private void loadInStore(URL noninformationuri) throws OlapException { RepositoryConnection con = null; try { URL informationuri = Olap4ldLinkedDataUtil.askForLocation(noninformationuri); if (isLoaded(noninformationuri) || isLoaded(informationuri)) { setLoaded(noninformationuri); setLoaded(informationuri); // Already loaded return; } // Check max loaded String query = "select (count(?s) as ?count) where {?s ?p ?o}"; List<Node[]> result = executeSparqlSelectQuery(query, false); this.LOADED_TRIPLE_SIZE = new Integer(result.get(1)[0].toString()); Olap4ldUtil._log.config("Number of loaded triples before: " + this.LOADED_TRIPLE_SIZE); if (this.LOADED_TRIPLE_SIZE > this.MAX_LOAD_TRIPLE_SIZE) { Olap4ldUtil._log .warning("Warning: We have reached the maximum number of triples to load!"); throw new OlapException( "Warning: Maximum storage capacity reached! Dataset contains too many triples."); } String locationstring = informationuri.toString(); Olap4ldUtil._log.config("Load in store: " + informationuri); con = repo.getConnection(); // Would not work since we cannot ask for the file size without // downloading the file // Check size and set size to have of heap space // URLConnection urlConnection = locationurl.openConnection(); // urlConnection.connect(); // // assuming both bytes: 1) file_size is byte 2) // int file_size = urlConnection.getContentLength(); // // TODO: Apparently file size often wrong? // Olap4ldUtil._log.config("File size: " + file_size); // long memory_size = Olap4ldUtil.getFreeMemory(); // Olap4ldUtil._log.config("Current memory size: " + memory_size); // // if (file_size > memory_size) { // con.close(); // Olap4ldUtil._log.warning("Warning: File (" + location // + ") to load exceeds amount of heap space memory!"); // throw new OlapException( // "Warning: Maximum storage capacity reached! Dataset too large."); // } // Workaround certain files are not loadable // XXX: remove (only since indicator once did not work) // if (locationstring // .equals("http://worldbank.270a.info/property/indicator.rdf")) { // return; // } // Guess file format RDFFormat format = RDFFormat.forFileName(locationstring); if (format != null) { con.add(informationuri, locationstring, format); } else { // Heuristics // InputStream is; HttpURLConnection connection = (HttpURLConnection) informationuri .openConnection(); // We always try to get rdf/xml connection.setRequestProperty("Accept", "application/rdf+xml"); format = RDFFormat.RDFXML; // Not acceptable time connection.setConnectTimeout(5000); int responsecode = connection.getResponseCode(); // Not acceptable format? if (responsecode == 406) { connection.disconnect(); connection = (HttpURLConnection) informationuri.openConnection(); connection.setRequestProperty("Accept", "text/turtle"); format = RDFFormat.TURTLE; responsecode = connection.getResponseCode(); } // Error // Seems to not work, anymore, since we use Repository. if (responsecode >= 400) { // is = connection.getErrorStream(); // // BufferedReader rd = new BufferedReader( // new InputStreamReader(is)); // // String response = ""; // String line; // while ((line = rd.readLine()) != null) { // response += line; // } // Olap4ldUtil._log // .warning("Warning: URL not possible to load: " // + response); // rd.close(); // is.close(); } else { // is = connection.getInputStream(); // // BufferedReader in = new BufferedReader( // new InputStreamReader(is)); // // // Check first line // String inputLine; // // Read first line only. // // while ((inputLine = in.readLine()) != null) { // // } // inputLine = in.readLine(); // if (inputLine != null // && (inputLine // .startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>") // || inputLine // .startsWith("<?xml version=\"1.0\" encoding=\"utf-8\"?>") // || inputLine // .startsWith("<"))) { // Olap4ldUtil._log // .config("Had to guess format to be RDFXML: " // + location); // format = RDFFormat.RDFXML; // // } else if (inputLine != null // && (!inputLine // .startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>") // && !inputLine // .startsWith("<?xml version=\"1.0\" encoding=\"utf-8\"?>") // || inputLine // .startsWith("<"))) { // Olap4ldUtil._log // .config("Had to guess format to be Turtle: " // + location); // format = RDFFormat.TURTLE; // // con.add(location, locationstring, // // RDFFormat.TURTLE); // // } else { // Olap4ldUtil._log // .warning("Warning: URL not possible to load since input line is NULL."); // } // in.close(); // is.close(); try { InputStream inputstream = connection.getInputStream(); con.add(inputstream, locationstring, format); connection.disconnect(); } catch (RDFParseException e) { // Try to continue on next line? // int linenumber = e.getLineNumber(); // Since it happens often, we just log it in config Olap4ldUtil._log.config("RDFParseException:" + e.getMessage()); if (e.getColumnNumber() == 1) { Olap4ldUtil._log .config("RDFParseException, but try afresh."); // Try with in-built loading functionality if (format == RDFFormat.RDFXML) { con.add(informationuri, locationstring, RDFFormat.TURTLE); } else { con.add(informationuri, locationstring, RDFFormat.RDFXML); } } } } } Olap4ldUtil._log.info("Lookup on resource: " + noninformationuri); Olap4ldUtil._log.info("Its informationuri: " + informationuri); // Make sure we set it loaded setLoaded(noninformationuri); setLoaded(informationuri); // Log content only if log level accordingly if (Olap4ldUtil._isDebug) { query = "select * where {?s ?p ?o}"; Olap4ldUtil._log.config("Check loaded data (10 triples): " + query); executeSparqlSelectQuery(query, false); } con.close(); // Check max loaded query = "select (count(?s) as ?count) where {?s ?p ?o}"; result = executeSparqlSelectQuery(query, false); this.LOADED_TRIPLE_SIZE = new Integer(result.get(1)[0].toString()); Olap4ldUtil._log.info("Number of loaded triples after: " + this.LOADED_TRIPLE_SIZE); } catch (RepositoryException e) { throw new OlapException("Problem with repository: " + e.getMessage()); } catch (MalformedURLException e) { // If this happens, it is not so bad. e.printStackTrace(); } catch (IOException e) { Olap4ldUtil._log.config("ConnectException:" + e.getMessage()); e.printStackTrace(); } catch (RDFParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * We load all data for a cube. We also normalise and do integrity checks. * * @param location */ private void loadCube(URL noninformationuri) throws OlapException { try { // We crawl the data Olap4ldUtil._log .info("Run directed crawling algorithm on datasets"); long time = System.currentTimeMillis(); runDirectedCrawlingAlgorithm(noninformationuri); // Load other metadata objects? time = System.currentTimeMillis() - time; Olap4ldUtil._log .info("Load dataset: directed crawling algorithm finished in " + time + "ms."); // We need to materialise implicit information Olap4ldUtil._log.info("Run normalisation algorithm on datasets"); time = System.currentTimeMillis(); runNormalizationAlgorithm(); // Own normalization and inferencing. runOWLReasoningAlgorithm(); time = System.currentTimeMillis() - time; Olap4ldUtil._log .info("Run normalisation algorithm on dataset: finished in " + time + "ms."); // Now that we presumably have loaded all necessary // data, we check integrity constraints Olap4ldUtil._log.info("Check integrity constraints on datasets."); time = System.currentTimeMillis(); checkIntegrityConstraints(); // Own checks: RepositoryConnection con = repo.getConnection(); String prefixbindings = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> "; // Datasets should have at least one // observation String testquery = prefixbindings + "ASK { ?CUBE_NAME a qb:DataSet. FILTER NOT EXISTS { ?obs qb:dataSet ?CUBE_NAME. } }"; BooleanQuery booleanQuery = con.prepareBooleanQuery( QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { throw new OlapException( "Failed own check: Dataset should have at least one observation. "); } // XXX Possible other checks // No dimensions // No aggregation function // Code list empty // No member // Important! con.close(); time = System.currentTimeMillis() - time; Olap4ldUtil._log .info("Check integrity constraints on dataset: finished in " + time + "ms."); } catch (RepositoryException e) { throw new OlapException("Problem with repository: " + e.getMessage()); } catch (QueryEvaluationException e) { throw new OlapException("Problem with query evaluation: " + e.getMessage()); } catch (MalformedQueryException e) { throw new OlapException("Problem with malformed query: " + e.getMessage()); } } /** * Duplication strategy of deduction rules as in * http://semanticweb.org/OWLLD/#Rules are executed, but only once which may * not do full materialisation. * * @throws OlapException */ public void runOWLReasoningAlgorithm() throws OlapException { try { RepositoryConnection con = repo.getConnection(); /* * SKOS: * * Since 1) skos:topConceptOf is a sub-property of skos:inScheme. 2) * skos:topConceptOf is owl:inverseOf the property * skos:hasTopConcept 3) The rdfs:domain of skos:hasTopConcept is * the class skos:ConceptScheme.: ?conceptScheme skos:hasTopConcept * ?concept. => ?concept skos:inScheme ?conceptScheme. */ // String updateQuery = TYPICALPREFIXES // + // " INSERT { ?concept skos:inScheme ?codelist.} WHERE { ?codelist skos:hasTopConcept ?concept }; "; // Update updateQueryQuery = con.prepareUpdate( // QueryLanguage.SPARQL, updateQuery); // updateQueryQuery.execute(); // Here, subPropertyOf reasoning is done. String updateQuery = TYPICALPREFIXES + " INSERT { ?dimension rdfs:range ?range.} WHERE { ?dimension rdfs:subPropertyOf ?superdimension. ?superdimension rdfs:range ?range. }; "; Update updateQueryQuery = con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery.execute(); // Here, owl:sameAs reasoning is done. // eq-sym String updateQueryEqSym = TYPICALPREFIXES + " INSERT { ?y owl:sameAs ?x.} WHERE { ?x owl:sameAs ?y }; "; Update updateQueryQueryEqSym = con.prepareUpdate( QueryLanguage.SPARQL, updateQueryEqSym); updateQueryQueryEqSym.execute(); // eq-trans String updateQueryEqTrans = TYPICALPREFIXES + " INSERT { ?x owl:sameAs ?z . } WHERE { ?x owl:sameAs ?y . ?y owl:sameAs ?z . }; "; Update updateQueryQueryEqTrans = con.prepareUpdate( QueryLanguage.SPARQL, updateQueryEqTrans); updateQueryQueryEqTrans.execute(); // eq-rep-s String updateQueryEqRepS = TYPICALPREFIXES + " INSERT { ?s0 ?p ?o . } WHERE { ?s owl:sameAs ?s0 . ?s ?p ?o . }; "; Update updateQueryQueryEqRepS = con.prepareUpdate( QueryLanguage.SPARQL, updateQueryEqRepS); updateQueryQueryEqRepS.execute(); // eq-rep-p String updateQueryEqRepP = TYPICALPREFIXES + " INSERT { ?s ?p0 ?o . } WHERE { ?p owl:sameAs ?p0 . ?s ?p ?o . }; "; Update updateQueryQueryEqRepP = con.prepareUpdate( QueryLanguage.SPARQL, updateQueryEqRepP); updateQueryQueryEqRepP.execute(); // eq-rep-o String updateQueryEqRepO = TYPICALPREFIXES + " INSERT { ?s ?p ?o0 . } WHERE { ?o owl:sameAs ?o0 . ?s ?p ?o . }; "; Update updateQueryQueryEqRepO = con.prepareUpdate( QueryLanguage.SPARQL, updateQueryEqRepO); updateQueryQueryEqRepO.execute(); con.close(); } catch (RepositoryException e) { throw new OlapException("Problem with repository: " + e.getMessage()); } catch (MalformedQueryException e) { throw new OlapException("Problem with malformed query: " + e.getMessage()); } catch (UpdateExecutionException e) { throw new OlapException("Problem with update execution: " + e.getMessage()); } } public static List<ReconciliationCorrespondence> getReconciliationCorrespondences( boolean askForMergeCorrespondences) { List<ReconciliationCorrespondence> correspondences = new ArrayList<ReconciliationCorrespondence>(); // // MIO2EUR // List<Node[]> mio_eur2eur_inputmembers = new ArrayList<Node[]>(); // mio_eur2eur_inputmembers // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#unit"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/unit#MIO_EUR") }); // // mio_eur2eur_inputmembers.add(new Node[] { new // // Resource("http://purl.org/linked-data/sdmx/2009/measure#obsValue"), // new Variable("value1") }); // // List<Node[]> mio_eur2eur_outputmembers = new ArrayList<Node[]>(); // mio_eur2eur_outputmembers.add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#unit"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/unit#EUR") }); // mio_eur2eur_outputmembers // .add(new Node[] { // new Variable("outputcube"), // new // // Resource( // "http://purl.org/linked-data/sdmx/2009/measure#obsValue"), // new Variable("value2") }); // // String mio_eur2eur_function = "(1000000 * x)"; // // ReconciliationCorrespondence mio_eur2eur_correspondence = new // ReconciliationCorrespondence( // "MIO2EUR", mio_eur2eur_inputmembers, null, // mio_eur2eur_outputmembers, mio_eur2eur_function); // if (!askForMergeCorrespondences) { // correspondences.add(mio_eur2eur_correspondence); // } // // // COMPUTE_GDP // // List<Node[]> computegdp_inputmembers1 = new ArrayList<Node[]>(); // computegdp_inputmembers1 // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#indic_na"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/indic_na#B1G") }); // // List<Node[]> computegdp_inputmembers2 = new ArrayList<Node[]>(); // computegdp_inputmembers2 // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#indic_na"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/indic_na#D21_M_D31") }); // // List<Node[]> computegdp_outputmembers = new ArrayList<Node[]>(); // computegdp_outputmembers // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#indic_na"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/indic_na#NGDP") }); // // String computegdp_function = "(x1 + x2)"; // // ReconciliationCorrespondence computegdp_correspondence = new // ReconciliationCorrespondence( // "COMP_GDP", computegdp_inputmembers1, computegdp_inputmembers2, // computegdp_outputmembers, computegdp_function); // if (askForMergeCorrespondences) { // correspondences.add(computegdp_correspondence); // } // // // COMPUTE_GDP_PER_CAPITA // // List<Node[]> computegdppercapita_inputmembers1 = new // ArrayList<Node[]>(); // computegdppercapita_inputmembers1 // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#indic_na"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/indic_na#NGDP") }); // computegdppercapita_inputmembers1.add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#unit"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/unit#EUR") }); // // List<Node[]> computegdppercapita_inputmembers2 = new // ArrayList<Node[]>(); // computegdppercapita_inputmembers2 // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#sex"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/sex#T") }); // computegdppercapita_inputmembers2 // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#age"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/age#TOTAL") }); // // List<Node[]> computegdppercapita_outputmembers = new // ArrayList<Node[]>(); // computegdppercapita_outputmembers // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#indic_na"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/indic_na#NGDPH") }); // computegdppercapita_outputmembers // .add(new Node[] { // new Resource( // "http://ontologycentral.com/2009/01/eurostat/ns#unit"), // new Resource( // "http://estatwrap.ontologycentral.com/dic/unit#EUR_HAB") }); // // String computegdppercapita_function = "(x1 / x2)"; // // ReconciliationCorrespondence computegdppercapita_correspondence = new // ReconciliationCorrespondence( // "COMP_GDP_CAP", computegdppercapita_inputmembers1, // computegdppercapita_inputmembers2, // computegdppercapita_outputmembers, computegdppercapita_function); // if (askForMergeCorrespondences) { // correspondences.add(computegdppercapita_correspondence); // } // COMPUTE_YES // ReconciliationCorrespondence computeyes_correspondence; // List<Node[]> computeyes_inputmembers1 = new ArrayList<Node[]>(); // computeyes_inputmembers1 // .add(new Node[] { // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable"), // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#v590_2") }); // // List<Node[]> computeyes_inputmembers2 = new ArrayList<Node[]>(); // computeyes_inputmembers2 // .add(new Node[] { // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable"), // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#v590_3") }); // // List<Node[]> computeyes_outputmembers = new ArrayList<Node[]>(); // computeyes_outputmembers // .add(new Node[] { // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable"), // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#v590_2+3") }); // // String computeyes_function = "(x1 + x2)"; // // computeyes_correspondence = new ReconciliationCorrespondence( // "COMP_YES", computeyes_inputmembers1, computeyes_inputmembers2, // computeyes_outputmembers, computeyes_function); // // if (askForMergeCorrespondences) { // correspondences.add(computeyes_correspondence); // } // COMPUTE\_PERCENTAGENOS // ReconciliationCorrespondence computepercentagenos_correspondence; // List<Node[]> computepercentagenos_inputmembers1 = new // ArrayList<Node[]>(); // computepercentagenos_inputmembers1 // .add(new Node[] { // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable"), // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#v590_1") }); // // List<Node[]> computepercentagenos_inputmembers2 = new // ArrayList<Node[]>(); // computepercentagenos_inputmembers2 // .add(new Node[] { // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable"), // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#v590_2+3") }); // // List<Node[]> computepercentagenos_outputmembers = new // ArrayList<Node[]>(); // computepercentagenos_outputmembers // .add(new Node[] { // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/vocab.rdf#variable"), // new Resource( // "http://lod.gesis.org/lodpilot/ALLBUS/variable.rdf#v590_2+3") }); // // Not yet needed since manual drill-across: // // computepercentagenos_outputmembers // // .add(new Node[] { // // new Resource( // // "http://ontologycentral.com/2009/01/eurostat/ns#indic_na"), // // new Resource( // // "http://estatwrap.ontologycentral.com/dic/indic_na#RGDPG") }); // // String computepercentagenos_function = "(x1 / (x1 + x2))"; // // computepercentagenos_correspondence = new // ReconciliationCorrespondence( // "COMP_PERCNOS", computepercentagenos_inputmembers1, // computepercentagenos_inputmembers2, // computepercentagenos_outputmembers, // computepercentagenos_function); // // if (askForMergeCorrespondences) { // correspondences.add(computepercentagenos_correspondence); // } return correspondences; } /** * Check whether we query for "Measures". * * @param dimensionUniqueName * @param hierarchyUniqueName * @param levelUniqueName * @return */ private boolean isMeasureQueriedForExplicitly(Node dimensionUniqueName, Node hierarchyUniqueName, Node levelUniqueName) { // If one is set, it should not be Measures, not. // Watch out: no square brackets are needed. boolean explicitlyStated = (dimensionUniqueName != null && dimensionUniqueName .toString() .equals(Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)) || (hierarchyUniqueName != null && hierarchyUniqueName .toString().equals( Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)) || (levelUniqueName != null && levelUniqueName.toString() .equals(Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)); return explicitlyStated; } /** * * @return */ public List<Node[]> getDatabases(Restrictions restrictions) { /* * DISCOVER_DATASOURCES(new MetadataColumn("DataSourceName"), new * MetadataColumn("DataSourceDescription"), new MetadataColumn("URL"), * new MetadataColumn("DataSourceInfo"), new MetadataColumn( * "ProviderName"), new MetadataColumn("ProviderType"), new * MetadataColumn("AuthenticationMode")), */ List<Node[]> results = new ArrayList<Node[]>(); Node[] bindingNames = new Node[] { new Variable("?DATA_SOURCE_NAME"), new Variable("?DATA_SOURCE_DESCRIPTION"), new Variable("?PROVIDER_NAME"), new Variable("?URL"), new Variable("?DATA_SOURCE_INFO") }; results.add(bindingNames); Node[] triple = new Node[] { new Literal(DATASOURCENAME), new Literal(DATASOURCEDESCRIPTION), new Literal(PROVIDERNAME), new Literal(URL), new Literal(DATASOURCEINFO) }; results.add(triple); return results; } public List<Node[]> getCatalogs(Restrictions restrictions) { /* * DBSCHEMA_CATALOGS( new MetadataColumn("CATALOG_NAME"), new * MetadataColumn( "DESCRIPTION"), new MetadataColumn("ROLES"), new * MetadataColumn("DATE_MODIFIED")) */ List<Node[]> results = new ArrayList<Node[]>(); Node[] bindingNames = new Node[] { new Variable("?TABLE_CAT") }; results.add(bindingNames); Node[] triple = new Node[] { new Literal(TABLE_CAT) }; results.add(triple); return results; } /** * * @return */ public List<Node[]> getSchemas(Restrictions restrictions) { List<Node[]> results = new ArrayList<Node[]>(); /* * DBSCHEMA_SCHEMATA(new MetadataColumn( "CATALOG_NAME"), new * MetadataColumn("SCHEMA_NAME"), new MetadataColumn("SCHEMA_OWNER")) */ Node[] bindingNames = new Node[] { new Variable("?TABLE_SCHEM"), new Variable("?TABLE_CAT") }; results.add(bindingNames); Node[] triple = new Node[] { new Literal(TABLE_SCHEM), new Literal(TABLE_CAT), // No owner new Literal("") }; results.add(triple); return results; } /** * * Get Cubes from the triple store. * * Here, the restrictions are strict restrictions without patterns. * * This is both called for metadata queries and OLAP queries. * * @return Node[]{} */ public List<Node[]> getCubes(Restrictions restrictions) throws OlapException { Olap4ldUtil._log.config("Linked Data Engine: Get Cubes..."); // I once preloaded some data. // We now assume that we can also query for a global dataset identified // by a // comma separated list of datasets. For now, since we are interested in // all possible // derived datasets from a set of datasets. // Yet, we probably should either start with an MDX query or an Logical // Operator Tree // defining an information need. Although we might also just be // interested in all // possible derived datasets of a set of datasets. try { preload(); } catch (OlapException e) { // TODO Auto-generated catch block e.printStackTrace(); } List<Node[]> result = new ArrayList<Node[]>(); // Check whether Drill-across query // XXX: Wildcard delimiter if (restrictions.cubeNamePattern != null) { String[] datasets = restrictions.cubeNamePattern.toString().split( ","); Olap4ldUtil._log.info("Load dataset: " + datasets.length + " datasets crawled."); for (int i = 0; i < datasets.length; i++) { String dataset = datasets[i]; Restrictions newrestrictions = new Restrictions(); newrestrictions.cubeNamePattern = new Resource(dataset); List<Node[]> intermediaryresult = getCubesPerDataSet(newrestrictions); // Add to result boolean first = true; for (Node[] nodes : intermediaryresult) { if (first) { if (i == 0) { result.add(nodes); } first = false; continue; } // We do not want to have the single datasets returned. // We always have only one cube, the global cube. // result.add(nodes); } } } else { result = getCubesPerDataSet(restrictions); } /* * Now that we have loaded all cube, we need to implement entity * consolidation. * * We create an equivalence table. Then, for each dimension unique name, * we have one equivalence class. Then we can do as before. */ // List<Node[]> myresult = sparql(querytemplate, true); // // Add all of result2 to result // boolean first = true; // for (Node[] nodes : myresult) { // if (first) { // first = false; // continue; // } // result.add(nodes); // } // We do not do that anymore but use materialisation. // Now that we have loaded all data cubes, we can compute the // equivalence list. /* * Load equivalence statements from triple store */ // Olap4ldUtil._log.info("Load dataset: create equivalence list started."); // long time = System.currentTimeMillis(); // // List<Node[]> equivs = getEquivalenceStatements(); // // this.equivalenceList = createEquivalenceList(equivs); // // time = System.currentTimeMillis() - time; // Olap4ldUtil._log // .info("Load dataset: create equivalence list finished in " // + time + "ms."); // Now, add "virtual cube" // ?CATALOG_NAME ?SCHEMA_NAME ?CUBE_NAME ?CUBE_TYPE ?CUBE_CAPTION // ?DESCRIPTION String globalcubename = ""; if (restrictions.cubeNamePattern == null) { Map<String, Integer> cubemap = Olap4ldLinkedDataUtil .getNodeResultFields(result.get(0)); // Concatenate all cubes. boolean first = true; for (Node[] nodes : result) { if (first) { // First header; first = false; continue; } if (!globalcubename.equals("")) { globalcubename += ","; } globalcubename += nodes[cubemap.get("?CUBE_NAME")].toString(); } } else { globalcubename = restrictions.cubeNamePattern.toString(); } // XXX: The virtual cube should actually not be given to users. Users // simply issue queries over available datasets. Node[] virtualcube = new Node[] { new Literal(TABLE_CAT), new Literal(TABLE_SCHEM), new Resource(globalcubename), new Literal("CUBE"), new Literal("Global Cube"), new Literal("This is the global cube.") }; result.add(virtualcube); Olap4ldUtil._log .info("Load datasets: Number of loaded triples for all datasets: " + this.LOADED_TRIPLE_SIZE); // Check max loaded String query = "PREFIX qb: <http://purl.org/linked-data/cube#> select (count(?s) as ?count) where {?s qb:dataSet ?ds}"; List<Node[]> countobservationsresult = executeSparqlSelectQuery(query, false); Integer countobservation = new Integer( countobservationsresult.get(1)[0].toString()); Olap4ldUtil._log .info("Load datasets: Number of observations for all datasets: " + countobservation); /* * Check on restrictions that the interface makes: * * Restrictions are strong restrictions, no fuzzy, since those wild * cards have been eliminated before. */ // List<Node[]> result = applyRestrictions(cubeUris, restrictions); return result; } private List<Node[]> getCubesPerDataSet(Restrictions restrictions) throws OlapException { List<Node[]> result = new ArrayList<Node[]>(); // Before loading, I should check first, whether already loaded. URL noninformationuri; try { if (restrictions.cubeNamePattern == null) { // There is nothing to load Olap4ldUtil._log .config("If no cubeNamePattern is given, we cannot load a cube."); } else { noninformationuri = new URL( restrictions.cubeNamePattern.toString()); URL informationuri = Olap4ldLinkedDataUtil .askForLocation(noninformationuri); if (!isLoaded(noninformationuri) || !isLoaded(informationuri)) { // For now, we simply preload. loadCube(noninformationuri); } setLoaded(noninformationuri); setLoaded(informationuri); } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } String additionalFilters = createFilterForRestrictions(restrictions); String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getCubes_regular.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); result = executeSparqlSelectQuery(querytemplate, true); return result; } private void runDirectedCrawlingAlgorithm(URL noninformationuri) throws OlapException { try { // If we have cube uri and location is not loaded, yet, we start // collecting all information loadInStore(noninformationuri); // For everything else: Check whether really cube RepositoryConnection con; con = repo.getConnection(); // qb:structure is more robust than a qb:DataSet. String testquery = "PREFIX qb: <http://purl.org/linked-data/cube#> ASK { ?CUBE_NAME qb:structure ?dsd. FILTER (?CUBE_NAME = <" + noninformationuri + ">)}"; BooleanQuery booleanQuery = con.prepareBooleanQuery( QueryLanguage.SPARQL, testquery); boolean isDataset = booleanQuery.evaluate(); con.close(); if (!isDataset) { throw new OlapException( "A cube should be a qb:DataSet and serve via qb:structure a qb:DataStructureDefinition, also this one " + noninformationuri + "!"); } else { // If loading ds, also load dsd. Ask for DSD URI and // load String query = "PREFIX qb: <http://purl.org/linked-data/cube#> SELECT ?dsd WHERE {<" + noninformationuri + "> qb:structure ?dsd}"; List<Node[]> dsd = executeSparqlSelectQuery(query, true); // There should be a dsd // Note in spec: // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." if (dsd.size() <= 1) { throw new OlapException( "A cube should serve a data structure definition!"); } else { // Get the second URL dsduri = new URL(dsd.get(1)[0].toString()); loadInStore(dsduri); } boolean first; // Not done. Takes too long. // // If loading ds, also load seeAlso // query = // "PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?seeAlso WHERE {<" // + uri + "> rdfs:seeAlso ?seeAlso.}"; // List<Node[]> seeAlso = sparql(query, true); // // first = true; // for (Node[] nodes : seeAlso) { // if (first) { // first = false; // continue; // } // if (nodes[0] instanceof Resource) { // URL componenturi = new URL(nodes[0].toString()); // loadInStore(componenturi); // } // } // If loading ds, also load components query = "PREFIX qb: <http://purl.org/linked-data/cube#> SELECT ?comp WHERE {<" + noninformationuri + "> qb:structure ?dsd. ?dsd qb:component ?comp.}"; List<Node[]> components = executeSparqlSelectQuery(query, true); // There should be a dsd // Note in spec: // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." first = true; for (Node[] nodes : components) { if (first) { first = false; continue; } if (nodes[0] instanceof Resource) { URL componenturi = new URL(nodes[0].toString()); loadInStore(componenturi); } } // If loading ds, also load measures query = "PREFIX qb: <http://purl.org/linked-data/cube#> SELECT ?measure WHERE {<" + noninformationuri + "> qb:structure ?dsd. ?dsd qb:component ?comp. ?comp qb:measure ?measure}"; List<Node[]> measures = executeSparqlSelectQuery(query, true); // There should be a dsd // Note in spec: // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." first = true; for (Node[] nodes : measures) { if (first) { first = false; continue; } if (nodes[0] instanceof Resource) { URL measureuri = new URL(nodes[0].toString()); loadInStore(measureuri); } } // If loading ds, also load dimensions query = "PREFIX qb: <http://purl.org/linked-data/cube#> SELECT ?dimension WHERE {<" + noninformationuri + "> qb:structure ?dsd. ?dsd qb:component ?comp. ?comp qb:dimension ?dimension}"; List<Node[]> dimensions = executeSparqlSelectQuery(query, true); // There should be a dsd // Note in spec: // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." if (dimensions.size() <= 1) { throw new OlapException("A cube should serve a dimension!"); } else { first = true; for (Node[] nodes : dimensions) { if (first) { first = false; continue; } if (nodes[0] instanceof Resource) { URL dimensionuri = new URL(nodes[0].toString()); loadInStore(dimensionuri); } } } // Extra: Not done either. // If loading dimensions, also load rdfs:subPropertyOf // query = // "PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?superdimension WHERE {<" // + uri // + // "> qb:structure ?dsd. ?dsd qb:component ?comp. ?comp qb:dimension ?dimension. ?dimension rdfs:subPropertyOf ?superdimension. }"; // List<Node[]> superdimensions = sparql(query, true); // // first = true; // for (Node[] nodes : superdimensions) { // if (first) { // first = false; // continue; // } // // if (nodes[0] instanceof Resource) { // URL dimensionuri = new URL(nodes[0].toString()); // // loadInStore(dimensionuri); // } // } // If loading ds, also load codelists query = "PREFIX qb: <http://purl.org/linked-data/cube#> SELECT ?codelist WHERE {<" + noninformationuri + "> qb:structure ?dsd. ?dsd qb:component ?comp. ?comp qb:dimension ?dimension. ?dimension qb:codeList ?codelist}"; List<Node[]> codelists = executeSparqlSelectQuery(query, true); // There should be a dsd // Note in spec: // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." if (codelists.size() <= 1) { ; } else { first = true; // So far, members are not crawled. for (Node[] nodes : codelists) { if (first) { first = false; continue; } if (nodes[0] instanceof Resource) { URL codelisturi = new URL(nodes[0].toString()); loadInStore(codelisturi); } } } // Loading members // Not done for now since takes a long time and was not done for // ISEM either. // // If loading ds, also load ranges of dimensions // query = // "PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?range WHERE {<" // + uri // + // "> qb:structure ?dsd. ?dsd qb:component ?comp. ?comp qb:dimension ?dimension. ?dimension rdfs:range ?range}"; // List<Node[]> ranges = sparql(query, true); // // There should be a dsd // // Note in spec: // // // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." // if (ranges.size() <= 1) { // ; // } else { // first = true; // // So far, members are not crawled. // for (Node[] nodes : ranges) { // if (first) { // first = false; // continue; // } // // if (nodes[0] instanceof Resource) { // URL rangesuri = new URL(nodes[0].toString()); // loadInStore(rangesuri); // } // } // } // // // If loading ds, also load dimension values (if resources) - // // done similar as for degenerated members // query = // "PREFIX qb: <http://purl.org/linked-data/cube#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?member WHERE {<" // + uri // + // "> qb:structure ?dsd. ?dsd qb:component ?comp. ?comp qb:dimension ?dimension. ?obs qb:dataSet ?ds. ?obs ?dimension ?member}"; // List<Node[]> member = sparql(query, true); // // There should be a dsd // // Note in spec: // // // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." // if (member.size() <= 1) { // ; // } else { // first = true; // // So far, members are not crawled. // for (Node[] nodes : member) { // if (first) { // first = false; // continue; // } // // if (nodes[0] instanceof Resource) { // URL memberuri = new URL(nodes[0].toString()); // loadInStore(memberuri); // } // } // } } } catch (MalformedURLException e) { throw new OlapException("Problem with malformed url: " + e.getMessage()); } catch (OlapException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (RepositoryException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (MalformedQueryException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (QueryEvaluationException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } private void checkIntegrityConstraints() throws OlapException { // Check space for more complex integrity constraints boolean doComplexObservationIntegrityConstraints = (this.LOADED_TRIPLE_SIZE < this.MAX_COMPLEX_CONSTRAINTS_TRIPLE_SIZE); // Logging Olap4ldUtil._log.config("Run integrity constraints..."); Olap4ldUtil._log.config("including complex integrity constraints: " + doComplexObservationIntegrityConstraints + "..."); try { // Now, we check the integrity constraints RepositoryConnection con; con = repo.getConnection(); String testquery; BooleanQuery booleanQuery; boolean error = false; String overview = ""; String status = ""; // IC-1. Unique DataSet. Every qb:Observation // has exactly one associated qb:DataSet. // TODO: May take long since all observations tested // Since needs to go through all observations, only done if enough // memory if (doComplexObservationIntegrityConstraints) { testquery = TYPICALPREFIXES + "ASK { { ?obs a qb:Observation . FILTER NOT EXISTS { ?obs qb:dataSet ?dataset1 . } } UNION { ?obs a qb:Observation ; qb:dataSet ?dataset1, ?dataset2 . FILTER (?dataset1 != ?dataset2) }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-1. Unique DataSet. Every qb:Observation has exactly one associated qb:DataSet.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-1. Unique DataSet. Every qb:Observation has exactly one associated qb:DataSet.<br/>"; Olap4ldUtil._log.config(status); overview += status; } } // IC-2. Unique DSD. Every qb:DataSet has // exactly one associated // qb:DataStructureDefinition. <= tested before testquery = TYPICALPREFIXES + "ASK { { ?dataset a qb:DataSet . FILTER NOT EXISTS { ?dataset qb:structure ?dsd . } } UNION { ?dataset a qb:DataSet ; qb:structure ?dsd1, ?dsd2 . FILTER (?dsd1 != ?dsd2) }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-2. Unique DSD. Every qb:DataSet has exactly one associated qb:DataStructureDefinition. <br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-2. Unique DSD. Every qb:DataSet has exactly one associated qb:DataStructureDefinition.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-3. DSD includes measure testquery = TYPICALPREFIXES + "ASK { ?dsd a qb:DataStructureDefinition . FILTER NOT EXISTS { ?dsd qb:component [qb:componentProperty [a qb:MeasureProperty]] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-3. DSD includes measure. Every qb:DataStructureDefinition must include at least one declared measure.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-3. DSD includes measure. Every qb:DataStructureDefinition must include at least one declared measure.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-4. Dimensions have range testquery = TYPICALPREFIXES + "ASK { ?dim a qb:DimensionProperty . FILTER NOT EXISTS { ?dim rdfs:range [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-4. Dimensions have range. Every dimension declared in a qb:DataStructureDefinition must have a declared rdfs:range.\n"; // Find out what went wrong: String query = TYPICALPREFIXES + "SELECT ?dim { ?dim a qb:DimensionProperty . FILTER NOT EXISTS { ?dim rdfs:range [] }}"; List<Node[]> errordimensions = executeSparqlSelectQuery(query, true); // There should be a dsd // Note in spec: // "Every qb:DataSet has exactly one associated qb:DataStructureDefinition." status += "Wrong dimensions: "; for (Node[] nodes : errordimensions) { // Get the second status += nodes[0].toString() + " "; } Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-4. Dimensions have range. Every dimension declared in a qb:DataStructureDefinition must have a declared rdfs:range.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-5. Concept dimensions have code lists testquery = TYPICALPREFIXES + "ASK { ?dim a qb:DimensionProperty ; rdfs:range skos:Concept . FILTER NOT EXISTS { ?dim qb:codeList [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-5. Concept dimensions have code lists. Every dimension with range skos:Concept must have a qb:codeList. <br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-5. Concept dimensions have code lists. Every dimension with range skos:Concept must have a qb:codeList. <br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-6. Only attributes may be optional <= not // important right now. We do not regard // attributes. testquery = TYPICALPREFIXES + "ASK { ?dsd qb:component ?componentSpec . ?componentSpec qb:componentRequired \"false\"^^xsd:boolean ; qb:componentProperty ?component . FILTER NOT EXISTS { ?component a qb:AttributeProperty }} "; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-6. Only attributes may be optional. The only components of a qb:DataStructureDefinition that may be marked as optional, using qb:componentRequired are attributes. <br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-6. Only attributes may be optional. The only components of a qb:DataStructureDefinition that may be marked as optional, using qb:componentRequired are attributes.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-7. Slice Keys must be declared <= not // important right now. We do not regard slices. testquery = TYPICALPREFIXES + "ASK { ?sliceKey a qb:SliceKey . FILTER NOT EXISTS { [a qb:DataStructureDefinition] qb:sliceKey ?sliceKey }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-7. Slice Keys must be declared. Every qb:SliceKey must be associated with a qb:DataStructureDefinition.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-7. Slice Keys must be declared. Every qb:SliceKey must be associated with a qb:DataStructureDefinition.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-8. Slice Keys consistent with DSD // Spelling error in spec fixed testquery = TYPICALPREFIXES + "ASK { ?sliceKey a qb:SliceKey; qb:componentProperty ?prop . ?dsd qb:sliceKey ?sliceKey . FILTER NOT EXISTS { ?dsd qb:component [qb:componentProperty ?prop] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-8. Slice Keys consistent with DSD. Every qb:componentProperty on a qb:SliceKey must also be declared as a qb:component of the associated qb:DataStructureDefinition.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-8. Slice Keys consistent with DSD. Every qb:componentProperty on a qb:SliceKey must also be declared as a qb:component of the associated qb:DataStructureDefinition. <br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-9. Unique slice structure // Does not seem to work. Returns all slices. Therefore disabled. // String query = // "PREFIX qb: <http://purl.org/linked-data/cube#> select * where { { ?slice a qb:Slice . FILTER NOT EXISTS { ?slice qb:sliceStructure ?key } } UNION { ?slice a qb:Slice ; qb:sliceStructure ?key1, ?key2; FILTER (?key1 != ?key2) }}"; // List<Node[]> result = sparql(query, false); // testquery = TYPICALPREFIXES // + // "ASK { { ?slice a qb:Slice . FILTER NOT EXISTS { ?slice qb:sliceStructure ?key } } UNION { ?slice a qb:Slice ; qb:sliceStructure ?key1, ?key2; FILTER (?key1 != ?key2) }}"; // booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, // testquery); // if (booleanQuery.evaluate() == true) { // error = true; // status = // "Failed specification check: IC-9. Unique slice structure. Each qb:Slice must have exactly one associated qb:sliceStructure. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } else { // status = // "Successful specification check: IC-9. Unique slice structure. Each qb:Slice must have exactly one associated qb:sliceStructure. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } // IC-10. Slice dimensions complete testquery = TYPICALPREFIXES + "ASK { ?slice qb:sliceStructure [qb:componentProperty ?dim] . FILTER NOT EXISTS { ?slice ?dim [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-10. Slice dimensions complete. Every qb:Slice must have a value for every dimension declared in its qb:sliceStructure.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-10. Slice dimensions complete. Every qb:Slice must have a value for every dimension declared in its qb:sliceStructure.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // Since needs to go through all observations, only done if enough // memory if (doComplexObservationIntegrityConstraints) { // IC-11. All dimensions required <= takes too // long testquery = TYPICALPREFIXES + "ASK { ?obs qb:dataSet/qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty; FILTER NOT EXISTS { ?obs ?dim [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-11. All dimensions required. Every qb:Observation has a value for each dimension declared in its associated qb:DataStructureDefinition. <br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-11. All dimensions required. Every qb:Observation has a value for each dimension declared in its associated qb:DataStructureDefinition. <br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-12. No duplicate observations <= takes especially // long, expensive quadratic check (IC-12) (see // http://lists.w3.org/Archives/Public/public-gld-wg/2013Jul/0017.html) // Dave Reynolds has implemented a linear time version of it // testquery = TYPICALPREFIXES // + // "ASK { FILTER( ?allEqual ) { SELECT (MIN(?equal) AS ?allEqual) WHERE { ?obs1 qb:dataSet ?dataset . ?obs2 qb:dataSet ?dataset . FILTER (?obs1 != ?obs2) ?dataset qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty . ?obs1 ?dim ?value1 . ?obs2 ?dim ?value2 . BIND( ?value1 = ?value2 AS ?equal) } GROUP BY ?obs1 ?obs2 }}"; // booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, // testquery); // if (booleanQuery.evaluate() == true) { // error = true; // status = // "Failed specification check: IC-12. No duplicate observations. No two qb:Observations in the same qb:DataSet may have the same value for all dimensions.<br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } else { // status = // "Successful specification check: IC-12. No duplicate observations. No two qb:Observations in the same qb:DataSet may have the same value for all dimensions.<br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } } // IC-13. Required attributes <= We do not // regard attributes testquery = TYPICALPREFIXES + "ASK { ?obs qb:dataSet/qb:structure/qb:component ?component . ?component qb:componentRequired \"true\"^^xsd:boolean ; qb:componentProperty ?attr . FILTER NOT EXISTS { ?obs ?attr [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-13. Required attributes. Every qb:Observation has a value for each declared attribute that is marked as required.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-13. Required attributes. Every qb:Observation has a value for each declared attribute that is marked as required. <br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-14. All measures present testquery = TYPICALPREFIXES + "ASK { ?obs qb:dataSet/qb:structure ?dsd . FILTER NOT EXISTS { ?dsd qb:component/qb:componentProperty qb:measureType } ?dsd qb:component/qb:componentProperty ?measure . ?measure a qb:MeasureProperty; FILTER NOT EXISTS { ?obs ?measure [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-14. All measures present. In a qb:DataSet which does not use a Measure dimension then each individual qb:Observation must have a value for every declared measure.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-14. All measures present. In a qb:DataSet which does not use a Measure dimension then each individual qb:Observation must have a value for every declared measure.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-15. Measure dimension consistent <= We do // not support measureType, yet. testquery = TYPICALPREFIXES + "ASK { ?obs qb:dataSet/qb:structure ?dsd ; qb:measureType ?measure . ?dsd qb:component/qb:componentProperty qb:measureType . FILTER NOT EXISTS { ?obs ?measure [] }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-15. Measure dimension consistent. In a qb:DataSet which uses a Measure dimension then each qb:Observation must have a value for the measure corresponding to its given qb:measureType.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-15. Measure dimension consistent. In a qb:DataSet which uses a Measure dimension then each qb:Observation must have a value for the measure corresponding to its given qb:measureType.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-16. Single measure on measure dimension // observation testquery = TYPICALPREFIXES + "ASK { ?obs qb:dataSet/qb:structure ?dsd ; qb:measureType ?measure ; ?omeasure [] . ?dsd qb:component/qb:componentProperty qb:measureType ; qb:component/qb:componentProperty ?omeasure . ?omeasure a qb:MeasureProperty . FILTER (?omeasure != ?measure)}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-16. Single measure on measure dimension observation. In a qb:DataSet which uses a Measure dimension then each qb:Observation must only have a value for one measure (by IC-15 this will be the measure corresponding to its qb:measureType).<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-16. Single measure on measure dimension observation. In a qb:DataSet which uses a Measure dimension then each qb:Observation must only have a value for one measure (by IC-15 this will be the measure corresponding to its qb:measureType). <br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-17. All measures present in measures dimension cube testquery = TYPICALPREFIXES + "ASK { { SELECT ?numMeasures (COUNT(?obs2) AS ?count) WHERE { { SELECT ?dsd (COUNT(?m) AS ?numMeasures) WHERE { ?dsd qb:component/qb:componentProperty ?m. ?m a qb:MeasureProperty . } GROUP BY ?dsd } ?obs1 qb:dataSet/qb:structure ?dsd; qb:dataSet ?dataset ; qb:measureType ?m1 . ?obs2 qb:dataSet ?dataset ; qb:measureType ?m2 . FILTER NOT EXISTS { ?dsd qb:component/qb:componentProperty ?dim . FILTER (?dim != qb:measureType) ?dim a qb:DimensionProperty . ?obs1 ?dim ?v1 . ?obs2 ?dim ?v2. FILTER (?v1 != ?v2) } } GROUP BY ?obs1 ?numMeasures HAVING (?count != ?numMeasures) }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-17. All measures present in measures dimension cube. In a qb:DataSet which uses a Measure dimension then if there is a Observation for some combination of non-measure dimensions then there must be other Observations with the same non-measure dimension values for each of the declared measures.<br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-17. All measures present in measures dimension cube. In a qb:DataSet which uses a Measure dimension then if there is a Observation for some combination of non-measure dimensions then there must be other Observations with the same non-measure dimension values for each of the declared measures.<br/>"; Olap4ldUtil._log.config(status); overview += status; } // IC-18. Consistent data set links testquery = TYPICALPREFIXES + "ASK { ?dataset qb:slice ?slice . ?slice qb:observation ?obs .FILTER NOT EXISTS { ?obs qb:dataSet ?dataset . }}"; booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, testquery); if (booleanQuery.evaluate() == true) { error = true; status = "Failed specification check: IC-18. If a qb:DataSet D has a qb:slice S, and S has an qb:observation O, then the qb:dataSet corresponding to O must be D. <br/>"; Olap4ldUtil._log.config(status); overview += status; } else { status = "Successful specification check: IC-18. If a qb:DataSet D has a qb:slice S, and S has an qb:observation O, then the qb:dataSet corresponding to O must be D. <br/>"; Olap4ldUtil._log.config(status); overview += status; } // Since needs to go through all observations, only done if enough // memory if (doComplexObservationIntegrityConstraints) { // Watch out: skos:inScheme has to be used. // String query = // TYPICALPREFIXES+" select * { ?obs qb:dataSet/qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty ; qb:codeList ?list . ?list a skos:ConceptScheme . ?obs ?dim ?v . FILTER NOT EXISTS { ?v a skos:Concept ; skos:inScheme ?list }}"; // List<Node[]> result = sparql(query, false); // IC-19. Codes from code list // Probably takes very long since involves property chain and // going through all observations. // Commented, because would not fit with equivalence reasoning // (duplication strategy) // testquery = TYPICALPREFIXES // + // "ASK { ?obs qb:dataSet/qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty ; qb:codeList ?list . ?list a skos:ConceptScheme . ?obs ?dim ?v . FILTER NOT EXISTS { ?v a skos:Concept ; skos:inScheme ?list }}"; // String testquery2 = TYPICALPREFIXES // + // "ASK { ?obs qb:dataSet/qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty ; qb:codeList ?list . ?list a skos:Collection . ?obs ?dim ?v . FILTER NOT EXISTS { ?v a skos:Concept . ?list skos:member+ ?v }}"; // booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, // testquery); // BooleanQuery booleanQuery2 = con.prepareBooleanQuery( // QueryLanguage.SPARQL, testquery2); // if (booleanQuery.evaluate() == true // || booleanQuery2.evaluate() == true) { // error = true; // status = // "Failed specification check: IC-19. If a dimension property has a qb:codeList, then the value of the dimension property on every qb:Observation must be in the code list. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } else { // status = // "Successful specification check: IC-19. If a dimension property has a qb:codeList, then the value of the dimension property on every qb:Observation must be in the code list. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } } // For the next two integrity constraints, we need instantiation // queries first. // XXX: Do them later. // IC-20. Codes from hierarchy // testquery = prefixbindings // + // "ASK { ?obs qb:dataSet/qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty ; qb:codeList ?list . ?list a qb:HierarchicalCodeList . ?obs ?dim ?v . FILTER NOT EXISTS { ?list qb:hierarchyRoot/<$p>* ?v }}"; // booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, // testquery); // if (booleanQuery.evaluate() == true) { // error = true; // status = // "Failed specification check: IC-20. If a dimension property has a qb:HierarchicalCodeList with a non-blank qb:parentChildProperty then the value of that dimension property on every qb:Observation must be reachable from a root of the hierarchy using zero or more hops along the qb:parentChildProperty links. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } else { // status = // "Successful specification check: IC-20. If a dimension property has a qb:HierarchicalCodeList with a non-blank qb:parentChildProperty then the value of that dimension property on every qb:Observation must be reachable from a root of the hierarchy using zero or more hops along the qb:parentChildProperty links. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } // IC-21. Codes from hierarchy (inverse) // testquery = prefixbindings // + // "ASK { ?obs qb:dataSet/qb:structure/qb:component/qb:componentProperty ?dim . ?dim a qb:DimensionProperty ; qb:codeList ?list . ?list a qb:HierarchicalCodeList . ?obs ?dim ?v . FILTER NOT EXISTS { ?list qb:hierarchyRoot/<$p>* ?v }}"; // booleanQuery = con.prepareBooleanQuery(QueryLanguage.SPARQL, // testquery); // if (booleanQuery.evaluate() == true) { // error = true; // status = // "Failed specification check: IC-21. If a dimension property has a qb:HierarchicalCodeList with an inverse qb:parentChildProperty then the value of that dimension property on every qb:Observation must be reachable from a root of the hierarchy using zero or more hops along the inverse qb:parentChildProperty links. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } else { // status = // "Successful specification check: IC-21. If a dimension property has a qb:HierarchicalCodeList with an inverse qb:parentChildProperty then the value of that dimension property on every qb:Observation must be reachable from a root of the hierarchy using zero or more hops along the inverse qb:parentChildProperty links. <br/>"; // Olap4ldUtil._log.config(status); // overview += status; // } // Important! con.close(); if (error) { Olap4ldUtil._log .warning("Integrity constraints failed: Integrity constraints overview: " + overview); // XXX: OlapExceptions possible? throw new OlapException( "Integrity constraints failed: Integrity constraints overview:<br/>" + overview); } else { // Logging Olap4ldUtil._log .config("Integrity constraints successful: Integrity constraints overview: " + overview); } } catch (RepositoryException e) { throw new OlapException("Problem with repository: " + e.getMessage()); } catch (MalformedQueryException e) { throw new OlapException("Problem with malformed query: " + e.getMessage()); } catch (QueryEvaluationException e) { throw new OlapException("Problem with query evaluation: " + e.getMessage()); } } /** * According to QB specification, a cube may be provided in abbreviated form * so that inferences first have to be materialised to properly query a * cube. * * @throws OlapException */ public void runNormalizationAlgorithm() throws OlapException { // Logging Olap4ldUtil._log.config("Run normalization algorithm..."); try { RepositoryConnection con; con = repo.getConnection(); // First, we run normalization algorithm String updateQuery = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX qb: <http://purl.org/linked-data/cube#> INSERT { ?o rdf:type qb:Observation .} WHERE { [] qb:observation ?o .}; INSERT { ?o rdf:type qb:Observation .} WHERE { ?o qb:dataSet [] .}; INSERT { ?s rdf:type qb:Slice . } WHERE { [] qb:slice ?s.}; INSERT { ?cs qb:componentProperty ?p . ?p rdf:type qb:DimensionProperty .} WHERE { ?cs qb:dimension ?p .}; INSERT { ?cs qb:componentProperty ?p . ?p rdf:type qb:MeasureProperty .} WHERE { ?cs qb:measure ?p .};INSERT { ?cs qb:componentProperty ?p . ?p rdf:type qb:AttributeProperty .} WHERE { ?cs qb:attribute ?p .}"; Update updateQueryQuery = con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery.execute(); // # Dataset attachments updateQuery = "PREFIX qb: <http://purl.org/linked-data/cube#> INSERT { ?obs ?comp ?value} WHERE { ?spec qb:componentProperty ?comp ; qb:componentAttachment qb:DataSet . ?dataset qb:structure [qb:component ?spec]; ?comp ?value . ?obs qb:dataSet ?dataset.};"; con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery = con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery.execute(); // # Slice attachments updateQuery = "PREFIX qb: <http://purl.org/linked-data/cube#> INSERT { ?obs ?comp ?value} WHERE { ?spec qb:componentProperty ?comp; qb:componentAttachment qb:Slice . ?dataset qb:structure [qb:component ?spec]; qb:slice ?slice . ?slice ?comp ?value; qb:observation ?obs .};"; con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery = con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery.execute(); // # Dimension values on slices updateQuery = "PREFIX qb: <http://purl.org/linked-data/cube#> INSERT { ?obs ?comp ?value} WHERE { ?spec qb:componentProperty ?comp . ?comp a qb:DimensionProperty . ?dataset qb:structure [qb:component ?spec]; qb:slice ?slice . ?slice ?comp ?value; qb:observation ?obs .}"; con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery = con.prepareUpdate(QueryLanguage.SPARQL, updateQuery); updateQueryQuery.execute(); // Important! con.close(); } catch (RepositoryException e) { throw new OlapException("Problem with repository: " + e.getMessage()); } catch (MalformedQueryException e) { throw new OlapException("Problem with malformed query: " + e.getMessage()); } catch (UpdateExecutionException e) { throw new OlapException("Problem with update execution: " + e.getMessage()); } } /** * Returns canonical for a node. * * If none is found, simply the node is returned. * * @param canonical * @return */ @SuppressWarnings("unused") @Deprecated private Node getCanonical(Node canonical) { // for (List<Node> equivalenceClass : equivalenceList) { // for (Node node : equivalenceClass) { // // if (node.equals(canonical)) { // canonical = equivalenceClass.get(0); // break; // } // } // } // return canonical; return null; } /** * * @param equivs * - equiv[0] first same as and equiv[1] second same as entity. * @return */ @SuppressWarnings("unused") @Deprecated private List<List<Node>> createEquivalenceList(List<Node[]> equivs) { List<List<Node>> newequivalenceList = new ArrayList<List<Node>>(); // HashMap<String, Integer> invertedindex = new HashMap<String, // Integer>(); // // boolean first = true; // for (Node[] equiv : equivs) { // // // First is header // if (first) { // first = false; // continue; // } // // String A = equiv[0].toString(); // String B = equiv[1].toString(); // // // Store equiv // // // Find A // Integer rA = invertedindex.get(A); // // // Find B // Integer rB = invertedindex.get(B); // // if (rA == null && rB == null) { // // new row rAB // // // ArrayList<Node> newEquivalenceClass = new ArrayList<Node>(); // newEquivalenceClass.add(A); // newEquivalenceClass.add(B); // newequivalenceList.add(newEquivalenceClass); // // } else if (rA != null && rB != null) { // // merge rA and rB // // // We create new list // // List<Node> rAB = new ArrayList<Node>(); // // for (Node node : rB) { // rAB.add(node); // } // for (Node node : rA) { // rAB.add(node); // } // newequivalenceList.remove(rB); // newequivalenceList.remove(rA); // newequivalenceList.add(rAB); // // } else if (rA != null) { // // add B to rA // // rA.add(B); // // } else if (rB != null) { // // add A to rB // // rB.add(A); // } // // } return newequivalenceList; } @Deprecated private List<Node> getEquivalenceClassOfNode(Node resource) { // List<Node> equivalenceClass = new ArrayList<Node>(); // equivalenceClass.add(resource); // for (List<Node> iterable_element : this.equivalenceList) { // for (Node node : iterable_element) { // if (resource.equals(node)) { // equivalenceClass = iterable_element; // break; // } // } // } // return equivalenceClass; return null; } @Deprecated public List<Node[]> getEquivalenceStatements() { // /* // * More directed better? // * // * {$this->getStandardPrefixes()} select ?same1 ?same2 // * {$this->getStandardFrom()} where { ?dsd qb:component ?comp. ?comp // * ?componentProp ?same1. ?same1 owl:sameAs ?same2 } // */ // // // Same as between dimensions and members. // String query = // "PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX qb: <http://purl.org/linked-data/cube#> select ?same1 ?same2 where " // + // "{ { ?dsd qb:component ?comp. ?comp ?componentProp ?same1. ?same1 owl:sameAs ?same2 } UNION { ?obs a qb:Observation. ?obs ?dimension ?same1. ?same1 owl:sameAs ?same2 } } "; // // List<Node[]> myresult = sparql(query, true); // // Olap4ldUtil._log.info("Number of equivalence statements: " // + (myresult.size() - 1)); // // return myresult; return null; } /** * Get possible dimensions (component properties) for each cube from the * triple store. * * Approach: I create the output from Linked Data, and then I filter it * using the restrictions. * * I have to also return the Measures dimension for each cube. * * @return Node[]{?dsd ?dimension ?compPropType ?name} * @throws MalformedURLException */ public List<Node[]> getDimensions(Restrictions restrictions) throws OlapException { Olap4ldUtil._log.config("Linked Data Engine: Get Dimensions..."); List<Node[]> result = new ArrayList<Node[]>(); // Check whether Drill-across query // XXX: Wildcard delimiter if (restrictions.cubeNamePattern != null) { String[] datasets = restrictions.cubeNamePattern.toString().split( ","); for (int i = 0; i < datasets.length; i++) { String dataset = datasets[i]; // Should make sure that the full restrictions are used. Node saverestrictioncubePattern = restrictions.cubeNamePattern; restrictions.cubeNamePattern = new Resource(dataset); List<Node[]> intermediaryresult = getDimensionsPerDataSet(restrictions); restrictions.cubeNamePattern = saverestrictioncubePattern; // Add to result boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { if (i == 0) { result.add(anIntermediaryresult); } first = false; continue; } // Add the single dimensions of the datasets to be // transformed with createGlobalDimensions. result.add(anIntermediaryresult); } } } else { result = getDimensionsPerDataSet(restrictions); } // Create global cube which is intersection of all dimensions and new // cube name return createGlobalDimensions(restrictions, result); } private List<Node[]> createGlobalDimensions(Restrictions restrictions, List<Node[]> intermediaryresult) { List<Node[]> result = new ArrayList<Node[]>(); Map<String, Integer> dimensionmap = Olap4ldLinkedDataUtil .getNodeResultFields(intermediaryresult.get(0)); // Add to result boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { first = false; result.add(anIntermediaryresult); continue; } // Also add dimension to global cube Node[] newnode = new Node[9]; newnode[dimensionmap.get("?CATALOG_NAME")] = anIntermediaryresult[dimensionmap .get("?CATALOG_NAME")]; newnode[dimensionmap.get("?SCHEMA_NAME")] = anIntermediaryresult[dimensionmap .get("?SCHEMA_NAME")]; // New cube name of global cube if (restrictions.cubeNamePattern == null) { newnode[dimensionmap.get("?CUBE_NAME")] = anIntermediaryresult[dimensionmap .get("?CUBE_NAME")]; } else { newnode[dimensionmap.get("?CUBE_NAME")] = restrictions.cubeNamePattern; } newnode[dimensionmap.get("?DIMENSION_NAME")] = anIntermediaryresult[dimensionmap .get("?DIMENSION_NAME")]; // Needs to be canonical name newnode[dimensionmap.get("?DIMENSION_UNIQUE_NAME")] = anIntermediaryresult[dimensionmap .get("?DIMENSION_UNIQUE_NAME")]; newnode[dimensionmap.get("?DIMENSION_CAPTION")] = anIntermediaryresult[dimensionmap .get("?DIMENSION_CAPTION")]; newnode[dimensionmap.get("?DIMENSION_ORDINAL")] = anIntermediaryresult[dimensionmap .get("?DIMENSION_ORDINAL")]; newnode[dimensionmap.get("?DIMENSION_TYPE")] = anIntermediaryresult[dimensionmap .get("?DIMENSION_TYPE")]; newnode[dimensionmap.get("?DESCRIPTION")] = anIntermediaryresult[dimensionmap .get("?DESCRIPTION")]; // Only add if not already contained. boolean contained = false; for (Node[] aResult : result) { boolean sameDimension = aResult[dimensionmap .get("?DIMENSION_UNIQUE_NAME")].toString().equals( newnode[dimensionmap.get("?DIMENSION_UNIQUE_NAME")] .toString()); boolean sameCube = aResult[dimensionmap.get("?CUBE_NAME")] .toString().equals( newnode[dimensionmap.get("?CUBE_NAME")] .toString()); if (sameDimension && sameCube) { contained = true; } } if (!contained) { result.add(newnode); } } return result; } private List<Node[]> getDimensionsPerDataSet(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); List<Node[]> result = new ArrayList<Node[]>(); // Create header Node[] header = new Node[] { new Variable("?CATALOG_NAME"), new Variable("?SCHEMA_NAME"), new Variable("?CUBE_NAME"), new Variable("?DIMENSION_NAME"), new Variable("?DIMENSION_UNIQUE_NAME"), new Variable("?DIMENSION_CAPTION"), new Variable("?DIMENSION_ORDINAL"), new Variable("?DIMENSION_TYPE"), new Variable("?DESCRIPTION") }; result.add(header); if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { // Get all dimensions String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getDimensions_regular.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // Add all of result2 to result boolean first = true; for (Node[] anIntermediaryresult : myresult) { if (first) { first = false; continue; } result.add(anIntermediaryresult); } } // We try to find measures if (true) { // In this case, we do ask for a measure dimension. String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getDimensions_measure_dimension.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // List<Node[]> result2 = applyRestrictions(memberUris2, // restrictions); // Add all of result2 to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // Use canonical identifier // result = replaceIdentifiersWithCanonical(result); return result; } /** * Every measure also needs to be listed as member. When I create the dsd, I * add obsValue as a dimension, but also as a measure. However, members of * the measure dimension would typically all be named differently from the * measure (e.g., obsValue5), therefore, we do not find a match. The problem * is, that getMembers() has to return the measures. So, either, in the dsd, * we need to add a dimension with the measure as a member, or, the query * for the members should return for measures the measure property as * member. * * * Here, all the measure properties are returned. * * @param context * @param metadataRequest * @param restrictions * @return */ public List<Node[]> getMeasures(Restrictions restrictions) throws OlapException { Olap4ldUtil._log.config("Linked Data Engine: Get Measures..."); List<Node[]> result = new ArrayList<Node[]>(); // Check whether Drill-across query // XXX: Wildcard delimiter if (restrictions.cubeNamePattern != null) { String[] datasets = restrictions.cubeNamePattern.toString().split( ","); for (int i = 0; i < datasets.length; i++) { String dataset = datasets[i]; // Should make sure that the full restrictions are used. Node saverestrictioncubePattern = restrictions.cubeNamePattern; restrictions.cubeNamePattern = new Resource(dataset); List<Node[]> intermediaryresult = getMeasuresPerDataSet(restrictions); restrictions.cubeNamePattern = saverestrictioncubePattern; // Add to result boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { if (i == 0) { result.add(anIntermediaryresult); } first = false; continue; } // We do not want to have the single datasets returned. // result.add(anIntermediaryresult); // Also add measure to global cube Map<String, Integer> map = Olap4ldLinkedDataUtil .getNodeResultFields(intermediaryresult.get(0)); Node[] newnode = new Node[10]; newnode[map.get("?CATALOG_NAME")] = anIntermediaryresult[map .get("?CATALOG_NAME")]; newnode[map.get("?SCHEMA_NAME")] = anIntermediaryresult[map .get("?SCHEMA_NAME")]; newnode[map.get("?CUBE_NAME")] = restrictions.cubeNamePattern; newnode[map.get("?MEASURE_UNIQUE_NAME")] = anIntermediaryresult[map .get("?MEASURE_UNIQUE_NAME")]; newnode[map.get("?MEASURE_NAME")] = anIntermediaryresult[map .get("?MEASURE_NAME")]; newnode[map.get("?MEASURE_CAPTION")] = anIntermediaryresult[map .get("?MEASURE_CAPTION")]; newnode[map.get("?DATA_TYPE")] = anIntermediaryresult[map .get("?DATA_TYPE")]; newnode[map.get("?MEASURE_IS_VISIBLE")] = anIntermediaryresult[map .get("?MEASURE_IS_VISIBLE")]; newnode[map.get("?MEASURE_AGGREGATOR")] = anIntermediaryresult[map .get("?MEASURE_AGGREGATOR")]; newnode[map.get("?EXPRESSION")] = anIntermediaryresult[map .get("?EXPRESSION")]; // Only add if not already contained. // For measures, we add them all. // boolean contained = false; // for (Node[] aResult : result) { // boolean sameDimension = aResult[map // .get("?MEASURE_UNIQUE_NAME")].toString() // .equals(newnode[map // .get("?MEASURE_UNIQUE_NAME")] // .toString()); // boolean sameCube = aResult[map // .get("?CUBE_NAME")].toString().equals( // newnode[map.get("?CUBE_NAME")] // .toString()); // // if (sameDimension && sameCube) { // contained = true; // } // } // // if (!contained) { // result.add(newnode); // } result.add(newnode); } } } else { result = getMeasuresPerDataSet(restrictions); } return result; } private List<Node[]> getMeasuresPerDataSet(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); // ///////////QUERY////////////////////////// /* * TODO: How to consider equal measures? */ // Boolean values need to be returned as "true" or "false". // Get all measures String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getMeasures.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> result = executeSparqlSelectQuery(querytemplate, true); // Here, we also include measures without aggregation function. // We have also added these measures as members to getMembers(). querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getMeasures_withoutimplicit.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> result2 = executeSparqlSelectQuery(querytemplate, true); // List<Node[]> result = applyRestrictions(measureUris, restrictions); // Add all of result2 to result boolean first = true; for (Node[] nodes : result2) { if (first) { first = false; continue; } result.add(nodes); } // Use canonical identifier // result = replaceIdentifiersWithCanonical(result); return result; } /** * * Return hierarchies * * @param context * @param metadataRequest * @param restrictions * @return */ public List<Node[]> getHierarchies(Restrictions restrictions) throws OlapException { Olap4ldUtil._log.config("Linked Data Engine: Get Hierarchies..."); List<Node[]> result = new ArrayList<Node[]>(); // Check whether Drill-across query // XXX: Wildcard delimiter if (restrictions.cubeNamePattern != null) { String[] datasets = restrictions.cubeNamePattern.toString().split( ","); for (int i = 0; i < datasets.length; i++) { String dataset = datasets[i]; // Should make sure that the full restrictions are used. Node saverestrictioncubePattern = restrictions.cubeNamePattern; restrictions.cubeNamePattern = new Resource(dataset); List<Node[]> intermediaryresult = getHierarchiesPerDataSet(restrictions); restrictions.cubeNamePattern = saverestrictioncubePattern; // Add to result boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { if (i == 0) { result.add(anIntermediaryresult); } first = false; continue; } // Add the single hierarchies of the datasets to be // transformed with createGlobalHierarchies. result.add(anIntermediaryresult); } } } else { result = getHierarchiesPerDataSet(restrictions); } // Create global hierarchies which is intersection of all hierarchies // and new // cube name return createGlobalHierarchies(restrictions, result); } private List<Node[]> createGlobalHierarchies(Restrictions restrictions, List<Node[]> intermediaryresult) { List<Node[]> result = new ArrayList<Node[]>(); boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { first = false; result.add(anIntermediaryresult); continue; } // Also add hierarchy to global cube Map<String, Integer> hierarchymap = Olap4ldLinkedDataUtil .getNodeResultFields(intermediaryresult.get(0)); Node[] newnode = new Node[9]; newnode[hierarchymap.get("?CATALOG_NAME")] = anIntermediaryresult[hierarchymap .get("?CATALOG_NAME")]; newnode[hierarchymap.get("?SCHEMA_NAME")] = anIntermediaryresult[hierarchymap .get("?SCHEMA_NAME")]; // New cube name of global cube if (restrictions.cubeNamePattern == null) { newnode[hierarchymap.get("?CUBE_NAME")] = anIntermediaryresult[hierarchymap .get("?CUBE_NAME")]; } else { newnode[hierarchymap.get("?CUBE_NAME")] = restrictions.cubeNamePattern; } newnode[hierarchymap.get("?DIMENSION_UNIQUE_NAME")] = anIntermediaryresult[hierarchymap .get("?DIMENSION_UNIQUE_NAME")]; newnode[hierarchymap.get("?HIERARCHY_UNIQUE_NAME")] = anIntermediaryresult[hierarchymap .get("?HIERARCHY_UNIQUE_NAME")]; newnode[hierarchymap.get("?HIERARCHY_NAME")] = anIntermediaryresult[hierarchymap .get("?HIERARCHY_NAME")]; newnode[hierarchymap.get("?HIERARCHY_CAPTION")] = anIntermediaryresult[hierarchymap .get("?HIERARCHY_CAPTION")]; newnode[hierarchymap.get("?DESCRIPTION")] = anIntermediaryresult[hierarchymap .get("?DESCRIPTION")]; newnode[hierarchymap.get("?HIERARCHY_MAX_LEVEL_NUMBER")] = anIntermediaryresult[hierarchymap .get("?HIERARCHY_MAX_LEVEL_NUMBER")]; // Only add if not already contained. boolean contained = false; for (Node[] aResult : result) { boolean sameDimension = aResult[hierarchymap .get("?DIMENSION_UNIQUE_NAME")].toString().equals( newnode[hierarchymap.get("?DIMENSION_UNIQUE_NAME")] .toString()); boolean sameHierarchy = aResult[hierarchymap .get("?HIERARCHY_UNIQUE_NAME")].toString().equals( newnode[hierarchymap.get("?HIERARCHY_UNIQUE_NAME")] .toString()); boolean sameCube = aResult[hierarchymap.get("?CUBE_NAME")] .toString().equals( newnode[hierarchymap.get("?CUBE_NAME")] .toString()); if (sameDimension && sameHierarchy && sameCube) { contained = true; } } if (!contained) { result.add(newnode); } } return result; } private List<Node[]> getHierarchiesPerDataSet(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); List<Node[]> result = new ArrayList<Node[]>(); // Create header Node[] header = new Node[] { new Variable("?CATALOG_NAME"), new Variable("?SCHEMA_NAME"), new Variable("?CUBE_NAME"), new Variable("?DIMENSION_UNIQUE_NAME"), new Variable("?HIERARCHY_UNIQUE_NAME"), new Variable("?HIERARCHY_NAME"), new Variable("?HIERARCHY_CAPTION"), new Variable("?DESCRIPTION"), new Variable("?HIERARCHY_MAX_LEVEL_NUMBER") }; result.add(header); if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { // Get all hierarchies with codeLists String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getHierarchies_regular.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // Add all of result to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // List<Node[]> result = applyRestrictions(hierarchyResults, // restrictions); // Try to find measure dimensions. if (true) { // In this case, we do ask for a measure hierarchy. String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getHierarchies_measure_dimension.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // List<Node[]> result2 = applyRestrictions(memberUris2, // restrictions); // Add all of result2 to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // Get dimension hierarchies without codeList, but only if hierarchy is // not set and different from dimension unique name /* * * Note in spec: * "Every dimension declared in a qb:DataStructureDefinition must have a declared rdfs:range." * Note in spec: * "Every dimension with range skos:Concept must have a qb:codeList." <= * This means, we do not necessarily need a code list in many cases. * But, if we have a code list, then: "If a dimension property has a * qb:codeList, then the value of the dimension property on every * qb:Observation must be in the code list." */ if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getHierarchies_without_codelist.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // List<Node[]> result3 = applyRestrictions(memberUris3, // restrictions); // Add all of result2 to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // Use canonical identifier // result = replaceIdentifiersWithCanonical(result); return result; } @Deprecated public List<Node[]> replaceIdentifiersWithCanonical(List<Node[]> result) { // // List<Node[]> newresult = new ArrayList<Node[]>(); // // for (Node[] anIntermediaryresult : result) { // Node[] newnode = new Node[anIntermediaryresult.length]; // for (int i = 0; i < anIntermediaryresult.length; i++) { // newnode[i] = getCanonical(anIntermediaryresult[i]); // } // newresult.add(newnode); // } // // return newresult; return null; } /** * * @param context * @param metadataRequest * @param restrictions * @return */ public List<Node[]> getLevels(Restrictions restrictions) throws OlapException { Olap4ldUtil._log.config("Linked Data Engine: Get Levels..."); List<Node[]> result = new ArrayList<Node[]>(); // Check whether Drill-across query // XXX: Wildcard delimiter if (restrictions.cubeNamePattern != null) { String[] datasets = restrictions.cubeNamePattern.toString().split( ","); for (int i = 0; i < datasets.length; i++) { String dataset = datasets[i]; // Should make sure that the full restrictions are used. Node saverestrictioncubePattern = restrictions.cubeNamePattern; restrictions.cubeNamePattern = new Resource(dataset); List<Node[]> intermediaryresult = getLevelsPerDataSet(restrictions); restrictions.cubeNamePattern = saverestrictioncubePattern; // Add to result boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { if (i == 0) { result.add(anIntermediaryresult); } first = false; continue; } // We do not want to have the single datasets returned. // result.add(anIntermediaryresult); // Also add dimension to global cube Map<String, Integer> levelmap = Olap4ldLinkedDataUtil .getNodeResultFields(intermediaryresult.get(0)); Node[] newnode = new Node[12]; newnode[levelmap.get("?CATALOG_NAME")] = anIntermediaryresult[levelmap .get("?CATALOG_NAME")]; newnode[levelmap.get("?SCHEMA_NAME")] = anIntermediaryresult[levelmap .get("?SCHEMA_NAME")]; newnode[levelmap.get("?CUBE_NAME")] = restrictions.cubeNamePattern; newnode[levelmap.get("?DIMENSION_UNIQUE_NAME")] = anIntermediaryresult[levelmap .get("?DIMENSION_UNIQUE_NAME")]; newnode[levelmap.get("?HIERARCHY_UNIQUE_NAME")] = anIntermediaryresult[levelmap .get("?HIERARCHY_UNIQUE_NAME")]; newnode[levelmap.get("?LEVEL_UNIQUE_NAME")] = anIntermediaryresult[levelmap .get("?LEVEL_UNIQUE_NAME")]; newnode[levelmap.get("?LEVEL_CAPTION")] = anIntermediaryresult[levelmap .get("?LEVEL_CAPTION")]; newnode[levelmap.get("?LEVEL_NAME")] = anIntermediaryresult[levelmap .get("?LEVEL_NAME")]; newnode[levelmap.get("?DESCRIPTION")] = anIntermediaryresult[levelmap .get("?DESCRIPTION")]; newnode[levelmap.get("?LEVEL_NUMBER")] = anIntermediaryresult[levelmap .get("?LEVEL_NUMBER")]; newnode[levelmap.get("?LEVEL_CARDINALITY")] = anIntermediaryresult[levelmap .get("?LEVEL_CARDINALITY")]; newnode[levelmap.get("?LEVEL_TYPE")] = anIntermediaryresult[levelmap .get("?LEVEL_TYPE")]; // Only add if not already contained. boolean contained = false; for (Node[] aResult : result) { boolean sameDimension = aResult[levelmap .get("?DIMENSION_UNIQUE_NAME")].toString() .equals(newnode[levelmap .get("?DIMENSION_UNIQUE_NAME")] .toString()); boolean sameHierarchy = aResult[levelmap .get("?HIERARCHY_UNIQUE_NAME")].toString() .equals(newnode[levelmap .get("?HIERARCHY_UNIQUE_NAME")] .toString()); boolean sameLevel = aResult[levelmap .get("?LEVEL_UNIQUE_NAME")].toString().equals( newnode[levelmap.get("?LEVEL_UNIQUE_NAME")] .toString()); boolean sameCube = aResult[levelmap.get("?CUBE_NAME")] .toString().equals( newnode[levelmap.get("?CUBE_NAME")] .toString()); if (sameDimension && sameHierarchy && sameLevel && sameCube) { contained = true; } } if (!contained) { result.add(newnode); } } } } else { result = getLevelsPerDataSet(restrictions); } return result; } private List<Node[]> getLevelsPerDataSet(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); List<Node[]> result = new ArrayList<Node[]>(); // Create header Node[] header = new Node[] { new Variable("?CATALOG_NAME"), new Variable("?SCHEMA_NAME"), new Variable("?CUBE_NAME"), new Variable("?DIMENSION_UNIQUE_NAME"), new Variable("?HIERARCHY_UNIQUE_NAME"), new Variable("?LEVEL_UNIQUE_NAME"), new Variable("?LEVEL_CAPTION"), new Variable("?LEVEL_NAME"), new Variable("?DESCRIPTION"), new Variable("?LEVEL_NUMBER"), new Variable("?LEVEL_CARDINALITY"), new Variable("?LEVEL_TYPE") }; result.add(header); if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { // TODO: Add regularly modeled levels (without using xkos) String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getLevels_regular.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // Add all of result2 to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } // Get all levels of code lists using xkos // TODO: LEVEL_CARDINALITY is not solved, yet. querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getLevels_xkos.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); myresult = executeSparqlSelectQuery(querytemplate, true); // Add all of result2 to result first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // Distinct for several measures per cube. // Add measures levels // Second, ask for the measures (which are also members), but only if // measure if (true) { // In this case, we do ask for a measure dimension. String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getLevels_measure_dimension.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // List<Node[]> result2 = applyRestrictions(memberUris2, // restrictions); // Add all of result2 to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // Add levels for dimensions without codelist, but only if hierarchy and // dimension names are equal if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getLevels_without_codelist.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); // Second, ask for the measures (which are also members) List<Node[]> myresult = executeSparqlSelectQuery(querytemplate, true); // List<Node[]> result3 = applyRestrictions(memberUris3, // restrictions); // Add all of result3 to result boolean first = true; for (Node[] nodes : myresult) { if (first) { first = false; continue; } result.add(nodes); } } // Use canonical identifier // result = replaceIdentifiersWithCanonical(result); return result; } /** * Important issues to remember: Every measure also needs to be listed as * member. When I create the dsd, I add obsValue as a dimension, but also as * a measure. However, members of the measure dimension would typically all * be named differently from the measure (e.g., obsValue5), therefore, we do * not find a match. The problem is, that getMembers() has to return the * measures. So, either, in the dsd, we need to add a dimension with the * measure as a member, or, the query for the members should return for * measures the measure property as member. * * The dimension/hierarchy/level of a measure should always be "Measures". * * Typically, a measure should not have a codeList, since we can have many * many members. If a measure does not have a codelist, the bounding would * still work, since The componentProperty is existing, but no hierarchy... * * For caption of members, we should eventually use * http://www.w3.org/2004/02/skos/core#notation skos:notation, since members * are in rdf represented as skos:Concept and this is the proper way to give * them a representation. * * Assumptions of this method: * * The restrictions are set up only as follows 1) cube, dim, hier, level 2) * cube, dim, hier, level, member, null 3) cube, dim, hier, level, member, * treeOp * * The members are only modelled as follows 1) Measure Member (member of the * measure dimension) 2) Level Member (member of a regular dimension) 3) Top * Concept Member (member via skos:topConcept) 4) Degenerated Member (member * without code list) * * @return Node[]{?memberURI ?name} * @throws MalformedURLException */ public List<Node[]> getMembers(Restrictions restrictions) throws OlapException { Olap4ldUtil._log.config("Linked Data Engine: Get Members..."); List<Node[]> result = new ArrayList<Node[]>(); // Check whether Drill-across query // XXX: Wildcard delimiter if (restrictions.cubeNamePattern != null) { String[] datasets = restrictions.cubeNamePattern.toString().split( ","); for (int i = 0; i < datasets.length; i++) { String dataset = datasets[i]; // Should make sure that the full restrictions are used. // XXX: Refactor: used at every getXXX() Node saverestrictioncubePattern = restrictions.cubeNamePattern; restrictions.cubeNamePattern = new Resource(dataset); List<Node[]> intermediaryresult = getMembersPerDataSet(restrictions); restrictions.cubeNamePattern = saverestrictioncubePattern; // Add to result boolean first = true; for (Node[] anIntermediaryresult : intermediaryresult) { if (first) { if (i == 0) { result.add(anIntermediaryresult); } first = false; continue; } // We do not want to have the single datasets returned. // result.add(anIntermediaryresult); // Also add dimension to global cube Map<String, Integer> membermap = Olap4ldLinkedDataUtil .getNodeResultFields(intermediaryresult.get(0)); Node[] newnode = new Node[13]; newnode[membermap.get("?CATALOG_NAME")] = anIntermediaryresult[membermap .get("?CATALOG_NAME")]; newnode[membermap.get("?SCHEMA_NAME")] = anIntermediaryresult[membermap .get("?SCHEMA_NAME")]; newnode[membermap.get("?CUBE_NAME")] = restrictions.cubeNamePattern; newnode[membermap.get("?DIMENSION_UNIQUE_NAME")] = anIntermediaryresult[membermap .get("?DIMENSION_UNIQUE_NAME")]; newnode[membermap.get("?HIERARCHY_UNIQUE_NAME")] = anIntermediaryresult[membermap .get("?HIERARCHY_UNIQUE_NAME")]; newnode[membermap.get("?LEVEL_UNIQUE_NAME")] = anIntermediaryresult[membermap .get("?LEVEL_UNIQUE_NAME")]; newnode[membermap.get("?LEVEL_NUMBER")] = anIntermediaryresult[membermap .get("?LEVEL_NUMBER")]; newnode[membermap.get("?MEMBER_UNIQUE_NAME")] = anIntermediaryresult[membermap .get("?MEMBER_UNIQUE_NAME")]; newnode[membermap.get("?MEMBER_NAME")] = anIntermediaryresult[membermap .get("?MEMBER_NAME")]; newnode[membermap.get("?MEMBER_CAPTION")] = anIntermediaryresult[membermap .get("?MEMBER_CAPTION")]; newnode[membermap.get("?MEMBER_TYPE")] = anIntermediaryresult[membermap .get("?MEMBER_TYPE")]; newnode[membermap.get("?PARENT_UNIQUE_NAME")] = anIntermediaryresult[membermap .get("?PARENT_UNIQUE_NAME")]; newnode[membermap.get("?PARENT_LEVEL")] = anIntermediaryresult[membermap .get("?PARENT_LEVEL")]; // Only add if not already contained. boolean contained = false; for (Node[] aResult : result) { boolean sameDimension = aResult[membermap .get("?DIMENSION_UNIQUE_NAME")].toString() .equals(newnode[membermap .get("?DIMENSION_UNIQUE_NAME")] .toString()); boolean sameHierarchy = aResult[membermap .get("?HIERARCHY_UNIQUE_NAME")].toString() .equals(newnode[membermap .get("?HIERARCHY_UNIQUE_NAME")] .toString()); boolean sameLevel = aResult[membermap .get("?LEVEL_UNIQUE_NAME")].toString().equals( newnode[membermap.get("?LEVEL_UNIQUE_NAME")] .toString()); boolean sameMember = aResult[membermap .get("?MEMBER_UNIQUE_NAME")].toString().equals( newnode[membermap.get("?MEMBER_UNIQUE_NAME")] .toString()); boolean sameCube = aResult[membermap.get("?CUBE_NAME")] .toString().equals( newnode[membermap.get("?CUBE_NAME")] .toString()); if (sameDimension && sameHierarchy && sameLevel && sameMember && sameCube) { contained = true; } } if (!contained) { result.add(newnode); } } } } else { result = getMembersPerDataSet(restrictions); } return result; } private List<Node[]> getMembersPerDataSet(Restrictions restrictions) { List<Node[]> result = new ArrayList<Node[]>(); List<Node[]> intermediaryresult = null; // Create header Node[] header = new Node[] { new Variable("?CATALOG_NAME"), new Variable("?SCHEMA_NAME"), new Variable("?CUBE_NAME"), new Variable("?DIMENSION_UNIQUE_NAME"), new Variable("?HIERARCHY_UNIQUE_NAME"), new Variable("?LEVEL_UNIQUE_NAME"), new Variable("?LEVEL_NUMBER"), new Variable("?MEMBER_NAME"), new Variable("?MEMBER_UNIQUE_NAME"), new Variable("?MEMBER_CAPTION"), new Variable("?MEMBER_TYPE"), new Variable("?PARENT_UNIQUE_NAME"), new Variable("?PARENT_LEVEL") }; result.add(header); // Measure Member if (true) { intermediaryresult = getMeasureMembers(restrictions); addToResult(intermediaryresult, result); } // Regular members if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { intermediaryresult = getHasTopConceptMembers(restrictions); addToResult(intermediaryresult, result); } // Xkos members // Watch out: No square brackets if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { intermediaryresult = getXkosMembers(restrictions); addToResult(intermediaryresult, result); } // If we still do not have members, then we might have degenerated // members if (!isMeasureQueriedForExplicitly(restrictions.dimensionUniqueName, restrictions.hierarchyUniqueName, restrictions.levelUniqueName)) { // Members without codeList intermediaryresult = getDegeneratedMembers(restrictions); addToResult(intermediaryresult, result); } // Use canonical identifier // result = replaceIdentifiersWithCanonical(result); return result; } private List<Node[]> getMeasureMembers(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); /* * I would assume that if TREE_OP is set, we have a unique member given * and either want its children, its siblings, its parent, self, * ascendants, or descendants. */ if (restrictions.tree != null && (restrictions.tree & 8) != 8) { // Assumption 1: Treeop only uses Member if (restrictions.memberUniqueName == null) { throw new UnsupportedOperationException( "If a treeMask is given, we should also have a unique member name!"); } if ((restrictions.tree & 1) == 1) { // CHILDREN Olap4ldUtil._log.config("TreeOp:CHILDREN"); } if ((restrictions.tree & 2) == 2) { // SIBLINGS Olap4ldUtil._log.config("TreeOp:SIBLINGS"); if (restrictions.cubeNamePattern != null) { additionalFilters += " FILTER (?CUBE_NAME = <" + restrictions.cubeNamePattern + ">) "; } } if ((restrictions.tree & 4) == 4) { // PARENT Olap4ldUtil._log.config("TreeOp:PARENT"); } if ((restrictions.tree & 16) == 16) { // DESCENDANTS Olap4ldUtil._log.config("TreeOp:DESCENDANTS"); } if ((restrictions.tree & 32) == 32) { // ANCESTORS Olap4ldUtil._log.config("TreeOp:ANCESTORS"); } } else { // TreeOp = Self or null Olap4ldUtil._log.config("TreeOp:SELF"); } // Second, ask for the measures (which are also members) String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getMembers_measure_members.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> memberUris2 = executeSparqlSelectQuery(querytemplate, true); return memberUris2; } /** * Finds specific typical members. * * @param restrictions * * @return */ private List<Node[]> getXkosMembers(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); /* * I would assume that if TREE_OP is set, we have a unique member given * and either want its children, its siblings, its parent, self, * ascendants, or descendants. */ if (restrictions.tree != null && (restrictions.tree & 8) != 8) { // Assumption 1: Treeop only uses Member if (restrictions.memberUniqueName == null) { throw new UnsupportedOperationException( "If a treeMask is given, we should also have a unique member name!"); } if ((restrictions.tree & 1) == 1) { // CHILDREN Olap4ldUtil._log.config("TreeOp:CHILDREN"); // Here, we need a specific filter additionalFilters = " FILTER (?PARENT_UNIQUE_NAME = <" + restrictions.memberUniqueName + ">) "; } if ((restrictions.tree & 2) == 2) { // SIBLINGS Olap4ldUtil._log.config("TreeOp:SIBLINGS"); } if ((restrictions.tree & 4) == 4) { // PARENT Olap4ldUtil._log.config("TreeOp:PARENT"); } if ((restrictions.tree & 16) == 16) { // DESCENDANTS Olap4ldUtil._log.config("TreeOp:DESCENDANTS"); } if ((restrictions.tree & 32) == 32) { // ANCESTORS Olap4ldUtil._log.config("TreeOp:ANCESTORS"); } throw new UnsupportedOperationException( "TreeOp and getLevelMember failed."); } else { // TreeOp = Self or null Olap4ldUtil._log.config("TreeOp:SELF"); } String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getMembers_xkos.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> memberUris2 = executeSparqlSelectQuery(querytemplate, true); return memberUris2; } /** * Returns all hasTopConcept members of the cube. * * @param dimensionUniqueName * @param cubeNamePattern * * @param cubeNamePattern * @return */ private List<Node[]> getHasTopConceptMembers(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); /* * I would assume that if TREE_OP is set, we have a unique member given * and either want its children, its siblings, its parent, self, * ascendants, or descendants. */ if (restrictions.tree != null && (restrictions.tree & 8) != 8) { // Assumption 1: Treeop only uses Member if (restrictions.memberUniqueName == null) { throw new UnsupportedOperationException( "If a treeMask is given, we should also have a unique member name!"); } if ((restrictions.tree & 1) == 1) { // CHILDREN Olap4ldUtil._log.config("TreeOp:CHILDREN"); } if ((restrictions.tree & 2) == 2) { // SIBLINGS Olap4ldUtil._log.config("TreeOp:SIBLINGS"); } if ((restrictions.tree & 4) == 4) { // PARENT Olap4ldUtil._log.config("TreeOp:PARENT"); } if ((restrictions.tree & 16) == 16) { // DESCENDANTS Olap4ldUtil._log.config("TreeOp:DESCENDANTS"); } if ((restrictions.tree & 32) == 32) { // ANCESTORS Olap4ldUtil._log.config("TreeOp:ANCESTORS"); } throw new UnsupportedOperationException( "TreeOp and getLevelMember failed."); } else { // TreeOp = Self or null Olap4ldUtil._log.config("TreeOp:SELF"); // First, ask for all members // Get all members of hierarchies without levels, that simply // define // skos:hasTopConcept members with skos:notation. String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getMembers_topConcept.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> memberUris = executeSparqlSelectQuery(querytemplate, true); return memberUris; } } /** * For degenerated dimensions, we have to assume that either dim, hier, or * level are given. * * @return */ private List<Node[]> getDegeneratedMembers(Restrictions restrictions) { String additionalFilters = createFilterForRestrictions(restrictions); if (restrictions.tree != null && (restrictions.tree & 8) != 8) { // Assumption 1: Treeop only uses Member if (restrictions.memberUniqueName == null) { throw new UnsupportedOperationException( "If a treeMask is given, we should also have a unique member name!"); } if ((restrictions.tree & 1) == 1) { // CHILDREN Olap4ldUtil._log.config("TreeOp:CHILDREN"); } if ((restrictions.tree & 2) == 2) { // SIBLINGS Olap4ldUtil._log.config("TreeOp:SIBLINGS"); } if ((restrictions.tree & 4) == 4) { // PARENT Olap4ldUtil._log.config("TreeOp:PARENT"); } if ((restrictions.tree & 16) == 16) { // DESCENDANTS Olap4ldUtil._log.config("TreeOp:DESCENDANTS"); } if ((restrictions.tree & 32) == 32) { // ANCESTORS Olap4ldUtil._log.config("TreeOp:ANCESTORS"); } throw new UnsupportedOperationException( "TreeOp and getLevelMember failed."); } else { // TreeOp = Self or null Olap4ldUtil._log.config("TreeOp:SELF"); } String querytemplate = Olap4ldLinkedDataUtil .readInQueryTemplate("sesame_getMembers_degenerated.txt"); querytemplate = querytemplate.replace("{{{STANDARDFROM}}}", askForFrom(true)); querytemplate = querytemplate.replace("{{{TABLE_CAT}}}", TABLE_CAT); querytemplate = querytemplate.replace("{{{TABLE_SCHEM}}}", TABLE_SCHEM); querytemplate = querytemplate.replace("{{{FILTERS}}}", additionalFilters); List<Node[]> memberUris1 = executeSparqlSelectQuery(querytemplate, true); return memberUris1; } @SuppressWarnings("unused") private boolean isResourceAndNotLiteral(String resource) { return resource.startsWith("http:"); } private String createFilterForRestrictions(Restrictions restrictions) { String filter = ""; // We need to create a filter for the specific restriction filter += (restrictions.cubeNamePattern != null) ? " FILTER (?CUBE_NAME = <" + restrictions.cubeNamePattern + ">) " : ""; if (restrictions.dimensionUniqueName != null && !restrictions.dimensionUniqueName.toString().equals( Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)) { // filter += " filter(" // + createConditionConsiderEquivalences( // restrictions.dimensionUniqueName, new Variable( // "DIMENSION_UNIQUE_NAME")) + ") "; filter += " filter(str(?DIMENSION_UNIQUE_NAME) = \"" + restrictions.dimensionUniqueName + "\") "; } if (restrictions.hierarchyUniqueName != null && !restrictions.hierarchyUniqueName.toString().equals( Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)) { // This we do since ranges may be blank nodes, e.g., of ical:dtend // XXX: Workaround if (restrictions.hierarchyUniqueName.toString().startsWith("node")) { filter += ""; } else { // filter += " filter(" // + createConditionConsiderEquivalences( // restrictions.hierarchyUniqueName, new Variable( // "HIERARCHY_UNIQUE_NAME")) + ") "; filter += " filter(str(?HIERARCHY_UNIQUE_NAME) = \"" + restrictions.hierarchyUniqueName + "\") "; } } if (restrictions.levelUniqueName != null && !restrictions.levelUniqueName.toString().equals( Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)) { // This we do since ranges may be blank nodes, e.g., of ical:dtend // XXX: Workaround if (restrictions.hierarchyUniqueName != null && restrictions.hierarchyUniqueName.toString().startsWith( "node")) { filter += ""; } else { // filter += " filter(" // + createConditionConsiderEquivalences( // restrictions.levelUniqueName, new Variable( // "LEVEL_UNIQUE_NAME")) + ") "; filter += " filter(str(?LEVEL_UNIQUE_NAME) = \"" + restrictions.levelUniqueName + "\") "; } } if (restrictions.memberUniqueName != null && !restrictions.memberUniqueName.toString().equals( Olap4ldLinkedDataUtil.MEASURE_DIMENSION_NAME)) { // filter += " filter(" // + createConditionConsiderEquivalences( // restrictions.memberUniqueName, new Variable( // "MEMBER_UNIQUE_NAME")) + ") "; filter += " filter(str(?MEMBER_UNIQUE_NAME) = \"" + restrictions.memberUniqueName + "\") "; } return filter; } /** * This method creates a filter string for a Resource with a specific * variable for all equivalences. * * @param canonicalResource * @param variableName * @return */ @SuppressWarnings("unused") @Deprecated private String createConditionConsiderEquivalences(Node canonicalResource, Variable variable) { List<Node> equivalenceClass = getEquivalenceClassOfNode(canonicalResource); // Since we sometimes manually build member names, we have to check // on strings String[] filterString = new String[equivalenceClass.size()]; for (int i = 0; i < filterString.length; i++) { filterString[i] = "str(?" + variable + ") = \"" + equivalenceClass.get(i) + "\""; } return "(" + Olap4ldLinkedDataUtil.implodeArray(filterString, " || ") + ")"; } /** * Adds intermediary results to result. * * @param intermediaryresult * @param result */ private void addToResult(List<Node[]> intermediaryresult, List<Node[]> result) { boolean first = true; for (Node[] nodes : intermediaryresult) { if (first) { first = false; continue; } result.add(nodes); } } public List<Node[]> getSets(Restrictions restrictions) { // TODO Auto-generated method stub return null; } @Override public List<Node[]> executeOlapQuery(LogicalOlapQueryPlan queryplan) throws OlapException { // Log logical query plan Olap4ldUtil._log.config("Logical query plan: " + queryplan.toString()); Olap4ldUtil._log .info("Execute logical query plan: Generate physical query plan."); long time = System.currentTimeMillis(); // Create physical query plan this.execplan = createExecplan(queryplan); Olap4ldUtil._log .info("Execute logical query plan: Physical query plan: " + execplan.toString()); time = System.currentTimeMillis() - time; Olap4ldUtil._log .info("Execute logical query plan: Generate physical query plan finished in " + time + "ms."); Olap4ldUtil._log .info("Execute logical query plan: Execute physical query plan."); time = System.currentTimeMillis(); PhysicalOlapIterator resultIterator = this.execplan.getIterator(); /* * We create our own List<Node[]> result with every item * * Every Node[] contains for each dimension in the dimension list of the * metadata a member and for each measure in the measure list a value. */ List<Node[]> result = new ArrayList<Node[]>(); while (resultIterator.hasNext()) { Object nextObject = resultIterator.next(); // Will be Node[] Node[] node = (Node[]) nextObject; result.add(node); } time = System.currentTimeMillis() - time; Olap4ldUtil._log .info("Execute logical query plan: Execute physical query plan finished in " + time + "ms."); return result; } @Override public List<Node[]> executeOlapQuery(Cube cube, List<Level> slicesrollups, List<Position> dices, List<Measure> projections) throws OlapException { throw new UnsupportedOperationException( "Only LogicalOlapQuery trees can be executed!"); } /** * Empties store and locationMap. */ public void rollback() { initialize(); } }