package org.olap4j.driver.olap4ld.helper;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.olap4j.driver.olap4ld.Olap4ldUtil;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.Update;
import org.openrdf.query.UpdateExecutionException;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.Rio;
import org.semanticweb.yars.nx.Literal;
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.Variable;
import org.semanticweb.yars.nx.parser.NxParser;
public class Olap4ldLinkedDataUtil {
/*
 * RDF prefixes. Both maps start out as null and are created and filled by
 * readInStandardPrefixes(). They are keyed by String.hashCode() of the
 * lookup value, not by the string itself.
 * NOTE(review): two different strings with the same hash code would
 * overwrite each other in these maps.
 */
// hashCode of a prefix -> namespace URI
static HashMap<Integer, String> standard_prefix2uri = null;
// hashCode of a namespace URI -> prefix
static HashMap<Integer, String> standard_uri2prefix = null;
/*
 * Name of the measure dimension. convertNodeToMDX and convertMDXtoURI pass
 * the string "Measures" through without MDX encoding or decoding.
 */
public static final String MEASURE_DIMENSION_NAME = "Measures";
/**
 * Encodes a URI so that it can be used as an identifier in MDX.
 *
 * URL encoding alone is not sufficient for MDX, so the value is handed to
 * {@code encodeSpecialMdxCharactersInNames}, which additionally replaces
 * the characters %, . and -.
 *
 * @param uri
 *            the plain URI string to be encoded for MDX
 * @return the encoded string
 */
static String encodeUriWithPrefix(String uri) {
    // Prefixes are not managed here, so the complete URI is encoded. The
    // same URI therefore always yields the same identifier.
    final String mdxSafeName = encodeSpecialMdxCharactersInNames(uri);
    return mdxSafeName;
}
/**
 * Asks a server where a resource is located, without following redirects.
 *
 * A request with {@code Accept: application/rdf+xml} is issued and the
 * {@code location} response header is read. If the header is present it is
 * turned into an absolute URL; the query string of the requested URL is
 * appended when the header is not already absolute. If the header is
 * absent, the requested URL itself is returned. In all cases a fragment
 * ({@code #...}) is cut off the result.
 *
 * @param uri
 *            the URL to ask for
 * @return the URL the resource is located at, without fragment
 * @throws MalformedURLException
 *             if the connection cannot be opened (the underlying
 *             {@link IOException} is attached as cause) or if the computed
 *             location is not a valid URL
 */
public static URL askForLocation(URL uri) throws MalformedURLException {
    Olap4ldUtil._log.config("Ask for location: " + uri + "...");
    String returnurlstring = null;
    // NOTE(review): this switches off redirect following for every
    // HttpURLConnection in the JVM, not only for this request. Kept as is
    // because other code may depend on the global setting.
    HttpURLConnection.setFollowRedirects(false);
    HttpURLConnection connection = null;
    try {
        connection = (HttpURLConnection) uri.openConnection();
        connection.setConnectTimeout(5000);
        connection.setRequestProperty("Accept", "application/rdf+xml");
        // Reading a header field implicitly sends the request.
        String header = connection.getHeaderField("location");
        String domain = uri.getHost();
        String protocol = uri.getProtocol();
        String port = "";
        if (uri.getPort() != 80 && uri.getPort() != -1) {
            port = ":" + uri.getPort() + "";
        }
        String path = uri.getPath();
        String query = uri.getQuery();
        if (query == null || query.equals("")) {
            query = "";
        } else {
            query = "?" + query;
        }
        // TODO: Could be that we need to check whether bogus comes out
        // (e.g., Not found).
        if (header != null) {
            // Header may be an absolute or a relative URL
            if (header.startsWith("http:") || header.startsWith("https:")) {
                // absolute URL
                returnurlstring = header;
            } else if (header.startsWith("/")) {
                // relative to the server root
                returnurlstring = protocol + "://" + domain + port + header
                        + query;
            } else {
                /*
                 * Relative URL. May be: same domain, same resource, one
                 * path segment up, same path segment (see
                 * http://de.wikipedia.org/wiki/Uniform_Resource_Locator#
                 * Relative_URL)
                 */
                returnurlstring = protocol + "://" + domain + port + path
                        + header + query;
            }
        } else {
            // Actually, not correct, but needed for linked-statistics
            if (path.startsWith("/../")) {
                path = path.replace("../", "");
            }
            returnurlstring = protocol + "://" + domain + port + path
                    + query;
        }
        // We should remove # uris
        if (returnurlstring.contains("#")) {
            int index = returnurlstring.lastIndexOf("#");
            returnurlstring = returnurlstring.substring(0, index);
        }
    } catch (IOException e) {
        // MalformedURLException has no cause constructor; attach the
        // original exception so the stack trace is not lost.
        MalformedURLException wrapped = new MalformedURLException(
                e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        // The response body is never read, so release the connection
        // explicitly instead of leaving the socket open.
        if (connection != null) {
            connection.disconnect();
        }
    }
    Olap4ldUtil._log.config("... result: " + returnurlstring);
    return new URL(returnurlstring);
}
/**
 * Decodes an MDX identifier back into the resource it stands for.
 *
 * The identifier is first decoded with
 * {@code decodeSpecialMdxCharactersInNames}. If the text before the last
 * colon is a known standard prefix, the prefix is replaced by its
 * namespace URI; otherwise the decoded text is used unchanged.
 *
 * @param encodedname
 *            the MDX-encoded name, without square brackets
 * @return a {@code Resource} for the decoded name
 */
static Node decodeUriWithPrefix(String encodedname) {
    // Load the standard prefixes on first use.
    if (standard_prefix2uri == null && standard_uri2prefix == null) {
        readInStandardPrefixes();
    }
    final String plainName = decodeSpecialMdxCharactersInNames(encodedname);
    // The last colon separates a possible prefix from the local name.
    final int colonPos = plainName.lastIndexOf(":");
    if (colonPos < 0) {
        return new Resource(plainName);
    }
    final String candidatePrefix = plainName.substring(0, colonPos);
    final String namespace = standard_prefix2uri.get(candidatePrefix
            .hashCode());
    if (namespace == null) {
        // Not a known prefix (e.g. the colon of "http:"): keep as is.
        return new Resource(plainName);
    }
    final String localName = plainName.substring(colonPos + 1);
    return new Resource(namespace + localName);
}
/**
 * Converts a node returned by the Linked Data engine into the string used
 * on the MDX side.
 *
 * Three cases are distinguished by the node's string form:
 * <ul>
 * <li>the text "null" becomes a proper Java {@code null};</li>
 * <li>the text "Measures" is returned unchanged;</li>
 * <li>anything else, URI or literal, is MDX-encoded and wrapped in square
 * brackets.</li>
 * </ul>
 *
 * @param node
 *            the node to convert
 * @return the MDX representation, or {@code null} for a "null" node
 */
public static String convertNodeToMDX(org.semanticweb.yars.nx.Node node) {
    final String text = node.toString();
    if (text.equals("null")) {
        // nx uses the string "null" where no value is available.
        return null;
    }
    if (text.equals("Measures")) {
        // Measures does not get encoded, can stay.
        return text;
    }
    // URI or literal alike: encode and add square brackets.
    return "[" + encodeUriWithPrefix(text) + "]";
}
/**
 * Converts an MDX identifier back into a node.
 *
 * "Measures" is returned as a {@code Literal} without conversion. Any other
 * value must be enclosed in square brackets; the brackets are removed and
 * the remainder is decoded with {@code decodeUriWithPrefix}.
 *
 * @param mdx
 *            the MDX identifier
 * @return the decoded node
 */
public static Node convertMDXtoURI(String mdx) {
    if (!mdx.equals("Measures")) {
        // Strip the square brackets, then decode. No conversion to a
        // canonical value happens here.
        final String unbracketed = removeSquareBrackets(mdx);
        return decodeUriWithPrefix(unbracketed);
    }
    // No conversion needed.
    return new Literal(mdx);
}
/**
 * Reads a query template from the classpath into a single string.
 *
 * The resource is looked up as {@code "/" + name} and decoded as UTF-8.
 * Lines are concatenated WITHOUT line separators, so the template ends up
 * on one line.
 *
 * @param name
 *            resource name relative to the classpath root
 * @return the template text, or {@code null} if reading fails with an
 *         {@link IOException}
 * @throws IllegalArgumentException
 *             if no resource with that name exists on the classpath
 */
public static String readInQueryTemplate(String name) {
    InputStream templateStream = Olap4ldLinkedDataUtil.class
            .getResourceAsStream("/" + name);
    if (templateStream == null) {
        // Fail with a message that names the resource instead of an
        // anonymous NullPointerException from the reader constructor.
        throw new IllegalArgumentException(
                "Query template not found on classpath: /" + name);
    }
    try {
        BufferedReader in = new BufferedReader(new InputStreamReader(
                templateStream, "UTF-8"));
        StringBuilder querytemplate = new StringBuilder();
        String readString;
        while ((readString = in.readLine()) != null) {
            querytemplate.append(readString);
        }
        return querytemplate.toString();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        // Always release the resource, also when reading failed.
        try {
            templateStream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}
/**
 * Loads the standard prefixes from the classpath resource
 * {@code /standardprefixes.csv}.
 *
 * Each line is expected to have the form {@code prefix;uri}. Two maps are
 * filled, one from prefix to URI and one from URI to prefix, so that for a
 * given prefix or URI the other can be looked up. Both maps are keyed by
 * the hash code of the lookup string. Lines with fewer than two fields
 * (for instance blank lines) are skipped. If the resource is missing or
 * cannot be read, the error is reported on standard error and the maps
 * keep whatever was loaded up to that point.
 */
private static void readInStandardPrefixes() {
    standard_prefix2uri = new HashMap<Integer, String>();
    standard_uri2prefix = new HashMap<Integer, String>();
    final String separator = ";";
    InputStream inputStream = Olap4ldLinkedDataUtil.class
            .getResourceAsStream("/standardprefixes.csv");
    if (inputStream == null) {
        System.err
                .println("Resource /standardprefixes.csv not found on classpath; no standard prefixes loaded.");
        return;
    }
    try {
        BufferedReader in = new BufferedReader(new InputStreamReader(
                inputStream, "UTF-8"));
        String readString;
        while ((readString = in.readLine()) != null) {
            String[] prefixuricombination = readString.split(separator);
            if (prefixuricombination.length < 2) {
                // Blank or malformed line: skip it rather than abort the
                // whole load with an ArrayIndexOutOfBoundsException.
                continue;
            }
            // Add to our prefix hashmaps
            standard_prefix2uri.put(prefixuricombination[0].hashCode(),
                    prefixuricombination[1]);
            standard_uri2prefix.put(prefixuricombination[1].hashCode(),
                    prefixuricombination[0]);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the resource, also when reading failed.
        try {
            inputStream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}
/**
 * Turns a node into a SPARQL variable.
 *
 * SPARQL does not allow all characters in variable names, e.g.,
 * ?sdmx-measure:obsValue. Therefore every character of the node's string
 * form that is not an ASCII letter or digit is dropped, and a question
 * mark is put in front.
 *
 * @param uri
 *            the node whose string form is the basis of the variable name
 * @return the resulting variable
 */
public static Variable makeUriToVariable(Node uri) {
    // Keep letters and digits only.
    final String alphanumericOnly = uri.toString().replaceAll(
            "[^a-zA-Z0-9]+", "");
    final String variableName = "?" + alphanumericOnly;
    return new Variable(variableName);
}
/**
 * Builds the SPARQL PREFIX declarations for all standard prefixes.
 *
 * Prefixes are used to store MDX compliant names for metadata. When such
 * names are used in SPARQL queries, the prefixes either have to be declared
 * explicitly or the names have to be translated into URIs. The list of
 * standard prefixes is small, so it can be declared at the beginning of a
 * SPARQL query even if the query does not use all of them.
 *
 * Still in use for OLAP queries, although other queries are built from
 * templates.
 *
 * @return one line of the form {@code PREFIX p: <uri> } per standard
 *         prefix, each terminated by a newline
 */
public static String getStandardPrefixes() {
    // Load the standard prefixes on first use.
    if (standard_prefix2uri == null && standard_uri2prefix == null) {
        readInStandardPrefixes();
    }
    final StringBuilder declarations = new StringBuilder();
    for (String namespace : standard_prefix2uri.values()) {
        // The reverse map is keyed by the hash code of the URI.
        final String prefixName = standard_uri2prefix.get(namespace
                .hashCode());
        declarations.append("PREFIX ").append(prefixName).append(": <")
                .append(namespace).append("> \n");
    }
    return declarations.toString();
}
/**
 * Produces a caption from either a proper caption or a URI.
 *
 * If {@code caption} is {@code null}, empty or the text "null",
 * {@code alternative} is used in its place. A value that does not contain
 * "http://" is returned unchanged. Otherwise the part starting at the
 * first '#' (or, if there is none, at the last '/') is taken, including
 * that separator character, and passed through
 * {@code HumaniseCamelCase.humanise}.
 *
 * @param caption
 *            preferred caption, may be {@code null}
 * @param alternative
 *            fallback used when {@code caption} carries no value
 * @return the caption to display
 */
public static String makeCaption(String caption, String alternative) {
    String label = caption;
    if (label == null || label.equals("") || label.equals("null")) {
        label = alternative;
    }
    if (!label.contains("http://")) {
        return label;
    }
    // If there is a # take everything from there, else from the last /.
    final int hashPos = label.indexOf('#');
    final int cutPos = (hashPos != -1) ? hashPos : label.lastIndexOf('/');
    final String localPart = (cutPos != -1) ? label.substring(cutPos)
            : label;
    // resolve CamelCase
    return new HumaniseCamelCase().humanise(localPart);
}
/**
 * Reads an input stream completely and returns its content as a string.
 * Needed for parsing nx triples.
 *
 * The bytes are decoded as UTF-8. The stream is closed before the method
 * returns, also when reading fails.
 *
 * @param is
 *            the stream to read, may be {@code null}
 * @return the decoded content, or the empty string if {@code is} is
 *         {@code null}
 * @throws IOException
 *             if reading or closing the stream fails
 */
public static String convertStreamToString(InputStream is)
        throws IOException {
    if (is == null) {
        return "";
    }
    final Writer collected = new StringWriter();
    try {
        final Reader decoded = new BufferedReader(new InputStreamReader(is,
                "UTF-8"));
        final char[] chunk = new char[1024];
        // read() reports -1 once the end of the stream is reached.
        for (int count = decoded.read(chunk); count != -1; count = decoded
                .read(chunk)) {
            collected.write(chunk, 0, count);
        }
    } finally {
        is.close();
    }
    return collected.toString();
}
/**
 * Returns the SKOS property used to represent members of a dimension.
 *
 * TODO: For the moment it is hardcoded per dimension property whether
 * skos:notation or skos:exactMatch applies. Unknown dimension properties
 * yield the variable "?anyskos".
 *
 * @param dimensionProperty
 *            URI of the dimension property
 * @return "skos:notation", "skos:exactMatch" or "?anyskos"
 */
@SuppressWarnings("unused")
@Deprecated
private static String getSkosPropertyOfDimension(String dimensionProperty) {
    final String[] notationProperties = {
            "http://www.w3.org/2002/12/cal/ical#dtstart",
            "http://www.w3.org/2002/12/cal/ical#dtend",
            "http://purl.org/dc/terms/date",
            "http://edgarwrap.ontologycentral.com/vocab/edgar#segment",
            "http://ffiecwrap.ontologycentral.com/vocab/ffiec-concepts#RCONA001" };
    // TODO: Add more.
    final String[] exactMatchProperties = {
            "http://edgarwrap.ontologycentral.com/vocab/edgar#issuer",
            "http://edgarwrap.ontologycentral.com/vocab/edgar#subject",
            "http://purl.org/linked-data/cube#dataSet",
            "http://ffiecwrap.ontologycentral.com/vocab/ffiec#issuer",
            "http://rdf.freebase.com/ns/business.business_operation.industry" };
    for (String known : notationProperties) {
        if (dimensionProperty.equals(known)) {
            return "skos:notation";
        }
    }
    for (String known : exactMatchProperties) {
        if (dimensionProperty.equals(known)) {
            return "skos:exactMatch";
        }
    }
    // Unknown dimension property: leave the SKOS property open.
    return "?anyskos";
}
/**
 * Joins the elements of a string array into one string.
 *
 * @author Hendrik Will, imwill.com
 * @param inputArray
 *            array which contains the strings to join
 * @param glueString
 *            string placed between two neighbouring elements
 * @return all array elements separated by the glue string; the empty
 *         string for an empty array
 */
public static String implodeArray(String[] inputArray, String glueString) {
    if (inputArray.length == 0) {
        return "";
    }
    final StringBuilder joined = new StringBuilder();
    for (int idx = 0; idx < inputArray.length; idx++) {
        // The glue goes between elements, so not before the first one.
        if (idx > 0) {
            joined.append(glueString);
        }
        joined.append(inputArray[idx]);
    }
    return joined.toString();
}
/**
 * Removes the enclosing square brackets from a name.
 *
 * @param name
 *            a name of the form {@code [text]}
 * @return the text between the opening and the closing bracket
 * @throws UnsupportedOperationException
 *             if the name does not start with '[' and end with ']'
 */
private static String removeSquareBrackets(String name) {
    final boolean bracketed = name.startsWith("[") && name.endsWith("]");
    if (bracketed) {
        return name.substring(1, name.length() - 1);
    }
    throw new UnsupportedOperationException(
            "Name not surrounded by square brackets!");
}
/**
 * Makes sure that a name of a multidimensional element does not carry any
 * MDX special characters.
 *
 * The name is URL-encoded as UTF-8; afterwards every "%" is replaced by
 * "XXX", every "." by "YYY" and every "-" by "ZZZ".
 *
 * Note: You can use a bash command for translating a URI into an MDX
 * applicable form:
 *
 * echo "http://lod.gesis.org/lodpilot/ALLBUS/geo.rdf#list" | sed
 * 's/\./YYY/g' | sed 's/-/ZZZ/g' | sed 's/%/XXX/g'
 *
 * @param name
 *            the name to encode
 * @return the encoded name, or {@code null} if the UTF-8 encoding is
 *         reported as unsupported
 */
private static String encodeSpecialMdxCharactersInNames(String name) {
    String urlEncoded;
    try {
        urlEncoded = URLEncoder.encode(name, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        return null;
    }
    // The order matters: "%" first, then ".", then "-".
    return urlEncoded.replace("%", "XXX").replace(".", "YYY")
            .replace("-", "ZZZ");
}
/**
 * Reverses {@code encodeSpecialMdxCharactersInNames}.
 *
 * Every "XXX" is replaced by "%", every "YYY" by "." and every "ZZZ" by
 * "-"; the result is then URL-decoded as UTF-8.
 *
 * @param name
 *            the MDX-encoded name
 * @return the decoded name, or {@code null} if the UTF-8 encoding is
 *         reported as unsupported
 */
static String decodeSpecialMdxCharactersInNames(String name) {
    final String urlEncoded = name.replace("XXX", "%").replace("YYY", ".")
            .replace("ZZZ", "-");
    try {
        return URLDecoder.decode(urlEncoded, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Makes the use of the nx parser easier by mapping each name of a header
 * row to the column number at which it appears.
 *
 * The key of an entry is the string form of the header node with a
 * question mark in front.
 *
 * @param header
 *            the header row of a result
 * @return map from "?" + name to the zero-based column index
 */
public static Map<String, Integer> getNodeResultFields(
        org.semanticweb.yars.nx.Node[] header) {
    final Map<String, Integer> columnByName = new HashMap<String, Integer>();
    int column = 0;
    for (org.semanticweb.yars.nx.Node variable : header) {
        // TODO: For now, the question mark is added again, although it
        // would not be needed.
        columnByName.put("?" + variable.toString(), column);
        column++;
    }
    return columnByName;
}
/**
 * Splits an N3 rule of the form <code>{ body } =&gt; { head }</code> and
 * parses both sides with the nx parser.
 *
 * All curly braces are removed, the text is split at the first "=&gt;",
 * and each side is parsed on its own. Every parsed statement is also
 * printed to standard output.
 *
 * @param n3rule
 *            the rule text
 * @return a list with two entries: the statements of the body at index 0
 *         and the statements of the head at index 1
 * @throws IOException
 *             if a side of the rule cannot be read
 * @throws IllegalArgumentException
 *             if the rule does not contain "=&gt;"
 */
public static List<List<Node[]>> splitandparseN3rule(String n3rule)
        throws IOException {
    // Remove {, }
    String stripped = n3rule.replace("{", "").replace("}", "");
    // Take => as split.
    int arrowIndex = stripped.indexOf("=>");
    if (arrowIndex < 0) {
        // Without this check substring() would fail with an index error
        // that does not say what is wrong with the input.
        throw new IllegalArgumentException(
                "N3 rule does not contain '=>': " + n3rule);
    }
    String ruleBody = stripped.substring(0, arrowIndex);
    String ruleHead = stripped.substring(arrowIndex + 2);
    List<List<Node[]>> result = new ArrayList<List<Node[]>>();
    result.add(parseNxStatements(ruleBody));
    result.add(parseNxStatements(ruleHead));
    return result;
}

/**
 * Parses nx text into statements and echoes each statement to standard
 * output, one line per statement.
 */
private static List<Node[]> parseNxStatements(String nx) throws IOException {
    InputStream stream = new ByteArrayInputStream(nx.getBytes("UTF-8"));
    try {
        NxParser nxp = new NxParser(stream);
        List<Node[]> statements = new ArrayList<Node[]>();
        while (nxp.hasNext()) {
            Node[] nxx = nxp.next();
            statements.add(nxx);
            for (Node node : nxx) {
                System.out.print(node.toN3() + " ");
            }
            System.out.println();
        }
        return statements;
    } finally {
        stream.close();
    }
}
/**
 * Transforms a SPARQL XML result into nx by applying the stylesheet
 * {@code /xml2nx.xsl} from the classpath.
 *
 * The transformation is done with the factory class
 * {@code net.sf.saxon.TransformerFactoryImpl}. The complete result is
 * buffered in memory. The given {@code xml} stream is not closed here.
 *
 * @param xml
 *            the SPARQL XML result to transform
 * @return a stream over the nx output
 * @throws RuntimeException
 *             if the stylesheet cannot be compiled or the transformation
 *             fails; the {@link TransformerException} is attached as cause
 */
public static InputStream transformSparqlXmlToNx(InputStream xml) {
    javax.xml.transform.TransformerFactory tf = javax.xml.transform.TransformerFactory
            .newInstance("net.sf.saxon.TransformerFactoryImpl", Thread
                    .currentThread().getContextClassLoader());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // TODO: This is not very generic.
    InputStream stylesheet = Olap4ldLinkedDataUtil.class
            .getResourceAsStream("/xml2nx.xsl");
    try {
        Transformer t = tf.newTransformer(new StreamSource(stylesheet));
        StreamSource ssource = new StreamSource(xml);
        StreamResult sresult = new StreamResult(baos);
        Olap4ldUtil._log
                .config("...applying xslt to transform xml to nx...");
        t.transform(ssource, sresult);
        // We need to make an InputStream out of the OutputStream
        return new ByteArrayInputStream(baos.toByteArray());
    } catch (TransformerException e) {
        // Keep the original exception as cause so its stack trace
        // survives; it is no longer printed separately here.
        throw new RuntimeException(e.getMessage(), e);
    } finally {
        if (stylesheet != null) {
            try {
                stylesheet.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
}
/**
 * Dumps the RDF graph of a repository into a file.
 *
 * Any exception raised while dumping is printed to standard error and not
 * propagated.
 *
 * @param repo
 *            the repository whose content is exported
 * @param file
 *            name of the file the serialization is written to
 * @param outform
 *            the RDF serialization format for the dump
 */
public static void dumpRDF(Repository repo, String file, RDFFormat outform) {
    try {
        // dump the graph in the specified format
        System.out.println("\n==GRAPH DUMP==\n");
        FileOutputStream fos = new FileOutputStream(file);
        try {
            RepositoryConnection con = repo.getConnection();
            try {
                RDFWriter w = Rio.createWriter(outform, fos);
                con.export(w);
            } finally {
                con.close();
            }
        } finally {
            // Close the file so the handle is released after the dump.
            fos.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Executes a SPARQL update against a repository.
 *
 * Errors while obtaining the connection, preparing the update or executing
 * it are printed to standard error and not propagated.
 *
 * @param repo
 *            the repository to update
 * @param query
 *            the SPARQL update statement
 * @param caching
 *            not used by this method
 */
public static void sparqlRepoUpdate(Repository repo, String query,
        boolean caching) {
    Olap4ldUtil._log.config("SPARQL update query: " + query);
    try {
        RepositoryConnection con = repo.getConnection();
        try {
            Update update = con.prepareUpdate(QueryLanguage.SPARQL, query);
            update.execute();
        } finally {
            // The update has finished once execute() returns, so the
            // connection can be released instead of being left open.
            con.close();
        }
    } catch (RepositoryException e) {
        e.printStackTrace();
    } catch (MalformedQueryException e) {
        e.printStackTrace();
    } catch (UpdateExecutionException e1) {
        e1.printStackTrace();
    }
}
}