/******************************************************************************* * Copyright 2012 Pearson Education * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package org.semantictools.jsonld.impl; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.semantictools.jsonld.LdAsset; import org.semantictools.jsonld.LdAssetManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class LdAssetManagerImpl implements LdAssetManager { private static Logger logger = LoggerFactory.getLogger(LdAssetManagerImpl.class); private static final int MATCHES_SCHEMA = 0x11; private static final int MATCHES_XMLNS = 0x111; private static final int MATCHES_CONTEXT = 0x2; private static Pattern schemaPattern = Pattern.compile("<[^:]*:schema"); private boolean eagerLoading=true; @Override public LdAsset findAsset(String assetURI) { return loadAsset(assetURI); } protected LdAsset loadAsset(String assetURI) { try { URL url = new URL(assetURI); LdAsset ns = readAsset(assetURI, url); return ns; } catch (Throwable oops) { logger.warn("Failed to download asset: " + assetURI); return null; } } /** * Returns true if this manager will eagerly load the content * of an asset whenever the asset is accessed via the {@link LdAssetManagerImpl#findAsset(String) findAsset} * method. */ public boolean isEagerLoading() { return eagerLoading; } /** * Specifies whether the content of an asset should be loaded eagerly from * the its location when the asset is accessed from this manager. * @param truth */ protected void enableEagerLoading(boolean truth) { eagerLoading = truth; } /** * Scan the asset from the given location and attempt to infer the LdContentType from the contents. * @param uri The URI of the asset, or null if the URI is not known. If the URI is null, then * this method will attempt to extract the URI from the asset content. * @param location The physical location of the asset, which may be different from its URI. * @return The LdAsset from the specified location. If eager loading is enabled, the content * of the asset will be available from {@link LdAsset#getContent()}. * @throws IOException */ protected LdAsset readAsset(String uri, URL location) throws IOException { URLConnection connection = location.openConnection(); if (connection instanceof HttpURLConnection) { HttpURLConnection http = (HttpURLConnection) connection; int status = http.getResponseCode(); if (status != HttpURLConnection.HTTP_OK) { throw new IOException("Unable to access " + uri + ": HTTP Status " + status); } } InputStream input = connection.getInputStream(); LdContentType format = LdContentType.UNKNOWN; StringBuilder builder = eagerLoading ? new StringBuilder() : null; int state = 0; BufferedReader reader = new BufferedReader(new InputStreamReader(input)); try { String line = null; while ((line=reader.readLine()) != null) { Matcher matcher = schemaPattern.matcher(line); if (matcher.matches()) { state = state | MATCHES_SCHEMA; } if (((state&MATCHES_SCHEMA)==MATCHES_SCHEMA) && line.contains("xmlns")) { state = state | MATCHES_XMLNS; } if (((state&MATCHES_XMLNS)==MATCHES_XMLNS) && line.contains("simpleType")) { format = LdContentType.XSD; } if (line.contains("@context")) { state = state | MATCHES_CONTEXT; format = LdContentType.JSON_LD_CONTEXT; } if (line.contains("@prefix")) { format = LdContentType.TURTLE; } if (format==LdContentType.UNKNOWN && line.contains("http://www.w3.org/2002/07/owl#")) { format = LdContentType.TURTLE; } if (uri == null) { uri = parseURI(format, line); } if (eagerLoading) { builder.append(line); builder.append('\n'); } } } finally { safeClose(reader); } LdAsset asset = new LdAsset(uri, format, location); if (eagerLoading) { String content = builder.toString(); asset.setContent(content); } return asset; } private String parseURI(LdContentType format, String line) { String uri = getJsonContextURI(format, line); if (uri != null) return uri; uri = getTurtleUri(format, line); if (uri != null) return uri; uri = getXsdUri(format, line); return uri; } private String getXsdUri(LdContentType format, String line) { String uri = null; if (format==LdContentType.UNKNOWN || format==LdContentType.XSD) { int mark = line.indexOf("targetNamespace"); if (mark >= 0) { int start = line.indexOf('"', mark + 14 )+1; int end = line.indexOf('"', start); uri = line.substring(start, end); } } return uri; } private String nextToken(StringTokenizer tokenizer) { return tokenizer.hasMoreTokens() ? tokenizer.nextToken() : null; } private String getTurtleUri(LdContentType format, String line) { String uri = null; if (format==LdContentType.UNKNOWN || format==LdContentType.TURTLE) { StringTokenizer t = new StringTokenizer(line, " "); String subject = nextToken(t); nextToken(t); String object = nextToken(t); if ( subject != null && object != null && subject.startsWith("<") && subject.endsWith(">") && (object.endsWith(":Ontology") || object.endsWith("#Ontology")) ) { uri = subject.substring(1, subject.length()-1); } } return uri; } private String getJsonContextURI(LdContentType format, String line) { if (format != LdContentType.JSON_LD_CONTEXT && line.contains("\"@id\"")) { int colon = line.indexOf(':'); if (colon > 0) { int begin = line.indexOf('"', colon+1)+1; if (begin > 0) { int end = line.indexOf('"', begin+1); if (end > 0) { return line.substring(begin, end); } } } } return null; } protected void safeClose(Reader input) { try { input.close(); } catch (Throwable oops) { logger.warn("Failed to close connection", oops); } } protected void load(LdAsset asset) { try { if (asset.getContent() == null) { StringBuilder builder = new StringBuilder(); String line = null; BufferedReader reader = asset.getReader(); try { while ( (line=reader.readLine()) != null) { builder.append(line); builder.append('\n'); } } finally { safeClose(reader); } String content = builder.toString(); asset.setContent(content); } } catch (Throwable oops) { logger.warn("Failed to load asset: " + asset.getURI(), oops); } } @Override public LdAsset findAsset(String assetURI, LdContentType format) { return loadAsset(assetURI, format); } protected LdAsset loadAsset(String assetURI, LdContentType format) { if (format == null) { return loadAsset(assetURI); } if (format == LdContentType.ENHANCED_CONTEXT) { // It would be nice to implement content negotiation for retrieving an enhanced // context. However, at the time this code was written, there was no standard // content type for an enhanced context. So we'll just return null. // To get an enhanced context, the caller will need to get the unenhanced context // and then use an LdContextEnhancer to enhance it. return null; } LdAsset asset = loadAsset(assetURI); if ( (asset!=null) && (asset.getFormat()!=format)) { asset = null; } return asset; } }