/*******************************************************************************
* Copyright 2012 Pearson Education
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package org.semantictools.jsonld.impl;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.StringTokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A utility that scans a file and attempts to extract the URI for a file from its contents.
* @author Greg McFall
*
*/
public class FileScanner {
private static final Logger logger = LoggerFactory.getLogger(FileScanner.class);
private TurtleUriFinder turtleFinder = new TurtleUriFinder();
private XsdUriFinder xsdFinder = new XsdUriFinder();
/**
* Return the URI for the resource contained in the given file,
* or null if the URI cannot be determined from the contents
* of the file.
*/
public String extractURI(File file) throws IOException {
LdContentType format = LdContentType.guessContentType(file.getName());
UriFinder finder =
(format==LdContentType.TURTLE) ? turtleFinder :
(format==LdContentType.XSD) ? xsdFinder :
null;
if (finder == null) {
return null;
}
BufferedReader input = new BufferedReader(new FileReader(file));
try {
return extractURI(finder, input);
} finally {
safeClose(input);
}
}
private String extractURI(UriFinder finder, BufferedReader input) throws IOException {
String line = null;
while ( (line=input.readLine()) != null) {
String uri = finder.parseURI(line);
if (uri != null) return uri;
}
return null;
}
private void safeClose(Reader input) {
try {
input.close();
} catch (Throwable oops) {
logger.warn("Failed to close reader", oops);
}
}
interface UriFinder {
String parseURI(String line);
}
/**
* A very simple parser which is optimized for speed.
* Finds the first URI that matches a triple of the form:
* <pre>
* <???> * *:Ontology
* </pre>
* where ??? is the URI to be extracted, and * denotes any non-space text.
* The colon in front of "Ontology" may also be replaced by '#'.
* @author Greg McFall
*
*/
static class TurtleUriFinder implements UriFinder {
private String nextToken(StringTokenizer tokenizer) {
return tokenizer.hasMoreTokens() ? tokenizer.nextToken() : null;
}
@Override
public String parseURI(String line) {
String uri = null;
StringTokenizer t = new StringTokenizer(line, " ");
String subject = nextToken(t);
nextToken(t);
String object = nextToken(t);
if (
subject != null &&
object != null &&
subject.startsWith("<") &&
subject.endsWith(">") &&
(object.endsWith(":Ontology") || object.endsWith("#Ontology"))
) {
uri = subject.substring(1, subject.length()-1);
}
return uri;
}
}
static class XsdUriFinder implements UriFinder {
@Override
public String parseURI(String line) {
String uri = null;
if (line.contains("targetNamespace")) {
int start = line.indexOf('"')+1;
int end = line.lastIndexOf('"');
uri = line.substring(start, end);
}
return uri;
}
}
}