/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.util;
import java.io.* ;
import java.net.URL ;
import java.nio.charset.Charset ;
import java.nio.charset.StandardCharsets ;
import org.apache.jena.JenaRuntime ;
import org.apache.jena.shared.JenaException ;
import org.apache.jena.shared.WrappedIOException ;
public class FileUtils
{
// Language/syntax names returned by guessLang.
/** Language name for RDF/XML */
public static final String langXML = "RDF/XML" ;
/** Language name for abbreviated RDF/XML */
public static final String langXMLAbbrev = "RDF/XML-ABBREV" ;
/** Language name for N-Triples */
public static final String langNTriple = "N-TRIPLE" ;
/** Language name for N3 */
public static final String langN3 = "N3" ;
/** Language name for Turtle */
public static final String langTurtle = "TURTLE" ;
// The UTF-8 charset used by the asUTF8 reader/writer factories in this class.
static Charset utf8 = StandardCharsets.UTF_8 ;
/**
 * Create a reader that uses UTF-8 encoding.
 *
 * @param in the byte stream to read from
 * @return a Reader over {@code in}; if the runtime reports
 *         {@code JenaRuntime.featureNoCharset}, the reader is built without an
 *         explicit charset
 */
static public Reader asUTF8(InputStream in) {
    final boolean noCharset = JenaRuntime.runUnder(JenaRuntime.featureNoCharset) ;
    if ( ! noCharset ) {
        // A decoder is passed rather than the Charset itself: GNUClassPath (0.20)
        // apparently failed when handed a charset, but not when handed the
        // decoder or the name of the charset. Reported and fixed.
        return new InputStreamReader(in, utf8.newDecoder()) ;
    }
    return new InputStreamReader(in) ;
}
/**
 * Create a buffered reader that uses UTF-8 encoding.
 *
 * @param in the byte stream to read from
 * @return a BufferedReader wrapping the reader produced by {@link #asUTF8(InputStream)}
 */
static public BufferedReader asBufferedUTF8(InputStream in)
{
    return new BufferedReader(asUTF8(in)) ;
}
/**
 * Create a writer that uses UTF-8 encoding.
 *
 * @param out the byte stream to write to
 * @return a Writer over {@code out}; if the runtime reports
 *         {@code JenaRuntime.featureNoCharset}, the writer is built without an
 *         explicit charset
 */
static public Writer asUTF8(OutputStream out) {
    final boolean noCharset = JenaRuntime.runUnder(JenaRuntime.featureNoCharset) ;
    if ( ! noCharset )
        return new OutputStreamWriter(out, utf8.newEncoder()) ;
    return new OutputStreamWriter(out) ;
}
/**
 * Create a print writer that uses UTF-8 encoding.
 *
 * @param out the byte stream to write to
 * @return a PrintWriter layered on the writer produced by {@link #asUTF8(OutputStream)}
 */
static public PrintWriter asPrintWriterUTF8(OutputStream out) {
    Writer utf8Writer = asUTF8(out) ;
    return new PrintWriter(utf8Writer) ;
}
/** Guess the language/type of model data from the extension of a name.
 *
 * <ul>
 * <li> If the name ends ".n3", it is assumed to be N3</li>
 * <li> If the name ends ".nt", it is assumed to be N-Triples</li>
 * <li> If the name ends ".ttl", it is assumed to be Turtle</li>
 * <li> If the name ends ".rdf", it is assumed to be RDF/XML</li>
 * <li> If the name ends ".owl", it is assumed to be RDF/XML</li>
 * </ul>
 * The extension is obtained with {@link #getFilenameExt(String)}, which
 * lower-cases it before it is compared.
 *
 * @param name URL or file name to base the guess on
 * @param otherwise Default guess
 * @return String Guessed syntax - or the default supplied
 */
public static String guessLang( String name, String otherwise )
{
    switch ( getFilenameExt( name ) )
    {
        case "n3":
            return langN3 ;
        case "nt":
            return langNTriple ;
        case "ttl":
            return langTurtle ;
        case "rdf":
        case "owl":
            return langXML ;
        default:
            return otherwise ;
    }
}
/** Guess the language/type of model data, defaulting to RDF/XML.
 *
 * <ul>
 * <li> If the URI ends ".rdf", it is assumed to be RDF/XML</li>
 * <li> If the URI ends ".nt", it is assumed to be N-Triples</li>
 * <li> If the URI ends ".ttl", it is assumed to be Turtle</li>
 * <li> If the URI ends ".owl", it is assumed to be RDF/XML</li>
 * </ul>
 * Delegates to {@link #guessLang(String, String)} with {@link #langXML} as
 * the fallback.
 *
 * @param urlStr URL to base the guess on
 * @return String Guessed syntax - default is RDF/XML
 */
public static String guessLang(String urlStr)
{
    final String fallback = langXML ;
    return guessLang(urlStr, fallback) ;
}
/**
 * Turn a file: URL or file name into a plain file name.
 *
 * @param filenameOrURI a file name or a "file:" URL
 * @return the plain file name, or {@code null} if {@link #isFile(String)}
 *         says the argument is not a file reference. %-decoding is applied
 *         only when the argument started with "file:".
 */
public static String toFilename(String filenameOrURI)
{
    // Requirements of Windows and Linux differ slightly here:
    //   Windows wants "file:///c:/foo" => "c:/foo"
    //   but Linux only wants "file:///foo" => "/foo"
    // Pragmatically, a path of "/c:/foo" or "/foo" works everywhere,
    // but not "//c:/foo" or "///c:/foo", else IKVM thinks it is a
    // network path on Windows.
    if ( !isFile(filenameOrURI) )
        return null ;

    final String filePrefix = "file:" ;
    if ( ! filenameOrURI.startsWith(filePrefix) )
        // No "file:" scheme: returned untouched, with no %-decoding.
        return filenameOrURI ;

    // Work out how much of the front to discard.
    int skip ;
    if ( filenameOrURI.startsWith("file:///") )
        // Absolute file name: keep exactly one leading slash.
        skip = "file://".length() ;
    else if ( filenameOrURI.startsWith("file://localhost/") )
        // NB Leaves the leading slash on.
        skip = "file://localhost".length() ;
    else
        // Just trim off the "file:"
        skip = filePrefix.length() ;

    return decodeFileName(filenameOrURI.substring(skip)) ;
}
/**
 * Decode the %-escapes ("%XX", two hex digits) in a file name.
 * <p>
 * A run of consecutive escapes is treated as a sequence of octets. If the
 * run is well-formed UTF-8 it is decoded as UTF-8 (so "%C3%A9" becomes a
 * single e-acute); otherwise each octet becomes the character with that
 * code, which is what this method has always done for single escapes.
 * <p>
 * A '%' that is not followed by two hexadecimal digits (for example at the
 * end of the string, or "%zz", or "%-1") is copied through unchanged
 * instead of causing an exception.
 *
 * @param s the possibly %-encoded name; must not be null
 * @return the decoded name; {@code s} itself when it contains no '%'
 */
public static String decodeFileName(String s)
{
    if ( s.indexOf('%') < 0 )
        return s ;
    int len = s.length() ;
    StringBuilder sbuff = new StringBuilder(len) ;
    // Each escape takes three characters, so this bounds the longest run.
    byte[] octets = new byte[len / 3] ;
    int i = 0 ;
    while ( i < len )
    {
        int octet = escapedOctet(s, i) ;
        if ( octet < 0 )
        {
            // Ordinary character, or a '%' that does not start a valid escape.
            sbuff.append(s.charAt(i)) ;
            i++ ;
            continue ;
        }
        // Gather the whole run of adjacent escapes before decoding, so that
        // multi-octet UTF-8 sequences are seen together.
        int n = 0 ;
        do
        {
            octets[n++] = (byte)octet ;
            i += 3 ;
            octet = escapedOctet(s, i) ;
        } while ( octet >= 0 ) ;
        sbuff.append(decodeOctets(octets, n)) ;
    }
    return sbuff.toString() ;
}

/** Value (0-255) of the "%XX" escape starting at idx, or -1 if there is no valid escape there. */
private static int escapedOctet(String s, int idx)
{
    if ( idx + 2 >= s.length() || s.charAt(idx) != '%' )
        return -1 ;
    int hi = hexValue(s.charAt(idx + 1)) ;
    int lo = hexValue(s.charAt(idx + 2)) ;
    if ( hi < 0 || lo < 0 )
        return -1 ;
    return (hi << 4) | lo ;
}

/** Value of an ASCII hexadecimal digit, or -1 if ch is not one (signs are rejected). */
private static int hexValue(char ch)
{
    if ( ch >= '0' && ch <= '9' ) return ch - '0' ;
    if ( ch >= 'a' && ch <= 'f' ) return ch - 'a' + 10 ;
    if ( ch >= 'A' && ch <= 'F' ) return ch - 'A' + 10 ;
    return -1 ;
}

/** Decode the first n octets as UTF-8, falling back to one character per octet when they are not valid UTF-8. */
private static String decodeOctets(byte[] octets, int n)
{
    try
    {
        // A fresh decoder reports malformed input by throwing, which is
        // what lets us detect non-UTF-8 runs.
        return StandardCharsets.UTF_8.newDecoder()
                               .decode(java.nio.ByteBuffer.wrap(octets, 0, n))
                               .toString() ;
    }
    catch (java.nio.charset.CharacterCodingException ex)
    {
        StringBuilder sb = new StringBuilder(n) ;
        for ( int k = 0 ; k < n ; k++ )
            sb.append((char)(octets[k] & 0xFF)) ;
        return sb.toString() ;
    }
}
/**
 * Turn a plain filename into a "file:" URL.
 *
 * @param filename a file name, or something that is already a "file:" URL
 * @return {@code filename} unchanged when it already starts with "file:"
 *         (compared case-insensitively); otherwise a "file:///..." URL
 *         built from the absolute form of the file name
 */
public static String toURL(String filename)
{
    // regionMatches copes with strings shorter than the prefix and, unlike
    // the previous "length() > 5" test, also recognises the bare string
    // "file:" as already being a file: URL.
    if ( filename.regionMatches(true, 0, "file:", 0, 5) )
        return filename ;
    // Convert via File; java.net.URI appears to do the right thing, viz:
    //   convert to an absolute path,
    //   convert all % to %25,
    //   then convert all ' ' to %20.
    // It quite probably does more, e.g. ? and #.
    // But the result has only one / after "file:", not three, so the
    // "file:" is stripped and "file://" is put in front instead.
    return "file://" + new File(filename).toURI().toString().substring(5) ;
}
/** Check whether 'name' is possibly a file reference.
 * <p>
 * A name counts as a possible file when {@link #getScheme(String)} finds
 * no scheme, when the scheme is exactly "file", or when the scheme is a
 * single character (a Windows drive such as "c:").
 *
 * @param name the file name or URI to examine
 * @return boolean False if clearly not a filename.
 */
public static boolean isFile(String name)
{
    final String scheme = getScheme(name) ;
    return scheme == null               // No URI scheme - treat as filename
        || scheme.equals("file")        // file: URI scheme
        || scheme.length() == 1 ;       // Windows: "c:" etc
}
/** Check whether a name is an absolute URI (has a scheme name), as judged
 * by {@link #getScheme(String)}.
 *
 * @param name the string to examine
 * @return boolean True if there is a scheme name
 */
public static boolean isURI(String name)
{
    final String scheme = getScheme(name) ;
    return scheme != null ;
}
/**
 * Extract the scheme name from the front of a string.
 * <p>
 * The scheme is whatever precedes the first ':' provided every one of those
 * characters is an ASCII letter. Digits, '+', '-' and '.' are not accepted,
 * so a string such as "svn+ssh://host" has no scheme as far as this method
 * is concerned.
 *
 * @param uri the string to examine
 * @return the text before the first ':' (possibly empty), or {@code null}
 *         if there is no ':' or a non-letter comes before it
 */
public static String getScheme(String uri)
{
    final int colon = uri.indexOf(':') ;
    if ( colon < 0 )
        return null ;
    for ( int k = 0 ; k < colon ; k++ )
    {
        if ( ! isASCIILetter(uri.charAt(k)) )
            // Some illegal character before the ':'
            return null ;
    }
    return uri.substring(0, colon) ;
}

/** True when ch is one of a-z or A-Z. */
private static boolean isASCIILetter(char ch)
{
    boolean lower = 'a' <= ch && ch <= 'z' ;
    boolean upper = 'A' <= ch && ch <= 'Z' ;
    return lower || upper ;
}
/**
 * Get the directory part of a filename, as reported by
 * {@link File#getParent()}.
 *
 * @param filename the file name to split
 * @return Directory name, or {@code null} when the name has no parent part
 */
public static String getDirname(String filename)
{
    return new File(filename).getParent() ;
}
/**
 * Get the suffix part of a file name or a URL in file-like format.
 * <p>
 * The suffix is the text after the last '.', provided that '.' comes after
 * the last '/' or '\'. It is returned in lower case.
 *
 * @param filename the file name or URL
 * @return the lower-cased suffix without the dot, or "" if there is none
 */
public static String getFilenameExt( String filename)
{
    final int lastSeparator = Math.max( filename.lastIndexOf( '/' ), filename.lastIndexOf( '\\' ) ) ;
    final int lastDot = filename.lastIndexOf( '.' ) ;
    if ( lastDot <= lastSeparator )
        // No dot at all, or the last dot belongs to a directory name.
        return "" ;
    return filename.substring( lastDot + 1 ).toLowerCase() ;
}
/**
    Answer a File, in the temporary directory, whose name is built from the
    prefix, the next value of the internal counter and the suffix, and which
    does not currently exist. The file itself is not created here; it is
    only registered for deletion on exit.
    @param prefix the leading part of the file name
    @param suffix the trailing part of the file name
    @return the temporary File
*/
public static File tempFileName( String prefix, String suffix )
{
    File candidate ;
    do
    {
        // Keep drawing new numbers until the name is unused.
        candidate = new File( getTempDirectory(), prefix + randomNumber() + suffix ) ;
    } while ( candidate.exists() ) ;
    candidate.deleteOnExit() ;
    return candidate ;
}
/**
    Answer a File naming a freshly-created directory in the temporary directory.
    The directory is registered for deletion on exit.
    TODO handle threading issues, mkdir failure, and better cleanup
    @param prefix the prefix for the directory name
    @return a File naming the new directory
    @throws JenaException if the directory could not be created
*/
public static File getScratchDirectory( String prefix )
{
    File dir ;
    do
    {
        // Keep drawing new numbers until the name is unused.
        dir = new File( getTempDirectory(), prefix + randomNumber() ) ;
    } while ( dir.exists() ) ;
    if ( ! dir.mkdir() )
        throw new JenaException( "mkdir failed on " + dir ) ;
    dir.deleteOnExit() ;
    return dir ;
}
/**
    Answer the value of the "java.io.tmpdir" system property, as read
    through JenaRuntime.
*/
public static String getTempDirectory()
{
    final String tmpDir = JenaRuntime.getSystemProperty( "java.io.tmpdir" ) ;
    return tmpDir ;
}
// Last value handed out by randomNumber().
private static int counter = 0;

/**
    Answer the next number for building temporary names. Despite the name,
    this is a simple incrementing counter (1, 2, 3, ...); there is no
    synchronization on it.
*/
private static int randomNumber()
{
    counter = counter + 1 ;
    return counter ;
}
// TODO Replace with a FileManager
/**
    Answer a BufferedReader that reads from the named resource file as
    UTF-8. The resource is located with
    {@link #openResourceFileAsStream(String)}; any IOException from opening
    it is rethrown wrapped in a WrappedIOException.
    @param filename the name of the resource or file
    @return a BufferedReader over the resource
*/
public static BufferedReader openResourceFile( String filename )
{
    try
    {
        InputStream resource = FileUtils.openResourceFileAsStream( filename ) ;
        Reader decoded = new InputStreamReader( resource, StandardCharsets.UTF_8 ) ;
        return new BufferedReader( decoded ) ;
    }
    catch (IOException e)
    {
        throw new WrappedIOException( e ) ;
    }
}
/**
 * Open a resource file for reading.
 * <p>
 * The name is tried, in order, against: the system class loader; this
 * class's loader with a leading "/" (absolute path); this class's loader
 * with the name as given (relative); and finally the file system.
 *
 * @param filename the resource or file name
 * @return an open stream on the first location that has the resource
 * @throws FileNotFoundException if nothing was found on the classpath and
 *         the name cannot be opened as a file either
 */
public static InputStream openResourceFileAsStream(String filename)
    throws FileNotFoundException {
    InputStream found = ClassLoader.getSystemResourceAsStream(filename) ;
    if ( found != null )
        return found ;

    // Try local loader with absolute path
    found = FileUtils.class.getResourceAsStream("/" + filename) ;
    if ( found != null )
        return found ;

    // Try local loader, relative, just in case
    found = FileUtils.class.getResourceAsStream(filename) ;
    if ( found != null )
        return found ;

    // Can't find it on the classpath, so try relative to the current directory.
    // Will throw a security exception under an applet but there's no other choice left.
    return new FileInputStream(filename) ;
}
// TODO Replace with FileManager
/**
    Answer a buffered UTF-8 reader over the content named by urlStr. The
    string is first opened as a URL; if it is not a well-formed URL it is
    opened as a plain file name instead. IO failures are rethrown wrapped in
    a WrappedIOException.
    @param urlStr a URL or a file name
    @return a BufferedReader over the content
*/
public static BufferedReader readerFromURL( String urlStr )
{
    InputStream source ;
    try
    {
        source = new URL(urlStr).openStream() ;
    }
    catch (java.net.MalformedURLException e)
    {
        // Try as a plain filename.
        try
        {
            source = new FileInputStream( urlStr ) ;
        }
        catch (FileNotFoundException f)
        {
            throw new WrappedIOException( f ) ;
        }
    }
    catch (IOException e)
    {
        throw new WrappedIOException( e ) ;
    }
    return asBufferedUTF8( source ) ;
}
/** Read a whole file as UTF-8.
 * <p>
 * Opens the file and hands the stream to
 * {@link #readWholeFileAsUTF8(InputStream)}.
 *
 * @param filename name of the file to read
 * @return String the content of the file
 * @throws IOException if the file cannot be opened or read
 */
public static String readWholeFileAsUTF8(String filename) throws IOException {
    return readWholeFileAsUTF8(new FileInputStream(filename)) ;
}
/** Read a whole stream as UTF-8.
 * <p>
 * The stream is wrapped in a buffered UTF-8 reader, which is closed before
 * this method returns.
 *
 * @param in InputStream to be read
 * @return String the content of the stream
 * @throws IOException if reading fails
 */
public static String readWholeFileAsUTF8(InputStream in) throws IOException
{
    final int bufferSize = 1024 ;
    try ( Reader buffered = new BufferedReader(asUTF8(in), bufferSize) ) {
        String content = readWholeFileAsUTF8(buffered) ;
        return content ;
    }
}
/** Read everything a reader has to offer into a String.
 *
 * @param r the reader to drain; it is not closed here
 * @return String The whole content
 * @throws IOException if reading fails
 */
// Private worker as we are trying to force UTF-8.
private static String readWholeFileAsUTF8(Reader r) throws IOException
{
    final StringBuilder content = new StringBuilder(1024) ;
    final char[] chunk = new char[1024] ;
    // Loop on read() rather than ready(): .ready does not work with HttpClient streams.
    // Stops at end of stream (-1) and also on any other non-positive count.
    int count = r.read(chunk) ;
    while ( count > 0 ) {
        content.append(chunk, 0, count) ;
        count = r.read(chunk) ;
    }
    return content.toString() ;
}
}