/*
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License, version 2 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/gpl-2.0.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
*
* Copyright 2006 - 2016 Pentaho Corporation. All rights reserved.
*/
package org.pentaho.platform.repository;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOCase;
import org.pentaho.platform.api.repository2.unified.RepositoryFile;
import org.springframework.util.StringUtils;
/**
* General filename and filepath manipulation utilities for the Pentaho Repository. NOTE: these methods will work
* independently of the underlying operating system. Most methods will translate a backslash (\) to a forward slash (/)
* but should not be depended upon to make that translation.
* <p/>
* This class defines six components within a filename (example /dev/project/file.txt):
* <ul>
* <li>the prefix - /</li>
* <li>the path - dev/project/</li>
* <li>the full path - /dev/project/</li>
* <li>the name - file.txt</li>
* <li>the base name - file</li>
* <li>the extension - txt</li>
* </ul>
* Note that this class works best if directory filenames end with a separator. If you omit the last separator, it is
* impossible to determine if the filename corresponds to a file or a directory. As a result, we have chosen to say it
* corresponds to a file.
* <p/>
* This class only supports Pentaho Repository (Unix) style names. Prefixes are matched as follows:
*
* <pre>
* a/b/c.txt --> "" --> relative
* /a/b/c.txt --> "/" --> absolute
* </pre>
* <p/>
* Origin of code: Apache Commons IO 2.1
*
* @author <a href="mailto:dkincade@pentaho.com">David M. Kincade</a>
* @since Pentaho 5.0
*/
public class RepositoryFilenameUtils {
/** Repository path separator as a single character, taken from {@link RepositoryFile#SEPARATOR}. */
private static final char SEPARATOR = RepositoryFile.SEPARATOR.charAt( 0 );
/** Escape character used when the system property is absent or does not hold exactly one character. */
private static final Character DEFAULT_ESCAPE_CHAR = '%';
/** Name of the system property that may override the escape character. */
private static final String ESCAPE_CHAR_SYSTEM_PROPERTY = "pentaho.repository.client.escapeChar"; //$NON-NLS-1$
/** Escape character used by {@code escape} and {@code unescape}; assigned once in the static initializer. */
private static Character escapeChar;
static {
  // Accept the override only when, after trimming, it is exactly one character long. Anything else
  // (unset, empty, whitespace-only, multi-character) falls back to the default. The character is read from
  // the trimmed value so that surrounding whitespace can never be picked up as the escape character.
  final String escapeCharStr = System.getProperty( ESCAPE_CHAR_SYSTEM_PROPERTY );
  final String trimmed = ( escapeCharStr == null ) ? "" : escapeCharStr.trim();
  if ( trimmed.length() == 1 ) {
    escapeChar = trimmed.charAt( 0 );
  } else {
    escapeChar = DEFAULT_ESCAPE_CHAR;
  }
}
/**
 * Private constructor: this class only exposes static utility methods, so instances should NOT be constructed in
 * standard programming.
 */
private RepositoryFilenameUtils() {
}
// -----------------------------------------------------------------------
/**
 * Brings a repository path into a standard form by resolving single and double dot path steps.
 * <p/>
 * The call is delegated to {@link FilenameUtils#normalize(String, boolean)} with the Unix-separator flag set to
 * {@code true}. A trailing slash is retained, repeated slashes are merged into one, a single dot segment is dropped
 * and a double dot removes itself together with the preceding segment. When a double dot has no preceding segment
 * to remove, the path is invalid and <code>null</code> is returned.
 *
 * <pre>
 * /foo// --> /foo/
 * /foo/./ --> /foo/
 * /foo/../bar --> /bar
 * /foo/../bar/ --> /bar/
 * /foo/../bar/../baz --> /baz
 * //foo//./bar --> /foo/bar
 * /../ --> null
 * ../foo --> null
 * foo/bar/.. --> foo/
 * foo/../../bar --> null
 * foo/../bar --> bar
 * </pre>
 *
 * @param filename
 *          the filename to normalize, null returns null
 * @return the normalized filename, or null if invalid
 */
public static String normalize( final String filename ) {
  final String normalized = FilenameUtils.normalize( filename, true );
  return normalized;
}
/**
 * Normalizes a path like {@link #normalize(String)} and can additionally force a leading separator.
 * <p/>
 * The input is trimmed of leading and trailing whitespace before it is normalized. If {@code leadingSlash} is
 * {@code true} and the normalized result does not already start with the repository separator, one is prepended.
 * An invalid path (one that {@link #normalize(String)} rejects) still yields <code>null</code>; no separator is
 * added in that case.
 *
 * <pre>
 * /foo// --> /foo/
 * /foo/./ --> /foo/
 * /foo/../bar --> /bar
 * /foo/../bar/ --> /bar/
 * /foo/../bar/../baz --> /baz
 * //foo//./bar --> /foo/bar
 * /../ --> null
 * ../foo --> null
 * foo/../../bar --> null
 * </pre>
 *
 * @param filename
 *          the filename to normalize, null returns null
 * @param leadingSlash
 *          will ensure there is a leading slash on the result if {@code true}
 * @return the normalized filename, or null if invalid
 */
public static String normalize( final String filename, final boolean leadingSlash ) {
  if ( filename == null ) {
    return null;
  }
  final String result = normalize( filename.trim() );
  // Nothing to add when no leading slash was requested, the path is invalid, or the slash is already there.
  if ( !leadingSlash || result == null || result.startsWith( RepositoryFile.SEPARATOR ) ) {
    return result;
  }
  return SEPARATOR + result;
}
// -----------------------------------------------------------------------
/**
 * Brings a repository path into a standard form and strips any final directory separator.
 * <p/>
 * The call is delegated to {@link FilenameUtils#normalizeNoEndSeparator(String, boolean)} with the Unix-separator
 * flag set to {@code true}. Repeated slashes are merged into one, a single dot segment is dropped and a double dot
 * removes itself together with the preceding segment. When a double dot has no preceding segment to remove, the
 * path is invalid and <code>null</code> is returned.
 *
 * <pre>
 * /foo// --> /foo
 * /foo/./ --> /foo
 * /foo/../bar --> /bar
 * /foo/../bar/ --> /bar
 * /foo/../bar/../baz --> /baz
 * //foo//./bar --> /foo/bar
 * /../ --> null
 * ../foo --> null
 * foo/bar/.. --> foo
 * foo/../../bar --> null
 * foo/../bar --> bar
 * </pre>
 *
 * @param filename
 *          the filename to normalize, null returns null
 * @return the normalized filename, or null if invalid
 */
public static String normalizeNoEndSeparator( final String filename ) {
  return ( filename == null ) ? null : FilenameUtils.normalizeNoEndSeparator( filename, true );
}
// -----------------------------------------------------------------------
/**
 * Concatenates a filename to a base path using normal command line style rules.
 * <p/>
 * The effect is equivalent to the resultant directory after changing directory to the first argument, followed by
 * changing directory to the second argument.
 * <p/>
 * The first argument is the base path, the second is the path to concatenate. The returned path is always
 * normalized via {@link #normalize(String)}, thus <code>..</code> is handled.
 * <p/>
 * If <code>fullFilenameToAdd</code> is absolute (has a prefix), it is normalized and returned on its own.
 * If it is blank, the base path is returned normalized and without a trailing separator. Otherwise the two paths
 * are joined, normalized and returned.
 * <p/>
 *
 * <pre>
 * /foo/ + bar --> /foo/bar
 * /foo + bar --> /foo/bar
 * /foo + /bar --> /bar
 * /foo/a/ + ../bar --> /foo/bar
 * /foo/ + ../../bar --> null
 * /foo/ + /bar --> /bar
 * /foo/.. + /bar --> /bar
 * /foo + bar/c.txt --> /foo/bar/c.txt
 * /foo/c.txt + bar --> /foo/c.txt/bar (!)
 * </pre>
 *
 * (!) Note that the first parameter must be a path. If it ends with a name, then the name will be built into the
 * concatenated path. If this might be a problem, use {@link #getFullPath(String)} on the base path argument.
 *
 * @param basePath
 *          the base path to attach to, always treated as a path
 * @param fullFilenameToAdd
 *          the filename (or path) to attach to the base, null returns null
 * @return the concatenated path, or null if invalid
 */
public static String concat( final String basePath, final String fullFilenameToAdd ) {
  if ( fullFilenameToAdd == null ) {
    return null;
  }
  if ( org.apache.commons.lang.StringUtils.isBlank( fullFilenameToAdd ) ) {
    // Nothing to append: the result is the normalized base path alone (a null base yields null).
    return normalizeNoEndSeparator( basePath );
  }
  // Colons are replaced before the prefix is measured; presumably so that a name containing ':' is not
  // interpreted as a drive-style prefix by the underlying library - TODO confirm.
  final int prefix = getPrefixLength( fullFilenameToAdd.replace( ":", "_" ) );
  if ( prefix < 0 ) {
    return null;
  }
  if ( prefix > 0 ) {
    // The path to add is absolute, so the base path is irrelevant.
    return RepositoryFilenameUtils.normalize( fullFilenameToAdd );
  }
  if ( basePath == null ) {
    return null;
  }
  if ( basePath.length() == 0 ) {
    return RepositoryFilenameUtils.normalize( fullFilenameToAdd );
  }
  String baseP = normalizeNoEndSeparator( basePath );
  if ( baseP == null ) {
    // The base path itself is invalid; without this check the literal text "null" would be joined into the result.
    return null;
  }
  if ( String.valueOf( SEPARATOR ).equals( baseP ) ) {
    // The root is represented by the separator added below; avoid producing a double separator.
    baseP = "";
  }
  return RepositoryFilenameUtils.normalize( baseP + SEPARATOR + fullFilenameToAdd );
}
// -----------------------------------------------------------------------
/**
 * Rewrites every separator in the path to the Repository (Unix) separator, the forward slash.
 * <p/>
 * Delegates to {@link FilenameUtils#separatorsToUnix(String)}.
 *
 * @param path
 *          the path to be changed, null ignored
 * @return the updated path
 */
public static String separatorsToRepository( final String path ) {
  final String converted = FilenameUtils.separatorsToUnix( path );
  return converted;
}
// -----------------------------------------------------------------------
/**
 * Determines how many characters of the filename make up its prefix.
 * <p/>
 * The prefix length includes the first slash in the full filename if applicable. Thus, it is possible that the
 * length returned is greater than the length of the input string. Delegates to
 * {@link FilenameUtils#getPrefixLength(String)}.
 *
 * <pre>
 * a/b/c.txt --> "" --> relative
 * /a/b/c.txt --> "/" --> absolute
 * </pre>
 * <p/>
 *
 * @param filename
 *          the filename to find the prefix in, null returns -1
 * @return the length of the prefix, -1 if invalid or null
 */
public static int getPrefixLength( final String filename ) {
  final int prefixLength = FilenameUtils.getPrefixLength( filename );
  return prefixLength;
}
/**
 * Locates the last directory separator character in the filename.
 * <p/>
 * The position of the last forward or backslash is returned. Delegates to
 * {@link FilenameUtils#indexOfLastSeparator(String)}.
 * <p/>
 *
 * @param filename
 *          the filename to find the last path separator in, null returns -1
 * @return the index of the last separator character, or -1 if there is no such character
 */
public static int indexOfLastSeparator( final String filename ) {
  final int separatorIndex = FilenameUtils.indexOfLastSeparator( filename );
  return separatorIndex;
}
/**
 * Locates the last extension separator character, which is a dot.
 * <p/>
 * A dot only counts as an extension separator when no directory separator follows it; see
 * {@link #indexOfLastSeparator(String)}. Delegates to {@link FilenameUtils#indexOfExtension(String)}.
 * <p/>
 *
 * @param filename
 *          the filename to find the last extension separator in, null returns -1
 * @return the index of the last extension separator character, or -1 if there is no such character
 */
public static int indexOfExtension( final String filename ) {
  final int extensionIndex = FilenameUtils.indexOfExtension( filename );
  return extensionIndex;
}
// -----------------------------------------------------------------------
/**
 * Extracts the prefix of a full filename.
 * <p/>
 * The prefix includes the first slash in the full filename where applicable. Delegates to
 * {@link FilenameUtils#getPrefix(String)}.
 *
 * <pre>
 * a/b/c.txt --> "" --> relative
 * /a/b/c.txt --> "/" --> absolute
 * </pre>
 * <p/>
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the prefix of the file, null if invalid
 */
public static String getPrefix( final String filename ) {
  final String prefix = FilenameUtils.getPrefix( filename );
  return prefix;
}
/**
 * Extracts the path portion of a full filename, leaving out the prefix.
 * <p/>
 * The method is purely text based: it returns the text up to and including the last forward or backslash.
 * Delegates to {@link FilenameUtils#getPath(String)}.
 *
 * <pre>
 * a.txt --> ""
 * a/b/c --> a/b/
 * a/b/c/ --> a/b/c/
 * /a.txt --> ""
 * /a/b/c --> a/b/
 * /a/b/c/ --> a/b/c/
 * </pre>
 * <p/>
 * The prefix is dropped from the result. Use {@link #getFullPath(String)} to keep it.
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 */
public static String getPath( final String filename ) {
  final String path = FilenameUtils.getPath( filename );
  return path;
}
/**
 * Extracts the path portion of a full filename, leaving out both the prefix and the final directory separator.
 * <p/>
 * The method is purely text based: it returns the text before the last forward or backslash. Delegates to
 * {@link FilenameUtils#getPathNoEndSeparator(String)}.
 *
 * <pre>
 * a.txt --> ""
 * a/b/c --> a/b
 * a/b/c/ --> a/b/c
 * /a.txt --> ""
 * /a/b/c --> a/b
 * /a/b/c/ --> a/b/c
 * </pre>
 * <p/>
 * The prefix is dropped from the result. Use {@link #getFullPathNoEndSeparator(String)} to keep it.
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 */
public static String getPathNoEndSeparator( final String filename ) {
  final String path = FilenameUtils.getPathNoEndSeparator( filename );
  return path;
}
/**
 * Extracts the full path of a full filename, that is the prefix followed by the path.
 * <p/>
 * The method is purely text based: it returns the text up to and including the last forward or backslash.
 * Delegates to {@link FilenameUtils#getFullPath(String)}.
 *
 * <pre>
 * a.txt --> ""
 * a/b/c --> a/b/
 * a/b/c/ --> a/b/c/
 * /a.txt --> /
 * /a/b/c --> /a/b/
 * /a/b/c/ --> /a/b/c/
 * </pre>
 * <p/>
 * The output will be the same irrespective of the machine that the code is running on.
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 */
public static String getFullPath( final String filename ) {
  final String fullPath = FilenameUtils.getFullPath( filename );
  return fullPath;
}
/**
 * Extracts the full path of a full filename (prefix followed by path) without the final directory separator.
 * <p/>
 * The method is purely text based: it returns the text before the last forward or backslash. Delegates to
 * {@link FilenameUtils#getFullPathNoEndSeparator(String)}.
 *
 * <pre>
 * a.txt --> ""
 * a/b/c --> a/b
 * a/b/c/ --> a/b/c
 * /a.txt --> /
 * /a/b/c --> /a/b
 * /a/b/c/ --> /a/b/c
 * </pre>
 * <p/>
 * The output will be the same irrespective of the machine that the code is running on.
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 */
public static String getFullPathNoEndSeparator( final String filename ) {
  final String fullPath = FilenameUtils.getFullPathNoEndSeparator( filename );
  return fullPath;
}
/**
 * Extracts the name of a full filename, leaving out the path.
 * <p/>
 * The text after the last forward or backslash is returned. Delegates to {@link FilenameUtils#getName(String)}.
 *
 * <pre>
 * a/b/c.txt --> c.txt
 * a.txt --> a.txt
 * a/b/c --> c
 * a/b/c/ --> ""
 * </pre>
 * <p/>
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the name of the file without the path, or an empty string if none exists
 */
public static String getName( final String filename ) {
  final String name = FilenameUtils.getName( filename );
  return name;
}
/**
 * Extracts the base name of a full filename, leaving out the full path and the extension.
 * <p/>
 * The text after the last forward or backslash and before the last dot is returned. Delegates to
 * {@link FilenameUtils#getBaseName(String)}.
 *
 * <pre>
 * a/b/c.txt --> c
 * a.txt --> a
 * a/b/c --> c
 * a/b/c/ --> ""
 * </pre>
 * <p/>
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the name of the file without the path and extension, or an empty string if none exists
 */
public static String getBaseName( final String filename ) {
  final String baseName = FilenameUtils.getBaseName( filename );
  return baseName;
}
/**
 * Extracts the extension of a filename.
 * <p/>
 * The extension is the text after the last dot, provided no directory separator follows that dot. Delegates to
 * {@link FilenameUtils#getExtension(String)}.
 *
 * <pre>
 * foo.txt --> "txt"
 * a/b/c.jpg --> "jpg"
 * a/b.txt/c --> ""
 * a/b/c --> ""
 * </pre>
 * <p/>
 *
 * @param filename
 *          the filename to retrieve the extension of.
 * @return the extension of the file or an empty string if none exists.
 */
public static String getExtension( final String filename ) {
  final String extension = FilenameUtils.getExtension( filename );
  return extension;
}
// -----------------------------------------------------------------------
/**
 * Strips the extension from a filename.
 * <p/>
 * The result is the text before the last dot, provided no directory separator follows that dot. Delegates to
 * {@link FilenameUtils#removeExtension(String)}.
 *
 * <pre>
 * foo.txt --> foo
 * a/b/c.jpg --> a/b/c
 * a/b/c --> a/b/c
 * a.b/c --> a.b/c
 * </pre>
 * <p/>
 *
 * @param filename
 *          the filename to query, null returns null
 * @return the filename minus the extension
 */
public static String removeExtension( final String filename ) {
  final String withoutExtension = FilenameUtils.removeExtension( filename );
  return withoutExtension;
}
// -----------------------------------------------------------------------
/**
 * Tests two filenames for exact equality.
 * <p/>
 * The filenames are compared as given, without normalization, so this is merely a null-safe case-sensitive equals.
 * Delegates to {@link FilenameUtils#equals(String, String, boolean, IOCase)}.
 *
 * @param filename1
 *          the first filename to query, may be null
 * @param filename2
 *          the second filename to query, may be null
 * @return true if the filenames are equal, null equals null
 * @see org.apache.commons.io.IOCase#SENSITIVE
 */
public static boolean equals( final String filename1, final String filename2 ) {
  final boolean normalizeFirst = false;
  return FilenameUtils.equals( filename1, filename2, normalizeFirst, IOCase.SENSITIVE );
}
// -----------------------------------------------------------------------
/**
 * Tests two filenames for equality after both have been normalized.
 * <p/>
 * Normalization and comparison are both performed by
 * {@link FilenameUtils#equals(String, String, boolean, IOCase)}, with normalization requested and a
 * case-sensitive check.
 *
 * @param filename1
 *          the first filename to query, may be null
 * @param filename2
 *          the second filename to query, may be null
 * @return true if the filenames are equal, null equals null
 * @see IOCase#SENSITIVE
 */
public static boolean equalsNormalized( String filename1, String filename2 ) {
  final boolean normalizeFirst = true;
  return FilenameUtils.equals( filename1, filename2, normalizeFirst, IOCase.SENSITIVE );
}
// -----------------------------------------------------------------------
/**
 * Tests whether the filename carries the given extension.
 * <p/>
 * The extension is the text after the last dot, provided no directory separator follows that dot. The extension
 * check is case-sensitive on all platforms. Delegates to {@link FilenameUtils#isExtension(String, String)}.
 *
 * @param filename
 *          the filename to query, null returns false
 * @param extension
 *          the extension to check for, null or empty checks for no extension
 * @return true if the filename has the specified extension
 */
public static boolean isExtension( final String filename, final String extension ) {
  final boolean matches = FilenameUtils.isExtension( filename, extension );
  return matches;
}
/**
 * Tests whether the filename carries one of the given extensions (array variant).
 * <p/>
 * The extension is the text after the last dot, provided no directory separator follows that dot. The extension
 * check is case-sensitive on all platforms. Delegates to {@code FilenameUtils.isExtension}.
 *
 * @param filename
 *          the filename to query, null returns false
 * @param extensions
 *          the extensions to check for, null checks for no extension
 * @return true if the filename is one of the extensions
 */
public static boolean isExtension( final String filename, final String[] extensions ) {
  final boolean matches = FilenameUtils.isExtension( filename, extensions );
  return matches;
}
/**
 * Tests whether the filename carries one of the given extensions (collection variant).
 * <p/>
 * The extension is the text after the last dot, provided no directory separator follows that dot. The extension
 * check is case-sensitive on all platforms. Delegates to {@code FilenameUtils.isExtension}.
 *
 * @param filename
 *          the filename to query, null returns false
 * @param extensions
 *          the extensions to check for, null checks for no extension
 * @return true if the filename is one of the extensions
 */
public static boolean isExtension( final String filename, final Collection extensions ) {
  final boolean matches = FilenameUtils.isExtension( filename, extensions );
  return matches;
}
// -----------------------------------------------------------------------
/**
 * Tests a filename against a wildcard pattern, always case-sensitively.
 * <p/>
 * The pattern uses '?' for a single wildcard character and '*' for multiple wildcard characters, as often found on
 * Dos/Unix command lines. Delegates to {@link FilenameUtils#wildcardMatch(String, String, IOCase)}.
 *
 * <pre>
 * wildcardMatch("c.txt", "*.txt") --> true
 * wildcardMatch("c.txt", "*.jpg") --> false
 * wildcardMatch("a/b/c.txt", "a/b/*") --> true
 * wildcardMatch("c.txt", "*.???") --> true
 * wildcardMatch("c.txt", "*.????") --> false
 * </pre>
 *
 * @param filename
 *          the filename to match on
 * @param wildcardMatcher
 *          the wildcard string to match against
 * @return true if the filename matches the wildcard string
 * @see IOCase#SENSITIVE
 */
public static boolean wildcardMatch( final String filename, final String wildcardMatcher ) {
  final IOCase caseRule = IOCase.SENSITIVE;
  return FilenameUtils.wildcardMatch( filename, wildcardMatcher, caseRule );
}
/**
 * Percent-encodes (as specified in {@code IUnifiedRepository}) the characters of {@code name} that appear in
 * {@code reservedChars}. The escape character itself is always encoded as well. Each encoded character is written
 * as the escape character followed by two upper-case hexadecimal digits. Assumes only ASCII characters in
 * reservedChars.
 *
 * @param name
 *          name to escape
 * @param reservedChars
 *          chars within name to escape; must not contain the escape character
 * @return escaped name
 * @throws IllegalArgumentException
 *           if either argument is null, or if {@code reservedChars} contains the escape character
 */
public static String escape( final String name, final List<Character> reservedChars ) {
  if ( name == null || reservedChars == null ) {
    throw new IllegalArgumentException();
  }
  if ( reservedChars.contains( escapeChar ) ) {
    // the escape character cannot be used for marking escape sequences if it is itself illegal
    throw new IllegalArgumentException();
  }
  final List<Character> charsToEncode = new ArrayList<Character>( reservedChars );
  charsToEncode.add( escapeChar ); // a literal escape character must be encoded too
  final StringBuilder escaped = new StringBuilder( name.length() * 2 );
  for ( final char ch : name.toCharArray() ) {
    if ( !charsToEncode.contains( ch ) ) {
      escaped.append( ch );
      continue;
    }
    // high and low hexadecimal digit of the character code
    final char high = Character.toUpperCase( Character.forDigit( ch >> 4, 16 ) );
    final char low = Character.toUpperCase( Character.forDigit( ch & 0xF, 16 ) );
    escaped.append( escapeChar ).append( high ).append( low );
  }
  return escaped.toString();
}
/**
 * Reverts modifications of {@link #escape(String, List)} such that for all {@code String}s {@code t},
 * {@code t.equals(unescape(escape(t)))}. Assumes only ASCII characters have been escaped.
 * <p/>
 * An escape character followed by two hexadecimal digits is replaced by the character with that code. An escape
 * character that is not followed by two hexadecimal digits is copied to the output unchanged.
 *
 * @param name
 *          name to unescape
 * @return unescaped name
 * @throws IllegalArgumentException
 *           if {@code name} is null
 */
public static String unescape( final String name ) {
  if ( name == null ) {
    throw new IllegalArgumentException();
  }
  final char esc = escapeChar;
  final int length = name.length();
  final StringBuilder buffer = new StringBuilder( length );
  // Scan by index instead of repeatedly copying the remaining text.
  int pos = 0; // start of the text not yet copied to the buffer
  int i = name.indexOf( esc, pos );
  // An escape sequence needs two characters after the escape character; anything shorter is left as-is.
  while ( i > -1 && i + 2 < length ) {
    buffer.append( name, pos, i );
    final int a = Character.digit( name.charAt( i + 1 ), 16 );
    final int b = Character.digit( name.charAt( i + 2 ), 16 );
    if ( a > -1 && b > -1 ) {
      buffer.append( (char) ( a * 16 + b ) );
      pos = i + 3;
    } else {
      // Not a valid escape sequence: keep the escape character literally and continue right after it.
      buffer.append( esc );
      pos = i + 1;
    }
    i = name.indexOf( esc, pos );
  }
  buffer.append( name, pos, length );
  return buffer.toString();
}
}