/*
 * This program is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License, version 2 as published by the Free Software
 * Foundation.
 *
 * You should have received a copy of the GNU General Public License along with this
 * program; if not, you can obtain a copy at http://www.gnu.org/licenses/gpl-2.0.html
 * or from the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 *
 * Copyright 2006 - 2016 Pentaho Corporation. All rights reserved.
 */

package org.pentaho.platform.repository;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOCase;
import org.pentaho.platform.api.repository2.unified.RepositoryFile;
import org.springframework.util.StringUtils;

/**
 * General filename and filepath manipulation utilities for the Pentaho Repository. NOTE: these methods will work
 * independently of the underlying operating system. Most methods will translate a backslash (\) to a forward slash (/)
 * but should not be depended upon to make that translation.
 * <p/>
 * This class defines six components within a filename (example /dev/project/file.txt):
 * <ul>
 * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - /dev/project/</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * Note that this class works best if directory filenames end with a separator. If you omit the last separator, it is
 * impossible to determine if the filename corresponds to a file or a directory. As a result, we have chosen to say it
 * corresponds to a file.
 * <p/>
 * This class only supports Pentaho Repository (Unix) style names. Prefixes are matched as follows:
 *
 * <pre>
 * a/b/c.txt           --> ""          --> relative
 * /a/b/c.txt          --> "/"         --> absolute
 * </pre>
 * <p/>
 * Origin of code: Apache Commons IO 2.1
 *
 * @author <a href="mailto:dkincade@pentaho.com">David M. Kincade</a>
 * @since Pentaho 5.0
 */
public class RepositoryFilenameUtils {

  /** Repository path separator as a single character (first character of {@link RepositoryFile#SEPARATOR}). */
  private static final char SEPARATOR = RepositoryFile.SEPARATOR.charAt( 0 );

  /** Escape character used by {@link #escape(String, List)} when no valid override is configured. */
  private static final Character DEFAULT_ESCAPE_CHAR = '%';

  /** Name of the system property that may override the escape character. */
  private static final String ESCAPE_CHAR_SYSTEM_PROPERTY = "pentaho.repository.client.escapeChar"; //$NON-NLS-1$

  /** Escape character in effect for this JVM; resolved once when the class is initialized. */
  private static final Character escapeChar = resolveEscapeChar();

  /**
   * Instances should NOT be constructed in standard programming.
   */
  private RepositoryFilenameUtils() {
  }

  /**
   * Determines the escape character to use. The value of the system property
   * {@code pentaho.repository.client.escapeChar} is honoured only when, after trimming surrounding whitespace, it is
   * exactly one character long; a missing, empty or multi-character value falls back to the default ({@code %}).
   *
   * @return the escape character, never null
   */
  private static Character resolveEscapeChar() {
    final String configured = System.getProperty( ESCAPE_CHAR_SYSTEM_PROPERTY );
    if ( configured != null ) {
      final String trimmed = configured.trim();
      if ( trimmed.length() == 1 ) {
        // read from the trimmed value so that surrounding whitespace is never picked as the escape character
        return Character.valueOf( trimmed.charAt( 0 ) );
      }
    }
    return DEFAULT_ESCAPE_CHAR;
  }

  // -----------------------------------------------------------------------

  /**
   * Normalizes a path, removing double and single dot path steps.
   * <p/>
   * This method normalizes a path to a standard format.
   * <p/>
   * A trailing slash will be retained. A double slash will be merged to a single slash (but UNC names are handled). A
   * single dot path segment will be removed. A double dot will cause that path segment and the one before to be
   * removed. If the double dot has no parent path segment to work with, <code>null</code> is returned.
   * <p/>
   * The output will be the same on both Unix and Windows except for the separator character.
   *
   * <pre>
   * /foo//               -->   /foo/
   * /foo/./              -->   /foo/
   * /foo/../bar          -->   /bar
   * /foo/../bar/         -->   /bar/
   * /foo/../bar/../baz   -->   /baz
   * //foo//./bar         -->   /foo/bar
   * /../                 -->   null
   * ../foo               -->   null
   * foo/bar/..           -->   foo/
   * foo/../../bar        -->   null
   * foo/../bar           -->   bar
   * </pre>
   *
   * @param filename
   *          the filename to normalize, null returns null
   * @return the normalized filename, or null if invalid
   */
  public static String normalize( final String filename ) {
    return FilenameUtils.normalize( filename, true );
  }

  /**
   * Normalizes a path, removing double and single dot path steps, optionally forcing a leading slash.
   * <p/>
   * Leading and trailing whitespace is trimmed from the filename, which is then normalized exactly as
   * {@link #normalize(String)} does.
   * <p/>
   * A trailing slash will be retained. A double slash will be merged to a single slash (but UNC names are handled). A
   * single dot path segment will be removed. A double dot will cause that path segment and the one before to be
   * removed. If the double dot has no parent path segment to work with, <code>null</code> is returned.
   * <p/>
   * The output will be the same on both Unix and Windows except for the separator character.
   *
   * <pre>
   * /foo//               -->   /foo/
   * /foo/./              -->   /foo/
   * /foo/../bar          -->   /bar
   * /foo/../bar/         -->   /bar/
   * /foo/../bar/../baz   -->   /baz
   * //foo//./bar         -->   /foo/bar
   * /../                 -->   null
   * ../foo               -->   null
   * foo/bar/..           -->   foo/
   * foo/../../bar        -->   null
   * foo/../bar           -->   bar
   * </pre>
   *
   * @param filename
   *          the filename to normalize, null returns null
   * @param leadingSlash
   *          will ensure there is a leading slash on the result if {@code true}
   * @return the normalized filename, or null if invalid
   */
  public static String normalize( final String filename, final boolean leadingSlash ) {
    if ( filename == null ) {
      return null;
    }
    final String normalized = normalize( filename.trim() );
    if ( normalized == null || !leadingSlash || normalized.startsWith( RepositoryFile.SEPARATOR ) ) {
      return normalized;
    }
    return SEPARATOR + normalized;
  }

  // -----------------------------------------------------------------------

  /**
   * Normalizes a path, removing double and single dot path steps, and removing any final directory separator.
   * <p/>
   * This method normalizes a path to a standard format.
   * <p/>
   * A trailing slash will be removed. A double slash will be merged to a single slash (but UNC names are handled). A
   * single dot path segment will be removed. A double dot will cause that path segment and the one before to be
   * removed. If the double dot has no parent path segment to work with, <code>null</code> is returned.
   * <p/>
   * The output will be the same on both Unix and Windows except for the separator character.
   *
   * <pre>
   * /foo//               -->   /foo
   * /foo/./              -->   /foo
   * /foo/../bar          -->   /bar
   * /foo/../bar/         -->   /bar
   * /foo/../bar/../baz   -->   /baz
   * //foo//./bar         -->   /foo/bar
   * /../                 -->   null
   * ../foo               -->   null
   * foo/bar/..           -->   foo
   * foo/../../bar        -->   null
   * foo/../bar           -->   bar
   * </pre>
   *
   * @param filename
   *          the filename to normalize, null returns null
   * @return the normalized filename, or null if invalid
   */
  public static String normalizeNoEndSeparator( final String filename ) {
    if ( filename == null ) {
      return null;
    }
    return FilenameUtils.normalizeNoEndSeparator( filename, true );
  }

  // -----------------------------------------------------------------------

  /**
   * Concatenates a filename to a base path using normal command line style rules.
   * <p/>
   * The effect is equivalent to resultant directory after changing directory to the first argument, followed by
   * changing directory to the second argument.
   * <p/>
   * The first argument is the base path, the second is the path to concatenate. The returned path is always normalized
   * via {@link #normalize(String)}, thus <code>..</code> is handled.
   * <p/>
   * If <code>fullFilenameToAdd</code> is absolute (has an absolute prefix), then it will be normalized and returned.
   * Otherwise, the paths will be joined, normalized and returned. A <code>null</code> <code>fullFilenameToAdd</code>
   * yields <code>null</code>; a blank one yields the base path normalized without its end separator.
   * <p/>
   *
   * <pre>
   * /foo/      + bar        -->  /foo/bar
   * /foo       + bar        -->  /foo/bar
   * /foo       + /bar       -->  /bar
   * /foo/a/    + ../bar     -->  /foo/bar
   * /foo/      + ../../bar  -->  null
   * /foo/      + /bar       -->  /bar
   * /foo/..    + /bar       -->  /bar
   * /foo       + bar/c.txt  -->  /foo/bar/c.txt
   * /foo/c.txt + bar        -->  /foo/c.txt/bar (!)
   * </pre>
   *
   * (!) Note that the first parameter must be a path. If it ends with a name, then the name will be built into the
   * concatenated path. If this might be a problem, use {@link #getFullPath(String)} on the base path argument.
   *
   * @param basePath
   *          the base path to attach to, always treated as a path
   * @param fullFilenameToAdd
   *          the filename (or path) to attach to the base
   * @return the concatenated path, or null if invalid
   */
  public static String concat( final String basePath, final String fullFilenameToAdd ) {
    if ( fullFilenameToAdd == null ) {
      return null;
    }
    if ( org.apache.commons.lang.StringUtils.isBlank( fullFilenameToAdd ) ) {
      return normalizeNoEndSeparator( basePath );
    }

    int prefix = 0;
    // always true at this point (blank input returned above); retained as a defensive guard
    if ( StringUtils.hasLength( fullFilenameToAdd ) ) {
      // ':' is masked before the prefix is measured - presumably so that a repository name containing ':' is not
      // mistaken for a drive-style prefix by Commons IO
      prefix = getPrefixLength( fullFilenameToAdd.replace( ":", "_" ) );
    }
    if ( prefix < 0 ) {
      return null;
    }
    if ( prefix > 0 ) {
      // the name to add is absolute, so the base path is irrelevant
      return RepositoryFilenameUtils.normalize( fullFilenameToAdd );
    }

    if ( basePath == null ) {
      return null;
    }
    if ( basePath.length() == 0 ) {
      return RepositoryFilenameUtils.normalize( fullFilenameToAdd );
    }

    String baseP = normalizeNoEndSeparator( basePath );
    if ( String.valueOf( SEPARATOR ).equals( baseP ) ) {
      // the root already supplies the separator appended below
      baseP = "";
    }
    return RepositoryFilenameUtils.normalize( baseP + SEPARATOR + fullFilenameToAdd );
  }

  // -----------------------------------------------------------------------

  /**
   * Converts all separators to the Repository (Unix) separator of forward slash.
   *
   * @param path
   *          the path to be changed, null ignored
   * @return the updated path
   */
  public static String separatorsToRepository( final String path ) {
    return FilenameUtils.separatorsToUnix( path );
  }

  // -----------------------------------------------------------------------

  /**
   * Returns the length of the filename prefix,
   * <p/>
   * The prefix length includes the first slash in the full filename if applicable. Thus, it is possible that the length
   * returned is greater than the length of the input string.
   *
   * <pre>
   * a/b/c.txt           --> ""          --> relative
   * /a/b/c.txt          --> "/"         --> absolute
   * </pre>
   * <p/>
   *
   * @param filename
   *          the filename to find the prefix in, null returns -1
   * @return the length of the prefix, -1 if invalid or null
   */
  public static int getPrefixLength( final String filename ) {
    return FilenameUtils.getPrefixLength( filename );
  }

  /**
   * Returns the index of the last directory separator character.
   * <p/>
   * The position of the last forward or backslash is returned.
   * <p/>
   *
   * @param filename
   *          the filename to find the last path separator in, null returns -1
   * @return the index of the last separator character, or -1 if there is no such character
   */
  public static int indexOfLastSeparator( final String filename ) {
    return FilenameUtils.indexOfLastSeparator( filename );
  }

  /**
   * Returns the index of the last extension separator character, which is a dot.
   * <p/>
   * This method also checks that there is no directory separator after the last dot. To do this it uses
   * {@link #indexOfLastSeparator(String)}
   * <p/>
   *
   * @param filename
   *          the filename to find the last extension separator in, null returns -1
   * @return the index of the last extension separator character, or -1 if there is no such character
   */
  public static int indexOfExtension( final String filename ) {
    return FilenameUtils.indexOfExtension( filename );
  }

  // -----------------------------------------------------------------------

  /**
   * Gets the prefix from a full filename.
   * <p/>
   * The prefix includes the first slash in the full filename where applicable.
   *
   * <pre>
   * a/b/c.txt           --> ""          --> relative
   * /a/b/c.txt          --> "/"         --> absolute
   * </pre>
   * <p/>
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the prefix of the file, null if invalid
   */
  public static String getPrefix( final String filename ) {
    return FilenameUtils.getPrefix( filename );
  }

  /**
   * Gets the path from a full filename, which excludes the prefix.
   * <p/>
   * The method is entirely text based, and returns the text before and including the last forward or backslash.
   *
   * <pre>
   * a.txt        --> ""
   * a/b/c        --> a/b/
   * a/b/c/       --> a/b/c/
   * /a.txt       --> ""
   * /a/b/c       --> a/b/
   * /a/b/c/      --> a/b/c/
   * </pre>
   * <p/>
   * This method drops the prefix from the result. See {@link #getFullPath(String)} for the method that retains the
   * prefix.
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the path of the file, an empty string if none exists, null if invalid
   */
  public static String getPath( final String filename ) {
    return FilenameUtils.getPath( filename );
  }

  /**
   * Gets the path from a full filename, which excludes the prefix, and also excluding the final directory separator.
   * <p/>
   * The method is entirely text based, and returns the text before the last forward or backslash.
   *
   * <pre>
   * a.txt        --> ""
   * a/b/c        --> a/b
   * a/b/c/       --> a/b/c
   * /a.txt       --> ""
   * /a/b/c       --> a/b
   * /a/b/c/      --> a/b/c
   * </pre>
   * <p/>
   * This method drops the prefix from the result. See {@link #getFullPathNoEndSeparator(String)} for the method that
   * retains the prefix.
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the path of the file, an empty string if none exists, null if invalid
   */
  public static String getPathNoEndSeparator( final String filename ) {
    return FilenameUtils.getPathNoEndSeparator( filename );
  }

  /**
   * Gets the full path from a full filename, which is the prefix + path.
   * <p/>
   * The method is entirely text based, and returns the text before and including the last forward or backslash.
   *
   * <pre>
   * a.txt        --> ""
   * a/b/c        --> a/b/
   * a/b/c/       --> a/b/c/
   * /a.txt       --> /
   * /a/b/c       --> /a/b/
   * /a/b/c/      --> /a/b/c/
   * </pre>
   * <p/>
   * The output will be the same irrespective of the machine that the code is running on.
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the path of the file, an empty string if none exists, null if invalid
   */
  public static String getFullPath( final String filename ) {
    return FilenameUtils.getFullPath( filename );
  }

  /**
   * Gets the full path from a full filename, which is the prefix + path, and also excluding the final directory
   * separator.
   * <p/>
   * This method will handle a file in either Unix or Windows format. The method is entirely text based, and returns the
   * text before the last forward or backslash.
   *
   * <pre>
   * a.txt        --> ""
   * a/b/c        --> a/b
   * a/b/c/       --> a/b/c
   * /a.txt       --> /
   * /a/b/c       --> /a/b
   * /a/b/c/      --> /a/b/c
   * </pre>
   * <p/>
   * The output will be the same irrespective of the machine that the code is running on.
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the path of the file, an empty string if none exists, null if invalid
   */
  public static String getFullPathNoEndSeparator( final String filename ) {
    return FilenameUtils.getFullPathNoEndSeparator( filename );
  }

  /**
   * Gets the name minus the path from a full filename.
   * <p/>
   * The text after the last forward or backslash is returned.
   *
   * <pre>
   * a/b/c.txt --> c.txt
   * a.txt     --> a.txt
   * a/b/c     --> c
   * a/b/c/    --> ""
   * </pre>
   * <p/>
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the name of the file without the path, or an empty string if none exists
   */
  public static String getName( final String filename ) {
    return FilenameUtils.getName( filename );
  }

  /**
   * Gets the base name, minus the full path and extension, from a full filename.
   * <p/>
   * The text after the last forward or backslash and before the last dot is returned.
   *
   * <pre>
   * a/b/c.txt --> c
   * a.txt     --> a
   * a/b/c     --> c
   * a/b/c/    --> ""
   * </pre>
   * <p/>
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the name of the file without the path, or an empty string if none exists
   */
  public static String getBaseName( final String filename ) {
    return FilenameUtils.getBaseName( filename );
  }

  /**
   * Gets the extension of a filename.
   * <p/>
   * This method returns the textual part of the filename after the last dot. There must be no directory separator after
   * the dot.
   *
   * <pre>
   * foo.txt      --> "txt"
   * a/b/c.jpg    --> "jpg"
   * a/b.txt/c    --> ""
   * a/b/c        --> ""
   * </pre>
   * <p/>
   *
   * @param filename
   *          the filename to retrieve the extension of.
   * @return the extension of the file or an empty string if none exists.
   */
  public static String getExtension( final String filename ) {
    return FilenameUtils.getExtension( filename );
  }

  // -----------------------------------------------------------------------

  /**
   * Removes the extension from a filename.
   * <p/>
   * This method returns the textual part of the filename before the last dot. There must be no directory separator
   * after the dot.
   *
   * <pre>
   * foo.txt    --> foo
   * a/b/c.jpg  --> a/b/c
   * a/b/c      --> a/b/c
   * a.b/c      --> a.b/c
   * </pre>
   * <p/>
   *
   * @param filename
   *          the filename to query, null returns null
   * @return the filename minus the extension
   */
  public static String removeExtension( final String filename ) {
    return FilenameUtils.removeExtension( filename );
  }

  // -----------------------------------------------------------------------

  /**
   * Checks whether two filenames are equal exactly.
   * <p/>
   * No processing is performed on the filenames other than comparison, thus this is merely a null-safe case-sensitive
   * equals.
   *
   * @param filename1
   *          the first filename to query, may be null
   * @param filename2
   *          the second filename to query, may be null
   * @return true if the filenames are equal, null equals null
   * @see org.apache.commons.io.IOCase#SENSITIVE
   */
  public static boolean equals( final String filename1, final String filename2 ) {
    return FilenameUtils.equals( filename1, filename2, false, IOCase.SENSITIVE );
  }

  // -----------------------------------------------------------------------

  /**
   * Checks whether two filenames are equal after both have been normalized.
   * <p/>
   * Both filenames are first passed to {@link #normalize(String)}. The check is then performed in a case-sensitive
   * manner.
   *
   * @param filename1
   *          the first filename to query, may be null
   * @param filename2
   *          the second filename to query, may be null
   * @return true if the filenames are equal, null equals null
   * @see IOCase#SENSITIVE
   */
  public static boolean equalsNormalized( String filename1, String filename2 ) {
    return FilenameUtils.equals( filename1, filename2, true, IOCase.SENSITIVE );
  }

  // -----------------------------------------------------------------------

  /**
   * Checks whether the extension of the filename is that specified.
   * <p/>
   * This method obtains the extension as the textual part of the filename after the last dot. There must be no
   * directory separator after the dot. The extension check is case-sensitive on all platforms.
   *
   * @param filename
   *          the filename to query, null returns false
   * @param extension
   *          the extension to check for, null or empty checks for no extension
   * @return true if the filename has the specified extension
   */
  public static boolean isExtension( final String filename, final String extension ) {
    return FilenameUtils.isExtension( filename, extension );
  }

  /**
   * Checks whether the extension of the filename is one of those specified.
   * <p/>
   * This method obtains the extension as the textual part of the filename after the last dot. There must be no
   * directory separator after the dot. The extension check is case-sensitive on all platforms.
   *
   * @param filename
   *          the filename to query, null returns false
   * @param extensions
   *          the extensions to check for, null checks for no extension
   * @return true if the filename is one of the extensions
   */
  public static boolean isExtension( final String filename, final String[] extensions ) {
    return FilenameUtils.isExtension( filename, extensions );
  }

  /**
   * Checks whether the extension of the filename is one of those specified.
   * <p/>
   * This method obtains the extension as the textual part of the filename after the last dot. There must be no
   * directory separator after the dot. The extension check is case-sensitive on all platforms.
   *
   * @param filename
   *          the filename to query, null returns false
   * @param extensions
   *          the extensions to check for, null checks for no extension
   * @return true if the filename is one of the extensions
   */
  // the raw Collection parameter is kept so that existing callers continue to compile unchanged
  @SuppressWarnings( { "rawtypes", "unchecked" } )
  public static boolean isExtension( final String filename, final Collection extensions ) {
    return FilenameUtils.isExtension( filename, extensions );
  }

  // -----------------------------------------------------------------------

  /**
   * Checks a filename to see if it matches the specified wildcard matcher, always testing case-sensitive.
   * <p/>
   * The wildcard matcher uses the characters '?' and '*' to represent a single or multiple wildcard characters. This is
   * the same as often found on Dos/Unix command lines. The check is case-sensitive always.
   *
   * <pre>
   * wildcardMatch("c.txt", "*.txt")      --> true
   * wildcardMatch("c.txt", "*.jpg")      --> false
   * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
   * wildcardMatch("c.txt", "*.???")      --> true
   * wildcardMatch("c.txt", "*.????")     --> false
   * </pre>
   *
   * @param filename
   *          the filename to match on
   * @param wildcardMatcher
   *          the wildcard string to match against
   * @return true if the filename matches the wildcard string
   * @see IOCase#SENSITIVE
   */
  public static boolean wildcardMatch( final String filename, final String wildcardMatcher ) {
    return FilenameUtils.wildcardMatch( filename, wildcardMatcher, IOCase.SENSITIVE );
  }

  /**
   * Performs percent-encoding (as specified in {@code IUnifiedRepository}) on given {@code name}, only encoding the
   * characters given in {@code reservedChars}. The escape character itself is always encoded as well, so that the
   * result can be decoded unambiguously by {@link #unescape(String)}. Assumes only ASCII characters in reservedChars:
   * each escaped character is written as the escape character followed by exactly two upper-case hex digits.
   *
   * @param name
   *          name to escape
   * @param reservedChars
   *          chars within name to escape
   * @return escaped name
   * @throws IllegalArgumentException
   *           if {@code name} or {@code reservedChars} is null, or if {@code reservedChars} contains the escape
   *           character
   */
  public static String escape( final String name, final List<Character> reservedChars ) {
    if ( name == null || reservedChars == null ) {
      throw new IllegalArgumentException( "name and reservedChars must not be null" );
    }
    if ( reservedChars.contains( escapeChar ) ) {
      // the escape character cannot be used for escaping if it is itself illegal
      throw new IllegalArgumentException( "reservedChars must not contain the escape character '" + escapeChar + "'" );
    }

    final List<Character> charsToEscape = new ArrayList<Character>( reservedChars );
    charsToEscape.add( escapeChar ); // must be escaped too, otherwise unescape could not tell literals from escapes

    final StringBuilder escaped = new StringBuilder( name.length() * 2 );
    for ( int i = 0; i < name.length(); i++ ) {
      final char ch = name.charAt( i );
      if ( charsToEscape.contains( ch ) ) {
        escaped.append( escapeChar );
        escaped.append( Character.toUpperCase( Character.forDigit( ch / 16, 16 ) ) );
        escaped.append( Character.toUpperCase( Character.forDigit( ch % 16, 16 ) ) );
      } else {
        escaped.append( ch );
      }
    }
    return escaped.toString();
  }

  /**
   * Reverts modifications of {@link #escape(String, List)} such that for all {@code String}s {@code t},
   * {@code t.equals(unescape(escape(t, reservedChars)))}. Assumes only ASCII characters have been escaped.
   * <p/>
   * An escape character that is not followed by two hex digits is copied to the result unchanged.
   *
   * @param name
   *          name to unescape
   * @return unescaped name
   * @throws IllegalArgumentException
   *           if {@code name} is null
   */
  public static String unescape( final String name ) {
    if ( name == null ) {
      throw new IllegalArgumentException( "name must not be null" );
    }

    final char esc = escapeChar;
    final int length = name.length();
    final StringBuilder unescaped = new StringBuilder( length );

    int copiedUpTo = 0; // everything before this index has already been transferred to the result
    int escapeAt = name.indexOf( esc );
    // an escape sequence needs two characters after the escape character
    while ( escapeAt > -1 && escapeAt + 2 < length ) {
      unescaped.append( name, copiedUpTo, escapeAt );
      final int high = Character.digit( name.charAt( escapeAt + 1 ), 16 );
      final int low = Character.digit( name.charAt( escapeAt + 2 ), 16 );
      if ( high > -1 && low > -1 ) {
        unescaped.append( (char) ( high * 16 + low ) );
        copiedUpTo = escapeAt + 3;
      } else {
        // not a valid escape sequence: keep the escape character literally and continue right after it
        unescaped.append( esc );
        copiedUpTo = escapeAt + 1;
      }
      escapeAt = name.indexOf( esc, copiedUpTo );
    }
    unescaped.append( name, copiedUpTo, length );
    return unescaped.toString();
  }
}