/* * Copyright 2014 wada811<at.wada811@gmail.com> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.wada811.utils; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.File; /** * General filename and filepath manipulation utilities. * <p> * When dealing with filenames you can hit problems when moving from a Windows based development * machine to a Unix based production machine. This class aims to help avoid those problems. * <p> * <b>NOTE</b>: You may be able to avoid using this class entirely simply by using JDK * {@link java.io.File File} objects and the two argument constructor * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. * <p> * Most methods on this class are designed to work the same on both Unix and Windows. Those that * don't include 'System', 'Unix' or 'Windows' in their name. * <p> * Most methods recognise both separators (forward and back), and both sets of prefixes. See the * javadoc of each method for details. * <p> * This class defines six components within a filename (example C:\dev\project\file.txt): * <ul> * <li>the prefix - C:\</li> * <li>the path - dev\project\</li> * <li>the full path - C:\dev\project\</li> * <li>the name - file.txt</li> * <li>the base name - file</li> * <li>the extension - txt</li> * </ul> * Note that this class works best if directory filenames end with a separator. If you omit the last * separator, it is impossible to determine if the filename corresponds to a file or a directory. As * a result, we have chosen to say it corresponds to a file. * <p> * This class only supports Unix and Windows style names. Prefixes are matched as follows: * * <pre> * Windows: * a\b\c.txt --> "" --> relative * \a\b\c.txt --> "\" --> current drive absolute * C:a\b\c.txt --> "C:" --> drive relative * C:\a\b\c.txt --> "C:\" --> absolute * \\server\a\b\c.txt --> "\\server\" --> UNC * * Unix: * a/b/c.txt --> "" --> relative * /a/b/c.txt --> "/" --> absolute * ~/a/b/c.txt --> "~/" --> current user * ~ --> "~/" --> current user (slash added) * ~user/a/b/c.txt --> "~user/" --> named user * ~user --> "~user/" --> named user (slash added) * </pre> * * Both prefix styles are matched always, irrespective of the machine that you are currently running * on. * <p> * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. * * @version $Id: FilenameUtils.java 1307462 2012-03-30 15:13:11Z ggregory $ * @since 1.1 */ public class FileNameUtils { /** * The extension separator character. * * @since 1.4 */ public static final char EXTENSION_SEPARATOR = '.'; /** * The extension separator String. * * @since 1.4 */ public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); /** * The system separator character. */ private static final char SYSTEM_SEPARATOR = File.separatorChar; /** * Instances should NOT be constructed in standard programming. */ private FileNameUtils() { } //----------------------------------------------------------------------- /** * Checks if the character is a separator. * * @param ch the character to check * @return true if it is a separator character */ private static boolean isSeparator(char ch){ return ch == SYSTEM_SEPARATOR; } //----------------------------------------------------------------------- /** * Normalizes a path, removing double and single dot path steps. * <p> * This method normalizes a path to a standard format. The input may contain separators in * either Unix or Windows format. The output will contain separators in the format of the * system. * <p> * A trailing slash will be retained. A double slash will be merged to a single slash (but UNC * names are handled). A single dot path segment will be removed. A double dot will cause that * path segment and the one before to be removed. If the double dot has no parent path segment * to work with, {@code null} is returned. * <p> * The output will be the same on both Unix and Windows except for the separator character. * * <pre> * /foo// --> /foo/ * /foo/./ --> /foo/ * /foo/../bar --> /bar * /foo/../bar/ --> /bar/ * /foo/../bar/../baz --> /baz * //foo//./bar --> /foo/bar * /../ --> null * ../foo --> null * foo/bar/.. --> foo/ * foo/../../bar --> null * foo/../bar --> bar * //server/foo/../bar --> //server/bar * //server/../bar --> null * C:\foo\..\bar --> C:\bar * C:\..\bar --> null * ~/foo/../bar/ --> ~/bar/ * ~/../bar --> null * </pre> * * (Note the file separator returned will be correct for Windows/Unix) * * @param filename the filename to normalize, null returns null * @return the normalized filename, or null if invalid */ public static String normalize(String filename){ if(filename == null){ return null; } int size = filename.length(); if(size == 0){ return filename; } int prefix = getPrefixLength(filename); if(prefix < 0){ return null; } char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy filename.getChars(0, filename.length(), array, 0); // add extra separator on the end to simplify code below boolean lastIsDirectory = true; if(array[size - 1] != SYSTEM_SEPARATOR){ array[size++] = SYSTEM_SEPARATOR; lastIsDirectory = false; } // adjoining slashes for(int i = prefix + 1; i < size; i++){ if(array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR){ System.arraycopy(array, i, array, i - 1, size - i); size--; i--; } } // dot slash for(int i = prefix + 1; i < size; i++){ if(array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)){ if(i == size - 1){ lastIsDirectory = true; } System.arraycopy(array, i + 1, array, i - 1, size - i); size -= 2; i--; } } // double dot slash outer: for(int i = prefix + 2; i < size; i++){ if(array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' && (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)){ if(i == prefix + 2){ return null; } if(i == size - 1){ lastIsDirectory = true; } int j; for(j = i - 4; j >= prefix; j--){ if(array[j] == SYSTEM_SEPARATOR){ // remove b/../ from a/b/../c System.arraycopy(array, i + 1, array, j + 1, size - i); size -= i - j; i = j + 1; continue outer; } } // remove a/../ from a/../c System.arraycopy(array, i + 1, array, prefix, size - i); size -= i + 1 - prefix; i = prefix + 1; } } if(size <= 0){ // should never be less than 0 return ""; } if(size <= prefix){ // should never be less than prefix return new String(array, 0, size); } if(lastIsDirectory){ return new String(array, 0, size); // keep trailing separator } return new String(array, 0, size - 1); // lose trailing separator } //----------------------------------------------------------------------- /** * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>. * <p> * This method will handle a file in either Unix or Windows format. * <p> * The prefix length includes the first slash in the full filename if applicable. Thus, it is * possible that the length returned is greater than the length of the input string. * * <pre> * Windows: * a\b\c.txt --> "" --> relative * \a\b\c.txt --> "\" --> current drive absolute * C:a\b\c.txt --> "C:" --> drive relative * C:\a\b\c.txt --> "C:\" --> absolute * \\server\a\b\c.txt --> "\\server\" --> UNC * * Unix: * a/b/c.txt --> "" --> relative * /a/b/c.txt --> "/" --> absolute * ~/a/b/c.txt --> "~/" --> current user * ~ --> "~/" --> current user (slash added) * ~user/a/b/c.txt --> "~user/" --> named user * ~user --> "~user/" --> named user (slash added) * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. ie. both * Unix and Windows prefixes are matched regardless. * * @param filename the filename to find the prefix in, null returns -1 * @return the length of the prefix, -1 if invalid or null */ private static int getPrefixLength(String filename){ if(filename == null){ return -1; } int len = filename.length(); if(len == 0){ return 0; } char ch0 = filename.charAt(0); if(ch0 == ':'){ return -1; } if(len == 1){ if(ch0 == '~'){ return 2; // return a length greater than the input } return isSeparator(ch0) ? 1 : 0; }else{ if(ch0 == '~'){ int posUnix = filename.indexOf(SYSTEM_SEPARATOR, 1); if(posUnix == -1){ return len + 1; // return a length greater than the input } return posUnix + 1; } char ch1 = filename.charAt(1); if(ch1 == ':'){ ch0 = Character.toUpperCase(ch0); if(ch0 >= 'A' && ch0 <= 'Z'){ if(len == 2 || isSeparator(filename.charAt(2)) == false){ return 2; } return 3; } return -1; }else if(isSeparator(ch0) && isSeparator(ch1)){ int posUnix = filename.indexOf(SYSTEM_SEPARATOR, 2); if(posUnix == -1 || posUnix == 2){ return -1; } return posUnix + 1; }else{ return isSeparator(ch0) ? 1 : 0; } } } /** * Returns the index of the last directory separator character. * <p> * This method will handle a file in either Unix or Windows format. The position of the last * forward or backslash is returned. * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to find the last path separator in, null returns -1 * @return the index of the last separator character, or -1 if there * is no such character */ private static int indexOfLastSeparator(String filename){ if(filename == null){ return -1; } int lastUnixPos = filename.lastIndexOf(SYSTEM_SEPARATOR); return lastUnixPos; } /** * Returns the index of the last extension separator character, which is a dot. * <p> * This method also checks that there is no directory separator after the last dot. To do this * it uses {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or * Windows format. * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to find the last path separator in, null returns -1 * @return the index of the last separator character, or -1 if there * is no such character */ private static int indexOfExtension(String filename){ if(filename == null){ return -1; } int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR); int lastSeparator = indexOfLastSeparator(filename); return lastSeparator > extensionPos ? -1 : extensionPos; } //----------------------------------------------------------------------- /** * Gets the prefix from a full filename, such as <code>C:/</code> or <code>~/</code>. * <p> * This method will handle a file in either Unix or Windows format. The prefix includes the * first slash in the full filename where applicable. * * <pre> * Windows: * a\b\c.txt --> "" --> relative * \a\b\c.txt --> "\" --> current drive absolute * C:a\b\c.txt --> "C:" --> drive relative * C:\a\b\c.txt --> "C:\" --> absolute * \\server\a\b\c.txt --> "\\server\" --> UNC * * Unix: * a/b/c.txt --> "" --> relative * /a/b/c.txt --> "/" --> absolute * ~/a/b/c.txt --> "~/" --> current user * ~ --> "~/" --> current user (slash added) * ~user/a/b/c.txt --> "~user/" --> named user * ~user --> "~user/" --> named user (slash added) * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. ie. both * Unix and Windows prefixes are matched regardless. * * @param filename the filename to query, null returns null * @return the prefix of the file, null if invalid */ private static String getPrefix(String filename){ if(filename == null){ return null; } int len = getPrefixLength(filename); if(len < 0){ return null; } if(len > filename.length()){ return filename + SYSTEM_SEPARATOR; // we know this only happens for unix } return filename.substring(0, len); } /** * Gets the path from a full filename, which excludes the prefix. * <p> * This method will handle a file in either Unix or Windows format. The method is entirely text * based, and returns the text before and including the last forward or backslash. * * <pre> * C:\a\b\c.txt --> a\b\ * ~/a/b/c.txt --> a/b/ * a.txt --> "" * a/b/c --> a/b/ * a/b/c/ --> a/b/c/ * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. * <p> * This method drops the prefix from the result. See {@link #getFullPath(String)} for the method * that retains the prefix. * * @param filename the filename to query, null returns null * @return the path of the file, an empty string if none exists, null if invalid */ public static String getPath(String filename){ if(filename == null){ return null; } int prefix = getPrefixLength(filename); if(prefix < 0){ return null; } int index = indexOfLastSeparator(filename); int endIndex = index + 1; if(prefix >= filename.length() || index < 0 || prefix >= endIndex){ return ""; } return filename.substring(prefix, endIndex); } /** * Gets the full path from a full filename, which is the prefix + path. * <p> * This method will handle a file in either Unix or Windows format. The method is entirely text * based, and returns the text before and including the last forward or backslash. * * <pre> * C:\a\b\c.txt --> C:\a\b\ * ~/a/b/c.txt --> ~/a/b/ * a.txt --> "" * a/b/c --> a/b/ * a/b/c/ --> a/b/c/ * C: --> C: * C:\ --> C:\ * ~ --> ~/ * ~/ --> ~/ * ~user --> ~user/ * ~user/ --> ~user/ * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to query, null returns null * @return the path of the file, an empty string if none exists, null if invalid */ public static String getFullPath(String filename){ if(filename == null){ return null; } int prefix = getPrefixLength(filename); if(prefix < 0){ return null; } if(prefix >= filename.length()){ return getPrefix(filename); // add end slash if necessary } int index = indexOfLastSeparator(filename); if(index < 0){ return filename.substring(0, prefix); } int end = index + 1; if(end == 0){ end++; } return filename.substring(0, end); } /** * Gets the name minus the path from a full filename. * <p> * This method will handle a file in either Unix or Windows format. The text after the last * forward or backslash is returned. * * <pre> * a/b/c.txt --> c.txt * a.txt --> a.txt * a/b/c --> c * a/b/c/ --> "" * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to query, null returns null * @return the name of the file without the path, or an empty string if none exists */ public static String getName(String filename){ if(filename == null){ return null; } int index = indexOfLastSeparator(filename); return filename.substring(index + 1); } /** * Gets the base name, minus the full path and extension, from a full filename. * <p> * This method will handle a file in either Unix or Windows format. The text after the last * forward or backslash and before the last dot is returned. * * <pre> * a/b/c.txt --> c * a.txt --> a * a/b/c --> c * a/b/c/ --> "" * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to query, null returns null * @return the name of the file without the path, or an empty string if none exists */ public static String getBaseName(String filename){ return removeExtension(getName(filename)); } /** * Gets the extension of a filename. * <p> * This method returns the textual part of the filename after the last dot. There must be no * directory separator after the dot. * * <pre> * foo.txt --> "txt" * a/b/c.jpg --> "jpg" * a/b.txt/c --> "" * a/b/c --> "" * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to retrieve the extension of. * @return the extension of the file or an empty string if none exists or {@code null} if the * filename is {@code null}. */ public static String getExtension(String filename){ if(filename == null){ return null; } int index = indexOfExtension(filename); if(index == -1){ return ""; }else{ return filename.substring(index + 1); } } //----------------------------------------------------------------------- /** * Removes the extension from a filename. * <p> * This method returns the textual part of the filename before the last dot. There must be no * directory separator after the dot. * * <pre> * foo.txt --> foo * a\b\c.jpg --> a\b\c * a\b\c --> a\b\c * a.b\c --> a.b\c * </pre> * <p> * The output will be the same irrespective of the machine that the code is running on. * * @param filename the filename to query, null returns null * @return the filename minus the extension */ public static String removeExtension(String filename){ if(filename == null){ return null; } int index = indexOfExtension(filename); if(index == -1){ return filename; }else{ return filename.substring(0, index); } } }