/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.io; import java.io.File; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Deque; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Stream; /** * General file name and file path manipulation utilities. *
* When dealing with file names you can hit problems when moving from a Windows * based development machine to a Unix based production machine. * This class aims to help avoid those problems. *
** NOTE: You may be able to avoid using this class entirely simply by * using JDK {@link java.io.File File} objects and the two argument constructor * {@link java.io.File#File(java.io.File, String) File(File,String)}. *
** Most methods on this class are designed to work the same on both Unix and Windows. * Those that don't include 'System', 'Unix' or 'Windows' in their name. *
** Most methods recognize both separators (forward and back), and both * sets of prefixes. See the Javadoc of each method for details. *
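* <p>
 * This class defines six components within a file name
 * (example C:\dev\project\file.txt):
 * <ul>
 * <li>the prefix - C:\</li>
 * <li>the path - dev\project\</li>
 * <li>the full path - C:\dev\project\</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 *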
** Note that this class works best if directory file names end with a separator. * If you omit the last separator, it is impossible to determine if the file name * corresponds to a file or a directory. As a result, we have chosen to say * it corresponds to a file. *
** This class only supports Unix and Windows style names. * Prefixes are matched as follows: *
** Windows: * a\b\c.txt --> "" --> relative * \a\b\c.txt --> "\" --> current drive absolute * C:a\b\c.txt --> "C:" --> drive relative * C:\a\b\c.txt --> "C:\" --> absolute * \\server\a\b\c.txt --> "\\server\" --> UNC * * Unix: * a/b/c.txt --> "" --> relative * /a/b/c.txt --> "/" --> absolute * ~/a/b/c.txt --> "~/" --> current user * ~ --> "~/" --> current user (slash added) * ~user/a/b/c.txt --> "~user/" --> named user * ~user --> "~user/" --> named user (slash added) **
* Both prefix styles are matched always, irrespective of the machine that you are * currently running on. *
** Provenance: Excalibur, Alexandria, Tomcat, Commons-Utils. *
*
 * @since 1.1
 */
public class FilenameUtils {

/** A zero-length {@code String} array. */
private static final String[] EMPTY_STRING_ARRAY = {};

/** The empty string, returned for example when a file name has no extension. */
private static final String EMPTY_STRING = "";

/** Sentinel index ({@code -1}) meaning "no such character / invalid". */
private static final int NOT_FOUND = -1;

/**
 * The extension separator character.
 * @since 1.4
 */
public static final char EXTENSION_SEPARATOR = '.';

/**
 * The extension separator String.
 * @since 1.4
 */
public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);

/**
 * The Unix separator character.
 */
private static final char UNIX_NAME_SEPARATOR = '/';

/**
 * The Windows separator character.
 */
private static final char WINDOWS_NAME_SEPARATOR = '\\';

/**
 * The system separator character.
 */
private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;

/**
 * The separator character that is the opposite of the system separator.
 */
private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);

/** Matches four dot-separated groups of one to three digits; each group is captured for range checking. */
private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");

/** Largest value a single IPv4 address group may have. */
private static final int IPV4_MAX_OCTET_VALUE = 255;

/** NOTE(review): presumably the maximum number of groups in an IPv6 address; the consumer is not visible here. */
private static final int IPV6_MAX_HEX_GROUPS = 8;

/** NOTE(review): presumably the maximum number of hex digits per IPv6 group; the consumer is not visible here. */
private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;

/** The value {@code 0xffff}; NOTE(review): presumably the upper bound of an IPv6 group value. */
private static final int MAX_UNSIGNED_SHORT = 0xffff;

/** Radix 16; NOTE(review): presumably used when parsing hexadecimal IPv6 groups. */
private static final int BASE_16 = 16;

/** Matches a name part that starts with an ASCII letter or digit, followed by ASCII letters, digits or hyphens. */
private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");

/**
 * Concatenates a fileName to a base path using normal command line style rules.
 ** The effect is equivalent to resultant directory after changing
 * directory to the first argument, followed by changing directory to
 * the second argument.
 *
** The first argument is the base path, the second is the path to concatenate. * The returned path is always normalized via {@link #normalize(String)}, * thus {@code ..} is handled. *
** If {@code pathToAdd} is absolute (has an absolute prefix), then * it will be normalized and returned. * Otherwise, the paths will be joined, normalized and returned. *
** The output will be the same on both Unix and Windows except * for the separator character. *
** /foo/ + bar --> /foo/bar * /foo + bar --> /foo/bar * /foo + /bar --> /bar * /foo + C:/bar --> C:/bar * /foo + C:bar --> C:bar [1] * /foo/a/ + ../bar --> /foo/bar * /foo/ + ../../bar --> null * /foo/ + /bar --> /bar * /foo/.. + /bar --> /bar * /foo + bar/c.txt --> /foo/bar/c.txt * /foo/c.txt + bar --> /foo/c.txt/bar [2] **
* [1] Note that the Windows relative drive prefix is unreliable when * used with this method. *
** [2] Note that the first parameter must be a path. If it ends with a name, then * the name will be built into the concatenated path. If this might be a problem, * use {@link #getFullPath(String)} on the base path argument. *
* * @param basePath the base path to attach to, always treated as a path * @param fullFileNameToAdd the fileName (or path) to attach to the base * @return the concatenated path, or null if invalid * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) */ public static String concat(final String basePath, final String fullFileNameToAdd) { final int prefix = getPrefixLength(fullFileNameToAdd); if (prefix < 0) { return null; } if (prefix > 0) { return normalize(fullFileNameToAdd); } if (basePath == null) { return null; } final int len = basePath.length(); if (len == 0) { return normalize(fullFileNameToAdd); } final char ch = basePath.charAt(len - 1); if (isSeparator(ch)) { return normalize(basePath + fullFileNameToAdd); } return normalize(basePath + '/' + fullFileNameToAdd); } /** * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). ** The files names are expected to be normalized. *
* * Edge cases: ** No processing is performed on the fileNames other than comparison, * thus this is merely a null-safe case-sensitive equals. *
* * @param fileName1 the first fileName to query, may be null * @param fileName2 the second fileName to query, may be null * @return true if the fileNames are equal, null equals null * @see IOCase#SENSITIVE */ public static boolean equals(final String fileName1, final String fileName2) { return equals(fileName1, fileName2, false, IOCase.SENSITIVE); } /** * Checks whether two fileNames are equal, optionally normalizing and providing * control over the case-sensitivity. * * @param fileName1 the first fileName to query, may be null * @param fileName2 the second fileName to query, may be null * @param normalize whether to normalize the fileNames * @param ioCase what case sensitivity rule to use, null means case-sensitive * @return true if the fileNames are equal, null equals null * @since 1.3 */ public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) { if (fileName1 == null || fileName2 == null) { return fileName1 == null && fileName2 == null; } if (normalize) { fileName1 = normalize(fileName1); if (fileName1 == null) { return false; } fileName2 = normalize(fileName2); if (fileName2 == null) { return false; } } return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2); } /** * Checks whether two fileNames are equal after both have been normalized. ** Both fileNames are first passed to {@link #normalize(String)}. * The check is then performed in a case-sensitive manner. *
*
 * @param fileName1 the first fileName to query, may be null
 * @param fileName2 the second fileName to query, may be null
 * @return true if the fileNames are equal, null equals null
 * @see IOCase#SENSITIVE
 */
public static boolean equalsNormalized(final String fileName1, final String fileName2) {
    // Normalize both names first, then compare case-sensitively.
    final boolean normalizeFirst = true;
    return equals(fileName1, fileName2, normalizeFirst, IOCase.SENSITIVE);
}

/**
 * Checks whether two fileNames are equal after both have been normalized
 * and using the case rules of the system.
 * <p>
 * Both fileNames are first passed to {@link #normalize(String)}.
 * The check is then performed case-sensitive on Unix and
 * case-insensitive on Windows.
 *
*
 * @param fileName1 the first fileName to query, may be null
 * @param fileName2 the second fileName to query, may be null
 * @return true if the fileNames are equal, null equals null
 * @see IOCase#SYSTEM
 */
public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
    // Normalize both names first, then compare using the system's case rule.
    final boolean normalizeFirst = true;
    return equals(fileName1, fileName2, normalizeFirst, IOCase.SYSTEM);
}

/**
 * Checks whether two fileNames are equal using the case rules of the system.
 * <p>
 * No processing is performed on the fileNames other than comparison.
 * The check is case-sensitive on Unix and case-insensitive on Windows.
 *
* * @param fileName1 the first fileName to query, may be null * @param fileName2 the second fileName to query, may be null * @return true if the fileNames are equal, null equals null * @see IOCase#SYSTEM */ public static boolean equalsOnSystem(final String fileName1, final String fileName2) { return equals(fileName1, fileName2, false, IOCase.SYSTEM); } /** * Flips the Windows name separator to Linux and vice-versa. * * @param ch The Windows or Linux name separator. * @return The Windows or Linux name separator. */ static char flipSeparator(final char ch) { if (ch == UNIX_NAME_SEPARATOR) { return WINDOWS_NAME_SEPARATOR; } if (ch == WINDOWS_NAME_SEPARATOR) { return UNIX_NAME_SEPARATOR; } throw new IllegalArgumentException(String.valueOf(ch)); } /** * Special handling for NTFS ADS: Don't accept colon in the fileName. * * @param fileName a file name * @return ADS offsets. */ private static int getAdsCriticalOffset(final String fileName) { // Step 1: Remove leading path segments. final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR); final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); if (offset1 == -1) { if (offset2 == -1) { return 0; } return offset2 + 1; } if (offset2 == -1) { return offset1 + 1; } return Math.max(offset1, offset2) + 1; } /** * Gets the base name, minus the full path and extension, from a full fileName. ** This method will handle a file in either Unix or Windows format. * The text after the last forward or backslash and before the last dot is returned. *
** a/b/c.txt --> c * a.txt --> a * a/b/c --> c * a/b/c/ --> "" **
* The output will be the same irrespective of the machine that the code is running on. *
*
 * @param fileName the fileName to query, null returns null
 * @return the name of the file without the path, or an empty string if none exists
 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
 */
public static String getBaseName(final String fileName) {
    // Strip the directory part first, then the extension.
    final String nameOnly = getName(fileName);
    return removeExtension(nameOnly);
}

/**
 * Gets the extension of a fileName.
 * <p>
 * This method returns the textual part of the fileName after the last dot.
 * There must be no directory separator after the dot.
 *
** foo.txt --> "txt" * a/b/c.jpg --> "jpg" * a/b.txt/c --> "" * a/b/c --> "" **
* The output will be the same irrespective of the machine that the code is running on, with the * exception of a possible {@link IllegalArgumentException} on Windows (see below). *
* <p>
 * Note: This method used to have a hidden problem for names like "foo.exe:bar.txt".
 * In this case, the name wouldn't be the name of a file, but the identifier of an
 * alternate data stream (bar.txt) on the file foo.exe. The method used to return
 * "txt" here, which would be misleading. Commons IO 2.7 and later versions throw
 * an {@link IllegalArgumentException} for names like this.
 *
* * @param fileName the fileName to retrieve the extension of. * @return the extension of the file or an empty string if none exists or {@code null} * if the fileName is {@code null}. * @throws IllegalArgumentException Windows only: The fileName parameter is, in fact, * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". */ public static String getExtension(final String fileName) throws IllegalArgumentException { if (fileName == null) { return null; } final int index = indexOfExtension(fileName); if (index == NOT_FOUND) { return EMPTY_STRING; } return fileName.substring(index + 1); } /** * Gets the full path from a full fileName, which is the prefix + path. ** This method will handle a file in either Unix or Windows format. * The method is entirely text based, and returns the text before and * including the last forward or backslash. *
** C:\a\b\c.txt --> C:\a\b\ * ~/a/b/c.txt --> ~/a/b/ * a.txt --> "" * a/b/c --> a/b/ * a/b/c/ --> a/b/c/ * C: --> C: * C:\ --> C:\ * ~ --> ~/ * ~/ --> ~/ * ~user --> ~user/ * ~user/ --> ~user/ **
* The output will be the same irrespective of the machine that the code is running on. *
*
 * @param fileName the fileName to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
 */
public static String getFullPath(final String fileName) {
    // This variant keeps the final separator in the result.
    final boolean includeSeparator = true;
    return doGetFullPath(fileName, includeSeparator);
}

/**
 * Gets the full path from a full fileName, which is the prefix + path,
 * and also excluding the final directory separator.
 * <p>
 * This method will handle a file in either Unix or Windows format.
 * The method is entirely text based, and returns the text before the
 * last forward or backslash.
 *
** C:\a\b\c.txt --> C:\a\b * ~/a/b/c.txt --> ~/a/b * a.txt --> "" * a/b/c --> a/b * a/b/c/ --> a/b/c * C: --> C: * C:\ --> C:\ * ~ --> ~ * ~/ --> ~ * ~user --> ~user * ~user/ --> ~user **
* The output will be the same irrespective of the machine that the code is running on. *
*
 * @param fileName the fileName to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
 */
public static String getFullPathNoEndSeparator(final String fileName) {
    // This variant drops the final separator from the result.
    final boolean includeSeparator = false;
    return doGetFullPath(fileName, includeSeparator);
}

/**
 * Gets the name minus the path from a full fileName.
 * <p>
 * This method will handle a file in either Unix or Windows format.
 * The text after the last forward or backslash is returned.
 *
** a/b/c.txt --> c.txt * a.txt --> a.txt * a/b/c --> c * a/b/c/ --> "" **
* The output will be the same irrespective of the machine that the code is running on. *
*
 * @param fileName the fileName to query, null returns null
 * @return the name of the file without the path, or an empty string if none exists
 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
 */
public static String getName(final String fileName) {
    if (fileName == null) {
        return null;
    }
    // Validate before slicing; the check hands back the same string.
    final String checked = requireNonNullChars(fileName);
    // With no separator the index is -1, so the whole string is the name.
    final int nameStart = indexOfLastSeparator(fileName) + 1;
    return checked.substring(nameStart);
}

/**
 * Gets the path from a full fileName, which excludes the prefix.
 * <p>
 * This method will handle a file in either Unix or Windows format.
 * The method is entirely text based, and returns the text before and
 * including the last forward or backslash.
 *
** C:\a\b\c.txt --> a\b\ * ~/a/b/c.txt --> a/b/ * a.txt --> "" * a/b/c --> a/b/ * a/b/c/ --> a/b/c/ **
* The output will be the same irrespective of the machine that the code is running on. *
** This method drops the prefix from the result. * See {@link #getFullPath(String)} for the method that retains the prefix. *
*
 * @param fileName the fileName to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
 */
public static String getPath(final String fileName) {
    // NOTE(review): 1 presumably makes the helper keep the trailing separator; doGetPath is not visible here.
    final int separatorArgument = 1;
    return doGetPath(fileName, separatorArgument);
}

/**
 * Gets the path from a full fileName, which excludes the prefix, and
 * also excluding the final directory separator.
 * <p>
 * This method will handle a file in either Unix or Windows format.
 * The method is entirely text based, and returns the text before the
 * last forward or backslash.
 *
** C:\a\b\c.txt --> a\b * ~/a/b/c.txt --> a/b * a.txt --> "" * a/b/c --> a/b * a/b/c/ --> a/b/c **
* The output will be the same irrespective of the machine that the code is running on. *
** This method drops the prefix from the result. * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. *
*
 * @param fileName the fileName to query, null returns null
 * @return the path of the file, an empty string if none exists, null if invalid
 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
 */
public static String getPathNoEndSeparator(final String fileName) {
    // NOTE(review): 0 presumably makes the helper drop the trailing separator; doGetPath is not visible here.
    final int separatorArgument = 0;
    return doGetPath(fileName, separatorArgument);
}

/**
 * Gets the prefix from a full fileName, such as {@code C:/}
 * or {@code ~/}.
 * <p>
 * This method will handle a file in either Unix or Windows format.
 * The prefix includes the first slash in the full fileName where applicable.
 *
** Windows: * a\b\c.txt --> "" --> relative * \a\b\c.txt --> "\" --> current drive absolute * C:a\b\c.txt --> "C:" --> drive relative * C:\a\b\c.txt --> "C:\" --> absolute * \\server\a\b\c.txt --> "\\server\" --> UNC * * Unix: * a/b/c.txt --> "" --> relative * /a/b/c.txt --> "/" --> absolute * ~/a/b/c.txt --> "~/" --> current user * ~ --> "~/" --> current user (slash added) * ~user/a/b/c.txt --> "~user/" --> named user * ~user --> "~user/" --> named user (slash added) **
* The output will be the same irrespective of the machine that the code is running on,
 * i.e. both Unix and Windows prefixes are matched regardless.
 *
*
 * @param fileName the fileName to query, null returns null
 * @return the prefix of the file, null if invalid
 * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
 */
public static String getPrefix(final String fileName) {
    if (fileName == null) {
        return null;
    }
    final int prefixLength = getPrefixLength(fileName);
    if (prefixLength < 0) {
        return null;
    }
    // A reported length beyond the input means the prefix is the whole name
    // plus an implied trailing separator (e.g. "~" or "~user").
    final boolean separatorImplied = prefixLength > fileName.length();
    if (separatorImplied) {
        return requireNonNullChars(fileName) + UNIX_NAME_SEPARATOR;
    }
    final String prefix = fileName.substring(0, prefixLength);
    return requireNonNullChars(prefix);
}

/**
 * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}.
 * <p>
 * This method will handle a file in either Unix or Windows format.
 *
** The prefix length includes the first slash in the full fileName * if applicable. Thus, it is possible that the length returned is greater * than the length of the input string. *
** Windows: * a\b\c.txt --> 0 --> relative * \a\b\c.txt --> 1 --> current drive absolute * C:a\b\c.txt --> 2 --> drive relative * C:\a\b\c.txt --> 3 --> absolute * \\server\a\b\c.txt --> 9 --> UNC * \\\a\b\c.txt --> -1 --> error * * Unix: * a/b/c.txt --> 0 --> relative * /a/b/c.txt --> 1 --> absolute * ~/a/b/c.txt --> 2 --> current user * ~ --> 2 --> current user (slash added) * ~user/a/b/c.txt --> 6 --> named user * ~user --> 6 --> named user (slash added) * //server/a/b/c.txt --> 9 * ///a/b/c.txt --> -1 --> error * C: --> 0 --> valid file name as only null character and / are reserved characters **
* The output will be the same irrespective of the machine that the code is running on,
 * i.e. both Unix and Windows prefixes are matched regardless.
 *
** Note that a leading // (or \\) is used to indicate a UNC name on Windows. * These must be followed by a server name, so double-slashes are not collapsed * to a single slash at the start of the fileName. *
* * @param fileName the fileName to find the prefix in, null returns -1 * @return the length of the prefix, -1 if invalid or null */ public static int getPrefixLength(final String fileName) { if (fileName == null) { return NOT_FOUND; } final int len = fileName.length(); if (len == 0) { return 0; } char ch0 = fileName.charAt(0); if (ch0 == ':') { return NOT_FOUND; } if (len == 1) { if (ch0 == '~') { return 2; // return a length greater than the input } return isSeparator(ch0) ? 1 : 0; } if (ch0 == '~') { int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1); int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1); if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { return len + 1; // return a length greater than the input } posUnix = posUnix == NOT_FOUND ? posWin : posUnix; posWin = posWin == NOT_FOUND ? posUnix : posWin; return Math.min(posUnix, posWin) + 1; } final char ch1 = fileName.charAt(1); if (ch1 == ':') { ch0 = Character.toUpperCase(ch0); if (ch0 >= 'A' && ch0 <= 'Z') { if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) { return 0; } if (len == 2 || !isSeparator(fileName.charAt(2))) { return 2; } return 3; } if (ch0 == UNIX_NAME_SEPARATOR) { return 1; } return NOT_FOUND; } if (!isSeparator(ch0) || !isSeparator(ch1)) { return isSeparator(ch0) ? 1 : 0; } int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2); int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2); if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { return NOT_FOUND; } posUnix = posUnix == NOT_FOUND ? posWin : posUnix; posWin = posWin == NOT_FOUND ? posUnix : posWin; final int pos = Math.min(posUnix, posWin) + 1; final String hostnamePart = fileName.substring(2, pos - 1); return isValidHostName(hostnamePart) ? pos : NOT_FOUND; } /** * Returns the index of the last extension separator character, which is a dot. ** This method also checks that there is no directory separator after the last dot. 
To do this it uses * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format. *
** The output will be the same irrespective of the machine that the code is running on, with the * exception of a possible {@link IllegalArgumentException} on Windows (see below). *
* Note: This method used to have a hidden problem for names like "foo.exe:bar.txt". * In this case, the name wouldn't be the name of a file, but the identifier of an * alternate data stream (bar.txt) on the file foo.exe. The method used to return * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing * an {@link IllegalArgumentException} for names like this. * * @param fileName * the fileName to find the last extension separator in, null returns -1 * @return the index of the last extension separator character, or -1 if there is no such character * @throws IllegalArgumentException Windows only: The fileName parameter is, in fact, * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". */ public static int indexOfExtension(final String fileName) throws IllegalArgumentException { if (fileName == null) { return NOT_FOUND; } if (isSystemWindows()) { // Special handling for NTFS ADS: Don't accept colon in the fileName. final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); if (offset != -1) { throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); } } final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); final int lastSeparator = indexOfLastSeparator(fileName); return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; } /** * Returns the index of the last directory separator character. ** This method will handle a file in either Unix or Windows format. * The position of the last forward or backslash is returned. *
* The output will be the same irrespective of the machine that the code is running on.
 *
 * @param fileName the fileName to find the last path separator in, null returns -1
 * @return the index of the last separator character, or -1 if there
 * is no such character
 */
public static int indexOfLastSeparator(final String fileName) {
    if (fileName == null) {
        return NOT_FOUND;
    }
    // lastIndexOf yields -1 for a missing separator, so the maximum is -1 only when neither style occurs.
    return Math.max(
            fileName.lastIndexOf(UNIX_NAME_SEPARATOR),
            fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR));
}

/** Tests whether the given string is {@code null} or has zero length. */
private static boolean isEmpty(final String string) {
    return string == null || string.length() == 0;
}

/**
 * Checks whether the extension of the fileName is one of those specified.
 *
* This method obtains the extension as the textual part of the fileName
* after the last dot. There must be no directory separator after the dot.
* The extension check is case-sensitive on all platforms.
*
* @param fileName the fileName to query, null returns false
* @param extensions the extensions to check for, null checks for no extension
* @return true if the fileName is one of the extensions
* @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
*/
public static boolean isExtension(final String fileName, final Collection
* This method obtains the extension as the textual part of the fileName
* after the last dot. There must be no directory separator after the dot.
* The extension check is case-sensitive on all platforms.
*
* @param fileName the fileName to query, null returns false
* @param extension the extension to check for, null or empty checks for no extension
* @return true if the fileName has the specified extension
* @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
*/
public static boolean isExtension(final String fileName, final String extension) {
    if (fileName == null) {
        return false;
    }
    // Reject embedded NUL characters before any extension lookup.
    requireNonNullChars(fileName);
    // A null or empty extension asks whether the name has no extension at all.
    final boolean expectNoExtension = isEmpty(extension);
    return expectNoExtension
            ? indexOfExtension(fileName) == NOT_FOUND
            : extension.equals(getExtension(fileName));
}
/**
 * Tests whether the fileName's extension equals any of the given extensions.
 *
 * The extension is the text after the last dot, provided no directory
 * separator follows that dot. The comparison is case-sensitive on all platforms.
 *
 * @param fileName the fileName to query, null returns false
 * @param extensions the extensions to check for, null or empty checks for no extension
 * @return true if the fileName is one of the extensions
 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
 */
public static boolean isExtension(final String fileName, final String... extensions) {
    if (fileName == null) {
        return false;
    }
    requireNonNullChars(fileName);
    final boolean expectNoExtension = extensions == null || extensions.length == 0;
    if (expectNoExtension) {
        return indexOfExtension(fileName) == NOT_FOUND;
    }
    final String actual = getExtension(fileName);
    for (final String candidate : extensions) {
        // Called on the actual extension so that a null candidate simply does not match.
        if (actual.equals(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether the given string is a dotted-quad IPv4 address.
 *
 * Each of the four groups must be at most {@code IPV4_MAX_OCTET_VALUE} and
 * must not have a leading zero unless it is the single digit "0".
 *
 * @param name the name to validate
 * @return true if the given name is a valid IPv4 address
 */
// mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
private static boolean isIPv4Address(final String name) {
    final Matcher matcher = IPV4_PATTERN.matcher(name);
    if (!(matcher.matches() && matcher.groupCount() == 4)) {
        return false;
    }
    // Each captured group must be in range and free of leading zeros.
    for (int group = 1; group <= 4; group++) {
        final String octet = matcher.group(group);
        if (Integer.parseInt(octet) > IPV4_MAX_OCTET_VALUE) {
            return false;
        }
        if (octet.length() > 1 && octet.charAt(0) == '0') {
            return false;
        }
    }
    return true;
}
// copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
/**
* Checks whether a given string represents a valid IPv6 address.
*
* @param inet6Address the name to validate
* @return true if the given name is a valid IPv6 address
*/
private static boolean isIPv6Address(final String inet6Address) {
final boolean containsCompressedZeroes = inet6Address.contains("::");
if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
return false;
}
if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
|| inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
return false;
}
String[] octets = inet6Address.split(":");
if (containsCompressedZeroes) {
final List Accepted are IP addresses (v4 and v6) as well as what the
* RFC calls a "reg-name". Percent encoded names don't seem to be
* valid names in UNC paths.
* This method normalizes a path to a standard format.
* The input may contain separators in either Unix or Windows format.
* The output will contain separators in the format of the system.
*
* A trailing slash will be retained.
* A double slash will be merged to a single slash (but UNC names are handled).
* A single dot path segment will be removed.
* A double dot will cause that path segment and the one before to be removed.
* If the double dot has no parent path segment to work with, {@code null}
* is returned.
*
* The output will be the same on both Unix and Windows except
* for the separator character.
*
* This method normalizes a path to a standard format.
* The input may contain separators in either Unix or Windows format.
* The output will contain separators in the format specified.
*
* A trailing slash will be retained.
* A double slash will be merged to a single slash (but UNC names are handled).
* A single dot path segment will be removed.
* A double dot will cause that path segment and the one before to be removed.
* If the double dot has no parent path segment to work with, {@code null}
* is returned.
*
* The output will be the same on both Unix and Windows except
* for the separator character.
*
* This method normalizes a path to a standard format.
* The input may contain separators in either Unix or Windows format.
* The output will contain separators in the format of the system.
*
* A trailing slash will be removed.
* A double slash will be merged to a single slash (but UNC names are handled).
* A single dot path segment will be removed.
* A double dot will cause that path segment and the one before to be removed.
* If the double dot has no parent path segment to work with, {@code null}
* is returned.
*
* The output will be the same on both Unix and Windows except
* for the separator character.
*
* This method normalizes a path to a standard format.
* The input may contain separators in either Unix or Windows format.
* The output will contain separators in the format specified.
*
* A trailing slash will be removed.
* A double slash will be merged to a single slash (but UNC names are handled).
* A single dot path segment will be removed.
* A double dot will cause that path segment and the one before to be removed.
* If the double dot has no parent path segment to work with, {@code null}
* is returned.
*
* The output will be the same on both Unix and Windows including
* the separator character.
*
* This method returns the textual part of the fileName before the last dot.
* There must be no directory separator after the dot.
*
* The output will be the same irrespective of the machine that the code is running on.
*
* @param fileName the fileName to query, null returns null
* @return the fileName minus the extension
* @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
*/
public static String removeExtension(final String fileName) {
    if (fileName == null) {
        return null;
    }
    // Reject embedded NUL characters before looking for the extension.
    requireNonNullChars(fileName);
    final int dotIndex = indexOfExtension(fileName);
    // Without an extension the name is returned untouched; otherwise cut just before the dot.
    return dotIndex == NOT_FOUND ? fileName : fileName.substring(0, dotIndex);
}
/**
 * Verifies that the path contains no null character ({@code U+0000}), which is
 * a sign of unsanitized data being passed to file level functions.
 *
 * Such data may be used for poison byte attacks.
 *
 * @param path the path to check
 * @return The input
 * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
 */
private static String requireNonNullChars(final String path) {
    final boolean containsNul = path.indexOf('\0') != -1;
    if (!containsNul) {
        return path;
    }
    throw new IllegalArgumentException(
            "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
}
/**
 * Rewrites every separator in the path to the separator of the current file system,
 * as reported by {@code FileSystem.getCurrent()}.
 *
 * @param path the path to be changed, null ignored.
 * @return the updated path.
 */
public static String separatorsToSystem(final String path) {
    final FileSystem currentFileSystem = FileSystem.getCurrent();
    return currentFileSystem.normalizeSeparators(path);
}
/**
 * Rewrites every separator in the path to the Unix separator of forward slash.
 *
 * @param path the path to be changed, null ignored.
 * @return the new path.
 */
public static String separatorsToUnix(final String path) {
    final FileSystem unixStyle = FileSystem.LINUX;
    return unixStyle.normalizeSeparators(path);
}
/**
 * Rewrites every separator in the path to the Windows separator of backslash.
 *
 * @param path the path to be changed, null ignored.
 * @return the updated path.
 */
public static String separatorsToWindows(final String path) {
    final FileSystem windowsStyle = FileSystem.WINDOWS;
    return windowsStyle.normalizeSeparators(path);
}
/**
* Splits a string into a number of tokens.
* The text is split by '?' and '*'.
* Where multiple '*' occur consecutively they are collapsed into a single '*'.
*
* @param text the text to split
* @return the array of tokens, never null
*/
static String[] splitOnTokens(final String text) {
// used by wildcardMatch
// package level so a unit test may run on this
if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
return new String[] { text };
}
final char[] array = text.toCharArray();
final ArrayList
* The wildcard matcher uses the characters '?' and '*' to represent a
* single or multiple (zero or more) wildcard characters.
* This is the same as often found on DOS/Unix command lines.
* The check is case-sensitive always.
*
* The wildcard matcher uses the characters '?' and '*' to represent a
* single or multiple (zero or more) wildcard characters.
* N.B. the sequence "*?" does not work properly at present in match strings.
*
* @param fileName the fileName to match on
* @param wildcardMatcher the wildcard string to match against
* @param ioCase what case sensitivity rule to use, null means case-sensitive
* @return true if the fileName matches the wildcard string
* @since 1.3
*/
public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
if (fileName == null && wildcardMatcher == null) {
return true;
}
if (fileName == null || wildcardMatcher == null) {
return false;
}
ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
final String[] wcs = splitOnTokens(wildcardMatcher);
boolean anyChars = false;
int textIdx = 0;
int wcsIdx = 0;
final Deque
* The wildcard matcher uses the characters '?' and '*' to represent a
* single or multiple (zero or more) wildcard characters.
* This is the same as often found on DOS/Unix command lines.
* The check is case-sensitive on Unix and case-insensitive on Windows.
*
* /foo// --> /foo/
* /foo/./ --> /foo/
* /foo/../bar --> /bar
* /foo/../bar/ --> /bar/
* /foo/../bar/../baz --> /baz
* //foo//./bar --> //foo/bar
* /../ --> null
* ../foo --> null
* foo/bar/.. --> foo/
* foo/../../bar --> null
* foo/../bar --> bar
* //server/foo/../bar --> //server/bar
* //server/../bar --> null
* C:\foo\..\bar --> C:\bar
* C:\..\bar --> null
* ~/foo/../bar/ --> ~/bar/
* ~/../bar --> null
*
* (Note the file separator returned will be correct for Windows/Unix)
*
* @param fileName the fileName to normalize, null returns null
* @return the normalized fileName, or null if invalid
* @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
*/
public static String normalize(final String fileName) {
    // This variant passes true as the last argument; the NoEndSeparator variants pass false.
    final boolean keepEndSeparator = true;
    return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, keepEndSeparator);
}
/**
 * Normalizes a path, removing double and single dot path steps.
 * <p>
 * The separator written to the result is selected by the {@code unixSeparator} flag.
 * </p>
 * <pre>
 * /foo//               --&gt;   /foo/
 * /foo/./              --&gt;   /foo/
 * /foo/../bar          --&gt;   /bar
 * /foo/../bar/         --&gt;   /bar/
 * /foo/../bar/../baz   --&gt;   /baz
 * //foo//./bar         --&gt;   //foo/bar
 * /../                 --&gt;   null
 * ../foo               --&gt;   null
 * foo/bar/..           --&gt;   foo/
 * foo/../../bar        --&gt;   null
 * foo/../bar           --&gt;   bar
 * //server/foo/../bar  --&gt;   //server/bar
 * //server/../bar      --&gt;   null
 * C:\foo\..\bar        --&gt;   C:\bar
 * C:\..\bar            --&gt;   null
 * ~/foo/../bar/        --&gt;   ~/bar/
 * ~/../bar             --&gt;   null
 * </pre>
 * <p>
 * The output will be the same on both Unix and Windows including
 * the separator character.
 * </p>
 *
 * @param fileName the fileName to normalize, null returns null
 * @param unixSeparator {@code true} if a Unix separator should
 * be used or {@code false} if a Windows separator should be used.
 * @return the normalized fileName, or null if invalid
 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
 * @since 2.0
 */
public static String normalize(final String fileName, final boolean unixSeparator) {
    // This variant passes true as the last argument; the NoEndSeparator variants pass false.
    final boolean keepEndSeparator = true;
    return doNormalize(fileName, toSeparator(unixSeparator), keepEndSeparator);
}
/**
 * Normalizes a path, removing double and single dot path steps,
 * and removing any final directory separator.
 * <p>
 * A trailing slash will be removed.
 * A double slash will be merged to a single slash (but UNC names are handled).
 * A single dot path segment will be removed.
 * A double dot will cause that path segment and the one before to be removed.
 * If the double dot has no parent path segment to work with, {@code null}
 * is returned.
 * </p>
 * <pre>
 * /foo//               --&gt;   /foo
 * /foo/./              --&gt;   /foo
 * /foo/../bar          --&gt;   /bar
 * /foo/../bar/         --&gt;   /bar
 * /foo/../bar/../baz   --&gt;   /baz
 * //foo//./bar         --&gt;   //foo/bar
 * /../                 --&gt;   null
 * ../foo               --&gt;   null
 * foo/bar/..           --&gt;   foo
 * foo/../../bar        --&gt;   null
 * foo/../bar           --&gt;   bar
 * //server/foo/../bar  --&gt;   //server/bar
 * //server/../bar      --&gt;   null
 * C:\foo\..\bar        --&gt;   C:\bar
 * C:\..\bar            --&gt;   null
 * ~/foo/../bar/        --&gt;   ~/bar
 * ~/../bar             --&gt;   null
 * </pre>
 * (Note the file separator returned will be correct for Windows/Unix)
 *
 * @param fileName the fileName to normalize, null returns null
 * @return the normalized fileName, or null if invalid
 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
 */
public static String normalizeNoEndSeparator(final String fileName) {
    // false here is what distinguishes this method from normalize(String), which passes true.
    final boolean keepEndSeparator = false;
    return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, keepEndSeparator);
}
/**
 * Normalizes a path, removing double and single dot path steps,
 * and removing any final directory separator.
 * <p>
 * The input may contain separators in either Unix or Windows format.
 * The output will contain separators in the format specified.
 * </p>
 * <p>
 * A trailing slash will be removed.
 * A double slash will be merged to a single slash (but UNC names are handled).
 * A single dot path segment will be removed.
 * A double dot will cause that path segment and the one before to be removed.
 * If the double dot has no parent path segment to work with, {@code null}
 * is returned.
 * </p>
 * <pre>
 * /foo//               --&gt;   /foo
 * /foo/./              --&gt;   /foo
 * /foo/../bar          --&gt;   /bar
 * /foo/../bar/         --&gt;   /bar
 * /foo/../bar/../baz   --&gt;   /baz
 * //foo//./bar         --&gt;   //foo/bar
 * /../                 --&gt;   null
 * ../foo               --&gt;   null
 * foo/bar/..           --&gt;   foo
 * foo/../../bar        --&gt;   null
 * foo/../bar           --&gt;   bar
 * //server/foo/../bar  --&gt;   //server/bar
 * //server/../bar      --&gt;   null
 * C:\foo\..\bar        --&gt;   C:\bar
 * C:\..\bar            --&gt;   null
 * ~/foo/../bar/        --&gt;   ~/bar
 * ~/../bar             --&gt;   null
 * </pre>
 * <p>
 * The output will be the same on both Unix and Windows including
 * the separator character.
 * </p>
 *
 * @param fileName the fileName to normalize, null returns null
 * @param unixSeparator {@code true} if a Unix separator should
 * be used or {@code false} if a Windows separator should be used.
 * @return the normalized fileName, or null if invalid
 * @throws IllegalArgumentException if the fileName contains the null character ({@code U+0000})
 * @since 2.0
 */
public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
    // false here is what distinguishes this method from normalize(String, boolean), which passes true.
    final boolean keepEndSeparator = false;
    return doNormalize(fileName, toSeparator(unixSeparator), keepEndSeparator);
}
/**
 * Checks a fileName to see if it matches the specified wildcard matcher,
 * always testing case-sensitively.
 * <p>
 * The wildcard matcher uses the characters '?' and '*' to represent a
 * single or multiple (zero or more) wildcard characters.
 * This is the same as often found on DOS/Unix command lines.
 * The check is case-sensitive always.
 * </p>
 * <pre>
 * wildcardMatch("c.txt", "*.txt")      --&gt; true
 * wildcardMatch("c.txt", "*.jpg")      --&gt; false
 * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
 * wildcardMatch("c.txt", "*.???")      --&gt; true
 * wildcardMatch("c.txt", "*.????")     --&gt; false
 * </pre>
 * N.B. the sequence "*?" does not work properly at present in match strings.
 *
 * @param fileName the fileName to match on
 * @param wildcardMatcher the wildcard string to match against
 * @return true if the fileName matches the wildcard string
 * @see IOCase#SENSITIVE
 */
public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
    // Delegate to the three-argument overload with the case-sensitive rule fixed.
    final IOCase caseRule = IOCase.SENSITIVE;
    return wildcardMatch(fileName, wildcardMatcher, caseRule);
}
/**
 * Checks a fileName to see if it matches the specified wildcard matcher
 * using the case rules of the system.
 * <p>
 * The wildcard matcher uses the characters '?' and '*' to represent a
 * single or multiple (zero or more) wildcard characters.
 * This is the same as often found on DOS/Unix command lines.
 * The check is case-sensitive on Unix and case-insensitive on Windows.
 * </p>
 * <pre>
 * wildcardMatch("c.txt", "*.txt")      --&gt; true
 * wildcardMatch("c.txt", "*.jpg")      --&gt; false
 * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
 * wildcardMatch("c.txt", "*.???")      --&gt; true
 * wildcardMatch("c.txt", "*.????")     --&gt; false
 * </pre>
 * N.B. the sequence "*?" does not work properly at present in match strings.
 *
 * @param fileName the fileName to match on
 * @param wildcardMatcher the wildcard string to match against
 * @return true if the fileName matches the wildcard string
 * @see IOCase#SYSTEM
 */
public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
    // Delegate to the three-argument overload with the system case rule fixed.
    final IOCase caseRule = IOCase.SYSTEM;
    return wildcardMatch(fileName, wildcardMatcher, caseRule);
}
/**
 * Instances should NOT be constructed in standard programming.
 * <p>
 * The constructor takes no arguments and performs no initialization.
 * </p>
 */
public FilenameUtils() {
    // Intentionally empty: there is nothing to initialize.
}
}