From 220bc15cb2a762fd6dc5ee22f67ff7c4472817b0 Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Wed, 29 May 2024 14:48:35 +0200 Subject: [PATCH 1/5] Fixed incorrect uri decoding of file containing an open square bracket [ --- .../java/org/apache/commons/vfs2/provider/UriParser.java | 2 +- .../org/apache/commons/vfs2/provider/UriParserTest.java | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 9b23df3cbd..dffc1696f6 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -328,7 +328,7 @@ public static void decode(final StringBuilder buffer, final int offset, final in boolean ipv6Host = false; for (; count > 0; count--, index++) { final char ch = buffer.charAt(index); - if (ch == '[') { + if (ch == '[' && buffer.lastIndexOf("://", index) == index - 3) { ipv6Host = true; } if (ch == ']') { diff --git a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java index 5ea09a65ca..be40d35f93 100644 --- a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java +++ b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java @@ -53,6 +53,12 @@ public void testIPv6CheckUriEncoding() throws FileSystemException { UriParser.checkUriEncoding("http://[fe80::14b5:1204:5410:64ca%en1]:8080"); } + @Test + public void testBracketUriDecoding() throws FileSystemException { + assertEquals("http://[fe80::14b5:1204:5410:64ca%en1]:8080", UriParser.decode("http://[fe80::14b5:1204:5410:64ca%en1]:8080")); + assertEquals("file:/user/file [with brackets].txt", UriParser.decode("file:/user/file%20[with%20brackets].txt")); + } + @Test public void testNormalScheme() { assertEquals("ftp", UriParser.extractScheme(schemes, "ftp://user:pass@host/some/path/some:file")); From 514e58d79922e6ca917dc32db72ef1d5bfb638db Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Wed, 29 May 2024 14:59:10 +0200 Subject: [PATCH 2/5] Fixed incorrect uri decoding of file containing an open square bracket [ --- .../main/java/org/apache/commons/vfs2/provider/UriParser.java | 3 ++- .../java/org/apache/commons/vfs2/provider/UriParserTest.java | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index dffc1696f6..8c1f02fcf3 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -328,7 +328,8 @@ public static void decode(final StringBuilder buffer, final int offset, final in boolean ipv6Host = false; for (; count > 0; count--, index++) { final char ch = buffer.charAt(index); - if (ch == '[' && buffer.lastIndexOf("://", index) == index - 3) { + if (ch == '[' && + (buffer.lastIndexOf("://", index) == index - 3 || buffer.lastIndexOf("@", index) == index - 1)) { ipv6Host = true; } if (ch == ']') { diff --git a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java index be40d35f93..6b7162b468 100644 --- a/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java +++ b/commons-vfs2/src/test/java/org/apache/commons/vfs2/provider/UriParserTest.java @@ -56,6 +56,7 @@ public void testIPv6CheckUriEncoding() throws FileSystemException { @Test public void testBracketUriDecoding() throws FileSystemException { assertEquals("http://[fe80::14b5:1204:5410:64ca%en1]:8080", UriParser.decode("http://[fe80::14b5:1204:5410:64ca%en1]:8080")); + assertEquals("http://username@[fe80::14b5:1204:5410:64ca%en1]:8080", UriParser.decode("http://username@[fe80::14b5:1204:5410:64ca%en1]:8080")); assertEquals("file:/user/file [with brackets].txt", UriParser.decode("file:/user/file%20[with%20brackets].txt")); } From 70104c67ffd0fcf745cc1f84b99bd76123acf80c Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Thu, 30 May 2024 10:04:53 +0200 Subject: [PATCH 3/5] Fixed incorrect uri decoding of file containing an open square bracket [ Use regular expression to check if [ is used for an IP v6 address --- .../main/java/org/apache/commons/vfs2/provider/UriParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 8c1f02fcf3..2f58a5042b 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -329,7 +329,7 @@ public static void decode(final StringBuilder buffer, final int offset, final in for (; count > 0; count--, index++) { final char ch = buffer.charAt(index); if (ch == '[' && - (buffer.lastIndexOf("://", index) == index - 3 || buffer.lastIndexOf("@", index) == index - 1)) { + (buffer.substring(0, index).matches("\\w+://([^/]+@)?"))) { ipv6Host = true; } if (ch == ']') { From 259705465c1a1042d3d85a66f336da883ec4c22d Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Thu, 30 May 2024 10:05:30 +0200 Subject: [PATCH 4/5] Fixed incorrect uri decoding of file containing an open square bracket [ Use regular expression to check if [ is used for an IP v6 address --- .../main/java/org/apache/commons/vfs2/provider/UriParser.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 2f58a5042b..e5ceb761a3 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -328,8 +328,7 @@ public static void decode(final StringBuilder buffer, final int offset, final in boolean ipv6Host = false; for (; count > 0; count--, index++) { final char ch = buffer.charAt(index); - if (ch == '[' && - (buffer.substring(0, index).matches("\\w+://([^/]+@)?"))) { + if (ch == '[' && buffer.substring(0, index).matches("\\w+://([^/]+@)?")) { ipv6Host = true; } if (ch == ']') { From 1188f261cf33db7b742563ffd546e97fa54fd36c Mon Sep 17 00:00:00 2001 From: Anthony Goubard Date: Tue, 4 Feb 2025 10:20:00 +0100 Subject: [PATCH 5/5] UriParser Fix for decoding files with open square bracket --- .../commons/vfs2/provider/UriParser.java | 1152 ++++++++--------- 1 file changed, 576 insertions(+), 576 deletions(-) diff --git a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java index 4bbb84fe72..3adf2b2036 100644 --- a/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java +++ b/commons-vfs2/src/main/java/org/apache/commons/vfs2/provider/UriParser.java @@ -1,576 +1,576 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.vfs2.provider; - -import java.util.Arrays; - -import org.apache.commons.lang3.SystemUtils; -import org.apache.commons.vfs2.FileName; -import org.apache.commons.vfs2.FileSystemException; -import org.apache.commons.vfs2.FileType; -import org.apache.commons.vfs2.VFS; - -/** - * Utilities for dealing with URIs. See RFC 2396 for details. - */ -public final class UriParser { - - /** - * The set of valid separators. These are all converted to the normalized one. Does not contain the - * normalized separator - */ - // public static final char[] separators = {'\\'}; - public static final char TRANS_SEPARATOR = '\\'; - - /** - * The normalised separator to use. - */ - private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR; - - private static final int HEX_BASE = 16; - - private static final int BITS_IN_HALF_BYTE = 4; - - private static final char LOW_MASK = 0x0F; - - /** - * Encodes and appends a string to a StringBuilder. - * - * @param buffer The StringBuilder to append to. - * @param unencodedValue The String to encode and append. - * @param reserved characters to encode. - */ - public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) { - final int offset = buffer.length(); - buffer.append(unencodedValue); - encode(buffer, offset, unencodedValue.length(), reserved); - } - - static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) { - final int offset = buffer.length(); - buffer.append(unencodedValue); - encodeRfc2396(buffer, offset, unencodedValue.length(), allowed); - } - - /** - * Canonicalizes a path. - * - * @param buffer Source data. - * @param offset Where to start reading. - * @param length How much to read. - * @param fileNameParser Now to encode and decode. - * @throws FileSystemException If an I/O error occurs. - */ - public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length, - final FileNameParser fileNameParser) throws FileSystemException { - int index = offset; - int count = length; - for (; count > 0; count--, index++) { - final char ch = buffer.charAt(index); - if (ch == '%') { - if (count < 3) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + count)); - } - - // Decode - final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); - final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); - if (dig1 == -1 || dig2 == -1) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + 3)); - } - final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); - - final boolean match = value == '%' || fileNameParser.encodeCharacter(value); - - if (match) { - // this is a reserved character, not allowed to decode - index += 2; - count -= 2; - continue; - } - - // Replace - buffer.setCharAt(index, value); - buffer.delete(index + 1, index + 3); - count -= 2; - } else if (fileNameParser.encodeCharacter(ch)) { - // Encode - final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; - buffer.setCharAt(index, '%'); - buffer.insert(index + 1, digits); - index += 2; - } - } - } - - /** - * Decodes the String. - * - * @param uri The String to decode. - * @throws FileSystemException if an error occurs. - */ - public static void checkUriEncoding(final String uri) throws FileSystemException { - decode(uri); - } - - /** - * Removes %nn encodings from a string. - * - * @param encodedStr The encoded String. - * @return The decoded String. - * @throws FileSystemException if an error occurs. - */ - public static String decode(final String encodedStr) throws FileSystemException { - if (encodedStr == null) { - return null; - } - if (encodedStr.indexOf('%') < 0) { - return encodedStr; - } - final StringBuilder buffer = new StringBuilder(encodedStr); - decode(buffer, 0, buffer.length()); - return buffer.toString(); - } - - /** - * Removes %nn encodings from a string. - * - * @param buffer StringBuilder containing the string to decode. - * @param offset The position in the string to start decoding. - * @param length The number of characters to decode. - * @throws FileSystemException if an error occurs. - */ - public static void decode(final StringBuilder buffer, final int offset, final int length) - throws FileSystemException { - int index = offset; - int count = length; - boolean ipv6Host = false; - for (; count > 0; count--, index++) { - final char ch = buffer.charAt(index); - if (ch == '[' && buffer.substring(0, index).matches("\\w+://([^/]+@)?")) { - ipv6Host = true; - } - if (ch == ']') { - ipv6Host = false; - } - if (ch != '%' || ipv6Host) { - continue; - } - - if (count < 3) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + count)); - } - - // Decode - final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); - final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); - if (dig1 == -1 || dig2 == -1) { - throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", - buffer.substring(index, index + 3)); - } - final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); - - // Replace - buffer.setCharAt(index, value); - buffer.delete(index + 1, index + 3); - count -= 2; - } - } - - /** - * Converts "special" characters to their %nn value. - * - * @param decodedStr The decoded String. - * @return The encoded String. - */ - public static String encode(final String decodedStr) { - return encode(decodedStr, null); - } - - /** - * Converts "special" characters to their %nn value. - * - * @param decodedStr The decoded String. - * @param reserved Characters to encode. - * @return The encoded String - */ - public static String encode(final String decodedStr, final char[] reserved) { - if (decodedStr == null) { - return null; - } - final StringBuilder buffer = new StringBuilder(decodedStr); - encode(buffer, 0, buffer.length(), reserved); - return buffer.toString(); - } - - /** - * Encode an array of Strings. - * - * @param strings The array of Strings to encode. - * @return An array of encoded Strings. - */ - public static String[] encode(final String[] strings) { - if (strings == null) { - return null; - } - Arrays.setAll(strings, i -> encode(strings[i])); - return strings; - } - - /** - * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters. - * - * @param buffer The StringBuilder to append to. - * @param offset The position in the buffer to start encoding at. - * @param length The number of characters to encode. - * @param reserved characters to encode. - */ - public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) { - int index = offset; - int count = length; - for (; count > 0; index++, count--) { - final char ch = buffer.charAt(index); - boolean match = ch == '%'; - if (reserved != null) { - for (int i = 0; !match && i < reserved.length; i++) { - if (ch == reserved[i]) { - match = true; - break; - } - } - } - if (match) { - // Encode - final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; - buffer.setCharAt(index, '%'); - buffer.insert(index + 1, digits); - index += 2; - } - } - } - - static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) { - int index = offset; - int count = length; - for (; count > 0; index++, count--) { - final char ch = buffer.charAt(index); - if (Arrays.binarySearch(allowed, ch) < 0) { - // Encode - final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; - buffer.setCharAt(index, '%'); - buffer.insert(index + 1, digits); - index += 2; - } - } - } - - /** - * Extracts the first element of a path. - * - * @param name StringBuilder containing the path. - * @return The first element of the path. - */ - public static String extractFirstElement(final StringBuilder name) { - final int len = name.length(); - if (len < 1) { - return null; - } - int startPos = 0; - if (name.charAt(0) == SEPARATOR_CHAR) { - startPos = 1; - } - for (int pos = startPos; pos < len; pos++) { - if (name.charAt(pos) == SEPARATOR_CHAR) { - // Found a separator - final String elem = name.substring(startPos, pos); - name.delete(startPos, pos + 1); - return elem; - } - } - - // No separator - final String elem = name.substring(startPos); - name.setLength(0); - return elem; - } - - /** - * Extract the query String from the URI. - * - * @param name StringBuilder containing the URI. - * @return The query string, if any. null otherwise. - */ - public static String extractQueryString(final StringBuilder name) { - for (int pos = 0; pos < name.length(); pos++) { - if (name.charAt(pos) == '?') { - final String queryString = name.substring(pos + 1); - name.delete(pos, name.length()); - return queryString; - } - } - - return null; - } - - /** - * Extracts the scheme from a URI. - * - * @param uri The URI. - * @return The scheme name. Returns null if there is no scheme. - * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. - */ - @Deprecated - public static String extractScheme(final String uri) { - return extractScheme(uri, null); - } - - /** - * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. - * - * @param uri The URI. - * @param buffer Returns the remainder of the URI. - * @return The scheme name. Returns null if there is no scheme. - * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. - */ - @Deprecated - public static String extractScheme(final String uri, final StringBuilder buffer) { - if (buffer != null) { - buffer.setLength(0); - buffer.append(uri); - } - - final int maxPos = uri.length(); - for (int pos = 0; pos < maxPos; pos++) { - final char ch = uri.charAt(pos); - - if (ch == ':') { - // Found the end of the scheme - final String scheme = uri.substring(0, pos); - if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) { - // This is not a scheme, but a Windows drive letter - return null; - } - if (buffer != null) { - buffer.delete(0, pos + 1); - } - return scheme.intern(); - } - - if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') { - // A scheme character - continue; - } - if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) { - // Not a scheme character - break; - } - // A scheme character (these are not allowed as the first - // character of the scheme), but can be used as subsequent - // characters. - } - - // No scheme in URI - return null; - } - - /** - * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. - *

- * The scheme is extracted based on the currently supported schemes in the system. That is to say the schemes - * supported by the registered providers. - *

- *

- * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically - * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them. - *

- * @param schemes The schemes to check. - * @param uri The potential URI. May also be a name. - * @return The scheme name. Returns null if there is no scheme. - * @since 2.3 - */ - public static String extractScheme(final String[] schemes, final String uri) { - return extractScheme(schemes, uri, null); - } - - /** - * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. - *

- * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes - * supported by the registered providers. - *

- *

- * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically - * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them. - *

- * @param schemes The schemes to check. - * @param uri The potential URI. May also just be a name. - * @param buffer Returns the remainder of the URI. - * @return The scheme name. Returns null if there is no scheme. - * @since 2.3 - */ - public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) { - if (buffer != null) { - buffer.setLength(0); - buffer.append(uri); - } - for (final String scheme : schemes) { - if (uri.startsWith(scheme + ":")) { - if (buffer != null) { - buffer.delete(0, uri.indexOf(':') + 1); - } - return scheme; - } - } - return null; - } - - /** - * Normalises the separators in a name. - * - * @param name The StringBuilder containing the name - * @return true if the StringBuilder was modified. - */ - public static boolean fixSeparators(final StringBuilder name) { - boolean changed = false; - int maxlen = name.length(); - for (int i = 0; i < maxlen; i++) { - final char ch = name.charAt(i); - if (ch == TRANS_SEPARATOR) { - name.setCharAt(i, SEPARATOR_CHAR); - changed = true; - } - if (i < maxlen - 2 && name.charAt(i) == '%' && name.charAt(i + 1) == '2') { - if (name.charAt(i + 2) == 'f' || name.charAt(i + 2) == 'F') { - name.setCharAt(i, SEPARATOR_CHAR); - name.delete(i + 1, i + 3); - maxlen -= 2; - changed = true; - } else if (name.charAt(i + 2) == 'e' || name.charAt(i + 2) == 'E') { - name.setCharAt(i, '.'); - name.delete(i + 1, i + 3); - maxlen -= 2; - changed = true; - } - } - } - return changed; - } - - /** - * Normalises a path. Does the following: - *
    - *
  • Removes empty path elements. - *
  • Handles '.' and '..' elements. - *
  • Removes trailing separator. - *
- * - * Its assumed that the separators are already fixed. - * - * @param path The path to normalize. - * @return The FileType. - * @throws FileSystemException if an error occurs. - * - * @see #fixSeparators - */ - public static FileType normalisePath(final StringBuilder path) throws FileSystemException { - FileType fileType = FileType.FOLDER; - if (path.length() == 0) { - return fileType; - } - - // '/' or '.' or '..' or anyPath/..' or 'anyPath/.' should always be a path - if (path.charAt(path.length() - 1) != '/' - && path.lastIndexOf("/..") != path.length() - 3 - && path.lastIndexOf("/.") != path.length() - 2 - && path.lastIndexOf("..") != 0 - && path.lastIndexOf(".") != 0 - ) { - fileType = FileType.FILE; - } - - // Adjust separators - // fixSeparators(path); - - // Determine the start of the first element - int startFirstElem = 0; - if (path.charAt(0) == SEPARATOR_CHAR) { - if (path.length() == 1) { - return fileType; - } - startFirstElem = 1; - } - - // Iterate over each element - int startElem = startFirstElem; - int maxlen = path.length(); - while (startElem < maxlen) { - // Find the end of the element - int endElem = startElem; - while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) { - endElem++; - } - - final int elemLen = endElem - startElem; - if (elemLen == 0) { - // An empty element - axe it - path.deleteCharAt(endElem); - maxlen = path.length(); - continue; - } - if (elemLen == 1 && path.charAt(startElem) == '.') { - // A '.' element - axe it - path.deleteCharAt(startElem); - maxlen = path.length(); - continue; - } - if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') { - // A '..' element - remove the previous element - if (startElem == startFirstElem) { - // Previous element is missing - throw new FileSystemException("vfs.provider/invalid-relative-path.error"); - } - - // Find start of previous element - int pos = startElem - 2; - while (pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR) { - pos--; - } - startElem = pos + 1; - - path.delete(startElem, endElem + 1); - maxlen = path.length(); - continue; - } - - // A regular element - startElem = endElem + 1; - } - - // Remove trailing separator - if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) { - path.deleteCharAt(maxlen - 1); - } - - return fileType; - } - - private UriParser() { - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.vfs2.provider; + +import java.util.Arrays; + +import org.apache.commons.lang3.SystemUtils; +import org.apache.commons.vfs2.FileName; +import org.apache.commons.vfs2.FileSystemException; +import org.apache.commons.vfs2.FileType; +import org.apache.commons.vfs2.VFS; + +/** + * Utilities for dealing with URIs. See RFC 2396 for details. + */ +public final class UriParser { + + /** + * The set of valid separators. These are all converted to the normalized one. Does not contain the + * normalized separator + */ + // public static final char[] separators = {'\\'}; + public static final char TRANS_SEPARATOR = '\\'; + + /** + * The normalised separator to use. + */ + private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR; + + private static final int HEX_BASE = 16; + + private static final int BITS_IN_HALF_BYTE = 4; + + private static final char LOW_MASK = 0x0F; + + /** + * Encodes and appends a string to a StringBuilder. + * + * @param buffer The StringBuilder to append to. + * @param unencodedValue The String to encode and append. + * @param reserved characters to encode. + */ + public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) { + final int offset = buffer.length(); + buffer.append(unencodedValue); + encode(buffer, offset, unencodedValue.length(), reserved); + } + + static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) { + final int offset = buffer.length(); + buffer.append(unencodedValue); + encodeRfc2396(buffer, offset, unencodedValue.length(), allowed); + } + + /** + * Canonicalizes a path. + * + * @param buffer Source data. + * @param offset Where to start reading. + * @param length How much to read. + * @param fileNameParser Now to encode and decode. + * @throws FileSystemException If an I/O error occurs. + */ + public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length, + final FileNameParser fileNameParser) throws FileSystemException { + int index = offset; + int count = length; + for (; count > 0; count--, index++) { + final char ch = buffer.charAt(index); + if (ch == '%') { + if (count < 3) { + throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", + buffer.substring(index, index + count)); + } + + // Decode + final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); + final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); + if (dig1 == -1 || dig2 == -1) { + throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", + buffer.substring(index, index + 3)); + } + final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); + + final boolean match = value == '%' || fileNameParser.encodeCharacter(value); + + if (match) { + // this is a reserved character, not allowed to decode + index += 2; + count -= 2; + continue; + } + + // Replace + buffer.setCharAt(index, value); + buffer.delete(index + 1, index + 3); + count -= 2; + } else if (fileNameParser.encodeCharacter(ch)) { + // Encode + final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; + buffer.setCharAt(index, '%'); + buffer.insert(index + 1, digits); + index += 2; + } + } + } + + /** + * Decodes the String. + * + * @param uri The String to decode. + * @throws FileSystemException if an error occurs. + */ + public static void checkUriEncoding(final String uri) throws FileSystemException { + decode(uri); + } + + /** + * Removes %nn encodings from a string. + * + * @param encodedStr The encoded String. + * @return The decoded String. + * @throws FileSystemException if an error occurs. + */ + public static String decode(final String encodedStr) throws FileSystemException { + if (encodedStr == null) { + return null; + } + if (encodedStr.indexOf('%') < 0) { + return encodedStr; + } + final StringBuilder buffer = new StringBuilder(encodedStr); + decode(buffer, 0, buffer.length()); + return buffer.toString(); + } + + /** + * Removes %nn encodings from a string. + * + * @param buffer StringBuilder containing the string to decode. + * @param offset The position in the string to start decoding. + * @param length The number of characters to decode. + * @throws FileSystemException if an error occurs. + */ + public static void decode(final StringBuilder buffer, final int offset, final int length) + throws FileSystemException { + int index = offset; + int count = length; + boolean ipv6Host = false; + for (; count > 0; count--, index++) { + final char ch = buffer.charAt(index); + if (ch == '[' && buffer.substring(0, index).matches("\\w+://([^/]+@)?")) { + ipv6Host = true; + } + if (ch == ']') { + ipv6Host = false; + } + if (ch != '%' || ipv6Host) { + continue; + } + + if (count < 3) { + throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", + buffer.substring(index, index + count)); + } + + // Decode + final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); + final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); + if (dig1 == -1 || dig2 == -1) { + throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", + buffer.substring(index, index + 3)); + } + final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); + + // Replace + buffer.setCharAt(index, value); + buffer.delete(index + 1, index + 3); + count -= 2; + } + } + + /** + * Converts "special" characters to their %nn value. + * + * @param decodedStr The decoded String. + * @return The encoded String. + */ + public static String encode(final String decodedStr) { + return encode(decodedStr, null); + } + + /** + * Converts "special" characters to their %nn value. + * + * @param decodedStr The decoded String. + * @param reserved Characters to encode. + * @return The encoded String + */ + public static String encode(final String decodedStr, final char[] reserved) { + if (decodedStr == null) { + return null; + } + final StringBuilder buffer = new StringBuilder(decodedStr); + encode(buffer, 0, buffer.length(), reserved); + return buffer.toString(); + } + + /** + * Encode an array of Strings. + * + * @param strings The array of Strings to encode. + * @return An array of encoded Strings. + */ + public static String[] encode(final String[] strings) { + if (strings == null) { + return null; + } + Arrays.setAll(strings, i -> encode(strings[i])); + return strings; + } + + /** + * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters. + * + * @param buffer The StringBuilder to append to. + * @param offset The position in the buffer to start encoding at. + * @param length The number of characters to encode. + * @param reserved characters to encode. + */ + public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) { + int index = offset; + int count = length; + for (; count > 0; index++, count--) { + final char ch = buffer.charAt(index); + boolean match = ch == '%'; + if (reserved != null) { + for (int i = 0; !match && i < reserved.length; i++) { + if (ch == reserved[i]) { + match = true; + break; + } + } + } + if (match) { + // Encode + final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; + buffer.setCharAt(index, '%'); + buffer.insert(index + 1, digits); + index += 2; + } + } + } + + static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) { + int index = offset; + int count = length; + for (; count > 0; index++, count--) { + final char ch = buffer.charAt(index); + if (Arrays.binarySearch(allowed, ch) < 0) { + // Encode + final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; + buffer.setCharAt(index, '%'); + buffer.insert(index + 1, digits); + index += 2; + } + } + } + + /** + * Extracts the first element of a path. + * + * @param name StringBuilder containing the path. + * @return The first element of the path. + */ + public static String extractFirstElement(final StringBuilder name) { + final int len = name.length(); + if (len < 1) { + return null; + } + int startPos = 0; + if (name.charAt(0) == SEPARATOR_CHAR) { + startPos = 1; + } + for (int pos = startPos; pos < len; pos++) { + if (name.charAt(pos) == SEPARATOR_CHAR) { + // Found a separator + final String elem = name.substring(startPos, pos); + name.delete(startPos, pos + 1); + return elem; + } + } + + // No separator + final String elem = name.substring(startPos); + name.setLength(0); + return elem; + } + + /** + * Extract the query String from the URI. + * + * @param name StringBuilder containing the URI. + * @return The query string, if any. null otherwise. + */ + public static String extractQueryString(final StringBuilder name) { + for (int pos = 0; pos < name.length(); pos++) { + if (name.charAt(pos) == '?') { + final String queryString = name.substring(pos + 1); + name.delete(pos, name.length()); + return queryString; + } + } + + return null; + } + + /** + * Extracts the scheme from a URI. + * + * @param uri The URI. + * @return The scheme name. Returns null if there is no scheme. + * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. + */ + @Deprecated + public static String extractScheme(final String uri) { + return extractScheme(uri, null); + } + + /** + * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. + * + * @param uri The URI. + * @param buffer Returns the remainder of the URI. + * @return The scheme name. Returns null if there is no scheme. + * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. + */ + @Deprecated + public static String extractScheme(final String uri, final StringBuilder buffer) { + if (buffer != null) { + buffer.setLength(0); + buffer.append(uri); + } + + final int maxPos = uri.length(); + for (int pos = 0; pos < maxPos; pos++) { + final char ch = uri.charAt(pos); + + if (ch == ':') { + // Found the end of the scheme + final String scheme = uri.substring(0, pos); + if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) { + // This is not a scheme, but a Windows drive letter + return null; + } + if (buffer != null) { + buffer.delete(0, pos + 1); + } + return scheme.intern(); + } + + if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') { + // A scheme character + continue; + } + if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) { + // Not a scheme character + break; + } + // A scheme character (these are not allowed as the first + // character of the scheme), but can be used as subsequent + // characters. + } + + // No scheme in URI + return null; + } + + /** + * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. + *

+ * The scheme is extracted based on the currently supported schemes in the system. That is to say the schemes + * supported by the registered providers. + *

+ *

+ * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically + * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them. + *

+ * @param schemes The schemes to check. + * @param uri The potential URI. May also be a name. + * @return The scheme name. Returns null if there is no scheme. + * @since 2.3 + */ + public static String extractScheme(final String[] schemes, final String uri) { + return extractScheme(schemes, uri, null); + } + + /** + * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. + *

+ * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes + * supported by the registered providers. + *

+ *

+ * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically + * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them. + *

+ * @param schemes The schemes to check. + * @param uri The potential URI. May also just be a name. + * @param buffer Returns the remainder of the URI. + * @return The scheme name. Returns null if there is no scheme. + * @since 2.3 + */ + public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) { + if (buffer != null) { + buffer.setLength(0); + buffer.append(uri); + } + for (final String scheme : schemes) { + if (uri.startsWith(scheme + ":")) { + if (buffer != null) { + buffer.delete(0, uri.indexOf(':') + 1); + } + return scheme; + } + } + return null; + } + + /** + * Normalises the separators in a name. + * + * @param name The StringBuilder containing the name + * @return true if the StringBuilder was modified. + */ + public static boolean fixSeparators(final StringBuilder name) { + boolean changed = false; + int maxlen = name.length(); + for (int i = 0; i < maxlen; i++) { + final char ch = name.charAt(i); + if (ch == TRANS_SEPARATOR) { + name.setCharAt(i, SEPARATOR_CHAR); + changed = true; + } + if (i < maxlen - 2 && name.charAt(i) == '%' && name.charAt(i + 1) == '2') { + if (name.charAt(i + 2) == 'f' || name.charAt(i + 2) == 'F') { + name.setCharAt(i, SEPARATOR_CHAR); + name.delete(i + 1, i + 3); + maxlen -= 2; + changed = true; + } else if (name.charAt(i + 2) == 'e' || name.charAt(i + 2) == 'E') { + name.setCharAt(i, '.'); + name.delete(i + 1, i + 3); + maxlen -= 2; + changed = true; + } + } + } + return changed; + } + + /** + * Normalises a path. Does the following: + *
    + *
  • Removes empty path elements. + *
  • Handles '.' and '..' elements. + *
  • Removes trailing separator. + *
+ * + * Its assumed that the separators are already fixed. + * + * @param path The path to normalize. + * @return The FileType. + * @throws FileSystemException if an error occurs. + * + * @see #fixSeparators + */ + public static FileType normalisePath(final StringBuilder path) throws FileSystemException { + FileType fileType = FileType.FOLDER; + if (path.length() == 0) { + return fileType; + } + + // '/' or '.' or '..' or anyPath/..' or 'anyPath/.' should always be a path + if (path.charAt(path.length() - 1) != '/' + && path.lastIndexOf("/..") != path.length() - 3 + && path.lastIndexOf("/.") != path.length() - 2 + && path.lastIndexOf("..") != 0 + && path.lastIndexOf(".") != 0 + ) { + fileType = FileType.FILE; + } + + // Adjust separators + // fixSeparators(path); + + // Determine the start of the first element + int startFirstElem = 0; + if (path.charAt(0) == SEPARATOR_CHAR) { + if (path.length() == 1) { + return fileType; + } + startFirstElem = 1; + } + + // Iterate over each element + int startElem = startFirstElem; + int maxlen = path.length(); + while (startElem < maxlen) { + // Find the end of the element + int endElem = startElem; + while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) { + endElem++; + } + + final int elemLen = endElem - startElem; + if (elemLen == 0) { + // An empty element - axe it + path.deleteCharAt(endElem); + maxlen = path.length(); + continue; + } + if (elemLen == 1 && path.charAt(startElem) == '.') { + // A '.' element - axe it + path.deleteCharAt(startElem); + maxlen = path.length(); + continue; + } + if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') { + // A '..' element - remove the previous element + if (startElem == startFirstElem) { + // Previous element is missing + throw new FileSystemException("vfs.provider/invalid-relative-path.error"); + } + + // Find start of previous element + int pos = startElem - 2; + while (pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR) { + pos--; + } + startElem = pos + 1; + + path.delete(startElem, endElem + 1); + maxlen = path.length(); + continue; + } + + // A regular element + startElem = endElem + 1; + } + + // Remove trailing separator + if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) { + path.deleteCharAt(maxlen - 1); + } + + return fileType; + } + + private UriParser() { + } +}