@@ -62,6 +62,7 @@ fn is_token(b: u8) -> bool {
62
62
// ASCII codes to accept URI string.
63
63
// i.e. A-Z a-z 0-9 !#$%&'*+-._();:@=,/?[]~^
64
64
// TODO: Make a stricter checking for URI string?
65
+ #[ cfg( not( feature = "utf8_in_path" ) ) ]
65
66
static URI_MAP : [ bool ; 256 ] = byte_map ! [
66
67
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
67
68
// \0 \n
@@ -90,7 +91,8 @@ static URI_MAP: [bool; 256] = byte_map![
90
91
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
91
92
] ;
92
93
93
- static URI_NON_COMPLIANT_MAP : [ bool ; 256 ] = byte_map ! [
94
+ #[ cfg( feature = "utf8_in_path" ) ]
95
+ static URI_MAP : [ bool ; 256 ] = byte_map ! [
94
96
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
95
97
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
96
98
0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -110,12 +112,8 @@ static URI_NON_COMPLIANT_MAP: [bool; 256] = byte_map![
110
112
] ;
111
113
112
114
// Diff hunk for `is_uri_token`: the removed version chose between `URI_NON_COMPLIANT_MAP` and
// `URI_MAP` at run time via `allow_non_compliant`; the added version always indexes `URI_MAP`
// with the byte value and returns that table entry. Which `URI_MAP` table is compiled in is
// selected by the `utf8_in_path` cargo feature (see the `cfg` attributes on the two statics).
#[ inline]
113
- pub ( crate ) fn is_uri_token ( b : u8 , allow_non_compliant : bool ) -> bool {
114
- if allow_non_compliant {
115
- URI_NON_COMPLIANT_MAP [ b as usize ]
116
- } else {
117
- URI_MAP [ b as usize ]
118
- }
115
+ pub ( crate ) fn is_uri_token ( b : u8 ) -> bool {
116
+ URI_MAP [ b as usize ]
119
117
}
120
118
121
119
static HEADER_NAME_MAP : [ bool ; 256 ] = byte_map ! [
@@ -184,6 +182,9 @@ pub enum Error {
184
182
TooManyHeaders ,
185
183
/// Invalid byte in HTTP version.
186
184
Version ,
185
+ #[ cfg( feature = "utf8_in_path" ) ]
186
+ /// Invalid UTF-8 in path.
187
+ Utf8Error ,
187
188
}
188
189
189
190
impl Error {
@@ -197,6 +198,8 @@ impl Error {
197
198
Error :: Token => "invalid token" ,
198
199
Error :: TooManyHeaders => "too many headers" ,
199
200
Error :: Version => "invalid HTTP version" ,
201
+ #[ cfg( feature = "utf8_in_path" ) ]
202
+ Error :: Utf8Error => "invalid UTF-8 in path" ,
200
203
}
201
204
}
202
205
}
@@ -283,7 +286,6 @@ pub struct ParserConfig {
283
286
allow_multiple_spaces_in_request_line_delimiters : bool ,
284
287
allow_multiple_spaces_in_response_status_delimiters : bool ,
285
288
allow_space_before_first_header_name : bool ,
286
- allow_rfc3986_non_compliant_path : bool ,
287
289
ignore_invalid_headers_in_responses : bool ,
288
290
ignore_invalid_headers_in_requests : bool ,
289
291
}
@@ -563,7 +565,7 @@ impl<'h, 'b> Request<'h, 'b> {
563
565
if config. allow_multiple_spaces_in_request_line_delimiters {
564
566
complete ! ( skip_spaces( & mut bytes) ) ;
565
567
}
566
- self . path = Some ( complete ! ( parse_uri( & mut bytes, config . allow_rfc3986_non_compliant_path ) ) ) ;
568
+ self . path = Some ( complete ! ( parse_uri( & mut bytes) ) ) ;
567
569
if config. allow_multiple_spaces_in_request_line_delimiters {
568
570
complete ! ( skip_spaces( & mut bytes) ) ;
569
571
}
@@ -976,9 +978,9 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
976
978
#[ doc( hidden) ]
977
979
#[ allow( missing_docs) ]
978
980
// WARNING: Exported for internal benchmarks, not fit for public consumption
979
- pub fn parse_uri < ' a > ( bytes : & mut Bytes < ' a > , allow_non_compliant : bool ) -> Result < & ' a str > {
981
+ pub fn parse_uri < ' a > ( bytes : & mut Bytes < ' a > ) -> Result < & ' a str > {
980
982
let start = bytes. pos ( ) ;
981
- simd:: match_uri_vectored ( bytes, allow_non_compliant ) ;
983
+ simd:: match_uri_vectored ( bytes) ;
982
984
let end = bytes. pos ( ) ;
983
985
984
986
if next ! ( bytes) == b' ' {
@@ -987,6 +989,14 @@ pub fn parse_uri<'a>(bytes: &mut Bytes<'a>, allow_non_compliant: bool) -> Result
987
989
return Err ( Error :: Token ) ;
988
990
}
989
991
992
+ #[ cfg( feature = "utf8_in_path" ) ]
993
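+ // SAFETY: `slice_skip(1)` only drops the delimiter byte consumed by `next!` above (assumed in bounds - confirm against `Bytes::slice_skip`). The bytes are NOT assumed to be UTF-8 in this configuration; `str::from_utf8` validates them.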
994
+ return match str:: from_utf8 ( unsafe { bytes. slice_skip ( 1 ) } ) {
995
+ Ok ( uri) => Ok ( Status :: Complete ( uri) ) ,
996
+ Err ( _) => Err ( Error :: Utf8Error ) ,
997
+ } ;
998
+
999
+ #[ cfg( not( feature = "utf8_in_path" ) ) ]
990
1000
return Ok ( Status :: Complete (
991
1001
// SAFETY: all bytes up till `i` must have been `is_token` and therefore also utf-8.
992
1002
unsafe { str:: from_utf8_unchecked ( bytes. slice_skip ( 1 ) ) } ,
@@ -2077,7 +2087,7 @@ mod tests {
2077
2087
assert_eq ! ( parse_chunk_size( b"567f8a\r foo" ) , Err ( crate :: InvalidChunkSize ) ) ;
2078
2088
assert_eq ! ( parse_chunk_size( b"567f8a\r foo" ) , Err ( crate :: InvalidChunkSize ) ) ;
2079
2089
assert_eq ! ( parse_chunk_size( b"567xf8a\r \n " ) , Err ( crate :: InvalidChunkSize ) ) ;
2080
- assert_eq ! ( parse_chunk_size( b"ffffffffffffffff\r \n " ) , Ok ( Status :: Complete ( ( 18 , std :: u64 :: MAX ) ) ) ) ;
2090
+ assert_eq ! ( parse_chunk_size( b"ffffffffffffffff\r \n " ) , Ok ( Status :: Complete ( ( 18 , u64 :: MAX ) ) ) ) ;
2081
2091
assert_eq ! ( parse_chunk_size( b"1ffffffffffffffff\r \n " ) , Err ( crate :: InvalidChunkSize ) ) ;
2082
2092
assert_eq ! ( parse_chunk_size( b"Affffffffffffffff\r \n " ) , Err ( crate :: InvalidChunkSize ) ) ;
2083
2093
assert_eq ! ( parse_chunk_size( b"fffffffffffffffff\r \n " ) , Err ( crate :: InvalidChunkSize ) ) ;
@@ -2185,7 +2195,7 @@ mod tests {
2185
2195
assert_eq ! ( result, Err ( crate :: Error :: Token ) ) ;
2186
2196
}
2187
2197
2188
- static REQUEST_WITH_MULTIPLE_SPACES_AND_BAD_PATH : & [ u8 ] = b"GET /foo> ohno HTTP/1.1\r \n \r \n " ;
2198
+ static REQUEST_WITH_MULTIPLE_SPACES_AND_BAD_PATH : & [ u8 ] = b"GET /foo ohno HTTP/1.1\r \n \r \n " ;
2189
2199
2190
2200
#[ test]
2191
2201
fn test_request_with_multiple_spaces_and_bad_path ( ) {
@@ -2194,7 +2204,7 @@ mod tests {
2194
2204
let result = crate :: ParserConfig :: default ( )
2195
2205
. allow_multiple_spaces_in_request_line_delimiters ( true )
2196
2206
. parse_request ( & mut request, REQUEST_WITH_MULTIPLE_SPACES_AND_BAD_PATH ) ;
2197
- assert_eq ! ( result, Err ( crate :: Error :: Token ) ) ;
2207
+ assert_eq ! ( result, Err ( crate :: Error :: Version ) ) ;
2198
2208
}
2199
2209
2200
2210
static RESPONSE_WITH_SPACES_IN_CODE : & [ u8 ] = b"HTTP/1.1 99 200 OK\r \n \r \n " ;
@@ -2702,7 +2712,8 @@ mod tests {
2702
2712
}
2703
2713
2704
2714
#[ test]
2705
- fn test_rfc3986_non_compliant_path_ko ( ) {
2715
+ #[ cfg( not( feature = "utf8_in_path" ) ) ]
2716
+ fn test_utf8_in_path_ko ( ) {
2706
2717
let mut headers = [ EMPTY_HEADER ; 1 ] ;
2707
2718
let mut request = Request :: new ( & mut headers[ ..] ) ;
2708
2719
@@ -2712,13 +2723,12 @@ mod tests {
2712
2723
}
2713
2724
2714
2725
#[ test]
2715
- fn test_rfc3986_non_compliant_path_ok ( ) {
2726
+ #[ cfg( feature = "utf8_in_path" ) ]
2727
+ fn test_utf8_in_path_ok ( ) {
2716
2728
let mut headers = [ EMPTY_HEADER ; 1 ] ;
2717
2729
let mut request = Request :: new ( & mut headers[ ..] ) ;
2718
- let mut config = crate :: ParserConfig :: default ( ) ;
2719
- config. allow_rfc3986_non_compliant_path = true ;
2720
2730
2721
- let result = config . parse_request ( & mut request, b"GET /test?post=I\xE2 \x80 \x99 msorryIforkedyou HTTP/1.1\r \n Host: example.org\r \n \r \n " ) ;
2731
+ let result = crate :: ParserConfig :: default ( ) . parse_request ( & mut request, b"GET /test?post=I\xE2 \x80 \x99 msorryIforkedyou HTTP/1.1\r \n Host: example.org\r \n \r \n " ) ;
2722
2732
2723
2733
assert_eq ! ( result, Ok ( Status :: Complete ( 67 ) ) ) ;
2724
2734
assert_eq ! ( request. version. unwrap( ) , 1 ) ;
@@ -2728,4 +2738,15 @@ mod tests {
2728
2738
assert_eq ! ( request. headers[ 0 ] . name, "Host" ) ;
2729
2739
assert_eq ! ( request. headers[ 0 ] . value, & b"example.org" [ ..] ) ;
2730
2740
}
2741
+
2742
// Only compiled with the `utf8_in_path` feature. The request path contains a lone 0xE2 byte
// (a UTF-8 lead byte that is not followed by continuation bytes), so the path is not valid
// UTF-8 and parsing is expected to fail with `Error::Utf8Error`.
+ #[ test]
2743
+ #[ cfg( feature = "utf8_in_path" ) ]
2744
+ fn test_bad_utf8_in_path ( ) {
2745
+ let mut headers = [ EMPTY_HEADER ; 1 ] ;
2746
+ let mut request = Request :: new ( & mut headers[ ..] ) ;
2747
+
2748
+ let result = crate :: ParserConfig :: default ( ) . parse_request ( & mut request, b"GET /test?post=I\xE2 msorryIforkedyou HTTP/1.1\r \n Host: example.org\r \n \r \n " ) ;
2749
+
2750
+ assert_eq ! ( result, Err ( crate :: Error :: Utf8Error ) ) ;
2751
+ }
2731
2752
}
0 commit comments