diff --git a/Cargo.lock b/Cargo.lock index e65cd02f69..69590bf5f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -428,7 +428,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.2.0", + "hyper 1.4.1", "hyper-util", "itoa", "matchit", @@ -1365,6 +1365,7 @@ dependencies = [ "async_zip", "base64 0.22.1", "brotli", + "bytes", "chrono", "criterion", "deltachat-contact-tools", @@ -1380,7 +1381,10 @@ dependencies = [ "futures-lite 2.3.0", "hex", "hickory-resolver", + "http-body-util", "humansize", + "hyper 1.4.1", + "hyper-util", "image", "iroh", "iroh-gossip", @@ -2918,9 +2922,9 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41cb79eb393015dadd30fc252023adb0b2400a0caee0fa2a077e6e21a551e840" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", @@ -2996,9 +3000,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.2.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186548d73ac615b32a73aafe38fb4f56c0d340e110e5a200bcadbaf2e199263a" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", @@ -3023,7 +3027,7 @@ checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.2.0", + "hyper 1.4.1", "hyper-util", "rustls 0.23.10", "rustls-pki-types", @@ -3041,7 +3045,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.2.0", + "hyper 1.4.1", "hyper-util", "native-tls", "tokio", @@ -3051,16 +3055,16 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.3" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.0", - "hyper 1.2.0", + "hyper 1.4.1", "pin-project-lite", "socket2", "tokio", @@ -3080,7 +3084,7 @@ dependencies = [ "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows-core 0.51.1", + "windows-core 0.52.0", ] [[package]] @@ -3350,7 +3354,7 @@ dependencies = [ "anyhow", "erased_set", "http-body-util", - "hyper 1.2.0", + "hyper 1.4.1", "hyper-util", "once_cell", "prometheus-client", @@ -3388,7 +3392,7 @@ dependencies = [ "hostname", "http 1.1.0", "http-body-util", - "hyper 1.2.0", + "hyper 1.4.1", "hyper-util", "igd-next", "iroh-base", @@ -5299,7 +5303,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.2.0", + "hyper 1.4.1", "hyper-rustls", "hyper-tls", "hyper-util", diff --git a/Cargo.toml b/Cargo.toml index 6adcfdef99..2f90ea9dbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ async-smtp = { version = "0.9", default-features = false, features = ["runtime-t async_zip = { version = "0.0.12", default-features = false, features = ["deflate", "fs"] } base64 = { workspace = true } brotli = { version = "6", default-features=false, features = ["std"] } +bytes = "1" chrono = { workspace = true, features = ["alloc", "clock", "std"] } email = { git = "https://github.com/deltachat/rust-email", branch = "master" } encoded-words = { git = "https://github.com/async-email/encoded-words", branch = "master" } @@ -57,7 +58,10 @@ futures = { workspace = true } futures-lite = { workspace = true } hex = "0.4.0" hickory-resolver = "0.24" +http-body-util = "0.1.2" humansize = "2" +hyper = "1" +hyper-util = "0.1.7" image = { version = "0.25.1", default-features=false, features = ["gif", "jpeg", "ico", "png", "pnm", "webp", "bmp"] } iroh_old = { version = "0.4.2", default-features = false, package = "iroh"} iroh-net = { version = "0.22.0", default-features = false } diff --git a/src/net/http.rs b/src/net/http.rs index 4a66c718fb..fc4073d6ce 100644 --- a/src/net/http.rs +++ b/src/net/http.rs @@ -2,12 +2,16 @@ use std::sync::Arc; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, bail, Context as _, Result}; +use http_body_util::BodyExt; +use hyper_util::rt::TokioIo; use mime::Mime; use once_cell::sync::Lazy; use crate::context::Context; use crate::net::lookup_host_with_cache; +use crate::net::session::SessionStream; +use crate::net::tls::wrap_tls; use crate::socks::Socks5Config; static LETSENCRYPT_ROOT: Lazy = Lazy::new(|| { @@ -32,47 +36,77 @@ pub struct Response { /// Retrieves the text contents of URL using HTTP GET request. pub async fn read_url(context: &Context, url: &str) -> Result { - Ok(read_url_inner(context, url).await?.text().await?) + let response = read_url_blob(context, url).await?; + let text = String::from_utf8_lossy(&response.blob); + Ok(text.to_string()) } -/// Retrieves the binary contents of URL using HTTP GET request. -pub async fn read_url_blob(context: &Context, url: &str) -> Result { - let response = read_url_inner(context, url).await?; - let content_type = response - .headers() - .get(reqwest::header::CONTENT_TYPE) - .and_then(|value| value.to_str().ok()) - .and_then(|value| value.parse::().ok()); - let mimetype = content_type - .as_ref() - .map(|mime| mime.essence_str().to_string()); - let encoding = content_type.as_ref().and_then(|mime| { - mime.get_param(mime::CHARSET) - .map(|charset| charset.as_str().to_string()) - }); - let blob: Vec = response.bytes().await?.into(); - Ok(Response { - blob, - mimetype, - encoding, - }) -} +async fn get_http_sender( + context: &Context, + parsed_url: hyper::Uri, +) -> Result> +where + B: hyper::body::Body + 'static + Send, + B::Data: Send, + B::Error: Into>, +{ + let scheme = parsed_url.scheme_str().context("URL has no scheme")?; + let host = parsed_url.host().context("URL has no host")?; + + let stream: Box = match scheme { + "http" => { + let port = parsed_url.port_u16().unwrap_or(80); + + // It is safe to use cached IP addresses + // for HTTPS URLs, but for HTTP URLs + // better resolve from scratch each time to prevent + // cache poisoning attacks from having lasting effects. + let load_cache = false; + let tcp_stream = crate::net::connect_tcp(context, host, port, load_cache).await?; + Box::new(tcp_stream) + } + "https" => { + let port = parsed_url.port_u16().unwrap_or(443); + let load_cache = true; + let tcp_stream = crate::net::connect_tcp(context, host, port, load_cache).await?; + let strict_tls = true; + let tls_stream = wrap_tls(strict_tls, host, &[], tcp_stream).await?; + Box::new(tls_stream) + } + _ => bail!("Unknown URL scheme"), + }; + + let io = TokioIo::new(stream); + let (sender, conn) = hyper::client::conn::http1::handshake(io).await?; + tokio::task::spawn(conn); -async fn read_url_inner(context: &Context, url: &str) -> Result { - // It is safe to use cached IP addresses - // for HTTPS URLs, but for HTTP URLs - // better resolve from scratch each time to prevent - // cache poisoning attacks from having lasting effects. - let load_cache = url.starts_with("https://"); + Ok(sender) +} - let client = get_client(context, load_cache).await?; +/// Retrieves the binary contents of URL using HTTP GET request. +pub async fn read_url_blob(context: &Context, url: &str) -> Result { let mut url = url.to_string(); // Follow up to 10 http-redirects for _i in 0..10 { - let response = client.get(&url).send().await?; + let parsed_url = url + .parse::() + .with_context(|| format!("Failed to parse URL {url:?}"))?; + + let mut sender = get_http_sender(context, parsed_url.clone()).await?; + let authority = parsed_url + .authority() + .context("URL has no authority")? + .clone(); + + let req = hyper::Request::builder() + .uri(parsed_url.path()) + .header(hyper::header::HOST, authority.as_str()) + .body(http_body_util::Empty::::new())?; + let response = sender.send_request(req).await?; + + let headers = response.headers(); if response.status().is_redirection() { - let headers = response.headers(); let header = headers .get_all("location") .iter() @@ -84,7 +118,25 @@ async fn read_url_inner(context: &Context, url: &str) -> Result().ok()); + let mimetype = content_type + .as_ref() + .map(|mime| mime.essence_str().to_string()); + let encoding = content_type.as_ref().and_then(|mime| { + mime.get_param(mime::CHARSET) + .map(|charset| charset.as_str().to_string()) + }); + let body = response.collect().await?.to_bytes(); + let blob: Vec = body.to_vec(); + return Ok(Response { + blob, + mimetype, + encoding, + }); } Err(anyhow!("Followed 10 redirections"))