Skip to content

Commit

Permalink
Merge pull request #2080 from deltachat/fix-filenames
Browse files Browse the repository at this point in the history
fix decoding of continued filenames
  • Loading branch information
r10s authored Dec 2, 2020
2 parents 83df69f + 1321a78 commit d8babe2
Show file tree
Hide file tree
Showing 8 changed files with 564 additions and 40 deletions.
58 changes: 45 additions & 13 deletions src/dc_receive_imf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2718,25 +2718,57 @@ mod tests {
assert_eq!(last_msg.from_id, DC_CONTACT_ID_INFO);
}

#[async_std::test]
async fn test_html_only_mail() {
let t = TestContext::new_alice().await;
t.ctx
async fn load_imf_email(context: &Context, imf_raw: &[u8]) -> Message {
context
.set_config(Config::ShowEmails, Some("2"))
.await
.unwrap();
dc_receive_imf(
dc_receive_imf(&context, imf_raw, "INBOX", 0, false)
.await
.unwrap();
let chats = Chatlist::try_load(&context, 0, None, None).await.unwrap();
let msg_id = chats.get_msg_id(0).unwrap();
Message::load_from_db(&context, msg_id).await.unwrap()
}

#[async_std::test]
async fn test_html_only_mail() {
let t = TestContext::new_alice().await;
let msg = load_imf_email(
&t.ctx,
include_bytes!("../test-data/message/wrong-html.eml"),
"INBOX",
0,
false,
)
.await
.unwrap();
let chats = Chatlist::try_load(&t.ctx, 0, None, None).await.unwrap();
let msg_id = chats.get_msg_id(0).unwrap();
let msg = Message::load_from_db(&t.ctx, msg_id).await.unwrap();
.await;
assert_eq!(msg.text.unwrap(), " Guten Abend, \n\n Lots of text \n\n text with Umlaut ä... \n\n MfG [...]");
}

#[async_std::test]
async fn test_pdf_filename_simple() {
    // Receive the fixture mail as Alice and check that the attachment is
    // stored as a file whose blob keeps the name `simple.pdf`.
    let alice = TestContext::new_alice().await;
    let raw = include_bytes!("../test-data/message/pdf_filename_simple.eml");
    let received = load_imf_email(&alice.ctx, raw).await;

    assert_eq!(received.viewtype, Viewtype::File);
    assert_eq!(received.text.unwrap(), "mail body");

    let blob_path = received.param.get(Param::File).unwrap();
    assert_eq!(blob_path, "$BLOBDIR/simple.pdf");
}

#[async_std::test]
async fn test_pdf_filename_continuation() {
    // Filenames may be split across multiple header lines (parameter
    // continuations, see RFC 2231); the pieces must be joined into a single
    // name for the stored blob.
    let alice = TestContext::new_alice().await;
    let raw = include_bytes!("../test-data/message/pdf_filename_continuation.eml");
    let received = load_imf_email(&alice.ctx, raw).await;

    assert_eq!(received.viewtype, Viewtype::File);
    assert_eq!(received.text.unwrap(), "mail body");

    let blob_path = received.param.get(Param::File).unwrap();
    assert_eq!(blob_path, "$BLOBDIR/test pdf äöüß.pdf");
}
}
113 changes: 86 additions & 27 deletions src/mimeparser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use crate::param::*;
use crate::peerstate::Peerstate;
use crate::simplify::*;
use crate::stock::StockMessage;
use percent_encoding::percent_decode_str;

/// A parsed MIME message.
///
Expand Down Expand Up @@ -1275,46 +1276,68 @@ fn is_attachment_disposition(mail: &mailparse::ParsedMail<'_>) -> bool {
/// not specified, filename is guessed. If Content-Disposition cannot
/// be parsed, returns an error.
fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result<Option<String>> {
// try to get file name from
// `Content-Disposition: ... filename*=...`
// or `Content-Disposition: ... filename*0*=... filename*1*=... filename*2*=...`
// or `Content-Disposition: ... filename=...`

let ct = mail.get_content_disposition();

let desired_filename: Option<String> = ct
.params
.iter()
.filter(|(key, _value)| key.starts_with("filename"))
.fold(None, |acc, (_key, value)| {
if let Some(acc) = acc {
Some(acc + value)
} else {
Some(value.to_string())
// try to get file name as "encoded-words" from
// `Content-Disposition: ... filename=...`
let mut desired_filename = ct.params.get("filename").map(|s| s.to_string());

// try to get file name from
// `Content-Disposition: ... filename*0*=... filename*1*=... filename*2*=...`
// encoded as CHARSET'LANG'test%2E%70%64%66 (key ends with `*`)
// or as "encoded-words" (key does not end with `*`)
if desired_filename.is_none() {
let mut apostrophe_encoded = false;
desired_filename = ct
.params
.iter()
.filter(|(key, _value)| key.starts_with("filename"))
.fold(None, |acc, (key, value)| {
if key.ends_with('*') {
apostrophe_encoded = true;
}
if let Some(acc) = acc {
Some(acc + value)
} else {
Some(value.to_string())
}
});
if apostrophe_encoded {
// we're currently always assuming utf-8; this might need adaptation, but it should not break things.
if let Some(name) = desired_filename {
desired_filename = if let Some(name) = name.splitn(3, '\'').last() {
Some(percent_decode_str(&name).decode_utf8_lossy().to_string())
} else {
None
}
}
});
}
}

let desired_filename =
desired_filename.or_else(|| ct.params.get("name").map(|s| s.to_string()));
// if no filename is set, try `Content-Disposition: ... name=...`
if desired_filename.is_none() {
desired_filename = ct.params.get("name").map(|s| s.to_string());
}

// MS Outlook is known to specify filename in the "name" attribute of
// Content-Type and omit Content-Disposition.
let desired_filename =
desired_filename.or_else(|| mail.ctype.params.get("name").map(|s| s.to_string()));
if desired_filename.is_none() {
desired_filename = mail.ctype.params.get("name").map(|s| s.to_string());
}

// If there is no filename, but part is an attachment, guess filename
if ct.disposition == DispositionType::Attachment && desired_filename.is_none() {
if desired_filename.is_none() && ct.disposition == DispositionType::Attachment {
if let Some(subtype) = mail.ctype.mimetype.split('/').nth(1) {
Ok(Some(format!("file.{}", subtype,)))
desired_filename = Some(format!("file.{}", subtype,));
} else {
bail!(
"could not determine attachment filename: {:?}",
ct.disposition
);
}
} else {
Ok(desired_filename)
};
}

Ok(desired_filename)
}

/// Returned addresses are normalized and lowercased.
Expand Down Expand Up @@ -1369,6 +1392,7 @@ mod tests {

use super::*;
use crate::test_utils::*;
use mailparse::ParsedMail;

impl AvatarAction {
pub fn is_change(&self) -> bool {
Expand Down Expand Up @@ -1438,18 +1462,53 @@ mod tests {
assert!(is_attachment_disposition(&mail.subparts[1]));
}

#[test]
fn test_get_attachment_filename() {
let raw = include_bytes!("../test-data/message/html_attach.eml");
fn load_mail_with_attachment(raw: &[u8]) -> ParsedMail {
let mail = mailparse::parse_mail(raw).unwrap();
assert!(get_attachment_filename(&mail).unwrap().is_none());
assert!(get_attachment_filename(&mail.subparts[0])
.unwrap()
.is_none());
mail
}

#[test]
fn test_get_attachment_filename() {
    // The attachment is the second MIME part of the fixture; its filename
    // is expected to come back as `test.html`.
    let raw = include_bytes!("../test-data/message/attach_filename_simple.eml");
    let parsed = load_mail_with_attachment(raw);
    let attachment = &parsed.subparts[1];

    assert_eq!(
        get_attachment_filename(attachment).unwrap(),
        Some("test.html".to_string())
    );
}

#[test]
fn test_get_attachment_filename_encoded_words() {
    // The fixture gives `filename` as a single encoded-word
    // (`=?utf-8?Q?...?=`); the decoded name must be returned.
    let raw = include_bytes!("../test-data/message/attach_filename_encoded_words.eml");
    let parsed = load_mail_with_attachment(raw);
    let attachment = &parsed.subparts[1];

    assert_eq!(
        get_attachment_filename(attachment).unwrap(),
        Some("Maßnahmen Okt. 2020.html".to_string())
    );
}

#[test]
fn test_get_attachment_filename_encoded_words_cont() {
    // Continued encoded-words (`filename*0=`, `filename*1=`) must be joined;
    // apostrophes inside the name must survive unchanged in this form.
    let raw = include_bytes!("../test-data/message/attach_filename_encoded_words_cont.eml");
    let parsed = load_mail_with_attachment(raw);
    let attachment = &parsed.subparts[1];

    assert_eq!(
        get_attachment_filename(attachment).unwrap(),
        Some("Maßn'ah'men Okt. 2020.html".to_string())
    );
}

#[test]
fn test_get_attachment_filename_combined() {
    // When both `filename` and an extended `filename*` parameter are given,
    // the plain `filename` wins and the result is not a concatenation of both.
    let raw = include_bytes!("../test-data/message/attach_filename_combined.eml");
    let parsed = load_mail_with_attachment(raw);
    let attachment = &parsed.subparts[1];

    assert_eq!(
        get_attachment_filename(attachment).unwrap(),
        Some("Maßnahmen Okt. 2020.html".to_string())
    );
}

#[test]
fn test_mailparse_content_type() {
let ctype =
Expand Down
31 changes: 31 additions & 0 deletions test-data/message/attach_filename_combined.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Subject: Test combined filenames
Message-ID: [email protected]
Date: Sat, 07 Dec 2019 19:00:27 +0000
X-Mailer: Kopano 8.7.16
To: [email protected]
From: [email protected]
Content-Type: multipart/mixed; boundary="NwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z"


--NwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/plain; charset=utf-8
yip, filename may be given twice,
seen this way in Kopano.
normally, both filenames will be the same,
for the test, we use different filenames,
we define that `filename` has a higher precedence.
--
Sent with my Delta Chat Messenger: https://delta.chat

--NwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/html
Content-Disposition: attachment; filename="=?utf-8?Q?Ma=C3=9Fnahmen_Okt=2E_2020=2Ehtml?=";
filename*=utf-8''Ma%C3%9Fnahmen%20Okt.%202020.ignored.html
Content-Transfer-Encoding: base64

PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh

--NwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z--
23 changes: 23 additions & 0 deletions test-data/message/attach_filename_encoded_words.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Subject: Test encoded-words filenames
Message-ID: [email protected]
Date: Sat, 07 Dec 2019 19:00:27 +0000
Chat-Version: 1.0
To: [email protected]
From: [email protected]
Content-Type: multipart/mixed; boundary="mwkNRwabw1M5n2xcr2ODfAqvTjcj9Z"


--mwkNRwabw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/plain; charset=utf-8
--
Sent with my Delta Chat Messenger: https://delta.chat

--mwkNRwabw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/html
Content-Disposition: attachment; filename="=?utf-8?Q?Ma=C3=9Fnahmen_Okt=2E_2020=2Ehtml?=";
Content-Transfer-Encoding: base64

PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh

--mwkNRwabw1M5n2xcr2ODfAqvTjcj9Z--
25 changes: 25 additions & 0 deletions test-data/message/attach_filename_encoded_words_cont.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Subject: Test encoded-words filenames, spanning different lines
Message-ID: [email protected]
Date: Sat, 07 Dec 2019 19:00:27 +0000
X-Mailer: Delta Chat Core 1.0.0-beta.12/DcFFI
Chat-Version: 1.0
To: [email protected]
From: [email protected]
Content-Type: multipart/mixed; boundary="maaNRwaJw1M5n2xcr2ODfAqvTjcj9Z"


--maaNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/plain; charset=utf-8
--
Sent with my Delta Chat Messenger: https://delta.chat

--maaNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/html
Content-Disposition: attachment; filename*0="=?utf-8?Q?Ma=C3=9Fn'ah'men_?=";
filename*1="=?utf-8?Q?Okt=2E_2020=2Ehtml?=";
Content-Transfer-Encoding: base64

PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh

--maaNRwaJw1M5n2xcr2ODfAqvTjcj9Z--
File renamed without changes.
Loading

0 comments on commit d8babe2

Please sign in to comment.