Skip to content

Commit c61b9cb

Browse files
fix(gmail): RFC 2047 encode non-ASCII display names in address headers (#482)
Add encode_address_header() that parses mailbox lists, RFC 2047 encodes only the display-name portion of non-ASCII addresses, and leaves email addresses untouched. Applied to all 4 address headers (To, From, Cc, Bcc) in MessageBuilder::build(). Previously, only Subject got RFC 2047 encoding while address headers only got CRLF sanitization, causing mojibake for non-ASCII names. Supersedes #405, #458, #469. Closes #404. Co-authored-by: jpoehnelt-bot <jpoehnelt-bot@users.noreply.github.com>
1 parent 47afe5f commit c61b9cb

File tree

2 files changed

+251
-4
lines changed

2 files changed

+251
-4
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
"@googleworkspace/cli": patch
3+
---
4+
5+
fix(gmail): RFC 2047 encode non-ASCII display names in To/From/Cc/Bcc headers
6+
7+
Fixes mojibake when sending emails to recipients with non-ASCII display names (e.g. Japanese, Spanish accented characters). The new `encode_address_header()` function parses mailbox lists, encodes only the display-name portion via RFC 2047 Base64, and leaves email addresses untouched.

src/helpers/gmail/mod.rs

Lines changed: 244 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,56 @@ pub(super) fn encode_header_value(value: &str) -> String {
448448
encoded_words.join("\r\n ")
449449
}
450450

451+
/// RFC 2047 encode non-ASCII display names in an address header value.
452+
///
453+
/// Parses a comma-separated mailbox list (e.g. `"José <j@ex.com>, alice@ex.com"`),
454+
/// encodes only the display-name portion of each mailbox if it contains
455+
/// non-ASCII characters, and leaves email addresses untouched.
456+
///
457+
/// Examples:
458+
/// - `"alice@example.com"` → `"alice@example.com"` (bare email, unchanged)
459+
/// - `"José García <jose@example.com>"` → `"=?UTF-8?B?...?= <jose@example.com>"`
460+
/// - `"Alice <a@ex.com>, José <j@ex.com>"` → `"Alice <a@ex.com>, =?UTF-8?B?...?= <j@ex.com>"`
461+
pub(super) fn encode_address_header(value: &str) -> String {
462+
/// Strip all ASCII control characters (C0 range 0x00–0x1F plus DEL 0x7F)
463+
/// from a parsed component. This is defense-in-depth beyond the caller's
464+
/// `sanitize_header_value` which only strips CR/LF.
465+
fn sanitize_component(s: &str) -> String {
466+
s.chars().filter(|c| !c.is_ascii_control()).collect()
467+
}
468+
469+
let mailboxes = split_mailbox_list(value);
470+
let encoded: Vec<String> = mailboxes
471+
.into_iter()
472+
.map(|mailbox| {
473+
let email = sanitize_component(extract_email(mailbox));
474+
let display = sanitize_component(extract_display_name(mailbox));
475+
476+
// Bare email address — no display name to encode.
477+
// Only keep characters valid in email addresses to strip any
478+
// residual injection data glued by CRLF stripping.
479+
if email == display {
480+
return email
481+
.chars()
482+
.take_while(|c| c.is_alphanumeric() || "@._-+%".contains(*c))
483+
.collect();
484+
}
485+
486+
// Non-ASCII display name — RFC 2047 encode it
487+
if !display.is_ascii() {
488+
let encoded_name = encode_header_value(&display);
489+
return format!("{} <{}>", encoded_name, email);
490+
}
491+
492+
// ASCII display name — reconstruct from parsed components
493+
// to strip any potential residual injection data.
494+
format!("{} <{}>", display, email)
495+
})
496+
.collect();
497+
498+
encoded.join(", ")
499+
}
500+
451501
/// In-Reply-To and References values for threading a reply or forward.
452502
#[derive(Clone, Copy)]
453503
pub(super) struct ThreadingHeaders<'a> {
@@ -482,7 +532,7 @@ impl MessageBuilder<'_> {
482532

483533
let mut headers = format!(
484534
"To: {}\r\nSubject: {}",
485-
sanitize_header_value(self.to),
535+
encode_address_header(&sanitize_header_value(self.to)),
486536
// Sanitize first: stripping CRLF before encoding prevents injection
487537
// in encoded-words.
488538
encode_header_value(&sanitize_header_value(self.subject)),
@@ -506,17 +556,26 @@ impl MessageBuilder<'_> {
506556
));
507557

508558
if let Some(from) = self.from {
509-
headers.push_str(&format!("\r\nFrom: {}", sanitize_header_value(from)));
559+
headers.push_str(&format!(
560+
"\r\nFrom: {}",
561+
encode_address_header(&sanitize_header_value(from))
562+
));
510563
}
511564

512565
if let Some(cc) = self.cc {
513-
headers.push_str(&format!("\r\nCc: {}", sanitize_header_value(cc)));
566+
headers.push_str(&format!(
567+
"\r\nCc: {}",
568+
encode_address_header(&sanitize_header_value(cc))
569+
));
514570
}
515571

516572
// The Gmail API reads the Bcc header to route to those recipients,
517573
// then strips it before delivery.
518574
if let Some(bcc) = self.bcc {
519-
headers.push_str(&format!("\r\nBcc: {}", sanitize_header_value(bcc)));
575+
headers.push_str(&format!(
576+
"\r\nBcc: {}",
577+
encode_address_header(&sanitize_header_value(bcc))
578+
));
520579
}
521580

522581
format!("{}\r\n\r\n{}", headers, body)
@@ -1249,6 +1308,187 @@ mod tests {
12491308
}
12501309
}
12511310

1311+
#[test]
fn test_encode_address_header_bare_email() {
    // A bare address has no display name, so it must come back verbatim.
    let address = "alice@example.com";
    assert_eq!(encode_address_header(address), address);
}
1318+
1319+
#[test]
fn test_encode_address_header_ascii_display_name() {
    // A pure-ASCII `Name <addr>` mailbox needs no encoding and round-trips.
    let mailbox = "Alice Smith <alice@example.com>";
    let header = encode_address_header(mailbox);
    assert_eq!(header, mailbox);
}
1324+
1325+
#[test]
fn test_encode_address_header_non_ascii_display_name() {
    // Accented display name: the name becomes an encoded-word while the
    // angle-bracketed address stays literal.
    let header = encode_address_header("José García <jose@example.com>");

    let has_encoded_word = header.contains("=?UTF-8?B?");
    assert!(
        has_encoded_word,
        "Should contain encoded-word: {encoded}",
        encoded = header
    );

    let keeps_address = header.contains("<jose@example.com>");
    assert!(
        keeps_address,
        "Email should be preserved: {encoded}",
        encoded = header
    );

    let leaks_raw_name = header.contains("José");
    assert!(
        !leaks_raw_name,
        "Raw non-ASCII should not appear: {encoded}",
        encoded = header
    );
}
1341+
1342+
#[test]
fn test_encode_address_header_multiple_mixed() {
    // Two mailboxes: the first is plain ASCII, the second has an accented name.
    let header = encode_address_header("Alice <alice@example.com>, José <jose@example.com>");

    let ascii_first = header.starts_with("Alice <alice@example.com>, ");
    assert!(
        ascii_first,
        "ASCII address should be unchanged: {encoded}",
        encoded = header
    );

    let has_encoded_word = header.contains("=?UTF-8?B?");
    assert!(
        has_encoded_word,
        "Non-ASCII name should be encoded: {encoded}",
        encoded = header
    );

    let keeps_address = header.contains("<jose@example.com>");
    assert!(
        keeps_address,
        "Email should be preserved: {encoded}",
        encoded = header
    );
}
1359+
1360+
#[test]
fn test_encode_address_header_quoted_non_ascii() {
    // Display name supplied inside double quotes and written in Japanese.
    let mailbox = "\"下野祐太\" <shimono@example.com>";
    let header = encode_address_header(mailbox);

    let has_encoded_word = header.contains("=?UTF-8?B?");
    assert!(
        has_encoded_word,
        "Should contain encoded-word: {encoded}",
        encoded = header
    );

    let keeps_address = header.contains("<shimono@example.com>");
    assert!(
        keeps_address,
        "Email should be preserved: {encoded}",
        encoded = header
    );
}
1372+
1373+
#[test]
fn test_message_builder_non_ascii_address_headers() {
    // Build a message in which every address header has a non-ASCII name.
    let raw = MessageBuilder {
        to: "José <jose@example.com>",
        subject: "Test",
        from: Some("田中太郎 <tanaka@example.com>"),
        cc: Some("Ñoño <nono@example.com>"),
        bcc: Some("Ünsal <unsal@example.com>"),
        threading: None,
        html: false,
    }
    .build("body");

    // Each of the four address headers must start with an encoded-word.
    let to_encoded = raw.contains("To: =?UTF-8?B?");
    let from_encoded = raw.contains("From: =?UTF-8?B?");
    let cc_encoded = raw.contains("Cc: =?UTF-8?B?");
    let bcc_encoded = raw.contains("Bcc: =?UTF-8?B?");

    assert!(to_encoded, "To should be RFC 2047 encoded: {raw}");
    assert!(from_encoded, "From should be RFC 2047 encoded: {raw}");
    assert!(cc_encoded, "Cc should be RFC 2047 encoded: {raw}");
    assert!(bcc_encoded, "Bcc should be RFC 2047 encoded: {raw}");

    // The angle-bracketed addresses themselves must survive unmodified.
    assert!(raw.contains("<jose@example.com>"));
    assert!(raw.contains("<tanaka@example.com>"));
    assert!(raw.contains("<nono@example.com>"));
    assert!(raw.contains("<unsal@example.com>"));
}
1412+
1413+
#[test]
fn test_encode_address_header_strips_trailing_garbage() {
    // Once sanitize_header_value removes the CRLF, the would-be "Bcc:" header
    // text is glued onto the mailbox. Rebuilding from the parsed parts has to
    // discard it.
    let flattened = sanitize_header_value("Alice <a@ex.com>\r\nBcc: evil@ex.com");
    let encoded = encode_address_header(&flattened);

    let leaks_injected = encoded.contains("evil");
    let keeps_original = encoded.contains("<a@ex.com>");

    assert!(
        !leaks_injected,
        "Trailing injection data should be stripped: {encoded}"
    );
    assert!(
        keeps_original,
        "Original email should be preserved: {encoded}"
    );
}
1428+
1429+
#[test]
fn test_encode_address_header_strips_null_bytes() {
    // A NUL embedded in the display name must vanish, joining its neighbours.
    let with_nul = "Alice\0Bob <alice@ex.com>";
    let encoded = encode_address_header(with_nul);

    let has_nul = encoded.contains('\0');
    assert!(!has_nul, "Null bytes should be stripped: {encoded:?}");
    assert!(encoded.contains("AliceBob"));
    assert!(encoded.contains("<alice@ex.com>"));
}
1439+
1440+
#[test]
fn test_encode_address_header_strips_tab_in_email() {
    // A tab inside a bare address is a control character and must not survive.
    let header = encode_address_header("alice\t@ex.com");
    let has_tab = header.contains('\t');
    assert!(
        !has_tab,
        "Tab should be stripped: {encoded:?}",
        encoded = header
    );
}
1448+
1449+
#[test]
fn test_encode_address_header_injection_bare_email() {
    // Injection attempt appended to a bare address. After CRLF removal the
    // letters "Bcc" are acceptable address characters, but the colon is not,
    // so everything from the colon onward has to be dropped.
    let flattened = sanitize_header_value("foo@bar.com\r\nBcc: evil@ex.com");
    let header = encode_address_header(&flattened);

    let leaks_injected = header.contains("evil");
    assert!(
        !leaks_injected,
        "Injection in bare email should be stripped: {encoded}",
        encoded = header
    );
}
1460+
1461+
#[test]
fn test_encode_address_header_injection_bare_email_no_space() {
    // Same attack, but with no space after the injected header name (Bcc:evil).
    let flattened = sanitize_header_value("foo@bar.com\r\nBcc:evil@ex.com");
    let header = encode_address_header(&flattened);

    let leaks_injected = header.contains("evil");
    assert!(
        !leaks_injected,
        "No-space injection should be stripped: {encoded}",
        encoded = header
    );

    // Only the prefix up to (not including) the colon remains.
    assert_eq!(header, "foo@bar.comBcc");
}
1472+
1473+
#[test]
fn test_encode_address_header_injection_angle_brackets_in_bare_email() {
    // If angle brackets are injected after a bare address, extract_email picks
    // up the bracketed part and the mailbox goes through the `Name <email>`
    // reconstruction path. With no CR/LF left the result is still a single
    // header line, so this is not a header injection; the address differs, but
    // the input was already corrupted.
    let flattened = sanitize_header_value("foo@bar.com\r\n<evil@ex.com>");
    let encoded = encode_address_header(&flattened);

    // The reconstruction path keeps the bracketed address...
    assert!(encoded.contains("<evil@ex.com>"));
    // ...and no line-break characters remain.
    assert!(!encoded.contains('\r'));
    assert!(!encoded.contains('\n'));
}
1486+
1487+
#[test]
fn test_encode_address_header_empty_input() {
    // An empty header value must stay empty.
    let empty = "";
    assert_eq!(encode_address_header(empty), empty);
}
1491+
12521492
#[test]
12531493
fn test_message_builder_basic() {
12541494
let raw = MessageBuilder {

0 commit comments

Comments
 (0)