Skip to content

Commit 3f82781

Browse files
Better COFF String Detection and Big Endian Wide Strings (#321)
* Better COFF and Big Endian Strings
* Cargo Check and Format Fixes
* Rework DataType::String in display_literals

---------

Co-authored-by: Luke Street <luke@street.dev>
1 parent 926dd06 commit 3f82781

2 files changed

Lines changed: 33 additions & 18 deletions

File tree

objdiff-core/src/arch/mod.rs

Lines changed: 30 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -174,23 +174,36 @@ impl DataType {
174174
strs.push((format!("{bytes:#?}"), None, None));
175175
}
176176
DataType::String => {
177-
if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') {
178-
let str_bytes = &bytes[..nul_idx];
179-
// Special case to display (ASCII) as the label for ASCII-only strings.
180-
let (cow, _, had_errors) = encoding_rs::UTF_8.decode(str_bytes);
181-
if !had_errors && cow.is_ascii() {
182-
let string = format!("{cow}");
183-
let copy_string = escape_special_ascii_characters(string.clone());
184-
strs.push((string, Some("ASCII".into()), Some(copy_string)));
177+
// Special case to display (ASCII) as the label for ASCII-only strings.
178+
let mut is_ascii = false;
179+
if bytes.is_ascii()
180+
&& let Ok(str) = str::from_utf8(bytes)
181+
{
182+
let trimmed = str.trim_end_matches('\0');
183+
if !trimmed.is_empty() {
184+
let copy_string = escape_special_ascii_characters(trimmed);
185+
strs.push((trimmed.to_string(), Some("ASCII".into()), Some(copy_string)));
186+
is_ascii = true;
185187
}
186-
for (encoding, encoding_name) in SUPPORTED_ENCODINGS {
187-
let (cow, _, had_errors) = encoding.decode(str_bytes);
188-
// Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible.
189-
if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) {
190-
let string = format!("{cow}");
191-
let copy_string = escape_special_ascii_characters(string.clone());
192-
strs.push((string, Some(encoding_name.into()), Some(copy_string)));
193-
}
188+
}
189+
190+
for (encoding, encoding_name) in SUPPORTED_ENCODINGS {
191+
// Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible.
192+
if is_ascii && encoding.is_ascii_compatible() {
193+
continue;
194+
}
195+
let (cow, _, had_errors) = encoding.decode(bytes);
196+
if had_errors {
197+
continue;
198+
}
199+
let trimmed = cow.trim_end_matches('\0');
200+
if !trimmed.is_empty() {
201+
let copy_string = escape_special_ascii_characters(trimmed);
202+
strs.push((
203+
trimmed.to_string(),
204+
Some(encoding_name.into()),
205+
Some(copy_string),
206+
));
194207
}
195208
}
196209
}
@@ -508,7 +521,7 @@ pub struct RelocationOverride {
508521

509522
/// Escape ASCII characters such as \n or \t, but not Unicode characters such as \u{3000}.
510523
/// Suitable for copying to clipboard.
511-
fn escape_special_ascii_characters(value: String) -> String {
524+
fn escape_special_ascii_characters(value: &str) -> String {
512525
let mut escaped = String::new();
513526
escaped.push('"');
514527
for c in value.chars() {

objdiff-core/src/arch/ppc/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -361,7 +361,9 @@ impl Arch for ArchPpc {
361361
}
362362

363363
fn guess_data_type(&self, resolved: ResolvedInstructionRef, bytes: &[u8]) -> Option<DataType> {
364-
if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase")) {
364+
if resolved.relocation.is_some_and(|r| {
365+
r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG")
366+
}) {
365367
// Pooled string.
366368
return Some(DataType::String);
367369
}

0 commit comments

Comments
 (0)