-
Notifications
You must be signed in to change notification settings - Fork 532
Fix #937: Add virtual address mapping for Go and Rust strings #1220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
5b67c81
4162748
de2fccf
5d1720b
7901e48
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,7 +37,9 @@ | |
|
|
||
|
|
||
| def fix_b2s_wide_strings( | ||
| strings: List[Tuple[str, str, Tuple[int, int], bool]], min_length: int, buffer: bytes | ||
| strings: List[Tuple[str, str, Tuple[int, int], bool]], | ||
| min_length: int, | ||
| buffer: bytes, | ||
| ) -> List[Tuple[str, str, Tuple[int, int], bool]]: | ||
| # TODO(mr-tz): b2s may parse wide strings where there really should be utf-8 strings | ||
| # handle special cases here until fixed | ||
|
|
@@ -74,48 +76,72 @@ def fix_b2s_wide_strings( | |
| def filter_and_transform_utf8_strings( | ||
| strings: List[Tuple[str, str, Tuple[int, int], bool]], | ||
| start_rdata: int, | ||
| image_base: int, | ||
| virtual_address: int, | ||
| ) -> List[StaticString]: | ||
| transformed_strings = [] | ||
|
|
||
| for string in strings: | ||
| s = string[0] | ||
| string_type = string[1] | ||
|
|
||
| # Calculate file offset | ||
| start = string[2][0] + start_rdata | ||
|
|
||
| # Calculate memory address (VA) | ||
| address = image_base + virtual_address + string[2][0] | ||
|
|
||
| if string_type != "UTF8": | ||
| continue | ||
|
|
||
| # our static algorithm does not extract new lines either | ||
| # FLOSS logic: remove new lines | ||
| s = s.replace("\n", "") | ||
| transformed_strings.append(StaticString(string=s, offset=start, encoding=StringEncoding.UTF8)) | ||
|
|
||
| return transformed_strings | ||
| # We pass the calculated address here | ||
| transformed_strings.append( | ||
| StaticString( | ||
| string=s, offset=start, encoding=StringEncoding.UTF8, address=address | ||
| ) | ||
| ) | ||
|
|
||
| return transformed_strings | ||
|
|
||
| def split_strings(static_strings: List[StaticString], address: int, min_length: int) -> None: | ||
| """ | ||
| if address is in between start and end of a string in ref data then split the string | ||
| this modifies the elements of the static strings list directly | ||
| """ | ||
|
|
||
| def split_strings( | ||
| static_strings: List[StaticString], address: int, min_length: int | ||
| ) -> None: | ||
| for string in static_strings: | ||
| if string.offset < address < string.offset + len(string.string): | ||
| rust_string = string.string[0 : address - string.offset] | ||
| rest = string.string[address - string.offset :] | ||
|
|
||
| if len(rust_string) >= min_length: | ||
| # Part 1: Keeps the original base address | ||
| static_strings.append( | ||
| StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8) | ||
| StaticString( | ||
| string=rust_string, | ||
| offset=string.offset, | ||
| encoding=StringEncoding.UTF8, | ||
| address=string.address, | ||
| ) | ||
| ) | ||
| if len(rest) >= min_length: | ||
| static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8)) | ||
| # Part 2: Calculate the new VA for the split point | ||
| va_at_split = string.address + (address - string.offset) | ||
| static_strings.append( | ||
| StaticString( | ||
| string=rest, | ||
| offset=address, | ||
| encoding=StringEncoding.UTF8, | ||
| address=va_at_split, | ||
| ) | ||
| ) | ||
|
|
||
| # remove string from static_strings | ||
| # Remove the original unsplit string | ||
| for static_string in static_strings: | ||
| if static_string == string: | ||
| static_strings.remove(static_string) | ||
| return | ||
|
|
||
| return | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Modifying a list while iterating over it, as done in this inner loop, can lead to unexpected behavior and is inefficient. Since `string` is already the element that has to be removed, the inner loop can be replaced with a direct call:
# Remove the original unsplit string
static_strings.remove(string)
return |
||
|
|
||
|
|
||
|
|
@@ -168,7 +194,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt | |
| fixed_strings = fix_b2s_wide_strings(strings, min_length, buffer_rdata) | ||
|
|
||
| # select only UTF-8 strings and adjust offset | ||
| static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata) | ||
| static_strings = filter_and_transform_utf8_strings( | ||
| fixed_strings, start_rdata, image_base, virtual_address | ||
| ) | ||
|
|
||
| # TODO(mr-tz) - handle miss in rust-hello64.exe | ||
| # .rdata:00000001400C1270 0A aPanickedAfterP db 0Ah ; DATA XREF: .rdata:00000001400C12B8↓o | ||
|
|
@@ -222,7 +250,9 @@ def main(argv=None): | |
|
|
||
| logging.basicConfig(level=logging.DEBUG) | ||
|
|
||
| rust_strings = sorted(extract_rust_strings(args.path, args.min_length), key=lambda s: s.offset) | ||
| rust_strings = sorted( | ||
| extract_rust_strings(args.path, args.min_length), key=lambda s: s.offset | ||
| ) | ||
| for string in rust_strings: | ||
| print(f"{string.offset:#x}: {string.string}") | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `except ValueError` block was changed to `pass`, but this can lead to an `UnboundLocalError` on the following line, `string_blob_size = string_blob_end - string_blob_start`, if `find_string_blob_range` raises a `ValueError`. The variables `string_blob_start` and `string_blob_end` would not be defined. The previous implementation, which logged a warning and returned, was safer.