From a7b7ea30ba8b916b20f67ca660e4bbc7425c032a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 9 Nov 2023 12:53:06 +0100 Subject: [PATCH 1/2] fix b2s wide/utf-8 string handling via workaround --- floss/language/rust/extract.py | 55 +++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/floss/language/rust/extract.py b/floss/language/rust/extract.py index f67e57c82..3d3dc3960 100644 --- a/floss/language/rust/extract.py +++ b/floss/language/rust/extract.py @@ -4,7 +4,7 @@ import pathlib import argparse import itertools -from typing import List, Tuple, Iterable +from typing import List, Tuple, Iterable, Optional import pefile import binary2strings as b2s @@ -25,6 +25,41 @@ def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure: raise ValueError("no .rdata section found") +def fix_b2s_wide_strings( + strings: List[Tuple[str, str, Tuple[int, int], bool]], min_length: int, buffer: bytes +) -> List[Tuple[str, str, Tuple[int, int], bool]]: + # TODO(mr-tz): b2s may parse wide strings where there really should be utf-8 strings + # handle special cases here until fixed + # https://github.com/mandiant/flare-floss/issues/867 + fixed_strings: List[Tuple[str, str, Tuple[int, int], bool]] = list() + last_fixup: Optional[Tuple[str, str, Tuple[int, int], bool]] = None + for string in strings: + s = string[0] + string_type = string[1] + start = string[2][0] + + if string_type == "WIDE_STRING": + sd = s.encode("utf-16le", "ignore") + # utf-8 strings will not start with \x00 + if sd[0] == 0: + new_string = b2s.extract_string(buffer[start + 1 :]) + last_fixup = ( + new_string[0], + new_string[1], + (new_string[2][0] + start + 1, new_string[2][1] + start + 1), + new_string[3], + ) + if len(last_fixup[0]) < min_length: + last_fixup = None + else: + if last_fixup and s in last_fixup[0]: + fixed_strings.append(last_fixup) + else: + fixed_strings.append(string) + last_fixup = None + return fixed_strings + + def filter_and_transform_utf8_strings( strings: List[Tuple[str, str, Tuple[int, int], bool]], start_rdata: int, @@ -46,7 +81,7 @@ def filter_and_transform_utf8_strings( return transformed_strings -def split_strings(static_strings: List[StaticString], address: int) -> None: +def split_strings(static_strings: List[StaticString], address: int, min_length: int) -> None: """ if address is in between start and end of a string in ref data then split the string this modifies the elements of the static strings list directly @@ -57,8 +92,12 @@ def split_strings(static_strings: List[StaticString], address: int) -> None: rust_string = string.string[0 : address - string.offset] rest = string.string[address - string.offset :] - static_strings.append(StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8)) - static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8)) + if len(rust_string) >= min_length: + static_strings.append( + StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8) + ) + if len(rest) >= min_length: + static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8)) # remove string from static_strings for static_string in static_strings: @@ -97,12 +136,14 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt end_rdata = start_rdata + rdata_section.SizeOfRawData virtual_address = rdata_section.VirtualAddress pointer_to_raw_data = rdata_section.PointerToRawData + buffer_rdata = rdata_section.get_data() # extract utf-8 and wide strings, latter not needed here - strings = b2s.extract_all_strings(rdata_section.get_data(), min_length) + strings = b2s.extract_all_strings(buffer_rdata, min_length) + fixed_strings = fix_b2s_wide_strings(strings, min_length, buffer_rdata) # select only UTF-8 strings and adjust offset - static_strings = filter_and_transform_utf8_strings(strings, start_rdata) + static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata) struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe)) @@ -126,7 +167,7 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt if not (start_rdata <= address < end_rdata): continue - split_strings(static_strings, address) + split_strings(static_strings, address, min_length) return static_strings From 98fbde05a9c657259229f8a2888bc7e19f2f147b Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 9 Nov 2023 13:57:14 +0100 Subject: [PATCH 2/2] enable test --- tests/test_language_extract_rust.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_language_extract_rust.py b/tests/test_language_extract_rust.py index f507efe75..9d210ce0c 100644 --- a/tests/test_language_extract_rust.py +++ b/tests/test_language_extract_rust.py @@ -30,12 +30,11 @@ def rust_strings64(): # .rdata:00000001400BD040 30 D0 0B 40 01 00 pieces ___str_ # .rdata:00000001400BD040 00 00 00 00 ; "Hello, world!\n" pytest.param("Hello, world!", 0xBB030, StringEncoding.UTF8, "rust_strings64"), - # TODO enable, see issue #867 # .rdata:00000001400BD050 69 6E 76 61 6C 69 aInvalidArgs db 'invalid args',0 # .rdata:00000001400BD05D 00 00 00 align 20h # .rdata:00000001400BD060 50 D0 0B 40 01 00 stru_1400BD060 ___str_ # .rdata:00000001400BD060 00 00 00 00 ; "invalid args" - # pytest.param("invalid args", 0xBB050, StringEncoding.UTF8, "rust_strings64"), + pytest.param("invalid args", 0xBB050, StringEncoding.UTF8, "rust_strings64"), ], ) def test_data_string_offset(request, string, offset, encoding, rust_strings):