Skip to content

Commit

Permalink
Merge pull request mandiant#897 from mr-tz/workaround-fix-b2s-wide-st…
Browse files Browse the repository at this point in the history
…rings

fix b2s wide/utf-8 string handling via workaround
  • Loading branch information
mr-tz committed Nov 9, 2023
2 parents 52747a4 + 98fbde0 commit 9405cb8
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 9 deletions.
55 changes: 48 additions & 7 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pathlib
import argparse
import itertools
from typing import List, Tuple, Iterable
from typing import List, Tuple, Iterable, Optional

import pefile
import binary2strings as b2s
Expand All @@ -25,6 +25,41 @@ def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
raise ValueError("no .rdata section found")


def fix_b2s_wide_strings(
    strings: List[Tuple[str, str, Tuple[int, int], bool]], min_length: int, buffer: bytes
) -> List[Tuple[str, str, Tuple[int, int], bool]]:
    """
    work around b2s reporting a wide string where there really is a utf-8 string

    entries whose type is "WIDE_STRING" are never copied to the result.
    when the utf-16le encoding of such an entry starts with a zero byte, the string is
    re-extracted from `buffer` one byte past the entry's start offset and remembered as a
    pending fix-up, provided the re-extracted text has at least `min_length` characters.
    the next non-wide entry is replaced by the pending fix-up if its text is contained in the
    fix-up's text; otherwise it is kept as is. either way the pending fix-up is then dropped.

    args:
      strings: b2s result tuples, read here as (text, type, (start, end), flag);
        the flag is passed through untouched
      min_length: minimum number of characters a re-extracted string needs to be used
      buffer: the bytes that the offsets in `strings` index into

    returns: new list with the non-wide entries, some of them replaced by re-extracted strings
    """
    # TODO(mr-tz): b2s may parse wide strings where there really should be utf-8 strings
    #  handle special cases here until fixed
    #  https://github.com/mandiant/flare-floss/issues/867
    fixed_strings: List[Tuple[str, str, Tuple[int, int], bool]] = []
    last_fixup: Optional[Tuple[str, str, Tuple[int, int], bool]] = None
    for string in strings:
        s = string[0]
        string_type = string[1]
        start = string[2][0]

        if string_type == "WIDE_STRING":
            sd = s.encode("utf-16le", "ignore")
            # utf-8 strings will not start with \x00
            # sd is empty if s is empty or nothing in it could be encoded,
            # then there is no first byte to inspect and nothing to fix up
            if sd and sd[0] == 0:
                # skip the leading byte and let b2s parse again from there
                offset = start + 1
                new_string = b2s.extract_string(buffer[offset:])
                # b2s offsets are relative to the slice, shift them back so they index into buffer
                last_fixup = (
                    new_string[0],
                    new_string[1],
                    (new_string[2][0] + offset, new_string[2][1] + offset),
                    new_string[3],
                )
                if len(last_fixup[0]) < min_length:
                    last_fixup = None
        else:
            if last_fixup is not None and s in last_fixup[0]:
                # this entry is only a piece of the re-extracted string, use the full one
                fixed_strings.append(last_fixup)
            else:
                fixed_strings.append(string)
            # a fix-up only applies to the first non-wide entry that follows it
            last_fixup = None
    return fixed_strings


def filter_and_transform_utf8_strings(
strings: List[Tuple[str, str, Tuple[int, int], bool]],
start_rdata: int,
Expand All @@ -46,7 +81,7 @@ def filter_and_transform_utf8_strings(
return transformed_strings


def split_strings(static_strings: List[StaticString], address: int) -> None:
def split_strings(static_strings: List[StaticString], address: int, min_length: int) -> None:
"""
if address is in between start and end of a string in ref data then split the string
this modifies the elements of the static strings list directly
Expand All @@ -57,8 +92,12 @@ def split_strings(static_strings: List[StaticString], address: int) -> None:
rust_string = string.string[0 : address - string.offset]
rest = string.string[address - string.offset :]

static_strings.append(StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8))
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))
if len(rust_string) >= min_length:
static_strings.append(
StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8)
)
if len(rest) >= min_length:
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))

# remove string from static_strings
for static_string in static_strings:
Expand Down Expand Up @@ -97,12 +136,14 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
end_rdata = start_rdata + rdata_section.SizeOfRawData
virtual_address = rdata_section.VirtualAddress
pointer_to_raw_data = rdata_section.PointerToRawData
buffer_rdata = rdata_section.get_data()

# extract utf-8 and wide strings, latter not needed here
strings = b2s.extract_all_strings(rdata_section.get_data(), min_length)
strings = b2s.extract_all_strings(buffer_rdata, min_length)
fixed_strings = fix_b2s_wide_strings(strings, min_length, buffer_rdata)

# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(strings, start_rdata)
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)

struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe))

Expand All @@ -126,7 +167,7 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
if not (start_rdata <= address < end_rdata):
continue

split_strings(static_strings, address)
split_strings(static_strings, address, min_length)

return static_strings

Expand Down
3 changes: 1 addition & 2 deletions tests/test_language_extract_rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,11 @@ def rust_strings64():
# .rdata:00000001400BD040 30 D0 0B 40 01 00 pieces ___str_ <offset aHelloWorld, 0Eh>
# .rdata:00000001400BD040 00 00 00 00 ; "Hello, world!\n"
pytest.param("Hello, world!", 0xBB030, StringEncoding.UTF8, "rust_strings64"),
# TODO enable, see issue #867
# .rdata:00000001400BD050 69 6E 76 61 6C 69 aInvalidArgs db 'invalid args',0
# .rdata:00000001400BD05D 00 00 00 align 20h
# .rdata:00000001400BD060 50 D0 0B 40 01 00 stru_1400BD060 ___str_ <offset aInvalidArgs, 0Ch>
# .rdata:00000001400BD060 00 00 00 00 ; "invalid args"
# pytest.param("invalid args", 0xBB050, StringEncoding.UTF8, "rust_strings64"),
pytest.param("invalid args", 0xBB050, StringEncoding.UTF8, "rust_strings64"),
],
)
def test_data_string_offset(request, string, offset, encoding, rust_strings):
Expand Down

0 comments on commit 9405cb8

Please sign in to comment.