Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 55 additions & 19 deletions floss/language/go/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@

import floss.utils
from floss.results import StaticString, StringEncoding
from floss.language.utils import StructString, find_lea_xrefs, get_struct_string_candidates
from floss.language.utils import (
StructString,
find_lea_xrefs,
get_struct_string_candidates,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -87,7 +91,9 @@ def find_amd64_stackstrings(section_data, offset, min_length):
b"\x48\xba(........)|\x48\xb8(........)|\x81\x78\x08(....)|\x81\x79\x08(....)|\x66\x81\x78\x0c(..)|\x66\x81\x79\x0c(..)|\x80\x78\x0e(.)|\x80\x79\x0e(.)"
)

yield from find_stack_strings_with_regex(extract_stackstring_pattern, section_data, offset, min_length)
yield from find_stack_strings_with_regex(
extract_stackstring_pattern, section_data, offset, min_length
)


def find_i386_stackstrings(section_data, offset, min_length):
Expand All @@ -108,7 +114,9 @@ def find_i386_stackstrings(section_data, offset, min_length):
re.DOTALL,
)

yield from find_stack_strings_with_regex(extract_stackstring_pattern, section_data, offset, min_length)
yield from find_stack_strings_with_regex(
extract_stackstring_pattern, section_data, offset, min_length
)


def get_stackstrings(pe: pefile.PE, min_length: int) -> Iterable[StaticString]:
Expand Down Expand Up @@ -207,7 +215,9 @@ def read_struct_string(pe: pefile.PE, instance: StructString) -> str:
return s


def find_string_blob_range(pe: pefile.PE, struct_strings: List[StructString]) -> Tuple[VA, VA]:
def find_string_blob_range(
pe: pefile.PE, struct_strings: List[StructString]
) -> Tuple[VA, VA]:
"""
find the range of the string blob, as loaded in memory.

Expand All @@ -231,15 +241,19 @@ def find_string_blob_range(pe: pefile.PE, struct_strings: List[StructString]) ->

struct_strings.sort(key=lambda s: s.address)

run_start, run_end = find_longest_monotonically_increasing_run(list(map(lambda s: s.length, struct_strings)))
run_start, run_end = find_longest_monotonically_increasing_run(
list(map(lambda s: s.length, struct_strings))
)

# pick the mid string, so that we avoid any junk data on the edges of the string blob
run_mid = (run_start + run_end) // 2
instance = struct_strings[run_mid]

s = read_struct_string(pe, instance)
assert s is not None
logger.debug("string blob: struct string instance: 0x%x: %s...", instance.address, s[:16])
logger.debug(
"string blob: struct string instance: 0x%x: %s...", instance.address, s[:16]
)

instance_rva = instance.address - image_base
section = pe.get_section_by_rva(instance_rva)
Expand Down Expand Up @@ -286,7 +300,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
image_base = pe.OPTIONAL_HEADER.ImageBase

with floss.utils.timing("find struct string candidates"):
struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address))
struct_strings = list(
sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)
)
if not struct_strings:
logger.warning(
"Failed to find struct string candidates: Is this a Go binary? If so, the Go version may be unsupported."
Expand All @@ -295,12 +311,12 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]

with floss.utils.timing("find string blob"):
try:
string_blob_start, string_blob_end = find_string_blob_range(pe, struct_strings)
except ValueError:
logger.warning(
"Failed to find string blob range: Is this a Go binary? If so, the Go version may be unsupported."
string_blob_start, string_blob_end = find_string_blob_range(
pe, struct_strings
)
return
except ValueError:

pass
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The `except ValueError` block was changed to `pass`, which can lead to an `UnboundLocalError` on the following line, `string_blob_size = string_blob_end - string_blob_start`, if `find_string_blob_range` raises a `ValueError`: in that case `string_blob_start` and `string_blob_end` would never be assigned. The previous implementation, which logged a warning and returned, was safer.

Suggested change
except ValueError:
pass
except ValueError:
logger.warning(
"Failed to find string blob range: Is this a Go binary? If so, the Go version may be unsupported."
)
return


with floss.utils.timing("collect string blob strings"):
string_blob_size = string_blob_end - string_blob_start
Expand Down Expand Up @@ -352,10 +368,17 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
# 0x4aabed: -thread limit
#
# we probably missed the string: " procedure in "
logger.warning("probably missed a string blob string ending at: 0x%x", start - 1)
logger.warning(
"probably missed a string blob string ending at: 0x%x", start - 1
)

try:
string = StaticString.from_utf8(sbuf, pe.get_offset_from_rva(start - image_base), min_length)
string = StaticString.from_utf8(
sbuf,
pe.get_offset_from_rva(start - image_base),
min_length,
address=start,
)
yield string
except ValueError:
pass
Expand All @@ -381,7 +404,10 @@ def get_string_blob_strings(pe: pefile.PE, min_length) -> Iterable[StaticString]
else:
try:
string = StaticString.from_utf8(
last_buf[:size], pe.get_offset_from_rva(last_pointer - image_base), min_length
last_buf[:size],
pe.get_offset_from_rva(last_pointer - image_base),
min_length,
address=last_pointer,
)
yield string
except ValueError:
Expand All @@ -405,10 +431,14 @@ def extract_go_strings(sample, min_length) -> List[StaticString]:
return go_strings


def get_static_strings_from_blob_range(sample: pathlib.Path, static_strings: List[StaticString]) -> List[StaticString]:
def get_static_strings_from_blob_range(
sample: pathlib.Path, static_strings: List[StaticString]
) -> List[StaticString]:
pe = pefile.PE(data=pathlib.Path(sample).read_bytes(), fast_load=True)

struct_strings = list(sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address))
struct_strings = list(
sorted(set(get_struct_string_candidates(pe)), key=lambda s: s.address)
)
if not struct_strings:
return []

Expand All @@ -421,7 +451,11 @@ def get_static_strings_from_blob_range(sample: pathlib.Path, static_strings: Lis
string_blob_start = pe.get_offset_from_rva(string_blob_start - image_base)
string_blob_end = pe.get_offset_from_rva(string_blob_end - image_base)

return list(filter(lambda s: string_blob_start <= s.offset < string_blob_end, static_strings))
return list(
filter(
lambda s: string_blob_start <= s.offset < string_blob_end, static_strings
)
)


def main(argv=None):
Expand All @@ -439,7 +473,9 @@ def main(argv=None):

logging.basicConfig(level=logging.DEBUG)

go_strings = sorted(extract_go_strings(args.path, args.min_length), key=lambda s: s.offset)
go_strings = sorted(
extract_go_strings(args.path, args.min_length), key=lambda s: s.offset
)
for string in go_strings:
print(f"{string.offset:#x}: {string.string}")

Expand Down
60 changes: 45 additions & 15 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@


def fix_b2s_wide_strings(
strings: List[Tuple[str, str, Tuple[int, int], bool]], min_length: int, buffer: bytes
strings: List[Tuple[str, str, Tuple[int, int], bool]],
min_length: int,
buffer: bytes,
) -> List[Tuple[str, str, Tuple[int, int], bool]]:
# TODO(mr-tz): b2s may parse wide strings where there really should be utf-8 strings
# handle special cases here until fixed
Expand Down Expand Up @@ -74,48 +76,72 @@ def fix_b2s_wide_strings(
def filter_and_transform_utf8_strings(
strings: List[Tuple[str, str, Tuple[int, int], bool]],
start_rdata: int,
image_base: int,
virtual_address: int,
) -> List[StaticString]:
transformed_strings = []

for string in strings:
s = string[0]
string_type = string[1]

# Calculate file offset
start = string[2][0] + start_rdata

# Calculate memory address (VA)
address = image_base + virtual_address + string[2][0]

if string_type != "UTF8":
continue

# our static algorithm does not extract new lines either
# FLOSS logic: remove new lines
s = s.replace("\n", "")
transformed_strings.append(StaticString(string=s, offset=start, encoding=StringEncoding.UTF8))

return transformed_strings
# We pass the calculated address here
transformed_strings.append(
StaticString(
string=s, offset=start, encoding=StringEncoding.UTF8, address=address
)
)

return transformed_strings

def split_strings(static_strings: List[StaticString], address: int, min_length: int) -> None:
"""
if address is in between start and end of a string in ref data then split the string
this modifies the elements of the static strings list directly
"""

def split_strings(
static_strings: List[StaticString], address: int, min_length: int
) -> None:
for string in static_strings:
if string.offset < address < string.offset + len(string.string):
rust_string = string.string[0 : address - string.offset]
rest = string.string[address - string.offset :]

if len(rust_string) >= min_length:
# Part 1: Keeps the original base address
static_strings.append(
StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8)
StaticString(
string=rust_string,
offset=string.offset,
encoding=StringEncoding.UTF8,
address=string.address,
)
)
if len(rest) >= min_length:
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))
# Part 2: Calculate the new VA for the split point
va_at_split = string.address + (address - string.offset)
static_strings.append(
StaticString(
string=rest,
offset=address,
encoding=StringEncoding.UTF8,
address=va_at_split,
)
)

# remove string from static_strings
# Remove the original unsplit string
for static_string in static_strings:
if static_string == string:
static_strings.remove(static_string)
return

return
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Modifying a list while iterating over it, as this inner loop does, can lead to unexpected behavior and is inefficient. Since `string` is the element from the outer loop that needs to be removed, you can call `static_strings.remove(string)` directly. This is safer and more readable.

            # Remove the original unsplit string
            static_strings.remove(string)
            return



Expand Down Expand Up @@ -168,7 +194,9 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
fixed_strings = fix_b2s_wide_strings(strings, min_length, buffer_rdata)

# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)
static_strings = filter_and_transform_utf8_strings(
fixed_strings, start_rdata, image_base, virtual_address
)

# TODO(mr-tz) - handle miss in rust-hello64.exe
# .rdata:00000001400C1270 0A aPanickedAfterP db 0Ah ; DATA XREF: .rdata:00000001400C12B8↓o
Expand Down Expand Up @@ -222,7 +250,9 @@ def main(argv=None):

logging.basicConfig(level=logging.DEBUG)

rust_strings = sorted(extract_rust_strings(args.path, args.min_length), key=lambda s: s.offset)
rust_strings = sorted(
extract_rust_strings(args.path, args.min_length), key=lambda s: s.offset
)
for string in rust_strings:
print(f"{string.offset:#x}: {string.string}")

Expand Down
Loading