Skip to content

Commit

Permalink
Extract Rust specific strings from binaries mandiant#791 (mandiant#836)
Browse files Browse the repository at this point in the history
* Initial implementation of Rust specific strings

* Added rust coverage script

* Introduced shared functions into language/utils.py

* Refactored Go and Rust extraction files

* Simplify rust string extraction

* Added push and mov xrefs for i386 arch and test updates

* Add push and mov for amd64

---------

Co-authored-by: Moritz <[email protected]>
  • Loading branch information
Arker123 and mr-tz committed Aug 23, 2023
1 parent 5678d2f commit 567fc95
Show file tree
Hide file tree
Showing 10 changed files with 995 additions and 514 deletions.
4 changes: 3 additions & 1 deletion .github/mypy/mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@ ignore_missing_imports = True
ignore_missing_imports = True

[mypy-bs4.*]
ignore_missing_imports = True
ignore_missing_imports = True
[mypy-binary2strings.*]
ignore_missing_imports = True
227 changes: 6 additions & 221 deletions floss/language/go/coverage.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
import sys
import hashlib
import logging
import pathlib
import argparse
from typing import List

import pefile
import tabulate

from floss.utils import get_static_strings
from floss.results import StaticString, StringEncoding
from floss.render.sanitize import sanitize
from floss.language.utils import get_extract_stats
from floss.language.go.extract import extract_go_strings

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -59,224 +57,11 @@ def main():

go_strings = extract_go_strings(path, args.min_length)

get_extract_stats(pe, static_strings, go_strings, args.min_length)


def _section_name(pe, rva: int) -> str:
    """Return the name of the PE section containing *rva*, or "" when no section does."""
    sec = pe.get_section_by_rva(rva)
    return sec.Name.decode("utf-8").split("\x00")[0] if sec else ""


def _abbreviate(text: str) -> str:
    """Shorten strings of 50+ characters to a head/tail form, then sanitize for display."""
    if len(text) >= 50:
        text = text[:36] + "...." + text[-10:]
    return sanitize(text)


def get_extract_stats(
    pe, all_ss_strings: List[StaticString], go_strings, min_len, min_blob_length: int = 2800
) -> float:
    """
    Print a coverage table comparing static strings against extracted Go strings.

    Only strings located in the ".rdata" section are considered. For every static
    string longer than `min_blob_length`, each Go string that is contained in it
    (and whose offset lies within the static string's range) is removed from the
    static string; the number of characters accounted for this way is compared
    against the total number of characters in the considered static strings.

    Args:
        pe: object providing `get_section_by_rva(rva)`; presumably a `pefile.PE`
            instance (confirm against the caller).
        all_ss_strings: static strings extracted from the file.
        go_strings: Go strings extracted from the file; iterated more than once.
        min_len: once the remainder of a static string is shorter than this,
            matching for that string stops and a "missing" row is recorded.
        min_blob_length: static strings of this length or shorter are ignored.
            The default of 2800 was chosen based on experimentation on different
            samples of Go binaries that include versions 1.20, 1.18, 1.16, and
            1.12, and architectures amd64 and i386.
            See: https://github.com/mandiant/flare-floss/issues/807#issuecomment-1636087673

    Returns:
        Percentage (0-100) of static string characters covered by Go strings,
        or 0.0 when no static string qualified (previously a ZeroDivisionError).
    """
    target_section = ".rdata"

    # The section of a Go string does not depend on the static string it is
    # compared against, so resolve and filter these once instead of once per
    # static string.
    rdata_go_strings = [gs for gs in go_strings if _section_name(pe, gs.offset) == target_section]

    len_all_ss = 0
    len_gostr = 0

    gs_found = list()
    results = list()
    # these are ascii, extract these utf-8 to get fewer chunks (ascii may split on two-byte characters, for example)
    for s in all_ss_strings:
        secname = _section_name(pe, s.offset)
        if secname != target_section:
            continue

        if len(s.string) <= min_blob_length:
            continue

        len_all_ss += len(s.string)

        orig_len = len(s.string)
        # short, stable identifier so rows derived from the same static string can be correlated
        s_id = hashlib.sha256(s.string.encode("utf-8")).hexdigest()[:3].upper()
        s_range = (s.offset, s.offset + len(s.string))

        found = False
        for gs in rdata_go_strings:
            if not (gs.string and gs.string in s.string and s.offset <= gs.offset <= s.offset + orig_len):
                continue

            found = True
            len_gostr += len(gs.string)

            # remove found string data
            idx = s.string.find(gs.string)
            assert idx != -1
            if idx == 0:
                new_offset = s.offset + idx + len(gs.string)
            else:
                new_offset = s.offset

            replaced_s = s.string.replace(gs.string, "", 1)
            replaced_len = len(replaced_s)
            s_trimmed = StaticString(
                string=replaced_s,
                offset=new_offset,
                encoding=s.encoding,
            )

            type_ = "substring"
            if s.string[: len(gs.string)] == gs.string and s.offset == gs.offset:
                type_ = "exactsubstr"

            # this row records the string as it was *before* trimming
            results.append((secname, s_id, s_range, True, type_, s, replaced_len, gs))

            s = s_trimmed

            gs_found.append(gs)

            if replaced_len < min_len:
                # what is left is too short to be reported as a string of its own
                results.append((secname, s_id, s_range, False, "missing", s, orig_len - replaced_len, gs))
                break

        if not found:
            null = StaticString(string="", offset=0, encoding=StringEncoding.UTF8)
            results.append((secname, s_id, s_range, False, "", s, 0, null))

    rows = list()
    # Go strings in .rdata that were never matched against any static string
    for gs in rdata_go_strings:
        if gs in gs_found:
            continue

        rows.append(
            (
                target_section,
                "",
                "",
                f"{gs.offset:8x}",
                "",
                "unmatched go string",
                "",
                "",
                f"{len(gs.string) if gs.string else ''}",
                _abbreviate(gs.string),
                f"{hex(gs.offset) if gs.offset else ''}",
            )
        )

    for secname, s_id, s_range, found, msg, s, len_after, gs in results:
        len_info = f"{len(s.string):3d}"
        if found:
            len_info = f"{len(s.string):3d} > {len_after:3d} ({(len(s.string) - len_after) * -1:2d})"

        rows.append(
            (
                f"{secname}",
                f"<{s_id}>",
                f"{s_range[0]:x} - {s_range[1]:x}",
                f"{s.offset:8x}",
                f"{found}",
                f"{msg}",
                len_info,
                _abbreviate(s.string),
                f"{len(gs.string) if gs.string else ''}",
                _abbreviate(gs.string),
                f"{hex(gs.offset) if gs.offset else ''}",
            )
        )

    # order by the formatted offset column
    rows = sorted(rows, key=lambda t: t[3])

    print(
        tabulate.tabulate(
            rows,
            headers=[
                "section",
                "id",
                "range",
                "offset",
                "found",
                "msg",
                "slen",
                "string",
                "gslen",
                "gostring",
                "gsoff",
            ],
            tablefmt="psql",
        )
    )

    # Guard against files without any qualifying static string: report 0%
    # rather than failing with ZeroDivisionError.
    percentage = 100 * (len_gostr / len_all_ss) if len_all_ss else 0.0

    print(".rdata only")
    print("len all string chars:", len_all_ss)
    print("len gostring chars :", len_gostr)
    print(f"Percentage of string chars extracted: {round(percentage)}%")
    print()

    return percentage


def get_missed_strings(
    all_ss_strings: List[StaticString], go_strings: List[StaticString], min_len: int
) -> List[StaticString]:
    """
    Collect the static strings for which no extracted Go string matched.

    A Go string matches a static string when it is non-empty, is contained in
    the static string's text, and its offset lies within the static string's
    offset range. Each match is cut out of the static string; matching for a
    static string stops early once fewer than `min_len` characters remain.

    Args:
        all_ss_strings: static strings to check.
        go_strings: extracted Go strings to match against them.
        min_len: remainder length below which matching for a string stops.

    Returns:
        The static strings that had no matching Go string at all, in input order.
    """
    unmatched = []

    for candidate in all_ss_strings:
        # the offset window keeps using the untrimmed length, even after trimming
        span_len = len(candidate.string)
        remainder = candidate
        matched = False

        for extracted in go_strings:
            needle = extracted.string
            if not needle or needle not in remainder.string:
                continue
            if not (remainder.offset <= extracted.offset <= remainder.offset + span_len):
                continue

            matched = True

            # cut the first occurrence of the Go string out of the remaining data
            position = remainder.string.find(needle)
            assert position != -1
            # only a match at the very start moves the remainder's offset forward
            shifted = remainder.offset + position + len(needle) if position == 0 else remainder.offset
            leftover = remainder.string.replace(needle, "", 1)
            remainder = StaticString(string=leftover, offset=shifted, encoding=remainder.encoding)

            if len(leftover) < min_len:
                break

        if not matched:
            unmatched.append(remainder)

    return unmatched
# The value 2800 was chosen based on experimentation on different samples
# of Go binaries that include versions 1.20, 1.18, 1.16, and 1.12, and
# architectures amd64 and i386.
# See: https://github.com/mandiant/flare-floss/issues/807#issuecomment-1636087673
get_extract_stats(pe, static_strings, go_strings, args.min_length, 2800)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 567fc95

Please sign in to comment.