diff --git a/libr/arch/p/pyc/plugin.c b/libr/arch/p/pyc/plugin.c index bb357340ccb69..94db5212208b8 100644 --- a/libr/arch/p/pyc/plugin.c +++ b/libr/arch/p/pyc/plugin.c @@ -37,11 +37,27 @@ static pyc_opcodes *get_pyc_opcodes(RArchSession *s) { return ops; } -static RList *get_pyc_code_obj(RArchSession *as) { +#if 0 +struct pyc_version { + ut32 magic; + const char *version; + const char *revision; +}; +typedef struct { + ut64 code_start_offset; + struct pyc_version version; + RList *sections_cache; // RList + RList *interned_table; // RList + RList *cobjs; // RList +} RBinPycObj; +#endif + +static inline RList *get_pyc_code_obj(RArchSession *as) { RBin *b = as->arch->binb.bin; - RBinPlugin *plugin = b->cur && b->cur->bo? b->cur->bo->plugin: NULL; - bool is_pyc = (plugin && strcmp (plugin->meta.name, "pyc") == 0); - return is_pyc? b->cur->bo->bin_obj: NULL; + RBinPlugin *plugin = R_UNWRAP4 (b, cur, bo, plugin); + const bool is_pyc = (plugin && !strcmp (plugin->meta.name, "pyc")); + RBinPycObj *pyc = is_pyc? b->cur->bo->bin_obj: NULL; + return pyc? pyc->cobjs: NULL; } static inline pyc_code_object *get_func(ut64 pc, RList *pyobj) { diff --git a/libr/arch/p/pyc/pyc_dis.h b/libr/arch/p/pyc/pyc_dis.h index eb859c54a9837..07fa5c9650f77 100644 --- a/libr/arch/p/pyc/pyc_dis.h +++ b/libr/arch/p/pyc/pyc_dis.h @@ -6,6 +6,24 @@ #include #include +#if 0 +#include "../../bin/format/pyc/pyc_magic.h" +#else +struct pyc_version { + ut32 magic; + const char *version; + const char *revision; +}; + +typedef struct { + ut64 code_start_offset; + struct pyc_version version; + RList *sections_cache; // RList + RList *interned_table; // RList + RList *cobjs; // RList +} RBinPycObj; +#endif + #include "opcode.h" typedef struct { diff --git a/libr/bin/format/pyc/marshal.c b/libr/bin/format/pyc/marshal.c index addc833129502..df1ef5b39b81c 100644 --- a/libr/bin/format/pyc/marshal.c +++ b/libr/bin/format/pyc/marshal.c @@ -8,15 +8,9 @@ // avoiding using r2 internals asserts #define if_true_return(cond,ret) if (cond) { return (ret); } -// TODO: kill globals -static R_TH_LOCAL ut32 Gmagic = 0; -static R_TH_LOCAL ut32 Gscount = 0; -static R_TH_LOCAL RList *Grefs = NULL; // If you don't have a good reason, do not change this. And also checkout !refs in get_code_object() +/* All mutable parse state is carried in PycUnmarshalCtx (see marshal.h). */ -/* interned_table is used to handle TYPE_INTERNED object */ -extern R_TH_LOCAL RList *interned_table; - -static pyc_object *get_object(RBuffer *buffer, int wtype); +static pyc_object *get_object(RBuffer *buffer, int wtype, PycUnmarshalCtx *ctx); static pyc_object *copy_object(pyc_object *object); static void free_object(pyc_object *object); @@ -92,21 +86,16 @@ static ut8 *get_bytes(RBuffer *buffer, ut32 size) { static pyc_object *get_none_object(void) { pyc_object *ret = R_NEW0 (pyc_object); - if (ret) { - ret->type = TYPE_NONE; - ret->data = strdup ("None"); - if (!ret->data) { - R_FREE (ret); - } + ret->type = TYPE_NONE; + ret->data = strdup ("None"); + if (!ret->data) { + R_FREE (ret); } return ret; } static pyc_object *get_false_object(void) { pyc_object *ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } ret->type = TYPE_FALSE; ret->data = strdup ("False"); if (!ret->data) { @@ -117,9 +106,6 @@ static pyc_object *get_false_object(void) { static pyc_object *get_true_object(void) { pyc_object *ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } ret->type = TYPE_TRUE; ret->data = strdup ("True"); if (!ret->data) { @@ -136,9 +122,6 @@ static pyc_object *get_int_object(RBuffer *buffer) { return NULL; } pyc_object *ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } ret->type = TYPE_INT; ret->data = r_str_newf ("%d", i); if (!ret->data) { @@ -155,12 +138,10 @@ static pyc_object *get_int64_object(RBuffer *buffer) { return NULL; } pyc_object *ret = R_NEW0 (pyc_object); - if (ret) { - ret->type = TYPE_INT64; - ret->data = r_str_newf ("%"PFMT64d, (st64)i); - if (!ret->data) { - R_FREE (ret); - } + ret->type = TYPE_INT64; + ret->data = r_str_newf ("%"PFMT64d, (st64)i); + if (!ret->data) { + R_FREE (ret); } return ret; } @@ -181,9 +162,6 @@ static pyc_object *get_long_object(RBuffer *buffer) { return NULL; } pyc_object *ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } ret->type = TYPE_LONG; if (ndigits < 0) { ndigits = -ndigits; @@ -244,23 +222,19 @@ static pyc_object *get_long_object(RBuffer *buffer) { return ret; } -static pyc_object *get_stringref_object(RBuffer *buffer) { - pyc_object *ret = NULL; +static pyc_object *get_stringref_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; ut32 n = get_st32 (buffer, &error); - if (n >= r_list_length (interned_table)) { + if (n >= r_list_length (ctx->interned_table)) { R_LOG_DEBUG ("bad marshal data (string ref out of range)"); return NULL; } if (error) { return NULL; } - ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } + pyc_object *ret = R_NEW0 (pyc_object); ret->type = TYPE_STRINGREF; - ret->data = r_list_get_n (interned_table, n); + ret->data = r_list_get_n (ctx->interned_table, n); if (!ret->data) { R_FREE (ret); } @@ -268,7 +242,6 @@ static pyc_object *get_stringref_object(RBuffer *buffer) { } static pyc_object *get_float_object(RBuffer *buffer) { - pyc_object *ret = NULL; bool error = false; ut32 size = 0; @@ -276,10 +249,7 @@ static pyc_object *get_float_object(RBuffer *buffer) { if (error) { return NULL; } - ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } + pyc_object *ret = R_NEW0 (pyc_object); ut8 *s = malloc (n + 1); if (!s) { free (ret); @@ -299,7 +269,6 @@ static pyc_object *get_float_object(RBuffer *buffer) { } static pyc_object *get_binary_float_object(RBuffer *buffer) { - pyc_object *ret = NULL; bool error = false; double f; @@ -307,7 +276,7 @@ static pyc_object *get_binary_float_object(RBuffer *buffer) { if (error) { return NULL; } - ret = R_NEW0 (pyc_object); + pyc_object *ret = R_NEW0 (pyc_object); if (!ret) { return NULL; } @@ -320,18 +289,14 @@ static pyc_object *get_binary_float_object(RBuffer *buffer) { return ret; } -static pyc_object *get_complex_object(RBuffer *buffer) { +static pyc_object *get_complex_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; ut32 size = 0; st32 n1 = 0; st32 n2 = 0; pyc_object *ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } - - if ((Gmagic & 0xffff) <= 62061) { + if ((ctx->magic & 0xffff) <= 62061) { n1 = get_ut8 (buffer, &error); } else { n1 = get_st32 (buffer, &error); @@ -354,7 +319,7 @@ static pyc_object *get_complex_object(RBuffer *buffer) { } s1[n1] = '\0'; - if ((Gmagic & 0xffff) <= 62061) { + if ((ctx->magic & 0xffff) <= 62061) { n2 = get_ut8 (buffer, &error); } else { n2 = get_st32 (buffer, &error); @@ -399,9 +364,6 @@ static pyc_object *get_binary_complex_object(RBuffer *buffer) { return NULL; } ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } ret->type = TYPE_BINARY_COMPLEX; ret->data = r_str_newf ("%.15g+%.15gj", a, b); if (!ret->data) { @@ -425,9 +387,6 @@ static pyc_object *get_string_object(RBuffer *buffer) { return NULL; } ret = R_NEW0 (pyc_object); - if (!ret) { - return NULL; - } ret->type = TYPE_STRING; ret->data = get_bytes (buffer, n); if (!ret->data) { @@ -460,7 +419,7 @@ static pyc_object *get_unicode_object(RBuffer *buffer) { return ret; } -static pyc_object *get_interned_object(RBuffer *buffer) { +static pyc_object *get_interned_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { pyc_object *ret = NULL; bool error = false; ut32 n = get_ut32 (buffer, &error); @@ -478,14 +437,14 @@ static pyc_object *get_interned_object(RBuffer *buffer) { ret->type = TYPE_INTERNED; ret->data = get_bytes (buffer, n); /* add data pointer to interned table */ - r_list_append (interned_table, ret->data); + r_list_append (ctx->interned_table, ret->data); if (!ret->data) { R_FREE (ret); } return ret; } -static pyc_object *get_array_object_generic(RBuffer *buffer, ut32 size) { +static pyc_object *get_array_object_generic(RBuffer *buffer, ut32 size, PycUnmarshalCtx *ctx) { pyc_object *ret = R_NEW0 (pyc_object); if (!ret) { return NULL; @@ -497,7 +456,7 @@ static pyc_object *get_array_object_generic(RBuffer *buffer, ut32 size) { } ut32 i; for (i = 0; i < size; i++) { - pyc_object *tmp = get_object (buffer, 0); + pyc_object *tmp = get_object (buffer, 0, ctx); if (!tmp || !r_list_append (ret->data, tmp)) { free_object (tmp); ((RList*)ret->data)->free = NULL; @@ -510,13 +469,13 @@ static pyc_object *get_array_object_generic(RBuffer *buffer, ut32 size) { } /* small TYPE_SMALL_TUPLE doesn't exist in python2 */ -static pyc_object *get_small_tuple_object(RBuffer *buffer) { +static pyc_object *get_small_tuple_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; ut8 n = get_ut8 (buffer, &error); if (error) { return NULL; } - pyc_object *ret = get_array_object_generic (buffer, n); + pyc_object *ret = get_array_object_generic (buffer, n, ctx); if (ret) { ret->type = TYPE_SMALL_TUPLE; return ret; @@ -524,7 +483,7 @@ static pyc_object *get_small_tuple_object(RBuffer *buffer) { return NULL; } -static pyc_object *get_tuple_object(RBuffer *buffer) { +static pyc_object *get_tuple_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; ut32 n = get_ut32 (buffer, &error); if (n > ST32_MAX) { @@ -534,14 +493,14 @@ static pyc_object *get_tuple_object(RBuffer *buffer) { if (error) { return NULL; } - pyc_object *ret = get_array_object_generic (buffer, n); + pyc_object *ret = get_array_object_generic (buffer, n, ctx); if (ret) { ret->type = TYPE_TUPLE; } return ret; } -static pyc_object *get_list_object(RBuffer *buffer) { +static pyc_object *get_list_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { pyc_object *ret = NULL; bool error = false; ut32 n = get_ut32 (buffer, &error); @@ -552,7 +511,7 @@ static pyc_object *get_list_object(RBuffer *buffer) { if (error) { return NULL; } - ret = get_array_object_generic (buffer, n); + ret = get_array_object_generic (buffer, n, ctx); if (ret) { ret->type = TYPE_LIST; return ret; @@ -560,7 +519,7 @@ static pyc_object *get_list_object(RBuffer *buffer) { return NULL; } -static pyc_object *get_dict_object(RBuffer *buffer) { +static pyc_object *get_dict_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { pyc_object *key = NULL, *val = NULL; @@ -574,7 +533,7 @@ static pyc_object *get_dict_object(RBuffer *buffer) { return NULL; } for (;;) { - key = get_object (buffer, 0); + key = get_object (buffer, 0, ctx); if (!key) { break; } @@ -584,7 +543,7 @@ static pyc_object *get_dict_object(RBuffer *buffer) { free_object (key); return NULL; } - val = get_object (buffer, 0); + val = get_object (buffer, 0, ctx); if (!val) { break; } @@ -599,7 +558,7 @@ static pyc_object *get_dict_object(RBuffer *buffer) { return ret; } -static pyc_object *get_set_object(RBuffer *buffer) { +static pyc_object *get_set_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; ut32 n = get_ut32 (buffer, &error); if (n > ST32_MAX) { @@ -609,7 +568,7 @@ static pyc_object *get_set_object(RBuffer *buffer) { if (error) { return NULL; } - pyc_object *ret = get_array_object_generic (buffer, n); + pyc_object *ret = get_array_object_generic (buffer, n, ctx); if (ret) { ret->type = TYPE_SET; } @@ -662,16 +621,16 @@ static pyc_object *get_short_ascii_interned_object(RBuffer *buffer) { return error? NULL: get_ascii_object_generic (buffer, n, true); } -static pyc_object *get_ref_object(RBuffer *buffer) { +static pyc_object *get_ref_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; ut32 index = get_ut32 (buffer, &error); if (error) { return NULL; } - if (index >= r_list_length (Grefs)) { + if (index >= r_list_length (ctx->refs)) { return NULL; } - pyc_object *obj = r_list_get_n (Grefs, index); + pyc_object *obj = r_list_get_n (ctx->refs, index); return obj? copy_object (obj): NULL; } @@ -822,22 +781,16 @@ static pyc_object *copy_object(pyc_object *object) { return copy; } -static pyc_object *get_code_object(RBuffer *buffer) { +static pyc_object *get_code_object(RBuffer *buffer, PycUnmarshalCtx *ctx) { bool error = false; pyc_object *ret = R_NEW0 (pyc_object); pyc_code_object *cobj = R_NEW0 (pyc_code_object); - if (!ret || !cobj) { - free (ret); - free (cobj); - return NULL; - } - // ret->type = TYPE_CODE_v1; // support start from v1.0 ret->data = cobj; - const char *ver = get_pyc_version (Gmagic).version; + const char *ver = get_pyc_version (ctx->magic).version; bool v10_to_12 = magic_int_within (ver, "1.0.1", "1.2", &error); bool v13_to_22 = magic_int_within (ver, "1.3b1", "2.2a1", &error); bool v11_to_14 = magic_int_within (ver, "1.0.1", "1.4", &error); @@ -865,7 +818,7 @@ static pyc_object *get_code_object(RBuffer *buffer) { cobj->posonlyargcount = 0; // None } - if (((3020 < (Gmagic & 0xffff)) && ((Gmagic & 0xffff) < 20121)) && (!v11_to_14)) { + if (((3020 < (ctx->magic & 0xffff)) && ((ctx->magic & 0xffff) < 20121)) && (!v11_to_14)) { cobj->kwonlyargcount = get_ut32 (buffer, &error); // Not included in argcount } else { cobj->kwonlyargcount = 0; @@ -898,31 +851,31 @@ static pyc_object *get_code_object(RBuffer *buffer) { // to help disassemble the code // 1 from get_object() and 4 from get_string_object() cobj->start_offset = r_buf_tell (buffer) + 5; - if (!Grefs) { + if (!ctx->refs) { return ret; //return for entried part to get the root object of this file } - cobj->code = get_object (buffer, 0); + cobj->code = get_object (buffer, 0, ctx); cobj->end_offset = r_buf_tell (buffer); - cobj->consts = get_object (buffer, 0); - cobj->names = get_object (buffer, 0); + cobj->consts = get_object (buffer, 0, ctx); + cobj->names = get_object (buffer, 0, ctx); if (v10_to_12) { cobj->varnames = NULL; } else { - cobj->varnames = get_object (buffer, 0); + cobj->varnames = get_object (buffer, 0, ctx); } if (!(v10_to_12 || v13_to_20)) { - cobj->freevars = get_object (buffer, 0); - cobj->cellvars = get_object (buffer, 0); + cobj->freevars = get_object (buffer, 0, ctx); + cobj->cellvars = get_object (buffer, 0, ctx); } else { cobj->freevars = NULL; cobj->cellvars = NULL; } - cobj->filename = get_object (buffer, 0); - cobj->name = get_object (buffer, 0); + cobj->filename = get_object (buffer, 0, ctx); + cobj->name = get_object (buffer, 0, ctx); if (v15_to_22) { cobj->firstlineno = get_ut16 (buffer, &error); @@ -935,7 +888,7 @@ static pyc_object *get_code_object(RBuffer *buffer) { if (v11_to_14) { cobj->lnotab = NULL; } else { - cobj->lnotab = get_object (buffer, 0); + cobj->lnotab = get_object (buffer, 0, ctx); } if (error) { @@ -955,9 +908,8 @@ static pyc_object *get_code_object(RBuffer *buffer) { return ret; } -ut64 get_code_object_addr(RBuffer *buffer, ut32 magic) { - Gmagic = magic; - pyc_object *co = get_code_object (buffer); +ut64 get_code_object_addr_ctx(RBuffer *buffer, PycUnmarshalCtx *ctx) { + pyc_object *co = get_code_object (buffer, ctx); if (co) { pyc_code_object *cobj = co->data; ut64 result = cobj->start_offset; @@ -967,7 +919,7 @@ ut64 get_code_object_addr(RBuffer *buffer, ut32 magic) { return 0; } -static pyc_object *get_object(RBuffer *buffer, int wanted_type) { +static pyc_object *get_object(RBuffer *buffer, int wanted_type, PycUnmarshalCtx *ctx) { bool error = false; pyc_object *ret = NULL; ut8 code = get_ut8 (buffer, &error); @@ -982,7 +934,7 @@ static pyc_object *get_object(RBuffer *buffer, int wanted_type) { if (flag) { pyc_object *noneret = get_none_object (); if (noneret) { - ref_idx = r_list_append (Grefs, noneret); + ref_idx = r_list_append (ctx->refs, noneret); } } if (wanted_type != 0) { @@ -1007,24 +959,24 @@ static pyc_object *get_object(RBuffer *buffer, int wanted_type) { return get_none_object (); case TYPE_REF: free_object (ret); - return get_ref_object (buffer); + return get_ref_object (buffer, ctx); case TYPE_SMALL_TUPLE: - ret = get_small_tuple_object (buffer); + ret = get_small_tuple_object (buffer, ctx); break; case TYPE_TUPLE: - ret = get_tuple_object (buffer); + ret = get_tuple_object (buffer, ctx); break; case TYPE_STRING: ret = get_string_object (buffer); break; case TYPE_CODE_v0: - ret = get_code_object (buffer); + ret = get_code_object (buffer, ctx); if (ret) { ret->type = TYPE_CODE_v0; } break; case TYPE_CODE_v1: - ret = get_code_object (buffer); + ret = get_code_object (buffer, ctx); if (ret) { ret->type = TYPE_CODE_v1; } @@ -1048,10 +1000,10 @@ static pyc_object *get_object(RBuffer *buffer, int wanted_type) { ret = get_int64_object (buffer); break; case TYPE_INTERNED: - ret = get_interned_object (buffer); + ret = get_interned_object (buffer, ctx); break; case TYPE_STRINGREF: - ret = get_stringref_object (buffer); + ret = get_stringref_object (buffer, ctx); break; case TYPE_FLOAT: ret = get_float_object (buffer); @@ -1060,13 +1012,13 @@ static pyc_object *get_object(RBuffer *buffer, int wanted_type) { ret = get_binary_float_object (buffer); break; case TYPE_COMPLEX: - ret = get_complex_object (buffer); // behaviour depends on Python version + ret = get_complex_object (buffer, ctx); // behaviour depends on Python version break; case TYPE_BINARY_COMPLEX: ret = get_binary_complex_object (buffer); break; case TYPE_LIST: - ret = get_list_object (buffer); + ret = get_list_object (buffer, ctx); break; case TYPE_LONG: ret = get_long_object (buffer); @@ -1075,11 +1027,11 @@ static pyc_object *get_object(RBuffer *buffer, int wanted_type) { ret = get_unicode_object (buffer); break; case TYPE_DICT: - ret = get_dict_object (buffer); + ret = get_dict_object (buffer, ctx); break; case TYPE_FROZENSET: case TYPE_SET: - ret = get_set_object (buffer); + ret = get_set_object (buffer, ctx); break; case TYPE_STOPITER: case TYPE_ELLIPSIS: @@ -1108,13 +1060,13 @@ static pyc_object *get_object(RBuffer *buffer, int wanted_type) { if (!ret) { ret = get_none_object (); if (ret) { - r_list_append (Grefs, ret); + r_list_append (ctx->refs, ret); } } return ret; } -static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *symbols, RList *cobjs, char *prefix) { +static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *symbols, RList *cobjs, char *prefix, PycUnmarshalCtx *ctx) { RListIter *i = NULL; // each code object is a section @@ -1136,7 +1088,7 @@ static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *sy section = R_NEW0 (RBinSection); symbol = R_NEW0 (RBinSymbol); prefix = r_str_newf ("%s%s%s", r_str_get (prefix), - prefix? ".": "", (const char *)cobj->name->data); + prefix? ".": "", (const char *)cobj->name->data); if (!prefix || !section || !symbol) { goto fail; } @@ -1158,7 +1110,7 @@ static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *sy symbol->size = cobj->end_offset - cobj->start_offset; symbol->vaddr = cobj->start_offset; symbol->paddr = cobj->start_offset; - symbol->ordinal = Gscount++; + symbol->ordinal = ctx->scount++; if (cobj->consts->type != TYPE_TUPLE && cobj->consts->type != TYPE_SMALL_TUPLE) { goto fail2; } @@ -1166,7 +1118,7 @@ static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *sy goto fail2; } r_list_foreach (((RList *)(cobj->consts->data)), i, obj) { - extract_sections_symbols (obj, sections, symbols, cobjs, prefix); + extract_sections_symbols (obj, sections, symbols, cobjs, prefix, ctx); } free (prefix); return true; @@ -1181,15 +1133,17 @@ static bool extract_sections_symbols(pyc_object *obj, RList *sections, RList *sy return false; } -bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *cobjs, ut32 magic) { - Gmagic = magic; - Grefs = r_list_newf (NULL); // (RListFree)free_object); +bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *cobjs, PycUnmarshalCtx *ctx) { + if (!ctx) { + return false; + } bool ret = false; - if (Grefs) { - pyc_object *pobj = get_object (buffer, 0); - ret = extract_sections_symbols (pobj, sections, symbols, cobjs, NULL); - r_list_free (Grefs); - Grefs = NULL; + ctx->refs = r_list_newf (NULL); // (RListFree)free_object); + if (ctx->refs) { + pyc_object *pobj = get_object (buffer, 0, ctx); + ret = extract_sections_symbols (pobj, sections, symbols, cobjs, NULL, ctx); + r_list_free (ctx->refs); + ctx->refs = NULL; } return ret; } diff --git a/libr/bin/format/pyc/marshal.h b/libr/bin/format/pyc/marshal.h index aa732c806c1b6..62eae305f30bf 100644 --- a/libr/bin/format/pyc/marshal.h +++ b/libr/bin/format/pyc/marshal.h @@ -1,4 +1,4 @@ -/* radare - LGPL3 - Copyright 2016 - Matthieu (c0riolis) Tardy */ +/* radare - LGPL3 - Copyright 2016-2025 - Matthieu (c0riolis) Tardy */ #ifndef MARSHAL_H #define MARSHAL_H @@ -70,7 +70,15 @@ typedef struct { st64 end_offset; } pyc_code_object; -bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *objs, ut32 magic); -ut64 get_code_object_addr(RBuffer *buffer, ut32 magic); +/* Per-parse context to avoid globals */ +typedef struct { + ut32 magic; /* .pyc magic */ + ut32 scount; /* symbol ordinal counter */ + RList *refs; /* ref table for FLAG_REF objects */ + RList *interned_table; /* shared across a file parse */ +} PycUnmarshalCtx; + +bool get_sections_symbols_from_code_objects(RBuffer *buffer, RList *sections, RList *symbols, RList *objs, PycUnmarshalCtx *ctx); +ut64 get_code_object_addr_ctx(RBuffer *buffer, PycUnmarshalCtx *ctx); #endif diff --git a/libr/bin/format/pyc/pyc.c b/libr/bin/format/pyc/pyc.c index a674533da1ca2..6253d34adae65 100644 --- a/libr/bin/format/pyc/pyc.c +++ b/libr/bin/format/pyc/pyc.c @@ -3,8 +3,19 @@ #include "pyc.h" #include "marshal.h" -bool pyc_get_sections_symbols(RList *sections, RList *symbols, RList *cobjs, RBuffer *buf, ut32 magic) { - return get_sections_symbols_from_code_objects (buf, sections, symbols, cobjs, magic); +bool pyc_get_sections_symbols(RList *sections, RList *symbols, RList *cobjs, RBuffer *buf, ut32 magic, RList *interned_table) { + PycUnmarshalCtx ctx = {0}; + ctx.magic = magic; + ctx.scount = 0; + ctx.refs = NULL; + ctx.interned_table = interned_table; + return get_sections_symbols_from_code_objects (buf, sections, symbols, cobjs, &ctx); +} + +ut64 pyc_get_code_object_addr(RBuffer *buf, ut32 magic) { + PycUnmarshalCtx ctx = {0}; + ctx.magic = magic; + return get_code_object_addr_ctx (buf, &ctx); } static inline bool pyc_is_object(ut8 b, pyc_marshal_type type) { diff --git a/libr/bin/format/pyc/pyc.h b/libr/bin/format/pyc/pyc.h index ea69624ae8135..a0cddba6f3618 100644 --- a/libr/bin/format/pyc/pyc.h +++ b/libr/bin/format/pyc/pyc.h @@ -10,7 +10,8 @@ #include "pyc_magic.h" #include "marshal.h" -bool pyc_get_sections_symbols(RList *sections, RList *symbols, RList *mem, RBuffer *buf, ut32 magic); +bool pyc_get_sections_symbols(RList *sections, RList *symbols, RList *mem, RBuffer *buf, ut32 magic, RList *interned_table); +ut64 pyc_get_code_object_addr(RBuffer *buf, ut32 magic); bool pyc_is_code(ut8 b, ut32 magic); #endif diff --git a/libr/bin/format/pyc/pyc_magic.h b/libr/bin/format/pyc/pyc_magic.h index 187c4d0125665..b2696c2991ac6 100644 --- a/libr/bin/format/pyc/pyc_magic.h +++ b/libr/bin/format/pyc/pyc_magic.h @@ -1,9 +1,10 @@ -/* radare - LGPL3 - Copyright 2016-2020 - c0riolis, x0urc3 */ +/* radare - LGPL3 - Copyright 2016-2025 - c0riolis, x0urc3 */ #ifndef PYC_MAGIC_H #define PYC_MAGIC_H #include +#include struct pyc_version { ut32 magic; @@ -11,6 +12,14 @@ struct pyc_version { const char *revision; }; +typedef struct { + ut64 code_start_offset; + struct pyc_version version; + RList *sections_cache; // RList + RList *interned_table; // RList + RList *cobjs; // RList +} RBinPycObj; + struct pyc_version get_pyc_version(ut32 magic); R_IPI int py_version_cmp(const char *va, const char *vb, bool *err); diff --git a/libr/bin/p/bin_pyc.c b/libr/bin/p/bin_pyc.c index 019a24597b2f8..44ab25536efeb 100644 --- a/libr/bin/p/bin_pyc.c +++ b/libr/bin/p/bin_pyc.c @@ -1,37 +1,52 @@ -/* radare - LGPL3 - Copyright 2016-2023 - c0riolis, x0urc3 */ +/* radare - LGPL3 - Copyright 2016-2025 - c0riolis, x0urc3 */ #include #include "../format/pyc/pyc.h" -static R_TH_LOCAL ut64 code_start_offset = 0; -static R_TH_LOCAL struct pyc_version version; -static R_TH_LOCAL RList *sections_cache = NULL; -RList R_TH_LOCAL *interned_table = NULL; // used from marshall.c +#if 0 +typedef struct { + ut64 code_start_offset; + struct pyc_version version; + RList *sections_cache; // RList + RList *interned_table; // RList + RList *cobjs; // RList +} RBinPycObj; +#endif static bool check(RBinFile *bf, RBuffer *b) { if (r_buf_size (b) > 4) { ut32 buf; r_buf_read_at (b, 0, (ut8 *)&buf, sizeof (buf)); - version = get_pyc_version (buf); - return version.magic != -1; + struct pyc_version v = get_pyc_version (buf); + return v.magic != -1; } return false; } static bool load(RBinFile *bf, RBuffer *buf, ut64 loadaddr) { - return check (bf, buf); + if (!check (bf, buf)) { + return false; + } + ut32 m; + r_buf_read_at (buf, 0, (ut8 *)&m, sizeof (m)); + RBinPycObj *obj = R_NEW0 (RBinPycObj); + obj->version = get_pyc_version (m); + bf->bo->bin_obj = obj; + return true; } -static ut64 get_entrypoint(RBuffer *buf) { +static ut64 get_entrypoint(RBuffer *buf, ut32 magic, ut64 *out_code_start_offset) { ut8 b; ut64 result; int addr; for (addr = 0x8; addr <= 0x10; addr += 0x4) { r_buf_read_at (buf, addr, &b, sizeof (b)); - if (pyc_is_code (b, version.magic)) { - code_start_offset = addr; + if (pyc_is_code (b, magic)) { + if (out_code_start_offset) { + *out_code_start_offset = addr; + } r_buf_seek (buf, addr + 1, R_BUF_SET); - if ((result = get_code_object_addr (buf, version.magic)) == 0) { + if ((result = pyc_get_code_object_addr (buf, magic)) == 0) { return addr; } return result; @@ -41,38 +56,34 @@ static ut64 get_entrypoint(RBuffer *buf) { } static RBinInfo *info(RBinFile *arch) { + RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL; RBinInfo *ret = R_NEW0 (RBinInfo); - if (!ret) { - return NULL; - } ret->file = strdup (arch->file); - ret->type = r_str_newf ("Python %s byte-compiled file", version.version); + ret->type = r_str_newf ("Python %s byte-compiled file", obj? obj->version.version: ""); ret->bclass = strdup ("Python byte-compiled file"); ret->rclass = strdup ("pyc"); ret->arch = strdup ("pyc"); - ret->machine = r_str_newf ("Python %s VM (rev %s)", version.version, - version.revision); + ret->machine = r_str_newf ("Python %s VM (rev %s)", obj? obj->version.version: "", + obj? obj->version.revision: ""); ret->os = strdup ("any"); ret->bits = 32; // TODO py_version_cmp (version.version, "3.6") >= 0? 32: 16;???? - ret->cpu = strdup (version.version); // pass version info in cpu, Asm plugin will get it + ret->cpu = strdup (obj? obj->version.version: ""); // pass version info in cpu, Asm plugin will get it return ret; } static RList *sections(RBinFile *arch) { - return sections_cache; + RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL; + return obj? obj->sections_cache: NULL; } static RList *entries(RBinFile *arch) { + RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL; RList *entries = r_list_newf ((RListFree)free); if (!entries) { return NULL; } RBinAddr *addr = R_NEW0 (RBinAddr); - if (!addr) { - r_list_free (entries); - return NULL; - } - ut64 entrypoint = get_entrypoint (arch->buf); + ut64 entrypoint = get_entrypoint (arch->buf, obj? obj->version.magic: 0, obj? &obj->code_start_offset: NULL); addr->paddr = entrypoint; addr->vaddr = entrypoint; r_buf_seek (arch->buf, entrypoint, R_IO_SEEK_SET); @@ -85,44 +96,57 @@ static ut64 baddr(RBinFile *bf) { } static RList *symbols(RBinFile *arch) { - RList *shared = r_list_newf ((RListFree)r_list_free); - if (!shared) { + RBinPycObj *obj = arch && arch->bo ? (RBinPycObj *)arch->bo->bin_obj : NULL; + if (!obj) { return NULL; } - RList *cobjs = r_list_newf ((RListFree)free); - if (!cobjs) { - r_list_free (shared); - return NULL; + if (!obj->cobjs) { + obj->cobjs = r_list_newf ((RListFree)free); + if (!obj->cobjs) { + return NULL; + } } - interned_table = r_list_newf ((RListFree)free); - if (!interned_table) { - r_list_free (shared); - r_list_free (cobjs); - return NULL; + if (!obj->interned_table) { + obj->interned_table = r_list_newf ((RListFree)free); + if (!obj->interned_table) { + return NULL; + } } - r_list_append (shared, cobjs); - r_list_append (shared, interned_table); - arch->bo->bin_obj = shared; - RList *sections = r_list_newf (NULL); // (RListFree)free); + RList *sections = r_list_newf (NULL); // keep old behavior; free on destroy if needed if (!sections) { - r_list_free (shared); - arch->bo->bin_obj = NULL; return NULL; } RList *symbols = r_list_newf ((RListFree)free); if (!symbols) { - r_list_free (shared); - arch->bo->bin_obj = NULL; r_list_free (sections); return NULL; } RBuffer *buffer = arch->buf; - r_buf_seek (buffer, code_start_offset, R_BUF_SET); - pyc_get_sections_symbols (sections, symbols, cobjs, buffer, version.magic); - sections_cache = sections; + if (!obj->code_start_offset) { + // ensure code_start_offset is initialized + (void) get_entrypoint (buffer, obj->version.magic, &obj->code_start_offset); + } + r_buf_seek (buffer, obj->code_start_offset, R_BUF_SET); + pyc_get_sections_symbols (sections, symbols, obj->cobjs, buffer, obj->version.magic, obj->interned_table); + obj->sections_cache = sections; return symbols; } +static void destroy(RBinFile *bf) { + if (!bf || !bf->bo) { + return; + } + RBinPycObj *obj = (RBinPycObj *)bf->bo->bin_obj; + if (!obj) { + return; + } + r_list_free (obj->interned_table); + r_list_free (obj->cobjs); + // Causes Double free : r_list_free (obj->sections_cache); + free (obj); + bf->bo->bin_obj = NULL; +} + RBinPlugin r_bin_plugin_pyc = { .meta = { .name = "pyc", @@ -137,6 +161,7 @@ RBinPlugin r_bin_plugin_pyc = { .sections = §ions, .baddr = &baddr, .symbols = &symbols, + .destroy = &destroy, }; #ifndef R2_PLUGIN_INCORE