Skip to content

Commit dc0649a

Browse files
committed
Adding helper tools for binutils testing
1 parent 1756895 commit dc0649a

File tree

2 files changed

+311
-0
lines changed

2 files changed

+311
-0
lines changed

asm-tester.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2025 David Guillen Fandos <[email protected]>
4+
5+
# Testing helper for binutils/as
6+
#
7+
# This script can compare the output of two "as" builds (for PSP's allegrex)
8+
# It generates text input asm files and runs them through as. It then uses
9+
# objcopy on the output file to generate a raw binary file and compares them.
10+
#
11+
# It also checks assembly errors. In general it only generates valid instruction
12+
# inputs.
13+
14+
import argparse, re, subprocess, struct, uuid, os, instparse, reginfo, multiprocessing, itertools
15+
from tqdm import tqdm
16+
17+
# Command-line interface. The three tool paths are mandatory, the rest have defaults.
parser = argparse.ArgumentParser(prog="asm-tester")
parser.add_argument(
    "--reference", dest="reference", required=True,
    help="Path (or executable within PATH) to invoke reference `as`",
)
parser.add_argument(
    "--undertest", dest="undertest", required=True,
    help="Path (or executable within PATH) to invoke for `as`",
)
parser.add_argument(
    "--objcopy", dest="objcopy", required=True,
    help="Path (or executable within PATH) to invoke for `objcopy`",
)
parser.add_argument(
    "--chunksize", dest="chunksize", type=int, default=128 * 1024,
    help="Block size (instruction count)",
)
parser.add_argument(
    "--instr", dest="instregex", default=".*",
    help="Instructions to emit (a regular expression)",
)
parser.add_argument(
    "--threads", dest="nthreads", type=int, default=8,
    help="Number of threads to use",
)

# Parsed once at module level; the functions below read their settings from `args`.
args = parser.parse_args()
26+
27+
def tmpfile(itnum=0, name="as"):
    """Return a fresh, unique path under /tmp (the file itself is not created).

    `name` and `itnum` only decorate the file name; uniqueness comes from a
    random UUID appended at the end.
    """
    unique = uuid.uuid4()
    return "/tmp/%s-test-%d-%s" % (name, itnum, unique)
29+
30+
def run_sidebyside(asmfile):
    """Assemble `asmfile` with both assemblers and compare the raw .text output.

    Each assembler (`args.reference`, `args.undertest`) produces an object file,
    which `args.objcopy` then dumps to a raw binary holding only `.text`.

    Returns a 3-tuple:
      (False, exit_code1, exit_code2)  if either assembler exits non-zero
      (same, None, None)               otherwise, where `same` says whether both
                                       raw binaries are byte-identical

    Raises AssertionError if objcopy fails on either object file.
    All intermediate files are removed before returning, whatever the outcome.
    """
    objf1 = tmpfile(name="obj")
    objf2 = tmpfile(name="obj")
    rawf1 = tmpfile(name="bin")
    rawf2 = tmpfile(name="bin")

    # The tools' own output is irrelevant here, only exit codes and files matter.
    quiet = dict(stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL)

    try:
        p1 = subprocess.run(
            [args.reference, "-march=allegrex", "-o", objf1, asmfile], **quiet)
        p2 = subprocess.run(
            [args.undertest, "-march=allegrex", "-o", objf2, asmfile], **quiet)

        exit_code1 = p1.returncode
        exit_code2 = p2.returncode
        if exit_code1 != 0 or exit_code2 != 0:
            return (False, exit_code1, exit_code2)

        p1 = subprocess.run(
            [args.objcopy, "-O", "binary", "--only-section=.text", objf1, rawf1],
            **quiet)
        p2 = subprocess.run(
            [args.objcopy, "-O", "binary", "--only-section=.text", objf2, rawf2],
            **quiet)

        assert p1.returncode == 0 and p2.returncode == 0, "objcopy failed"

        # Compare the reference dump against the under-test dump.
        with open(rawf1, "rb") as ref_fd, open(rawf2, "rb") as test_fd:
            same = ref_fd.read() == test_fd.read()

        return (same, None, None)
    finally:
        # Some of these may not exist if a tool failed before writing its output.
        for path in (objf1, objf2, rawf1, rawf2):
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
64+
65+
def dict_product(indict):
    """Return a generator over the cartesian product of a dict of iterables.

    Every yielded item is a dict with the same keys as `indict`, each key mapped
    to one element picked from its iterable.
    """
    names = indict.keys()
    return (
        dict(zip(names, picked))
        for picked in itertools.product(*indict.values())
    )
67+
68+
def expsize(regtype, lanes):
    """Return the element count for `regtype`, scaled from the base `lanes` count.

    `regtype` may carry a ":X" suffix that scales the count: D doubles it,
    Q quadruples it, H halves it and T quarters it. Without a suffix the count
    is returned unchanged. The result is truncated to an int.

    Raises KeyError for an unknown suffix.
    """
    factor = 1
    if ":" in regtype:
        _base, suffix = regtype.split(":")
        factor = {"D": 2, "Q": 4, "H": 0.5, "T": 0.25}[suffix]
    scaled = factor * lanes
    return int(scaled)
74+
75+
# Given a dict of regs and types, returns all the permutations of register names for the
76+
# instruction. Returns a reg map (from reg name to subregs) as well.
77+
def gencombs(instname, variables, elemcnt):
    """Enumerate every register assignment for an instruction's operands.

    `variables` maps operand names to register types. "gpr" operands take the
    names $0..$31; any other type is expanded through `reginfo.genvect` /
    `reginfo.regpname`, using the element count from `expsize`.

    Returns (combinations, subreginfo): `combinations` iterates over dicts
    mapping operand name to register name, and `subreginfo` maps every
    generated non-gpr register name to its sub-registers.
    """
    # FIXME: Some weird case with load/store insts:
    if not elemcnt and instname.endswith(".q"):
        elemcnt = 4

    combos = {}
    subreginfo = {}
    for operand, vtype in variables.items():
        if vtype == "gpr":
            combos[operand] = ["$" + str(num) for num in range(32)]
            continue

        count = expsize(vtype, elemcnt)
        kind = vtype.split(":")[0]
        names = []
        combos[operand] = names
        for regnum, subregs in reginfo.genvect(kind, count):
            name = reginfo.regpname(kind, count, regnum)
            names.append(name)
            subreginfo[name] = subregs

    return (dict_product(combos), subreginfo)
95+
96+
# Given a list of immediates generate all their possible values and combinations
97+
def genimms(imms):
    """Enumerate every combination of immediate values.

    `imms` maps immediate names to descriptor dicts. A descriptor whose "type"
    is "enum" contributes the values listed under "enum" as-is; any other
    descriptor contributes every integer from "minval" to "maxval" inclusive,
    rendered as strings.

    Returns an iterator over dicts mapping immediate name to chosen value.
    """
    choices = {}
    for name, desc in imms.items():
        if desc.get("type", None) == "enum":
            choices[name] = desc["enum"]
        else:
            lo, hi = desc["minval"], desc["maxval"]
            choices[name] = [str(val) for val in range(lo, hi + 1)]
    return dict_product(choices)
107+
108+
# Checks whether a reg combination is legal according to the reg overlap restrictions
109+
def check_overlap(iobj, regcomb, subreginfo):
    """Tell whether a register assignment respects the instruction's overlap rule.

    `regcomb` maps operand names to register names and `subreginfo` maps
    register names to their sub-registers. The rule comes from
    `iobj.register_compat()`:
      "no-overlap"      - no output may share a sub-register with any input
      "partial-overlap" - sharing is only allowed when output and input cover
                          exactly the same sub-registers
      anything else     - every assignment is accepted

    Returns True when the assignment is legal, False otherwise.
    """
    rule = iobj.register_compat()
    if rule != "no-overlap" and rule != "partial-overlap":
        return True

    identical_is_fine = (rule == "partial-overlap")
    for out_name in iobj.outputs():
        for in_name in iobj.inputs():
            out_subregs = subreginfo[regcomb[out_name]]
            in_subregs = subreginfo[regcomb[in_name]]
            if set(out_subregs).isdisjoint(set(in_subregs)):
                continue  # Nothing shared, always fine
            if identical_is_fine and out_subregs == in_subregs:
                continue  # Full overlap is tolerated under this rule
            return False  # Found an illegal overlap
    return True
126+
127+
# Select the instructions whose name matches the --instr regular expression.
allinsts = [
    (instname, iobj)
    for instname, iobj in instparse.insts.items()
    if re.match(args.instregex, instname)
]

# Report how many distinct instructions are going to be exercised
print("Testing %d different instructions!" % len(allinsts))
135+
136+
def process_block(instname, iobj):
    """Generate every operand combination for one instruction and test it.

    Writes one assembly line per legal register/immediate combination to a
    temporary file and feeds that file to `run_sidebyside`.

    Returns one of:
      (True, instname, 0)                  instruction skipped: it uses a
                                           register type that is not supported
      (True, instname, numinsts)           both assemblers agreed on `numinsts`
                                           generated instructions
      (False, instname, ec1, ec2, asmfile) assembling failed or outputs differ;
                                           `asmfile` is kept for inspection
    """
    supported = ("single", "vector", "matrix", "vfpucc", "gpr")

    if any(v.split(":")[0] not in supported for v in iobj.inputs().values()):
        # TODO Support other reg types!
        print("Instruction", instname, "has some unsupported inputs", iobj.raw_syntax())
        return (True, instname, 0)

    if any(v.split(":")[0] not in supported for v in iobj.outputs().values()):
        # TODO Support other reg types!
        print("Instruction", instname, "has some unsupported outputs", iobj.raw_syntax())
        return (True, instname, 0)

    regs = iobj.inputs() | iobj.outputs()
    # No need to allocate CC registers :D
    regs = {k: v for k, v in regs.items() if v != "vfpucc"}

    # Fake one immediate if there are none. Something nicer would be better tho.
    imms = iobj.immediates() or {'dummyimm': {'type': 'interger', 'minval': 0, 'maxval': 0}}
    template = iobj.raw_syntax()

    asmfile = tmpfile()

    # Generate all possible operand combinations
    numinsts = 0
    with open(asmfile, "w") as fd:
        regit, subreginfo = gencombs(instname, regs, iobj.numelems())
        for varcomb in regit:
            # Validate that this combination of registers is even valid
            if not check_overlap(iobj, varcomb, subreginfo):
                continue

            # Registers are substituted once per combination, immediates below
            with_regs = template
            for vname, vval in varcomb.items():
                with_regs = with_regs.replace("%" + vname, vval)

            for immcomb in genimms(imms):
                istr = with_regs
                for iname, ival in immcomb.items():
                    istr = istr.replace("%" + iname, ival)
                fd.write(istr + "\n")
                numinsts += 1

    # Run the assemblers now!
    success, ec1, ec2 = run_sidebyside(asmfile)
    if not success:
        # Keep the input around so the failure can be reproduced by hand
        return (False, instname, ec1, ec2, asmfile)

    # Nothing left to inspect on success: do not pile up inputs in /tmp
    os.unlink(asmfile)
    return (True, instname, numinsts)
181+
182+
# Entry point. The guard keeps worker processes from re-running the driver when
# the multiprocessing start method re-imports this module (spawn/forkserver).
if __name__ == "__main__":
    res = []
    finfo = []
    totalinsts = 0
    with multiprocessing.Pool(processes=args.nthreads) as executor:
        # Queue one job per instruction
        for instname, iobj in allinsts:
            r = executor.apply_async(process_block, (instname, iobj))
            res.append(r)

        executor.close()

        # Results are gathered inside the `with` block: leaving it terminates
        # the pool, which would discard any job that has not finished yet.
        for r in tqdm(res):
            succ, *info = r.get()
            if succ is False:
                # info is [instname, exit_code1, exit_code2, asmfile]
                print(info)
            else:
                # info is [instname, numinsts]
                totalinsts += info[1]
                finfo.append("%s : %d instructions" % (info[0], info[1]))

    print("\n".join(finfo))
    print("--------------")
    print("Tested a total of %d instructions" % totalinsts)
203+
204+

disasm-tester.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2025 David Guillen Fandos <[email protected]>
4+
5+
# Testing helper for binutils/objdump
6+
#
7+
# This script can compare the output of two objdump builds (for PSP's allegrex)
8+
# It generates binary blobs and runs them through objdump (binary mode) and
9+
# compares their output.
10+
11+
import argparse, re, subprocess, struct, uuid, os, instparse, multiprocessing
12+
from tqdm import tqdm
13+
14+
# Command-line interface. Both objdump paths are mandatory, the rest have defaults.
parser = argparse.ArgumentParser(prog="disasm-tester")
parser.add_argument(
    "--reference", dest="reference", required=True,
    help="Path (or executable within PATH) to invoke reference `objdump`",
)
parser.add_argument(
    "--undertest", dest="undertest", required=True,
    help="Path (or executable within PATH) to invoke for `objdump`",
)
parser.add_argument(
    "--chunksize", dest="chunksize", type=int, default=128 * 1024,
    help="Block size (instruction count)",
)
parser.add_argument(
    "--instr", dest="instregex", default=".*",
    help="Instructions to emit (a regular expression)",
)
parser.add_argument(
    "--threads", dest="nthreads", type=int, default=8,
    help="Number of threads to use",
)

# Parsed once at module level; the functions below read their settings from `args`.
args = parser.parse_args()
22+
23+
def tmpfile(itnum=0):
    """Return a fresh, unique path under /tmp (the file itself is not created).

    `itnum` only decorates the file name; uniqueness comes from a random UUID
    appended at the end.
    """
    unique = uuid.uuid4()
    return "/tmp/objdump-test-%d-%s" % (itnum, unique)
25+
26+
def run_sidebyside(binfile):
    """Disassemble `binfile` with both objdump builds and compare their output.

    Both tools (`args.reference`, `args.undertest`) are started before either
    output is collected, so they run concurrently.

    Returns (success, exit_code1, exit_code2, outp1, outp2), where each `outp`
    is the (stdout, stderr) pair captured from the tool and `success` is True
    only when both exit codes are zero and both pairs are equal.
    """
    def launch(objdump):
        # Raw-binary disassembly of the whole file for the allegrex target
        command = [objdump, "-D", "-b", "binary", "-m", "mips:allegrex", binfile]
        return subprocess.Popen(command, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    ref_proc = launch(args.reference)
    test_proc = launch(args.undertest)

    # communicate() drains the pipes and waits for the process to finish
    ref_out = ref_proc.communicate()
    test_out = test_proc.communicate()
    ref_code = ref_proc.returncode
    test_code = test_proc.returncode

    if ref_code != 0 or test_code != 0:
        success = False
    else:
        success = (ref_out == test_out)
    return (success, ref_code, test_code, ref_out, test_out)
41+
42+
def itchunk(num, chunksize):
    """Split the range [0, num) into consecutive chunks.

    Yields (start, stop) pairs with `stop` exclusive; every chunk spans
    `chunksize` items except possibly the last one, which ends at `num`.
    Nothing is yielded when `num` is zero or negative.

    Raises ValueError (on first iteration) if `chunksize` is not positive,
    since the loop below could otherwise never reach `num`.
    """
    if chunksize <= 0:
        raise ValueError("chunksize must be positive, got %r" % (chunksize,))

    i = 0
    while i < num:
        yield i, min(num, i + chunksize)
        i += chunksize
47+
48+
# Collect the instructions matching --instr, along with their free ("empty")
# encoding fields and how many bits those fields add up to.
allinsts = []
for instname, iobj in instparse.insts.items():
    if not re.match(args.instregex, instname):
        continue

    # Fields without a fixed value, as sorted (name, lsb, size) tuples
    fds = sorted(
        (fname, fdesc["lsb"], fdesc["size"])
        for fname, fdesc in iobj.encoding().fields().items()
        if fdesc["value"] is None
    )
    nbits = sum(size for _name, _lsb, size in fds)

    allinsts.append((instname, iobj, fds, nbits))

# Every free bit doubles the number of encodings: add them all up
total_insts = sum(1 << entry[3] for entry in allinsts)
print("Testing %dM instructions!" % (total_insts // 1000000))

# Split each instruction's encoding space into chunks, to divide the work.
work = []
for instname, iobj, fds, nbits in allinsts:
    work.extend(
        (instname, iobj, fds, start, stop)
        for start, stop in itchunk(1 << nbits, args.chunksize)
    )
69+
70+
def process_block(instname, iobj, fds, start, stop):
    """Disassemble the encodings numbered [start, stop) of one instruction.

    `fds` lists the instruction's free fields as (name, lsb, size) tuples. For
    each number n in the range, consecutive bit slices of n are placed into
    those fields on top of the instruction's base word, and the resulting
    32-bit little-endian words are written to a temporary binary file that is
    then passed to `run_sidebyside`.

    Returns None when both disassemblers agree (the file is removed), or
    (instname, exit_code1, exit_code2, binfile) otherwise, with the file kept
    for inspection.
    """
    binfile = tmpfile()

    # Base word to fill
    baseword = iobj.encoding().baseword()

    # Generate all possible bit fields
    with open(binfile, "wb") as fd:
        for n in range(start, stop):
            w, offset = baseword, 0
            for _fld, lsb, size in fds:
                # Take the next `size` bits of n and drop them at the field position
                w |= ((n >> offset) & ((1 << size) - 1)) << lsb
                offset += size
            fd.write(struct.pack("<I", w))

    # Run the disassemblers now!
    success, ec1, ec2, _out1, _out2 = run_sidebyside(binfile)
    if not success:
        return (instname, ec1, ec2, binfile)

    os.unlink(binfile)
    return None
93+
94+
# Entry point. The guard keeps worker processes from re-running the driver when
# the multiprocessing start method re-imports this module (spawn/forkserver).
if __name__ == "__main__":
    res = []
    with multiprocessing.Pool(processes=args.nthreads) as executor:
        # Queue one job per chunk of work
        for instname, iobj, fds, start, stop in work:
            r = executor.apply_async(process_block, (instname, iobj, fds, start, stop))
            res.append(r)

        executor.close()

        # Results are gathered inside the `with` block: leaving it terminates
        # the pool, which would discard any job that has not finished yet.
        for r in tqdm(res):
            v = r.get()
            if v is not None:
                # A mismatch: (instname, exit_code1, exit_code2, binfile)
                print(v)
106+
107+

0 commit comments

Comments
 (0)