Adding helper tools for binutils testing

davidgfnet · davidgfnet · commit dc0649a55c32 · 2025-05-26T01:24:28.000+02:00
diff --git a/asm-tester.py b/asm-tester.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+
+# Copyright 2025 David Guillen Fandos <david@davidgf.net>
+
+# Testing helper for binutils/as
+#
+# This script can compare the output of two "as" builds (for PSP's allegrex)
+# It generates text input asm files and runs them through as. It then uses
+# objcopy on the output file to generate a raw binary file and compares them.
+#
+# It also checks assembly errors. In general it only generates valid instruction
+# inputs.
+
+import argparse, re, subprocess, struct, uuid, os, instparse, reginfo, multiprocessing, itertools
+from tqdm import tqdm
+
+# Command-line interface: the two assembler builds to compare, plus the
+# objcopy used to extract the raw .text contents of the objects they produce.
+parser = argparse.ArgumentParser(prog='asm-tester')
+parser.add_argument(
+  '--reference', required=True,
+  help='Path (or executable within PATH) to invoke reference `as`')
+parser.add_argument(
+  '--undertest', required=True,
+  help='Path (or executable within PATH) to invoke for `as`')
+parser.add_argument(
+  '--objcopy', required=True,
+  help='Path (or executable within PATH) to invoke for `objcopy`')
+# NOTE: --chunksize is accepted but not referenced anywhere else in this script.
+parser.add_argument(
+  '--chunksize', type=int, default=128*1024,
+  help='Block size (instruction count)')
+parser.add_argument(
+  '--instr', dest='instregex', default=".*",
+  help='Instructions to emit (a regular expression)')
+parser.add_argument(
+  '--threads', dest='nthreads', type=int, default=8,
+  help='Number of threads to use')
+
+args = parser.parse_args()
+
+def tmpfile(itnum=0, name="as"):
+  """Return a new unique path under /tmp; the file itself is not created."""
+  unique = str(uuid.uuid4())
+  return "/tmp/%s-test-%d-%s" % (name, itnum, unique)
+
+def run_sidebyside(asmfile):
+  """Assemble asmfile with both assemblers and compare the resulting .text bytes.
+
+  Returns a tuple (same, exit_code1, exit_code2):
+    - (False, exit_code1, exit_code2) when either assembler exits non-zero.
+    - (same, None, None) otherwise, where `same` tells whether the raw .text
+      dumps produced by objcopy from both object files are byte-identical.
+
+  Raises RuntimeError if objcopy fails on either object file. All temporary
+  object/binary files are removed before returning, on every path.
+  """
+  quiet = dict(stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+  objf1, objf2 = tmpfile(name="obj"), tmpfile(name="obj")
+  rawf1, rawf2 = tmpfile(name="bin"), tmpfile(name="bin")
+
+  try:
+    # Process the asm file with each assembler to generate two object files.
+    p1 = subprocess.run([args.reference, "-march=allegrex", "-o", objf1, asmfile], **quiet)
+    p2 = subprocess.run([args.undertest, "-march=allegrex", "-o", objf2, asmfile], **quiet)
+    if p1.returncode != 0 or p2.returncode != 0:
+      return (False, p1.returncode, p2.returncode)
+
+    # Dump the .text section of each object as a raw binary.
+    for objf, rawf in ((objf1, rawf1), (objf2, rawf2)):
+      p = subprocess.run([args.objcopy, '-O', 'binary', '--only-section=.text', objf, rawf], **quiet)
+      if p.returncode != 0:
+        raise RuntimeError("objcopy failed (exit code %d) on %s" % (p.returncode, objf))
+
+    # Compare both files (the reference dump against the under-test dump).
+    with open(rawf1, "rb") as f1, open(rawf2, "rb") as f2:
+      same = f1.read() == f2.read()
+    return (same, None, None)
+  finally:
+    # Some of these may not exist if we bailed out early; that is fine.
+    for path in (objf1, objf2, rawf1, rawf2):
+      try:
+        os.unlink(path)
+      except FileNotFoundError:
+        pass
+
+def dict_product(indict):
+  """Return a generator of dicts, one per element of the cartesian product of indict's values.
+
+  Every produced dict has the same keys as indict, each mapped to one item
+  picked from the corresponding value iterable.
+  """
+  picks = itertools.product(*indict.values())
+  return (dict(zip(indict, pick)) for pick in picks)
+
+def expsize(regtype, lanes):
+  """Scale a lane count by the optional ":X" size suffix of a register type.
+
+  A type without suffix keeps the lane count as is. The suffixes D, Q, H and T
+  multiply it by 2, 4, 0.5 and 0.25 respectively; the result is truncated to int.
+  """
+  if ":" not in regtype:
+    return int(lanes)
+  _, suffix = regtype.split(":")
+  scale = {"D": 2, "Q": 4, "H": 0.5, "T": 0.25}[suffix]
+  return int(scale * lanes)
+
+def gencombs(instname, variables, elemcnt):
+  """Enumerate every assignment of register names to an instruction's operands.
+
+  `variables` maps operand names to register types. "gpr" operands range over
+  $0..$31; any other type is expanded through reginfo.genvect/regpname using
+  the element count given by expsize().
+
+  Returns (combinations, subreginfo): an iterable of dicts (operand name ->
+  register name) and a map from each non-gpr register name to its subregs.
+  """
+  # FIXME: Some weird case with load/store insts:
+  if instname.endswith(".q") and not elemcnt:
+    elemcnt = 4
+
+  combos, subreginfo = {}, {}
+  for var, vtype in variables.items():
+    if vtype == "gpr":
+      combos[var] = ["$%d" % n for n in range(32)]
+      continue
+
+    regtype = vtype.split(":")[0]
+    nume = expsize(vtype, elemcnt)
+    names = combos[var] = []
+    for regnum, subregs in reginfo.genvect(regtype, nume):
+      name = reginfo.regpname(regtype, nume, regnum)
+      names.append(name)
+      subreginfo[name] = subregs
+  return (dict_product(combos), subreginfo)
+
+def genimms(imms):
+  """Enumerate every combination of values for a set of immediates.
+
+  `imms` maps immediate names to a description dict. Descriptions whose "type"
+  is "enum" contribute their "enum" list as is; any other description
+  contributes the decimal strings for minval..maxval (both inclusive).
+  Returns an iterable of dicts mapping immediate name -> chosen value.
+  """
+  combos = {}
+  for name, iinfo in imms.items():
+    if iinfo.get("type", None) == "enum":
+      combos[name] = iinfo["enum"]
+    else:
+      combos[name] = [str(val) for val in range(iinfo["minval"], iinfo["maxval"] + 1)]
+  return dict_product(combos)
+
+def check_overlap(iobj, regcomb, subreginfo):
+  """Tell whether a register combination respects the instruction's overlap rule.
+
+  With "no-overlap", no output register may share a subregister with any input
+  register. With "partial-overlap", sharing is only tolerated when the output
+  and input subregister lists are identical. Any other rule accepts everything.
+  """
+  mode = iobj.register_compat()
+  if mode not in ("no-overlap", "partial-overlap"):
+    return True
+
+  for oreg in iobj.outputs():
+    for ireg in iobj.inputs():
+      subregso = subreginfo[regcomb[oreg]]
+      subregsi = subreginfo[regcomb[ireg]]
+      if not (set(subregso) & set(subregsi)):
+        continue           # Disjoint registers are always fine
+      if mode == "no-overlap" or subregso != subregsi:
+        return False       # Found common (non-identical, if partial) registers
+  return True
+
+# Select the instructions whose name matches the --instr regular expression.
+allinsts = [
+  (instname, iobj)
+  for instname, iobj in instparse.insts.items()
+  if re.match(args.instregex, instname)
+]
+
+# Report how many distinct instructions are going to be exercised.
+print("Testing %d different instructions!" % len(allinsts))
+
+def process_block(instname, iobj):
+  """Emit every valid operand combination of one instruction and assemble it twice.
+
+  Returns (True, instname, count) when both assemblers produce identical code;
+  count is the number of instructions written, or 0 when the instruction uses
+  a register type this script cannot generate yet (it is then skipped).
+  Returns (False, instname, exit_code1, exit_code2, asmfile) when the
+  assemblers fail or disagree; the generated asm file is kept for inspection.
+  """
+  supported = ("single", "vector", "matrix", "vfpucc", "gpr")
+  for kind, operands in (("inputs", iobj.inputs()), ("outputs", iobj.outputs())):
+    if any(v.split(":")[0] not in supported for v in operands.values()):
+      # TODO Support other reg types!
+      print("Instruction", instname, "has some unsupported", kind, iobj.raw_syntax())
+      return (True, instname, 0)
+
+  regs = iobj.inputs() | iobj.outputs()
+  # No need to allocate CC registers :D
+  regs = {k: v for k, v in regs.items() if v != "vfpucc"}
+
+  # Loop invariants: the syntax template and all the immediate combinations.
+  syntax = iobj.raw_syntax()
+  # Fake one immediate if there are none, so that exactly one line is emitted
+  # per register combination. Something nicer would be better tho.
+  imms = iobj.immediates() or {'dummyimm': {'type': 'integer', 'minval': 0, 'maxval': 0}}
+  immcombs = list(genimms(imms))
+
+  asmfile = tmpfile()
+
+  # Generate all possible operand combinations
+  numinsts = 0
+  with open(asmfile, "w") as fd:
+    regit, subreginfo = gencombs(instname, regs, iobj.numelems())
+    for varcomb in regit:
+      # Validate that this combination of registers is even valid
+      if not check_overlap(iobj, varcomb, subreginfo):
+        continue
+
+      # Registers are substituted once, then each immediate combination on top.
+      regstr = syntax
+      for vname, vval in varcomb.items():
+        regstr = regstr.replace("%" + vname, vval)
+
+      for immcomb in immcombs:
+        istr = regstr
+        for iname, ival in immcomb.items():
+          istr = istr.replace("%" + iname, ival)
+        fd.write(istr + "\n")
+        numinsts += 1
+
+  # Run the assemblers now!
+  success, ec1, ec2 = run_sidebyside(asmfile)
+  if not success:
+    # The asm file is left on disk so that the failure can be reproduced.
+    return (False, instname, ec1, ec2, asmfile)
+
+  # Nothing to investigate: do not leave the (potentially big) input in /tmp.
+  os.unlink(asmfile)
+  return (True, instname, numinsts)
+
+# Fan the instructions out over a process pool and collect per-instruction results.
+finfo = []
+with multiprocessing.Pool(processes=args.nthreads) as executor:
+  res = [
+    executor.apply_async(process_block, (instname, iobj))
+    for instname, iobj in allinsts
+  ]
+
+  # No more work will be submitted; results are gathered in submission order.
+  executor.close()
+
+  totalinsts = 0
+  for r in tqdm(res):
+    succ, *info = r.get()
+    if succ is False:
+      # Failure details: instruction name, both exit codes and the asm file.
+      print(info)
+      continue
+    name, count = info[0], info[1]
+    totalinsts += count
+    finfo.append("%s : %d instructions" % (name, count))
+
+print("\n".join(finfo))
+print("--------------")
+print("Tested a total of %d instructions" % totalinsts)
+
+
diff --git a/disasm-tester.py b/disasm-tester.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+
+# Copyright 2025 David Guillen Fandos <david@davidgf.net>
+
+# Testing helper for binutils/objdump
+#
+# This script can compare the output of two objdump builds (for PSP's allegrex)
+# It generates binary blobs and runs them through objdump (binary mode) and
+# compares their output.
+
+import argparse, re, subprocess, struct, uuid, os, instparse, multiprocessing
+from tqdm import tqdm
+
+# Command-line interface: the two objdump builds to compare and how to split the work.
+parser = argparse.ArgumentParser(prog='disasm-tester')
+parser.add_argument(
+  '--reference', required=True,
+  help='Path (or executable within PATH) to invoke reference `objdump`')
+parser.add_argument(
+  '--undertest', required=True,
+  help='Path (or executable within PATH) to invoke for `objdump`')
+parser.add_argument(
+  '--chunksize', type=int, default=128*1024,
+  help='Block size (instruction count)')
+parser.add_argument(
+  '--instr', dest='instregex', default=".*",
+  help='Instructions to emit (a regular expression)')
+parser.add_argument(
+  '--threads', dest='nthreads', type=int, default=8,
+  help='Number of threads to use')
+
+args = parser.parse_args()
+
+def tmpfile(itnum=0):
+  """Return a new unique path under /tmp; the file itself is not created."""
+  unique = str(uuid.uuid4())
+  return "/tmp/objdump-test-%d-%s" % (itnum, unique)
+
+def run_sidebyside(binfile):
+  """Disassemble binfile with both objdump builds and compare what they print.
+
+  Returns (success, exit_code1, exit_code2, outp1, outp2), where each outpN is
+  the (stdout, stderr) pair of the corresponding process. `success` requires
+  both exit codes to be zero and both output pairs to be equal.
+  """
+  # Both processes are started before either is waited on.
+  procs = [
+    subprocess.Popen([tool, "-D", "-b", "binary", "-m", "mips:allegrex", binfile],
+      stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    for tool in (args.reference, args.undertest)
+  ]
+
+  # communicate() waits for termination, so returncode is set afterwards.
+  outp1, outp2 = [p.communicate() for p in procs]
+  exit_code1, exit_code2 = [p.returncode for p in procs]
+
+  success = (exit_code1 == 0 and exit_code2 == 0 and outp1 == outp2)
+  return (success, exit_code1, exit_code2, outp1, outp2)
+
+def itchunk(num, chunksize):
+  """Yield (start, stop) half-open ranges of at most chunksize items covering [0, num).
+
+  The last range is shortened so that stop never exceeds num; nothing is
+  yielded when num is zero. Raises ValueError (on first iteration) when
+  chunksize is not positive, since no progress could ever be made.
+  """
+  if chunksize <= 0:
+    raise ValueError("chunksize must be a positive integer")
+  for start in range(0, num, chunksize):
+    yield start, min(num, start + chunksize)
+
+# Collect the selected instructions along with their variable ("empty")
+# encoding fields and the total number of bits those fields span.
+allinsts = []
+for instname, iobj in instparse.insts.items():
+  if not re.match(args.instregex, instname):
+    continue
+
+  # A field is variable when the encoding assigns no fixed value to it.
+  fds = [
+    (name, finfo["lsb"], finfo["size"])
+    for name, finfo in iobj.encoding().fields().items()
+    if finfo["value"] is None
+  ]
+  fds.sort()
+  nbits = sum(size for _, _, size in fds)
+
+  allinsts.append((instname, iobj, fds, nbits))
+
+# Every instruction contributes 2^nbits encodings to the grand total.
+total_insts = sum(1 << entry[3] for entry in allinsts)
+print("Testing %dM instructions!" % (total_insts // 1000000))
+
+# Split the encoding space of every instruction into chunks of at most
+# --chunksize encodings, so the work can be divided among processes.
+work = [
+  (instname, iobj, fds, start, stop)
+  for instname, iobj, fds, nbits in allinsts
+  for start, stop in itchunk(1 << nbits, args.chunksize)
+]
+
+def process_block(instname, iobj, fds, start, stop):
+  """Generate the encodings numbered start..stop-1 of one instruction and disassemble them.
+
+  `fds` lists the variable fields as (name, lsb, size) tuples. The bits of
+  each counter value n are distributed over those fields, in order, on top of
+  the instruction's base word; words are written as little-endian 32 bit values.
+
+  Returns None when both disassemblers succeed with identical output.
+  Otherwise returns (instname, exit_code1, exit_code2, binfile) and leaves the
+  binary blob on disk so that the mismatch can be reproduced.
+  """
+  binfile = tmpfile()
+
+  # Base word to fill
+  baseword = iobj.encoding().baseword()
+
+  # Precompute, for every field, which bits of n it takes and where they go.
+  layout, offset = [], 0
+  for _, lsb, size in fds:
+    layout.append((offset, (1 << size) - 1, lsb))
+    offset += size
+
+  # Generate all possible bit fields
+  with open(binfile, "wb") as fd:
+    for n in range(start, stop):
+      w = baseword
+      for shift, mask, lsb in layout:
+        w |= ((n >> shift) & mask) << lsb
+      fd.write(struct.pack("<I", w))
+
+  # Run the disassemblers now!
+  success, ec1, ec2, _, _ = run_sidebyside(binfile)
+  if not success:
+    return (instname, ec1, ec2, binfile)
+
+  os.unlink(binfile)
+  return None
+
+# Fan the chunks out over a process pool and print every reported failure.
+with multiprocessing.Pool(processes=args.nthreads) as executor:
+  res = [executor.apply_async(process_block, job) for job in work]
+
+  # No more work will be submitted; results are gathered in submission order.
+  executor.close()
+
+  for r in tqdm(res):
+    failure = r.get()
+    if failure is not None:
+      print(failure)
+
+