From d703b829b2f13d6a6d7b7bdd6271ec3fecdae4dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 16:04:27 +0100 Subject: [PATCH 01/11] gas_diff_stats.py: Add basic unit tests --- .circleci/config.yml | 6 +- test/scripts/test_gas_diff_stats.py | 87 +++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 test/scripts/test_gas_diff_stats.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 72d96fe4f..e14a4ec78 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -973,6 +973,10 @@ jobs: <<: *base_ubuntu2204_small steps: - checkout + - run: + # TODO: Add these to the base image + name: Install gas_diff_stats.py dependencies + command: python3 -m pip install --user parsec tabulate - run: name: Python unit tests command: python3 test/pyscriptTests.py @@ -985,7 +989,7 @@ jobs: - checkout - run: name: Install dependencies - command: python -m pip install --user requests + command: python -m pip install --user requests parsec tabulate - run: name: Python unit tests command: python.exe test/pyscriptTests.py diff --git a/test/scripts/test_gas_diff_stats.py b/test/scripts/test_gas_diff_stats.py new file mode 100644 index 000000000..4980293ae --- /dev/null +++ b/test/scripts/test_gas_diff_stats.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +import unittest +from textwrap import dedent + +# NOTE: This test file file only works with scripts/ added to PYTHONPATH so pylint can't find the imports +# pragma pylint: disable=import-error +from gas_diff_stats import collect_statistics +# pragma pylint: enable=import-error + +class TestGasDiffStats(unittest.TestCase): + def test_collect_statistics_should_fail_on_empty_diff(self): + with self.assertRaises(RuntimeError): + self.assertEqual(collect_statistics(""), (0, 0, 0, 0, 0, 0)) + + def test_collect_statistics_should_accept_whitespace_only_diff(self): + # TODO: Should it really work this way? + # If we're rejecting empty diff, not sure why whitespace is accepted. + self.assertEqual(collect_statistics("\n"), (0, 0, 0, 0, 0, 0)) + self.assertEqual(collect_statistics("\n \n\t\n\n"), (0, 0, 0, 0, 0, 0)) + + def test_collect_statistics_should_report_sum_of_gas_costs(self): + diff_output = dedent(""" + diff --git a/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol b/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol + index 1306529d4..77a330f3c 100644 + --- a/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol + +++ b/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol + @@ -38 +38,2 @@ contract D { + -// gas legacy: 102095 + +// gas legacy: 76495 + @@ -40,3 +41,6 @@ contract D { + -// gas irOptimized: 98438588 + -// gas legacy: 98438774 + -// gas legacyOptimized: 98438580 + +// gas irOptimized: 25388 + +// gas legacy: 98413174 + +// gas legacyOptimized: 25380 + @@ -44,3 +48,6 @@ contract D { + -// gas irOptimized: 98438589 + -// gas legacy: 98438774 + -// gas legacyOptimized: 98438580 + +// gas irOptimized: 25389 + +// gas legacy: 98413174 + +// gas legacyOptimized: 25380 + """).splitlines() + + self.assertEqual(collect_statistics(diff_output), ( + 98438588 + 98438589, # -irOptimized + 98438580 + 98438580, # -legacyOptimized + 102095 + 98438774 + 98438774, # -legacy + 25388 + 25389, # +irOptimized + 25380 + 25380, # +legacyOptimized + 76495 + 98413174 + 98413174, # +legacy + )) + + def test_collect_statistics_should_ignore_ir_costs(self): + diff_output = dedent(""" + -// gas legacy: 1 + -// gas ir: 2 + +// gas legacy: 3 + +// gas ir: 4 + """).splitlines() + + self.assertEqual(collect_statistics(diff_output), ( + 0, # -irOptimized + 0, # -legacyOptimized + 1, # -legacy + 0, # +irOptimized + 0, # +legacyOptimized + 3, # +legacy + )) + + def test_collect_statistics_should_ignore_unchanged_costs(self): + diff_output = dedent(""" + -// gas legacy: 1 + // gas legacyOptimized: 2 + +// gas legacy: 3 + """).splitlines() + + self.assertEqual(collect_statistics(diff_output), ( + 0, # -irOptimized + 0, # -legacyOptimized + 1, # -legacy + 0, # +irOptimized + 0, # +legacyOptimized + 3, # +legacy + )) From c6e23311cca599f25d1df87dcae8d2ce1f758793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 13:29:28 +0100 Subject: [PATCH 02/11] gas_diff_stats.py: Add a Python shebang - The script is executable but executes as a Bash script and fails. --- scripts/gas_diff_stats.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 234d42df0..90a153cd3 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """A script to collect gas statistics and print it. Useful to summarize gas differences to semantic tests for a PR / branch. From e28a1d436a15e698b933f2a6cc5b160f2bcb3616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 13:31:42 +0100 Subject: [PATCH 03/11] gas_diff_stats.py: Print errors to stderr, not stdout --- scripts/gas_diff_stats.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 90a153cd3..b220485c0 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -18,7 +18,9 @@ repository. The changes are compared against ``origin/develop``. """ + import subprocess +import sys from pathlib import Path from enum import Enum from parsec import generate, ParseError, regex, string @@ -110,8 +112,8 @@ def try_parse_git_diff(fname): if diff_output: return collect_statistics(diff_output) except subprocess.CalledProcessError as e: - print("Error in the git diff:") - print(e.output) + print("Error in the git diff:", file=sys.stderr) + print(e.output, file=sys.stderr) return None def stat(old, new): return ((new - old) / old) * 100 if old else 0 From 5518a3074f0804e7d075122d27733b71812549c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 13:35:50 +0100 Subject: [PATCH 04/11] gas_diff_stats.py: Don't let shell evaluate file names and support names with spaces --- scripts/gas_diff_stats.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index b220485c0..6f8087d48 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -105,8 +105,7 @@ def semantictest_statistics(): def try_parse_git_diff(fname): try: diff_output = subprocess.check_output( - "git diff --unified=0 origin/develop HEAD " + fname, - shell=True, + ["git", "diff", "--unified=0", "origin/develop", "HEAD", fname], universal_newlines=True ).splitlines() if diff_output: From 39f3e7673f84f874a33515f383efca029b2abc49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 13:36:26 +0100 Subject: [PATCH 05/11] gas_diff_stats.py: Fail when the semantic test dir does not exist --- scripts/gas_diff_stats.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 6f8087d48..922aa7de4 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -35,6 +35,8 @@ class Diff(Enum): Minus = 1 Plus = 2 +SEMANTIC_TEST_DIR = Path("test/libsolidity/semanticTests/") + minus = string("-").result(Diff.Minus) plus = string("+").result(Diff.Plus) @@ -119,7 +121,10 @@ def stat(old, new): table = [] - for path in Path("test/libsolidity/semanticTests").rglob("*.sol"): + if not SEMANTIC_TEST_DIR.is_dir(): + sys.exit(f"Semantic tests not found. '{SEMANTIC_TEST_DIR.absolute()}' is missing or not a directory.") + + for path in SEMANTIC_TEST_DIR.rglob("*.sol"): fname = path.as_posix() parsed = try_parse_git_diff(fname) if parsed is None: From 855096b84b71aa5f5dd949f83e2dc90831f1e3f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 15:17:41 +0100 Subject: [PATCH 06/11] gas_diff_stats.py: Skip non-gas lines to avoid having to silence parsing errors --- scripts/gas_diff_stats.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 922aa7de4..5d95d5a19 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -89,7 +89,12 @@ def try_parse(line): pass return None - out = [parsed for line in lines if (parsed := try_parse(line)) is not None] + out = [ + parsed + for line in lines + if line.startswith('+// gas ') or line.startswith('-// gas ') + if (parsed := try_parse(line)) is not None + ] diff_kinds = [Diff.Minus, Diff.Plus] codegen_kinds = [Kind.IrOptimized, Kind.LegacyOptimized, Kind.Legacy] return tuple( From b9c7b69c76ed73c57aef3681d6f29dd155d1607e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 15:57:36 +0100 Subject: [PATCH 07/11] gas_diff_stats.py: Explicitly ignore ir gas instead of failing to parse it --- scripts/gas_diff_stats.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 5d95d5a19..518d8c2c7 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -27,9 +27,10 @@ from tabulate import tabulate class Kind(Enum): - IrOptimized = 1 - Legacy = 2 - LegacyOptimized = 3 + Ir = 1 + IrOptimized = 2 + Legacy = 3 + LegacyOptimized = 4 class Diff(Enum): Minus = 1 @@ -44,6 +45,7 @@ class Diff(Enum): comment = string("//") colon = string(":") +gas_ir = string("gas ir").result(Kind.Ir) gas_ir_optimized = string("gas irOptimized").result(Kind.IrOptimized) gas_legacy_optimized = string("gas legacyOptimized").result(Kind.LegacyOptimized) gas_legacy = string("gas legacy").result(Kind.Legacy) @@ -64,7 +66,7 @@ def diff_string() -> (Kind, Diff, int): diff_kind = yield minus | plus yield comment yield space - codegen_kind = yield gas_ir_optimized ^ gas_legacy_optimized ^ gas_legacy + codegen_kind = yield gas_ir_optimized ^ gas_ir ^ gas_legacy_optimized ^ gas_legacy yield colon yield space val = yield number() From 65031d3b3e128cd30283afcec254d2326030069d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 15:22:17 +0100 Subject: [PATCH 08/11] gas_diff_stats.py: Handle errors instead of ignoring them - It's possible now that errors are something really exceptional and don't happen on every run. --- scripts/gas_diff_stats.py | 46 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 518d8c2c7..352700252 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -84,18 +84,11 @@ def collect_statistics(lines) -> (int, int, int, int, int, int): if not lines: raise RuntimeError("Empty list") - def try_parse(line): - try: - return diff_string.parse(line) - except ParseError: - pass - return None - out = [ parsed for line in lines if line.startswith('+// gas ') or line.startswith('-// gas ') - if (parsed := try_parse(line)) is not None + if (parsed := diff_string.parse(line)) is not None ] diff_kinds = [Diff.Minus, Diff.Plus] codegen_kinds = [Kind.IrOptimized, Kind.LegacyOptimized, Kind.Legacy] @@ -111,18 +104,15 @@ def try_parse(line): def semantictest_statistics(): """Prints the tabulated statistics that can be pasted in github.""" - def try_parse_git_diff(fname): - try: - diff_output = subprocess.check_output( - ["git", "diff", "--unified=0", "origin/develop", "HEAD", fname], - universal_newlines=True - ).splitlines() - if diff_output: - return collect_statistics(diff_output) - except subprocess.CalledProcessError as e: - print("Error in the git diff:", file=sys.stderr) - print(e.output, file=sys.stderr) - return None + def parse_git_diff(fname): + diff_output = subprocess.check_output( + ["git", "diff", "--unified=0", "origin/develop", "HEAD", fname], + universal_newlines=True + ).splitlines() + if len(diff_output) == 0: + return None + return collect_statistics(diff_output) + def stat(old, new): return ((new - old) / old) * 100 if old else 0 @@ -133,7 +123,7 @@ def stat(old, new): for path in SEMANTIC_TEST_DIR.rglob("*.sol"): fname = path.as_posix() - parsed = try_parse_git_diff(fname) + parsed = parse_git_diff(fname) if parsed is None: continue ir_optimized = stat(parsed[0], parsed[3]) @@ -150,5 +140,17 @@ def stat(old, new): else: print("No differences found.") +def main(): + try: + semantictest_statistics() + except subprocess.CalledProcessError as exception: + sys.exit(f"Error in the git diff:\n{exception.output}") + except ParseError as exception: + sys.exit( + f"ParseError: {exception}\n\n" + f"{exception.text}\n" + f"{' ' * exception.index}^\n" + ) + if __name__ == "__main__": - semantictest_statistics() + main() From 43274fddef0ecc55f4026d089fef3928b0617680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 14:14:17 +0100 Subject: [PATCH 09/11] gas_diff_stats.py: Improve table and number formatting - Use blanks if percentage is infinite instead of misleadingly showing zero. - Round percentages to nearest integer (but indicate when numbers are only close to zero rather than exactly zero). Differences below one percent are rarely relevant, if ever. - Include % after the numbers. - Put file names in backticks to get them displayed using fixed-width font on github. --- scripts/gas_diff_stats.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 352700252..0707b9857 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -114,7 +114,16 @@ def parse_git_diff(fname): return collect_statistics(diff_output) def stat(old, new): - return ((new - old) / old) * 100 if old else 0 + if old == 0: + return '' + percentage = (new - old) / old * 100 + prefix = ( + # Distinguish actual zero from very small differences + '+' if round(percentage) == 0 and percentage > 0 else + '-' if round(percentage) == 0 and percentage < 0 else + '' + ) + return f'{prefix}{round(percentage)}%' table = [] @@ -129,12 +138,12 @@ def stat(old, new): ir_optimized = stat(parsed[0], parsed[3]) legacy_optimized = stat(parsed[1], parsed[4]) legacy = stat(parsed[2], parsed[5]) - fname = fname.split('/', 3)[-1] - table += [map(str, [fname, ir_optimized, legacy_optimized, legacy])] + fname = f"`{fname.split('/', 3)[-1]}`" + table += [[fname, ir_optimized, legacy_optimized, legacy]] if table: print("
Click for a table of gas differences\n") - table_header = ["File name", "IR-optimized (%)", "Legacy-Optimized (%)", "Legacy (%)"] + table_header = ["File name", "IR optimized", "Legacy optimized", "Legacy"] print(tabulate(table, headers=table_header, tablefmt="github")) print("
") else: From b88d690c66b963282a77e214001b160873434ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 14:16:33 +0100 Subject: [PATCH 10/11] gas_diff_stats.py: Order rows in a deterministic way (by file path) --- scripts/gas_diff_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py index 0707b9857..7403b3917 100755 --- a/scripts/gas_diff_stats.py +++ b/scripts/gas_diff_stats.py @@ -144,7 +144,7 @@ def stat(old, new): if table: print("
Click for a table of gas differences\n") table_header = ["File name", "IR optimized", "Legacy optimized", "Legacy"] - print(tabulate(table, headers=table_header, tablefmt="github")) + print(tabulate(sorted(table), headers=table_header, tablefmt="github")) print("
") else: print("No differences found.") From 36f2d39282a9f951b0f4780193398f5084073874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20=C5=9Aliwak?= Date: Sun, 11 Feb 2024 16:15:52 +0100 Subject: [PATCH 11/11] CI: Add a smoke test run for gas_diff_stats.py --- .circleci/config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index e14a4ec78..8a16effd0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -980,6 +980,9 @@ jobs: - run: name: Python unit tests command: python3 test/pyscriptTests.py + - run: + name: Smoke test for gas_diff_stats.py + command: scripts/gas_diff_stats.py - matrix_notify_failure_unless_pr t_win_pyscripts: