diff --git a/.circleci/config.yml b/.circleci/config.yml
index e0d9aab13..663c64520 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -973,9 +973,16 @@ jobs:
     <<: *base_ubuntu2204_small
     steps:
       - checkout
+      - run:
+          # TODO: Add these to the base image
+          name: Install gas_diff_stats.py dependencies
+          command: python3 -m pip install --user parsec tabulate
       - run:
           name: Python unit tests
           command: python3 test/pyscriptTests.py
+      - run:
+          name: Smoke test for gas_diff_stats.py
+          command: scripts/gas_diff_stats.py
       - matrix_notify_failure_unless_pr
 
   t_win_pyscripts:
@@ -985,7 +992,7 @@ jobs:
       - checkout
       - run:
           name: Install dependencies
-          command: python -m pip install --user requests
+          command: python -m pip install --user requests parsec tabulate
       - run:
           name: Python unit tests
           command: python.exe test/pyscriptTests.py
diff --git a/scripts/gas_diff_stats.py b/scripts/gas_diff_stats.py
index 234d42df0..7403b3917 100755
--- a/scripts/gas_diff_stats.py
+++ b/scripts/gas_diff_stats.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 """A script to collect gas statistics and print it.
 
 Useful to summarize gas differences to semantic tests for a PR / branch.
@@ -17,21 +18,26 @@
 repository. The changes are compared against ``origin/develop``.
 
 """
+
 import subprocess
+import sys
 from pathlib import Path
 from enum import Enum
 from parsec import generate, ParseError, regex, string
 from tabulate import tabulate
 
 class Kind(Enum):
-    IrOptimized = 1
-    Legacy = 2
-    LegacyOptimized = 3
+    Ir = 1
+    IrOptimized = 2
+    Legacy = 3
+    LegacyOptimized = 4
 
 class Diff(Enum):
     Minus = 1
     Plus = 2
 
+SEMANTIC_TEST_DIR = Path("test/libsolidity/semanticTests/")
+
 minus = string("-").result(Diff.Minus)
 plus = string("+").result(Diff.Plus)
 
@@ -39,6 +45,7 @@ class Diff(Enum):
 comment = string("//")
 colon = string(":")
 
+gas_ir = string("gas ir").result(Kind.Ir)
 gas_ir_optimized = string("gas irOptimized").result(Kind.IrOptimized)
 gas_legacy_optimized = string("gas legacyOptimized").result(Kind.LegacyOptimized)
 gas_legacy = string("gas legacy").result(Kind.Legacy)
@@ -59,7 +66,7 @@ def diff_string() -> (Kind, Diff, int):
     diff_kind = yield minus | plus
     yield comment
     yield space
-    codegen_kind = yield gas_ir_optimized ^ gas_legacy_optimized ^ gas_legacy
+    codegen_kind = yield gas_ir_optimized ^ gas_ir ^ gas_legacy_optimized ^ gas_legacy
     yield colon
     yield space
     val = yield number()
@@ -77,14 +84,12 @@ def collect_statistics(lines) -> (int, int, int, int, int, int):
     if not lines:
         raise RuntimeError("Empty list")
 
-    def try_parse(line):
-        try:
-            return diff_string.parse(line)
-        except ParseError:
-            pass
-        return None
-
-    out = [parsed for line in lines if (parsed := try_parse(line)) is not None]
+    out = [
+        parsed
+        for line in lines
+        if line.startswith('+// gas ') or line.startswith('-// gas ')
+        if (parsed := diff_string.parse(line)) is not None
+    ]
     diff_kinds = [Diff.Minus, Diff.Plus]
     codegen_kinds = [Kind.IrOptimized, Kind.LegacyOptimized, Kind.Legacy]
     return tuple(
@@ -99,42 +104,62 @@ def try_parse(line):
 
 def semantictest_statistics():
     """Prints the tabulated statistics that can be pasted in github."""
-    def try_parse_git_diff(fname):
-        try:
-            diff_output = subprocess.check_output(
-                "git diff --unified=0 origin/develop HEAD " + fname,
-                shell=True,
-                universal_newlines=True
-            ).splitlines()
-            if diff_output:
-                return collect_statistics(diff_output)
-        except subprocess.CalledProcessError as e:
-            print("Error in the git diff:")
-            print(e.output)
-        return None
+    def parse_git_diff(fname):
+        diff_output = subprocess.check_output(
+            ["git", "diff", "--unified=0", "origin/develop", "HEAD", fname],
+            universal_newlines=True
+        ).splitlines()
+        if len(diff_output) == 0:
+            return None
+        return collect_statistics(diff_output)
+
     def stat(old, new):
-        return ((new - old) / old) * 100  if old else 0
+        if old == 0:
+            return ''
+        percentage = (new - old) / old * 100
+        prefix = (
+            # Distinguish actual zero from very small differences
+            '+' if round(percentage) == 0 and percentage > 0 else
+            '-' if round(percentage) == 0 and percentage < 0 else
+            ''
+        )
+        return f'{prefix}{round(percentage)}%'
 
     table = []
 
-    for path in Path("test/libsolidity/semanticTests").rglob("*.sol"):
+    if not SEMANTIC_TEST_DIR.is_dir():
+        sys.exit(f"Semantic tests not found. '{SEMANTIC_TEST_DIR.absolute()}' is missing or not a directory.")
+
+    for path in SEMANTIC_TEST_DIR.rglob("*.sol"):
         fname = path.as_posix()
-        parsed = try_parse_git_diff(fname)
+        parsed = parse_git_diff(fname)
         if parsed is None:
             continue
         ir_optimized = stat(parsed[0], parsed[3])
         legacy_optimized = stat(parsed[1], parsed[4])
         legacy = stat(parsed[2], parsed[5])
-        fname = fname.split('/', 3)[-1]
-        table += [map(str, [fname, ir_optimized, legacy_optimized, legacy])]
+        fname = f"`{fname.split('/', 3)[-1]}`"
+        table += [[fname, ir_optimized, legacy_optimized, legacy]]
 
     if table:
         print("<details><summary>Click for a table of gas differences</summary>\n")
-        table_header = ["File name", "IR-optimized (%)", "Legacy-Optimized (%)", "Legacy (%)"]
-        print(tabulate(table, headers=table_header, tablefmt="github"))
+        table_header = ["File name", "IR optimized", "Legacy optimized", "Legacy"]
+        print(tabulate(sorted(table), headers=table_header, tablefmt="github"))
         print("</details>")
     else:
         print("No differences found.")
 
+def main():
+    try:
+        semantictest_statistics()
+    except subprocess.CalledProcessError as exception:
+        sys.exit(f"Error in the git diff:\n{exception.output}")
+    except ParseError as exception:
+        sys.exit(
+            f"ParseError: {exception}\n\n"
+            f"{exception.text}\n"
+            f"{' ' * exception.index}^\n"
+        )
+
 if __name__ == "__main__":
-    semantictest_statistics()
+    main()
diff --git a/test/scripts/test_gas_diff_stats.py b/test/scripts/test_gas_diff_stats.py
new file mode 100644
index 000000000..4980293ae
--- /dev/null
+++ b/test/scripts/test_gas_diff_stats.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+import unittest
+from textwrap import dedent
+
+# NOTE: This test file file only works with scripts/ added to PYTHONPATH so pylint can't find the imports
+# pragma pylint: disable=import-error
+from gas_diff_stats import collect_statistics
+# pragma pylint: enable=import-error
+
+class TestGasDiffStats(unittest.TestCase):
+    def test_collect_statistics_should_fail_on_empty_diff(self):
+        with self.assertRaises(RuntimeError):
+            self.assertEqual(collect_statistics(""), (0, 0, 0, 0, 0, 0))
+
+    def test_collect_statistics_should_accept_whitespace_only_diff(self):
+        # TODO: Should it really work this way?
+        # If we're rejecting empty diff, not sure why whitespace is accepted.
+        self.assertEqual(collect_statistics("\n"), (0, 0, 0, 0, 0, 0))
+        self.assertEqual(collect_statistics("\n  \n\t\n\n"), (0, 0, 0, 0, 0, 0))
+
+    def test_collect_statistics_should_report_sum_of_gas_costs(self):
+        diff_output = dedent("""
+            diff --git a/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol b/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol
+            index 1306529d4..77a330f3c 100644
+            --- a/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol
+            +++ b/test/libsolidity/semanticTests/various/staticcall_for_view_and_pure.sol
+            @@ -38 +38,2 @@ contract D {
+            -// gas legacy: 102095
+            +// gas legacy: 76495
+            @@ -40,3 +41,6 @@ contract D {
+            -// gas irOptimized: 98438588
+            -// gas legacy: 98438774
+            -// gas legacyOptimized: 98438580
+            +// gas irOptimized: 25388
+            +// gas legacy: 98413174
+            +// gas legacyOptimized: 25380
+            @@ -44,3 +48,6 @@ contract D {
+            -// gas irOptimized: 98438589
+            -// gas legacy: 98438774
+            -// gas legacyOptimized: 98438580
+            +// gas irOptimized: 25389
+            +// gas legacy: 98413174
+            +// gas legacyOptimized: 25380
+        """).splitlines()
+
+        self.assertEqual(collect_statistics(diff_output), (
+            98438588 + 98438589,          # -irOptimized
+            98438580 + 98438580,          # -legacyOptimized
+            102095 + 98438774 + 98438774, # -legacy
+            25388 + 25389,                # +irOptimized
+            25380 + 25380,                # +legacyOptimized
+            76495 + 98413174 + 98413174,  # +legacy
+        ))
+
+    def test_collect_statistics_should_ignore_ir_costs(self):
+        diff_output = dedent("""
+            -// gas legacy: 1
+            -// gas ir: 2
+            +// gas legacy: 3
+            +// gas ir: 4
+        """).splitlines()
+
+        self.assertEqual(collect_statistics(diff_output), (
+            0, # -irOptimized
+            0, # -legacyOptimized
+            1, # -legacy
+            0, # +irOptimized
+            0, # +legacyOptimized
+            3, # +legacy
+        ))
+
+    def test_collect_statistics_should_ignore_unchanged_costs(self):
+        diff_output = dedent("""
+            -// gas legacy: 1
+             // gas legacyOptimized: 2
+            +// gas legacy: 3
+        """).splitlines()
+
+        self.assertEqual(collect_statistics(diff_output), (
+            0, # -irOptimized
+            0, # -legacyOptimized
+            1, # -legacy
+            0, # +irOptimized
+            0, # +legacyOptimized
+            3, # +legacy
+        ))