GENWAY-AI · ofekby · Dec 2, 2025 · Nov 27, 2025 · Nov 27, 2025 · Nov 27, 2025
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@ uv tool install snaplint
 
 ```bash
 flake8 src/ | snaplint take-snapshot
-# Creates .snaplint/snapshot.flake8.json
+# Creates .snaplint/snapshot.flake8.json.gz
 ```
 
 2. **Check for new issues** in CI or locally:
@@ -43,9 +43,9 @@ The diff command shows you:
 
 ```bash
 # Each linter gets its own snapshot
-flake8 . | snaplint take-snapshot    # → .snaplint/snapshot.flake8.json
-mypy . | snaplint take-snapshot      # → .snaplint/snapshot.mypy.json
-pylint src/ | snaplint take-snapshot # → .snaplint/snapshot.pylint.json
+flake8 . | snaplint take-snapshot    # → .snaplint/snapshot.flake8.json.gz
+mypy . | snaplint take-snapshot      # → .snaplint/snapshot.mypy.json.gz
+pylint src/ | snaplint take-snapshot # → .snaplint/snapshot.pylint.json.gz
 ```
 
 ### Custom Snapshot Paths
@@ -79,7 +79,7 @@ When comparing snapshots:
 - **Count changes** show if the total number of issues changed
 - **File-level errors** (line 0) are fully supported
 
-Snapshots are stored as JSON with version metadata for forward compatibility.
+Snapshots are stored as gzip-compressed JSON (`.json.gz`) with version metadata for forward compatibility.
 
 ## CI Integration
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "snaplint"
-version = "0.5.1"
+version = "0.6.0"
 description = "Snapshot linter errors and track only new issues — perfect for incremental linter adoption on large codebases"
 readme = "README.md"
 requires-python = ">=3.10"

diff --git a/src/snaplint/cli.py b/src/snaplint/cli.py
@@ -56,7 +56,7 @@ def _detect_linter_from_lines(lines: list[str]) -> str:
 def _get_default_snapshot_path(linter_type: str) -> Path:
     """Get the default snapshot path for a given linter type."""
     snaplint_dir = Path(".snaplint")
-    return snaplint_dir / f"snapshot.{linter_type}.json"
+    return snaplint_dir / f"snapshot.{linter_type}.json.gz"
 
 
 def main() -> int:
@@ -95,7 +95,7 @@ def _main() -> int:
         default=None,
         help=(
             "Path to the snapshot file. If omitted, auto-detects linter "
-            "and uses .snaplint/snapshot.<linter>.json"
+            "and uses .snaplint/snapshot.<linter>.json.gz"
         ),
     )
     parser_diff.add_argument(
@@ -115,7 +115,7 @@ def _main() -> int:
         default=None,
         help=(
             "Path to the snapshot file. If omitted, auto-detects linter "
-            "and uses .snaplint/snapshot.<linter>.json"
+            "and uses .snaplint/snapshot.<linter>.json.gz"
         ),
     )
 
@@ -155,7 +155,7 @@ def _run_take_snapshot(args: argparse.Namespace) -> int:
     snapshot_file = build_snapshot_file(input_lines)
 
     try:
-        with snapshot_path.open("w", encoding="utf-8") as f:
+        with snapshot_path.open("wb") as f:
             write_snapshot(snapshot_file, f)
     except OSError as e:
         raise SnapshotReadError(
@@ -197,7 +197,7 @@ def _run_diff(args: argparse.Namespace) -> int:
         print(f"Using snapshot file: {snapshot_path}", file=sys.stderr)
 
     try:
-        with snapshot_path.open("r", encoding="utf-8") as f:
+        with snapshot_path.open("rb") as f:
             snapshot_file = read_snapshot(f)
     except OSError as e:
         raise SnapshotReadError(

diff --git a/src/snaplint/snapshot.py b/src/snaplint/snapshot.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import gzip
 import hashlib
 import json
 import sys
@@ -210,18 +211,22 @@ def build_snapshot_file(lines: Iterable[str]) -> SnapshotFile:
     return SnapshotFile(files=tuple(file_snapshots))
 
 
-def write_snapshot(snapshot: SnapshotFile, output: IO[str]) -> None:
-    """Write a SnapshotFile to JSON output."""
+def write_snapshot(snapshot: SnapshotFile, output: IO[bytes]) -> None:
+    """Write a SnapshotFile to gzipped JSON output."""
     json_data = snapshot.model_dump(mode="json")
-    json.dump(json_data, output, indent=2)
-    output.write("\n")
+    json_str = json.dumps(json_data, indent=2) + "\n"
+    output.write(gzip.compress(json_str.encode("utf-8")))
 
 
-def read_snapshot(snapshot_file: IO[str]) -> SnapshotFile:
-    """Read a snapshot file and build a SnapshotFile."""
+def read_snapshot(snapshot_file: IO[bytes]) -> SnapshotFile:
+    """Read a gzipped snapshot file and build a SnapshotFile."""
     try:
-        data = json.load(snapshot_file)
+        compressed_data = snapshot_file.read()
+        json_str = gzip.decompress(compressed_data).decode("utf-8")
+        data = json.loads(json_str)
         return SnapshotFile.model_validate(data)
+    except gzip.BadGzipFile as e:
+        raise SnapshotReadError(f"Invalid gzip file: {e}") from e
+    except UnicodeDecodeError as e:
+        raise SnapshotReadError(f"Invalid UTF-8 encoding in snapshot file: {e}") from e
     except json.JSONDecodeError as e:
         raise SnapshotReadError(f"Invalid JSON in snapshot file: {e}") from e
     except Exception as e:

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import gzip
 import io
 import json
 import sys
@@ -20,7 +21,7 @@ def mock_argv(monkeypatch, *args: str):
 
 @pytest.fixture
 def snapshot_file(tmp_path: Path) -> Path:
-    return tmp_path / "lint.snapshot.json"
+    return tmp_path / "lint.snapshot.json.gz"
 
 
 @pytest.fixture
@@ -177,8 +178,9 @@ def test_cli_take_snapshot(
 
     assert return_code == 0
 
-    # Verify snapshot is valid JSON
-    snapshot_data = json.loads(snapshot_file.read_text())
+    # Verify snapshot is valid gzipped JSON
+    with gzip.open(snapshot_file, "rt", encoding="utf-8") as f:
+        snapshot_data = json.load(f)
     assert snapshot_data["version"] == "1"
     assert len(snapshot_data["files"]) == 1
 

diff --git a/tests/test_e2e.py b/tests/test_e2e.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import gzip
 import subprocess
 import sys
 from pathlib import Path
@@ -91,7 +92,7 @@ def run_snaplint(
 
 def test_e2e_full_workflow(project_dir: Path):
     """Test complete workflow: take snapshot, modify code, diff changes."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
 
     # Step 1: Run linter and take initial snapshot
     initial_lint_output = run_flake8(project_dir)
@@ -107,7 +108,8 @@ def test_e2e_full_workflow(project_dir: Path):
     # Verify snapshot is valid JSON
     import json
 
-    snapshot_data = json.loads(snapshot_file.read_text())
+    with gzip.open(snapshot_file, "rt", encoding="utf-8") as f:
+        snapshot_data = json.load(f)
     assert snapshot_data["version"] == "1"
     assert len(snapshot_data["files"]) > 0
 
@@ -168,7 +170,7 @@ def format_string(text):
 
 def test_e2e_code_refactoring_preserves_errors(project_dir: Path):
     """Test that refactoring code (changing line numbers) is detected properly."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
     utils_file = project_dir / "src" / "utils.py"
 
     # Initial code with error on line 3
@@ -212,7 +214,7 @@ def calculate(x, y):
 
 def test_e2e_multiple_files_complex_diff(project_dir: Path):
     """Test diff across multiple files with various changes."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
 
     # Take initial snapshot
     initial_lint_output = run_flake8(project_dir)
@@ -261,7 +263,7 @@ def get_config():
 
 def test_e2e_order_change_detection(project_dir: Path):
     """Test that changes in error order are detected."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
     test_file = project_dir / "order_test.py"
 
     # Create file with multiple errors in specific order
@@ -304,7 +306,7 @@ def test_e2e_order_change_detection(project_dir: Path):
 
 def test_e2e_snapshot_with_no_errors(project_dir: Path):
     """Test taking snapshot and diffing when there are no lint errors."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
     clean_file = project_dir / "clean.py"
 
     # Create a file with no lint errors
@@ -337,7 +339,8 @@ def hello_world():
     # Verify snapshot has no files or empty files
     import json
 
-    snapshot_data = json.loads(snapshot_file.read_text())
+    with gzip.open(snapshot_file, "rt", encoding="utf-8") as f:
+        snapshot_data = json.load(f)
     assert snapshot_data["version"] == "1"
     # Files list should be empty or very small
     assert len(snapshot_data["files"]) == 0 or all(
@@ -354,7 +357,7 @@ def hello_world():
 
 def test_e2e_binary_file_handling(project_dir: Path):
     """Test that snaplint handles errors in non-existent or binary files gracefully."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
 
     # Create fake linter output pointing to non-existent file
     fake_lint_output = f"{project_dir}/nonexistent.py:1:1: E001 Some error\n"
@@ -372,7 +375,7 @@ def test_e2e_binary_file_handling(project_dir: Path):
 
 def test_e2e_large_codebase_simulation(project_dir: Path):
     """Test with a larger number of files and errors."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
 
     # Create multiple files with various errors
     for i in range(10):
@@ -410,7 +413,8 @@ def func_{i}(x):
     # Verify snapshot contains multiple files
     import json
 
-    snapshot_data = json.loads(snapshot_file.read_text())
+    with gzip.open(snapshot_file, "rt", encoding="utf-8") as f:
+        snapshot_data = json.load(f)
     assert len(snapshot_data["files"]) >= 5  # Should have many files
 
     # Fix errors in half the files
@@ -436,7 +440,7 @@ def func_{i}(x):
 
 def test_e2e_update_snapshot_workflow(project_dir: Path):
     """Test the workflow of updating a snapshot after code changes."""
-    snapshot_file = project_dir / "lint.snapshot.json"
+    snapshot_file = project_dir / "lint.snapshot.json.gz"
 
     # Use a separate directory to avoid interference from fixture files
     isolated_dir = project_dir / "isolated"

diff --git a/uv.lock b/uv.lock