Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
718 changes: 417 additions & 301 deletions Cargo.lock

Large diffs are not rendered by default.

26 changes: 17 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,27 @@ crate-type = ["lib"]
lto = true
codegen-units = 1

[profile.profiling]
inherits = "release"
debug = 2

[dependencies]
console = "0.15.8"
glob = "0.3.1"
console = "0.16.2"
dhat = { version = "0.3.3", optional = true }
glob = "0.3.3"
grimoire_css_color_toolkit = "1.0.0"
indicatif = "0.17.8"
indicatif = "0.18.3"
lazy_static = "1.5.0"
lightningcss = { version = "1.0.0-alpha.59", features = ["browserslist"] }
miette = { version = "7.2.0", features = ["fancy"] }
once_cell = "1.20"
regex = "1.11.0"
lightningcss = { version = "1.0.0-alpha.68", features = ["browserslist"] }
miette = { version = "7.6.0", features = ["fancy"] }
once_cell = "1.21"
regex = "1.12.2"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0.61"
thiserror = "2.0.17"

[features]
heap-profile = ["dhat"]

[dev-dependencies]
tempfile = "3.13.0"
tempfile = "3.24.0"
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,21 @@ There are only 3 commands you need to know:
- **`build`**: Kicks off the build process, parsing all your input files and generating the compiled CSS. If you haven’t already run `init`, the `build` command will handle that for you automatically.
- **`shorten`**: Automatically converts all full-length component names in your spells (as defined in your config) to their corresponding shorthand forms. This helps keep your code concise and consistent. Run this command to refactor your files, making your spell syntax as brief as possible without losing clarity or functionality.

**Optional parallel project builds**

If your config defines multiple independent projects (multiple output files), Grimoire CSS can build them in parallel.

- Enable by setting the `GRIMOIRE_CSS_JOBS` environment variable to a positive integer (e.g. `4`).
- Default is `1` (fully sequential; same behavior as before).
- Values are capped to the machine’s available parallelism.
- Higher values can reduce wall-clock build time, but may increase peak memory usage due to multiple optimizations running simultaneously.

Example:

```bash
GRIMOIRE_CSS_JOBS=4 grimoire_css build
```

Grimoire CSS’s CLI is built for developers who want power without bloat. It’s direct, no-nonsense, and integrates smoothly into any project or bundler.

The sections below walk through the remaining parts of the toolchain in more technical depth:
Expand Down
1 change: 1 addition & 0 deletions benchmark/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.14.2
Binary file added benchmark/__pycache__/main.cpython-314.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions benchmark/core/benchmark_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ def generate_pretty_results(input_file):
}
}

jobs_value = raw_results.get("system_info", {}).get("benchmark", {}).get("grimoire_css_jobs", None)
if jobs_value is not None:
pretty_results["metadata"]["system"]["grimoire_css_jobs"] = jobs_value

return pretty_results


Expand Down
204 changes: 192 additions & 12 deletions benchmark/core/metrics_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pathlib import Path
import platform
import traceback
import os


class ProcessMonitor:
Expand All @@ -28,12 +29,29 @@ def __init__(self):
"""Initialize the process monitor."""
self.is_windows = platform.system() == "Windows"
self.is_macos = platform.system() == "Darwin"
# Backward-compatible primary memory series.
self.memory_samples = []
self.peak_memory_bytes = 0
# Additional memory series for better cross-run comparability.
self.memory_samples_rss = []
self.peak_memory_bytes_rss = 0
self.memory_samples_uss = []
self.peak_memory_bytes_uss = 0
# Partial USS series: sums USS only for processes where psutil reports it.
# This is useful on macOS where USS may be unavailable for some children.
self.memory_samples_uss_partial = []
self.peak_memory_bytes_uss_partial = 0
self.uss_coverage_samples = [] # fraction in [0..1]
self.process_count_samples = []
self.uss_available_count_samples = []
self.cpu_user_time = 0
self.cpu_system_time = 0
self.io_read_bytes = 0
self.io_write_bytes = 0
# Which memory measurement is used for the backward-compatible primary fields.
# - macOS/Linux: prefer 'uss' if available for the whole process tree, else 'rss'
# - Windows: prefer 'private' if available, else 'rss'
self.memory_measurement = "unknown"
# Tracks all processes we're monitoring
self.monitored_processes = set()
# Maps PIDs to their last CPU times for delta calculations
Expand All @@ -49,10 +67,20 @@ def start_monitoring(self, pid):
# Reset metrics for new monitoring session
self.memory_samples = []
self.peak_memory_bytes = 0
self.memory_samples_rss = []
self.peak_memory_bytes_rss = 0
self.memory_samples_uss = []
self.peak_memory_bytes_uss = 0
self.memory_samples_uss_partial = []
self.peak_memory_bytes_uss_partial = 0
self.uss_coverage_samples = []
self.process_count_samples = []
self.uss_available_count_samples = []
self.cpu_user_time = 0
self.cpu_system_time = 0
self.io_read_bytes = 0
self.io_write_bytes = 0
self.memory_measurement = "unknown"

try:
# Store initial process state
Expand Down Expand Up @@ -107,7 +135,13 @@ def _monitor_process_tree(self, pid):
self._update_process_list()

# Reset per-iteration counters
current_total_memory = 0
current_total_primary = 0
current_total_rss = 0
current_total_uss = 0
uss_valid_for_all = True
current_total_uss_partial = 0
uss_available_count = 0
process_count = 0

# Check all processes in our monitoring list
for proc in list(self.monitored_processes):
Expand All @@ -117,9 +151,23 @@ def _monitor_process_tree(self, pid):
self.monitored_processes.remove(proc)
continue

# Measure memory using the appropriate platform-specific method
memory_used = self._get_process_memory(proc)
current_total_memory += memory_used
process_count += 1

# Measure memory
rss_bytes, uss_bytes, private_bytes = self._get_process_memory_components(proc)

# RSS is always available.
current_total_rss += rss_bytes

# USS is only valid if available for *all* monitored processes.
if uss_bytes is None:
uss_valid_for_all = False
else:
current_total_uss += uss_bytes
current_total_uss_partial += uss_bytes
uss_available_count += 1

# Primary metric is chosen after the loop based on platform and availability.

# Measure CPU time delta
self._update_cpu_times(proc)
Expand All @@ -130,11 +178,61 @@ def _monitor_process_tree(self, pid):
# Process no longer exists or can't be accessed
self.monitored_processes.discard(proc)

# Update memory metrics only if we got a valid reading
if current_total_memory > 0:
self.memory_samples.append(current_total_memory)
self.peak_memory_bytes = max(
self.peak_memory_bytes, current_total_memory)
# Choose a stable primary memory metric per-run to avoid mixing RSS/USS
# across samples (which makes peak comparisons meaningless).
#
# - macOS/Linux: primary is RSS (always available)
# - Windows: primary is private working set if available, otherwise RSS
if self.is_windows:
# Prefer summing private memory if psutil provides it.
current_total_private = 0
private_valid_for_all = True
for proc in list(self.monitored_processes):
try:
if proc.is_running():
_, _, p = self._get_process_memory_components(proc)
if p is None:
private_valid_for_all = False
break
current_total_private += p
except (psutil.NoSuchProcess, psutil.AccessDenied):
private_valid_for_all = False
break

if private_valid_for_all and current_total_private > 0:
self.memory_measurement = "private"
current_total_primary = current_total_private
else:
self.memory_measurement = "rss"
current_total_primary = current_total_rss
else:
self.memory_measurement = "rss"
current_total_primary = current_total_rss

# Update memory metrics only if we got a valid reading.
if current_total_primary > 0:
self.memory_samples.append(current_total_primary)
self.peak_memory_bytes = max(self.peak_memory_bytes, current_total_primary)

if current_total_rss > 0:
self.memory_samples_rss.append(current_total_rss)
self.peak_memory_bytes_rss = max(self.peak_memory_bytes_rss, current_total_rss)

if uss_valid_for_all and current_total_uss > 0:
self.memory_samples_uss.append(current_total_uss)
self.peak_memory_bytes_uss = max(self.peak_memory_bytes_uss, current_total_uss)

# Always record partial-USS (may undercount) and coverage.
if current_total_uss_partial > 0:
self.memory_samples_uss_partial.append(current_total_uss_partial)
self.peak_memory_bytes_uss_partial = max(
self.peak_memory_bytes_uss_partial, current_total_uss_partial
)

if process_count > 0:
self.uss_coverage_samples.append(uss_available_count / process_count)
self.process_count_samples.append(process_count)
self.uss_available_count_samples.append(uss_available_count)

time.sleep(sampling_interval)

Expand All @@ -146,6 +244,32 @@ def _monitor_process_tree(self, pid):
print(f"Error in monitoring thread: {e}")
traceback.print_exc()

def _get_process_memory_components(self, proc):
"""Return (rss_bytes, uss_bytes_or_None, private_bytes_or_None)."""
try:
memory_info = proc.memory_info()
rss = getattr(memory_info, 'rss', 0) or 0

uss = None
private = None

# Windows: private working set.
if self.is_windows:
private = getattr(memory_info, 'private', None)
return rss, uss, private

# macOS/Linux: try USS if available.
try:
memory_full = proc.memory_full_info()
if hasattr(memory_full, 'uss'):
uss = getattr(memory_full, 'uss')
except Exception:
uss = None

return rss, uss, private
except (psutil.NoSuchProcess, psutil.AccessDenied):
return 0, None, None

def _update_process_list(self):
"""Update the list of processes we're monitoring to include new children."""
processes_to_check = list(self.monitored_processes)
Expand Down Expand Up @@ -183,21 +307,30 @@ def _get_process_memory(self, proc):
try:
if self.is_windows:
# On Windows, use private working set for exclusive memory usage
self.memory_measurement = "private"
return proc.memory_info().private
elif self.is_macos:
# On macOS, use rss - shared memory for better accuracy
memory_info = proc.memory_info()
try:
# Try to get more accurate measurement on macOS if available
memory_full = proc.memory_full_info()
return getattr(memory_full, 'uss', memory_info.rss)
if hasattr(memory_full, 'uss'):
self.memory_measurement = "uss"
return getattr(memory_full, 'uss')

self.memory_measurement = "rss"
return memory_info.rss
except:
self.memory_measurement = "rss"
return memory_info.rss
else:
# On Linux, USS (Unique Set Size) is most accurate
try:
self.memory_measurement = "uss"
return proc.memory_full_info().uss
except:
self.memory_measurement = "rss"
return proc.memory_info().rss
except (psutil.NoSuchProcess, psutil.AccessDenied):
return 0
Expand Down Expand Up @@ -263,7 +396,27 @@ def get_metrics(self):
"peak_bytes": self.peak_memory_bytes,
"peak_mb": self.peak_memory_bytes / (1024 * 1024),
"avg_bytes": statistics.mean(self.memory_samples) if self.memory_samples else 0,
"avg_mb": statistics.mean(self.memory_samples) / (1024 * 1024) if self.memory_samples else 0
"avg_mb": statistics.mean(self.memory_samples) / (1024 * 1024) if self.memory_samples else 0,
"measurement": self.memory_measurement,
# Additional series (may be empty if not measurable).
"rss_peak_bytes": self.peak_memory_bytes_rss,
"rss_peak_mb": self.peak_memory_bytes_rss / (1024 * 1024),
"rss_avg_bytes": statistics.mean(self.memory_samples_rss) if self.memory_samples_rss else 0,
"rss_avg_mb": statistics.mean(self.memory_samples_rss) / (1024 * 1024) if self.memory_samples_rss else 0,
"uss_peak_bytes": self.peak_memory_bytes_uss,
"uss_peak_mb": self.peak_memory_bytes_uss / (1024 * 1024),
"uss_avg_bytes": statistics.mean(self.memory_samples_uss) if self.memory_samples_uss else 0,
"uss_avg_mb": statistics.mean(self.memory_samples_uss) / (1024 * 1024) if self.memory_samples_uss else 0,
"uss_is_complete": bool(self.memory_samples_uss),
"uss_partial_peak_bytes": self.peak_memory_bytes_uss_partial,
"uss_partial_peak_mb": self.peak_memory_bytes_uss_partial / (1024 * 1024),
"uss_partial_avg_bytes": statistics.mean(self.memory_samples_uss_partial) if self.memory_samples_uss_partial else 0,
"uss_partial_avg_mb": statistics.mean(self.memory_samples_uss_partial) / (1024 * 1024) if self.memory_samples_uss_partial else 0,
"uss_coverage_avg": statistics.mean(self.uss_coverage_samples) if self.uss_coverage_samples else 0,
"uss_process_count_avg": statistics.mean(self.process_count_samples) if self.process_count_samples else 0,
"uss_process_count_max": max(self.process_count_samples) if self.process_count_samples else 0,
"uss_available_count_avg": statistics.mean(self.uss_available_count_samples) if self.uss_available_count_samples else 0,
"uss_available_count_max": max(self.uss_available_count_samples) if self.uss_available_count_samples else 0,
},
"cpu": {
"user_time": self.cpu_user_time,
Expand Down Expand Up @@ -487,7 +640,23 @@ class GrimoireMetricsCollector(MetricsCollector):
def __init__(self, output_dir="grimoire_css_output", executable="../target/release/grimoire_css"):
"""Initialize the Grimoire CSS metrics collector."""
super().__init__(output_dir)
self.executable = executable
# Allow overriding the binary path for profiling / custom builds.
# By default we run the release binary to keep benchmarks comparable.
#
# Priority order:
# 1) GRIMOIRE_CSS_EXECUTABLE: explicit path wins.
# 2) GRIMOIRE_CSS_USE_PROFILING=1: prefer ../target/profiling/grimoire_css if present.
# 3) Fallback to the default release binary.
overridden = os.environ.get("GRIMOIRE_CSS_EXECUTABLE")
use_profiling = os.environ.get("GRIMOIRE_CSS_USE_PROFILING") == "1"

if overridden:
self.executable = overridden
elif use_profiling:
profiling_candidate = Path("../target/profiling/grimoire_css")
self.executable = str(profiling_candidate) if profiling_candidate.exists() else executable
else:
self.executable = executable

def run_benchmark(self):
"""Run the Grimoire CSS benchmark and collect metrics."""
Expand All @@ -501,6 +670,11 @@ def run_benchmark(self):
process, elapsed_time, process_metrics, stdout, stderr = self.run_process(
cmd)

# dhat (heap profiling) drastically slows execution and changes allocation behavior.
# If it's enabled, the reported build time is not comparable to normal runs.
if stderr and "dhat:" in stderr:
print("Warning: dhat heap profiling detected in Grimoire process output. Build time is not comparable; disable heap profiling for performance benchmarks.")

# Step 3: Analyze output files
output_metrics = self.output_analyzer.analyze()
self.output_files_size = output_metrics["total_size_bytes"]
Expand All @@ -514,6 +688,12 @@ def run_benchmark(self):
process
)

# Add run metadata for reproducibility.
result["run"] = {
"executable": str(self.executable),
"argv": cmd,
}

return result
except Exception as e:
print(f"Error running Grimoire CSS benchmark: {e}")
Expand Down
Loading
Loading