class GlobalNCUProfileCache:
    """
    A global singleton manager for NCU profile caching with process isolation.

    This class provides a singleton implementation for managing cached profiling
    results from NVIDIA Nsight Compute. The cache stores profiling results as
    pandas DataFrames in pickle files, allowing for persistence across multiple
    profiling sessions or subprocesses.

    Key Features:
        - Main process isolation: Each main profiling process uses its own cache
          directory (identified by the main process ID), preventing cross-process
          interference
        - Environment variable NSPY_NCU_MAIN_PID passes the main process ID
        - Singleton pattern: Ensures only one cache instance per process
        - Environment variable NSIGHT_PYTHON_CACHE_DIR controls cache location
        - Automatic cache cleanup on termination of the main process
        - MD5-based cache key generation for unique file naming

    Process Safety:
        Every process that shares the same NSPY_NCU_MAIN_PID value resolves to
        the same ``pid_<main pid>`` subdirectory. The main process writes the
        cache files there and the ncu subprocesses it launches read them back.
        Only the main process (the one without NSPY_NCU_PROFILE set) registers
        the exit-time cleanup, so a subprocess exiting never deletes files the
        main process still needs.

    Thread Safety Considerations:
        No synchronization is performed. Cache operations are expected to be
        called sequentially from a single thread within each process.

    The cache is designed to work in a specific workflow:
        1. Main process profiles functions and saves results to cache
        2. Subprocess (ncu profile) loads cached results for analysis
        3. Main process removes its cache directory on exit

    Example usage::

        cache = GlobalNCUProfileCache()
        # Save profiling results from main process
        cache.save_profile_result("my_function", dataframe)
        # Later, in ncu subprocess:
        results = cache.load_profile_result("my_function")

    Note:
        This class requires proper environment setup:
        - NSIGHT_PYTHON_CACHE_DIR: Optional, specifies custom cache directory
        - NSPY_NCU_PROFILE: Must be set in ncu subprocess for loading
        - NSPY_NCU_MAIN_PID: Must be set (in both the main process and the ncu
          subprocess) for identifying the main process

    Raises:
        RuntimeError: If the cache directory differs from the one the singleton
            was created with, if NSPY_NCU_MAIN_PID is missing or invalid, or if
            cache operations are called from the wrong process context.
        ValueError: If the resolved cache path is not a directory.
        FileNotFoundError: If trying to load non-existent cache.
    """

    # Singleton instance (per process)
    _instance: ClassVar["GlobalNCUProfileCache | None"] = None

    def __new__(cls) -> "GlobalNCUProfileCache":
        """
        Create or retrieve the singleton instance.

        Only one instance of GlobalNCUProfileCache exists per process. The
        cache directory is recorded when the instance is first created.

        Raises:
            RuntimeError: If an instance already exists for a different cache
                directory.
            ValueError: If the resolved cache path is not a directory.
        """
        cache_dir = cls._cache_dir_path()

        existing = cls._instance
        if existing is not None:
            # NOTE: _cache_dir_path() is memoized, so this mismatch can only be
            # observed after its cache has been cleared explicitly.
            if existing._active_cache_dir != cache_dir:
                raise RuntimeError(
                    f"Cache directory already set to {existing._active_cache_dir}, "
                    f"cannot change to {cache_dir}"
                )
            return existing

        # Validate directory
        if not cache_dir.is_dir():
            raise ValueError(
                f"Cache directory {cache_dir} does not exist or is not a directory"
            )

        instance = super().__new__(cls)
        # Record the directory here (not in __init__) so the comparison above
        # keeps working even when a previous __init__ call raised.
        instance._active_cache_dir = cache_dir
        cls._instance = instance
        return instance

    def __init__(self) -> None:
        """
        Initialize the singleton instance with process isolation.

        Each instance gets:
            - The main process ID read from NSPY_NCU_MAIN_PID
            - A ``pid_<main pid>`` subdirectory for cache files
            - A profile ID counter starting at 0
            - An exit-time cleanup handler (main process only)

        Initialization is only marked complete once every step has succeeded,
        so a failed attempt can be retried after fixing the environment.

        Raises:
            RuntimeError: If NSPY_NCU_MAIN_PID environment variable is not
                set or is not a valid integer.
        """
        # Prevent reinitialization of an already configured singleton
        if getattr(self, "_initialized", False):
            return

        # Get main process ID for isolation
        nspy_ncu_main_pid = os.environ.get("NSPY_NCU_MAIN_PID")
        if nspy_ncu_main_pid is None:
            raise RuntimeError(
                "NSPY_NCU_MAIN_PID environment variable is not set. "
                "This is required for cache process isolation."
            )

        try:
            self._process_id = int(nspy_ncu_main_pid)
        except ValueError as e:
            raise RuntimeError(
                f"Invalid NSPY_NCU_MAIN_PID value: '{nspy_ncu_main_pid}'. "
                "Must be a valid integer."
            ) from e

        # Profile ID counter; part of every cache file name
        self._global_ncu_profile_id = 0

        # Subdirectory shared by the main process and its ncu subprocesses
        # (self._active_cache_dir was set in __new__)
        self._process_dir = self._active_cache_dir / f"pid_{self._process_id}"
        self._process_dir.mkdir(exist_ok=True)

        # An ncu subprocess resolves the same directory as the main process.
        # Letting it register the cleanup would delete the main process's
        # cache files as soon as the subprocess exits.
        self._owns_process_dir = "NSPY_NCU_PROFILE" not in os.environ
        if self._owns_process_dir:
            atexit.register(self._cleanup_process_cache_files)

        # Set last: a failure above leaves the instance retryable
        self._initialized = True

    @classmethod
    @functools.lru_cache(maxsize=1)
    def _cache_dir_path(cls) -> Path:
        """
        Determine and return the cache directory path.

        The cache directory is determined by:
            1. NSIGHT_PYTHON_CACHE_DIR environment variable (if set)
            2. Default: ~/.nsight-python

        If the path doesn't exist, it is created (including parents).

        Returns:
            Path: Absolute path to the cache directory.

        Note:
            The result is memoized with functools.lru_cache, so later changes
            to NSIGHT_PYTHON_CACHE_DIR are not picked up unless the cache is
            cleared.
        """
        env_cache_dir = os.environ.get("NSIGHT_PYTHON_CACHE_DIR")
        if env_cache_dir is None:
            cache_dir = Path.home() / ".nsight-python"
        else:
            cache_dir = Path(env_cache_dir)

        cache_dir = cache_dir.absolute()

        # Only create when nothing is there; an existing non-directory is
        # reported by the is_dir() validation in __new__.
        if not cache_dir.exists():
            cache_dir.mkdir(parents=True, exist_ok=True)

        return cache_dir

    def _increment_profile_id(self) -> int:
        """
        Increment and return the profile ID counter.

        The counter is advanced after every successful save or load, so the
        n-th save in the main process and the n-th load in a subprocess refer
        to the same file name. No locking is performed.

        Returns:
            int: The new profile ID after incrementing.
        """
        self._global_ncu_profile_id += 1
        return self._global_ncu_profile_id

    def _construct_cache_file_path(self, func_name: str) -> Path:
        """
        Construct the cache file path for a given function.

        The file name is ``<func_name>_<profile_id>_<key>.pkl`` where ``key``
        is the first 8 hex characters of the MD5 digest of
        ``"<func_name>_<profile_id>"``. The file lives in the
        ``pid_<main pid>`` subdirectory.

        Args:
            func_name: Name of the profiled function.

        Returns:
            Path: Full path to the cache file.
        """
        profile_id = self._global_ncu_profile_id
        stem = f"{func_name}_{profile_id}"
        # MD5 is used only to derive a short file-name suffix
        cache_key = hashlib.md5(stem.encode()).hexdigest()[:8]
        return self._process_dir / f"{stem}_{cache_key}.pkl"

    def save_profile_result(
        self,
        func_name: str,
        df: pd.DataFrame,
        verbose: bool = False,
    ) -> None:
        """
        Save profiling results to the cache.

        This method should only be called from the main profiling process.
        It saves a DataFrame to a pickle file and increments the profile ID
        for the next operation.

        Args:
            func_name: Name of the profiled function.
            df: DataFrame containing profiling results to cache.
            verbose: If True, print status messages to stdout.

        Raises:
            RuntimeError: If called from an ncu subprocess, or if writing the
                file fails (the original error is chained as the cause).
        """
        if "NSPY_NCU_PROFILE" in os.environ:
            raise RuntimeError("Cache saving can only be used in the main process")

        cache_path = self._construct_cache_file_path(func_name)

        try:
            df.to_pickle(cache_path)
        except Exception as e:
            raise RuntimeError(f"Failed to save cache for {func_name}: {e}") from e

        if verbose:
            print(
                f"[NSIGHT-PYTHON] Saved profile results for {func_name} "
                f"(Global NCU ID: {self._global_ncu_profile_id}) to {cache_path}"
            )

        # Advance only after a successful write
        self._increment_profile_id()

    def load_profile_result(
        self, func_name: str, verbose: bool = False
    ) -> pd.DataFrame:
        """
        Load profiling results from the cache.

        This method should only be called from an ncu profile subprocess.
        It loads a previously saved DataFrame from a pickle file and
        increments the profile ID so the next load targets the next file.

        Args:
            func_name: Name of the function whose results to load.
            verbose: If True, print status messages to stdout.

        Returns:
            pd.DataFrame: The loaded profiling results.

        Raises:
            RuntimeError: If called from the main process, or if reading the
                file fails.
            FileNotFoundError: If no cache file exists for the given
                function name and current profile ID.
        """
        if "NSPY_NCU_PROFILE" not in os.environ:
            raise RuntimeError(
                "Cache loading can only be used in ncu profile subprocess"
            )

        cache_path = self._construct_cache_file_path(func_name)
        if not cache_path.exists():
            raise FileNotFoundError(f"Cache for {func_name} does not exist")

        try:
            # SECURITY: unpickling executes arbitrary code. The cache directory
            # must only contain files written by save_profile_result.
            df = pd.read_pickle(cache_path)
        except Exception as e:
            raise RuntimeError(
                f"Failed to load cache for {func_name} from {cache_path}: {e}"
            ) from e

        if verbose:
            print(
                f"[NSIGHT-PYTHON] Loaded cached results for {func_name} "
                f"(Global NCU ID: {self._global_ncu_profile_id}) from {cache_path}"
            )

        # Advance only after a successful read
        self._increment_profile_id()
        return df

    def _cleanup_process_cache_files(self) -> None:
        """
        Remove the ``pid_<main pid>`` cache directory and everything in it.

        Registered with atexit by the main process. Errors are reported on
        stdout and never raised.

        Note:
            This is a best-effort cleanup. If the program crashes or is
            terminated abruptly, the directory may remain on disk.
        """
        try:
            if self._process_dir.exists():
                import shutil

                shutil.rmtree(self._process_dir, ignore_errors=True)
        except Exception as e:
            # Log but don't raise - atexit handlers shouldn't raise exceptions
            print(
                f"[NSIGHT-PYTHON] Warning: Failed to remove cache directory {self._process_dir}: {e}"
            )