1313- LLM frameworks (vllm, sglang, tensorrt_llm)
1414- Dynamo runtime and framework components
1515- File system (permissions and disk space, detailed with --thorough-check)
16+ - HuggingFace model cache (detailed with --thorough-check)
1617- Installation status and component availability
1718
19+ IMPORTANT: This script is STANDALONE and uses only Python stdlib (no Dynamo components).
20+
21+ Why: Must work before Dynamo is built/installed (CI, fresh containers, build failures).
22+ This tool is for pre-deployment validation; dynamo.common.config_dump is for runtime.
23+
24+ Hard-coded paths: Uses defaults (e.g., ~/.cache/huggingface/hub) for predictable
25+ behavior even when environment variables are misconfigured. See class docs for details.
26+
1827The output uses status indicators:
1928- ✅ Component found and working
2029- ❌ Component missing or error
3544├─ OS Ubuntu 24.04.1 LTS (Noble Numbat) (Linux 6.11.0-28-generic x86_64), Memory=26.7/125.5 GiB, Cores=32
3645├─ User info: user=ubuntu, uid=1000, gid=1000
3746├─ ✅ NVIDIA GPU NVIDIA RTX 6000 Ada Generation, driver 570.133.07, CUDA 12.8, Power=26.14/300.00 W, Memory=289/49140 MiB
47+ ├─ 🤖Framework
48+ │ ├─ ✅ vLLM: 0.10.1.1, module=/opt/vllm/vllm/__init__.py, exec=/opt/dynamo/venv/bin/vllm
49+ │ └─ ✅ Sglang: 0.3.0, module=/opt/sglang/sglang/__init__.py
3850├─ File System
3951│ ├─ ✅ Dynamo workspace ($HOME/dynamo) writable
4052│ ├─ ✅ Dynamo .git directory writable
4153│ ├─ ✅ Rustup home ($HOME/.rustup) writable
4254│ ├─ ✅ Cargo home ($HOME/.cargo) writable
4355│ ├─ ✅ Cargo target ($HOME/dynamo/.build/target) writable
4456│ └─ ✅ Python site-packages ($HOME/dynamo/venv/lib/python3.12/site-packages) writable
57+ ├─ ✅ Hugging Face Cache 3 models in ~/.cache/huggingface/hub
4558├─ ✅ Cargo $HOME/.cargo/bin/cargo, cargo 1.89.0 (c24e10642 2025-06-23)
4659│ ├─ Cargo home directory CARGO_HOME=$HOME/.cargo
4760│ └─ Cargo target directory CARGO_TARGET_DIR=$HOME/dynamo/.build/target
5265├─ ✅ Python 3.12.3, /opt/dynamo/venv/bin/python
5366│ ├─ ✅ PyTorch 2.7.1+cu128, ✅torch.cuda.is_available
5467│ └─ PYTHONPATH not set
55- ├─ 🤖Framework
56- │ ├─ ✅ vLLM: 0.10.1.1, module=/opt/vllm/vllm/__init__.py, exec=/opt/dynamo/venv/bin/vllm
57- │ └─ ✅ Sglang: 0.3.0, module=/opt/sglang/sglang/__init__.py
5868└─ Dynamo $HOME/dynamo, SHA: a03d29066, Date: 2025-08-30 16:22:29 PDT
5969 ├─ ✅ Runtime components ai-dynamo-runtime 0.4.1
6070 │ │ /opt/dynamo/venv/lib/python3.12/site-packages/ai_dynamo_runtime-0.4.1.dist-info: created=2025-08-30 19:14:29 PDT
7989 python deploy/sanity_check.py [--thorough-check] [--terse]
8090
8191Options:
82- --thorough-check Enable thorough checking (file permissions, directory sizes, etc. )
83- --terse Enable terse output mode
92+ --thorough-check Enable thorough checking (file permissions, directory sizes, HuggingFace model details)
93+ --terse Enable terse output mode (show only essential info and errors)
8494"""
8595
8696import datetime
@@ -324,6 +334,9 @@ def __init__(
324334 # Add file permissions check
325335 self .add_child (FilePermissionsInfo (thorough_check = self .thorough_check ))
326336
337+ # Add HuggingFace cache check
338+ self .add_child (HuggingFaceInfo (thorough_check = self .thorough_check ))
339+
327340 # Add Cargo (always show, even if not found)
328341 self .add_child (CargoInfo (thorough_check = self .thorough_check ))
329342
@@ -1103,7 +1116,14 @@ def _check_dynamo_directory_permissions(self):
11031116 )
11041117
11051118 def _check_site_packages_permissions (self ):
1106- """Check site-packages directory writability"""
1119+ """Check site-packages directory writability
1120+
1121+ Logic:
1122+ - If running in a virtualenv and its site-packages is writable: PASS
1123+ (system site-packages being read-only is expected and shown as WARNING)
1124+ - If no virtualenv and no writable site-packages: ERROR
1125+ (can't install packages anywhere)
1126+ """
11071127 try :
11081128 import site
11091129
@@ -1113,15 +1133,33 @@ def _check_site_packages_permissions(self):
11131133 if user_site :
11141134 site_packages_dirs .append (user_site )
11151135
1116- # Check each existing site-packages directory
1136+ # First pass: check which directories are writable
1137+ writable_dirs = []
1138+ all_results = []
11171139 recursive = self .thorough_check
1140+
11181141 for site_dir in site_packages_dirs :
11191142 if os .path .exists (site_dir ):
11201143 results = self ._check_permissions_unified (
11211144 [site_dir ], "site-packages" , recursive = recursive
11221145 )
1123- for result in results :
1124- self .add_child (result )
1146+ all_results .append ((site_dir , results ))
1147+
1148+ # Check if this directory is writable
1149+ if results and results [0 ].status == NodeStatus .OK :
1150+ writable_dirs .append (site_dir )
1151+
1152+ # Determine if we have at least one writable site-packages
1153+ has_writable_site_packages = len (writable_dirs ) > 0
1154+
1155+ # Second pass: add results with adjusted status
1156+ for site_dir , results in all_results :
1157+ for result in results :
1158+ # If we have at least one writable site-packages,
1159+ # downgrade ERROR to WARNING for non-writable ones
1160+ if has_writable_site_packages and result .status == NodeStatus .ERROR :
1161+ result .status = NodeStatus .WARNING
1162+ self .add_child (result )
11251163
11261164 except Exception as e :
11271165 self .add_child (
@@ -1227,6 +1265,187 @@ def format_bytes(bytes_val):
12271265 return "" , None
12281266
12291267
class HuggingFaceInfo(NodeInfo):
    """Hugging Face model cache information (follows the standalone requirement).

    HARD-CODED PATH: ~/.cache/huggingface/hub

    ENV VARIABLES (honoured by the Hugging Face libraries, not by this tool):
    - HF_HOME: Base directory for the Hugging Face cache
    - HUGGINGFACE_HUB_CACHE: Direct path to the hub cache
    - HF_TOKEN: Authentication token (presence is reported, value is never shown)

    This class deliberately inspects ~/.cache/huggingface/hub instead of reading
    the environment variables above, because the tool must behave predictably in
    every environment, including ones where those variables are misconfigured or
    unset. For dynamic configuration that respects all HF environment variables,
    use dynamo.common.config_dump at runtime.

    The node is initialised in one of three states:
    - OK:      the cache directory exists and contains at least one model repo
    - WARNING: the cache directory exists but contains no model repos
    - WARNING: the cache directory does not exist
    """

    # Single source of truth for the (intentionally hard-coded) cache location.
    HF_CACHE_DIR = "~/.cache/huggingface/hub"

    # Hub cache directories are named "<repo_type>s--<org>--<name>"; only model
    # repos are reported (datasets--, spaces--, lock dirs, etc. are ignored).
    _MODEL_DIR_PREFIX = "models--"

    def __init__(self, thorough_check: bool = False):
        """Inspect the cache and build the node.

        Args:
            thorough_check: When True, per-model child nodes are added and
                directory sizes are computed (slow on large caches).
        """
        hf_cache_path = os.path.expanduser(self.HF_CACHE_DIR)

        # isdir rather than exists: a stray regular file at this path is not a
        # usable cache and must not be reported as "directory exists".
        if os.path.isdir(hf_cache_path):
            models = self._get_cached_models(
                hf_cache_path, compute_sizes=thorough_check
            )
            if models:
                self._init_with_models(hf_cache_path, models, thorough_check)
            else:
                self._init_no_models_found(hf_cache_path)
        else:
            self._init_cache_not_available()

        # Add HF_TOKEN info if set (common to all cases)
        self._add_hf_token_info()

    def _init_with_models(
        self, hf_cache_path: str, models: List[tuple], thorough_check: bool
    ):
        """Initialize when models are found in cache."""
        model_count = len(models)
        noun = "model" if model_count == 1 else "models"
        display_path = self._replace_home_with_var(hf_cache_path)
        super().__init__(
            label="Hugging Face Cache",
            desc=f"{model_count} {noun} in {display_path}",
            status=NodeStatus.OK,
        )

        # Only show detailed model list in thorough mode
        if thorough_check:
            self._add_model_details(models)

    def _init_no_models_found(self, hf_cache_path: str):
        """Initialize when cache exists but no models found."""
        display_path = self._replace_home_with_var(hf_cache_path)
        super().__init__(
            label="Hugging Face Cache",
            desc=f"directory exists but no models found in {display_path}",
            status=NodeStatus.WARNING,
        )

    def _init_cache_not_available(self):
        """Initialize when cache directory doesn't exist."""
        super().__init__(
            label="Hugging Face Cache",
            desc=f"{self.HF_CACHE_DIR} not available",
            status=NodeStatus.WARNING,
        )

    def _add_model_details(self, models: List[tuple]):
        """Add one INFO child node per model (no limit on the number shown).

        Args:
            models: Tuples of (model_name, download_date, size_str) as returned
                by _get_cached_models.
        """
        for index, (model_name, download_date, size_str) in enumerate(
            models, start=1
        ):
            self.add_child(
                NodeInfo(
                    label=f"Model {index}",
                    desc=f"{model_name}, downloaded={download_date}, size={size_str}",
                    status=NodeStatus.INFO,
                )
            )

    def _add_hf_token_info(self):
        """Add HF_TOKEN information if the environment variable is set.

        Only the presence of the token is reported; its value is never displayed.
        """
        if os.environ.get("HF_TOKEN"):
            self.add_child(
                NodeInfo(
                    label="HF_TOKEN",
                    desc="<set>",
                    status=NodeStatus.INFO,
                )
            )

    @classmethod
    def _repo_display_name(cls, dir_name: str) -> str:
        """Convert a cache directory name into a repo id for display.

        "models--org--repo-name" becomes "org/repo-name" (any further "--" in
        the name is preserved); "models--gpt2" (no organisation) becomes "gpt2".
        """
        repo = dir_name[len(cls._MODEL_DIR_PREFIX):]
        org, sep, name = repo.partition("--")
        if sep:
            return f"{org}/{name}"
        # No organisation component; fall back to the raw directory name only
        # if nothing is left after stripping the prefix.
        return repo or dir_name

    def _get_cached_models(self, cache_path: str, compute_sizes: bool) -> List[tuple]:
        """Get list of cached Hugging Face models with metadata.

        Args:
            cache_path: Path to HuggingFace cache directory
            compute_sizes: Whether to compute directory sizes (slow operation)

        Returns:
            List of tuples (model_name, download_date, size_str), sorted by
            model name. Empty if the directory cannot be listed.
        """
        try:
            entries = os.listdir(cache_path)
        except OSError:
            # Missing, unreadable, or not a directory: best effort, report none.
            return []

        models = []
        for item in entries:
            item_path = os.path.join(cache_path, item)
            # Only count model repos; ignore datasets--, spaces--, blobs, etc.
            if not (item.startswith(self._MODEL_DIR_PREFIX) and os.path.isdir(item_path)):
                continue

            display_name = self._repo_display_name(item)

            # Approximate the download date with the earlier of st_ctime and
            # st_mtime. NOTE: on Unix st_ctime is the last metadata change, not
            # the creation time, so this is only an estimate.
            try:
                stat_info = os.stat(item_path)
                download_time = min(stat_info.st_ctime, stat_info.st_mtime)
                download_date = self._format_timestamp_pdt(download_time)
            except Exception:
                # Kept broad: the timestamp formatter is defined elsewhere and a
                # sanity check must never crash on a single odd entry.
                download_date = "unknown"

            # Get directory size (only when requested)
            size_str = "-"
            if compute_sizes:
                try:
                    size_bytes = self._get_directory_size_bytes(item_path)
                    size_str = self._format_size(size_bytes)
                except Exception:
                    size_str = "unknown"

            models.append((display_name, download_date, size_str))

        # Sort by model name
        return sorted(models, key=lambda entry: entry[0])

    def _get_directory_size_bytes(self, directory: str) -> int:
        """Get the total size of a directory in bytes.

        Symbolic links are skipped, so a file reached through a link is counted
        only where the real file lives. Files that cannot be accessed are
        ignored.
        """
        total_size = 0
        try:
            for dirpath, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    filepath = os.path.join(dirpath, filename)
                    try:
                        if not os.path.islink(filepath):  # Skip symbolic links
                            total_size += os.path.getsize(filepath)
                    except OSError:
                        pass  # Skip files that can't be accessed
        except Exception:
            # Best effort: return whatever was accumulated so far.
            pass
        return total_size

    def _format_size(self, size_bytes: int) -> str:
        """Format size in bytes to human readable format.

        Uses 1024-based units up to TB with roughly three significant digits
        ("9.99 KB", "99.9 MB", "999 GB"). Values that would round up to 1024 of
        a unit are promoted to the next unit ("1.00 MB" rather than "1024 KB").
        """
        if size_bytes == 0:
            return "0 B"

        units = ("B", "KB", "MB", "GB", "TB")
        last_index = len(units) - 1
        size = float(size_bytes)
        unit_index = 0

        while size >= 1024.0 and unit_index < last_index:
            size /= 1024.0
            unit_index += 1

        if unit_index == 0:  # Bytes are always whole numbers
            return f"{int(size)} {units[unit_index]}"

        # Rounding to an integer could display "1024 <unit>"; promote instead.
        if round(size) >= 1024 and unit_index < last_index:
            size /= 1024.0
            unit_index += 1

        # Thresholds sit at the rounding boundaries so that e.g. 99.96 prints
        # as "100" instead of "100.0".
        if size >= 99.95:
            return f"{size:.0f} {units[unit_index]}"
        if size >= 9.995:
            return f"{size:.1f} {units[unit_index]}"
        return f"{size:.2f} {units[unit_index]}"
1448+
12301449class CargoInfo (NodeInfo ):
12311450 """Cargo tool information"""
12321451
0 commit comments