From d188e159529a02adb2a5d9c8a9eae89a4df630f8 Mon Sep 17 00:00:00 2001 From: zeroRains Date: Wed, 28 May 2025 10:32:50 +0800 Subject: [PATCH 1/2] support fastsafetensors --- paddlenlp/transformers/model_utils.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py index 4415d8a74afa..9a5da59d6d49 100644 --- a/paddlenlp/transformers/model_utils.py +++ b/paddlenlp/transformers/model_utils.py @@ -3136,10 +3136,26 @@ def load_sharded_checkpoint_as_one(folder, variant=None, return_numpy=False): shard_files = list(set(index["weight_map"].values())) loader = safe_load_file if load_safe else partial(paddlenlp_load, map_location="np" if return_numpy else "cpu") - ret = {} - for shard_file in tqdm(shard_files): - state_dict = loader(os.path.join(folder, shard_file)) - ret.update(state_dict) + try: + # Not use `fastsafe_open` because the end of `with` will destroy the cuda memory, tensor can not use. 
+ from fastsafetensors import SafeTensorsFileLoader, SingleGroup + + path = [os.path.join(folder, shard_file) for shard_file in shard_files] + device = "gpu" if paddle.is_compiled_with_cuda() else "cpu" + not_use_gds = True + # Check load time of files + for _ in tqdm(range(1)): + loader = SafeTensorsFileLoader( + SingleGroup(), device=device, nogds=not_use_gds, debug_log=False, framework="paddle" + ) + loader.add_filenames({0: path}) + bufs_pp = loader.copy_files_to_device(max_copy_block_size=256 * 1024 * 1024) + key_dims = {key: -1 for key in loader.get_keys()} + ret = bufs_pp.as_dict(key_dims) + except: + for shard_file in tqdm(shard_files): + state_dict = loader(os.path.join(folder, shard_file)) + ret.update(state_dict) if not return_numpy: for key in list(ret.keys()): From 4008df6bdd7bf247b5ff081b5df3f407d8373ce8 Mon Sep 17 00:00:00 2001 From: zeroRains Date: Fri, 30 May 2025 10:39:09 +0800 Subject: [PATCH 2/2] fix the bug with load hanging --- paddlenlp/transformers/model_utils.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py index 9a5da59d6d49..4a3ad92ba21f 100644 --- a/paddlenlp/transformers/model_utils.py +++ b/paddlenlp/transformers/model_utils.py @@ -3135,24 +3135,27 @@ def load_sharded_checkpoint_as_one(folder, variant=None, return_numpy=False): shard_files = list(set(index["weight_map"].values())) loader = safe_load_file if load_safe else partial(paddlenlp_load, map_location="np" if return_numpy else "cpu") - + ret = {} try: - # Not use `fastsafe_open` because the end of `with` will destroy the cuda memory, tensor can not use. 
- from fastsafetensors import SafeTensorsFileLoader, SingleGroup + from fastsafetensors import fastsafe_open path = [os.path.join(folder, shard_file) for shard_file in shard_files] - device = "gpu" if paddle.is_compiled_with_cuda() else "cpu" - not_use_gds = True + device = "gpu" if paddle.device.cuda.device_count() else "cpu" + not_use_gds = False # Check load time of files for _ in tqdm(range(1)): - loader = SafeTensorsFileLoader( - SingleGroup(), device=device, nogds=not_use_gds, debug_log=False, framework="paddle" - ) - loader.add_filenames({0: path}) - bufs_pp = loader.copy_files_to_device(max_copy_block_size=256 * 1024 * 1024) - key_dims = {key: -1 for key in loader.get_keys()} - ret = bufs_pp.as_dict(key_dims) + with fastsafe_open( + filenames=path, + nogds=not_use_gds, + device=device, + max_copy_block_size=256 * 1024 * 1024, + framework="paddle", + ) as f: + for key in f.get_keys(): + # Must clone, because cuda memory will be destroyed after `with` end. + ret[key] = f.get_tensor(key).clone().detach() except: + for shard_file in tqdm(shard_files): state_dict = loader(os.path.join(folder, shard_file)) ret.update(state_dict)