From 5cd85b5e008de2ec398d6596e240187d627561e3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 21 Dec 2024 10:10:18 +0200 Subject: [PATCH] convert : add BertForMaskedLM (#10919) --- convert_hf_to_gguf.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 76ab11ebe8e2c..9764251823070 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2628,7 +2628,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(self.map_tensor_name(name), data_torch)] -@Model.register("BertModel", "CamembertModel") +@Model.register("BertModel", "BertForMaskedLM", "CamembertModel") class BertModel(Model): model_arch = gguf.MODEL_ARCH.BERT @@ -2694,10 +2694,25 @@ def phantom(tok): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused + if name.startswith("bert."): + name = name[5:] + + if name.endswith(".gamma"): + name = name[:-6] + ".weight" + + if name.endswith(".beta"): + name = name[:-5] + ".bias" + # we are only using BERT for embeddings so we don't need the pooling layer if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"): return [] # we don't need these + if name.startswith("cls.predictions"): + return [] + + if name.startswith("cls.seq_relationship"): + return [] + return [(self.map_tensor_name(name), data_torch)]