Skip to content

Commit

Permalink
Merge branch 'newest_mlora' of https://github.com/cainiaogoroad/mLoRA
Browse files Browse the repository at this point in the history
…into newest_mlora
  • Loading branch information
cainiaogoroad committed Jun 27, 2024
2 parents 07e2d60 + a74889d commit 3aca4fe
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion mlora/executor/task/cpo_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def data(self, start_idx: int) -> Tuple[List[Tokens], List[MLoRADataConfig]]:

# convert the string to tokens
ret_tokens = list(map(lambda raw_str: self.tokenizer_.encode(
raw_str, bos=True, eos=True), batch_str))
raw_str, bos=True, eos=True, cutoff_len=self.config_.cutoff_len_), batch_str))
end_idx = start_idx + len(ret_tokens)

def loss_fn(input: torch.Tensor, target: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
Expand Down
2 changes: 1 addition & 1 deletion mlora/executor/task/dpo_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def data(self, start_idx: int) -> Tuple[List[Tokens], List[MLoRADataConfig]]:
ret_tokens = []
# for reference
ref_model_token = list(map(lambda raw_str: self.tokenizer_.encode(
raw_str, bos=True, eos=True), batch_str))
raw_str, bos=True, eos=True, cutoff_len=self.config_.cutoff_len_), batch_str))
policy_model_token = copy.deepcopy(ref_model_token)

ret_tokens.extend(ref_model_token)
Expand Down
2 changes: 1 addition & 1 deletion mlora/executor/task/train_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def data(self, start_idx: int) -> Tuple[List[Tokens], List[MLoRADataConfig]]:

# convert the string to tokens
ret_tokens = list(map(lambda raw_str: self.tokenizer_.encode(
raw_str, bos=True, eos=True), batch_str))
raw_str, bos=True, eos=True, cutoff_len=self.config_.cutoff_len_), batch_str))
end_idx = start_idx + len(ret_tokens)

def loss_fn(input: torch.Tensor, target: torch.Tensor, _: torch.Tensor) -> torch.Tensor:
Expand Down
3 changes: 2 additions & 1 deletion mlora/model/tokenizer/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ def __init__(self, model_path: str):
if self.pad_id_ is None and self.unk_id_ is not None:
self.pad_id_ = self.unk_id_

def encode(self, data: str, bos=True, eos=True) -> Tokens:
def encode(self, data: str, bos=True, eos=True, cutoff_len=4096) -> Tokens:
tokens = self.tokenizer_.encode(data, add_special_tokens=False)
tokens = tokens[:cutoff_len - int(bos) - int(eos)]
if bos:
tokens = [self.bos_id_] + tokens
if eos:
Expand Down

0 comments on commit 3aca4fe

Please sign in to comment.