Skip to content

Commit 76ae288

Browse files
committed
Ran 'make fix' command to fix linter error.
1 parent dd45832 commit 76ae288

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

src/instructlab/training/data_process.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -723,23 +723,29 @@ def pretraining_is_using_legacy_granite_chat_template(ds: Dataset) -> bool:
723723
return False
724724

725725

726-
def ensure_dataset_is_compatible_with_legacy_format(batch: t.Dict[str, t.List[t.Any]]) -> t.Dict[str, t.List[t.Any]]:
726+
def ensure_dataset_is_compatible_with_legacy_format(
727+
batch: t.Dict[str, t.List[t.Any]],
728+
) -> t.Dict[str, t.List[t.Any]]:
727729
"""
728730
Given a batch of samples using the legacy pre-training format, unroll the samples into ones with
729731
the original messages contents.
730732
"""
731733
processed_messages = []
732734
unmask_flags = []
733735

734-
for messages, unmask_flag in zip(batch["messages"], batch.get("unmask", [False] * len(batch["messages"]))):
736+
for messages, unmask_flag in zip(
737+
batch["messages"], batch.get("unmask", [False] * len(batch["messages"]))
738+
):
735739
new_messages = []
736740
unmask = unmask_flag
737741

738742
for msg in messages:
739743
if msg["role"] != "pretraining":
740744
new_messages.append(msg)
741745
else:
742-
new_messages.extend(extract_messages_from_pretraining_text(msg["content"]))
746+
new_messages.extend(
747+
extract_messages_from_pretraining_text(msg["content"])
748+
)
743749
unmask = True # if any pretraining message is found, set unmask to True
744750

745751
processed_messages.append(new_messages)

0 commit comments

Comments
 (0)