Skip to content

Commit

Permalink
PR changes
Browse files Browse the repository at this point in the history
Signed-off-by: Abhishek <[email protected]>
  • Loading branch information
Abhishek-TAMU committed Dec 11, 2024
1 parent e89002d commit 4ba1c04
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ indent-string=' '
max-line-length=100

# Maximum number of lines in a module.
-max-module-lines=1200
+max-module-lines=1400

# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
Expand Down
8 changes: 7 additions & 1 deletion tests/data/test_data_preprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,11 @@ def test_process_dataconfig_file(data_config_path, data_path):
),
(
DATA_CONFIG_APPLY_CUSTOM_TEMPLATE_YAML,
-[TWITTER_COMPLAINTS_DATA_JSONL, TWITTER_COMPLAINTS_DATA_JSONL],
+[
+TWITTER_COMPLAINTS_DATA_JSONL,
+TWITTER_COMPLAINTS_DATA_JSONL,
+TWITTER_COMPLAINTS_DATA_JSONL,
+],
),
(
DATA_CONFIG_APPLY_CUSTOM_TEMPLATE_YAML,
Expand All @@ -529,6 +533,7 @@ def test_process_dataconfig_file(data_config_path, data_path):
[
TWITTER_COMPLAINTS_TOKENIZED_PARQUET,
TWITTER_COMPLAINTS_TOKENIZED_PARQUET,
TWITTER_COMPLAINTS_TOKENIZED_PARQUET,
],
),
(
Expand Down Expand Up @@ -561,6 +566,7 @@ def test_process_dataconfig_file(data_config_path, data_path):
[
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW,
],
),
],
Expand Down
26 changes: 25 additions & 1 deletion tests/test_sft_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@
MALFORMATTED_DATA,
MODEL_NAME,
TWITTER_COMPLAINTS_DATA_ARROW,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_PARQUET,
TWITTER_COMPLAINTS_DATA_JSON,
TWITTER_COMPLAINTS_DATA_JSONL,
TWITTER_COMPLAINTS_DATA_PARQUET,
Expand Down Expand Up @@ -772,13 +775,34 @@ def test_run_causallm_ft_pretokenized(dataset_path):
@pytest.mark.parametrize(
"datafiles, datasetconfigname",
[
+(
+[
+TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON,
+TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON,
+],
+DATA_CONFIG_MULTIPLE_DATASETS_SAMPLING_YAML,
+),
(
[
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL,
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL,
],
DATA_CONFIG_MULTIPLE_DATASETS_SAMPLING_YAML,
-)
+),
+(
+[
+TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW,
+TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_ARROW,
+],
+DATA_CONFIG_MULTIPLE_DATASETS_SAMPLING_YAML,
+),
+(
+[
+TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_PARQUET,
+TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_PARQUET,
+],
+DATA_CONFIG_MULTIPLE_DATASETS_SAMPLING_YAML,
+),
],
)
def test_run_causallm_ft_and_inference_with_multiple_dataset(
Expand Down

0 comments on commit 4ba1c04

Please sign in to comment.