From 2d1c17c883a463e8d533b5366db7468de4ef0a3f Mon Sep 17 00:00:00 2001
From: Padmanabha V Seshadri
Date: Wed, 14 Aug 2024 20:28:10 +0530
Subject: [PATCH] feat: Example log controller yaml with training state (#296)

Signed-off-by: Padmanabha V Seshadri
---
Notes (free-form; ignored when the patch is applied):
  Adds one example config with three top-level sections.
  * controller_metrics: trainer_state (TrainingState), training_loss (Loss),
    per_process_state (PerProcessState).
  * operations: two LogControl entries, logger_on_log and logger_on_save,
    both at log_level info; their log_format strings reference the metric
    names declared above.
  * controllers: ctrl-on-log-format (trigger on_log, rule
    'training_loss != None') and ctrl-on-save-format (trigger on_save,
    rule 'True'), each listing the matching <operation>.should_log.
  The new file is recorded without a trailing newline (see the marker at the
  end of the hunk).

 .../logging_controller_with_rank.yaml         | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 examples/trainercontroller_configs/logging_controller_with_rank.yaml

diff --git a/examples/trainercontroller_configs/logging_controller_with_rank.yaml b/examples/trainercontroller_configs/logging_controller_with_rank.yaml
new file mode 100644
index 000000000..631428e42
--- /dev/null
+++ b/examples/trainercontroller_configs/logging_controller_with_rank.yaml
@@ -0,0 +1,33 @@
+controller_metrics:
+  - name: trainer_state
+    class: TrainingState
+  - name: training_loss
+    class: Loss
+  - name: per_process_state
+    class: PerProcessState
+operations:
+  - name: logger_on_log
+    class: LogControl
+    arguments:
+      log_format: 'Epoch: {trainer_state[epoch]:.0f}, Step: {trainer_state[global_step]}, Rank: {per_process_state[rank]}, loss = {training_loss}'
+      log_level: info
+  - name: logger_on_save
+    class: LogControl
+    arguments:
+      log_format: |
+        Saving model in huggingface format at step: {trainer_state[global_step]}
+        Model saved in {args.output_dir}/checkpoint-{trainer_state[global_step]}
+      log_level: info
+controllers:
+  - name: ctrl-on-log-format
+    triggers:
+      - on_log
+    rule: 'training_loss != None'
+    operations:
+      - logger_on_log.should_log
+  - name: ctrl-on-save-format
+    triggers:
+      - on_save
+    rule: 'True'
+    operations:
+      - logger_on_save.should_log
\ No newline at end of file