From d8646475859e98c2eeee5351b6647613bf5bebeb Mon Sep 17 00:00:00 2001 From: Xingyu Xie Date: Sun, 4 Sep 2022 14:41:25 +0800 Subject: [PATCH] add log file --- .../exp_results/pretrain/hydra_train-adam.log | 5885 +++++++++++++++++ .../pretrain/hydra_train-adan-2.log | 1353 ++++ .../exp_results/pretrain/hydra_train-adan.log | 4776 +++++++++++++ 3 files changed, 12014 insertions(+) create mode 100644 NLP/BERT/exp_results/pretrain/hydra_train-adam.log create mode 100644 NLP/BERT/exp_results/pretrain/hydra_train-adan-2.log create mode 100644 NLP/BERT/exp_results/pretrain/hydra_train-adan.log diff --git a/NLP/BERT/exp_results/pretrain/hydra_train-adam.log b/NLP/BERT/exp_results/pretrain/hydra_train-adam.log new file mode 100644 index 0000000..a1e0220 --- /dev/null +++ b/NLP/BERT/exp_results/pretrain/hydra_train-adam.log @@ -0,0 +1,5885 @@ +[2022-07-30 10:49:23,263][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 8, 'distributed_num_procs': 8, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': 'tcp://localhost:15724', 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 8, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': True, 'max_tokens': None, 'batch_size': 32, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': True, 'validate_interval': 5, 'validate_interval_updates': 50000, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 32, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 1000000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': False, 'update_freq': [1], 'lr': [0.0001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False}, 'checkpoint': {'_name': None, 'save_dir': 'bert/baseline/', 'restore_file': 'checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 5, 'save_interval_updates': 50000, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 8}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'roberta', 'max_positions': 512, 'dropout': 0.1, 'attention_dropout': 0.1}, 'task': {'_name': 'masked_lm', 'data': '/dataset/common/bert-corpus-0729/', 'sample_break_mode': complete, 'tokens_per_sample': 512, 'mask_prob': 0.15, 'leave_unmasked_prob': 0.1, 'random_token_prob': 0.1, 'freq_weighted_replacement': False, 'mask_whole_words': False, 'mask_multiple_length': 1, 'mask_stdev': 0.0, 'shorten_method': none, 'shorten_data_split_list': '', 'seed': 1, 'include_target_tokens': False}, 'criterion': {'_name': 'masked_lm', 'tpu': False}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-06, 'weight_decay': 0.01, 'use_old_adam': False, 'fp16_adam_stats': False, 'tpu': False, 'lr': [0.0001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 10000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 1000000.0, 'lr': [0.0001]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} +[2022-07-30 10:49:23,314][fairseq.tasks.masked_lm][INFO] - dictionary: 50264 types +[2022-07-30 10:49:31,685][fairseq_cli.train][INFO] - RobertaModel( + (encoder): RobertaEncoder( + (sentence_encoder): TransformerEncoder( + (dropout_module): FairseqDropout() + (embed_tokens): Embedding(50265, 768, padding_idx=1) + (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1) + (layernorm_embedding): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (layers): ModuleList( + (0): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (1): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (2): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (3): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (4): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (5): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (6): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (7): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (8): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (9): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (10): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (11): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (lm_head): RobertaLMHead( + (dense): Linear(in_features=768, out_features=768, bias=True) + (layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + ) + (classification_heads): ModuleDict() +) +[2022-07-30 10:49:31,688][fairseq_cli.train][INFO] - task: MaskedLMTask +[2022-07-30 10:49:31,688][fairseq_cli.train][INFO] - model: RobertaModel +[2022-07-30 10:49:31,688][fairseq_cli.train][INFO] - criterion: MaskedLmLoss +[2022-07-30 10:49:31,691][fairseq_cli.train][INFO] - num. shared model params: 209,714,265 (num. trained: 209,714,265) +[2022-07-30 10:49:31,692][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) +[2022-07-30 10:49:31,727][fairseq.data.data_utils][INFO] - loaded 1,066,112 examples from: /dataset/common/bert-corpus-0729/valid +[2022-07-30 10:49:31,764][fairseq.tasks.masked_lm][INFO] - loaded 67780 blocks from: /dataset/common/bert-corpus-0729/valid +[2022-07-30 10:49:46,665][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:2 to store for rank: 0 +[2022-07-30 10:49:46,686][torch.distributed.distributed_c10d][INFO] - Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 8 nodes. +[2022-07-30 10:49:46,687][fairseq.trainer][INFO] - detected shared parameter: encoder.sentence_encoder.embed_tokens.weight <- encoder.lm_head.weight +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 0: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 1: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 2: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 3: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 4: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 5: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 6: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - rank 7: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-07-30 10:49:51,570][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** +[2022-07-30 10:49:51,571][fairseq_cli.train][INFO] - training on 8 devices (GPUs/TPUs) +[2022-07-30 10:49:51,571][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 32 +[2022-07-30 10:49:51,572][fairseq.trainer][INFO] - Preparing to load checkpoint bert/baseline/checkpoint_last.pt +[2022-07-30 10:49:51,572][fairseq.trainer][INFO] - No existing checkpoint found bert/baseline/checkpoint_last.pt +[2022-07-30 10:49:51,572][fairseq.trainer][INFO] - loading train data for epoch 1 +[2022-07-30 10:49:54,453][fairseq.data.data_utils][INFO] - loaded 214,960,826 examples from: /dataset/common/bert-corpus-0729/train +[2022-07-30 10:49:58,977][fairseq.tasks.masked_lm][INFO] - loaded 13244396 blocks from: /dataset/common/bert-corpus-0729/train +[2022-07-30 10:50:03,343][fairseq.tasks.fairseq_task][WARNING] - 63,646 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[11407155, 8928673, 3941655, 2058309, 2058002, 11308513, 1003447, 2725530, 13205669, 7271248] +[2022-07-30 10:50:12,789][fairseq.optim.adam][INFO] - using FusedAdam +[2022-07-30 10:50:12,869][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-30 10:50:12,872][fairseq.trainer][INFO] - begin training epoch 1 +[2022-07-30 10:50:12,873][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-30 10:50:23,500][fairseq.modules.cross_entropy][INFO] - using fused cross entropy +[2022-07-30 10:50:48,574][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 10:51:28,021][train_inner][INFO] - {"epoch": 1, "update": 0.004, "loss": "15.078", "ppl": "34584.6", "wps": "389279", "ups": "3.3", "wpb": "118059", "bsz": "256", "num_updates": "200", "lr": "2e-06", "gnorm": "2.879", "loss_scale": "64", "train_wall": "64", "gb_free": "21.5", "wall": "96"} +[2022-07-30 10:52:27,669][train_inner][INFO] - {"epoch": 1, "update": 0.008, "loss": "13.696", "ppl": "13272.6", "wps": "396098", "ups": "3.35", "wpb": "118132", "bsz": "256", "num_updates": "400", "lr": "4e-06", "gnorm": "2.027", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "156"} +[2022-07-30 10:53:27,288][train_inner][INFO] - {"epoch": 1, "update": 0.012, "loss": "12.682", "ppl": "6569.96", "wps": "397278", "ups": "3.35", "wpb": "118423", "bsz": "256", "num_updates": "600", "lr": "6e-06", "gnorm": "1.596", "loss_scale": "64", "train_wall": "59", "gb_free": "27.9", "wall": "216"} +[2022-07-30 10:54:26,663][train_inner][INFO] - {"epoch": 1, "update": 0.016, "loss": "11.679", "ppl": "3279.94", "wps": "395967", "ups": "3.37", "wpb": "117553", "bsz": "256", "num_updates": "800", "lr": "8e-06", "gnorm": "1.149", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "275"} +[2022-07-30 10:55:26,132][train_inner][INFO] - {"epoch": 1, "update": 0.019, "loss": "10.983", "ppl": "2024.08", "wps": "398887", "ups": "3.36", "wpb": "118607", "bsz": "256", "num_updates": "1000", "lr": "1e-05", "gnorm": "0.743", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "335"} +[2022-07-30 10:56:25,856][train_inner][INFO] - {"epoch": 1, "update": 0.023, "loss": "10.631", "ppl": "1586.12", "wps": "395363", "ups": "3.35", "wpb": "118060", "bsz": "256", "num_updates": "1200", "lr": "1.2e-05", "gnorm": "0.64", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "394"} +[2022-07-30 10:57:25,288][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "10.454", "ppl": "1402.56", "wps": "397468", "ups": "3.37", "wpb": "118112", "bsz": "256", "num_updates": "1400", "lr": "1.4e-05", "gnorm": "0.705", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "454"} +[2022-07-30 10:58:24,698][train_inner][INFO] - {"epoch": 1, "update": 0.031, "loss": "10.328", "ppl": "1285.63", "wps": "398603", "ups": "3.37", "wpb": "118403", "bsz": "256", "num_updates": "1600", "lr": "1.6e-05", "gnorm": "0.75", "loss_scale": "64", "train_wall": "59", "gb_free": "28.4", "wall": "513"} +[2022-07-30 10:59:24,399][train_inner][INFO] - {"epoch": 1, "update": 0.035, "loss": "10.22", "ppl": "1192.56", "wps": "395750", "ups": "3.35", "wpb": "118133", "bsz": "256", "num_updates": "1800", "lr": "1.8e-05", "gnorm": "0.732", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "573"} +[2022-07-30 11:00:23,932][train_inner][INFO] - {"epoch": 1, "update": 0.039, "loss": "10.126", "ppl": "1117.42", "wps": "398695", "ups": "3.36", "wpb": "118677", "bsz": "256", "num_updates": "2000", "lr": "2e-05", "gnorm": "0.724", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "632"} +[2022-07-30 11:01:23,369][train_inner][INFO] - {"epoch": 1, "update": 0.043, "loss": "10.046", "ppl": "1056.84", "wps": "396407", "ups": "3.36", "wpb": "117804", "bsz": "256", "num_updates": "2200", "lr": "2.2e-05", "gnorm": "0.749", "loss_scale": "128", "train_wall": "59", "gb_free": "22.5", "wall": "692"} +[2022-07-30 11:02:24,237][train_inner][INFO] - {"epoch": 1, "update": 0.047, "loss": "9.966", "ppl": "999.95", "wps": "390240", "ups": "3.29", "wpb": "118766", "bsz": "256", "num_updates": "2400", "lr": "2.4e-05", "gnorm": "0.757", "loss_scale": "128", "train_wall": "61", "gb_free": "24.9", "wall": "753"} +[2022-07-30 11:03:23,702][train_inner][INFO] - {"epoch": 1, "update": 0.051, "loss": "9.903", "ppl": "957.34", "wps": "397586", "ups": "3.36", "wpb": "118210", "bsz": "256", "num_updates": "2600", "lr": "2.6e-05", "gnorm": "0.739", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "812"} +[2022-07-30 11:04:23,296][train_inner][INFO] - {"epoch": 1, "update": 0.054, "loss": "9.839", "ppl": "915.63", "wps": "395093", "ups": "3.36", "wpb": "117726", "bsz": "256", "num_updates": "2800", "lr": "2.8e-05", "gnorm": "0.74", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "872"} +[2022-07-30 11:05:22,938][train_inner][INFO] - {"epoch": 1, "update": 0.058, "loss": "9.781", "ppl": "879.95", "wps": "398131", "ups": "3.35", "wpb": "118725", "bsz": "256", "num_updates": "3000", "lr": "3e-05", "gnorm": "0.742", "loss_scale": "128", "train_wall": "59", "gb_free": "24.9", "wall": "931"} +[2022-07-30 11:06:22,527][train_inner][INFO] - {"epoch": 1, "update": 0.062, "loss": "9.727", "ppl": "847.64", "wps": "399782", "ups": "3.36", "wpb": "119112", "bsz": "256", "num_updates": "3200", "lr": "3.2e-05", "gnorm": "0.758", "loss_scale": "128", "train_wall": "59", "gb_free": "27", "wall": "991"} +[2022-07-30 11:07:21,915][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "9.685", "ppl": "822.98", "wps": "398744", "ups": "3.37", "wpb": "118404", "bsz": "256", "num_updates": "3400", "lr": "3.4e-05", "gnorm": "0.744", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "1050"} +[2022-07-30 11:08:21,345][train_inner][INFO] - {"epoch": 1, "update": 0.07, "loss": "9.638", "ppl": "797", "wps": "399572", "ups": "3.37", "wpb": "118731", "bsz": "256", "num_updates": "3600", "lr": "3.6e-05", "gnorm": "0.731", "loss_scale": "128", "train_wall": "59", "gb_free": "23.6", "wall": "1110"} +[2022-07-30 11:09:20,766][train_inner][INFO] - {"epoch": 1, "update": 0.074, "loss": "9.595", "ppl": "773.6", "wps": "396577", "ups": "3.37", "wpb": "117824", "bsz": "256", "num_updates": "3800", "lr": "3.8e-05", "gnorm": "0.732", "loss_scale": "128", "train_wall": "59", "gb_free": "25.1", "wall": "1169"} +[2022-07-30 11:10:19,956][train_inner][INFO] - {"epoch": 1, "update": 0.078, "loss": "9.563", "ppl": "756.39", "wps": "401760", "ups": "3.38", "wpb": "118899", "bsz": "256", "num_updates": "4000", "lr": "4e-05", "gnorm": "0.713", "loss_scale": "128", "train_wall": "59", "gb_free": "24.6", "wall": "1228"} +[2022-07-30 11:11:19,329][train_inner][INFO] - {"epoch": 1, "update": 0.082, "loss": "9.53", "ppl": "739.37", "wps": "399075", "ups": "3.37", "wpb": "118470", "bsz": "256", "num_updates": "4200", "lr": "4.2e-05", "gnorm": "0.713", "loss_scale": "256", "train_wall": "59", "gb_free": "23.9", "wall": "1288"} +[2022-07-30 11:12:18,816][train_inner][INFO] - {"epoch": 1, "update": 0.085, "loss": "9.499", "ppl": "723.68", "wps": "397404", "ups": "3.36", "wpb": "118201", "bsz": "256", "num_updates": "4400", "lr": "4.4e-05", "gnorm": "0.717", "loss_scale": "256", "train_wall": "59", "gb_free": "27.4", "wall": "1347"} +[2022-07-30 11:13:18,245][train_inner][INFO] - {"epoch": 1, "update": 0.089, "loss": "9.465", "ppl": "706.58", "wps": "397700", "ups": "3.37", "wpb": "118172", "bsz": "256", "num_updates": "4600", "lr": "4.6e-05", "gnorm": "0.694", "loss_scale": "256", "train_wall": "59", "gb_free": "22.2", "wall": "1407"} +[2022-07-30 11:14:17,882][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "9.438", "ppl": "693.54", "wps": "396956", "ups": "3.35", "wpb": "118366", "bsz": "256", "num_updates": "4800", "lr": "4.8e-05", "gnorm": "0.7", "loss_scale": "256", "train_wall": "59", "gb_free": "22.2", "wall": "1466"} +[2022-07-30 11:15:16,890][train_inner][INFO] - {"epoch": 1, "update": 0.097, "loss": "9.417", "ppl": "683.55", "wps": "401826", "ups": "3.39", "wpb": "118554", "bsz": "256", "num_updates": "5000", "lr": "5e-05", "gnorm": "0.697", "loss_scale": "256", "train_wall": "59", "gb_free": "24.4", "wall": "1525"} +[2022-07-30 11:16:16,187][train_inner][INFO] - {"epoch": 1, "update": 0.101, "loss": "9.378", "ppl": "665.29", "wps": "399790", "ups": "3.37", "wpb": "118532", "bsz": "256", "num_updates": "5200", "lr": "5.2e-05", "gnorm": "0.718", "loss_scale": "256", "train_wall": "59", "gb_free": "24.4", "wall": "1585"} +[2022-07-30 11:16:19,354][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 128.0 +[2022-07-30 11:17:15,665][train_inner][INFO] - {"epoch": 1, "update": 0.105, "loss": "9.279", "ppl": "621.36", "wps": "396060", "ups": "3.36", "wpb": "117782", "bsz": "256", "num_updates": "5400", "lr": "5.4e-05", "gnorm": "0.783", "loss_scale": "128", "train_wall": "59", "gb_free": "25.3", "wall": "1644"} +[2022-07-30 11:18:15,218][train_inner][INFO] - {"epoch": 1, "update": 0.109, "loss": "9.213", "ppl": "593.64", "wps": "397181", "ups": "3.36", "wpb": "118266", "bsz": "256", "num_updates": "5600", "lr": "5.6e-05", "gnorm": "0.835", "loss_scale": "128", "train_wall": "59", "gb_free": "23.5", "wall": "1704"} +[2022-07-30 11:19:15,045][train_inner][INFO] - {"epoch": 1, "update": 0.113, "loss": "9.153", "ppl": "569.45", "wps": "397347", "ups": "3.34", "wpb": "118860", "bsz": "256", "num_updates": "5800", "lr": "5.8e-05", "gnorm": "0.879", "loss_scale": "128", "train_wall": "59", "gb_free": "22.6", "wall": "1763"} +[2022-07-30 11:19:33,046][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 11:20:15,035][train_inner][INFO] - {"epoch": 1, "update": 0.117, "loss": "9.11", "ppl": "552.54", "wps": "393524", "ups": "3.33", "wpb": "118036", "bsz": "256", "num_updates": "6000", "lr": "6e-05", "gnorm": "0.916", "loss_scale": "64", "train_wall": "60", "gb_free": "22.9", "wall": "1823"} +[2022-07-30 11:21:14,454][train_inner][INFO] - {"epoch": 1, "update": 0.12, "loss": "9.072", "ppl": "538.32", "wps": "398537", "ups": "3.37", "wpb": "118402", "bsz": "256", "num_updates": "6200", "lr": "6.2e-05", "gnorm": "0.981", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "1883"} +[2022-07-30 11:22:14,145][train_inner][INFO] - {"epoch": 1, "update": 0.124, "loss": "9.017", "ppl": "518.13", "wps": "396495", "ups": "3.35", "wpb": "118336", "bsz": "256", "num_updates": "6400", "lr": "6.4e-05", "gnorm": "1.022", "loss_scale": "64", "train_wall": "59", "gb_free": "25.2", "wall": "1943"} +[2022-07-30 11:23:13,736][train_inner][INFO] - {"epoch": 1, "update": 0.128, "loss": "8.985", "ppl": "506.68", "wps": "397254", "ups": "3.36", "wpb": "118362", "bsz": "256", "num_updates": "6600", "lr": "6.6e-05", "gnorm": "1.062", "loss_scale": "64", "train_wall": "59", "gb_free": "24.7", "wall": "2002"} +[2022-07-30 11:24:13,137][train_inner][INFO] - {"epoch": 1, "update": 0.132, "loss": "8.938", "ppl": "490.48", "wps": "397501", "ups": "3.37", "wpb": "118060", "bsz": "256", "num_updates": "6800", "lr": "6.8e-05", "gnorm": "1.106", "loss_scale": "64", "train_wall": "59", "gb_free": "27", "wall": "2062"} +[2022-07-30 11:25:12,653][train_inner][INFO] - {"epoch": 1, "update": 0.136, "loss": "8.885", "ppl": "472.7", "wps": "397179", "ups": "3.36", "wpb": "118192", "bsz": "256", "num_updates": "7000", "lr": "7e-05", "gnorm": "1.097", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "2121"} +[2022-07-30 11:26:12,019][train_inner][INFO] - {"epoch": 1, "update": 0.14, "loss": "8.843", "ppl": "459.23", "wps": "398822", "ups": "3.37", "wpb": "118381", "bsz": "256", "num_updates": "7200", "lr": "7.2e-05", "gnorm": "1.161", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "2180"} +[2022-07-30 11:27:11,795][train_inner][INFO] - {"epoch": 1, "update": 0.144, "loss": "8.788", "ppl": "442.02", "wps": "396110", "ups": "3.35", "wpb": "118388", "bsz": "256", "num_updates": "7400", "lr": "7.4e-05", "gnorm": "1.221", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "2240"} +[2022-07-30 11:28:11,417][train_inner][INFO] - {"epoch": 1, "update": 0.148, "loss": "8.729", "ppl": "424.3", "wps": "399628", "ups": "3.35", "wpb": "119133", "bsz": "256", "num_updates": "7600", "lr": "7.6e-05", "gnorm": "1.223", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "2300"} +[2022-07-30 11:29:10,633][train_inner][INFO] - {"epoch": 1, "update": 0.152, "loss": "8.679", "ppl": "409.89", "wps": "398353", "ups": "3.38", "wpb": "117943", "bsz": "256", "num_updates": "7800", "lr": "7.8e-05", "gnorm": "1.253", "loss_scale": "64", "train_wall": "59", "gb_free": "25.9", "wall": "2359"} +[2022-07-30 11:29:47,145][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 11:30:10,829][train_inner][INFO] - {"epoch": 1, "update": 0.155, "loss": "8.605", "ppl": "389.3", "wps": "393221", "ups": "3.32", "wpb": "118349", "bsz": "256", "num_updates": "8000", "lr": "8e-05", "gnorm": "1.389", "loss_scale": "64", "train_wall": "60", "gb_free": "21.8", "wall": "2419"} +[2022-07-30 11:31:10,298][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "8.525", "ppl": "368.48", "wps": "398250", "ups": "3.36", "wpb": "118419", "bsz": "256", "num_updates": "8200", "lr": "8.2e-05", "gnorm": "1.42", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "2479"} +[2022-07-30 11:32:09,834][train_inner][INFO] - {"epoch": 1, "update": 0.163, "loss": "8.377", "ppl": "332.49", "wps": "395403", "ups": "3.36", "wpb": "117701", "bsz": "256", "num_updates": "8400", "lr": "8.4e-05", "gnorm": "1.596", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "2538"} +[2022-07-30 11:32:57,121][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 11:33:10,011][train_inner][INFO] - {"epoch": 1, "update": 0.167, "loss": "8.014", "ppl": "258.42", "wps": "394768", "ups": "3.32", "wpb": "118780", "bsz": "256", "num_updates": "8600", "lr": "8.6e-05", "gnorm": "2.103", "loss_scale": "32", "train_wall": "60", "gb_free": "23.2", "wall": "2598"} +[2022-07-30 11:34:09,710][train_inner][INFO] - {"epoch": 1, "update": 0.171, "loss": "7.679", "ppl": "204.87", "wps": "395902", "ups": "3.35", "wpb": "118174", "bsz": "256", "num_updates": "8800", "lr": "8.8e-05", "gnorm": "2.067", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "2658"} +[2022-07-30 11:35:09,296][train_inner][INFO] - {"epoch": 1, "update": 0.175, "loss": "7.417", "ppl": "170.93", "wps": "398445", "ups": "3.36", "wpb": "118707", "bsz": "256", "num_updates": "9000", "lr": "9e-05", "gnorm": "2.206", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "2718"} +[2022-07-30 11:36:09,053][train_inner][INFO] - {"epoch": 1, "update": 0.179, "loss": "7.121", "ppl": "139.21", "wps": "397284", "ups": "3.35", "wpb": "118702", "bsz": "256", "num_updates": "9200", "lr": "9.2e-05", "gnorm": "1.986", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "2777"} +[2022-07-30 11:37:08,654][train_inner][INFO] - {"epoch": 1, "update": 0.183, "loss": "6.646", "ppl": "100.12", "wps": "396149", "ups": "3.36", "wpb": "118052", "bsz": "256", "num_updates": "9400", "lr": "9.4e-05", "gnorm": "1.755", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "2837"} +[2022-07-30 11:38:08,078][train_inner][INFO] - {"epoch": 1, "update": 0.187, "loss": "6.24", "ppl": "75.56", "wps": "397161", "ups": "3.37", "wpb": "118003", "bsz": "256", "num_updates": "9600", "lr": "9.6e-05", "gnorm": "1.531", "loss_scale": "32", "train_wall": "59", "gb_free": "27.2", "wall": "2897"} +[2022-07-30 11:39:07,733][train_inner][INFO] - {"epoch": 1, "update": 0.19, "loss": "5.927", "ppl": "60.85", "wps": "396927", "ups": "3.35", "wpb": "118394", "bsz": "256", "num_updates": "9800", "lr": "9.8e-05", "gnorm": "1.46", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "2956"} +[2022-07-30 11:40:07,143][train_inner][INFO] - {"epoch": 1, "update": 0.194, "loss": "5.676", "ppl": "51.11", "wps": "400282", "ups": "3.37", "wpb": "118901", "bsz": "256", "num_updates": "10000", "lr": "0.0001", "gnorm": "1.387", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "3016"} +[2022-07-30 11:41:06,743][train_inner][INFO] - {"epoch": 1, "update": 0.198, "loss": "5.462", "ppl": "44.09", "wps": "397193", "ups": "3.36", "wpb": "118364", "bsz": "256", "num_updates": "10200", "lr": "9.99798e-05", "gnorm": "1.336", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "3075"} +[2022-07-30 11:42:05,948][train_inner][INFO] - {"epoch": 1, "update": 0.202, "loss": "5.307", "ppl": "39.59", "wps": "397736", "ups": "3.38", "wpb": "117739", "bsz": "256", "num_updates": "10400", "lr": "9.99596e-05", "gnorm": "1.26", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "3134"} +[2022-07-30 11:43:05,191][train_inner][INFO] - {"epoch": 1, "update": 0.206, "loss": "5.159", "ppl": "35.74", "wps": "399431", "ups": "3.38", "wpb": "118316", "bsz": "256", "num_updates": "10600", "lr": "9.99394e-05", "gnorm": "1.214", "loss_scale": "32", "train_wall": "59", "gb_free": "27.1", "wall": "3194"} +[2022-07-30 11:44:05,123][train_inner][INFO] - {"epoch": 1, "update": 0.21, "loss": "5.034", "ppl": "32.76", "wps": "394321", "ups": "3.34", "wpb": "118161", "bsz": "256", "num_updates": "10800", "lr": "9.99192e-05", "gnorm": "1.171", "loss_scale": "64", "train_wall": "60", "gb_free": "21.6", "wall": "3254"} +[2022-07-30 11:45:04,484][train_inner][INFO] - {"epoch": 1, "update": 0.214, "loss": "4.926", "ppl": "30.39", "wps": "399723", "ups": "3.37", "wpb": "118639", "bsz": "256", "num_updates": "11000", "lr": "9.9899e-05", "gnorm": "1.129", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "3313"} +[2022-07-30 11:46:04,109][train_inner][INFO] - {"epoch": 1, "update": 0.218, "loss": "4.843", "ppl": "28.7", "wps": "396219", "ups": "3.35", "wpb": "118123", "bsz": "256", "num_updates": "11200", "lr": "9.98788e-05", "gnorm": "1.1", "loss_scale": "64", "train_wall": "59", "gb_free": "29.3", "wall": "3373"} +[2022-07-30 11:47:03,723][train_inner][INFO] - {"epoch": 1, "update": 0.222, "loss": "4.753", "ppl": "26.97", "wps": "395736", "ups": "3.35", "wpb": "117955", "bsz": "256", "num_updates": "11400", "lr": "9.98586e-05", "gnorm": "1.068", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "3432"} +[2022-07-30 11:48:03,046][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "4.66", "ppl": "25.27", "wps": "399566", "ups": "3.37", "wpb": "118518", "bsz": "256", "num_updates": "11600", "lr": "9.98384e-05", "gnorm": "1.044", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "3491"} +[2022-07-30 11:49:02,315][train_inner][INFO] - {"epoch": 1, "update": 0.229, "loss": "4.601", "ppl": "24.27", "wps": "399571", "ups": "3.37", "wpb": "118410", "bsz": "256", "num_updates": "11800", "lr": "9.98182e-05", "gnorm": "1.024", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "3551"} +[2022-07-30 11:50:01,867][train_inner][INFO] - {"epoch": 1, "update": 0.233, "loss": "4.53", "ppl": "23.1", "wps": "397901", "ups": "3.36", "wpb": "118477", "bsz": "256", "num_updates": "12000", "lr": "9.9798e-05", "gnorm": "1.001", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "3610"} +[2022-07-30 11:51:01,918][train_inner][INFO] - {"epoch": 1, "update": 0.237, "loss": "4.468", "ppl": "22.13", "wps": "393960", "ups": "3.33", "wpb": "118288", "bsz": "256", "num_updates": "12200", "lr": "9.97778e-05", "gnorm": "0.988", "loss_scale": "64", "train_wall": "60", "gb_free": "22.4", "wall": "3670"} +[2022-07-30 11:52:01,513][train_inner][INFO] - {"epoch": 1, "update": 0.241, "loss": "4.398", "ppl": "21.08", "wps": "398382", "ups": "3.36", "wpb": "118707", "bsz": "256", "num_updates": "12400", "lr": "9.97576e-05", "gnorm": "0.965", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "3730"} +[2022-07-30 11:53:01,012][train_inner][INFO] - {"epoch": 1, "update": 0.245, "loss": "4.362", "ppl": "20.56", "wps": "398746", "ups": "3.36", "wpb": "118623", "bsz": "256", "num_updates": "12600", "lr": "9.97374e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "3789"} +[2022-07-30 11:54:00,397][train_inner][INFO] - {"epoch": 1, "update": 0.249, "loss": "4.3", "ppl": "19.7", "wps": "399290", "ups": "3.37", "wpb": "118558", "bsz": "256", "num_updates": "12800", "lr": "9.97172e-05", "gnorm": "0.942", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "3849"} +[2022-07-30 11:55:00,067][train_inner][INFO] - {"epoch": 1, "update": 0.253, "loss": "4.26", "ppl": "19.16", "wps": "397834", "ups": "3.35", "wpb": "118694", "bsz": "256", "num_updates": "13000", "lr": "9.9697e-05", "gnorm": "0.929", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "3908"} +[2022-07-30 11:55:59,667][train_inner][INFO] - {"epoch": 1, "update": 0.256, "loss": "4.218", "ppl": "18.61", "wps": "398077", "ups": "3.36", "wpb": "118627", "bsz": "256", "num_updates": "13200", "lr": "9.96768e-05", "gnorm": "0.923", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "3968"} +[2022-07-30 11:56:59,304][train_inner][INFO] - {"epoch": 1, "update": 0.26, "loss": "4.175", "ppl": "18.06", "wps": "397677", "ups": "3.35", "wpb": "118579", "bsz": "256", "num_updates": "13400", "lr": "9.96566e-05", "gnorm": "0.912", "loss_scale": "128", "train_wall": "59", "gb_free": "21.8", "wall": "4028"} +[2022-07-30 11:57:58,735][train_inner][INFO] - {"epoch": 1, "update": 0.264, "loss": "4.135", "ppl": "17.57", "wps": "400075", "ups": "3.37", "wpb": "118884", "bsz": "256", "num_updates": "13600", "lr": "9.96364e-05", "gnorm": "0.902", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "4087"} +[2022-07-30 11:58:57,746][train_inner][INFO] - {"epoch": 1, "update": 0.268, "loss": "4.102", "ppl": "17.17", "wps": "400971", "ups": "3.39", "wpb": "118308", "bsz": "256", "num_updates": "13800", "lr": "9.96162e-05", "gnorm": "0.901", "loss_scale": "128", "train_wall": "59", "gb_free": "24.6", "wall": "4146"} +[2022-07-30 11:59:57,145][train_inner][INFO] - {"epoch": 1, "update": 0.272, "loss": "4.071", "ppl": "16.81", "wps": "396898", "ups": "3.37", "wpb": "117876", "bsz": "256", "num_updates": "14000", "lr": "9.9596e-05", "gnorm": "0.897", "loss_scale": "128", "train_wall": "59", "gb_free": "21.6", "wall": "4206"} +[2022-07-30 12:00:56,811][train_inner][INFO] - {"epoch": 1, "update": 0.276, "loss": "4.039", "ppl": "16.44", "wps": "397564", "ups": "3.35", "wpb": "118604", "bsz": "256", "num_updates": "14200", "lr": "9.95758e-05", "gnorm": "0.887", "loss_scale": "128", "train_wall": "59", "gb_free": "25.2", "wall": "4265"} +[2022-07-30 12:01:56,273][train_inner][INFO] - {"epoch": 1, "update": 0.28, "loss": "4.001", "ppl": "16.01", "wps": "398877", "ups": "3.36", "wpb": "118588", "bsz": "256", "num_updates": "14400", "lr": "9.95556e-05", "gnorm": "0.879", "loss_scale": "128", "train_wall": "59", "gb_free": "22.6", "wall": "4325"} +[2022-07-30 12:02:55,662][train_inner][INFO] - {"epoch": 1, "update": 0.284, "loss": "3.973", "ppl": "15.7", "wps": "397001", "ups": "3.37", "wpb": "117887", "bsz": "256", "num_updates": "14600", "lr": "9.95354e-05", "gnorm": "0.875", "loss_scale": "128", "train_wall": "59", "gb_free": "25.3", "wall": "4384"} +[2022-07-30 12:03:55,591][train_inner][INFO] - {"epoch": 1, "update": 0.288, "loss": "3.942", "ppl": "15.37", "wps": "396929", "ups": "3.34", "wpb": "118936", "bsz": "256", "num_updates": "14800", "lr": "9.95152e-05", "gnorm": "0.869", "loss_scale": "256", "train_wall": "60", "gb_free": "21.3", "wall": "4444"} +[2022-07-30 12:04:54,927][train_inner][INFO] - {"epoch": 1, "update": 0.291, "loss": "3.924", "ppl": "15.18", "wps": "400943", "ups": "3.37", "wpb": "118952", "bsz": "256", "num_updates": "15000", "lr": "9.94949e-05", "gnorm": "0.864", "loss_scale": "256", "train_wall": "59", "gb_free": "21.6", "wall": "4503"} +[2022-07-30 12:05:54,427][train_inner][INFO] - {"epoch": 1, "update": 0.295, "loss": "3.899", "ppl": "14.91", "wps": "397438", "ups": "3.36", "wpb": "118236", "bsz": "256", "num_updates": "15200", "lr": "9.94747e-05", "gnorm": "0.863", "loss_scale": "256", "train_wall": "59", "gb_free": "21.5", "wall": "4563"} +[2022-07-30 12:06:54,114][train_inner][INFO] - {"epoch": 1, "update": 0.299, "loss": "3.877", "ppl": "14.69", "wps": "396355", "ups": "3.35", "wpb": "118285", "bsz": "256", "num_updates": "15400", "lr": "9.94545e-05", "gnorm": "0.86", "loss_scale": "256", "train_wall": "59", "gb_free": "23", "wall": "4623"} +[2022-07-30 12:07:27,561][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 128.0 +[2022-07-30 12:07:53,096][train_inner][INFO] - {"epoch": 1, "update": 0.303, "loss": "3.852", "ppl": "14.44", "wps": "401019", "ups": "3.39", "wpb": "118265", "bsz": "256", "num_updates": "15600", "lr": "9.94343e-05", "gnorm": "0.857", "loss_scale": "128", "train_wall": "59", "gb_free": "24", "wall": "4682"} +[2022-07-30 12:08:52,463][train_inner][INFO] - {"epoch": 1, "update": 0.307, "loss": "3.83", "ppl": "14.22", "wps": "400228", "ups": "3.37", "wpb": "118800", "bsz": "256", "num_updates": "15800", "lr": "9.94141e-05", "gnorm": "0.852", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "4741"} +[2022-07-30 12:09:51,756][train_inner][INFO] - {"epoch": 1, "update": 0.311, "loss": "3.808", "ppl": "14.01", "wps": "398639", "ups": "3.37", "wpb": "118181", "bsz": "256", "num_updates": "16000", "lr": "9.93939e-05", "gnorm": "0.85", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "4800"} +[2022-07-30 12:10:51,049][train_inner][INFO] - {"epoch": 1, "update": 0.315, "loss": "3.786", "ppl": "13.79", "wps": "400469", "ups": "3.37", "wpb": "118725", "bsz": "256", "num_updates": "16200", "lr": "9.93737e-05", "gnorm": "0.847", "loss_scale": "128", "train_wall": "59", "gb_free": "21.6", "wall": "4859"} +[2022-07-30 12:11:50,626][train_inner][INFO] - {"epoch": 1, "update": 0.319, "loss": "3.777", "ppl": "13.71", "wps": "393375", "ups": "3.36", "wpb": "117180", "bsz": "256", "num_updates": "16400", "lr": "9.93535e-05", "gnorm": "0.85", "loss_scale": "128", "train_wall": "59", "gb_free": "21.8", "wall": "4919"} +[2022-07-30 12:12:50,232][train_inner][INFO] - {"epoch": 1, "update": 0.323, "loss": "3.743", "ppl": "13.39", "wps": "398605", "ups": "3.36", "wpb": "118794", "bsz": "256", "num_updates": "16600", "lr": "9.93333e-05", "gnorm": "0.841", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "4979"} +[2022-07-30 12:13:49,923][train_inner][INFO] - {"epoch": 1, "update": 0.326, "loss": "3.736", "ppl": "13.33", "wps": "394391", "ups": "3.35", "wpb": "117708", "bsz": "256", "num_updates": "16800", "lr": "9.93131e-05", "gnorm": "0.843", "loss_scale": "128", "train_wall": "59", "gb_free": "25.5", "wall": "5038"} +[2022-07-30 12:14:49,052][train_inner][INFO] - {"epoch": 1, "update": 0.33, "loss": "3.714", "ppl": "13.13", "wps": "399008", "ups": "3.38", "wpb": "117964", "bsz": "256", "num_updates": "17000", "lr": "9.92929e-05", "gnorm": "0.836", "loss_scale": "128", "train_wall": "59", "gb_free": "21.9", "wall": "5097"} +[2022-07-30 12:15:48,087][train_inner][INFO] - {"epoch": 1, "update": 0.334, "loss": "3.688", "ppl": "12.89", "wps": "399902", "ups": "3.39", "wpb": "118039", "bsz": "256", "num_updates": "17200", "lr": "9.92727e-05", "gnorm": "0.834", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "5157"} +[2022-07-30 12:16:47,868][train_inner][INFO] - {"epoch": 1, "update": 0.338, "loss": "3.67", "ppl": "12.73", "wps": "396189", "ups": "3.35", "wpb": "118423", "bsz": "256", "num_updates": "17400", "lr": "9.92525e-05", "gnorm": "0.833", "loss_scale": "128", "train_wall": "59", "gb_free": "25", "wall": "5216"} +[2022-07-30 12:17:47,533][train_inner][INFO] - {"epoch": 1, "update": 0.342, "loss": "3.658", "ppl": "12.62", "wps": "396940", "ups": "3.35", "wpb": "118416", "bsz": "256", "num_updates": "17600", "lr": "9.92323e-05", "gnorm": "0.831", "loss_scale": "256", "train_wall": "59", "gb_free": "23", "wall": "5276"} +[2022-07-30 12:18:46,977][train_inner][INFO] - {"epoch": 1, "update": 0.346, "loss": "3.638", "ppl": "12.45", "wps": "397711", "ups": "3.36", "wpb": "118208", "bsz": "256", "num_updates": "17800", "lr": "9.92121e-05", "gnorm": "0.831", "loss_scale": "256", "train_wall": "59", "gb_free": "27.2", "wall": "5335"} +[2022-07-30 12:19:06,622][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 128.0 +[2022-07-30 12:19:46,337][train_inner][INFO] - {"epoch": 1, "update": 0.35, "loss": "3.634", "ppl": "12.42", "wps": "396204", "ups": "3.37", "wpb": "117592", "bsz": "256", "num_updates": "18000", "lr": "9.91919e-05", "gnorm": "0.831", "loss_scale": "128", "train_wall": "59", "gb_free": "23.3", "wall": "5395"} +[2022-07-30 12:20:45,834][train_inner][INFO] - {"epoch": 1, "update": 0.354, "loss": "3.617", "ppl": "12.27", "wps": "398023", "ups": "3.36", "wpb": "118404", "bsz": "256", "num_updates": "18200", "lr": "9.91717e-05", "gnorm": "0.826", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "5454"} +[2022-07-30 12:21:45,409][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.595", "ppl": "12.09", "wps": "398403", "ups": "3.36", "wpb": "118674", "bsz": "256", "num_updates": "18400", "lr": "9.91515e-05", "gnorm": "0.824", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "5514"} +[2022-07-30 12:22:45,034][train_inner][INFO] - {"epoch": 1, "update": 0.361, "loss": "3.59", "ppl": "12.04", "wps": "397924", "ups": "3.35", "wpb": "118629", "bsz": "256", "num_updates": "18600", "lr": "9.91313e-05", "gnorm": "0.827", "loss_scale": "128", "train_wall": "59", "gb_free": "26.9", "wall": "5573"} +[2022-07-30 12:23:44,449][train_inner][INFO] - {"epoch": 1, "update": 0.365, "loss": "3.575", "ppl": "11.92", "wps": "398015", "ups": "3.37", "wpb": "118240", "bsz": "256", "num_updates": "18800", "lr": "9.91111e-05", "gnorm": "0.821", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "5633"} +[2022-07-30 12:24:44,042][train_inner][INFO] - {"epoch": 1, "update": 0.369, "loss": "3.559", "ppl": "11.79", "wps": "398667", "ups": "3.36", "wpb": "118788", "bsz": "256", "num_updates": "19000", "lr": "9.90909e-05", "gnorm": "0.821", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "5692"} +[2022-07-30 12:25:43,104][train_inner][INFO] - {"epoch": 1, "update": 0.373, "loss": "3.539", "ppl": "11.62", "wps": "400430", "ups": "3.39", "wpb": "118249", "bsz": "256", "num_updates": "19200", "lr": "9.90707e-05", "gnorm": "0.821", "loss_scale": "128", "train_wall": "59", "gb_free": "27.1", "wall": "5752"} +[2022-07-30 12:26:42,370][train_inner][INFO] - {"epoch": 1, "update": 0.377, "loss": "3.539", "ppl": "11.63", "wps": "397640", "ups": "3.37", "wpb": "117832", "bsz": "256", "num_updates": "19400", "lr": "9.90505e-05", "gnorm": "0.824", "loss_scale": "128", "train_wall": "59", "gb_free": "22.4", "wall": "5811"} +[2022-07-30 12:27:41,827][train_inner][INFO] - {"epoch": 1, "update": 0.381, "loss": "3.527", "ppl": "11.53", "wps": "397426", "ups": "3.36", "wpb": "118148", "bsz": "256", "num_updates": "19600", "lr": "9.90303e-05", "gnorm": "0.819", "loss_scale": "128", "train_wall": "59", "gb_free": "21.7", "wall": "5870"} +[2022-07-30 12:28:41,258][train_inner][INFO] - {"epoch": 1, "update": 0.385, "loss": "3.52", "ppl": "11.47", "wps": "396794", "ups": "3.37", "wpb": "117908", "bsz": "256", "num_updates": "19800", "lr": "9.90101e-05", "gnorm": "0.823", "loss_scale": "128", "train_wall": "59", "gb_free": "24.3", "wall": "5930"} +[2022-07-30 12:29:40,761][train_inner][INFO] - {"epoch": 1, "update": 0.389, "loss": "3.5", "ppl": "11.31", "wps": "398759", "ups": "3.36", "wpb": "118636", "bsz": "256", "num_updates": "20000", "lr": "9.89899e-05", "gnorm": "0.816", "loss_scale": "256", "train_wall": "59", "gb_free": "21.3", "wall": "5989"} +[2022-07-30 12:29:43,157][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 128.0 +[2022-07-30 12:30:40,812][train_inner][INFO] - {"epoch": 1, "update": 0.392, "loss": "3.49", "ppl": "11.24", "wps": "393087", "ups": "3.33", "wpb": "118025", "bsz": "256", "num_updates": "20200", "lr": "9.89697e-05", "gnorm": "0.817", "loss_scale": "128", "train_wall": "60", "gb_free": "23.2", "wall": "6049"} +[2022-07-30 12:31:40,446][train_inner][INFO] - {"epoch": 1, "update": 0.396, "loss": "3.478", "ppl": "11.14", "wps": "396442", "ups": "3.35", "wpb": "118207", "bsz": "256", "num_updates": "20400", "lr": "9.89495e-05", "gnorm": "0.819", "loss_scale": "128", "train_wall": "59", "gb_free": "21.6", "wall": "6109"} +[2022-07-30 12:32:40,238][train_inner][INFO] - {"epoch": 1, "update": 0.4, "loss": "3.463", "ppl": "11.03", "wps": "396092", "ups": "3.34", "wpb": "118414", "bsz": "256", "num_updates": "20600", "lr": "9.89293e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "59", "gb_free": "29.7", "wall": "6169"} +[2022-07-30 12:33:39,590][train_inner][INFO] - {"epoch": 1, "update": 0.404, "loss": "3.457", "ppl": "10.98", "wps": "400274", "ups": "3.37", "wpb": "118785", "bsz": "256", "num_updates": "20800", "lr": "9.89091e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "59", "gb_free": "22.5", "wall": "6228"} +[2022-07-30 12:34:39,251][train_inner][INFO] - {"epoch": 1, "update": 0.408, "loss": "3.448", "ppl": "10.91", "wps": "397744", "ups": "3.35", "wpb": "118648", "bsz": "256", "num_updates": "21000", "lr": "9.88889e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "59", "gb_free": "21.9", "wall": "6288"} +[2022-07-30 12:35:38,719][train_inner][INFO] - {"epoch": 1, "update": 0.412, "loss": "3.435", "ppl": "10.82", "wps": "398076", "ups": "3.36", "wpb": "118363", "bsz": "256", "num_updates": "21200", "lr": "9.88687e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "6347"} +[2022-07-30 12:36:39,302][train_inner][INFO] - {"epoch": 1, "update": 0.416, "loss": "3.431", "ppl": "10.78", "wps": "389104", "ups": "3.3", "wpb": "117865", "bsz": "256", "num_updates": "21400", "lr": "9.88485e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "60", "gb_free": "24.2", "wall": "6408"} +[2022-07-30 12:37:38,555][train_inner][INFO] - {"epoch": 1, "update": 0.42, "loss": "3.425", "ppl": "10.74", "wps": "398095", "ups": "3.38", "wpb": "117941", "bsz": "256", "num_updates": "21600", "lr": "9.88283e-05", "gnorm": "0.815", "loss_scale": "128", "train_wall": "59", "gb_free": "22.3", "wall": "6467"} +[2022-07-30 12:38:38,264][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "3.407", "ppl": "10.6", "wps": "398163", "ups": "3.35", "wpb": "118869", "bsz": "256", "num_updates": "21800", "lr": "9.88081e-05", "gnorm": "0.809", "loss_scale": "128", "train_wall": "59", "gb_free": "25.1", "wall": "6527"} +[2022-07-30 12:39:37,808][train_inner][INFO] - {"epoch": 1, "update": 0.427, "loss": "3.405", "ppl": "10.59", "wps": "396194", "ups": "3.36", "wpb": "117954", "bsz": "256", "num_updates": "22000", "lr": "9.87879e-05", "gnorm": "0.811", "loss_scale": "128", "train_wall": "59", "gb_free": "21.9", "wall": "6586"} +[2022-07-30 12:40:00,006][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 128.0 +[2022-07-30 12:40:37,405][train_inner][INFO] - {"epoch": 1, "update": 0.431, "loss": "3.386", "ppl": "10.46", "wps": "397509", "ups": "3.36", "wpb": "118450", "bsz": "256", "num_updates": "22200", "lr": "9.87677e-05", "gnorm": "0.812", "loss_scale": "128", "train_wall": "59", "gb_free": "21.6", "wall": "6646"} +[2022-07-30 12:41:36,629][train_inner][INFO] - {"epoch": 1, "update": 0.435, "loss": "3.379", "ppl": "10.41", "wps": "398675", "ups": "3.38", "wpb": "118055", "bsz": "256", "num_updates": "22400", "lr": "9.87475e-05", "gnorm": "0.812", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "6705"} +[2022-07-30 12:42:37,114][train_inner][INFO] - {"epoch": 1, "update": 0.439, "loss": "3.373", "ppl": "10.36", "wps": "392996", "ups": "3.31", "wpb": "118851", "bsz": "256", "num_updates": "22600", "lr": "9.87273e-05", "gnorm": "0.809", "loss_scale": "128", "train_wall": "60", "gb_free": "22.2", "wall": "6766"} +[2022-07-30 12:43:37,644][train_inner][INFO] - {"epoch": 1, "update": 0.443, "loss": "3.362", "ppl": "10.28", "wps": "390559", "ups": "3.3", "wpb": "118204", "bsz": "256", "num_updates": "22800", "lr": "9.87071e-05", "gnorm": "0.808", "loss_scale": "128", "train_wall": "60", "gb_free": "24.9", "wall": "6826"} +[2022-07-30 12:44:37,094][train_inner][INFO] - {"epoch": 1, "update": 0.447, "loss": "3.35", "ppl": "10.2", "wps": "399288", "ups": "3.36", "wpb": "118687", "bsz": "256", "num_updates": "23000", "lr": "9.86869e-05", "gnorm": "0.806", "loss_scale": "128", "train_wall": "59", "gb_free": "22.1", "wall": "6886"} +[2022-07-30 12:45:36,710][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "3.344", "ppl": "10.16", "wps": "397391", "ups": "3.35", "wpb": "118452", "bsz": "256", "num_updates": "23200", "lr": "9.86667e-05", "gnorm": "0.809", "loss_scale": "128", "train_wall": "59", "gb_free": "22.7", "wall": "6945"} +[2022-07-30 12:46:36,457][train_inner][INFO] - {"epoch": 1, "update": 0.455, "loss": "3.339", "ppl": "10.12", "wps": "396944", "ups": "3.35", "wpb": "118582", "bsz": "256", "num_updates": "23400", "lr": "9.86465e-05", "gnorm": "0.809", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "7005"} +[2022-07-30 12:47:36,106][train_inner][INFO] - {"epoch": 1, "update": 0.459, "loss": "3.329", "ppl": "10.05", "wps": "397394", "ups": "3.35", "wpb": "118520", "bsz": "256", "num_updates": "23600", "lr": "9.86263e-05", "gnorm": "0.806", "loss_scale": "128", "train_wall": "59", "gb_free": "22.4", "wall": "7065"} +[2022-07-30 12:48:35,594][train_inner][INFO] - {"epoch": 1, "update": 0.462, "loss": "3.322", "ppl": "10", "wps": "398954", "ups": "3.36", "wpb": "118664", "bsz": "256", "num_updates": "23800", "lr": "9.86061e-05", "gnorm": "0.807", "loss_scale": "128", "train_wall": "59", "gb_free": "21.7", "wall": "7124"} +[2022-07-30 12:49:35,056][train_inner][INFO] - {"epoch": 1, "update": 0.466, "loss": "3.318", "ppl": "9.97", "wps": "397709", "ups": "3.36", "wpb": "118243", "bsz": "256", "num_updates": "24000", "lr": "9.85859e-05", "gnorm": "0.809", "loss_scale": "128", "train_wall": "59", "gb_free": "24.7", "wall": "7183"} +[2022-07-30 12:50:34,425][train_inner][INFO] - {"epoch": 1, "update": 0.47, "loss": "3.311", "ppl": "9.92", "wps": "398086", "ups": "3.37", "wpb": "118168", "bsz": "255.9", "num_updates": "24200", "lr": "9.85657e-05", "gnorm": "0.807", "loss_scale": "256", "train_wall": "59", "gb_free": "22", "wall": "7243"} +[2022-07-30 12:51:12,085][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 128.0 +[2022-07-30 12:51:34,050][train_inner][INFO] - {"epoch": 1, "update": 0.474, "loss": "3.307", "ppl": "9.9", "wps": "396693", "ups": "3.35", "wpb": "118264", "bsz": "256", "num_updates": "24400", "lr": "9.85455e-05", "gnorm": "0.808", "loss_scale": "128", "train_wall": "59", "gb_free": "31.3", "wall": "7302"} +[2022-07-30 12:52:33,449][train_inner][INFO] - {"epoch": 1, "update": 0.478, "loss": "3.3", "ppl": "9.85", "wps": "397096", "ups": "3.37", "wpb": "117934", "bsz": "256", "num_updates": "24600", "lr": "9.85253e-05", "gnorm": "0.808", "loss_scale": "128", "train_wall": "59", "gb_free": "22.2", "wall": "7362"} +[2022-07-30 12:53:33,106][train_inner][INFO] - {"epoch": 1, "update": 0.482, "loss": "3.283", "ppl": "9.74", "wps": "397076", "ups": "3.35", "wpb": "118441", "bsz": "256", "num_updates": "24800", "lr": "9.85051e-05", "gnorm": "0.807", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "7422"} +[2022-07-30 12:54:25,863][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 12:54:32,915][train_inner][INFO] - {"epoch": 1, "update": 0.486, "loss": "3.28", "ppl": "9.71", "wps": "395558", "ups": "3.34", "wpb": "118290", "bsz": "256", "num_updates": "25000", "lr": "9.84848e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "25.1", "wall": "7481"} +[2022-07-30 12:55:32,323][train_inner][INFO] - {"epoch": 1, "update": 0.49, "loss": "3.276", "ppl": "9.69", "wps": "397112", "ups": "3.37", "wpb": "117957", "bsz": "256", "num_updates": "25200", "lr": "9.84646e-05", "gnorm": "0.808", "loss_scale": "64", "train_wall": "59", "gb_free": "23.8", "wall": "7541"} +[2022-07-30 12:56:31,881][train_inner][INFO] - {"epoch": 1, "update": 0.494, "loss": "3.271", "ppl": "9.65", "wps": "396641", "ups": "3.36", "wpb": "118116", "bsz": "256", "num_updates": "25400", "lr": "9.84444e-05", "gnorm": "0.806", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "7600"} +[2022-07-30 12:57:31,499][train_inner][INFO] - {"epoch": 1, "update": 0.497, "loss": "3.261", "ppl": "9.59", "wps": "397628", "ups": "3.35", "wpb": "118528", "bsz": "256", "num_updates": "25600", "lr": "9.84242e-05", "gnorm": "0.805", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "7660"} +[2022-07-30 12:58:30,999][train_inner][INFO] - {"epoch": 1, "update": 0.501, "loss": "3.256", "ppl": "9.55", "wps": "396453", "ups": "3.36", "wpb": "117945", "bsz": "256", "num_updates": "25800", "lr": "9.8404e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "7719"} +[2022-07-30 12:59:30,141][train_inner][INFO] - {"epoch": 1, "update": 0.505, "loss": "3.25", "ppl": "9.52", "wps": "399081", "ups": "3.38", "wpb": "118012", "bsz": "256", "num_updates": "26000", "lr": "9.83838e-05", "gnorm": "0.808", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "7779"} +[2022-07-30 13:00:29,654][train_inner][INFO] - {"epoch": 1, "update": 0.509, "loss": "3.248", "ppl": "9.5", "wps": "396774", "ups": "3.36", "wpb": "118064", "bsz": "256", "num_updates": "26200", "lr": "9.83636e-05", "gnorm": "0.808", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "7838"} +[2022-07-30 13:01:29,425][train_inner][INFO] - {"epoch": 1, "update": 0.513, "loss": "3.236", "ppl": "9.42", "wps": "396531", "ups": "3.35", "wpb": "118505", "bsz": "256", "num_updates": "26400", "lr": "9.83434e-05", "gnorm": "0.806", "loss_scale": "64", "train_wall": "59", "gb_free": "24.2", "wall": "7898"} +[2022-07-30 13:02:28,837][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "3.23", "ppl": "9.39", "wps": "397906", "ups": "3.37", "wpb": "118200", "bsz": "256", "num_updates": "26600", "lr": "9.83232e-05", "gnorm": "0.805", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "7957"} +[2022-07-30 13:03:28,203][train_inner][INFO] - {"epoch": 1, "update": 0.521, "loss": "3.218", "ppl": "9.31", "wps": "398130", "ups": "3.37", "wpb": "118177", "bsz": "256", "num_updates": "26800", "lr": "9.8303e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "25.8", "wall": "8017"} +[2022-07-30 13:04:27,793][train_inner][INFO] - {"epoch": 1, "update": 0.525, "loss": "3.226", "ppl": "9.36", "wps": "396546", "ups": "3.36", "wpb": "118150", "bsz": "256", "num_updates": "27000", "lr": "9.82828e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "8076"} +[2022-07-30 13:05:27,841][train_inner][INFO] - {"epoch": 1, "update": 0.528, "loss": "3.213", "ppl": "9.27", "wps": "394207", "ups": "3.33", "wpb": "118356", "bsz": "256", "num_updates": "27200", "lr": "9.82626e-05", "gnorm": "0.805", "loss_scale": "128", "train_wall": "60", "gb_free": "23.5", "wall": "8136"} +[2022-07-30 13:06:27,359][train_inner][INFO] - {"epoch": 1, "update": 0.532, "loss": "3.209", "ppl": "9.25", "wps": "396932", "ups": "3.36", "wpb": "118122", "bsz": "256", "num_updates": "27400", "lr": "9.82424e-05", "gnorm": "0.807", "loss_scale": "128", "train_wall": "59", "gb_free": "22.1", "wall": "8196"} +[2022-07-30 13:07:27,015][train_inner][INFO] - {"epoch": 1, "update": 0.536, "loss": "3.199", "ppl": "9.18", "wps": "395318", "ups": "3.35", "wpb": "117915", "bsz": "256", "num_updates": "27600", "lr": "9.82222e-05", "gnorm": "0.809", "loss_scale": "128", "train_wall": "59", "gb_free": "23.9", "wall": "8255"} +[2022-07-30 13:08:26,912][train_inner][INFO] - {"epoch": 1, "update": 0.54, "loss": "3.187", "ppl": "9.11", "wps": "397451", "ups": "3.34", "wpb": "119029", "bsz": "256", "num_updates": "27800", "lr": "9.8202e-05", "gnorm": "0.806", "loss_scale": "128", "train_wall": "60", "gb_free": "22.6", "wall": "8315"} +[2022-07-30 13:09:26,404][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "3.183", "ppl": "9.08", "wps": "396791", "ups": "3.36", "wpb": "118029", "bsz": "256", "num_updates": "28000", "lr": "9.81818e-05", "gnorm": "0.807", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "8375"} +[2022-07-30 13:10:26,303][train_inner][INFO] - {"epoch": 1, "update": 0.548, "loss": "3.174", "ppl": "9.03", "wps": "396486", "ups": "3.34", "wpb": "118745", "bsz": "256", "num_updates": "28200", "lr": "9.81616e-05", "gnorm": "0.803", "loss_scale": "128", "train_wall": "60", "gb_free": "22.4", "wall": "8435"} +[2022-07-30 13:11:25,665][train_inner][INFO] - {"epoch": 1, "update": 0.552, "loss": "3.177", "ppl": "9.04", "wps": "399127", "ups": "3.37", "wpb": "118464", "bsz": "256", "num_updates": "28400", "lr": "9.81414e-05", "gnorm": "0.807", "loss_scale": "128", "train_wall": "59", "gb_free": "22.2", "wall": "8494"} +[2022-07-30 13:12:25,438][train_inner][INFO] - {"epoch": 1, "update": 0.556, "loss": "3.174", "ppl": "9.03", "wps": "395602", "ups": "3.35", "wpb": "118230", "bsz": "256", "num_updates": "28600", "lr": "9.81212e-05", "gnorm": "0.806", "loss_scale": "128", "train_wall": "59", "gb_free": "28.3", "wall": "8554"} +[2022-07-30 13:12:55,296][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 13:13:25,224][train_inner][INFO] - {"epoch": 1, "update": 0.56, "loss": "3.168", "ppl": "8.99", "wps": "396001", "ups": "3.35", "wpb": "118376", "bsz": "256", "num_updates": "28800", "lr": "9.8101e-05", "gnorm": "0.803", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "8614"} +[2022-07-30 13:14:25,000][train_inner][INFO] - {"epoch": 1, "update": 0.563, "loss": "3.157", "ppl": "8.92", "wps": "395114", "ups": "3.35", "wpb": "118090", "bsz": "256", "num_updates": "29000", "lr": "9.80808e-05", "gnorm": "0.806", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "8673"} +[2022-07-30 13:15:24,394][train_inner][INFO] - {"epoch": 1, "update": 0.567, "loss": "3.152", "ppl": "8.89", "wps": "396491", "ups": "3.37", "wpb": "117745", "bsz": "256", "num_updates": "29200", "lr": "9.80606e-05", "gnorm": "0.809", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "8733"} +[2022-07-30 13:16:23,834][train_inner][INFO] - {"epoch": 1, "update": 0.571, "loss": "3.155", "ppl": "8.91", "wps": "397029", "ups": "3.36", "wpb": "117997", "bsz": "256", "num_updates": "29400", "lr": "9.80404e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "8792"} +[2022-07-30 13:16:34,132][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 13:17:23,450][train_inner][INFO] - {"epoch": 1, "update": 0.575, "loss": "3.139", "ppl": "8.81", "wps": "397985", "ups": "3.35", "wpb": "118629", "bsz": "256", "num_updates": "29600", "lr": "9.80202e-05", "gnorm": "0.803", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "8852"} +[2022-07-30 13:18:23,108][train_inner][INFO] - {"epoch": 1, "update": 0.579, "loss": "3.141", "ppl": "8.82", "wps": "396744", "ups": "3.35", "wpb": "118343", "bsz": "256", "num_updates": "29800", "lr": "9.8e-05", "gnorm": "0.807", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "8912"} +[2022-07-30 13:19:22,670][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "3.134", "ppl": "8.78", "wps": "398908", "ups": "3.36", "wpb": "118800", "bsz": "256", "num_updates": "30000", "lr": "9.79798e-05", "gnorm": "0.805", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "8971"} +[2022-07-30 13:20:22,274][train_inner][INFO] - {"epoch": 1, "update": 0.587, "loss": "3.131", "ppl": "8.76", "wps": "396042", "ups": "3.36", "wpb": "118028", "bsz": "256", "num_updates": "30200", "lr": "9.79596e-05", "gnorm": "0.809", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "9031"} +[2022-07-30 13:21:21,671][train_inner][INFO] - {"epoch": 1, "update": 0.591, "loss": "3.136", "ppl": "8.79", "wps": "397476", "ups": "3.37", "wpb": "118044", "bsz": "256", "num_updates": "30400", "lr": "9.79394e-05", "gnorm": "0.807", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "9090"} +[2022-07-30 13:22:21,019][train_inner][INFO] - {"epoch": 1, "update": 0.595, "loss": "3.128", "ppl": "8.74", "wps": "398170", "ups": "3.37", "wpb": "118151", "bsz": "256", "num_updates": "30600", "lr": "9.79192e-05", "gnorm": "0.809", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "9149"} +[2022-07-30 13:23:20,390][train_inner][INFO] - {"epoch": 1, "update": 0.598, "loss": "3.108", "ppl": "8.62", "wps": "400480", "ups": "3.37", "wpb": "118884", "bsz": "256", "num_updates": "30800", "lr": "9.7899e-05", "gnorm": "0.804", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "9209"} +[2022-07-30 13:24:20,165][train_inner][INFO] - {"epoch": 1, "update": 0.602, "loss": "3.117", "ppl": "8.68", "wps": "396408", "ups": "3.35", "wpb": "118476", "bsz": "256", "num_updates": "31000", "lr": "9.78788e-05", "gnorm": "0.807", "loss_scale": "32", "train_wall": "59", "gb_free": "25.7", "wall": "9269"} +[2022-07-30 13:25:19,347][train_inner][INFO] - {"epoch": 1, "update": 0.606, "loss": "3.105", "ppl": "8.61", "wps": "398295", "ups": "3.38", "wpb": "117860", "bsz": "256", "num_updates": "31200", "lr": "9.78586e-05", "gnorm": "0.81", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "9328"} +[2022-07-30 13:26:18,824][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "3.104", "ppl": "8.6", "wps": "397100", "ups": "3.36", "wpb": "118090", "bsz": "256", "num_updates": "31400", "lr": "9.78384e-05", "gnorm": "0.808", "loss_scale": "32", "train_wall": "59", "gb_free": "26", "wall": "9387"} +[2022-07-30 13:27:18,348][train_inner][INFO] - {"epoch": 1, "update": 0.614, "loss": "3.1", "ppl": "8.57", "wps": "396236", "ups": "3.36", "wpb": "117927", "bsz": "256", "num_updates": "31600", "lr": "9.78182e-05", "gnorm": "0.809", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "9447"} +[2022-07-30 13:28:18,030][train_inner][INFO] - {"epoch": 1, "update": 0.618, "loss": "3.093", "ppl": "8.53", "wps": "395249", "ups": "3.35", "wpb": "117946", "bsz": "256", "num_updates": "31800", "lr": "9.7798e-05", "gnorm": "0.811", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "9506"} +[2022-07-30 13:29:17,599][train_inner][INFO] - {"epoch": 1, "update": 0.622, "loss": "3.083", "ppl": "8.47", "wps": "399330", "ups": "3.36", "wpb": "118938", "bsz": "256", "num_updates": "32000", "lr": "9.77778e-05", "gnorm": "0.805", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "9566"} +[2022-07-30 13:30:17,250][train_inner][INFO] - {"epoch": 1, "update": 0.626, "loss": "3.086", "ppl": "8.49", "wps": "398545", "ups": "3.35", "wpb": "118868", "bsz": "256", "num_updates": "32200", "lr": "9.77576e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "27.4", "wall": "9626"} +[2022-07-30 13:31:16,694][train_inner][INFO] - {"epoch": 1, "update": 0.63, "loss": "3.082", "ppl": "8.47", "wps": "396276", "ups": "3.36", "wpb": "117780", "bsz": "256", "num_updates": "32400", "lr": "9.77374e-05", "gnorm": "0.811", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "9685"} +[2022-07-30 13:32:15,975][train_inner][INFO] - {"epoch": 1, "update": 0.633, "loss": "3.076", "ppl": "8.43", "wps": "399428", "ups": "3.37", "wpb": "118392", "bsz": "256", "num_updates": "32600", "lr": "9.77172e-05", "gnorm": "0.809", "loss_scale": "64", "train_wall": "59", "gb_free": "28.1", "wall": "9744"} +[2022-07-30 13:33:15,340][train_inner][INFO] - {"epoch": 1, "update": 0.637, "loss": "3.068", "ppl": "8.38", "wps": "397649", "ups": "3.37", "wpb": "118032", "bsz": "256", "num_updates": "32800", "lr": "9.7697e-05", "gnorm": "0.809", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "9804"} +[2022-07-30 13:34:14,608][train_inner][INFO] - {"epoch": 1, "update": 0.641, "loss": "3.071", "ppl": "8.4", "wps": "399467", "ups": "3.37", "wpb": "118376", "bsz": "256", "num_updates": "33000", "lr": "9.76768e-05", "gnorm": "0.808", "loss_scale": "64", "train_wall": "59", "gb_free": "25", "wall": "9863"} +[2022-07-30 13:35:13,769][train_inner][INFO] - {"epoch": 1, "update": 0.645, "loss": "3.067", "ppl": "8.38", "wps": "399237", "ups": "3.38", "wpb": "118095", "bsz": "256", "num_updates": "33200", "lr": "9.76566e-05", "gnorm": "0.809", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "9922"} +[2022-07-30 13:36:13,546][train_inner][INFO] - {"epoch": 1, "update": 0.649, "loss": "3.055", "ppl": "8.31", "wps": "394918", "ups": "3.35", "wpb": "118036", "bsz": "256", "num_updates": "33400", "lr": "9.76364e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "25", "wall": "9982"} +[2022-07-30 13:37:12,649][train_inner][INFO] - {"epoch": 1, "update": 0.653, "loss": "3.059", "ppl": "8.33", "wps": "399419", "ups": "3.38", "wpb": "118032", "bsz": "256", "num_updates": "33600", "lr": "9.76162e-05", "gnorm": "0.81", "loss_scale": "128", "train_wall": "59", "gb_free": "24.9", "wall": "10041"} +[2022-07-30 13:37:38,500][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 13:38:12,506][train_inner][INFO] - {"epoch": 1, "update": 0.657, "loss": "3.05", "ppl": "8.28", "wps": "397042", "ups": "3.34", "wpb": "118829", "bsz": "256", "num_updates": "33800", "lr": "9.7596e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "60", "gb_free": "21.4", "wall": "10101"} +[2022-07-30 13:39:12,215][train_inner][INFO] - {"epoch": 1, "update": 0.661, "loss": "3.052", "ppl": "8.3", "wps": "395952", "ups": "3.35", "wpb": "118208", "bsz": "256", "num_updates": "34000", "lr": "9.75758e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "10161"} +[2022-07-30 13:40:11,751][train_inner][INFO] - {"epoch": 1, "update": 0.665, "loss": "3.048", "ppl": "8.27", "wps": "396620", "ups": "3.36", "wpb": "118064", "bsz": "256", "num_updates": "34200", "lr": "9.75556e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "28.9", "wall": "10220"} +[2022-07-30 13:41:11,134][train_inner][INFO] - {"epoch": 1, "update": 0.668, "loss": "3.041", "ppl": "8.23", "wps": "398053", "ups": "3.37", "wpb": "118187", "bsz": "256", "num_updates": "34400", "lr": "9.75354e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "28", "wall": "10280"} +[2022-07-30 13:42:10,492][train_inner][INFO] - {"epoch": 1, "update": 0.672, "loss": "3.034", "ppl": "8.19", "wps": "397816", "ups": "3.37", "wpb": "118067", "bsz": "256", "num_updates": "34600", "lr": "9.75152e-05", "gnorm": "0.811", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "10339"} +[2022-07-30 13:43:10,090][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "3.027", "ppl": "8.15", "wps": "398257", "ups": "3.36", "wpb": "118676", "bsz": "256", "num_updates": "34800", "lr": "9.74949e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "10399"} +[2022-07-30 13:44:09,394][train_inner][INFO] - {"epoch": 1, "update": 0.68, "loss": "3.031", "ppl": "8.17", "wps": "395706", "ups": "3.37", "wpb": "117334", "bsz": "256", "num_updates": "35000", "lr": "9.74747e-05", "gnorm": "0.815", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "10458"} +[2022-07-30 13:45:09,106][train_inner][INFO] - {"epoch": 1, "update": 0.684, "loss": "3.019", "ppl": "8.11", "wps": "398366", "ups": "3.35", "wpb": "118936", "bsz": "256", "num_updates": "35200", "lr": "9.74545e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "10518"} +[2022-07-30 13:46:10,040][train_inner][INFO] - {"epoch": 1, "update": 0.688, "loss": "3.016", "ppl": "8.09", "wps": "386838", "ups": "3.28", "wpb": "117858", "bsz": "256", "num_updates": "35400", "lr": "9.74343e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "61", "gb_free": "27.1", "wall": "10578"} +[2022-07-30 13:47:09,283][train_inner][INFO] - {"epoch": 1, "update": 0.692, "loss": "3.018", "ppl": "8.1", "wps": "401970", "ups": "3.38", "wpb": "119068", "bsz": "256", "num_updates": "35600", "lr": "9.74141e-05", "gnorm": "0.809", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "10638"} +[2022-07-30 13:48:09,258][train_inner][INFO] - {"epoch": 1, "update": 0.696, "loss": "3.015", "ppl": "8.08", "wps": "396016", "ups": "3.33", "wpb": "118755", "bsz": "256", "num_updates": "35800", "lr": "9.73939e-05", "gnorm": "0.81", "loss_scale": "128", "train_wall": "60", "gb_free": "23.1", "wall": "10698"} +[2022-07-30 13:49:08,846][train_inner][INFO] - {"epoch": 1, "update": 0.699, "loss": "3.016", "ppl": "8.09", "wps": "396115", "ups": "3.36", "wpb": "118017", "bsz": "256", "num_updates": "36000", "lr": "9.73737e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "59", "gb_free": "23.4", "wall": "10757"} +[2022-07-30 13:49:38,269][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 13:50:08,486][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "3.009", "ppl": "8.05", "wps": "396887", "ups": "3.35", "wpb": "118352", "bsz": "256", "num_updates": "36200", "lr": "9.73535e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "10817"} +[2022-07-30 13:51:08,217][train_inner][INFO] - {"epoch": 1, "update": 0.707, "loss": "3.001", "ppl": "8", "wps": "398018", "ups": "3.35", "wpb": "118869", "bsz": "256", "num_updates": "36400", "lr": "9.73333e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "10877"} +[2022-07-30 13:52:07,387][train_inner][INFO] - {"epoch": 1, "update": 0.711, "loss": "2.995", "ppl": "7.97", "wps": "400462", "ups": "3.38", "wpb": "118475", "bsz": "256", "num_updates": "36600", "lr": "9.73131e-05", "gnorm": "0.81", "loss_scale": "64", "train_wall": "59", "gb_free": "25", "wall": "10936"} +[2022-07-30 13:53:07,128][train_inner][INFO] - {"epoch": 1, "update": 0.715, "loss": "2.993", "ppl": "7.96", "wps": "395926", "ups": "3.35", "wpb": "118266", "bsz": "256", "num_updates": "36800", "lr": "9.72929e-05", "gnorm": "0.814", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "10996"} +[2022-07-30 13:54:06,674][train_inner][INFO] - {"epoch": 1, "update": 0.719, "loss": "3", "ppl": "8", "wps": "395715", "ups": "3.36", "wpb": "117815", "bsz": "256", "num_updates": "37000", "lr": "9.72727e-05", "gnorm": "0.815", "loss_scale": "64", "train_wall": "59", "gb_free": "25.9", "wall": "11055"} +[2022-07-30 13:55:06,335][train_inner][INFO] - {"epoch": 1, "update": 0.723, "loss": "2.993", "ppl": "7.96", "wps": "395723", "ups": "3.35", "wpb": "118047", "bsz": "256", "num_updates": "37200", "lr": "9.72525e-05", "gnorm": "0.815", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "11115"} +[2022-07-30 13:56:05,694][train_inner][INFO] - {"epoch": 1, "update": 0.727, "loss": "2.985", "ppl": "7.92", "wps": "399612", "ups": "3.37", "wpb": "118600", "bsz": "256", "num_updates": "37400", "lr": "9.72323e-05", "gnorm": "0.813", "loss_scale": "64", "train_wall": "59", "gb_free": "24.9", "wall": "11174"} +[2022-07-30 13:57:05,339][train_inner][INFO] - {"epoch": 1, "update": 0.731, "loss": "2.981", "ppl": "7.9", "wps": "395627", "ups": "3.35", "wpb": "117985", "bsz": "256", "num_updates": "37600", "lr": "9.72121e-05", "gnorm": "0.814", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "11234"} +[2022-07-30 13:58:05,080][train_inner][INFO] - {"epoch": 1, "update": 0.734, "loss": "2.979", "ppl": "7.88", "wps": "397954", "ups": "3.35", "wpb": "118872", "bsz": "256", "num_updates": "37800", "lr": "9.71919e-05", "gnorm": "0.812", "loss_scale": "64", "train_wall": "59", "gb_free": "27.2", "wall": "11294"} +[2022-07-30 13:59:05,277][train_inner][INFO] - {"epoch": 1, "update": 0.738, "loss": "2.986", "ppl": "7.92", "wps": "392427", "ups": "3.32", "wpb": "118113", "bsz": "256", "num_updates": "38000", "lr": "9.71717e-05", "gnorm": "0.815", "loss_scale": "64", "train_wall": "60", "gb_free": "21.4", "wall": "11354"} +[2022-07-30 14:00:04,599][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "2.974", "ppl": "7.86", "wps": "399319", "ups": "3.37", "wpb": "118442", "bsz": "256", "num_updates": "38200", "lr": "9.71515e-05", "gnorm": "0.813", "loss_scale": "128", "train_wall": "59", "gb_free": "30.9", "wall": "11413"} +[2022-07-30 14:00:29,331][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 14:01:04,637][train_inner][INFO] - {"epoch": 1, "update": 0.746, "loss": "2.968", "ppl": "7.83", "wps": "395266", "ups": "3.33", "wpb": "118653", "bsz": "256", "num_updates": "38400", "lr": "9.71313e-05", "gnorm": "0.813", "loss_scale": "64", "train_wall": "60", "gb_free": "21.4", "wall": "11473"} +[2022-07-30 14:02:03,611][train_inner][INFO] - {"epoch": 1, "update": 0.75, "loss": "2.971", "ppl": "7.84", "wps": "398900", "ups": "3.39", "wpb": "117624", "bsz": "256", "num_updates": "38600", "lr": "9.71111e-05", "gnorm": "0.816", "loss_scale": "64", "train_wall": "59", "gb_free": "25", "wall": "11532"} +[2022-07-30 14:03:03,163][train_inner][INFO] - {"epoch": 1, "update": 0.754, "loss": "2.966", "ppl": "7.81", "wps": "395955", "ups": "3.36", "wpb": "117898", "bsz": "256", "num_updates": "38800", "lr": "9.70909e-05", "gnorm": "0.814", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "11592"} +[2022-07-30 14:04:02,885][train_inner][INFO] - {"epoch": 1, "update": 0.758, "loss": "2.965", "ppl": "7.81", "wps": "398078", "ups": "3.35", "wpb": "118869", "bsz": "255.9", "num_updates": "39000", "lr": "9.70707e-05", "gnorm": "0.812", "loss_scale": "64", "train_wall": "59", "gb_free": "29.6", "wall": "11651"} +[2022-07-30 14:05:01,876][train_inner][INFO] - {"epoch": 1, "update": 0.762, "loss": "2.966", "ppl": "7.81", "wps": "398674", "ups": "3.39", "wpb": "117589", "bsz": "256", "num_updates": "39200", "lr": "9.70505e-05", "gnorm": "0.817", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "11710"} +[2022-07-30 14:06:01,285][train_inner][INFO] - {"epoch": 1, "update": 0.766, "loss": "2.954", "ppl": "7.75", "wps": "395013", "ups": "3.37", "wpb": "117337", "bsz": "256", "num_updates": "39400", "lr": "9.70303e-05", "gnorm": "0.819", "loss_scale": "64", "train_wall": "59", "gb_free": "30.5", "wall": "11770"} +[2022-07-30 14:07:00,447][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.957", "ppl": "7.76", "wps": "397178", "ups": "3.38", "wpb": "117488", "bsz": "256", "num_updates": "39600", "lr": "9.70101e-05", "gnorm": "0.818", "loss_scale": "64", "train_wall": "59", "gb_free": "25.1", "wall": "11829"} +[2022-07-30 14:08:00,243][train_inner][INFO] - {"epoch": 1, "update": 0.773, "loss": "2.938", "ppl": "7.66", "wps": "395200", "ups": "3.34", "wpb": "118156", "bsz": "256", "num_updates": "39800", "lr": "9.69899e-05", "gnorm": "0.814", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "11889"} +[2022-07-30 14:08:59,363][train_inner][INFO] - {"epoch": 1, "update": 0.777, "loss": "2.95", "ppl": "7.73", "wps": "399898", "ups": "3.38", "wpb": "118208", "bsz": "256", "num_updates": "40000", "lr": "9.69697e-05", "gnorm": "0.815", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "11948"} +[2022-07-30 14:09:58,480][train_inner][INFO] - {"epoch": 1, "update": 0.781, "loss": "2.95", "ppl": "7.73", "wps": "399465", "ups": "3.38", "wpb": "118076", "bsz": "256", "num_updates": "40200", "lr": "9.69495e-05", "gnorm": "0.817", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "12007"} +[2022-07-30 14:10:57,867][train_inner][INFO] - {"epoch": 1, "update": 0.785, "loss": "2.944", "ppl": "7.7", "wps": "397128", "ups": "3.37", "wpb": "117919", "bsz": "256", "num_updates": "40400", "lr": "9.69293e-05", "gnorm": "0.819", "loss_scale": "128", "train_wall": "59", "gb_free": "22", "wall": "12066"} +[2022-07-30 14:11:56,933][train_inner][INFO] - {"epoch": 1, "update": 0.789, "loss": "2.939", "ppl": "7.67", "wps": "400245", "ups": "3.39", "wpb": "118204", "bsz": "256", "num_updates": "40600", "lr": "9.69091e-05", "gnorm": "0.817", "loss_scale": "128", "train_wall": "59", "gb_free": "25.7", "wall": "12125"} +[2022-07-30 14:12:56,360][train_inner][INFO] - {"epoch": 1, "update": 0.793, "loss": "2.931", "ppl": "7.63", "wps": "397452", "ups": "3.37", "wpb": "118096", "bsz": "256", "num_updates": "40800", "lr": "9.68889e-05", "gnorm": "0.818", "loss_scale": "128", "train_wall": "59", "gb_free": "31.4", "wall": "12185"} +[2022-07-30 14:13:56,125][train_inner][INFO] - {"epoch": 1, "update": 0.797, "loss": "2.936", "ppl": "7.65", "wps": "395122", "ups": "3.35", "wpb": "118072", "bsz": "256", "num_updates": "41000", "lr": "9.68687e-05", "gnorm": "0.818", "loss_scale": "128", "train_wall": "59", "gb_free": "27.3", "wall": "12245"} +[2022-07-30 14:14:55,728][train_inner][INFO] - {"epoch": 1, "update": 0.8, "loss": "2.932", "ppl": "7.63", "wps": "395574", "ups": "3.36", "wpb": "117886", "bsz": "256", "num_updates": "41200", "lr": "9.68485e-05", "gnorm": "0.819", "loss_scale": "128", "train_wall": "59", "gb_free": "25.6", "wall": "12304"} +[2022-07-30 14:15:55,075][train_inner][INFO] - {"epoch": 1, "update": 0.804, "loss": "2.917", "ppl": "7.56", "wps": "398312", "ups": "3.37", "wpb": "118193", "bsz": "256", "num_updates": "41400", "lr": "9.68283e-05", "gnorm": "0.816", "loss_scale": "128", "train_wall": "59", "gb_free": "23.2", "wall": "12364"} +[2022-07-30 14:16:55,697][train_inner][INFO] - {"epoch": 1, "update": 0.808, "loss": "2.916", "ppl": "7.55", "wps": "391157", "ups": "3.3", "wpb": "118563", "bsz": "256", "num_updates": "41600", "lr": "9.68081e-05", "gnorm": "0.815", "loss_scale": "128", "train_wall": "60", "gb_free": "21.7", "wall": "12424"} +[2022-07-30 14:17:08,197][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 14:17:30,238][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 14:17:55,790][train_inner][INFO] - {"epoch": 1, "update": 0.812, "loss": "2.918", "ppl": "7.56", "wps": "393388", "ups": "3.33", "wpb": "118198", "bsz": "256", "num_updates": "41800", "lr": "9.67879e-05", "gnorm": "0.819", "loss_scale": "32", "train_wall": "60", "gb_free": "21.6", "wall": "12484"} +[2022-07-30 14:18:55,489][train_inner][INFO] - {"epoch": 1, "update": 0.816, "loss": "2.924", "ppl": "7.59", "wps": "395564", "ups": "3.35", "wpb": "118075", "bsz": "256", "num_updates": "42000", "lr": "9.67677e-05", "gnorm": "0.818", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "12544"} +[2022-07-30 14:19:55,212][train_inner][INFO] - {"epoch": 1, "update": 0.82, "loss": "2.92", "ppl": "7.57", "wps": "393877", "ups": "3.35", "wpb": "117616", "bsz": "256", "num_updates": "42200", "lr": "9.67475e-05", "gnorm": "0.82", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "12604"} +[2022-07-30 14:20:54,708][train_inner][INFO] - {"epoch": 1, "update": 0.824, "loss": "2.914", "ppl": "7.53", "wps": "397524", "ups": "3.36", "wpb": "118254", "bsz": "256", "num_updates": "42400", "lr": "9.67273e-05", "gnorm": "0.818", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "12663"} +[2022-07-30 14:21:54,420][train_inner][INFO] - {"epoch": 1, "update": 0.828, "loss": "2.907", "ppl": "7.5", "wps": "394936", "ups": "3.35", "wpb": "117910", "bsz": "256", "num_updates": "42600", "lr": "9.67071e-05", "gnorm": "0.819", "loss_scale": "32", "train_wall": "59", "gb_free": "25.6", "wall": "12723"} +[2022-07-30 14:22:53,902][train_inner][INFO] - {"epoch": 1, "update": 0.832, "loss": "2.908", "ppl": "7.5", "wps": "397070", "ups": "3.36", "wpb": "118092", "bsz": "256", "num_updates": "42800", "lr": "9.66869e-05", "gnorm": "0.821", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "12782"} +[2022-07-30 14:23:53,483][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.897", "ppl": "7.45", "wps": "397940", "ups": "3.36", "wpb": "118546", "bsz": "256", "num_updates": "43000", "lr": "9.66667e-05", "gnorm": "0.817", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "12842"} +[2022-07-30 14:24:53,100][train_inner][INFO] - {"epoch": 1, "update": 0.839, "loss": "2.903", "ppl": "7.48", "wps": "397932", "ups": "3.35", "wpb": "118617", "bsz": "256", "num_updates": "43200", "lr": "9.66465e-05", "gnorm": "0.817", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "12902"} +[2022-07-30 14:25:52,538][train_inner][INFO] - {"epoch": 1, "update": 0.843, "loss": "2.89", "ppl": "7.41", "wps": "399785", "ups": "3.36", "wpb": "118812", "bsz": "256", "num_updates": "43400", "lr": "9.66263e-05", "gnorm": "0.818", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "12961"} +[2022-07-30 14:26:51,666][train_inner][INFO] - {"epoch": 1, "update": 0.847, "loss": "2.892", "ppl": "7.42", "wps": "402463", "ups": "3.38", "wpb": "118984", "bsz": "256", "num_updates": "43600", "lr": "9.66061e-05", "gnorm": "0.817", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "13020"} +[2022-07-30 14:27:52,372][train_inner][INFO] - {"epoch": 1, "update": 0.851, "loss": "2.892", "ppl": "7.42", "wps": "390057", "ups": "3.29", "wpb": "118393", "bsz": "256", "num_updates": "43800", "lr": "9.65859e-05", "gnorm": "0.819", "loss_scale": "64", "train_wall": "60", "gb_free": "22.6", "wall": "13081"} +[2022-07-30 14:28:51,741][train_inner][INFO] - {"epoch": 1, "update": 0.855, "loss": "2.89", "ppl": "7.41", "wps": "397646", "ups": "3.37", "wpb": "118038", "bsz": "256", "num_updates": "44000", "lr": "9.65657e-05", "gnorm": "0.822", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "13140"} +[2022-07-30 14:29:51,082][train_inner][INFO] - {"epoch": 1, "update": 0.859, "loss": "2.886", "ppl": "7.39", "wps": "399110", "ups": "3.37", "wpb": "118417", "bsz": "256", "num_updates": "44200", "lr": "9.65455e-05", "gnorm": "0.819", "loss_scale": "64", "train_wall": "59", "gb_free": "26.9", "wall": "13200"} +[2022-07-30 14:30:50,528][train_inner][INFO] - {"epoch": 1, "update": 0.863, "loss": "2.892", "ppl": "7.43", "wps": "398220", "ups": "3.36", "wpb": "118361", "bsz": "256", "num_updates": "44400", "lr": "9.65253e-05", "gnorm": "0.82", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "13259"} +[2022-07-30 14:31:50,734][train_inner][INFO] - {"epoch": 1, "update": 0.867, "loss": "2.88", "ppl": "7.36", "wps": "392203", "ups": "3.32", "wpb": "118064", "bsz": "256", "num_updates": "44600", "lr": "9.65051e-05", "gnorm": "0.821", "loss_scale": "64", "train_wall": "60", "gb_free": "21.9", "wall": "13319"} +[2022-07-30 14:32:49,999][train_inner][INFO] - {"epoch": 1, "update": 0.87, "loss": "2.877", "ppl": "7.35", "wps": "399818", "ups": "3.37", "wpb": "118475", "bsz": "256", "num_updates": "44800", "lr": "9.64848e-05", "gnorm": "0.821", "loss_scale": "64", "train_wall": "59", "gb_free": "26.9", "wall": "13378"} +[2022-07-30 14:33:49,348][train_inner][INFO] - {"epoch": 1, "update": 0.874, "loss": "2.882", "ppl": "7.37", "wps": "399540", "ups": "3.37", "wpb": "118561", "bsz": "256", "num_updates": "45000", "lr": "9.64646e-05", "gnorm": "0.821", "loss_scale": "64", "train_wall": "59", "gb_free": "22.8", "wall": "13438"} +[2022-07-30 14:34:48,625][train_inner][INFO] - {"epoch": 1, "update": 0.878, "loss": "2.869", "ppl": "7.3", "wps": "401058", "ups": "3.37", "wpb": "118858", "bsz": "256", "num_updates": "45200", "lr": "9.64444e-05", "gnorm": "0.817", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "13497"} +[2022-07-30 14:35:47,593][train_inner][INFO] - {"epoch": 1, "update": 0.882, "loss": "2.87", "ppl": "7.31", "wps": "401014", "ups": "3.39", "wpb": "118235", "bsz": "256", "num_updates": "45400", "lr": "9.64242e-05", "gnorm": "0.821", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "13556"} +[2022-07-30 14:36:46,927][train_inner][INFO] - {"epoch": 1, "update": 0.886, "loss": "2.864", "ppl": "7.28", "wps": "400364", "ups": "3.37", "wpb": "118776", "bsz": "256", "num_updates": "45600", "lr": "9.6404e-05", "gnorm": "0.82", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "13615"} +[2022-07-30 14:37:46,317][train_inner][INFO] - {"epoch": 1, "update": 0.89, "loss": "2.864", "ppl": "7.28", "wps": "399174", "ups": "3.37", "wpb": "118532", "bsz": "256", "num_updates": "45800", "lr": "9.63838e-05", "gnorm": "0.823", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "13675"} +[2022-07-30 14:38:46,083][train_inner][INFO] - {"epoch": 1, "update": 0.894, "loss": "2.863", "ppl": "7.27", "wps": "397183", "ups": "3.35", "wpb": "118689", "bsz": "256", "num_updates": "46000", "lr": "9.63636e-05", "gnorm": "0.821", "loss_scale": "128", "train_wall": "59", "gb_free": "22", "wall": "13735"} +[2022-07-30 14:38:55,985][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 14:39:46,022][train_inner][INFO] - {"epoch": 1, "update": 0.898, "loss": "2.862", "ppl": "7.27", "wps": "396241", "ups": "3.34", "wpb": "118752", "bsz": "256", "num_updates": "46200", "lr": "9.63434e-05", "gnorm": "0.822", "loss_scale": "64", "train_wall": "60", "gb_free": "26.9", "wall": "13794"} +[2022-07-30 14:40:45,445][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "2.855", "ppl": "7.23", "wps": "399172", "ups": "3.37", "wpb": "118598", "bsz": "256", "num_updates": "46400", "lr": "9.63232e-05", "gnorm": "0.821", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "13854"} +[2022-07-30 14:41:44,491][train_inner][INFO] - {"epoch": 1, "update": 0.905, "loss": "2.857", "ppl": "7.25", "wps": "400438", "ups": "3.39", "wpb": "118222", "bsz": "256", "num_updates": "46600", "lr": "9.6303e-05", "gnorm": "0.823", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "13913"} +[2022-07-30 14:42:43,965][train_inner][INFO] - {"epoch": 1, "update": 0.909, "loss": "2.855", "ppl": "7.23", "wps": "397083", "ups": "3.36", "wpb": "118078", "bsz": "256", "num_updates": "46800", "lr": "9.62828e-05", "gnorm": "0.826", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "13972"} +[2022-07-30 14:43:43,474][train_inner][INFO] - {"epoch": 1, "update": 0.913, "loss": "2.856", "ppl": "7.24", "wps": "396931", "ups": "3.36", "wpb": "118104", "bsz": "256", "num_updates": "47000", "lr": "9.62626e-05", "gnorm": "0.824", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "14032"} +[2022-07-30 14:44:42,788][train_inner][INFO] - {"epoch": 1, "update": 0.917, "loss": "2.849", "ppl": "7.21", "wps": "397669", "ups": "3.37", "wpb": "117936", "bsz": "256", "num_updates": "47200", "lr": "9.62424e-05", "gnorm": "0.824", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "14091"} +[2022-07-30 14:45:42,374][train_inner][INFO] - {"epoch": 1, "update": 0.921, "loss": "2.852", "ppl": "7.22", "wps": "396754", "ups": "3.36", "wpb": "118205", "bsz": "256", "num_updates": "47400", "lr": "9.62222e-05", "gnorm": "0.824", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "14151"} +[2022-07-30 14:46:41,210][train_inner][INFO] - {"epoch": 1, "update": 0.925, "loss": "2.852", "ppl": "7.22", "wps": "400258", "ups": "3.4", "wpb": "117748", "bsz": "256", "num_updates": "47600", "lr": "9.6202e-05", "gnorm": "0.826", "loss_scale": "64", "train_wall": "58", "gb_free": "23.4", "wall": "14210"} +[2022-07-30 14:47:41,909][train_inner][INFO] - {"epoch": 1, "update": 0.929, "loss": "2.842", "ppl": "7.17", "wps": "390383", "ups": "3.3", "wpb": "118477", "bsz": "256", "num_updates": "47800", "lr": "9.61818e-05", "gnorm": "0.822", "loss_scale": "64", "train_wall": "60", "gb_free": "21.5", "wall": "14270"} +[2022-07-30 14:48:41,676][train_inner][INFO] - {"epoch": 1, "update": 0.933, "loss": "2.834", "ppl": "7.13", "wps": "397348", "ups": "3.35", "wpb": "118741", "bsz": "256", "num_updates": "48000", "lr": "9.61616e-05", "gnorm": "0.821", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "14330"} +[2022-07-30 14:49:41,261][train_inner][INFO] - {"epoch": 1, "update": 0.937, "loss": "2.839", "ppl": "7.16", "wps": "396241", "ups": "3.36", "wpb": "118048", "bsz": "256", "num_updates": "48200", "lr": "9.61414e-05", "gnorm": "0.824", "loss_scale": "128", "train_wall": "59", "gb_free": "21.9", "wall": "14390"} +[2022-07-30 14:50:07,958][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 14:50:41,016][train_inner][INFO] - {"epoch": 1, "update": 0.94, "loss": "2.848", "ppl": "7.2", "wps": "394849", "ups": "3.35", "wpb": "117970", "bsz": "256", "num_updates": "48400", "lr": "9.61212e-05", "gnorm": "0.828", "loss_scale": "64", "train_wall": "59", "gb_free": "25.2", "wall": "14449"} +[2022-07-30 14:51:40,145][train_inner][INFO] - {"epoch": 1, "update": 0.944, "loss": "2.828", "ppl": "7.1", "wps": "399751", "ups": "3.38", "wpb": "118183", "bsz": "256", "num_updates": "48600", "lr": "9.6101e-05", "gnorm": "0.826", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "14509"} +[2022-07-30 14:52:39,420][train_inner][INFO] - {"epoch": 1, "update": 0.948, "loss": "2.836", "ppl": "7.14", "wps": "399488", "ups": "3.37", "wpb": "118398", "bsz": "256", "num_updates": "48800", "lr": "9.60808e-05", "gnorm": "0.828", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "14568"} +[2022-07-30 14:53:40,262][train_inner][INFO] - {"epoch": 1, "update": 0.952, "loss": "2.827", "ppl": "7.1", "wps": "389456", "ups": "3.29", "wpb": "118477", "bsz": "256", "num_updates": "49000", "lr": "9.60606e-05", "gnorm": "0.827", "loss_scale": "64", "train_wall": "60", "gb_free": "22.1", "wall": "14629"} +[2022-07-30 14:54:39,141][train_inner][INFO] - {"epoch": 1, "update": 0.956, "loss": "2.824", "ppl": "7.08", "wps": "400360", "ups": "3.4", "wpb": "117862", "bsz": "256", "num_updates": "49200", "lr": "9.60404e-05", "gnorm": "0.828", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "14688"} +[2022-07-30 14:55:38,544][train_inner][INFO] - {"epoch": 1, "update": 0.96, "loss": "2.826", "ppl": "7.09", "wps": "399816", "ups": "3.37", "wpb": "118750", "bsz": "256", "num_updates": "49400", "lr": "9.60202e-05", "gnorm": "0.825", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "14747"} +[2022-07-30 14:56:38,237][train_inner][INFO] - {"epoch": 1, "update": 0.964, "loss": "2.827", "ppl": "7.1", "wps": "394069", "ups": "3.35", "wpb": "117616", "bsz": "256", "num_updates": "49600", "lr": "9.6e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "14807"} +[2022-07-30 14:57:37,745][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "2.82", "ppl": "7.06", "wps": "396450", "ups": "3.36", "wpb": "117959", "bsz": "256", "num_updates": "49800", "lr": "9.59798e-05", "gnorm": "0.829", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "14866"} +[2022-07-30 14:58:36,981][train_inner][INFO] - {"epoch": 1, "update": 0.971, "loss": "2.818", "ppl": "7.05", "wps": "400252", "ups": "3.38", "wpb": "118545", "bsz": "256", "num_updates": "50000", "lr": "9.59596e-05", "gnorm": "0.826", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "14925"} +[2022-07-30 14:58:36,982][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-30 14:58:36,989][fairseq.tasks.fairseq_task][WARNING] - 576 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[2086, 18643, 32310, 47209, 49755, 37591, 48266, 19397, 19415, 38832] +[2022-07-30 14:58:59,622][valid][INFO] - {"epoch": 1, "valid_loss": "2.699", "valid_ppl": "6.49", "valid_wps": "1.61138e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "50000"} +[2022-07-30 14:58:59,625][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 50000 updates +[2022-07-30 14:58:59,626][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_1_50000.pt +[2022-07-30 14:59:06,830][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_1_50000.pt +[2022-07-30 14:59:27,670][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_1_50000.pt (epoch 1 @ 50000 updates, score 2.699) (writing took 28.044438926503062 seconds) +[2022-07-30 15:00:27,217][train_inner][INFO] - {"epoch": 1, "update": 0.975, "loss": "2.822", "ppl": "7.07", "wps": "214265", "ups": "1.81", "wpb": "118098", "bsz": "256", "num_updates": "50200", "lr": "9.59394e-05", "gnorm": "0.828", "loss_scale": "64", "train_wall": "59", "gb_free": "25.5", "wall": "15036"} +[2022-07-30 15:01:22,339][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 15:01:27,119][train_inner][INFO] - {"epoch": 1, "update": 0.979, "loss": "2.814", "ppl": "7.03", "wps": "394894", "ups": "3.34", "wpb": "118273", "bsz": "256", "num_updates": "50400", "lr": "9.59192e-05", "gnorm": "0.829", "loss_scale": "64", "train_wall": "60", "gb_free": "24.2", "wall": "15096"} +[2022-07-30 15:02:26,456][train_inner][INFO] - {"epoch": 1, "update": 0.983, "loss": "2.809", "ppl": "7.01", "wps": "397392", "ups": "3.37", "wpb": "117900", "bsz": "256", "num_updates": "50600", "lr": "9.5899e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "15155"} +[2022-07-30 15:03:25,697][train_inner][INFO] - {"epoch": 1, "update": 0.987, "loss": "2.819", "ppl": "7.06", "wps": "399919", "ups": "3.38", "wpb": "118457", "bsz": "256", "num_updates": "50800", "lr": "9.58788e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "23.5", "wall": "15214"} +[2022-07-30 15:04:25,308][train_inner][INFO] - {"epoch": 1, "update": 0.991, "loss": "2.815", "ppl": "7.04", "wps": "396105", "ups": "3.36", "wpb": "118060", "bsz": "256", "num_updates": "51000", "lr": "9.58586e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "15274"} +[2022-07-30 15:05:25,063][train_inner][INFO] - {"epoch": 1, "update": 0.995, "loss": "2.807", "ppl": "7", "wps": "397234", "ups": "3.35", "wpb": "118684", "bsz": "256", "num_updates": "51200", "lr": "9.58384e-05", "gnorm": "0.829", "loss_scale": "64", "train_wall": "59", "gb_free": "23.5", "wall": "15333"} +[2022-07-30 15:06:24,644][train_inner][INFO] - {"epoch": 1, "update": 0.999, "loss": "2.808", "ppl": "7", "wps": "396838", "ups": "3.36", "wpb": "118219", "bsz": "256", "num_updates": "51400", "lr": "9.58182e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "15393"} +[2022-07-30 15:06:44,502][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below) +[2022-07-30 15:06:44,503][train][INFO] - {"epoch": 1, "train_loss": "4.474", "train_ppl": "22.23", "train_wps": "395942", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "51467", "train_lr": "9.58114e-05", "train_gnorm": "0.894", "train_loss_scale": "64", "train_train_wall": "15242", "train_gb_free": "24.3", "train_wall": "15413"} +[2022-07-30 15:06:44,590][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-30 15:06:44,593][fairseq.trainer][INFO] - begin training epoch 2 +[2022-07-30 15:06:44,593][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-30 15:07:35,431][train_inner][INFO] - {"epoch": 2, "update": 1.003, "loss": "2.801", "ppl": "6.97", "wps": "332407", "ups": "2.83", "wpb": "117650", "bsz": "255.4", "num_updates": "51600", "lr": "9.5798e-05", "gnorm": "0.835", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "15464"} +[2022-07-30 15:08:34,686][train_inner][INFO] - {"epoch": 2, "update": 1.006, "loss": "2.801", "ppl": "6.97", "wps": "399336", "ups": "3.38", "wpb": "118311", "bsz": "256", "num_updates": "51800", "lr": "9.57778e-05", "gnorm": "0.83", "loss_scale": "64", "train_wall": "59", "gb_free": "33.7", "wall": "15523"} +[2022-07-30 15:09:33,900][train_inner][INFO] - {"epoch": 2, "update": 1.01, "loss": "2.799", "ppl": "6.96", "wps": "397857", "ups": "3.38", "wpb": "117793", "bsz": "256", "num_updates": "52000", "lr": "9.57576e-05", "gnorm": "0.833", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "15582"} +[2022-07-30 15:10:33,575][train_inner][INFO] - {"epoch": 2, "update": 1.014, "loss": "2.789", "ppl": "6.91", "wps": "397493", "ups": "3.35", "wpb": "118602", "bsz": "256", "num_updates": "52200", "lr": "9.57374e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "15642"} +[2022-07-30 15:11:33,308][train_inner][INFO] - {"epoch": 2, "update": 1.018, "loss": "2.784", "ppl": "6.89", "wps": "396279", "ups": "3.35", "wpb": "118354", "bsz": "256", "num_updates": "52400", "lr": "9.57172e-05", "gnorm": "0.829", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "15702"} +[2022-07-30 15:12:33,772][train_inner][INFO] - {"epoch": 2, "update": 1.022, "loss": "2.791", "ppl": "6.92", "wps": "390066", "ups": "3.31", "wpb": "117923", "bsz": "256", "num_updates": "52600", "lr": "9.5697e-05", "gnorm": "0.832", "loss_scale": "128", "train_wall": "60", "gb_free": "21.5", "wall": "15762"} +[2022-07-30 15:13:33,434][train_inner][INFO] - {"epoch": 2, "update": 1.026, "loss": "2.789", "ppl": "6.91", "wps": "395150", "ups": "3.35", "wpb": "117876", "bsz": "256", "num_updates": "52800", "lr": "9.56768e-05", "gnorm": "0.832", "loss_scale": "128", "train_wall": "59", "gb_free": "23.3", "wall": "15822"} +[2022-07-30 15:14:18,104][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 15:14:33,100][train_inner][INFO] - {"epoch": 2, "update": 1.03, "loss": "2.779", "ppl": "6.86", "wps": "397705", "ups": "3.35", "wpb": "118648", "bsz": "256", "num_updates": "53000", "lr": "9.56566e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "15882"} +[2022-07-30 15:15:32,284][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "2.78", "ppl": "6.87", "wps": "400651", "ups": "3.38", "wpb": "118559", "bsz": "256", "num_updates": "53200", "lr": "9.56364e-05", "gnorm": "0.83", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "15941"} +[2022-07-30 15:16:31,589][train_inner][INFO] - {"epoch": 2, "update": 1.038, "loss": "2.787", "ppl": "6.9", "wps": "400419", "ups": "3.37", "wpb": "118734", "bsz": "256", "num_updates": "53400", "lr": "9.56162e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "16000"} +[2022-07-30 15:17:30,971][train_inner][INFO] - {"epoch": 2, "update": 1.041, "loss": "2.784", "ppl": "6.89", "wps": "396492", "ups": "3.37", "wpb": "117721", "bsz": "256", "num_updates": "53600", "lr": "9.5596e-05", "gnorm": "0.835", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "16059"} +[2022-07-30 15:18:31,514][train_inner][INFO] - {"epoch": 2, "update": 1.045, "loss": "2.78", "ppl": "6.87", "wps": "390917", "ups": "3.3", "wpb": "118336", "bsz": "256", "num_updates": "53800", "lr": "9.55758e-05", "gnorm": "0.831", "loss_scale": "64", "train_wall": "60", "gb_free": "22.9", "wall": "16120"} +[2022-07-30 15:19:30,711][train_inner][INFO] - {"epoch": 2, "update": 1.049, "loss": "2.779", "ppl": "6.87", "wps": "399431", "ups": "3.38", "wpb": "118224", "bsz": "256", "num_updates": "54000", "lr": "9.55556e-05", "gnorm": "0.833", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "16179"} +[2022-07-30 15:20:30,382][train_inner][INFO] - {"epoch": 2, "update": 1.053, "loss": "2.77", "ppl": "6.82", "wps": "397432", "ups": "3.35", "wpb": "118576", "bsz": "256", "num_updates": "54200", "lr": "9.55354e-05", "gnorm": "0.829", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "16239"} +[2022-07-30 15:21:29,590][train_inner][INFO] - {"epoch": 2, "update": 1.057, "loss": "2.77", "ppl": "6.82", "wps": "401358", "ups": "3.38", "wpb": "118817", "bsz": "256", "num_updates": "54400", "lr": "9.55152e-05", "gnorm": "0.829", "loss_scale": "64", "train_wall": "59", "gb_free": "24.7", "wall": "16298"} +[2022-07-30 15:22:30,423][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "2.774", "ppl": "6.84", "wps": "389702", "ups": "3.29", "wpb": "118533", "bsz": "256", "num_updates": "54600", "lr": "9.54949e-05", "gnorm": "0.833", "loss_scale": "64", "train_wall": "60", "gb_free": "23.3", "wall": "16359"} +[2022-07-30 15:23:29,647][train_inner][INFO] - {"epoch": 2, "update": 1.065, "loss": "2.774", "ppl": "6.84", "wps": "401737", "ups": "3.38", "wpb": "118962", "bsz": "256", "num_updates": "54800", "lr": "9.54747e-05", "gnorm": "0.833", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "16418"} +[2022-07-30 15:24:29,483][train_inner][INFO] - {"epoch": 2, "update": 1.069, "loss": "2.769", "ppl": "6.82", "wps": "395119", "ups": "3.34", "wpb": "118211", "bsz": "256", "num_updates": "55000", "lr": "9.54545e-05", "gnorm": "0.835", "loss_scale": "128", "train_wall": "59", "gb_free": "21.6", "wall": "16478"} +[2022-07-30 15:25:28,825][train_inner][INFO] - {"epoch": 2, "update": 1.073, "loss": "2.765", "ppl": "6.8", "wps": "396718", "ups": "3.37", "wpb": "117710", "bsz": "256", "num_updates": "55200", "lr": "9.54343e-05", "gnorm": "0.835", "loss_scale": "128", "train_wall": "59", "gb_free": "23.8", "wall": "16537"} +[2022-07-30 15:25:35,905][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 15:26:28,457][train_inner][INFO] - {"epoch": 2, "update": 1.076, "loss": "2.764", "ppl": "6.79", "wps": "396012", "ups": "3.35", "wpb": "118073", "bsz": "256", "num_updates": "55400", "lr": "9.54141e-05", "gnorm": "0.836", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "16597"} +[2022-07-30 15:27:27,696][train_inner][INFO] - {"epoch": 2, "update": 1.08, "loss": "2.764", "ppl": "6.79", "wps": "399974", "ups": "3.38", "wpb": "118471", "bsz": "256", "num_updates": "55600", "lr": "9.53939e-05", "gnorm": "0.835", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "16656"} +[2022-07-30 15:28:27,158][train_inner][INFO] - {"epoch": 2, "update": 1.084, "loss": "2.773", "ppl": "6.84", "wps": "397155", "ups": "3.36", "wpb": "118076", "bsz": "256", "num_updates": "55800", "lr": "9.53737e-05", "gnorm": "0.837", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "16716"} +[2022-07-30 15:29:26,137][train_inner][INFO] - {"epoch": 2, "update": 1.088, "loss": "2.764", "ppl": "6.79", "wps": "400802", "ups": "3.39", "wpb": "118194", "bsz": "256", "num_updates": "56000", "lr": "9.53535e-05", "gnorm": "0.836", "loss_scale": "64", "train_wall": "59", "gb_free": "25.4", "wall": "16775"} +[2022-07-30 15:30:25,688][train_inner][INFO] - {"epoch": 2, "update": 1.092, "loss": "2.751", "ppl": "6.73", "wps": "399290", "ups": "3.36", "wpb": "118891", "bsz": "256", "num_updates": "56200", "lr": "9.53333e-05", "gnorm": "0.832", "loss_scale": "64", "train_wall": "59", "gb_free": "24.5", "wall": "16834"} +[2022-07-30 15:31:25,447][train_inner][INFO] - {"epoch": 2, "update": 1.096, "loss": "2.752", "ppl": "6.74", "wps": "395850", "ups": "3.35", "wpb": "118275", "bsz": "256", "num_updates": "56400", "lr": "9.53131e-05", "gnorm": "0.836", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "16894"} +[2022-07-30 15:31:25,729][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 15:32:25,295][train_inner][INFO] - {"epoch": 2, "update": 1.1, "loss": "2.748", "ppl": "6.72", "wps": "395534", "ups": "3.34", "wpb": "118360", "bsz": "256", "num_updates": "56600", "lr": "9.52929e-05", "gnorm": "0.835", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "16954"} +[2022-07-30 15:33:25,253][train_inner][INFO] - {"epoch": 2, "update": 1.104, "loss": "2.752", "ppl": "6.74", "wps": "394312", "ups": "3.34", "wpb": "118210", "bsz": "256", "num_updates": "56800", "lr": "9.52727e-05", "gnorm": "0.835", "loss_scale": "32", "train_wall": "60", "gb_free": "24.5", "wall": "17014"} +[2022-07-30 15:34:24,578][train_inner][INFO] - {"epoch": 2, "update": 1.108, "loss": "2.744", "ppl": "6.7", "wps": "399279", "ups": "3.37", "wpb": "118435", "bsz": "256", "num_updates": "57000", "lr": "9.52525e-05", "gnorm": "0.834", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "17073"} +[2022-07-30 15:35:23,843][train_inner][INFO] - {"epoch": 2, "update": 1.111, "loss": "2.757", "ppl": "6.76", "wps": "397933", "ups": "3.37", "wpb": "117918", "bsz": "256", "num_updates": "57200", "lr": "9.52323e-05", "gnorm": "0.839", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "17132"} +[2022-07-30 15:36:23,115][train_inner][INFO] - {"epoch": 2, "update": 1.115, "loss": "2.748", "ppl": "6.72", "wps": "398291", "ups": "3.37", "wpb": "118037", "bsz": "256", "num_updates": "57400", "lr": "9.52121e-05", "gnorm": "0.839", "loss_scale": "32", "train_wall": "59", "gb_free": "24.8", "wall": "17192"} +[2022-07-30 15:37:22,730][train_inner][INFO] - {"epoch": 2, "update": 1.119, "loss": "2.741", "ppl": "6.68", "wps": "399400", "ups": "3.35", "wpb": "119050", "bsz": "256", "num_updates": "57600", "lr": "9.51919e-05", "gnorm": "0.833", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "17251"} +[2022-07-30 15:38:22,347][train_inner][INFO] - {"epoch": 2, "update": 1.123, "loss": "2.739", "ppl": "6.68", "wps": "400494", "ups": "3.35", "wpb": "119380", "bsz": "256", "num_updates": "57800", "lr": "9.51717e-05", "gnorm": "0.832", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "17311"} +[2022-07-30 15:39:22,005][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "2.742", "ppl": "6.69", "wps": "396164", "ups": "3.35", "wpb": "118170", "bsz": "256", "num_updates": "58000", "lr": "9.51515e-05", "gnorm": "0.839", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "17370"} +[2022-07-30 15:40:21,820][train_inner][INFO] - {"epoch": 2, "update": 1.131, "loss": "2.74", "ppl": "6.68", "wps": "393766", "ups": "3.34", "wpb": "117766", "bsz": "256", "num_updates": "58200", "lr": "9.51313e-05", "gnorm": "0.838", "loss_scale": "32", "train_wall": "59", "gb_free": "26", "wall": "17430"} +[2022-07-30 15:41:21,071][train_inner][INFO] - {"epoch": 2, "update": 1.135, "loss": "2.742", "ppl": "6.69", "wps": "398192", "ups": "3.38", "wpb": "117965", "bsz": "256", "num_updates": "58400", "lr": "9.51111e-05", "gnorm": "0.839", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "17490"} +[2022-07-30 15:42:20,622][train_inner][INFO] - {"epoch": 2, "update": 1.139, "loss": "2.734", "ppl": "6.65", "wps": "397138", "ups": "3.36", "wpb": "118250", "bsz": "256", "num_updates": "58600", "lr": "9.50909e-05", "gnorm": "0.838", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "17549"} +[2022-07-30 15:43:20,134][train_inner][INFO] - {"epoch": 2, "update": 1.142, "loss": "2.736", "ppl": "6.66", "wps": "396529", "ups": "3.36", "wpb": "117990", "bsz": "256", "num_updates": "58800", "lr": "9.50707e-05", "gnorm": "0.838", "loss_scale": "64", "train_wall": "59", "gb_free": "26.1", "wall": "17609"} +[2022-07-30 15:44:20,155][train_inner][INFO] - {"epoch": 2, "update": 1.146, "loss": "2.736", "ppl": "6.66", "wps": "392090", "ups": "3.33", "wpb": "117668", "bsz": "256", "num_updates": "59000", "lr": "9.50505e-05", "gnorm": "0.841", "loss_scale": "64", "train_wall": "60", "gb_free": "25.7", "wall": "17669"} +[2022-07-30 15:45:19,722][train_inner][INFO] - {"epoch": 2, "update": 1.15, "loss": "2.732", "ppl": "6.65", "wps": "398450", "ups": "3.36", "wpb": "118672", "bsz": "256", "num_updates": "59200", "lr": "9.50303e-05", "gnorm": "0.837", "loss_scale": "64", "train_wall": "59", "gb_free": "31.4", "wall": "17728"} +[2022-07-30 15:46:19,274][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "2.731", "ppl": "6.64", "wps": "396760", "ups": "3.36", "wpb": "118138", "bsz": "256", "num_updates": "59400", "lr": "9.50101e-05", "gnorm": "0.839", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "17788"} +[2022-07-30 15:47:18,730][train_inner][INFO] - {"epoch": 2, "update": 1.158, "loss": "2.727", "ppl": "6.62", "wps": "398975", "ups": "3.36", "wpb": "118606", "bsz": "256", "num_updates": "59600", "lr": "9.49899e-05", "gnorm": "0.837", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "17847"} +[2022-07-30 15:48:18,033][train_inner][INFO] - {"epoch": 2, "update": 1.162, "loss": "2.728", "ppl": "6.62", "wps": "400466", "ups": "3.37", "wpb": "118744", "bsz": "256", "num_updates": "59800", "lr": "9.49697e-05", "gnorm": "0.84", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "17906"} +[2022-07-30 15:49:17,333][train_inner][INFO] - {"epoch": 2, "update": 1.166, "loss": "2.724", "ppl": "6.61", "wps": "399160", "ups": "3.37", "wpb": "118350", "bsz": "256", "num_updates": "60000", "lr": "9.49495e-05", "gnorm": "0.841", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "17966"} +[2022-07-30 15:50:17,016][train_inner][INFO] - {"epoch": 2, "update": 1.17, "loss": "2.719", "ppl": "6.58", "wps": "400186", "ups": "3.35", "wpb": "119419", "bsz": "256", "num_updates": "60200", "lr": "9.49293e-05", "gnorm": "0.837", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "18025"} +[2022-07-30 15:51:17,933][train_inner][INFO] - {"epoch": 2, "update": 1.174, "loss": "2.72", "ppl": "6.59", "wps": "389856", "ups": "3.28", "wpb": "118744", "bsz": "256", "num_updates": "60400", "lr": "9.49091e-05", "gnorm": "0.838", "loss_scale": "64", "train_wall": "61", "gb_free": "21.6", "wall": "18086"} +[2022-07-30 15:52:17,398][train_inner][INFO] - {"epoch": 2, "update": 1.177, "loss": "2.729", "ppl": "6.63", "wps": "396983", "ups": "3.36", "wpb": "118033", "bsz": "256", "num_updates": "60600", "lr": "9.48889e-05", "gnorm": "0.844", "loss_scale": "128", "train_wall": "59", "gb_free": "27.4", "wall": "18146"} +[2022-07-30 15:53:16,629][train_inner][INFO] - {"epoch": 2, "update": 1.181, "loss": "2.718", "ppl": "6.58", "wps": "399746", "ups": "3.38", "wpb": "118385", "bsz": "256", "num_updates": "60800", "lr": "9.48687e-05", "gnorm": "0.84", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "18205"} +[2022-07-30 15:54:15,862][train_inner][INFO] - {"epoch": 2, "update": 1.185, "loss": "2.718", "ppl": "6.58", "wps": "399673", "ups": "3.38", "wpb": "118369", "bsz": "256", "num_updates": "61000", "lr": "9.48485e-05", "gnorm": "0.84", "loss_scale": "128", "train_wall": "59", "gb_free": "25.8", "wall": "18264"} +[2022-07-30 15:55:15,461][train_inner][INFO] - {"epoch": 2, "update": 1.189, "loss": "2.72", "ppl": "6.59", "wps": "396103", "ups": "3.36", "wpb": "118036", "bsz": "256", "num_updates": "61200", "lr": "9.48283e-05", "gnorm": "0.842", "loss_scale": "128", "train_wall": "59", "gb_free": "21.6", "wall": "18324"} +[2022-07-30 15:56:14,754][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "2.721", "ppl": "6.59", "wps": "397807", "ups": "3.37", "wpb": "117934", "bsz": "256", "num_updates": "61400", "lr": "9.48081e-05", "gnorm": "0.843", "loss_scale": "128", "train_wall": "59", "gb_free": "21.8", "wall": "18383"} +[2022-07-30 15:56:43,405][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 15:57:14,611][train_inner][INFO] - {"epoch": 2, "update": 1.197, "loss": "2.709", "ppl": "6.54", "wps": "395378", "ups": "3.34", "wpb": "118330", "bsz": "256", "num_updates": "61600", "lr": "9.47879e-05", "gnorm": "0.841", "loss_scale": "64", "train_wall": "60", "gb_free": "21.4", "wall": "18443"} +[2022-07-30 15:58:13,870][train_inner][INFO] - {"epoch": 2, "update": 1.201, "loss": "2.716", "ppl": "6.57", "wps": "399333", "ups": "3.38", "wpb": "118319", "bsz": "256", "num_updates": "61800", "lr": "9.47677e-05", "gnorm": "0.841", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "18502"} +[2022-07-30 15:59:13,619][train_inner][INFO] - {"epoch": 2, "update": 1.205, "loss": "2.706", "ppl": "6.52", "wps": "396272", "ups": "3.35", "wpb": "118385", "bsz": "256", "num_updates": "62000", "lr": "9.47475e-05", "gnorm": "0.842", "loss_scale": "64", "train_wall": "59", "gb_free": "25.8", "wall": "18562"} +[2022-07-30 16:00:13,110][train_inner][INFO] - {"epoch": 2, "update": 1.209, "loss": "2.714", "ppl": "6.56", "wps": "399567", "ups": "3.36", "wpb": "118852", "bsz": "256", "num_updates": "62200", "lr": "9.47273e-05", "gnorm": "0.839", "loss_scale": "64", "train_wall": "59", "gb_free": "24.4", "wall": "18622"} +[2022-07-30 16:01:12,456][train_inner][INFO] - {"epoch": 2, "update": 1.212, "loss": "2.705", "ppl": "6.52", "wps": "400227", "ups": "3.37", "wpb": "118759", "bsz": "256", "num_updates": "62400", "lr": "9.47071e-05", "gnorm": "0.844", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "18681"} +[2022-07-30 16:02:11,879][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 16:02:13,397][train_inner][INFO] - {"epoch": 2, "update": 1.216, "loss": "2.704", "ppl": "6.51", "wps": "388734", "ups": "3.28", "wpb": "118447", "bsz": "256", "num_updates": "62600", "lr": "9.46869e-05", "gnorm": "0.842", "loss_scale": "32", "train_wall": "61", "gb_free": "22.5", "wall": "18742"} +[2022-07-30 16:03:13,012][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "2.705", "ppl": "6.52", "wps": "399154", "ups": "3.35", "wpb": "118977", "bsz": "256", "num_updates": "62800", "lr": "9.46667e-05", "gnorm": "0.841", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "18801"} +[2022-07-30 16:04:12,811][train_inner][INFO] - {"epoch": 2, "update": 1.224, "loss": "2.7", "ppl": "6.5", "wps": "397444", "ups": "3.34", "wpb": "118833", "bsz": "255.9", "num_updates": "63000", "lr": "9.46465e-05", "gnorm": "0.842", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "18861"} +[2022-07-30 16:05:12,479][train_inner][INFO] - {"epoch": 2, "update": 1.228, "loss": "2.705", "ppl": "6.52", "wps": "395877", "ups": "3.35", "wpb": "118106", "bsz": "256", "num_updates": "63200", "lr": "9.46263e-05", "gnorm": "0.844", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "18921"} +[2022-07-30 16:06:11,922][train_inner][INFO] - {"epoch": 2, "update": 1.232, "loss": "2.697", "ppl": "6.49", "wps": "399260", "ups": "3.36", "wpb": "118665", "bsz": "256", "num_updates": "63400", "lr": "9.46061e-05", "gnorm": "0.843", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "18980"} +[2022-07-30 16:07:11,204][train_inner][INFO] - {"epoch": 2, "update": 1.236, "loss": "2.696", "ppl": "6.48", "wps": "399619", "ups": "3.37", "wpb": "118450", "bsz": "256", "num_updates": "63600", "lr": "9.45859e-05", "gnorm": "0.844", "loss_scale": "32", "train_wall": "59", "gb_free": "27.8", "wall": "19040"} +[2022-07-30 16:08:10,841][train_inner][INFO] - {"epoch": 2, "update": 1.24, "loss": "2.701", "ppl": "6.5", "wps": "395938", "ups": "3.35", "wpb": "118062", "bsz": "256", "num_updates": "63800", "lr": "9.45657e-05", "gnorm": "0.849", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "19099"} +[2022-07-30 16:09:11,443][train_inner][INFO] - {"epoch": 2, "update": 1.244, "loss": "2.698", "ppl": "6.49", "wps": "390111", "ups": "3.3", "wpb": "118205", "bsz": "256", "num_updates": "64000", "lr": "9.45455e-05", "gnorm": "0.849", "loss_scale": "32", "train_wall": "60", "gb_free": "24", "wall": "19160"} +[2022-07-30 16:10:11,211][train_inner][INFO] - {"epoch": 2, "update": 1.247, "loss": "2.697", "ppl": "6.49", "wps": "393551", "ups": "3.35", "wpb": "117608", "bsz": "256", "num_updates": "64200", "lr": "9.45253e-05", "gnorm": "0.849", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "19220"} +[2022-07-30 16:11:10,764][train_inner][INFO] - {"epoch": 2, "update": 1.251, "loss": "2.696", "ppl": "6.48", "wps": "393525", "ups": "3.36", "wpb": "117178", "bsz": "256", "num_updates": "64400", "lr": "9.45051e-05", "gnorm": "0.85", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "19279"} +[2022-07-30 16:12:10,299][train_inner][INFO] - {"epoch": 2, "update": 1.255, "loss": "2.691", "ppl": "6.46", "wps": "396402", "ups": "3.36", "wpb": "117997", "bsz": "256", "num_updates": "64600", "lr": "9.44848e-05", "gnorm": "0.846", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "19339"} +[2022-07-30 16:13:09,738][train_inner][INFO] - {"epoch": 2, "update": 1.259, "loss": "2.698", "ppl": "6.49", "wps": "395814", "ups": "3.36", "wpb": "117634", "bsz": "256", "num_updates": "64800", "lr": "9.44646e-05", "gnorm": "0.848", "loss_scale": "64", "train_wall": "59", "gb_free": "25.5", "wall": "19398"} +[2022-07-30 16:14:09,856][train_inner][INFO] - {"epoch": 2, "update": 1.263, "loss": "2.689", "ppl": "6.45", "wps": "393999", "ups": "3.33", "wpb": "118432", "bsz": "256", "num_updates": "65000", "lr": "9.44444e-05", "gnorm": "0.846", "loss_scale": "64", "train_wall": "60", "gb_free": "23", "wall": "19458"} +[2022-07-30 16:15:09,767][train_inner][INFO] - {"epoch": 2, "update": 1.267, "loss": "2.688", "ppl": "6.45", "wps": "396611", "ups": "3.34", "wpb": "118806", "bsz": "256", "num_updates": "65200", "lr": "9.44242e-05", "gnorm": "0.846", "loss_scale": "64", "train_wall": "60", "gb_free": "21.4", "wall": "19518"} +[2022-07-30 16:16:08,733][train_inner][INFO] - {"epoch": 2, "update": 1.271, "loss": "2.684", "ppl": "6.42", "wps": "401241", "ups": "3.39", "wpb": "118296", "bsz": "256", "num_updates": "65400", "lr": "9.4404e-05", "gnorm": "0.847", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "19577"} +[2022-07-30 16:17:08,356][train_inner][INFO] - {"epoch": 2, "update": 1.275, "loss": "2.687", "ppl": "6.44", "wps": "397088", "ups": "3.35", "wpb": "118376", "bsz": "256", "num_updates": "65600", "lr": "9.43838e-05", "gnorm": "0.844", "loss_scale": "64", "train_wall": "59", "gb_free": "26.9", "wall": "19637"} +[2022-07-30 16:18:07,917][train_inner][INFO] - {"epoch": 2, "update": 1.278, "loss": "2.681", "ppl": "6.41", "wps": "397919", "ups": "3.36", "wpb": "118502", "bsz": "256", "num_updates": "65800", "lr": "9.43636e-05", "gnorm": "0.847", "loss_scale": "64", "train_wall": "59", "gb_free": "24.5", "wall": "19696"} +[2022-07-30 16:19:07,330][train_inner][INFO] - {"epoch": 2, "update": 1.282, "loss": "2.686", "ppl": "6.43", "wps": "397622", "ups": "3.37", "wpb": "118119", "bsz": "256", "num_updates": "66000", "lr": "9.43434e-05", "gnorm": "0.848", "loss_scale": "64", "train_wall": "59", "gb_free": "35.3", "wall": "19756"} +[2022-07-30 16:20:07,064][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "2.68", "ppl": "6.41", "wps": "397310", "ups": "3.35", "wpb": "118665", "bsz": "256", "num_updates": "66200", "lr": "9.43232e-05", "gnorm": "0.846", "loss_scale": "64", "train_wall": "59", "gb_free": "24", "wall": "19815"} +[2022-07-30 16:21:06,529][train_inner][INFO] - {"epoch": 2, "update": 1.29, "loss": "2.682", "ppl": "6.42", "wps": "396535", "ups": "3.36", "wpb": "117898", "bsz": "256", "num_updates": "66400", "lr": "9.4303e-05", "gnorm": "0.848", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "19875"} +[2022-07-30 16:22:06,059][train_inner][INFO] - {"epoch": 2, "update": 1.294, "loss": "2.679", "ppl": "6.4", "wps": "395751", "ups": "3.36", "wpb": "117795", "bsz": "256", "num_updates": "66600", "lr": "9.42828e-05", "gnorm": "0.85", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "19934"} +[2022-07-30 16:23:05,557][train_inner][INFO] - {"epoch": 2, "update": 1.298, "loss": "2.68", "ppl": "6.41", "wps": "395955", "ups": "3.36", "wpb": "117790", "bsz": "256", "num_updates": "66800", "lr": "9.42626e-05", "gnorm": "0.85", "loss_scale": "128", "train_wall": "59", "gb_free": "23.3", "wall": "19994"} +[2022-07-30 16:23:31,094][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 16:24:05,425][train_inner][INFO] - {"epoch": 2, "update": 1.302, "loss": "2.672", "ppl": "6.37", "wps": "398622", "ups": "3.34", "wpb": "119323", "bsz": "256", "num_updates": "67000", "lr": "9.42424e-05", "gnorm": "0.842", "loss_scale": "64", "train_wall": "60", "gb_free": "28.7", "wall": "20054"} +[2022-07-30 16:25:05,799][train_inner][INFO] - {"epoch": 2, "update": 1.306, "loss": "2.672", "ppl": "6.37", "wps": "391903", "ups": "3.31", "wpb": "118302", "bsz": "256", "num_updates": "67200", "lr": "9.42222e-05", "gnorm": "0.849", "loss_scale": "64", "train_wall": "60", "gb_free": "21.9", "wall": "20114"} +[2022-07-30 16:26:05,390][train_inner][INFO] - {"epoch": 2, "update": 1.31, "loss": "2.667", "ppl": "6.35", "wps": "398323", "ups": "3.36", "wpb": "118683", "bsz": "256", "num_updates": "67400", "lr": "9.4202e-05", "gnorm": "0.847", "loss_scale": "64", "train_wall": "59", "gb_free": "24.9", "wall": "20174"} +[2022-07-30 16:26:46,997][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 16:27:05,240][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "2.671", "ppl": "6.37", "wps": "394848", "ups": "3.34", "wpb": "118156", "bsz": "256", "num_updates": "67600", "lr": "9.41818e-05", "gnorm": "0.85", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "20234"} +[2022-07-30 16:28:05,545][train_inner][INFO] - {"epoch": 2, "update": 1.317, "loss": "2.677", "ppl": "6.4", "wps": "392429", "ups": "3.32", "wpb": "118326", "bsz": "256", "num_updates": "67800", "lr": "9.41616e-05", "gnorm": "0.852", "loss_scale": "32", "train_wall": "60", "gb_free": "23.4", "wall": "20294"} +[2022-07-30 16:29:04,519][train_inner][INFO] - {"epoch": 2, "update": 1.321, "loss": "2.666", "ppl": "6.34", "wps": "402730", "ups": "3.39", "wpb": "118752", "bsz": "256", "num_updates": "68000", "lr": "9.41414e-05", "gnorm": "0.85", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "20353"} +[2022-07-30 16:30:04,315][train_inner][INFO] - {"epoch": 2, "update": 1.325, "loss": "2.669", "ppl": "6.36", "wps": "395862", "ups": "3.34", "wpb": "118353", "bsz": "256", "num_updates": "68200", "lr": "9.41212e-05", "gnorm": "0.85", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "20413"} +[2022-07-30 16:31:03,650][train_inner][INFO] - {"epoch": 2, "update": 1.329, "loss": "2.668", "ppl": "6.35", "wps": "400072", "ups": "3.37", "wpb": "118692", "bsz": "256", "num_updates": "68400", "lr": "9.4101e-05", "gnorm": "0.848", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "20472"} +[2022-07-30 16:32:03,017][train_inner][INFO] - {"epoch": 2, "update": 1.333, "loss": "2.665", "ppl": "6.34", "wps": "397617", "ups": "3.37", "wpb": "118026", "bsz": "256", "num_updates": "68600", "lr": "9.40808e-05", "gnorm": "0.852", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "20531"} +[2022-07-30 16:33:02,482][train_inner][INFO] - {"epoch": 2, "update": 1.337, "loss": "2.659", "ppl": "6.32", "wps": "398691", "ups": "3.36", "wpb": "118539", "bsz": "256", "num_updates": "68800", "lr": "9.40606e-05", "gnorm": "0.848", "loss_scale": "32", "train_wall": "59", "gb_free": "26.7", "wall": "20591"} +[2022-07-30 16:34:02,344][train_inner][INFO] - {"epoch": 2, "update": 1.341, "loss": "2.66", "ppl": "6.32", "wps": "393690", "ups": "3.34", "wpb": "117834", "bsz": "256", "num_updates": "69000", "lr": "9.40404e-05", "gnorm": "0.853", "loss_scale": "32", "train_wall": "60", "gb_free": "27.2", "wall": "20651"} +[2022-07-30 16:35:02,018][train_inner][INFO] - {"epoch": 2, "update": 1.345, "loss": "2.651", "ppl": "6.28", "wps": "398323", "ups": "3.35", "wpb": "118847", "bsz": "256", "num_updates": "69200", "lr": "9.40202e-05", "gnorm": "0.848", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "20710"} +[2022-07-30 16:36:01,891][train_inner][INFO] - {"epoch": 2, "update": 1.348, "loss": "2.658", "ppl": "6.31", "wps": "397893", "ups": "3.34", "wpb": "119113", "bsz": "256", "num_updates": "69400", "lr": "9.4e-05", "gnorm": "0.851", "loss_scale": "32", "train_wall": "60", "gb_free": "25.6", "wall": "20770"} +[2022-07-30 16:37:01,761][train_inner][INFO] - {"epoch": 2, "update": 1.352, "loss": "2.665", "ppl": "6.34", "wps": "395285", "ups": "3.34", "wpb": "118328", "bsz": "256", "num_updates": "69600", "lr": "9.39798e-05", "gnorm": "0.853", "loss_scale": "64", "train_wall": "60", "gb_free": "22.8", "wall": "20830"} +[2022-07-30 16:38:02,515][train_inner][INFO] - {"epoch": 2, "update": 1.356, "loss": "2.658", "ppl": "6.31", "wps": "390836", "ups": "3.29", "wpb": "118722", "bsz": "256", "num_updates": "69800", "lr": "9.39596e-05", "gnorm": "0.855", "loss_scale": "64", "train_wall": "60", "gb_free": "21.6", "wall": "20891"} +[2022-07-30 16:39:01,660][train_inner][INFO] - {"epoch": 2, "update": 1.36, "loss": "2.656", "ppl": "6.3", "wps": "399570", "ups": "3.38", "wpb": "118162", "bsz": "256", "num_updates": "70000", "lr": "9.39394e-05", "gnorm": "0.854", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "20950"} +[2022-07-30 16:40:01,074][train_inner][INFO] - {"epoch": 2, "update": 1.364, "loss": "2.655", "ppl": "6.3", "wps": "399315", "ups": "3.37", "wpb": "118625", "bsz": "256", "num_updates": "70200", "lr": "9.39192e-05", "gnorm": "0.854", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "21010"} +[2022-07-30 16:41:00,608][train_inner][INFO] - {"epoch": 2, "update": 1.368, "loss": "2.647", "ppl": "6.26", "wps": "397299", "ups": "3.36", "wpb": "118262", "bsz": "256", "num_updates": "70400", "lr": "9.3899e-05", "gnorm": "0.852", "loss_scale": "64", "train_wall": "59", "gb_free": "26.9", "wall": "21069"} +[2022-07-30 16:42:00,107][train_inner][INFO] - {"epoch": 2, "update": 1.372, "loss": "2.662", "ppl": "6.33", "wps": "397659", "ups": "3.36", "wpb": "118302", "bsz": "256", "num_updates": "70600", "lr": "9.38788e-05", "gnorm": "0.854", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "21129"} +[2022-07-30 16:42:59,815][train_inner][INFO] - {"epoch": 2, "update": 1.376, "loss": "2.645", "ppl": "6.25", "wps": "395400", "ups": "3.35", "wpb": "118040", "bsz": "256", "num_updates": "70800", "lr": "9.38586e-05", "gnorm": "0.851", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "21188"} +[2022-07-30 16:43:59,324][train_inner][INFO] - {"epoch": 2, "update": 1.38, "loss": "2.657", "ppl": "6.31", "wps": "396384", "ups": "3.36", "wpb": "117941", "bsz": "256", "num_updates": "71000", "lr": "9.38384e-05", "gnorm": "0.856", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "21248"} +[2022-07-30 16:44:58,883][train_inner][INFO] - {"epoch": 2, "update": 1.383, "loss": "2.651", "ppl": "6.28", "wps": "395834", "ups": "3.36", "wpb": "117878", "bsz": "256", "num_updates": "71200", "lr": "9.38182e-05", "gnorm": "0.857", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "21307"} +[2022-07-30 16:45:58,371][train_inner][INFO] - {"epoch": 2, "update": 1.387, "loss": "2.653", "ppl": "6.29", "wps": "395227", "ups": "3.36", "wpb": "117555", "bsz": "256", "num_updates": "71400", "lr": "9.3798e-05", "gnorm": "0.859", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "21367"} +[2022-07-30 16:46:58,459][train_inner][INFO] - {"epoch": 2, "update": 1.391, "loss": "2.651", "ppl": "6.28", "wps": "393335", "ups": "3.33", "wpb": "118174", "bsz": "256", "num_updates": "71600", "lr": "9.37778e-05", "gnorm": "0.854", "loss_scale": "64", "train_wall": "60", "gb_free": "22.8", "wall": "21427"} +[2022-07-30 16:47:57,874][train_inner][INFO] - {"epoch": 2, "update": 1.395, "loss": "2.643", "ppl": "6.25", "wps": "398331", "ups": "3.37", "wpb": "118333", "bsz": "256", "num_updates": "71800", "lr": "9.37576e-05", "gnorm": "0.854", "loss_scale": "128", "train_wall": "59", "gb_free": "22.2", "wall": "21486"} +[2022-07-30 16:48:25,857][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 16:48:57,679][train_inner][INFO] - {"epoch": 2, "update": 1.399, "loss": "2.651", "ppl": "6.28", "wps": "395200", "ups": "3.34", "wpb": "118173", "bsz": "256", "num_updates": "72000", "lr": "9.37374e-05", "gnorm": "0.856", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "21546"} +[2022-07-30 16:49:56,854][train_inner][INFO] - {"epoch": 2, "update": 1.403, "loss": "2.646", "ppl": "6.26", "wps": "399308", "ups": "3.38", "wpb": "118145", "bsz": "256", "num_updates": "72200", "lr": "9.37172e-05", "gnorm": "0.856", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "21605"} +[2022-07-30 16:50:56,404][train_inner][INFO] - {"epoch": 2, "update": 1.407, "loss": "2.648", "ppl": "6.27", "wps": "397138", "ups": "3.36", "wpb": "118247", "bsz": "256", "num_updates": "72400", "lr": "9.3697e-05", "gnorm": "0.855", "loss_scale": "64", "train_wall": "59", "gb_free": "31.7", "wall": "21665"} +[2022-07-30 16:51:56,044][train_inner][INFO] - {"epoch": 2, "update": 1.411, "loss": "2.641", "ppl": "6.24", "wps": "398033", "ups": "3.35", "wpb": "118692", "bsz": "256", "num_updates": "72600", "lr": "9.36768e-05", "gnorm": "0.853", "loss_scale": "64", "train_wall": "59", "gb_free": "23.8", "wall": "21724"} +[2022-07-30 16:52:55,689][train_inner][INFO] - {"epoch": 2, "update": 1.414, "loss": "2.642", "ppl": "6.24", "wps": "397068", "ups": "3.35", "wpb": "118416", "bsz": "256", "num_updates": "72800", "lr": "9.36566e-05", "gnorm": "0.855", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "21784"} +[2022-07-30 16:53:55,404][train_inner][INFO] - {"epoch": 2, "update": 1.418, "loss": "2.641", "ppl": "6.24", "wps": "395263", "ups": "3.35", "wpb": "118014", "bsz": "256", "num_updates": "73000", "lr": "9.36364e-05", "gnorm": "0.86", "loss_scale": "64", "train_wall": "59", "gb_free": "26.5", "wall": "21844"} +[2022-07-30 16:54:54,777][train_inner][INFO] - {"epoch": 2, "update": 1.422, "loss": "2.632", "ppl": "6.2", "wps": "399307", "ups": "3.37", "wpb": "118538", "bsz": "256", "num_updates": "73200", "lr": "9.36162e-05", "gnorm": "0.853", "loss_scale": "64", "train_wall": "59", "gb_free": "26.1", "wall": "21903"} +[2022-07-30 16:55:54,710][train_inner][INFO] - {"epoch": 2, "update": 1.426, "loss": "2.632", "ppl": "6.2", "wps": "394969", "ups": "3.34", "wpb": "118358", "bsz": "256", "num_updates": "73400", "lr": "9.3596e-05", "gnorm": "0.857", "loss_scale": "64", "train_wall": "60", "gb_free": "21.3", "wall": "21963"} +[2022-07-30 16:56:54,046][train_inner][INFO] - {"epoch": 2, "update": 1.43, "loss": "2.628", "ppl": "6.18", "wps": "400794", "ups": "3.37", "wpb": "118906", "bsz": "256", "num_updates": "73600", "lr": "9.35758e-05", "gnorm": "0.854", "loss_scale": "64", "train_wall": "59", "gb_free": "26.6", "wall": "22022"} +[2022-07-30 16:57:53,604][train_inner][INFO] - {"epoch": 2, "update": 1.434, "loss": "2.635", "ppl": "6.21", "wps": "395405", "ups": "3.36", "wpb": "117748", "bsz": "256", "num_updates": "73800", "lr": "9.35556e-05", "gnorm": "0.858", "loss_scale": "64", "train_wall": "59", "gb_free": "32.7", "wall": "22082"} +[2022-07-30 16:58:52,899][train_inner][INFO] - {"epoch": 2, "update": 1.438, "loss": "2.635", "ppl": "6.21", "wps": "399101", "ups": "3.37", "wpb": "118322", "bsz": "256", "num_updates": "74000", "lr": "9.35354e-05", "gnorm": "0.856", "loss_scale": "128", "train_wall": "59", "gb_free": "26.3", "wall": "22141"} +[2022-07-30 16:59:50,730][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 16:59:52,547][train_inner][INFO] - {"epoch": 2, "update": 1.442, "loss": "2.63", "ppl": "6.19", "wps": "395951", "ups": "3.35", "wpb": "118088", "bsz": "256", "num_updates": "74200", "lr": "9.35152e-05", "gnorm": "0.856", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "22201"} +[2022-07-30 17:00:51,811][train_inner][INFO] - {"epoch": 2, "update": 1.446, "loss": "2.634", "ppl": "6.21", "wps": "398341", "ups": "3.37", "wpb": "118035", "bsz": "256", "num_updates": "74400", "lr": "9.34949e-05", "gnorm": "0.859", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "22260"} +[2022-07-30 17:01:51,539][train_inner][INFO] - {"epoch": 2, "update": 1.449, "loss": "2.631", "ppl": "6.19", "wps": "395541", "ups": "3.35", "wpb": "118123", "bsz": "256", "num_updates": "74600", "lr": "9.34747e-05", "gnorm": "0.859", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "22320"} +[2022-07-30 17:02:51,272][train_inner][INFO] - {"epoch": 2, "update": 1.453, "loss": "2.627", "ppl": "6.18", "wps": "396759", "ups": "3.35", "wpb": "118498", "bsz": "256", "num_updates": "74800", "lr": "9.34545e-05", "gnorm": "0.86", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "22380"} +[2022-07-30 17:03:51,023][train_inner][INFO] - {"epoch": 2, "update": 1.457, "loss": "2.625", "ppl": "6.17", "wps": "398443", "ups": "3.35", "wpb": "119036", "bsz": "256", "num_updates": "75000", "lr": "9.34343e-05", "gnorm": "0.856", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "22439"} +[2022-07-30 17:04:50,414][train_inner][INFO] - {"epoch": 2, "update": 1.461, "loss": "2.627", "ppl": "6.18", "wps": "397811", "ups": "3.37", "wpb": "118131", "bsz": "256", "num_updates": "75200", "lr": "9.34141e-05", "gnorm": "0.858", "loss_scale": "64", "train_wall": "59", "gb_free": "25.9", "wall": "22499"} +[2022-07-30 17:05:49,801][train_inner][INFO] - {"epoch": 2, "update": 1.465, "loss": "2.627", "ppl": "6.18", "wps": "397604", "ups": "3.37", "wpb": "118062", "bsz": "255.9", "num_updates": "75400", "lr": "9.33939e-05", "gnorm": "0.861", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "22558"} +[2022-07-30 17:06:50,214][train_inner][INFO] - {"epoch": 2, "update": 1.469, "loss": "2.626", "ppl": "6.17", "wps": "392809", "ups": "3.31", "wpb": "118653", "bsz": "256", "num_updates": "75600", "lr": "9.33737e-05", "gnorm": "0.858", "loss_scale": "64", "train_wall": "60", "gb_free": "24.8", "wall": "22619"} +[2022-07-30 17:07:49,675][train_inner][INFO] - {"epoch": 2, "update": 1.473, "loss": "2.624", "ppl": "6.17", "wps": "395978", "ups": "3.36", "wpb": "117726", "bsz": "256", "num_updates": "75800", "lr": "9.33535e-05", "gnorm": "0.861", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "22678"} +[2022-07-30 17:08:49,041][train_inner][INFO] - {"epoch": 2, "update": 1.477, "loss": "2.621", "ppl": "6.15", "wps": "399705", "ups": "3.37", "wpb": "118643", "bsz": "256", "num_updates": "76000", "lr": "9.33333e-05", "gnorm": "0.857", "loss_scale": "64", "train_wall": "59", "gb_free": "23.8", "wall": "22737"} +[2022-07-30 17:09:48,612][train_inner][INFO] - {"epoch": 2, "update": 1.481, "loss": "2.615", "ppl": "6.13", "wps": "399374", "ups": "3.36", "wpb": "118947", "bsz": "256", "num_updates": "76200", "lr": "9.33131e-05", "gnorm": "0.856", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "22797"} +[2022-07-30 17:10:48,064][train_inner][INFO] - {"epoch": 2, "update": 1.484, "loss": "2.627", "ppl": "6.18", "wps": "396515", "ups": "3.36", "wpb": "117861", "bsz": "256", "num_updates": "76400", "lr": "9.32929e-05", "gnorm": "0.862", "loss_scale": "128", "train_wall": "59", "gb_free": "21.3", "wall": "22856"} +[2022-07-30 17:10:49,509][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 17:11:47,491][train_inner][INFO] - {"epoch": 2, "update": 1.488, "loss": "2.632", "ppl": "6.2", "wps": "395514", "ups": "3.37", "wpb": "117519", "bsz": "256", "num_updates": "76600", "lr": "9.32727e-05", "gnorm": "0.864", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "22916"} +[2022-07-30 17:12:46,972][train_inner][INFO] - {"epoch": 2, "update": 1.492, "loss": "2.619", "ppl": "6.14", "wps": "397936", "ups": "3.36", "wpb": "118348", "bsz": "256", "num_updates": "76800", "lr": "9.32525e-05", "gnorm": "0.861", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "22975"} +[2022-07-30 17:13:46,520][train_inner][INFO] - {"epoch": 2, "update": 1.496, "loss": "2.61", "ppl": "6.11", "wps": "398980", "ups": "3.36", "wpb": "118791", "bsz": "256", "num_updates": "77000", "lr": "9.32323e-05", "gnorm": "0.858", "loss_scale": "64", "train_wall": "59", "gb_free": "25.1", "wall": "23035"} +[2022-07-30 17:14:46,209][train_inner][INFO] - {"epoch": 2, "update": 1.5, "loss": "2.614", "ppl": "6.12", "wps": "398145", "ups": "3.35", "wpb": "118824", "bsz": "256", "num_updates": "77200", "lr": "9.32121e-05", "gnorm": "0.858", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "23095"} +[2022-07-30 17:15:45,743][train_inner][INFO] - {"epoch": 2, "update": 1.504, "loss": "2.615", "ppl": "6.13", "wps": "398900", "ups": "3.36", "wpb": "118740", "bsz": "256", "num_updates": "77400", "lr": "9.31919e-05", "gnorm": "0.862", "loss_scale": "64", "train_wall": "59", "gb_free": "28.3", "wall": "23154"} +[2022-07-30 17:16:45,343][train_inner][INFO] - {"epoch": 2, "update": 1.508, "loss": "2.608", "ppl": "6.1", "wps": "397300", "ups": "3.36", "wpb": "118394", "bsz": "256", "num_updates": "77600", "lr": "9.31717e-05", "gnorm": "0.862", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "23214"} +[2022-07-30 17:17:44,699][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "2.615", "ppl": "6.12", "wps": "396616", "ups": "3.37", "wpb": "117707", "bsz": "256", "num_updates": "77800", "lr": "9.31515e-05", "gnorm": "0.863", "loss_scale": "64", "train_wall": "59", "gb_free": "23", "wall": "23273"} +[2022-07-30 17:18:43,975][train_inner][INFO] - {"epoch": 2, "update": 1.516, "loss": "2.617", "ppl": "6.13", "wps": "398231", "ups": "3.37", "wpb": "118028", "bsz": "256", "num_updates": "78000", "lr": "9.31313e-05", "gnorm": "0.865", "loss_scale": "64", "train_wall": "59", "gb_free": "29.4", "wall": "23332"} +[2022-07-30 17:19:43,678][train_inner][INFO] - {"epoch": 2, "update": 1.519, "loss": "2.611", "ppl": "6.11", "wps": "395972", "ups": "3.35", "wpb": "118203", "bsz": "256", "num_updates": "78200", "lr": "9.31111e-05", "gnorm": "0.86", "loss_scale": "64", "train_wall": "59", "gb_free": "27.3", "wall": "23392"} +[2022-07-30 17:20:43,104][train_inner][INFO] - {"epoch": 2, "update": 1.523, "loss": "2.609", "ppl": "6.1", "wps": "397784", "ups": "3.37", "wpb": "118193", "bsz": "256", "num_updates": "78400", "lr": "9.30909e-05", "gnorm": "0.864", "loss_scale": "64", "train_wall": "59", "gb_free": "24.1", "wall": "23452"} +[2022-07-30 17:21:42,763][train_inner][INFO] - {"epoch": 2, "update": 1.527, "loss": "2.609", "ppl": "6.1", "wps": "397079", "ups": "3.35", "wpb": "118445", "bsz": "256", "num_updates": "78600", "lr": "9.30707e-05", "gnorm": "0.862", "loss_scale": "128", "train_wall": "59", "gb_free": "23.4", "wall": "23511"} +[2022-07-30 17:22:05,854][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 17:22:42,074][train_inner][INFO] - {"epoch": 2, "update": 1.531, "loss": "2.609", "ppl": "6.1", "wps": "398604", "ups": "3.37", "wpb": "118206", "bsz": "256", "num_updates": "78800", "lr": "9.30505e-05", "gnorm": "0.863", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "23571"} +[2022-07-30 17:23:41,425][train_inner][INFO] - {"epoch": 2, "update": 1.535, "loss": "2.603", "ppl": "6.07", "wps": "399069", "ups": "3.37", "wpb": "118426", "bsz": "256", "num_updates": "79000", "lr": "9.30303e-05", "gnorm": "0.864", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "23630"} +[2022-07-30 17:24:40,708][train_inner][INFO] - {"epoch": 2, "update": 1.539, "loss": "2.605", "ppl": "6.08", "wps": "400148", "ups": "3.37", "wpb": "118608", "bsz": "256", "num_updates": "79200", "lr": "9.30101e-05", "gnorm": "0.863", "loss_scale": "64", "train_wall": "59", "gb_free": "25.5", "wall": "23689"} +[2022-07-30 17:24:55,680][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 17:25:41,305][train_inner][INFO] - {"epoch": 2, "update": 1.543, "loss": "2.604", "ppl": "6.08", "wps": "390074", "ups": "3.3", "wpb": "118185", "bsz": "256", "num_updates": "79400", "lr": "9.29899e-05", "gnorm": "0.863", "loss_scale": "32", "train_wall": "60", "gb_free": "21.6", "wall": "23750"} +[2022-07-30 17:26:40,674][train_inner][INFO] - {"epoch": 2, "update": 1.547, "loss": "2.61", "ppl": "6.11", "wps": "399517", "ups": "3.37", "wpb": "118596", "bsz": "256", "num_updates": "79600", "lr": "9.29697e-05", "gnorm": "0.864", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "23809"} +[2022-07-30 17:27:40,219][train_inner][INFO] - {"epoch": 2, "update": 1.551, "loss": "2.603", "ppl": "6.07", "wps": "397548", "ups": "3.36", "wpb": "118358", "bsz": "256", "num_updates": "79800", "lr": "9.29495e-05", "gnorm": "0.863", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "23869"} +[2022-07-30 17:28:39,337][train_inner][INFO] - {"epoch": 2, "update": 1.554, "loss": "2.595", "ppl": "6.04", "wps": "400839", "ups": "3.38", "wpb": "118482", "bsz": "256", "num_updates": "80000", "lr": "9.29293e-05", "gnorm": "0.865", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "23928"} +[2022-07-30 17:29:38,670][train_inner][INFO] - {"epoch": 2, "update": 1.558, "loss": "2.604", "ppl": "6.08", "wps": "398410", "ups": "3.37", "wpb": "118194", "bsz": "256", "num_updates": "80200", "lr": "9.29091e-05", "gnorm": "0.867", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "23987"} +[2022-07-30 17:30:38,374][train_inner][INFO] - {"epoch": 2, "update": 1.562, "loss": "2.605", "ppl": "6.08", "wps": "395782", "ups": "3.35", "wpb": "118147", "bsz": "256", "num_updates": "80400", "lr": "9.28889e-05", "gnorm": "0.866", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "24047"} +[2022-07-30 17:31:38,177][train_inner][INFO] - {"epoch": 2, "update": 1.566, "loss": "2.594", "ppl": "6.04", "wps": "396458", "ups": "3.34", "wpb": "118548", "bsz": "256", "num_updates": "80600", "lr": "9.28687e-05", "gnorm": "0.862", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "24107"} +[2022-07-30 17:32:37,947][train_inner][INFO] - {"epoch": 2, "update": 1.57, "loss": "2.589", "ppl": "6.02", "wps": "397817", "ups": "3.35", "wpb": "118885", "bsz": "256", "num_updates": "80800", "lr": "9.28485e-05", "gnorm": "0.861", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "24166"} +[2022-07-30 17:33:37,441][train_inner][INFO] - {"epoch": 2, "update": 1.574, "loss": "2.604", "ppl": "6.08", "wps": "398667", "ups": "3.36", "wpb": "118590", "bsz": "256", "num_updates": "81000", "lr": "9.28283e-05", "gnorm": "0.865", "loss_scale": "32", "train_wall": "59", "gb_free": "26.2", "wall": "24226"} +[2022-07-30 17:34:36,977][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "2.592", "ppl": "6.03", "wps": "396479", "ups": "3.36", "wpb": "118024", "bsz": "256", "num_updates": "81200", "lr": "9.28081e-05", "gnorm": "0.867", "loss_scale": "32", "train_wall": "59", "gb_free": "28.9", "wall": "24285"} +[2022-07-30 17:35:36,262][train_inner][INFO] - {"epoch": 2, "update": 1.582, "loss": "2.594", "ppl": "6.04", "wps": "398038", "ups": "3.37", "wpb": "117987", "bsz": "256", "num_updates": "81400", "lr": "9.27879e-05", "gnorm": "0.869", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "24345"} +[2022-07-30 17:36:35,535][train_inner][INFO] - {"epoch": 2, "update": 1.585, "loss": "2.596", "ppl": "6.05", "wps": "398738", "ups": "3.37", "wpb": "118170", "bsz": "256", "num_updates": "81600", "lr": "9.27677e-05", "gnorm": "0.868", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "24404"} +[2022-07-30 17:37:35,044][train_inner][INFO] - {"epoch": 2, "update": 1.589, "loss": "2.598", "ppl": "6.05", "wps": "395717", "ups": "3.36", "wpb": "117744", "bsz": "256", "num_updates": "81800", "lr": "9.27475e-05", "gnorm": "0.868", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "24463"} +[2022-07-30 17:38:34,500][train_inner][INFO] - {"epoch": 2, "update": 1.593, "loss": "2.59", "ppl": "6.02", "wps": "397243", "ups": "3.36", "wpb": "118092", "bsz": "256", "num_updates": "82000", "lr": "9.27273e-05", "gnorm": "0.868", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "24523"} +[2022-07-30 17:39:33,912][train_inner][INFO] - {"epoch": 2, "update": 1.597, "loss": "2.588", "ppl": "6.01", "wps": "398781", "ups": "3.37", "wpb": "118461", "bsz": "256", "num_updates": "82200", "lr": "9.27071e-05", "gnorm": "0.864", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "24582"} +[2022-07-30 17:40:33,616][train_inner][INFO] - {"epoch": 2, "update": 1.601, "loss": "2.596", "ppl": "6.05", "wps": "394760", "ups": "3.35", "wpb": "117843", "bsz": "256", "num_updates": "82400", "lr": "9.26869e-05", "gnorm": "0.871", "loss_scale": "64", "train_wall": "59", "gb_free": "24.2", "wall": "24642"} +[2022-07-30 17:41:32,669][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "2.591", "ppl": "6.03", "wps": "399716", "ups": "3.39", "wpb": "118021", "bsz": "256", "num_updates": "82600", "lr": "9.26667e-05", "gnorm": "0.869", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "24701"} +[2022-07-30 17:42:32,550][train_inner][INFO] - {"epoch": 2, "update": 1.609, "loss": "2.587", "ppl": "6.01", "wps": "394948", "ups": "3.34", "wpb": "118247", "bsz": "256", "num_updates": "82800", "lr": "9.26465e-05", "gnorm": "0.868", "loss_scale": "64", "train_wall": "60", "gb_free": "22.1", "wall": "24761"} +[2022-07-30 17:43:32,276][train_inner][INFO] - {"epoch": 2, "update": 1.613, "loss": "2.585", "ppl": "6", "wps": "396964", "ups": "3.35", "wpb": "118544", "bsz": "256", "num_updates": "83000", "lr": "9.26263e-05", "gnorm": "0.866", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "24821"} +[2022-07-30 17:44:31,782][train_inner][INFO] - {"epoch": 2, "update": 1.617, "loss": "2.582", "ppl": "5.99", "wps": "399192", "ups": "3.36", "wpb": "118771", "bsz": "256", "num_updates": "83200", "lr": "9.26061e-05", "gnorm": "0.865", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "24880"} +[2022-07-30 17:45:31,041][train_inner][INFO] - {"epoch": 2, "update": 1.62, "loss": "2.588", "ppl": "6.01", "wps": "399481", "ups": "3.38", "wpb": "118364", "bsz": "256", "num_updates": "83400", "lr": "9.25859e-05", "gnorm": "0.866", "loss_scale": "128", "train_wall": "59", "gb_free": "24.7", "wall": "24939"} +[2022-07-30 17:46:30,424][train_inner][INFO] - {"epoch": 2, "update": 1.624, "loss": "2.586", "ppl": "6", "wps": "400398", "ups": "3.37", "wpb": "118884", "bsz": "256", "num_updates": "83600", "lr": "9.25657e-05", "gnorm": "0.866", "loss_scale": "128", "train_wall": "59", "gb_free": "25.3", "wall": "24999"} +[2022-07-30 17:47:29,288][train_inner][INFO] - {"epoch": 2, "update": 1.628, "loss": "2.593", "ppl": "6.03", "wps": "399904", "ups": "3.4", "wpb": "117698", "bsz": "256", "num_updates": "83800", "lr": "9.25455e-05", "gnorm": "0.872", "loss_scale": "128", "train_wall": "59", "gb_free": "23.5", "wall": "25058"} +[2022-07-30 17:47:33,628][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 17:48:28,780][train_inner][INFO] - {"epoch": 2, "update": 1.632, "loss": "2.588", "ppl": "6.01", "wps": "394480", "ups": "3.36", "wpb": "117341", "bsz": "256", "num_updates": "84000", "lr": "9.25253e-05", "gnorm": "0.876", "loss_scale": "64", "train_wall": "59", "gb_free": "26.2", "wall": "25117"} +[2022-07-30 17:49:10,565][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 17:49:28,355][train_inner][INFO] - {"epoch": 2, "update": 1.636, "loss": "2.578", "ppl": "5.97", "wps": "399280", "ups": "3.36", "wpb": "118935", "bsz": "256", "num_updates": "84200", "lr": "9.25051e-05", "gnorm": "0.868", "loss_scale": "32", "train_wall": "59", "gb_free": "27.7", "wall": "25177"} +[2022-07-30 17:50:27,393][train_inner][INFO] - {"epoch": 2, "update": 1.64, "loss": "2.587", "ppl": "6.01", "wps": "398852", "ups": "3.39", "wpb": "117736", "bsz": "256", "num_updates": "84400", "lr": "9.24848e-05", "gnorm": "0.873", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "25236"} +[2022-07-30 17:51:26,491][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "2.578", "ppl": "5.97", "wps": "401438", "ups": "3.38", "wpb": "118619", "bsz": "256", "num_updates": "84600", "lr": "9.24646e-05", "gnorm": "0.87", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "25295"} +[2022-07-30 17:52:25,869][train_inner][INFO] - {"epoch": 2, "update": 1.648, "loss": "2.574", "ppl": "5.96", "wps": "398322", "ups": "3.37", "wpb": "118257", "bsz": "256", "num_updates": "84800", "lr": "9.24444e-05", "gnorm": "0.868", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "25354"} +[2022-07-30 17:53:25,455][train_inner][INFO] - {"epoch": 2, "update": 1.652, "loss": "2.58", "ppl": "5.98", "wps": "398470", "ups": "3.36", "wpb": "118715", "bsz": "256", "num_updates": "85000", "lr": "9.24242e-05", "gnorm": "0.868", "loss_scale": "32", "train_wall": "59", "gb_free": "23.3", "wall": "25414"} +[2022-07-30 17:54:24,945][train_inner][INFO] - {"epoch": 2, "update": 1.655, "loss": "2.582", "ppl": "5.99", "wps": "396918", "ups": "3.36", "wpb": "118063", "bsz": "256", "num_updates": "85200", "lr": "9.2404e-05", "gnorm": "0.871", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "25473"} +[2022-07-30 17:55:23,888][train_inner][INFO] - {"epoch": 2, "update": 1.659, "loss": "2.578", "ppl": "5.97", "wps": "399821", "ups": "3.39", "wpb": "117832", "bsz": "256", "num_updates": "85400", "lr": "9.23838e-05", "gnorm": "0.873", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "25532"} +[2022-07-30 17:56:23,216][train_inner][INFO] - {"epoch": 2, "update": 1.663, "loss": "2.573", "ppl": "5.95", "wps": "397634", "ups": "3.37", "wpb": "117953", "bsz": "256", "num_updates": "85600", "lr": "9.23636e-05", "gnorm": "0.873", "loss_scale": "32", "train_wall": "59", "gb_free": "28", "wall": "25592"} +[2022-07-30 17:57:23,508][train_inner][INFO] - {"epoch": 2, "update": 1.667, "loss": "2.58", "ppl": "5.98", "wps": "389779", "ups": "3.32", "wpb": "117502", "bsz": "256", "num_updates": "85800", "lr": "9.23434e-05", "gnorm": "0.876", "loss_scale": "32", "train_wall": "60", "gb_free": "22.2", "wall": "25652"} +[2022-07-30 17:58:22,734][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "2.573", "ppl": "5.95", "wps": "398812", "ups": "3.38", "wpb": "118098", "bsz": "256", "num_updates": "86000", "lr": "9.23232e-05", "gnorm": "0.873", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "25711"} +[2022-07-30 17:59:22,538][train_inner][INFO] - {"epoch": 2, "update": 1.675, "loss": "2.573", "ppl": "5.95", "wps": "397946", "ups": "3.34", "wpb": "118994", "bsz": "256", "num_updates": "86200", "lr": "9.2303e-05", "gnorm": "0.868", "loss_scale": "64", "train_wall": "59", "gb_free": "23.5", "wall": "25771"} +[2022-07-30 18:00:21,795][train_inner][INFO] - {"epoch": 2, "update": 1.679, "loss": "2.572", "ppl": "5.95", "wps": "399763", "ups": "3.38", "wpb": "118443", "bsz": "256", "num_updates": "86400", "lr": "9.22828e-05", "gnorm": "0.872", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "25830"} +[2022-07-30 18:01:21,261][train_inner][INFO] - {"epoch": 2, "update": 1.683, "loss": "2.576", "ppl": "5.96", "wps": "396099", "ups": "3.36", "wpb": "117770", "bsz": "256", "num_updates": "86600", "lr": "9.22626e-05", "gnorm": "0.874", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "25890"} +[2022-07-30 18:02:20,522][train_inner][INFO] - {"epoch": 2, "update": 1.687, "loss": "2.569", "ppl": "5.94", "wps": "398778", "ups": "3.37", "wpb": "118161", "bsz": "256", "num_updates": "86800", "lr": "9.22424e-05", "gnorm": "0.875", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "25949"} +[2022-07-30 18:03:20,350][train_inner][INFO] - {"epoch": 2, "update": 1.69, "loss": "2.562", "ppl": "5.9", "wps": "398544", "ups": "3.34", "wpb": "119218", "bsz": "256", "num_updates": "87000", "lr": "9.22222e-05", "gnorm": "0.87", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "26009"} +[2022-07-30 18:04:20,071][train_inner][INFO] - {"epoch": 2, "update": 1.694, "loss": "2.571", "ppl": "5.94", "wps": "396055", "ups": "3.35", "wpb": "118263", "bsz": "256", "num_updates": "87200", "lr": "9.2202e-05", "gnorm": "0.871", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "26069"} +[2022-07-30 18:05:19,096][train_inner][INFO] - {"epoch": 2, "update": 1.698, "loss": "2.57", "ppl": "5.94", "wps": "400714", "ups": "3.39", "wpb": "118261", "bsz": "256", "num_updates": "87400", "lr": "9.21818e-05", "gnorm": "0.875", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "26128"} +[2022-07-30 18:06:18,677][train_inner][INFO] - {"epoch": 2, "update": 1.702, "loss": "2.567", "ppl": "5.92", "wps": "397500", "ups": "3.36", "wpb": "118415", "bsz": "256", "num_updates": "87600", "lr": "9.21616e-05", "gnorm": "0.872", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "26187"} +[2022-07-30 18:07:18,290][train_inner][INFO] - {"epoch": 2, "update": 1.706, "loss": "2.565", "ppl": "5.92", "wps": "396643", "ups": "3.35", "wpb": "118226", "bsz": "256", "num_updates": "87800", "lr": "9.21414e-05", "gnorm": "0.874", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "26247"} +[2022-07-30 18:08:17,641][train_inner][INFO] - {"epoch": 2, "update": 1.71, "loss": "2.565", "ppl": "5.92", "wps": "397505", "ups": "3.37", "wpb": "117960", "bsz": "256", "num_updates": "88000", "lr": "9.21212e-05", "gnorm": "0.876", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "26306"} +[2022-07-30 18:09:17,388][train_inner][INFO] - {"epoch": 2, "update": 1.714, "loss": "2.565", "ppl": "5.92", "wps": "395054", "ups": "3.35", "wpb": "118016", "bsz": "256", "num_updates": "88200", "lr": "9.2101e-05", "gnorm": "0.88", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "26366"} +[2022-07-30 18:10:16,630][train_inner][INFO] - {"epoch": 2, "update": 1.718, "loss": "2.561", "ppl": "5.9", "wps": "399477", "ups": "3.38", "wpb": "118328", "bsz": "256", "num_updates": "88400", "lr": "9.20808e-05", "gnorm": "0.875", "loss_scale": "128", "train_wall": "59", "gb_free": "22.5", "wall": "26425"} +[2022-07-30 18:11:16,407][train_inner][INFO] - {"epoch": 2, "update": 1.721, "loss": "2.568", "ppl": "5.93", "wps": "397789", "ups": "3.35", "wpb": "118892", "bsz": "256", "num_updates": "88600", "lr": "9.20606e-05", "gnorm": "0.873", "loss_scale": "128", "train_wall": "59", "gb_free": "23.4", "wall": "26485"} +[2022-07-30 18:11:19,423][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 18:12:16,656][train_inner][INFO] - {"epoch": 2, "update": 1.725, "loss": "2.552", "ppl": "5.87", "wps": "395303", "ups": "3.32", "wpb": "119084", "bsz": "256", "num_updates": "88800", "lr": "9.20404e-05", "gnorm": "0.87", "loss_scale": "64", "train_wall": "60", "gb_free": "25.1", "wall": "26545"} +[2022-07-30 18:13:16,123][train_inner][INFO] - {"epoch": 2, "update": 1.729, "loss": "2.569", "ppl": "5.93", "wps": "396466", "ups": "3.36", "wpb": "117881", "bsz": "256", "num_updates": "89000", "lr": "9.20202e-05", "gnorm": "0.878", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "26605"} +[2022-07-30 18:14:15,888][train_inner][INFO] - {"epoch": 2, "update": 1.733, "loss": "2.561", "ppl": "5.9", "wps": "395091", "ups": "3.35", "wpb": "118063", "bsz": "256", "num_updates": "89200", "lr": "9.2e-05", "gnorm": "0.878", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "26664"} +[2022-07-30 18:15:15,050][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "2.571", "ppl": "5.94", "wps": "397926", "ups": "3.38", "wpb": "117709", "bsz": "256", "num_updates": "89400", "lr": "9.19798e-05", "gnorm": "0.879", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "26723"} +[2022-07-30 18:16:14,386][train_inner][INFO] - {"epoch": 2, "update": 1.741, "loss": "2.563", "ppl": "5.91", "wps": "397363", "ups": "3.37", "wpb": "117890", "bsz": "256", "num_updates": "89600", "lr": "9.19596e-05", "gnorm": "0.879", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "26783"} +[2022-07-30 18:17:13,973][train_inner][INFO] - {"epoch": 2, "update": 1.745, "loss": "2.56", "ppl": "5.9", "wps": "394540", "ups": "3.36", "wpb": "117546", "bsz": "256", "num_updates": "89800", "lr": "9.19394e-05", "gnorm": "0.879", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "26842"} +[2022-07-30 18:18:13,478][train_inner][INFO] - {"epoch": 2, "update": 1.749, "loss": "2.556", "ppl": "5.88", "wps": "396958", "ups": "3.36", "wpb": "118104", "bsz": "256", "num_updates": "90000", "lr": "9.19192e-05", "gnorm": "0.877", "loss_scale": "64", "train_wall": "59", "gb_free": "26.7", "wall": "26902"} +[2022-07-30 18:19:13,070][train_inner][INFO] - {"epoch": 2, "update": 1.753, "loss": "2.552", "ppl": "5.87", "wps": "397389", "ups": "3.36", "wpb": "118405", "bsz": "256", "num_updates": "90200", "lr": "9.1899e-05", "gnorm": "0.876", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "26961"} +[2022-07-30 18:20:12,932][train_inner][INFO] - {"epoch": 2, "update": 1.756, "loss": "2.551", "ppl": "5.86", "wps": "395399", "ups": "3.34", "wpb": "118346", "bsz": "256", "num_updates": "90400", "lr": "9.18788e-05", "gnorm": "0.875", "loss_scale": "64", "train_wall": "60", "gb_free": "22", "wall": "27021"} +[2022-07-30 18:21:13,296][train_inner][INFO] - {"epoch": 2, "update": 1.76, "loss": "2.551", "ppl": "5.86", "wps": "392151", "ups": "3.31", "wpb": "118359", "bsz": "256", "num_updates": "90600", "lr": "9.18586e-05", "gnorm": "0.877", "loss_scale": "64", "train_wall": "60", "gb_free": "21.5", "wall": "27082"} +[2022-07-30 18:22:12,416][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "2.554", "ppl": "5.87", "wps": "398356", "ups": "3.38", "wpb": "117753", "bsz": "256", "num_updates": "90800", "lr": "9.18384e-05", "gnorm": "0.879", "loss_scale": "128", "train_wall": "59", "gb_free": "27.4", "wall": "27141"} +[2022-07-30 18:23:11,926][train_inner][INFO] - {"epoch": 2, "update": 1.768, "loss": "2.553", "ppl": "5.87", "wps": "397504", "ups": "3.36", "wpb": "118276", "bsz": "256", "num_updates": "91000", "lr": "9.18182e-05", "gnorm": "0.878", "loss_scale": "128", "train_wall": "59", "gb_free": "23.7", "wall": "27200"} +[2022-07-30 18:23:44,652][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 18:24:11,792][train_inner][INFO] - {"epoch": 2, "update": 1.772, "loss": "2.555", "ppl": "5.88", "wps": "395044", "ups": "3.34", "wpb": "118247", "bsz": "256", "num_updates": "91200", "lr": "9.1798e-05", "gnorm": "0.878", "loss_scale": "64", "train_wall": "60", "gb_free": "24.5", "wall": "27260"} +[2022-07-30 18:25:11,511][train_inner][INFO] - {"epoch": 2, "update": 1.776, "loss": "2.546", "ppl": "5.84", "wps": "397827", "ups": "3.35", "wpb": "118788", "bsz": "256", "num_updates": "91400", "lr": "9.17778e-05", "gnorm": "0.875", "loss_scale": "64", "train_wall": "59", "gb_free": "24.9", "wall": "27320"} +[2022-07-30 18:26:11,016][train_inner][INFO] - {"epoch": 2, "update": 1.78, "loss": "2.552", "ppl": "5.86", "wps": "397710", "ups": "3.36", "wpb": "118328", "bsz": "256", "num_updates": "91600", "lr": "9.17576e-05", "gnorm": "0.876", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "27379"} +[2022-07-30 18:27:10,756][train_inner][INFO] - {"epoch": 2, "update": 1.784, "loss": "2.551", "ppl": "5.86", "wps": "395944", "ups": "3.35", "wpb": "118269", "bsz": "256", "num_updates": "91800", "lr": "9.17374e-05", "gnorm": "0.878", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "27439"} +[2022-07-30 18:28:10,211][train_inner][INFO] - {"epoch": 2, "update": 1.788, "loss": "2.542", "ppl": "5.82", "wps": "398816", "ups": "3.36", "wpb": "118556", "bsz": "256", "num_updates": "92000", "lr": "9.17172e-05", "gnorm": "0.877", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "27499"} +[2022-07-30 18:29:09,695][train_inner][INFO] - {"epoch": 2, "update": 1.791, "loss": "2.545", "ppl": "5.84", "wps": "397880", "ups": "3.36", "wpb": "118338", "bsz": "256", "num_updates": "92200", "lr": "9.1697e-05", "gnorm": "0.88", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "27558"} +[2022-07-30 18:30:08,598][train_inner][INFO] - {"epoch": 2, "update": 1.795, "loss": "2.541", "ppl": "5.82", "wps": "401141", "ups": "3.4", "wpb": "118142", "bsz": "256", "num_updates": "92400", "lr": "9.16768e-05", "gnorm": "0.876", "loss_scale": "64", "train_wall": "59", "gb_free": "23", "wall": "27617"} +[2022-07-30 18:31:08,218][train_inner][INFO] - {"epoch": 2, "update": 1.799, "loss": "2.553", "ppl": "5.87", "wps": "394225", "ups": "3.35", "wpb": "117518", "bsz": "256", "num_updates": "92600", "lr": "9.16566e-05", "gnorm": "0.882", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "27677"} +[2022-07-30 18:31:34,244][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 18:32:07,759][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "2.545", "ppl": "5.84", "wps": "396846", "ups": "3.36", "wpb": "118142", "bsz": "256", "num_updates": "92800", "lr": "9.16364e-05", "gnorm": "0.878", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "27736"} +[2022-07-30 18:33:06,937][train_inner][INFO] - {"epoch": 2, "update": 1.807, "loss": "2.543", "ppl": "5.83", "wps": "401675", "ups": "3.38", "wpb": "118850", "bsz": "256", "num_updates": "93000", "lr": "9.16162e-05", "gnorm": "0.876", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "27795"} +[2022-07-30 18:34:06,229][train_inner][INFO] - {"epoch": 2, "update": 1.811, "loss": "2.539", "ppl": "5.81", "wps": "400063", "ups": "3.37", "wpb": "118602", "bsz": "256", "num_updates": "93200", "lr": "9.1596e-05", "gnorm": "0.877", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "27855"} +[2022-07-30 18:35:06,694][train_inner][INFO] - {"epoch": 2, "update": 1.815, "loss": "2.544", "ppl": "5.83", "wps": "390284", "ups": "3.31", "wpb": "117992", "bsz": "256", "num_updates": "93400", "lr": "9.15758e-05", "gnorm": "0.882", "loss_scale": "32", "train_wall": "60", "gb_free": "21.6", "wall": "27915"} +[2022-07-30 18:36:06,480][train_inner][INFO] - {"epoch": 2, "update": 1.819, "loss": "2.542", "ppl": "5.82", "wps": "395239", "ups": "3.35", "wpb": "118148", "bsz": "256", "num_updates": "93600", "lr": "9.15556e-05", "gnorm": "0.88", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "27975"} +[2022-07-30 18:37:05,606][train_inner][INFO] - {"epoch": 2, "update": 1.823, "loss": "2.541", "ppl": "5.82", "wps": "401491", "ups": "3.38", "wpb": "118691", "bsz": "256", "num_updates": "93800", "lr": "9.15354e-05", "gnorm": "0.878", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "28034"} +[2022-07-30 18:38:05,292][train_inner][INFO] - {"epoch": 2, "update": 1.826, "loss": "2.54", "ppl": "5.81", "wps": "396016", "ups": "3.35", "wpb": "118182", "bsz": "256", "num_updates": "94000", "lr": "9.15152e-05", "gnorm": "0.883", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "28094"} +[2022-07-30 18:39:05,052][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "2.539", "ppl": "5.81", "wps": "397384", "ups": "3.35", "wpb": "118737", "bsz": "256", "num_updates": "94200", "lr": "9.14949e-05", "gnorm": "0.879", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "28153"} +[2022-07-30 18:40:04,660][train_inner][INFO] - {"epoch": 2, "update": 1.834, "loss": "2.532", "ppl": "5.78", "wps": "398930", "ups": "3.36", "wpb": "118896", "bsz": "256", "num_updates": "94400", "lr": "9.14747e-05", "gnorm": "0.878", "loss_scale": "32", "train_wall": "59", "gb_free": "26.3", "wall": "28213"} +[2022-07-30 18:41:04,721][train_inner][INFO] - {"epoch": 2, "update": 1.838, "loss": "2.537", "ppl": "5.8", "wps": "395218", "ups": "3.33", "wpb": "118686", "bsz": "256", "num_updates": "94600", "lr": "9.14545e-05", "gnorm": "0.88", "loss_scale": "32", "train_wall": "60", "gb_free": "25.5", "wall": "28273"} +[2022-07-30 18:42:04,349][train_inner][INFO] - {"epoch": 2, "update": 1.842, "loss": "2.537", "ppl": "5.8", "wps": "396407", "ups": "3.35", "wpb": "118184", "bsz": "256", "num_updates": "94800", "lr": "9.14343e-05", "gnorm": "0.882", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "28333"} +[2022-07-30 18:43:03,759][train_inner][INFO] - {"epoch": 2, "update": 1.846, "loss": "2.542", "ppl": "5.82", "wps": "397957", "ups": "3.37", "wpb": "118212", "bsz": "256", "num_updates": "95000", "lr": "9.14141e-05", "gnorm": "0.883", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "28392"} +[2022-07-30 18:44:03,129][train_inner][INFO] - {"epoch": 2, "update": 1.85, "loss": "2.539", "ppl": "5.81", "wps": "398440", "ups": "3.37", "wpb": "118276", "bsz": "256", "num_updates": "95200", "lr": "9.13939e-05", "gnorm": "0.883", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "28452"} +[2022-07-30 18:45:02,811][train_inner][INFO] - {"epoch": 2, "update": 1.854, "loss": "2.539", "ppl": "5.81", "wps": "395519", "ups": "3.35", "wpb": "118025", "bsz": "256", "num_updates": "95400", "lr": "9.13737e-05", "gnorm": "0.883", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "28511"} +[2022-07-30 18:45:27,299][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 18:46:02,613][train_inner][INFO] - {"epoch": 2, "update": 1.858, "loss": "2.531", "ppl": "5.78", "wps": "396425", "ups": "3.34", "wpb": "118534", "bsz": "256", "num_updates": "95600", "lr": "9.13535e-05", "gnorm": "0.881", "loss_scale": "32", "train_wall": "59", "gb_free": "26", "wall": "28571"} +[2022-07-30 18:47:02,923][train_inner][INFO] - {"epoch": 2, "update": 1.861, "loss": "2.526", "ppl": "5.76", "wps": "391780", "ups": "3.32", "wpb": "118141", "bsz": "256", "num_updates": "95800", "lr": "9.13333e-05", "gnorm": "0.882", "loss_scale": "32", "train_wall": "60", "gb_free": "23.5", "wall": "28631"} +[2022-07-30 18:48:02,160][train_inner][INFO] - {"epoch": 2, "update": 1.865, "loss": "2.53", "ppl": "5.77", "wps": "398949", "ups": "3.38", "wpb": "118162", "bsz": "256", "num_updates": "96000", "lr": "9.13131e-05", "gnorm": "0.883", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "28691"} +[2022-07-30 18:49:01,405][train_inner][INFO] - {"epoch": 2, "update": 1.869, "loss": "2.536", "ppl": "5.8", "wps": "398800", "ups": "3.38", "wpb": "118134", "bsz": "256", "num_updates": "96200", "lr": "9.12929e-05", "gnorm": "0.886", "loss_scale": "32", "train_wall": "59", "gb_free": "29", "wall": "28750"} +[2022-07-30 18:50:00,374][train_inner][INFO] - {"epoch": 2, "update": 1.873, "loss": "2.535", "ppl": "5.79", "wps": "399218", "ups": "3.39", "wpb": "117706", "bsz": "256", "num_updates": "96400", "lr": "9.12727e-05", "gnorm": "0.886", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "28809"} +[2022-07-30 18:50:59,999][train_inner][INFO] - {"epoch": 2, "update": 1.877, "loss": "2.533", "ppl": "5.79", "wps": "396642", "ups": "3.35", "wpb": "118247", "bsz": "256", "num_updates": "96600", "lr": "9.12525e-05", "gnorm": "0.883", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "28868"} +[2022-07-30 18:51:59,747][train_inner][INFO] - {"epoch": 2, "update": 1.881, "loss": "2.531", "ppl": "5.78", "wps": "395889", "ups": "3.35", "wpb": "118269", "bsz": "256", "num_updates": "96800", "lr": "9.12323e-05", "gnorm": "0.883", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "28928"} +[2022-07-30 18:52:59,539][train_inner][INFO] - {"epoch": 2, "update": 1.885, "loss": "2.534", "ppl": "5.79", "wps": "395231", "ups": "3.34", "wpb": "118156", "bsz": "256", "num_updates": "97000", "lr": "9.12121e-05", "gnorm": "0.885", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "28988"} +[2022-07-30 18:53:58,571][train_inner][INFO] - {"epoch": 2, "update": 1.889, "loss": "2.538", "ppl": "5.81", "wps": "399161", "ups": "3.39", "wpb": "117816", "bsz": "256", "num_updates": "97200", "lr": "9.11919e-05", "gnorm": "0.886", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "29047"} +[2022-07-30 18:54:57,851][train_inner][INFO] - {"epoch": 2, "update": 1.892, "loss": "2.527", "ppl": "5.77", "wps": "398513", "ups": "3.37", "wpb": "118119", "bsz": "256", "num_updates": "97400", "lr": "9.11717e-05", "gnorm": "0.885", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "29106"} +[2022-07-30 18:55:57,125][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "2.529", "ppl": "5.77", "wps": "400244", "ups": "3.37", "wpb": "118620", "bsz": "256", "num_updates": "97600", "lr": "9.11515e-05", "gnorm": "0.883", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "29166"} +[2022-07-30 18:56:56,662][train_inner][INFO] - {"epoch": 2, "update": 1.9, "loss": "2.521", "ppl": "5.74", "wps": "397119", "ups": "3.36", "wpb": "118214", "bsz": "256", "num_updates": "97800", "lr": "9.11313e-05", "gnorm": "0.886", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "29225"} +[2022-07-30 18:57:56,017][train_inner][INFO] - {"epoch": 2, "update": 1.904, "loss": "2.532", "ppl": "5.78", "wps": "398165", "ups": "3.37", "wpb": "118166", "bsz": "256", "num_updates": "98000", "lr": "9.11111e-05", "gnorm": "0.886", "loss_scale": "64", "train_wall": "59", "gb_free": "25.7", "wall": "29284"} +[2022-07-30 18:58:55,819][train_inner][INFO] - {"epoch": 2, "update": 1.908, "loss": "2.522", "ppl": "5.75", "wps": "398248", "ups": "3.34", "wpb": "119078", "bsz": "256", "num_updates": "98200", "lr": "9.10909e-05", "gnorm": "0.882", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "29344"} +[2022-07-30 18:59:55,100][train_inner][INFO] - {"epoch": 2, "update": 1.912, "loss": "2.523", "ppl": "5.75", "wps": "398753", "ups": "3.37", "wpb": "118191", "bsz": "256", "num_updates": "98400", "lr": "9.10707e-05", "gnorm": "0.885", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "29404"} +[2022-07-30 19:00:54,607][train_inner][INFO] - {"epoch": 2, "update": 1.916, "loss": "2.529", "ppl": "5.77", "wps": "396941", "ups": "3.36", "wpb": "118103", "bsz": "256", "num_updates": "98600", "lr": "9.10505e-05", "gnorm": "0.886", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "29463"} +[2022-07-30 19:01:53,907][train_inner][INFO] - {"epoch": 2, "update": 1.92, "loss": "2.521", "ppl": "5.74", "wps": "399153", "ups": "3.37", "wpb": "118348", "bsz": "256", "num_updates": "98800", "lr": "9.10303e-05", "gnorm": "0.887", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "29522"} +[2022-07-30 19:02:53,361][train_inner][INFO] - {"epoch": 2, "update": 1.924, "loss": "2.526", "ppl": "5.76", "wps": "396769", "ups": "3.36", "wpb": "117946", "bsz": "256", "num_updates": "99000", "lr": "9.10101e-05", "gnorm": "0.887", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "29582"} +[2022-07-30 19:02:57,452][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 19:03:52,946][train_inner][INFO] - {"epoch": 2, "update": 1.927, "loss": "2.525", "ppl": "5.76", "wps": "395719", "ups": "3.36", "wpb": "117895", "bsz": "256", "num_updates": "99200", "lr": "9.09899e-05", "gnorm": "0.888", "loss_scale": "32", "train_wall": "59", "gb_free": "25.4", "wall": "29641"} +[2022-07-30 19:04:52,609][train_inner][INFO] - {"epoch": 2, "update": 1.931, "loss": "2.516", "ppl": "5.72", "wps": "396843", "ups": "3.35", "wpb": "118384", "bsz": "256", "num_updates": "99400", "lr": "9.09697e-05", "gnorm": "0.883", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "29701"} +[2022-07-30 19:05:51,882][train_inner][INFO] - {"epoch": 2, "update": 1.935, "loss": "2.525", "ppl": "5.76", "wps": "397920", "ups": "3.37", "wpb": "117929", "bsz": "256", "num_updates": "99600", "lr": "9.09495e-05", "gnorm": "0.892", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "29760"} +[2022-07-30 19:06:51,236][train_inner][INFO] - {"epoch": 2, "update": 1.939, "loss": "2.521", "ppl": "5.74", "wps": "400007", "ups": "3.37", "wpb": "118708", "bsz": "256", "num_updates": "99800", "lr": "9.09293e-05", "gnorm": "0.887", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "29820"} +[2022-07-30 19:07:50,667][train_inner][INFO] - {"epoch": 2, "update": 1.943, "loss": "2.516", "ppl": "5.72", "wps": "396843", "ups": "3.37", "wpb": "117924", "bsz": "256", "num_updates": "100000", "lr": "9.09091e-05", "gnorm": "0.887", "loss_scale": "32", "train_wall": "59", "gb_free": "23.8", "wall": "29879"} +[2022-07-30 19:07:50,668][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-30 19:08:13,367][valid][INFO] - {"epoch": 2, "valid_loss": "2.402", "valid_ppl": "5.29", "valid_wps": "1.61697e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "100000", "valid_best_loss": "2.402"} +[2022-07-30 19:08:13,370][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 100000 updates +[2022-07-30 19:08:13,371][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_2_100000.pt +[2022-07-30 19:08:20,830][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_2_100000.pt +[2022-07-30 19:08:41,421][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_2_100000.pt (epoch 2 @ 100000 updates, score 2.402) (writing took 28.051131883636117 seconds) +[2022-07-30 19:09:40,615][train_inner][INFO] - {"epoch": 2, "update": 1.947, "loss": "2.515", "ppl": "5.71", "wps": "214577", "ups": "1.82", "wpb": "117961", "bsz": "256", "num_updates": "100200", "lr": "9.08889e-05", "gnorm": "0.887", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "29989"} +[2022-07-30 19:10:40,455][train_inner][INFO] - {"epoch": 2, "update": 1.951, "loss": "2.519", "ppl": "5.73", "wps": "396007", "ups": "3.34", "wpb": "118484", "bsz": "256", "num_updates": "100400", "lr": "9.08687e-05", "gnorm": "0.884", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "30049"} +[2022-07-30 19:11:39,386][train_inner][INFO] - {"epoch": 2, "update": 1.955, "loss": "2.519", "ppl": "5.73", "wps": "400168", "ups": "3.39", "wpb": "117910", "bsz": "256", "num_updates": "100600", "lr": "9.08485e-05", "gnorm": "0.888", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "30108"} +[2022-07-30 19:12:39,852][train_inner][INFO] - {"epoch": 2, "update": 1.959, "loss": "2.518", "ppl": "5.73", "wps": "391387", "ups": "3.31", "wpb": "118328", "bsz": "256", "num_updates": "100800", "lr": "9.08283e-05", "gnorm": "0.888", "loss_scale": "32", "train_wall": "60", "gb_free": "23.5", "wall": "30168"} +[2022-07-30 19:13:39,364][train_inner][INFO] - {"epoch": 2, "update": 1.962, "loss": "2.513", "ppl": "5.71", "wps": "398129", "ups": "3.36", "wpb": "118466", "bsz": "256", "num_updates": "101000", "lr": "9.08081e-05", "gnorm": "0.885", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "30228"} +[2022-07-30 19:14:38,569][train_inner][INFO] - {"epoch": 2, "update": 1.966, "loss": "2.508", "ppl": "5.69", "wps": "400438", "ups": "3.38", "wpb": "118539", "bsz": "256", "num_updates": "101200", "lr": "9.07879e-05", "gnorm": "0.885", "loss_scale": "64", "train_wall": "59", "gb_free": "24.4", "wall": "30287"} +[2022-07-30 19:15:38,036][train_inner][INFO] - {"epoch": 2, "update": 1.97, "loss": "2.52", "ppl": "5.74", "wps": "397564", "ups": "3.36", "wpb": "118208", "bsz": "256", "num_updates": "101400", "lr": "9.07677e-05", "gnorm": "0.889", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "30346"} +[2022-07-30 19:16:37,751][train_inner][INFO] - {"epoch": 2, "update": 1.974, "loss": "2.512", "ppl": "5.7", "wps": "396690", "ups": "3.35", "wpb": "118442", "bsz": "256", "num_updates": "101600", "lr": "9.07475e-05", "gnorm": "0.885", "loss_scale": "64", "train_wall": "59", "gb_free": "26.8", "wall": "30406"} +[2022-07-30 19:17:36,949][train_inner][INFO] - {"epoch": 2, "update": 1.978, "loss": "2.509", "ppl": "5.69", "wps": "402916", "ups": "3.38", "wpb": "119258", "bsz": "256", "num_updates": "101800", "lr": "9.07273e-05", "gnorm": "0.884", "loss_scale": "64", "train_wall": "59", "gb_free": "24.9", "wall": "30465"} +[2022-07-30 19:18:36,616][train_inner][INFO] - {"epoch": 2, "update": 1.982, "loss": "2.514", "ppl": "5.71", "wps": "397534", "ups": "3.35", "wpb": "118597", "bsz": "256", "num_updates": "102000", "lr": "9.07071e-05", "gnorm": "0.887", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "30525"} +[2022-07-30 19:18:38,997][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 19:19:36,192][train_inner][INFO] - {"epoch": 2, "update": 1.986, "loss": "2.509", "ppl": "5.69", "wps": "396956", "ups": "3.36", "wpb": "118245", "bsz": "256", "num_updates": "102200", "lr": "9.06869e-05", "gnorm": "0.889", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "30585"} +[2022-07-30 19:20:35,382][train_inner][INFO] - {"epoch": 2, "update": 1.99, "loss": "2.511", "ppl": "5.7", "wps": "399131", "ups": "3.38", "wpb": "118122", "bsz": "256", "num_updates": "102400", "lr": "9.06667e-05", "gnorm": "0.89", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "30644"} +[2022-07-30 19:21:35,081][train_inner][INFO] - {"epoch": 2, "update": 1.993, "loss": "2.519", "ppl": "5.73", "wps": "395638", "ups": "3.35", "wpb": "118094", "bsz": "256", "num_updates": "102600", "lr": "9.06465e-05", "gnorm": "0.891", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "30704"} +[2022-07-30 19:22:34,512][train_inner][INFO] - {"epoch": 2, "update": 1.997, "loss": "2.509", "ppl": "5.69", "wps": "398407", "ups": "3.37", "wpb": "118389", "bsz": "256", "num_updates": "102800", "lr": "9.06263e-05", "gnorm": "0.891", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "30763"} +[2022-07-30 19:23:14,668][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below) +[2022-07-30 19:23:14,669][train][INFO] - {"epoch": 2, "train_loss": "2.628", "train_ppl": "6.18", "train_wps": "395615", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "102935", "train_lr": "9.06126e-05", "train_gnorm": "0.861", "train_loss_scale": "32", "train_train_wall": "15241", "train_gb_free": "25.9", "train_wall": "30803"} +[2022-07-30 19:23:14,759][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-30 19:23:14,762][fairseq.trainer][INFO] - begin training epoch 3 +[2022-07-30 19:23:14,762][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-30 19:23:45,234][train_inner][INFO] - {"epoch": 3, "update": 2.001, "loss": "2.512", "ppl": "5.7", "wps": "332442", "ups": "2.83", "wpb": "117553", "bsz": "255.4", "num_updates": "103000", "lr": "9.06061e-05", "gnorm": "0.897", "loss_scale": "32", "train_wall": "61", "gb_free": "22.6", "wall": "30834"} +[2022-07-30 19:24:44,612][train_inner][INFO] - {"epoch": 3, "update": 2.005, "loss": "2.51", "ppl": "5.69", "wps": "398322", "ups": "3.37", "wpb": "118258", "bsz": "256", "num_updates": "103200", "lr": "9.05859e-05", "gnorm": "0.894", "loss_scale": "32", "train_wall": "59", "gb_free": "27.6", "wall": "30893"} +[2022-07-30 19:25:43,864][train_inner][INFO] - {"epoch": 3, "update": 2.009, "loss": "2.509", "ppl": "5.69", "wps": "397777", "ups": "3.38", "wpb": "117844", "bsz": "256", "num_updates": "103400", "lr": "9.05657e-05", "gnorm": "0.89", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "30952"} +[2022-07-30 19:26:43,501][train_inner][INFO] - {"epoch": 3, "update": 2.013, "loss": "2.5", "ppl": "5.66", "wps": "396405", "ups": "3.35", "wpb": "118201", "bsz": "256", "num_updates": "103600", "lr": "9.05455e-05", "gnorm": "0.89", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "31012"} +[2022-07-30 19:27:43,145][train_inner][INFO] - {"epoch": 3, "update": 2.017, "loss": "2.506", "ppl": "5.68", "wps": "397928", "ups": "3.35", "wpb": "118669", "bsz": "256", "num_updates": "103800", "lr": "9.05253e-05", "gnorm": "0.892", "loss_scale": "32", "train_wall": "59", "gb_free": "30.3", "wall": "31072"} +[2022-07-30 19:28:42,251][train_inner][INFO] - {"epoch": 3, "update": 2.021, "loss": "2.502", "ppl": "5.66", "wps": "398779", "ups": "3.38", "wpb": "117850", "bsz": "256", "num_updates": "104000", "lr": "9.05051e-05", "gnorm": "0.893", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "31131"} +[2022-07-30 19:29:43,082][train_inner][INFO] - {"epoch": 3, "update": 2.025, "loss": "2.505", "ppl": "5.68", "wps": "388443", "ups": "3.29", "wpb": "118147", "bsz": "256", "num_updates": "104200", "lr": "9.04848e-05", "gnorm": "0.892", "loss_scale": "64", "train_wall": "60", "gb_free": "22.4", "wall": "31192"} +[2022-07-30 19:30:42,508][train_inner][INFO] - {"epoch": 3, "update": 2.028, "loss": "2.506", "ppl": "5.68", "wps": "397578", "ups": "3.37", "wpb": "118131", "bsz": "256", "num_updates": "104400", "lr": "9.04646e-05", "gnorm": "0.892", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "31251"} +[2022-07-30 19:30:46,756][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 19:31:42,450][train_inner][INFO] - {"epoch": 3, "update": 2.032, "loss": "2.504", "ppl": "5.67", "wps": "395696", "ups": "3.34", "wpb": "118592", "bsz": "256", "num_updates": "104600", "lr": "9.04444e-05", "gnorm": "0.897", "loss_scale": "32", "train_wall": "60", "gb_free": "24", "wall": "31311"} +[2022-07-30 19:32:41,923][train_inner][INFO] - {"epoch": 3, "update": 2.036, "loss": "2.498", "ppl": "5.65", "wps": "397414", "ups": "3.36", "wpb": "118176", "bsz": "256", "num_updates": "104800", "lr": "9.04242e-05", "gnorm": "0.892", "loss_scale": "32", "train_wall": "59", "gb_free": "26.4", "wall": "31370"} +[2022-07-30 19:33:41,226][train_inner][INFO] - {"epoch": 3, "update": 2.04, "loss": "2.495", "ppl": "5.64", "wps": "398889", "ups": "3.37", "wpb": "118276", "bsz": "256", "num_updates": "105000", "lr": "9.0404e-05", "gnorm": "0.894", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "31430"} +[2022-07-30 19:34:40,746][train_inner][INFO] - {"epoch": 3, "update": 2.044, "loss": "2.494", "ppl": "5.63", "wps": "398196", "ups": "3.36", "wpb": "118502", "bsz": "256", "num_updates": "105200", "lr": "9.03838e-05", "gnorm": "0.889", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "31489"} +[2022-07-30 19:35:40,182][train_inner][INFO] - {"epoch": 3, "update": 2.048, "loss": "2.495", "ppl": "5.64", "wps": "398460", "ups": "3.36", "wpb": "118413", "bsz": "256", "num_updates": "105400", "lr": "9.03636e-05", "gnorm": "0.891", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "31549"} +[2022-07-30 19:36:39,378][train_inner][INFO] - {"epoch": 3, "update": 2.052, "loss": "2.494", "ppl": "5.63", "wps": "401407", "ups": "3.38", "wpb": "118809", "bsz": "256", "num_updates": "105600", "lr": "9.03434e-05", "gnorm": "0.889", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "31608"} +[2022-07-30 19:37:38,760][train_inner][INFO] - {"epoch": 3, "update": 2.056, "loss": "2.494", "ppl": "5.63", "wps": "397024", "ups": "3.37", "wpb": "117878", "bsz": "256", "num_updates": "105800", "lr": "9.03232e-05", "gnorm": "0.894", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "31667"} +[2022-07-30 19:38:38,340][train_inner][INFO] - {"epoch": 3, "update": 2.06, "loss": "2.495", "ppl": "5.64", "wps": "397499", "ups": "3.36", "wpb": "118415", "bsz": "256", "num_updates": "106000", "lr": "9.0303e-05", "gnorm": "0.894", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "31727"} +[2022-07-30 19:39:37,874][train_inner][INFO] - {"epoch": 3, "update": 2.063, "loss": "2.495", "ppl": "5.64", "wps": "398872", "ups": "3.36", "wpb": "118731", "bsz": "256", "num_updates": "106200", "lr": "9.02828e-05", "gnorm": "0.891", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "31786"} +[2022-07-30 19:39:56,883][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-30 19:40:38,783][train_inner][INFO] - {"epoch": 3, "update": 2.067, "loss": "2.497", "ppl": "5.65", "wps": "387403", "ups": "3.28", "wpb": "117980", "bsz": "256", "num_updates": "106400", "lr": "9.02626e-05", "gnorm": "0.896", "loss_scale": "16", "train_wall": "61", "gb_free": "24", "wall": "31847"} +[2022-07-30 19:41:38,497][train_inner][INFO] - {"epoch": 3, "update": 2.071, "loss": "2.498", "ppl": "5.65", "wps": "394910", "ups": "3.35", "wpb": "117908", "bsz": "256", "num_updates": "106600", "lr": "9.02424e-05", "gnorm": "0.897", "loss_scale": "16", "train_wall": "59", "gb_free": "25", "wall": "31907"} +[2022-07-30 19:42:37,892][train_inner][INFO] - {"epoch": 3, "update": 2.075, "loss": "2.488", "ppl": "5.61", "wps": "399493", "ups": "3.37", "wpb": "118638", "bsz": "256", "num_updates": "106800", "lr": "9.02222e-05", "gnorm": "0.893", "loss_scale": "16", "train_wall": "59", "gb_free": "24.2", "wall": "31966"} +[2022-07-30 19:43:37,400][train_inner][INFO] - {"epoch": 3, "update": 2.079, "loss": "2.488", "ppl": "5.61", "wps": "396416", "ups": "3.36", "wpb": "117950", "bsz": "256", "num_updates": "107000", "lr": "9.0202e-05", "gnorm": "0.898", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "32026"} +[2022-07-30 19:44:36,949][train_inner][INFO] - {"epoch": 3, "update": 2.083, "loss": "2.494", "ppl": "5.64", "wps": "399452", "ups": "3.36", "wpb": "118933", "bsz": "256", "num_updates": "107200", "lr": "9.01818e-05", "gnorm": "0.893", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "32085"} +[2022-07-30 19:45:36,426][train_inner][INFO] - {"epoch": 3, "update": 2.087, "loss": "2.486", "ppl": "5.6", "wps": "396836", "ups": "3.36", "wpb": "118012", "bsz": "256", "num_updates": "107400", "lr": "9.01616e-05", "gnorm": "0.893", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "32145"} +[2022-07-30 19:46:35,658][train_inner][INFO] - {"epoch": 3, "update": 2.091, "loss": "2.497", "ppl": "5.64", "wps": "397089", "ups": "3.38", "wpb": "117602", "bsz": "256", "num_updates": "107600", "lr": "9.01414e-05", "gnorm": "0.9", "loss_scale": "16", "train_wall": "59", "gb_free": "25.6", "wall": "32204"} +[2022-07-30 19:47:36,473][train_inner][INFO] - {"epoch": 3, "update": 2.095, "loss": "2.493", "ppl": "5.63", "wps": "387683", "ups": "3.29", "wpb": "117883", "bsz": "256", "num_updates": "107800", "lr": "9.01212e-05", "gnorm": "0.898", "loss_scale": "16", "train_wall": "60", "gb_free": "21.7", "wall": "32265"} +[2022-07-30 19:48:36,074][train_inner][INFO] - {"epoch": 3, "update": 2.098, "loss": "2.485", "ppl": "5.6", "wps": "397353", "ups": "3.36", "wpb": "118414", "bsz": "256", "num_updates": "108000", "lr": "9.0101e-05", "gnorm": "0.895", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "32325"} +[2022-07-30 19:49:35,656][train_inner][INFO] - {"epoch": 3, "update": 2.102, "loss": "2.487", "ppl": "5.61", "wps": "395603", "ups": "3.36", "wpb": "117853", "bsz": "256", "num_updates": "108200", "lr": "9.00808e-05", "gnorm": "0.898", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "32384"} +[2022-07-30 19:50:34,949][train_inner][INFO] - {"epoch": 3, "update": 2.106, "loss": "2.486", "ppl": "5.6", "wps": "399894", "ups": "3.37", "wpb": "118554", "bsz": "256", "num_updates": "108400", "lr": "9.00606e-05", "gnorm": "0.893", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "32443"} +[2022-07-30 19:51:34,223][train_inner][INFO] - {"epoch": 3, "update": 2.11, "loss": "2.485", "ppl": "5.6", "wps": "399387", "ups": "3.37", "wpb": "118365", "bsz": "256", "num_updates": "108600", "lr": "9.00404e-05", "gnorm": "0.895", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "32503"} +[2022-07-30 19:52:34,359][train_inner][INFO] - {"epoch": 3, "update": 2.114, "loss": "2.483", "ppl": "5.59", "wps": "395654", "ups": "3.33", "wpb": "118964", "bsz": "256", "num_updates": "108800", "lr": "9.00202e-05", "gnorm": "0.894", "loss_scale": "32", "train_wall": "60", "gb_free": "25.4", "wall": "32563"} +[2022-07-30 19:53:33,764][train_inner][INFO] - {"epoch": 3, "update": 2.118, "loss": "2.48", "ppl": "5.58", "wps": "397263", "ups": "3.37", "wpb": "117996", "bsz": "256", "num_updates": "109000", "lr": "9e-05", "gnorm": "0.897", "loss_scale": "32", "train_wall": "59", "gb_free": "25.3", "wall": "32622"} +[2022-07-30 19:54:32,911][train_inner][INFO] - {"epoch": 3, "update": 2.122, "loss": "2.491", "ppl": "5.62", "wps": "400162", "ups": "3.38", "wpb": "118342", "bsz": "256", "num_updates": "109200", "lr": "8.99798e-05", "gnorm": "0.896", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "32681"} +[2022-07-30 19:55:31,979][train_inner][INFO] - {"epoch": 3, "update": 2.126, "loss": "2.489", "ppl": "5.61", "wps": "399946", "ups": "3.39", "wpb": "118118", "bsz": "256", "num_updates": "109400", "lr": "8.99596e-05", "gnorm": "0.898", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "32740"} +[2022-07-30 19:56:31,607][train_inner][INFO] - {"epoch": 3, "update": 2.129, "loss": "2.484", "ppl": "5.59", "wps": "399007", "ups": "3.35", "wpb": "118958", "bsz": "256", "num_updates": "109600", "lr": "8.99394e-05", "gnorm": "0.895", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "32800"} +[2022-07-30 19:57:31,650][train_inner][INFO] - {"epoch": 3, "update": 2.133, "loss": "2.489", "ppl": "5.61", "wps": "392586", "ups": "3.33", "wpb": "117860", "bsz": "256", "num_updates": "109800", "lr": "8.99192e-05", "gnorm": "0.897", "loss_scale": "32", "train_wall": "60", "gb_free": "29.6", "wall": "32860"} +[2022-07-30 19:58:31,139][train_inner][INFO] - {"epoch": 3, "update": 2.137, "loss": "2.477", "ppl": "5.57", "wps": "397498", "ups": "3.36", "wpb": "118232", "bsz": "256", "num_updates": "110000", "lr": "8.9899e-05", "gnorm": "0.898", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "32920"} +[2022-07-30 19:59:30,632][train_inner][INFO] - {"epoch": 3, "update": 2.141, "loss": "2.478", "ppl": "5.57", "wps": "398555", "ups": "3.36", "wpb": "118556", "bsz": "256", "num_updates": "110200", "lr": "8.98788e-05", "gnorm": "0.893", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "32979"} +[2022-07-30 20:00:30,073][train_inner][INFO] - {"epoch": 3, "update": 2.145, "loss": "2.473", "ppl": "5.55", "wps": "398090", "ups": "3.36", "wpb": "118314", "bsz": "256", "num_updates": "110400", "lr": "8.98586e-05", "gnorm": "0.897", "loss_scale": "64", "train_wall": "59", "gb_free": "24.4", "wall": "33039"} +[2022-07-30 20:01:30,052][train_inner][INFO] - {"epoch": 3, "update": 2.149, "loss": "2.477", "ppl": "5.57", "wps": "394944", "ups": "3.33", "wpb": "118440", "bsz": "256", "num_updates": "110600", "lr": "8.98384e-05", "gnorm": "0.898", "loss_scale": "64", "train_wall": "60", "gb_free": "21.3", "wall": "33098"} +[2022-07-30 20:02:29,634][train_inner][INFO] - {"epoch": 3, "update": 2.153, "loss": "2.477", "ppl": "5.57", "wps": "397526", "ups": "3.36", "wpb": "118426", "bsz": "256", "num_updates": "110800", "lr": "8.98182e-05", "gnorm": "0.898", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "33158"} +[2022-07-30 20:03:29,265][train_inner][INFO] - {"epoch": 3, "update": 2.157, "loss": "2.475", "ppl": "5.56", "wps": "397707", "ups": "3.35", "wpb": "118578", "bsz": "256", "num_updates": "111000", "lr": "8.9798e-05", "gnorm": "0.895", "loss_scale": "64", "train_wall": "59", "gb_free": "23.3", "wall": "33218"} +[2022-07-30 20:04:28,916][train_inner][INFO] - {"epoch": 3, "update": 2.161, "loss": "2.48", "ppl": "5.58", "wps": "395283", "ups": "3.35", "wpb": "117894", "bsz": "256", "num_updates": "111200", "lr": "8.97778e-05", "gnorm": "0.9", "loss_scale": "64", "train_wall": "59", "gb_free": "30.4", "wall": "33277"} +[2022-07-30 20:05:28,622][train_inner][INFO] - {"epoch": 3, "update": 2.164, "loss": "2.48", "ppl": "5.58", "wps": "396573", "ups": "3.35", "wpb": "118388", "bsz": "256", "num_updates": "111400", "lr": "8.97576e-05", "gnorm": "0.898", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "33337"} +[2022-07-30 20:06:28,007][train_inner][INFO] - {"epoch": 3, "update": 2.168, "loss": "2.487", "ppl": "5.61", "wps": "397416", "ups": "3.37", "wpb": "118002", "bsz": "256", "num_updates": "111600", "lr": "8.97374e-05", "gnorm": "0.902", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "33396"} +[2022-07-30 20:07:27,190][train_inner][INFO] - {"epoch": 3, "update": 2.172, "loss": "2.476", "ppl": "5.56", "wps": "400169", "ups": "3.38", "wpb": "118415", "bsz": "256", "num_updates": "111800", "lr": "8.97172e-05", "gnorm": "0.898", "loss_scale": "64", "train_wall": "59", "gb_free": "27.1", "wall": "33456"} +[2022-07-30 20:08:26,800][train_inner][INFO] - {"epoch": 3, "update": 2.176, "loss": "2.478", "ppl": "5.57", "wps": "395314", "ups": "3.36", "wpb": "117822", "bsz": "256", "num_updates": "112000", "lr": "8.9697e-05", "gnorm": "0.902", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "33515"} +[2022-07-30 20:09:26,041][train_inner][INFO] - {"epoch": 3, "update": 2.18, "loss": "2.478", "ppl": "5.57", "wps": "399169", "ups": "3.38", "wpb": "118235", "bsz": "256", "num_updates": "112200", "lr": "8.96768e-05", "gnorm": "0.898", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "33574"} +[2022-07-30 20:10:25,572][train_inner][INFO] - {"epoch": 3, "update": 2.184, "loss": "2.474", "ppl": "5.55", "wps": "398322", "ups": "3.36", "wpb": "118561", "bsz": "256", "num_updates": "112400", "lr": "8.96566e-05", "gnorm": "0.897", "loss_scale": "64", "train_wall": "59", "gb_free": "23.5", "wall": "33634"} +[2022-07-30 20:11:14,594][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 20:11:24,904][train_inner][INFO] - {"epoch": 3, "update": 2.188, "loss": "2.475", "ppl": "5.56", "wps": "397439", "ups": "3.37", "wpb": "117903", "bsz": "256", "num_updates": "112600", "lr": "8.96364e-05", "gnorm": "0.9", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "33693"} +[2022-07-30 20:12:24,526][train_inner][INFO] - {"epoch": 3, "update": 2.192, "loss": "2.479", "ppl": "5.57", "wps": "396130", "ups": "3.35", "wpb": "118091", "bsz": "256", "num_updates": "112800", "lr": "8.96162e-05", "gnorm": "0.899", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "33753"} +[2022-07-30 20:13:24,279][train_inner][INFO] - {"epoch": 3, "update": 2.196, "loss": "2.479", "ppl": "5.58", "wps": "394326", "ups": "3.35", "wpb": "117810", "bsz": "256", "num_updates": "113000", "lr": "8.9596e-05", "gnorm": "0.903", "loss_scale": "64", "train_wall": "59", "gb_free": "26.6", "wall": "33813"} +[2022-07-30 20:14:23,532][train_inner][INFO] - {"epoch": 3, "update": 2.199, "loss": "2.475", "ppl": "5.56", "wps": "399087", "ups": "3.38", "wpb": "118235", "bsz": "256", "num_updates": "113200", "lr": "8.95758e-05", "gnorm": "0.9", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "33872"} +[2022-07-30 20:15:22,848][train_inner][INFO] - {"epoch": 3, "update": 2.203, "loss": "2.48", "ppl": "5.58", "wps": "400172", "ups": "3.37", "wpb": "118681", "bsz": "256", "num_updates": "113400", "lr": "8.95556e-05", "gnorm": "0.899", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "33931"} +[2022-07-30 20:16:22,060][train_inner][INFO] - {"epoch": 3, "update": 2.207, "loss": "2.474", "ppl": "5.56", "wps": "398626", "ups": "3.38", "wpb": "118016", "bsz": "256", "num_updates": "113600", "lr": "8.95354e-05", "gnorm": "0.9", "loss_scale": "64", "train_wall": "59", "gb_free": "29.6", "wall": "33990"} +[2022-07-30 20:17:21,527][train_inner][INFO] - {"epoch": 3, "update": 2.211, "loss": "2.477", "ppl": "5.57", "wps": "395838", "ups": "3.36", "wpb": "117696", "bsz": "256", "num_updates": "113800", "lr": "8.95152e-05", "gnorm": "0.903", "loss_scale": "64", "train_wall": "59", "gb_free": "27.9", "wall": "34050"} +[2022-07-30 20:18:21,083][train_inner][INFO] - {"epoch": 3, "update": 2.215, "loss": "2.474", "ppl": "5.56", "wps": "397178", "ups": "3.36", "wpb": "118270", "bsz": "256", "num_updates": "114000", "lr": "8.94949e-05", "gnorm": "0.902", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "34110"} +[2022-07-30 20:19:20,769][train_inner][INFO] - {"epoch": 3, "update": 2.219, "loss": "2.473", "ppl": "5.55", "wps": "395463", "ups": "3.35", "wpb": "118018", "bsz": "256", "num_updates": "114200", "lr": "8.94747e-05", "gnorm": "0.902", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "34169"} +[2022-07-30 20:20:20,306][train_inner][INFO] - {"epoch": 3, "update": 2.223, "loss": "2.469", "ppl": "5.54", "wps": "397000", "ups": "3.36", "wpb": "118181", "bsz": "256", "num_updates": "114400", "lr": "8.94545e-05", "gnorm": "0.898", "loss_scale": "64", "train_wall": "59", "gb_free": "24.9", "wall": "34229"} +[2022-07-30 20:21:19,599][train_inner][INFO] - {"epoch": 3, "update": 2.227, "loss": "2.465", "ppl": "5.52", "wps": "400038", "ups": "3.37", "wpb": "118597", "bsz": "256", "num_updates": "114600", "lr": "8.94343e-05", "gnorm": "0.899", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "34288"} +[2022-07-30 20:21:53,664][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 20:22:20,335][train_inner][INFO] - {"epoch": 3, "update": 2.231, "loss": "2.474", "ppl": "5.55", "wps": "389655", "ups": "3.29", "wpb": "118330", "bsz": "256", "num_updates": "114800", "lr": "8.94141e-05", "gnorm": "0.903", "loss_scale": "64", "train_wall": "60", "gb_free": "24.9", "wall": "34349"} +[2022-07-30 20:22:42,930][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 20:23:19,971][train_inner][INFO] - {"epoch": 3, "update": 2.234, "loss": "2.469", "ppl": "5.54", "wps": "397351", "ups": "3.35", "wpb": "118481", "bsz": "256", "num_updates": "115000", "lr": "8.93939e-05", "gnorm": "0.9", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "34408"} +[2022-07-30 20:24:19,398][train_inner][INFO] - {"epoch": 3, "update": 2.238, "loss": "2.473", "ppl": "5.55", "wps": "398978", "ups": "3.37", "wpb": "118550", "bsz": "256", "num_updates": "115200", "lr": "8.93737e-05", "gnorm": "0.904", "loss_scale": "32", "train_wall": "59", "gb_free": "27.2", "wall": "34468"} +[2022-07-30 20:25:18,978][train_inner][INFO] - {"epoch": 3, "update": 2.242, "loss": "2.469", "ppl": "5.54", "wps": "396364", "ups": "3.36", "wpb": "118076", "bsz": "255.9", "num_updates": "115400", "lr": "8.93535e-05", "gnorm": "0.904", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "34527"} +[2022-07-30 20:26:18,599][train_inner][INFO] - {"epoch": 3, "update": 2.246, "loss": "2.464", "ppl": "5.52", "wps": "398474", "ups": "3.35", "wpb": "118785", "bsz": "256", "num_updates": "115600", "lr": "8.93333e-05", "gnorm": "0.899", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "34587"} +[2022-07-30 20:27:18,101][train_inner][INFO] - {"epoch": 3, "update": 2.25, "loss": "2.461", "ppl": "5.51", "wps": "399559", "ups": "3.36", "wpb": "118871", "bsz": "256", "num_updates": "115800", "lr": "8.93131e-05", "gnorm": "0.9", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "34647"} +[2022-07-30 20:28:17,609][train_inner][INFO] - {"epoch": 3, "update": 2.254, "loss": "2.461", "ppl": "5.5", "wps": "397646", "ups": "3.36", "wpb": "118316", "bsz": "256", "num_updates": "116000", "lr": "8.92929e-05", "gnorm": "0.901", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "34706"} +[2022-07-30 20:29:17,172][train_inner][INFO] - {"epoch": 3, "update": 2.258, "loss": "2.464", "ppl": "5.52", "wps": "396694", "ups": "3.36", "wpb": "118140", "bsz": "256", "num_updates": "116200", "lr": "8.92727e-05", "gnorm": "0.904", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "34766"} +[2022-07-30 20:30:16,761][train_inner][INFO] - {"epoch": 3, "update": 2.262, "loss": "2.466", "ppl": "5.52", "wps": "398942", "ups": "3.36", "wpb": "118860", "bsz": "256", "num_updates": "116400", "lr": "8.92525e-05", "gnorm": "0.902", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "34825"} +[2022-07-30 20:31:16,161][train_inner][INFO] - {"epoch": 3, "update": 2.265, "loss": "2.469", "ppl": "5.54", "wps": "397534", "ups": "3.37", "wpb": "118067", "bsz": "256", "num_updates": "116600", "lr": "8.92323e-05", "gnorm": "0.904", "loss_scale": "32", "train_wall": "59", "gb_free": "23.3", "wall": "34885"} +[2022-07-30 20:32:15,778][train_inner][INFO] - {"epoch": 3, "update": 2.269, "loss": "2.466", "ppl": "5.53", "wps": "396234", "ups": "3.35", "wpb": "118111", "bsz": "256", "num_updates": "116800", "lr": "8.92121e-05", "gnorm": "0.903", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "34944"} +[2022-07-30 20:33:13,690][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 20:33:15,505][train_inner][INFO] - {"epoch": 3, "update": 2.273, "loss": "2.468", "ppl": "5.53", "wps": "395079", "ups": "3.35", "wpb": "117983", "bsz": "256", "num_updates": "117000", "lr": "8.91919e-05", "gnorm": "0.906", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "35004"} +[2022-07-30 20:34:14,704][train_inner][INFO] - {"epoch": 3, "update": 2.277, "loss": "2.468", "ppl": "5.53", "wps": "397839", "ups": "3.38", "wpb": "117759", "bsz": "256", "num_updates": "117200", "lr": "8.91717e-05", "gnorm": "0.908", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "35063"} +[2022-07-30 20:35:14,322][train_inner][INFO] - {"epoch": 3, "update": 2.281, "loss": "2.459", "ppl": "5.5", "wps": "398154", "ups": "3.35", "wpb": "118685", "bsz": "256", "num_updates": "117400", "lr": "8.91515e-05", "gnorm": "0.901", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "35123"} +[2022-07-30 20:36:13,651][train_inner][INFO] - {"epoch": 3, "update": 2.285, "loss": "2.461", "ppl": "5.51", "wps": "397753", "ups": "3.37", "wpb": "117989", "bsz": "256", "num_updates": "117600", "lr": "8.91313e-05", "gnorm": "0.906", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "35182"} +[2022-07-30 20:37:13,021][train_inner][INFO] - {"epoch": 3, "update": 2.289, "loss": "2.465", "ppl": "5.52", "wps": "399764", "ups": "3.37", "wpb": "118670", "bsz": "256", "num_updates": "117800", "lr": "8.91111e-05", "gnorm": "0.905", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "35241"} +[2022-07-30 20:38:12,799][train_inner][INFO] - {"epoch": 3, "update": 2.293, "loss": "2.455", "ppl": "5.48", "wps": "397083", "ups": "3.35", "wpb": "118683", "bsz": "256", "num_updates": "118000", "lr": "8.90909e-05", "gnorm": "0.899", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "35301"} +[2022-07-30 20:39:12,317][train_inner][INFO] - {"epoch": 3, "update": 2.297, "loss": "2.461", "ppl": "5.51", "wps": "398853", "ups": "3.36", "wpb": "118692", "bsz": "256", "num_updates": "118200", "lr": "8.90707e-05", "gnorm": "0.904", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "35361"} +[2022-07-30 20:40:11,666][train_inner][INFO] - {"epoch": 3, "update": 2.3, "loss": "2.457", "ppl": "5.49", "wps": "399043", "ups": "3.37", "wpb": "118413", "bsz": "256", "num_updates": "118400", "lr": "8.90505e-05", "gnorm": "0.905", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "35420"} +[2022-07-30 20:41:11,105][train_inner][INFO] - {"epoch": 3, "update": 2.304, "loss": "2.46", "ppl": "5.5", "wps": "399074", "ups": "3.36", "wpb": "118602", "bsz": "256", "num_updates": "118600", "lr": "8.90303e-05", "gnorm": "0.904", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "35480"} +[2022-07-30 20:42:10,558][train_inner][INFO] - {"epoch": 3, "update": 2.308, "loss": "2.46", "ppl": "5.5", "wps": "398879", "ups": "3.36", "wpb": "118571", "bsz": "256", "num_updates": "118800", "lr": "8.90101e-05", "gnorm": "0.905", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "35539"} +[2022-07-30 20:43:10,483][train_inner][INFO] - {"epoch": 3, "update": 2.312, "loss": "2.453", "ppl": "5.48", "wps": "396610", "ups": "3.34", "wpb": "118834", "bsz": "256", "num_updates": "119000", "lr": "8.89899e-05", "gnorm": "0.901", "loss_scale": "32", "train_wall": "60", "gb_free": "21.3", "wall": "35599"} +[2022-07-30 20:44:10,097][train_inner][INFO] - {"epoch": 3, "update": 2.316, "loss": "2.458", "ppl": "5.5", "wps": "396702", "ups": "3.35", "wpb": "118244", "bsz": "256", "num_updates": "119200", "lr": "8.89697e-05", "gnorm": "0.904", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "35659"} +[2022-07-30 20:45:09,475][train_inner][INFO] - {"epoch": 3, "update": 2.32, "loss": "2.458", "ppl": "5.49", "wps": "399428", "ups": "3.37", "wpb": "118586", "bsz": "256", "num_updates": "119400", "lr": "8.89495e-05", "gnorm": "0.904", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "35718"} +[2022-07-30 20:46:08,827][train_inner][INFO] - {"epoch": 3, "update": 2.324, "loss": "2.462", "ppl": "5.51", "wps": "397250", "ups": "3.37", "wpb": "117886", "bsz": "256", "num_updates": "119600", "lr": "8.89293e-05", "gnorm": "0.91", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "35777"} +[2022-07-30 20:47:08,507][train_inner][INFO] - {"epoch": 3, "update": 2.328, "loss": "2.46", "ppl": "5.5", "wps": "395790", "ups": "3.35", "wpb": "118103", "bsz": "256", "num_updates": "119800", "lr": "8.89091e-05", "gnorm": "0.908", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "35837"} +[2022-07-30 20:48:07,708][train_inner][INFO] - {"epoch": 3, "update": 2.332, "loss": "2.464", "ppl": "5.52", "wps": "397197", "ups": "3.38", "wpb": "117573", "bsz": "256", "num_updates": "120000", "lr": "8.88889e-05", "gnorm": "0.909", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "35896"} +[2022-07-30 20:49:07,300][train_inner][INFO] - {"epoch": 3, "update": 2.335, "loss": "2.454", "ppl": "5.48", "wps": "396595", "ups": "3.36", "wpb": "118168", "bsz": "256", "num_updates": "120200", "lr": "8.88687e-05", "gnorm": "0.906", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "35956"} +[2022-07-30 20:50:06,402][train_inner][INFO] - {"epoch": 3, "update": 2.339, "loss": "2.456", "ppl": "5.49", "wps": "399625", "ups": "3.38", "wpb": "118093", "bsz": "256", "num_updates": "120400", "lr": "8.88485e-05", "gnorm": "0.905", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "36015"} +[2022-07-30 20:51:06,220][train_inner][INFO] - {"epoch": 3, "update": 2.343, "loss": "2.452", "ppl": "5.47", "wps": "395810", "ups": "3.34", "wpb": "118381", "bsz": "256", "num_updates": "120600", "lr": "8.88283e-05", "gnorm": "0.903", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "36075"} +[2022-07-30 20:52:05,395][train_inner][INFO] - {"epoch": 3, "update": 2.347, "loss": "2.456", "ppl": "5.49", "wps": "399206", "ups": "3.38", "wpb": "118115", "bsz": "256", "num_updates": "120800", "lr": "8.88081e-05", "gnorm": "0.908", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "36134"} +[2022-07-30 20:53:05,295][train_inner][INFO] - {"epoch": 3, "update": 2.351, "loss": "2.448", "ppl": "5.46", "wps": "395950", "ups": "3.34", "wpb": "118585", "bsz": "256", "num_updates": "121000", "lr": "8.87879e-05", "gnorm": "0.905", "loss_scale": "64", "train_wall": "60", "gb_free": "21.3", "wall": "36194"} +[2022-07-30 20:53:42,961][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 20:54:05,311][train_inner][INFO] - {"epoch": 3, "update": 2.355, "loss": "2.451", "ppl": "5.47", "wps": "395183", "ups": "3.33", "wpb": "118587", "bsz": "256", "num_updates": "121200", "lr": "8.87677e-05", "gnorm": "0.905", "loss_scale": "64", "train_wall": "60", "gb_free": "25.9", "wall": "36254"} +[2022-07-30 20:55:04,608][train_inner][INFO] - {"epoch": 3, "update": 2.359, "loss": "2.465", "ppl": "5.52", "wps": "398330", "ups": "3.37", "wpb": "118096", "bsz": "256", "num_updates": "121400", "lr": "8.87475e-05", "gnorm": "0.91", "loss_scale": "64", "train_wall": "59", "gb_free": "25.4", "wall": "36313"} +[2022-07-30 20:56:03,933][train_inner][INFO] - {"epoch": 3, "update": 2.363, "loss": "2.448", "ppl": "5.46", "wps": "398602", "ups": "3.37", "wpb": "118236", "bsz": "256", "num_updates": "121600", "lr": "8.87273e-05", "gnorm": "0.908", "loss_scale": "64", "train_wall": "59", "gb_free": "26.4", "wall": "36372"} +[2022-07-30 20:57:03,387][train_inner][INFO] - {"epoch": 3, "update": 2.367, "loss": "2.45", "ppl": "5.47", "wps": "397904", "ups": "3.36", "wpb": "118284", "bsz": "256", "num_updates": "121800", "lr": "8.87071e-05", "gnorm": "0.906", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "36432"} +[2022-07-30 20:58:02,767][train_inner][INFO] - {"epoch": 3, "update": 2.37, "loss": "2.449", "ppl": "5.46", "wps": "398885", "ups": "3.37", "wpb": "118429", "bsz": "256", "num_updates": "122000", "lr": "8.86869e-05", "gnorm": "0.907", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "36491"} +[2022-07-30 20:59:02,076][train_inner][INFO] - {"epoch": 3, "update": 2.374, "loss": "2.451", "ppl": "5.47", "wps": "399376", "ups": "3.37", "wpb": "118431", "bsz": "256", "num_updates": "122200", "lr": "8.86667e-05", "gnorm": "0.906", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "36551"} +[2022-07-30 21:00:01,319][train_inner][INFO] - {"epoch": 3, "update": 2.378, "loss": "2.448", "ppl": "5.46", "wps": "398751", "ups": "3.38", "wpb": "118115", "bsz": "256", "num_updates": "122400", "lr": "8.86465e-05", "gnorm": "0.91", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "36610"} +[2022-07-30 21:00:11,446][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 21:00:21,005][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-30 21:01:01,445][train_inner][INFO] - {"epoch": 3, "update": 2.382, "loss": "2.445", "ppl": "5.44", "wps": "392750", "ups": "3.33", "wpb": "118072", "bsz": "255.9", "num_updates": "122600", "lr": "8.86263e-05", "gnorm": "0.907", "loss_scale": "16", "train_wall": "60", "gb_free": "22", "wall": "36670"} +[2022-07-30 21:02:00,879][train_inner][INFO] - {"epoch": 3, "update": 2.386, "loss": "2.451", "ppl": "5.47", "wps": "395149", "ups": "3.37", "wpb": "117425", "bsz": "256", "num_updates": "122800", "lr": "8.86061e-05", "gnorm": "0.911", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "36729"} +[2022-07-30 21:03:00,204][train_inner][INFO] - {"epoch": 3, "update": 2.39, "loss": "2.449", "ppl": "5.46", "wps": "398412", "ups": "3.37", "wpb": "118178", "bsz": "256", "num_updates": "123000", "lr": "8.85859e-05", "gnorm": "0.906", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "36789"} +[2022-07-30 21:03:59,799][train_inner][INFO] - {"epoch": 3, "update": 2.394, "loss": "2.444", "ppl": "5.44", "wps": "396854", "ups": "3.36", "wpb": "118252", "bsz": "256", "num_updates": "123200", "lr": "8.85657e-05", "gnorm": "0.909", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "36848"} +[2022-07-30 21:04:59,396][train_inner][INFO] - {"epoch": 3, "update": 2.398, "loss": "2.445", "ppl": "5.45", "wps": "398194", "ups": "3.36", "wpb": "118656", "bsz": "256", "num_updates": "123400", "lr": "8.85455e-05", "gnorm": "0.909", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "36908"} +[2022-07-30 21:05:59,499][train_inner][INFO] - {"epoch": 3, "update": 2.402, "loss": "2.444", "ppl": "5.44", "wps": "392810", "ups": "3.33", "wpb": "118046", "bsz": "256", "num_updates": "123600", "lr": "8.85253e-05", "gnorm": "0.911", "loss_scale": "16", "train_wall": "60", "gb_free": "22", "wall": "36968"} +[2022-07-30 21:06:59,104][train_inner][INFO] - {"epoch": 3, "update": 2.405, "loss": "2.444", "ppl": "5.44", "wps": "398746", "ups": "3.36", "wpb": "118834", "bsz": "256", "num_updates": "123800", "lr": "8.85051e-05", "gnorm": "0.909", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "37028"} +[2022-07-30 21:07:58,689][train_inner][INFO] - {"epoch": 3, "update": 2.409, "loss": "2.447", "ppl": "5.45", "wps": "394637", "ups": "3.36", "wpb": "117572", "bsz": "256", "num_updates": "124000", "lr": "8.84848e-05", "gnorm": "0.911", "loss_scale": "16", "train_wall": "59", "gb_free": "30.2", "wall": "37087"} +[2022-07-30 21:08:58,402][train_inner][INFO] - {"epoch": 3, "update": 2.413, "loss": "2.453", "ppl": "5.48", "wps": "393439", "ups": "3.35", "wpb": "117467", "bsz": "256", "num_updates": "124200", "lr": "8.84646e-05", "gnorm": "0.914", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "37147"} +[2022-07-30 21:09:57,856][train_inner][INFO] - {"epoch": 3, "update": 2.417, "loss": "2.445", "ppl": "5.44", "wps": "400317", "ups": "3.36", "wpb": "119001", "bsz": "256", "num_updates": "124400", "lr": "8.84444e-05", "gnorm": "0.904", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "37206"} +[2022-07-30 21:10:57,207][train_inner][INFO] - {"epoch": 3, "update": 2.421, "loss": "2.448", "ppl": "5.46", "wps": "399087", "ups": "3.37", "wpb": "118429", "bsz": "256", "num_updates": "124600", "lr": "8.84242e-05", "gnorm": "0.909", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "37266"} +[2022-07-30 21:11:56,487][train_inner][INFO] - {"epoch": 3, "update": 2.425, "loss": "2.444", "ppl": "5.44", "wps": "400311", "ups": "3.37", "wpb": "118653", "bsz": "256", "num_updates": "124800", "lr": "8.8404e-05", "gnorm": "0.908", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "37325"} +[2022-07-30 21:12:55,916][train_inner][INFO] - {"epoch": 3, "update": 2.429, "loss": "2.438", "ppl": "5.42", "wps": "397898", "ups": "3.37", "wpb": "118232", "bsz": "256", "num_updates": "125000", "lr": "8.83838e-05", "gnorm": "0.913", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "37384"} +[2022-07-30 21:13:55,721][train_inner][INFO] - {"epoch": 3, "update": 2.433, "loss": "2.437", "ppl": "5.42", "wps": "400000", "ups": "3.34", "wpb": "119608", "bsz": "256", "num_updates": "125200", "lr": "8.83636e-05", "gnorm": "0.906", "loss_scale": "32", "train_wall": "59", "gb_free": "27.7", "wall": "37444"} +[2022-07-30 21:14:55,319][train_inner][INFO] - {"epoch": 3, "update": 2.436, "loss": "2.438", "ppl": "5.42", "wps": "397422", "ups": "3.36", "wpb": "118428", "bsz": "256", "num_updates": "125400", "lr": "8.83434e-05", "gnorm": "0.909", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "37504"} +[2022-07-30 21:15:55,206][train_inner][INFO] - {"epoch": 3, "update": 2.44, "loss": "2.445", "ppl": "5.44", "wps": "395712", "ups": "3.34", "wpb": "118488", "bsz": "256", "num_updates": "125600", "lr": "8.83232e-05", "gnorm": "0.91", "loss_scale": "32", "train_wall": "60", "gb_free": "25.4", "wall": "37564"} +[2022-07-30 21:16:54,520][train_inner][INFO] - {"epoch": 3, "update": 2.444, "loss": "2.447", "ppl": "5.45", "wps": "396587", "ups": "3.37", "wpb": "117614", "bsz": "256", "num_updates": "125800", "lr": "8.8303e-05", "gnorm": "0.918", "loss_scale": "32", "train_wall": "59", "gb_free": "26.7", "wall": "37623"} +[2022-07-30 21:17:54,040][train_inner][INFO] - {"epoch": 3, "update": 2.448, "loss": "2.444", "ppl": "5.44", "wps": "397542", "ups": "3.36", "wpb": "118308", "bsz": "256", "num_updates": "126000", "lr": "8.82828e-05", "gnorm": "0.909", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "37682"} +[2022-07-30 21:18:53,909][train_inner][INFO] - {"epoch": 3, "update": 2.452, "loss": "2.439", "ppl": "5.42", "wps": "396391", "ups": "3.34", "wpb": "118658", "bsz": "256", "num_updates": "126200", "lr": "8.82626e-05", "gnorm": "0.908", "loss_scale": "32", "train_wall": "60", "gb_free": "24.8", "wall": "37742"} +[2022-07-30 21:19:53,289][train_inner][INFO] - {"epoch": 3, "update": 2.456, "loss": "2.444", "ppl": "5.44", "wps": "396272", "ups": "3.37", "wpb": "117652", "bsz": "256", "num_updates": "126400", "lr": "8.82424e-05", "gnorm": "0.915", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "37802"} +[2022-07-30 21:20:51,801][train_inner][INFO] - {"epoch": 3, "update": 2.46, "loss": "2.437", "ppl": "5.42", "wps": "403862", "ups": "3.42", "wpb": "118153", "bsz": "256", "num_updates": "126600", "lr": "8.82222e-05", "gnorm": "0.913", "loss_scale": "64", "train_wall": "58", "gb_free": "27.4", "wall": "37860"} +[2022-07-30 21:21:51,033][train_inner][INFO] - {"epoch": 3, "update": 2.464, "loss": "2.443", "ppl": "5.44", "wps": "397891", "ups": "3.38", "wpb": "117839", "bsz": "256", "num_updates": "126800", "lr": "8.8202e-05", "gnorm": "0.914", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "37919"} +[2022-07-30 21:22:50,404][train_inner][INFO] - {"epoch": 3, "update": 2.468, "loss": "2.437", "ppl": "5.41", "wps": "399638", "ups": "3.37", "wpb": "118633", "bsz": "256", "num_updates": "127000", "lr": "8.81818e-05", "gnorm": "0.909", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "37979"} +[2022-07-30 21:23:49,764][train_inner][INFO] - {"epoch": 3, "update": 2.471, "loss": "2.437", "ppl": "5.42", "wps": "399583", "ups": "3.37", "wpb": "118595", "bsz": "256", "num_updates": "127200", "lr": "8.81616e-05", "gnorm": "0.91", "loss_scale": "64", "train_wall": "59", "gb_free": "25", "wall": "38038"} +[2022-07-30 21:24:49,202][train_inner][INFO] - {"epoch": 3, "update": 2.475, "loss": "2.443", "ppl": "5.44", "wps": "396588", "ups": "3.36", "wpb": "117861", "bsz": "256", "num_updates": "127400", "lr": "8.81414e-05", "gnorm": "0.915", "loss_scale": "64", "train_wall": "59", "gb_free": "22.7", "wall": "38098"} +[2022-07-30 21:25:48,478][train_inner][INFO] - {"epoch": 3, "update": 2.479, "loss": "2.435", "ppl": "5.41", "wps": "399749", "ups": "3.37", "wpb": "118478", "bsz": "256", "num_updates": "127600", "lr": "8.81212e-05", "gnorm": "0.911", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "38157"} +[2022-07-30 21:25:49,056][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 21:26:48,212][train_inner][INFO] - {"epoch": 3, "update": 2.483, "loss": "2.441", "ppl": "5.43", "wps": "394172", "ups": "3.35", "wpb": "117726", "bsz": "256", "num_updates": "127800", "lr": "8.8101e-05", "gnorm": "0.915", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "38217"} +[2022-07-30 21:27:47,921][train_inner][INFO] - {"epoch": 3, "update": 2.487, "loss": "2.435", "ppl": "5.41", "wps": "397670", "ups": "3.35", "wpb": "118721", "bsz": "256", "num_updates": "128000", "lr": "8.80808e-05", "gnorm": "0.912", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "38276"} +[2022-07-30 21:28:47,653][train_inner][INFO] - {"epoch": 3, "update": 2.491, "loss": "2.44", "ppl": "5.43", "wps": "396608", "ups": "3.35", "wpb": "118451", "bsz": "256", "num_updates": "128200", "lr": "8.80606e-05", "gnorm": "0.912", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "38336"} +[2022-07-30 21:29:46,894][train_inner][INFO] - {"epoch": 3, "update": 2.495, "loss": "2.428", "ppl": "5.38", "wps": "399921", "ups": "3.38", "wpb": "118456", "bsz": "256", "num_updates": "128400", "lr": "8.80404e-05", "gnorm": "0.911", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "38395"} +[2022-07-30 21:30:46,430][train_inner][INFO] - {"epoch": 3, "update": 2.499, "loss": "2.433", "ppl": "5.4", "wps": "399766", "ups": "3.36", "wpb": "119003", "bsz": "256", "num_updates": "128600", "lr": "8.80202e-05", "gnorm": "0.909", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "38455"} +[2022-07-30 21:31:46,025][train_inner][INFO] - {"epoch": 3, "update": 2.503, "loss": "2.428", "ppl": "5.38", "wps": "396390", "ups": "3.36", "wpb": "118112", "bsz": "256", "num_updates": "128800", "lr": "8.8e-05", "gnorm": "0.915", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "38514"} +[2022-07-30 21:32:45,532][train_inner][INFO] - {"epoch": 3, "update": 2.506, "loss": "2.429", "ppl": "5.38", "wps": "397240", "ups": "3.36", "wpb": "118192", "bsz": "256", "num_updates": "129000", "lr": "8.79798e-05", "gnorm": "0.915", "loss_scale": "32", "train_wall": "59", "gb_free": "25.6", "wall": "38574"} +[2022-07-30 21:33:45,002][train_inner][INFO] - {"epoch": 3, "update": 2.51, "loss": "2.441", "ppl": "5.43", "wps": "397560", "ups": "3.36", "wpb": "118213", "bsz": "256", "num_updates": "129200", "lr": "8.79596e-05", "gnorm": "0.915", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "38633"} +[2022-07-30 21:34:44,237][train_inner][INFO] - {"epoch": 3, "update": 2.514, "loss": "2.434", "ppl": "5.4", "wps": "398901", "ups": "3.38", "wpb": "118144", "bsz": "256", "num_updates": "129400", "lr": "8.79394e-05", "gnorm": "0.914", "loss_scale": "32", "train_wall": "59", "gb_free": "25", "wall": "38693"} +[2022-07-30 21:35:43,762][train_inner][INFO] - {"epoch": 3, "update": 2.518, "loss": "2.43", "ppl": "5.39", "wps": "398346", "ups": "3.36", "wpb": "118557", "bsz": "256", "num_updates": "129600", "lr": "8.79192e-05", "gnorm": "0.912", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "38752"} +[2022-07-30 21:36:44,295][train_inner][INFO] - {"epoch": 3, "update": 2.522, "loss": "2.429", "ppl": "5.38", "wps": "388531", "ups": "3.3", "wpb": "117594", "bsz": "256", "num_updates": "129800", "lr": "8.7899e-05", "gnorm": "0.916", "loss_scale": "64", "train_wall": "60", "gb_free": "24.1", "wall": "38813"} +[2022-07-30 21:37:43,739][train_inner][INFO] - {"epoch": 3, "update": 2.526, "loss": "2.439", "ppl": "5.42", "wps": "397737", "ups": "3.36", "wpb": "118215", "bsz": "256", "num_updates": "130000", "lr": "8.78788e-05", "gnorm": "0.915", "loss_scale": "64", "train_wall": "59", "gb_free": "25.1", "wall": "38872"} +[2022-07-30 21:38:43,108][train_inner][INFO] - {"epoch": 3, "update": 2.53, "loss": "2.431", "ppl": "5.39", "wps": "398671", "ups": "3.37", "wpb": "118343", "bsz": "256", "num_updates": "130200", "lr": "8.78586e-05", "gnorm": "0.912", "loss_scale": "64", "train_wall": "59", "gb_free": "23.5", "wall": "38932"} +[2022-07-30 21:39:43,356][train_inner][INFO] - {"epoch": 3, "update": 2.534, "loss": "2.43", "ppl": "5.39", "wps": "391070", "ups": "3.32", "wpb": "117805", "bsz": "256", "num_updates": "130400", "lr": "8.78384e-05", "gnorm": "0.915", "loss_scale": "64", "train_wall": "60", "gb_free": "23.2", "wall": "38992"} +[2022-07-30 21:40:42,834][train_inner][INFO] - {"epoch": 3, "update": 2.538, "loss": "2.424", "ppl": "5.37", "wps": "398715", "ups": "3.36", "wpb": "118572", "bsz": "256", "num_updates": "130600", "lr": "8.78182e-05", "gnorm": "0.91", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "39051"} +[2022-07-30 21:41:42,111][train_inner][INFO] - {"epoch": 3, "update": 2.541, "loss": "2.426", "ppl": "5.37", "wps": "398510", "ups": "3.37", "wpb": "118113", "bsz": "256", "num_updates": "130800", "lr": "8.7798e-05", "gnorm": "0.913", "loss_scale": "64", "train_wall": "59", "gb_free": "25", "wall": "39111"} +[2022-07-30 21:42:41,713][train_inner][INFO] - {"epoch": 3, "update": 2.545, "loss": "2.427", "ppl": "5.38", "wps": "397880", "ups": "3.36", "wpb": "118571", "bsz": "256", "num_updates": "131000", "lr": "8.77778e-05", "gnorm": "0.911", "loss_scale": "64", "train_wall": "59", "gb_free": "27.3", "wall": "39170"} +[2022-07-30 21:43:41,309][train_inner][INFO] - {"epoch": 3, "update": 2.549, "loss": "2.426", "ppl": "5.37", "wps": "398819", "ups": "3.36", "wpb": "118839", "bsz": "256", "num_updates": "131200", "lr": "8.77576e-05", "gnorm": "0.911", "loss_scale": "64", "train_wall": "59", "gb_free": "24.9", "wall": "39230"} +[2022-07-30 21:44:40,702][train_inner][INFO] - {"epoch": 3, "update": 2.553, "loss": "2.43", "ppl": "5.39", "wps": "398233", "ups": "3.37", "wpb": "118260", "bsz": "256", "num_updates": "131400", "lr": "8.77374e-05", "gnorm": "0.916", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "39289"} +[2022-07-30 21:45:40,533][train_inner][INFO] - {"epoch": 3, "update": 2.557, "loss": "2.431", "ppl": "5.39", "wps": "396094", "ups": "3.34", "wpb": "118492", "bsz": "256", "num_updates": "131600", "lr": "8.77172e-05", "gnorm": "0.913", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "39349"} +[2022-07-30 21:46:20,879][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 21:46:40,031][train_inner][INFO] - {"epoch": 3, "update": 2.561, "loss": "2.428", "ppl": "5.38", "wps": "398185", "ups": "3.36", "wpb": "118455", "bsz": "256", "num_updates": "131800", "lr": "8.7697e-05", "gnorm": "0.915", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "39408"} +[2022-07-30 21:47:40,874][train_inner][INFO] - {"epoch": 3, "update": 2.565, "loss": "2.424", "ppl": "5.37", "wps": "390161", "ups": "3.29", "wpb": "118692", "bsz": "256", "num_updates": "132000", "lr": "8.76768e-05", "gnorm": "0.913", "loss_scale": "64", "train_wall": "61", "gb_free": "22.2", "wall": "39469"} +[2022-07-30 21:48:40,467][train_inner][INFO] - {"epoch": 3, "update": 2.569, "loss": "2.431", "ppl": "5.39", "wps": "396226", "ups": "3.36", "wpb": "118061", "bsz": "256", "num_updates": "132200", "lr": "8.76566e-05", "gnorm": "0.917", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "39529"} +[2022-07-30 21:49:41,197][train_inner][INFO] - {"epoch": 3, "update": 2.572, "loss": "2.431", "ppl": "5.39", "wps": "387970", "ups": "3.29", "wpb": "117806", "bsz": "256", "num_updates": "132400", "lr": "8.76364e-05", "gnorm": "0.919", "loss_scale": "64", "train_wall": "60", "gb_free": "23.1", "wall": "39590"} +[2022-07-30 21:50:41,951][train_inner][INFO] - {"epoch": 3, "update": 2.576, "loss": "2.425", "ppl": "5.37", "wps": "390364", "ups": "3.29", "wpb": "118580", "bsz": "256", "num_updates": "132600", "lr": "8.76162e-05", "gnorm": "0.912", "loss_scale": "64", "train_wall": "60", "gb_free": "21.3", "wall": "39650"} +[2022-07-30 21:51:41,570][train_inner][INFO] - {"epoch": 3, "update": 2.58, "loss": "2.423", "ppl": "5.36", "wps": "397129", "ups": "3.35", "wpb": "118380", "bsz": "256", "num_updates": "132800", "lr": "8.7596e-05", "gnorm": "0.916", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "39710"} +[2022-07-30 21:52:41,040][train_inner][INFO] - {"epoch": 3, "update": 2.584, "loss": "2.421", "ppl": "5.35", "wps": "397469", "ups": "3.36", "wpb": "118187", "bsz": "256", "num_updates": "133000", "lr": "8.75758e-05", "gnorm": "0.917", "loss_scale": "64", "train_wall": "59", "gb_free": "27.3", "wall": "39769"} +[2022-07-30 21:53:40,131][train_inner][INFO] - {"epoch": 3, "update": 2.588, "loss": "2.425", "ppl": "5.37", "wps": "400307", "ups": "3.38", "wpb": "118273", "bsz": "256", "num_updates": "133200", "lr": "8.75556e-05", "gnorm": "0.917", "loss_scale": "64", "train_wall": "59", "gb_free": "29.3", "wall": "39829"} +[2022-07-30 21:53:53,786][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 21:54:41,072][train_inner][INFO] - {"epoch": 3, "update": 2.592, "loss": "2.421", "ppl": "5.35", "wps": "388203", "ups": "3.28", "wpb": "118285", "bsz": "256", "num_updates": "133400", "lr": "8.75354e-05", "gnorm": "0.916", "loss_scale": "32", "train_wall": "61", "gb_free": "22.9", "wall": "39890"} +[2022-07-30 21:55:40,654][train_inner][INFO] - {"epoch": 3, "update": 2.596, "loss": "2.422", "ppl": "5.36", "wps": "397892", "ups": "3.36", "wpb": "118535", "bsz": "256", "num_updates": "133600", "lr": "8.75152e-05", "gnorm": "0.916", "loss_scale": "32", "train_wall": "59", "gb_free": "28.6", "wall": "39949"} +[2022-07-30 21:56:39,771][train_inner][INFO] - {"epoch": 3, "update": 2.6, "loss": "2.424", "ppl": "5.37", "wps": "399541", "ups": "3.38", "wpb": "118098", "bsz": "256", "num_updates": "133800", "lr": "8.74949e-05", "gnorm": "0.918", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "40008"} +[2022-07-30 21:57:39,094][train_inner][INFO] - {"epoch": 3, "update": 2.604, "loss": "2.415", "ppl": "5.33", "wps": "400393", "ups": "3.37", "wpb": "118762", "bsz": "256", "num_updates": "134000", "lr": "8.74747e-05", "gnorm": "0.912", "loss_scale": "32", "train_wall": "59", "gb_free": "25.8", "wall": "40068"} +[2022-07-30 21:58:38,482][train_inner][INFO] - {"epoch": 3, "update": 2.607, "loss": "2.422", "ppl": "5.36", "wps": "397645", "ups": "3.37", "wpb": "118075", "bsz": "256", "num_updates": "134200", "lr": "8.74545e-05", "gnorm": "0.917", "loss_scale": "32", "train_wall": "59", "gb_free": "24.4", "wall": "40127"} +[2022-07-30 21:59:37,751][train_inner][INFO] - {"epoch": 3, "update": 2.611, "loss": "2.414", "ppl": "5.33", "wps": "401972", "ups": "3.37", "wpb": "119121", "bsz": "256", "num_updates": "134400", "lr": "8.74343e-05", "gnorm": "0.915", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "40186"} +[2022-07-30 22:00:36,937][train_inner][INFO] - {"epoch": 3, "update": 2.615, "loss": "2.421", "ppl": "5.35", "wps": "399544", "ups": "3.38", "wpb": "118238", "bsz": "256", "num_updates": "134600", "lr": "8.74141e-05", "gnorm": "0.917", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "40245"} +[2022-07-30 22:01:36,328][train_inner][INFO] - {"epoch": 3, "update": 2.619, "loss": "2.414", "ppl": "5.33", "wps": "399820", "ups": "3.37", "wpb": "118726", "bsz": "256", "num_updates": "134800", "lr": "8.73939e-05", "gnorm": "0.912", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "40305"} +[2022-07-30 22:02:36,027][train_inner][INFO] - {"epoch": 3, "update": 2.623, "loss": "2.423", "ppl": "5.36", "wps": "393781", "ups": "3.35", "wpb": "117543", "bsz": "256", "num_updates": "135000", "lr": "8.73737e-05", "gnorm": "0.922", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "40364"} +[2022-07-30 22:03:35,441][train_inner][INFO] - {"epoch": 3, "update": 2.627, "loss": "2.413", "ppl": "5.32", "wps": "401324", "ups": "3.37", "wpb": "119219", "bsz": "256", "num_updates": "135200", "lr": "8.73535e-05", "gnorm": "0.917", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "40424"} +[2022-07-30 22:04:35,114][train_inner][INFO] - {"epoch": 3, "update": 2.631, "loss": "2.425", "ppl": "5.37", "wps": "394785", "ups": "3.35", "wpb": "117789", "bsz": "256", "num_updates": "135400", "lr": "8.73333e-05", "gnorm": "0.921", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "40484"} +[2022-07-30 22:05:34,365][train_inner][INFO] - {"epoch": 3, "update": 2.635, "loss": "2.421", "ppl": "5.36", "wps": "401454", "ups": "3.38", "wpb": "118932", "bsz": "256", "num_updates": "135600", "lr": "8.73131e-05", "gnorm": "0.915", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "40543"} +[2022-07-30 22:05:44,744][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 22:06:34,357][train_inner][INFO] - {"epoch": 3, "update": 2.639, "loss": "2.414", "ppl": "5.33", "wps": "393747", "ups": "3.33", "wpb": "118107", "bsz": "256", "num_updates": "135800", "lr": "8.72929e-05", "gnorm": "0.918", "loss_scale": "32", "train_wall": "60", "gb_free": "22.6", "wall": "40603"} +[2022-07-30 22:07:34,319][train_inner][INFO] - {"epoch": 3, "update": 2.642, "loss": "2.409", "ppl": "5.31", "wps": "397021", "ups": "3.34", "wpb": "119030", "bsz": "256", "num_updates": "136000", "lr": "8.72727e-05", "gnorm": "0.913", "loss_scale": "32", "train_wall": "60", "gb_free": "21.4", "wall": "40663"} +[2022-07-30 22:08:34,171][train_inner][INFO] - {"epoch": 3, "update": 2.646, "loss": "2.416", "ppl": "5.34", "wps": "395225", "ups": "3.34", "wpb": "118275", "bsz": "256", "num_updates": "136200", "lr": "8.72525e-05", "gnorm": "0.917", "loss_scale": "32", "train_wall": "60", "gb_free": "24", "wall": "40723"} +[2022-07-30 22:09:33,784][train_inner][INFO] - {"epoch": 3, "update": 2.65, "loss": "2.413", "ppl": "5.33", "wps": "396545", "ups": "3.36", "wpb": "118195", "bsz": "256", "num_updates": "136400", "lr": "8.72323e-05", "gnorm": "0.917", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "40782"} +[2022-07-30 22:10:33,405][train_inner][INFO] - {"epoch": 3, "update": 2.654, "loss": "2.413", "ppl": "5.33", "wps": "397130", "ups": "3.35", "wpb": "118385", "bsz": "256", "num_updates": "136600", "lr": "8.72121e-05", "gnorm": "0.918", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "40842"} +[2022-07-30 22:11:34,127][train_inner][INFO] - {"epoch": 3, "update": 2.658, "loss": "2.412", "ppl": "5.32", "wps": "389322", "ups": "3.29", "wpb": "118202", "bsz": "256", "num_updates": "136800", "lr": "8.71919e-05", "gnorm": "0.921", "loss_scale": "32", "train_wall": "60", "gb_free": "23.3", "wall": "40903"} +[2022-07-30 22:12:33,548][train_inner][INFO] - {"epoch": 3, "update": 2.662, "loss": "2.414", "ppl": "5.33", "wps": "398275", "ups": "3.37", "wpb": "118329", "bsz": "256", "num_updates": "137000", "lr": "8.71717e-05", "gnorm": "0.921", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "40962"} +[2022-07-30 22:13:33,394][train_inner][INFO] - {"epoch": 3, "update": 2.666, "loss": "2.415", "ppl": "5.33", "wps": "395527", "ups": "3.34", "wpb": "118353", "bsz": "256", "num_updates": "137200", "lr": "8.71515e-05", "gnorm": "0.918", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "41022"} +[2022-07-30 22:14:33,089][train_inner][INFO] - {"epoch": 3, "update": 2.67, "loss": "2.413", "ppl": "5.33", "wps": "394746", "ups": "3.35", "wpb": "117820", "bsz": "256", "num_updates": "137400", "lr": "8.71313e-05", "gnorm": "0.919", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "41082"} +[2022-07-30 22:15:31,890][train_inner][INFO] - {"epoch": 3, "update": 2.674, "loss": "2.41", "ppl": "5.31", "wps": "402325", "ups": "3.4", "wpb": "118284", "bsz": "256", "num_updates": "137600", "lr": "8.71111e-05", "gnorm": "0.917", "loss_scale": "32", "train_wall": "58", "gb_free": "23.4", "wall": "41140"} +[2022-07-30 22:16:31,058][train_inner][INFO] - {"epoch": 3, "update": 2.677, "loss": "2.422", "ppl": "5.36", "wps": "398964", "ups": "3.38", "wpb": "118029", "bsz": "256", "num_updates": "137800", "lr": "8.70909e-05", "gnorm": "0.919", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "41199"} +[2022-07-30 22:17:30,691][train_inner][INFO] - {"epoch": 3, "update": 2.681, "loss": "2.411", "ppl": "5.32", "wps": "395024", "ups": "3.35", "wpb": "117782", "bsz": "256", "num_updates": "138000", "lr": "8.70707e-05", "gnorm": "0.924", "loss_scale": "64", "train_wall": "59", "gb_free": "25.8", "wall": "41259"} +[2022-07-30 22:18:31,438][train_inner][INFO] - {"epoch": 3, "update": 2.685, "loss": "2.415", "ppl": "5.33", "wps": "389370", "ups": "3.29", "wpb": "118264", "bsz": "256", "num_updates": "138200", "lr": "8.70505e-05", "gnorm": "0.92", "loss_scale": "64", "train_wall": "60", "gb_free": "21.5", "wall": "41320"} +[2022-07-30 22:19:30,665][train_inner][INFO] - {"epoch": 3, "update": 2.689, "loss": "2.417", "ppl": "5.34", "wps": "398717", "ups": "3.38", "wpb": "118074", "bsz": "256", "num_updates": "138400", "lr": "8.70303e-05", "gnorm": "0.921", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "41379"} +[2022-07-30 22:20:30,587][train_inner][INFO] - {"epoch": 3, "update": 2.693, "loss": "2.416", "ppl": "5.34", "wps": "394417", "ups": "3.34", "wpb": "118171", "bsz": "256", "num_updates": "138600", "lr": "8.70101e-05", "gnorm": "0.921", "loss_scale": "64", "train_wall": "60", "gb_free": "22.2", "wall": "41439"} +[2022-07-30 22:21:30,246][train_inner][INFO] - {"epoch": 3, "update": 2.697, "loss": "2.41", "ppl": "5.31", "wps": "397638", "ups": "3.35", "wpb": "118612", "bsz": "256", "num_updates": "138800", "lr": "8.69899e-05", "gnorm": "0.918", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "41499"} +[2022-07-30 22:22:30,168][train_inner][INFO] - {"epoch": 3, "update": 2.701, "loss": "2.417", "ppl": "5.34", "wps": "396314", "ups": "3.34", "wpb": "118738", "bsz": "256", "num_updates": "139000", "lr": "8.69697e-05", "gnorm": "0.918", "loss_scale": "64", "train_wall": "60", "gb_free": "21.9", "wall": "41559"} +[2022-07-30 22:23:29,859][train_inner][INFO] - {"epoch": 3, "update": 2.705, "loss": "2.411", "ppl": "5.32", "wps": "396880", "ups": "3.35", "wpb": "118450", "bsz": "256", "num_updates": "139200", "lr": "8.69495e-05", "gnorm": "0.922", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "41618"} +[2022-07-30 22:24:29,438][train_inner][INFO] - {"epoch": 3, "update": 2.708, "loss": "2.41", "ppl": "5.31", "wps": "397682", "ups": "3.36", "wpb": "118468", "bsz": "256", "num_updates": "139400", "lr": "8.69293e-05", "gnorm": "0.918", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "41678"} +[2022-07-30 22:25:29,245][train_inner][INFO] - {"epoch": 3, "update": 2.712, "loss": "2.406", "ppl": "5.3", "wps": "396024", "ups": "3.34", "wpb": "118423", "bsz": "256", "num_updates": "139600", "lr": "8.69091e-05", "gnorm": "0.92", "loss_scale": "64", "train_wall": "59", "gb_free": "22.8", "wall": "41738"} +[2022-07-30 22:25:40,593][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 22:26:29,156][train_inner][INFO] - {"epoch": 3, "update": 2.716, "loss": "2.406", "ppl": "5.3", "wps": "393587", "ups": "3.34", "wpb": "117902", "bsz": "256", "num_updates": "139800", "lr": "8.68889e-05", "gnorm": "0.922", "loss_scale": "32", "train_wall": "60", "gb_free": "22.5", "wall": "41798"} +[2022-07-30 22:26:57,286][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-30 22:27:28,527][train_inner][INFO] - {"epoch": 3, "update": 2.72, "loss": "2.401", "ppl": "5.28", "wps": "397770", "ups": "3.37", "wpb": "118079", "bsz": "256", "num_updates": "140000", "lr": "8.68687e-05", "gnorm": "0.92", "loss_scale": "16", "train_wall": "59", "gb_free": "23.6", "wall": "41857"} +[2022-07-30 22:28:28,385][train_inner][INFO] - {"epoch": 3, "update": 2.724, "loss": "2.402", "ppl": "5.29", "wps": "395840", "ups": "3.34", "wpb": "118470", "bsz": "256", "num_updates": "140200", "lr": "8.68485e-05", "gnorm": "0.919", "loss_scale": "16", "train_wall": "60", "gb_free": "25.1", "wall": "41917"} +[2022-07-30 22:29:27,984][train_inner][INFO] - {"epoch": 3, "update": 2.728, "loss": "2.403", "ppl": "5.29", "wps": "397946", "ups": "3.36", "wpb": "118584", "bsz": "256", "num_updates": "140400", "lr": "8.68283e-05", "gnorm": "0.919", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "41976"} +[2022-07-30 22:30:27,164][train_inner][INFO] - {"epoch": 3, "update": 2.732, "loss": "2.407", "ppl": "5.3", "wps": "399177", "ups": "3.38", "wpb": "118116", "bsz": "256", "num_updates": "140600", "lr": "8.68081e-05", "gnorm": "0.923", "loss_scale": "16", "train_wall": "59", "gb_free": "23.3", "wall": "42036"} +[2022-07-30 22:31:26,867][train_inner][INFO] - {"epoch": 3, "update": 2.736, "loss": "2.407", "ppl": "5.3", "wps": "397420", "ups": "3.35", "wpb": "118635", "bsz": "256", "num_updates": "140800", "lr": "8.67879e-05", "gnorm": "0.921", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "42095"} +[2022-07-30 22:32:26,187][train_inner][INFO] - {"epoch": 3, "update": 2.74, "loss": "2.405", "ppl": "5.3", "wps": "399990", "ups": "3.37", "wpb": "118635", "bsz": "256", "num_updates": "141000", "lr": "8.67677e-05", "gnorm": "0.92", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "42155"} +[2022-07-30 22:33:25,717][train_inner][INFO] - {"epoch": 3, "update": 2.743, "loss": "2.41", "ppl": "5.31", "wps": "397708", "ups": "3.36", "wpb": "118377", "bsz": "256", "num_updates": "141200", "lr": "8.67475e-05", "gnorm": "0.921", "loss_scale": "16", "train_wall": "59", "gb_free": "23.8", "wall": "42214"} +[2022-07-30 22:34:25,169][train_inner][INFO] - {"epoch": 3, "update": 2.747, "loss": "2.409", "ppl": "5.31", "wps": "398355", "ups": "3.36", "wpb": "118415", "bsz": "256", "num_updates": "141400", "lr": "8.67273e-05", "gnorm": "0.921", "loss_scale": "16", "train_wall": "59", "gb_free": "24", "wall": "42274"} +[2022-07-30 22:35:24,575][train_inner][INFO] - {"epoch": 3, "update": 2.751, "loss": "2.409", "ppl": "5.31", "wps": "396849", "ups": "3.37", "wpb": "117876", "bsz": "256", "num_updates": "141600", "lr": "8.67071e-05", "gnorm": "0.926", "loss_scale": "16", "train_wall": "59", "gb_free": "27.7", "wall": "42333"} +[2022-07-30 22:36:23,907][train_inner][INFO] - {"epoch": 3, "update": 2.755, "loss": "2.4", "ppl": "5.28", "wps": "400579", "ups": "3.37", "wpb": "118833", "bsz": "256", "num_updates": "141800", "lr": "8.66869e-05", "gnorm": "0.92", "loss_scale": "16", "train_wall": "59", "gb_free": "24.9", "wall": "42392"} +[2022-07-30 22:37:23,482][train_inner][INFO] - {"epoch": 3, "update": 2.759, "loss": "2.4", "ppl": "5.28", "wps": "396719", "ups": "3.36", "wpb": "118173", "bsz": "256", "num_updates": "142000", "lr": "8.66667e-05", "gnorm": "0.92", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "42452"} +[2022-07-30 22:38:24,307][train_inner][INFO] - {"epoch": 3, "update": 2.763, "loss": "2.398", "ppl": "5.27", "wps": "390740", "ups": "3.29", "wpb": "118832", "bsz": "256", "num_updates": "142200", "lr": "8.66465e-05", "gnorm": "0.919", "loss_scale": "32", "train_wall": "60", "gb_free": "25.3", "wall": "42513"} +[2022-07-30 22:39:23,825][train_inner][INFO] - {"epoch": 3, "update": 2.767, "loss": "2.403", "ppl": "5.29", "wps": "398600", "ups": "3.36", "wpb": "118618", "bsz": "256", "num_updates": "142400", "lr": "8.66263e-05", "gnorm": "0.919", "loss_scale": "32", "train_wall": "59", "gb_free": "23.3", "wall": "42572"} +[2022-07-30 22:40:23,349][train_inner][INFO] - {"epoch": 3, "update": 2.771, "loss": "2.408", "ppl": "5.31", "wps": "397389", "ups": "3.36", "wpb": "118272", "bsz": "256", "num_updates": "142600", "lr": "8.66061e-05", "gnorm": "0.922", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "42632"} +[2022-07-30 22:41:22,736][train_inner][INFO] - {"epoch": 3, "update": 2.775, "loss": "2.402", "ppl": "5.28", "wps": "397249", "ups": "3.37", "wpb": "117955", "bsz": "256", "num_updates": "142800", "lr": "8.65859e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "42691"} +[2022-07-30 22:42:22,340][train_inner][INFO] - {"epoch": 3, "update": 2.778, "loss": "2.41", "ppl": "5.31", "wps": "396669", "ups": "3.36", "wpb": "118215", "bsz": "256", "num_updates": "143000", "lr": "8.65657e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "42751"} +[2022-07-30 22:43:21,596][train_inner][INFO] - {"epoch": 3, "update": 2.782, "loss": "2.402", "ppl": "5.28", "wps": "399829", "ups": "3.38", "wpb": "118461", "bsz": "256", "num_updates": "143200", "lr": "8.65455e-05", "gnorm": "0.918", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "42810"} +[2022-07-30 22:44:21,106][train_inner][INFO] - {"epoch": 3, "update": 2.786, "loss": "2.404", "ppl": "5.29", "wps": "395598", "ups": "3.36", "wpb": "117709", "bsz": "256", "num_updates": "143400", "lr": "8.65253e-05", "gnorm": "0.924", "loss_scale": "32", "train_wall": "59", "gb_free": "24.8", "wall": "42870"} +[2022-07-30 22:45:20,378][train_inner][INFO] - {"epoch": 3, "update": 2.79, "loss": "2.394", "ppl": "5.26", "wps": "401966", "ups": "3.37", "wpb": "119126", "bsz": "256", "num_updates": "143600", "lr": "8.65051e-05", "gnorm": "0.92", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "42929"} +[2022-07-30 22:46:19,736][train_inner][INFO] - {"epoch": 3, "update": 2.794, "loss": "2.404", "ppl": "5.29", "wps": "397889", "ups": "3.37", "wpb": "118088", "bsz": "256", "num_updates": "143800", "lr": "8.64848e-05", "gnorm": "0.929", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "42988"} +[2022-07-30 22:47:19,539][train_inner][INFO] - {"epoch": 3, "update": 2.798, "loss": "2.407", "ppl": "5.3", "wps": "395998", "ups": "3.34", "wpb": "118410", "bsz": "256", "num_updates": "144000", "lr": "8.64646e-05", "gnorm": "0.921", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "43048"} +[2022-07-30 22:48:19,113][train_inner][INFO] - {"epoch": 3, "update": 2.802, "loss": "2.398", "ppl": "5.27", "wps": "396969", "ups": "3.36", "wpb": "118244", "bsz": "256", "num_updates": "144200", "lr": "8.64444e-05", "gnorm": "0.924", "loss_scale": "64", "train_wall": "59", "gb_free": "25.6", "wall": "43108"} +[2022-07-30 22:49:17,734][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 22:49:18,932][train_inner][INFO] - {"epoch": 3, "update": 2.806, "loss": "2.403", "ppl": "5.29", "wps": "394520", "ups": "3.34", "wpb": "117997", "bsz": "256", "num_updates": "144400", "lr": "8.64242e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "43167"} +[2022-07-30 22:50:18,602][train_inner][INFO] - {"epoch": 3, "update": 2.81, "loss": "2.4", "ppl": "5.28", "wps": "396856", "ups": "3.35", "wpb": "118403", "bsz": "256", "num_updates": "144600", "lr": "8.6404e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "43227"} +[2022-07-30 22:51:18,004][train_inner][INFO] - {"epoch": 3, "update": 2.813, "loss": "2.398", "ppl": "5.27", "wps": "397728", "ups": "3.37", "wpb": "118127", "bsz": "256", "num_updates": "144800", "lr": "8.63838e-05", "gnorm": "0.922", "loss_scale": "32", "train_wall": "59", "gb_free": "27.8", "wall": "43286"} +[2022-07-30 22:52:17,380][train_inner][INFO] - {"epoch": 3, "update": 2.817, "loss": "2.395", "ppl": "5.26", "wps": "396651", "ups": "3.37", "wpb": "117757", "bsz": "256", "num_updates": "145000", "lr": "8.63636e-05", "gnorm": "0.924", "loss_scale": "32", "train_wall": "59", "gb_free": "25.5", "wall": "43346"} +[2022-07-30 22:53:16,818][train_inner][INFO] - {"epoch": 3, "update": 2.821, "loss": "2.396", "ppl": "5.26", "wps": "397140", "ups": "3.36", "wpb": "118025", "bsz": "256", "num_updates": "145200", "lr": "8.63434e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "43405"} +[2022-07-30 22:54:16,520][train_inner][INFO] - {"epoch": 3, "update": 2.825, "loss": "2.39", "ppl": "5.24", "wps": "398147", "ups": "3.35", "wpb": "118850", "bsz": "256", "num_updates": "145400", "lr": "8.63232e-05", "gnorm": "0.921", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "43465"} +[2022-07-30 22:55:16,385][train_inner][INFO] - {"epoch": 3, "update": 2.829, "loss": "2.395", "ppl": "5.26", "wps": "396089", "ups": "3.34", "wpb": "118558", "bsz": "256", "num_updates": "145600", "lr": "8.6303e-05", "gnorm": "0.925", "loss_scale": "32", "train_wall": "60", "gb_free": "22.1", "wall": "43525"} +[2022-07-30 22:56:16,299][train_inner][INFO] - {"epoch": 3, "update": 2.833, "loss": "2.398", "ppl": "5.27", "wps": "394284", "ups": "3.34", "wpb": "118116", "bsz": "256", "num_updates": "145800", "lr": "8.62828e-05", "gnorm": "0.924", "loss_scale": "32", "train_wall": "60", "gb_free": "21.6", "wall": "43585"} +[2022-07-30 22:57:15,720][train_inner][INFO] - {"epoch": 3, "update": 2.837, "loss": "2.398", "ppl": "5.27", "wps": "398557", "ups": "3.37", "wpb": "118412", "bsz": "256", "num_updates": "146000", "lr": "8.62626e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "43644"} +[2022-07-30 22:58:15,473][train_inner][INFO] - {"epoch": 3, "update": 2.841, "loss": "2.399", "ppl": "5.27", "wps": "395572", "ups": "3.35", "wpb": "118183", "bsz": "256", "num_updates": "146200", "lr": "8.62424e-05", "gnorm": "0.925", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "43704"} +[2022-07-30 22:59:16,386][train_inner][INFO] - {"epoch": 3, "update": 2.844, "loss": "2.4", "ppl": "5.28", "wps": "388507", "ups": "3.28", "wpb": "118325", "bsz": "256", "num_updates": "146400", "lr": "8.62222e-05", "gnorm": "0.924", "loss_scale": "32", "train_wall": "61", "gb_free": "22.1", "wall": "43765"} +[2022-07-30 23:00:16,135][train_inner][INFO] - {"epoch": 3, "update": 2.848, "loss": "2.4", "ppl": "5.28", "wps": "394763", "ups": "3.35", "wpb": "117933", "bsz": "256", "num_updates": "146600", "lr": "8.6202e-05", "gnorm": "0.925", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "43825"} +[2022-07-30 23:01:15,411][train_inner][INFO] - {"epoch": 3, "update": 2.852, "loss": "2.4", "ppl": "5.28", "wps": "400651", "ups": "3.37", "wpb": "118742", "bsz": "256", "num_updates": "146800", "lr": "8.61818e-05", "gnorm": "0.922", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "43884"} +[2022-07-30 23:01:48,464][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 23:02:15,120][train_inner][INFO] - {"epoch": 3, "update": 2.856, "loss": "2.39", "ppl": "5.24", "wps": "395968", "ups": "3.35", "wpb": "118214", "bsz": "256", "num_updates": "147000", "lr": "8.61616e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "26.6", "wall": "43944"} +[2022-07-30 23:03:14,211][train_inner][INFO] - {"epoch": 3, "update": 2.86, "loss": "2.397", "ppl": "5.27", "wps": "399522", "ups": "3.38", "wpb": "118041", "bsz": "256", "num_updates": "147200", "lr": "8.61414e-05", "gnorm": "0.931", "loss_scale": "32", "train_wall": "59", "gb_free": "26.8", "wall": "44003"} +[2022-07-30 23:04:13,678][train_inner][INFO] - {"epoch": 3, "update": 2.864, "loss": "2.4", "ppl": "5.28", "wps": "397034", "ups": "3.36", "wpb": "118051", "bsz": "256", "num_updates": "147400", "lr": "8.61212e-05", "gnorm": "0.928", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "44062"} +[2022-07-30 23:05:13,201][train_inner][INFO] - {"epoch": 3, "update": 2.868, "loss": "2.392", "ppl": "5.25", "wps": "398115", "ups": "3.36", "wpb": "118483", "bsz": "256", "num_updates": "147600", "lr": "8.6101e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "44122"} +[2022-07-30 23:06:12,493][train_inner][INFO] - {"epoch": 3, "update": 2.872, "loss": "2.392", "ppl": "5.25", "wps": "398676", "ups": "3.37", "wpb": "118191", "bsz": "256", "num_updates": "147800", "lr": "8.60808e-05", "gnorm": "0.924", "loss_scale": "32", "train_wall": "59", "gb_free": "26.4", "wall": "44181"} +[2022-07-30 23:07:11,856][train_inner][INFO] - {"epoch": 3, "update": 2.876, "loss": "2.391", "ppl": "5.25", "wps": "398202", "ups": "3.37", "wpb": "118192", "bsz": "256", "num_updates": "148000", "lr": "8.60606e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "44240"} +[2022-07-30 23:08:12,569][train_inner][INFO] - {"epoch": 3, "update": 2.879, "loss": "2.384", "ppl": "5.22", "wps": "390488", "ups": "3.29", "wpb": "118539", "bsz": "256", "num_updates": "148200", "lr": "8.60404e-05", "gnorm": "0.92", "loss_scale": "32", "train_wall": "60", "gb_free": "25.3", "wall": "44301"} +[2022-07-30 23:09:11,713][train_inner][INFO] - {"epoch": 3, "update": 2.883, "loss": "2.397", "ppl": "5.27", "wps": "400434", "ups": "3.38", "wpb": "118416", "bsz": "256", "num_updates": "148400", "lr": "8.60202e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "44360"} +[2022-07-30 23:10:11,395][train_inner][INFO] - {"epoch": 3, "update": 2.887, "loss": "2.389", "ppl": "5.24", "wps": "395723", "ups": "3.35", "wpb": "118086", "bsz": "256", "num_updates": "148600", "lr": "8.6e-05", "gnorm": "0.925", "loss_scale": "32", "train_wall": "59", "gb_free": "28.5", "wall": "44420"} +[2022-07-30 23:11:10,693][train_inner][INFO] - {"epoch": 3, "update": 2.891, "loss": "2.386", "ppl": "5.23", "wps": "399374", "ups": "3.37", "wpb": "118410", "bsz": "256", "num_updates": "148800", "lr": "8.59798e-05", "gnorm": "0.923", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "44479"} +[2022-07-30 23:12:10,346][train_inner][INFO] - {"epoch": 3, "update": 2.895, "loss": "2.389", "ppl": "5.24", "wps": "397492", "ups": "3.35", "wpb": "118556", "bsz": "256", "num_updates": "149000", "lr": "8.59596e-05", "gnorm": "0.923", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "44539"} +[2022-07-30 23:13:10,113][train_inner][INFO] - {"epoch": 3, "update": 2.899, "loss": "2.391", "ppl": "5.25", "wps": "397090", "ups": "3.35", "wpb": "118664", "bsz": "256", "num_updates": "149200", "lr": "8.59394e-05", "gnorm": "0.923", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "44599"} +[2022-07-30 23:14:09,380][train_inner][INFO] - {"epoch": 3, "update": 2.903, "loss": "2.391", "ppl": "5.25", "wps": "399077", "ups": "3.37", "wpb": "118261", "bsz": "256", "num_updates": "149400", "lr": "8.59192e-05", "gnorm": "0.924", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "44658"} +[2022-07-30 23:15:08,703][train_inner][INFO] - {"epoch": 3, "update": 2.907, "loss": "2.391", "ppl": "5.25", "wps": "400309", "ups": "3.37", "wpb": "118737", "bsz": "256", "num_updates": "149600", "lr": "8.5899e-05", "gnorm": "0.926", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "44717"} +[2022-07-30 23:16:08,438][train_inner][INFO] - {"epoch": 3, "update": 2.911, "loss": "2.387", "ppl": "5.23", "wps": "396114", "ups": "3.35", "wpb": "118307", "bsz": "256", "num_updates": "149800", "lr": "8.58788e-05", "gnorm": "0.924", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "44777"} +[2022-07-30 23:17:07,802][train_inner][INFO] - {"epoch": 3, "update": 2.914, "loss": "2.393", "ppl": "5.25", "wps": "397433", "ups": "3.37", "wpb": "117966", "bsz": "256", "num_updates": "150000", "lr": "8.58586e-05", "gnorm": "0.929", "loss_scale": "64", "train_wall": "59", "gb_free": "27.2", "wall": "44836"} +[2022-07-30 23:17:07,803][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-30 23:17:30,530][valid][INFO] - {"epoch": 3, "valid_loss": "2.275", "valid_ppl": "4.84", "valid_wps": "1.59966e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "150000", "valid_best_loss": "2.275"} +[2022-07-30 23:17:30,533][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 150000 updates +[2022-07-30 23:17:30,534][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_3_150000.pt +[2022-07-30 23:17:36,581][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_3_150000.pt +[2022-07-30 23:18:08,791][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_3_150000.pt (epoch 3 @ 150000 updates, score 2.275) (writing took 38.25741279311478 seconds) +[2022-07-30 23:19:07,796][train_inner][INFO] - {"epoch": 3, "update": 2.918, "loss": "2.384", "ppl": "5.22", "wps": "196904", "ups": "1.67", "wpb": "118136", "bsz": "256", "num_updates": "150200", "lr": "8.58384e-05", "gnorm": "0.926", "loss_scale": "64", "train_wall": "59", "gb_free": "24.5", "wall": "44956"} +[2022-07-30 23:20:07,442][train_inner][INFO] - {"epoch": 3, "update": 2.922, "loss": "2.384", "ppl": "5.22", "wps": "396711", "ups": "3.35", "wpb": "118311", "bsz": "256", "num_updates": "150400", "lr": "8.58182e-05", "gnorm": "0.926", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "45016"} +[2022-07-30 23:21:06,548][train_inner][INFO] - {"epoch": 3, "update": 2.926, "loss": "2.394", "ppl": "5.25", "wps": "397960", "ups": "3.38", "wpb": "117609", "bsz": "256", "num_updates": "150600", "lr": "8.5798e-05", "gnorm": "0.931", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "45075"} +[2022-07-30 23:22:05,758][train_inner][INFO] - {"epoch": 3, "update": 2.93, "loss": "2.386", "ppl": "5.23", "wps": "400422", "ups": "3.38", "wpb": "118542", "bsz": "256", "num_updates": "150800", "lr": "8.57778e-05", "gnorm": "0.926", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "45134"} +[2022-07-30 23:23:05,542][train_inner][INFO] - {"epoch": 3, "update": 2.934, "loss": "2.383", "ppl": "5.22", "wps": "396706", "ups": "3.35", "wpb": "118583", "bsz": "256", "num_updates": "151000", "lr": "8.57576e-05", "gnorm": "0.924", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "45194"} +[2022-07-30 23:24:04,914][train_inner][INFO] - {"epoch": 3, "update": 2.938, "loss": "2.389", "ppl": "5.24", "wps": "397651", "ups": "3.37", "wpb": "118046", "bsz": "256", "num_updates": "151200", "lr": "8.57374e-05", "gnorm": "0.928", "loss_scale": "128", "train_wall": "59", "gb_free": "33.5", "wall": "45253"} +[2022-07-30 23:25:04,341][train_inner][INFO] - {"epoch": 3, "update": 2.942, "loss": "2.384", "ppl": "5.22", "wps": "397338", "ups": "3.37", "wpb": "118062", "bsz": "256", "num_updates": "151400", "lr": "8.57172e-05", "gnorm": "0.929", "loss_scale": "128", "train_wall": "59", "gb_free": "22.9", "wall": "45313"} +[2022-07-30 23:25:49,242][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-30 23:26:04,196][train_inner][INFO] - {"epoch": 3, "update": 2.946, "loss": "2.38", "ppl": "5.21", "wps": "397168", "ups": "3.34", "wpb": "118861", "bsz": "256", "num_updates": "151600", "lr": "8.5697e-05", "gnorm": "0.924", "loss_scale": "64", "train_wall": "60", "gb_free": "24.3", "wall": "45373"} +[2022-07-30 23:27:03,568][train_inner][INFO] - {"epoch": 3, "update": 2.949, "loss": "2.387", "ppl": "5.23", "wps": "398283", "ups": "3.37", "wpb": "118233", "bsz": "256", "num_updates": "151800", "lr": "8.56768e-05", "gnorm": "0.928", "loss_scale": "64", "train_wall": "59", "gb_free": "32.6", "wall": "45432"} +[2022-07-30 23:28:02,910][train_inner][INFO] - {"epoch": 3, "update": 2.953, "loss": "2.385", "ppl": "5.22", "wps": "398124", "ups": "3.37", "wpb": "118127", "bsz": "256", "num_updates": "152000", "lr": "8.56566e-05", "gnorm": "0.926", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "45491"} +[2022-07-30 23:29:02,521][train_inner][INFO] - {"epoch": 3, "update": 2.957, "loss": "2.379", "ppl": "5.2", "wps": "399002", "ups": "3.36", "wpb": "118923", "bsz": "256", "num_updates": "152200", "lr": "8.56364e-05", "gnorm": "0.923", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "45551"} +[2022-07-30 23:29:45,154][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 23:30:02,366][train_inner][INFO] - {"epoch": 3, "update": 2.961, "loss": "2.38", "ppl": "5.21", "wps": "395723", "ups": "3.34", "wpb": "118410", "bsz": "256", "num_updates": "152400", "lr": "8.56162e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "26.6", "wall": "45611"} +[2022-07-30 23:31:01,830][train_inner][INFO] - {"epoch": 3, "update": 2.965, "loss": "2.382", "ppl": "5.21", "wps": "399395", "ups": "3.36", "wpb": "118748", "bsz": "256", "num_updates": "152600", "lr": "8.5596e-05", "gnorm": "0.925", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "45670"} +[2022-07-30 23:32:01,446][train_inner][INFO] - {"epoch": 3, "update": 2.969, "loss": "2.39", "ppl": "5.24", "wps": "395458", "ups": "3.35", "wpb": "117877", "bsz": "256", "num_updates": "152800", "lr": "8.55758e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "26.5", "wall": "45730"} +[2022-07-30 23:33:00,704][train_inner][INFO] - {"epoch": 3, "update": 2.973, "loss": "2.387", "ppl": "5.23", "wps": "398091", "ups": "3.38", "wpb": "117948", "bsz": "256", "num_updates": "153000", "lr": "8.55556e-05", "gnorm": "0.931", "loss_scale": "32", "train_wall": "59", "gb_free": "24.2", "wall": "45789"} +[2022-07-30 23:34:00,416][train_inner][INFO] - {"epoch": 3, "update": 2.977, "loss": "2.381", "ppl": "5.21", "wps": "396148", "ups": "3.35", "wpb": "118273", "bsz": "256", "num_updates": "153200", "lr": "8.55354e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "45849"} +[2022-07-30 23:34:59,961][train_inner][INFO] - {"epoch": 3, "update": 2.981, "loss": "2.378", "ppl": "5.2", "wps": "397949", "ups": "3.36", "wpb": "118480", "bsz": "256", "num_updates": "153400", "lr": "8.55152e-05", "gnorm": "0.928", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "45908"} +[2022-07-30 23:35:59,432][train_inner][INFO] - {"epoch": 3, "update": 2.984, "loss": "2.381", "ppl": "5.21", "wps": "397474", "ups": "3.36", "wpb": "118189", "bsz": "256", "num_updates": "153600", "lr": "8.54949e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "31.8", "wall": "45968"} +[2022-07-30 23:37:00,030][train_inner][INFO] - {"epoch": 3, "update": 2.988, "loss": "2.383", "ppl": "5.22", "wps": "389642", "ups": "3.3", "wpb": "118055", "bsz": "256", "num_updates": "153800", "lr": "8.54747e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "60", "gb_free": "24.2", "wall": "46028"} +[2022-07-30 23:37:59,225][train_inner][INFO] - {"epoch": 3, "update": 2.992, "loss": "2.39", "ppl": "5.24", "wps": "398262", "ups": "3.38", "wpb": "117876", "bsz": "256", "num_updates": "154000", "lr": "8.54545e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "46088"} +[2022-07-30 23:38:58,269][train_inner][INFO] - {"epoch": 3, "update": 2.996, "loss": "2.38", "ppl": "5.2", "wps": "399666", "ups": "3.39", "wpb": "117989", "bsz": "256", "num_updates": "154200", "lr": "8.54343e-05", "gnorm": "0.929", "loss_scale": "32", "train_wall": "59", "gb_free": "25.6", "wall": "46147"} +[2022-07-30 23:39:57,270][train_inner][INFO] - {"epoch": 3, "update": 3.0, "loss": "2.375", "ppl": "5.19", "wps": "400753", "ups": "3.39", "wpb": "118223", "bsz": "256", "num_updates": "154400", "lr": "8.54141e-05", "gnorm": "0.925", "loss_scale": "64", "train_wall": "59", "gb_free": "23.3", "wall": "46206"} +[2022-07-30 23:39:58,432][fairseq_cli.train][INFO] - end of epoch 3 (average epoch stats below) +[2022-07-30 23:39:58,433][train][INFO] - {"epoch": 3, "train_loss": "2.437", "train_ppl": "5.41", "train_wps": "395272", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "154404", "train_lr": "8.54137e-05", "train_gnorm": "0.912", "train_loss_scale": "64", "train_train_wall": "15243", "train_gb_free": "21.3", "train_wall": "46207"} +[2022-07-30 23:39:58,544][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-30 23:39:58,547][fairseq.trainer][INFO] - begin training epoch 4 +[2022-07-30 23:39:58,547][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-30 23:41:07,769][train_inner][INFO] - {"epoch": 4, "update": 3.004, "loss": "2.378", "ppl": "5.2", "wps": "334782", "ups": "2.84", "wpb": "118008", "bsz": "255.4", "num_updates": "154600", "lr": "8.53939e-05", "gnorm": "0.93", "loss_scale": "64", "train_wall": "60", "gb_free": "25.2", "wall": "46276"} +[2022-07-30 23:42:07,535][train_inner][INFO] - {"epoch": 4, "update": 3.008, "loss": "2.379", "ppl": "5.2", "wps": "395277", "ups": "3.35", "wpb": "118119", "bsz": "256", "num_updates": "154800", "lr": "8.53737e-05", "gnorm": "0.93", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "46336"} +[2022-07-30 23:43:07,059][train_inner][INFO] - {"epoch": 4, "update": 3.012, "loss": "2.373", "ppl": "5.18", "wps": "396998", "ups": "3.36", "wpb": "118155", "bsz": "256", "num_updates": "155000", "lr": "8.53535e-05", "gnorm": "0.93", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "46395"} +[2022-07-30 23:43:38,020][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-30 23:44:06,977][train_inner][INFO] - {"epoch": 4, "update": 3.015, "loss": "2.376", "ppl": "5.19", "wps": "395294", "ups": "3.34", "wpb": "118424", "bsz": "256", "num_updates": "155200", "lr": "8.53333e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "60", "gb_free": "21.6", "wall": "46455"} +[2022-07-30 23:45:06,884][train_inner][INFO] - {"epoch": 4, "update": 3.019, "loss": "2.371", "ppl": "5.17", "wps": "395932", "ups": "3.34", "wpb": "118595", "bsz": "256", "num_updates": "155400", "lr": "8.53131e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "60", "gb_free": "22.2", "wall": "46515"} +[2022-07-30 23:46:06,359][train_inner][INFO] - {"epoch": 4, "update": 3.023, "loss": "2.373", "ppl": "5.18", "wps": "398228", "ups": "3.36", "wpb": "118423", "bsz": "256", "num_updates": "155600", "lr": "8.52929e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "46575"} +[2022-07-30 23:47:05,793][train_inner][INFO] - {"epoch": 4, "update": 3.027, "loss": "2.378", "ppl": "5.2", "wps": "397540", "ups": "3.37", "wpb": "118137", "bsz": "256", "num_updates": "155800", "lr": "8.52727e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "46634"} +[2022-07-30 23:48:05,520][train_inner][INFO] - {"epoch": 4, "update": 3.031, "loss": "2.373", "ppl": "5.18", "wps": "398129", "ups": "3.35", "wpb": "118894", "bsz": "256", "num_updates": "156000", "lr": "8.52525e-05", "gnorm": "0.931", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "46694"} +[2022-07-30 23:49:05,079][train_inner][INFO] - {"epoch": 4, "update": 3.035, "loss": "2.367", "ppl": "5.16", "wps": "398495", "ups": "3.36", "wpb": "118668", "bsz": "256", "num_updates": "156200", "lr": "8.52323e-05", "gnorm": "0.928", "loss_scale": "32", "train_wall": "59", "gb_free": "26.7", "wall": "46754"} +[2022-07-30 23:50:04,784][train_inner][INFO] - {"epoch": 4, "update": 3.039, "loss": "2.375", "ppl": "5.19", "wps": "395421", "ups": "3.35", "wpb": "118044", "bsz": "256", "num_updates": "156400", "lr": "8.52121e-05", "gnorm": "0.929", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "46813"} +[2022-07-30 23:51:04,147][train_inner][INFO] - {"epoch": 4, "update": 3.043, "loss": "2.369", "ppl": "5.17", "wps": "401851", "ups": "3.37", "wpb": "119274", "bsz": "256", "num_updates": "156600", "lr": "8.51919e-05", "gnorm": "0.926", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "46873"} +[2022-07-30 23:52:03,792][train_inner][INFO] - {"epoch": 4, "update": 3.047, "loss": "2.375", "ppl": "5.19", "wps": "393956", "ups": "3.35", "wpb": "117486", "bsz": "256", "num_updates": "156800", "lr": "8.51717e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "46932"} +[2022-07-30 23:53:03,352][train_inner][INFO] - {"epoch": 4, "update": 3.05, "loss": "2.37", "ppl": "5.17", "wps": "396710", "ups": "3.36", "wpb": "118140", "bsz": "256", "num_updates": "157000", "lr": "8.51515e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "46992"} +[2022-07-30 23:54:03,216][train_inner][INFO] - {"epoch": 4, "update": 3.054, "loss": "2.374", "ppl": "5.18", "wps": "398638", "ups": "3.34", "wpb": "119320", "bsz": "256", "num_updates": "157200", "lr": "8.51313e-05", "gnorm": "0.926", "loss_scale": "64", "train_wall": "60", "gb_free": "21.6", "wall": "47052"} +[2022-07-30 23:55:02,647][train_inner][INFO] - {"epoch": 4, "update": 3.058, "loss": "2.368", "ppl": "5.16", "wps": "398954", "ups": "3.37", "wpb": "118550", "bsz": "256", "num_updates": "157400", "lr": "8.51111e-05", "gnorm": "0.931", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "47111"} +[2022-07-30 23:56:02,055][train_inner][INFO] - {"epoch": 4, "update": 3.062, "loss": "2.368", "ppl": "5.16", "wps": "399351", "ups": "3.37", "wpb": "118622", "bsz": "256", "num_updates": "157600", "lr": "8.50909e-05", "gnorm": "0.93", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "47170"} +[2022-07-30 23:57:01,803][train_inner][INFO] - {"epoch": 4, "update": 3.066, "loss": "2.373", "ppl": "5.18", "wps": "397129", "ups": "3.35", "wpb": "118638", "bsz": "256", "num_updates": "157800", "lr": "8.50707e-05", "gnorm": "0.929", "loss_scale": "64", "train_wall": "59", "gb_free": "24.2", "wall": "47230"} +[2022-07-30 23:58:01,772][train_inner][INFO] - {"epoch": 4, "update": 3.07, "loss": "2.371", "ppl": "5.17", "wps": "394444", "ups": "3.34", "wpb": "118270", "bsz": "256", "num_updates": "158000", "lr": "8.50505e-05", "gnorm": "0.929", "loss_scale": "64", "train_wall": "60", "gb_free": "22.2", "wall": "47290"} +[2022-07-30 23:59:01,517][train_inner][INFO] - {"epoch": 4, "update": 3.074, "loss": "2.376", "ppl": "5.19", "wps": "393347", "ups": "3.35", "wpb": "117502", "bsz": "256", "num_updates": "158200", "lr": "8.50303e-05", "gnorm": "0.935", "loss_scale": "64", "train_wall": "59", "gb_free": "24.7", "wall": "47350"} +[2022-07-31 00:00:00,927][train_inner][INFO] - {"epoch": 4, "update": 3.078, "loss": "2.378", "ppl": "5.2", "wps": "397820", "ups": "3.37", "wpb": "118172", "bsz": "256", "num_updates": "158400", "lr": "8.50101e-05", "gnorm": "0.931", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "47409"} +[2022-07-31 00:01:00,568][train_inner][INFO] - {"epoch": 4, "update": 3.082, "loss": "2.375", "ppl": "5.19", "wps": "395286", "ups": "3.35", "wpb": "117876", "bsz": "256", "num_updates": "158600", "lr": "8.49899e-05", "gnorm": "0.934", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "47469"} +[2022-07-31 00:01:59,894][train_inner][INFO] - {"epoch": 4, "update": 3.085, "loss": "2.371", "ppl": "5.17", "wps": "398878", "ups": "3.37", "wpb": "118318", "bsz": "256", "num_updates": "158800", "lr": "8.49697e-05", "gnorm": "0.928", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "47528"} +[2022-07-31 00:02:59,390][train_inner][INFO] - {"epoch": 4, "update": 3.089, "loss": "2.366", "ppl": "5.15", "wps": "398490", "ups": "3.36", "wpb": "118542", "bsz": "256", "num_updates": "159000", "lr": "8.49495e-05", "gnorm": "0.928", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "47588"} +[2022-07-31 00:03:58,879][train_inner][INFO] - {"epoch": 4, "update": 3.093, "loss": "2.369", "ppl": "5.17", "wps": "398865", "ups": "3.36", "wpb": "118640", "bsz": "256", "num_updates": "159200", "lr": "8.49293e-05", "gnorm": "0.928", "loss_scale": "128", "train_wall": "59", "gb_free": "22.5", "wall": "47647"} +[2022-07-31 00:04:58,260][train_inner][INFO] - {"epoch": 4, "update": 3.097, "loss": "2.374", "ppl": "5.18", "wps": "396608", "ups": "3.37", "wpb": "117755", "bsz": "256", "num_updates": "159400", "lr": "8.49091e-05", "gnorm": "0.934", "loss_scale": "128", "train_wall": "59", "gb_free": "22.6", "wall": "47707"} +[2022-07-31 00:05:07,138][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 00:05:57,890][train_inner][INFO] - {"epoch": 4, "update": 3.101, "loss": "2.37", "ppl": "5.17", "wps": "396499", "ups": "3.35", "wpb": "118216", "bsz": "256", "num_updates": "159600", "lr": "8.48889e-05", "gnorm": "0.93", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "47766"} +[2022-07-31 00:06:57,452][train_inner][INFO] - {"epoch": 4, "update": 3.105, "loss": "2.365", "ppl": "5.15", "wps": "397470", "ups": "3.36", "wpb": "118368", "bsz": "256", "num_updates": "159800", "lr": "8.48687e-05", "gnorm": "0.931", "loss_scale": "64", "train_wall": "59", "gb_free": "28.1", "wall": "47826"} +[2022-07-31 00:07:56,839][train_inner][INFO] - {"epoch": 4, "update": 3.109, "loss": "2.365", "ppl": "5.15", "wps": "399267", "ups": "3.37", "wpb": "118556", "bsz": "256", "num_updates": "160000", "lr": "8.48485e-05", "gnorm": "0.929", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "47885"} +[2022-07-31 00:08:56,216][train_inner][INFO] - {"epoch": 4, "update": 3.113, "loss": "2.37", "ppl": "5.17", "wps": "397336", "ups": "3.37", "wpb": "117964", "bsz": "256", "num_updates": "160200", "lr": "8.48283e-05", "gnorm": "0.934", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "47945"} +[2022-07-31 00:09:01,518][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 00:09:55,852][train_inner][INFO] - {"epoch": 4, "update": 3.117, "loss": "2.363", "ppl": "5.15", "wps": "396286", "ups": "3.35", "wpb": "118164", "bsz": "256", "num_updates": "160400", "lr": "8.48081e-05", "gnorm": "0.932", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "48004"} +[2022-07-31 00:10:54,887][train_inner][INFO] - {"epoch": 4, "update": 3.12, "loss": "2.37", "ppl": "5.17", "wps": "400230", "ups": "3.39", "wpb": "118136", "bsz": "256", "num_updates": "160600", "lr": "8.47879e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "48063"} +[2022-07-31 00:11:54,602][train_inner][INFO] - {"epoch": 4, "update": 3.124, "loss": "2.367", "ppl": "5.16", "wps": "395876", "ups": "3.35", "wpb": "118197", "bsz": "256", "num_updates": "160800", "lr": "8.47677e-05", "gnorm": "0.932", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "48123"} +[2022-07-31 00:12:53,981][train_inner][INFO] - {"epoch": 4, "update": 3.128, "loss": "2.372", "ppl": "5.18", "wps": "397776", "ups": "3.37", "wpb": "118098", "bsz": "256", "num_updates": "161000", "lr": "8.47475e-05", "gnorm": "0.932", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "48182"} +[2022-07-31 00:13:53,480][train_inner][INFO] - {"epoch": 4, "update": 3.132, "loss": "2.364", "ppl": "5.15", "wps": "398091", "ups": "3.36", "wpb": "118428", "bsz": "256", "num_updates": "161200", "lr": "8.47273e-05", "gnorm": "0.931", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "48242"} +[2022-07-31 00:14:53,075][train_inner][INFO] - {"epoch": 4, "update": 3.136, "loss": "2.368", "ppl": "5.16", "wps": "396310", "ups": "3.36", "wpb": "118090", "bsz": "256", "num_updates": "161400", "lr": "8.47071e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "48302"} +[2022-07-31 00:15:52,588][train_inner][INFO] - {"epoch": 4, "update": 3.14, "loss": "2.367", "ppl": "5.16", "wps": "398215", "ups": "3.36", "wpb": "118494", "bsz": "256", "num_updates": "161600", "lr": "8.46869e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "48361"} +[2022-07-31 00:16:52,448][train_inner][INFO] - {"epoch": 4, "update": 3.144, "loss": "2.366", "ppl": "5.15", "wps": "397561", "ups": "3.34", "wpb": "118990", "bsz": "256", "num_updates": "161800", "lr": "8.46667e-05", "gnorm": "0.929", "loss_scale": "32", "train_wall": "60", "gb_free": "24.2", "wall": "48421"} +[2022-07-31 00:17:52,170][train_inner][INFO] - {"epoch": 4, "update": 3.148, "loss": "2.362", "ppl": "5.14", "wps": "397185", "ups": "3.35", "wpb": "118601", "bsz": "256", "num_updates": "162000", "lr": "8.46465e-05", "gnorm": "0.931", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "48481"} +[2022-07-31 00:18:51,262][train_inner][INFO] - {"epoch": 4, "update": 3.151, "loss": "2.372", "ppl": "5.18", "wps": "399280", "ups": "3.38", "wpb": "117972", "bsz": "256", "num_updates": "162200", "lr": "8.46263e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "48540"} +[2022-07-31 00:19:50,905][train_inner][INFO] - {"epoch": 4, "update": 3.155, "loss": "2.364", "ppl": "5.15", "wps": "397436", "ups": "3.35", "wpb": "118520", "bsz": "256", "num_updates": "162400", "lr": "8.46061e-05", "gnorm": "0.93", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "48599"} +[2022-07-31 00:20:09,341][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 00:20:50,148][train_inner][INFO] - {"epoch": 4, "update": 3.159, "loss": "2.362", "ppl": "5.14", "wps": "400163", "ups": "3.38", "wpb": "118534", "bsz": "256", "num_updates": "162600", "lr": "8.45859e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "25.2", "wall": "48659"} +[2022-07-31 00:21:49,659][train_inner][INFO] - {"epoch": 4, "update": 3.163, "loss": "2.36", "ppl": "5.13", "wps": "397860", "ups": "3.36", "wpb": "118384", "bsz": "256", "num_updates": "162800", "lr": "8.45657e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "48718"} +[2022-07-31 00:22:49,069][train_inner][INFO] - {"epoch": 4, "update": 3.167, "loss": "2.36", "ppl": "5.13", "wps": "396844", "ups": "3.37", "wpb": "117882", "bsz": "256", "num_updates": "163000", "lr": "8.45455e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "23.8", "wall": "48777"} +[2022-07-31 00:23:48,642][train_inner][INFO] - {"epoch": 4, "update": 3.171, "loss": "2.362", "ppl": "5.14", "wps": "396828", "ups": "3.36", "wpb": "118201", "bsz": "256", "num_updates": "163200", "lr": "8.45253e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "48837"} +[2022-07-31 00:24:48,207][train_inner][INFO] - {"epoch": 4, "update": 3.175, "loss": "2.356", "ppl": "5.12", "wps": "396072", "ups": "3.36", "wpb": "117958", "bsz": "256", "num_updates": "163400", "lr": "8.45051e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "48897"} +[2022-07-31 00:25:47,898][train_inner][INFO] - {"epoch": 4, "update": 3.179, "loss": "2.362", "ppl": "5.14", "wps": "396487", "ups": "3.35", "wpb": "118332", "bsz": "256", "num_updates": "163600", "lr": "8.44848e-05", "gnorm": "0.931", "loss_scale": "32", "train_wall": "59", "gb_free": "31.4", "wall": "48956"} +[2022-07-31 00:26:47,504][train_inner][INFO] - {"epoch": 4, "update": 3.183, "loss": "2.36", "ppl": "5.13", "wps": "395878", "ups": "3.36", "wpb": "117983", "bsz": "256", "num_updates": "163800", "lr": "8.44646e-05", "gnorm": "0.935", "loss_scale": "32", "train_wall": "59", "gb_free": "28.3", "wall": "49016"} +[2022-07-31 00:27:47,419][train_inner][INFO] - {"epoch": 4, "update": 3.186, "loss": "2.365", "ppl": "5.15", "wps": "393958", "ups": "3.34", "wpb": "118019", "bsz": "256", "num_updates": "164000", "lr": "8.44444e-05", "gnorm": "0.935", "loss_scale": "32", "train_wall": "60", "gb_free": "22.6", "wall": "49076"} +[2022-07-31 00:28:46,488][train_inner][INFO] - {"epoch": 4, "update": 3.19, "loss": "2.36", "ppl": "5.14", "wps": "402329", "ups": "3.39", "wpb": "118825", "bsz": "256", "num_updates": "164200", "lr": "8.44242e-05", "gnorm": "0.93", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "49135"} +[2022-07-31 00:29:46,113][train_inner][INFO] - {"epoch": 4, "update": 3.194, "loss": "2.359", "ppl": "5.13", "wps": "397177", "ups": "3.35", "wpb": "118408", "bsz": "256", "num_updates": "164400", "lr": "8.4404e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "49195"} +[2022-07-31 00:30:47,193][train_inner][INFO] - {"epoch": 4, "update": 3.198, "loss": "2.361", "ppl": "5.14", "wps": "388040", "ups": "3.27", "wpb": "118506", "bsz": "256", "num_updates": "164600", "lr": "8.43838e-05", "gnorm": "0.931", "loss_scale": "64", "train_wall": "61", "gb_free": "21.4", "wall": "49256"} +[2022-07-31 00:31:46,277][train_inner][INFO] - {"epoch": 4, "update": 3.202, "loss": "2.355", "ppl": "5.12", "wps": "399925", "ups": "3.39", "wpb": "118144", "bsz": "256", "num_updates": "164800", "lr": "8.43636e-05", "gnorm": "0.934", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "49315"} +[2022-07-31 00:32:13,687][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 00:32:45,917][train_inner][INFO] - {"epoch": 4, "update": 3.206, "loss": "2.362", "ppl": "5.14", "wps": "395152", "ups": "3.35", "wpb": "117833", "bsz": "256", "num_updates": "165000", "lr": "8.43434e-05", "gnorm": "0.937", "loss_scale": "32", "train_wall": "59", "gb_free": "26.6", "wall": "49374"} +[2022-07-31 00:33:45,652][train_inner][INFO] - {"epoch": 4, "update": 3.21, "loss": "2.365", "ppl": "5.15", "wps": "396175", "ups": "3.35", "wpb": "118326", "bsz": "256", "num_updates": "165200", "lr": "8.43232e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "49434"} +[2022-07-31 00:34:45,290][train_inner][INFO] - {"epoch": 4, "update": 3.214, "loss": "2.353", "ppl": "5.11", "wps": "397006", "ups": "3.35", "wpb": "118384", "bsz": "256", "num_updates": "165400", "lr": "8.4303e-05", "gnorm": "0.935", "loss_scale": "32", "train_wall": "59", "gb_free": "26.7", "wall": "49494"} +[2022-07-31 00:35:44,701][train_inner][INFO] - {"epoch": 4, "update": 3.218, "loss": "2.364", "ppl": "5.15", "wps": "399135", "ups": "3.37", "wpb": "118563", "bsz": "256", "num_updates": "165600", "lr": "8.42828e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "49553"} +[2022-07-31 00:36:43,805][train_inner][INFO] - {"epoch": 4, "update": 3.221, "loss": "2.358", "ppl": "5.13", "wps": "400434", "ups": "3.38", "wpb": "118337", "bsz": "256", "num_updates": "165800", "lr": "8.42626e-05", "gnorm": "0.935", "loss_scale": "32", "train_wall": "59", "gb_free": "24.1", "wall": "49612"} +[2022-07-31 00:37:43,394][train_inner][INFO] - {"epoch": 4, "update": 3.225, "loss": "2.357", "ppl": "5.12", "wps": "398646", "ups": "3.36", "wpb": "118773", "bsz": "256", "num_updates": "166000", "lr": "8.42424e-05", "gnorm": "0.932", "loss_scale": "32", "train_wall": "59", "gb_free": "24.8", "wall": "49672"} +[2022-07-31 00:37:53,499][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 00:38:42,979][train_inner][INFO] - {"epoch": 4, "update": 3.229, "loss": "2.363", "ppl": "5.14", "wps": "394426", "ups": "3.36", "wpb": "117509", "bsz": "256", "num_updates": "166200", "lr": "8.42222e-05", "gnorm": "0.937", "loss_scale": "16", "train_wall": "59", "gb_free": "30.9", "wall": "49731"} +[2022-07-31 00:39:42,471][train_inner][INFO] - {"epoch": 4, "update": 3.233, "loss": "2.354", "ppl": "5.11", "wps": "399222", "ups": "3.36", "wpb": "118752", "bsz": "256", "num_updates": "166400", "lr": "8.4202e-05", "gnorm": "0.937", "loss_scale": "16", "train_wall": "59", "gb_free": "26.5", "wall": "49791"} +[2022-07-31 00:40:41,908][train_inner][INFO] - {"epoch": 4, "update": 3.237, "loss": "2.36", "ppl": "5.13", "wps": "397968", "ups": "3.36", "wpb": "118269", "bsz": "256", "num_updates": "166600", "lr": "8.41818e-05", "gnorm": "0.935", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "49850"} +[2022-07-31 00:41:41,355][train_inner][INFO] - {"epoch": 4, "update": 3.241, "loss": "2.361", "ppl": "5.14", "wps": "397589", "ups": "3.36", "wpb": "118176", "bsz": "256", "num_updates": "166800", "lr": "8.41616e-05", "gnorm": "0.939", "loss_scale": "16", "train_wall": "59", "gb_free": "25", "wall": "49910"} +[2022-07-31 00:42:41,926][train_inner][INFO] - {"epoch": 4, "update": 3.245, "loss": "2.356", "ppl": "5.12", "wps": "391378", "ups": "3.3", "wpb": "118531", "bsz": "255.9", "num_updates": "167000", "lr": "8.41414e-05", "gnorm": "0.938", "loss_scale": "16", "train_wall": "60", "gb_free": "32.3", "wall": "49970"} +[2022-07-31 00:43:41,299][train_inner][INFO] - {"epoch": 4, "update": 3.249, "loss": "2.355", "ppl": "5.12", "wps": "399683", "ups": "3.37", "wpb": "118650", "bsz": "256", "num_updates": "167200", "lr": "8.41212e-05", "gnorm": "0.932", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "50030"} +[2022-07-31 00:44:40,525][train_inner][INFO] - {"epoch": 4, "update": 3.253, "loss": "2.359", "ppl": "5.13", "wps": "399430", "ups": "3.38", "wpb": "118283", "bsz": "256", "num_updates": "167400", "lr": "8.4101e-05", "gnorm": "0.94", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "50089"} +[2022-07-31 00:45:40,117][train_inner][INFO] - {"epoch": 4, "update": 3.256, "loss": "2.355", "ppl": "5.12", "wps": "397082", "ups": "3.36", "wpb": "118314", "bsz": "256", "num_updates": "167600", "lr": "8.40808e-05", "gnorm": "0.935", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "50149"} +[2022-07-31 00:46:39,163][train_inner][INFO] - {"epoch": 4, "update": 3.26, "loss": "2.359", "ppl": "5.13", "wps": "400386", "ups": "3.39", "wpb": "118205", "bsz": "256", "num_updates": "167800", "lr": "8.40606e-05", "gnorm": "0.933", "loss_scale": "16", "train_wall": "59", "gb_free": "27.5", "wall": "50208"} +[2022-07-31 00:47:38,520][train_inner][INFO] - {"epoch": 4, "update": 3.264, "loss": "2.353", "ppl": "5.11", "wps": "398139", "ups": "3.37", "wpb": "118160", "bsz": "256", "num_updates": "168000", "lr": "8.40404e-05", "gnorm": "0.934", "loss_scale": "16", "train_wall": "59", "gb_free": "23.2", "wall": "50267"} +[2022-07-31 00:48:37,588][train_inner][INFO] - {"epoch": 4, "update": 3.268, "loss": "2.357", "ppl": "5.12", "wps": "400192", "ups": "3.39", "wpb": "118192", "bsz": "256", "num_updates": "168200", "lr": "8.40202e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "50326"} +[2022-07-31 00:49:37,119][train_inner][INFO] - {"epoch": 4, "update": 3.272, "loss": "2.361", "ppl": "5.14", "wps": "396146", "ups": "3.36", "wpb": "117915", "bsz": "256", "num_updates": "168400", "lr": "8.4e-05", "gnorm": "0.937", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "50386"} +[2022-07-31 00:50:36,558][train_inner][INFO] - {"epoch": 4, "update": 3.276, "loss": "2.36", "ppl": "5.13", "wps": "397776", "ups": "3.36", "wpb": "118216", "bsz": "256", "num_updates": "168600", "lr": "8.39798e-05", "gnorm": "0.939", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "50445"} +[2022-07-31 00:51:36,236][train_inner][INFO] - {"epoch": 4, "update": 3.28, "loss": "2.347", "ppl": "5.09", "wps": "396200", "ups": "3.35", "wpb": "118221", "bsz": "256", "num_updates": "168800", "lr": "8.39596e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "50505"} +[2022-07-31 00:52:35,661][train_inner][INFO] - {"epoch": 4, "update": 3.284, "loss": "2.345", "ppl": "5.08", "wps": "399353", "ups": "3.37", "wpb": "118656", "bsz": "256", "num_updates": "169000", "lr": "8.39394e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "50564"} +[2022-07-31 00:53:34,870][train_inner][INFO] - {"epoch": 4, "update": 3.287, "loss": "2.351", "ppl": "5.1", "wps": "399758", "ups": "3.38", "wpb": "118345", "bsz": "256", "num_updates": "169200", "lr": "8.39192e-05", "gnorm": "0.935", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "50623"} +[2022-07-31 00:54:34,000][train_inner][INFO] - {"epoch": 4, "update": 3.291, "loss": "2.356", "ppl": "5.12", "wps": "400407", "ups": "3.38", "wpb": "118379", "bsz": "256", "num_updates": "169400", "lr": "8.3899e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "50682"} +[2022-07-31 00:55:33,674][train_inner][INFO] - {"epoch": 4, "update": 3.295, "loss": "2.352", "ppl": "5.11", "wps": "395237", "ups": "3.35", "wpb": "117927", "bsz": "256", "num_updates": "169600", "lr": "8.38788e-05", "gnorm": "0.938", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "50742"} +[2022-07-31 00:56:33,238][train_inner][INFO] - {"epoch": 4, "update": 3.299, "loss": "2.35", "ppl": "5.1", "wps": "396769", "ups": "3.36", "wpb": "118164", "bsz": "256", "num_updates": "169800", "lr": "8.38586e-05", "gnorm": "0.935", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "50802"} +[2022-07-31 00:57:32,696][train_inner][INFO] - {"epoch": 4, "update": 3.303, "loss": "2.353", "ppl": "5.11", "wps": "399156", "ups": "3.36", "wpb": "118665", "bsz": "256", "num_updates": "170000", "lr": "8.38384e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "27.2", "wall": "50861"} +[2022-07-31 00:58:32,496][train_inner][INFO] - {"epoch": 4, "update": 3.307, "loss": "2.349", "ppl": "5.09", "wps": "396288", "ups": "3.34", "wpb": "118488", "bsz": "256", "num_updates": "170200", "lr": "8.38182e-05", "gnorm": "0.935", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "50921"} +[2022-07-31 00:59:32,196][train_inner][INFO] - {"epoch": 4, "update": 3.311, "loss": "2.35", "ppl": "5.1", "wps": "397852", "ups": "3.35", "wpb": "118758", "bsz": "256", "num_updates": "170400", "lr": "8.3798e-05", "gnorm": "0.932", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "50981"} +[2022-07-31 01:00:31,972][train_inner][INFO] - {"epoch": 4, "update": 3.315, "loss": "2.354", "ppl": "5.11", "wps": "395242", "ups": "3.35", "wpb": "118129", "bsz": "256", "num_updates": "170600", "lr": "8.37778e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "24.7", "wall": "51040"} +[2022-07-31 01:01:31,470][train_inner][INFO] - {"epoch": 4, "update": 3.319, "loss": "2.348", "ppl": "5.09", "wps": "394353", "ups": "3.36", "wpb": "117316", "bsz": "256", "num_updates": "170800", "lr": "8.37576e-05", "gnorm": "0.937", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "51100"} +[2022-07-31 01:02:30,962][train_inner][INFO] - {"epoch": 4, "update": 3.322, "loss": "2.354", "ppl": "5.11", "wps": "396607", "ups": "3.36", "wpb": "117974", "bsz": "256", "num_updates": "171000", "lr": "8.37374e-05", "gnorm": "0.936", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "51159"} +[2022-07-31 01:03:30,332][train_inner][INFO] - {"epoch": 4, "update": 3.326, "loss": "2.354", "ppl": "5.11", "wps": "397758", "ups": "3.37", "wpb": "118074", "bsz": "256", "num_updates": "171200", "lr": "8.37172e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "51219"} +[2022-07-31 01:04:29,715][train_inner][INFO] - {"epoch": 4, "update": 3.33, "loss": "2.348", "ppl": "5.09", "wps": "399749", "ups": "3.37", "wpb": "118690", "bsz": "256", "num_updates": "171400", "lr": "8.3697e-05", "gnorm": "0.935", "loss_scale": "64", "train_wall": "59", "gb_free": "26.7", "wall": "51278"} +[2022-07-31 01:05:29,372][train_inner][INFO] - {"epoch": 4, "update": 3.334, "loss": "2.356", "ppl": "5.12", "wps": "395077", "ups": "3.35", "wpb": "117846", "bsz": "256", "num_updates": "171600", "lr": "8.36768e-05", "gnorm": "0.938", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "51338"} +[2022-07-31 01:06:28,778][train_inner][INFO] - {"epoch": 4, "update": 3.338, "loss": "2.346", "ppl": "5.08", "wps": "397409", "ups": "3.37", "wpb": "118042", "bsz": "256", "num_updates": "171800", "lr": "8.36566e-05", "gnorm": "0.936", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "51397"} +[2022-07-31 01:07:29,507][train_inner][INFO] - {"epoch": 4, "update": 3.342, "loss": "2.351", "ppl": "5.1", "wps": "386174", "ups": "3.29", "wpb": "117258", "bsz": "256", "num_updates": "172000", "lr": "8.36364e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "60", "gb_free": "22.6", "wall": "51458"} +[2022-07-31 01:08:30,231][train_inner][INFO] - {"epoch": 4, "update": 3.346, "loss": "2.35", "ppl": "5.1", "wps": "389664", "ups": "3.29", "wpb": "118310", "bsz": "256", "num_updates": "172200", "lr": "8.36162e-05", "gnorm": "0.94", "loss_scale": "128", "train_wall": "60", "gb_free": "21.4", "wall": "51519"} +[2022-07-31 01:09:29,895][train_inner][INFO] - {"epoch": 4, "update": 3.35, "loss": "2.349", "ppl": "5.1", "wps": "395935", "ups": "3.35", "wpb": "118114", "bsz": "256", "num_updates": "172400", "lr": "8.3596e-05", "gnorm": "0.935", "loss_scale": "128", "train_wall": "59", "gb_free": "22.6", "wall": "51578"} +[2022-07-31 01:09:39,594][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 01:10:29,591][train_inner][INFO] - {"epoch": 4, "update": 3.354, "loss": "2.349", "ppl": "5.1", "wps": "397364", "ups": "3.35", "wpb": "118604", "bsz": "256", "num_updates": "172600", "lr": "8.35758e-05", "gnorm": "0.934", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "51638"} +[2022-07-31 01:11:07,264][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 01:11:29,230][train_inner][INFO] - {"epoch": 4, "update": 3.357, "loss": "2.346", "ppl": "5.09", "wps": "397241", "ups": "3.35", "wpb": "118455", "bsz": "256", "num_updates": "172800", "lr": "8.35556e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "51698"} +[2022-07-31 01:12:28,808][train_inner][INFO] - {"epoch": 4, "update": 3.361, "loss": "2.356", "ppl": "5.12", "wps": "397652", "ups": "3.36", "wpb": "118454", "bsz": "256", "num_updates": "173000", "lr": "8.35354e-05", "gnorm": "0.937", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "51757"} +[2022-07-31 01:13:27,985][train_inner][INFO] - {"epoch": 4, "update": 3.365, "loss": "2.343", "ppl": "5.07", "wps": "400372", "ups": "3.38", "wpb": "118465", "bsz": "256", "num_updates": "173200", "lr": "8.35152e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "51816"} +[2022-07-31 01:14:27,342][train_inner][INFO] - {"epoch": 4, "update": 3.369, "loss": "2.345", "ppl": "5.08", "wps": "397942", "ups": "3.37", "wpb": "118103", "bsz": "256", "num_updates": "173400", "lr": "8.34949e-05", "gnorm": "0.938", "loss_scale": "32", "train_wall": "59", "gb_free": "26.4", "wall": "51876"} +[2022-07-31 01:15:26,745][train_inner][INFO] - {"epoch": 4, "update": 3.373, "loss": "2.348", "ppl": "5.09", "wps": "399022", "ups": "3.37", "wpb": "118515", "bsz": "256", "num_updates": "173600", "lr": "8.34747e-05", "gnorm": "0.937", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "51935"} +[2022-07-31 01:16:26,222][train_inner][INFO] - {"epoch": 4, "update": 3.377, "loss": "2.342", "ppl": "5.07", "wps": "399852", "ups": "3.36", "wpb": "118908", "bsz": "256", "num_updates": "173800", "lr": "8.34545e-05", "gnorm": "0.933", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "51995"} +[2022-07-31 01:17:25,748][train_inner][INFO] - {"epoch": 4, "update": 3.381, "loss": "2.343", "ppl": "5.07", "wps": "398573", "ups": "3.36", "wpb": "118627", "bsz": "256", "num_updates": "174000", "lr": "8.34343e-05", "gnorm": "0.934", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "52054"} +[2022-07-31 01:18:25,552][train_inner][INFO] - {"epoch": 4, "update": 3.385, "loss": "2.349", "ppl": "5.1", "wps": "395073", "ups": "3.34", "wpb": "118134", "bsz": "256", "num_updates": "174200", "lr": "8.34141e-05", "gnorm": "0.944", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "52114"} +[2022-07-31 01:19:24,697][train_inner][INFO] - {"epoch": 4, "update": 3.389, "loss": "2.342", "ppl": "5.07", "wps": "397779", "ups": "3.38", "wpb": "117633", "bsz": "256", "num_updates": "174400", "lr": "8.33939e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "52173"} +[2022-07-31 01:20:24,030][train_inner][INFO] - {"epoch": 4, "update": 3.392, "loss": "2.35", "ppl": "5.1", "wps": "396206", "ups": "3.37", "wpb": "117540", "bsz": "256", "num_updates": "174600", "lr": "8.33737e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "59", "gb_free": "30.1", "wall": "52232"} +[2022-07-31 01:21:23,144][train_inner][INFO] - {"epoch": 4, "update": 3.396, "loss": "2.345", "ppl": "5.08", "wps": "400007", "ups": "3.38", "wpb": "118229", "bsz": "256", "num_updates": "174800", "lr": "8.33535e-05", "gnorm": "0.938", "loss_scale": "64", "train_wall": "59", "gb_free": "24.4", "wall": "52292"} +[2022-07-31 01:22:22,836][train_inner][INFO] - {"epoch": 4, "update": 3.4, "loss": "2.349", "ppl": "5.1", "wps": "397852", "ups": "3.35", "wpb": "118742", "bsz": "256", "num_updates": "175000", "lr": "8.33333e-05", "gnorm": "0.937", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "52351"} +[2022-07-31 01:23:22,445][train_inner][INFO] - {"epoch": 4, "update": 3.404, "loss": "2.345", "ppl": "5.08", "wps": "396192", "ups": "3.36", "wpb": "118082", "bsz": "256", "num_updates": "175200", "lr": "8.33131e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "52411"} +[2022-07-31 01:24:21,888][train_inner][INFO] - {"epoch": 4, "update": 3.408, "loss": "2.345", "ppl": "5.08", "wps": "397560", "ups": "3.36", "wpb": "118160", "bsz": "256", "num_updates": "175400", "lr": "8.32929e-05", "gnorm": "0.936", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "52470"} +[2022-07-31 01:25:21,608][train_inner][INFO] - {"epoch": 4, "update": 3.412, "loss": "2.346", "ppl": "5.09", "wps": "393973", "ups": "3.35", "wpb": "117640", "bsz": "256", "num_updates": "175600", "lr": "8.32727e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "25.9", "wall": "52530"} +[2022-07-31 01:26:21,198][train_inner][INFO] - {"epoch": 4, "update": 3.416, "loss": "2.342", "ppl": "5.07", "wps": "396996", "ups": "3.36", "wpb": "118283", "bsz": "256", "num_updates": "175800", "lr": "8.32525e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "52590"} +[2022-07-31 01:27:21,859][train_inner][INFO] - {"epoch": 4, "update": 3.42, "loss": "2.334", "ppl": "5.04", "wps": "390723", "ups": "3.3", "wpb": "118508", "bsz": "256", "num_updates": "176000", "lr": "8.32323e-05", "gnorm": "0.938", "loss_scale": "64", "train_wall": "60", "gb_free": "23.3", "wall": "52650"} +[2022-07-31 01:28:21,338][train_inner][INFO] - {"epoch": 4, "update": 3.423, "loss": "2.345", "ppl": "5.08", "wps": "398754", "ups": "3.36", "wpb": "118586", "bsz": "256", "num_updates": "176200", "lr": "8.32121e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "52710"} +[2022-07-31 01:29:20,840][train_inner][INFO] - {"epoch": 4, "update": 3.427, "loss": "2.346", "ppl": "5.08", "wps": "396169", "ups": "3.36", "wpb": "117863", "bsz": "256", "num_updates": "176400", "lr": "8.31919e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "59", "gb_free": "22.8", "wall": "52769"} +[2022-07-31 01:30:20,197][train_inner][INFO] - {"epoch": 4, "update": 3.431, "loss": "2.342", "ppl": "5.07", "wps": "399551", "ups": "3.37", "wpb": "118580", "bsz": "256", "num_updates": "176600", "lr": "8.31717e-05", "gnorm": "0.936", "loss_scale": "64", "train_wall": "59", "gb_free": "27.7", "wall": "52829"} +[2022-07-31 01:31:19,786][train_inner][INFO] - {"epoch": 4, "update": 3.435, "loss": "2.339", "ppl": "5.06", "wps": "397975", "ups": "3.36", "wpb": "118575", "bsz": "256", "num_updates": "176800", "lr": "8.31515e-05", "gnorm": "0.937", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "52888"} +[2022-07-31 01:32:19,210][train_inner][INFO] - {"epoch": 4, "update": 3.439, "loss": "2.34", "ppl": "5.06", "wps": "399130", "ups": "3.37", "wpb": "118587", "bsz": "256", "num_updates": "177000", "lr": "8.31313e-05", "gnorm": "0.935", "loss_scale": "128", "train_wall": "59", "gb_free": "21.5", "wall": "52948"} +[2022-07-31 01:32:39,555][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 01:33:18,706][train_inner][INFO] - {"epoch": 4, "update": 3.443, "loss": "2.342", "ppl": "5.07", "wps": "396612", "ups": "3.36", "wpb": "117983", "bsz": "256", "num_updates": "177200", "lr": "8.31111e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "53007"} +[2022-07-31 01:34:18,528][train_inner][INFO] - {"epoch": 4, "update": 3.447, "loss": "2.345", "ppl": "5.08", "wps": "395004", "ups": "3.34", "wpb": "118150", "bsz": "256", "num_updates": "177400", "lr": "8.30909e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "53067"} +[2022-07-31 01:35:18,293][train_inner][INFO] - {"epoch": 4, "update": 3.451, "loss": "2.343", "ppl": "5.07", "wps": "397357", "ups": "3.35", "wpb": "118739", "bsz": "256", "num_updates": "177600", "lr": "8.30707e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "53127"} +[2022-07-31 01:36:17,892][train_inner][INFO] - {"epoch": 4, "update": 3.455, "loss": "2.336", "ppl": "5.05", "wps": "396673", "ups": "3.36", "wpb": "118206", "bsz": "256", "num_updates": "177800", "lr": "8.30505e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "53186"} +[2022-07-31 01:37:17,286][train_inner][INFO] - {"epoch": 4, "update": 3.458, "loss": "2.341", "ppl": "5.07", "wps": "398071", "ups": "3.37", "wpb": "118215", "bsz": "256", "num_updates": "178000", "lr": "8.30303e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "53246"} +[2022-07-31 01:38:17,210][train_inner][INFO] - {"epoch": 4, "update": 3.462, "loss": "2.337", "ppl": "5.05", "wps": "393070", "ups": "3.34", "wpb": "117770", "bsz": "256", "num_updates": "178200", "lr": "8.30101e-05", "gnorm": "0.938", "loss_scale": "64", "train_wall": "60", "gb_free": "21.5", "wall": "53306"} +[2022-07-31 01:39:17,817][train_inner][INFO] - {"epoch": 4, "update": 3.466, "loss": "2.335", "ppl": "5.05", "wps": "391127", "ups": "3.3", "wpb": "118524", "bsz": "256", "num_updates": "178400", "lr": "8.29899e-05", "gnorm": "0.936", "loss_scale": "64", "train_wall": "60", "gb_free": "21.6", "wall": "53366"} +[2022-07-31 01:40:17,168][train_inner][INFO] - {"epoch": 4, "update": 3.47, "loss": "2.336", "ppl": "5.05", "wps": "399172", "ups": "3.37", "wpb": "118456", "bsz": "256", "num_updates": "178600", "lr": "8.29697e-05", "gnorm": "0.936", "loss_scale": "64", "train_wall": "59", "gb_free": "22.8", "wall": "53426"} +[2022-07-31 01:40:19,285][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 01:41:16,981][train_inner][INFO] - {"epoch": 4, "update": 3.474, "loss": "2.336", "ppl": "5.05", "wps": "397818", "ups": "3.34", "wpb": "118971", "bsz": "256", "num_updates": "178800", "lr": "8.29495e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "53485"} +[2022-07-31 01:42:16,470][train_inner][INFO] - {"epoch": 4, "update": 3.478, "loss": "2.333", "ppl": "5.04", "wps": "398115", "ups": "3.36", "wpb": "118417", "bsz": "256", "num_updates": "179000", "lr": "8.29293e-05", "gnorm": "0.939", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "53545"} +[2022-07-31 01:43:16,123][train_inner][INFO] - {"epoch": 4, "update": 3.482, "loss": "2.34", "ppl": "5.06", "wps": "397990", "ups": "3.35", "wpb": "118706", "bsz": "256", "num_updates": "179200", "lr": "8.29091e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "53605"} +[2022-07-31 01:44:15,564][train_inner][INFO] - {"epoch": 4, "update": 3.486, "loss": "2.343", "ppl": "5.08", "wps": "397852", "ups": "3.36", "wpb": "118243", "bsz": "256", "num_updates": "179400", "lr": "8.28889e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "53664"} +[2022-07-31 01:45:15,014][train_inner][INFO] - {"epoch": 4, "update": 3.49, "loss": "2.331", "ppl": "5.03", "wps": "399254", "ups": "3.36", "wpb": "118679", "bsz": "256", "num_updates": "179600", "lr": "8.28687e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "25.6", "wall": "53723"} +[2022-07-31 01:46:15,785][train_inner][INFO] - {"epoch": 4, "update": 3.493, "loss": "2.331", "ppl": "5.03", "wps": "389714", "ups": "3.29", "wpb": "118416", "bsz": "256", "num_updates": "179800", "lr": "8.28485e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "60", "gb_free": "23.4", "wall": "53784"} +[2022-07-31 01:47:15,142][train_inner][INFO] - {"epoch": 4, "update": 3.497, "loss": "2.337", "ppl": "5.05", "wps": "398477", "ups": "3.37", "wpb": "118261", "bsz": "256", "num_updates": "180000", "lr": "8.28283e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "59", "gb_free": "27.2", "wall": "53844"} +[2022-07-31 01:48:14,431][train_inner][INFO] - {"epoch": 4, "update": 3.501, "loss": "2.341", "ppl": "5.07", "wps": "399208", "ups": "3.37", "wpb": "118342", "bsz": "256", "num_updates": "180200", "lr": "8.28081e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "53903"} +[2022-07-31 01:49:13,586][train_inner][INFO] - {"epoch": 4, "update": 3.505, "loss": "2.336", "ppl": "5.05", "wps": "401341", "ups": "3.38", "wpb": "118706", "bsz": "256", "num_updates": "180400", "lr": "8.27879e-05", "gnorm": "0.938", "loss_scale": "32", "train_wall": "59", "gb_free": "25", "wall": "53962"} +[2022-07-31 01:50:12,939][train_inner][INFO] - {"epoch": 4, "update": 3.509, "loss": "2.34", "ppl": "5.06", "wps": "398895", "ups": "3.37", "wpb": "118376", "bsz": "256", "num_updates": "180600", "lr": "8.27677e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "54021"} +[2022-07-31 01:51:12,549][train_inner][INFO] - {"epoch": 4, "update": 3.513, "loss": "2.339", "ppl": "5.06", "wps": "395935", "ups": "3.36", "wpb": "118008", "bsz": "256", "num_updates": "180800", "lr": "8.27475e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "27.4", "wall": "54081"} +[2022-07-31 01:52:12,340][train_inner][INFO] - {"epoch": 4, "update": 3.517, "loss": "2.333", "ppl": "5.04", "wps": "397119", "ups": "3.34", "wpb": "118720", "bsz": "256", "num_updates": "181000", "lr": "8.27273e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "54141"} +[2022-07-31 01:53:11,768][train_inner][INFO] - {"epoch": 4, "update": 3.521, "loss": "2.333", "ppl": "5.04", "wps": "398684", "ups": "3.37", "wpb": "118464", "bsz": "256", "num_updates": "181200", "lr": "8.27071e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "59", "gb_free": "28.9", "wall": "54200"} +[2022-07-31 01:54:11,477][train_inner][INFO] - {"epoch": 4, "update": 3.525, "loss": "2.335", "ppl": "5.05", "wps": "394975", "ups": "3.35", "wpb": "117917", "bsz": "256", "num_updates": "181400", "lr": "8.26869e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "27.4", "wall": "54260"} +[2022-07-31 01:55:11,024][train_inner][INFO] - {"epoch": 4, "update": 3.528, "loss": "2.338", "ppl": "5.05", "wps": "398387", "ups": "3.36", "wpb": "118612", "bsz": "256", "num_updates": "181600", "lr": "8.26667e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "54319"} +[2022-07-31 01:56:10,795][train_inner][INFO] - {"epoch": 4, "update": 3.532, "loss": "2.337", "ppl": "5.05", "wps": "393182", "ups": "3.35", "wpb": "117504", "bsz": "256", "num_updates": "181800", "lr": "8.26465e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "54379"} +[2022-07-31 01:57:10,335][train_inner][INFO] - {"epoch": 4, "update": 3.536, "loss": "2.337", "ppl": "5.05", "wps": "398183", "ups": "3.36", "wpb": "118538", "bsz": "256", "num_updates": "182000", "lr": "8.26263e-05", "gnorm": "0.938", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "54439"} +[2022-07-31 01:58:11,170][train_inner][INFO] - {"epoch": 4, "update": 3.54, "loss": "2.333", "ppl": "5.04", "wps": "389028", "ups": "3.29", "wpb": "118331", "bsz": "256", "num_updates": "182200", "lr": "8.26061e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "61", "gb_free": "23.6", "wall": "54500"} +[2022-07-31 01:59:10,483][train_inner][INFO] - {"epoch": 4, "update": 3.544, "loss": "2.329", "ppl": "5.02", "wps": "399774", "ups": "3.37", "wpb": "118558", "bsz": "256", "num_updates": "182400", "lr": "8.25859e-05", "gnorm": "0.939", "loss_scale": "64", "train_wall": "59", "gb_free": "26.3", "wall": "54559"} +[2022-07-31 02:00:10,068][train_inner][INFO] - {"epoch": 4, "update": 3.548, "loss": "2.337", "ppl": "5.05", "wps": "396566", "ups": "3.36", "wpb": "118146", "bsz": "256", "num_updates": "182600", "lr": "8.25657e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "54618"} +[2022-07-31 02:00:47,269][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 02:01:10,281][train_inner][INFO] - {"epoch": 4, "update": 3.552, "loss": "2.331", "ppl": "5.03", "wps": "393106", "ups": "3.32", "wpb": "118350", "bsz": "256", "num_updates": "182800", "lr": "8.25455e-05", "gnorm": "0.938", "loss_scale": "64", "train_wall": "60", "gb_free": "22.1", "wall": "54679"} +[2022-07-31 02:02:08,245][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 02:02:10,548][train_inner][INFO] - {"epoch": 4, "update": 3.556, "loss": "2.33", "ppl": "5.03", "wps": "394460", "ups": "3.32", "wpb": "118864", "bsz": "256", "num_updates": "183000", "lr": "8.25253e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "60", "gb_free": "21.5", "wall": "54739"} +[2022-07-31 02:03:10,213][train_inner][INFO] - {"epoch": 4, "update": 3.56, "loss": "2.335", "ppl": "5.04", "wps": "397890", "ups": "3.35", "wpb": "118701", "bsz": "256", "num_updates": "183200", "lr": "8.25051e-05", "gnorm": "0.939", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "54799"} +[2022-07-31 02:04:09,681][train_inner][INFO] - {"epoch": 4, "update": 3.563, "loss": "2.343", "ppl": "5.07", "wps": "397796", "ups": "3.36", "wpb": "118279", "bsz": "256", "num_updates": "183400", "lr": "8.24848e-05", "gnorm": "0.943", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "54858"} +[2022-07-31 02:05:09,753][train_inner][INFO] - {"epoch": 4, "update": 3.567, "loss": "2.328", "ppl": "5.02", "wps": "396151", "ups": "3.33", "wpb": "118988", "bsz": "256", "num_updates": "183600", "lr": "8.24646e-05", "gnorm": "0.938", "loss_scale": "32", "train_wall": "60", "gb_free": "25.6", "wall": "54918"} +[2022-07-31 02:06:09,337][train_inner][INFO] - {"epoch": 4, "update": 3.571, "loss": "2.336", "ppl": "5.05", "wps": "398412", "ups": "3.36", "wpb": "118693", "bsz": "256", "num_updates": "183800", "lr": "8.24444e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "54978"} +[2022-07-31 02:07:08,742][train_inner][INFO] - {"epoch": 4, "update": 3.575, "loss": "2.329", "ppl": "5.02", "wps": "399715", "ups": "3.37", "wpb": "118724", "bsz": "256", "num_updates": "184000", "lr": "8.24242e-05", "gnorm": "0.936", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "55037"} +[2022-07-31 02:08:07,975][train_inner][INFO] - {"epoch": 4, "update": 3.579, "loss": "2.334", "ppl": "5.04", "wps": "398226", "ups": "3.38", "wpb": "117940", "bsz": "256", "num_updates": "184200", "lr": "8.2404e-05", "gnorm": "0.943", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "55096"} +[2022-07-31 02:09:08,571][train_inner][INFO] - {"epoch": 4, "update": 3.583, "loss": "2.336", "ppl": "5.05", "wps": "389909", "ups": "3.3", "wpb": "118133", "bsz": "256", "num_updates": "184400", "lr": "8.23838e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "60", "gb_free": "21.3", "wall": "55157"} +[2022-07-31 02:10:08,522][train_inner][INFO] - {"epoch": 4, "update": 3.587, "loss": "2.335", "ppl": "5.04", "wps": "394811", "ups": "3.34", "wpb": "118346", "bsz": "256", "num_updates": "184600", "lr": "8.23636e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "60", "gb_free": "23.7", "wall": "55217"} +[2022-07-31 02:11:08,210][train_inner][INFO] - {"epoch": 4, "update": 3.591, "loss": "2.331", "ppl": "5.03", "wps": "397389", "ups": "3.35", "wpb": "118597", "bsz": "256", "num_updates": "184800", "lr": "8.23434e-05", "gnorm": "0.939", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "55277"} +[2022-07-31 02:12:07,587][train_inner][INFO] - {"epoch": 4, "update": 3.594, "loss": "2.334", "ppl": "5.04", "wps": "397666", "ups": "3.37", "wpb": "118061", "bsz": "256", "num_updates": "185000", "lr": "8.23232e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "59", "gb_free": "26.2", "wall": "55336"} +[2022-07-31 02:13:07,076][train_inner][INFO] - {"epoch": 4, "update": 3.598, "loss": "2.325", "ppl": "5.01", "wps": "397817", "ups": "3.36", "wpb": "118327", "bsz": "256", "num_updates": "185200", "lr": "8.2303e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "55396"} +[2022-07-31 02:14:06,421][train_inner][INFO] - {"epoch": 4, "update": 3.602, "loss": "2.329", "ppl": "5.03", "wps": "398446", "ups": "3.37", "wpb": "118229", "bsz": "256", "num_updates": "185400", "lr": "8.22828e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "22.5", "wall": "55455"} +[2022-07-31 02:15:05,728][train_inner][INFO] - {"epoch": 4, "update": 3.606, "loss": "2.336", "ppl": "5.05", "wps": "397214", "ups": "3.37", "wpb": "117786", "bsz": "256", "num_updates": "185600", "lr": "8.22626e-05", "gnorm": "0.945", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "55514"} +[2022-07-31 02:15:08,086][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 02:16:05,261][train_inner][INFO] - {"epoch": 4, "update": 3.61, "loss": "2.336", "ppl": "5.05", "wps": "396500", "ups": "3.36", "wpb": "118024", "bsz": "256", "num_updates": "185800", "lr": "8.22424e-05", "gnorm": "0.945", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "55574"} +[2022-07-31 02:17:04,863][train_inner][INFO] - {"epoch": 4, "update": 3.614, "loss": "2.329", "ppl": "5.03", "wps": "395961", "ups": "3.36", "wpb": "118000", "bsz": "256", "num_updates": "186000", "lr": "8.22222e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "55633"} +[2022-07-31 02:18:04,443][train_inner][INFO] - {"epoch": 4, "update": 3.618, "loss": "2.325", "ppl": "5.01", "wps": "398518", "ups": "3.36", "wpb": "118716", "bsz": "256", "num_updates": "186200", "lr": "8.2202e-05", "gnorm": "0.939", "loss_scale": "32", "train_wall": "59", "gb_free": "25.3", "wall": "55693"} +[2022-07-31 02:19:03,531][train_inner][INFO] - {"epoch": 4, "update": 3.622, "loss": "2.327", "ppl": "5.02", "wps": "397424", "ups": "3.38", "wpb": "117414", "bsz": "256", "num_updates": "186400", "lr": "8.21818e-05", "gnorm": "0.944", "loss_scale": "32", "train_wall": "59", "gb_free": "26.3", "wall": "55752"} +[2022-07-31 02:20:02,879][train_inner][INFO] - {"epoch": 4, "update": 3.626, "loss": "2.326", "ppl": "5.02", "wps": "398954", "ups": "3.37", "wpb": "118386", "bsz": "256", "num_updates": "186600", "lr": "8.21616e-05", "gnorm": "0.94", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "55811"} +[2022-07-31 02:21:02,400][train_inner][INFO] - {"epoch": 4, "update": 3.629, "loss": "2.324", "ppl": "5.01", "wps": "399638", "ups": "3.36", "wpb": "118934", "bsz": "256", "num_updates": "186800", "lr": "8.21414e-05", "gnorm": "0.938", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "55871"} +[2022-07-31 02:22:02,314][train_inner][INFO] - {"epoch": 4, "update": 3.633, "loss": "2.323", "ppl": "5", "wps": "395923", "ups": "3.34", "wpb": "118605", "bsz": "256", "num_updates": "187000", "lr": "8.21212e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "60", "gb_free": "22.5", "wall": "55931"} +[2022-07-31 02:23:01,969][train_inner][INFO] - {"epoch": 4, "update": 3.637, "loss": "2.327", "ppl": "5.02", "wps": "397887", "ups": "3.35", "wpb": "118678", "bsz": "256", "num_updates": "187200", "lr": "8.2101e-05", "gnorm": "0.938", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "55990"} +[2022-07-31 02:24:01,853][train_inner][INFO] - {"epoch": 4, "update": 3.641, "loss": "2.329", "ppl": "5.03", "wps": "396098", "ups": "3.34", "wpb": "118599", "bsz": "256", "num_updates": "187400", "lr": "8.20808e-05", "gnorm": "0.945", "loss_scale": "32", "train_wall": "60", "gb_free": "22.2", "wall": "56050"} +[2022-07-31 02:25:01,487][train_inner][INFO] - {"epoch": 4, "update": 3.645, "loss": "2.329", "ppl": "5.02", "wps": "395611", "ups": "3.35", "wpb": "117958", "bsz": "256", "num_updates": "187600", "lr": "8.20606e-05", "gnorm": "0.944", "loss_scale": "32", "train_wall": "59", "gb_free": "28", "wall": "56110"} +[2022-07-31 02:26:01,195][train_inner][INFO] - {"epoch": 4, "update": 3.649, "loss": "2.327", "ppl": "5.02", "wps": "397396", "ups": "3.35", "wpb": "118638", "bsz": "256", "num_updates": "187800", "lr": "8.20404e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "56170"} +[2022-07-31 02:27:01,013][train_inner][INFO] - {"epoch": 4, "update": 3.653, "loss": "2.323", "ppl": "5", "wps": "396520", "ups": "3.34", "wpb": "118594", "bsz": "256", "num_updates": "188000", "lr": "8.20202e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "56229"} +[2022-07-31 02:28:00,498][train_inner][INFO] - {"epoch": 4, "update": 3.657, "loss": "2.326", "ppl": "5.02", "wps": "398860", "ups": "3.36", "wpb": "118630", "bsz": "256", "num_updates": "188200", "lr": "8.2e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "56289"} +[2022-07-31 02:29:00,008][train_inner][INFO] - {"epoch": 4, "update": 3.661, "loss": "2.329", "ppl": "5.02", "wps": "398428", "ups": "3.36", "wpb": "118553", "bsz": "256", "num_updates": "188400", "lr": "8.19798e-05", "gnorm": "0.944", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "56348"} +[2022-07-31 02:29:59,559][train_inner][INFO] - {"epoch": 4, "update": 3.664, "loss": "2.322", "ppl": "5", "wps": "397148", "ups": "3.36", "wpb": "118251", "bsz": "256", "num_updates": "188600", "lr": "8.19596e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "24.4", "wall": "56408"} +[2022-07-31 02:30:58,913][train_inner][INFO] - {"epoch": 4, "update": 3.668, "loss": "2.328", "ppl": "5.02", "wps": "398543", "ups": "3.37", "wpb": "118276", "bsz": "256", "num_updates": "188800", "lr": "8.19394e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "56467"} +[2022-07-31 02:31:58,776][train_inner][INFO] - {"epoch": 4, "update": 3.672, "loss": "2.325", "ppl": "5.01", "wps": "396437", "ups": "3.34", "wpb": "118659", "bsz": "256", "num_updates": "189000", "lr": "8.19192e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "60", "gb_free": "25.9", "wall": "56527"} +[2022-07-31 02:32:58,241][train_inner][INFO] - {"epoch": 4, "update": 3.676, "loss": "2.325", "ppl": "5.01", "wps": "396906", "ups": "3.36", "wpb": "118009", "bsz": "256", "num_updates": "189200", "lr": "8.1899e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "56587"} +[2022-07-31 02:33:57,845][train_inner][INFO] - {"epoch": 4, "update": 3.68, "loss": "2.326", "ppl": "5.02", "wps": "398866", "ups": "3.36", "wpb": "118868", "bsz": "256", "num_updates": "189400", "lr": "8.18788e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "59", "gb_free": "23.9", "wall": "56646"} +[2022-07-31 02:34:57,272][train_inner][INFO] - {"epoch": 4, "update": 3.684, "loss": "2.315", "ppl": "4.98", "wps": "398733", "ups": "3.37", "wpb": "118477", "bsz": "256", "num_updates": "189600", "lr": "8.18586e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "56706"} +[2022-07-31 02:35:56,549][train_inner][INFO] - {"epoch": 4, "update": 3.688, "loss": "2.317", "ppl": "4.98", "wps": "400927", "ups": "3.37", "wpb": "118829", "bsz": "256", "num_updates": "189800", "lr": "8.18384e-05", "gnorm": "0.941", "loss_scale": "128", "train_wall": "59", "gb_free": "24.5", "wall": "56765"} +[2022-07-31 02:36:56,023][train_inner][INFO] - {"epoch": 4, "update": 3.692, "loss": "2.32", "ppl": "4.99", "wps": "398482", "ups": "3.36", "wpb": "118494", "bsz": "256", "num_updates": "190000", "lr": "8.18182e-05", "gnorm": "0.94", "loss_scale": "128", "train_wall": "59", "gb_free": "21.4", "wall": "56824"} +[2022-07-31 02:37:55,878][train_inner][INFO] - {"epoch": 4, "update": 3.695, "loss": "2.32", "ppl": "4.99", "wps": "395400", "ups": "3.34", "wpb": "118334", "bsz": "256", "num_updates": "190200", "lr": "8.1798e-05", "gnorm": "0.942", "loss_scale": "128", "train_wall": "60", "gb_free": "22.7", "wall": "56884"} +[2022-07-31 02:38:08,574][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 02:38:55,438][train_inner][INFO] - {"epoch": 4, "update": 3.699, "loss": "2.327", "ppl": "5.02", "wps": "397420", "ups": "3.36", "wpb": "118350", "bsz": "256", "num_updates": "190400", "lr": "8.17778e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "59", "gb_free": "24.8", "wall": "56944"} +[2022-07-31 02:39:54,505][train_inner][INFO] - {"epoch": 4, "update": 3.703, "loss": "2.322", "ppl": "5", "wps": "400284", "ups": "3.39", "wpb": "118217", "bsz": "256", "num_updates": "190600", "lr": "8.17576e-05", "gnorm": "0.944", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "57003"} +[2022-07-31 02:40:54,238][train_inner][INFO] - {"epoch": 4, "update": 3.707, "loss": "2.322", "ppl": "5", "wps": "395755", "ups": "3.35", "wpb": "118198", "bsz": "256", "num_updates": "190800", "lr": "8.17374e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "25.8", "wall": "57063"} +[2022-07-31 02:41:54,055][train_inner][INFO] - {"epoch": 4, "update": 3.711, "loss": "2.321", "ppl": "5", "wps": "396870", "ups": "3.34", "wpb": "118697", "bsz": "256", "num_updates": "191000", "lr": "8.17172e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "59", "gb_free": "23.9", "wall": "57122"} +[2022-07-31 02:42:53,580][train_inner][INFO] - {"epoch": 4, "update": 3.715, "loss": "2.325", "ppl": "5.01", "wps": "397132", "ups": "3.36", "wpb": "118195", "bsz": "256", "num_updates": "191200", "lr": "8.1697e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "57182"} +[2022-07-31 02:43:52,881][train_inner][INFO] - {"epoch": 4, "update": 3.719, "loss": "2.323", "ppl": "5", "wps": "399850", "ups": "3.37", "wpb": "118558", "bsz": "256", "num_updates": "191400", "lr": "8.16768e-05", "gnorm": "0.94", "loss_scale": "64", "train_wall": "59", "gb_free": "25.2", "wall": "57241"} +[2022-07-31 02:44:53,197][train_inner][INFO] - {"epoch": 4, "update": 3.723, "loss": "2.319", "ppl": "4.99", "wps": "393197", "ups": "3.32", "wpb": "118580", "bsz": "256", "num_updates": "191600", "lr": "8.16566e-05", "gnorm": "0.941", "loss_scale": "64", "train_wall": "60", "gb_free": "26.9", "wall": "57302"} +[2022-07-31 02:45:32,488][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 02:45:53,387][train_inner][INFO] - {"epoch": 4, "update": 3.727, "loss": "2.326", "ppl": "5.01", "wps": "392539", "ups": "3.32", "wpb": "118133", "bsz": "256", "num_updates": "191800", "lr": "8.16364e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "60", "gb_free": "21.3", "wall": "57362"} +[2022-07-31 02:46:53,718][train_inner][INFO] - {"epoch": 4, "update": 3.73, "loss": "2.327", "ppl": "5.02", "wps": "391901", "ups": "3.32", "wpb": "118218", "bsz": "256", "num_updates": "192000", "lr": "8.16162e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "60", "gb_free": "23.8", "wall": "57422"} +[2022-07-31 02:47:53,304][train_inner][INFO] - {"epoch": 4, "update": 3.734, "loss": "2.33", "ppl": "5.03", "wps": "394116", "ups": "3.36", "wpb": "117419", "bsz": "256", "num_updates": "192200", "lr": "8.1596e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "24.2", "wall": "57482"} +[2022-07-31 02:48:53,051][train_inner][INFO] - {"epoch": 4, "update": 3.738, "loss": "2.32", "ppl": "4.99", "wps": "396046", "ups": "3.35", "wpb": "118311", "bsz": "256", "num_updates": "192400", "lr": "8.15758e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "57541"} +[2022-07-31 02:49:52,695][train_inner][INFO] - {"epoch": 4, "update": 3.742, "loss": "2.319", "ppl": "4.99", "wps": "397184", "ups": "3.35", "wpb": "118447", "bsz": "256", "num_updates": "192600", "lr": "8.15556e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "57601"} +[2022-07-31 02:50:52,003][train_inner][INFO] - {"epoch": 4, "update": 3.746, "loss": "2.316", "ppl": "4.98", "wps": "399548", "ups": "3.37", "wpb": "118482", "bsz": "256", "num_updates": "192800", "lr": "8.15354e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "59", "gb_free": "25.6", "wall": "57660"} +[2022-07-31 02:51:51,350][train_inner][INFO] - {"epoch": 4, "update": 3.75, "loss": "2.32", "ppl": "4.99", "wps": "397168", "ups": "3.37", "wpb": "117853", "bsz": "256", "num_updates": "193000", "lr": "8.15152e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "57720"} +[2022-07-31 02:52:50,740][train_inner][INFO] - {"epoch": 4, "update": 3.754, "loss": "2.314", "ppl": "4.97", "wps": "398771", "ups": "3.37", "wpb": "118413", "bsz": "256", "num_updates": "193200", "lr": "8.14949e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "57779"} +[2022-07-31 02:53:50,040][train_inner][INFO] - {"epoch": 4, "update": 3.758, "loss": "2.324", "ppl": "5.01", "wps": "398351", "ups": "3.37", "wpb": "118111", "bsz": "256", "num_updates": "193400", "lr": "8.14747e-05", "gnorm": "0.945", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "57838"} +[2022-07-31 02:54:50,419][train_inner][INFO] - {"epoch": 4, "update": 3.762, "loss": "2.321", "ppl": "5", "wps": "393502", "ups": "3.31", "wpb": "118794", "bsz": "256", "num_updates": "193600", "lr": "8.14545e-05", "gnorm": "0.944", "loss_scale": "32", "train_wall": "60", "gb_free": "22.6", "wall": "57899"} +[2022-07-31 02:55:49,815][train_inner][INFO] - {"epoch": 4, "update": 3.765, "loss": "2.317", "ppl": "4.98", "wps": "397249", "ups": "3.37", "wpb": "117974", "bsz": "256", "num_updates": "193800", "lr": "8.14343e-05", "gnorm": "0.946", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "57958"} +[2022-07-31 02:56:48,967][train_inner][INFO] - {"epoch": 4, "update": 3.769, "loss": "2.326", "ppl": "5.01", "wps": "398641", "ups": "3.38", "wpb": "117902", "bsz": "256", "num_updates": "194000", "lr": "8.14141e-05", "gnorm": "0.945", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "58017"} +[2022-07-31 02:57:48,642][train_inner][INFO] - {"epoch": 4, "update": 3.773, "loss": "2.318", "ppl": "4.99", "wps": "395552", "ups": "3.35", "wpb": "118023", "bsz": "256", "num_updates": "194200", "lr": "8.13939e-05", "gnorm": "0.944", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "58077"} +[2022-07-31 02:58:48,418][train_inner][INFO] - {"epoch": 4, "update": 3.777, "loss": "2.316", "ppl": "4.98", "wps": "395154", "ups": "3.35", "wpb": "118102", "bsz": "256", "num_updates": "194400", "lr": "8.13737e-05", "gnorm": "0.944", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "58137"} +[2022-07-31 02:59:47,748][train_inner][INFO] - {"epoch": 4, "update": 3.781, "loss": "2.316", "ppl": "4.98", "wps": "397645", "ups": "3.37", "wpb": "117961", "bsz": "256", "num_updates": "194600", "lr": "8.13535e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "58196"} +[2022-07-31 03:00:46,822][train_inner][INFO] - {"epoch": 4, "update": 3.785, "loss": "2.321", "ppl": "5", "wps": "401180", "ups": "3.39", "wpb": "118496", "bsz": "256", "num_updates": "194800", "lr": "8.13333e-05", "gnorm": "0.945", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "58255"} +[2022-07-31 03:01:46,044][train_inner][INFO] - {"epoch": 4, "update": 3.789, "loss": "2.314", "ppl": "4.97", "wps": "399572", "ups": "3.38", "wpb": "118317", "bsz": "256", "num_updates": "195000", "lr": "8.13131e-05", "gnorm": "0.944", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "58314"} +[2022-07-31 03:01:55,565][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 03:02:46,087][train_inner][INFO] - {"epoch": 4, "update": 3.793, "loss": "2.323", "ppl": "5", "wps": "393316", "ups": "3.33", "wpb": "118078", "bsz": "256", "num_updates": "195200", "lr": "8.12929e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "60", "gb_free": "22.9", "wall": "58375"} +[2022-07-31 03:03:45,335][train_inner][INFO] - {"epoch": 4, "update": 3.797, "loss": "2.314", "ppl": "4.97", "wps": "400303", "ups": "3.38", "wpb": "118586", "bsz": "256", "num_updates": "195400", "lr": "8.12727e-05", "gnorm": "0.943", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "58434"} +[2022-07-31 03:04:45,016][train_inner][INFO] - {"epoch": 4, "update": 3.8, "loss": "2.316", "ppl": "4.98", "wps": "396898", "ups": "3.35", "wpb": "118436", "bsz": "256", "num_updates": "195600", "lr": "8.12525e-05", "gnorm": "0.942", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "58493"} +[2022-07-31 03:05:44,422][train_inner][INFO] - {"epoch": 4, "update": 3.804, "loss": "2.313", "ppl": "4.97", "wps": "396958", "ups": "3.37", "wpb": "117907", "bsz": "256", "num_updates": "195800", "lr": "8.12323e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "58553"} +[2022-07-31 03:06:43,662][train_inner][INFO] - {"epoch": 4, "update": 3.808, "loss": "2.314", "ppl": "4.97", "wps": "401461", "ups": "3.38", "wpb": "118911", "bsz": "256", "num_updates": "196000", "lr": "8.12121e-05", "gnorm": "0.941", "loss_scale": "32", "train_wall": "59", "gb_free": "25.4", "wall": "58612"} +[2022-07-31 03:07:43,518][train_inner][INFO] - {"epoch": 4, "update": 3.812, "loss": "2.319", "ppl": "4.99", "wps": "394488", "ups": "3.34", "wpb": "118063", "bsz": "256", "num_updates": "196200", "lr": "8.11919e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "60", "gb_free": "21.3", "wall": "58672"} +[2022-07-31 03:08:42,890][train_inner][INFO] - {"epoch": 4, "update": 3.816, "loss": "2.317", "ppl": "4.98", "wps": "399081", "ups": "3.37", "wpb": "118471", "bsz": "256", "num_updates": "196400", "lr": "8.11717e-05", "gnorm": "0.945", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "58731"} +[2022-07-31 03:09:42,132][train_inner][INFO] - {"epoch": 4, "update": 3.82, "loss": "2.318", "ppl": "4.99", "wps": "398485", "ups": "3.38", "wpb": "118034", "bsz": "256", "num_updates": "196600", "lr": "8.11515e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "58791"} +[2022-07-31 03:10:41,635][train_inner][INFO] - {"epoch": 4, "update": 3.824, "loss": "2.314", "ppl": "4.97", "wps": "396161", "ups": "3.36", "wpb": "117862", "bsz": "256", "num_updates": "196800", "lr": "8.11313e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "58850"} +[2022-07-31 03:11:40,773][train_inner][INFO] - {"epoch": 4, "update": 3.828, "loss": "2.313", "ppl": "4.97", "wps": "399572", "ups": "3.38", "wpb": "118150", "bsz": "256", "num_updates": "197000", "lr": "8.11111e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "58909"} +[2022-07-31 03:12:40,318][train_inner][INFO] - {"epoch": 4, "update": 3.831, "loss": "2.31", "ppl": "4.96", "wps": "397976", "ups": "3.36", "wpb": "118485", "bsz": "256", "num_updates": "197200", "lr": "8.10909e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "58969"} +[2022-07-31 03:13:39,844][train_inner][INFO] - {"epoch": 4, "update": 3.835, "loss": "2.318", "ppl": "4.99", "wps": "398209", "ups": "3.36", "wpb": "118520", "bsz": "256", "num_updates": "197400", "lr": "8.10707e-05", "gnorm": "0.946", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "59028"} +[2022-07-31 03:14:39,625][train_inner][INFO] - {"epoch": 4, "update": 3.839, "loss": "2.318", "ppl": "4.99", "wps": "396531", "ups": "3.35", "wpb": "118523", "bsz": "256", "num_updates": "197600", "lr": "8.10505e-05", "gnorm": "0.942", "loss_scale": "64", "train_wall": "59", "gb_free": "23.8", "wall": "59088"} +[2022-07-31 03:15:38,993][train_inner][INFO] - {"epoch": 4, "update": 3.843, "loss": "2.317", "ppl": "4.98", "wps": "398123", "ups": "3.37", "wpb": "118178", "bsz": "256", "num_updates": "197800", "lr": "8.10303e-05", "gnorm": "0.946", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "59147"} +[2022-07-31 03:16:38,339][train_inner][INFO] - {"epoch": 4, "update": 3.847, "loss": "2.314", "ppl": "4.97", "wps": "399661", "ups": "3.37", "wpb": "118592", "bsz": "256", "num_updates": "198000", "lr": "8.10101e-05", "gnorm": "0.946", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "59207"} +[2022-07-31 03:17:38,039][train_inner][INFO] - {"epoch": 4, "update": 3.851, "loss": "2.305", "ppl": "4.94", "wps": "397167", "ups": "3.35", "wpb": "118552", "bsz": "256", "num_updates": "198200", "lr": "8.09899e-05", "gnorm": "0.944", "loss_scale": "64", "train_wall": "59", "gb_free": "25.4", "wall": "59266"} +[2022-07-31 03:18:37,547][train_inner][INFO] - {"epoch": 4, "update": 3.855, "loss": "2.319", "ppl": "4.99", "wps": "396505", "ups": "3.36", "wpb": "117975", "bsz": "256", "num_updates": "198400", "lr": "8.09697e-05", "gnorm": "0.947", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "59326"} +[2022-07-31 03:19:37,157][train_inner][INFO] - {"epoch": 4, "update": 3.859, "loss": "2.314", "ppl": "4.97", "wps": "395902", "ups": "3.36", "wpb": "117998", "bsz": "256", "num_updates": "198600", "lr": "8.09495e-05", "gnorm": "0.947", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "59386"} +[2022-07-31 03:20:36,967][train_inner][INFO] - {"epoch": 4, "update": 3.863, "loss": "2.309", "ppl": "4.95", "wps": "395839", "ups": "3.34", "wpb": "118375", "bsz": "256", "num_updates": "198800", "lr": "8.09293e-05", "gnorm": "0.943", "loss_scale": "64", "train_wall": "60", "gb_free": "22.4", "wall": "59445"} +[2022-07-31 03:21:36,037][train_inner][INFO] - {"epoch": 4, "update": 3.866, "loss": "2.308", "ppl": "4.95", "wps": "400055", "ups": "3.39", "wpb": "118155", "bsz": "256", "num_updates": "199000", "lr": "8.09091e-05", "gnorm": "0.946", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "59504"} +[2022-07-31 03:22:17,220][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 03:22:35,478][train_inner][INFO] - {"epoch": 4, "update": 3.87, "loss": "2.309", "ppl": "4.96", "wps": "396934", "ups": "3.36", "wpb": "117970", "bsz": "256", "num_updates": "199200", "lr": "8.08889e-05", "gnorm": "0.949", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "59564"} +[2022-07-31 03:23:34,836][train_inner][INFO] - {"epoch": 4, "update": 3.874, "loss": "2.317", "ppl": "4.98", "wps": "396137", "ups": "3.37", "wpb": "117568", "bsz": "256", "num_updates": "199400", "lr": "8.08687e-05", "gnorm": "0.948", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "59623"} +[2022-07-31 03:24:34,102][train_inner][INFO] - {"epoch": 4, "update": 3.878, "loss": "2.314", "ppl": "4.97", "wps": "397399", "ups": "3.37", "wpb": "117761", "bsz": "256", "num_updates": "199600", "lr": "8.08485e-05", "gnorm": "0.947", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "59683"} +[2022-07-31 03:25:15,952][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 03:25:33,747][train_inner][INFO] - {"epoch": 4, "update": 3.882, "loss": "2.308", "ppl": "4.95", "wps": "395664", "ups": "3.35", "wpb": "117996", "bsz": "256", "num_updates": "199800", "lr": "8.08283e-05", "gnorm": "0.949", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "59742"} +[2022-07-31 03:26:33,264][train_inner][INFO] - {"epoch": 4, "update": 3.886, "loss": "2.314", "ppl": "4.97", "wps": "398370", "ups": "3.36", "wpb": "118547", "bsz": "256", "num_updates": "200000", "lr": "8.08081e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "59802"} +[2022-07-31 03:26:33,265][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-31 03:26:55,988][valid][INFO] - {"epoch": 4, "valid_loss": "2.199", "valid_ppl": "4.59", "valid_wps": "1.61483e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "200000", "valid_best_loss": "2.199"} +[2022-07-31 03:26:55,991][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 4 @ 200000 updates +[2022-07-31 03:26:55,992][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_4_200000.pt +[2022-07-31 03:27:02,225][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_4_200000.pt +[2022-07-31 03:27:26,674][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_4_200000.pt (epoch 4 @ 200000 updates, score 2.199) (writing took 30.682487638667226 seconds) +[2022-07-31 03:28:26,028][train_inner][INFO] - {"epoch": 4, "update": 3.89, "loss": "2.31", "ppl": "4.96", "wps": "210479", "ups": "1.77", "wpb": "118672", "bsz": "256", "num_updates": "200200", "lr": "8.07879e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "59914"} +[2022-07-31 03:29:25,683][train_inner][INFO] - {"epoch": 4, "update": 3.894, "loss": "2.31", "ppl": "4.96", "wps": "397112", "ups": "3.35", "wpb": "118447", "bsz": "256", "num_updates": "200400", "lr": "8.07677e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "59974"} +[2022-07-31 03:30:12,166][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 03:30:25,876][train_inner][INFO] - {"epoch": 4, "update": 3.898, "loss": "2.309", "ppl": "4.95", "wps": "393804", "ups": "3.32", "wpb": "118521", "bsz": "256", "num_updates": "200600", "lr": "8.07475e-05", "gnorm": "0.945", "loss_scale": "16", "train_wall": "60", "gb_free": "24.6", "wall": "60034"} +[2022-07-31 03:31:25,803][train_inner][INFO] - {"epoch": 4, "update": 3.901, "loss": "2.303", "ppl": "4.94", "wps": "395403", "ups": "3.34", "wpb": "118476", "bsz": "256", "num_updates": "200800", "lr": "8.07273e-05", "gnorm": "0.946", "loss_scale": "16", "train_wall": "60", "gb_free": "26.1", "wall": "60094"} +[2022-07-31 03:32:25,112][train_inner][INFO] - {"epoch": 4, "update": 3.905, "loss": "2.308", "ppl": "4.95", "wps": "401010", "ups": "3.37", "wpb": "118915", "bsz": "256", "num_updates": "201000", "lr": "8.07071e-05", "gnorm": "0.945", "loss_scale": "16", "train_wall": "59", "gb_free": "27", "wall": "60154"} +[2022-07-31 03:33:24,817][train_inner][INFO] - {"epoch": 4, "update": 3.909, "loss": "2.314", "ppl": "4.97", "wps": "396234", "ups": "3.35", "wpb": "118286", "bsz": "256", "num_updates": "201200", "lr": "8.06869e-05", "gnorm": "0.948", "loss_scale": "16", "train_wall": "59", "gb_free": "23.2", "wall": "60213"} +[2022-07-31 03:34:24,224][train_inner][INFO] - {"epoch": 4, "update": 3.913, "loss": "2.313", "ppl": "4.97", "wps": "397474", "ups": "3.37", "wpb": "118063", "bsz": "256", "num_updates": "201400", "lr": "8.06667e-05", "gnorm": "0.948", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "60273"} +[2022-07-31 03:35:23,736][train_inner][INFO] - {"epoch": 4, "update": 3.917, "loss": "2.3", "ppl": "4.92", "wps": "398504", "ups": "3.36", "wpb": "118577", "bsz": "256", "num_updates": "201600", "lr": "8.06465e-05", "gnorm": "0.944", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "60332"} +[2022-07-31 03:36:23,275][train_inner][INFO] - {"epoch": 4, "update": 3.921, "loss": "2.311", "ppl": "4.96", "wps": "396665", "ups": "3.36", "wpb": "118084", "bsz": "255.9", "num_updates": "201800", "lr": "8.06263e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "60392"} +[2022-07-31 03:37:22,691][train_inner][INFO] - {"epoch": 4, "update": 3.925, "loss": "2.31", "ppl": "4.96", "wps": "398058", "ups": "3.37", "wpb": "118254", "bsz": "256", "num_updates": "202000", "lr": "8.06061e-05", "gnorm": "0.944", "loss_scale": "16", "train_wall": "59", "gb_free": "23.3", "wall": "60451"} +[2022-07-31 03:38:23,214][train_inner][INFO] - {"epoch": 4, "update": 3.929, "loss": "2.301", "ppl": "4.93", "wps": "391086", "ups": "3.3", "wpb": "118349", "bsz": "256", "num_updates": "202200", "lr": "8.05859e-05", "gnorm": "0.946", "loss_scale": "16", "train_wall": "60", "gb_free": "27.1", "wall": "60512"} +[2022-07-31 03:39:22,563][train_inner][INFO] - {"epoch": 4, "update": 3.933, "loss": "2.312", "ppl": "4.97", "wps": "398957", "ups": "3.37", "wpb": "118387", "bsz": "256", "num_updates": "202400", "lr": "8.05657e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "60571"} +[2022-07-31 03:40:21,475][train_inner][INFO] - {"epoch": 4, "update": 3.936, "loss": "2.31", "ppl": "4.96", "wps": "401036", "ups": "3.39", "wpb": "118129", "bsz": "256", "num_updates": "202600", "lr": "8.05455e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "60630"} +[2022-07-31 03:41:20,656][train_inner][INFO] - {"epoch": 4, "update": 3.94, "loss": "2.308", "ppl": "4.95", "wps": "398717", "ups": "3.38", "wpb": "117981", "bsz": "256", "num_updates": "202800", "lr": "8.05253e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "60689"} +[2022-07-31 03:42:20,267][train_inner][INFO] - {"epoch": 4, "update": 3.944, "loss": "2.308", "ppl": "4.95", "wps": "396594", "ups": "3.36", "wpb": "118206", "bsz": "256", "num_updates": "203000", "lr": "8.05051e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "60749"} +[2022-07-31 03:43:19,779][train_inner][INFO] - {"epoch": 4, "update": 3.948, "loss": "2.299", "ppl": "4.92", "wps": "397578", "ups": "3.36", "wpb": "118303", "bsz": "256", "num_updates": "203200", "lr": "8.04848e-05", "gnorm": "0.944", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "60808"} +[2022-07-31 03:44:19,055][train_inner][INFO] - {"epoch": 4, "update": 3.952, "loss": "2.312", "ppl": "4.96", "wps": "397588", "ups": "3.37", "wpb": "117837", "bsz": "256", "num_updates": "203400", "lr": "8.04646e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "60867"} +[2022-07-31 03:45:18,299][train_inner][INFO] - {"epoch": 4, "update": 3.956, "loss": "2.31", "ppl": "4.96", "wps": "397788", "ups": "3.38", "wpb": "117833", "bsz": "256", "num_updates": "203600", "lr": "8.04444e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "60927"} +[2022-07-31 03:46:17,781][train_inner][INFO] - {"epoch": 4, "update": 3.96, "loss": "2.308", "ppl": "4.95", "wps": "395874", "ups": "3.36", "wpb": "117734", "bsz": "256", "num_updates": "203800", "lr": "8.04242e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "60986"} +[2022-07-31 03:47:16,965][train_inner][INFO] - {"epoch": 4, "update": 3.964, "loss": "2.307", "ppl": "4.95", "wps": "399173", "ups": "3.38", "wpb": "118124", "bsz": "256", "num_updates": "204000", "lr": "8.0404e-05", "gnorm": "0.949", "loss_scale": "32", "train_wall": "59", "gb_free": "27.2", "wall": "61045"} +[2022-07-31 03:48:16,538][train_inner][INFO] - {"epoch": 4, "update": 3.968, "loss": "2.307", "ppl": "4.95", "wps": "396765", "ups": "3.36", "wpb": "118181", "bsz": "256", "num_updates": "204200", "lr": "8.03838e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "61105"} +[2022-07-31 03:49:16,264][train_inner][INFO] - {"epoch": 4, "update": 3.971, "loss": "2.303", "ppl": "4.93", "wps": "397768", "ups": "3.35", "wpb": "118785", "bsz": "256", "num_updates": "204400", "lr": "8.03636e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "25.7", "wall": "61165"} +[2022-07-31 03:50:15,672][train_inner][INFO] - {"epoch": 4, "update": 3.975, "loss": "2.306", "ppl": "4.95", "wps": "397397", "ups": "3.37", "wpb": "118041", "bsz": "256", "num_updates": "204600", "lr": "8.03434e-05", "gnorm": "0.949", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "61224"} +[2022-07-31 03:51:14,901][train_inner][INFO] - {"epoch": 4, "update": 3.979, "loss": "2.308", "ppl": "4.95", "wps": "397737", "ups": "3.38", "wpb": "117787", "bsz": "256", "num_updates": "204800", "lr": "8.03232e-05", "gnorm": "0.951", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "61283"} +[2022-07-31 03:51:52,460][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 03:52:14,274][train_inner][INFO] - {"epoch": 4, "update": 3.983, "loss": "2.314", "ppl": "4.97", "wps": "396694", "ups": "3.37", "wpb": "117765", "bsz": "256", "num_updates": "205000", "lr": "8.0303e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "61343"} +[2022-07-31 03:53:13,739][train_inner][INFO] - {"epoch": 4, "update": 3.987, "loss": "2.302", "ppl": "4.93", "wps": "399367", "ups": "3.36", "wpb": "118741", "bsz": "256", "num_updates": "205200", "lr": "8.02828e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "25.2", "wall": "61402"} +[2022-07-31 03:54:13,167][train_inner][INFO] - {"epoch": 4, "update": 3.991, "loss": "2.306", "ppl": "4.95", "wps": "399844", "ups": "3.37", "wpb": "118809", "bsz": "256", "num_updates": "205400", "lr": "8.02626e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "61462"} +[2022-07-31 03:55:12,489][train_inner][INFO] - {"epoch": 4, "update": 3.995, "loss": "2.309", "ppl": "4.95", "wps": "397926", "ups": "3.37", "wpb": "118027", "bsz": "256", "num_updates": "205600", "lr": "8.02424e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "61521"} +[2022-07-31 03:56:12,242][train_inner][INFO] - {"epoch": 4, "update": 3.999, "loss": "2.301", "ppl": "4.93", "wps": "394996", "ups": "3.35", "wpb": "118012", "bsz": "256", "num_updates": "205800", "lr": "8.02222e-05", "gnorm": "0.949", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "61581"} +[2022-07-31 03:56:33,352][fairseq_cli.train][INFO] - end of epoch 4 (average epoch stats below) +[2022-07-31 03:56:33,353][train][INFO] - {"epoch": 4, "train_loss": "2.338", "train_ppl": "5.06", "train_wps": "395495", "train_ups": "3.34", "train_wpb": "118299", "train_bsz": "256", "train_num_updates": "205872", "train_lr": "8.02149e-05", "train_gnorm": "0.939", "train_loss_scale": "32", "train_train_wall": "15245", "train_gb_free": "23.7", "train_wall": "61602"} +[2022-07-31 03:56:33,464][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-31 03:56:33,467][fairseq.trainer][INFO] - begin training epoch 5 +[2022-07-31 03:56:33,467][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-31 03:57:22,435][train_inner][INFO] - {"epoch": 5, "update": 4.002, "loss": "2.307", "ppl": "4.95", "wps": "334784", "ups": "2.85", "wpb": "117496", "bsz": "255.4", "num_updates": "206000", "lr": "8.0202e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "60", "gb_free": "28.9", "wall": "61651"} +[2022-07-31 03:58:21,833][train_inner][INFO] - {"epoch": 5, "update": 4.006, "loss": "2.306", "ppl": "4.94", "wps": "397152", "ups": "3.37", "wpb": "117950", "bsz": "256", "num_updates": "206200", "lr": "8.01818e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "61710"} +[2022-07-31 03:59:21,336][train_inner][INFO] - {"epoch": 5, "update": 4.01, "loss": "2.291", "ppl": "4.89", "wps": "397816", "ups": "3.36", "wpb": "118356", "bsz": "256", "num_updates": "206400", "lr": "8.01616e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "61770"} +[2022-07-31 04:00:20,670][train_inner][INFO] - {"epoch": 5, "update": 4.014, "loss": "2.301", "ppl": "4.93", "wps": "398448", "ups": "3.37", "wpb": "118207", "bsz": "256", "num_updates": "206600", "lr": "8.01414e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "61829"} +[2022-07-31 04:01:20,318][train_inner][INFO] - {"epoch": 5, "update": 4.018, "loss": "2.302", "ppl": "4.93", "wps": "397019", "ups": "3.35", "wpb": "118404", "bsz": "256", "num_updates": "206800", "lr": "8.01212e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "61889"} +[2022-07-31 04:02:19,697][train_inner][INFO] - {"epoch": 5, "update": 4.022, "loss": "2.3", "ppl": "4.92", "wps": "399447", "ups": "3.37", "wpb": "118593", "bsz": "256", "num_updates": "207000", "lr": "8.0101e-05", "gnorm": "0.946", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "61948"} +[2022-07-31 04:03:19,841][train_inner][INFO] - {"epoch": 5, "update": 4.026, "loss": "2.3", "ppl": "4.92", "wps": "393736", "ups": "3.33", "wpb": "118403", "bsz": "256", "num_updates": "207200", "lr": "8.00808e-05", "gnorm": "0.947", "loss_scale": "64", "train_wall": "60", "gb_free": "22", "wall": "62008"} +[2022-07-31 04:04:19,549][train_inner][INFO] - {"epoch": 5, "update": 4.03, "loss": "2.295", "ppl": "4.91", "wps": "398299", "ups": "3.35", "wpb": "118908", "bsz": "256", "num_updates": "207400", "lr": "8.00606e-05", "gnorm": "0.947", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "62068"} +[2022-07-31 04:05:18,964][train_inner][INFO] - {"epoch": 5, "update": 4.034, "loss": "2.296", "ppl": "4.91", "wps": "399020", "ups": "3.37", "wpb": "118537", "bsz": "256", "num_updates": "207600", "lr": "8.00404e-05", "gnorm": "0.947", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "62127"} +[2022-07-31 04:06:18,787][train_inner][INFO] - {"epoch": 5, "update": 4.037, "loss": "2.299", "ppl": "4.92", "wps": "397015", "ups": "3.34", "wpb": "118754", "bsz": "256", "num_updates": "207800", "lr": "8.00202e-05", "gnorm": "0.949", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "62187"} +[2022-07-31 04:06:52,363][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 04:07:18,459][train_inner][INFO] - {"epoch": 5, "update": 4.041, "loss": "2.298", "ppl": "4.92", "wps": "397113", "ups": "3.35", "wpb": "118481", "bsz": "256", "num_updates": "208000", "lr": "8e-05", "gnorm": "0.946", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "62247"} +[2022-07-31 04:08:18,092][train_inner][INFO] - {"epoch": 5, "update": 4.045, "loss": "2.302", "ppl": "4.93", "wps": "396514", "ups": "3.35", "wpb": "118225", "bsz": "256", "num_updates": "208200", "lr": "7.99798e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "62307"} +[2022-07-31 04:09:17,821][train_inner][INFO] - {"epoch": 5, "update": 4.049, "loss": "2.297", "ppl": "4.91", "wps": "396325", "ups": "3.35", "wpb": "118361", "bsz": "256", "num_updates": "208400", "lr": "7.99596e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "59", "gb_free": "26.5", "wall": "62366"} +[2022-07-31 04:10:16,223][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 04:10:17,100][train_inner][INFO] - {"epoch": 5, "update": 4.053, "loss": "2.304", "ppl": "4.94", "wps": "398446", "ups": "3.37", "wpb": "118097", "bsz": "256", "num_updates": "208600", "lr": "7.99394e-05", "gnorm": "0.955", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "62426"} +[2022-07-31 04:11:16,638][train_inner][INFO] - {"epoch": 5, "update": 4.057, "loss": "2.302", "ppl": "4.93", "wps": "396102", "ups": "3.36", "wpb": "117914", "bsz": "256", "num_updates": "208800", "lr": "7.99192e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "59", "gb_free": "25.5", "wall": "62485"} +[2022-07-31 04:12:16,510][train_inner][INFO] - {"epoch": 5, "update": 4.061, "loss": "2.296", "ppl": "4.91", "wps": "395565", "ups": "3.34", "wpb": "118417", "bsz": "256", "num_updates": "209000", "lr": "7.9899e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "60", "gb_free": "21.9", "wall": "62545"} +[2022-07-31 04:13:16,410][train_inner][INFO] - {"epoch": 5, "update": 4.065, "loss": "2.302", "ppl": "4.93", "wps": "394128", "ups": "3.34", "wpb": "118040", "bsz": "256", "num_updates": "209200", "lr": "7.98788e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "60", "gb_free": "21.3", "wall": "62605"} +[2022-07-31 04:14:15,723][train_inner][INFO] - {"epoch": 5, "update": 4.069, "loss": "2.297", "ppl": "4.91", "wps": "397564", "ups": "3.37", "wpb": "117902", "bsz": "256", "num_updates": "209400", "lr": "7.98586e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "59", "gb_free": "26.8", "wall": "62664"} +[2022-07-31 04:15:15,475][train_inner][INFO] - {"epoch": 5, "update": 4.072, "loss": "2.298", "ppl": "4.92", "wps": "397323", "ups": "3.35", "wpb": "118704", "bsz": "256", "num_updates": "209600", "lr": "7.98384e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "26.7", "wall": "62724"} +[2022-07-31 04:16:14,788][train_inner][INFO] - {"epoch": 5, "update": 4.076, "loss": "2.298", "ppl": "4.92", "wps": "399567", "ups": "3.37", "wpb": "118497", "bsz": "256", "num_updates": "209800", "lr": "7.98182e-05", "gnorm": "0.95", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "62783"} +[2022-07-31 04:17:14,582][train_inner][INFO] - {"epoch": 5, "update": 4.08, "loss": "2.295", "ppl": "4.91", "wps": "394656", "ups": "3.34", "wpb": "117990", "bsz": "256", "num_updates": "210000", "lr": "7.9798e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "28.5", "wall": "62843"} +[2022-07-31 04:18:13,790][train_inner][INFO] - {"epoch": 5, "update": 4.084, "loss": "2.294", "ppl": "4.9", "wps": "399686", "ups": "3.38", "wpb": "118321", "bsz": "255.9", "num_updates": "210200", "lr": "7.97778e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "62902"} +[2022-07-31 04:19:13,346][train_inner][INFO] - {"epoch": 5, "update": 4.088, "loss": "2.296", "ppl": "4.91", "wps": "399645", "ups": "3.36", "wpb": "119006", "bsz": "256", "num_updates": "210400", "lr": "7.97576e-05", "gnorm": "0.948", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "62962"} +[2022-07-31 04:19:50,374][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 04:20:12,979][train_inner][INFO] - {"epoch": 5, "update": 4.092, "loss": "2.293", "ppl": "4.9", "wps": "396531", "ups": "3.35", "wpb": "118231", "bsz": "256", "num_updates": "210600", "lr": "7.97374e-05", "gnorm": "0.947", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "63021"} +[2022-07-31 04:21:12,116][train_inner][INFO] - {"epoch": 5, "update": 4.096, "loss": "2.298", "ppl": "4.92", "wps": "399429", "ups": "3.38", "wpb": "118106", "bsz": "256", "num_updates": "210800", "lr": "7.97172e-05", "gnorm": "0.952", "loss_scale": "8", "train_wall": "59", "gb_free": "22.8", "wall": "63081"} +[2022-07-31 04:22:11,629][train_inner][INFO] - {"epoch": 5, "update": 4.1, "loss": "2.294", "ppl": "4.91", "wps": "397422", "ups": "3.36", "wpb": "118258", "bsz": "256", "num_updates": "211000", "lr": "7.9697e-05", "gnorm": "0.948", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "63140"} +[2022-07-31 04:23:11,126][train_inner][INFO] - {"epoch": 5, "update": 4.104, "loss": "2.301", "ppl": "4.93", "wps": "396723", "ups": "3.36", "wpb": "118019", "bsz": "256", "num_updates": "211200", "lr": "7.96768e-05", "gnorm": "0.949", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "63200"} +[2022-07-31 04:24:10,778][train_inner][INFO] - {"epoch": 5, "update": 4.107, "loss": "2.299", "ppl": "4.92", "wps": "397486", "ups": "3.35", "wpb": "118552", "bsz": "256", "num_updates": "211400", "lr": "7.96566e-05", "gnorm": "0.951", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "63259"} +[2022-07-31 04:25:10,095][train_inner][INFO] - {"epoch": 5, "update": 4.111, "loss": "2.298", "ppl": "4.92", "wps": "399703", "ups": "3.37", "wpb": "118545", "bsz": "256", "num_updates": "211600", "lr": "7.96364e-05", "gnorm": "0.95", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "63319"} +[2022-07-31 04:26:09,848][train_inner][INFO] - {"epoch": 5, "update": 4.115, "loss": "2.293", "ppl": "4.9", "wps": "395557", "ups": "3.35", "wpb": "118179", "bsz": "256", "num_updates": "211800", "lr": "7.96162e-05", "gnorm": "0.951", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "63378"} +[2022-07-31 04:27:09,151][train_inner][INFO] - {"epoch": 5, "update": 4.119, "loss": "2.296", "ppl": "4.91", "wps": "398591", "ups": "3.37", "wpb": "118186", "bsz": "256", "num_updates": "212000", "lr": "7.9596e-05", "gnorm": "0.952", "loss_scale": "8", "train_wall": "59", "gb_free": "23.5", "wall": "63438"} +[2022-07-31 04:28:08,648][train_inner][INFO] - {"epoch": 5, "update": 4.123, "loss": "2.287", "ppl": "4.88", "wps": "398760", "ups": "3.36", "wpb": "118625", "bsz": "256", "num_updates": "212200", "lr": "7.95758e-05", "gnorm": "0.949", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "63497"} +[2022-07-31 04:29:08,030][train_inner][INFO] - {"epoch": 5, "update": 4.127, "loss": "2.298", "ppl": "4.92", "wps": "399563", "ups": "3.37", "wpb": "118633", "bsz": "256", "num_updates": "212400", "lr": "7.95556e-05", "gnorm": "0.951", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "63556"} +[2022-07-31 04:30:07,505][train_inner][INFO] - {"epoch": 5, "update": 4.131, "loss": "2.289", "ppl": "4.89", "wps": "397265", "ups": "3.36", "wpb": "118136", "bsz": "256", "num_updates": "212600", "lr": "7.95354e-05", "gnorm": "0.95", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "63616"} +[2022-07-31 04:31:07,025][train_inner][INFO] - {"epoch": 5, "update": 4.135, "loss": "2.292", "ppl": "4.9", "wps": "397587", "ups": "3.36", "wpb": "118322", "bsz": "256", "num_updates": "212800", "lr": "7.95152e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "25.3", "wall": "63675"} +[2022-07-31 04:32:06,426][train_inner][INFO] - {"epoch": 5, "update": 4.138, "loss": "2.295", "ppl": "4.91", "wps": "396686", "ups": "3.37", "wpb": "117817", "bsz": "256", "num_updates": "213000", "lr": "7.94949e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "63735"} +[2022-07-31 04:33:06,010][train_inner][INFO] - {"epoch": 5, "update": 4.142, "loss": "2.289", "ppl": "4.89", "wps": "397451", "ups": "3.36", "wpb": "118407", "bsz": "256", "num_updates": "213200", "lr": "7.94747e-05", "gnorm": "0.947", "loss_scale": "16", "train_wall": "59", "gb_free": "23", "wall": "63794"} +[2022-07-31 04:34:05,666][train_inner][INFO] - {"epoch": 5, "update": 4.146, "loss": "2.291", "ppl": "4.89", "wps": "395564", "ups": "3.35", "wpb": "117989", "bsz": "256", "num_updates": "213400", "lr": "7.94545e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "63854"} +[2022-07-31 04:35:05,196][train_inner][INFO] - {"epoch": 5, "update": 4.15, "loss": "2.293", "ppl": "4.9", "wps": "398400", "ups": "3.36", "wpb": "118583", "bsz": "256", "num_updates": "213600", "lr": "7.94343e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "63914"} +[2022-07-31 04:36:04,237][train_inner][INFO] - {"epoch": 5, "update": 4.154, "loss": "2.294", "ppl": "4.9", "wps": "399257", "ups": "3.39", "wpb": "117862", "bsz": "256", "num_updates": "213800", "lr": "7.94141e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "63973"} +[2022-07-31 04:37:03,888][train_inner][INFO] - {"epoch": 5, "update": 4.158, "loss": "2.298", "ppl": "4.92", "wps": "396166", "ups": "3.35", "wpb": "118158", "bsz": "256", "num_updates": "214000", "lr": "7.93939e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "59", "gb_free": "29.9", "wall": "64032"} +[2022-07-31 04:38:03,257][train_inner][INFO] - {"epoch": 5, "update": 4.162, "loss": "2.292", "ppl": "4.9", "wps": "395849", "ups": "3.37", "wpb": "117505", "bsz": "256", "num_updates": "214200", "lr": "7.93737e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "25.3", "wall": "64092"} +[2022-07-31 04:39:02,660][train_inner][INFO] - {"epoch": 5, "update": 4.166, "loss": "2.292", "ppl": "4.9", "wps": "397243", "ups": "3.37", "wpb": "117985", "bsz": "256", "num_updates": "214400", "lr": "7.93535e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "64151"} +[2022-07-31 04:40:02,628][train_inner][INFO] - {"epoch": 5, "update": 4.17, "loss": "2.29", "ppl": "4.89", "wps": "392575", "ups": "3.34", "wpb": "117710", "bsz": "256", "num_updates": "214600", "lr": "7.93333e-05", "gnorm": "0.952", "loss_scale": "16", "train_wall": "60", "gb_free": "21.7", "wall": "64211"} +[2022-07-31 04:41:02,296][train_inner][INFO] - {"epoch": 5, "update": 4.173, "loss": "2.29", "ppl": "4.89", "wps": "397968", "ups": "3.35", "wpb": "118728", "bsz": "256", "num_updates": "214800", "lr": "7.93131e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "64271"} +[2022-07-31 04:42:02,095][train_inner][INFO] - {"epoch": 5, "update": 4.177, "loss": "2.295", "ppl": "4.91", "wps": "395095", "ups": "3.34", "wpb": "118132", "bsz": "256", "num_updates": "215000", "lr": "7.92929e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "64331"} +[2022-07-31 04:43:01,328][train_inner][INFO] - {"epoch": 5, "update": 4.181, "loss": "2.295", "ppl": "4.91", "wps": "400111", "ups": "3.38", "wpb": "118498", "bsz": "256", "num_updates": "215200", "lr": "7.92727e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "64390"} +[2022-07-31 04:44:00,644][train_inner][INFO] - {"epoch": 5, "update": 4.185, "loss": "2.297", "ppl": "4.91", "wps": "398114", "ups": "3.37", "wpb": "118071", "bsz": "256", "num_updates": "215400", "lr": "7.92525e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "64449"} +[2022-07-31 04:45:00,371][train_inner][INFO] - {"epoch": 5, "update": 4.189, "loss": "2.292", "ppl": "4.9", "wps": "395849", "ups": "3.35", "wpb": "118213", "bsz": "256", "num_updates": "215600", "lr": "7.92323e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "64509"} +[2022-07-31 04:46:00,056][train_inner][INFO] - {"epoch": 5, "update": 4.193, "loss": "2.289", "ppl": "4.89", "wps": "396430", "ups": "3.35", "wpb": "118304", "bsz": "256", "num_updates": "215800", "lr": "7.92121e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "64568"} +[2022-07-31 04:46:59,469][train_inner][INFO] - {"epoch": 5, "update": 4.197, "loss": "2.285", "ppl": "4.87", "wps": "399801", "ups": "3.37", "wpb": "118766", "bsz": "256", "num_updates": "216000", "lr": "7.91919e-05", "gnorm": "0.949", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "64628"} +[2022-07-31 04:47:59,323][train_inner][INFO] - {"epoch": 5, "update": 4.201, "loss": "2.287", "ppl": "4.88", "wps": "396859", "ups": "3.34", "wpb": "118768", "bsz": "256", "num_updates": "216200", "lr": "7.91717e-05", "gnorm": "0.949", "loss_scale": "32", "train_wall": "60", "gb_free": "21.3", "wall": "64688"} +[2022-07-31 04:48:58,484][train_inner][INFO] - {"epoch": 5, "update": 4.205, "loss": "2.287", "ppl": "4.88", "wps": "399369", "ups": "3.38", "wpb": "118136", "bsz": "256", "num_updates": "216400", "lr": "7.91515e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "64747"} +[2022-07-31 04:49:58,038][train_inner][INFO] - {"epoch": 5, "update": 4.208, "loss": "2.295", "ppl": "4.91", "wps": "395918", "ups": "3.36", "wpb": "117891", "bsz": "256", "num_updates": "216600", "lr": "7.91313e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "64806"} +[2022-07-31 04:50:57,812][train_inner][INFO] - {"epoch": 5, "update": 4.212, "loss": "2.286", "ppl": "4.88", "wps": "396910", "ups": "3.35", "wpb": "118623", "bsz": "256", "num_updates": "216800", "lr": "7.91111e-05", "gnorm": "0.95", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "64866"} +[2022-07-31 04:51:57,335][train_inner][INFO] - {"epoch": 5, "update": 4.216, "loss": "2.295", "ppl": "4.91", "wps": "397641", "ups": "3.36", "wpb": "118342", "bsz": "256", "num_updates": "217000", "lr": "7.90909e-05", "gnorm": "0.952", "loss_scale": "64", "train_wall": "59", "gb_free": "22.1", "wall": "64926"} +[2022-07-31 04:52:41,986][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 04:52:57,235][train_inner][INFO] - {"epoch": 5, "update": 4.22, "loss": "2.288", "ppl": "4.88", "wps": "395126", "ups": "3.34", "wpb": "118341", "bsz": "256", "num_updates": "217200", "lr": "7.90707e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "60", "gb_free": "21.9", "wall": "64986"} +[2022-07-31 04:53:56,751][train_inner][INFO] - {"epoch": 5, "update": 4.224, "loss": "2.289", "ppl": "4.89", "wps": "396583", "ups": "3.36", "wpb": "118015", "bsz": "256", "num_updates": "217400", "lr": "7.90505e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "65045"} +[2022-07-31 04:54:55,929][train_inner][INFO] - {"epoch": 5, "update": 4.228, "loss": "2.296", "ppl": "4.91", "wps": "399587", "ups": "3.38", "wpb": "118231", "bsz": "256", "num_updates": "217600", "lr": "7.90303e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "65104"} +[2022-07-31 04:55:55,305][train_inner][INFO] - {"epoch": 5, "update": 4.232, "loss": "2.292", "ppl": "4.9", "wps": "399030", "ups": "3.37", "wpb": "118465", "bsz": "256", "num_updates": "217800", "lr": "7.90101e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "65164"} +[2022-07-31 04:56:54,978][train_inner][INFO] - {"epoch": 5, "update": 4.236, "loss": "2.29", "ppl": "4.89", "wps": "397126", "ups": "3.35", "wpb": "118487", "bsz": "256", "num_updates": "218000", "lr": "7.89899e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "65223"} +[2022-07-31 04:57:54,010][train_inner][INFO] - {"epoch": 5, "update": 4.24, "loss": "2.287", "ppl": "4.88", "wps": "401383", "ups": "3.39", "wpb": "118472", "bsz": "256", "num_updates": "218200", "lr": "7.89697e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "65282"} +[2022-07-31 04:58:53,473][train_inner][INFO] - {"epoch": 5, "update": 4.243, "loss": "2.282", "ppl": "4.86", "wps": "399125", "ups": "3.36", "wpb": "118665", "bsz": "256", "num_updates": "218400", "lr": "7.89495e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "65342"} +[2022-07-31 04:59:50,125][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 04:59:52,825][train_inner][INFO] - {"epoch": 5, "update": 4.247, "loss": "2.294", "ppl": "4.9", "wps": "396765", "ups": "3.37", "wpb": "117742", "bsz": "256", "num_updates": "218600", "lr": "7.89293e-05", "gnorm": "0.955", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "65401"} +[2022-07-31 05:00:52,241][train_inner][INFO] - {"epoch": 5, "update": 4.251, "loss": "2.291", "ppl": "4.89", "wps": "399960", "ups": "3.37", "wpb": "118820", "bsz": "256", "num_updates": "218800", "lr": "7.89091e-05", "gnorm": "0.95", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "65461"} +[2022-07-31 05:01:51,546][train_inner][INFO] - {"epoch": 5, "update": 4.255, "loss": "2.285", "ppl": "4.87", "wps": "399535", "ups": "3.37", "wpb": "118471", "bsz": "256", "num_updates": "219000", "lr": "7.88889e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "65520"} +[2022-07-31 05:02:50,860][train_inner][INFO] - {"epoch": 5, "update": 4.259, "loss": "2.289", "ppl": "4.89", "wps": "398764", "ups": "3.37", "wpb": "118261", "bsz": "256", "num_updates": "219200", "lr": "7.88687e-05", "gnorm": "0.954", "loss_scale": "16", "train_wall": "59", "gb_free": "26", "wall": "65579"} +[2022-07-31 05:03:50,284][train_inner][INFO] - {"epoch": 5, "update": 4.263, "loss": "2.287", "ppl": "4.88", "wps": "398882", "ups": "3.37", "wpb": "118514", "bsz": "256", "num_updates": "219400", "lr": "7.88485e-05", "gnorm": "0.951", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "65639"} +[2022-07-31 05:04:50,470][train_inner][INFO] - {"epoch": 5, "update": 4.267, "loss": "2.285", "ppl": "4.87", "wps": "394699", "ups": "3.32", "wpb": "118776", "bsz": "256", "num_updates": "219600", "lr": "7.88283e-05", "gnorm": "0.95", "loss_scale": "16", "train_wall": "60", "gb_free": "21.7", "wall": "65699"} +[2022-07-31 05:05:50,052][train_inner][INFO] - {"epoch": 5, "update": 4.271, "loss": "2.291", "ppl": "4.9", "wps": "397929", "ups": "3.36", "wpb": "118548", "bsz": "256", "num_updates": "219800", "lr": "7.88081e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "65758"} +[2022-07-31 05:06:49,166][train_inner][INFO] - {"epoch": 5, "update": 4.274, "loss": "2.285", "ppl": "4.87", "wps": "402240", "ups": "3.38", "wpb": "118888", "bsz": "256", "num_updates": "220000", "lr": "7.87879e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "23.8", "wall": "65818"} +[2022-07-31 05:07:48,664][train_inner][INFO] - {"epoch": 5, "update": 4.278, "loss": "2.284", "ppl": "4.87", "wps": "398017", "ups": "3.36", "wpb": "118405", "bsz": "256", "num_updates": "220200", "lr": "7.87677e-05", "gnorm": "0.949", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "65877"} +[2022-07-31 05:08:48,174][train_inner][INFO] - {"epoch": 5, "update": 4.282, "loss": "2.285", "ppl": "4.87", "wps": "396552", "ups": "3.36", "wpb": "117994", "bsz": "256", "num_updates": "220400", "lr": "7.87475e-05", "gnorm": "0.952", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "65937"} +[2022-07-31 05:09:47,255][train_inner][INFO] - {"epoch": 5, "update": 4.286, "loss": "2.293", "ppl": "4.9", "wps": "398558", "ups": "3.39", "wpb": "117735", "bsz": "256", "num_updates": "220600", "lr": "7.87273e-05", "gnorm": "0.956", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "65996"} +[2022-07-31 05:10:46,891][train_inner][INFO] - {"epoch": 5, "update": 4.29, "loss": "2.283", "ppl": "4.87", "wps": "396226", "ups": "3.35", "wpb": "118145", "bsz": "256", "num_updates": "220800", "lr": "7.87071e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "66055"} +[2022-07-31 05:11:46,081][train_inner][INFO] - {"epoch": 5, "update": 4.294, "loss": "2.283", "ppl": "4.87", "wps": "396889", "ups": "3.38", "wpb": "117460", "bsz": "256", "num_updates": "221000", "lr": "7.86869e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "66115"} +[2022-07-31 05:12:45,632][train_inner][INFO] - {"epoch": 5, "update": 4.298, "loss": "2.281", "ppl": "4.86", "wps": "397230", "ups": "3.36", "wpb": "118275", "bsz": "256", "num_updates": "221200", "lr": "7.86667e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "66174"} +[2022-07-31 05:13:44,920][train_inner][INFO] - {"epoch": 5, "update": 4.302, "loss": "2.28", "ppl": "4.86", "wps": "399090", "ups": "3.37", "wpb": "118307", "bsz": "256", "num_updates": "221400", "lr": "7.86465e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "66233"} +[2022-07-31 05:14:44,717][train_inner][INFO] - {"epoch": 5, "update": 4.306, "loss": "2.285", "ppl": "4.87", "wps": "395896", "ups": "3.34", "wpb": "118366", "bsz": "256", "num_updates": "221600", "lr": "7.86263e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "66293"} +[2022-07-31 05:15:45,214][train_inner][INFO] - {"epoch": 5, "update": 4.309, "loss": "2.292", "ppl": "4.9", "wps": "390667", "ups": "3.31", "wpb": "118170", "bsz": "256", "num_updates": "221800", "lr": "7.86061e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "60", "gb_free": "24.7", "wall": "66354"} +[2022-07-31 05:16:44,882][train_inner][INFO] - {"epoch": 5, "update": 4.313, "loss": "2.28", "ppl": "4.86", "wps": "395488", "ups": "3.35", "wpb": "117989", "bsz": "256", "num_updates": "222000", "lr": "7.85859e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "66413"} +[2022-07-31 05:17:45,429][train_inner][INFO] - {"epoch": 5, "update": 4.317, "loss": "2.287", "ppl": "4.88", "wps": "389538", "ups": "3.3", "wpb": "117926", "bsz": "256", "num_updates": "222200", "lr": "7.85657e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "60", "gb_free": "22.2", "wall": "66474"} +[2022-07-31 05:18:44,998][train_inner][INFO] - {"epoch": 5, "update": 4.321, "loss": "2.286", "ppl": "4.88", "wps": "398697", "ups": "3.36", "wpb": "118751", "bsz": "256", "num_updates": "222400", "lr": "7.85455e-05", "gnorm": "0.948", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "66533"} +[2022-07-31 05:19:44,542][train_inner][INFO] - {"epoch": 5, "update": 4.325, "loss": "2.282", "ppl": "4.86", "wps": "399398", "ups": "3.36", "wpb": "118907", "bsz": "256", "num_updates": "222600", "lr": "7.85253e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "32.9", "wall": "66593"} +[2022-07-31 05:20:44,366][train_inner][INFO] - {"epoch": 5, "update": 4.329, "loss": "2.289", "ppl": "4.89", "wps": "395411", "ups": "3.34", "wpb": "118275", "bsz": "256", "num_updates": "222800", "lr": "7.85051e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "66653"} +[2022-07-31 05:21:43,615][train_inner][INFO] - {"epoch": 5, "update": 4.333, "loss": "2.278", "ppl": "4.85", "wps": "401253", "ups": "3.38", "wpb": "118868", "bsz": "256", "num_updates": "223000", "lr": "7.84848e-05", "gnorm": "0.948", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "66712"} +[2022-07-31 05:22:42,574][train_inner][INFO] - {"epoch": 5, "update": 4.337, "loss": "2.287", "ppl": "4.88", "wps": "401408", "ups": "3.39", "wpb": "118331", "bsz": "256", "num_updates": "223200", "lr": "7.84646e-05", "gnorm": "0.953", "loss_scale": "64", "train_wall": "59", "gb_free": "23.3", "wall": "66771"} +[2022-07-31 05:23:41,879][train_inner][INFO] - {"epoch": 5, "update": 4.341, "loss": "2.287", "ppl": "4.88", "wps": "398781", "ups": "3.37", "wpb": "118248", "bsz": "256", "num_updates": "223400", "lr": "7.84444e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "22.2", "wall": "66830"} +[2022-07-31 05:23:42,172][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 05:24:41,211][train_inner][INFO] - {"epoch": 5, "update": 4.344, "loss": "2.285", "ppl": "4.87", "wps": "397599", "ups": "3.37", "wpb": "117950", "bsz": "256", "num_updates": "223600", "lr": "7.84242e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "66890"} +[2022-07-31 05:25:40,922][train_inner][INFO] - {"epoch": 5, "update": 4.348, "loss": "2.284", "ppl": "4.87", "wps": "398144", "ups": "3.35", "wpb": "118869", "bsz": "256", "num_updates": "223800", "lr": "7.8404e-05", "gnorm": "0.947", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "66949"} +[2022-07-31 05:26:40,301][train_inner][INFO] - {"epoch": 5, "update": 4.352, "loss": "2.273", "ppl": "4.83", "wps": "398584", "ups": "3.37", "wpb": "118335", "bsz": "256", "num_updates": "224000", "lr": "7.83838e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "67009"} +[2022-07-31 05:27:41,018][train_inner][INFO] - {"epoch": 5, "update": 4.356, "loss": "2.286", "ppl": "4.88", "wps": "389631", "ups": "3.29", "wpb": "118286", "bsz": "256", "num_updates": "224200", "lr": "7.83636e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "60", "gb_free": "23.3", "wall": "67069"} +[2022-07-31 05:28:40,746][train_inner][INFO] - {"epoch": 5, "update": 4.36, "loss": "2.276", "ppl": "4.84", "wps": "396934", "ups": "3.35", "wpb": "118540", "bsz": "256", "num_updates": "224400", "lr": "7.83434e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "67129"} +[2022-07-31 05:29:40,333][train_inner][INFO] - {"epoch": 5, "update": 4.364, "loss": "2.28", "ppl": "4.86", "wps": "396254", "ups": "3.36", "wpb": "118056", "bsz": "256", "num_updates": "224600", "lr": "7.83232e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "67189"} +[2022-07-31 05:30:40,021][train_inner][INFO] - {"epoch": 5, "update": 4.368, "loss": "2.288", "ppl": "4.88", "wps": "393196", "ups": "3.35", "wpb": "117345", "bsz": "256", "num_updates": "224800", "lr": "7.8303e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "67248"} +[2022-07-31 05:31:39,344][train_inner][INFO] - {"epoch": 5, "update": 4.372, "loss": "2.28", "ppl": "4.86", "wps": "399439", "ups": "3.37", "wpb": "118480", "bsz": "256", "num_updates": "225000", "lr": "7.82828e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "67308"} +[2022-07-31 05:32:39,134][train_inner][INFO] - {"epoch": 5, "update": 4.376, "loss": "2.276", "ppl": "4.84", "wps": "396057", "ups": "3.35", "wpb": "118400", "bsz": "256", "num_updates": "225200", "lr": "7.82626e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "67368"} +[2022-07-31 05:33:38,649][train_inner][INFO] - {"epoch": 5, "update": 4.379, "loss": "2.279", "ppl": "4.86", "wps": "397729", "ups": "3.36", "wpb": "118353", "bsz": "256", "num_updates": "225400", "lr": "7.82424e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "67427"} +[2022-07-31 05:34:38,139][train_inner][INFO] - {"epoch": 5, "update": 4.383, "loss": "2.276", "ppl": "4.84", "wps": "397991", "ups": "3.36", "wpb": "118382", "bsz": "256", "num_updates": "225600", "lr": "7.82222e-05", "gnorm": "0.954", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "67487"} +[2022-07-31 05:35:37,755][train_inner][INFO] - {"epoch": 5, "update": 4.387, "loss": "2.279", "ppl": "4.85", "wps": "396666", "ups": "3.35", "wpb": "118238", "bsz": "256", "num_updates": "225800", "lr": "7.8202e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "67546"} +[2022-07-31 05:35:55,639][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 05:36:37,854][train_inner][INFO] - {"epoch": 5, "update": 4.391, "loss": "2.284", "ppl": "4.87", "wps": "393757", "ups": "3.33", "wpb": "118320", "bsz": "256", "num_updates": "226000", "lr": "7.81818e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "60", "gb_free": "21.4", "wall": "67606"} +[2022-07-31 05:37:37,544][train_inner][INFO] - {"epoch": 5, "update": 4.395, "loss": "2.277", "ppl": "4.85", "wps": "395436", "ups": "3.35", "wpb": "118018", "bsz": "256", "num_updates": "226200", "lr": "7.81616e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "67666"} +[2022-07-31 05:38:36,835][train_inner][INFO] - {"epoch": 5, "update": 4.399, "loss": "2.282", "ppl": "4.86", "wps": "398979", "ups": "3.37", "wpb": "118278", "bsz": "256", "num_updates": "226400", "lr": "7.81414e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "67725"} +[2022-07-31 05:39:36,307][train_inner][INFO] - {"epoch": 5, "update": 4.403, "loss": "2.282", "ppl": "4.86", "wps": "398840", "ups": "3.36", "wpb": "118600", "bsz": "256", "num_updates": "226600", "lr": "7.81212e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "26.7", "wall": "67785"} +[2022-07-31 05:40:37,300][train_inner][INFO] - {"epoch": 5, "update": 4.407, "loss": "2.28", "ppl": "4.86", "wps": "388336", "ups": "3.28", "wpb": "118428", "bsz": "256", "num_updates": "226800", "lr": "7.8101e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "61", "gb_free": "27.6", "wall": "67846"} +[2022-07-31 05:41:36,885][train_inner][INFO] - {"epoch": 5, "update": 4.41, "loss": "2.275", "ppl": "4.84", "wps": "398604", "ups": "3.36", "wpb": "118752", "bsz": "256", "num_updates": "227000", "lr": "7.80808e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "67905"} +[2022-07-31 05:42:36,296][train_inner][INFO] - {"epoch": 5, "update": 4.414, "loss": "2.282", "ppl": "4.86", "wps": "399709", "ups": "3.37", "wpb": "118736", "bsz": "256", "num_updates": "227200", "lr": "7.80606e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "67965"} +[2022-07-31 05:43:35,853][train_inner][INFO] - {"epoch": 5, "update": 4.418, "loss": "2.281", "ppl": "4.86", "wps": "397563", "ups": "3.36", "wpb": "118386", "bsz": "256", "num_updates": "227400", "lr": "7.80404e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "68024"} +[2022-07-31 05:44:35,046][train_inner][INFO] - {"epoch": 5, "update": 4.422, "loss": "2.276", "ppl": "4.84", "wps": "399301", "ups": "3.38", "wpb": "118180", "bsz": "256", "num_updates": "227600", "lr": "7.80202e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "68083"} +[2022-07-31 05:45:34,577][train_inner][INFO] - {"epoch": 5, "update": 4.426, "loss": "2.284", "ppl": "4.87", "wps": "398588", "ups": "3.36", "wpb": "118639", "bsz": "256", "num_updates": "227800", "lr": "7.8e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "68143"} +[2022-07-31 05:46:34,055][train_inner][INFO] - {"epoch": 5, "update": 4.43, "loss": "2.28", "ppl": "4.86", "wps": "399267", "ups": "3.36", "wpb": "118738", "bsz": "256", "num_updates": "228000", "lr": "7.79798e-05", "gnorm": "0.95", "loss_scale": "64", "train_wall": "59", "gb_free": "24", "wall": "68202"} +[2022-07-31 05:47:23,152][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 05:47:33,321][train_inner][INFO] - {"epoch": 5, "update": 4.434, "loss": "2.275", "ppl": "4.84", "wps": "398924", "ups": "3.37", "wpb": "118213", "bsz": "256", "num_updates": "228200", "lr": "7.79596e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "68262"} +[2022-07-31 05:48:32,724][train_inner][INFO] - {"epoch": 5, "update": 4.438, "loss": "2.276", "ppl": "4.84", "wps": "397320", "ups": "3.37", "wpb": "118008", "bsz": "256", "num_updates": "228400", "lr": "7.79394e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "68321"} +[2022-07-31 05:49:31,745][train_inner][INFO] - {"epoch": 5, "update": 4.442, "loss": "2.274", "ppl": "4.84", "wps": "400378", "ups": "3.39", "wpb": "118154", "bsz": "256", "num_updates": "228600", "lr": "7.79192e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "26.2", "wall": "68380"} +[2022-07-31 05:50:31,206][train_inner][INFO] - {"epoch": 5, "update": 4.445, "loss": "2.281", "ppl": "4.86", "wps": "398041", "ups": "3.36", "wpb": "118337", "bsz": "256", "num_updates": "228800", "lr": "7.7899e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "26", "wall": "68440"} +[2022-07-31 05:51:30,778][train_inner][INFO] - {"epoch": 5, "update": 4.449, "loss": "2.277", "ppl": "4.85", "wps": "396960", "ups": "3.36", "wpb": "118238", "bsz": "256", "num_updates": "229000", "lr": "7.78788e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "68499"} +[2022-07-31 05:51:55,476][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 05:52:30,717][train_inner][INFO] - {"epoch": 5, "update": 4.453, "loss": "2.278", "ppl": "4.85", "wps": "395500", "ups": "3.34", "wpb": "118530", "bsz": "256", "num_updates": "229200", "lr": "7.78586e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "60", "gb_free": "22.1", "wall": "68559"} +[2022-07-31 05:53:30,028][train_inner][INFO] - {"epoch": 5, "update": 4.457, "loss": "2.28", "ppl": "4.86", "wps": "398322", "ups": "3.37", "wpb": "118123", "bsz": "256", "num_updates": "229400", "lr": "7.78384e-05", "gnorm": "0.957", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "68618"} +[2022-07-31 05:54:29,572][train_inner][INFO] - {"epoch": 5, "update": 4.461, "loss": "2.273", "ppl": "4.83", "wps": "397980", "ups": "3.36", "wpb": "118486", "bsz": "256", "num_updates": "229600", "lr": "7.78182e-05", "gnorm": "0.954", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "68678"} +[2022-07-31 05:55:28,607][train_inner][INFO] - {"epoch": 5, "update": 4.465, "loss": "2.275", "ppl": "4.84", "wps": "399531", "ups": "3.39", "wpb": "117930", "bsz": "256", "num_updates": "229800", "lr": "7.7798e-05", "gnorm": "0.957", "loss_scale": "16", "train_wall": "59", "gb_free": "26.6", "wall": "68737"} +[2022-07-31 05:56:29,547][train_inner][INFO] - {"epoch": 5, "update": 4.469, "loss": "2.273", "ppl": "4.83", "wps": "389835", "ups": "3.28", "wpb": "118782", "bsz": "256", "num_updates": "230000", "lr": "7.77778e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "61", "gb_free": "28.3", "wall": "68798"} +[2022-07-31 05:57:29,824][train_inner][INFO] - {"epoch": 5, "update": 4.473, "loss": "2.278", "ppl": "4.85", "wps": "391272", "ups": "3.32", "wpb": "117924", "bsz": "256", "num_updates": "230200", "lr": "7.77576e-05", "gnorm": "0.956", "loss_scale": "16", "train_wall": "60", "gb_free": "21.3", "wall": "68858"} +[2022-07-31 05:58:29,232][train_inner][INFO] - {"epoch": 5, "update": 4.477, "loss": "2.279", "ppl": "4.85", "wps": "398733", "ups": "3.37", "wpb": "118438", "bsz": "256", "num_updates": "230400", "lr": "7.77374e-05", "gnorm": "0.953", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "68918"} +[2022-07-31 05:59:28,843][train_inner][INFO] - {"epoch": 5, "update": 4.48, "loss": "2.273", "ppl": "4.83", "wps": "396183", "ups": "3.36", "wpb": "118085", "bsz": "256", "num_updates": "230600", "lr": "7.77172e-05", "gnorm": "0.955", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "68977"} +[2022-07-31 06:00:28,185][train_inner][INFO] - {"epoch": 5, "update": 4.484, "loss": "2.276", "ppl": "4.84", "wps": "398004", "ups": "3.37", "wpb": "118091", "bsz": "256", "num_updates": "230800", "lr": "7.7697e-05", "gnorm": "0.956", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "69037"} +[2022-07-31 06:01:27,621][train_inner][INFO] - {"epoch": 5, "update": 4.488, "loss": "2.273", "ppl": "4.83", "wps": "397208", "ups": "3.36", "wpb": "118042", "bsz": "256", "num_updates": "231000", "lr": "7.76768e-05", "gnorm": "0.956", "loss_scale": "16", "train_wall": "59", "gb_free": "25.8", "wall": "69096"} +[2022-07-31 06:02:26,872][train_inner][INFO] - {"epoch": 5, "update": 4.492, "loss": "2.272", "ppl": "4.83", "wps": "398514", "ups": "3.38", "wpb": "118060", "bsz": "256", "num_updates": "231200", "lr": "7.76566e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "69155"} +[2022-07-31 06:03:26,568][train_inner][INFO] - {"epoch": 5, "update": 4.496, "loss": "2.279", "ppl": "4.85", "wps": "395817", "ups": "3.35", "wpb": "118143", "bsz": "256", "num_updates": "231400", "lr": "7.76364e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "69215"} +[2022-07-31 06:04:26,245][train_inner][INFO] - {"epoch": 5, "update": 4.5, "loss": "2.274", "ppl": "4.84", "wps": "396907", "ups": "3.35", "wpb": "118431", "bsz": "256", "num_updates": "231600", "lr": "7.76162e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "32", "wall": "69275"} +[2022-07-31 06:05:25,877][train_inner][INFO] - {"epoch": 5, "update": 4.504, "loss": "2.268", "ppl": "4.82", "wps": "397641", "ups": "3.35", "wpb": "118559", "bsz": "256", "num_updates": "231800", "lr": "7.7596e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "69334"} +[2022-07-31 06:06:24,629][train_inner][INFO] - {"epoch": 5, "update": 4.508, "loss": "2.274", "ppl": "4.84", "wps": "402464", "ups": "3.4", "wpb": "118229", "bsz": "256", "num_updates": "232000", "lr": "7.75758e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "58", "gb_free": "22.9", "wall": "69393"} +[2022-07-31 06:07:24,382][train_inner][INFO] - {"epoch": 5, "update": 4.512, "loss": "2.272", "ppl": "4.83", "wps": "396768", "ups": "3.35", "wpb": "118538", "bsz": "256", "num_updates": "232200", "lr": "7.75556e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "69453"} +[2022-07-31 06:08:24,019][train_inner][INFO] - {"epoch": 5, "update": 4.515, "loss": "2.269", "ppl": "4.82", "wps": "398024", "ups": "3.35", "wpb": "118685", "bsz": "256", "num_updates": "232400", "lr": "7.75354e-05", "gnorm": "0.951", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "69512"} +[2022-07-31 06:09:23,351][train_inner][INFO] - {"epoch": 5, "update": 4.519, "loss": "2.268", "ppl": "4.82", "wps": "402571", "ups": "3.37", "wpb": "119425", "bsz": "256", "num_updates": "232600", "lr": "7.75152e-05", "gnorm": "0.95", "loss_scale": "32", "train_wall": "59", "gb_free": "27.8", "wall": "69572"} +[2022-07-31 06:10:22,796][train_inner][INFO] - {"epoch": 5, "update": 4.523, "loss": "2.269", "ppl": "4.82", "wps": "398919", "ups": "3.36", "wpb": "118568", "bsz": "256", "num_updates": "232800", "lr": "7.74949e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "69631"} +[2022-07-31 06:11:22,321][train_inner][INFO] - {"epoch": 5, "update": 4.527, "loss": "2.281", "ppl": "4.86", "wps": "399466", "ups": "3.36", "wpb": "118889", "bsz": "256", "num_updates": "233000", "lr": "7.74747e-05", "gnorm": "0.953", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "69691"} +[2022-07-31 06:12:22,205][train_inner][INFO] - {"epoch": 5, "update": 4.531, "loss": "2.275", "ppl": "4.84", "wps": "394485", "ups": "3.34", "wpb": "118116", "bsz": "256", "num_updates": "233200", "lr": "7.74545e-05", "gnorm": "0.958", "loss_scale": "64", "train_wall": "60", "gb_free": "21.6", "wall": "69751"} +[2022-07-31 06:13:21,858][train_inner][INFO] - {"epoch": 5, "update": 4.535, "loss": "2.272", "ppl": "4.83", "wps": "397991", "ups": "3.35", "wpb": "118706", "bsz": "256", "num_updates": "233400", "lr": "7.74343e-05", "gnorm": "0.951", "loss_scale": "64", "train_wall": "59", "gb_free": "25.2", "wall": "69810"} +[2022-07-31 06:14:21,549][train_inner][INFO] - {"epoch": 5, "update": 4.539, "loss": "2.267", "ppl": "4.81", "wps": "397768", "ups": "3.35", "wpb": "118716", "bsz": "256", "num_updates": "233600", "lr": "7.74141e-05", "gnorm": "0.951", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "69870"} +[2022-07-31 06:15:21,073][train_inner][INFO] - {"epoch": 5, "update": 4.543, "loss": "2.273", "ppl": "4.83", "wps": "397464", "ups": "3.36", "wpb": "118292", "bsz": "256", "num_updates": "233800", "lr": "7.73939e-05", "gnorm": "0.956", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "69930"} +[2022-07-31 06:16:20,517][train_inner][INFO] - {"epoch": 5, "update": 4.546, "loss": "2.271", "ppl": "4.83", "wps": "399261", "ups": "3.36", "wpb": "118666", "bsz": "256", "num_updates": "234000", "lr": "7.73737e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "69989"} +[2022-07-31 06:17:19,762][train_inner][INFO] - {"epoch": 5, "update": 4.55, "loss": "2.273", "ppl": "4.83", "wps": "399599", "ups": "3.38", "wpb": "118371", "bsz": "256", "num_updates": "234200", "lr": "7.73535e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "70048"} +[2022-07-31 06:18:19,280][train_inner][INFO] - {"epoch": 5, "update": 4.554, "loss": "2.276", "ppl": "4.84", "wps": "397932", "ups": "3.36", "wpb": "118419", "bsz": "256", "num_updates": "234400", "lr": "7.73333e-05", "gnorm": "0.954", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "70108"} +[2022-07-31 06:19:18,925][train_inner][INFO] - {"epoch": 5, "update": 4.558, "loss": "2.273", "ppl": "4.83", "wps": "397186", "ups": "3.35", "wpb": "118451", "bsz": "256", "num_updates": "234600", "lr": "7.73131e-05", "gnorm": "0.956", "loss_scale": "64", "train_wall": "59", "gb_free": "23", "wall": "70167"} +[2022-07-31 06:20:18,486][train_inner][INFO] - {"epoch": 5, "update": 4.562, "loss": "2.27", "ppl": "4.82", "wps": "397572", "ups": "3.36", "wpb": "118398", "bsz": "256", "num_updates": "234800", "lr": "7.72929e-05", "gnorm": "0.956", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "70227"} +[2022-07-31 06:21:19,009][train_inner][INFO] - {"epoch": 5, "update": 4.566, "loss": "2.267", "ppl": "4.81", "wps": "391568", "ups": "3.3", "wpb": "118493", "bsz": "256", "num_updates": "235000", "lr": "7.72727e-05", "gnorm": "0.956", "loss_scale": "64", "train_wall": "60", "gb_free": "22.2", "wall": "70287"} +[2022-07-31 06:22:18,617][train_inner][INFO] - {"epoch": 5, "update": 4.57, "loss": "2.269", "ppl": "4.82", "wps": "395602", "ups": "3.36", "wpb": "117905", "bsz": "256", "num_updates": "235200", "lr": "7.72525e-05", "gnorm": "0.957", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "70347"} +[2022-07-31 06:23:12,855][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-07-31 06:23:18,213][train_inner][INFO] - {"epoch": 5, "update": 4.574, "loss": "2.277", "ppl": "4.85", "wps": "396107", "ups": "3.36", "wpb": "118031", "bsz": "256", "num_updates": "235400", "lr": "7.72323e-05", "gnorm": "0.957", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "70407"} +[2022-07-31 06:23:50,805][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 06:24:18,019][train_inner][INFO] - {"epoch": 5, "update": 4.578, "loss": "2.274", "ppl": "4.84", "wps": "393900", "ups": "3.34", "wpb": "117788", "bsz": "256", "num_updates": "235600", "lr": "7.72121e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "70466"} +[2022-07-31 06:25:17,109][train_inner][INFO] - {"epoch": 5, "update": 4.581, "loss": "2.268", "ppl": "4.82", "wps": "399885", "ups": "3.38", "wpb": "118145", "bsz": "256", "num_updates": "235800", "lr": "7.71919e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "70526"} +[2022-07-31 06:26:16,441][train_inner][INFO] - {"epoch": 5, "update": 4.585, "loss": "2.272", "ppl": "4.83", "wps": "396793", "ups": "3.37", "wpb": "117713", "bsz": "255.9", "num_updates": "236000", "lr": "7.71717e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "70585"} +[2022-07-31 06:27:15,857][train_inner][INFO] - {"epoch": 5, "update": 4.589, "loss": "2.273", "ppl": "4.83", "wps": "395414", "ups": "3.37", "wpb": "117468", "bsz": "256", "num_updates": "236200", "lr": "7.71515e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "70644"} +[2022-07-31 06:28:15,586][train_inner][INFO] - {"epoch": 5, "update": 4.593, "loss": "2.27", "ppl": "4.82", "wps": "396095", "ups": "3.35", "wpb": "118290", "bsz": "256", "num_updates": "236400", "lr": "7.71313e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "70704"} +[2022-07-31 06:29:15,326][train_inner][INFO] - {"epoch": 5, "update": 4.597, "loss": "2.265", "ppl": "4.8", "wps": "396204", "ups": "3.35", "wpb": "118346", "bsz": "256", "num_updates": "236600", "lr": "7.71111e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "70764"} +[2022-07-31 06:30:15,091][train_inner][INFO] - {"epoch": 5, "update": 4.601, "loss": "2.271", "ppl": "4.83", "wps": "395712", "ups": "3.35", "wpb": "118247", "bsz": "256", "num_updates": "236800", "lr": "7.70909e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "70824"} +[2022-07-31 06:31:14,482][train_inner][INFO] - {"epoch": 5, "update": 4.605, "loss": "2.265", "ppl": "4.81", "wps": "398716", "ups": "3.37", "wpb": "118401", "bsz": "256", "num_updates": "237000", "lr": "7.70707e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "26", "wall": "70883"} +[2022-07-31 06:32:13,995][train_inner][INFO] - {"epoch": 5, "update": 4.609, "loss": "2.274", "ppl": "4.84", "wps": "396870", "ups": "3.36", "wpb": "118092", "bsz": "256", "num_updates": "237200", "lr": "7.70505e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "31.1", "wall": "70942"} +[2022-07-31 06:33:13,323][train_inner][INFO] - {"epoch": 5, "update": 4.613, "loss": "2.267", "ppl": "4.81", "wps": "399991", "ups": "3.37", "wpb": "118653", "bsz": "256", "num_updates": "237400", "lr": "7.70303e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "25.6", "wall": "71002"} +[2022-07-31 06:34:12,866][train_inner][INFO] - {"epoch": 5, "update": 4.616, "loss": "2.269", "ppl": "4.82", "wps": "396335", "ups": "3.36", "wpb": "117995", "bsz": "256", "num_updates": "237600", "lr": "7.70101e-05", "gnorm": "0.961", "loss_scale": "64", "train_wall": "59", "gb_free": "25.5", "wall": "71061"} +[2022-07-31 06:35:12,036][train_inner][INFO] - {"epoch": 5, "update": 4.62, "loss": "2.263", "ppl": "4.8", "wps": "400364", "ups": "3.38", "wpb": "118446", "bsz": "256", "num_updates": "237800", "lr": "7.69899e-05", "gnorm": "0.956", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "71120"} +[2022-07-31 06:36:11,225][train_inner][INFO] - {"epoch": 5, "update": 4.624, "loss": "2.265", "ppl": "4.81", "wps": "398744", "ups": "3.38", "wpb": "118007", "bsz": "256", "num_updates": "238000", "lr": "7.69697e-05", "gnorm": "0.957", "loss_scale": "64", "train_wall": "59", "gb_free": "25.1", "wall": "71180"} +[2022-07-31 06:37:10,446][train_inner][INFO] - {"epoch": 5, "update": 4.628, "loss": "2.268", "ppl": "4.82", "wps": "400230", "ups": "3.38", "wpb": "118508", "bsz": "256", "num_updates": "238200", "lr": "7.69495e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "71239"} +[2022-07-31 06:38:09,486][train_inner][INFO] - {"epoch": 5, "update": 4.632, "loss": "2.271", "ppl": "4.83", "wps": "396963", "ups": "3.39", "wpb": "117183", "bsz": "256", "num_updates": "238400", "lr": "7.69293e-05", "gnorm": "0.962", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "71298"} +[2022-07-31 06:38:20,860][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 06:39:09,617][train_inner][INFO] - {"epoch": 5, "update": 4.636, "loss": "2.271", "ppl": "4.83", "wps": "392678", "ups": "3.33", "wpb": "118059", "bsz": "256", "num_updates": "238600", "lr": "7.69091e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "60", "gb_free": "22.4", "wall": "71358"} +[2022-07-31 06:40:08,674][train_inner][INFO] - {"epoch": 5, "update": 4.64, "loss": "2.272", "ppl": "4.83", "wps": "402015", "ups": "3.39", "wpb": "118708", "bsz": "256", "num_updates": "238800", "lr": "7.68889e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "71417"} +[2022-07-31 06:41:07,858][train_inner][INFO] - {"epoch": 5, "update": 4.644, "loss": "2.262", "ppl": "4.8", "wps": "401335", "ups": "3.38", "wpb": "118763", "bsz": "256", "num_updates": "239000", "lr": "7.68687e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "71476"} +[2022-07-31 06:42:07,606][train_inner][INFO] - {"epoch": 5, "update": 4.648, "loss": "2.269", "ppl": "4.82", "wps": "397108", "ups": "3.35", "wpb": "118631", "bsz": "256", "num_updates": "239200", "lr": "7.68485e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "71536"} +[2022-07-31 06:43:07,062][train_inner][INFO] - {"epoch": 5, "update": 4.651, "loss": "2.268", "ppl": "4.82", "wps": "398665", "ups": "3.36", "wpb": "118513", "bsz": "256", "num_updates": "239400", "lr": "7.68283e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "71595"} +[2022-07-31 06:44:06,561][train_inner][INFO] - {"epoch": 5, "update": 4.655, "loss": "2.263", "ppl": "4.8", "wps": "398590", "ups": "3.36", "wpb": "118578", "bsz": "256", "num_updates": "239600", "lr": "7.68081e-05", "gnorm": "0.954", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "71655"} +[2022-07-31 06:45:05,855][train_inner][INFO] - {"epoch": 5, "update": 4.659, "loss": "2.266", "ppl": "4.81", "wps": "397038", "ups": "3.37", "wpb": "117709", "bsz": "256", "num_updates": "239800", "lr": "7.67879e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "71714"} +[2022-07-31 06:46:05,314][train_inner][INFO] - {"epoch": 5, "update": 4.663, "loss": "2.27", "ppl": "4.82", "wps": "397701", "ups": "3.36", "wpb": "118235", "bsz": "256", "num_updates": "240000", "lr": "7.67677e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "71774"} +[2022-07-31 06:47:04,790][train_inner][INFO] - {"epoch": 5, "update": 4.667, "loss": "2.273", "ppl": "4.83", "wps": "395970", "ups": "3.36", "wpb": "117753", "bsz": "256", "num_updates": "240200", "lr": "7.67475e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "71833"} +[2022-07-31 06:48:04,540][train_inner][INFO] - {"epoch": 5, "update": 4.671, "loss": "2.262", "ppl": "4.8", "wps": "398498", "ups": "3.35", "wpb": "119051", "bsz": "256", "num_updates": "240400", "lr": "7.67273e-05", "gnorm": "0.952", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "71893"} +[2022-07-31 06:49:03,616][train_inner][INFO] - {"epoch": 5, "update": 4.675, "loss": "2.263", "ppl": "4.8", "wps": "399326", "ups": "3.39", "wpb": "117952", "bsz": "256", "num_updates": "240600", "lr": "7.67071e-05", "gnorm": "0.959", "loss_scale": "64", "train_wall": "59", "gb_free": "22.8", "wall": "71952"} +[2022-07-31 06:50:02,821][train_inner][INFO] - {"epoch": 5, "update": 4.679, "loss": "2.261", "ppl": "4.79", "wps": "398008", "ups": "3.38", "wpb": "117819", "bsz": "256", "num_updates": "240800", "lr": "7.66869e-05", "gnorm": "0.959", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "72011"} +[2022-07-31 06:51:02,485][train_inner][INFO] - {"epoch": 5, "update": 4.682, "loss": "2.266", "ppl": "4.81", "wps": "395703", "ups": "3.35", "wpb": "118046", "bsz": "256", "num_updates": "241000", "lr": "7.66667e-05", "gnorm": "0.958", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "72071"} +[2022-07-31 06:52:02,093][train_inner][INFO] - {"epoch": 5, "update": 4.686, "loss": "2.267", "ppl": "4.81", "wps": "395212", "ups": "3.36", "wpb": "117788", "bsz": "256", "num_updates": "241200", "lr": "7.66465e-05", "gnorm": "0.959", "loss_scale": "64", "train_wall": "59", "gb_free": "24.6", "wall": "72131"} +[2022-07-31 06:53:00,936][train_inner][INFO] - {"epoch": 5, "update": 4.69, "loss": "2.26", "ppl": "4.79", "wps": "402214", "ups": "3.4", "wpb": "118337", "bsz": "256", "num_updates": "241400", "lr": "7.66263e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "72189"} +[2022-07-31 06:54:00,459][train_inner][INFO] - {"epoch": 5, "update": 4.694, "loss": "2.258", "ppl": "4.78", "wps": "400586", "ups": "3.36", "wpb": "119220", "bsz": "256", "num_updates": "241600", "lr": "7.66061e-05", "gnorm": "0.955", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "72249"} +[2022-07-31 06:54:59,907][train_inner][INFO] - {"epoch": 5, "update": 4.698, "loss": "2.26", "ppl": "4.79", "wps": "400223", "ups": "3.36", "wpb": "118961", "bsz": "256", "num_updates": "241800", "lr": "7.65859e-05", "gnorm": "0.954", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "72308"} +[2022-07-31 06:55:09,906][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 06:55:59,534][train_inner][INFO] - {"epoch": 5, "update": 4.702, "loss": "2.265", "ppl": "4.81", "wps": "397336", "ups": "3.35", "wpb": "118459", "bsz": "256", "num_updates": "242000", "lr": "7.65657e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "72368"} +[2022-07-31 06:56:59,209][train_inner][INFO] - {"epoch": 5, "update": 4.706, "loss": "2.262", "ppl": "4.8", "wps": "397503", "ups": "3.35", "wpb": "118604", "bsz": "256", "num_updates": "242200", "lr": "7.65455e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "72428"} +[2022-07-31 06:57:58,413][train_inner][INFO] - {"epoch": 5, "update": 4.71, "loss": "2.26", "ppl": "4.79", "wps": "399475", "ups": "3.38", "wpb": "118252", "bsz": "256", "num_updates": "242400", "lr": "7.65253e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "72487"} +[2022-07-31 06:58:57,791][train_inner][INFO] - {"epoch": 5, "update": 4.714, "loss": "2.265", "ppl": "4.81", "wps": "399262", "ups": "3.37", "wpb": "118534", "bsz": "256", "num_updates": "242600", "lr": "7.65051e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "72546"} +[2022-07-31 06:59:57,305][train_inner][INFO] - {"epoch": 5, "update": 4.717, "loss": "2.268", "ppl": "4.82", "wps": "397568", "ups": "3.36", "wpb": "118304", "bsz": "256", "num_updates": "242800", "lr": "7.64848e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "72606"} +[2022-07-31 07:00:56,999][train_inner][INFO] - {"epoch": 5, "update": 4.721, "loss": "2.269", "ppl": "4.82", "wps": "395463", "ups": "3.35", "wpb": "118032", "bsz": "256", "num_updates": "243000", "lr": "7.64646e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "72665"} +[2022-07-31 07:01:56,415][train_inner][INFO] - {"epoch": 5, "update": 4.725, "loss": "2.262", "ppl": "4.8", "wps": "397760", "ups": "3.37", "wpb": "118167", "bsz": "256", "num_updates": "243200", "lr": "7.64444e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "24.8", "wall": "72725"} +[2022-07-31 07:02:56,007][train_inner][INFO] - {"epoch": 5, "update": 4.729, "loss": "2.257", "ppl": "4.78", "wps": "398357", "ups": "3.36", "wpb": "118693", "bsz": "256", "num_updates": "243400", "lr": "7.64242e-05", "gnorm": "0.956", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "72784"} +[2022-07-31 07:03:55,406][train_inner][INFO] - {"epoch": 5, "update": 4.733, "loss": "2.264", "ppl": "4.8", "wps": "399702", "ups": "3.37", "wpb": "118710", "bsz": "256", "num_updates": "243600", "lr": "7.6404e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "72844"} +[2022-07-31 07:04:54,562][train_inner][INFO] - {"epoch": 5, "update": 4.737, "loss": "2.269", "ppl": "4.82", "wps": "396044", "ups": "3.38", "wpb": "117139", "bsz": "256", "num_updates": "243800", "lr": "7.63838e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "72903"} +[2022-07-31 07:05:53,923][train_inner][INFO] - {"epoch": 5, "update": 4.741, "loss": "2.26", "ppl": "4.79", "wps": "398196", "ups": "3.37", "wpb": "118187", "bsz": "256", "num_updates": "244000", "lr": "7.63636e-05", "gnorm": "0.957", "loss_scale": "64", "train_wall": "59", "gb_free": "22.7", "wall": "72962"} +[2022-07-31 07:06:53,758][train_inner][INFO] - {"epoch": 5, "update": 4.745, "loss": "2.266", "ppl": "4.81", "wps": "394659", "ups": "3.34", "wpb": "118071", "bsz": "256", "num_updates": "244200", "lr": "7.63434e-05", "gnorm": "0.957", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "73022"} +[2022-07-31 07:07:53,540][train_inner][INFO] - {"epoch": 5, "update": 4.749, "loss": "2.256", "ppl": "4.78", "wps": "396982", "ups": "3.35", "wpb": "118661", "bsz": "256", "num_updates": "244400", "lr": "7.63232e-05", "gnorm": "0.956", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "73082"} +[2022-07-31 07:08:53,262][train_inner][INFO] - {"epoch": 5, "update": 4.752, "loss": "2.255", "ppl": "4.77", "wps": "396362", "ups": "3.35", "wpb": "118356", "bsz": "256", "num_updates": "244600", "lr": "7.6303e-05", "gnorm": "0.958", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "73142"} +[2022-07-31 07:09:49,746][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 07:09:53,051][train_inner][INFO] - {"epoch": 5, "update": 4.756, "loss": "2.259", "ppl": "4.79", "wps": "393158", "ups": "3.35", "wpb": "117533", "bsz": "256", "num_updates": "244800", "lr": "7.62828e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "73201"} +[2022-07-31 07:10:52,582][train_inner][INFO] - {"epoch": 5, "update": 4.76, "loss": "2.258", "ppl": "4.78", "wps": "397055", "ups": "3.36", "wpb": "118183", "bsz": "256", "num_updates": "245000", "lr": "7.62626e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "25.3", "wall": "73261"} +[2022-07-31 07:11:52,083][train_inner][INFO] - {"epoch": 5, "update": 4.764, "loss": "2.267", "ppl": "4.81", "wps": "396542", "ups": "3.36", "wpb": "117973", "bsz": "256", "num_updates": "245200", "lr": "7.62424e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "27.4", "wall": "73321"} +[2022-07-31 07:12:51,634][train_inner][INFO] - {"epoch": 5, "update": 4.768, "loss": "2.264", "ppl": "4.8", "wps": "396596", "ups": "3.36", "wpb": "118087", "bsz": "256", "num_updates": "245400", "lr": "7.62222e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "73380"} +[2022-07-31 07:13:50,963][train_inner][INFO] - {"epoch": 5, "update": 4.772, "loss": "2.264", "ppl": "4.8", "wps": "398348", "ups": "3.37", "wpb": "118168", "bsz": "256", "num_updates": "245600", "lr": "7.6202e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "73439"} +[2022-07-31 07:14:50,486][train_inner][INFO] - {"epoch": 5, "update": 4.776, "loss": "2.264", "ppl": "4.8", "wps": "397375", "ups": "3.36", "wpb": "118264", "bsz": "256", "num_updates": "245800", "lr": "7.61818e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "73499"} +[2022-07-31 07:15:50,416][train_inner][INFO] - {"epoch": 5, "update": 4.78, "loss": "2.252", "ppl": "4.76", "wps": "395096", "ups": "3.34", "wpb": "118389", "bsz": "256", "num_updates": "246000", "lr": "7.61616e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "60", "gb_free": "21.4", "wall": "73559"} +[2022-07-31 07:16:50,113][train_inner][INFO] - {"epoch": 5, "update": 4.784, "loss": "2.261", "ppl": "4.79", "wps": "396466", "ups": "3.35", "wpb": "118338", "bsz": "256", "num_updates": "246200", "lr": "7.61414e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "73619"} +[2022-07-31 07:17:49,555][train_inner][INFO] - {"epoch": 5, "update": 4.787, "loss": "2.262", "ppl": "4.8", "wps": "398911", "ups": "3.36", "wpb": "118561", "bsz": "256", "num_updates": "246400", "lr": "7.61212e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "73678"} +[2022-07-31 07:18:49,032][train_inner][INFO] - {"epoch": 5, "update": 4.791, "loss": "2.253", "ppl": "4.77", "wps": "399738", "ups": "3.36", "wpb": "118874", "bsz": "256", "num_updates": "246600", "lr": "7.6101e-05", "gnorm": "0.955", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "73737"} +[2022-07-31 07:19:48,270][train_inner][INFO] - {"epoch": 5, "update": 4.795, "loss": "2.263", "ppl": "4.8", "wps": "400046", "ups": "3.38", "wpb": "118489", "bsz": "256", "num_updates": "246800", "lr": "7.60808e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "73797"} +[2022-07-31 07:20:47,377][train_inner][INFO] - {"epoch": 5, "update": 4.799, "loss": "2.258", "ppl": "4.78", "wps": "399787", "ups": "3.38", "wpb": "118150", "bsz": "256", "num_updates": "247000", "lr": "7.60606e-05", "gnorm": "0.959", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "73856"} +[2022-07-31 07:21:01,961][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 07:21:47,157][train_inner][INFO] - {"epoch": 5, "update": 4.803, "loss": "2.257", "ppl": "4.78", "wps": "395195", "ups": "3.35", "wpb": "118123", "bsz": "256", "num_updates": "247200", "lr": "7.60404e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "73916"} +[2022-07-31 07:22:48,135][train_inner][INFO] - {"epoch": 5, "update": 4.807, "loss": "2.254", "ppl": "4.77", "wps": "388838", "ups": "3.28", "wpb": "118551", "bsz": "256", "num_updates": "247400", "lr": "7.60202e-05", "gnorm": "0.957", "loss_scale": "32", "train_wall": "61", "gb_free": "21.9", "wall": "73977"} +[2022-07-31 07:23:48,526][train_inner][INFO] - {"epoch": 5, "update": 4.811, "loss": "2.256", "ppl": "4.78", "wps": "392895", "ups": "3.31", "wpb": "118636", "bsz": "256", "num_updates": "247600", "lr": "7.6e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "60", "gb_free": "28.3", "wall": "74037"} +[2022-07-31 07:24:49,106][train_inner][INFO] - {"epoch": 5, "update": 4.815, "loss": "2.265", "ppl": "4.81", "wps": "388224", "ups": "3.3", "wpb": "117593", "bsz": "256", "num_updates": "247800", "lr": "7.59798e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "60", "gb_free": "22.4", "wall": "74098"} +[2022-07-31 07:25:48,665][train_inner][INFO] - {"epoch": 5, "update": 4.819, "loss": "2.26", "ppl": "4.79", "wps": "397014", "ups": "3.36", "wpb": "118228", "bsz": "256", "num_updates": "248000", "lr": "7.59596e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "74157"} +[2022-07-31 07:26:01,015][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 07:26:48,364][train_inner][INFO] - {"epoch": 5, "update": 4.822, "loss": "2.256", "ppl": "4.78", "wps": "396796", "ups": "3.35", "wpb": "118442", "bsz": "256", "num_updates": "248200", "lr": "7.59394e-05", "gnorm": "0.958", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "74217"} +[2022-07-31 07:27:47,838][train_inner][INFO] - {"epoch": 5, "update": 4.826, "loss": "2.252", "ppl": "4.76", "wps": "397993", "ups": "3.36", "wpb": "118350", "bsz": "256", "num_updates": "248400", "lr": "7.59192e-05", "gnorm": "0.961", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "74276"} +[2022-07-31 07:28:47,474][train_inner][INFO] - {"epoch": 5, "update": 4.83, "loss": "2.264", "ppl": "4.8", "wps": "395149", "ups": "3.35", "wpb": "117824", "bsz": "256", "num_updates": "248600", "lr": "7.5899e-05", "gnorm": "0.965", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "74336"} +[2022-07-31 07:29:46,248][train_inner][INFO] - {"epoch": 5, "update": 4.834, "loss": "2.256", "ppl": "4.78", "wps": "403978", "ups": "3.4", "wpb": "118717", "bsz": "256", "num_updates": "248800", "lr": "7.58788e-05", "gnorm": "0.963", "loss_scale": "16", "train_wall": "58", "gb_free": "22.1", "wall": "74395"} +[2022-07-31 07:30:45,745][train_inner][INFO] - {"epoch": 5, "update": 4.838, "loss": "2.255", "ppl": "4.77", "wps": "397636", "ups": "3.36", "wpb": "118289", "bsz": "256", "num_updates": "249000", "lr": "7.58586e-05", "gnorm": "0.961", "loss_scale": "16", "train_wall": "59", "gb_free": "23.2", "wall": "74454"} +[2022-07-31 07:31:45,108][train_inner][INFO] - {"epoch": 5, "update": 4.842, "loss": "2.258", "ppl": "4.78", "wps": "399028", "ups": "3.37", "wpb": "118438", "bsz": "256", "num_updates": "249200", "lr": "7.58384e-05", "gnorm": "0.962", "loss_scale": "16", "train_wall": "59", "gb_free": "24.4", "wall": "74514"} +[2022-07-31 07:32:44,401][train_inner][INFO] - {"epoch": 5, "update": 4.846, "loss": "2.266", "ppl": "4.81", "wps": "398451", "ups": "3.37", "wpb": "118126", "bsz": "256", "num_updates": "249400", "lr": "7.58182e-05", "gnorm": "0.959", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "74573"} +[2022-07-31 07:33:44,058][train_inner][INFO] - {"epoch": 5, "update": 4.85, "loss": "2.251", "ppl": "4.76", "wps": "398685", "ups": "3.35", "wpb": "118920", "bsz": "256", "num_updates": "249600", "lr": "7.5798e-05", "gnorm": "0.954", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "74632"} +[2022-07-31 07:34:43,462][train_inner][INFO] - {"epoch": 5, "update": 4.853, "loss": "2.258", "ppl": "4.78", "wps": "398182", "ups": "3.37", "wpb": "118269", "bsz": "256", "num_updates": "249800", "lr": "7.57778e-05", "gnorm": "0.961", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "74692"} +[2022-07-31 07:35:42,703][train_inner][INFO] - {"epoch": 5, "update": 4.857, "loss": "2.255", "ppl": "4.77", "wps": "398858", "ups": "3.38", "wpb": "118142", "bsz": "256", "num_updates": "250000", "lr": "7.57576e-05", "gnorm": "0.966", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "74751"} +[2022-07-31 07:35:42,704][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-31 07:36:05,261][valid][INFO] - {"epoch": 5, "valid_loss": "2.148", "valid_ppl": "4.43", "valid_wps": "1.53465e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "250000", "valid_best_loss": "2.148"} +[2022-07-31 07:36:05,264][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 250000 updates +[2022-07-31 07:36:05,265][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_5_250000.pt +[2022-07-31 07:36:11,798][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_5_250000.pt +[2022-07-31 07:36:35,604][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_5_250000.pt (epoch 5 @ 250000 updates, score 2.148) (writing took 30.3390778247267 seconds) +[2022-07-31 07:37:34,748][train_inner][INFO] - {"epoch": 5, "update": 4.861, "loss": "2.253", "ppl": "4.77", "wps": "211401", "ups": "1.79", "wpb": "118431", "bsz": "256", "num_updates": "250200", "lr": "7.57374e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "74863"} +[2022-07-31 07:38:34,044][train_inner][INFO] - {"epoch": 5, "update": 4.865, "loss": "2.259", "ppl": "4.79", "wps": "399580", "ups": "3.37", "wpb": "118467", "bsz": "256", "num_updates": "250400", "lr": "7.57172e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "25.8", "wall": "74922"} +[2022-07-31 07:39:03,029][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 07:39:33,780][train_inner][INFO] - {"epoch": 5, "update": 4.869, "loss": "2.262", "ppl": "4.8", "wps": "395086", "ups": "3.35", "wpb": "118003", "bsz": "256", "num_updates": "250600", "lr": "7.5697e-05", "gnorm": "0.961", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "74982"} +[2022-07-31 07:40:34,109][train_inner][INFO] - {"epoch": 5, "update": 4.873, "loss": "2.252", "ppl": "4.76", "wps": "393539", "ups": "3.32", "wpb": "118709", "bsz": "256", "num_updates": "250800", "lr": "7.56768e-05", "gnorm": "0.959", "loss_scale": "16", "train_wall": "60", "gb_free": "28.5", "wall": "75043"} +[2022-07-31 07:41:33,408][train_inner][INFO] - {"epoch": 5, "update": 4.877, "loss": "2.256", "ppl": "4.78", "wps": "398846", "ups": "3.37", "wpb": "118255", "bsz": "256", "num_updates": "251000", "lr": "7.56566e-05", "gnorm": "0.96", "loss_scale": "16", "train_wall": "59", "gb_free": "27", "wall": "75102"} +[2022-07-31 07:42:32,724][train_inner][INFO] - {"epoch": 5, "update": 4.881, "loss": "2.258", "ppl": "4.78", "wps": "398655", "ups": "3.37", "wpb": "118232", "bsz": "256", "num_updates": "251200", "lr": "7.56364e-05", "gnorm": "0.96", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "75161"} +[2022-07-31 07:43:31,849][train_inner][INFO] - {"epoch": 5, "update": 4.885, "loss": "2.262", "ppl": "4.8", "wps": "400637", "ups": "3.38", "wpb": "118439", "bsz": "256", "num_updates": "251400", "lr": "7.56162e-05", "gnorm": "0.962", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "75220"} +[2022-07-31 07:44:30,973][train_inner][INFO] - {"epoch": 5, "update": 4.888, "loss": "2.257", "ppl": "4.78", "wps": "398728", "ups": "3.38", "wpb": "117870", "bsz": "256", "num_updates": "251600", "lr": "7.5596e-05", "gnorm": "0.963", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "75279"} +[2022-07-31 07:45:30,591][train_inner][INFO] - {"epoch": 5, "update": 4.892, "loss": "2.25", "ppl": "4.76", "wps": "396998", "ups": "3.35", "wpb": "118341", "bsz": "256", "num_updates": "251800", "lr": "7.55758e-05", "gnorm": "0.959", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "75339"} +[2022-07-31 07:46:29,960][train_inner][INFO] - {"epoch": 5, "update": 4.896, "loss": "2.255", "ppl": "4.77", "wps": "397192", "ups": "3.37", "wpb": "117903", "bsz": "256", "num_updates": "252000", "lr": "7.55556e-05", "gnorm": "0.965", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "75398"} +[2022-07-31 07:47:29,044][train_inner][INFO] - {"epoch": 5, "update": 4.9, "loss": "2.252", "ppl": "4.76", "wps": "396980", "ups": "3.39", "wpb": "117275", "bsz": "256", "num_updates": "252200", "lr": "7.55354e-05", "gnorm": "0.965", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "75457"} +[2022-07-31 07:48:28,311][train_inner][INFO] - {"epoch": 5, "update": 4.904, "loss": "2.254", "ppl": "4.77", "wps": "398274", "ups": "3.37", "wpb": "118023", "bsz": "256", "num_updates": "252400", "lr": "7.55152e-05", "gnorm": "0.962", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "75517"} +[2022-07-31 07:49:27,590][train_inner][INFO] - {"epoch": 5, "update": 4.908, "loss": "2.265", "ppl": "4.81", "wps": "399133", "ups": "3.37", "wpb": "118300", "bsz": "256", "num_updates": "252600", "lr": "7.54949e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "75576"} +[2022-07-31 07:50:27,185][train_inner][INFO] - {"epoch": 5, "update": 4.912, "loss": "2.25", "ppl": "4.76", "wps": "396778", "ups": "3.36", "wpb": "118228", "bsz": "256", "num_updates": "252800", "lr": "7.54747e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "75636"} +[2022-07-31 07:51:26,379][train_inner][INFO] - {"epoch": 5, "update": 4.916, "loss": "2.254", "ppl": "4.77", "wps": "400220", "ups": "3.38", "wpb": "118452", "bsz": "256", "num_updates": "253000", "lr": "7.54545e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "28.7", "wall": "75695"} +[2022-07-31 07:52:25,319][train_inner][INFO] - {"epoch": 5, "update": 4.92, "loss": "2.254", "ppl": "4.77", "wps": "400201", "ups": "3.39", "wpb": "117940", "bsz": "256", "num_updates": "253200", "lr": "7.54343e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "27.4", "wall": "75754"} +[2022-07-31 07:53:24,831][train_inner][INFO] - {"epoch": 5, "update": 4.923, "loss": "2.253", "ppl": "4.77", "wps": "398305", "ups": "3.36", "wpb": "118518", "bsz": "256", "num_updates": "253400", "lr": "7.54141e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "75813"} +[2022-07-31 07:54:24,466][train_inner][INFO] - {"epoch": 5, "update": 4.927, "loss": "2.254", "ppl": "4.77", "wps": "396458", "ups": "3.35", "wpb": "118214", "bsz": "256", "num_updates": "253600", "lr": "7.53939e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "75873"} +[2022-07-31 07:55:24,159][train_inner][INFO] - {"epoch": 5, "update": 4.931, "loss": "2.248", "ppl": "4.75", "wps": "396192", "ups": "3.35", "wpb": "118248", "bsz": "256", "num_updates": "253800", "lr": "7.53737e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "75933"} +[2022-07-31 07:56:23,474][train_inner][INFO] - {"epoch": 5, "update": 4.935, "loss": "2.257", "ppl": "4.78", "wps": "398563", "ups": "3.37", "wpb": "118202", "bsz": "256", "num_updates": "254000", "lr": "7.53535e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "75992"} +[2022-07-31 07:57:23,102][train_inner][INFO] - {"epoch": 5, "update": 4.939, "loss": "2.251", "ppl": "4.76", "wps": "398704", "ups": "3.35", "wpb": "118869", "bsz": "256", "num_updates": "254200", "lr": "7.53333e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "76052"} +[2022-07-31 07:58:22,465][train_inner][INFO] - {"epoch": 5, "update": 4.943, "loss": "2.252", "ppl": "4.76", "wps": "398837", "ups": "3.37", "wpb": "118381", "bsz": "256", "num_updates": "254400", "lr": "7.53131e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "25", "wall": "76111"} +[2022-07-31 07:59:21,691][train_inner][INFO] - {"epoch": 5, "update": 4.947, "loss": "2.252", "ppl": "4.76", "wps": "399728", "ups": "3.38", "wpb": "118371", "bsz": "256", "num_updates": "254600", "lr": "7.52929e-05", "gnorm": "0.963", "loss_scale": "64", "train_wall": "59", "gb_free": "27.6", "wall": "76170"} +[2022-07-31 08:00:21,521][train_inner][INFO] - {"epoch": 5, "update": 4.951, "loss": "2.254", "ppl": "4.77", "wps": "394660", "ups": "3.34", "wpb": "118062", "bsz": "256", "num_updates": "254800", "lr": "7.52727e-05", "gnorm": "0.962", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "76230"} +[2022-07-31 08:01:20,831][train_inner][INFO] - {"epoch": 5, "update": 4.954, "loss": "2.257", "ppl": "4.78", "wps": "398277", "ups": "3.37", "wpb": "118107", "bsz": "256", "num_updates": "255000", "lr": "7.52525e-05", "gnorm": "0.962", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "76289"} +[2022-07-31 08:01:51,888][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 08:02:20,054][train_inner][INFO] - {"epoch": 5, "update": 4.958, "loss": "2.258", "ppl": "4.78", "wps": "398548", "ups": "3.38", "wpb": "118016", "bsz": "256", "num_updates": "255200", "lr": "7.52323e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "76348"} +[2022-07-31 08:03:19,350][train_inner][INFO] - {"epoch": 5, "update": 4.962, "loss": "2.255", "ppl": "4.77", "wps": "400097", "ups": "3.37", "wpb": "118619", "bsz": "256", "num_updates": "255400", "lr": "7.52121e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "76408"} +[2022-07-31 08:04:18,834][train_inner][INFO] - {"epoch": 5, "update": 4.966, "loss": "2.246", "ppl": "4.74", "wps": "397490", "ups": "3.36", "wpb": "118221", "bsz": "256", "num_updates": "255600", "lr": "7.51919e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "76467"} +[2022-07-31 08:05:18,137][train_inner][INFO] - {"epoch": 5, "update": 4.97, "loss": "2.251", "ppl": "4.76", "wps": "395336", "ups": "3.37", "wpb": "117223", "bsz": "256", "num_updates": "255800", "lr": "7.51717e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "76527"} +[2022-07-31 08:06:17,897][train_inner][INFO] - {"epoch": 5, "update": 4.974, "loss": "2.249", "ppl": "4.75", "wps": "397837", "ups": "3.35", "wpb": "118872", "bsz": "256", "num_updates": "256000", "lr": "7.51515e-05", "gnorm": "0.958", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "76586"} +[2022-07-31 08:07:17,313][train_inner][INFO] - {"epoch": 5, "update": 4.978, "loss": "2.253", "ppl": "4.77", "wps": "398261", "ups": "3.37", "wpb": "118315", "bsz": "256", "num_updates": "256200", "lr": "7.51313e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "76646"} +[2022-07-31 08:08:16,876][train_inner][INFO] - {"epoch": 5, "update": 4.982, "loss": "2.253", "ppl": "4.77", "wps": "398197", "ups": "3.36", "wpb": "118587", "bsz": "256", "num_updates": "256400", "lr": "7.51111e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "76705"} +[2022-07-31 08:09:15,934][train_inner][INFO] - {"epoch": 5, "update": 4.986, "loss": "2.257", "ppl": "4.78", "wps": "399842", "ups": "3.39", "wpb": "118069", "bsz": "256", "num_updates": "256600", "lr": "7.50909e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "76764"} +[2022-07-31 08:10:15,328][train_inner][INFO] - {"epoch": 5, "update": 4.989, "loss": "2.256", "ppl": "4.78", "wps": "398603", "ups": "3.37", "wpb": "118372", "bsz": "256", "num_updates": "256800", "lr": "7.50707e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "76824"} +[2022-07-31 08:11:14,701][train_inner][INFO] - {"epoch": 5, "update": 4.993, "loss": "2.253", "ppl": "4.77", "wps": "398177", "ups": "3.37", "wpb": "118204", "bsz": "256", "num_updates": "257000", "lr": "7.50505e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "76883"} +[2022-07-31 08:12:15,386][train_inner][INFO] - {"epoch": 5, "update": 4.997, "loss": "2.25", "ppl": "4.76", "wps": "390030", "ups": "3.3", "wpb": "118344", "bsz": "256", "num_updates": "257200", "lr": "7.50303e-05", "gnorm": "0.96", "loss_scale": "64", "train_wall": "60", "gb_free": "21.8", "wall": "76944"} +[2022-07-31 08:12:42,310][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 08:12:57,330][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-31 08:13:19,939][valid][INFO] - {"epoch": 5, "valid_loss": "2.135", "valid_ppl": "4.39", "valid_wps": "1.58683e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "257341", "valid_best_loss": "2.135"} +[2022-07-31 08:13:19,941][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 257341 updates +[2022-07-31 08:13:19,942][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_best.pt +[2022-07-31 08:13:26,161][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_best.pt +[2022-07-31 08:13:39,497][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_best.pt (epoch 5 @ 257341 updates, score 2.135) (writing took 19.55606442131102 seconds) +[2022-07-31 08:13:39,498][fairseq_cli.train][INFO] - end of epoch 5 (average epoch stats below) +[2022-07-31 08:13:39,499][train][INFO] - {"epoch": 5, "train_loss": "2.275", "train_ppl": "4.84", "train_wps": "394700", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "257341", "train_lr": "7.50161e-05", "train_gnorm": "0.956", "train_loss_scale": "32", "train_train_wall": "15232", "train_gb_free": "22.2", "train_wall": "77028"} +[2022-07-31 08:13:39,593][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-31 08:13:39,596][fairseq.trainer][INFO] - begin training epoch 6 +[2022-07-31 08:13:39,596][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-31 08:14:05,879][train_inner][INFO] - {"epoch": 6, "update": 5.001, "loss": "2.245", "ppl": "4.74", "wps": "213435", "ups": "1.81", "wpb": "117915", "bsz": "255.4", "num_updates": "257400", "lr": "7.50101e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "77054"} +[2022-07-31 08:15:05,364][train_inner][INFO] - {"epoch": 6, "update": 5.005, "loss": "2.249", "ppl": "4.75", "wps": "397882", "ups": "3.36", "wpb": "118338", "bsz": "256", "num_updates": "257600", "lr": "7.49899e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "25.8", "wall": "77114"} +[2022-07-31 08:16:04,647][train_inner][INFO] - {"epoch": 6, "update": 5.009, "loss": "2.247", "ppl": "4.75", "wps": "398616", "ups": "3.37", "wpb": "118157", "bsz": "256", "num_updates": "257800", "lr": "7.49697e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "77173"} +[2022-07-31 08:17:04,052][train_inner][INFO] - {"epoch": 6, "update": 5.013, "loss": "2.245", "ppl": "4.74", "wps": "399371", "ups": "3.37", "wpb": "118622", "bsz": "256", "num_updates": "258000", "lr": "7.49495e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "77232"} +[2022-07-31 08:18:03,541][train_inner][INFO] - {"epoch": 6, "update": 5.017, "loss": "2.244", "ppl": "4.74", "wps": "398812", "ups": "3.36", "wpb": "118624", "bsz": "256", "num_updates": "258200", "lr": "7.49293e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "77292"} +[2022-07-31 08:19:02,759][train_inner][INFO] - {"epoch": 6, "update": 5.021, "loss": "2.253", "ppl": "4.77", "wps": "399177", "ups": "3.38", "wpb": "118191", "bsz": "256", "num_updates": "258400", "lr": "7.49091e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "77351"} +[2022-07-31 08:20:02,175][train_inner][INFO] - {"epoch": 6, "update": 5.024, "loss": "2.251", "ppl": "4.76", "wps": "396970", "ups": "3.37", "wpb": "117931", "bsz": "256", "num_updates": "258600", "lr": "7.48889e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "77411"} +[2022-07-31 08:21:01,528][train_inner][INFO] - {"epoch": 6, "update": 5.028, "loss": "2.251", "ppl": "4.76", "wps": "394744", "ups": "3.37", "wpb": "117145", "bsz": "256", "num_updates": "258800", "lr": "7.48687e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "77470"} +[2022-07-31 08:22:01,078][train_inner][INFO] - {"epoch": 6, "update": 5.032, "loss": "2.243", "ppl": "4.73", "wps": "399234", "ups": "3.36", "wpb": "118872", "bsz": "256", "num_updates": "259000", "lr": "7.48485e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "77530"} +[2022-07-31 08:22:59,986][train_inner][INFO] - {"epoch": 6, "update": 5.036, "loss": "2.241", "ppl": "4.73", "wps": "403275", "ups": "3.4", "wpb": "118779", "bsz": "256", "num_updates": "259200", "lr": "7.48283e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "77588"} +[2022-07-31 08:23:59,384][train_inner][INFO] - {"epoch": 6, "update": 5.04, "loss": "2.243", "ppl": "4.73", "wps": "399560", "ups": "3.37", "wpb": "118666", "bsz": "256", "num_updates": "259400", "lr": "7.48081e-05", "gnorm": "0.961", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "77648"} +[2022-07-31 08:24:58,449][train_inner][INFO] - {"epoch": 6, "update": 5.044, "loss": "2.253", "ppl": "4.77", "wps": "399732", "ups": "3.39", "wpb": "118049", "bsz": "256", "num_updates": "259600", "lr": "7.47879e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "77707"} +[2022-07-31 08:25:57,866][train_inner][INFO] - {"epoch": 6, "update": 5.048, "loss": "2.252", "ppl": "4.76", "wps": "397756", "ups": "3.37", "wpb": "118168", "bsz": "256", "num_updates": "259800", "lr": "7.47677e-05", "gnorm": "0.963", "loss_scale": "64", "train_wall": "59", "gb_free": "23.1", "wall": "77766"} +[2022-07-31 08:26:33,322][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 08:26:57,752][train_inner][INFO] - {"epoch": 6, "update": 5.052, "loss": "2.242", "ppl": "4.73", "wps": "395716", "ups": "3.34", "wpb": "118489", "bsz": "256", "num_updates": "260000", "lr": "7.47475e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "60", "gb_free": "29.7", "wall": "77826"} +[2022-07-31 08:27:57,084][train_inner][INFO] - {"epoch": 6, "update": 5.056, "loss": "2.241", "ppl": "4.73", "wps": "400620", "ups": "3.37", "wpb": "118846", "bsz": "256", "num_updates": "260200", "lr": "7.47273e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "77886"} +[2022-07-31 08:28:56,234][train_inner][INFO] - {"epoch": 6, "update": 5.059, "loss": "2.238", "ppl": "4.72", "wps": "401934", "ups": "3.38", "wpb": "118872", "bsz": "256", "num_updates": "260400", "lr": "7.47071e-05", "gnorm": "0.959", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "77945"} +[2022-07-31 08:29:55,711][train_inner][INFO] - {"epoch": 6, "update": 5.063, "loss": "2.253", "ppl": "4.77", "wps": "397081", "ups": "3.36", "wpb": "118085", "bsz": "256", "num_updates": "260600", "lr": "7.46869e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "78004"} +[2022-07-31 08:30:54,816][train_inner][INFO] - {"epoch": 6, "update": 5.067, "loss": "2.241", "ppl": "4.73", "wps": "399564", "ups": "3.38", "wpb": "118079", "bsz": "256", "num_updates": "260800", "lr": "7.46667e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "78063"} +[2022-07-31 08:31:54,381][train_inner][INFO] - {"epoch": 6, "update": 5.071, "loss": "2.238", "ppl": "4.72", "wps": "399387", "ups": "3.36", "wpb": "118948", "bsz": "256", "num_updates": "261000", "lr": "7.46465e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "78123"} +[2022-07-31 08:32:53,927][train_inner][INFO] - {"epoch": 6, "update": 5.075, "loss": "2.241", "ppl": "4.73", "wps": "397227", "ups": "3.36", "wpb": "118265", "bsz": "256", "num_updates": "261200", "lr": "7.46263e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "23.8", "wall": "78182"} +[2022-07-31 08:33:53,291][train_inner][INFO] - {"epoch": 6, "update": 5.079, "loss": "2.245", "ppl": "4.74", "wps": "399634", "ups": "3.37", "wpb": "118619", "bsz": "256", "num_updates": "261400", "lr": "7.46061e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "78242"} +[2022-07-31 08:34:52,561][train_inner][INFO] - {"epoch": 6, "update": 5.083, "loss": "2.241", "ppl": "4.73", "wps": "398813", "ups": "3.37", "wpb": "118187", "bsz": "256", "num_updates": "261600", "lr": "7.45859e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "29.3", "wall": "78301"} +[2022-07-31 08:35:51,542][train_inner][INFO] - {"epoch": 6, "update": 5.087, "loss": "2.247", "ppl": "4.75", "wps": "400195", "ups": "3.39", "wpb": "118018", "bsz": "256", "num_updates": "261800", "lr": "7.45657e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "59", "gb_free": "24.4", "wall": "78360"} +[2022-07-31 08:36:50,738][train_inner][INFO] - {"epoch": 6, "update": 5.091, "loss": "2.241", "ppl": "4.73", "wps": "400115", "ups": "3.38", "wpb": "118426", "bsz": "256", "num_updates": "262000", "lr": "7.45455e-05", "gnorm": "0.96", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "78419"} +[2022-07-31 08:37:50,077][train_inner][INFO] - {"epoch": 6, "update": 5.094, "loss": "2.239", "ppl": "4.72", "wps": "399464", "ups": "3.37", "wpb": "118518", "bsz": "256", "num_updates": "262200", "lr": "7.45253e-05", "gnorm": "0.962", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "78479"} +[2022-07-31 08:38:16,637][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 08:38:49,877][train_inner][INFO] - {"epoch": 6, "update": 5.098, "loss": "2.248", "ppl": "4.75", "wps": "396015", "ups": "3.34", "wpb": "118408", "bsz": "256", "num_updates": "262400", "lr": "7.45051e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "78538"} +[2022-07-31 08:39:49,399][train_inner][INFO] - {"epoch": 6, "update": 5.102, "loss": "2.244", "ppl": "4.74", "wps": "396869", "ups": "3.36", "wpb": "118112", "bsz": "256", "num_updates": "262600", "lr": "7.44848e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "33.1", "wall": "78598"} +[2022-07-31 08:40:01,494][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 08:40:49,211][train_inner][INFO] - {"epoch": 6, "update": 5.106, "loss": "2.243", "ppl": "4.73", "wps": "393897", "ups": "3.34", "wpb": "117797", "bsz": "256", "num_updates": "262800", "lr": "7.44646e-05", "gnorm": "0.965", "loss_scale": "16", "train_wall": "60", "gb_free": "26.7", "wall": "78658"} +[2022-07-31 08:41:48,504][train_inner][INFO] - {"epoch": 6, "update": 5.11, "loss": "2.242", "ppl": "4.73", "wps": "398016", "ups": "3.37", "wpb": "117998", "bsz": "256", "num_updates": "263000", "lr": "7.44444e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "27", "wall": "78717"} +[2022-07-31 08:42:47,758][train_inner][INFO] - {"epoch": 6, "update": 5.114, "loss": "2.242", "ppl": "4.73", "wps": "398415", "ups": "3.38", "wpb": "118037", "bsz": "256", "num_updates": "263200", "lr": "7.44242e-05", "gnorm": "0.967", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "78776"} +[2022-07-31 08:43:46,728][train_inner][INFO] - {"epoch": 6, "update": 5.118, "loss": "2.245", "ppl": "4.74", "wps": "399024", "ups": "3.39", "wpb": "117651", "bsz": "256", "num_updates": "263400", "lr": "7.4404e-05", "gnorm": "0.967", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "78835"} +[2022-07-31 08:44:46,230][train_inner][INFO] - {"epoch": 6, "update": 5.122, "loss": "2.247", "ppl": "4.75", "wps": "394960", "ups": "3.36", "wpb": "117504", "bsz": "256", "num_updates": "263600", "lr": "7.43838e-05", "gnorm": "0.967", "loss_scale": "16", "train_wall": "59", "gb_free": "30.2", "wall": "78895"} +[2022-07-31 08:45:45,365][train_inner][INFO] - {"epoch": 6, "update": 5.126, "loss": "2.244", "ppl": "4.74", "wps": "400626", "ups": "3.38", "wpb": "118455", "bsz": "256", "num_updates": "263800", "lr": "7.43636e-05", "gnorm": "0.963", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "78954"} +[2022-07-31 08:46:44,692][train_inner][INFO] - {"epoch": 6, "update": 5.129, "loss": "2.242", "ppl": "4.73", "wps": "398251", "ups": "3.37", "wpb": "118133", "bsz": "256", "num_updates": "264000", "lr": "7.43434e-05", "gnorm": "0.965", "loss_scale": "16", "train_wall": "59", "gb_free": "25.4", "wall": "79013"} +[2022-07-31 08:47:43,925][train_inner][INFO] - {"epoch": 6, "update": 5.133, "loss": "2.246", "ppl": "4.75", "wps": "398009", "ups": "3.38", "wpb": "117876", "bsz": "256", "num_updates": "264200", "lr": "7.43232e-05", "gnorm": "0.966", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "79072"} +[2022-07-31 08:48:44,179][train_inner][INFO] - {"epoch": 6, "update": 5.137, "loss": "2.245", "ppl": "4.74", "wps": "392562", "ups": "3.32", "wpb": "118266", "bsz": "256", "num_updates": "264400", "lr": "7.4303e-05", "gnorm": "0.963", "loss_scale": "16", "train_wall": "60", "gb_free": "31.5", "wall": "79133"} +[2022-07-31 08:49:43,496][train_inner][INFO] - {"epoch": 6, "update": 5.141, "loss": "2.25", "ppl": "4.76", "wps": "396475", "ups": "3.37", "wpb": "117587", "bsz": "256", "num_updates": "264600", "lr": "7.42828e-05", "gnorm": "0.968", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "79192"} +[2022-07-31 08:50:43,199][train_inner][INFO] - {"epoch": 6, "update": 5.145, "loss": "2.24", "ppl": "4.72", "wps": "397603", "ups": "3.35", "wpb": "118690", "bsz": "256", "num_updates": "264800", "lr": "7.42626e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "79252"} +[2022-07-31 08:51:42,536][train_inner][INFO] - {"epoch": 6, "update": 5.149, "loss": "2.238", "ppl": "4.72", "wps": "396544", "ups": "3.37", "wpb": "117647", "bsz": "256", "num_updates": "265000", "lr": "7.42424e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "79311"} +[2022-07-31 08:52:41,880][train_inner][INFO] - {"epoch": 6, "update": 5.153, "loss": "2.245", "ppl": "4.74", "wps": "399185", "ups": "3.37", "wpb": "118446", "bsz": "256", "num_updates": "265200", "lr": "7.42222e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "79370"} +[2022-07-31 08:53:41,395][train_inner][INFO] - {"epoch": 6, "update": 5.157, "loss": "2.241", "ppl": "4.73", "wps": "396613", "ups": "3.36", "wpb": "118021", "bsz": "256", "num_updates": "265400", "lr": "7.4202e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "24.4", "wall": "79430"} +[2022-07-31 08:54:40,651][train_inner][INFO] - {"epoch": 6, "update": 5.16, "loss": "2.243", "ppl": "4.74", "wps": "401367", "ups": "3.38", "wpb": "118916", "bsz": "256", "num_updates": "265600", "lr": "7.41818e-05", "gnorm": "0.96", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "79489"} +[2022-07-31 08:55:40,060][train_inner][INFO] - {"epoch": 6, "update": 5.164, "loss": "2.244", "ppl": "4.74", "wps": "398435", "ups": "3.37", "wpb": "118353", "bsz": "256", "num_updates": "265800", "lr": "7.41616e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "79548"} +[2022-07-31 08:56:39,420][train_inner][INFO] - {"epoch": 6, "update": 5.168, "loss": "2.241", "ppl": "4.73", "wps": "399478", "ups": "3.37", "wpb": "118564", "bsz": "256", "num_updates": "266000", "lr": "7.41414e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "79608"} +[2022-07-31 08:57:38,482][train_inner][INFO] - {"epoch": 6, "update": 5.172, "loss": "2.244", "ppl": "4.74", "wps": "400716", "ups": "3.39", "wpb": "118334", "bsz": "256", "num_updates": "266200", "lr": "7.41212e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "79667"} +[2022-07-31 08:58:37,606][train_inner][INFO] - {"epoch": 6, "update": 5.176, "loss": "2.239", "ppl": "4.72", "wps": "400478", "ups": "3.38", "wpb": "118390", "bsz": "256", "num_updates": "266400", "lr": "7.4101e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "25.2", "wall": "79726"} +[2022-07-31 08:59:37,328][train_inner][INFO] - {"epoch": 6, "update": 5.18, "loss": "2.243", "ppl": "4.73", "wps": "395535", "ups": "3.35", "wpb": "118110", "bsz": "256", "num_updates": "266600", "lr": "7.40808e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "79786"} +[2022-07-31 09:00:36,649][train_inner][INFO] - {"epoch": 6, "update": 5.184, "loss": "2.24", "ppl": "4.72", "wps": "399049", "ups": "3.37", "wpb": "118358", "bsz": "256", "num_updates": "266800", "lr": "7.40606e-05", "gnorm": "0.964", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "79845"} +[2022-07-31 09:01:36,131][train_inner][INFO] - {"epoch": 6, "update": 5.188, "loss": "2.246", "ppl": "4.74", "wps": "396781", "ups": "3.36", "wpb": "118007", "bsz": "256", "num_updates": "267000", "lr": "7.40404e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "59", "gb_free": "25.6", "wall": "79905"} +[2022-07-31 09:01:57,299][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 09:02:35,668][train_inner][INFO] - {"epoch": 6, "update": 5.192, "loss": "2.245", "ppl": "4.74", "wps": "396889", "ups": "3.36", "wpb": "118146", "bsz": "256", "num_updates": "267200", "lr": "7.40202e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "26.2", "wall": "79964"} +[2022-07-31 09:03:35,218][train_inner][INFO] - {"epoch": 6, "update": 5.195, "loss": "2.244", "ppl": "4.74", "wps": "396276", "ups": "3.36", "wpb": "117991", "bsz": "256", "num_updates": "267400", "lr": "7.4e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "80024"} +[2022-07-31 09:04:34,264][train_inner][INFO] - {"epoch": 6, "update": 5.199, "loss": "2.245", "ppl": "4.74", "wps": "401252", "ups": "3.39", "wpb": "118461", "bsz": "256", "num_updates": "267600", "lr": "7.39798e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "80083"} +[2022-07-31 09:05:33,899][train_inner][INFO] - {"epoch": 6, "update": 5.203, "loss": "2.241", "ppl": "4.73", "wps": "396172", "ups": "3.35", "wpb": "118127", "bsz": "256", "num_updates": "267800", "lr": "7.39596e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "28.2", "wall": "80142"} +[2022-07-31 09:06:33,317][train_inner][INFO] - {"epoch": 6, "update": 5.207, "loss": "2.239", "ppl": "4.72", "wps": "397793", "ups": "3.37", "wpb": "118180", "bsz": "256", "num_updates": "268000", "lr": "7.39394e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "80202"} +[2022-07-31 09:07:32,649][train_inner][INFO] - {"epoch": 6, "update": 5.211, "loss": "2.239", "ppl": "4.72", "wps": "396845", "ups": "3.37", "wpb": "117727", "bsz": "256", "num_updates": "268200", "lr": "7.39192e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "80261"} +[2022-07-31 09:08:32,089][train_inner][INFO] - {"epoch": 6, "update": 5.215, "loss": "2.24", "ppl": "4.72", "wps": "398179", "ups": "3.36", "wpb": "118339", "bsz": "256", "num_updates": "268400", "lr": "7.3899e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "80321"} +[2022-07-31 09:09:31,704][train_inner][INFO] - {"epoch": 6, "update": 5.219, "loss": "2.239", "ppl": "4.72", "wps": "398370", "ups": "3.35", "wpb": "118742", "bsz": "255.9", "num_updates": "268600", "lr": "7.38788e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "35.3", "wall": "80380"} +[2022-07-31 09:10:31,136][train_inner][INFO] - {"epoch": 6, "update": 5.223, "loss": "2.24", "ppl": "4.72", "wps": "397678", "ups": "3.37", "wpb": "118174", "bsz": "256", "num_updates": "268800", "lr": "7.38586e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "80440"} +[2022-07-31 09:11:30,626][train_inner][INFO] - {"epoch": 6, "update": 5.227, "loss": "2.24", "ppl": "4.72", "wps": "398369", "ups": "3.36", "wpb": "118493", "bsz": "256", "num_updates": "269000", "lr": "7.38384e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "80499"} +[2022-07-31 09:12:29,770][train_inner][INFO] - {"epoch": 6, "update": 5.23, "loss": "2.242", "ppl": "4.73", "wps": "401441", "ups": "3.38", "wpb": "118713", "bsz": "256", "num_updates": "269200", "lr": "7.38182e-05", "gnorm": "0.963", "loss_scale": "64", "train_wall": "59", "gb_free": "24.1", "wall": "80558"} +[2022-07-31 09:13:29,191][train_inner][INFO] - {"epoch": 6, "update": 5.234, "loss": "2.245", "ppl": "4.74", "wps": "397579", "ups": "3.37", "wpb": "118123", "bsz": "255.9", "num_updates": "269400", "lr": "7.3798e-05", "gnorm": "0.968", "loss_scale": "64", "train_wall": "59", "gb_free": "21.4", "wall": "80618"} +[2022-07-31 09:14:28,235][train_inner][INFO] - {"epoch": 6, "update": 5.238, "loss": "2.239", "ppl": "4.72", "wps": "400403", "ups": "3.39", "wpb": "118205", "bsz": "256", "num_updates": "269600", "lr": "7.37778e-05", "gnorm": "0.965", "loss_scale": "64", "train_wall": "59", "gb_free": "25.7", "wall": "80677"} +[2022-07-31 09:14:59,143][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 09:15:27,886][train_inner][INFO] - {"epoch": 6, "update": 5.242, "loss": "2.244", "ppl": "4.74", "wps": "393280", "ups": "3.35", "wpb": "117297", "bsz": "256", "num_updates": "269800", "lr": "7.37576e-05", "gnorm": "0.971", "loss_scale": "32", "train_wall": "59", "gb_free": "27", "wall": "80736"} +[2022-07-31 09:16:27,145][train_inner][INFO] - {"epoch": 6, "update": 5.246, "loss": "2.241", "ppl": "4.73", "wps": "399953", "ups": "3.38", "wpb": "118503", "bsz": "256", "num_updates": "270000", "lr": "7.37374e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "80796"} +[2022-07-31 09:17:26,796][train_inner][INFO] - {"epoch": 6, "update": 5.25, "loss": "2.236", "ppl": "4.71", "wps": "397894", "ups": "3.35", "wpb": "118674", "bsz": "256", "num_updates": "270200", "lr": "7.37172e-05", "gnorm": "0.963", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "80855"} +[2022-07-31 09:18:25,812][train_inner][INFO] - {"epoch": 6, "update": 5.254, "loss": "2.23", "ppl": "4.69", "wps": "400825", "ups": "3.39", "wpb": "118275", "bsz": "256", "num_updates": "270400", "lr": "7.3697e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "80914"} +[2022-07-31 09:19:25,122][train_inner][INFO] - {"epoch": 6, "update": 5.258, "loss": "2.237", "ppl": "4.72", "wps": "398796", "ups": "3.37", "wpb": "118262", "bsz": "256", "num_updates": "270600", "lr": "7.36768e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "80974"} +[2022-07-31 09:20:24,350][train_inner][INFO] - {"epoch": 6, "update": 5.261, "loss": "2.24", "ppl": "4.72", "wps": "398189", "ups": "3.38", "wpb": "117919", "bsz": "256", "num_updates": "270800", "lr": "7.36566e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "81033"} +[2022-07-31 09:21:24,006][train_inner][INFO] - {"epoch": 6, "update": 5.265, "loss": "2.234", "ppl": "4.7", "wps": "399539", "ups": "3.35", "wpb": "119173", "bsz": "256", "num_updates": "271000", "lr": "7.36364e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "81092"} +[2022-07-31 09:22:24,757][train_inner][INFO] - {"epoch": 6, "update": 5.269, "loss": "2.242", "ppl": "4.73", "wps": "389139", "ups": "3.29", "wpb": "118202", "bsz": "256", "num_updates": "271200", "lr": "7.36162e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "60", "gb_free": "25.7", "wall": "81153"} +[2022-07-31 09:23:24,409][train_inner][INFO] - {"epoch": 6, "update": 5.273, "loss": "2.242", "ppl": "4.73", "wps": "395854", "ups": "3.35", "wpb": "118068", "bsz": "256", "num_updates": "271400", "lr": "7.3596e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "81213"} +[2022-07-31 09:24:23,830][train_inner][INFO] - {"epoch": 6, "update": 5.277, "loss": "2.235", "ppl": "4.71", "wps": "398769", "ups": "3.37", "wpb": "118475", "bsz": "256", "num_updates": "271600", "lr": "7.35758e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "81272"} +[2022-07-31 09:25:24,421][train_inner][INFO] - {"epoch": 6, "update": 5.281, "loss": "2.235", "ppl": "4.71", "wps": "389232", "ups": "3.3", "wpb": "117918", "bsz": "256", "num_updates": "271800", "lr": "7.35556e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "60", "gb_free": "21.7", "wall": "81333"} +[2022-07-31 09:26:23,852][train_inner][INFO] - {"epoch": 6, "update": 5.285, "loss": "2.235", "ppl": "4.71", "wps": "396988", "ups": "3.37", "wpb": "117966", "bsz": "256", "num_updates": "272000", "lr": "7.35354e-05", "gnorm": "0.969", "loss_scale": "64", "train_wall": "59", "gb_free": "26.9", "wall": "81392"} +[2022-07-31 09:27:23,082][train_inner][INFO] - {"epoch": 6, "update": 5.289, "loss": "2.234", "ppl": "4.7", "wps": "398451", "ups": "3.38", "wpb": "118000", "bsz": "256", "num_updates": "272200", "lr": "7.35152e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "81452"} +[2022-07-31 09:28:22,569][train_inner][INFO] - {"epoch": 6, "update": 5.293, "loss": "2.236", "ppl": "4.71", "wps": "398080", "ups": "3.36", "wpb": "118404", "bsz": "256", "num_updates": "272400", "lr": "7.34949e-05", "gnorm": "0.964", "loss_scale": "64", "train_wall": "59", "gb_free": "23.8", "wall": "81511"} +[2022-07-31 09:29:21,822][train_inner][INFO] - {"epoch": 6, "update": 5.296, "loss": "2.239", "ppl": "4.72", "wps": "400529", "ups": "3.38", "wpb": "118661", "bsz": "256", "num_updates": "272600", "lr": "7.34747e-05", "gnorm": "0.965", "loss_scale": "64", "train_wall": "59", "gb_free": "24.3", "wall": "81570"} +[2022-07-31 09:30:21,257][train_inner][INFO] - {"epoch": 6, "update": 5.3, "loss": "2.235", "ppl": "4.71", "wps": "399217", "ups": "3.37", "wpb": "118636", "bsz": "256", "num_updates": "272800", "lr": "7.34545e-05", "gnorm": "0.964", "loss_scale": "64", "train_wall": "59", "gb_free": "24.2", "wall": "81630"} +[2022-07-31 09:31:20,463][train_inner][INFO] - {"epoch": 6, "update": 5.304, "loss": "2.229", "ppl": "4.69", "wps": "399069", "ups": "3.38", "wpb": "118137", "bsz": "256", "num_updates": "273000", "lr": "7.34343e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "59", "gb_free": "25.8", "wall": "81689"} +[2022-07-31 09:32:19,852][train_inner][INFO] - {"epoch": 6, "update": 5.308, "loss": "2.234", "ppl": "4.7", "wps": "397322", "ups": "3.37", "wpb": "117981", "bsz": "256", "num_updates": "273200", "lr": "7.34141e-05", "gnorm": "0.967", "loss_scale": "64", "train_wall": "59", "gb_free": "21.9", "wall": "81748"} +[2022-07-31 09:32:34,521][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 09:33:19,323][train_inner][INFO] - {"epoch": 6, "update": 5.312, "loss": "2.237", "ppl": "4.71", "wps": "397705", "ups": "3.36", "wpb": "118259", "bsz": "256", "num_updates": "273400", "lr": "7.33939e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "81808"} +[2022-07-31 09:34:18,857][train_inner][INFO] - {"epoch": 6, "update": 5.316, "loss": "2.238", "ppl": "4.72", "wps": "396960", "ups": "3.36", "wpb": "118163", "bsz": "256", "num_updates": "273600", "lr": "7.33737e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "81867"} +[2022-07-31 09:35:18,966][train_inner][INFO] - {"epoch": 6, "update": 5.32, "loss": "2.238", "ppl": "4.72", "wps": "392545", "ups": "3.33", "wpb": "117976", "bsz": "256", "num_updates": "273800", "lr": "7.33535e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "60", "gb_free": "22", "wall": "81927"} +[2022-07-31 09:36:18,426][train_inner][INFO] - {"epoch": 6, "update": 5.324, "loss": "2.235", "ppl": "4.71", "wps": "399230", "ups": "3.36", "wpb": "118692", "bsz": "256", "num_updates": "274000", "lr": "7.33333e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "81987"} +[2022-07-31 09:37:17,731][train_inner][INFO] - {"epoch": 6, "update": 5.328, "loss": "2.231", "ppl": "4.69", "wps": "399596", "ups": "3.37", "wpb": "118488", "bsz": "256", "num_updates": "274200", "lr": "7.33131e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "82046"} +[2022-07-31 09:38:17,267][train_inner][INFO] - {"epoch": 6, "update": 5.331, "loss": "2.233", "ppl": "4.7", "wps": "398434", "ups": "3.36", "wpb": "118606", "bsz": "256", "num_updates": "274400", "lr": "7.32929e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "82106"} +[2022-07-31 09:39:16,637][train_inner][INFO] - {"epoch": 6, "update": 5.335, "loss": "2.233", "ppl": "4.7", "wps": "398821", "ups": "3.37", "wpb": "118390", "bsz": "256", "num_updates": "274600", "lr": "7.32727e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "82165"} +[2022-07-31 09:40:16,072][train_inner][INFO] - {"epoch": 6, "update": 5.339, "loss": "2.234", "ppl": "4.7", "wps": "397102", "ups": "3.37", "wpb": "118008", "bsz": "256", "num_updates": "274800", "lr": "7.32525e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "82225"} +[2022-07-31 09:41:15,378][train_inner][INFO] - {"epoch": 6, "update": 5.343, "loss": "2.233", "ppl": "4.7", "wps": "399045", "ups": "3.37", "wpb": "118328", "bsz": "256", "num_updates": "275000", "lr": "7.32323e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "26.5", "wall": "82284"} +[2022-07-31 09:42:14,887][train_inner][INFO] - {"epoch": 6, "update": 5.347, "loss": "2.234", "ppl": "4.7", "wps": "397940", "ups": "3.36", "wpb": "118405", "bsz": "256", "num_updates": "275200", "lr": "7.32121e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "27.4", "wall": "82343"} +[2022-07-31 09:43:14,185][train_inner][INFO] - {"epoch": 6, "update": 5.351, "loss": "2.23", "ppl": "4.69", "wps": "399244", "ups": "3.37", "wpb": "118369", "bsz": "256", "num_updates": "275400", "lr": "7.31919e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "59", "gb_free": "23.2", "wall": "82403"} +[2022-07-31 09:44:13,856][train_inner][INFO] - {"epoch": 6, "update": 5.355, "loss": "2.233", "ppl": "4.7", "wps": "394973", "ups": "3.35", "wpb": "117842", "bsz": "256", "num_updates": "275600", "lr": "7.31717e-05", "gnorm": "0.969", "loss_scale": "64", "train_wall": "59", "gb_free": "23.7", "wall": "82462"} +[2022-07-31 09:45:13,395][train_inner][INFO] - {"epoch": 6, "update": 5.359, "loss": "2.231", "ppl": "4.7", "wps": "398103", "ups": "3.36", "wpb": "118512", "bsz": "256", "num_updates": "275800", "lr": "7.31515e-05", "gnorm": "0.967", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "82522"} +[2022-07-31 09:46:12,551][train_inner][INFO] - {"epoch": 6, "update": 5.363, "loss": "2.233", "ppl": "4.7", "wps": "400497", "ups": "3.38", "wpb": "118459", "bsz": "256", "num_updates": "276000", "lr": "7.31313e-05", "gnorm": "0.965", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "82581"} +[2022-07-31 09:47:12,002][train_inner][INFO] - {"epoch": 6, "update": 5.366, "loss": "2.235", "ppl": "4.71", "wps": "395673", "ups": "3.36", "wpb": "117615", "bsz": "256", "num_updates": "276200", "lr": "7.31111e-05", "gnorm": "0.97", "loss_scale": "64", "train_wall": "59", "gb_free": "22", "wall": "82640"} +[2022-07-31 09:47:31,438][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 09:48:11,776][train_inner][INFO] - {"epoch": 6, "update": 5.37, "loss": "2.236", "ppl": "4.71", "wps": "395262", "ups": "3.35", "wpb": "118132", "bsz": "256", "num_updates": "276400", "lr": "7.30909e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "82700"} +[2022-07-31 09:49:11,448][train_inner][INFO] - {"epoch": 6, "update": 5.374, "loss": "2.232", "ppl": "4.7", "wps": "398379", "ups": "3.35", "wpb": "118859", "bsz": "256", "num_updates": "276600", "lr": "7.30707e-05", "gnorm": "0.961", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "82760"} +[2022-07-31 09:50:10,875][train_inner][INFO] - {"epoch": 6, "update": 5.378, "loss": "2.229", "ppl": "4.69", "wps": "399455", "ups": "3.37", "wpb": "118692", "bsz": "256", "num_updates": "276800", "lr": "7.30505e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "82819"} +[2022-07-31 09:51:11,237][train_inner][INFO] - {"epoch": 6, "update": 5.382, "loss": "2.235", "ppl": "4.71", "wps": "391345", "ups": "3.31", "wpb": "118111", "bsz": "256", "num_updates": "277000", "lr": "7.30303e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "60", "gb_free": "22.9", "wall": "82880"} +[2022-07-31 09:52:10,873][train_inner][INFO] - {"epoch": 6, "update": 5.386, "loss": "2.223", "ppl": "4.67", "wps": "398156", "ups": "3.35", "wpb": "118721", "bsz": "256", "num_updates": "277200", "lr": "7.30101e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "24.1", "wall": "82939"} +[2022-07-31 09:53:10,518][train_inner][INFO] - {"epoch": 6, "update": 5.39, "loss": "2.233", "ppl": "4.7", "wps": "394987", "ups": "3.35", "wpb": "117793", "bsz": "256", "num_updates": "277400", "lr": "7.29899e-05", "gnorm": "0.97", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "82999"} +[2022-07-31 09:54:10,050][train_inner][INFO] - {"epoch": 6, "update": 5.394, "loss": "2.223", "ppl": "4.67", "wps": "398746", "ups": "3.36", "wpb": "118690", "bsz": "256", "num_updates": "277600", "lr": "7.29697e-05", "gnorm": "0.965", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "83058"} +[2022-07-31 09:55:09,658][train_inner][INFO] - {"epoch": 6, "update": 5.397, "loss": "2.23", "ppl": "4.69", "wps": "397650", "ups": "3.36", "wpb": "118516", "bsz": "256", "num_updates": "277800", "lr": "7.29495e-05", "gnorm": "0.964", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "83118"} +[2022-07-31 09:56:08,844][train_inner][INFO] - {"epoch": 6, "update": 5.401, "loss": "2.232", "ppl": "4.7", "wps": "399370", "ups": "3.38", "wpb": "118184", "bsz": "256", "num_updates": "278000", "lr": "7.29293e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "83177"} +[2022-07-31 09:57:08,026][train_inner][INFO] - {"epoch": 6, "update": 5.405, "loss": "2.232", "ppl": "4.7", "wps": "399810", "ups": "3.38", "wpb": "118308", "bsz": "256", "num_updates": "278200", "lr": "7.29091e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "25.4", "wall": "83236"} +[2022-07-31 09:58:07,494][train_inner][INFO] - {"epoch": 6, "update": 5.409, "loss": "2.23", "ppl": "4.69", "wps": "397547", "ups": "3.36", "wpb": "118204", "bsz": "256", "num_updates": "278400", "lr": "7.28889e-05", "gnorm": "0.967", "loss_scale": "64", "train_wall": "59", "gb_free": "23.6", "wall": "83296"} +[2022-07-31 09:59:06,961][train_inner][INFO] - {"epoch": 6, "update": 5.413, "loss": "2.232", "ppl": "4.7", "wps": "399505", "ups": "3.36", "wpb": "118788", "bsz": "256", "num_updates": "278600", "lr": "7.28687e-05", "gnorm": "0.964", "loss_scale": "64", "train_wall": "59", "gb_free": "29.8", "wall": "83355"} +[2022-07-31 10:00:05,555][train_inner][INFO] - {"epoch": 6, "update": 5.417, "loss": "2.235", "ppl": "4.71", "wps": "401282", "ups": "3.41", "wpb": "117562", "bsz": "256", "num_updates": "278800", "lr": "7.28485e-05", "gnorm": "0.972", "loss_scale": "64", "train_wall": "58", "gb_free": "21.3", "wall": "83414"} +[2022-07-31 10:00:40,011][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 10:01:05,246][train_inner][INFO] - {"epoch": 6, "update": 5.421, "loss": "2.234", "ppl": "4.7", "wps": "396973", "ups": "3.35", "wpb": "118478", "bsz": "256", "num_updates": "279000", "lr": "7.28283e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "24.1", "wall": "83474"} +[2022-07-31 10:02:04,758][train_inner][INFO] - {"epoch": 6, "update": 5.425, "loss": "2.237", "ppl": "4.71", "wps": "396108", "ups": "3.36", "wpb": "117864", "bsz": "256", "num_updates": "279200", "lr": "7.28081e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "83533"} +[2022-07-31 10:03:04,401][train_inner][INFO] - {"epoch": 6, "update": 5.429, "loss": "2.222", "ppl": "4.67", "wps": "399301", "ups": "3.35", "wpb": "119078", "bsz": "256", "num_updates": "279400", "lr": "7.27879e-05", "gnorm": "0.962", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "83593"} +[2022-07-31 10:04:03,697][train_inner][INFO] - {"epoch": 6, "update": 5.432, "loss": "2.232", "ppl": "4.7", "wps": "400494", "ups": "3.37", "wpb": "118736", "bsz": "256", "num_updates": "279600", "lr": "7.27677e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "83652"} +[2022-07-31 10:05:03,331][train_inner][INFO] - {"epoch": 6, "update": 5.436, "loss": "2.228", "ppl": "4.69", "wps": "396499", "ups": "3.35", "wpb": "118225", "bsz": "256", "num_updates": "279800", "lr": "7.27475e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "25.5", "wall": "83712"} +[2022-07-31 10:06:02,866][train_inner][INFO] - {"epoch": 6, "update": 5.44, "loss": "2.227", "ppl": "4.68", "wps": "398057", "ups": "3.36", "wpb": "118489", "bsz": "256", "num_updates": "280000", "lr": "7.27273e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "83771"} +[2022-07-31 10:07:02,601][train_inner][INFO] - {"epoch": 6, "update": 5.444, "loss": "2.231", "ppl": "4.69", "wps": "396440", "ups": "3.35", "wpb": "118406", "bsz": "256", "num_updates": "280200", "lr": "7.27071e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "83831"} +[2022-07-31 10:07:59,961][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 10:08:02,637][train_inner][INFO] - {"epoch": 6, "update": 5.448, "loss": "2.223", "ppl": "4.67", "wps": "396661", "ups": "3.33", "wpb": "119069", "bsz": "256", "num_updates": "280400", "lr": "7.26869e-05", "gnorm": "0.962", "loss_scale": "16", "train_wall": "60", "gb_free": "24.5", "wall": "83891"} +[2022-07-31 10:09:02,248][train_inner][INFO] - {"epoch": 6, "update": 5.452, "loss": "2.224", "ppl": "4.67", "wps": "398060", "ups": "3.36", "wpb": "118644", "bsz": "256", "num_updates": "280600", "lr": "7.26667e-05", "gnorm": "0.967", "loss_scale": "16", "train_wall": "59", "gb_free": "25.4", "wall": "83951"} +[2022-07-31 10:10:01,916][train_inner][INFO] - {"epoch": 6, "update": 5.456, "loss": "2.23", "ppl": "4.69", "wps": "398049", "ups": "3.35", "wpb": "118754", "bsz": "256", "num_updates": "280800", "lr": "7.26465e-05", "gnorm": "0.966", "loss_scale": "16", "train_wall": "59", "gb_free": "24.2", "wall": "84010"} +[2022-07-31 10:11:01,169][train_inner][INFO] - {"epoch": 6, "update": 5.46, "loss": "2.228", "ppl": "4.69", "wps": "400019", "ups": "3.38", "wpb": "118510", "bsz": "256", "num_updates": "281000", "lr": "7.26263e-05", "gnorm": "0.966", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "84070"} +[2022-07-31 10:12:00,692][train_inner][INFO] - {"epoch": 6, "update": 5.464, "loss": "2.231", "ppl": "4.69", "wps": "395235", "ups": "3.36", "wpb": "117628", "bsz": "256", "num_updates": "281200", "lr": "7.26061e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "27.9", "wall": "84129"} +[2022-07-31 10:13:00,460][train_inner][INFO] - {"epoch": 6, "update": 5.467, "loss": "2.226", "ppl": "4.68", "wps": "396648", "ups": "3.35", "wpb": "118534", "bsz": "256", "num_updates": "281400", "lr": "7.25859e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "84189"} +[2022-07-31 10:13:59,859][train_inner][INFO] - {"epoch": 6, "update": 5.471, "loss": "2.229", "ppl": "4.69", "wps": "398088", "ups": "3.37", "wpb": "118228", "bsz": "256", "num_updates": "281600", "lr": "7.25657e-05", "gnorm": "0.968", "loss_scale": "16", "train_wall": "59", "gb_free": "27.2", "wall": "84248"} +[2022-07-31 10:14:58,990][train_inner][INFO] - {"epoch": 6, "update": 5.475, "loss": "2.23", "ppl": "4.69", "wps": "399163", "ups": "3.38", "wpb": "118014", "bsz": "256", "num_updates": "281800", "lr": "7.25455e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "26", "wall": "84307"} +[2022-07-31 10:15:58,538][train_inner][INFO] - {"epoch": 6, "update": 5.479, "loss": "2.226", "ppl": "4.68", "wps": "397177", "ups": "3.36", "wpb": "118256", "bsz": "256", "num_updates": "282000", "lr": "7.25253e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "84367"} +[2022-07-31 10:16:57,688][train_inner][INFO] - {"epoch": 6, "update": 5.483, "loss": "2.226", "ppl": "4.68", "wps": "401266", "ups": "3.38", "wpb": "118673", "bsz": "256", "num_updates": "282200", "lr": "7.25051e-05", "gnorm": "0.967", "loss_scale": "16", "train_wall": "59", "gb_free": "30.9", "wall": "84426"} +[2022-07-31 10:17:57,196][train_inner][INFO] - {"epoch": 6, "update": 5.487, "loss": "2.227", "ppl": "4.68", "wps": "397733", "ups": "3.36", "wpb": "118340", "bsz": "256", "num_updates": "282400", "lr": "7.24848e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "84486"} +[2022-07-31 10:18:56,397][train_inner][INFO] - {"epoch": 6, "update": 5.491, "loss": "2.227", "ppl": "4.68", "wps": "400222", "ups": "3.38", "wpb": "118467", "bsz": "256", "num_updates": "282600", "lr": "7.24646e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "84545"} +[2022-07-31 10:19:55,093][train_inner][INFO] - {"epoch": 6, "update": 5.495, "loss": "2.235", "ppl": "4.71", "wps": "403072", "ups": "3.41", "wpb": "118294", "bsz": "256", "num_updates": "282800", "lr": "7.24444e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "58", "gb_free": "29.3", "wall": "84604"} +[2022-07-31 10:20:54,255][train_inner][INFO] - {"epoch": 6, "update": 5.499, "loss": "2.228", "ppl": "4.69", "wps": "397353", "ups": "3.38", "wpb": "117541", "bsz": "256", "num_updates": "283000", "lr": "7.24242e-05", "gnorm": "0.974", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "84663"} +[2022-07-31 10:21:53,489][train_inner][INFO] - {"epoch": 6, "update": 5.502, "loss": "2.226", "ppl": "4.68", "wps": "400188", "ups": "3.38", "wpb": "118521", "bsz": "256", "num_updates": "283200", "lr": "7.2404e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "84722"} +[2022-07-31 10:22:53,016][train_inner][INFO] - {"epoch": 6, "update": 5.506, "loss": "2.228", "ppl": "4.69", "wps": "394712", "ups": "3.36", "wpb": "117480", "bsz": "256", "num_updates": "283400", "lr": "7.23838e-05", "gnorm": "0.973", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "84781"} +[2022-07-31 10:23:52,317][train_inner][INFO] - {"epoch": 6, "update": 5.51, "loss": "2.234", "ppl": "4.7", "wps": "399245", "ups": "3.37", "wpb": "118378", "bsz": "256", "num_updates": "283600", "lr": "7.23636e-05", "gnorm": "0.97", "loss_scale": "32", "train_wall": "59", "gb_free": "23.8", "wall": "84841"} +[2022-07-31 10:24:51,150][train_inner][INFO] - {"epoch": 6, "update": 5.514, "loss": "2.228", "ppl": "4.68", "wps": "401983", "ups": "3.4", "wpb": "118248", "bsz": "256", "num_updates": "283800", "lr": "7.23434e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "58", "gb_free": "21.3", "wall": "84900"} +[2022-07-31 10:25:51,595][train_inner][INFO] - {"epoch": 6, "update": 5.518, "loss": "2.227", "ppl": "4.68", "wps": "390319", "ups": "3.31", "wpb": "117964", "bsz": "256", "num_updates": "284000", "lr": "7.23232e-05", "gnorm": "0.97", "loss_scale": "32", "train_wall": "60", "gb_free": "21.7", "wall": "84960"} +[2022-07-31 10:26:50,834][train_inner][INFO] - {"epoch": 6, "update": 5.522, "loss": "2.224", "ppl": "4.67", "wps": "399877", "ups": "3.38", "wpb": "118440", "bsz": "256", "num_updates": "284200", "lr": "7.2303e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "24.4", "wall": "85019"} +[2022-07-31 10:27:50,412][train_inner][INFO] - {"epoch": 6, "update": 5.526, "loss": "2.235", "ppl": "4.71", "wps": "397368", "ups": "3.36", "wpb": "118371", "bsz": "256", "num_updates": "284400", "lr": "7.22828e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "85079"} +[2022-07-31 10:28:49,704][train_inner][INFO] - {"epoch": 6, "update": 5.53, "loss": "2.229", "ppl": "4.69", "wps": "398870", "ups": "3.37", "wpb": "118248", "bsz": "256", "num_updates": "284600", "lr": "7.22626e-05", "gnorm": "0.967", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "85138"} +[2022-07-31 10:29:48,794][train_inner][INFO] - {"epoch": 6, "update": 5.533, "loss": "2.224", "ppl": "4.67", "wps": "401373", "ups": "3.38", "wpb": "118585", "bsz": "256", "num_updates": "284800", "lr": "7.22424e-05", "gnorm": "0.968", "loss_scale": "64", "train_wall": "59", "gb_free": "27.7", "wall": "85197"} +[2022-07-31 10:30:48,361][train_inner][INFO] - {"epoch": 6, "update": 5.537, "loss": "2.226", "ppl": "4.68", "wps": "395923", "ups": "3.36", "wpb": "117919", "bsz": "256", "num_updates": "285000", "lr": "7.22222e-05", "gnorm": "0.97", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "85257"} +[2022-07-31 10:31:47,714][train_inner][INFO] - {"epoch": 6, "update": 5.541, "loss": "2.224", "ppl": "4.67", "wps": "399130", "ups": "3.37", "wpb": "118447", "bsz": "256", "num_updates": "285200", "lr": "7.2202e-05", "gnorm": "0.969", "loss_scale": "64", "train_wall": "59", "gb_free": "22.3", "wall": "85316"} +[2022-07-31 10:32:46,914][train_inner][INFO] - {"epoch": 6, "update": 5.545, "loss": "2.219", "ppl": "4.66", "wps": "401476", "ups": "3.38", "wpb": "118836", "bsz": "256", "num_updates": "285400", "lr": "7.21818e-05", "gnorm": "0.964", "loss_scale": "64", "train_wall": "59", "gb_free": "22.4", "wall": "85375"} +[2022-07-31 10:33:46,496][train_inner][INFO] - {"epoch": 6, "update": 5.549, "loss": "2.22", "ppl": "4.66", "wps": "395956", "ups": "3.36", "wpb": "117960", "bsz": "256", "num_updates": "285600", "lr": "7.21616e-05", "gnorm": "0.969", "loss_scale": "64", "train_wall": "59", "gb_free": "21.8", "wall": "85435"} +[2022-07-31 10:34:45,373][train_inner][INFO] - {"epoch": 6, "update": 5.553, "loss": "2.226", "ppl": "4.68", "wps": "402755", "ups": "3.4", "wpb": "118563", "bsz": "256", "num_updates": "285800", "lr": "7.21414e-05", "gnorm": "0.966", "loss_scale": "64", "train_wall": "59", "gb_free": "22.8", "wall": "85494"} +[2022-07-31 10:35:45,111][train_inner][INFO] - {"epoch": 6, "update": 5.557, "loss": "2.222", "ppl": "4.67", "wps": "396248", "ups": "3.35", "wpb": "118355", "bsz": "256", "num_updates": "286000", "lr": "7.21212e-05", "gnorm": "0.969", "loss_scale": "64", "train_wall": "59", "gb_free": "23.8", "wall": "85554"} +[2022-07-31 10:36:44,482][train_inner][INFO] - {"epoch": 6, "update": 5.561, "loss": "2.23", "ppl": "4.69", "wps": "397485", "ups": "3.37", "wpb": "117995", "bsz": "256", "num_updates": "286200", "lr": "7.2101e-05", "gnorm": "0.969", "loss_scale": "64", "train_wall": "59", "gb_free": "24", "wall": "85613"} +[2022-07-31 10:37:43,405][train_inner][INFO] - {"epoch": 6, "update": 5.565, "loss": "2.222", "ppl": "4.67", "wps": "403068", "ups": "3.39", "wpb": "118748", "bsz": "256", "num_updates": "286400", "lr": "7.20808e-05", "gnorm": "0.967", "loss_scale": "64", "train_wall": "59", "gb_free": "21.6", "wall": "85672"} +[2022-07-31 10:38:18,538][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 10:38:42,893][train_inner][INFO] - {"epoch": 6, "update": 5.568, "loss": "2.225", "ppl": "4.67", "wps": "394744", "ups": "3.36", "wpb": "117412", "bsz": "256", "num_updates": "286600", "lr": "7.20606e-05", "gnorm": "0.972", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "85731"} +[2022-07-31 10:39:42,548][train_inner][INFO] - {"epoch": 6, "update": 5.572, "loss": "2.226", "ppl": "4.68", "wps": "397672", "ups": "3.35", "wpb": "118615", "bsz": "256", "num_updates": "286800", "lr": "7.20404e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "85791"} +[2022-07-31 10:40:41,939][train_inner][INFO] - {"epoch": 6, "update": 5.576, "loss": "2.224", "ppl": "4.67", "wps": "399694", "ups": "3.37", "wpb": "118690", "bsz": "256", "num_updates": "287000", "lr": "7.20202e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "85850"} +[2022-07-31 10:41:41,634][train_inner][INFO] - {"epoch": 6, "update": 5.58, "loss": "2.231", "ppl": "4.69", "wps": "396021", "ups": "3.35", "wpb": "118203", "bsz": "256", "num_updates": "287200", "lr": "7.2e-05", "gnorm": "0.972", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "85910"} +[2022-07-31 10:42:41,298][train_inner][INFO] - {"epoch": 6, "update": 5.584, "loss": "2.225", "ppl": "4.68", "wps": "395997", "ups": "3.35", "wpb": "118132", "bsz": "256", "num_updates": "287400", "lr": "7.19798e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "85970"} +[2022-07-31 10:43:41,543][train_inner][INFO] - {"epoch": 6, "update": 5.588, "loss": "2.22", "ppl": "4.66", "wps": "393116", "ups": "3.32", "wpb": "118416", "bsz": "256", "num_updates": "287600", "lr": "7.19596e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "60", "gb_free": "29.2", "wall": "86030"} +[2022-07-31 10:44:41,162][train_inner][INFO] - {"epoch": 6, "update": 5.592, "loss": "2.224", "ppl": "4.67", "wps": "395581", "ups": "3.35", "wpb": "117921", "bsz": "256", "num_updates": "287800", "lr": "7.19394e-05", "gnorm": "0.971", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "86090"} +[2022-07-31 10:45:40,790][train_inner][INFO] - {"epoch": 6, "update": 5.596, "loss": "2.218", "ppl": "4.65", "wps": "396819", "ups": "3.35", "wpb": "118307", "bsz": "256", "num_updates": "288000", "lr": "7.19192e-05", "gnorm": "0.967", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "86149"} +[2022-07-31 10:46:40,010][train_inner][INFO] - {"epoch": 6, "update": 5.6, "loss": "2.224", "ppl": "4.67", "wps": "399076", "ups": "3.38", "wpb": "118165", "bsz": "256", "num_updates": "288200", "lr": "7.1899e-05", "gnorm": "0.969", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "86208"} +[2022-07-31 10:47:39,827][train_inner][INFO] - {"epoch": 6, "update": 5.603, "loss": "2.219", "ppl": "4.66", "wps": "395582", "ups": "3.34", "wpb": "118313", "bsz": "256", "num_updates": "288400", "lr": "7.18788e-05", "gnorm": "0.972", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "86268"} +[2022-07-31 10:48:39,208][train_inner][INFO] - {"epoch": 6, "update": 5.607, "loss": "2.229", "ppl": "4.69", "wps": "397747", "ups": "3.37", "wpb": "118092", "bsz": "256", "num_updates": "288600", "lr": "7.18586e-05", "gnorm": "0.971", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "86328"} +[2022-07-31 10:49:34,899][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 10:49:39,134][train_inner][INFO] - {"epoch": 6, "update": 5.611, "loss": "2.226", "ppl": "4.68", "wps": "395759", "ups": "3.34", "wpb": "118580", "bsz": "256", "num_updates": "288800", "lr": "7.18384e-05", "gnorm": "0.97", "loss_scale": "32", "train_wall": "60", "gb_free": "23.8", "wall": "86388"} +[2022-07-31 10:50:29,514][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 10:50:38,997][train_inner][INFO] - {"epoch": 6, "update": 5.615, "loss": "2.225", "ppl": "4.67", "wps": "395996", "ups": "3.34", "wpb": "118528", "bsz": "256", "num_updates": "289000", "lr": "7.18182e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "60", "gb_free": "28.8", "wall": "86447"} +[2022-07-31 10:51:38,237][train_inner][INFO] - {"epoch": 6, "update": 5.619, "loss": "2.22", "ppl": "4.66", "wps": "400956", "ups": "3.38", "wpb": "118762", "bsz": "256", "num_updates": "289200", "lr": "7.1798e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "86507"} +[2022-07-31 10:52:37,815][train_inner][INFO] - {"epoch": 6, "update": 5.623, "loss": "2.223", "ppl": "4.67", "wps": "396373", "ups": "3.36", "wpb": "118075", "bsz": "256", "num_updates": "289400", "lr": "7.17778e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "86566"} +[2022-07-31 10:53:37,476][train_inner][INFO] - {"epoch": 6, "update": 5.627, "loss": "2.228", "ppl": "4.69", "wps": "397130", "ups": "3.35", "wpb": "118464", "bsz": "256", "num_updates": "289600", "lr": "7.17576e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "23.3", "wall": "86626"} +[2022-07-31 10:54:36,853][train_inner][INFO] - {"epoch": 6, "update": 5.631, "loss": "2.227", "ppl": "4.68", "wps": "397422", "ups": "3.37", "wpb": "117989", "bsz": "256", "num_updates": "289800", "lr": "7.17374e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "23.9", "wall": "86685"} +[2022-07-31 10:55:36,222][train_inner][INFO] - {"epoch": 6, "update": 5.635, "loss": "2.221", "ppl": "4.66", "wps": "397635", "ups": "3.37", "wpb": "118034", "bsz": "256", "num_updates": "290000", "lr": "7.17172e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "86745"} +[2022-07-31 10:56:36,008][train_inner][INFO] - {"epoch": 6, "update": 5.638, "loss": "2.221", "ppl": "4.66", "wps": "396900", "ups": "3.35", "wpb": "118644", "bsz": "256", "num_updates": "290200", "lr": "7.1697e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "24.9", "wall": "86804"} +[2022-07-31 10:57:36,837][train_inner][INFO] - {"epoch": 6, "update": 5.642, "loss": "2.224", "ppl": "4.67", "wps": "389534", "ups": "3.29", "wpb": "118474", "bsz": "256", "num_updates": "290400", "lr": "7.16768e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "60", "gb_free": "21.6", "wall": "86865"} +[2022-07-31 10:58:36,316][train_inner][INFO] - {"epoch": 6, "update": 5.646, "loss": "2.221", "ppl": "4.66", "wps": "398140", "ups": "3.36", "wpb": "118405", "bsz": "256", "num_updates": "290600", "lr": "7.16566e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "25.2", "wall": "86925"} +[2022-07-31 10:59:35,905][train_inner][INFO] - {"epoch": 6, "update": 5.65, "loss": "2.22", "ppl": "4.66", "wps": "396462", "ups": "3.36", "wpb": "118123", "bsz": "256", "num_updates": "290800", "lr": "7.16364e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "86984"} +[2022-07-31 11:00:35,036][train_inner][INFO] - {"epoch": 6, "update": 5.654, "loss": "2.229", "ppl": "4.69", "wps": "398284", "ups": "3.38", "wpb": "117754", "bsz": "256", "num_updates": "291000", "lr": "7.16162e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "31.9", "wall": "87043"} +[2022-07-31 11:01:34,581][train_inner][INFO] - {"epoch": 6, "update": 5.658, "loss": "2.221", "ppl": "4.66", "wps": "397936", "ups": "3.36", "wpb": "118474", "bsz": "256", "num_updates": "291200", "lr": "7.1596e-05", "gnorm": "0.97", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "87103"} +[2022-07-31 11:02:34,552][train_inner][INFO] - {"epoch": 6, "update": 5.662, "loss": "2.219", "ppl": "4.66", "wps": "394715", "ups": "3.33", "wpb": "118357", "bsz": "256", "num_updates": "291400", "lr": "7.15758e-05", "gnorm": "0.968", "loss_scale": "32", "train_wall": "60", "gb_free": "23.5", "wall": "87163"} +[2022-07-31 11:02:38,137][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 11:03:34,392][train_inner][INFO] - {"epoch": 6, "update": 5.666, "loss": "2.218", "ppl": "4.65", "wps": "394556", "ups": "3.34", "wpb": "118051", "bsz": "256", "num_updates": "291600", "lr": "7.15556e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "60", "gb_free": "23.6", "wall": "87223"} +[2022-07-31 11:04:33,859][train_inner][INFO] - {"epoch": 6, "update": 5.67, "loss": "2.227", "ppl": "4.68", "wps": "396289", "ups": "3.36", "wpb": "117829", "bsz": "256", "num_updates": "291800", "lr": "7.15354e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "87282"} +[2022-07-31 11:04:45,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 11:05:34,046][train_inner][INFO] - {"epoch": 6, "update": 5.673, "loss": "2.222", "ppl": "4.67", "wps": "393956", "ups": "3.32", "wpb": "118556", "bsz": "256", "num_updates": "292000", "lr": "7.15152e-05", "gnorm": "0.97", "loss_scale": "8", "train_wall": "60", "gb_free": "21.6", "wall": "87342"} +[2022-07-31 11:06:33,990][train_inner][INFO] - {"epoch": 6, "update": 5.677, "loss": "2.22", "ppl": "4.66", "wps": "394216", "ups": "3.34", "wpb": "118153", "bsz": "256", "num_updates": "292200", "lr": "7.14949e-05", "gnorm": "0.974", "loss_scale": "8", "train_wall": "60", "gb_free": "25.4", "wall": "87402"} +[2022-07-31 11:07:33,621][train_inner][INFO] - {"epoch": 6, "update": 5.681, "loss": "2.219", "ppl": "4.65", "wps": "397750", "ups": "3.35", "wpb": "118590", "bsz": "256", "num_updates": "292400", "lr": "7.14747e-05", "gnorm": "0.969", "loss_scale": "8", "train_wall": "59", "gb_free": "25.7", "wall": "87462"} +[2022-07-31 11:08:33,093][train_inner][INFO] - {"epoch": 6, "update": 5.685, "loss": "2.213", "ppl": "4.64", "wps": "399258", "ups": "3.36", "wpb": "118723", "bsz": "256", "num_updates": "292600", "lr": "7.14545e-05", "gnorm": "0.966", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "87522"} +[2022-07-31 11:09:32,625][train_inner][INFO] - {"epoch": 6, "update": 5.689, "loss": "2.223", "ppl": "4.67", "wps": "397623", "ups": "3.36", "wpb": "118356", "bsz": "256", "num_updates": "292800", "lr": "7.14343e-05", "gnorm": "0.971", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "87581"} +[2022-07-31 11:10:32,216][train_inner][INFO] - {"epoch": 6, "update": 5.693, "loss": "2.221", "ppl": "4.66", "wps": "396531", "ups": "3.36", "wpb": "118147", "bsz": "256", "num_updates": "293000", "lr": "7.14141e-05", "gnorm": "0.971", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "87641"} +[2022-07-31 11:11:33,047][train_inner][INFO] - {"epoch": 6, "update": 5.697, "loss": "2.221", "ppl": "4.66", "wps": "387780", "ups": "3.29", "wpb": "117944", "bsz": "256", "num_updates": "293200", "lr": "7.13939e-05", "gnorm": "0.973", "loss_scale": "8", "train_wall": "61", "gb_free": "23", "wall": "87701"} +[2022-07-31 11:12:32,668][train_inner][INFO] - {"epoch": 6, "update": 5.701, "loss": "2.218", "ppl": "4.65", "wps": "397521", "ups": "3.35", "wpb": "118504", "bsz": "256", "num_updates": "293400", "lr": "7.13737e-05", "gnorm": "0.971", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "87761"} +[2022-07-31 11:13:31,577][train_inner][INFO] - {"epoch": 6, "update": 5.704, "loss": "2.215", "ppl": "4.64", "wps": "400170", "ups": "3.4", "wpb": "117866", "bsz": "256", "num_updates": "293600", "lr": "7.13535e-05", "gnorm": "0.971", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "87820"} +[2022-07-31 11:14:31,187][train_inner][INFO] - {"epoch": 6, "update": 5.708, "loss": "2.219", "ppl": "4.65", "wps": "396178", "ups": "3.36", "wpb": "118081", "bsz": "256", "num_updates": "293800", "lr": "7.13333e-05", "gnorm": "0.974", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "87880"} +[2022-07-31 11:15:30,589][train_inner][INFO] - {"epoch": 6, "update": 5.712, "loss": "2.214", "ppl": "4.64", "wps": "399143", "ups": "3.37", "wpb": "118549", "bsz": "256", "num_updates": "294000", "lr": "7.13131e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "87939"} +[2022-07-31 11:16:30,231][train_inner][INFO] - {"epoch": 6, "update": 5.716, "loss": "2.217", "ppl": "4.65", "wps": "397501", "ups": "3.35", "wpb": "118538", "bsz": "256", "num_updates": "294200", "lr": "7.12929e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "87999"} +[2022-07-31 11:17:29,950][train_inner][INFO] - {"epoch": 6, "update": 5.72, "loss": "2.222", "ppl": "4.67", "wps": "396882", "ups": "3.35", "wpb": "118507", "bsz": "256", "num_updates": "294400", "lr": "7.12727e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "88058"} +[2022-07-31 11:18:29,325][train_inner][INFO] - {"epoch": 6, "update": 5.724, "loss": "2.224", "ppl": "4.67", "wps": "396064", "ups": "3.37", "wpb": "117580", "bsz": "256", "num_updates": "294600", "lr": "7.12525e-05", "gnorm": "0.979", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "88118"} +[2022-07-31 11:19:29,159][train_inner][INFO] - {"epoch": 6, "update": 5.728, "loss": "2.219", "ppl": "4.65", "wps": "395505", "ups": "3.34", "wpb": "118322", "bsz": "256", "num_updates": "294800", "lr": "7.12323e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "60", "gb_free": "23.7", "wall": "88178"} +[2022-07-31 11:20:28,888][train_inner][INFO] - {"epoch": 6, "update": 5.732, "loss": "2.217", "ppl": "4.65", "wps": "397588", "ups": "3.35", "wpb": "118738", "bsz": "256", "num_updates": "295000", "lr": "7.12121e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "88237"} +[2022-07-31 11:21:28,564][train_inner][INFO] - {"epoch": 6, "update": 5.736, "loss": "2.215", "ppl": "4.64", "wps": "398361", "ups": "3.35", "wpb": "118862", "bsz": "256", "num_updates": "295200", "lr": "7.11919e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "24", "wall": "88297"} +[2022-07-31 11:21:47,865][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 11:22:28,337][train_inner][INFO] - {"epoch": 6, "update": 5.739, "loss": "2.22", "ppl": "4.66", "wps": "395842", "ups": "3.35", "wpb": "118301", "bsz": "256", "num_updates": "295400", "lr": "7.11717e-05", "gnorm": "0.995", "loss_scale": "8", "train_wall": "59", "gb_free": "24.3", "wall": "88357"} +[2022-07-31 11:23:27,863][train_inner][INFO] - {"epoch": 6, "update": 5.743, "loss": "2.22", "ppl": "4.66", "wps": "394910", "ups": "3.36", "wpb": "117537", "bsz": "256", "num_updates": "295600", "lr": "7.11515e-05", "gnorm": "0.975", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "88416"} +[2022-07-31 11:24:26,902][train_inner][INFO] - {"epoch": 6, "update": 5.747, "loss": "2.215", "ppl": "4.64", "wps": "401203", "ups": "3.39", "wpb": "118432", "bsz": "256", "num_updates": "295800", "lr": "7.11313e-05", "gnorm": "0.973", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "88475"} +[2022-07-31 11:25:27,250][train_inner][INFO] - {"epoch": 6, "update": 5.751, "loss": "2.216", "ppl": "4.65", "wps": "391507", "ups": "3.31", "wpb": "118134", "bsz": "256", "num_updates": "296000", "lr": "7.11111e-05", "gnorm": "0.972", "loss_scale": "8", "train_wall": "60", "gb_free": "23", "wall": "88536"} +[2022-07-31 11:26:26,652][train_inner][INFO] - {"epoch": 6, "update": 5.755, "loss": "2.22", "ppl": "4.66", "wps": "398480", "ups": "3.37", "wpb": "118351", "bsz": "256", "num_updates": "296200", "lr": "7.10909e-05", "gnorm": "0.973", "loss_scale": "8", "train_wall": "59", "gb_free": "25.3", "wall": "88595"} +[2022-07-31 11:27:26,320][train_inner][INFO] - {"epoch": 6, "update": 5.759, "loss": "2.219", "ppl": "4.65", "wps": "396270", "ups": "3.35", "wpb": "118222", "bsz": "256", "num_updates": "296400", "lr": "7.10707e-05", "gnorm": "0.973", "loss_scale": "8", "train_wall": "59", "gb_free": "23.1", "wall": "88655"} +[2022-07-31 11:28:25,994][train_inner][INFO] - {"epoch": 6, "update": 5.763, "loss": "2.211", "ppl": "4.63", "wps": "397959", "ups": "3.35", "wpb": "118737", "bsz": "256", "num_updates": "296600", "lr": "7.10505e-05", "gnorm": "0.967", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "88714"} +[2022-07-31 11:29:25,683][train_inner][INFO] - {"epoch": 6, "update": 5.767, "loss": "2.215", "ppl": "4.64", "wps": "397943", "ups": "3.35", "wpb": "118764", "bsz": "256", "num_updates": "296800", "lr": "7.10303e-05", "gnorm": "0.968", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "88774"} +[2022-07-31 11:30:25,010][train_inner][INFO] - {"epoch": 6, "update": 5.771, "loss": "2.222", "ppl": "4.66", "wps": "398983", "ups": "3.37", "wpb": "118351", "bsz": "256", "num_updates": "297000", "lr": "7.10101e-05", "gnorm": "0.969", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "88833"} +[2022-07-31 11:31:24,349][train_inner][INFO] - {"epoch": 6, "update": 5.774, "loss": "2.216", "ppl": "4.65", "wps": "397555", "ups": "3.37", "wpb": "117951", "bsz": "256", "num_updates": "297200", "lr": "7.09899e-05", "gnorm": "0.972", "loss_scale": "8", "train_wall": "59", "gb_free": "24.5", "wall": "88893"} +[2022-07-31 11:32:24,084][train_inner][INFO] - {"epoch": 6, "update": 5.778, "loss": "2.213", "ppl": "4.64", "wps": "397801", "ups": "3.35", "wpb": "118812", "bsz": "256", "num_updates": "297400", "lr": "7.09697e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "88953"} +[2022-07-31 11:33:23,862][train_inner][INFO] - {"epoch": 6, "update": 5.782, "loss": "2.215", "ppl": "4.64", "wps": "397602", "ups": "3.35", "wpb": "118839", "bsz": "256", "num_updates": "297600", "lr": "7.09495e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "89012"} +[2022-07-31 11:34:22,894][train_inner][INFO] - {"epoch": 6, "update": 5.786, "loss": "2.218", "ppl": "4.65", "wps": "399428", "ups": "3.39", "wpb": "117895", "bsz": "256", "num_updates": "297800", "lr": "7.09293e-05", "gnorm": "0.98", "loss_scale": "16", "train_wall": "59", "gb_free": "25.4", "wall": "89071"} +[2022-07-31 11:35:22,443][train_inner][INFO] - {"epoch": 6, "update": 5.79, "loss": "2.211", "ppl": "4.63", "wps": "397965", "ups": "3.36", "wpb": "118492", "bsz": "256", "num_updates": "298000", "lr": "7.09091e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "30.3", "wall": "89131"} +[2022-07-31 11:36:21,842][train_inner][INFO] - {"epoch": 6, "update": 5.794, "loss": "2.212", "ppl": "4.63", "wps": "399874", "ups": "3.37", "wpb": "118761", "bsz": "256", "num_updates": "298200", "lr": "7.08889e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "89190"} +[2022-07-31 11:37:22,004][train_inner][INFO] - {"epoch": 6, "update": 5.798, "loss": "2.219", "ppl": "4.65", "wps": "392742", "ups": "3.32", "wpb": "118139", "bsz": "256", "num_updates": "298400", "lr": "7.08687e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "60", "gb_free": "21.5", "wall": "89250"} +[2022-07-31 11:38:22,263][train_inner][INFO] - {"epoch": 6, "update": 5.802, "loss": "2.212", "ppl": "4.63", "wps": "390336", "ups": "3.32", "wpb": "117606", "bsz": "256", "num_updates": "298600", "lr": "7.08485e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "60", "gb_free": "24.8", "wall": "89311"} +[2022-07-31 11:39:22,969][train_inner][INFO] - {"epoch": 6, "update": 5.806, "loss": "2.219", "ppl": "4.65", "wps": "388491", "ups": "3.29", "wpb": "117917", "bsz": "256", "num_updates": "298800", "lr": "7.08283e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "60", "gb_free": "22.2", "wall": "89371"} +[2022-07-31 11:40:22,300][train_inner][INFO] - {"epoch": 6, "update": 5.809, "loss": "2.212", "ppl": "4.63", "wps": "400312", "ups": "3.37", "wpb": "118755", "bsz": "256", "num_updates": "299000", "lr": "7.08081e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "89431"} +[2022-07-31 11:41:21,835][train_inner][INFO] - {"epoch": 6, "update": 5.813, "loss": "2.218", "ppl": "4.65", "wps": "395628", "ups": "3.36", "wpb": "117767", "bsz": "256", "num_updates": "299200", "lr": "7.07879e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "26.1", "wall": "89490"} +[2022-07-31 11:42:21,221][train_inner][INFO] - {"epoch": 6, "update": 5.817, "loss": "2.219", "ppl": "4.66", "wps": "397203", "ups": "3.37", "wpb": "117941", "bsz": "256", "num_updates": "299400", "lr": "7.07677e-05", "gnorm": "0.975", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "89550"} +[2022-07-31 11:42:58,017][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 11:43:20,792][train_inner][INFO] - {"epoch": 6, "update": 5.821, "loss": "2.21", "ppl": "4.63", "wps": "397529", "ups": "3.36", "wpb": "118406", "bsz": "256", "num_updates": "299600", "lr": "7.07475e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "89609"} +[2022-07-31 11:44:20,174][train_inner][INFO] - {"epoch": 6, "update": 5.825, "loss": "2.217", "ppl": "4.65", "wps": "398784", "ups": "3.37", "wpb": "118402", "bsz": "256", "num_updates": "299800", "lr": "7.07273e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "89669"} +[2022-07-31 11:45:19,630][train_inner][INFO] - {"epoch": 6, "update": 5.829, "loss": "2.208", "ppl": "4.62", "wps": "399221", "ups": "3.36", "wpb": "118679", "bsz": "256", "num_updates": "300000", "lr": "7.07071e-05", "gnorm": "0.97", "loss_scale": "16", "train_wall": "59", "gb_free": "30.2", "wall": "89728"} +[2022-07-31 11:45:19,630][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-31 11:45:42,503][valid][INFO] - {"epoch": 6, "valid_loss": "2.104", "valid_ppl": "4.3", "valid_wps": "1.57541e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "300000", "valid_best_loss": "2.104"} +[2022-07-31 11:45:42,507][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 6 @ 300000 updates +[2022-07-31 11:45:42,507][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_6_300000.pt +[2022-07-31 11:45:48,728][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_6_300000.pt +[2022-07-31 11:46:09,381][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_6_300000.pt (epoch 6 @ 300000 updates, score 2.104) (writing took 26.874464755877852 seconds) +[2022-07-31 11:47:08,817][train_inner][INFO] - {"epoch": 6, "update": 5.833, "loss": "2.211", "ppl": "4.63", "wps": "216462", "ups": "1.83", "wpb": "118174", "bsz": "256", "num_updates": "300200", "lr": "7.06869e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "89837"} +[2022-07-31 11:48:08,641][train_inner][INFO] - {"epoch": 6, "update": 5.837, "loss": "2.216", "ppl": "4.65", "wps": "395876", "ups": "3.34", "wpb": "118414", "bsz": "256", "num_updates": "300400", "lr": "7.06667e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "23.9", "wall": "89897"} +[2022-07-31 11:49:08,017][train_inner][INFO] - {"epoch": 6, "update": 5.84, "loss": "2.21", "ppl": "4.63", "wps": "399470", "ups": "3.37", "wpb": "118594", "bsz": "256", "num_updates": "300600", "lr": "7.06465e-05", "gnorm": "0.971", "loss_scale": "16", "train_wall": "59", "gb_free": "24.7", "wall": "89956"} +[2022-07-31 11:50:07,482][train_inner][INFO] - {"epoch": 6, "update": 5.844, "loss": "2.212", "ppl": "4.63", "wps": "397815", "ups": "3.36", "wpb": "118279", "bsz": "256", "num_updates": "300800", "lr": "7.06263e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "90016"} +[2022-07-31 11:51:06,642][train_inner][INFO] - {"epoch": 6, "update": 5.848, "loss": "2.213", "ppl": "4.64", "wps": "401755", "ups": "3.38", "wpb": "118840", "bsz": "256", "num_updates": "301000", "lr": "7.06061e-05", "gnorm": "0.969", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "90075"} +[2022-07-31 11:52:06,167][train_inner][INFO] - {"epoch": 6, "update": 5.852, "loss": "2.215", "ppl": "4.64", "wps": "397178", "ups": "3.36", "wpb": "118208", "bsz": "256", "num_updates": "301200", "lr": "7.05859e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "25.4", "wall": "90135"} +[2022-07-31 11:53:05,362][train_inner][INFO] - {"epoch": 6, "update": 5.856, "loss": "2.224", "ppl": "4.67", "wps": "396737", "ups": "3.38", "wpb": "117424", "bsz": "256", "num_updates": "301400", "lr": "7.05657e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "90194"} +[2022-07-31 11:54:05,984][train_inner][INFO] - {"epoch": 6, "update": 5.86, "loss": "2.213", "ppl": "4.64", "wps": "391362", "ups": "3.3", "wpb": "118624", "bsz": "256", "num_updates": "301600", "lr": "7.05455e-05", "gnorm": "0.972", "loss_scale": "32", "train_wall": "60", "gb_free": "21.9", "wall": "90254"} +[2022-07-31 11:55:05,546][train_inner][INFO] - {"epoch": 6, "update": 5.864, "loss": "2.208", "ppl": "4.62", "wps": "399381", "ups": "3.36", "wpb": "118940", "bsz": "256", "num_updates": "301800", "lr": "7.05253e-05", "gnorm": "0.966", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "90314"} +[2022-07-31 11:56:05,282][train_inner][INFO] - {"epoch": 6, "update": 5.868, "loss": "2.214", "ppl": "4.64", "wps": "394568", "ups": "3.35", "wpb": "117847", "bsz": "256", "num_updates": "302000", "lr": "7.05051e-05", "gnorm": "0.976", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "90374"} +[2022-07-31 11:57:04,674][train_inner][INFO] - {"epoch": 6, "update": 5.872, "loss": "2.209", "ppl": "4.62", "wps": "399736", "ups": "3.37", "wpb": "118706", "bsz": "256", "num_updates": "302200", "lr": "7.04848e-05", "gnorm": "0.973", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "90433"} +[2022-07-31 11:58:04,036][train_inner][INFO] - {"epoch": 6, "update": 5.875, "loss": "2.215", "ppl": "4.64", "wps": "398518", "ups": "3.37", "wpb": "118283", "bsz": "256", "num_updates": "302400", "lr": "7.04646e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "90492"} +[2022-07-31 11:59:03,337][train_inner][INFO] - {"epoch": 6, "update": 5.879, "loss": "2.216", "ppl": "4.65", "wps": "398686", "ups": "3.37", "wpb": "118211", "bsz": "256", "num_updates": "302600", "lr": "7.04444e-05", "gnorm": "0.983", "loss_scale": "32", "train_wall": "59", "gb_free": "25.2", "wall": "90552"} +[2022-07-31 11:59:06,020][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 12:00:02,899][train_inner][INFO] - {"epoch": 6, "update": 5.883, "loss": "2.212", "ppl": "4.63", "wps": "396588", "ups": "3.36", "wpb": "118108", "bsz": "256", "num_updates": "302800", "lr": "7.04242e-05", "gnorm": "0.975", "loss_scale": "16", "train_wall": "59", "gb_free": "22.5", "wall": "90611"} +[2022-07-31 12:01:02,398][train_inner][INFO] - {"epoch": 6, "update": 5.887, "loss": "2.212", "ppl": "4.63", "wps": "397296", "ups": "3.36", "wpb": "118191", "bsz": "256", "num_updates": "303000", "lr": "7.0404e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "90671"} +[2022-07-31 12:02:02,267][train_inner][INFO] - {"epoch": 6, "update": 5.891, "loss": "2.212", "ppl": "4.63", "wps": "396011", "ups": "3.34", "wpb": "118544", "bsz": "256", "num_updates": "303200", "lr": "7.03838e-05", "gnorm": "0.975", "loss_scale": "16", "train_wall": "60", "gb_free": "24.1", "wall": "90731"} +[2022-07-31 12:03:01,611][train_inner][INFO] - {"epoch": 6, "update": 5.895, "loss": "2.218", "ppl": "4.65", "wps": "397574", "ups": "3.37", "wpb": "117967", "bsz": "256", "num_updates": "303400", "lr": "7.03636e-05", "gnorm": "0.975", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "90790"} +[2022-07-31 12:03:08,931][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 12:04:02,169][train_inner][INFO] - {"epoch": 6, "update": 5.899, "loss": "2.21", "ppl": "4.63", "wps": "391376", "ups": "3.3", "wpb": "118505", "bsz": "256", "num_updates": "303600", "lr": "7.03434e-05", "gnorm": "0.97", "loss_scale": "8", "train_wall": "60", "gb_free": "22.9", "wall": "90851"} +[2022-07-31 12:05:01,266][train_inner][INFO] - {"epoch": 6, "update": 5.903, "loss": "2.216", "ppl": "4.65", "wps": "397921", "ups": "3.38", "wpb": "117578", "bsz": "256", "num_updates": "303800", "lr": "7.03232e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "59", "gb_free": "25.4", "wall": "90910"} +[2022-07-31 12:06:00,948][train_inner][INFO] - {"epoch": 6, "update": 5.907, "loss": "2.208", "ppl": "4.62", "wps": "397234", "ups": "3.35", "wpb": "118537", "bsz": "256", "num_updates": "304000", "lr": "7.0303e-05", "gnorm": "0.974", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "90969"} +[2022-07-31 12:07:00,370][train_inner][INFO] - {"epoch": 6, "update": 5.91, "loss": "2.208", "ppl": "4.62", "wps": "399866", "ups": "3.37", "wpb": "118804", "bsz": "256", "num_updates": "304200", "lr": "7.02828e-05", "gnorm": "0.972", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "91029"} +[2022-07-31 12:07:59,942][train_inner][INFO] - {"epoch": 6, "update": 5.914, "loss": "2.208", "ppl": "4.62", "wps": "397429", "ups": "3.36", "wpb": "118376", "bsz": "256", "num_updates": "304400", "lr": "7.02626e-05", "gnorm": "0.972", "loss_scale": "8", "train_wall": "59", "gb_free": "23", "wall": "91088"} +[2022-07-31 12:08:59,281][train_inner][INFO] - {"epoch": 6, "update": 5.918, "loss": "2.211", "ppl": "4.63", "wps": "399143", "ups": "3.37", "wpb": "118423", "bsz": "256", "num_updates": "304600", "lr": "7.02424e-05", "gnorm": "0.974", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "91148"} +[2022-07-31 12:09:58,568][train_inner][INFO] - {"epoch": 6, "update": 5.922, "loss": "2.208", "ppl": "4.62", "wps": "399700", "ups": "3.37", "wpb": "118485", "bsz": "256", "num_updates": "304800", "lr": "7.02222e-05", "gnorm": "0.97", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "91207"} +[2022-07-31 12:10:57,939][train_inner][INFO] - {"epoch": 6, "update": 5.926, "loss": "2.207", "ppl": "4.62", "wps": "400254", "ups": "3.37", "wpb": "118816", "bsz": "256", "num_updates": "305000", "lr": "7.0202e-05", "gnorm": "0.969", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "91266"} +[2022-07-31 12:11:57,474][train_inner][INFO] - {"epoch": 6, "update": 5.93, "loss": "2.211", "ppl": "4.63", "wps": "397576", "ups": "3.36", "wpb": "118348", "bsz": "256", "num_updates": "305200", "lr": "7.01818e-05", "gnorm": "0.973", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "91326"} +[2022-07-31 12:12:56,796][train_inner][INFO] - {"epoch": 6, "update": 5.934, "loss": "2.212", "ppl": "4.63", "wps": "398868", "ups": "3.37", "wpb": "118307", "bsz": "256", "num_updates": "305400", "lr": "7.01616e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "59", "gb_free": "24.1", "wall": "91385"} +[2022-07-31 12:13:56,440][train_inner][INFO] - {"epoch": 6, "update": 5.938, "loss": "2.205", "ppl": "4.61", "wps": "397137", "ups": "3.35", "wpb": "118433", "bsz": "256", "num_updates": "305600", "lr": "7.01414e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "91445"} +[2022-07-31 12:14:55,932][train_inner][INFO] - {"epoch": 6, "update": 5.942, "loss": "2.213", "ppl": "4.64", "wps": "397385", "ups": "3.36", "wpb": "118193", "bsz": "256", "num_updates": "305800", "lr": "7.01212e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "91504"} +[2022-07-31 12:15:54,970][train_inner][INFO] - {"epoch": 6, "update": 5.945, "loss": "2.211", "ppl": "4.63", "wps": "398915", "ups": "3.39", "wpb": "117755", "bsz": "256", "num_updates": "306000", "lr": "7.0101e-05", "gnorm": "0.978", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "91563"} +[2022-07-31 12:16:54,245][train_inner][INFO] - {"epoch": 6, "update": 5.949, "loss": "2.211", "ppl": "4.63", "wps": "400768", "ups": "3.37", "wpb": "118776", "bsz": "256", "num_updates": "306200", "lr": "7.00808e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "91623"} +[2022-07-31 12:17:53,378][train_inner][INFO] - {"epoch": 6, "update": 5.953, "loss": "2.212", "ppl": "4.63", "wps": "398934", "ups": "3.38", "wpb": "117951", "bsz": "256", "num_updates": "306400", "lr": "7.00606e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "23.2", "wall": "91682"} +[2022-07-31 12:18:52,603][train_inner][INFO] - {"epoch": 6, "update": 5.957, "loss": "2.208", "ppl": "4.62", "wps": "399675", "ups": "3.38", "wpb": "118353", "bsz": "256", "num_updates": "306600", "lr": "7.00404e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "29.9", "wall": "91741"} +[2022-07-31 12:19:51,799][train_inner][INFO] - {"epoch": 6, "update": 5.961, "loss": "2.211", "ppl": "4.63", "wps": "397942", "ups": "3.38", "wpb": "117782", "bsz": "256", "num_updates": "306800", "lr": "7.00202e-05", "gnorm": "0.975", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "91800"} +[2022-07-31 12:20:51,186][train_inner][INFO] - {"epoch": 6, "update": 5.965, "loss": "2.204", "ppl": "4.61", "wps": "400579", "ups": "3.37", "wpb": "118945", "bsz": "256", "num_updates": "307000", "lr": "7e-05", "gnorm": "0.972", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "91860"} +[2022-07-31 12:21:50,796][train_inner][INFO] - {"epoch": 6, "update": 5.969, "loss": "2.209", "ppl": "4.62", "wps": "398075", "ups": "3.36", "wpb": "118645", "bsz": "256", "num_updates": "307200", "lr": "6.99798e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "91919"} +[2022-07-31 12:22:50,238][train_inner][INFO] - {"epoch": 6, "update": 5.973, "loss": "2.212", "ppl": "4.63", "wps": "398548", "ups": "3.36", "wpb": "118452", "bsz": "256", "num_updates": "307400", "lr": "6.99596e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "91979"} +[2022-07-31 12:23:50,611][train_inner][INFO] - {"epoch": 6, "update": 5.976, "loss": "2.209", "ppl": "4.62", "wps": "392616", "ups": "3.31", "wpb": "118516", "bsz": "256", "num_updates": "307600", "lr": "6.99394e-05", "gnorm": "0.978", "loss_scale": "32", "train_wall": "60", "gb_free": "21.4", "wall": "92039"} +[2022-07-31 12:24:50,184][train_inner][INFO] - {"epoch": 6, "update": 5.98, "loss": "2.214", "ppl": "4.64", "wps": "396615", "ups": "3.36", "wpb": "118137", "bsz": "256", "num_updates": "307800", "lr": "6.99192e-05", "gnorm": "0.975", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "92099"} +[2022-07-31 12:25:49,421][train_inner][INFO] - {"epoch": 6, "update": 5.984, "loss": "2.212", "ppl": "4.63", "wps": "399175", "ups": "3.38", "wpb": "118228", "bsz": "256", "num_updates": "308000", "lr": "6.9899e-05", "gnorm": "0.973", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "92158"} +[2022-07-31 12:25:58,350][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 12:26:49,108][train_inner][INFO] - {"epoch": 6, "update": 5.988, "loss": "2.207", "ppl": "4.62", "wps": "395582", "ups": "3.35", "wpb": "118055", "bsz": "256", "num_updates": "308200", "lr": "6.98788e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "92218"} +[2022-07-31 12:27:48,645][train_inner][INFO] - {"epoch": 6, "update": 5.992, "loss": "2.207", "ppl": "4.62", "wps": "396948", "ups": "3.36", "wpb": "118165", "bsz": "256", "num_updates": "308400", "lr": "6.98586e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "92277"} +[2022-07-31 12:28:48,233][train_inner][INFO] - {"epoch": 6, "update": 5.996, "loss": "2.214", "ppl": "4.64", "wps": "397017", "ups": "3.36", "wpb": "118287", "bsz": "256", "num_updates": "308600", "lr": "6.98384e-05", "gnorm": "0.974", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "92337"} +[2022-07-31 12:29:47,865][train_inner][INFO] - {"epoch": 6, "update": 6.0, "loss": "2.209", "ppl": "4.62", "wps": "396380", "ups": "3.35", "wpb": "118184", "bsz": "256", "num_updates": "308800", "lr": "6.98182e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "26", "wall": "92396"} +[2022-07-31 12:29:50,757][fairseq_cli.train][INFO] - end of epoch 6 (average epoch stats below) +[2022-07-31 12:29:50,758][train][INFO] - {"epoch": 6, "train_loss": "2.228", "train_ppl": "4.68", "train_wps": "396107", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "308810", "train_lr": "6.98172e-05", "train_gnorm": "0.969", "train_loss_scale": "16", "train_train_wall": "15229", "train_gb_free": "29", "train_wall": "92399"} +[2022-07-31 12:29:50,866][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-31 12:29:50,869][fairseq.trainer][INFO] - begin training epoch 7 +[2022-07-31 12:29:50,869][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-31 12:30:58,424][train_inner][INFO] - {"epoch": 7, "update": 6.004, "loss": "2.203", "ppl": "4.61", "wps": "334254", "ups": "2.83", "wpb": "117922", "bsz": "255.4", "num_updates": "309000", "lr": "6.9798e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "60", "gb_free": "24.3", "wall": "92467"} +[2022-07-31 12:31:57,734][train_inner][INFO] - {"epoch": 7, "update": 6.008, "loss": "2.208", "ppl": "4.62", "wps": "397790", "ups": "3.37", "wpb": "117963", "bsz": "256", "num_updates": "309200", "lr": "6.97778e-05", "gnorm": "0.979", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "92526"} +[2022-07-31 12:32:09,184][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 12:32:57,441][train_inner][INFO] - {"epoch": 7, "update": 6.011, "loss": "2.204", "ppl": "4.61", "wps": "395897", "ups": "3.35", "wpb": "118188", "bsz": "256", "num_updates": "309400", "lr": "6.97576e-05", "gnorm": "0.973", "loss_scale": "8", "train_wall": "59", "gb_free": "25.8", "wall": "92586"} +[2022-07-31 12:33:56,410][train_inner][INFO] - {"epoch": 7, "update": 6.015, "loss": "2.21", "ppl": "4.63", "wps": "402408", "ups": "3.39", "wpb": "118647", "bsz": "256", "num_updates": "309600", "lr": "6.97374e-05", "gnorm": "0.979", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "92645"} +[2022-07-31 12:34:55,655][train_inner][INFO] - {"epoch": 7, "update": 6.019, "loss": "2.204", "ppl": "4.61", "wps": "398645", "ups": "3.38", "wpb": "118088", "bsz": "256", "num_updates": "309800", "lr": "6.97172e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "92704"} +[2022-07-31 12:35:55,129][train_inner][INFO] - {"epoch": 7, "update": 6.023, "loss": "2.21", "ppl": "4.63", "wps": "398286", "ups": "3.36", "wpb": "118439", "bsz": "256", "num_updates": "310000", "lr": "6.9697e-05", "gnorm": "0.98", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "92764"} +[2022-07-31 12:36:54,394][train_inner][INFO] - {"epoch": 7, "update": 6.027, "loss": "2.212", "ppl": "4.63", "wps": "396776", "ups": "3.37", "wpb": "117573", "bsz": "256", "num_updates": "310200", "lr": "6.96768e-05", "gnorm": "0.983", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "92823"} +[2022-07-31 12:37:54,299][train_inner][INFO] - {"epoch": 7, "update": 6.031, "loss": "2.202", "ppl": "4.6", "wps": "395247", "ups": "3.34", "wpb": "118386", "bsz": "256", "num_updates": "310400", "lr": "6.96566e-05", "gnorm": "0.974", "loss_scale": "8", "train_wall": "60", "gb_free": "23.1", "wall": "92883"} +[2022-07-31 12:38:53,957][train_inner][INFO] - {"epoch": 7, "update": 6.035, "loss": "2.203", "ppl": "4.6", "wps": "395709", "ups": "3.35", "wpb": "118034", "bsz": "256", "num_updates": "310600", "lr": "6.96364e-05", "gnorm": "0.989", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "92942"} +[2022-07-31 12:39:53,360][train_inner][INFO] - {"epoch": 7, "update": 6.039, "loss": "2.211", "ppl": "4.63", "wps": "398086", "ups": "3.37", "wpb": "118237", "bsz": "256", "num_updates": "310800", "lr": "6.96162e-05", "gnorm": "0.98", "loss_scale": "8", "train_wall": "59", "gb_free": "24.3", "wall": "93002"} +[2022-07-31 12:40:52,795][train_inner][INFO] - {"epoch": 7, "update": 6.043, "loss": "2.205", "ppl": "4.61", "wps": "397380", "ups": "3.37", "wpb": "118090", "bsz": "256", "num_updates": "311000", "lr": "6.9596e-05", "gnorm": "0.979", "loss_scale": "8", "train_wall": "59", "gb_free": "25.3", "wall": "93061"} +[2022-07-31 12:41:52,511][train_inner][INFO] - {"epoch": 7, "update": 6.046, "loss": "2.195", "ppl": "4.58", "wps": "399589", "ups": "3.35", "wpb": "119308", "bsz": "256", "num_updates": "311200", "lr": "6.95758e-05", "gnorm": "0.971", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "93121"} +[2022-07-31 12:42:51,873][train_inner][INFO] - {"epoch": 7, "update": 6.05, "loss": "2.203", "ppl": "4.61", "wps": "399713", "ups": "3.37", "wpb": "118640", "bsz": "256", "num_updates": "311400", "lr": "6.95556e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "93180"} +[2022-07-31 12:43:51,243][train_inner][INFO] - {"epoch": 7, "update": 6.054, "loss": "2.21", "ppl": "4.63", "wps": "399060", "ups": "3.37", "wpb": "118459", "bsz": "256", "num_updates": "311600", "lr": "6.95354e-05", "gnorm": "0.975", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "93240"} +[2022-07-31 12:44:50,481][train_inner][INFO] - {"epoch": 7, "update": 6.058, "loss": "2.204", "ppl": "4.61", "wps": "400437", "ups": "3.38", "wpb": "118606", "bsz": "256", "num_updates": "311800", "lr": "6.95152e-05", "gnorm": "0.979", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "93299"} +[2022-07-31 12:45:49,923][train_inner][INFO] - {"epoch": 7, "update": 6.062, "loss": "2.207", "ppl": "4.62", "wps": "397684", "ups": "3.36", "wpb": "118195", "bsz": "256", "num_updates": "312000", "lr": "6.94949e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "29.2", "wall": "93358"} +[2022-07-31 12:46:49,414][train_inner][INFO] - {"epoch": 7, "update": 6.066, "loss": "2.202", "ppl": "4.6", "wps": "397632", "ups": "3.36", "wpb": "118275", "bsz": "256", "num_updates": "312200", "lr": "6.94747e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "93418"} +[2022-07-31 12:47:49,062][train_inner][INFO] - {"epoch": 7, "update": 6.07, "loss": "2.207", "ppl": "4.62", "wps": "396325", "ups": "3.35", "wpb": "118200", "bsz": "256", "num_updates": "312400", "lr": "6.94545e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "23", "wall": "93477"} +[2022-07-31 12:48:49,616][train_inner][INFO] - {"epoch": 7, "update": 6.074, "loss": "2.204", "ppl": "4.61", "wps": "392394", "ups": "3.3", "wpb": "118804", "bsz": "256", "num_updates": "312600", "lr": "6.94343e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "60", "gb_free": "21.3", "wall": "93538"} +[2022-07-31 12:49:49,005][train_inner][INFO] - {"epoch": 7, "update": 6.078, "loss": "2.204", "ppl": "4.61", "wps": "398596", "ups": "3.37", "wpb": "118360", "bsz": "256", "num_updates": "312800", "lr": "6.94141e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "93597"} +[2022-07-31 12:50:48,707][train_inner][INFO] - {"epoch": 7, "update": 6.081, "loss": "2.201", "ppl": "4.6", "wps": "395523", "ups": "3.35", "wpb": "118066", "bsz": "256", "num_updates": "313000", "lr": "6.93939e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "93657"} +[2022-07-31 12:51:48,453][train_inner][INFO] - {"epoch": 7, "update": 6.085, "loss": "2.21", "ppl": "4.63", "wps": "396864", "ups": "3.35", "wpb": "118555", "bsz": "256", "num_updates": "313200", "lr": "6.93737e-05", "gnorm": "0.98", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "93717"} +[2022-07-31 12:52:48,096][train_inner][INFO] - {"epoch": 7, "update": 6.089, "loss": "2.211", "ppl": "4.63", "wps": "396264", "ups": "3.35", "wpb": "118171", "bsz": "256", "num_updates": "313400", "lr": "6.93535e-05", "gnorm": "0.977", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "93777"} +[2022-07-31 12:53:47,350][train_inner][INFO] - {"epoch": 7, "update": 6.093, "loss": "2.201", "ppl": "4.6", "wps": "399947", "ups": "3.38", "wpb": "118490", "bsz": "256", "num_updates": "313600", "lr": "6.93333e-05", "gnorm": "0.976", "loss_scale": "32", "train_wall": "59", "gb_free": "25.5", "wall": "93836"} +[2022-07-31 12:54:47,614][train_inner][INFO] - {"epoch": 7, "update": 6.097, "loss": "2.205", "ppl": "4.61", "wps": "395024", "ups": "3.32", "wpb": "119028", "bsz": "256", "num_updates": "313800", "lr": "6.93131e-05", "gnorm": "0.973", "loss_scale": "32", "train_wall": "60", "gb_free": "24.7", "wall": "93896"} +[2022-07-31 12:55:47,480][train_inner][INFO] - {"epoch": 7, "update": 6.101, "loss": "2.202", "ppl": "4.6", "wps": "396241", "ups": "3.34", "wpb": "118606", "bsz": "256", "num_updates": "314000", "lr": "6.92929e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "60", "gb_free": "21.8", "wall": "93956"} +[2022-07-31 12:56:46,835][train_inner][INFO] - {"epoch": 7, "update": 6.105, "loss": "2.205", "ppl": "4.61", "wps": "396708", "ups": "3.37", "wpb": "117734", "bsz": "256", "num_updates": "314200", "lr": "6.92727e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "25.4", "wall": "94015"} +[2022-07-31 12:57:46,155][train_inner][INFO] - {"epoch": 7, "update": 6.109, "loss": "2.202", "ppl": "4.6", "wps": "397316", "ups": "3.37", "wpb": "117842", "bsz": "256", "num_updates": "314400", "lr": "6.92525e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "94075"} +[2022-07-31 12:58:19,131][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 12:58:46,172][train_inner][INFO] - {"epoch": 7, "update": 6.112, "loss": "2.215", "ppl": "4.64", "wps": "395080", "ups": "3.33", "wpb": "118556", "bsz": "256", "num_updates": "314600", "lr": "6.92323e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "60", "gb_free": "21.5", "wall": "94135"} +[2022-07-31 12:59:45,019][train_inner][INFO] - {"epoch": 7, "update": 6.116, "loss": "2.198", "ppl": "4.59", "wps": "402251", "ups": "3.4", "wpb": "118356", "bsz": "256", "num_updates": "314800", "lr": "6.92121e-05", "gnorm": "0.975", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "94193"} +[2022-07-31 13:00:44,563][train_inner][INFO] - {"epoch": 7, "update": 6.12, "loss": "2.198", "ppl": "4.59", "wps": "397256", "ups": "3.36", "wpb": "118270", "bsz": "256", "num_updates": "315000", "lr": "6.91919e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "94253"} +[2022-07-31 13:01:43,931][train_inner][INFO] - {"epoch": 7, "update": 6.124, "loss": "2.202", "ppl": "4.6", "wps": "398179", "ups": "3.37", "wpb": "118195", "bsz": "256", "num_updates": "315200", "lr": "6.91717e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "94312"} +[2022-07-31 13:02:43,593][train_inner][INFO] - {"epoch": 7, "update": 6.128, "loss": "2.203", "ppl": "4.6", "wps": "396505", "ups": "3.35", "wpb": "118280", "bsz": "256", "num_updates": "315400", "lr": "6.91515e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "22.7", "wall": "94372"} +[2022-07-31 13:03:42,838][train_inner][INFO] - {"epoch": 7, "update": 6.132, "loss": "2.21", "ppl": "4.63", "wps": "398641", "ups": "3.38", "wpb": "118087", "bsz": "256", "num_updates": "315600", "lr": "6.91313e-05", "gnorm": "0.978", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "94431"} +[2022-07-31 13:04:42,150][train_inner][INFO] - {"epoch": 7, "update": 6.136, "loss": "2.202", "ppl": "4.6", "wps": "398391", "ups": "3.37", "wpb": "118145", "bsz": "256", "num_updates": "315800", "lr": "6.91111e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "94491"} +[2022-07-31 13:05:36,770][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 13:05:41,600][train_inner][INFO] - {"epoch": 7, "update": 6.14, "loss": "2.208", "ppl": "4.62", "wps": "395764", "ups": "3.36", "wpb": "117641", "bsz": "256", "num_updates": "316000", "lr": "6.90909e-05", "gnorm": "0.98", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "94550"} +[2022-07-31 13:06:42,028][train_inner][INFO] - {"epoch": 7, "update": 6.144, "loss": "2.207", "ppl": "4.62", "wps": "391991", "ups": "3.31", "wpb": "118435", "bsz": "256", "num_updates": "316200", "lr": "6.90707e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "60", "gb_free": "21.3", "wall": "94610"} +[2022-07-31 13:07:41,477][train_inner][INFO] - {"epoch": 7, "update": 6.147, "loss": "2.199", "ppl": "4.59", "wps": "399946", "ups": "3.36", "wpb": "118882", "bsz": "256", "num_updates": "316400", "lr": "6.90505e-05", "gnorm": "0.976", "loss_scale": "8", "train_wall": "59", "gb_free": "23.2", "wall": "94670"} +[2022-07-31 13:08:40,882][train_inner][INFO] - {"epoch": 7, "update": 6.151, "loss": "2.205", "ppl": "4.61", "wps": "398072", "ups": "3.37", "wpb": "118235", "bsz": "256", "num_updates": "316600", "lr": "6.90303e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "94729"} +[2022-07-31 13:09:39,907][train_inner][INFO] - {"epoch": 7, "update": 6.155, "loss": "2.209", "ppl": "4.62", "wps": "400598", "ups": "3.39", "wpb": "118226", "bsz": "256", "num_updates": "316800", "lr": "6.90101e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "94788"} +[2022-07-31 13:10:39,447][train_inner][INFO] - {"epoch": 7, "update": 6.159, "loss": "2.202", "ppl": "4.6", "wps": "397128", "ups": "3.36", "wpb": "118226", "bsz": "256", "num_updates": "317000", "lr": "6.89899e-05", "gnorm": "0.98", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "94848"} +[2022-07-31 13:11:38,486][train_inner][INFO] - {"epoch": 7, "update": 6.163, "loss": "2.203", "ppl": "4.6", "wps": "402040", "ups": "3.39", "wpb": "118679", "bsz": "256", "num_updates": "317200", "lr": "6.89697e-05", "gnorm": "0.975", "loss_scale": "8", "train_wall": "59", "gb_free": "29.2", "wall": "94907"} +[2022-07-31 13:12:37,455][train_inner][INFO] - {"epoch": 7, "update": 6.167, "loss": "2.199", "ppl": "4.59", "wps": "401393", "ups": "3.39", "wpb": "118347", "bsz": "256", "num_updates": "317400", "lr": "6.89495e-05", "gnorm": "0.976", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "94966"} +[2022-07-31 13:13:36,641][train_inner][INFO] - {"epoch": 7, "update": 6.171, "loss": "2.202", "ppl": "4.6", "wps": "400761", "ups": "3.38", "wpb": "118597", "bsz": "256", "num_updates": "317600", "lr": "6.89293e-05", "gnorm": "0.978", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "95025"} +[2022-07-31 13:14:35,718][train_inner][INFO] - {"epoch": 7, "update": 6.175, "loss": "2.205", "ppl": "4.61", "wps": "400080", "ups": "3.39", "wpb": "118176", "bsz": "256", "num_updates": "317800", "lr": "6.89091e-05", "gnorm": "0.982", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "95084"} +[2022-07-31 13:15:35,275][train_inner][INFO] - {"epoch": 7, "update": 6.179, "loss": "2.199", "ppl": "4.59", "wps": "396545", "ups": "3.36", "wpb": "118085", "bsz": "256", "num_updates": "318000", "lr": "6.88889e-05", "gnorm": "0.979", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "95144"} +[2022-07-31 13:16:34,767][train_inner][INFO] - {"epoch": 7, "update": 6.182, "loss": "2.202", "ppl": "4.6", "wps": "397922", "ups": "3.36", "wpb": "118365", "bsz": "256", "num_updates": "318200", "lr": "6.88687e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "23.2", "wall": "95203"} +[2022-07-31 13:17:34,383][train_inner][INFO] - {"epoch": 7, "update": 6.186, "loss": "2.205", "ppl": "4.61", "wps": "395259", "ups": "3.35", "wpb": "117819", "bsz": "256", "num_updates": "318400", "lr": "6.88485e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "25.8", "wall": "95263"} +[2022-07-31 13:18:33,638][train_inner][INFO] - {"epoch": 7, "update": 6.19, "loss": "2.199", "ppl": "4.59", "wps": "399550", "ups": "3.38", "wpb": "118375", "bsz": "256", "num_updates": "318600", "lr": "6.88283e-05", "gnorm": "0.978", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "95322"} +[2022-07-31 13:19:33,129][train_inner][INFO] - {"epoch": 7, "update": 6.194, "loss": "2.203", "ppl": "4.61", "wps": "394335", "ups": "3.36", "wpb": "117295", "bsz": "256", "num_updates": "318800", "lr": "6.88081e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "95382"} +[2022-07-31 13:20:32,377][train_inner][INFO] - {"epoch": 7, "update": 6.198, "loss": "2.194", "ppl": "4.58", "wps": "399247", "ups": "3.38", "wpb": "118273", "bsz": "256", "num_updates": "319000", "lr": "6.87879e-05", "gnorm": "0.977", "loss_scale": "16", "train_wall": "59", "gb_free": "30.2", "wall": "95441"} +[2022-07-31 13:21:32,046][train_inner][INFO] - {"epoch": 7, "update": 6.202, "loss": "2.205", "ppl": "4.61", "wps": "397859", "ups": "3.35", "wpb": "118698", "bsz": "256", "num_updates": "319200", "lr": "6.87677e-05", "gnorm": "0.978", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "95500"} +[2022-07-31 13:22:31,466][train_inner][INFO] - {"epoch": 7, "update": 6.206, "loss": "2.2", "ppl": "4.59", "wps": "397396", "ups": "3.37", "wpb": "118067", "bsz": "256", "num_updates": "319400", "lr": "6.87475e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "95560"} +[2022-07-31 13:23:31,005][train_inner][INFO] - {"epoch": 7, "update": 6.21, "loss": "2.198", "ppl": "4.59", "wps": "398200", "ups": "3.36", "wpb": "118542", "bsz": "256", "num_updates": "319600", "lr": "6.87273e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "95619"} +[2022-07-31 13:24:30,638][train_inner][INFO] - {"epoch": 7, "update": 6.214, "loss": "2.199", "ppl": "4.59", "wps": "398804", "ups": "3.35", "wpb": "118907", "bsz": "256", "num_updates": "319800", "lr": "6.87071e-05", "gnorm": "0.973", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "95679"} +[2022-07-31 13:25:30,077][train_inner][INFO] - {"epoch": 7, "update": 6.217, "loss": "2.197", "ppl": "4.58", "wps": "397137", "ups": "3.36", "wpb": "118027", "bsz": "256", "num_updates": "320000", "lr": "6.86869e-05", "gnorm": "0.979", "loss_scale": "16", "train_wall": "59", "gb_free": "28.9", "wall": "95739"} +[2022-07-31 13:26:29,418][train_inner][INFO] - {"epoch": 7, "update": 6.221, "loss": "2.201", "ppl": "4.6", "wps": "398642", "ups": "3.37", "wpb": "118278", "bsz": "256", "num_updates": "320200", "lr": "6.86667e-05", "gnorm": "0.978", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "95798"} +[2022-07-31 13:27:29,056][train_inner][INFO] - {"epoch": 7, "update": 6.225, "loss": "2.208", "ppl": "4.62", "wps": "393880", "ups": "3.35", "wpb": "117450", "bsz": "256", "num_updates": "320400", "lr": "6.86465e-05", "gnorm": "0.983", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "95857"} +[2022-07-31 13:28:28,879][train_inner][INFO] - {"epoch": 7, "update": 6.229, "loss": "2.194", "ppl": "4.58", "wps": "396413", "ups": "3.34", "wpb": "118574", "bsz": "256", "num_updates": "320600", "lr": "6.86263e-05", "gnorm": "0.977", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "95917"} +[2022-07-31 13:29:28,686][train_inner][INFO] - {"epoch": 7, "update": 6.233, "loss": "2.199", "ppl": "4.59", "wps": "395906", "ups": "3.34", "wpb": "118388", "bsz": "256", "num_updates": "320800", "lr": "6.86061e-05", "gnorm": "0.976", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "95977"} +[2022-07-31 13:30:28,283][train_inner][INFO] - {"epoch": 7, "update": 6.237, "loss": "2.201", "ppl": "4.6", "wps": "395959", "ups": "3.36", "wpb": "117988", "bsz": "256", "num_updates": "321000", "lr": "6.85859e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "96037"} +[2022-07-31 13:31:29,203][train_inner][INFO] - {"epoch": 7, "update": 6.241, "loss": "2.201", "ppl": "4.6", "wps": "387219", "ups": "3.28", "wpb": "117948", "bsz": "256", "num_updates": "321200", "lr": "6.85657e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "61", "gb_free": "27.9", "wall": "96098"} +[2022-07-31 13:31:54,112][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 13:32:28,942][train_inner][INFO] - {"epoch": 7, "update": 6.245, "loss": "2.205", "ppl": "4.61", "wps": "393670", "ups": "3.35", "wpb": "117586", "bsz": "256", "num_updates": "321400", "lr": "6.85455e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "23.3", "wall": "96157"} +[2022-07-31 13:33:28,263][train_inner][INFO] - {"epoch": 7, "update": 6.248, "loss": "2.195", "ppl": "4.58", "wps": "399553", "ups": "3.37", "wpb": "118508", "bsz": "256", "num_updates": "321600", "lr": "6.85253e-05", "gnorm": "0.979", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "96217"} +[2022-07-31 13:34:27,705][train_inner][INFO] - {"epoch": 7, "update": 6.252, "loss": "2.2", "ppl": "4.59", "wps": "397876", "ups": "3.36", "wpb": "118252", "bsz": "256", "num_updates": "321800", "lr": "6.85051e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "96276"} +[2022-07-31 13:35:27,425][train_inner][INFO] - {"epoch": 7, "update": 6.256, "loss": "2.199", "ppl": "4.59", "wps": "394687", "ups": "3.35", "wpb": "117853", "bsz": "256", "num_updates": "322000", "lr": "6.84848e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "96336"} +[2022-07-31 13:36:27,229][train_inner][INFO] - {"epoch": 7, "update": 6.26, "loss": "2.198", "ppl": "4.59", "wps": "395921", "ups": "3.34", "wpb": "118387", "bsz": "256", "num_updates": "322200", "lr": "6.84646e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "96396"} +[2022-07-31 13:37:26,875][train_inner][INFO] - {"epoch": 7, "update": 6.264, "loss": "2.2", "ppl": "4.6", "wps": "394600", "ups": "3.35", "wpb": "117680", "bsz": "256", "num_updates": "322400", "lr": "6.84444e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "96455"} +[2022-07-31 13:38:26,457][train_inner][INFO] - {"epoch": 7, "update": 6.268, "loss": "2.201", "ppl": "4.6", "wps": "397141", "ups": "3.36", "wpb": "118311", "bsz": "256", "num_updates": "322600", "lr": "6.84242e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "96515"} +[2022-07-31 13:39:27,057][train_inner][INFO] - {"epoch": 7, "update": 6.272, "loss": "2.195", "ppl": "4.58", "wps": "392367", "ups": "3.3", "wpb": "118887", "bsz": "256", "num_updates": "322800", "lr": "6.8404e-05", "gnorm": "0.976", "loss_scale": "16", "train_wall": "60", "gb_free": "23", "wall": "96575"} +[2022-07-31 13:40:26,501][train_inner][INFO] - {"epoch": 7, "update": 6.276, "loss": "2.202", "ppl": "4.6", "wps": "397169", "ups": "3.36", "wpb": "118046", "bsz": "256", "num_updates": "323000", "lr": "6.83838e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "96635"} +[2022-07-31 13:41:25,916][train_inner][INFO] - {"epoch": 7, "update": 6.28, "loss": "2.199", "ppl": "4.59", "wps": "397556", "ups": "3.37", "wpb": "118102", "bsz": "256", "num_updates": "323200", "lr": "6.83636e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "96694"} +[2022-07-31 13:42:25,789][train_inner][INFO] - {"epoch": 7, "update": 6.283, "loss": "2.197", "ppl": "4.58", "wps": "394945", "ups": "3.34", "wpb": "118233", "bsz": "256", "num_updates": "323400", "lr": "6.83434e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "60", "gb_free": "23.4", "wall": "96754"} +[2022-07-31 13:43:25,466][train_inner][INFO] - {"epoch": 7, "update": 6.287, "loss": "2.2", "ppl": "4.6", "wps": "397065", "ups": "3.35", "wpb": "118478", "bsz": "256", "num_updates": "323600", "lr": "6.83232e-05", "gnorm": "0.977", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "96814"} +[2022-07-31 13:44:25,044][train_inner][INFO] - {"epoch": 7, "update": 6.291, "loss": "2.191", "ppl": "4.57", "wps": "397751", "ups": "3.36", "wpb": "118484", "bsz": "256", "num_updates": "323800", "lr": "6.8303e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "96873"} +[2022-07-31 13:45:24,007][train_inner][INFO] - {"epoch": 7, "update": 6.295, "loss": "2.198", "ppl": "4.59", "wps": "399362", "ups": "3.39", "wpb": "117737", "bsz": "256", "num_updates": "324000", "lr": "6.82828e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "96932"} +[2022-07-31 13:46:23,399][train_inner][INFO] - {"epoch": 7, "update": 6.299, "loss": "2.196", "ppl": "4.58", "wps": "398566", "ups": "3.37", "wpb": "118357", "bsz": "256", "num_updates": "324200", "lr": "6.82626e-05", "gnorm": "0.978", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "96992"} +[2022-07-31 13:47:23,204][train_inner][INFO] - {"epoch": 7, "update": 6.303, "loss": "2.198", "ppl": "4.59", "wps": "395005", "ups": "3.34", "wpb": "118117", "bsz": "256", "num_updates": "324400", "lr": "6.82424e-05", "gnorm": "0.984", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "97052"} +[2022-07-31 13:48:23,013][train_inner][INFO] - {"epoch": 7, "update": 6.307, "loss": "2.197", "ppl": "4.59", "wps": "395750", "ups": "3.34", "wpb": "118346", "bsz": "256", "num_updates": "324600", "lr": "6.82222e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "97111"} +[2022-07-31 13:49:22,110][train_inner][INFO] - {"epoch": 7, "update": 6.311, "loss": "2.204", "ppl": "4.61", "wps": "402143", "ups": "3.38", "wpb": "118827", "bsz": "256", "num_updates": "324800", "lr": "6.8202e-05", "gnorm": "0.978", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "97171"} +[2022-07-31 13:50:21,526][train_inner][INFO] - {"epoch": 7, "update": 6.315, "loss": "2.196", "ppl": "4.58", "wps": "397296", "ups": "3.37", "wpb": "118028", "bsz": "256", "num_updates": "325000", "lr": "6.81818e-05", "gnorm": "0.982", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "97230"} +[2022-07-31 13:51:21,006][train_inner][INFO] - {"epoch": 7, "update": 6.318, "loss": "2.195", "ppl": "4.58", "wps": "397296", "ups": "3.36", "wpb": "118155", "bsz": "256", "num_updates": "325200", "lr": "6.81616e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "97289"} +[2022-07-31 13:52:20,634][train_inner][INFO] - {"epoch": 7, "update": 6.322, "loss": "2.197", "ppl": "4.59", "wps": "396841", "ups": "3.35", "wpb": "118314", "bsz": "256", "num_updates": "325400", "lr": "6.81414e-05", "gnorm": "0.978", "loss_scale": "64", "train_wall": "59", "gb_free": "22.7", "wall": "97349"} +[2022-07-31 13:52:24,992][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 13:53:20,010][train_inner][INFO] - {"epoch": 7, "update": 6.326, "loss": "2.201", "ppl": "4.6", "wps": "398632", "ups": "3.37", "wpb": "118344", "bsz": "256", "num_updates": "325600", "lr": "6.81212e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "23.2", "wall": "97408"} +[2022-07-31 13:54:19,469][train_inner][INFO] - {"epoch": 7, "update": 6.33, "loss": "2.198", "ppl": "4.59", "wps": "397882", "ups": "3.36", "wpb": "118287", "bsz": "256", "num_updates": "325800", "lr": "6.8101e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "27.3", "wall": "97468"} +[2022-07-31 13:55:18,940][train_inner][INFO] - {"epoch": 7, "update": 6.334, "loss": "2.192", "ppl": "4.57", "wps": "399670", "ups": "3.36", "wpb": "118843", "bsz": "256", "num_updates": "326000", "lr": "6.80808e-05", "gnorm": "0.978", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "97527"} +[2022-07-31 13:56:18,245][train_inner][INFO] - {"epoch": 7, "update": 6.338, "loss": "2.192", "ppl": "4.57", "wps": "399050", "ups": "3.37", "wpb": "118329", "bsz": "256", "num_updates": "326200", "lr": "6.80606e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "97587"} +[2022-07-31 13:57:17,604][train_inner][INFO] - {"epoch": 7, "update": 6.342, "loss": "2.199", "ppl": "4.59", "wps": "397827", "ups": "3.37", "wpb": "118071", "bsz": "256", "num_updates": "326400", "lr": "6.80404e-05", "gnorm": "0.982", "loss_scale": "32", "train_wall": "59", "gb_free": "25.8", "wall": "97646"} +[2022-07-31 13:58:17,051][train_inner][INFO] - {"epoch": 7, "update": 6.346, "loss": "2.193", "ppl": "4.57", "wps": "398717", "ups": "3.36", "wpb": "118512", "bsz": "256", "num_updates": "326600", "lr": "6.80202e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "97705"} +[2022-07-31 13:59:16,881][train_inner][INFO] - {"epoch": 7, "update": 6.349, "loss": "2.188", "ppl": "4.56", "wps": "394919", "ups": "3.34", "wpb": "118139", "bsz": "256", "num_updates": "326800", "lr": "6.8e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "97765"} +[2022-07-31 14:00:16,263][train_inner][INFO] - {"epoch": 7, "update": 6.353, "loss": "2.194", "ppl": "4.57", "wps": "399636", "ups": "3.37", "wpb": "118656", "bsz": "256", "num_updates": "327000", "lr": "6.79798e-05", "gnorm": "0.978", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "97825"} +[2022-07-31 14:01:15,773][train_inner][INFO] - {"epoch": 7, "update": 6.357, "loss": "2.196", "ppl": "4.58", "wps": "398314", "ups": "3.36", "wpb": "118517", "bsz": "256", "num_updates": "327200", "lr": "6.79596e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "29", "wall": "97884"} +[2022-07-31 14:02:15,141][train_inner][INFO] - {"epoch": 7, "update": 6.361, "loss": "2.195", "ppl": "4.58", "wps": "399252", "ups": "3.37", "wpb": "118512", "bsz": "256", "num_updates": "327400", "lr": "6.79394e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "26.8", "wall": "97944"} +[2022-07-31 14:02:31,553][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 14:03:14,794][train_inner][INFO] - {"epoch": 7, "update": 6.365, "loss": "2.201", "ppl": "4.6", "wps": "393575", "ups": "3.35", "wpb": "117389", "bsz": "256", "num_updates": "327600", "lr": "6.79192e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "98003"} +[2022-07-31 14:04:13,931][train_inner][INFO] - {"epoch": 7, "update": 6.369, "loss": "2.192", "ppl": "4.57", "wps": "399249", "ups": "3.38", "wpb": "118052", "bsz": "256", "num_updates": "327800", "lr": "6.7899e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "98062"} +[2022-07-31 14:05:13,551][train_inner][INFO] - {"epoch": 7, "update": 6.373, "loss": "2.193", "ppl": "4.57", "wps": "397185", "ups": "3.35", "wpb": "118400", "bsz": "256", "num_updates": "328000", "lr": "6.78788e-05", "gnorm": "0.979", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "98122"} +[2022-07-31 14:06:13,061][train_inner][INFO] - {"epoch": 7, "update": 6.377, "loss": "2.198", "ppl": "4.59", "wps": "397272", "ups": "3.36", "wpb": "118207", "bsz": "256", "num_updates": "328200", "lr": "6.78586e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "98181"} +[2022-07-31 14:07:12,438][train_inner][INFO] - {"epoch": 7, "update": 6.381, "loss": "2.194", "ppl": "4.58", "wps": "398556", "ups": "3.37", "wpb": "118324", "bsz": "256", "num_updates": "328400", "lr": "6.78384e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "98241"} +[2022-07-31 14:08:11,587][train_inner][INFO] - {"epoch": 7, "update": 6.384, "loss": "2.201", "ppl": "4.6", "wps": "399438", "ups": "3.38", "wpb": "118132", "bsz": "256", "num_updates": "328600", "lr": "6.78182e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "22.5", "wall": "98300"} +[2022-07-31 14:09:10,728][train_inner][INFO] - {"epoch": 7, "update": 6.388, "loss": "2.197", "ppl": "4.58", "wps": "397390", "ups": "3.38", "wpb": "117510", "bsz": "256", "num_updates": "328800", "lr": "6.7798e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "98359"} +[2022-07-31 14:10:10,531][train_inner][INFO] - {"epoch": 7, "update": 6.392, "loss": "2.196", "ppl": "4.58", "wps": "393816", "ups": "3.34", "wpb": "117756", "bsz": "256", "num_updates": "329000", "lr": "6.77778e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "98419"} +[2022-07-31 14:11:09,885][train_inner][INFO] - {"epoch": 7, "update": 6.396, "loss": "2.196", "ppl": "4.58", "wps": "397394", "ups": "3.37", "wpb": "117933", "bsz": "256", "num_updates": "329200", "lr": "6.77576e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "98478"} +[2022-07-31 14:12:09,446][train_inner][INFO] - {"epoch": 7, "update": 6.4, "loss": "2.193", "ppl": "4.57", "wps": "396178", "ups": "3.36", "wpb": "117983", "bsz": "256", "num_updates": "329400", "lr": "6.77374e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "98538"} +[2022-07-31 14:13:09,079][train_inner][INFO] - {"epoch": 7, "update": 6.404, "loss": "2.191", "ppl": "4.57", "wps": "396732", "ups": "3.35", "wpb": "118290", "bsz": "256", "num_updates": "329600", "lr": "6.77172e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "26.9", "wall": "98598"} +[2022-07-31 14:14:08,429][train_inner][INFO] - {"epoch": 7, "update": 6.408, "loss": "2.19", "ppl": "4.56", "wps": "397855", "ups": "3.37", "wpb": "118063", "bsz": "256", "num_updates": "329800", "lr": "6.7697e-05", "gnorm": "0.982", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "98657"} +[2022-07-31 14:15:08,127][train_inner][INFO] - {"epoch": 7, "update": 6.412, "loss": "2.186", "ppl": "4.55", "wps": "397834", "ups": "3.35", "wpb": "118749", "bsz": "256", "num_updates": "330000", "lr": "6.76768e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "98717"} +[2022-07-31 14:16:07,801][train_inner][INFO] - {"epoch": 7, "update": 6.416, "loss": "2.188", "ppl": "4.56", "wps": "397732", "ups": "3.35", "wpb": "118670", "bsz": "256", "num_updates": "330200", "lr": "6.76566e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "98776"} +[2022-07-31 14:17:07,298][train_inner][INFO] - {"epoch": 7, "update": 6.419, "loss": "2.2", "ppl": "4.59", "wps": "397417", "ups": "3.36", "wpb": "118225", "bsz": "256", "num_updates": "330400", "lr": "6.76364e-05", "gnorm": "0.982", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "98836"} +[2022-07-31 14:18:06,563][train_inner][INFO] - {"epoch": 7, "update": 6.423, "loss": "2.193", "ppl": "4.57", "wps": "397710", "ups": "3.37", "wpb": "117850", "bsz": "256", "num_updates": "330600", "lr": "6.76162e-05", "gnorm": "0.984", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "98895"} +[2022-07-31 14:19:05,747][train_inner][INFO] - {"epoch": 7, "update": 6.427, "loss": "2.192", "ppl": "4.57", "wps": "398843", "ups": "3.38", "wpb": "118026", "bsz": "256", "num_updates": "330800", "lr": "6.7596e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "27.6", "wall": "98954"} +[2022-07-31 14:20:05,308][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 14:20:05,598][train_inner][INFO] - {"epoch": 7, "update": 6.431, "loss": "2.193", "ppl": "4.57", "wps": "394845", "ups": "3.34", "wpb": "118158", "bsz": "256", "num_updates": "331000", "lr": "6.75758e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "60", "gb_free": "21.6", "wall": "99014"} +[2022-07-31 14:21:05,469][train_inner][INFO] - {"epoch": 7, "update": 6.435, "loss": "2.197", "ppl": "4.59", "wps": "394277", "ups": "3.34", "wpb": "118030", "bsz": "256", "num_updates": "331200", "lr": "6.75556e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "60", "gb_free": "22.9", "wall": "99074"} +[2022-07-31 14:22:04,788][train_inner][INFO] - {"epoch": 7, "update": 6.439, "loss": "2.19", "ppl": "4.56", "wps": "398756", "ups": "3.37", "wpb": "118268", "bsz": "256", "num_updates": "331400", "lr": "6.75354e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "99133"} +[2022-07-31 14:23:04,596][train_inner][INFO] - {"epoch": 7, "update": 6.443, "loss": "2.184", "ppl": "4.54", "wps": "396713", "ups": "3.34", "wpb": "118632", "bsz": "256", "num_updates": "331600", "lr": "6.75152e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "23.6", "wall": "99193"} +[2022-07-31 14:24:04,136][train_inner][INFO] - {"epoch": 7, "update": 6.447, "loss": "2.19", "ppl": "4.56", "wps": "396578", "ups": "3.36", "wpb": "118061", "bsz": "256", "num_updates": "331800", "lr": "6.74949e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "99253"} +[2022-07-31 14:25:03,293][train_inner][INFO] - {"epoch": 7, "update": 6.451, "loss": "2.193", "ppl": "4.57", "wps": "398476", "ups": "3.38", "wpb": "117863", "bsz": "256", "num_updates": "332000", "lr": "6.74747e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "99312"} +[2022-07-31 14:26:02,784][train_inner][INFO] - {"epoch": 7, "update": 6.454, "loss": "2.192", "ppl": "4.57", "wps": "398227", "ups": "3.36", "wpb": "118454", "bsz": "256", "num_updates": "332200", "lr": "6.74545e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "99371"} +[2022-07-31 14:27:02,015][train_inner][INFO] - {"epoch": 7, "update": 6.458, "loss": "2.196", "ppl": "4.58", "wps": "398072", "ups": "3.38", "wpb": "117891", "bsz": "256", "num_updates": "332400", "lr": "6.74343e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "99430"} +[2022-07-31 14:28:01,099][train_inner][INFO] - {"epoch": 7, "update": 6.462, "loss": "2.192", "ppl": "4.57", "wps": "400261", "ups": "3.39", "wpb": "118244", "bsz": "256", "num_updates": "332600", "lr": "6.74141e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "25.5", "wall": "99490"} +[2022-07-31 14:29:00,414][train_inner][INFO] - {"epoch": 7, "update": 6.466, "loss": "2.197", "ppl": "4.59", "wps": "398298", "ups": "3.37", "wpb": "118124", "bsz": "256", "num_updates": "332800", "lr": "6.73939e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "99549"} +[2022-07-31 14:29:59,761][train_inner][INFO] - {"epoch": 7, "update": 6.47, "loss": "2.189", "ppl": "4.56", "wps": "396735", "ups": "3.37", "wpb": "117723", "bsz": "256", "num_updates": "333000", "lr": "6.73737e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "99608"} +[2022-07-31 14:30:59,061][train_inner][INFO] - {"epoch": 7, "update": 6.474, "loss": "2.183", "ppl": "4.54", "wps": "400721", "ups": "3.37", "wpb": "118813", "bsz": "255.9", "num_updates": "333200", "lr": "6.73535e-05", "gnorm": "0.979", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "99667"} +[2022-07-31 14:31:58,705][train_inner][INFO] - {"epoch": 7, "update": 6.478, "loss": "2.194", "ppl": "4.57", "wps": "396458", "ups": "3.35", "wpb": "118232", "bsz": "256", "num_updates": "333400", "lr": "6.73333e-05", "gnorm": "0.985", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "99727"} +[2022-07-31 14:32:57,965][train_inner][INFO] - {"epoch": 7, "update": 6.482, "loss": "2.194", "ppl": "4.57", "wps": "400078", "ups": "3.38", "wpb": "118541", "bsz": "256", "num_updates": "333600", "lr": "6.73131e-05", "gnorm": "0.982", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "99786"} +[2022-07-31 14:33:57,507][train_inner][INFO] - {"epoch": 7, "update": 6.485, "loss": "2.179", "ppl": "4.53", "wps": "399763", "ups": "3.36", "wpb": "119014", "bsz": "256", "num_updates": "333800", "lr": "6.72929e-05", "gnorm": "0.977", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "99846"} +[2022-07-31 14:34:04,234][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 14:34:56,837][train_inner][INFO] - {"epoch": 7, "update": 6.489, "loss": "2.195", "ppl": "4.58", "wps": "397857", "ups": "3.37", "wpb": "118024", "bsz": "256", "num_updates": "334000", "lr": "6.72727e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "99905"} +[2022-07-31 14:35:56,394][train_inner][INFO] - {"epoch": 7, "update": 6.493, "loss": "2.197", "ppl": "4.58", "wps": "396003", "ups": "3.36", "wpb": "117923", "bsz": "256", "num_updates": "334200", "lr": "6.72525e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "31.2", "wall": "99965"} +[2022-07-31 14:36:55,896][train_inner][INFO] - {"epoch": 7, "update": 6.497, "loss": "2.195", "ppl": "4.58", "wps": "398832", "ups": "3.36", "wpb": "118654", "bsz": "256", "num_updates": "334400", "lr": "6.72323e-05", "gnorm": "0.981", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "100024"} +[2022-07-31 14:37:55,042][train_inner][INFO] - {"epoch": 7, "update": 6.501, "loss": "2.191", "ppl": "4.57", "wps": "399920", "ups": "3.38", "wpb": "118269", "bsz": "256", "num_updates": "334600", "lr": "6.72121e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "100083"} +[2022-07-31 14:38:54,418][train_inner][INFO] - {"epoch": 7, "update": 6.505, "loss": "2.188", "ppl": "4.56", "wps": "398732", "ups": "3.37", "wpb": "118374", "bsz": "256", "num_updates": "334800", "lr": "6.71919e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "100143"} +[2022-07-31 14:39:54,237][train_inner][INFO] - {"epoch": 7, "update": 6.509, "loss": "2.189", "ppl": "4.56", "wps": "395293", "ups": "3.34", "wpb": "118230", "bsz": "256", "num_updates": "335000", "lr": "6.71717e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "25.1", "wall": "100203"} +[2022-07-31 14:40:53,865][train_inner][INFO] - {"epoch": 7, "update": 6.513, "loss": "2.191", "ppl": "4.57", "wps": "395505", "ups": "3.35", "wpb": "117915", "bsz": "256", "num_updates": "335200", "lr": "6.71515e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "100262"} +[2022-07-31 14:40:56,821][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 14:41:54,065][train_inner][INFO] - {"epoch": 7, "update": 6.517, "loss": "2.194", "ppl": "4.58", "wps": "392621", "ups": "3.32", "wpb": "118178", "bsz": "256", "num_updates": "335400", "lr": "6.71313e-05", "gnorm": "0.983", "loss_scale": "8", "train_wall": "60", "gb_free": "21.3", "wall": "100322"} +[2022-07-31 14:42:53,254][train_inner][INFO] - {"epoch": 7, "update": 6.52, "loss": "2.194", "ppl": "4.57", "wps": "396148", "ups": "3.38", "wpb": "117236", "bsz": "256", "num_updates": "335600", "lr": "6.71111e-05", "gnorm": "0.989", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "100382"} +[2022-07-31 14:43:52,494][train_inner][INFO] - {"epoch": 7, "update": 6.524, "loss": "2.189", "ppl": "4.56", "wps": "400690", "ups": "3.38", "wpb": "118684", "bsz": "256", "num_updates": "335800", "lr": "6.70909e-05", "gnorm": "0.982", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "100441"} +[2022-07-31 14:44:53,034][train_inner][INFO] - {"epoch": 7, "update": 6.528, "loss": "2.187", "ppl": "4.55", "wps": "392383", "ups": "3.3", "wpb": "118774", "bsz": "256", "num_updates": "336000", "lr": "6.70707e-05", "gnorm": "0.981", "loss_scale": "8", "train_wall": "60", "gb_free": "23", "wall": "100501"} +[2022-07-31 14:45:52,601][train_inner][INFO] - {"epoch": 7, "update": 6.532, "loss": "2.192", "ppl": "4.57", "wps": "396457", "ups": "3.36", "wpb": "118078", "bsz": "256", "num_updates": "336200", "lr": "6.70505e-05", "gnorm": "0.987", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "100561"} +[2022-07-31 14:46:52,147][train_inner][INFO] - {"epoch": 7, "update": 6.536, "loss": "2.191", "ppl": "4.57", "wps": "395564", "ups": "3.36", "wpb": "117771", "bsz": "256", "num_updates": "336400", "lr": "6.70303e-05", "gnorm": "0.988", "loss_scale": "8", "train_wall": "59", "gb_free": "24.4", "wall": "100621"} +[2022-07-31 14:47:51,618][train_inner][INFO] - {"epoch": 7, "update": 6.54, "loss": "2.183", "ppl": "4.54", "wps": "397474", "ups": "3.36", "wpb": "118190", "bsz": "256", "num_updates": "336600", "lr": "6.70101e-05", "gnorm": "0.983", "loss_scale": "8", "train_wall": "59", "gb_free": "27.3", "wall": "100680"} +[2022-07-31 14:48:51,329][train_inner][INFO] - {"epoch": 7, "update": 6.544, "loss": "2.192", "ppl": "4.57", "wps": "398472", "ups": "3.35", "wpb": "118964", "bsz": "256", "num_updates": "336800", "lr": "6.69899e-05", "gnorm": "0.98", "loss_scale": "8", "train_wall": "59", "gb_free": "24.1", "wall": "100740"} +[2022-07-31 14:49:50,297][train_inner][INFO] - {"epoch": 7, "update": 6.548, "loss": "2.19", "ppl": "4.56", "wps": "400227", "ups": "3.39", "wpb": "118002", "bsz": "256", "num_updates": "337000", "lr": "6.69697e-05", "gnorm": "0.986", "loss_scale": "8", "train_wall": "59", "gb_free": "27.1", "wall": "100799"} +[2022-07-31 14:50:50,279][train_inner][INFO] - {"epoch": 7, "update": 6.552, "loss": "2.185", "ppl": "4.55", "wps": "395455", "ups": "3.33", "wpb": "118600", "bsz": "256", "num_updates": "337200", "lr": "6.69495e-05", "gnorm": "0.982", "loss_scale": "8", "train_wall": "60", "gb_free": "23.8", "wall": "100859"} +[2022-07-31 14:51:50,068][train_inner][INFO] - {"epoch": 7, "update": 6.555, "loss": "2.19", "ppl": "4.56", "wps": "397027", "ups": "3.35", "wpb": "118688", "bsz": "256", "num_updates": "337400", "lr": "6.69293e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "100918"} +[2022-07-31 14:52:49,396][train_inner][INFO] - {"epoch": 7, "update": 6.559, "loss": "2.184", "ppl": "4.54", "wps": "398612", "ups": "3.37", "wpb": "118245", "bsz": "256", "num_updates": "337600", "lr": "6.69091e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "26.6", "wall": "100978"} +[2022-07-31 14:53:48,804][train_inner][INFO] - {"epoch": 7, "update": 6.563, "loss": "2.187", "ppl": "4.55", "wps": "399182", "ups": "3.37", "wpb": "118572", "bsz": "256", "num_updates": "337800", "lr": "6.68889e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "23.6", "wall": "101037"} +[2022-07-31 14:54:48,213][train_inner][INFO] - {"epoch": 7, "update": 6.567, "loss": "2.195", "ppl": "4.58", "wps": "397393", "ups": "3.37", "wpb": "118043", "bsz": "256", "num_updates": "338000", "lr": "6.68687e-05", "gnorm": "0.997", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "101097"} +[2022-07-31 14:55:47,911][train_inner][INFO] - {"epoch": 7, "update": 6.571, "loss": "2.183", "ppl": "4.54", "wps": "397282", "ups": "3.35", "wpb": "118582", "bsz": "256", "num_updates": "338200", "lr": "6.68485e-05", "gnorm": "0.983", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "101156"} +[2022-07-31 14:56:47,831][train_inner][INFO] - {"epoch": 7, "update": 6.575, "loss": "2.185", "ppl": "4.55", "wps": "394979", "ups": "3.34", "wpb": "118335", "bsz": "256", "num_updates": "338400", "lr": "6.68283e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "60", "gb_free": "22.4", "wall": "101216"} +[2022-07-31 14:57:47,263][train_inner][INFO] - {"epoch": 7, "update": 6.579, "loss": "2.19", "ppl": "4.56", "wps": "397314", "ups": "3.37", "wpb": "118065", "bsz": "256", "num_updates": "338600", "lr": "6.68081e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "26.5", "wall": "101276"} +[2022-07-31 14:58:46,553][train_inner][INFO] - {"epoch": 7, "update": 6.583, "loss": "2.189", "ppl": "4.56", "wps": "396791", "ups": "3.37", "wpb": "117628", "bsz": "256", "num_updates": "338800", "lr": "6.67879e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "101335"} +[2022-07-31 14:59:46,101][train_inner][INFO] - {"epoch": 7, "update": 6.587, "loss": "2.191", "ppl": "4.57", "wps": "394630", "ups": "3.36", "wpb": "117497", "bsz": "256", "num_updates": "339000", "lr": "6.67677e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "101395"} +[2022-07-31 15:00:45,746][train_inner][INFO] - {"epoch": 7, "update": 6.59, "loss": "2.186", "ppl": "4.55", "wps": "397492", "ups": "3.35", "wpb": "118540", "bsz": "256", "num_updates": "339200", "lr": "6.67475e-05", "gnorm": "0.982", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "101454"} +[2022-07-31 15:01:44,949][train_inner][INFO] - {"epoch": 7, "update": 6.594, "loss": "2.182", "ppl": "4.54", "wps": "402398", "ups": "3.38", "wpb": "119114", "bsz": "256", "num_updates": "339400", "lr": "6.67273e-05", "gnorm": "0.98", "loss_scale": "32", "train_wall": "59", "gb_free": "26.2", "wall": "101513"} +[2022-07-31 15:02:44,868][train_inner][INFO] - {"epoch": 7, "update": 6.598, "loss": "2.189", "ppl": "4.56", "wps": "393778", "ups": "3.34", "wpb": "117974", "bsz": "256", "num_updates": "339600", "lr": "6.67071e-05", "gnorm": "0.985", "loss_scale": "32", "train_wall": "60", "gb_free": "21.7", "wall": "101573"} +[2022-07-31 15:03:43,938][train_inner][INFO] - {"epoch": 7, "update": 6.602, "loss": "2.189", "ppl": "4.56", "wps": "400683", "ups": "3.39", "wpb": "118342", "bsz": "256", "num_updates": "339800", "lr": "6.66869e-05", "gnorm": "0.986", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "101632"} +[2022-07-31 15:04:43,316][train_inner][INFO] - {"epoch": 7, "update": 6.606, "loss": "2.185", "ppl": "4.55", "wps": "399251", "ups": "3.37", "wpb": "118532", "bsz": "256", "num_updates": "340000", "lr": "6.66667e-05", "gnorm": "0.981", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "101692"} +[2022-07-31 15:05:43,009][train_inner][INFO] - {"epoch": 7, "update": 6.61, "loss": "2.185", "ppl": "4.55", "wps": "398536", "ups": "3.35", "wpb": "118948", "bsz": "256", "num_updates": "340200", "lr": "6.66465e-05", "gnorm": "0.982", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "101751"} +[2022-07-31 15:06:11,798][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 15:06:42,807][train_inner][INFO] - {"epoch": 7, "update": 6.614, "loss": "2.19", "ppl": "4.56", "wps": "396242", "ups": "3.34", "wpb": "118473", "bsz": "256", "num_updates": "340400", "lr": "6.66263e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "25.1", "wall": "101811"} +[2022-07-31 15:07:42,511][train_inner][INFO] - {"epoch": 7, "update": 6.618, "loss": "2.184", "ppl": "4.54", "wps": "397888", "ups": "3.35", "wpb": "118775", "bsz": "256", "num_updates": "340600", "lr": "6.66061e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "101871"} +[2022-07-31 15:08:41,635][train_inner][INFO] - {"epoch": 7, "update": 6.622, "loss": "2.186", "ppl": "4.55", "wps": "400531", "ups": "3.38", "wpb": "118406", "bsz": "256", "num_updates": "340800", "lr": "6.65859e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "101930"} +[2022-07-31 15:09:40,943][train_inner][INFO] - {"epoch": 7, "update": 6.625, "loss": "2.188", "ppl": "4.56", "wps": "399436", "ups": "3.37", "wpb": "118447", "bsz": "256", "num_updates": "341000", "lr": "6.65657e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "101989"} +[2022-07-31 15:10:40,638][train_inner][INFO] - {"epoch": 7, "update": 6.629, "loss": "2.189", "ppl": "4.56", "wps": "395525", "ups": "3.35", "wpb": "118054", "bsz": "256", "num_updates": "341200", "lr": "6.65455e-05", "gnorm": "0.996", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "102049"} +[2022-07-31 15:11:40,123][train_inner][INFO] - {"epoch": 7, "update": 6.633, "loss": "2.194", "ppl": "4.57", "wps": "395929", "ups": "3.36", "wpb": "117757", "bsz": "256", "num_updates": "341400", "lr": "6.65253e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "102109"} +[2022-07-31 15:12:39,560][train_inner][INFO] - {"epoch": 7, "update": 6.637, "loss": "2.182", "ppl": "4.54", "wps": "399781", "ups": "3.36", "wpb": "118808", "bsz": "256", "num_updates": "341600", "lr": "6.65051e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "25.4", "wall": "102168"} +[2022-07-31 15:13:07,512][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 15:13:39,401][train_inner][INFO] - {"epoch": 7, "update": 6.641, "loss": "2.183", "ppl": "4.54", "wps": "396810", "ups": "3.34", "wpb": "118727", "bsz": "256", "num_updates": "341800", "lr": "6.64848e-05", "gnorm": "0.984", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "102228"} +[2022-07-31 15:14:38,685][train_inner][INFO] - {"epoch": 7, "update": 6.645, "loss": "2.189", "ppl": "4.56", "wps": "399058", "ups": "3.37", "wpb": "118288", "bsz": "256", "num_updates": "342000", "lr": "6.64646e-05", "gnorm": "0.989", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "102287"} +[2022-07-31 15:15:38,296][train_inner][INFO] - {"epoch": 7, "update": 6.649, "loss": "2.184", "ppl": "4.55", "wps": "397043", "ups": "3.36", "wpb": "118341", "bsz": "256", "num_updates": "342200", "lr": "6.64444e-05", "gnorm": "0.984", "loss_scale": "8", "train_wall": "59", "gb_free": "23.1", "wall": "102347"} +[2022-07-31 15:16:37,504][train_inner][INFO] - {"epoch": 7, "update": 6.653, "loss": "2.191", "ppl": "4.57", "wps": "397192", "ups": "3.38", "wpb": "117585", "bsz": "256", "num_updates": "342400", "lr": "6.64242e-05", "gnorm": "0.99", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "102406"} +[2022-07-31 15:17:37,216][train_inner][INFO] - {"epoch": 7, "update": 6.656, "loss": "2.185", "ppl": "4.55", "wps": "399400", "ups": "3.35", "wpb": "119244", "bsz": "256", "num_updates": "342600", "lr": "6.6404e-05", "gnorm": "0.981", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "102466"} +[2022-07-31 15:18:36,478][train_inner][INFO] - {"epoch": 7, "update": 6.66, "loss": "2.194", "ppl": "4.57", "wps": "397411", "ups": "3.37", "wpb": "117755", "bsz": "256", "num_updates": "342800", "lr": "6.63838e-05", "gnorm": "0.99", "loss_scale": "8", "train_wall": "59", "gb_free": "27.3", "wall": "102525"} +[2022-07-31 15:19:35,736][train_inner][INFO] - {"epoch": 7, "update": 6.664, "loss": "2.185", "ppl": "4.55", "wps": "399282", "ups": "3.38", "wpb": "118302", "bsz": "256", "num_updates": "343000", "lr": "6.63636e-05", "gnorm": "0.995", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "102584"} +[2022-07-31 15:20:35,240][train_inner][INFO] - {"epoch": 7, "update": 6.668, "loss": "2.185", "ppl": "4.55", "wps": "398297", "ups": "3.36", "wpb": "118502", "bsz": "256", "num_updates": "343200", "lr": "6.63434e-05", "gnorm": "0.987", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "102644"} +[2022-07-31 15:21:34,877][train_inner][INFO] - {"epoch": 7, "update": 6.672, "loss": "2.177", "ppl": "4.52", "wps": "395662", "ups": "3.35", "wpb": "117979", "bsz": "256", "num_updates": "343400", "lr": "6.63232e-05", "gnorm": "0.986", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "102703"} +[2022-07-31 15:22:34,486][train_inner][INFO] - {"epoch": 7, "update": 6.676, "loss": "2.177", "ppl": "4.52", "wps": "399132", "ups": "3.36", "wpb": "118959", "bsz": "256", "num_updates": "343600", "lr": "6.6303e-05", "gnorm": "0.981", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "102763"} +[2022-07-31 15:23:33,908][train_inner][INFO] - {"epoch": 7, "update": 6.68, "loss": "2.189", "ppl": "4.56", "wps": "399473", "ups": "3.37", "wpb": "118686", "bsz": "256", "num_updates": "343800", "lr": "6.62828e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "25.8", "wall": "102822"} +[2022-07-31 15:24:33,225][train_inner][INFO] - {"epoch": 7, "update": 6.684, "loss": "2.188", "ppl": "4.56", "wps": "400960", "ups": "3.37", "wpb": "118918", "bsz": "256", "num_updates": "344000", "lr": "6.62626e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "102882"} +[2022-07-31 15:25:32,592][train_inner][INFO] - {"epoch": 7, "update": 6.688, "loss": "2.19", "ppl": "4.56", "wps": "398246", "ups": "3.37", "wpb": "118211", "bsz": "256", "num_updates": "344200", "lr": "6.62424e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "102941"} +[2022-07-31 15:26:31,924][train_inner][INFO] - {"epoch": 7, "update": 6.691, "loss": "2.178", "ppl": "4.53", "wps": "399636", "ups": "3.37", "wpb": "118557", "bsz": "256", "num_updates": "344400", "lr": "6.62222e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "103000"} +[2022-07-31 15:27:31,462][train_inner][INFO] - {"epoch": 7, "update": 6.695, "loss": "2.179", "ppl": "4.53", "wps": "397654", "ups": "3.36", "wpb": "118376", "bsz": "256", "num_updates": "344600", "lr": "6.6202e-05", "gnorm": "0.99", "loss_scale": "16", "train_wall": "59", "gb_free": "25", "wall": "103060"} +[2022-07-31 15:28:30,806][train_inner][INFO] - {"epoch": 7, "update": 6.699, "loss": "2.184", "ppl": "4.55", "wps": "398068", "ups": "3.37", "wpb": "118114", "bsz": "256", "num_updates": "344800", "lr": "6.61818e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "26.6", "wall": "103119"} +[2022-07-31 15:29:30,111][train_inner][INFO] - {"epoch": 7, "update": 6.703, "loss": "2.188", "ppl": "4.56", "wps": "396944", "ups": "3.37", "wpb": "117703", "bsz": "256", "num_updates": "345000", "lr": "6.61616e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "103179"} +[2022-07-31 15:30:29,608][train_inner][INFO] - {"epoch": 7, "update": 6.707, "loss": "2.188", "ppl": "4.56", "wps": "397189", "ups": "3.36", "wpb": "118157", "bsz": "256", "num_updates": "345200", "lr": "6.61414e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "24.7", "wall": "103238"} +[2022-07-31 15:31:29,152][train_inner][INFO] - {"epoch": 7, "update": 6.711, "loss": "2.182", "ppl": "4.54", "wps": "397097", "ups": "3.36", "wpb": "118223", "bsz": "256", "num_updates": "345400", "lr": "6.61212e-05", "gnorm": "0.986", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "103298"} +[2022-07-31 15:32:28,611][train_inner][INFO] - {"epoch": 7, "update": 6.715, "loss": "2.189", "ppl": "4.56", "wps": "396811", "ups": "3.36", "wpb": "117970", "bsz": "256", "num_updates": "345600", "lr": "6.6101e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "25.6", "wall": "103357"} +[2022-07-31 15:33:28,114][train_inner][INFO] - {"epoch": 7, "update": 6.719, "loss": "2.183", "ppl": "4.54", "wps": "397396", "ups": "3.36", "wpb": "118229", "bsz": "256", "num_updates": "345800", "lr": "6.60808e-05", "gnorm": "0.987", "loss_scale": "32", "train_wall": "59", "gb_free": "26.2", "wall": "103417"} +[2022-07-31 15:34:27,848][train_inner][INFO] - {"epoch": 7, "update": 6.723, "loss": "2.186", "ppl": "4.55", "wps": "394745", "ups": "3.35", "wpb": "117897", "bsz": "256", "num_updates": "346000", "lr": "6.60606e-05", "gnorm": "0.986", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "103476"} +[2022-07-31 15:35:27,062][train_inner][INFO] - {"epoch": 7, "update": 6.726, "loss": "2.18", "ppl": "4.53", "wps": "401802", "ups": "3.38", "wpb": "118961", "bsz": "256", "num_updates": "346200", "lr": "6.60404e-05", "gnorm": "0.985", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "103535"} +[2022-07-31 15:36:26,214][train_inner][INFO] - {"epoch": 7, "update": 6.73, "loss": "2.186", "ppl": "4.55", "wps": "400350", "ups": "3.38", "wpb": "118408", "bsz": "256", "num_updates": "346400", "lr": "6.60202e-05", "gnorm": "0.983", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "103595"} +[2022-07-31 15:37:25,879][train_inner][INFO] - {"epoch": 7, "update": 6.734, "loss": "2.184", "ppl": "4.54", "wps": "396362", "ups": "3.35", "wpb": "118243", "bsz": "256", "num_updates": "346600", "lr": "6.6e-05", "gnorm": "0.989", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "103654"} +[2022-07-31 15:38:25,484][train_inner][INFO] - {"epoch": 7, "update": 6.738, "loss": "2.19", "ppl": "4.56", "wps": "396720", "ups": "3.36", "wpb": "118232", "bsz": "256", "num_updates": "346800", "lr": "6.59798e-05", "gnorm": "0.988", "loss_scale": "32", "train_wall": "59", "gb_free": "25.1", "wall": "103714"} +[2022-07-31 15:39:25,115][train_inner][INFO] - {"epoch": 7, "update": 6.742, "loss": "2.181", "ppl": "4.53", "wps": "396566", "ups": "3.35", "wpb": "118237", "bsz": "256", "num_updates": "347000", "lr": "6.59596e-05", "gnorm": "0.985", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "103774"} +[2022-07-31 15:40:24,618][train_inner][INFO] - {"epoch": 7, "update": 6.746, "loss": "2.181", "ppl": "4.54", "wps": "396883", "ups": "3.36", "wpb": "118079", "bsz": "256", "num_updates": "347200", "lr": "6.59394e-05", "gnorm": "0.988", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "103833"} +[2022-07-31 15:41:09,703][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 15:41:24,675][train_inner][INFO] - {"epoch": 7, "update": 6.75, "loss": "2.183", "ppl": "4.54", "wps": "396437", "ups": "3.33", "wpb": "119043", "bsz": "256", "num_updates": "347400", "lr": "6.59192e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "60", "gb_free": "23.6", "wall": "103893"} +[2022-07-31 15:42:23,911][train_inner][INFO] - {"epoch": 7, "update": 6.754, "loss": "2.184", "ppl": "4.54", "wps": "398470", "ups": "3.38", "wpb": "118019", "bsz": "256", "num_updates": "347600", "lr": "6.5899e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "25.9", "wall": "103952"} +[2022-07-31 15:43:22,768][train_inner][INFO] - {"epoch": 7, "update": 6.757, "loss": "2.182", "ppl": "4.54", "wps": "401380", "ups": "3.4", "wpb": "118118", "bsz": "256", "num_updates": "347800", "lr": "6.58788e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "104011"} +[2022-07-31 15:44:22,408][train_inner][INFO] - {"epoch": 7, "update": 6.761, "loss": "2.18", "ppl": "4.53", "wps": "397074", "ups": "3.35", "wpb": "118407", "bsz": "256", "num_updates": "348000", "lr": "6.58586e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "104071"} +[2022-07-31 15:45:21,850][train_inner][INFO] - {"epoch": 7, "update": 6.765, "loss": "2.184", "ppl": "4.54", "wps": "397383", "ups": "3.36", "wpb": "118106", "bsz": "256", "num_updates": "348200", "lr": "6.58384e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "26.4", "wall": "104130"} +[2022-07-31 15:46:21,134][train_inner][INFO] - {"epoch": 7, "update": 6.769, "loss": "2.184", "ppl": "4.55", "wps": "399662", "ups": "3.37", "wpb": "118466", "bsz": "256", "num_updates": "348400", "lr": "6.58182e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "104190"} +[2022-07-31 15:47:20,942][train_inner][INFO] - {"epoch": 7, "update": 6.773, "loss": "2.179", "ppl": "4.53", "wps": "396671", "ups": "3.34", "wpb": "118621", "bsz": "256", "num_updates": "348600", "lr": "6.5798e-05", "gnorm": "0.985", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "104249"} +[2022-07-31 15:48:20,552][train_inner][INFO] - {"epoch": 7, "update": 6.777, "loss": "2.177", "ppl": "4.52", "wps": "395900", "ups": "3.36", "wpb": "117996", "bsz": "256", "num_updates": "348800", "lr": "6.57778e-05", "gnorm": "0.99", "loss_scale": "16", "train_wall": "59", "gb_free": "24.5", "wall": "104309"} +[2022-07-31 15:49:19,788][train_inner][INFO] - {"epoch": 7, "update": 6.781, "loss": "2.18", "ppl": "4.53", "wps": "399284", "ups": "3.38", "wpb": "118259", "bsz": "256", "num_updates": "349000", "lr": "6.57576e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "23.7", "wall": "104368"} +[2022-07-31 15:50:19,033][train_inner][INFO] - {"epoch": 7, "update": 6.785, "loss": "2.178", "ppl": "4.52", "wps": "401051", "ups": "3.38", "wpb": "118801", "bsz": "256", "num_updates": "349200", "lr": "6.57374e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "104427"} +[2022-07-31 15:51:19,508][train_inner][INFO] - {"epoch": 7, "update": 6.789, "loss": "2.179", "ppl": "4.53", "wps": "391896", "ups": "3.31", "wpb": "118500", "bsz": "256", "num_updates": "349400", "lr": "6.57172e-05", "gnorm": "0.992", "loss_scale": "32", "train_wall": "60", "gb_free": "23.9", "wall": "104488"} +[2022-07-31 15:52:19,035][train_inner][INFO] - {"epoch": 7, "update": 6.792, "loss": "2.178", "ppl": "4.52", "wps": "395436", "ups": "3.36", "wpb": "117693", "bsz": "256", "num_updates": "349600", "lr": "6.5697e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "104547"} +[2022-07-31 15:53:18,945][train_inner][INFO] - {"epoch": 7, "update": 6.796, "loss": "2.185", "ppl": "4.55", "wps": "394616", "ups": "3.34", "wpb": "118207", "bsz": "256", "num_updates": "349800", "lr": "6.56768e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "60", "gb_free": "23", "wall": "104607"} +[2022-07-31 15:54:18,644][train_inner][INFO] - {"epoch": 7, "update": 6.8, "loss": "2.181", "ppl": "4.53", "wps": "396806", "ups": "3.35", "wpb": "118444", "bsz": "256", "num_updates": "350000", "lr": "6.56566e-05", "gnorm": "0.988", "loss_scale": "32", "train_wall": "59", "gb_free": "29.2", "wall": "104667"} +[2022-07-31 15:54:18,645][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-31 15:54:41,408][valid][INFO] - {"epoch": 7, "valid_loss": "2.072", "valid_ppl": "4.2", "valid_wps": "1.56698e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "350000", "valid_best_loss": "2.072"} +[2022-07-31 15:54:41,411][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 7 @ 350000 updates +[2022-07-31 15:54:41,412][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_7_350000.pt +[2022-07-31 15:54:50,290][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_7_350000.pt +[2022-07-31 15:55:12,789][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_7_350000.pt (epoch 7 @ 350000 updates, score 2.072) (writing took 31.377559075132012 seconds) +[2022-07-31 15:56:12,101][train_inner][INFO] - {"epoch": 7, "update": 6.804, "loss": "2.174", "ppl": "4.51", "wps": "208956", "ups": "1.76", "wpb": "118537", "bsz": "256", "num_updates": "350200", "lr": "6.56364e-05", "gnorm": "0.987", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "104781"} +[2022-07-31 15:57:11,549][train_inner][INFO] - {"epoch": 7, "update": 6.808, "loss": "2.181", "ppl": "4.53", "wps": "399728", "ups": "3.36", "wpb": "118813", "bsz": "255.9", "num_updates": "350400", "lr": "6.56162e-05", "gnorm": "0.985", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "104840"} +[2022-07-31 15:58:11,050][train_inner][INFO] - {"epoch": 7, "update": 6.812, "loss": "2.184", "ppl": "4.55", "wps": "396772", "ups": "3.36", "wpb": "118042", "bsz": "256", "num_updates": "350600", "lr": "6.5596e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "104899"} +[2022-07-31 15:59:09,907][train_inner][INFO] - {"epoch": 7, "update": 6.816, "loss": "2.187", "ppl": "4.55", "wps": "401013", "ups": "3.4", "wpb": "118010", "bsz": "256", "num_updates": "350800", "lr": "6.55758e-05", "gnorm": "0.993", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "104958"} +[2022-07-31 16:00:09,214][train_inner][INFO] - {"epoch": 7, "update": 6.82, "loss": "2.188", "ppl": "4.56", "wps": "397905", "ups": "3.37", "wpb": "117992", "bsz": "256", "num_updates": "351000", "lr": "6.55556e-05", "gnorm": "0.992", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "105018"} +[2022-07-31 16:01:08,783][train_inner][INFO] - {"epoch": 7, "update": 6.824, "loss": "2.175", "ppl": "4.52", "wps": "398204", "ups": "3.36", "wpb": "118603", "bsz": "256", "num_updates": "351200", "lr": "6.55354e-05", "gnorm": "0.984", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "105077"} +[2022-07-31 16:01:50,381][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 16:02:08,379][train_inner][INFO] - {"epoch": 7, "update": 6.827, "loss": "2.183", "ppl": "4.54", "wps": "396242", "ups": "3.36", "wpb": "118070", "bsz": "256", "num_updates": "351400", "lr": "6.55152e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "105137"} +[2022-07-31 16:03:07,741][train_inner][INFO] - {"epoch": 7, "update": 6.831, "loss": "2.18", "ppl": "4.53", "wps": "399725", "ups": "3.37", "wpb": "118643", "bsz": "256", "num_updates": "351600", "lr": "6.54949e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "105196"} +[2022-07-31 16:04:07,292][train_inner][INFO] - {"epoch": 7, "update": 6.835, "loss": "2.182", "ppl": "4.54", "wps": "397167", "ups": "3.36", "wpb": "118257", "bsz": "256", "num_updates": "351800", "lr": "6.54747e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "105256"} +[2022-07-31 16:05:06,112][train_inner][INFO] - {"epoch": 7, "update": 6.839, "loss": "2.178", "ppl": "4.53", "wps": "401685", "ups": "3.4", "wpb": "118134", "bsz": "256", "num_updates": "352000", "lr": "6.54545e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "58", "gb_free": "24", "wall": "105315"} +[2022-07-31 16:06:05,166][train_inner][INFO] - {"epoch": 7, "update": 6.843, "loss": "2.183", "ppl": "4.54", "wps": "400545", "ups": "3.39", "wpb": "118269", "bsz": "256", "num_updates": "352200", "lr": "6.54343e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "105374"} +[2022-07-31 16:07:04,346][train_inner][INFO] - {"epoch": 7, "update": 6.847, "loss": "2.181", "ppl": "4.53", "wps": "398641", "ups": "3.38", "wpb": "117956", "bsz": "256", "num_updates": "352400", "lr": "6.54141e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "105433"} +[2022-07-31 16:08:04,048][train_inner][INFO] - {"epoch": 7, "update": 6.851, "loss": "2.18", "ppl": "4.53", "wps": "398716", "ups": "3.35", "wpb": "119020", "bsz": "256", "num_updates": "352600", "lr": "6.53939e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "105492"} +[2022-07-31 16:09:03,207][train_inner][INFO] - {"epoch": 7, "update": 6.855, "loss": "2.18", "ppl": "4.53", "wps": "398776", "ups": "3.38", "wpb": "117955", "bsz": "256", "num_updates": "352800", "lr": "6.53737e-05", "gnorm": "0.992", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "105552"} +[2022-07-31 16:10:02,509][train_inner][INFO] - {"epoch": 7, "update": 6.859, "loss": "2.184", "ppl": "4.54", "wps": "397208", "ups": "3.37", "wpb": "117775", "bsz": "256", "num_updates": "353000", "lr": "6.53535e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "105611"} +[2022-07-31 16:11:02,227][train_inner][INFO] - {"epoch": 7, "update": 6.862, "loss": "2.181", "ppl": "4.54", "wps": "396740", "ups": "3.35", "wpb": "118462", "bsz": "256", "num_updates": "353200", "lr": "6.53333e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "105671"} +[2022-07-31 16:12:01,775][train_inner][INFO] - {"epoch": 7, "update": 6.866, "loss": "2.18", "ppl": "4.53", "wps": "397588", "ups": "3.36", "wpb": "118377", "bsz": "256", "num_updates": "353400", "lr": "6.53131e-05", "gnorm": "0.989", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "105730"} +[2022-07-31 16:13:01,571][train_inner][INFO] - {"epoch": 7, "update": 6.87, "loss": "2.178", "ppl": "4.53", "wps": "398872", "ups": "3.34", "wpb": "119253", "bsz": "256", "num_updates": "353600", "lr": "6.52929e-05", "gnorm": "0.985", "loss_scale": "32", "train_wall": "59", "gb_free": "29.3", "wall": "105790"} +[2022-07-31 16:14:01,246][train_inner][INFO] - {"epoch": 7, "update": 6.874, "loss": "2.176", "ppl": "4.52", "wps": "397640", "ups": "3.35", "wpb": "118646", "bsz": "256", "num_updates": "353800", "lr": "6.52727e-05", "gnorm": "0.987", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "105850"} +[2022-07-31 16:15:00,807][train_inner][INFO] - {"epoch": 7, "update": 6.878, "loss": "2.178", "ppl": "4.53", "wps": "396926", "ups": "3.36", "wpb": "118205", "bsz": "256", "num_updates": "354000", "lr": "6.52525e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "105909"} +[2022-07-31 16:16:00,501][train_inner][INFO] - {"epoch": 7, "update": 6.882, "loss": "2.18", "ppl": "4.53", "wps": "396974", "ups": "3.35", "wpb": "118485", "bsz": "256", "num_updates": "354200", "lr": "6.52323e-05", "gnorm": "0.988", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "105969"} +[2022-07-31 16:17:00,138][train_inner][INFO] - {"epoch": 7, "update": 6.886, "loss": "2.18", "ppl": "4.53", "wps": "399015", "ups": "3.35", "wpb": "118979", "bsz": "256", "num_updates": "354400", "lr": "6.52121e-05", "gnorm": "0.983", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "106029"} +[2022-07-31 16:17:26,638][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 16:17:59,746][train_inner][INFO] - {"epoch": 7, "update": 6.89, "loss": "2.178", "ppl": "4.53", "wps": "397432", "ups": "3.36", "wpb": "118451", "bsz": "256", "num_updates": "354600", "lr": "6.51919e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "106088"} +[2022-07-31 16:18:59,324][train_inner][INFO] - {"epoch": 7, "update": 6.893, "loss": "2.179", "ppl": "4.53", "wps": "397252", "ups": "3.36", "wpb": "118335", "bsz": "256", "num_updates": "354800", "lr": "6.51717e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "106148"} +[2022-07-31 16:19:58,812][train_inner][INFO] - {"epoch": 7, "update": 6.897, "loss": "2.176", "ppl": "4.52", "wps": "397784", "ups": "3.36", "wpb": "118317", "bsz": "256", "num_updates": "355000", "lr": "6.51515e-05", "gnorm": "0.99", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "106207"} +[2022-07-31 16:20:58,238][train_inner][INFO] - {"epoch": 7, "update": 6.901, "loss": "2.182", "ppl": "4.54", "wps": "400270", "ups": "3.37", "wpb": "118931", "bsz": "256", "num_updates": "355200", "lr": "6.51313e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "23.7", "wall": "106267"} +[2022-07-31 16:21:57,843][train_inner][INFO] - {"epoch": 7, "update": 6.905, "loss": "2.182", "ppl": "4.54", "wps": "395873", "ups": "3.36", "wpb": "117980", "bsz": "256", "num_updates": "355400", "lr": "6.51111e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "31.3", "wall": "106326"} +[2022-07-31 16:22:57,732][train_inner][INFO] - {"epoch": 7, "update": 6.909, "loss": "2.172", "ppl": "4.51", "wps": "396558", "ups": "3.34", "wpb": "118747", "bsz": "256", "num_updates": "355600", "lr": "6.50909e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "60", "gb_free": "23.3", "wall": "106386"} +[2022-07-31 16:23:57,154][train_inner][INFO] - {"epoch": 7, "update": 6.913, "loss": "2.178", "ppl": "4.53", "wps": "397472", "ups": "3.37", "wpb": "118092", "bsz": "256", "num_updates": "355800", "lr": "6.50707e-05", "gnorm": "1.001", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "106446"} +[2022-07-31 16:24:56,681][train_inner][INFO] - {"epoch": 7, "update": 6.917, "loss": "2.179", "ppl": "4.53", "wps": "395388", "ups": "3.36", "wpb": "117681", "bsz": "256", "num_updates": "356000", "lr": "6.50505e-05", "gnorm": "0.996", "loss_scale": "16", "train_wall": "59", "gb_free": "23.6", "wall": "106505"} +[2022-07-31 16:25:56,191][train_inner][INFO] - {"epoch": 7, "update": 6.921, "loss": "2.176", "ppl": "4.52", "wps": "397810", "ups": "3.36", "wpb": "118367", "bsz": "256", "num_updates": "356200", "lr": "6.50303e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "24.7", "wall": "106565"} +[2022-07-31 16:26:55,785][train_inner][INFO] - {"epoch": 7, "update": 6.925, "loss": "2.177", "ppl": "4.52", "wps": "396879", "ups": "3.36", "wpb": "118258", "bsz": "256", "num_updates": "356400", "lr": "6.50101e-05", "gnorm": "0.99", "loss_scale": "16", "train_wall": "59", "gb_free": "31.5", "wall": "106624"} +[2022-07-31 16:27:56,452][train_inner][INFO] - {"epoch": 7, "update": 6.928, "loss": "2.178", "ppl": "4.53", "wps": "390038", "ups": "3.3", "wpb": "118311", "bsz": "256", "num_updates": "356600", "lr": "6.49899e-05", "gnorm": "0.99", "loss_scale": "32", "train_wall": "60", "gb_free": "27.5", "wall": "106685"} +[2022-07-31 16:28:55,809][train_inner][INFO] - {"epoch": 7, "update": 6.932, "loss": "2.18", "ppl": "4.53", "wps": "396861", "ups": "3.37", "wpb": "117783", "bsz": "256", "num_updates": "356800", "lr": "6.49697e-05", "gnorm": "0.992", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "106744"} +[2022-07-31 16:29:55,325][train_inner][INFO] - {"epoch": 7, "update": 6.936, "loss": "2.175", "ppl": "4.52", "wps": "398382", "ups": "3.36", "wpb": "118548", "bsz": "256", "num_updates": "357000", "lr": "6.49495e-05", "gnorm": "0.989", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "106804"} +[2022-07-31 16:30:55,053][train_inner][INFO] - {"epoch": 7, "update": 6.94, "loss": "2.178", "ppl": "4.52", "wps": "396607", "ups": "3.35", "wpb": "118442", "bsz": "256", "num_updates": "357200", "lr": "6.49293e-05", "gnorm": "0.99", "loss_scale": "32", "train_wall": "59", "gb_free": "26.5", "wall": "106863"} +[2022-07-31 16:31:54,400][train_inner][INFO] - {"epoch": 7, "update": 6.944, "loss": "2.179", "ppl": "4.53", "wps": "398876", "ups": "3.37", "wpb": "118360", "bsz": "256", "num_updates": "357400", "lr": "6.49091e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "106923"} +[2022-07-31 16:32:54,225][train_inner][INFO] - {"epoch": 7, "update": 6.948, "loss": "2.175", "ppl": "4.52", "wps": "397820", "ups": "3.34", "wpb": "118997", "bsz": "256", "num_updates": "357600", "lr": "6.48889e-05", "gnorm": "0.988", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "106983"} +[2022-07-31 16:33:53,591][train_inner][INFO] - {"epoch": 7, "update": 6.952, "loss": "2.169", "ppl": "4.5", "wps": "399835", "ups": "3.37", "wpb": "118683", "bsz": "256", "num_updates": "357800", "lr": "6.48687e-05", "gnorm": "0.987", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "107042"} +[2022-07-31 16:34:43,579][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 16:34:53,308][train_inner][INFO] - {"epoch": 7, "update": 6.956, "loss": "2.173", "ppl": "4.51", "wps": "398006", "ups": "3.35", "wpb": "118836", "bsz": "256", "num_updates": "358000", "lr": "6.48485e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "24.8", "wall": "107102"} +[2022-07-31 16:35:53,118][train_inner][INFO] - {"epoch": 7, "update": 6.96, "loss": "2.176", "ppl": "4.52", "wps": "395703", "ups": "3.34", "wpb": "118335", "bsz": "256", "num_updates": "358200", "lr": "6.48283e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "27.7", "wall": "107162"} +[2022-07-31 16:36:52,616][train_inner][INFO] - {"epoch": 7, "update": 6.963, "loss": "2.177", "ppl": "4.52", "wps": "397718", "ups": "3.36", "wpb": "118316", "bsz": "256", "num_updates": "358400", "lr": "6.48081e-05", "gnorm": "0.989", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "107221"} +[2022-07-31 16:37:51,893][train_inner][INFO] - {"epoch": 7, "update": 6.967, "loss": "2.177", "ppl": "4.52", "wps": "400418", "ups": "3.37", "wpb": "118676", "bsz": "256", "num_updates": "358600", "lr": "6.47879e-05", "gnorm": "0.99", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "107280"} +[2022-07-31 16:38:51,226][train_inner][INFO] - {"epoch": 7, "update": 6.971, "loss": "2.174", "ppl": "4.51", "wps": "401156", "ups": "3.37", "wpb": "119010", "bsz": "256", "num_updates": "358800", "lr": "6.47677e-05", "gnorm": "0.987", "loss_scale": "16", "train_wall": "59", "gb_free": "24.6", "wall": "107340"} +[2022-07-31 16:39:50,762][train_inner][INFO] - {"epoch": 7, "update": 6.975, "loss": "2.183", "ppl": "4.54", "wps": "395685", "ups": "3.36", "wpb": "117786", "bsz": "256", "num_updates": "359000", "lr": "6.47475e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "107399"} +[2022-07-31 16:40:50,179][train_inner][INFO] - {"epoch": 7, "update": 6.979, "loss": "2.171", "ppl": "4.5", "wps": "399418", "ups": "3.37", "wpb": "118659", "bsz": "256", "num_updates": "359200", "lr": "6.47273e-05", "gnorm": "0.988", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "107459"} +[2022-07-31 16:41:49,777][train_inner][INFO] - {"epoch": 7, "update": 6.983, "loss": "2.174", "ppl": "4.51", "wps": "394975", "ups": "3.36", "wpb": "117699", "bsz": "256", "num_updates": "359400", "lr": "6.47071e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "107518"} +[2022-07-31 16:42:48,901][train_inner][INFO] - {"epoch": 7, "update": 6.987, "loss": "2.167", "ppl": "4.49", "wps": "403193", "ups": "3.38", "wpb": "119191", "bsz": "256", "num_updates": "359600", "lr": "6.46869e-05", "gnorm": "0.984", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "107577"} +[2022-07-31 16:43:48,454][train_inner][INFO] - {"epoch": 7, "update": 6.991, "loss": "2.179", "ppl": "4.53", "wps": "395445", "ups": "3.36", "wpb": "117748", "bsz": "256", "num_updates": "359800", "lr": "6.46667e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "107637"} +[2022-07-31 16:44:47,820][train_inner][INFO] - {"epoch": 7, "update": 6.995, "loss": "2.174", "ppl": "4.51", "wps": "399174", "ups": "3.37", "wpb": "118487", "bsz": "256", "num_updates": "360000", "lr": "6.46465e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "107696"} +[2022-07-31 16:45:47,103][train_inner][INFO] - {"epoch": 7, "update": 6.998, "loss": "2.178", "ppl": "4.53", "wps": "399629", "ups": "3.37", "wpb": "118454", "bsz": "256", "num_updates": "360200", "lr": "6.46263e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "107756"} +[2022-07-31 16:46:11,768][fairseq_cli.train][INFO] - end of epoch 7 (average epoch stats below) +[2022-07-31 16:46:11,769][train][INFO] - {"epoch": 7, "train_loss": "2.191", "train_ppl": "4.57", "train_wps": "395890", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "360283", "train_lr": "6.46179e-05", "train_gnorm": "0.984", "train_loss_scale": "32", "train_train_wall": "15227", "train_gb_free": "22.1", "train_wall": "107780"} +[2022-07-31 16:46:11,879][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-31 16:46:11,882][fairseq.trainer][INFO] - begin training epoch 8 +[2022-07-31 16:46:11,883][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-31 16:46:57,765][train_inner][INFO] - {"epoch": 8, "update": 7.002, "loss": "2.177", "ppl": "4.52", "wps": "333027", "ups": "2.83", "wpb": "117662", "bsz": "255.4", "num_updates": "360400", "lr": "6.46061e-05", "gnorm": "0.995", "loss_scale": "32", "train_wall": "60", "gb_free": "32.9", "wall": "107826"} +[2022-07-31 16:47:57,112][train_inner][INFO] - {"epoch": 8, "update": 7.006, "loss": "2.178", "ppl": "4.52", "wps": "396490", "ups": "3.37", "wpb": "117652", "bsz": "256", "num_updates": "360600", "lr": "6.45859e-05", "gnorm": "0.994", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "107886"} +[2022-07-31 16:48:56,408][train_inner][INFO] - {"epoch": 8, "update": 7.01, "loss": "2.174", "ppl": "4.51", "wps": "399024", "ups": "3.37", "wpb": "118300", "bsz": "256", "num_updates": "360800", "lr": "6.45657e-05", "gnorm": "0.992", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "107945"} +[2022-07-31 16:49:55,784][train_inner][INFO] - {"epoch": 8, "update": 7.014, "loss": "2.172", "ppl": "4.5", "wps": "396763", "ups": "3.37", "wpb": "117790", "bsz": "256", "num_updates": "361000", "lr": "6.45455e-05", "gnorm": "0.993", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "108004"} +[2022-07-31 16:50:55,535][train_inner][INFO] - {"epoch": 8, "update": 7.018, "loss": "2.172", "ppl": "4.51", "wps": "397134", "ups": "3.35", "wpb": "118644", "bsz": "255.9", "num_updates": "361200", "lr": "6.45253e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "25.9", "wall": "108064"} +[2022-07-31 16:51:55,238][train_inner][INFO] - {"epoch": 8, "update": 7.022, "loss": "2.171", "ppl": "4.5", "wps": "394318", "ups": "3.35", "wpb": "117710", "bsz": "256", "num_updates": "361400", "lr": "6.45051e-05", "gnorm": "0.994", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "108124"} +[2022-07-31 16:52:54,891][train_inner][INFO] - {"epoch": 8, "update": 7.026, "loss": "2.173", "ppl": "4.51", "wps": "396109", "ups": "3.35", "wpb": "118145", "bsz": "256", "num_updates": "361600", "lr": "6.44848e-05", "gnorm": "0.993", "loss_scale": "32", "train_wall": "59", "gb_free": "22.2", "wall": "108183"} +[2022-07-31 16:53:55,369][train_inner][INFO] - {"epoch": 8, "update": 7.029, "loss": "2.173", "ppl": "4.51", "wps": "391244", "ups": "3.31", "wpb": "118306", "bsz": "256", "num_updates": "361800", "lr": "6.44646e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "60", "gb_free": "21.3", "wall": "108244"} +[2022-07-31 16:54:54,767][train_inner][INFO] - {"epoch": 8, "update": 7.033, "loss": "2.172", "ppl": "4.51", "wps": "397639", "ups": "3.37", "wpb": "118093", "bsz": "256", "num_updates": "362000", "lr": "6.44444e-05", "gnorm": "0.993", "loss_scale": "32", "train_wall": "59", "gb_free": "22.1", "wall": "108303"} +[2022-07-31 16:55:54,075][train_inner][INFO] - {"epoch": 8, "update": 7.037, "loss": "2.166", "ppl": "4.49", "wps": "401568", "ups": "3.37", "wpb": "119080", "bsz": "256", "num_updates": "362200", "lr": "6.44242e-05", "gnorm": "0.987", "loss_scale": "64", "train_wall": "59", "gb_free": "22.6", "wall": "108363"} +[2022-07-31 16:56:53,560][train_inner][INFO] - {"epoch": 8, "update": 7.041, "loss": "2.171", "ppl": "4.5", "wps": "397836", "ups": "3.36", "wpb": "118326", "bsz": "256", "num_updates": "362400", "lr": "6.4404e-05", "gnorm": "0.992", "loss_scale": "64", "train_wall": "59", "gb_free": "23.4", "wall": "108422"} +[2022-07-31 16:57:05,825][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 16:57:52,745][train_inner][INFO] - {"epoch": 8, "update": 7.045, "loss": "2.178", "ppl": "4.53", "wps": "397710", "ups": "3.38", "wpb": "117691", "bsz": "256", "num_updates": "362600", "lr": "6.43838e-05", "gnorm": "0.996", "loss_scale": "32", "train_wall": "59", "gb_free": "29.7", "wall": "108481"} +[2022-07-31 16:58:52,396][train_inner][INFO] - {"epoch": 8, "update": 7.049, "loss": "2.179", "ppl": "4.53", "wps": "394423", "ups": "3.35", "wpb": "117639", "bsz": "256", "num_updates": "362800", "lr": "6.43636e-05", "gnorm": "0.996", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "108541"} +[2022-07-31 16:59:52,914][train_inner][INFO] - {"epoch": 8, "update": 7.053, "loss": "2.173", "ppl": "4.51", "wps": "393045", "ups": "3.3", "wpb": "118930", "bsz": "256", "num_updates": "363000", "lr": "6.43434e-05", "gnorm": "0.989", "loss_scale": "32", "train_wall": "60", "gb_free": "21.5", "wall": "108601"} +[2022-07-31 17:00:52,084][train_inner][INFO] - {"epoch": 8, "update": 7.057, "loss": "2.177", "ppl": "4.52", "wps": "400000", "ups": "3.38", "wpb": "118338", "bsz": "256", "num_updates": "363200", "lr": "6.43232e-05", "gnorm": "0.992", "loss_scale": "32", "train_wall": "59", "gb_free": "26.4", "wall": "108661"} +[2022-07-31 17:01:51,314][train_inner][INFO] - {"epoch": 8, "update": 7.061, "loss": "2.179", "ppl": "4.53", "wps": "399338", "ups": "3.38", "wpb": "118264", "bsz": "256", "num_updates": "363400", "lr": "6.4303e-05", "gnorm": "0.997", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "108720"} +[2022-07-31 17:02:51,180][train_inner][INFO] - {"epoch": 8, "update": 7.064, "loss": "2.167", "ppl": "4.49", "wps": "398340", "ups": "3.34", "wpb": "119233", "bsz": "256", "num_updates": "363600", "lr": "6.42828e-05", "gnorm": "0.987", "loss_scale": "32", "train_wall": "60", "gb_free": "23.2", "wall": "108780"} +[2022-07-31 17:03:50,593][train_inner][INFO] - {"epoch": 8, "update": 7.068, "loss": "2.169", "ppl": "4.5", "wps": "398554", "ups": "3.37", "wpb": "118396", "bsz": "256", "num_updates": "363800", "lr": "6.42626e-05", "gnorm": "0.992", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "108839"} +[2022-07-31 17:04:50,218][train_inner][INFO] - {"epoch": 8, "update": 7.072, "loss": "2.173", "ppl": "4.51", "wps": "397461", "ups": "3.35", "wpb": "118493", "bsz": "256", "num_updates": "364000", "lr": "6.42424e-05", "gnorm": "0.994", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "108899"} +[2022-07-31 17:05:07,872][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 17:05:49,461][train_inner][INFO] - {"epoch": 8, "update": 7.076, "loss": "2.169", "ppl": "4.5", "wps": "399753", "ups": "3.38", "wpb": "118411", "bsz": "256", "num_updates": "364200", "lr": "6.42222e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "22.7", "wall": "108958"} +[2022-07-31 17:06:49,032][train_inner][INFO] - {"epoch": 8, "update": 7.08, "loss": "2.175", "ppl": "4.52", "wps": "396578", "ups": "3.36", "wpb": "118122", "bsz": "256", "num_updates": "364400", "lr": "6.4202e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "109017"} +[2022-07-31 17:07:48,679][train_inner][INFO] - {"epoch": 8, "update": 7.084, "loss": "2.168", "ppl": "4.49", "wps": "398618", "ups": "3.35", "wpb": "118881", "bsz": "256", "num_updates": "364600", "lr": "6.41818e-05", "gnorm": "0.991", "loss_scale": "16", "train_wall": "59", "gb_free": "26", "wall": "109077"} +[2022-07-31 17:08:47,793][train_inner][INFO] - {"epoch": 8, "update": 7.088, "loss": "2.169", "ppl": "4.5", "wps": "400154", "ups": "3.38", "wpb": "118272", "bsz": "256", "num_updates": "364800", "lr": "6.41616e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "26.8", "wall": "109136"} +[2022-07-31 17:09:46,995][train_inner][INFO] - {"epoch": 8, "update": 7.092, "loss": "2.171", "ppl": "4.5", "wps": "397199", "ups": "3.38", "wpb": "117574", "bsz": "256", "num_updates": "365000", "lr": "6.41414e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "109195"} +[2022-07-31 17:10:46,456][train_inner][INFO] - {"epoch": 8, "update": 7.096, "loss": "2.166", "ppl": "4.49", "wps": "399567", "ups": "3.36", "wpb": "118791", "bsz": "256", "num_updates": "365200", "lr": "6.41212e-05", "gnorm": "0.992", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "109255"} +[2022-07-31 17:11:45,891][train_inner][INFO] - {"epoch": 8, "update": 7.099, "loss": "2.169", "ppl": "4.5", "wps": "398192", "ups": "3.37", "wpb": "118332", "bsz": "256", "num_updates": "365400", "lr": "6.4101e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "109314"} +[2022-07-31 17:12:46,617][train_inner][INFO] - {"epoch": 8, "update": 7.103, "loss": "2.17", "ppl": "4.5", "wps": "388426", "ups": "3.29", "wpb": "117937", "bsz": "256", "num_updates": "365600", "lr": "6.40808e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "60", "gb_free": "21.6", "wall": "109375"} +[2022-07-31 17:13:46,228][train_inner][INFO] - {"epoch": 8, "update": 7.107, "loss": "2.165", "ppl": "4.49", "wps": "396299", "ups": "3.36", "wpb": "118118", "bsz": "256", "num_updates": "365800", "lr": "6.40606e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "109435"} +[2022-07-31 17:14:45,046][train_inner][INFO] - {"epoch": 8, "update": 7.111, "loss": "2.177", "ppl": "4.52", "wps": "400876", "ups": "3.4", "wpb": "117891", "bsz": "256", "num_updates": "366000", "lr": "6.40404e-05", "gnorm": "0.997", "loss_scale": "16", "train_wall": "59", "gb_free": "27.9", "wall": "109493"} +[2022-07-31 17:15:44,665][train_inner][INFO] - {"epoch": 8, "update": 7.115, "loss": "2.169", "ppl": "4.5", "wps": "397228", "ups": "3.35", "wpb": "118412", "bsz": "256", "num_updates": "366200", "lr": "6.40202e-05", "gnorm": "0.993", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "109553"} +[2022-07-31 17:16:43,937][train_inner][INFO] - {"epoch": 8, "update": 7.119, "loss": "2.175", "ppl": "4.52", "wps": "398774", "ups": "3.37", "wpb": "118179", "bsz": "256", "num_updates": "366400", "lr": "6.4e-05", "gnorm": "0.997", "loss_scale": "32", "train_wall": "59", "gb_free": "31.9", "wall": "109612"} +[2022-07-31 17:17:43,151][train_inner][INFO] - {"epoch": 8, "update": 7.123, "loss": "2.168", "ppl": "4.49", "wps": "397875", "ups": "3.38", "wpb": "117798", "bsz": "256", "num_updates": "366600", "lr": "6.39798e-05", "gnorm": "0.996", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "109672"} +[2022-07-31 17:18:42,474][train_inner][INFO] - {"epoch": 8, "update": 7.127, "loss": "2.17", "ppl": "4.5", "wps": "398268", "ups": "3.37", "wpb": "118131", "bsz": "256", "num_updates": "366800", "lr": "6.39596e-05", "gnorm": "0.995", "loss_scale": "32", "train_wall": "59", "gb_free": "22.6", "wall": "109731"} +[2022-07-31 17:19:41,673][train_inner][INFO] - {"epoch": 8, "update": 7.13, "loss": "2.172", "ppl": "4.51", "wps": "398313", "ups": "3.38", "wpb": "117898", "bsz": "256", "num_updates": "367000", "lr": "6.39394e-05", "gnorm": "0.996", "loss_scale": "32", "train_wall": "59", "gb_free": "24.2", "wall": "109790"} +[2022-07-31 17:20:41,257][train_inner][INFO] - {"epoch": 8, "update": 7.134, "loss": "2.176", "ppl": "4.52", "wps": "397240", "ups": "3.36", "wpb": "118345", "bsz": "256", "num_updates": "367200", "lr": "6.39192e-05", "gnorm": "0.995", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "109850"} +[2022-07-31 17:21:40,500][train_inner][INFO] - {"epoch": 8, "update": 7.138, "loss": "2.176", "ppl": "4.52", "wps": "398066", "ups": "3.38", "wpb": "117912", "bsz": "256", "num_updates": "367400", "lr": "6.3899e-05", "gnorm": "0.998", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "109909"} +[2022-07-31 17:22:40,222][train_inner][INFO] - {"epoch": 8, "update": 7.142, "loss": "2.17", "ppl": "4.5", "wps": "395840", "ups": "3.35", "wpb": "118200", "bsz": "256", "num_updates": "367600", "lr": "6.38788e-05", "gnorm": "0.999", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "109969"} +[2022-07-31 17:23:39,403][train_inner][INFO] - {"epoch": 8, "update": 7.146, "loss": "2.167", "ppl": "4.49", "wps": "400848", "ups": "3.38", "wpb": "118612", "bsz": "256", "num_updates": "367800", "lr": "6.38586e-05", "gnorm": "0.994", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "110028"} +[2022-07-31 17:24:38,547][train_inner][INFO] - {"epoch": 8, "update": 7.15, "loss": "2.164", "ppl": "4.48", "wps": "401840", "ups": "3.38", "wpb": "118832", "bsz": "256", "num_updates": "368000", "lr": "6.38384e-05", "gnorm": "0.991", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "110087"} +[2022-07-31 17:25:39,008][train_inner][INFO] - {"epoch": 8, "update": 7.154, "loss": "2.168", "ppl": "4.49", "wps": "392122", "ups": "3.31", "wpb": "118539", "bsz": "256", "num_updates": "368200", "lr": "6.38182e-05", "gnorm": "0.993", "loss_scale": "64", "train_wall": "60", "gb_free": "24.8", "wall": "110147"} +[2022-07-31 17:26:38,926][train_inner][INFO] - {"epoch": 8, "update": 7.158, "loss": "2.166", "ppl": "4.49", "wps": "394320", "ups": "3.34", "wpb": "118133", "bsz": "256", "num_updates": "368400", "lr": "6.3798e-05", "gnorm": "0.997", "loss_scale": "64", "train_wall": "60", "gb_free": "25.7", "wall": "110207"} +[2022-07-31 17:27:38,344][train_inner][INFO] - {"epoch": 8, "update": 7.162, "loss": "2.171", "ppl": "4.5", "wps": "396261", "ups": "3.37", "wpb": "117724", "bsz": "256", "num_updates": "368600", "lr": "6.37778e-05", "gnorm": "0.998", "loss_scale": "64", "train_wall": "59", "gb_free": "23.5", "wall": "110267"} +[2022-07-31 17:28:13,675][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 17:28:38,101][train_inner][INFO] - {"epoch": 8, "update": 7.165, "loss": "2.171", "ppl": "4.5", "wps": "393601", "ups": "3.35", "wpb": "117601", "bsz": "256", "num_updates": "368800", "lr": "6.37576e-05", "gnorm": "0.999", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "110327"} +[2022-07-31 17:28:39,269][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 17:29:37,779][train_inner][INFO] - {"epoch": 8, "update": 7.169, "loss": "2.167", "ppl": "4.49", "wps": "396714", "ups": "3.35", "wpb": "118376", "bsz": "256", "num_updates": "369000", "lr": "6.37374e-05", "gnorm": "0.996", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "110386"} +[2022-07-31 17:30:36,877][train_inner][INFO] - {"epoch": 8, "update": 7.173, "loss": "2.158", "ppl": "4.46", "wps": "400034", "ups": "3.38", "wpb": "118204", "bsz": "256", "num_updates": "369200", "lr": "6.37172e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "110445"} +[2022-07-31 17:31:36,449][train_inner][INFO] - {"epoch": 8, "update": 7.177, "loss": "2.173", "ppl": "4.51", "wps": "397619", "ups": "3.36", "wpb": "118434", "bsz": "256", "num_updates": "369400", "lr": "6.3697e-05", "gnorm": "0.993", "loss_scale": "16", "train_wall": "59", "gb_free": "24.3", "wall": "110505"} +[2022-07-31 17:32:35,578][train_inner][INFO] - {"epoch": 8, "update": 7.181, "loss": "2.172", "ppl": "4.51", "wps": "398109", "ups": "3.38", "wpb": "117698", "bsz": "256", "num_updates": "369600", "lr": "6.36768e-05", "gnorm": "0.997", "loss_scale": "16", "train_wall": "59", "gb_free": "22.7", "wall": "110564"} +[2022-07-31 17:33:34,634][train_inner][INFO] - {"epoch": 8, "update": 7.185, "loss": "2.173", "ppl": "4.51", "wps": "400499", "ups": "3.39", "wpb": "118259", "bsz": "256", "num_updates": "369800", "lr": "6.36566e-05", "gnorm": "0.997", "loss_scale": "16", "train_wall": "59", "gb_free": "23.9", "wall": "110623"} +[2022-07-31 17:34:34,231][train_inner][INFO] - {"epoch": 8, "update": 7.189, "loss": "2.167", "ppl": "4.49", "wps": "397341", "ups": "3.36", "wpb": "118401", "bsz": "256", "num_updates": "370000", "lr": "6.36364e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "110683"} +[2022-07-31 17:35:33,641][train_inner][INFO] - {"epoch": 8, "update": 7.193, "loss": "2.168", "ppl": "4.49", "wps": "396668", "ups": "3.37", "wpb": "117829", "bsz": "256", "num_updates": "370200", "lr": "6.36162e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "59", "gb_free": "25.9", "wall": "110742"} +[2022-07-31 17:36:33,369][train_inner][INFO] - {"epoch": 8, "update": 7.197, "loss": "2.171", "ppl": "4.5", "wps": "395394", "ups": "3.35", "wpb": "118079", "bsz": "256", "num_updates": "370400", "lr": "6.3596e-05", "gnorm": "0.997", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "110802"} +[2022-07-31 17:37:32,598][train_inner][INFO] - {"epoch": 8, "update": 7.2, "loss": "2.161", "ppl": "4.47", "wps": "400940", "ups": "3.38", "wpb": "118736", "bsz": "256", "num_updates": "370600", "lr": "6.35758e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "110861"} +[2022-07-31 17:38:31,863][train_inner][INFO] - {"epoch": 8, "update": 7.204, "loss": "2.177", "ppl": "4.52", "wps": "398199", "ups": "3.37", "wpb": "117996", "bsz": "256", "num_updates": "370800", "lr": "6.35556e-05", "gnorm": "0.997", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "110920"} +[2022-07-31 17:39:31,142][train_inner][INFO] - {"epoch": 8, "update": 7.208, "loss": "2.171", "ppl": "4.5", "wps": "399697", "ups": "3.37", "wpb": "118468", "bsz": "256", "num_updates": "371000", "lr": "6.35354e-05", "gnorm": "0.995", "loss_scale": "32", "train_wall": "59", "gb_free": "22.3", "wall": "110980"} +[2022-07-31 17:40:30,456][train_inner][INFO] - {"epoch": 8, "update": 7.212, "loss": "2.165", "ppl": "4.48", "wps": "399434", "ups": "3.37", "wpb": "118459", "bsz": "256", "num_updates": "371200", "lr": "6.35152e-05", "gnorm": "0.998", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "111039"} +[2022-07-31 17:41:30,840][train_inner][INFO] - {"epoch": 8, "update": 7.216, "loss": "2.174", "ppl": "4.51", "wps": "390408", "ups": "3.31", "wpb": "117870", "bsz": "256", "num_updates": "371400", "lr": "6.34949e-05", "gnorm": "0.998", "loss_scale": "32", "train_wall": "60", "gb_free": "21.5", "wall": "111099"} +[2022-07-31 17:42:30,489][train_inner][INFO] - {"epoch": 8, "update": 7.22, "loss": "2.171", "ppl": "4.5", "wps": "396818", "ups": "3.35", "wpb": "118349", "bsz": "256", "num_updates": "371600", "lr": "6.34747e-05", "gnorm": "0.994", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "111159"} +[2022-07-31 17:43:29,581][train_inner][INFO] - {"epoch": 8, "update": 7.224, "loss": "2.168", "ppl": "4.49", "wps": "400776", "ups": "3.38", "wpb": "118413", "bsz": "256", "num_updates": "371800", "lr": "6.34545e-05", "gnorm": "0.999", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "111218"} +[2022-07-31 17:44:29,377][train_inner][INFO] - {"epoch": 8, "update": 7.228, "loss": "2.171", "ppl": "4.5", "wps": "396237", "ups": "3.34", "wpb": "118465", "bsz": "256", "num_updates": "372000", "lr": "6.34343e-05", "gnorm": "0.995", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "111278"} +[2022-07-31 17:45:29,161][train_inner][INFO] - {"epoch": 8, "update": 7.232, "loss": "2.166", "ppl": "4.49", "wps": "397922", "ups": "3.35", "wpb": "118946", "bsz": "256", "num_updates": "372200", "lr": "6.34141e-05", "gnorm": "0.998", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "111338"} +[2022-07-31 17:46:28,327][train_inner][INFO] - {"epoch": 8, "update": 7.235, "loss": "2.172", "ppl": "4.51", "wps": "397804", "ups": "3.38", "wpb": "117682", "bsz": "256", "num_updates": "372400", "lr": "6.33939e-05", "gnorm": "1.001", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "111397"} +[2022-07-31 17:46:56,017][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 17:47:11,260][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 17:47:28,302][train_inner][INFO] - {"epoch": 8, "update": 7.239, "loss": "2.16", "ppl": "4.47", "wps": "392703", "ups": "3.33", "wpb": "117761", "bsz": "256", "num_updates": "372600", "lr": "6.33737e-05", "gnorm": "0.997", "loss_scale": "8", "train_wall": "60", "gb_free": "22.9", "wall": "111457"} +[2022-07-31 17:48:27,482][train_inner][INFO] - {"epoch": 8, "update": 7.243, "loss": "2.164", "ppl": "4.48", "wps": "400769", "ups": "3.38", "wpb": "118586", "bsz": "256", "num_updates": "372800", "lr": "6.33535e-05", "gnorm": "0.994", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "111516"} +[2022-07-31 17:49:26,777][train_inner][INFO] - {"epoch": 8, "update": 7.247, "loss": "2.169", "ppl": "4.5", "wps": "398236", "ups": "3.37", "wpb": "118067", "bsz": "256", "num_updates": "373000", "lr": "6.33333e-05", "gnorm": "0.999", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "111575"} +[2022-07-31 17:50:26,153][train_inner][INFO] - {"epoch": 8, "update": 7.251, "loss": "2.167", "ppl": "4.49", "wps": "397660", "ups": "3.37", "wpb": "118057", "bsz": "256", "num_updates": "373200", "lr": "6.33131e-05", "gnorm": "1", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "111635"} +[2022-07-31 17:51:25,564][train_inner][INFO] - {"epoch": 8, "update": 7.255, "loss": "2.173", "ppl": "4.51", "wps": "397988", "ups": "3.37", "wpb": "118222", "bsz": "256", "num_updates": "373400", "lr": "6.32929e-05", "gnorm": "0.999", "loss_scale": "8", "train_wall": "59", "gb_free": "22.3", "wall": "111694"} +[2022-07-31 17:52:25,456][train_inner][INFO] - {"epoch": 8, "update": 7.259, "loss": "2.168", "ppl": "4.49", "wps": "393032", "ups": "3.34", "wpb": "117697", "bsz": "256", "num_updates": "373600", "lr": "6.32727e-05", "gnorm": "0.998", "loss_scale": "8", "train_wall": "60", "gb_free": "22.9", "wall": "111754"} +[2022-07-31 17:53:25,328][train_inner][INFO] - {"epoch": 8, "update": 7.263, "loss": "2.171", "ppl": "4.5", "wps": "393820", "ups": "3.34", "wpb": "117893", "bsz": "256", "num_updates": "373800", "lr": "6.32525e-05", "gnorm": "1.003", "loss_scale": "8", "train_wall": "60", "gb_free": "22.1", "wall": "111814"} +[2022-07-31 17:54:24,896][train_inner][INFO] - {"epoch": 8, "update": 7.267, "loss": "2.171", "ppl": "4.5", "wps": "397487", "ups": "3.36", "wpb": "118386", "bsz": "256", "num_updates": "374000", "lr": "6.32323e-05", "gnorm": "0.995", "loss_scale": "8", "train_wall": "59", "gb_free": "23.1", "wall": "111873"} +[2022-07-31 17:55:24,228][train_inner][INFO] - {"epoch": 8, "update": 7.27, "loss": "2.166", "ppl": "4.49", "wps": "400199", "ups": "3.37", "wpb": "118722", "bsz": "256", "num_updates": "374200", "lr": "6.32121e-05", "gnorm": "0.997", "loss_scale": "8", "train_wall": "59", "gb_free": "25.1", "wall": "111933"} +[2022-07-31 17:56:23,837][train_inner][INFO] - {"epoch": 8, "update": 7.274, "loss": "2.17", "ppl": "4.5", "wps": "396768", "ups": "3.36", "wpb": "118255", "bsz": "256", "num_updates": "374400", "lr": "6.31919e-05", "gnorm": "0.997", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "111992"} +[2022-07-31 17:57:23,054][train_inner][INFO] - {"epoch": 8, "update": 7.278, "loss": "2.168", "ppl": "4.5", "wps": "398386", "ups": "3.38", "wpb": "117956", "bsz": "256", "num_updates": "374600", "lr": "6.31717e-05", "gnorm": "0.999", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "112051"} +[2022-07-31 17:58:22,602][train_inner][INFO] - {"epoch": 8, "update": 7.282, "loss": "2.166", "ppl": "4.49", "wps": "397150", "ups": "3.36", "wpb": "118245", "bsz": "256", "num_updates": "374800", "lr": "6.31515e-05", "gnorm": "0.996", "loss_scale": "16", "train_wall": "59", "gb_free": "23", "wall": "112111"} +[2022-07-31 17:59:22,298][train_inner][INFO] - {"epoch": 8, "update": 7.286, "loss": "2.169", "ppl": "4.5", "wps": "397887", "ups": "3.35", "wpb": "118761", "bsz": "256", "num_updates": "375000", "lr": "6.31313e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "112171"} +[2022-07-31 18:00:21,940][train_inner][INFO] - {"epoch": 8, "update": 7.29, "loss": "2.163", "ppl": "4.48", "wps": "397779", "ups": "3.35", "wpb": "118622", "bsz": "256", "num_updates": "375200", "lr": "6.31111e-05", "gnorm": "0.996", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "112230"} +[2022-07-31 18:01:21,530][train_inner][INFO] - {"epoch": 8, "update": 7.294, "loss": "2.161", "ppl": "4.47", "wps": "398319", "ups": "3.36", "wpb": "118677", "bsz": "256", "num_updates": "375400", "lr": "6.30909e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "112290"} +[2022-07-31 18:02:21,026][train_inner][INFO] - {"epoch": 8, "update": 7.298, "loss": "2.172", "ppl": "4.51", "wps": "397279", "ups": "3.36", "wpb": "118182", "bsz": "256", "num_updates": "375600", "lr": "6.30707e-05", "gnorm": "0.999", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "112349"} +[2022-07-31 18:03:20,449][train_inner][INFO] - {"epoch": 8, "update": 7.301, "loss": "2.168", "ppl": "4.49", "wps": "397074", "ups": "3.37", "wpb": "117976", "bsz": "256", "num_updates": "375800", "lr": "6.30505e-05", "gnorm": "1.003", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "112409"} +[2022-07-31 18:04:20,062][train_inner][INFO] - {"epoch": 8, "update": 7.305, "loss": "2.165", "ppl": "4.48", "wps": "396379", "ups": "3.36", "wpb": "118145", "bsz": "256", "num_updates": "376000", "lr": "6.30303e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "59", "gb_free": "24.4", "wall": "112468"} +[2022-07-31 18:05:19,432][train_inner][INFO] - {"epoch": 8, "update": 7.309, "loss": "2.164", "ppl": "4.48", "wps": "399018", "ups": "3.37", "wpb": "118450", "bsz": "256", "num_updates": "376200", "lr": "6.30101e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "112528"} +[2022-07-31 18:05:27,573][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 18:06:19,402][train_inner][INFO] - {"epoch": 8, "update": 7.313, "loss": "2.167", "ppl": "4.49", "wps": "394526", "ups": "3.34", "wpb": "118298", "bsz": "256", "num_updates": "376400", "lr": "6.29899e-05", "gnorm": "1.007", "loss_scale": "8", "train_wall": "60", "gb_free": "23", "wall": "112588"} +[2022-07-31 18:07:18,973][train_inner][INFO] - {"epoch": 8, "update": 7.317, "loss": "2.171", "ppl": "4.5", "wps": "396844", "ups": "3.36", "wpb": "118201", "bsz": "256", "num_updates": "376600", "lr": "6.29697e-05", "gnorm": "0.998", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "112647"} +[2022-07-31 18:08:18,532][train_inner][INFO] - {"epoch": 8, "update": 7.321, "loss": "2.166", "ppl": "4.49", "wps": "398852", "ups": "3.36", "wpb": "118774", "bsz": "256", "num_updates": "376800", "lr": "6.29495e-05", "gnorm": "0.997", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "112707"} +[2022-07-31 18:09:18,021][train_inner][INFO] - {"epoch": 8, "update": 7.325, "loss": "2.164", "ppl": "4.48", "wps": "396920", "ups": "3.36", "wpb": "118063", "bsz": "256", "num_updates": "377000", "lr": "6.29293e-05", "gnorm": "1", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "112766"} +[2022-07-31 18:10:17,514][train_inner][INFO] - {"epoch": 8, "update": 7.329, "loss": "2.164", "ppl": "4.48", "wps": "396116", "ups": "3.36", "wpb": "117822", "bsz": "256", "num_updates": "377200", "lr": "6.29091e-05", "gnorm": "0.999", "loss_scale": "8", "train_wall": "59", "gb_free": "23", "wall": "112826"} +[2022-07-31 18:11:17,047][train_inner][INFO] - {"epoch": 8, "update": 7.333, "loss": "2.165", "ppl": "4.48", "wps": "397509", "ups": "3.36", "wpb": "118322", "bsz": "256", "num_updates": "377400", "lr": "6.28889e-05", "gnorm": "0.999", "loss_scale": "8", "train_wall": "59", "gb_free": "22.7", "wall": "112885"} +[2022-07-31 18:12:16,594][train_inner][INFO] - {"epoch": 8, "update": 7.336, "loss": "2.167", "ppl": "4.49", "wps": "398453", "ups": "3.36", "wpb": "118631", "bsz": "256", "num_updates": "377600", "lr": "6.28687e-05", "gnorm": "0.996", "loss_scale": "8", "train_wall": "59", "gb_free": "22.8", "wall": "112945"} +[2022-07-31 18:13:16,182][train_inner][INFO] - {"epoch": 8, "update": 7.34, "loss": "2.162", "ppl": "4.47", "wps": "396620", "ups": "3.36", "wpb": "118169", "bsz": "256", "num_updates": "377800", "lr": "6.28485e-05", "gnorm": "0.997", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "113005"} +[2022-07-31 18:14:15,909][train_inner][INFO] - {"epoch": 8, "update": 7.344, "loss": "2.163", "ppl": "4.48", "wps": "398227", "ups": "3.35", "wpb": "118923", "bsz": "256", "num_updates": "378000", "lr": "6.28283e-05", "gnorm": "0.996", "loss_scale": "8", "train_wall": "59", "gb_free": "27.7", "wall": "113064"} +[2022-07-31 18:15:15,669][train_inner][INFO] - {"epoch": 8, "update": 7.348, "loss": "2.165", "ppl": "4.49", "wps": "395677", "ups": "3.35", "wpb": "118228", "bsz": "256", "num_updates": "378200", "lr": "6.28081e-05", "gnorm": "1", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "113124"} +[2022-07-31 18:16:15,710][train_inner][INFO] - {"epoch": 8, "update": 7.352, "loss": "2.164", "ppl": "4.48", "wps": "395660", "ups": "3.33", "wpb": "118779", "bsz": "256", "num_updates": "378400", "lr": "6.27879e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "60", "gb_free": "21.6", "wall": "113184"} +[2022-07-31 18:17:14,944][train_inner][INFO] - {"epoch": 8, "update": 7.356, "loss": "2.164", "ppl": "4.48", "wps": "399835", "ups": "3.38", "wpb": "118419", "bsz": "256", "num_updates": "378600", "lr": "6.27677e-05", "gnorm": "0.998", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "113243"} +[2022-07-31 18:18:14,319][train_inner][INFO] - {"epoch": 8, "update": 7.36, "loss": "2.164", "ppl": "4.48", "wps": "397360", "ups": "3.37", "wpb": "117964", "bsz": "256", "num_updates": "378800", "lr": "6.27475e-05", "gnorm": "1.002", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "113303"} +[2022-07-31 18:19:14,117][train_inner][INFO] - {"epoch": 8, "update": 7.364, "loss": "2.162", "ppl": "4.47", "wps": "395272", "ups": "3.34", "wpb": "118182", "bsz": "256", "num_updates": "379000", "lr": "6.27273e-05", "gnorm": "1.001", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "113363"} +[2022-07-31 18:20:13,388][train_inner][INFO] - {"epoch": 8, "update": 7.368, "loss": "2.169", "ppl": "4.5", "wps": "398149", "ups": "3.37", "wpb": "117993", "bsz": "256", "num_updates": "379200", "lr": "6.27071e-05", "gnorm": "1", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "113422"} +[2022-07-31 18:21:12,435][train_inner][INFO] - {"epoch": 8, "update": 7.371, "loss": "2.168", "ppl": "4.49", "wps": "399325", "ups": "3.39", "wpb": "117894", "bsz": "256", "num_updates": "379400", "lr": "6.26869e-05", "gnorm": "1.002", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "113481"} +[2022-07-31 18:22:11,690][train_inner][INFO] - {"epoch": 8, "update": 7.375, "loss": "2.166", "ppl": "4.49", "wps": "399629", "ups": "3.38", "wpb": "118400", "bsz": "256", "num_updates": "379600", "lr": "6.26667e-05", "gnorm": "1", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "113540"} +[2022-07-31 18:23:10,906][train_inner][INFO] - {"epoch": 8, "update": 7.379, "loss": "2.164", "ppl": "4.48", "wps": "399487", "ups": "3.38", "wpb": "118278", "bsz": "256", "num_updates": "379800", "lr": "6.26465e-05", "gnorm": "1", "loss_scale": "16", "train_wall": "59", "gb_free": "35.3", "wall": "113599"} +[2022-07-31 18:24:10,671][train_inner][INFO] - {"epoch": 8, "update": 7.383, "loss": "2.164", "ppl": "4.48", "wps": "397815", "ups": "3.35", "wpb": "118876", "bsz": "256", "num_updates": "380000", "lr": "6.26263e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "59", "gb_free": "26.3", "wall": "113659"} +[2022-07-31 18:25:10,313][train_inner][INFO] - {"epoch": 8, "update": 7.387, "loss": "2.166", "ppl": "4.49", "wps": "396655", "ups": "3.35", "wpb": "118287", "bsz": "256", "num_updates": "380200", "lr": "6.26061e-05", "gnorm": "1.001", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "113719"} +[2022-07-31 18:25:41,466][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 18:26:10,413][train_inner][INFO] - {"epoch": 8, "update": 7.391, "loss": "2.158", "ppl": "4.46", "wps": "394854", "ups": "3.33", "wpb": "118651", "bsz": "256", "num_updates": "380400", "lr": "6.25859e-05", "gnorm": "0.997", "loss_scale": "8", "train_wall": "60", "gb_free": "22.1", "wall": "113779"} +[2022-07-31 18:27:10,079][train_inner][INFO] - {"epoch": 8, "update": 7.395, "loss": "2.164", "ppl": "4.48", "wps": "397129", "ups": "3.35", "wpb": "118475", "bsz": "256", "num_updates": "380600", "lr": "6.25657e-05", "gnorm": "1.002", "loss_scale": "8", "train_wall": "59", "gb_free": "24.4", "wall": "113839"} +[2022-07-31 18:28:09,569][train_inner][INFO] - {"epoch": 8, "update": 7.399, "loss": "2.162", "ppl": "4.48", "wps": "397862", "ups": "3.36", "wpb": "118344", "bsz": "256", "num_updates": "380800", "lr": "6.25455e-05", "gnorm": "1.002", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "113898"} +[2022-07-31 18:29:08,823][train_inner][INFO] - {"epoch": 8, "update": 7.403, "loss": "2.165", "ppl": "4.48", "wps": "398359", "ups": "3.38", "wpb": "118021", "bsz": "256", "num_updates": "381000", "lr": "6.25253e-05", "gnorm": "1.002", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "113957"} +[2022-07-31 18:30:08,287][train_inner][INFO] - {"epoch": 8, "update": 7.406, "loss": "2.164", "ppl": "4.48", "wps": "396326", "ups": "3.36", "wpb": "117836", "bsz": "256", "num_updates": "381200", "lr": "6.25051e-05", "gnorm": "1.001", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "114017"} +[2022-07-31 18:31:07,646][train_inner][INFO] - {"epoch": 8, "update": 7.41, "loss": "2.165", "ppl": "4.48", "wps": "398471", "ups": "3.37", "wpb": "118263", "bsz": "256", "num_updates": "381400", "lr": "6.24848e-05", "gnorm": "1.005", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "114076"} +[2022-07-31 18:32:07,291][train_inner][INFO] - {"epoch": 8, "update": 7.414, "loss": "2.161", "ppl": "4.47", "wps": "398160", "ups": "3.35", "wpb": "118740", "bsz": "256", "num_updates": "381600", "lr": "6.24646e-05", "gnorm": "0.999", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "114136"} +[2022-07-31 18:33:06,771][train_inner][INFO] - {"epoch": 8, "update": 7.418, "loss": "2.167", "ppl": "4.49", "wps": "398215", "ups": "3.36", "wpb": "118428", "bsz": "256", "num_updates": "381800", "lr": "6.24444e-05", "gnorm": "1.001", "loss_scale": "8", "train_wall": "59", "gb_free": "29.6", "wall": "114195"} +[2022-07-31 18:34:06,286][train_inner][INFO] - {"epoch": 8, "update": 7.422, "loss": "2.167", "ppl": "4.49", "wps": "397534", "ups": "3.36", "wpb": "118297", "bsz": "256", "num_updates": "382000", "lr": "6.24242e-05", "gnorm": "1.009", "loss_scale": "8", "train_wall": "59", "gb_free": "26.2", "wall": "114255"} +[2022-07-31 18:35:05,950][train_inner][INFO] - {"epoch": 8, "update": 7.426, "loss": "2.168", "ppl": "4.49", "wps": "397661", "ups": "3.35", "wpb": "118629", "bsz": "256", "num_updates": "382200", "lr": "6.2404e-05", "gnorm": "1.003", "loss_scale": "8", "train_wall": "59", "gb_free": "25.7", "wall": "114314"} +[2022-07-31 18:36:05,304][train_inner][INFO] - {"epoch": 8, "update": 7.43, "loss": "2.163", "ppl": "4.48", "wps": "398407", "ups": "3.37", "wpb": "118234", "bsz": "256", "num_updates": "382400", "lr": "6.23838e-05", "gnorm": "1.004", "loss_scale": "16", "train_wall": "59", "gb_free": "29.2", "wall": "114374"} +[2022-07-31 18:37:05,188][train_inner][INFO] - {"epoch": 8, "update": 7.434, "loss": "2.155", "ppl": "4.45", "wps": "398314", "ups": "3.34", "wpb": "119262", "bsz": "256", "num_updates": "382600", "lr": "6.23636e-05", "gnorm": "0.994", "loss_scale": "16", "train_wall": "60", "gb_free": "24.1", "wall": "114434"} +[2022-07-31 18:38:04,838][train_inner][INFO] - {"epoch": 8, "update": 7.437, "loss": "2.164", "ppl": "4.48", "wps": "395941", "ups": "3.35", "wpb": "118088", "bsz": "256", "num_updates": "382800", "lr": "6.23434e-05", "gnorm": "1", "loss_scale": "16", "train_wall": "59", "gb_free": "25", "wall": "114493"} +[2022-07-31 18:39:04,522][train_inner][INFO] - {"epoch": 8, "update": 7.441, "loss": "2.161", "ppl": "4.47", "wps": "395954", "ups": "3.35", "wpb": "118159", "bsz": "256", "num_updates": "383000", "lr": "6.23232e-05", "gnorm": "1.002", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "114553"} +[2022-07-31 18:40:04,424][train_inner][INFO] - {"epoch": 8, "update": 7.445, "loss": "2.153", "ppl": "4.45", "wps": "396264", "ups": "3.34", "wpb": "118686", "bsz": "256", "num_updates": "383200", "lr": "6.2303e-05", "gnorm": "0.995", "loss_scale": "16", "train_wall": "60", "gb_free": "22", "wall": "114613"} +[2022-07-31 18:41:03,993][train_inner][INFO] - {"epoch": 8, "update": 7.449, "loss": "2.163", "ppl": "4.48", "wps": "398630", "ups": "3.36", "wpb": "118728", "bsz": "256", "num_updates": "383400", "lr": "6.22828e-05", "gnorm": "0.999", "loss_scale": "16", "train_wall": "59", "gb_free": "27.3", "wall": "114672"} +[2022-07-31 18:42:03,174][train_inner][INFO] - {"epoch": 8, "update": 7.453, "loss": "2.165", "ppl": "4.49", "wps": "397221", "ups": "3.38", "wpb": "117538", "bsz": "256", "num_updates": "383600", "lr": "6.22626e-05", "gnorm": "1.005", "loss_scale": "16", "train_wall": "59", "gb_free": "23", "wall": "114732"} +[2022-07-31 18:43:02,901][train_inner][INFO] - {"epoch": 8, "update": 7.457, "loss": "2.167", "ppl": "4.49", "wps": "395796", "ups": "3.35", "wpb": "118199", "bsz": "256", "num_updates": "383800", "lr": "6.22424e-05", "gnorm": "1.003", "loss_scale": "16", "train_wall": "59", "gb_free": "23.7", "wall": "114791"} +[2022-07-31 18:44:02,336][train_inner][INFO] - {"epoch": 8, "update": 7.461, "loss": "2.156", "ppl": "4.46", "wps": "397364", "ups": "3.37", "wpb": "118085", "bsz": "256", "num_updates": "384000", "lr": "6.22222e-05", "gnorm": "1", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "114851"} +[2022-07-31 18:45:01,533][train_inner][INFO] - {"epoch": 8, "update": 7.465, "loss": "2.162", "ppl": "4.47", "wps": "399480", "ups": "3.38", "wpb": "118238", "bsz": "256", "num_updates": "384200", "lr": "6.2202e-05", "gnorm": "1.003", "loss_scale": "16", "train_wall": "59", "gb_free": "25.2", "wall": "114910"} +[2022-07-31 18:46:00,654][train_inner][INFO] - {"epoch": 8, "update": 7.469, "loss": "2.159", "ppl": "4.46", "wps": "400789", "ups": "3.38", "wpb": "118476", "bsz": "256", "num_updates": "384400", "lr": "6.21818e-05", "gnorm": "1.001", "loss_scale": "32", "train_wall": "59", "gb_free": "25.4", "wall": "114969"} +[2022-07-31 18:47:00,316][train_inner][INFO] - {"epoch": 8, "update": 7.472, "loss": "2.166", "ppl": "4.49", "wps": "395599", "ups": "3.35", "wpb": "118010", "bsz": "256", "num_updates": "384600", "lr": "6.21616e-05", "gnorm": "1.009", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "115029"} +[2022-07-31 18:47:59,934][train_inner][INFO] - {"epoch": 8, "update": 7.476, "loss": "2.163", "ppl": "4.48", "wps": "396497", "ups": "3.35", "wpb": "118190", "bsz": "256", "num_updates": "384800", "lr": "6.21414e-05", "gnorm": "1.005", "loss_scale": "32", "train_wall": "59", "gb_free": "24.5", "wall": "115088"} +[2022-07-31 18:48:59,660][train_inner][INFO] - {"epoch": 8, "update": 7.48, "loss": "2.168", "ppl": "4.49", "wps": "397252", "ups": "3.35", "wpb": "118630", "bsz": "256", "num_updates": "385000", "lr": "6.21212e-05", "gnorm": "1.001", "loss_scale": "32", "train_wall": "59", "gb_free": "24.9", "wall": "115148"} +[2022-07-31 18:49:58,981][train_inner][INFO] - {"epoch": 8, "update": 7.484, "loss": "2.164", "ppl": "4.48", "wps": "397484", "ups": "3.37", "wpb": "117894", "bsz": "256", "num_updates": "385200", "lr": "6.2101e-05", "gnorm": "1.007", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "115207"} +[2022-07-31 18:50:58,220][train_inner][INFO] - {"epoch": 8, "update": 7.488, "loss": "2.162", "ppl": "4.48", "wps": "397205", "ups": "3.38", "wpb": "117648", "bsz": "256", "num_updates": "385400", "lr": "6.20808e-05", "gnorm": "1.004", "loss_scale": "32", "train_wall": "59", "gb_free": "29.3", "wall": "115267"} +[2022-07-31 18:51:57,611][train_inner][INFO] - {"epoch": 8, "update": 7.492, "loss": "2.154", "ppl": "4.45", "wps": "398898", "ups": "3.37", "wpb": "118454", "bsz": "256", "num_updates": "385600", "lr": "6.20606e-05", "gnorm": "1.006", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "115326"} +[2022-07-31 18:52:57,078][train_inner][INFO] - {"epoch": 8, "update": 7.496, "loss": "2.16", "ppl": "4.47", "wps": "398477", "ups": "3.36", "wpb": "118481", "bsz": "256", "num_updates": "385800", "lr": "6.20404e-05", "gnorm": "1.001", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "115386"} +[2022-07-31 18:53:56,506][train_inner][INFO] - {"epoch": 8, "update": 7.5, "loss": "2.156", "ppl": "4.46", "wps": "399654", "ups": "3.37", "wpb": "118752", "bsz": "256", "num_updates": "386000", "lr": "6.20202e-05", "gnorm": "0.998", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "115445"} +[2022-07-31 18:54:56,064][train_inner][INFO] - {"epoch": 8, "update": 7.504, "loss": "2.155", "ppl": "4.45", "wps": "397228", "ups": "3.36", "wpb": "118290", "bsz": "256", "num_updates": "386200", "lr": "6.2e-05", "gnorm": "1.001", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "115504"} +[2022-07-31 18:55:55,485][train_inner][INFO] - {"epoch": 8, "update": 7.507, "loss": "2.163", "ppl": "4.48", "wps": "398502", "ups": "3.37", "wpb": "118395", "bsz": "256", "num_updates": "386400", "lr": "6.19798e-05", "gnorm": "1.003", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "115564"} +[2022-07-31 18:56:54,791][train_inner][INFO] - {"epoch": 8, "update": 7.511, "loss": "2.158", "ppl": "4.46", "wps": "399456", "ups": "3.37", "wpb": "118450", "bsz": "256", "num_updates": "386600", "lr": "6.19596e-05", "gnorm": "1", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "115623"} +[2022-07-31 18:57:54,568][train_inner][INFO] - {"epoch": 8, "update": 7.515, "loss": "2.159", "ppl": "4.47", "wps": "396603", "ups": "3.35", "wpb": "118537", "bsz": "256", "num_updates": "386800", "lr": "6.19394e-05", "gnorm": "1.001", "loss_scale": "64", "train_wall": "59", "gb_free": "24.4", "wall": "115683"} +[2022-07-31 18:58:53,785][train_inner][INFO] - {"epoch": 8, "update": 7.519, "loss": "2.16", "ppl": "4.47", "wps": "399247", "ups": "3.38", "wpb": "118211", "bsz": "256", "num_updates": "387000", "lr": "6.19192e-05", "gnorm": "1.002", "loss_scale": "64", "train_wall": "59", "gb_free": "21.5", "wall": "115742"} +[2022-07-31 18:59:28,400][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 18:59:53,633][train_inner][INFO] - {"epoch": 8, "update": 7.523, "loss": "2.156", "ppl": "4.46", "wps": "396383", "ups": "3.34", "wpb": "118613", "bsz": "256", "num_updates": "387200", "lr": "6.1899e-05", "gnorm": "1.004", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "115802"} +[2022-07-31 19:00:52,968][train_inner][INFO] - {"epoch": 8, "update": 7.527, "loss": "2.162", "ppl": "4.48", "wps": "399016", "ups": "3.37", "wpb": "118377", "bsz": "256", "num_updates": "387400", "lr": "6.18788e-05", "gnorm": "1.002", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "115861"} +[2022-07-31 19:01:52,457][train_inner][INFO] - {"epoch": 8, "update": 7.531, "loss": "2.163", "ppl": "4.48", "wps": "396321", "ups": "3.36", "wpb": "117883", "bsz": "256", "num_updates": "387600", "lr": "6.18586e-05", "gnorm": "1.004", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "115921"} +[2022-07-31 19:02:51,777][train_inner][INFO] - {"epoch": 8, "update": 7.535, "loss": "2.161", "ppl": "4.47", "wps": "398344", "ups": "3.37", "wpb": "118149", "bsz": "256", "num_updates": "387800", "lr": "6.18384e-05", "gnorm": "1.002", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "115980"} +[2022-07-31 19:03:51,454][train_inner][INFO] - {"epoch": 8, "update": 7.538, "loss": "2.16", "ppl": "4.47", "wps": "396204", "ups": "3.35", "wpb": "118219", "bsz": "256", "num_updates": "388000", "lr": "6.18182e-05", "gnorm": "1.004", "loss_scale": "32", "train_wall": "59", "gb_free": "24.4", "wall": "116040"} +[2022-07-31 19:04:40,780][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 19:04:51,091][train_inner][INFO] - {"epoch": 8, "update": 7.542, "loss": "2.159", "ppl": "4.47", "wps": "394720", "ups": "3.35", "wpb": "117700", "bsz": "256", "num_updates": "388200", "lr": "6.1798e-05", "gnorm": "1.005", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "116100"} +[2022-07-31 19:05:51,571][train_inner][INFO] - {"epoch": 8, "update": 7.546, "loss": "2.157", "ppl": "4.46", "wps": "390936", "ups": "3.31", "wpb": "118218", "bsz": "256", "num_updates": "388400", "lr": "6.17778e-05", "gnorm": "1.004", "loss_scale": "16", "train_wall": "60", "gb_free": "25.2", "wall": "116160"} +[2022-07-31 19:06:51,315][train_inner][INFO] - {"epoch": 8, "update": 7.55, "loss": "2.154", "ppl": "4.45", "wps": "395908", "ups": "3.35", "wpb": "118263", "bsz": "256", "num_updates": "388600", "lr": "6.17576e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "116220"} +[2022-07-31 19:07:50,573][train_inner][INFO] - {"epoch": 8, "update": 7.554, "loss": "2.157", "ppl": "4.46", "wps": "399134", "ups": "3.38", "wpb": "118258", "bsz": "256", "num_updates": "388800", "lr": "6.17374e-05", "gnorm": "1.003", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "116279"} +[2022-07-31 19:08:49,968][train_inner][INFO] - {"epoch": 8, "update": 7.558, "loss": "2.161", "ppl": "4.47", "wps": "398586", "ups": "3.37", "wpb": "118371", "bsz": "256", "num_updates": "389000", "lr": "6.17172e-05", "gnorm": "1.005", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "116338"} +[2022-07-31 19:09:49,343][train_inner][INFO] - {"epoch": 8, "update": 7.562, "loss": "2.162", "ppl": "4.48", "wps": "397870", "ups": "3.37", "wpb": "118116", "bsz": "256", "num_updates": "389200", "lr": "6.1697e-05", "gnorm": "1.006", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "116398"} +[2022-07-31 19:10:42,809][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 19:10:49,226][train_inner][INFO] - {"epoch": 8, "update": 7.566, "loss": "2.163", "ppl": "4.48", "wps": "394671", "ups": "3.34", "wpb": "118169", "bsz": "256", "num_updates": "389400", "lr": "6.16768e-05", "gnorm": "1.005", "loss_scale": "8", "train_wall": "60", "gb_free": "27.3", "wall": "116458"} +[2022-07-31 19:11:48,950][train_inner][INFO] - {"epoch": 8, "update": 7.57, "loss": "2.159", "ppl": "4.47", "wps": "395721", "ups": "3.35", "wpb": "118170", "bsz": "256", "num_updates": "389600", "lr": "6.16566e-05", "gnorm": "1.006", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "116517"} +[2022-07-31 19:12:48,138][train_inner][INFO] - {"epoch": 8, "update": 7.573, "loss": "2.154", "ppl": "4.45", "wps": "401350", "ups": "3.38", "wpb": "118774", "bsz": "256", "num_updates": "389800", "lr": "6.16364e-05", "gnorm": "1", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "116577"} +[2022-07-31 19:13:47,844][train_inner][INFO] - {"epoch": 8, "update": 7.577, "loss": "2.165", "ppl": "4.49", "wps": "396185", "ups": "3.35", "wpb": "118273", "bsz": "256", "num_updates": "390000", "lr": "6.16162e-05", "gnorm": "1.007", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "116636"} +[2022-07-31 19:14:47,148][train_inner][INFO] - {"epoch": 8, "update": 7.581, "loss": "2.156", "ppl": "4.46", "wps": "397698", "ups": "3.37", "wpb": "117923", "bsz": "256", "num_updates": "390200", "lr": "6.1596e-05", "gnorm": "1.003", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "116696"} +[2022-07-31 19:15:46,790][train_inner][INFO] - {"epoch": 8, "update": 7.585, "loss": "2.15", "ppl": "4.44", "wps": "397261", "ups": "3.35", "wpb": "118468", "bsz": "256", "num_updates": "390400", "lr": "6.15758e-05", "gnorm": "1.001", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "116755"} +[2022-07-31 19:16:46,295][train_inner][INFO] - {"epoch": 8, "update": 7.589, "loss": "2.156", "ppl": "4.46", "wps": "398443", "ups": "3.36", "wpb": "118545", "bsz": "256", "num_updates": "390600", "lr": "6.15556e-05", "gnorm": "1.002", "loss_scale": "8", "train_wall": "59", "gb_free": "26.6", "wall": "116815"} +[2022-07-31 19:17:46,095][train_inner][INFO] - {"epoch": 8, "update": 7.593, "loss": "2.158", "ppl": "4.46", "wps": "396590", "ups": "3.34", "wpb": "118579", "bsz": "256", "num_updates": "390800", "lr": "6.15354e-05", "gnorm": "1.001", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "116875"} +[2022-07-31 19:18:46,016][train_inner][INFO] - {"epoch": 8, "update": 7.597, "loss": "2.164", "ppl": "4.48", "wps": "394786", "ups": "3.34", "wpb": "118279", "bsz": "256", "num_updates": "391000", "lr": "6.15152e-05", "gnorm": "1.034", "loss_scale": "8", "train_wall": "60", "gb_free": "26.7", "wall": "116934"} +[2022-07-31 19:19:45,697][train_inner][INFO] - {"epoch": 8, "update": 7.601, "loss": "2.158", "ppl": "4.46", "wps": "396538", "ups": "3.35", "wpb": "118329", "bsz": "256", "num_updates": "391200", "lr": "6.14949e-05", "gnorm": "1.008", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "116994"} +[2022-07-31 19:20:45,070][train_inner][INFO] - {"epoch": 8, "update": 7.605, "loss": "2.16", "ppl": "4.47", "wps": "400308", "ups": "3.37", "wpb": "118836", "bsz": "256", "num_updates": "391400", "lr": "6.14747e-05", "gnorm": "1.002", "loss_scale": "8", "train_wall": "59", "gb_free": "25.2", "wall": "117053"} +[2022-07-31 19:21:44,875][train_inner][INFO] - {"epoch": 8, "update": 7.608, "loss": "2.16", "ppl": "4.47", "wps": "396842", "ups": "3.34", "wpb": "118664", "bsz": "256", "num_updates": "391600", "lr": "6.14545e-05", "gnorm": "1.002", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "117113"} +[2022-07-31 19:22:44,372][train_inner][INFO] - {"epoch": 8, "update": 7.612, "loss": "2.158", "ppl": "4.46", "wps": "400219", "ups": "3.36", "wpb": "119059", "bsz": "256", "num_updates": "391800", "lr": "6.14343e-05", "gnorm": "1.005", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "117173"} +[2022-07-31 19:23:43,590][train_inner][INFO] - {"epoch": 8, "update": 7.616, "loss": "2.164", "ppl": "4.48", "wps": "399692", "ups": "3.38", "wpb": "118343", "bsz": "256", "num_updates": "392000", "lr": "6.14141e-05", "gnorm": "1.007", "loss_scale": "16", "train_wall": "59", "gb_free": "22.7", "wall": "117232"} +[2022-07-31 19:24:42,510][train_inner][INFO] - {"epoch": 8, "update": 7.62, "loss": "2.162", "ppl": "4.47", "wps": "399698", "ups": "3.39", "wpb": "117751", "bsz": "256", "num_updates": "392200", "lr": "6.13939e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "24.8", "wall": "117291"} +[2022-07-31 19:25:41,438][train_inner][INFO] - {"epoch": 8, "update": 7.624, "loss": "2.162", "ppl": "4.48", "wps": "400600", "ups": "3.39", "wpb": "118031", "bsz": "256", "num_updates": "392400", "lr": "6.13737e-05", "gnorm": "1.007", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "117350"} +[2022-07-31 19:26:40,936][train_inner][INFO] - {"epoch": 8, "update": 7.628, "loss": "2.16", "ppl": "4.47", "wps": "398143", "ups": "3.36", "wpb": "118442", "bsz": "256", "num_updates": "392600", "lr": "6.13535e-05", "gnorm": "1.01", "loss_scale": "16", "train_wall": "59", "gb_free": "24.5", "wall": "117409"} +[2022-07-31 19:27:22,710][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 19:27:40,694][train_inner][INFO] - {"epoch": 8, "update": 7.632, "loss": "2.16", "ppl": "4.47", "wps": "397166", "ups": "3.35", "wpb": "118668", "bsz": "256", "num_updates": "392800", "lr": "6.13333e-05", "gnorm": "1.018", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "117469"} +[2022-07-31 19:28:07,460][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-07-31 19:28:40,398][train_inner][INFO] - {"epoch": 8, "update": 7.636, "loss": "2.153", "ppl": "4.45", "wps": "396093", "ups": "3.35", "wpb": "118242", "bsz": "256", "num_updates": "393000", "lr": "6.13131e-05", "gnorm": "1.006", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "117529"} +[2022-07-31 19:29:40,039][train_inner][INFO] - {"epoch": 8, "update": 7.64, "loss": "2.161", "ppl": "4.47", "wps": "398341", "ups": "3.35", "wpb": "118785", "bsz": "256", "num_updates": "393200", "lr": "6.12929e-05", "gnorm": "1.007", "loss_scale": "4", "train_wall": "59", "gb_free": "26.4", "wall": "117588"} +[2022-07-31 19:30:39,745][train_inner][INFO] - {"epoch": 8, "update": 7.643, "loss": "2.153", "ppl": "4.45", "wps": "397407", "ups": "3.35", "wpb": "118638", "bsz": "256", "num_updates": "393400", "lr": "6.12727e-05", "gnorm": "1.009", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "117648"} +[2022-07-31 19:31:39,365][train_inner][INFO] - {"epoch": 8, "update": 7.647, "loss": "2.155", "ppl": "4.45", "wps": "397238", "ups": "3.35", "wpb": "118414", "bsz": "256", "num_updates": "393600", "lr": "6.12525e-05", "gnorm": "1.02", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "117708"} +[2022-07-31 19:32:38,496][train_inner][INFO] - {"epoch": 8, "update": 7.651, "loss": "2.161", "ppl": "4.47", "wps": "398264", "ups": "3.38", "wpb": "117748", "bsz": "256", "num_updates": "393800", "lr": "6.12323e-05", "gnorm": "1.009", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "117767"} +[2022-07-31 19:33:37,763][train_inner][INFO] - {"epoch": 8, "update": 7.655, "loss": "2.161", "ppl": "4.47", "wps": "400359", "ups": "3.37", "wpb": "118641", "bsz": "256", "num_updates": "394000", "lr": "6.12121e-05", "gnorm": "1.008", "loss_scale": "4", "train_wall": "59", "gb_free": "27.8", "wall": "117826"} +[2022-07-31 19:34:37,431][train_inner][INFO] - {"epoch": 8, "update": 7.659, "loss": "2.157", "ppl": "4.46", "wps": "395961", "ups": "3.35", "wpb": "118130", "bsz": "256", "num_updates": "394200", "lr": "6.11919e-05", "gnorm": "1.008", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "117886"} +[2022-07-31 19:35:37,221][train_inner][INFO] - {"epoch": 8, "update": 7.663, "loss": "2.154", "ppl": "4.45", "wps": "396377", "ups": "3.35", "wpb": "118495", "bsz": "256", "num_updates": "394400", "lr": "6.11717e-05", "gnorm": "1.005", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "117946"} +[2022-07-31 19:36:36,791][train_inner][INFO] - {"epoch": 8, "update": 7.667, "loss": "2.157", "ppl": "4.46", "wps": "397711", "ups": "3.36", "wpb": "118458", "bsz": "256", "num_updates": "394600", "lr": "6.11515e-05", "gnorm": "1.011", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "118005"} +[2022-07-31 19:37:36,212][train_inner][INFO] - {"epoch": 8, "update": 7.671, "loss": "2.148", "ppl": "4.43", "wps": "399659", "ups": "3.37", "wpb": "118738", "bsz": "256", "num_updates": "394800", "lr": "6.11313e-05", "gnorm": "1.004", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "118065"} +[2022-07-31 19:38:35,508][train_inner][INFO] - {"epoch": 8, "update": 7.675, "loss": "2.157", "ppl": "4.46", "wps": "399238", "ups": "3.37", "wpb": "118366", "bsz": "256", "num_updates": "395000", "lr": "6.11111e-05", "gnorm": "1.006", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "118124"} +[2022-07-31 19:39:34,951][train_inner][INFO] - {"epoch": 8, "update": 7.678, "loss": "2.158", "ppl": "4.46", "wps": "397956", "ups": "3.36", "wpb": "118278", "bsz": "256", "num_updates": "395200", "lr": "6.10909e-05", "gnorm": "1.005", "loss_scale": "8", "train_wall": "59", "gb_free": "26.5", "wall": "118183"} +[2022-07-31 19:40:34,493][train_inner][INFO] - {"epoch": 8, "update": 7.682, "loss": "2.154", "ppl": "4.45", "wps": "397397", "ups": "3.36", "wpb": "118309", "bsz": "256", "num_updates": "395400", "lr": "6.10707e-05", "gnorm": "1.009", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "118243"} +[2022-07-31 19:41:33,685][train_inner][INFO] - {"epoch": 8, "update": 7.686, "loss": "2.155", "ppl": "4.45", "wps": "400696", "ups": "3.38", "wpb": "118589", "bsz": "256", "num_updates": "395600", "lr": "6.10505e-05", "gnorm": "1.003", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "118302"} +[2022-07-31 19:42:33,120][train_inner][INFO] - {"epoch": 8, "update": 7.69, "loss": "2.153", "ppl": "4.45", "wps": "400035", "ups": "3.37", "wpb": "118878", "bsz": "256", "num_updates": "395800", "lr": "6.10303e-05", "gnorm": "1.003", "loss_scale": "8", "train_wall": "59", "gb_free": "22.7", "wall": "118362"} +[2022-07-31 19:43:32,425][train_inner][INFO] - {"epoch": 8, "update": 7.694, "loss": "2.16", "ppl": "4.47", "wps": "398484", "ups": "3.37", "wpb": "118160", "bsz": "256", "num_updates": "396000", "lr": "6.10101e-05", "gnorm": "1.009", "loss_scale": "8", "train_wall": "59", "gb_free": "24.3", "wall": "118421"} +[2022-07-31 19:44:32,103][train_inner][INFO] - {"epoch": 8, "update": 7.698, "loss": "2.162", "ppl": "4.48", "wps": "396293", "ups": "3.35", "wpb": "118248", "bsz": "256", "num_updates": "396200", "lr": "6.09899e-05", "gnorm": "1.008", "loss_scale": "8", "train_wall": "59", "gb_free": "24.4", "wall": "118481"} +[2022-07-31 19:45:32,926][train_inner][INFO] - {"epoch": 8, "update": 7.702, "loss": "2.152", "ppl": "4.44", "wps": "389778", "ups": "3.29", "wpb": "118536", "bsz": "256", "num_updates": "396400", "lr": "6.09697e-05", "gnorm": "1.007", "loss_scale": "8", "train_wall": "60", "gb_free": "24.2", "wall": "118541"} +[2022-07-31 19:46:32,836][train_inner][INFO] - {"epoch": 8, "update": 7.706, "loss": "2.155", "ppl": "4.45", "wps": "395764", "ups": "3.34", "wpb": "118550", "bsz": "256", "num_updates": "396600", "lr": "6.09495e-05", "gnorm": "1.004", "loss_scale": "8", "train_wall": "60", "gb_free": "21.4", "wall": "118601"} +[2022-07-31 19:47:32,413][train_inner][INFO] - {"epoch": 8, "update": 7.709, "loss": "2.152", "ppl": "4.45", "wps": "397776", "ups": "3.36", "wpb": "118492", "bsz": "256", "num_updates": "396800", "lr": "6.09293e-05", "gnorm": "1.007", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "118661"} +[2022-07-31 19:48:31,783][train_inner][INFO] - {"epoch": 8, "update": 7.713, "loss": "2.155", "ppl": "4.45", "wps": "397171", "ups": "3.37", "wpb": "117899", "bsz": "256", "num_updates": "397000", "lr": "6.09091e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "118720"} +[2022-07-31 19:49:31,021][train_inner][INFO] - {"epoch": 8, "update": 7.717, "loss": "2.159", "ppl": "4.46", "wps": "397743", "ups": "3.38", "wpb": "117808", "bsz": "256", "num_updates": "397200", "lr": "6.08889e-05", "gnorm": "1.012", "loss_scale": "16", "train_wall": "59", "gb_free": "25", "wall": "118779"} +[2022-07-31 19:50:30,494][train_inner][INFO] - {"epoch": 8, "update": 7.721, "loss": "2.158", "ppl": "4.46", "wps": "397718", "ups": "3.36", "wpb": "118265", "bsz": "256", "num_updates": "397400", "lr": "6.08687e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "118839"} +[2022-07-31 19:51:29,783][train_inner][INFO] - {"epoch": 8, "update": 7.725, "loss": "2.152", "ppl": "4.44", "wps": "399515", "ups": "3.37", "wpb": "118434", "bsz": "256", "num_updates": "397600", "lr": "6.08485e-05", "gnorm": "1.005", "loss_scale": "16", "train_wall": "59", "gb_free": "27.9", "wall": "118898"} +[2022-07-31 19:52:29,065][train_inner][INFO] - {"epoch": 8, "update": 7.729, "loss": "2.154", "ppl": "4.45", "wps": "399113", "ups": "3.37", "wpb": "118301", "bsz": "256", "num_updates": "397800", "lr": "6.08283e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "118957"} +[2022-07-31 19:53:28,508][train_inner][INFO] - {"epoch": 8, "update": 7.733, "loss": "2.155", "ppl": "4.45", "wps": "398994", "ups": "3.36", "wpb": "118586", "bsz": "256", "num_updates": "398000", "lr": "6.08081e-05", "gnorm": "1.005", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "119017"} +[2022-07-31 19:54:27,914][train_inner][INFO] - {"epoch": 8, "update": 7.737, "loss": "2.152", "ppl": "4.44", "wps": "398315", "ups": "3.37", "wpb": "118309", "bsz": "256", "num_updates": "398200", "lr": "6.07879e-05", "gnorm": "1.007", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "119076"} +[2022-07-31 19:55:28,284][train_inner][INFO] - {"epoch": 8, "update": 7.741, "loss": "2.151", "ppl": "4.44", "wps": "392264", "ups": "3.31", "wpb": "118406", "bsz": "256", "num_updates": "398400", "lr": "6.07677e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "60", "gb_free": "22.6", "wall": "119137"} +[2022-07-31 19:56:27,647][train_inner][INFO] - {"epoch": 8, "update": 7.744, "loss": "2.154", "ppl": "4.45", "wps": "398493", "ups": "3.37", "wpb": "118277", "bsz": "256", "num_updates": "398600", "lr": "6.07475e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "119196"} +[2022-07-31 19:57:26,949][train_inner][INFO] - {"epoch": 8, "update": 7.748, "loss": "2.155", "ppl": "4.46", "wps": "397150", "ups": "3.37", "wpb": "117757", "bsz": "256", "num_updates": "398800", "lr": "6.07273e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "119255"} +[2022-07-31 19:58:26,401][train_inner][INFO] - {"epoch": 8, "update": 7.752, "loss": "2.156", "ppl": "4.46", "wps": "398116", "ups": "3.36", "wpb": "118344", "bsz": "256", "num_updates": "399000", "lr": "6.07071e-05", "gnorm": "1.007", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "119315"} +[2022-07-31 19:59:26,007][train_inner][INFO] - {"epoch": 8, "update": 7.756, "loss": "2.154", "ppl": "4.45", "wps": "397396", "ups": "3.36", "wpb": "118434", "bsz": "256", "num_updates": "399200", "lr": "6.06869e-05", "gnorm": "1.006", "loss_scale": "32", "train_wall": "59", "gb_free": "27.4", "wall": "119374"} +[2022-07-31 20:00:26,708][train_inner][INFO] - {"epoch": 8, "update": 7.76, "loss": "2.156", "ppl": "4.46", "wps": "390945", "ups": "3.29", "wpb": "118654", "bsz": "256", "num_updates": "399400", "lr": "6.06667e-05", "gnorm": "1.006", "loss_scale": "32", "train_wall": "60", "gb_free": "25.5", "wall": "119435"} +[2022-07-31 20:00:50,836][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 20:01:26,486][train_inner][INFO] - {"epoch": 8, "update": 7.764, "loss": "2.149", "ppl": "4.44", "wps": "396196", "ups": "3.35", "wpb": "118417", "bsz": "256", "num_updates": "399600", "lr": "6.06465e-05", "gnorm": "1.007", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "119495"} +[2022-07-31 20:01:29,658][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 20:02:27,303][train_inner][INFO] - {"epoch": 8, "update": 7.768, "loss": "2.152", "ppl": "4.45", "wps": "388950", "ups": "3.29", "wpb": "118273", "bsz": "256", "num_updates": "399800", "lr": "6.06263e-05", "gnorm": "1.023", "loss_scale": "8", "train_wall": "60", "gb_free": "22", "wall": "119556"} +[2022-07-31 20:03:26,618][train_inner][INFO] - {"epoch": 8, "update": 7.772, "loss": "2.15", "ppl": "4.44", "wps": "399772", "ups": "3.37", "wpb": "118562", "bsz": "256", "num_updates": "400000", "lr": "6.06061e-05", "gnorm": "1.007", "loss_scale": "8", "train_wall": "59", "gb_free": "27.6", "wall": "119615"} +[2022-07-31 20:03:26,619][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-07-31 20:03:49,169][valid][INFO] - {"epoch": 8, "valid_loss": "2.04", "valid_ppl": "4.11", "valid_wps": "1.61878e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "400000", "valid_best_loss": "2.04"} +[2022-07-31 20:03:49,172][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 8 @ 400000 updates +[2022-07-31 20:03:49,173][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_8_400000.pt +[2022-07-31 20:03:56,224][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_8_400000.pt +[2022-07-31 20:04:21,476][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_8_400000.pt (epoch 8 @ 400000 updates, score 2.04) (writing took 32.30431077629328 seconds) +[2022-07-31 20:05:20,996][train_inner][INFO] - {"epoch": 8, "update": 7.776, "loss": "2.154", "ppl": "4.45", "wps": "206496", "ups": "1.75", "wpb": "118092", "bsz": "256", "num_updates": "400200", "lr": "6.05859e-05", "gnorm": "1.01", "loss_scale": "8", "train_wall": "59", "gb_free": "23.5", "wall": "119729"} +[2022-07-31 20:06:20,335][train_inner][INFO] - {"epoch": 8, "update": 7.779, "loss": "2.161", "ppl": "4.47", "wps": "396183", "ups": "3.37", "wpb": "117545", "bsz": "256", "num_updates": "400400", "lr": "6.05657e-05", "gnorm": "1.013", "loss_scale": "8", "train_wall": "59", "gb_free": "25.8", "wall": "119789"} +[2022-07-31 20:07:19,788][train_inner][INFO] - {"epoch": 8, "update": 7.783, "loss": "2.159", "ppl": "4.47", "wps": "396066", "ups": "3.36", "wpb": "117736", "bsz": "256", "num_updates": "400600", "lr": "6.05455e-05", "gnorm": "1.011", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "119848"} +[2022-07-31 20:08:19,368][train_inner][INFO] - {"epoch": 8, "update": 7.787, "loss": "2.15", "ppl": "4.44", "wps": "398070", "ups": "3.36", "wpb": "118585", "bsz": "256", "num_updates": "400800", "lr": "6.05253e-05", "gnorm": "1.006", "loss_scale": "8", "train_wall": "59", "gb_free": "24.4", "wall": "119908"} +[2022-07-31 20:09:18,967][train_inner][INFO] - {"epoch": 8, "update": 7.791, "loss": "2.147", "ppl": "4.43", "wps": "397140", "ups": "3.36", "wpb": "118346", "bsz": "256", "num_updates": "401000", "lr": "6.05051e-05", "gnorm": "1.007", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "119967"} +[2022-07-31 20:10:18,381][train_inner][INFO] - {"epoch": 8, "update": 7.795, "loss": "2.15", "ppl": "4.44", "wps": "398760", "ups": "3.37", "wpb": "118459", "bsz": "256", "num_updates": "401200", "lr": "6.04848e-05", "gnorm": "1.009", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "120027"} +[2022-07-31 20:11:17,762][train_inner][INFO] - {"epoch": 8, "update": 7.799, "loss": "2.153", "ppl": "4.45", "wps": "397826", "ups": "3.37", "wpb": "118115", "bsz": "256", "num_updates": "401400", "lr": "6.04646e-05", "gnorm": "1.009", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "120086"} +[2022-07-31 20:12:17,062][train_inner][INFO] - {"epoch": 8, "update": 7.803, "loss": "2.153", "ppl": "4.45", "wps": "399487", "ups": "3.37", "wpb": "118447", "bsz": "256", "num_updates": "401600", "lr": "6.04444e-05", "gnorm": "1.021", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "120145"} +[2022-07-31 20:13:16,709][train_inner][INFO] - {"epoch": 8, "update": 7.807, "loss": "2.149", "ppl": "4.43", "wps": "397838", "ups": "3.35", "wpb": "118648", "bsz": "256", "num_updates": "401800", "lr": "6.04242e-05", "gnorm": "1.01", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "120205"} +[2022-07-31 20:14:15,885][train_inner][INFO] - {"epoch": 8, "update": 7.811, "loss": "2.156", "ppl": "4.46", "wps": "400861", "ups": "3.38", "wpb": "118606", "bsz": "256", "num_updates": "402000", "lr": "6.0404e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "25.9", "wall": "120264"} +[2022-07-31 20:15:15,142][train_inner][INFO] - {"epoch": 8, "update": 7.814, "loss": "2.155", "ppl": "4.45", "wps": "398016", "ups": "3.38", "wpb": "117926", "bsz": "256", "num_updates": "402200", "lr": "6.03838e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "120324"} +[2022-07-31 20:16:14,647][train_inner][INFO] - {"epoch": 8, "update": 7.818, "loss": "2.153", "ppl": "4.45", "wps": "397254", "ups": "3.36", "wpb": "118193", "bsz": "256", "num_updates": "402400", "lr": "6.03636e-05", "gnorm": "1.008", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "120383"} +[2022-07-31 20:17:14,473][train_inner][INFO] - {"epoch": 8, "update": 7.822, "loss": "2.15", "ppl": "4.44", "wps": "396354", "ups": "3.34", "wpb": "118560", "bsz": "256", "num_updates": "402600", "lr": "6.03434e-05", "gnorm": "1.008", "loss_scale": "16", "train_wall": "60", "gb_free": "21.4", "wall": "120443"} +[2022-07-31 20:18:14,238][train_inner][INFO] - {"epoch": 8, "update": 7.826, "loss": "2.156", "ppl": "4.46", "wps": "395682", "ups": "3.35", "wpb": "118241", "bsz": "256", "num_updates": "402800", "lr": "6.03232e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "24.1", "wall": "120503"} +[2022-07-31 20:19:13,895][train_inner][INFO] - {"epoch": 8, "update": 7.83, "loss": "2.152", "ppl": "4.44", "wps": "397306", "ups": "3.35", "wpb": "118510", "bsz": "256", "num_updates": "403000", "lr": "6.0303e-05", "gnorm": "1.009", "loss_scale": "16", "train_wall": "59", "gb_free": "31.1", "wall": "120562"} +[2022-07-31 20:20:13,105][train_inner][INFO] - {"epoch": 8, "update": 7.834, "loss": "2.153", "ppl": "4.45", "wps": "399933", "ups": "3.38", "wpb": "118398", "bsz": "256", "num_updates": "403200", "lr": "6.02828e-05", "gnorm": "1.014", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "120622"} +[2022-07-31 20:21:12,692][train_inner][INFO] - {"epoch": 8, "update": 7.838, "loss": "2.155", "ppl": "4.45", "wps": "396000", "ups": "3.36", "wpb": "117981", "bsz": "256", "num_updates": "403400", "lr": "6.02626e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "120681"} +[2022-07-31 20:22:12,360][train_inner][INFO] - {"epoch": 8, "update": 7.842, "loss": "2.152", "ppl": "4.45", "wps": "396594", "ups": "3.35", "wpb": "118320", "bsz": "256", "num_updates": "403600", "lr": "6.02424e-05", "gnorm": "1.01", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "120741"} +[2022-07-31 20:23:11,804][train_inner][INFO] - {"epoch": 8, "update": 7.845, "loss": "2.15", "ppl": "4.44", "wps": "398055", "ups": "3.36", "wpb": "118309", "bsz": "256", "num_updates": "403800", "lr": "6.02222e-05", "gnorm": "1.008", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "120800"} +[2022-07-31 20:24:11,344][train_inner][INFO] - {"epoch": 8, "update": 7.849, "loss": "2.15", "ppl": "4.44", "wps": "397558", "ups": "3.36", "wpb": "118353", "bsz": "256", "num_updates": "404000", "lr": "6.0202e-05", "gnorm": "1.008", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "120860"} +[2022-07-31 20:25:10,512][train_inner][INFO] - {"epoch": 8, "update": 7.853, "loss": "2.144", "ppl": "4.42", "wps": "399974", "ups": "3.38", "wpb": "118327", "bsz": "256", "num_updates": "404200", "lr": "6.01818e-05", "gnorm": "1.009", "loss_scale": "32", "train_wall": "59", "gb_free": "23.7", "wall": "120919"} +[2022-07-31 20:26:10,319][train_inner][INFO] - {"epoch": 8, "update": 7.857, "loss": "2.152", "ppl": "4.45", "wps": "397568", "ups": "3.34", "wpb": "118886", "bsz": "256", "num_updates": "404400", "lr": "6.01616e-05", "gnorm": "1.006", "loss_scale": "32", "train_wall": "59", "gb_free": "22.5", "wall": "120979"} +[2022-07-31 20:27:10,104][train_inner][INFO] - {"epoch": 8, "update": 7.861, "loss": "2.147", "ppl": "4.43", "wps": "394770", "ups": "3.35", "wpb": "118005", "bsz": "256", "num_updates": "404600", "lr": "6.01414e-05", "gnorm": "1.011", "loss_scale": "32", "train_wall": "59", "gb_free": "22", "wall": "121039"} +[2022-07-31 20:28:09,890][train_inner][INFO] - {"epoch": 8, "update": 7.865, "loss": "2.152", "ppl": "4.45", "wps": "397176", "ups": "3.35", "wpb": "118727", "bsz": "256", "num_updates": "404800", "lr": "6.01212e-05", "gnorm": "1.01", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "121098"} +[2022-07-31 20:29:09,406][train_inner][INFO] - {"epoch": 8, "update": 7.869, "loss": "2.152", "ppl": "4.45", "wps": "398925", "ups": "3.36", "wpb": "118711", "bsz": "256", "num_updates": "405000", "lr": "6.0101e-05", "gnorm": "1.011", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "121158"} +[2022-07-31 20:30:09,047][train_inner][INFO] - {"epoch": 8, "update": 7.873, "loss": "2.146", "ppl": "4.42", "wps": "397410", "ups": "3.35", "wpb": "118509", "bsz": "256", "num_updates": "405200", "lr": "6.00808e-05", "gnorm": "1.008", "loss_scale": "32", "train_wall": "59", "gb_free": "25.8", "wall": "121217"} +[2022-07-31 20:30:10,838][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 20:31:09,203][train_inner][INFO] - {"epoch": 8, "update": 7.877, "loss": "2.147", "ppl": "4.43", "wps": "395380", "ups": "3.32", "wpb": "118922", "bsz": "256", "num_updates": "405400", "lr": "6.00606e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "60", "gb_free": "22.1", "wall": "121278"} +[2022-07-31 20:32:08,785][train_inner][INFO] - {"epoch": 8, "update": 7.88, "loss": "2.15", "ppl": "4.44", "wps": "397825", "ups": "3.36", "wpb": "118515", "bsz": "256", "num_updates": "405600", "lr": "6.00404e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "121337"} +[2022-07-31 20:33:08,212][train_inner][INFO] - {"epoch": 8, "update": 7.884, "loss": "2.146", "ppl": "4.43", "wps": "398311", "ups": "3.37", "wpb": "118352", "bsz": "256", "num_updates": "405800", "lr": "6.00202e-05", "gnorm": "1.01", "loss_scale": "16", "train_wall": "59", "gb_free": "26", "wall": "121397"} +[2022-07-31 20:34:07,226][train_inner][INFO] - {"epoch": 8, "update": 7.888, "loss": "2.154", "ppl": "4.45", "wps": "400571", "ups": "3.39", "wpb": "118196", "bsz": "256", "num_updates": "406000", "lr": "6e-05", "gnorm": "1.017", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "121456"} +[2022-07-31 20:35:06,588][train_inner][INFO] - {"epoch": 8, "update": 7.892, "loss": "2.156", "ppl": "4.46", "wps": "398437", "ups": "3.37", "wpb": "118258", "bsz": "255.9", "num_updates": "406200", "lr": "5.99798e-05", "gnorm": "1.013", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "121515"} +[2022-07-31 20:36:05,446][train_inner][INFO] - {"epoch": 8, "update": 7.896, "loss": "2.156", "ppl": "4.46", "wps": "401156", "ups": "3.4", "wpb": "118056", "bsz": "256", "num_updates": "406400", "lr": "5.99596e-05", "gnorm": "1.014", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "121574"} +[2022-07-31 20:37:04,962][train_inner][INFO] - {"epoch": 8, "update": 7.9, "loss": "2.144", "ppl": "4.42", "wps": "398296", "ups": "3.36", "wpb": "118524", "bsz": "256", "num_updates": "406600", "lr": "5.99394e-05", "gnorm": "1.012", "loss_scale": "16", "train_wall": "59", "gb_free": "27.7", "wall": "121633"} +[2022-07-31 20:38:04,250][train_inner][INFO] - {"epoch": 8, "update": 7.904, "loss": "2.152", "ppl": "4.44", "wps": "398136", "ups": "3.37", "wpb": "118024", "bsz": "256", "num_updates": "406800", "lr": "5.99192e-05", "gnorm": "1.018", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "121693"} +[2022-07-31 20:39:03,693][train_inner][INFO] - {"epoch": 8, "update": 7.908, "loss": "2.151", "ppl": "4.44", "wps": "398677", "ups": "3.36", "wpb": "118491", "bsz": "256", "num_updates": "407000", "lr": "5.9899e-05", "gnorm": "1.012", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "121752"} +[2022-07-31 20:40:03,177][train_inner][INFO] - {"epoch": 8, "update": 7.912, "loss": "2.147", "ppl": "4.43", "wps": "398213", "ups": "3.36", "wpb": "118435", "bsz": "256", "num_updates": "407200", "lr": "5.98788e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "23.9", "wall": "121812"} +[2022-07-31 20:41:02,555][train_inner][INFO] - {"epoch": 8, "update": 7.915, "loss": "2.148", "ppl": "4.43", "wps": "398680", "ups": "3.37", "wpb": "118364", "bsz": "256", "num_updates": "407400", "lr": "5.98586e-05", "gnorm": "1.011", "loss_scale": "32", "train_wall": "59", "gb_free": "23.4", "wall": "121871"} +[2022-07-31 20:42:01,981][train_inner][INFO] - {"epoch": 8, "update": 7.919, "loss": "2.153", "ppl": "4.45", "wps": "398116", "ups": "3.37", "wpb": "118291", "bsz": "256", "num_updates": "407600", "lr": "5.98384e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "59", "gb_free": "24.6", "wall": "121930"} +[2022-07-31 20:42:23,484][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 20:43:01,706][train_inner][INFO] - {"epoch": 8, "update": 7.923, "loss": "2.146", "ppl": "4.43", "wps": "397223", "ups": "3.35", "wpb": "118620", "bsz": "256", "num_updates": "407800", "lr": "5.98182e-05", "gnorm": "1.01", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "121990"} +[2022-07-31 20:44:00,996][train_inner][INFO] - {"epoch": 8, "update": 7.927, "loss": "2.152", "ppl": "4.44", "wps": "398238", "ups": "3.37", "wpb": "118058", "bsz": "256", "num_updates": "408000", "lr": "5.9798e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "26.9", "wall": "122049"} +[2022-07-31 20:45:00,383][train_inner][INFO] - {"epoch": 8, "update": 7.931, "loss": "2.148", "ppl": "4.43", "wps": "397808", "ups": "3.37", "wpb": "118123", "bsz": "256", "num_updates": "408200", "lr": "5.97778e-05", "gnorm": "1.013", "loss_scale": "16", "train_wall": "59", "gb_free": "24.6", "wall": "122109"} +[2022-07-31 20:46:00,061][train_inner][INFO] - {"epoch": 8, "update": 7.935, "loss": "2.148", "ppl": "4.43", "wps": "396181", "ups": "3.35", "wpb": "118214", "bsz": "256", "num_updates": "408400", "lr": "5.97576e-05", "gnorm": "1.013", "loss_scale": "16", "train_wall": "59", "gb_free": "23.8", "wall": "122168"} +[2022-07-31 20:46:59,286][train_inner][INFO] - {"epoch": 8, "update": 7.939, "loss": "2.144", "ppl": "4.42", "wps": "398534", "ups": "3.38", "wpb": "118016", "bsz": "256", "num_updates": "408600", "lr": "5.97374e-05", "gnorm": "1.01", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "122228"} +[2022-07-31 20:47:58,892][train_inner][INFO] - {"epoch": 8, "update": 7.943, "loss": "2.149", "ppl": "4.44", "wps": "396443", "ups": "3.36", "wpb": "118152", "bsz": "256", "num_updates": "408800", "lr": "5.97172e-05", "gnorm": "1.013", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "122287"} +[2022-07-31 20:48:58,514][train_inner][INFO] - {"epoch": 8, "update": 7.947, "loss": "2.145", "ppl": "4.42", "wps": "397340", "ups": "3.35", "wpb": "118450", "bsz": "256", "num_updates": "409000", "lr": "5.9697e-05", "gnorm": "1.013", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "122347"} +[2022-07-31 20:49:57,838][train_inner][INFO] - {"epoch": 8, "update": 7.95, "loss": "2.151", "ppl": "4.44", "wps": "398320", "ups": "3.37", "wpb": "118148", "bsz": "256", "num_updates": "409200", "lr": "5.96768e-05", "gnorm": "1.014", "loss_scale": "16", "train_wall": "59", "gb_free": "25.3", "wall": "122406"} +[2022-07-31 20:50:57,340][train_inner][INFO] - {"epoch": 8, "update": 7.954, "loss": "2.148", "ppl": "4.43", "wps": "397216", "ups": "3.36", "wpb": "118175", "bsz": "256", "num_updates": "409400", "lr": "5.96566e-05", "gnorm": "1.012", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "122466"} +[2022-07-31 20:51:57,035][train_inner][INFO] - {"epoch": 8, "update": 7.958, "loss": "2.15", "ppl": "4.44", "wps": "394964", "ups": "3.35", "wpb": "117886", "bsz": "256", "num_updates": "409600", "lr": "5.96364e-05", "gnorm": "1.014", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "122525"} +[2022-07-31 20:52:56,612][train_inner][INFO] - {"epoch": 8, "update": 7.962, "loss": "2.144", "ppl": "4.42", "wps": "397556", "ups": "3.36", "wpb": "118425", "bsz": "256", "num_updates": "409800", "lr": "5.96162e-05", "gnorm": "1.012", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "122585"} +[2022-07-31 20:53:56,417][train_inner][INFO] - {"epoch": 8, "update": 7.966, "loss": "2.143", "ppl": "4.42", "wps": "398508", "ups": "3.34", "wpb": "119163", "bsz": "256", "num_updates": "410000", "lr": "5.9596e-05", "gnorm": "1.007", "loss_scale": "32", "train_wall": "59", "gb_free": "23.6", "wall": "122645"} +[2022-07-31 20:54:55,863][train_inner][INFO] - {"epoch": 8, "update": 7.97, "loss": "2.144", "ppl": "4.42", "wps": "398170", "ups": "3.36", "wpb": "118347", "bsz": "256", "num_updates": "410200", "lr": "5.95758e-05", "gnorm": "1.014", "loss_scale": "32", "train_wall": "59", "gb_free": "21.8", "wall": "122704"} +[2022-07-31 20:55:55,009][train_inner][INFO] - {"epoch": 8, "update": 7.974, "loss": "2.149", "ppl": "4.44", "wps": "396544", "ups": "3.38", "wpb": "117270", "bsz": "256", "num_updates": "410400", "lr": "5.95556e-05", "gnorm": "1.019", "loss_scale": "32", "train_wall": "59", "gb_free": "26.8", "wall": "122763"} +[2022-07-31 20:56:54,371][train_inner][INFO] - {"epoch": 8, "update": 7.978, "loss": "2.151", "ppl": "4.44", "wps": "399605", "ups": "3.37", "wpb": "118605", "bsz": "256", "num_updates": "410600", "lr": "5.95354e-05", "gnorm": "1.013", "loss_scale": "32", "train_wall": "59", "gb_free": "23.9", "wall": "122823"} +[2022-07-31 20:57:53,697][train_inner][INFO] - {"epoch": 8, "update": 7.981, "loss": "2.154", "ppl": "4.45", "wps": "397154", "ups": "3.37", "wpb": "117807", "bsz": "256", "num_updates": "410800", "lr": "5.95152e-05", "gnorm": "1.019", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "122882"} +[2022-07-31 20:58:53,232][train_inner][INFO] - {"epoch": 8, "update": 7.985, "loss": "2.153", "ppl": "4.45", "wps": "397427", "ups": "3.36", "wpb": "118303", "bsz": "256", "num_updates": "411000", "lr": "5.94949e-05", "gnorm": "1.015", "loss_scale": "32", "train_wall": "59", "gb_free": "27.4", "wall": "122942"} +[2022-07-31 20:59:52,630][train_inner][INFO] - {"epoch": 8, "update": 7.989, "loss": "2.15", "ppl": "4.44", "wps": "397449", "ups": "3.37", "wpb": "118038", "bsz": "256", "num_updates": "411200", "lr": "5.94747e-05", "gnorm": "1.015", "loss_scale": "32", "train_wall": "59", "gb_free": "21.7", "wall": "123001"} +[2022-07-31 21:00:52,058][train_inner][INFO] - {"epoch": 8, "update": 7.993, "loss": "2.147", "ppl": "4.43", "wps": "400813", "ups": "3.37", "wpb": "119097", "bsz": "256", "num_updates": "411400", "lr": "5.94545e-05", "gnorm": "1.009", "loss_scale": "32", "train_wall": "59", "gb_free": "26.1", "wall": "123060"} +[2022-07-31 21:01:51,439][train_inner][INFO] - {"epoch": 8, "update": 7.997, "loss": "2.15", "ppl": "4.44", "wps": "396437", "ups": "3.37", "wpb": "117704", "bsz": "256", "num_updates": "411600", "lr": "5.94343e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "123120"} +[2022-07-31 21:02:37,077][fairseq_cli.train][INFO] - end of epoch 8 (average epoch stats below) +[2022-07-31 21:02:37,077][train][INFO] - {"epoch": 8, "train_loss": "2.161", "train_ppl": "4.47", "train_wps": "395762", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "411754", "train_lr": "5.94188e-05", "train_gnorm": "1.003", "train_loss_scale": "32", "train_train_wall": "15232", "train_gb_free": "23", "train_wall": "123166"} +[2022-07-31 21:02:37,184][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-07-31 21:02:37,187][fairseq.trainer][INFO] - begin training epoch 9 +[2022-07-31 21:02:37,187][fairseq_cli.train][INFO] - Start iterating over samples +[2022-07-31 21:02:57,578][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 21:03:02,220][train_inner][INFO] - {"epoch": 9, "update": 8.001, "loss": "2.154", "ppl": "4.45", "wps": "331204", "ups": "2.83", "wpb": "117214", "bsz": "255.4", "num_updates": "411800", "lr": "5.94141e-05", "gnorm": "1.018", "loss_scale": "32", "train_wall": "61", "gb_free": "21.4", "wall": "123191"} +[2022-07-31 21:04:01,760][train_inner][INFO] - {"epoch": 9, "update": 8.005, "loss": "2.146", "ppl": "4.43", "wps": "399111", "ups": "3.36", "wpb": "118815", "bsz": "256", "num_updates": "412000", "lr": "5.93939e-05", "gnorm": "1.011", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "123250"} +[2022-07-31 21:05:01,059][train_inner][INFO] - {"epoch": 9, "update": 8.009, "loss": "2.138", "ppl": "4.4", "wps": "401248", "ups": "3.37", "wpb": "118966", "bsz": "256", "num_updates": "412200", "lr": "5.93737e-05", "gnorm": "1.011", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "123309"} +[2022-07-31 21:05:27,069][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 21:06:00,841][train_inner][INFO] - {"epoch": 9, "update": 8.013, "loss": "2.144", "ppl": "4.42", "wps": "397019", "ups": "3.35", "wpb": "118673", "bsz": "256", "num_updates": "412400", "lr": "5.93535e-05", "gnorm": "1.011", "loss_scale": "16", "train_wall": "59", "gb_free": "26.5", "wall": "123369"} +[2022-07-31 21:07:00,336][train_inner][INFO] - {"epoch": 9, "update": 8.016, "loss": "2.143", "ppl": "4.42", "wps": "397985", "ups": "3.36", "wpb": "118389", "bsz": "256", "num_updates": "412600", "lr": "5.93333e-05", "gnorm": "1.015", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "123429"} +[2022-07-31 21:07:59,733][train_inner][INFO] - {"epoch": 9, "update": 8.02, "loss": "2.153", "ppl": "4.45", "wps": "396903", "ups": "3.37", "wpb": "117874", "bsz": "256", "num_updates": "412800", "lr": "5.93131e-05", "gnorm": "1.018", "loss_scale": "16", "train_wall": "59", "gb_free": "35.3", "wall": "123488"} +[2022-07-31 21:08:58,978][train_inner][INFO] - {"epoch": 9, "update": 8.024, "loss": "2.144", "ppl": "4.42", "wps": "399300", "ups": "3.38", "wpb": "118283", "bsz": "256", "num_updates": "413000", "lr": "5.92929e-05", "gnorm": "1.019", "loss_scale": "16", "train_wall": "59", "gb_free": "23.7", "wall": "123547"} +[2022-07-31 21:09:58,824][train_inner][INFO] - {"epoch": 9, "update": 8.028, "loss": "2.144", "ppl": "4.42", "wps": "395831", "ups": "3.34", "wpb": "118443", "bsz": "256", "num_updates": "413200", "lr": "5.92727e-05", "gnorm": "1.015", "loss_scale": "16", "train_wall": "60", "gb_free": "23.2", "wall": "123607"} +[2022-07-31 21:10:57,911][train_inner][INFO] - {"epoch": 9, "update": 8.032, "loss": "2.142", "ppl": "4.41", "wps": "402099", "ups": "3.38", "wpb": "118794", "bsz": "256", "num_updates": "413400", "lr": "5.92525e-05", "gnorm": "1.016", "loss_scale": "16", "train_wall": "59", "gb_free": "22.5", "wall": "123666"} +[2022-07-31 21:11:57,576][train_inner][INFO] - {"epoch": 9, "update": 8.036, "loss": "2.14", "ppl": "4.41", "wps": "398964", "ups": "3.35", "wpb": "119019", "bsz": "256", "num_updates": "413600", "lr": "5.92323e-05", "gnorm": "1.015", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "123726"} +[2022-07-31 21:12:56,865][train_inner][INFO] - {"epoch": 9, "update": 8.04, "loss": "2.147", "ppl": "4.43", "wps": "398096", "ups": "3.37", "wpb": "118012", "bsz": "256", "num_updates": "413800", "lr": "5.92121e-05", "gnorm": "1.017", "loss_scale": "16", "train_wall": "59", "gb_free": "26.8", "wall": "123785"} +[2022-07-31 21:13:56,304][train_inner][INFO] - {"epoch": 9, "update": 8.044, "loss": "2.145", "ppl": "4.42", "wps": "398578", "ups": "3.36", "wpb": "118456", "bsz": "256", "num_updates": "414000", "lr": "5.91919e-05", "gnorm": "1.017", "loss_scale": "16", "train_wall": "59", "gb_free": "22.9", "wall": "123845"} +[2022-07-31 21:14:55,961][train_inner][INFO] - {"epoch": 9, "update": 8.048, "loss": "2.145", "ppl": "4.42", "wps": "398344", "ups": "3.35", "wpb": "118819", "bsz": "256", "num_updates": "414200", "lr": "5.91717e-05", "gnorm": "1.016", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "123904"} +[2022-07-31 21:15:55,319][train_inner][INFO] - {"epoch": 9, "update": 8.051, "loss": "2.144", "ppl": "4.42", "wps": "398922", "ups": "3.37", "wpb": "118394", "bsz": "256", "num_updates": "414400", "lr": "5.91515e-05", "gnorm": "1.019", "loss_scale": "32", "train_wall": "59", "gb_free": "24", "wall": "123964"} +[2022-07-31 21:16:54,925][train_inner][INFO] - {"epoch": 9, "update": 8.055, "loss": "2.143", "ppl": "4.42", "wps": "396998", "ups": "3.36", "wpb": "118318", "bsz": "256", "num_updates": "414600", "lr": "5.91313e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "124023"} +[2022-07-31 21:17:54,435][train_inner][INFO] - {"epoch": 9, "update": 8.059, "loss": "2.146", "ppl": "4.43", "wps": "399032", "ups": "3.36", "wpb": "118731", "bsz": "256", "num_updates": "414800", "lr": "5.91111e-05", "gnorm": "1.014", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "124083"} +[2022-07-31 21:18:53,986][train_inner][INFO] - {"epoch": 9, "update": 8.063, "loss": "2.148", "ppl": "4.43", "wps": "397702", "ups": "3.36", "wpb": "118416", "bsz": "256", "num_updates": "415000", "lr": "5.90909e-05", "gnorm": "1.018", "loss_scale": "32", "train_wall": "59", "gb_free": "23.5", "wall": "124142"} +[2022-07-31 21:19:53,589][train_inner][INFO] - {"epoch": 9, "update": 8.067, "loss": "2.151", "ppl": "4.44", "wps": "396158", "ups": "3.36", "wpb": "118061", "bsz": "256", "num_updates": "415200", "lr": "5.90707e-05", "gnorm": "1.019", "loss_scale": "32", "train_wall": "59", "gb_free": "26.5", "wall": "124202"} +[2022-07-31 21:20:52,864][train_inner][INFO] - {"epoch": 9, "update": 8.071, "loss": "2.142", "ppl": "4.41", "wps": "399107", "ups": "3.37", "wpb": "118284", "bsz": "256", "num_updates": "415400", "lr": "5.90505e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "124261"} +[2022-07-31 21:21:51,944][train_inner][INFO] - {"epoch": 9, "update": 8.075, "loss": "2.141", "ppl": "4.41", "wps": "400936", "ups": "3.39", "wpb": "118437", "bsz": "256", "num_updates": "415600", "lr": "5.90303e-05", "gnorm": "1.016", "loss_scale": "32", "train_wall": "59", "gb_free": "21.9", "wall": "124320"} +[2022-07-31 21:22:51,281][train_inner][INFO] - {"epoch": 9, "update": 8.079, "loss": "2.147", "ppl": "4.43", "wps": "399316", "ups": "3.37", "wpb": "118469", "bsz": "256", "num_updates": "415800", "lr": "5.90101e-05", "gnorm": "1.019", "loss_scale": "32", "train_wall": "59", "gb_free": "22.8", "wall": "124380"} +[2022-07-31 21:23:50,703][train_inner][INFO] - {"epoch": 9, "update": 8.083, "loss": "2.145", "ppl": "4.42", "wps": "397872", "ups": "3.37", "wpb": "118211", "bsz": "256", "num_updates": "416000", "lr": "5.89899e-05", "gnorm": "1.018", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "124439"} +[2022-07-31 21:24:51,174][train_inner][INFO] - {"epoch": 9, "update": 8.086, "loss": "2.149", "ppl": "4.44", "wps": "391356", "ups": "3.31", "wpb": "118329", "bsz": "256", "num_updates": "416200", "lr": "5.89697e-05", "gnorm": "1.02", "loss_scale": "32", "train_wall": "60", "gb_free": "22.5", "wall": "124500"} +[2022-07-31 21:25:50,367][train_inner][INFO] - {"epoch": 9, "update": 8.09, "loss": "2.146", "ppl": "4.43", "wps": "398292", "ups": "3.38", "wpb": "117878", "bsz": "256", "num_updates": "416400", "lr": "5.89495e-05", "gnorm": "1.02", "loss_scale": "64", "train_wall": "59", "gb_free": "25.3", "wall": "124559"} +[2022-07-31 21:26:49,796][train_inner][INFO] - {"epoch": 9, "update": 8.094, "loss": "2.135", "ppl": "4.39", "wps": "399254", "ups": "3.37", "wpb": "118636", "bsz": "256", "num_updates": "416600", "lr": "5.89293e-05", "gnorm": "1.016", "loss_scale": "64", "train_wall": "59", "gb_free": "22.9", "wall": "124618"} +[2022-07-31 21:27:49,459][train_inner][INFO] - {"epoch": 9, "update": 8.098, "loss": "2.145", "ppl": "4.42", "wps": "397219", "ups": "3.35", "wpb": "118496", "bsz": "256", "num_updates": "416800", "lr": "5.89091e-05", "gnorm": "1.017", "loss_scale": "64", "train_wall": "59", "gb_free": "24.2", "wall": "124678"} +[2022-07-31 21:28:48,599][train_inner][INFO] - {"epoch": 9, "update": 8.102, "loss": "2.148", "ppl": "4.43", "wps": "398942", "ups": "3.38", "wpb": "117966", "bsz": "256", "num_updates": "417000", "lr": "5.88889e-05", "gnorm": "1.022", "loss_scale": "64", "train_wall": "59", "gb_free": "21.7", "wall": "124737"} +[2022-07-31 21:29:31,726][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 21:29:48,486][train_inner][INFO] - {"epoch": 9, "update": 8.106, "loss": "2.14", "ppl": "4.41", "wps": "395784", "ups": "3.34", "wpb": "118510", "bsz": "256", "num_updates": "417200", "lr": "5.88687e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "60", "gb_free": "21.4", "wall": "124797"} +[2022-07-31 21:30:48,014][train_inner][INFO] - {"epoch": 9, "update": 8.11, "loss": "2.145", "ppl": "4.42", "wps": "397022", "ups": "3.36", "wpb": "118168", "bsz": "256", "num_updates": "417400", "lr": "5.88485e-05", "gnorm": "1.02", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "124856"} +[2022-07-31 21:31:47,458][train_inner][INFO] - {"epoch": 9, "update": 8.114, "loss": "2.138", "ppl": "4.4", "wps": "398432", "ups": "3.36", "wpb": "118422", "bsz": "256", "num_updates": "417600", "lr": "5.88283e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "59", "gb_free": "22.9", "wall": "124916"} +[2022-07-31 21:32:46,731][train_inner][INFO] - {"epoch": 9, "update": 8.117, "loss": "2.143", "ppl": "4.42", "wps": "398909", "ups": "3.37", "wpb": "118221", "bsz": "256", "num_updates": "417800", "lr": "5.88081e-05", "gnorm": "1.017", "loss_scale": "32", "train_wall": "59", "gb_free": "24.4", "wall": "124975"} +[2022-07-31 21:33:45,735][train_inner][INFO] - {"epoch": 9, "update": 8.121, "loss": "2.151", "ppl": "4.44", "wps": "396997", "ups": "3.39", "wpb": "117121", "bsz": "256", "num_updates": "418000", "lr": "5.87879e-05", "gnorm": "1.027", "loss_scale": "32", "train_wall": "59", "gb_free": "23", "wall": "125034"} +[2022-07-31 21:34:45,391][train_inner][INFO] - {"epoch": 9, "update": 8.125, "loss": "2.141", "ppl": "4.41", "wps": "394775", "ups": "3.35", "wpb": "117752", "bsz": "256", "num_updates": "418200", "lr": "5.87677e-05", "gnorm": "1.018", "loss_scale": "32", "train_wall": "59", "gb_free": "22.7", "wall": "125094"} +[2022-07-31 21:35:06,919][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 21:35:45,144][train_inner][INFO] - {"epoch": 9, "update": 8.129, "loss": "2.149", "ppl": "4.43", "wps": "395765", "ups": "3.35", "wpb": "118240", "bsz": "256", "num_updates": "418400", "lr": "5.87475e-05", "gnorm": "1.018", "loss_scale": "16", "train_wall": "59", "gb_free": "33.5", "wall": "125154"} +[2022-07-31 21:36:44,682][train_inner][INFO] - {"epoch": 9, "update": 8.133, "loss": "2.129", "ppl": "4.37", "wps": "399925", "ups": "3.36", "wpb": "119053", "bsz": "256", "num_updates": "418600", "lr": "5.87273e-05", "gnorm": "1.013", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "125213"} +[2022-07-31 21:36:53,480][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 21:37:44,464][train_inner][INFO] - {"epoch": 9, "update": 8.137, "loss": "2.144", "ppl": "4.42", "wps": "395279", "ups": "3.35", "wpb": "118154", "bsz": "256", "num_updates": "418800", "lr": "5.87071e-05", "gnorm": "1.02", "loss_scale": "8", "train_wall": "59", "gb_free": "25.8", "wall": "125273"} +[2022-07-31 21:38:44,079][train_inner][INFO] - {"epoch": 9, "update": 8.141, "loss": "2.141", "ppl": "4.41", "wps": "394061", "ups": "3.35", "wpb": "117458", "bsz": "256", "num_updates": "419000", "lr": "5.86869e-05", "gnorm": "1.026", "loss_scale": "8", "train_wall": "59", "gb_free": "27.2", "wall": "125333"} +[2022-07-31 21:39:43,692][train_inner][INFO] - {"epoch": 9, "update": 8.145, "loss": "2.144", "ppl": "4.42", "wps": "397029", "ups": "3.36", "wpb": "118338", "bsz": "256", "num_updates": "419200", "lr": "5.86667e-05", "gnorm": "1.019", "loss_scale": "8", "train_wall": "59", "gb_free": "22.7", "wall": "125392"} +[2022-07-31 21:40:44,145][train_inner][INFO] - {"epoch": 9, "update": 8.149, "loss": "2.142", "ppl": "4.41", "wps": "390420", "ups": "3.31", "wpb": "118011", "bsz": "256", "num_updates": "419400", "lr": "5.86465e-05", "gnorm": "1.021", "loss_scale": "8", "train_wall": "60", "gb_free": "22.5", "wall": "125453"} +[2022-07-31 21:41:43,701][train_inner][INFO] - {"epoch": 9, "update": 8.152, "loss": "2.143", "ppl": "4.42", "wps": "396471", "ups": "3.36", "wpb": "118060", "bsz": "256", "num_updates": "419600", "lr": "5.86263e-05", "gnorm": "1.031", "loss_scale": "8", "train_wall": "59", "gb_free": "24.1", "wall": "125512"} +[2022-07-31 21:42:43,466][train_inner][INFO] - {"epoch": 9, "update": 8.156, "loss": "2.146", "ppl": "4.43", "wps": "395662", "ups": "3.35", "wpb": "118232", "bsz": "256", "num_updates": "419800", "lr": "5.86061e-05", "gnorm": "1.022", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "125572"} +[2022-07-31 21:43:43,196][train_inner][INFO] - {"epoch": 9, "update": 8.16, "loss": "2.142", "ppl": "4.41", "wps": "397211", "ups": "3.35", "wpb": "118626", "bsz": "256", "num_updates": "420000", "lr": "5.85859e-05", "gnorm": "1.019", "loss_scale": "8", "train_wall": "59", "gb_free": "27.1", "wall": "125632"} +[2022-07-31 21:44:42,486][train_inner][INFO] - {"epoch": 9, "update": 8.164, "loss": "2.136", "ppl": "4.4", "wps": "398245", "ups": "3.37", "wpb": "118060", "bsz": "256", "num_updates": "420200", "lr": "5.85657e-05", "gnorm": "1.021", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "125691"} +[2022-07-31 21:45:42,229][train_inner][INFO] - {"epoch": 9, "update": 8.168, "loss": "2.14", "ppl": "4.41", "wps": "396236", "ups": "3.35", "wpb": "118361", "bsz": "256", "num_updates": "420400", "lr": "5.85455e-05", "gnorm": "1.025", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "125751"} +[2022-07-31 21:46:42,962][train_inner][INFO] - {"epoch": 9, "update": 8.172, "loss": "2.144", "ppl": "4.42", "wps": "389678", "ups": "3.29", "wpb": "118330", "bsz": "256", "num_updates": "420600", "lr": "5.85253e-05", "gnorm": "1.021", "loss_scale": "8", "train_wall": "60", "gb_free": "26.5", "wall": "125811"} +[2022-07-31 21:47:42,165][train_inner][INFO] - {"epoch": 9, "update": 8.176, "loss": "2.141", "ppl": "4.41", "wps": "400297", "ups": "3.38", "wpb": "118493", "bsz": "256", "num_updates": "420800", "lr": "5.85051e-05", "gnorm": "1.022", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "125871"} +[2022-07-31 21:48:41,672][train_inner][INFO] - {"epoch": 9, "update": 8.18, "loss": "2.143", "ppl": "4.42", "wps": "398151", "ups": "3.36", "wpb": "118464", "bsz": "256", "num_updates": "421000", "lr": "5.84848e-05", "gnorm": "1.019", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "125930"} +[2022-07-31 21:49:41,757][train_inner][INFO] - {"epoch": 9, "update": 8.184, "loss": "2.144", "ppl": "4.42", "wps": "392516", "ups": "3.33", "wpb": "117920", "bsz": "256", "num_updates": "421200", "lr": "5.84646e-05", "gnorm": "1.022", "loss_scale": "16", "train_wall": "60", "gb_free": "22.9", "wall": "125990"} +[2022-07-31 21:50:41,057][train_inner][INFO] - {"epoch": 9, "update": 8.187, "loss": "2.143", "ppl": "4.42", "wps": "398249", "ups": "3.37", "wpb": "118080", "bsz": "256", "num_updates": "421400", "lr": "5.84444e-05", "gnorm": "1.024", "loss_scale": "16", "train_wall": "59", "gb_free": "22.1", "wall": "126049"} +[2022-07-31 21:51:40,710][train_inner][INFO] - {"epoch": 9, "update": 8.191, "loss": "2.152", "ppl": "4.45", "wps": "395332", "ups": "3.35", "wpb": "117913", "bsz": "256", "num_updates": "421600", "lr": "5.84242e-05", "gnorm": "1.024", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "126109"} +[2022-07-31 21:52:40,080][train_inner][INFO] - {"epoch": 9, "update": 8.195, "loss": "2.143", "ppl": "4.42", "wps": "399460", "ups": "3.37", "wpb": "118579", "bsz": "256", "num_updates": "421800", "lr": "5.8404e-05", "gnorm": "1.021", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "126169"} +[2022-07-31 21:53:38,893][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 21:53:40,593][train_inner][INFO] - {"epoch": 9, "update": 8.199, "loss": "2.144", "ppl": "4.42", "wps": "390647", "ups": "3.31", "wpb": "118196", "bsz": "256", "num_updates": "422000", "lr": "5.83838e-05", "gnorm": "1.02", "loss_scale": "8", "train_wall": "60", "gb_free": "25.8", "wall": "126229"} +[2022-07-31 21:54:40,195][train_inner][INFO] - {"epoch": 9, "update": 8.203, "loss": "2.146", "ppl": "4.43", "wps": "396146", "ups": "3.36", "wpb": "118055", "bsz": "256", "num_updates": "422200", "lr": "5.83636e-05", "gnorm": "1.023", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "126289"} +[2022-07-31 21:55:41,124][train_inner][INFO] - {"epoch": 9, "update": 8.207, "loss": "2.142", "ppl": "4.41", "wps": "389430", "ups": "3.28", "wpb": "118638", "bsz": "256", "num_updates": "422400", "lr": "5.83434e-05", "gnorm": "1.02", "loss_scale": "8", "train_wall": "61", "gb_free": "23.7", "wall": "126350"} +[2022-07-31 21:56:40,868][train_inner][INFO] - {"epoch": 9, "update": 8.211, "loss": "2.139", "ppl": "4.4", "wps": "393890", "ups": "3.35", "wpb": "117662", "bsz": "256", "num_updates": "422600", "lr": "5.83232e-05", "gnorm": "1.028", "loss_scale": "8", "train_wall": "59", "gb_free": "22.8", "wall": "126409"} +[2022-07-31 21:57:40,620][train_inner][INFO] - {"epoch": 9, "update": 8.215, "loss": "2.143", "ppl": "4.42", "wps": "394539", "ups": "3.35", "wpb": "117870", "bsz": "256", "num_updates": "422800", "lr": "5.8303e-05", "gnorm": "1.03", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "126469"} +[2022-07-31 21:58:40,165][train_inner][INFO] - {"epoch": 9, "update": 8.219, "loss": "2.144", "ppl": "4.42", "wps": "395496", "ups": "3.36", "wpb": "117749", "bsz": "256", "num_updates": "423000", "lr": "5.82828e-05", "gnorm": "1.024", "loss_scale": "8", "train_wall": "59", "gb_free": "23.5", "wall": "126529"} +[2022-07-31 21:59:39,676][train_inner][INFO] - {"epoch": 9, "update": 8.222, "loss": "2.137", "ppl": "4.4", "wps": "397264", "ups": "3.36", "wpb": "118206", "bsz": "255.9", "num_updates": "423200", "lr": "5.82626e-05", "gnorm": "1.021", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "126588"} +[2022-07-31 22:00:39,333][train_inner][INFO] - {"epoch": 9, "update": 8.226, "loss": "2.142", "ppl": "4.41", "wps": "397467", "ups": "3.35", "wpb": "118559", "bsz": "256", "num_updates": "423400", "lr": "5.82424e-05", "gnorm": "1.022", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "126648"} +[2022-07-31 22:01:38,597][train_inner][INFO] - {"epoch": 9, "update": 8.23, "loss": "2.138", "ppl": "4.4", "wps": "400161", "ups": "3.37", "wpb": "118574", "bsz": "256", "num_updates": "423600", "lr": "5.82222e-05", "gnorm": "1.02", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "126707"} +[2022-07-31 22:02:38,329][train_inner][INFO] - {"epoch": 9, "update": 8.234, "loss": "2.136", "ppl": "4.4", "wps": "398078", "ups": "3.35", "wpb": "118889", "bsz": "256", "num_updates": "423800", "lr": "5.8202e-05", "gnorm": "1.02", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "126767"} +[2022-07-31 22:03:37,859][train_inner][INFO] - {"epoch": 9, "update": 8.238, "loss": "2.148", "ppl": "4.43", "wps": "395261", "ups": "3.36", "wpb": "117648", "bsz": "256", "num_updates": "424000", "lr": "5.81818e-05", "gnorm": "1.025", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "126826"} +[2022-07-31 22:03:51,567][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 22:04:38,589][train_inner][INFO] - {"epoch": 9, "update": 8.242, "loss": "2.141", "ppl": "4.41", "wps": "391467", "ups": "3.29", "wpb": "118870", "bsz": "256", "num_updates": "424200", "lr": "5.81616e-05", "gnorm": "1.023", "loss_scale": "8", "train_wall": "60", "gb_free": "22.4", "wall": "126887"} +[2022-07-31 22:05:37,628][train_inner][INFO] - {"epoch": 9, "update": 8.246, "loss": "2.138", "ppl": "4.4", "wps": "399267", "ups": "3.39", "wpb": "117861", "bsz": "256", "num_updates": "424400", "lr": "5.81414e-05", "gnorm": "1.024", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "126946"} +[2022-07-31 22:06:36,610][train_inner][INFO] - {"epoch": 9, "update": 8.25, "loss": "2.148", "ppl": "4.43", "wps": "401216", "ups": "3.39", "wpb": "118322", "bsz": "256", "num_updates": "424600", "lr": "5.81212e-05", "gnorm": "1.022", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "127005"} +[2022-07-31 22:07:35,677][train_inner][INFO] - {"epoch": 9, "update": 8.254, "loss": "2.136", "ppl": "4.4", "wps": "398818", "ups": "3.39", "wpb": "117784", "bsz": "256", "num_updates": "424800", "lr": "5.8101e-05", "gnorm": "1.024", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "127064"} +[2022-07-31 22:08:34,674][train_inner][INFO] - {"epoch": 9, "update": 8.257, "loss": "2.138", "ppl": "4.4", "wps": "401231", "ups": "3.39", "wpb": "118356", "bsz": "256", "num_updates": "425000", "lr": "5.80808e-05", "gnorm": "1.022", "loss_scale": "8", "train_wall": "59", "gb_free": "22.1", "wall": "127123"} +[2022-07-31 22:09:34,259][train_inner][INFO] - {"epoch": 9, "update": 8.261, "loss": "2.145", "ppl": "4.42", "wps": "398164", "ups": "3.36", "wpb": "118622", "bsz": "256", "num_updates": "425200", "lr": "5.80606e-05", "gnorm": "1.028", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "127183"} +[2022-07-31 22:10:33,029][train_inner][INFO] - {"epoch": 9, "update": 8.265, "loss": "2.14", "ppl": "4.41", "wps": "402676", "ups": "3.4", "wpb": "118326", "bsz": "256", "num_updates": "425400", "lr": "5.80404e-05", "gnorm": "1.024", "loss_scale": "8", "train_wall": "58", "gb_free": "23.8", "wall": "127241"} +[2022-07-31 22:11:32,412][train_inner][INFO] - {"epoch": 9, "update": 8.269, "loss": "2.144", "ppl": "4.42", "wps": "398298", "ups": "3.37", "wpb": "118259", "bsz": "256", "num_updates": "425600", "lr": "5.80202e-05", "gnorm": "1.025", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "127301"} +[2022-07-31 22:12:31,572][train_inner][INFO] - {"epoch": 9, "update": 8.273, "loss": "2.141", "ppl": "4.41", "wps": "398411", "ups": "3.38", "wpb": "117849", "bsz": "256", "num_updates": "425800", "lr": "5.8e-05", "gnorm": "1.027", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "127360"} +[2022-07-31 22:13:31,028][train_inner][INFO] - {"epoch": 9, "update": 8.277, "loss": "2.141", "ppl": "4.41", "wps": "398089", "ups": "3.36", "wpb": "118344", "bsz": "256", "num_updates": "426000", "lr": "5.79798e-05", "gnorm": "1.028", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "127419"} +[2022-07-31 22:14:30,266][train_inner][INFO] - {"epoch": 9, "update": 8.281, "loss": "2.141", "ppl": "4.41", "wps": "398982", "ups": "3.38", "wpb": "118174", "bsz": "256", "num_updates": "426200", "lr": "5.79596e-05", "gnorm": "1.027", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "127479"} +[2022-07-31 22:15:30,008][train_inner][INFO] - {"epoch": 9, "update": 8.285, "loss": "2.143", "ppl": "4.42", "wps": "395525", "ups": "3.35", "wpb": "118146", "bsz": "256", "num_updates": "426400", "lr": "5.79394e-05", "gnorm": "1.026", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "127538"} +[2022-07-31 22:16:29,077][train_inner][INFO] - {"epoch": 9, "update": 8.288, "loss": "2.14", "ppl": "4.41", "wps": "400601", "ups": "3.39", "wpb": "118315", "bsz": "256", "num_updates": "426600", "lr": "5.79192e-05", "gnorm": "1.025", "loss_scale": "16", "train_wall": "59", "gb_free": "25.4", "wall": "127598"} +[2022-07-31 22:16:50,886][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 22:17:28,526][train_inner][INFO] - {"epoch": 9, "update": 8.292, "loss": "2.143", "ppl": "4.42", "wps": "396073", "ups": "3.36", "wpb": "117730", "bsz": "256", "num_updates": "426800", "lr": "5.7899e-05", "gnorm": "1.029", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "127657"} +[2022-07-31 22:18:27,973][train_inner][INFO] - {"epoch": 9, "update": 8.296, "loss": "2.141", "ppl": "4.41", "wps": "398966", "ups": "3.36", "wpb": "118585", "bsz": "256", "num_updates": "427000", "lr": "5.78788e-05", "gnorm": "1.026", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "127716"} +[2022-07-31 22:19:27,251][train_inner][INFO] - {"epoch": 9, "update": 8.3, "loss": "2.139", "ppl": "4.4", "wps": "399365", "ups": "3.37", "wpb": "118368", "bsz": "256", "num_updates": "427200", "lr": "5.78586e-05", "gnorm": "1.024", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "127776"} +[2022-07-31 22:20:26,722][train_inner][INFO] - {"epoch": 9, "update": 8.304, "loss": "2.137", "ppl": "4.4", "wps": "397910", "ups": "3.36", "wpb": "118320", "bsz": "256", "num_updates": "427400", "lr": "5.78384e-05", "gnorm": "1.025", "loss_scale": "8", "train_wall": "59", "gb_free": "28.9", "wall": "127835"} +[2022-07-31 22:21:25,844][train_inner][INFO] - {"epoch": 9, "update": 8.308, "loss": "2.144", "ppl": "4.42", "wps": "400980", "ups": "3.38", "wpb": "118533", "bsz": "256", "num_updates": "427600", "lr": "5.78182e-05", "gnorm": "1.03", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "127894"} +[2022-07-31 22:22:25,359][train_inner][INFO] - {"epoch": 9, "update": 8.312, "loss": "2.139", "ppl": "4.4", "wps": "397512", "ups": "3.36", "wpb": "118289", "bsz": "256", "num_updates": "427800", "lr": "5.7798e-05", "gnorm": "1.026", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "127954"} +[2022-07-31 22:23:24,927][train_inner][INFO] - {"epoch": 9, "update": 8.316, "loss": "2.131", "ppl": "4.38", "wps": "397764", "ups": "3.36", "wpb": "118469", "bsz": "256", "num_updates": "428000", "lr": "5.77778e-05", "gnorm": "1.023", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "128013"} +[2022-07-31 22:24:24,203][train_inner][INFO] - {"epoch": 9, "update": 8.32, "loss": "2.144", "ppl": "4.42", "wps": "398942", "ups": "3.37", "wpb": "118238", "bsz": "256", "num_updates": "428200", "lr": "5.77576e-05", "gnorm": "1.028", "loss_scale": "8", "train_wall": "59", "gb_free": "25.5", "wall": "128073"} +[2022-07-31 22:25:23,442][train_inner][INFO] - {"epoch": 9, "update": 8.323, "loss": "2.139", "ppl": "4.4", "wps": "398230", "ups": "3.38", "wpb": "117952", "bsz": "256", "num_updates": "428400", "lr": "5.77374e-05", "gnorm": "1.027", "loss_scale": "8", "train_wall": "59", "gb_free": "22.8", "wall": "128132"} +[2022-07-31 22:26:22,729][train_inner][INFO] - {"epoch": 9, "update": 8.327, "loss": "2.142", "ppl": "4.41", "wps": "398306", "ups": "3.37", "wpb": "118072", "bsz": "256", "num_updates": "428600", "lr": "5.77172e-05", "gnorm": "1.028", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "128191"} +[2022-07-31 22:27:22,122][train_inner][INFO] - {"epoch": 9, "update": 8.331, "loss": "2.14", "ppl": "4.41", "wps": "397038", "ups": "3.37", "wpb": "117904", "bsz": "256", "num_updates": "428800", "lr": "5.7697e-05", "gnorm": "1.028", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "128251"} +[2022-07-31 22:28:21,448][train_inner][INFO] - {"epoch": 9, "update": 8.335, "loss": "2.145", "ppl": "4.42", "wps": "398047", "ups": "3.37", "wpb": "118073", "bsz": "256", "num_updates": "429000", "lr": "5.76768e-05", "gnorm": "1.029", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "128310"} +[2022-07-31 22:29:20,958][train_inner][INFO] - {"epoch": 9, "update": 8.339, "loss": "2.145", "ppl": "4.42", "wps": "395347", "ups": "3.36", "wpb": "117636", "bsz": "256", "num_updates": "429200", "lr": "5.76566e-05", "gnorm": "1.03", "loss_scale": "16", "train_wall": "59", "gb_free": "24.7", "wall": "128369"} +[2022-07-31 22:30:20,231][train_inner][INFO] - {"epoch": 9, "update": 8.343, "loss": "2.133", "ppl": "4.39", "wps": "400257", "ups": "3.37", "wpb": "118621", "bsz": "256", "num_updates": "429400", "lr": "5.76364e-05", "gnorm": "1.025", "loss_scale": "16", "train_wall": "59", "gb_free": "28.6", "wall": "128429"} +[2022-07-31 22:31:19,720][train_inner][INFO] - {"epoch": 9, "update": 8.347, "loss": "2.142", "ppl": "4.41", "wps": "398223", "ups": "3.36", "wpb": "118448", "bsz": "256", "num_updates": "429600", "lr": "5.76162e-05", "gnorm": "1.026", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "128488"} +[2022-07-31 22:32:19,123][train_inner][INFO] - {"epoch": 9, "update": 8.351, "loss": "2.14", "ppl": "4.41", "wps": "397342", "ups": "3.37", "wpb": "118016", "bsz": "255.9", "num_updates": "429800", "lr": "5.7596e-05", "gnorm": "1.037", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "128548"} +[2022-07-31 22:33:18,502][train_inner][INFO] - {"epoch": 9, "update": 8.355, "loss": "2.14", "ppl": "4.41", "wps": "398231", "ups": "3.37", "wpb": "118232", "bsz": "256", "num_updates": "430000", "lr": "5.75758e-05", "gnorm": "1.028", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "128607"} +[2022-07-31 22:34:18,204][train_inner][INFO] - {"epoch": 9, "update": 8.358, "loss": "2.14", "ppl": "4.41", "wps": "394970", "ups": "3.35", "wpb": "117901", "bsz": "256", "num_updates": "430200", "lr": "5.75556e-05", "gnorm": "1.032", "loss_scale": "16", "train_wall": "59", "gb_free": "25.9", "wall": "128667"} +[2022-07-31 22:35:17,345][train_inner][INFO] - {"epoch": 9, "update": 8.362, "loss": "2.144", "ppl": "4.42", "wps": "398790", "ups": "3.38", "wpb": "117923", "bsz": "256", "num_updates": "430400", "lr": "5.75354e-05", "gnorm": "1.031", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "128726"} +[2022-07-31 22:36:16,799][train_inner][INFO] - {"epoch": 9, "update": 8.366, "loss": "2.139", "ppl": "4.4", "wps": "398898", "ups": "3.36", "wpb": "118580", "bsz": "256", "num_updates": "430600", "lr": "5.75152e-05", "gnorm": "1.025", "loss_scale": "16", "train_wall": "59", "gb_free": "26.3", "wall": "128785"} +[2022-07-31 22:37:16,487][train_inner][INFO] - {"epoch": 9, "update": 8.37, "loss": "2.139", "ppl": "4.4", "wps": "395496", "ups": "3.35", "wpb": "118031", "bsz": "256", "num_updates": "430800", "lr": "5.74949e-05", "gnorm": "1.03", "loss_scale": "32", "train_wall": "59", "gb_free": "24.3", "wall": "128845"} +[2022-07-31 22:38:15,887][train_inner][INFO] - {"epoch": 9, "update": 8.374, "loss": "2.142", "ppl": "4.41", "wps": "398137", "ups": "3.37", "wpb": "118247", "bsz": "256", "num_updates": "431000", "lr": "5.74747e-05", "gnorm": "1.03", "loss_scale": "32", "train_wall": "59", "gb_free": "23.8", "wall": "128904"} +[2022-07-31 22:39:15,396][train_inner][INFO] - {"epoch": 9, "update": 8.378, "loss": "2.141", "ppl": "4.41", "wps": "398074", "ups": "3.36", "wpb": "118444", "bsz": "256", "num_updates": "431200", "lr": "5.74545e-05", "gnorm": "1.028", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "128964"} +[2022-07-31 22:40:15,339][train_inner][INFO] - {"epoch": 9, "update": 8.382, "loss": "2.14", "ppl": "4.41", "wps": "395885", "ups": "3.34", "wpb": "118651", "bsz": "256", "num_updates": "431400", "lr": "5.74343e-05", "gnorm": "1.028", "loss_scale": "32", "train_wall": "60", "gb_free": "22.2", "wall": "129024"} +[2022-07-31 22:41:14,525][train_inner][INFO] - {"epoch": 9, "update": 8.386, "loss": "2.138", "ppl": "4.4", "wps": "399858", "ups": "3.38", "wpb": "118330", "bsz": "256", "num_updates": "431600", "lr": "5.74141e-05", "gnorm": "1.032", "loss_scale": "32", "train_wall": "59", "gb_free": "25.8", "wall": "129083"} +[2022-07-31 22:42:13,963][train_inner][INFO] - {"epoch": 9, "update": 8.389, "loss": "2.134", "ppl": "4.39", "wps": "399015", "ups": "3.36", "wpb": "118581", "bsz": "256", "num_updates": "431800", "lr": "5.73939e-05", "gnorm": "1.028", "loss_scale": "32", "train_wall": "59", "gb_free": "30.5", "wall": "129142"} +[2022-07-31 22:43:13,653][train_inner][INFO] - {"epoch": 9, "update": 8.393, "loss": "2.146", "ppl": "4.43", "wps": "394922", "ups": "3.35", "wpb": "117863", "bsz": "256", "num_updates": "432000", "lr": "5.73737e-05", "gnorm": "1.037", "loss_scale": "32", "train_wall": "59", "gb_free": "21.4", "wall": "129202"} +[2022-07-31 22:44:13,053][train_inner][INFO] - {"epoch": 9, "update": 8.397, "loss": "2.136", "ppl": "4.4", "wps": "399524", "ups": "3.37", "wpb": "118659", "bsz": "256", "num_updates": "432200", "lr": "5.73535e-05", "gnorm": "1.027", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "129261"} +[2022-07-31 22:45:12,754][train_inner][INFO] - {"epoch": 9, "update": 8.401, "loss": "2.137", "ppl": "4.4", "wps": "395043", "ups": "3.35", "wpb": "117921", "bsz": "256", "num_updates": "432400", "lr": "5.73333e-05", "gnorm": "1.031", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "129321"} +[2022-07-31 22:46:12,308][train_inner][INFO] - {"epoch": 9, "update": 8.405, "loss": "2.14", "ppl": "4.41", "wps": "395443", "ups": "3.36", "wpb": "117750", "bsz": "256", "num_updates": "432600", "lr": "5.73131e-05", "gnorm": "1.035", "loss_scale": "32", "train_wall": "59", "gb_free": "27.4", "wall": "129381"} +[2022-07-31 22:47:11,468][train_inner][INFO] - {"epoch": 9, "update": 8.409, "loss": "2.138", "ppl": "4.4", "wps": "399795", "ups": "3.38", "wpb": "118259", "bsz": "256", "num_updates": "432800", "lr": "5.72929e-05", "gnorm": "1.031", "loss_scale": "32", "train_wall": "59", "gb_free": "24.7", "wall": "129440"} +[2022-07-31 22:48:11,061][train_inner][INFO] - {"epoch": 9, "update": 8.413, "loss": "2.134", "ppl": "4.39", "wps": "397435", "ups": "3.36", "wpb": "118420", "bsz": "256", "num_updates": "433000", "lr": "5.72727e-05", "gnorm": "1.03", "loss_scale": "64", "train_wall": "59", "gb_free": "21.3", "wall": "129499"} +[2022-07-31 22:49:11,028][train_inner][INFO] - {"epoch": 9, "update": 8.417, "loss": "2.132", "ppl": "4.38", "wps": "394845", "ups": "3.34", "wpb": "118388", "bsz": "256", "num_updates": "433200", "lr": "5.72525e-05", "gnorm": "1.03", "loss_scale": "64", "train_wall": "60", "gb_free": "21.3", "wall": "129559"} +[2022-07-31 22:50:10,587][train_inner][INFO] - {"epoch": 9, "update": 8.421, "loss": "2.137", "ppl": "4.4", "wps": "396870", "ups": "3.36", "wpb": "118184", "bsz": "256", "num_updates": "433400", "lr": "5.72323e-05", "gnorm": "1.032", "loss_scale": "64", "train_wall": "59", "gb_free": "26.4", "wall": "129619"} +[2022-07-31 22:51:09,375][train_inner][INFO] - {"epoch": 9, "update": 8.424, "loss": "2.144", "ppl": "4.42", "wps": "401259", "ups": "3.4", "wpb": "117944", "bsz": "256", "num_updates": "433600", "lr": "5.72121e-05", "gnorm": "1.034", "loss_scale": "64", "train_wall": "58", "gb_free": "29", "wall": "129678"} +[2022-07-31 22:51:57,004][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-07-31 22:52:08,661][train_inner][INFO] - {"epoch": 9, "update": 8.428, "loss": "2.133", "ppl": "4.39", "wps": "398359", "ups": "3.37", "wpb": "118086", "bsz": "256", "num_updates": "433800", "lr": "5.71919e-05", "gnorm": "1.033", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "129737"} +[2022-07-31 22:53:07,816][train_inner][INFO] - {"epoch": 9, "update": 8.432, "loss": "2.133", "ppl": "4.39", "wps": "398526", "ups": "3.38", "wpb": "117872", "bsz": "256", "num_updates": "434000", "lr": "5.71717e-05", "gnorm": "1.033", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "129796"} +[2022-07-31 22:53:26,830][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 22:54:08,793][train_inner][INFO] - {"epoch": 9, "update": 8.436, "loss": "2.138", "ppl": "4.4", "wps": "387450", "ups": "3.28", "wpb": "118127", "bsz": "256", "num_updates": "434200", "lr": "5.71515e-05", "gnorm": "1.032", "loss_scale": "16", "train_wall": "61", "gb_free": "21.4", "wall": "129857"} +[2022-07-31 22:55:07,725][train_inner][INFO] - {"epoch": 9, "update": 8.44, "loss": "2.14", "ppl": "4.41", "wps": "401610", "ups": "3.39", "wpb": "118337", "bsz": "256", "num_updates": "434400", "lr": "5.71313e-05", "gnorm": "1.033", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "129916"} +[2022-07-31 22:56:07,540][train_inner][INFO] - {"epoch": 9, "update": 8.444, "loss": "2.134", "ppl": "4.39", "wps": "396203", "ups": "3.34", "wpb": "118495", "bsz": "256", "num_updates": "434600", "lr": "5.71111e-05", "gnorm": "1.03", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "129976"} +[2022-07-31 22:57:07,507][train_inner][INFO] - {"epoch": 9, "update": 8.448, "loss": "2.132", "ppl": "4.38", "wps": "393460", "ups": "3.34", "wpb": "117971", "bsz": "256", "num_updates": "434800", "lr": "5.70909e-05", "gnorm": "1.035", "loss_scale": "16", "train_wall": "60", "gb_free": "22.2", "wall": "130036"} +[2022-07-31 22:58:06,847][train_inner][INFO] - {"epoch": 9, "update": 8.452, "loss": "2.136", "ppl": "4.39", "wps": "398623", "ups": "3.37", "wpb": "118270", "bsz": "256", "num_updates": "435000", "lr": "5.70707e-05", "gnorm": "1.033", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "130095"} +[2022-07-31 22:59:07,084][train_inner][INFO] - {"epoch": 9, "update": 8.456, "loss": "2.133", "ppl": "4.39", "wps": "393710", "ups": "3.32", "wpb": "118581", "bsz": "256", "num_updates": "435200", "lr": "5.70505e-05", "gnorm": "1.03", "loss_scale": "16", "train_wall": "60", "gb_free": "21.9", "wall": "130156"} +[2022-07-31 23:00:06,590][train_inner][INFO] - {"epoch": 9, "update": 8.459, "loss": "2.128", "ppl": "4.37", "wps": "399341", "ups": "3.36", "wpb": "118815", "bsz": "256", "num_updates": "435400", "lr": "5.70303e-05", "gnorm": "1.031", "loss_scale": "16", "train_wall": "59", "gb_free": "23.6", "wall": "130215"} +[2022-07-31 23:01:06,284][train_inner][INFO] - {"epoch": 9, "update": 8.463, "loss": "2.143", "ppl": "4.42", "wps": "395628", "ups": "3.35", "wpb": "118082", "bsz": "256", "num_updates": "435600", "lr": "5.70101e-05", "gnorm": "1.036", "loss_scale": "16", "train_wall": "59", "gb_free": "23.6", "wall": "130275"} +[2022-07-31 23:02:05,658][train_inner][INFO] - {"epoch": 9, "update": 8.467, "loss": "2.131", "ppl": "4.38", "wps": "399666", "ups": "3.37", "wpb": "118648", "bsz": "256", "num_updates": "435800", "lr": "5.69899e-05", "gnorm": "1.031", "loss_scale": "16", "train_wall": "59", "gb_free": "22.5", "wall": "130334"} +[2022-07-31 23:03:05,207][train_inner][INFO] - {"epoch": 9, "update": 8.471, "loss": "2.137", "ppl": "4.4", "wps": "397672", "ups": "3.36", "wpb": "118404", "bsz": "256", "num_updates": "436000", "lr": "5.69697e-05", "gnorm": "1.035", "loss_scale": "16", "train_wall": "59", "gb_free": "25.5", "wall": "130394"} +[2022-07-31 23:04:04,415][train_inner][INFO] - {"epoch": 9, "update": 8.475, "loss": "2.132", "ppl": "4.38", "wps": "398515", "ups": "3.38", "wpb": "117974", "bsz": "256", "num_updates": "436200", "lr": "5.69495e-05", "gnorm": "1.039", "loss_scale": "32", "train_wall": "59", "gb_free": "21.6", "wall": "130453"} +[2022-07-31 23:05:04,420][train_inner][INFO] - {"epoch": 9, "update": 8.479, "loss": "2.139", "ppl": "4.4", "wps": "394261", "ups": "3.33", "wpb": "118288", "bsz": "256", "num_updates": "436400", "lr": "5.69293e-05", "gnorm": "1.038", "loss_scale": "32", "train_wall": "60", "gb_free": "29.1", "wall": "130513"} +[2022-07-31 23:06:03,929][train_inner][INFO] - {"epoch": 9, "update": 8.483, "loss": "2.129", "ppl": "4.37", "wps": "398554", "ups": "3.36", "wpb": "118587", "bsz": "256", "num_updates": "436600", "lr": "5.69091e-05", "gnorm": "1.031", "loss_scale": "32", "train_wall": "59", "gb_free": "21.3", "wall": "130572"} +[2022-07-31 23:07:03,482][train_inner][INFO] - {"epoch": 9, "update": 8.487, "loss": "2.135", "ppl": "4.39", "wps": "397739", "ups": "3.36", "wpb": "118432", "bsz": "256", "num_updates": "436800", "lr": "5.68889e-05", "gnorm": "1.033", "loss_scale": "32", "train_wall": "59", "gb_free": "26.8", "wall": "130632"} +[2022-07-31 23:08:02,400][train_inner][INFO] - {"epoch": 9, "update": 8.491, "loss": "2.14", "ppl": "4.41", "wps": "401558", "ups": "3.39", "wpb": "118295", "bsz": "256", "num_updates": "437000", "lr": "5.68687e-05", "gnorm": "1.038", "loss_scale": "32", "train_wall": "59", "gb_free": "25.4", "wall": "130691"} +[2022-07-31 23:09:01,987][train_inner][INFO] - {"epoch": 9, "update": 8.494, "loss": "2.134", "ppl": "4.39", "wps": "396931", "ups": "3.36", "wpb": "118259", "bsz": "256", "num_updates": "437200", "lr": "5.68485e-05", "gnorm": "1.033", "loss_scale": "32", "train_wall": "59", "gb_free": "23.1", "wall": "130750"} +[2022-07-31 23:10:01,589][train_inner][INFO] - {"epoch": 9, "update": 8.498, "loss": "2.134", "ppl": "4.39", "wps": "397800", "ups": "3.36", "wpb": "118548", "bsz": "256", "num_updates": "437400", "lr": "5.68283e-05", "gnorm": "1.034", "loss_scale": "32", "train_wall": "59", "gb_free": "21.5", "wall": "130810"} +[2022-07-31 23:10:10,578][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 23:11:01,229][train_inner][INFO] - {"epoch": 9, "update": 8.502, "loss": "2.133", "ppl": "4.39", "wps": "396036", "ups": "3.35", "wpb": "118097", "bsz": "256", "num_updates": "437600", "lr": "5.68081e-05", "gnorm": "1.035", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "130870"} +[2022-07-31 23:12:00,895][train_inner][INFO] - {"epoch": 9, "update": 8.506, "loss": "2.139", "ppl": "4.4", "wps": "396297", "ups": "3.35", "wpb": "118226", "bsz": "256", "num_updates": "437800", "lr": "5.67879e-05", "gnorm": "1.038", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "130929"} +[2022-07-31 23:13:00,281][train_inner][INFO] - {"epoch": 9, "update": 8.51, "loss": "2.135", "ppl": "4.39", "wps": "397955", "ups": "3.37", "wpb": "118163", "bsz": "256", "num_updates": "438000", "lr": "5.67677e-05", "gnorm": "1.037", "loss_scale": "16", "train_wall": "59", "gb_free": "23.4", "wall": "130989"} +[2022-07-31 23:13:59,842][train_inner][INFO] - {"epoch": 9, "update": 8.514, "loss": "2.137", "ppl": "4.4", "wps": "395383", "ups": "3.36", "wpb": "117747", "bsz": "256", "num_updates": "438200", "lr": "5.67475e-05", "gnorm": "1.045", "loss_scale": "16", "train_wall": "59", "gb_free": "23", "wall": "131048"} +[2022-07-31 23:14:59,430][train_inner][INFO] - {"epoch": 9, "update": 8.518, "loss": "2.131", "ppl": "4.38", "wps": "397774", "ups": "3.36", "wpb": "118512", "bsz": "256", "num_updates": "438400", "lr": "5.67273e-05", "gnorm": "1.035", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "131108"} +[2022-07-31 23:15:58,792][train_inner][INFO] - {"epoch": 9, "update": 8.522, "loss": "2.135", "ppl": "4.39", "wps": "397900", "ups": "3.37", "wpb": "118100", "bsz": "256", "num_updates": "438600", "lr": "5.67071e-05", "gnorm": "1.038", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "131167"} +[2022-07-31 23:16:58,227][train_inner][INFO] - {"epoch": 9, "update": 8.526, "loss": "2.137", "ppl": "4.4", "wps": "398232", "ups": "3.37", "wpb": "118344", "bsz": "256", "num_updates": "438800", "lr": "5.66869e-05", "gnorm": "1.039", "loss_scale": "16", "train_wall": "59", "gb_free": "30.2", "wall": "131227"} +[2022-07-31 23:17:58,504][train_inner][INFO] - {"epoch": 9, "update": 8.529, "loss": "2.135", "ppl": "4.39", "wps": "391948", "ups": "3.32", "wpb": "118125", "bsz": "256", "num_updates": "439000", "lr": "5.66667e-05", "gnorm": "1.038", "loss_scale": "16", "train_wall": "60", "gb_free": "22.8", "wall": "131287"} +[2022-07-31 23:18:12,604][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 23:18:58,360][train_inner][INFO] - {"epoch": 9, "update": 8.533, "loss": "2.129", "ppl": "4.37", "wps": "394730", "ups": "3.34", "wpb": "118134", "bsz": "256", "num_updates": "439200", "lr": "5.66465e-05", "gnorm": "1.041", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "131347"} +[2022-07-31 23:19:58,060][train_inner][INFO] - {"epoch": 9, "update": 8.537, "loss": "2.135", "ppl": "4.39", "wps": "396428", "ups": "3.35", "wpb": "118333", "bsz": "256", "num_updates": "439400", "lr": "5.66263e-05", "gnorm": "1.037", "loss_scale": "8", "train_wall": "59", "gb_free": "27.4", "wall": "131406"} +[2022-07-31 23:20:57,723][train_inner][INFO] - {"epoch": 9, "update": 8.541, "loss": "2.129", "ppl": "4.37", "wps": "397190", "ups": "3.35", "wpb": "118486", "bsz": "256", "num_updates": "439600", "lr": "5.66061e-05", "gnorm": "1.04", "loss_scale": "8", "train_wall": "59", "gb_free": "27.6", "wall": "131466"} +[2022-07-31 23:21:57,160][train_inner][INFO] - {"epoch": 9, "update": 8.545, "loss": "2.134", "ppl": "4.39", "wps": "399300", "ups": "3.36", "wpb": "118667", "bsz": "256", "num_updates": "439800", "lr": "5.65859e-05", "gnorm": "1.037", "loss_scale": "8", "train_wall": "59", "gb_free": "24.7", "wall": "131526"} +[2022-07-31 23:22:56,765][train_inner][INFO] - {"epoch": 9, "update": 8.549, "loss": "2.132", "ppl": "4.38", "wps": "398842", "ups": "3.36", "wpb": "118863", "bsz": "256", "num_updates": "440000", "lr": "5.65657e-05", "gnorm": "1.038", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "131585"} +[2022-07-31 23:23:56,402][train_inner][INFO] - {"epoch": 9, "update": 8.553, "loss": "2.132", "ppl": "4.38", "wps": "398930", "ups": "3.35", "wpb": "118954", "bsz": "256", "num_updates": "440200", "lr": "5.65455e-05", "gnorm": "1.039", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "131645"} +[2022-07-31 23:24:55,666][train_inner][INFO] - {"epoch": 9, "update": 8.557, "loss": "2.137", "ppl": "4.4", "wps": "400198", "ups": "3.37", "wpb": "118585", "bsz": "256", "num_updates": "440400", "lr": "5.65253e-05", "gnorm": "1.043", "loss_scale": "8", "train_wall": "59", "gb_free": "22.1", "wall": "131704"} +[2022-07-31 23:25:54,928][train_inner][INFO] - {"epoch": 9, "update": 8.56, "loss": "2.133", "ppl": "4.39", "wps": "399768", "ups": "3.37", "wpb": "118455", "bsz": "256", "num_updates": "440600", "lr": "5.65051e-05", "gnorm": "1.039", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "131763"} +[2022-07-31 23:26:54,539][train_inner][INFO] - {"epoch": 9, "update": 8.564, "loss": "2.134", "ppl": "4.39", "wps": "397784", "ups": "3.36", "wpb": "118561", "bsz": "256", "num_updates": "440800", "lr": "5.64848e-05", "gnorm": "1.039", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "131823"} +[2022-07-31 23:27:53,305][train_inner][INFO] - {"epoch": 9, "update": 8.568, "loss": "2.139", "ppl": "4.4", "wps": "400721", "ups": "3.4", "wpb": "117741", "bsz": "256", "num_updates": "441000", "lr": "5.64646e-05", "gnorm": "1.045", "loss_scale": "8", "train_wall": "58", "gb_free": "21.7", "wall": "131882"} +[2022-07-31 23:28:52,482][train_inner][INFO] - {"epoch": 9, "update": 8.572, "loss": "2.126", "ppl": "4.37", "wps": "402273", "ups": "3.38", "wpb": "119026", "bsz": "256", "num_updates": "441200", "lr": "5.64444e-05", "gnorm": "1.036", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "131941"} +[2022-07-31 23:29:52,066][train_inner][INFO] - {"epoch": 9, "update": 8.576, "loss": "2.14", "ppl": "4.41", "wps": "396080", "ups": "3.36", "wpb": "118000", "bsz": "256", "num_updates": "441400", "lr": "5.64242e-05", "gnorm": "1.046", "loss_scale": "16", "train_wall": "59", "gb_free": "21.3", "wall": "132000"} +[2022-07-31 23:30:51,214][train_inner][INFO] - {"epoch": 9, "update": 8.58, "loss": "2.134", "ppl": "4.39", "wps": "398882", "ups": "3.38", "wpb": "117965", "bsz": "256", "num_updates": "441600", "lr": "5.6404e-05", "gnorm": "1.043", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "132060"} +[2022-07-31 23:31:50,528][train_inner][INFO] - {"epoch": 9, "update": 8.584, "loss": "2.136", "ppl": "4.39", "wps": "399569", "ups": "3.37", "wpb": "118499", "bsz": "256", "num_updates": "441800", "lr": "5.63838e-05", "gnorm": "1.04", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "132119"} +[2022-07-31 23:32:50,148][train_inner][INFO] - {"epoch": 9, "update": 8.588, "loss": "2.133", "ppl": "4.39", "wps": "395157", "ups": "3.35", "wpb": "117796", "bsz": "256", "num_updates": "442000", "lr": "5.63636e-05", "gnorm": "1.041", "loss_scale": "16", "train_wall": "59", "gb_free": "22.2", "wall": "132179"} +[2022-07-31 23:33:49,019][train_inner][INFO] - {"epoch": 9, "update": 8.592, "loss": "2.136", "ppl": "4.4", "wps": "402895", "ups": "3.4", "wpb": "118593", "bsz": "256", "num_updates": "442200", "lr": "5.63434e-05", "gnorm": "1.045", "loss_scale": "16", "train_wall": "59", "gb_free": "21.9", "wall": "132237"} +[2022-07-31 23:34:48,407][train_inner][INFO] - {"epoch": 9, "update": 8.595, "loss": "2.138", "ppl": "4.4", "wps": "400449", "ups": "3.37", "wpb": "118907", "bsz": "256", "num_updates": "442400", "lr": "5.63232e-05", "gnorm": "1.039", "loss_scale": "16", "train_wall": "59", "gb_free": "22.8", "wall": "132297"} +[2022-07-31 23:35:48,064][train_inner][INFO] - {"epoch": 9, "update": 8.599, "loss": "2.14", "ppl": "4.41", "wps": "396306", "ups": "3.35", "wpb": "118212", "bsz": "256", "num_updates": "442600", "lr": "5.6303e-05", "gnorm": "1.046", "loss_scale": "16", "train_wall": "59", "gb_free": "25.6", "wall": "132356"} +[2022-07-31 23:36:47,705][train_inner][INFO] - {"epoch": 9, "update": 8.603, "loss": "2.129", "ppl": "4.37", "wps": "396172", "ups": "3.35", "wpb": "118139", "bsz": "256", "num_updates": "442800", "lr": "5.62828e-05", "gnorm": "1.041", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "132416"} +[2022-07-31 23:37:47,495][train_inner][INFO] - {"epoch": 9, "update": 8.607, "loss": "2.134", "ppl": "4.39", "wps": "396662", "ups": "3.35", "wpb": "118580", "bsz": "256", "num_updates": "443000", "lr": "5.62626e-05", "gnorm": "1.044", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "132476"} +[2022-07-31 23:38:46,860][train_inner][INFO] - {"epoch": 9, "update": 8.611, "loss": "2.13", "ppl": "4.38", "wps": "399759", "ups": "3.37", "wpb": "118659", "bsz": "256", "num_updates": "443200", "lr": "5.62424e-05", "gnorm": "1.046", "loss_scale": "32", "train_wall": "59", "gb_free": "22.4", "wall": "132535"} +[2022-07-31 23:38:48,648][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-07-31 23:39:17,886][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 23:39:46,975][train_inner][INFO] - {"epoch": 9, "update": 8.615, "loss": "2.13", "ppl": "4.38", "wps": "395136", "ups": "3.33", "wpb": "118767", "bsz": "256", "num_updates": "443400", "lr": "5.62222e-05", "gnorm": "1.041", "loss_scale": "8", "train_wall": "60", "gb_free": "21.3", "wall": "132595"} +[2022-07-31 23:40:46,776][train_inner][INFO] - {"epoch": 9, "update": 8.619, "loss": "2.129", "ppl": "4.37", "wps": "396704", "ups": "3.34", "wpb": "118615", "bsz": "256", "num_updates": "443600", "lr": "5.6202e-05", "gnorm": "1.045", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "132655"} +[2022-07-31 23:41:46,639][train_inner][INFO] - {"epoch": 9, "update": 8.623, "loss": "2.124", "ppl": "4.36", "wps": "395271", "ups": "3.34", "wpb": "118308", "bsz": "256", "num_updates": "443800", "lr": "5.61818e-05", "gnorm": "1.042", "loss_scale": "8", "train_wall": "60", "gb_free": "27.5", "wall": "132715"} +[2022-07-31 23:42:46,165][train_inner][INFO] - {"epoch": 9, "update": 8.627, "loss": "2.137", "ppl": "4.4", "wps": "396189", "ups": "3.36", "wpb": "117918", "bsz": "256", "num_updates": "444000", "lr": "5.61616e-05", "gnorm": "1.051", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "132775"} +[2022-07-31 23:43:45,586][train_inner][INFO] - {"epoch": 9, "update": 8.63, "loss": "2.128", "ppl": "4.37", "wps": "399921", "ups": "3.37", "wpb": "118817", "bsz": "256", "num_updates": "444200", "lr": "5.61414e-05", "gnorm": "1.042", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "132834"} +[2022-07-31 23:44:46,144][train_inner][INFO] - {"epoch": 9, "update": 8.634, "loss": "2.133", "ppl": "4.39", "wps": "390031", "ups": "3.3", "wpb": "118097", "bsz": "256", "num_updates": "444400", "lr": "5.61212e-05", "gnorm": "1.049", "loss_scale": "8", "train_wall": "60", "gb_free": "24.4", "wall": "132895"} +[2022-07-31 23:45:45,404][train_inner][INFO] - {"epoch": 9, "update": 8.638, "loss": "2.135", "ppl": "4.39", "wps": "399735", "ups": "3.37", "wpb": "118440", "bsz": "256", "num_updates": "444600", "lr": "5.6101e-05", "gnorm": "1.048", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "132954"} +[2022-07-31 23:46:44,656][train_inner][INFO] - {"epoch": 9, "update": 8.642, "loss": "2.137", "ppl": "4.4", "wps": "397785", "ups": "3.38", "wpb": "117848", "bsz": "256", "num_updates": "444800", "lr": "5.60808e-05", "gnorm": "1.049", "loss_scale": "8", "train_wall": "59", "gb_free": "23.1", "wall": "133013"} +[2022-07-31 23:47:44,348][train_inner][INFO] - {"epoch": 9, "update": 8.646, "loss": "2.128", "ppl": "4.37", "wps": "397026", "ups": "3.35", "wpb": "118494", "bsz": "256", "num_updates": "445000", "lr": "5.60606e-05", "gnorm": "1.046", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "133073"} +[2022-07-31 23:48:43,903][train_inner][INFO] - {"epoch": 9, "update": 8.65, "loss": "2.126", "ppl": "4.36", "wps": "398501", "ups": "3.36", "wpb": "118664", "bsz": "256", "num_updates": "445200", "lr": "5.60404e-05", "gnorm": "1.044", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "133132"} +[2022-07-31 23:49:43,072][train_inner][INFO] - {"epoch": 9, "update": 8.654, "loss": "2.131", "ppl": "4.38", "wps": "401160", "ups": "3.38", "wpb": "118681", "bsz": "256", "num_updates": "445400", "lr": "5.60202e-05", "gnorm": "1.048", "loss_scale": "16", "train_wall": "59", "gb_free": "26.5", "wall": "133192"} +[2022-07-31 23:50:43,701][train_inner][INFO] - {"epoch": 9, "update": 8.658, "loss": "2.132", "ppl": "4.38", "wps": "390799", "ups": "3.3", "wpb": "118466", "bsz": "256", "num_updates": "445600", "lr": "5.6e-05", "gnorm": "1.049", "loss_scale": "16", "train_wall": "60", "gb_free": "21.8", "wall": "133252"} +[2022-07-31 23:51:43,074][train_inner][INFO] - {"epoch": 9, "update": 8.662, "loss": "2.127", "ppl": "4.37", "wps": "399135", "ups": "3.37", "wpb": "118490", "bsz": "256", "num_updates": "445800", "lr": "5.59798e-05", "gnorm": "1.046", "loss_scale": "16", "train_wall": "59", "gb_free": "25.5", "wall": "133312"} +[2022-07-31 23:52:42,521][train_inner][INFO] - {"epoch": 9, "update": 8.665, "loss": "2.132", "ppl": "4.38", "wps": "397873", "ups": "3.36", "wpb": "118261", "bsz": "256", "num_updates": "446000", "lr": "5.59596e-05", "gnorm": "1.047", "loss_scale": "16", "train_wall": "59", "gb_free": "22.6", "wall": "133371"} +[2022-07-31 23:53:42,254][train_inner][INFO] - {"epoch": 9, "update": 8.669, "loss": "2.129", "ppl": "4.37", "wps": "396060", "ups": "3.35", "wpb": "118287", "bsz": "256", "num_updates": "446200", "lr": "5.59394e-05", "gnorm": "1.05", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "133431"} +[2022-07-31 23:53:52,053][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-07-31 23:54:42,178][train_inner][INFO] - {"epoch": 9, "update": 8.673, "loss": "2.131", "ppl": "4.38", "wps": "395234", "ups": "3.34", "wpb": "118420", "bsz": "256", "num_updates": "446400", "lr": "5.59192e-05", "gnorm": "1.054", "loss_scale": "8", "train_wall": "60", "gb_free": "25", "wall": "133491"} +[2022-07-31 23:55:41,691][train_inner][INFO] - {"epoch": 9, "update": 8.677, "loss": "2.134", "ppl": "4.39", "wps": "398318", "ups": "3.36", "wpb": "118526", "bsz": "256", "num_updates": "446600", "lr": "5.5899e-05", "gnorm": "1.05", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "133550"} +[2022-07-31 23:56:41,196][train_inner][INFO] - {"epoch": 9, "update": 8.681, "loss": "2.126", "ppl": "4.37", "wps": "395621", "ups": "3.36", "wpb": "117704", "bsz": "256", "num_updates": "446800", "lr": "5.58788e-05", "gnorm": "1.054", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "133610"} +[2022-07-31 23:57:40,692][train_inner][INFO] - {"epoch": 9, "update": 8.685, "loss": "2.132", "ppl": "4.38", "wps": "399374", "ups": "3.36", "wpb": "118807", "bsz": "256", "num_updates": "447000", "lr": "5.58586e-05", "gnorm": "1.048", "loss_scale": "8", "train_wall": "59", "gb_free": "26.7", "wall": "133669"} +[2022-07-31 23:58:41,411][train_inner][INFO] - {"epoch": 9, "update": 8.689, "loss": "2.131", "ppl": "4.38", "wps": "390228", "ups": "3.29", "wpb": "118470", "bsz": "256", "num_updates": "447200", "lr": "5.58384e-05", "gnorm": "1.054", "loss_scale": "8", "train_wall": "60", "gb_free": "21.9", "wall": "133730"} +[2022-07-31 23:59:41,180][train_inner][INFO] - {"epoch": 9, "update": 8.693, "loss": "2.137", "ppl": "4.4", "wps": "394094", "ups": "3.35", "wpb": "117771", "bsz": "256", "num_updates": "447400", "lr": "5.58182e-05", "gnorm": "1.056", "loss_scale": "8", "train_wall": "59", "gb_free": "25", "wall": "133790"} +[2022-08-01 00:00:40,853][train_inner][INFO] - {"epoch": 9, "update": 8.696, "loss": "2.132", "ppl": "4.38", "wps": "395108", "ups": "3.35", "wpb": "117887", "bsz": "256", "num_updates": "447600", "lr": "5.5798e-05", "gnorm": "1.055", "loss_scale": "8", "train_wall": "59", "gb_free": "22.1", "wall": "133849"} +[2022-08-01 00:01:40,392][train_inner][INFO] - {"epoch": 9, "update": 8.7, "loss": "2.131", "ppl": "4.38", "wps": "398805", "ups": "3.36", "wpb": "118721", "bsz": "256", "num_updates": "447800", "lr": "5.57778e-05", "gnorm": "1.051", "loss_scale": "8", "train_wall": "59", "gb_free": "26", "wall": "133909"} +[2022-08-01 00:02:39,843][train_inner][INFO] - {"epoch": 9, "update": 8.704, "loss": "2.129", "ppl": "4.37", "wps": "398302", "ups": "3.36", "wpb": "118397", "bsz": "256", "num_updates": "448000", "lr": "5.57576e-05", "gnorm": "1.052", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "133968"} +[2022-08-01 00:03:39,105][train_inner][INFO] - {"epoch": 9, "update": 8.708, "loss": "2.129", "ppl": "4.37", "wps": "397795", "ups": "3.37", "wpb": "117870", "bsz": "256", "num_updates": "448200", "lr": "5.57374e-05", "gnorm": "1.056", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "134028"} +[2022-08-01 00:04:38,893][train_inner][INFO] - {"epoch": 9, "update": 8.712, "loss": "2.129", "ppl": "4.37", "wps": "396304", "ups": "3.35", "wpb": "118469", "bsz": "256", "num_updates": "448400", "lr": "5.57172e-05", "gnorm": "1.057", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "134087"} +[2022-08-01 00:05:38,175][train_inner][INFO] - {"epoch": 9, "update": 8.716, "loss": "2.131", "ppl": "4.38", "wps": "397728", "ups": "3.37", "wpb": "117891", "bsz": "256", "num_updates": "448600", "lr": "5.5697e-05", "gnorm": "1.06", "loss_scale": "16", "train_wall": "59", "gb_free": "23.7", "wall": "134147"} +[2022-08-01 00:06:37,739][train_inner][INFO] - {"epoch": 9, "update": 8.72, "loss": "2.128", "ppl": "4.37", "wps": "397704", "ups": "3.36", "wpb": "118442", "bsz": "256", "num_updates": "448800", "lr": "5.56768e-05", "gnorm": "1.053", "loss_scale": "16", "train_wall": "59", "gb_free": "27.1", "wall": "134206"} +[2022-08-01 00:07:37,029][train_inner][INFO] - {"epoch": 9, "update": 8.724, "loss": "2.124", "ppl": "4.36", "wps": "400290", "ups": "3.37", "wpb": "118666", "bsz": "256", "num_updates": "449000", "lr": "5.56566e-05", "gnorm": "1.051", "loss_scale": "16", "train_wall": "59", "gb_free": "22", "wall": "134265"} +[2022-08-01 00:08:36,607][train_inner][INFO] - {"epoch": 9, "update": 8.728, "loss": "2.128", "ppl": "4.37", "wps": "398852", "ups": "3.36", "wpb": "118812", "bsz": "256", "num_updates": "449200", "lr": "5.56364e-05", "gnorm": "1.051", "loss_scale": "16", "train_wall": "59", "gb_free": "22.4", "wall": "134325"} +[2022-08-01 00:09:36,148][train_inner][INFO] - {"epoch": 9, "update": 8.731, "loss": "2.134", "ppl": "4.39", "wps": "398261", "ups": "3.36", "wpb": "118564", "bsz": "256", "num_updates": "449400", "lr": "5.56162e-05", "gnorm": "1.056", "loss_scale": "16", "train_wall": "59", "gb_free": "24.7", "wall": "134385"} +[2022-08-01 00:10:18,217][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 00:10:35,734][train_inner][INFO] - {"epoch": 9, "update": 8.735, "loss": "2.129", "ppl": "4.37", "wps": "396982", "ups": "3.36", "wpb": "118272", "bsz": "256", "num_updates": "449600", "lr": "5.5596e-05", "gnorm": "1.057", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "134444"} +[2022-08-01 00:11:35,097][train_inner][INFO] - {"epoch": 9, "update": 8.739, "loss": "2.13", "ppl": "4.38", "wps": "399764", "ups": "3.37", "wpb": "118656", "bsz": "256", "num_updates": "449800", "lr": "5.55758e-05", "gnorm": "1.068", "loss_scale": "8", "train_wall": "59", "gb_free": "22.1", "wall": "134504"} +[2022-08-01 00:11:59,742][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 00:12:34,598][train_inner][INFO] - {"epoch": 9, "update": 8.743, "loss": "2.13", "ppl": "4.38", "wps": "397773", "ups": "3.36", "wpb": "118339", "bsz": "256", "num_updates": "450000", "lr": "5.55556e-05", "gnorm": "1.058", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "134563"} +[2022-08-01 00:12:34,599][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 00:12:57,359][valid][INFO] - {"epoch": 9, "valid_loss": "2.02", "valid_ppl": "4.06", "valid_wps": "1.61114e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "450000", "valid_best_loss": "2.02"} +[2022-08-01 00:12:57,362][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 9 @ 450000 updates +[2022-08-01 00:12:57,363][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_9_450000.pt +[2022-08-01 00:13:04,210][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_9_450000.pt +[2022-08-01 00:13:23,400][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_9_450000.pt (epoch 9 @ 450000 updates, score 2.02) (writing took 26.037539007142186 seconds) +[2022-08-01 00:14:23,131][train_inner][INFO] - {"epoch": 9, "update": 8.747, "loss": "2.124", "ppl": "4.36", "wps": "218426", "ups": "1.84", "wpb": "118532", "bsz": "256", "num_updates": "450200", "lr": "5.55354e-05", "gnorm": "1.057", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "134672"} +[2022-08-01 00:15:22,945][train_inner][INFO] - {"epoch": 9, "update": 8.751, "loss": "2.136", "ppl": "4.39", "wps": "392880", "ups": "3.34", "wpb": "117498", "bsz": "256", "num_updates": "450400", "lr": "5.55152e-05", "gnorm": "1.064", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "134731"} +[2022-08-01 00:16:22,114][train_inner][INFO] - {"epoch": 9, "update": 8.755, "loss": "2.133", "ppl": "4.39", "wps": "399738", "ups": "3.38", "wpb": "118259", "bsz": "256", "num_updates": "450600", "lr": "5.54949e-05", "gnorm": "1.061", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "134791"} +[2022-08-01 00:17:21,778][train_inner][INFO] - {"epoch": 9, "update": 8.759, "loss": "2.13", "ppl": "4.38", "wps": "396481", "ups": "3.35", "wpb": "118278", "bsz": "256", "num_updates": "450800", "lr": "5.54747e-05", "gnorm": "1.061", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "134850"} +[2022-08-01 00:18:20,900][train_inner][INFO] - {"epoch": 9, "update": 8.763, "loss": "2.131", "ppl": "4.38", "wps": "398262", "ups": "3.38", "wpb": "117729", "bsz": "256", "num_updates": "451000", "lr": "5.54545e-05", "gnorm": "1.062", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "134909"} +[2022-08-01 00:19:20,311][train_inner][INFO] - {"epoch": 9, "update": 8.766, "loss": "2.135", "ppl": "4.39", "wps": "397171", "ups": "3.37", "wpb": "117980", "bsz": "256", "num_updates": "451200", "lr": "5.54343e-05", "gnorm": "1.069", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "134969"} +[2022-08-01 00:20:19,738][train_inner][INFO] - {"epoch": 9, "update": 8.77, "loss": "2.128", "ppl": "4.37", "wps": "397665", "ups": "3.37", "wpb": "118159", "bsz": "256", "num_updates": "451400", "lr": "5.54141e-05", "gnorm": "1.062", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "135028"} +[2022-08-01 00:21:19,421][train_inner][INFO] - {"epoch": 9, "update": 8.774, "loss": "2.128", "ppl": "4.37", "wps": "396828", "ups": "3.35", "wpb": "118418", "bsz": "256", "num_updates": "451600", "lr": "5.53939e-05", "gnorm": "1.059", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "135088"} +[2022-08-01 00:22:19,084][train_inner][INFO] - {"epoch": 9, "update": 8.778, "loss": "2.131", "ppl": "4.38", "wps": "395718", "ups": "3.35", "wpb": "118049", "bsz": "256", "num_updates": "451800", "lr": "5.53737e-05", "gnorm": "1.066", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "135148"} +[2022-08-01 00:23:18,724][train_inner][INFO] - {"epoch": 9, "update": 8.782, "loss": "2.135", "ppl": "4.39", "wps": "398186", "ups": "3.35", "wpb": "118739", "bsz": "256", "num_updates": "452000", "lr": "5.53535e-05", "gnorm": "1.06", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "135207"} +[2022-08-01 00:24:18,533][train_inner][INFO] - {"epoch": 9, "update": 8.786, "loss": "2.129", "ppl": "4.38", "wps": "395529", "ups": "3.34", "wpb": "118279", "bsz": "256", "num_updates": "452200", "lr": "5.53333e-05", "gnorm": "1.063", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "135267"} +[2022-08-01 00:25:18,121][train_inner][INFO] - {"epoch": 9, "update": 8.79, "loss": "2.131", "ppl": "4.38", "wps": "398387", "ups": "3.36", "wpb": "118696", "bsz": "256", "num_updates": "452400", "lr": "5.53131e-05", "gnorm": "1.065", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "135327"} +[2022-08-01 00:26:17,560][train_inner][INFO] - {"epoch": 9, "update": 8.794, "loss": "2.133", "ppl": "4.39", "wps": "398025", "ups": "3.36", "wpb": "118289", "bsz": "256", "num_updates": "452600", "lr": "5.52929e-05", "gnorm": "1.067", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "135386"} +[2022-08-01 00:27:16,985][train_inner][INFO] - {"epoch": 9, "update": 8.798, "loss": "2.136", "ppl": "4.4", "wps": "397104", "ups": "3.37", "wpb": "117989", "bsz": "256", "num_updates": "452800", "lr": "5.52727e-05", "gnorm": "1.069", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "135445"} +[2022-08-01 00:28:16,580][train_inner][INFO] - {"epoch": 9, "update": 8.801, "loss": "2.13", "ppl": "4.38", "wps": "398402", "ups": "3.36", "wpb": "118713", "bsz": "256", "num_updates": "453000", "lr": "5.52525e-05", "gnorm": "1.068", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "135505"} +[2022-08-01 00:29:15,997][train_inner][INFO] - {"epoch": 9, "update": 8.805, "loss": "2.131", "ppl": "4.38", "wps": "399305", "ups": "3.37", "wpb": "118628", "bsz": "256", "num_updates": "453200", "lr": "5.52323e-05", "gnorm": "1.067", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "135564"} +[2022-08-01 00:29:27,990][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 00:30:15,887][train_inner][INFO] - {"epoch": 9, "update": 8.809, "loss": "2.129", "ppl": "4.37", "wps": "396449", "ups": "3.34", "wpb": "118715", "bsz": "256", "num_updates": "453400", "lr": "5.52121e-05", "gnorm": "1.066", "loss_scale": "4", "train_wall": "60", "gb_free": "23.4", "wall": "135624"} +[2022-08-01 00:31:15,400][train_inner][INFO] - {"epoch": 9, "update": 8.813, "loss": "2.127", "ppl": "4.37", "wps": "399062", "ups": "3.36", "wpb": "118747", "bsz": "256", "num_updates": "453600", "lr": "5.51919e-05", "gnorm": "1.063", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "135684"} +[2022-08-01 00:32:15,000][train_inner][INFO] - {"epoch": 9, "update": 8.817, "loss": "2.131", "ppl": "4.38", "wps": "397236", "ups": "3.36", "wpb": "118376", "bsz": "256", "num_updates": "453800", "lr": "5.51717e-05", "gnorm": "1.069", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "135743"} +[2022-08-01 00:33:14,200][train_inner][INFO] - {"epoch": 9, "update": 8.821, "loss": "2.125", "ppl": "4.36", "wps": "400642", "ups": "3.38", "wpb": "118588", "bsz": "256", "num_updates": "454000", "lr": "5.51515e-05", "gnorm": "1.066", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "135803"} +[2022-08-01 00:34:14,705][train_inner][INFO] - {"epoch": 9, "update": 8.825, "loss": "2.129", "ppl": "4.37", "wps": "392497", "ups": "3.31", "wpb": "118740", "bsz": "256", "num_updates": "454200", "lr": "5.51313e-05", "gnorm": "1.073", "loss_scale": "4", "train_wall": "60", "gb_free": "22.3", "wall": "135863"} +[2022-08-01 00:35:14,483][train_inner][INFO] - {"epoch": 9, "update": 8.829, "loss": "2.125", "ppl": "4.36", "wps": "396304", "ups": "3.35", "wpb": "118450", "bsz": "256", "num_updates": "454400", "lr": "5.51111e-05", "gnorm": "1.067", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "135923"} +[2022-08-01 00:36:13,632][train_inner][INFO] - {"epoch": 9, "update": 8.833, "loss": "2.129", "ppl": "4.37", "wps": "397719", "ups": "3.38", "wpb": "117623", "bsz": "256", "num_updates": "454600", "lr": "5.50909e-05", "gnorm": "1.075", "loss_scale": "4", "train_wall": "59", "gb_free": "30.2", "wall": "135982"} +[2022-08-01 00:37:13,251][train_inner][INFO] - {"epoch": 9, "update": 8.836, "loss": "2.127", "ppl": "4.37", "wps": "396482", "ups": "3.35", "wpb": "118188", "bsz": "256", "num_updates": "454800", "lr": "5.50707e-05", "gnorm": "1.075", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "136042"} +[2022-08-01 00:38:12,799][train_inner][INFO] - {"epoch": 9, "update": 8.84, "loss": "2.126", "ppl": "4.37", "wps": "399393", "ups": "3.36", "wpb": "118915", "bsz": "256", "num_updates": "455000", "lr": "5.50505e-05", "gnorm": "1.072", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "136101"} +[2022-08-01 00:39:12,338][train_inner][INFO] - {"epoch": 9, "update": 8.844, "loss": "2.127", "ppl": "4.37", "wps": "395604", "ups": "3.36", "wpb": "117768", "bsz": "256", "num_updates": "455200", "lr": "5.50303e-05", "gnorm": "1.073", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "136161"} +[2022-08-01 00:40:11,973][train_inner][INFO] - {"epoch": 9, "update": 8.848, "loss": "2.125", "ppl": "4.36", "wps": "395805", "ups": "3.35", "wpb": "118017", "bsz": "256", "num_updates": "455400", "lr": "5.50101e-05", "gnorm": "1.072", "loss_scale": "8", "train_wall": "59", "gb_free": "24.9", "wall": "136220"} +[2022-08-01 00:41:11,536][train_inner][INFO] - {"epoch": 9, "update": 8.852, "loss": "2.132", "ppl": "4.38", "wps": "398018", "ups": "3.36", "wpb": "118537", "bsz": "256", "num_updates": "455600", "lr": "5.49899e-05", "gnorm": "1.073", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "136280"} +[2022-08-01 00:42:10,751][train_inner][INFO] - {"epoch": 9, "update": 8.856, "loss": "2.131", "ppl": "4.38", "wps": "400294", "ups": "3.38", "wpb": "118515", "bsz": "256", "num_updates": "455800", "lr": "5.49697e-05", "gnorm": "1.074", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "136339"} +[2022-08-01 00:43:10,291][train_inner][INFO] - {"epoch": 9, "update": 8.86, "loss": "2.126", "ppl": "4.36", "wps": "399942", "ups": "3.36", "wpb": "119063", "bsz": "256", "num_updates": "456000", "lr": "5.49495e-05", "gnorm": "1.069", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "136399"} +[2022-08-01 00:44:09,935][train_inner][INFO] - {"epoch": 9, "update": 8.864, "loss": "2.13", "ppl": "4.38", "wps": "395274", "ups": "3.35", "wpb": "117878", "bsz": "256", "num_updates": "456200", "lr": "5.49293e-05", "gnorm": "1.078", "loss_scale": "8", "train_wall": "59", "gb_free": "23.9", "wall": "136458"} +[2022-08-01 00:45:09,228][train_inner][INFO] - {"epoch": 9, "update": 8.867, "loss": "2.129", "ppl": "4.37", "wps": "396642", "ups": "3.37", "wpb": "117589", "bsz": "256", "num_updates": "456400", "lr": "5.49091e-05", "gnorm": "1.078", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "136518"} +[2022-08-01 00:46:08,645][train_inner][INFO] - {"epoch": 9, "update": 8.871, "loss": "2.128", "ppl": "4.37", "wps": "397736", "ups": "3.37", "wpb": "118161", "bsz": "256", "num_updates": "456600", "lr": "5.48889e-05", "gnorm": "1.085", "loss_scale": "8", "train_wall": "59", "gb_free": "27.5", "wall": "136577"} +[2022-08-01 00:47:08,270][train_inner][INFO] - {"epoch": 9, "update": 8.875, "loss": "2.137", "ppl": "4.4", "wps": "394494", "ups": "3.35", "wpb": "117607", "bsz": "256", "num_updates": "456800", "lr": "5.48687e-05", "gnorm": "1.083", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "136637"} +[2022-08-01 00:48:06,084][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 00:48:07,858][train_inner][INFO] - {"epoch": 9, "update": 8.879, "loss": "2.133", "ppl": "4.39", "wps": "394334", "ups": "3.36", "wpb": "117488", "bsz": "256", "num_updates": "457000", "lr": "5.48485e-05", "gnorm": "1.087", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "136696"} +[2022-08-01 00:49:07,324][train_inner][INFO] - {"epoch": 9, "update": 8.883, "loss": "2.134", "ppl": "4.39", "wps": "397935", "ups": "3.36", "wpb": "118317", "bsz": "256", "num_updates": "457200", "lr": "5.48283e-05", "gnorm": "1.079", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "136756"} +[2022-08-01 00:50:07,253][train_inner][INFO] - {"epoch": 9, "update": 8.887, "loss": "2.126", "ppl": "4.36", "wps": "398757", "ups": "3.34", "wpb": "119484", "bsz": "256", "num_updates": "457400", "lr": "5.48081e-05", "gnorm": "1.072", "loss_scale": "4", "train_wall": "60", "gb_free": "22.2", "wall": "136816"} +[2022-08-01 00:51:06,566][train_inner][INFO] - {"epoch": 9, "update": 8.891, "loss": "2.127", "ppl": "4.37", "wps": "399777", "ups": "3.37", "wpb": "118559", "bsz": "256", "num_updates": "457600", "lr": "5.47879e-05", "gnorm": "1.086", "loss_scale": "4", "train_wall": "59", "gb_free": "25.7", "wall": "136875"} +[2022-08-01 00:52:06,175][train_inner][INFO] - {"epoch": 9, "update": 8.895, "loss": "2.13", "ppl": "4.38", "wps": "398562", "ups": "3.36", "wpb": "118788", "bsz": "256", "num_updates": "457800", "lr": "5.47677e-05", "gnorm": "1.086", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "136935"} +[2022-08-01 00:53:05,684][train_inner][INFO] - {"epoch": 9, "update": 8.899, "loss": "2.126", "ppl": "4.36", "wps": "399014", "ups": "3.36", "wpb": "118724", "bsz": "256", "num_updates": "458000", "lr": "5.47475e-05", "gnorm": "1.084", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "136994"} +[2022-08-01 00:54:04,754][train_inner][INFO] - {"epoch": 9, "update": 8.902, "loss": "2.125", "ppl": "4.36", "wps": "401816", "ups": "3.39", "wpb": "118676", "bsz": "256", "num_updates": "458200", "lr": "5.47273e-05", "gnorm": "1.083", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "137053"} +[2022-08-01 00:55:04,513][train_inner][INFO] - {"epoch": 9, "update": 8.906, "loss": "2.124", "ppl": "4.36", "wps": "396982", "ups": "3.35", "wpb": "118614", "bsz": "256", "num_updates": "458400", "lr": "5.47071e-05", "gnorm": "1.085", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "137113"} +[2022-08-01 00:56:03,925][train_inner][INFO] - {"epoch": 9, "update": 8.91, "loss": "2.133", "ppl": "4.39", "wps": "397396", "ups": "3.37", "wpb": "118050", "bsz": "256", "num_updates": "458600", "lr": "5.46869e-05", "gnorm": "1.088", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "137172"} +[2022-08-01 00:57:03,397][train_inner][INFO] - {"epoch": 9, "update": 8.914, "loss": "2.126", "ppl": "4.36", "wps": "398976", "ups": "3.36", "wpb": "118639", "bsz": "256", "num_updates": "458800", "lr": "5.46667e-05", "gnorm": "1.088", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "137232"} +[2022-08-01 00:58:02,853][train_inner][INFO] - {"epoch": 9, "update": 8.918, "loss": "2.131", "ppl": "4.38", "wps": "397163", "ups": "3.36", "wpb": "118068", "bsz": "256", "num_updates": "459000", "lr": "5.46465e-05", "gnorm": "1.094", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "137291"} +[2022-08-01 00:59:02,466][train_inner][INFO] - {"epoch": 9, "update": 8.922, "loss": "2.128", "ppl": "4.37", "wps": "397201", "ups": "3.35", "wpb": "118391", "bsz": "256", "num_updates": "459200", "lr": "5.46263e-05", "gnorm": "1.093", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "137351"} +[2022-08-01 01:00:01,862][train_inner][INFO] - {"epoch": 9, "update": 8.926, "loss": "2.126", "ppl": "4.37", "wps": "398066", "ups": "3.37", "wpb": "118216", "bsz": "256", "num_updates": "459400", "lr": "5.46061e-05", "gnorm": "1.092", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "137410"} +[2022-08-01 01:01:01,037][train_inner][INFO] - {"epoch": 9, "update": 8.93, "loss": "2.133", "ppl": "4.39", "wps": "398779", "ups": "3.38", "wpb": "117988", "bsz": "256", "num_updates": "459600", "lr": "5.45859e-05", "gnorm": "1.098", "loss_scale": "8", "train_wall": "59", "gb_free": "26.4", "wall": "137469"} +[2022-08-01 01:02:00,236][train_inner][INFO] - {"epoch": 9, "update": 8.934, "loss": "2.132", "ppl": "4.38", "wps": "398396", "ups": "3.38", "wpb": "117923", "bsz": "256", "num_updates": "459800", "lr": "5.45657e-05", "gnorm": "1.097", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "137529"} +[2022-08-01 01:02:59,827][train_inner][INFO] - {"epoch": 9, "update": 8.937, "loss": "2.13", "ppl": "4.38", "wps": "396374", "ups": "3.36", "wpb": "118101", "bsz": "256", "num_updates": "460000", "lr": "5.45455e-05", "gnorm": "1.096", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "137588"} +[2022-08-01 01:03:59,061][train_inner][INFO] - {"epoch": 9, "update": 8.941, "loss": "2.126", "ppl": "4.36", "wps": "398791", "ups": "3.38", "wpb": "118109", "bsz": "256", "num_updates": "460200", "lr": "5.45253e-05", "gnorm": "1.098", "loss_scale": "8", "train_wall": "59", "gb_free": "23", "wall": "137647"} +[2022-08-01 01:04:57,996][train_inner][INFO] - {"epoch": 9, "update": 8.945, "loss": "2.127", "ppl": "4.37", "wps": "399899", "ups": "3.39", "wpb": "117838", "bsz": "256", "num_updates": "460400", "lr": "5.45051e-05", "gnorm": "1.098", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "137706"} +[2022-08-01 01:05:57,459][train_inner][INFO] - {"epoch": 9, "update": 8.949, "loss": "2.132", "ppl": "4.38", "wps": "397987", "ups": "3.36", "wpb": "118327", "bsz": "256", "num_updates": "460600", "lr": "5.44848e-05", "gnorm": "1.096", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "137766"} +[2022-08-01 01:06:56,956][train_inner][INFO] - {"epoch": 9, "update": 8.953, "loss": "2.127", "ppl": "4.37", "wps": "397863", "ups": "3.36", "wpb": "118357", "bsz": "256", "num_updates": "460800", "lr": "5.44646e-05", "gnorm": "1.099", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "137825"} +[2022-08-01 01:07:56,492][train_inner][INFO] - {"epoch": 9, "update": 8.957, "loss": "2.126", "ppl": "4.36", "wps": "395894", "ups": "3.36", "wpb": "117849", "bsz": "256", "num_updates": "461000", "lr": "5.44444e-05", "gnorm": "1.1", "loss_scale": "8", "train_wall": "59", "gb_free": "25.1", "wall": "137885"} +[2022-08-01 01:08:55,533][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 01:08:55,824][train_inner][INFO] - {"epoch": 9, "update": 8.961, "loss": "2.135", "ppl": "4.39", "wps": "399702", "ups": "3.37", "wpb": "118575", "bsz": "256", "num_updates": "461200", "lr": "5.44242e-05", "gnorm": "1.105", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "137944"} +[2022-08-01 01:09:55,434][train_inner][INFO] - {"epoch": 9, "update": 8.965, "loss": "2.128", "ppl": "4.37", "wps": "396100", "ups": "3.36", "wpb": "118057", "bsz": "256", "num_updates": "461400", "lr": "5.4404e-05", "gnorm": "1.102", "loss_scale": "8", "train_wall": "59", "gb_free": "31.4", "wall": "138004"} +[2022-08-01 01:10:55,029][train_inner][INFO] - {"epoch": 9, "update": 8.968, "loss": "2.13", "ppl": "4.38", "wps": "395733", "ups": "3.36", "wpb": "117919", "bsz": "256", "num_updates": "461600", "lr": "5.43838e-05", "gnorm": "1.107", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "138063"} +[2022-08-01 01:11:00,391][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 01:11:54,898][train_inner][INFO] - {"epoch": 9, "update": 8.972, "loss": "2.128", "ppl": "4.37", "wps": "396258", "ups": "3.34", "wpb": "118616", "bsz": "256", "num_updates": "461800", "lr": "5.43636e-05", "gnorm": "1.17", "loss_scale": "4", "train_wall": "60", "gb_free": "24.4", "wall": "138123"} +[2022-08-01 01:12:53,970][train_inner][INFO] - {"epoch": 9, "update": 8.976, "loss": "2.131", "ppl": "4.38", "wps": "398332", "ups": "3.39", "wpb": "117650", "bsz": "256", "num_updates": "462000", "lr": "5.43434e-05", "gnorm": "1.112", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "138182"} +[2022-08-01 01:13:53,524][train_inner][INFO] - {"epoch": 9, "update": 8.98, "loss": "2.124", "ppl": "4.36", "wps": "398415", "ups": "3.36", "wpb": "118636", "bsz": "256", "num_updates": "462200", "lr": "5.43232e-05", "gnorm": "1.108", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "138242"} +[2022-08-01 01:14:52,829][train_inner][INFO] - {"epoch": 9, "update": 8.984, "loss": "2.117", "ppl": "4.34", "wps": "400103", "ups": "3.37", "wpb": "118640", "bsz": "256", "num_updates": "462400", "lr": "5.4303e-05", "gnorm": "1.106", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "138301"} +[2022-08-01 01:15:52,382][train_inner][INFO] - {"epoch": 9, "update": 8.988, "loss": "2.13", "ppl": "4.38", "wps": "397391", "ups": "3.36", "wpb": "118327", "bsz": "256", "num_updates": "462600", "lr": "5.42828e-05", "gnorm": "1.113", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "138361"} +[2022-08-01 01:16:51,886][train_inner][INFO] - {"epoch": 9, "update": 8.992, "loss": "2.134", "ppl": "4.39", "wps": "395204", "ups": "3.36", "wpb": "117580", "bsz": "256", "num_updates": "462800", "lr": "5.42626e-05", "gnorm": "1.116", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "138420"} +[2022-08-01 01:17:51,333][train_inner][INFO] - {"epoch": 9, "update": 8.996, "loss": "2.126", "ppl": "4.37", "wps": "398821", "ups": "3.36", "wpb": "118544", "bsz": "256", "num_updates": "463000", "lr": "5.42424e-05", "gnorm": "1.114", "loss_scale": "4", "train_wall": "59", "gb_free": "26.6", "wall": "138480"} +[2022-08-01 01:18:51,049][train_inner][INFO] - {"epoch": 9, "update": 9.0, "loss": "2.131", "ppl": "4.38", "wps": "395876", "ups": "3.35", "wpb": "118198", "bsz": "256", "num_updates": "463200", "lr": "5.42222e-05", "gnorm": "1.113", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "138539"} +[2022-08-01 01:18:57,361][fairseq_cli.train][INFO] - end of epoch 9 (average epoch stats below) +[2022-08-01 01:18:57,361][train][INFO] - {"epoch": 9, "train_loss": "2.136", "train_ppl": "4.39", "train_wps": "395861", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "463221", "train_lr": "5.42201e-05", "train_gnorm": "1.045", "train_loss_scale": "4", "train_train_wall": "15234", "train_gb_free": "24.4", "train_wall": "138546"} +[2022-08-01 01:18:57,451][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 01:18:57,454][fairseq.trainer][INFO] - begin training epoch 10 +[2022-08-01 01:18:57,455][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 01:20:01,728][train_inner][INFO] - {"epoch": 10, "update": 9.003, "loss": "2.123", "ppl": "4.36", "wps": "333825", "ups": "2.83", "wpb": "117972", "bsz": "255.4", "num_updates": "463400", "lr": "5.4202e-05", "gnorm": "1.115", "loss_scale": "4", "train_wall": "60", "gb_free": "22.4", "wall": "138610"} +[2022-08-01 01:21:02,627][train_inner][INFO] - {"epoch": 10, "update": 9.007, "loss": "2.121", "ppl": "4.35", "wps": "388011", "ups": "3.28", "wpb": "118146", "bsz": "256", "num_updates": "463600", "lr": "5.41818e-05", "gnorm": "1.117", "loss_scale": "4", "train_wall": "61", "gb_free": "22.4", "wall": "138671"} +[2022-08-01 01:22:01,961][train_inner][INFO] - {"epoch": 10, "update": 9.011, "loss": "2.129", "ppl": "4.38", "wps": "397941", "ups": "3.37", "wpb": "118055", "bsz": "256", "num_updates": "463800", "lr": "5.41616e-05", "gnorm": "1.119", "loss_scale": "8", "train_wall": "59", "gb_free": "28.2", "wall": "138730"} +[2022-08-01 01:23:01,359][train_inner][INFO] - {"epoch": 10, "update": 9.015, "loss": "2.128", "ppl": "4.37", "wps": "397240", "ups": "3.37", "wpb": "117976", "bsz": "256", "num_updates": "464000", "lr": "5.41414e-05", "gnorm": "1.125", "loss_scale": "8", "train_wall": "59", "gb_free": "23", "wall": "138790"} +[2022-08-01 01:24:00,901][train_inner][INFO] - {"epoch": 10, "update": 9.019, "loss": "2.123", "ppl": "4.36", "wps": "397628", "ups": "3.36", "wpb": "118377", "bsz": "256", "num_updates": "464200", "lr": "5.41212e-05", "gnorm": "1.122", "loss_scale": "8", "train_wall": "59", "gb_free": "22.3", "wall": "138849"} +[2022-08-01 01:25:00,203][train_inner][INFO] - {"epoch": 10, "update": 9.023, "loss": "2.13", "ppl": "4.38", "wps": "397368", "ups": "3.37", "wpb": "117823", "bsz": "256", "num_updates": "464400", "lr": "5.4101e-05", "gnorm": "1.125", "loss_scale": "8", "train_wall": "59", "gb_free": "25.2", "wall": "138909"} +[2022-08-01 01:25:59,645][train_inner][INFO] - {"epoch": 10, "update": 9.027, "loss": "2.121", "ppl": "4.35", "wps": "396238", "ups": "3.36", "wpb": "117766", "bsz": "256", "num_updates": "464600", "lr": "5.40808e-05", "gnorm": "1.127", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "138968"} +[2022-08-01 01:26:59,236][train_inner][INFO] - {"epoch": 10, "update": 9.031, "loss": "2.127", "ppl": "4.37", "wps": "397800", "ups": "3.36", "wpb": "118525", "bsz": "256", "num_updates": "464800", "lr": "5.40606e-05", "gnorm": "1.124", "loss_scale": "8", "train_wall": "59", "gb_free": "23", "wall": "139028"} +[2022-08-01 01:27:58,474][train_inner][INFO] - {"epoch": 10, "update": 9.035, "loss": "2.122", "ppl": "4.35", "wps": "399874", "ups": "3.38", "wpb": "118437", "bsz": "256", "num_updates": "465000", "lr": "5.40404e-05", "gnorm": "1.126", "loss_scale": "8", "train_wall": "59", "gb_free": "25.5", "wall": "139087"} +[2022-08-01 01:28:57,761][train_inner][INFO] - {"epoch": 10, "update": 9.038, "loss": "2.117", "ppl": "4.34", "wps": "399411", "ups": "3.37", "wpb": "118400", "bsz": "256", "num_updates": "465200", "lr": "5.40202e-05", "gnorm": "1.127", "loss_scale": "8", "train_wall": "59", "gb_free": "26", "wall": "139146"} +[2022-08-01 01:29:56,880][train_inner][INFO] - {"epoch": 10, "update": 9.042, "loss": "2.123", "ppl": "4.36", "wps": "400634", "ups": "3.38", "wpb": "118425", "bsz": "256", "num_updates": "465400", "lr": "5.4e-05", "gnorm": "1.138", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "139205"} +[2022-08-01 01:30:56,562][train_inner][INFO] - {"epoch": 10, "update": 9.046, "loss": "2.127", "ppl": "4.37", "wps": "396327", "ups": "3.35", "wpb": "118266", "bsz": "256", "num_updates": "465600", "lr": "5.39798e-05", "gnorm": "1.135", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "139265"} +[2022-08-01 01:31:55,926][train_inner][INFO] - {"epoch": 10, "update": 9.05, "loss": "2.12", "ppl": "4.35", "wps": "398754", "ups": "3.37", "wpb": "118357", "bsz": "256", "num_updates": "465800", "lr": "5.39596e-05", "gnorm": "1.138", "loss_scale": "16", "train_wall": "59", "gb_free": "21.4", "wall": "139324"} +[2022-08-01 01:32:55,065][train_inner][INFO] - {"epoch": 10, "update": 9.054, "loss": "2.119", "ppl": "4.34", "wps": "401298", "ups": "3.38", "wpb": "118661", "bsz": "256", "num_updates": "466000", "lr": "5.39394e-05", "gnorm": "1.133", "loss_scale": "16", "train_wall": "59", "gb_free": "25.9", "wall": "139383"} +[2022-08-01 01:33:54,697][train_inner][INFO] - {"epoch": 10, "update": 9.058, "loss": "2.123", "ppl": "4.36", "wps": "397394", "ups": "3.35", "wpb": "118486", "bsz": "256", "num_updates": "466200", "lr": "5.39192e-05", "gnorm": "1.14", "loss_scale": "16", "train_wall": "59", "gb_free": "24.2", "wall": "139443"} +[2022-08-01 01:34:54,507][train_inner][INFO] - {"epoch": 10, "update": 9.062, "loss": "2.127", "ppl": "4.37", "wps": "396556", "ups": "3.34", "wpb": "118590", "bsz": "256", "num_updates": "466400", "lr": "5.3899e-05", "gnorm": "1.145", "loss_scale": "16", "train_wall": "59", "gb_free": "21.5", "wall": "139503"} +[2022-08-01 01:35:54,201][train_inner][INFO] - {"epoch": 10, "update": 9.066, "loss": "2.118", "ppl": "4.34", "wps": "397327", "ups": "3.35", "wpb": "118590", "bsz": "256", "num_updates": "466600", "lr": "5.38788e-05", "gnorm": "1.139", "loss_scale": "16", "train_wall": "59", "gb_free": "23.1", "wall": "139563"} +[2022-08-01 01:36:53,521][train_inner][INFO] - {"epoch": 10, "update": 9.07, "loss": "2.126", "ppl": "4.36", "wps": "399053", "ups": "3.37", "wpb": "118359", "bsz": "256", "num_updates": "466800", "lr": "5.38586e-05", "gnorm": "1.144", "loss_scale": "16", "train_wall": "59", "gb_free": "24.5", "wall": "139622"} +[2022-08-01 01:37:52,778][train_inner][INFO] - {"epoch": 10, "update": 9.073, "loss": "2.124", "ppl": "4.36", "wps": "399074", "ups": "3.38", "wpb": "118239", "bsz": "256", "num_updates": "467000", "lr": "5.38384e-05", "gnorm": "1.147", "loss_scale": "16", "train_wall": "59", "gb_free": "23.7", "wall": "139681"} +[2022-08-01 01:38:53,049][train_inner][INFO] - {"epoch": 10, "update": 9.077, "loss": "2.121", "ppl": "4.35", "wps": "391911", "ups": "3.32", "wpb": "118102", "bsz": "256", "num_updates": "467200", "lr": "5.38182e-05", "gnorm": "1.147", "loss_scale": "16", "train_wall": "60", "gb_free": "24.9", "wall": "139741"} +[2022-08-01 01:39:53,551][train_inner][INFO] - {"epoch": 10, "update": 9.081, "loss": "2.122", "ppl": "4.35", "wps": "391454", "ups": "3.31", "wpb": "118419", "bsz": "256", "num_updates": "467400", "lr": "5.3798e-05", "gnorm": "1.15", "loss_scale": "16", "train_wall": "60", "gb_free": "22", "wall": "139802"} +[2022-08-01 01:40:53,086][train_inner][INFO] - {"epoch": 10, "update": 9.085, "loss": "2.124", "ppl": "4.36", "wps": "398055", "ups": "3.36", "wpb": "118490", "bsz": "256", "num_updates": "467600", "lr": "5.37778e-05", "gnorm": "1.15", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "139862"} +[2022-08-01 01:41:36,402][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 01:41:53,056][train_inner][INFO] - {"epoch": 10, "update": 9.089, "loss": "2.121", "ppl": "4.35", "wps": "393548", "ups": "3.34", "wpb": "118005", "bsz": "256", "num_updates": "467800", "lr": "5.37576e-05", "gnorm": "1.173", "loss_scale": "8", "train_wall": "60", "gb_free": "21.3", "wall": "139921"} +[2022-08-01 01:42:34,027][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 01:42:52,969][train_inner][INFO] - {"epoch": 10, "update": 9.093, "loss": "2.124", "ppl": "4.36", "wps": "394695", "ups": "3.34", "wpb": "118236", "bsz": "256", "num_updates": "468000", "lr": "5.37374e-05", "gnorm": "1.158", "loss_scale": "4", "train_wall": "60", "gb_free": "27.7", "wall": "139981"} +[2022-08-01 01:43:52,533][train_inner][INFO] - {"epoch": 10, "update": 9.097, "loss": "2.131", "ppl": "4.38", "wps": "394265", "ups": "3.36", "wpb": "117419", "bsz": "256", "num_updates": "468200", "lr": "5.37172e-05", "gnorm": "1.169", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "140041"} +[2022-08-01 01:44:51,670][train_inner][INFO] - {"epoch": 10, "update": 9.101, "loss": "2.126", "ppl": "4.37", "wps": "399535", "ups": "3.38", "wpb": "118136", "bsz": "256", "num_updates": "468400", "lr": "5.3697e-05", "gnorm": "1.159", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "140100"} +[2022-08-01 01:45:50,904][train_inner][INFO] - {"epoch": 10, "update": 9.105, "loss": "2.127", "ppl": "4.37", "wps": "400445", "ups": "3.38", "wpb": "118597", "bsz": "256", "num_updates": "468600", "lr": "5.36768e-05", "gnorm": "1.175", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "140159"} +[2022-08-01 01:46:50,523][train_inner][INFO] - {"epoch": 10, "update": 9.108, "loss": "2.116", "ppl": "4.33", "wps": "399204", "ups": "3.35", "wpb": "119000", "bsz": "256", "num_updates": "468800", "lr": "5.36566e-05", "gnorm": "1.159", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "140219"} +[2022-08-01 01:47:50,297][train_inner][INFO] - {"epoch": 10, "update": 9.112, "loss": "2.121", "ppl": "4.35", "wps": "396626", "ups": "3.35", "wpb": "118539", "bsz": "256", "num_updates": "469000", "lr": "5.36364e-05", "gnorm": "1.174", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "140279"} +[2022-08-01 01:48:49,769][train_inner][INFO] - {"epoch": 10, "update": 9.116, "loss": "2.124", "ppl": "4.36", "wps": "399647", "ups": "3.36", "wpb": "118838", "bsz": "256", "num_updates": "469200", "lr": "5.36162e-05", "gnorm": "1.166", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "140338"} +[2022-08-01 01:49:49,277][train_inner][INFO] - {"epoch": 10, "update": 9.12, "loss": "2.12", "ppl": "4.35", "wps": "398281", "ups": "3.36", "wpb": "118504", "bsz": "256", "num_updates": "469400", "lr": "5.3596e-05", "gnorm": "1.173", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "140398"} +[2022-08-01 01:50:48,726][train_inner][INFO] - {"epoch": 10, "update": 9.124, "loss": "2.125", "ppl": "4.36", "wps": "398972", "ups": "3.36", "wpb": "118592", "bsz": "256", "num_updates": "469600", "lr": "5.35758e-05", "gnorm": "1.177", "loss_scale": "4", "train_wall": "59", "gb_free": "28.2", "wall": "140457"} +[2022-08-01 01:51:48,739][train_inner][INFO] - {"epoch": 10, "update": 9.128, "loss": "2.117", "ppl": "4.34", "wps": "398224", "ups": "3.33", "wpb": "119492", "bsz": "256", "num_updates": "469800", "lr": "5.35556e-05", "gnorm": "1.17", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "140517"} +[2022-08-01 01:52:48,180][train_inner][INFO] - {"epoch": 10, "update": 9.132, "loss": "2.128", "ppl": "4.37", "wps": "397954", "ups": "3.36", "wpb": "118272", "bsz": "256", "num_updates": "470000", "lr": "5.35354e-05", "gnorm": "1.178", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "140577"} +[2022-08-01 01:53:47,748][train_inner][INFO] - {"epoch": 10, "update": 9.136, "loss": "2.12", "ppl": "4.35", "wps": "396657", "ups": "3.36", "wpb": "118140", "bsz": "256", "num_updates": "470200", "lr": "5.35152e-05", "gnorm": "1.186", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "140636"} +[2022-08-01 01:54:47,428][train_inner][INFO] - {"epoch": 10, "update": 9.139, "loss": "2.121", "ppl": "4.35", "wps": "396667", "ups": "3.35", "wpb": "118365", "bsz": "256", "num_updates": "470400", "lr": "5.34949e-05", "gnorm": "1.182", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "140696"} +[2022-08-01 01:55:46,842][train_inner][INFO] - {"epoch": 10, "update": 9.143, "loss": "2.133", "ppl": "4.39", "wps": "395672", "ups": "3.37", "wpb": "117541", "bsz": "256", "num_updates": "470600", "lr": "5.34747e-05", "gnorm": "1.189", "loss_scale": "8", "train_wall": "59", "gb_free": "25.3", "wall": "140755"} +[2022-08-01 01:56:46,275][train_inner][INFO] - {"epoch": 10, "update": 9.147, "loss": "2.128", "ppl": "4.37", "wps": "396926", "ups": "3.37", "wpb": "117951", "bsz": "256", "num_updates": "470800", "lr": "5.34545e-05", "gnorm": "1.196", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "140815"} +[2022-08-01 01:57:46,175][train_inner][INFO] - {"epoch": 10, "update": 9.151, "loss": "2.121", "ppl": "4.35", "wps": "395765", "ups": "3.34", "wpb": "118532", "bsz": "256", "num_updates": "471000", "lr": "5.34343e-05", "gnorm": "1.192", "loss_scale": "8", "train_wall": "60", "gb_free": "23.1", "wall": "140875"} +[2022-08-01 01:58:45,960][train_inner][INFO] - {"epoch": 10, "update": 9.155, "loss": "2.123", "ppl": "4.36", "wps": "396165", "ups": "3.35", "wpb": "118422", "bsz": "256", "num_updates": "471200", "lr": "5.34141e-05", "gnorm": "1.192", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "140934"} +[2022-08-01 01:59:45,615][train_inner][INFO] - {"epoch": 10, "update": 9.159, "loss": "2.125", "ppl": "4.36", "wps": "396191", "ups": "3.35", "wpb": "118174", "bsz": "256", "num_updates": "471400", "lr": "5.33939e-05", "gnorm": "1.209", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "140994"} +[2022-08-01 02:00:45,280][train_inner][INFO] - {"epoch": 10, "update": 9.163, "loss": "2.122", "ppl": "4.35", "wps": "397648", "ups": "3.35", "wpb": "118626", "bsz": "256", "num_updates": "471600", "lr": "5.33737e-05", "gnorm": "1.201", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "141054"} +[2022-08-01 02:01:44,558][train_inner][INFO] - {"epoch": 10, "update": 9.167, "loss": "2.128", "ppl": "4.37", "wps": "399795", "ups": "3.37", "wpb": "118494", "bsz": "256", "num_updates": "471800", "lr": "5.33535e-05", "gnorm": "1.202", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "141113"} +[2022-08-01 02:02:43,882][train_inner][INFO] - {"epoch": 10, "update": 9.171, "loss": "2.128", "ppl": "4.37", "wps": "397744", "ups": "3.37", "wpb": "117978", "bsz": "256", "num_updates": "472000", "lr": "5.33333e-05", "gnorm": "1.21", "loss_scale": "8", "train_wall": "59", "gb_free": "25.1", "wall": "141172"} +[2022-08-01 02:03:43,323][train_inner][INFO] - {"epoch": 10, "update": 9.174, "loss": "2.124", "ppl": "4.36", "wps": "398528", "ups": "3.36", "wpb": "118444", "bsz": "256", "num_updates": "472200", "lr": "5.33131e-05", "gnorm": "1.207", "loss_scale": "16", "train_wall": "59", "gb_free": "21.8", "wall": "141232"} +[2022-08-01 02:04:42,764][train_inner][INFO] - {"epoch": 10, "update": 9.178, "loss": "2.117", "ppl": "4.34", "wps": "398886", "ups": "3.36", "wpb": "118550", "bsz": "256", "num_updates": "472400", "lr": "5.32929e-05", "gnorm": "1.211", "loss_scale": "16", "train_wall": "59", "gb_free": "21.7", "wall": "141291"} +[2022-08-01 02:05:42,434][train_inner][INFO] - {"epoch": 10, "update": 9.182, "loss": "2.118", "ppl": "4.34", "wps": "398340", "ups": "3.35", "wpb": "118845", "bsz": "256", "num_updates": "472600", "lr": "5.32727e-05", "gnorm": "1.203", "loss_scale": "16", "train_wall": "59", "gb_free": "23.5", "wall": "141351"} +[2022-08-01 02:06:41,893][train_inner][INFO] - {"epoch": 10, "update": 9.186, "loss": "2.125", "ppl": "4.36", "wps": "396128", "ups": "3.36", "wpb": "117765", "bsz": "256", "num_updates": "472800", "lr": "5.32525e-05", "gnorm": "1.219", "loss_scale": "16", "train_wall": "59", "gb_free": "25.3", "wall": "141410"} +[2022-08-01 02:07:41,295][train_inner][INFO] - {"epoch": 10, "update": 9.19, "loss": "2.122", "ppl": "4.35", "wps": "398946", "ups": "3.37", "wpb": "118490", "bsz": "256", "num_updates": "473000", "lr": "5.32323e-05", "gnorm": "1.223", "loss_scale": "16", "train_wall": "59", "gb_free": "23.8", "wall": "141470"} +[2022-08-01 02:08:41,076][train_inner][INFO] - {"epoch": 10, "update": 9.194, "loss": "2.119", "ppl": "4.35", "wps": "395498", "ups": "3.35", "wpb": "118216", "bsz": "256", "num_updates": "473200", "lr": "5.32121e-05", "gnorm": "1.22", "loss_scale": "16", "train_wall": "59", "gb_free": "22.3", "wall": "141530"} +[2022-08-01 02:08:42,898][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 02:09:40,883][train_inner][INFO] - {"epoch": 10, "update": 9.198, "loss": "2.128", "ppl": "4.37", "wps": "395982", "ups": "3.34", "wpb": "118410", "bsz": "256", "num_updates": "473400", "lr": "5.31919e-05", "gnorm": "1.229", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "141589"} +[2022-08-01 02:10:40,699][train_inner][INFO] - {"epoch": 10, "update": 9.202, "loss": "2.118", "ppl": "4.34", "wps": "396349", "ups": "3.34", "wpb": "118541", "bsz": "256", "num_updates": "473600", "lr": "5.31717e-05", "gnorm": "1.215", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "141649"} +[2022-08-01 02:11:40,234][train_inner][INFO] - {"epoch": 10, "update": 9.206, "loss": "2.124", "ppl": "4.36", "wps": "397837", "ups": "3.36", "wpb": "118425", "bsz": "256", "num_updates": "473800", "lr": "5.31515e-05", "gnorm": "1.231", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "141709"} +[2022-08-01 02:12:39,200][train_inner][INFO] - {"epoch": 10, "update": 9.209, "loss": "2.118", "ppl": "4.34", "wps": "399656", "ups": "3.39", "wpb": "117830", "bsz": "256", "num_updates": "474000", "lr": "5.31313e-05", "gnorm": "1.247", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "141768"} +[2022-08-01 02:13:39,176][train_inner][INFO] - {"epoch": 10, "update": 9.213, "loss": "2.122", "ppl": "4.35", "wps": "393774", "ups": "3.33", "wpb": "118083", "bsz": "256", "num_updates": "474200", "lr": "5.31111e-05", "gnorm": "1.256", "loss_scale": "8", "train_wall": "60", "gb_free": "21.5", "wall": "141828"} +[2022-08-01 02:14:38,718][train_inner][INFO] - {"epoch": 10, "update": 9.217, "loss": "2.12", "ppl": "4.35", "wps": "397246", "ups": "3.36", "wpb": "118263", "bsz": "256", "num_updates": "474400", "lr": "5.30909e-05", "gnorm": "1.245", "loss_scale": "8", "train_wall": "59", "gb_free": "27", "wall": "141887"} +[2022-08-01 02:15:38,368][train_inner][INFO] - {"epoch": 10, "update": 9.221, "loss": "2.123", "ppl": "4.36", "wps": "395640", "ups": "3.35", "wpb": "117999", "bsz": "256", "num_updates": "474600", "lr": "5.30707e-05", "gnorm": "1.242", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "141947"} +[2022-08-01 02:16:38,031][train_inner][INFO] - {"epoch": 10, "update": 9.225, "loss": "2.124", "ppl": "4.36", "wps": "396371", "ups": "3.35", "wpb": "118243", "bsz": "256", "num_updates": "474800", "lr": "5.30505e-05", "gnorm": "1.244", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "142006"} +[2022-08-01 02:17:37,897][train_inner][INFO] - {"epoch": 10, "update": 9.229, "loss": "2.119", "ppl": "4.34", "wps": "397032", "ups": "3.34", "wpb": "118844", "bsz": "256", "num_updates": "475000", "lr": "5.30303e-05", "gnorm": "1.254", "loss_scale": "8", "train_wall": "60", "gb_free": "22.3", "wall": "142066"} +[2022-08-01 02:18:37,402][train_inner][INFO] - {"epoch": 10, "update": 9.233, "loss": "2.12", "ppl": "4.35", "wps": "399553", "ups": "3.36", "wpb": "118876", "bsz": "256", "num_updates": "475200", "lr": "5.30101e-05", "gnorm": "1.262", "loss_scale": "8", "train_wall": "59", "gb_free": "22.9", "wall": "142126"} +[2022-08-01 02:19:36,994][train_inner][INFO] - {"epoch": 10, "update": 9.237, "loss": "2.121", "ppl": "4.35", "wps": "395847", "ups": "3.36", "wpb": "117946", "bsz": "256", "num_updates": "475400", "lr": "5.29899e-05", "gnorm": "1.265", "loss_scale": "16", "train_wall": "59", "gb_free": "21.6", "wall": "142185"} +[2022-08-01 02:19:56,006][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 02:20:36,832][train_inner][INFO] - {"epoch": 10, "update": 9.241, "loss": "2.118", "ppl": "4.34", "wps": "396436", "ups": "3.34", "wpb": "118610", "bsz": "256", "num_updates": "475600", "lr": "5.29697e-05", "gnorm": "1.266", "loss_scale": "8", "train_wall": "59", "gb_free": "22", "wall": "142245"} +[2022-08-01 02:21:36,392][train_inner][INFO] - {"epoch": 10, "update": 9.244, "loss": "2.119", "ppl": "4.34", "wps": "397218", "ups": "3.36", "wpb": "118290", "bsz": "256", "num_updates": "475800", "lr": "5.29495e-05", "gnorm": "1.271", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "142305"} +[2022-08-01 02:22:35,598][train_inner][INFO] - {"epoch": 10, "update": 9.248, "loss": "2.118", "ppl": "4.34", "wps": "400740", "ups": "3.38", "wpb": "118630", "bsz": "256", "num_updates": "476000", "lr": "5.29293e-05", "gnorm": "1.267", "loss_scale": "8", "train_wall": "59", "gb_free": "27.4", "wall": "142364"} +[2022-08-01 02:23:35,104][train_inner][INFO] - {"epoch": 10, "update": 9.252, "loss": "2.121", "ppl": "4.35", "wps": "398636", "ups": "3.36", "wpb": "118606", "bsz": "256", "num_updates": "476200", "lr": "5.29091e-05", "gnorm": "1.261", "loss_scale": "8", "train_wall": "59", "gb_free": "23.1", "wall": "142424"} +[2022-08-01 02:24:34,427][train_inner][INFO] - {"epoch": 10, "update": 9.256, "loss": "2.114", "ppl": "4.33", "wps": "398021", "ups": "3.37", "wpb": "118057", "bsz": "256", "num_updates": "476400", "lr": "5.28889e-05", "gnorm": "1.28", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "142483"} +[2022-08-01 02:25:33,898][train_inner][INFO] - {"epoch": 10, "update": 9.26, "loss": "2.126", "ppl": "4.36", "wps": "397472", "ups": "3.36", "wpb": "118189", "bsz": "256", "num_updates": "476600", "lr": "5.28687e-05", "gnorm": "1.302", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "142542"} +[2022-08-01 02:26:33,331][train_inner][INFO] - {"epoch": 10, "update": 9.264, "loss": "2.12", "ppl": "4.35", "wps": "398753", "ups": "3.37", "wpb": "118495", "bsz": "256", "num_updates": "476800", "lr": "5.28485e-05", "gnorm": "1.278", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "142602"} +[2022-08-01 02:27:33,018][train_inner][INFO] - {"epoch": 10, "update": 9.268, "loss": "2.118", "ppl": "4.34", "wps": "396084", "ups": "3.35", "wpb": "118205", "bsz": "256", "num_updates": "477000", "lr": "5.28283e-05", "gnorm": "1.306", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "142661"} +[2022-08-01 02:28:32,745][train_inner][INFO] - {"epoch": 10, "update": 9.272, "loss": "2.124", "ppl": "4.36", "wps": "396353", "ups": "3.35", "wpb": "118364", "bsz": "255.9", "num_updates": "477200", "lr": "5.28081e-05", "gnorm": "1.304", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "142721"} +[2022-08-01 02:29:32,500][train_inner][INFO] - {"epoch": 10, "update": 9.275, "loss": "2.118", "ppl": "4.34", "wps": "396587", "ups": "3.35", "wpb": "118488", "bsz": "256", "num_updates": "477400", "lr": "5.27879e-05", "gnorm": "1.286", "loss_scale": "8", "train_wall": "59", "gb_free": "24.4", "wall": "142781"} +[2022-08-01 02:30:11,315][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 02:30:33,605][train_inner][INFO] - {"epoch": 10, "update": 9.279, "loss": "2.125", "ppl": "4.36", "wps": "386995", "ups": "3.27", "wpb": "118236", "bsz": "256", "num_updates": "477600", "lr": "5.27677e-05", "gnorm": "1.287", "loss_scale": "8", "train_wall": "61", "gb_free": "22", "wall": "142842"} +[2022-08-01 02:31:33,172][train_inner][INFO] - {"epoch": 10, "update": 9.283, "loss": "2.118", "ppl": "4.34", "wps": "395902", "ups": "3.36", "wpb": "117912", "bsz": "256", "num_updates": "477800", "lr": "5.27475e-05", "gnorm": "1.314", "loss_scale": "8", "train_wall": "59", "gb_free": "27.9", "wall": "142902"} +[2022-08-01 02:32:32,756][train_inner][INFO] - {"epoch": 10, "update": 9.287, "loss": "2.123", "ppl": "4.36", "wps": "395278", "ups": "3.36", "wpb": "117762", "bsz": "256", "num_updates": "478000", "lr": "5.27273e-05", "gnorm": "1.347", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "142961"} +[2022-08-01 02:33:32,156][train_inner][INFO] - {"epoch": 10, "update": 9.291, "loss": "2.122", "ppl": "4.35", "wps": "397598", "ups": "3.37", "wpb": "118084", "bsz": "256", "num_updates": "478200", "lr": "5.27071e-05", "gnorm": "1.328", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "143021"} +[2022-08-01 02:34:31,689][train_inner][INFO] - {"epoch": 10, "update": 9.295, "loss": "2.123", "ppl": "4.36", "wps": "397701", "ups": "3.36", "wpb": "118381", "bsz": "256", "num_updates": "478400", "lr": "5.26869e-05", "gnorm": "1.313", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "143080"} +[2022-08-01 02:35:31,526][train_inner][INFO] - {"epoch": 10, "update": 9.299, "loss": "2.122", "ppl": "4.35", "wps": "396026", "ups": "3.34", "wpb": "118485", "bsz": "256", "num_updates": "478600", "lr": "5.26667e-05", "gnorm": "1.31", "loss_scale": "8", "train_wall": "59", "gb_free": "22.3", "wall": "143140"} +[2022-08-01 02:36:30,695][train_inner][INFO] - {"epoch": 10, "update": 9.303, "loss": "2.124", "ppl": "4.36", "wps": "398448", "ups": "3.38", "wpb": "117879", "bsz": "256", "num_updates": "478800", "lr": "5.26465e-05", "gnorm": "1.352", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "143199"} +[2022-08-01 02:37:30,351][train_inner][INFO] - {"epoch": 10, "update": 9.307, "loss": "2.122", "ppl": "4.35", "wps": "395488", "ups": "3.35", "wpb": "117963", "bsz": "256", "num_updates": "479000", "lr": "5.26263e-05", "gnorm": "1.336", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "143259"} +[2022-08-01 02:38:29,859][train_inner][INFO] - {"epoch": 10, "update": 9.31, "loss": "2.121", "ppl": "4.35", "wps": "398169", "ups": "3.36", "wpb": "118471", "bsz": "256", "num_updates": "479200", "lr": "5.26061e-05", "gnorm": "1.36", "loss_scale": "8", "train_wall": "59", "gb_free": "24.7", "wall": "143318"} +[2022-08-01 02:39:29,529][train_inner][INFO] - {"epoch": 10, "update": 9.314, "loss": "2.122", "ppl": "4.35", "wps": "395434", "ups": "3.35", "wpb": "117978", "bsz": "256", "num_updates": "479400", "lr": "5.25859e-05", "gnorm": "1.361", "loss_scale": "8", "train_wall": "59", "gb_free": "23.2", "wall": "143378"} +[2022-08-01 02:40:25,136][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 02:40:29,363][train_inner][INFO] - {"epoch": 10, "update": 9.318, "loss": "2.119", "ppl": "4.35", "wps": "396346", "ups": "3.34", "wpb": "118573", "bsz": "256", "num_updates": "479600", "lr": "5.25657e-05", "gnorm": "1.374", "loss_scale": "8", "train_wall": "59", "gb_free": "23.5", "wall": "143438"} +[2022-08-01 02:41:29,077][train_inner][INFO] - {"epoch": 10, "update": 9.322, "loss": "2.116", "ppl": "4.33", "wps": "395587", "ups": "3.35", "wpb": "118110", "bsz": "256", "num_updates": "479800", "lr": "5.25455e-05", "gnorm": "1.36", "loss_scale": "8", "train_wall": "59", "gb_free": "25.2", "wall": "143498"} +[2022-08-01 02:42:28,657][train_inner][INFO] - {"epoch": 10, "update": 9.326, "loss": "2.128", "ppl": "4.37", "wps": "396308", "ups": "3.36", "wpb": "118059", "bsz": "256", "num_updates": "480000", "lr": "5.25253e-05", "gnorm": "1.344", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "143557"} +[2022-08-01 02:43:28,134][train_inner][INFO] - {"epoch": 10, "update": 9.33, "loss": "2.119", "ppl": "4.34", "wps": "397017", "ups": "3.36", "wpb": "118066", "bsz": "256", "num_updates": "480200", "lr": "5.25051e-05", "gnorm": "1.347", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "143617"} +[2022-08-01 02:44:28,636][train_inner][INFO] - {"epoch": 10, "update": 9.334, "loss": "2.117", "ppl": "4.34", "wps": "390403", "ups": "3.31", "wpb": "118100", "bsz": "256", "num_updates": "480400", "lr": "5.24848e-05", "gnorm": "1.37", "loss_scale": "8", "train_wall": "60", "gb_free": "21.4", "wall": "143677"} +[2022-08-01 02:45:28,517][train_inner][INFO] - {"epoch": 10, "update": 9.338, "loss": "2.12", "ppl": "4.35", "wps": "399362", "ups": "3.34", "wpb": "119570", "bsz": "256", "num_updates": "480600", "lr": "5.24646e-05", "gnorm": "1.388", "loss_scale": "8", "train_wall": "60", "gb_free": "23.4", "wall": "143737"} +[2022-08-01 02:46:28,326][train_inner][INFO] - {"epoch": 10, "update": 9.342, "loss": "2.123", "ppl": "4.36", "wps": "396676", "ups": "3.34", "wpb": "118623", "bsz": "256", "num_updates": "480800", "lr": "5.24444e-05", "gnorm": "1.375", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "143797"} +[2022-08-01 02:47:27,858][train_inner][INFO] - {"epoch": 10, "update": 9.345, "loss": "2.12", "ppl": "4.35", "wps": "397485", "ups": "3.36", "wpb": "118316", "bsz": "256", "num_updates": "481000", "lr": "5.24242e-05", "gnorm": "1.39", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "143856"} +[2022-08-01 02:48:27,436][train_inner][INFO] - {"epoch": 10, "update": 9.349, "loss": "2.119", "ppl": "4.35", "wps": "396472", "ups": "3.36", "wpb": "118104", "bsz": "256", "num_updates": "481200", "lr": "5.2404e-05", "gnorm": "1.422", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "143916"} +[2022-08-01 02:49:27,392][train_inner][INFO] - {"epoch": 10, "update": 9.353, "loss": "2.12", "ppl": "4.35", "wps": "396463", "ups": "3.34", "wpb": "118850", "bsz": "256", "num_updates": "481400", "lr": "5.23838e-05", "gnorm": "1.398", "loss_scale": "8", "train_wall": "60", "gb_free": "23.4", "wall": "143976"} +[2022-08-01 02:50:26,664][train_inner][INFO] - {"epoch": 10, "update": 9.357, "loss": "2.126", "ppl": "4.36", "wps": "397473", "ups": "3.37", "wpb": "117794", "bsz": "256", "num_updates": "481600", "lr": "5.23636e-05", "gnorm": "1.414", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "144035"} +[2022-08-01 02:50:37,527][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 02:51:26,616][train_inner][INFO] - {"epoch": 10, "update": 9.361, "loss": "2.118", "ppl": "4.34", "wps": "395228", "ups": "3.34", "wpb": "118474", "bsz": "256", "num_updates": "481800", "lr": "5.23434e-05", "gnorm": "1.385", "loss_scale": "8", "train_wall": "60", "gb_free": "21.6", "wall": "144095"} +[2022-08-01 02:52:26,214][train_inner][INFO] - {"epoch": 10, "update": 9.365, "loss": "2.12", "ppl": "4.35", "wps": "397962", "ups": "3.36", "wpb": "118587", "bsz": "256", "num_updates": "482000", "lr": "5.23232e-05", "gnorm": "1.46", "loss_scale": "8", "train_wall": "59", "gb_free": "23.1", "wall": "144155"} +[2022-08-01 02:53:25,541][train_inner][INFO] - {"epoch": 10, "update": 9.369, "loss": "2.122", "ppl": "4.35", "wps": "399354", "ups": "3.37", "wpb": "118462", "bsz": "256", "num_updates": "482200", "lr": "5.2303e-05", "gnorm": "1.406", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "144214"} +[2022-08-01 02:54:25,121][train_inner][INFO] - {"epoch": 10, "update": 9.373, "loss": "2.125", "ppl": "4.36", "wps": "397813", "ups": "3.36", "wpb": "118507", "bsz": "256", "num_updates": "482400", "lr": "5.22828e-05", "gnorm": "1.455", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "144274"} +[2022-08-01 02:55:24,391][train_inner][INFO] - {"epoch": 10, "update": 9.377, "loss": "2.121", "ppl": "4.35", "wps": "395670", "ups": "3.37", "wpb": "117257", "bsz": "256", "num_updates": "482600", "lr": "5.22626e-05", "gnorm": "1.419", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "144333"} +[2022-08-01 02:56:23,922][train_inner][INFO] - {"epoch": 10, "update": 9.38, "loss": "2.118", "ppl": "4.34", "wps": "396052", "ups": "3.36", "wpb": "117885", "bsz": "256", "num_updates": "482800", "lr": "5.22424e-05", "gnorm": "1.46", "loss_scale": "8", "train_wall": "59", "gb_free": "25.6", "wall": "144392"} +[2022-08-01 02:57:23,462][train_inner][INFO] - {"epoch": 10, "update": 9.384, "loss": "2.121", "ppl": "4.35", "wps": "395628", "ups": "3.36", "wpb": "117778", "bsz": "256", "num_updates": "483000", "lr": "5.22222e-05", "gnorm": "1.437", "loss_scale": "8", "train_wall": "59", "gb_free": "23.6", "wall": "144452"} +[2022-08-01 02:58:22,716][train_inner][INFO] - {"epoch": 10, "update": 9.388, "loss": "2.12", "ppl": "4.35", "wps": "398668", "ups": "3.38", "wpb": "118114", "bsz": "256", "num_updates": "483200", "lr": "5.2202e-05", "gnorm": "1.446", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "144511"} +[2022-08-01 02:59:22,571][train_inner][INFO] - {"epoch": 10, "update": 9.392, "loss": "2.12", "ppl": "4.35", "wps": "395024", "ups": "3.34", "wpb": "118220", "bsz": "256", "num_updates": "483400", "lr": "5.21818e-05", "gnorm": "1.46", "loss_scale": "8", "train_wall": "60", "gb_free": "24.5", "wall": "144571"} +[2022-08-01 03:00:21,820][train_inner][INFO] - {"epoch": 10, "update": 9.396, "loss": "2.124", "ppl": "4.36", "wps": "399040", "ups": "3.38", "wpb": "118212", "bsz": "256", "num_updates": "483600", "lr": "5.21616e-05", "gnorm": "1.443", "loss_scale": "8", "train_wall": "59", "gb_free": "25.6", "wall": "144630"} +[2022-08-01 03:00:47,068][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 03:01:21,956][train_inner][INFO] - {"epoch": 10, "update": 9.4, "loss": "2.115", "ppl": "4.33", "wps": "393322", "ups": "3.33", "wpb": "118264", "bsz": "256", "num_updates": "483800", "lr": "5.21414e-05", "gnorm": "1.468", "loss_scale": "8", "train_wall": "60", "gb_free": "22.9", "wall": "144690"} +[2022-08-01 03:02:18,143][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 03:02:21,992][train_inner][INFO] - {"epoch": 10, "update": 9.404, "loss": "2.121", "ppl": "4.35", "wps": "394502", "ups": "3.33", "wpb": "118419", "bsz": "256", "num_updates": "484000", "lr": "5.21212e-05", "gnorm": "1.442", "loss_scale": "4", "train_wall": "60", "gb_free": "23.6", "wall": "144750"} +[2022-08-01 03:03:21,867][train_inner][INFO] - {"epoch": 10, "update": 9.408, "loss": "2.12", "ppl": "4.35", "wps": "395046", "ups": "3.34", "wpb": "118267", "bsz": "256", "num_updates": "484200", "lr": "5.2101e-05", "gnorm": "1.489", "loss_scale": "4", "train_wall": "60", "gb_free": "22.7", "wall": "144810"} +[2022-08-01 03:04:21,246][train_inner][INFO] - {"epoch": 10, "update": 9.412, "loss": "2.13", "ppl": "4.38", "wps": "397032", "ups": "3.37", "wpb": "117876", "bsz": "256", "num_updates": "484400", "lr": "5.20808e-05", "gnorm": "1.518", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "144870"} +[2022-08-01 03:05:20,897][train_inner][INFO] - {"epoch": 10, "update": 9.415, "loss": "2.116", "ppl": "4.34", "wps": "396560", "ups": "3.35", "wpb": "118276", "bsz": "256", "num_updates": "484600", "lr": "5.20606e-05", "gnorm": "1.484", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "144929"} +[2022-08-01 03:06:20,496][train_inner][INFO] - {"epoch": 10, "update": 9.419, "loss": "2.123", "ppl": "4.36", "wps": "396391", "ups": "3.36", "wpb": "118122", "bsz": "256", "num_updates": "484800", "lr": "5.20404e-05", "gnorm": "1.522", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "144989"} +[2022-08-01 03:07:20,168][train_inner][INFO] - {"epoch": 10, "update": 9.423, "loss": "2.114", "ppl": "4.33", "wps": "397936", "ups": "3.35", "wpb": "118727", "bsz": "256", "num_updates": "485000", "lr": "5.20202e-05", "gnorm": "1.501", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "145049"} +[2022-08-01 03:08:20,055][train_inner][INFO] - {"epoch": 10, "update": 9.427, "loss": "2.12", "ppl": "4.35", "wps": "395923", "ups": "3.34", "wpb": "118552", "bsz": "256", "num_updates": "485200", "lr": "5.2e-05", "gnorm": "1.494", "loss_scale": "4", "train_wall": "60", "gb_free": "25.6", "wall": "145108"} +[2022-08-01 03:09:19,393][train_inner][INFO] - {"epoch": 10, "update": 9.431, "loss": "2.114", "ppl": "4.33", "wps": "398131", "ups": "3.37", "wpb": "118120", "bsz": "256", "num_updates": "485400", "lr": "5.19798e-05", "gnorm": "1.499", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "145168"} +[2022-08-01 03:10:18,789][train_inner][INFO] - {"epoch": 10, "update": 9.435, "loss": "2.122", "ppl": "4.35", "wps": "399938", "ups": "3.37", "wpb": "118773", "bsz": "256", "num_updates": "485600", "lr": "5.19596e-05", "gnorm": "1.53", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "145227"} +[2022-08-01 03:11:18,142][train_inner][INFO] - {"epoch": 10, "update": 9.439, "loss": "2.114", "ppl": "4.33", "wps": "397847", "ups": "3.37", "wpb": "118066", "bsz": "256", "num_updates": "485800", "lr": "5.19394e-05", "gnorm": "1.531", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "145287"} +[2022-08-01 03:12:17,633][train_inner][INFO] - {"epoch": 10, "update": 9.443, "loss": "2.117", "ppl": "4.34", "wps": "397586", "ups": "3.36", "wpb": "118263", "bsz": "256", "num_updates": "486000", "lr": "5.19192e-05", "gnorm": "1.529", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "145346"} +[2022-08-01 03:12:40,033][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 03:13:17,082][train_inner][INFO] - {"epoch": 10, "update": 9.446, "loss": "2.115", "ppl": "4.33", "wps": "399343", "ups": "3.36", "wpb": "118703", "bsz": "256", "num_updates": "486200", "lr": "5.1899e-05", "gnorm": "1.558", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "145406"} +[2022-08-01 03:14:16,745][train_inner][INFO] - {"epoch": 10, "update": 9.45, "loss": "2.118", "ppl": "4.34", "wps": "396090", "ups": "3.35", "wpb": "118158", "bsz": "256", "num_updates": "486400", "lr": "5.18788e-05", "gnorm": "1.556", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "145465"} +[2022-08-01 03:15:16,271][train_inner][INFO] - {"epoch": 10, "update": 9.454, "loss": "2.118", "ppl": "4.34", "wps": "397701", "ups": "3.36", "wpb": "118367", "bsz": "256", "num_updates": "486600", "lr": "5.18586e-05", "gnorm": "1.533", "loss_scale": "4", "train_wall": "59", "gb_free": "24.7", "wall": "145525"} +[2022-08-01 03:16:16,047][train_inner][INFO] - {"epoch": 10, "update": 9.458, "loss": "2.117", "ppl": "4.34", "wps": "395390", "ups": "3.35", "wpb": "118172", "bsz": "256", "num_updates": "486800", "lr": "5.18384e-05", "gnorm": "1.565", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "145584"} +[2022-08-01 03:17:15,352][train_inner][INFO] - {"epoch": 10, "update": 9.462, "loss": "2.116", "ppl": "4.33", "wps": "400134", "ups": "3.37", "wpb": "118649", "bsz": "256", "num_updates": "487000", "lr": "5.18182e-05", "gnorm": "1.647", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "145644"} +[2022-08-01 03:18:15,139][train_inner][INFO] - {"epoch": 10, "update": 9.466, "loss": "2.121", "ppl": "4.35", "wps": "396791", "ups": "3.35", "wpb": "118615", "bsz": "256", "num_updates": "487200", "lr": "5.1798e-05", "gnorm": "1.625", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "145704"} +[2022-08-01 03:19:14,457][train_inner][INFO] - {"epoch": 10, "update": 9.47, "loss": "2.119", "ppl": "4.34", "wps": "398059", "ups": "3.37", "wpb": "118058", "bsz": "256", "num_updates": "487400", "lr": "5.17778e-05", "gnorm": "1.685", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "145763"} +[2022-08-01 03:20:13,916][train_inner][INFO] - {"epoch": 10, "update": 9.474, "loss": "2.122", "ppl": "4.35", "wps": "398792", "ups": "3.36", "wpb": "118558", "bsz": "256", "num_updates": "487600", "lr": "5.17576e-05", "gnorm": "1.612", "loss_scale": "4", "train_wall": "59", "gb_free": "23.9", "wall": "145822"} +[2022-08-01 03:21:13,541][train_inner][INFO] - {"epoch": 10, "update": 9.478, "loss": "2.118", "ppl": "4.34", "wps": "396867", "ups": "3.35", "wpb": "118316", "bsz": "256", "num_updates": "487800", "lr": "5.17374e-05", "gnorm": "1.643", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "145882"} +[2022-08-01 03:22:12,748][train_inner][INFO] - {"epoch": 10, "update": 9.481, "loss": "2.117", "ppl": "4.34", "wps": "400216", "ups": "3.38", "wpb": "118476", "bsz": "256", "num_updates": "488000", "lr": "5.17172e-05", "gnorm": "1.647", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "145941"} +[2022-08-01 03:23:06,681][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 03:23:12,772][train_inner][INFO] - {"epoch": 10, "update": 9.485, "loss": "2.114", "ppl": "4.33", "wps": "394951", "ups": "3.33", "wpb": "118532", "bsz": "256", "num_updates": "488200", "lr": "5.1697e-05", "gnorm": "1.689", "loss_scale": "4", "train_wall": "60", "gb_free": "22.1", "wall": "146001"} +[2022-08-01 03:24:12,547][train_inner][INFO] - {"epoch": 10, "update": 9.489, "loss": "2.114", "ppl": "4.33", "wps": "396121", "ups": "3.35", "wpb": "118390", "bsz": "256", "num_updates": "488400", "lr": "5.16768e-05", "gnorm": "1.647", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "146061"} +[2022-08-01 03:25:12,161][train_inner][INFO] - {"epoch": 10, "update": 9.493, "loss": "2.115", "ppl": "4.33", "wps": "399018", "ups": "3.35", "wpb": "118935", "bsz": "256", "num_updates": "488600", "lr": "5.16566e-05", "gnorm": "1.704", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "146121"} +[2022-08-01 03:26:11,590][train_inner][INFO] - {"epoch": 10, "update": 9.497, "loss": "2.114", "ppl": "4.33", "wps": "397563", "ups": "3.37", "wpb": "118133", "bsz": "256", "num_updates": "488800", "lr": "5.16364e-05", "gnorm": "1.644", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "146180"} +[2022-08-01 03:27:10,879][train_inner][INFO] - {"epoch": 10, "update": 9.501, "loss": "2.117", "ppl": "4.34", "wps": "400239", "ups": "3.37", "wpb": "118649", "bsz": "256", "num_updates": "489000", "lr": "5.16162e-05", "gnorm": "1.651", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "146239"} +[2022-08-01 03:28:10,169][train_inner][INFO] - {"epoch": 10, "update": 9.505, "loss": "2.122", "ppl": "4.35", "wps": "399395", "ups": "3.37", "wpb": "118399", "bsz": "256", "num_updates": "489200", "lr": "5.1596e-05", "gnorm": "1.622", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "146299"} +[2022-08-01 03:29:09,466][train_inner][INFO] - {"epoch": 10, "update": 9.509, "loss": "2.12", "ppl": "4.35", "wps": "398175", "ups": "3.37", "wpb": "118052", "bsz": "256", "num_updates": "489400", "lr": "5.15758e-05", "gnorm": "1.727", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "146358"} +[2022-08-01 03:30:08,950][train_inner][INFO] - {"epoch": 10, "update": 9.513, "loss": "2.117", "ppl": "4.34", "wps": "396911", "ups": "3.36", "wpb": "118049", "bsz": "256", "num_updates": "489600", "lr": "5.15556e-05", "gnorm": "1.595", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "146417"} +[2022-08-01 03:31:08,300][train_inner][INFO] - {"epoch": 10, "update": 9.516, "loss": "2.117", "ppl": "4.34", "wps": "398944", "ups": "3.37", "wpb": "118387", "bsz": "256", "num_updates": "489800", "lr": "5.15354e-05", "gnorm": "1.697", "loss_scale": "4", "train_wall": "59", "gb_free": "25.8", "wall": "146477"} +[2022-08-01 03:32:07,234][train_inner][INFO] - {"epoch": 10, "update": 9.52, "loss": "2.117", "ppl": "4.34", "wps": "401077", "ups": "3.39", "wpb": "118185", "bsz": "256", "num_updates": "490000", "lr": "5.15152e-05", "gnorm": "1.763", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "146536"} +[2022-08-01 03:33:06,207][train_inner][INFO] - {"epoch": 10, "update": 9.524, "loss": "2.122", "ppl": "4.35", "wps": "399672", "ups": "3.39", "wpb": "117847", "bsz": "256", "num_updates": "490200", "lr": "5.14949e-05", "gnorm": "1.67", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "146595"} +[2022-08-01 03:33:19,100][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 03:34:05,656][train_inner][INFO] - {"epoch": 10, "update": 9.528, "loss": "2.116", "ppl": "4.34", "wps": "398696", "ups": "3.36", "wpb": "118510", "bsz": "256", "num_updates": "490400", "lr": "5.14747e-05", "gnorm": "1.681", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "146654"} +[2022-08-01 03:35:05,490][train_inner][INFO] - {"epoch": 10, "update": 9.532, "loss": "2.118", "ppl": "4.34", "wps": "394305", "ups": "3.34", "wpb": "117963", "bsz": "255.9", "num_updates": "490600", "lr": "5.14545e-05", "gnorm": "1.692", "loss_scale": "4", "train_wall": "59", "gb_free": "27.7", "wall": "146714"} +[2022-08-01 03:36:04,599][train_inner][INFO] - {"epoch": 10, "update": 9.536, "loss": "2.117", "ppl": "4.34", "wps": "399904", "ups": "3.38", "wpb": "118189", "bsz": "256", "num_updates": "490800", "lr": "5.14343e-05", "gnorm": "1.755", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "146773"} +[2022-08-01 03:37:03,556][train_inner][INFO] - {"epoch": 10, "update": 9.54, "loss": "2.117", "ppl": "4.34", "wps": "399634", "ups": "3.39", "wpb": "117806", "bsz": "256", "num_updates": "491000", "lr": "5.14141e-05", "gnorm": "1.674", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "146832"} +[2022-08-01 03:38:04,009][train_inner][INFO] - {"epoch": 10, "update": 9.544, "loss": "2.117", "ppl": "4.34", "wps": "390867", "ups": "3.31", "wpb": "118144", "bsz": "256", "num_updates": "491200", "lr": "5.13939e-05", "gnorm": "1.676", "loss_scale": "4", "train_wall": "60", "gb_free": "22.1", "wall": "146892"} +[2022-08-01 03:39:03,238][train_inner][INFO] - {"epoch": 10, "update": 9.548, "loss": "2.119", "ppl": "4.34", "wps": "399180", "ups": "3.38", "wpb": "118214", "bsz": "256", "num_updates": "491400", "lr": "5.13737e-05", "gnorm": "1.671", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "146952"} +[2022-08-01 03:40:02,286][train_inner][INFO] - {"epoch": 10, "update": 9.551, "loss": "2.117", "ppl": "4.34", "wps": "399352", "ups": "3.39", "wpb": "117903", "bsz": "256", "num_updates": "491600", "lr": "5.13535e-05", "gnorm": "1.744", "loss_scale": "4", "train_wall": "59", "gb_free": "25.9", "wall": "147011"} +[2022-08-01 03:41:01,985][train_inner][INFO] - {"epoch": 10, "update": 9.555, "loss": "2.114", "ppl": "4.33", "wps": "396766", "ups": "3.35", "wpb": "118431", "bsz": "256", "num_updates": "491800", "lr": "5.13333e-05", "gnorm": "1.714", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "147070"} +[2022-08-01 03:42:01,538][train_inner][INFO] - {"epoch": 10, "update": 9.559, "loss": "2.115", "ppl": "4.33", "wps": "398713", "ups": "3.36", "wpb": "118723", "bsz": "256", "num_updates": "492000", "lr": "5.13131e-05", "gnorm": "1.775", "loss_scale": "4", "train_wall": "59", "gb_free": "28.4", "wall": "147130"} +[2022-08-01 03:43:01,345][train_inner][INFO] - {"epoch": 10, "update": 9.563, "loss": "2.121", "ppl": "4.35", "wps": "393867", "ups": "3.34", "wpb": "117779", "bsz": "256", "num_updates": "492200", "lr": "5.12929e-05", "gnorm": "1.751", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "147190"} +[2022-08-01 03:43:31,141][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 03:44:01,098][train_inner][INFO] - {"epoch": 10, "update": 9.567, "loss": "2.116", "ppl": "4.33", "wps": "395043", "ups": "3.35", "wpb": "118023", "bsz": "256", "num_updates": "492400", "lr": "5.12727e-05", "gnorm": "1.79", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "147250"} +[2022-08-01 03:45:00,251][train_inner][INFO] - {"epoch": 10, "update": 9.571, "loss": "2.122", "ppl": "4.35", "wps": "398599", "ups": "3.38", "wpb": "117893", "bsz": "256", "num_updates": "492600", "lr": "5.12525e-05", "gnorm": "1.669", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "147309"} +[2022-08-01 03:45:59,487][train_inner][INFO] - {"epoch": 10, "update": 9.575, "loss": "2.122", "ppl": "4.35", "wps": "399590", "ups": "3.38", "wpb": "118348", "bsz": "256", "num_updates": "492800", "lr": "5.12323e-05", "gnorm": "1.745", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "147368"} +[2022-08-01 03:46:59,296][train_inner][INFO] - {"epoch": 10, "update": 9.579, "loss": "2.119", "ppl": "4.34", "wps": "394470", "ups": "3.34", "wpb": "117965", "bsz": "256", "num_updates": "493000", "lr": "5.12121e-05", "gnorm": "1.741", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "147428"} +[2022-08-01 03:47:58,563][train_inner][INFO] - {"epoch": 10, "update": 9.583, "loss": "2.121", "ppl": "4.35", "wps": "397636", "ups": "3.37", "wpb": "117833", "bsz": "256", "num_updates": "493200", "lr": "5.11919e-05", "gnorm": "1.732", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "147487"} +[2022-08-01 03:48:58,061][train_inner][INFO] - {"epoch": 10, "update": 9.586, "loss": "2.118", "ppl": "4.34", "wps": "398971", "ups": "3.36", "wpb": "118689", "bsz": "256", "num_updates": "493400", "lr": "5.11717e-05", "gnorm": "1.806", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "147546"} +[2022-08-01 03:49:57,425][train_inner][INFO] - {"epoch": 10, "update": 9.59, "loss": "2.122", "ppl": "4.35", "wps": "397793", "ups": "3.37", "wpb": "118072", "bsz": "256", "num_updates": "493600", "lr": "5.11515e-05", "gnorm": "1.785", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "147606"} +[2022-08-01 03:50:57,090][train_inner][INFO] - {"epoch": 10, "update": 9.594, "loss": "2.119", "ppl": "4.34", "wps": "396460", "ups": "3.35", "wpb": "118272", "bsz": "256", "num_updates": "493800", "lr": "5.11313e-05", "gnorm": "1.807", "loss_scale": "4", "train_wall": "59", "gb_free": "24.9", "wall": "147666"} +[2022-08-01 03:51:56,260][train_inner][INFO] - {"epoch": 10, "update": 9.598, "loss": "2.119", "ppl": "4.34", "wps": "399160", "ups": "3.38", "wpb": "118092", "bsz": "256", "num_updates": "494000", "lr": "5.11111e-05", "gnorm": "1.797", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "147725"} +[2022-08-01 03:52:55,988][train_inner][INFO] - {"epoch": 10, "update": 9.602, "loss": "2.118", "ppl": "4.34", "wps": "396145", "ups": "3.35", "wpb": "118303", "bsz": "256", "num_updates": "494200", "lr": "5.10909e-05", "gnorm": "1.9", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "147784"} +[2022-08-01 03:53:40,715][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 03:53:55,968][train_inner][INFO] - {"epoch": 10, "update": 9.606, "loss": "2.118", "ppl": "4.34", "wps": "392614", "ups": "3.33", "wpb": "117744", "bsz": "256", "num_updates": "494400", "lr": "5.10707e-05", "gnorm": "1.877", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "147844"} +[2022-08-01 03:54:55,609][train_inner][INFO] - {"epoch": 10, "update": 9.61, "loss": "2.114", "ppl": "4.33", "wps": "398417", "ups": "3.35", "wpb": "118810", "bsz": "256", "num_updates": "494600", "lr": "5.10505e-05", "gnorm": "1.743", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "147904"} +[2022-08-01 03:55:55,242][train_inner][INFO] - {"epoch": 10, "update": 9.614, "loss": "2.113", "ppl": "4.33", "wps": "399037", "ups": "3.35", "wpb": "118977", "bsz": "256", "num_updates": "494800", "lr": "5.10303e-05", "gnorm": "1.795", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "147964"} +[2022-08-01 03:56:54,362][train_inner][INFO] - {"epoch": 10, "update": 9.617, "loss": "2.115", "ppl": "4.33", "wps": "399611", "ups": "3.38", "wpb": "118124", "bsz": "256", "num_updates": "495000", "lr": "5.10101e-05", "gnorm": "1.802", "loss_scale": "4", "train_wall": "59", "gb_free": "28.3", "wall": "148023"} +[2022-08-01 03:57:53,741][train_inner][INFO] - {"epoch": 10, "update": 9.621, "loss": "2.115", "ppl": "4.33", "wps": "400105", "ups": "3.37", "wpb": "118790", "bsz": "256", "num_updates": "495200", "lr": "5.09899e-05", "gnorm": "1.828", "loss_scale": "4", "train_wall": "59", "gb_free": "25.1", "wall": "148082"} +[2022-08-01 03:58:53,036][train_inner][INFO] - {"epoch": 10, "update": 9.625, "loss": "2.12", "ppl": "4.35", "wps": "397159", "ups": "3.37", "wpb": "117747", "bsz": "256", "num_updates": "495400", "lr": "5.09697e-05", "gnorm": "1.781", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "148141"} +[2022-08-01 03:59:52,580][train_inner][INFO] - {"epoch": 10, "update": 9.629, "loss": "2.115", "ppl": "4.33", "wps": "396478", "ups": "3.36", "wpb": "118039", "bsz": "256", "num_updates": "495600", "lr": "5.09495e-05", "gnorm": "1.843", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "148201"} +[2022-08-01 04:00:51,705][train_inner][INFO] - {"epoch": 10, "update": 9.633, "loss": "2.115", "ppl": "4.33", "wps": "399151", "ups": "3.38", "wpb": "117997", "bsz": "256", "num_updates": "495800", "lr": "5.09293e-05", "gnorm": "1.734", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "148260"} +[2022-08-01 04:01:51,475][train_inner][INFO] - {"epoch": 10, "update": 9.637, "loss": "2.114", "ppl": "4.33", "wps": "396397", "ups": "3.35", "wpb": "118462", "bsz": "256", "num_updates": "496000", "lr": "5.09091e-05", "gnorm": "1.857", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "148320"} +[2022-08-01 04:02:51,105][train_inner][INFO] - {"epoch": 10, "update": 9.641, "loss": "2.119", "ppl": "4.34", "wps": "394276", "ups": "3.35", "wpb": "117553", "bsz": "256", "num_updates": "496200", "lr": "5.08889e-05", "gnorm": "1.902", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "148380"} +[2022-08-01 04:03:50,614][train_inner][INFO] - {"epoch": 10, "update": 9.645, "loss": "2.112", "ppl": "4.32", "wps": "399902", "ups": "3.36", "wpb": "118988", "bsz": "256", "num_updates": "496400", "lr": "5.08687e-05", "gnorm": "1.806", "loss_scale": "8", "train_wall": "59", "gb_free": "22.1", "wall": "148439"} +[2022-08-01 04:03:50,908][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 04:04:50,092][train_inner][INFO] - {"epoch": 10, "update": 9.649, "loss": "2.121", "ppl": "4.35", "wps": "397112", "ups": "3.36", "wpb": "118096", "bsz": "256", "num_updates": "496600", "lr": "5.08485e-05", "gnorm": "1.916", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "148499"} +[2022-08-01 04:05:49,443][train_inner][INFO] - {"epoch": 10, "update": 9.652, "loss": "2.115", "ppl": "4.33", "wps": "398281", "ups": "3.37", "wpb": "118192", "bsz": "256", "num_updates": "496800", "lr": "5.08283e-05", "gnorm": "1.833", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "148558"} +[2022-08-01 04:06:49,152][train_inner][INFO] - {"epoch": 10, "update": 9.656, "loss": "2.116", "ppl": "4.34", "wps": "399661", "ups": "3.35", "wpb": "119316", "bsz": "256", "num_updates": "497000", "lr": "5.08081e-05", "gnorm": "1.896", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "148618"} +[2022-08-01 04:07:48,241][train_inner][INFO] - {"epoch": 10, "update": 9.66, "loss": "2.12", "ppl": "4.35", "wps": "400106", "ups": "3.38", "wpb": "118208", "bsz": "256", "num_updates": "497200", "lr": "5.07879e-05", "gnorm": "1.828", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "148677"} +[2022-08-01 04:08:48,549][train_inner][INFO] - {"epoch": 10, "update": 9.664, "loss": "2.116", "ppl": "4.34", "wps": "391982", "ups": "3.32", "wpb": "118199", "bsz": "256", "num_updates": "497400", "lr": "5.07677e-05", "gnorm": "1.764", "loss_scale": "4", "train_wall": "60", "gb_free": "24.4", "wall": "148737"} +[2022-08-01 04:09:47,733][train_inner][INFO] - {"epoch": 10, "update": 9.668, "loss": "2.118", "ppl": "4.34", "wps": "400265", "ups": "3.38", "wpb": "118445", "bsz": "256", "num_updates": "497600", "lr": "5.07475e-05", "gnorm": "1.78", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "148796"} +[2022-08-01 04:10:47,352][train_inner][INFO] - {"epoch": 10, "update": 9.672, "loss": "2.113", "ppl": "4.33", "wps": "397267", "ups": "3.35", "wpb": "118422", "bsz": "256", "num_updates": "497800", "lr": "5.07273e-05", "gnorm": "1.946", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "148856"} +[2022-08-01 04:11:46,756][train_inner][INFO] - {"epoch": 10, "update": 9.676, "loss": "2.123", "ppl": "4.36", "wps": "397878", "ups": "3.37", "wpb": "118178", "bsz": "256", "num_updates": "498000", "lr": "5.07071e-05", "gnorm": "1.784", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "148915"} +[2022-08-01 04:12:47,437][train_inner][INFO] - {"epoch": 10, "update": 9.68, "loss": "2.117", "ppl": "4.34", "wps": "389975", "ups": "3.3", "wpb": "118320", "bsz": "256", "num_updates": "498200", "lr": "5.06869e-05", "gnorm": "1.953", "loss_scale": "4", "train_wall": "60", "gb_free": "25.8", "wall": "148976"} +[2022-08-01 04:13:46,995][train_inner][INFO] - {"epoch": 10, "update": 9.684, "loss": "2.115", "ppl": "4.33", "wps": "398402", "ups": "3.36", "wpb": "118637", "bsz": "256", "num_updates": "498400", "lr": "5.06667e-05", "gnorm": "1.842", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "149035"} +[2022-08-01 04:14:02,512][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 04:14:46,850][train_inner][INFO] - {"epoch": 10, "update": 9.687, "loss": "2.121", "ppl": "4.35", "wps": "393221", "ups": "3.34", "wpb": "117681", "bsz": "256", "num_updates": "498600", "lr": "5.06465e-05", "gnorm": "1.833", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "149095"} +[2022-08-01 04:15:46,460][train_inner][INFO] - {"epoch": 10, "update": 9.691, "loss": "2.11", "ppl": "4.32", "wps": "397671", "ups": "3.36", "wpb": "118524", "bsz": "256", "num_updates": "498800", "lr": "5.06263e-05", "gnorm": "1.886", "loss_scale": "4", "train_wall": "59", "gb_free": "27.2", "wall": "149155"} +[2022-08-01 04:16:46,428][train_inner][INFO] - {"epoch": 10, "update": 9.695, "loss": "2.114", "ppl": "4.33", "wps": "394657", "ups": "3.34", "wpb": "118333", "bsz": "256", "num_updates": "499000", "lr": "5.06061e-05", "gnorm": "1.876", "loss_scale": "4", "train_wall": "60", "gb_free": "21.9", "wall": "149215"} +[2022-08-01 04:17:19,870][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 04:17:46,041][train_inner][INFO] - {"epoch": 10, "update": 9.699, "loss": "2.113", "ppl": "4.32", "wps": "395571", "ups": "3.35", "wpb": "117905", "bsz": "256", "num_updates": "499200", "lr": "5.05859e-05", "gnorm": "2.074", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "149274"} +[2022-08-01 04:18:45,685][train_inner][INFO] - {"epoch": 10, "update": 9.703, "loss": "2.115", "ppl": "4.33", "wps": "397126", "ups": "3.35", "wpb": "118431", "bsz": "256", "num_updates": "499400", "lr": "5.05657e-05", "gnorm": "2.047", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "149334"} +[2022-08-01 04:19:45,406][train_inner][INFO] - {"epoch": 10, "update": 9.707, "loss": "2.119", "ppl": "4.34", "wps": "393408", "ups": "3.35", "wpb": "117473", "bsz": "256", "num_updates": "499600", "lr": "5.05455e-05", "gnorm": "1.881", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "149394"} +[2022-08-01 04:20:44,531][train_inner][INFO] - {"epoch": 10, "update": 9.711, "loss": "2.115", "ppl": "4.33", "wps": "402602", "ups": "3.38", "wpb": "119017", "bsz": "256", "num_updates": "499800", "lr": "5.05253e-05", "gnorm": "1.926", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "149453"} +[2022-08-01 04:21:44,925][train_inner][INFO] - {"epoch": 10, "update": 9.715, "loss": "2.115", "ppl": "4.33", "wps": "390685", "ups": "3.31", "wpb": "117974", "bsz": "256", "num_updates": "500000", "lr": "5.05051e-05", "gnorm": "1.895", "loss_scale": "2", "train_wall": "60", "gb_free": "22.1", "wall": "149513"} +[2022-08-01 04:21:44,925][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 04:22:07,604][valid][INFO] - {"epoch": 10, "valid_loss": "1.998", "valid_ppl": "4", "valid_wps": "1.58102e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "500000", "valid_best_loss": "1.998"} +[2022-08-01 04:22:07,607][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 500000 updates +[2022-08-01 04:22:07,607][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_10_500000.pt +[2022-08-01 04:22:13,786][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_10_500000.pt +[2022-08-01 04:22:32,780][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_10_500000.pt (epoch 10 @ 500000 updates, score 1.998) (writing took 25.172977281734347 seconds) +[2022-08-01 04:23:32,373][train_inner][INFO] - {"epoch": 10, "update": 9.719, "loss": "2.113", "ppl": "4.33", "wps": "220540", "ups": "1.86", "wpb": "118483", "bsz": "256", "num_updates": "500200", "lr": "5.04848e-05", "gnorm": "1.926", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "149621"} +[2022-08-01 04:24:32,007][train_inner][INFO] - {"epoch": 10, "update": 9.722, "loss": "2.116", "ppl": "4.34", "wps": "396830", "ups": "3.35", "wpb": "118323", "bsz": "256", "num_updates": "500400", "lr": "5.04646e-05", "gnorm": "1.85", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "149680"} +[2022-08-01 04:25:31,671][train_inner][INFO] - {"epoch": 10, "update": 9.726, "loss": "2.113", "ppl": "4.33", "wps": "397638", "ups": "3.35", "wpb": "118622", "bsz": "256", "num_updates": "500600", "lr": "5.04444e-05", "gnorm": "2.015", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "149740"} +[2022-08-01 04:26:32,062][train_inner][INFO] - {"epoch": 10, "update": 9.73, "loss": "2.108", "ppl": "4.31", "wps": "392639", "ups": "3.31", "wpb": "118560", "bsz": "256", "num_updates": "500800", "lr": "5.04242e-05", "gnorm": "1.83", "loss_scale": "2", "train_wall": "60", "gb_free": "22.8", "wall": "149800"} +[2022-08-01 04:27:31,191][train_inner][INFO] - {"epoch": 10, "update": 9.734, "loss": "2.114", "ppl": "4.33", "wps": "399896", "ups": "3.38", "wpb": "118225", "bsz": "256", "num_updates": "501000", "lr": "5.0404e-05", "gnorm": "1.926", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "149860"} +[2022-08-01 04:28:30,946][train_inner][INFO] - {"epoch": 10, "update": 9.738, "loss": "2.104", "ppl": "4.3", "wps": "397839", "ups": "3.35", "wpb": "118863", "bsz": "256", "num_updates": "501200", "lr": "5.03838e-05", "gnorm": "1.965", "loss_scale": "4", "train_wall": "59", "gb_free": "32.3", "wall": "149919"} +[2022-08-01 04:29:29,923][train_inner][INFO] - {"epoch": 10, "update": 9.742, "loss": "2.122", "ppl": "4.35", "wps": "400836", "ups": "3.39", "wpb": "118201", "bsz": "256", "num_updates": "501400", "lr": "5.03636e-05", "gnorm": "1.945", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "149978"} +[2022-08-01 04:30:29,500][train_inner][INFO] - {"epoch": 10, "update": 9.746, "loss": "2.113", "ppl": "4.32", "wps": "397789", "ups": "3.36", "wpb": "118495", "bsz": "256", "num_updates": "501600", "lr": "5.03434e-05", "gnorm": "1.923", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "150038"} +[2022-08-01 04:31:29,014][train_inner][INFO] - {"epoch": 10, "update": 9.75, "loss": "2.121", "ppl": "4.35", "wps": "396866", "ups": "3.36", "wpb": "118095", "bsz": "256", "num_updates": "501800", "lr": "5.03232e-05", "gnorm": "1.999", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "150097"} +[2022-08-01 04:32:28,882][train_inner][INFO] - {"epoch": 10, "update": 9.753, "loss": "2.112", "ppl": "4.32", "wps": "398030", "ups": "3.34", "wpb": "119146", "bsz": "256", "num_updates": "502000", "lr": "5.0303e-05", "gnorm": "1.908", "loss_scale": "4", "train_wall": "60", "gb_free": "22.9", "wall": "150157"} +[2022-08-01 04:33:28,590][train_inner][INFO] - {"epoch": 10, "update": 9.757, "loss": "2.118", "ppl": "4.34", "wps": "394143", "ups": "3.35", "wpb": "117667", "bsz": "256", "num_updates": "502200", "lr": "5.02828e-05", "gnorm": "1.981", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "150217"} +[2022-08-01 04:34:27,991][train_inner][INFO] - {"epoch": 10, "update": 9.761, "loss": "2.115", "ppl": "4.33", "wps": "399496", "ups": "3.37", "wpb": "118651", "bsz": "256", "num_updates": "502400", "lr": "5.02626e-05", "gnorm": "1.945", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "150276"} +[2022-08-01 04:35:27,104][train_inner][INFO] - {"epoch": 10, "update": 9.765, "loss": "2.117", "ppl": "4.34", "wps": "398488", "ups": "3.38", "wpb": "117779", "bsz": "256", "num_updates": "502600", "lr": "5.02424e-05", "gnorm": "1.983", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "150336"} +[2022-08-01 04:35:40,783][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 04:36:27,181][train_inner][INFO] - {"epoch": 10, "update": 9.769, "loss": "2.103", "ppl": "4.3", "wps": "395000", "ups": "3.33", "wpb": "118651", "bsz": "256", "num_updates": "502800", "lr": "5.02222e-05", "gnorm": "1.869", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "150396"} +[2022-08-01 04:37:26,984][train_inner][INFO] - {"epoch": 10, "update": 9.773, "loss": "2.11", "ppl": "4.32", "wps": "395638", "ups": "3.34", "wpb": "118300", "bsz": "256", "num_updates": "503000", "lr": "5.0202e-05", "gnorm": "2.054", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "150455"} +[2022-08-01 04:38:26,518][train_inner][INFO] - {"epoch": 10, "update": 9.777, "loss": "2.113", "ppl": "4.32", "wps": "396394", "ups": "3.36", "wpb": "117995", "bsz": "256", "num_updates": "503200", "lr": "5.01818e-05", "gnorm": "1.93", "loss_scale": "2", "train_wall": "59", "gb_free": "29.8", "wall": "150515"} +[2022-08-01 04:39:26,338][train_inner][INFO] - {"epoch": 10, "update": 9.781, "loss": "2.12", "ppl": "4.35", "wps": "394544", "ups": "3.34", "wpb": "118007", "bsz": "256", "num_updates": "503400", "lr": "5.01616e-05", "gnorm": "2.081", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "150575"} +[2022-08-01 04:40:25,612][train_inner][INFO] - {"epoch": 10, "update": 9.785, "loss": "2.122", "ppl": "4.35", "wps": "401522", "ups": "3.37", "wpb": "118999", "bsz": "256", "num_updates": "503600", "lr": "5.01414e-05", "gnorm": "1.896", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "150634"} +[2022-08-01 04:41:24,966][train_inner][INFO] - {"epoch": 10, "update": 9.788, "loss": "2.107", "ppl": "4.31", "wps": "397931", "ups": "3.37", "wpb": "118093", "bsz": "256", "num_updates": "503800", "lr": "5.01212e-05", "gnorm": "1.905", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "150693"} +[2022-08-01 04:42:24,395][train_inner][INFO] - {"epoch": 10, "update": 9.792, "loss": "2.112", "ppl": "4.32", "wps": "397518", "ups": "3.37", "wpb": "118120", "bsz": "256", "num_updates": "504000", "lr": "5.0101e-05", "gnorm": "2.037", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "150753"} +[2022-08-01 04:43:23,524][train_inner][INFO] - {"epoch": 10, "update": 9.796, "loss": "2.111", "ppl": "4.32", "wps": "397962", "ups": "3.38", "wpb": "117654", "bsz": "256", "num_updates": "504200", "lr": "5.00808e-05", "gnorm": "1.924", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "150812"} +[2022-08-01 04:44:22,734][train_inner][INFO] - {"epoch": 10, "update": 9.8, "loss": "2.114", "ppl": "4.33", "wps": "401685", "ups": "3.38", "wpb": "118917", "bsz": "256", "num_updates": "504400", "lr": "5.00606e-05", "gnorm": "1.966", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "150871"} +[2022-08-01 04:45:22,082][train_inner][INFO] - {"epoch": 10, "update": 9.804, "loss": "2.116", "ppl": "4.33", "wps": "398260", "ups": "3.37", "wpb": "118179", "bsz": "256", "num_updates": "504600", "lr": "5.00404e-05", "gnorm": "2.112", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "150931"} +[2022-08-01 04:46:21,640][train_inner][INFO] - {"epoch": 10, "update": 9.808, "loss": "2.116", "ppl": "4.34", "wps": "394946", "ups": "3.36", "wpb": "117611", "bsz": "256", "num_updates": "504800", "lr": "5.00202e-05", "gnorm": "2.027", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "150990"} +[2022-08-01 04:47:18,322][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 04:47:21,281][train_inner][INFO] - {"epoch": 10, "update": 9.812, "loss": "2.116", "ppl": "4.34", "wps": "396938", "ups": "3.35", "wpb": "118368", "bsz": "256", "num_updates": "505000", "lr": "5e-05", "gnorm": "1.993", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "151050"} +[2022-08-01 04:48:21,007][train_inner][INFO] - {"epoch": 10, "update": 9.816, "loss": "2.111", "ppl": "4.32", "wps": "397190", "ups": "3.35", "wpb": "118611", "bsz": "256", "num_updates": "505200", "lr": "4.99798e-05", "gnorm": "2.036", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "151109"} +[2022-08-01 04:49:20,416][train_inner][INFO] - {"epoch": 10, "update": 9.82, "loss": "2.111", "ppl": "4.32", "wps": "397770", "ups": "3.37", "wpb": "118155", "bsz": "256", "num_updates": "505400", "lr": "4.99596e-05", "gnorm": "2.058", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "151169"} +[2022-08-01 04:50:19,777][train_inner][INFO] - {"epoch": 10, "update": 9.823, "loss": "2.11", "ppl": "4.32", "wps": "397360", "ups": "3.37", "wpb": "117939", "bsz": "256", "num_updates": "505600", "lr": "4.99394e-05", "gnorm": "1.944", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "151228"} +[2022-08-01 04:51:19,027][train_inner][INFO] - {"epoch": 10, "update": 9.827, "loss": "2.118", "ppl": "4.34", "wps": "398278", "ups": "3.38", "wpb": "117988", "bsz": "256", "num_updates": "505800", "lr": "4.99192e-05", "gnorm": "2.088", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "151287"} +[2022-08-01 04:52:18,166][train_inner][INFO] - {"epoch": 10, "update": 9.831, "loss": "2.112", "ppl": "4.32", "wps": "397771", "ups": "3.38", "wpb": "117618", "bsz": "256", "num_updates": "506000", "lr": "4.9899e-05", "gnorm": "2.181", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "151347"} +[2022-08-01 04:53:17,656][train_inner][INFO] - {"epoch": 10, "update": 9.835, "loss": "2.114", "ppl": "4.33", "wps": "396468", "ups": "3.36", "wpb": "117929", "bsz": "256", "num_updates": "506200", "lr": "4.98788e-05", "gnorm": "2.001", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "151406"} +[2022-08-01 04:54:16,772][train_inner][INFO] - {"epoch": 10, "update": 9.839, "loss": "2.115", "ppl": "4.33", "wps": "401120", "ups": "3.38", "wpb": "118562", "bsz": "256", "num_updates": "506400", "lr": "4.98586e-05", "gnorm": "1.996", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "151465"} +[2022-08-01 04:55:16,126][train_inner][INFO] - {"epoch": 10, "update": 9.843, "loss": "2.111", "ppl": "4.32", "wps": "397592", "ups": "3.37", "wpb": "117991", "bsz": "256", "num_updates": "506600", "lr": "4.98384e-05", "gnorm": "2.143", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "151525"} +[2022-08-01 04:56:15,622][train_inner][INFO] - {"epoch": 10, "update": 9.847, "loss": "2.11", "ppl": "4.32", "wps": "399336", "ups": "3.36", "wpb": "118794", "bsz": "256", "num_updates": "506800", "lr": "4.98182e-05", "gnorm": "1.939", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "151584"} +[2022-08-01 04:57:14,741][train_inner][INFO] - {"epoch": 10, "update": 9.851, "loss": "2.114", "ppl": "4.33", "wps": "402445", "ups": "3.38", "wpb": "118960", "bsz": "256", "num_updates": "507000", "lr": "4.9798e-05", "gnorm": "2.028", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "151643"} +[2022-08-01 04:58:13,976][train_inner][INFO] - {"epoch": 10, "update": 9.855, "loss": "2.106", "ppl": "4.31", "wps": "401641", "ups": "3.38", "wpb": "118955", "bsz": "256", "num_updates": "507200", "lr": "4.97778e-05", "gnorm": "2.022", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "151702"} +[2022-08-01 04:58:14,541][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 04:59:14,034][train_inner][INFO] - {"epoch": 10, "update": 9.858, "loss": "2.108", "ppl": "4.31", "wps": "391965", "ups": "3.33", "wpb": "117704", "bsz": "256", "num_updates": "507400", "lr": "4.97576e-05", "gnorm": "2.092", "loss_scale": "2", "train_wall": "60", "gb_free": "26.8", "wall": "151762"} +[2022-08-01 05:00:13,560][train_inner][INFO] - {"epoch": 10, "update": 9.862, "loss": "2.112", "ppl": "4.32", "wps": "397249", "ups": "3.36", "wpb": "118232", "bsz": "256", "num_updates": "507600", "lr": "4.97374e-05", "gnorm": "1.963", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "151822"} +[2022-08-01 05:01:13,284][train_inner][INFO] - {"epoch": 10, "update": 9.866, "loss": "2.108", "ppl": "4.31", "wps": "395492", "ups": "3.35", "wpb": "118101", "bsz": "256", "num_updates": "507800", "lr": "4.97172e-05", "gnorm": "2.139", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "151882"} +[2022-08-01 05:02:12,753][train_inner][INFO] - {"epoch": 10, "update": 9.87, "loss": "2.111", "ppl": "4.32", "wps": "398300", "ups": "3.36", "wpb": "118432", "bsz": "256", "num_updates": "508000", "lr": "4.9697e-05", "gnorm": "2.133", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "151941"} +[2022-08-01 05:03:12,701][train_inner][INFO] - {"epoch": 10, "update": 9.874, "loss": "2.113", "ppl": "4.33", "wps": "394217", "ups": "3.34", "wpb": "118161", "bsz": "256", "num_updates": "508200", "lr": "4.96768e-05", "gnorm": "2.184", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "152001"} +[2022-08-01 05:04:12,192][train_inner][INFO] - {"epoch": 10, "update": 9.878, "loss": "2.115", "ppl": "4.33", "wps": "398357", "ups": "3.36", "wpb": "118493", "bsz": "256", "num_updates": "508400", "lr": "4.96566e-05", "gnorm": "2.09", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "152061"} +[2022-08-01 05:05:11,603][train_inner][INFO] - {"epoch": 10, "update": 9.882, "loss": "2.107", "ppl": "4.31", "wps": "398024", "ups": "3.37", "wpb": "118234", "bsz": "256", "num_updates": "508600", "lr": "4.96364e-05", "gnorm": "1.972", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "152120"} +[2022-08-01 05:06:11,254][train_inner][INFO] - {"epoch": 10, "update": 9.886, "loss": "2.11", "ppl": "4.32", "wps": "398237", "ups": "3.35", "wpb": "118776", "bsz": "256", "num_updates": "508800", "lr": "4.96162e-05", "gnorm": "2.261", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "152180"} +[2022-08-01 05:07:10,538][train_inner][INFO] - {"epoch": 10, "update": 9.89, "loss": "2.107", "ppl": "4.31", "wps": "399480", "ups": "3.37", "wpb": "118413", "bsz": "256", "num_updates": "509000", "lr": "4.9596e-05", "gnorm": "2.085", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "152239"} +[2022-08-01 05:08:09,922][train_inner][INFO] - {"epoch": 10, "update": 9.893, "loss": "2.106", "ppl": "4.3", "wps": "397655", "ups": "3.37", "wpb": "118072", "bsz": "256", "num_updates": "509200", "lr": "4.95758e-05", "gnorm": "2.093", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "152298"} +[2022-08-01 05:09:09,522][train_inner][INFO] - {"epoch": 10, "update": 9.897, "loss": "2.118", "ppl": "4.34", "wps": "395294", "ups": "3.36", "wpb": "117796", "bsz": "256", "num_updates": "509400", "lr": "4.95556e-05", "gnorm": "2.083", "loss_scale": "4", "train_wall": "59", "gb_free": "26.1", "wall": "152358"} +[2022-08-01 05:10:09,154][train_inner][INFO] - {"epoch": 10, "update": 9.901, "loss": "2.105", "ppl": "4.3", "wps": "398859", "ups": "3.35", "wpb": "118922", "bsz": "256", "num_updates": "509600", "lr": "4.95354e-05", "gnorm": "2.033", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "152418"} +[2022-08-01 05:10:37,375][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 05:11:09,116][train_inner][INFO] - {"epoch": 10, "update": 9.905, "loss": "2.107", "ppl": "4.31", "wps": "394395", "ups": "3.34", "wpb": "118243", "bsz": "256", "num_updates": "509800", "lr": "4.95152e-05", "gnorm": "2.282", "loss_scale": "2", "train_wall": "60", "gb_free": "23.2", "wall": "152478"} +[2022-08-01 05:12:08,697][train_inner][INFO] - {"epoch": 10, "update": 9.909, "loss": "2.111", "ppl": "4.32", "wps": "396712", "ups": "3.36", "wpb": "118183", "bsz": "256", "num_updates": "510000", "lr": "4.94949e-05", "gnorm": "2.196", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "152537"} +[2022-08-01 05:13:07,773][train_inner][INFO] - {"epoch": 10, "update": 9.913, "loss": "2.118", "ppl": "4.34", "wps": "399680", "ups": "3.39", "wpb": "118056", "bsz": "256", "num_updates": "510200", "lr": "4.94747e-05", "gnorm": "2.027", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "152596"} +[2022-08-01 05:14:07,854][train_inner][INFO] - {"epoch": 10, "update": 9.917, "loss": "2.111", "ppl": "4.32", "wps": "393042", "ups": "3.33", "wpb": "118071", "bsz": "256", "num_updates": "510400", "lr": "4.94545e-05", "gnorm": "2.08", "loss_scale": "2", "train_wall": "60", "gb_free": "28.5", "wall": "152656"} +[2022-08-01 05:15:07,550][train_inner][INFO] - {"epoch": 10, "update": 9.921, "loss": "2.109", "ppl": "4.31", "wps": "397180", "ups": "3.35", "wpb": "118549", "bsz": "256", "num_updates": "510600", "lr": "4.94343e-05", "gnorm": "2.057", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "152716"} +[2022-08-01 05:16:08,139][train_inner][INFO] - {"epoch": 10, "update": 9.924, "loss": "2.111", "ppl": "4.32", "wps": "391054", "ups": "3.3", "wpb": "118467", "bsz": "256", "num_updates": "510800", "lr": "4.94141e-05", "gnorm": "2.092", "loss_scale": "2", "train_wall": "60", "gb_free": "24.6", "wall": "152777"} +[2022-08-01 05:17:07,681][train_inner][INFO] - {"epoch": 10, "update": 9.928, "loss": "2.107", "ppl": "4.31", "wps": "398990", "ups": "3.36", "wpb": "118782", "bsz": "256", "num_updates": "511000", "lr": "4.93939e-05", "gnorm": "2.064", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "152836"} +[2022-08-01 05:18:06,864][train_inner][INFO] - {"epoch": 10, "update": 9.932, "loss": "2.111", "ppl": "4.32", "wps": "396452", "ups": "3.38", "wpb": "117315", "bsz": "256", "num_updates": "511200", "lr": "4.93737e-05", "gnorm": "2.168", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "152895"} +[2022-08-01 05:19:06,707][train_inner][INFO] - {"epoch": 10, "update": 9.936, "loss": "2.107", "ppl": "4.31", "wps": "394926", "ups": "3.34", "wpb": "118166", "bsz": "256", "num_updates": "511400", "lr": "4.93535e-05", "gnorm": "2.16", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "152955"} +[2022-08-01 05:20:06,018][train_inner][INFO] - {"epoch": 10, "update": 9.94, "loss": "2.109", "ppl": "4.31", "wps": "398246", "ups": "3.37", "wpb": "118101", "bsz": "256", "num_updates": "511600", "lr": "4.93333e-05", "gnorm": "2.08", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "153014"} +[2022-08-01 05:21:05,298][train_inner][INFO] - {"epoch": 10, "update": 9.944, "loss": "2.111", "ppl": "4.32", "wps": "398456", "ups": "3.37", "wpb": "118101", "bsz": "256", "num_updates": "511800", "lr": "4.93131e-05", "gnorm": "2.127", "loss_scale": "4", "train_wall": "59", "gb_free": "26.4", "wall": "153074"} +[2022-08-01 05:21:22,291][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 05:22:04,775][train_inner][INFO] - {"epoch": 10, "update": 9.948, "loss": "2.109", "ppl": "4.31", "wps": "397774", "ups": "3.36", "wpb": "118292", "bsz": "256", "num_updates": "512000", "lr": "4.92929e-05", "gnorm": "2.187", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "153133"} +[2022-08-01 05:23:04,205][train_inner][INFO] - {"epoch": 10, "update": 9.952, "loss": "2.104", "ppl": "4.3", "wps": "399797", "ups": "3.37", "wpb": "118798", "bsz": "256", "num_updates": "512200", "lr": "4.92727e-05", "gnorm": "2.139", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "153193"} +[2022-08-01 05:24:03,726][train_inner][INFO] - {"epoch": 10, "update": 9.956, "loss": "2.105", "ppl": "4.3", "wps": "400142", "ups": "3.36", "wpb": "119084", "bsz": "256", "num_updates": "512400", "lr": "4.92525e-05", "gnorm": "2.145", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "153252"} +[2022-08-01 05:25:02,961][train_inner][INFO] - {"epoch": 10, "update": 9.959, "loss": "2.107", "ppl": "4.31", "wps": "399065", "ups": "3.38", "wpb": "118191", "bsz": "256", "num_updates": "512600", "lr": "4.92323e-05", "gnorm": "2.231", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "153311"} +[2022-08-01 05:26:02,710][train_inner][INFO] - {"epoch": 10, "update": 9.963, "loss": "2.111", "ppl": "4.32", "wps": "394898", "ups": "3.35", "wpb": "117974", "bsz": "256", "num_updates": "512800", "lr": "4.92121e-05", "gnorm": "2.065", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "153371"} +[2022-08-01 05:27:02,504][train_inner][INFO] - {"epoch": 10, "update": 9.967, "loss": "2.109", "ppl": "4.31", "wps": "396577", "ups": "3.34", "wpb": "118562", "bsz": "256", "num_updates": "513000", "lr": "4.91919e-05", "gnorm": "2.091", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "153431"} +[2022-08-01 05:28:01,688][train_inner][INFO] - {"epoch": 10, "update": 9.971, "loss": "2.111", "ppl": "4.32", "wps": "400052", "ups": "3.38", "wpb": "118384", "bsz": "256", "num_updates": "513200", "lr": "4.91717e-05", "gnorm": "2.031", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "153490"} +[2022-08-01 05:29:01,329][train_inner][INFO] - {"epoch": 10, "update": 9.975, "loss": "2.108", "ppl": "4.31", "wps": "396520", "ups": "3.35", "wpb": "118242", "bsz": "256", "num_updates": "513400", "lr": "4.91515e-05", "gnorm": "2.108", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "153550"} +[2022-08-01 05:30:00,825][train_inner][INFO] - {"epoch": 10, "update": 9.979, "loss": "2.107", "ppl": "4.31", "wps": "398787", "ups": "3.36", "wpb": "118631", "bsz": "256", "num_updates": "513600", "lr": "4.91313e-05", "gnorm": "2.136", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "153609"} +[2022-08-01 05:31:00,776][train_inner][INFO] - {"epoch": 10, "update": 9.983, "loss": "2.108", "ppl": "4.31", "wps": "397780", "ups": "3.34", "wpb": "119236", "bsz": "256", "num_updates": "513800", "lr": "4.91111e-05", "gnorm": "2.48", "loss_scale": "2", "train_wall": "60", "gb_free": "22.5", "wall": "153669"} +[2022-08-01 05:31:59,880][train_inner][INFO] - {"epoch": 10, "update": 9.987, "loss": "2.112", "ppl": "4.32", "wps": "399014", "ups": "3.38", "wpb": "117916", "bsz": "256", "num_updates": "514000", "lr": "4.90909e-05", "gnorm": "2.001", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "153728"} +[2022-08-01 05:32:35,181][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 05:32:59,445][train_inner][INFO] - {"epoch": 10, "update": 9.991, "loss": "2.114", "ppl": "4.33", "wps": "398551", "ups": "3.36", "wpb": "118697", "bsz": "256", "num_updates": "514200", "lr": "4.90707e-05", "gnorm": "2.305", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "153788"} +[2022-08-01 05:33:59,158][train_inner][INFO] - {"epoch": 10, "update": 9.994, "loss": "2.114", "ppl": "4.33", "wps": "395022", "ups": "3.35", "wpb": "117940", "bsz": "256", "num_updates": "514400", "lr": "4.90505e-05", "gnorm": "1.973", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "153848"} +[2022-08-01 05:34:58,650][train_inner][INFO] - {"epoch": 10, "update": 9.998, "loss": "2.109", "ppl": "4.31", "wps": "397684", "ups": "3.36", "wpb": "118294", "bsz": "256", "num_updates": "514600", "lr": "4.90303e-05", "gnorm": "2.179", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "153907"} +[2022-08-01 05:35:24,295][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 05:35:47,135][valid][INFO] - {"epoch": 10, "valid_loss": "1.993", "valid_ppl": "3.98", "valid_wps": "1.61174e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "514686", "valid_best_loss": "1.993"} +[2022-08-01 05:35:47,138][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 514686 updates +[2022-08-01 05:35:47,139][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_best.pt +[2022-08-01 05:35:55,186][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_best.pt +[2022-08-01 05:36:08,999][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_best.pt (epoch 10 @ 514686 updates, score 1.993) (writing took 21.861514328047633 seconds) +[2022-08-01 05:36:09,000][fairseq_cli.train][INFO] - end of epoch 10 (average epoch stats below) +[2022-08-01 05:36:09,001][train][INFO] - {"epoch": 10, "train_loss": "2.118", "train_ppl": "4.34", "train_wps": "394528", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "514686", "train_lr": "4.90216e-05", "train_gnorm": "1.631", "train_loss_scale": "2", "train_train_wall": "15237", "train_gb_free": "22.6", "train_wall": "153977"} +[2022-08-01 05:36:09,114][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 05:36:09,116][fairseq.trainer][INFO] - begin training epoch 11 +[2022-08-01 05:36:09,117][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 05:36:52,455][train_inner][INFO] - {"epoch": 11, "update": 10.002, "loss": "2.114", "ppl": "4.33", "wps": "205845", "ups": "1.76", "wpb": "117130", "bsz": "255.4", "num_updates": "514800", "lr": "4.90101e-05", "gnorm": "2.222", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "154021"} +[2022-08-01 05:37:51,950][train_inner][INFO] - {"epoch": 11, "update": 10.006, "loss": "2.103", "ppl": "4.3", "wps": "397072", "ups": "3.36", "wpb": "118119", "bsz": "256", "num_updates": "515000", "lr": "4.89899e-05", "gnorm": "2.233", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "154080"} +[2022-08-01 05:38:51,257][train_inner][INFO] - {"epoch": 11, "update": 10.01, "loss": "2.107", "ppl": "4.31", "wps": "398233", "ups": "3.37", "wpb": "118087", "bsz": "256", "num_updates": "515200", "lr": "4.89697e-05", "gnorm": "2.075", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "154140"} +[2022-08-01 05:39:50,429][train_inner][INFO] - {"epoch": 11, "update": 10.014, "loss": "2.107", "ppl": "4.31", "wps": "397572", "ups": "3.38", "wpb": "117625", "bsz": "256", "num_updates": "515400", "lr": "4.89495e-05", "gnorm": "2.116", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "154199"} +[2022-08-01 05:40:49,954][train_inner][INFO] - {"epoch": 11, "update": 10.018, "loss": "2.109", "ppl": "4.31", "wps": "397590", "ups": "3.36", "wpb": "118332", "bsz": "256", "num_updates": "515600", "lr": "4.89293e-05", "gnorm": "2.254", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "154258"} +[2022-08-01 05:41:49,167][train_inner][INFO] - {"epoch": 11, "update": 10.022, "loss": "2.108", "ppl": "4.31", "wps": "398910", "ups": "3.38", "wpb": "118104", "bsz": "256", "num_updates": "515800", "lr": "4.89091e-05", "gnorm": "2.114", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "154318"} +[2022-08-01 05:42:48,691][train_inner][INFO] - {"epoch": 11, "update": 10.026, "loss": "2.106", "ppl": "4.31", "wps": "395099", "ups": "3.36", "wpb": "117588", "bsz": "256", "num_updates": "516000", "lr": "4.88889e-05", "gnorm": "2.186", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "154377"} +[2022-08-01 05:43:47,845][train_inner][INFO] - {"epoch": 11, "update": 10.029, "loss": "2.114", "ppl": "4.33", "wps": "399201", "ups": "3.38", "wpb": "118072", "bsz": "256", "num_updates": "516200", "lr": "4.88687e-05", "gnorm": "2.107", "loss_scale": "4", "train_wall": "59", "gb_free": "28.1", "wall": "154436"} +[2022-08-01 05:44:02,263][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 05:44:47,681][train_inner][INFO] - {"epoch": 11, "update": 10.033, "loss": "2.102", "ppl": "4.29", "wps": "395105", "ups": "3.34", "wpb": "118207", "bsz": "256", "num_updates": "516400", "lr": "4.88485e-05", "gnorm": "2.227", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "154496"} +[2022-08-01 05:45:46,952][train_inner][INFO] - {"epoch": 11, "update": 10.037, "loss": "2.104", "ppl": "4.3", "wps": "399324", "ups": "3.37", "wpb": "118341", "bsz": "256", "num_updates": "516600", "lr": "4.88283e-05", "gnorm": "2.118", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "154555"} +[2022-08-01 05:46:46,114][train_inner][INFO] - {"epoch": 11, "update": 10.041, "loss": "2.104", "ppl": "4.3", "wps": "398503", "ups": "3.38", "wpb": "117880", "bsz": "256", "num_updates": "516800", "lr": "4.88081e-05", "gnorm": "2.272", "loss_scale": "2", "train_wall": "59", "gb_free": "31.3", "wall": "154615"} +[2022-08-01 05:47:45,209][train_inner][INFO] - {"epoch": 11, "update": 10.045, "loss": "2.107", "ppl": "4.31", "wps": "399626", "ups": "3.38", "wpb": "118079", "bsz": "256", "num_updates": "517000", "lr": "4.87879e-05", "gnorm": "2.254", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "154674"} +[2022-08-01 05:48:44,897][train_inner][INFO] - {"epoch": 11, "update": 10.049, "loss": "2.105", "ppl": "4.3", "wps": "397993", "ups": "3.35", "wpb": "118778", "bsz": "256", "num_updates": "517200", "lr": "4.87677e-05", "gnorm": "2.383", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "154733"} +[2022-08-01 05:49:44,374][train_inner][INFO] - {"epoch": 11, "update": 10.053, "loss": "2.107", "ppl": "4.31", "wps": "398285", "ups": "3.36", "wpb": "118442", "bsz": "256", "num_updates": "517400", "lr": "4.87475e-05", "gnorm": "2.372", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "154793"} +[2022-08-01 05:50:43,961][train_inner][INFO] - {"epoch": 11, "update": 10.057, "loss": "2.107", "ppl": "4.31", "wps": "398839", "ups": "3.36", "wpb": "118828", "bsz": "256", "num_updates": "517600", "lr": "4.87273e-05", "gnorm": "2.117", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "154852"} +[2022-08-01 05:51:43,524][train_inner][INFO] - {"epoch": 11, "update": 10.06, "loss": "2.104", "ppl": "4.3", "wps": "398399", "ups": "3.36", "wpb": "118648", "bsz": "256", "num_updates": "517800", "lr": "4.87071e-05", "gnorm": "2.2", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "154912"} +[2022-08-01 05:52:43,101][train_inner][INFO] - {"epoch": 11, "update": 10.064, "loss": "2.104", "ppl": "4.3", "wps": "397707", "ups": "3.36", "wpb": "118470", "bsz": "256", "num_updates": "518000", "lr": "4.86869e-05", "gnorm": "2.113", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "154972"} +[2022-08-01 05:53:42,483][train_inner][INFO] - {"epoch": 11, "update": 10.068, "loss": "2.102", "ppl": "4.29", "wps": "398108", "ups": "3.37", "wpb": "118202", "bsz": "256", "num_updates": "518200", "lr": "4.86667e-05", "gnorm": "2.225", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "155031"} +[2022-08-01 05:54:27,621][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 05:54:42,074][train_inner][INFO] - {"epoch": 11, "update": 10.072, "loss": "2.105", "ppl": "4.3", "wps": "396003", "ups": "3.36", "wpb": "117990", "bsz": "256", "num_updates": "518400", "lr": "4.86465e-05", "gnorm": "2.104", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "155091"} +[2022-08-01 05:55:41,747][train_inner][INFO] - {"epoch": 11, "update": 10.076, "loss": "2.103", "ppl": "4.3", "wps": "397517", "ups": "3.35", "wpb": "118606", "bsz": "256", "num_updates": "518600", "lr": "4.86263e-05", "gnorm": "2.086", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "155150"} +[2022-08-01 05:56:40,803][train_inner][INFO] - {"epoch": 11, "update": 10.08, "loss": "2.1", "ppl": "4.29", "wps": "400028", "ups": "3.39", "wpb": "118118", "bsz": "256", "num_updates": "518800", "lr": "4.86061e-05", "gnorm": "2.346", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "155209"} +[2022-08-01 05:57:40,330][train_inner][INFO] - {"epoch": 11, "update": 10.084, "loss": "2.103", "ppl": "4.3", "wps": "397989", "ups": "3.36", "wpb": "118454", "bsz": "256", "num_updates": "519000", "lr": "4.85859e-05", "gnorm": "2.051", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "155269"} +[2022-08-01 05:58:40,513][train_inner][INFO] - {"epoch": 11, "update": 10.088, "loss": "2.102", "ppl": "4.29", "wps": "393306", "ups": "3.32", "wpb": "118352", "bsz": "256", "num_updates": "519200", "lr": "4.85657e-05", "gnorm": "2.29", "loss_scale": "2", "train_wall": "60", "gb_free": "23.9", "wall": "155329"} +[2022-08-01 05:59:40,199][train_inner][INFO] - {"epoch": 11, "update": 10.092, "loss": "2.111", "ppl": "4.32", "wps": "394607", "ups": "3.35", "wpb": "117761", "bsz": "256", "num_updates": "519400", "lr": "4.85455e-05", "gnorm": "2.032", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "155389"} +[2022-08-01 06:00:39,860][train_inner][INFO] - {"epoch": 11, "update": 10.095, "loss": "2.102", "ppl": "4.29", "wps": "398044", "ups": "3.35", "wpb": "118737", "bsz": "256", "num_updates": "519600", "lr": "4.85253e-05", "gnorm": "2.265", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "155448"} +[2022-08-01 06:01:39,303][train_inner][INFO] - {"epoch": 11, "update": 10.099, "loss": "2.103", "ppl": "4.3", "wps": "397203", "ups": "3.36", "wpb": "118055", "bsz": "256", "num_updates": "519800", "lr": "4.85051e-05", "gnorm": "2.08", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "155508"} +[2022-08-01 06:02:38,959][train_inner][INFO] - {"epoch": 11, "update": 10.103, "loss": "2.101", "ppl": "4.29", "wps": "397289", "ups": "3.35", "wpb": "118501", "bsz": "256", "num_updates": "520000", "lr": "4.84848e-05", "gnorm": "2.319", "loss_scale": "2", "train_wall": "59", "gb_free": "27.2", "wall": "155567"} +[2022-08-01 06:03:38,854][train_inner][INFO] - {"epoch": 11, "update": 10.107, "loss": "2.097", "ppl": "4.28", "wps": "397485", "ups": "3.34", "wpb": "119037", "bsz": "256", "num_updates": "520200", "lr": "4.84646e-05", "gnorm": "2.234", "loss_scale": "2", "train_wall": "60", "gb_free": "23.5", "wall": "155627"} +[2022-08-01 06:04:37,978][train_inner][INFO] - {"epoch": 11, "update": 10.111, "loss": "2.106", "ppl": "4.3", "wps": "400670", "ups": "3.38", "wpb": "118445", "bsz": "256", "num_updates": "520400", "lr": "4.84444e-05", "gnorm": "2.105", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "155686"} +[2022-08-01 06:04:56,135][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 06:05:37,702][train_inner][INFO] - {"epoch": 11, "update": 10.115, "loss": "2.103", "ppl": "4.29", "wps": "395186", "ups": "3.35", "wpb": "118011", "bsz": "256", "num_updates": "520600", "lr": "4.84242e-05", "gnorm": "2.325", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "155746"} +[2022-08-01 06:06:36,908][train_inner][INFO] - {"epoch": 11, "update": 10.119, "loss": "2.103", "ppl": "4.3", "wps": "400285", "ups": "3.38", "wpb": "118494", "bsz": "256", "num_updates": "520800", "lr": "4.8404e-05", "gnorm": "2.022", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "155805"} +[2022-08-01 06:07:36,017][train_inner][INFO] - {"epoch": 11, "update": 10.123, "loss": "2.102", "ppl": "4.29", "wps": "399518", "ups": "3.38", "wpb": "118076", "bsz": "256", "num_updates": "521000", "lr": "4.83838e-05", "gnorm": "2.283", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "155864"} +[2022-08-01 06:08:35,297][train_inner][INFO] - {"epoch": 11, "update": 10.127, "loss": "2.106", "ppl": "4.31", "wps": "398324", "ups": "3.37", "wpb": "118063", "bsz": "256", "num_updates": "521200", "lr": "4.83636e-05", "gnorm": "2.072", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "155924"} +[2022-08-01 06:09:34,956][train_inner][INFO] - {"epoch": 11, "update": 10.13, "loss": "2.101", "ppl": "4.29", "wps": "398246", "ups": "3.35", "wpb": "118793", "bsz": "256", "num_updates": "521400", "lr": "4.83434e-05", "gnorm": "2.238", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "155983"} +[2022-08-01 06:10:34,299][train_inner][INFO] - {"epoch": 11, "update": 10.134, "loss": "2.106", "ppl": "4.3", "wps": "399018", "ups": "3.37", "wpb": "118395", "bsz": "256", "num_updates": "521600", "lr": "4.83232e-05", "gnorm": "2.196", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "156043"} +[2022-08-01 06:11:33,432][train_inner][INFO] - {"epoch": 11, "update": 10.138, "loss": "2.103", "ppl": "4.3", "wps": "401033", "ups": "3.38", "wpb": "118571", "bsz": "256", "num_updates": "521800", "lr": "4.8303e-05", "gnorm": "2.088", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "156102"} +[2022-08-01 06:12:32,389][train_inner][INFO] - {"epoch": 11, "update": 10.142, "loss": "2.101", "ppl": "4.29", "wps": "401280", "ups": "3.39", "wpb": "118291", "bsz": "256", "num_updates": "522000", "lr": "4.82828e-05", "gnorm": "2.47", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "156161"} +[2022-08-01 06:13:31,789][train_inner][INFO] - {"epoch": 11, "update": 10.146, "loss": "2.099", "ppl": "4.28", "wps": "398163", "ups": "3.37", "wpb": "118253", "bsz": "256", "num_updates": "522200", "lr": "4.82626e-05", "gnorm": "2.182", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "156220"} +[2022-08-01 06:14:31,502][train_inner][INFO] - {"epoch": 11, "update": 10.15, "loss": "2.106", "ppl": "4.3", "wps": "395810", "ups": "3.35", "wpb": "118174", "bsz": "256", "num_updates": "522400", "lr": "4.82424e-05", "gnorm": "2.209", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "156280"} +[2022-08-01 06:15:09,792][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 06:15:31,197][train_inner][INFO] - {"epoch": 11, "update": 10.154, "loss": "2.098", "ppl": "4.28", "wps": "397315", "ups": "3.35", "wpb": "118588", "bsz": "256", "num_updates": "522600", "lr": "4.82222e-05", "gnorm": "2.129", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "156340"} +[2022-08-01 06:16:30,820][train_inner][INFO] - {"epoch": 11, "update": 10.158, "loss": "2.102", "ppl": "4.29", "wps": "398220", "ups": "3.35", "wpb": "118714", "bsz": "256", "num_updates": "522800", "lr": "4.8202e-05", "gnorm": "2.256", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "156399"} +[2022-08-01 06:17:30,575][train_inner][INFO] - {"epoch": 11, "update": 10.162, "loss": "2.101", "ppl": "4.29", "wps": "396068", "ups": "3.35", "wpb": "118335", "bsz": "256", "num_updates": "523000", "lr": "4.81818e-05", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "156459"} +[2022-08-01 06:18:31,130][train_inner][INFO] - {"epoch": 11, "update": 10.165, "loss": "2.099", "ppl": "4.28", "wps": "393980", "ups": "3.3", "wpb": "119287", "bsz": "256", "num_updates": "523200", "lr": "4.81616e-05", "gnorm": "2.338", "loss_scale": "2", "train_wall": "60", "gb_free": "28.1", "wall": "156520"} +[2022-08-01 06:19:31,060][train_inner][INFO] - {"epoch": 11, "update": 10.169, "loss": "2.099", "ppl": "4.28", "wps": "396521", "ups": "3.34", "wpb": "118816", "bsz": "256", "num_updates": "523400", "lr": "4.81414e-05", "gnorm": "2.1", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "156579"} +[2022-08-01 06:20:30,551][train_inner][INFO] - {"epoch": 11, "update": 10.173, "loss": "2.101", "ppl": "4.29", "wps": "396412", "ups": "3.36", "wpb": "117914", "bsz": "256", "num_updates": "523600", "lr": "4.81212e-05", "gnorm": "2.14", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "156639"} +[2022-08-01 06:21:30,395][train_inner][INFO] - {"epoch": 11, "update": 10.177, "loss": "2.096", "ppl": "4.27", "wps": "398761", "ups": "3.34", "wpb": "119316", "bsz": "256", "num_updates": "523800", "lr": "4.8101e-05", "gnorm": "2.219", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "156699"} +[2022-08-01 06:22:30,109][train_inner][INFO] - {"epoch": 11, "update": 10.181, "loss": "2.103", "ppl": "4.3", "wps": "395934", "ups": "3.35", "wpb": "118214", "bsz": "256", "num_updates": "524000", "lr": "4.80808e-05", "gnorm": "2.235", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "156759"} +[2022-08-01 06:23:29,310][train_inner][INFO] - {"epoch": 11, "update": 10.185, "loss": "2.099", "ppl": "4.28", "wps": "400995", "ups": "3.38", "wpb": "118695", "bsz": "256", "num_updates": "524200", "lr": "4.80606e-05", "gnorm": "2.044", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "156818"} +[2022-08-01 06:24:28,862][train_inner][INFO] - {"epoch": 11, "update": 10.189, "loss": "2.108", "ppl": "4.31", "wps": "396074", "ups": "3.36", "wpb": "117936", "bsz": "256", "num_updates": "524400", "lr": "4.80404e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "156877"} +[2022-08-01 06:25:28,385][train_inner][INFO] - {"epoch": 11, "update": 10.193, "loss": "2.107", "ppl": "4.31", "wps": "396681", "ups": "3.36", "wpb": "118056", "bsz": "256", "num_updates": "524600", "lr": "4.80202e-05", "gnorm": "2.249", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "156937"} +[2022-08-01 06:25:59,950][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 06:26:27,894][train_inner][INFO] - {"epoch": 11, "update": 10.197, "loss": "2.109", "ppl": "4.31", "wps": "396374", "ups": "3.36", "wpb": "117938", "bsz": "256", "num_updates": "524800", "lr": "4.8e-05", "gnorm": "2.164", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "156996"} +[2022-08-01 06:27:27,646][train_inner][INFO] - {"epoch": 11, "update": 10.2, "loss": "2.104", "ppl": "4.3", "wps": "395325", "ups": "3.35", "wpb": "118108", "bsz": "256", "num_updates": "525000", "lr": "4.79798e-05", "gnorm": "2.149", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "157056"} +[2022-08-01 06:28:27,071][train_inner][INFO] - {"epoch": 11, "update": 10.204, "loss": "2.104", "ppl": "4.3", "wps": "399783", "ups": "3.37", "wpb": "118785", "bsz": "256", "num_updates": "525200", "lr": "4.79596e-05", "gnorm": "2.196", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "157116"} +[2022-08-01 06:29:26,474][train_inner][INFO] - {"epoch": 11, "update": 10.208, "loss": "2.099", "ppl": "4.28", "wps": "398466", "ups": "3.37", "wpb": "118349", "bsz": "256", "num_updates": "525400", "lr": "4.79394e-05", "gnorm": "2.291", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "157175"} +[2022-08-01 06:30:25,804][train_inner][INFO] - {"epoch": 11, "update": 10.212, "loss": "2.098", "ppl": "4.28", "wps": "398666", "ups": "3.37", "wpb": "118264", "bsz": "256", "num_updates": "525600", "lr": "4.79192e-05", "gnorm": "2.232", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "157234"} +[2022-08-01 06:31:25,350][train_inner][INFO] - {"epoch": 11, "update": 10.216, "loss": "2.094", "ppl": "4.27", "wps": "398770", "ups": "3.36", "wpb": "118724", "bsz": "256", "num_updates": "525800", "lr": "4.7899e-05", "gnorm": "2.181", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "157294"} +[2022-08-01 06:32:25,112][train_inner][INFO] - {"epoch": 11, "update": 10.22, "loss": "2.1", "ppl": "4.29", "wps": "398744", "ups": "3.35", "wpb": "119149", "bsz": "256", "num_updates": "526000", "lr": "4.78788e-05", "gnorm": "2.129", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "157354"} +[2022-08-01 06:33:24,639][train_inner][INFO] - {"epoch": 11, "update": 10.224, "loss": "2.101", "ppl": "4.29", "wps": "398652", "ups": "3.36", "wpb": "118651", "bsz": "256", "num_updates": "526200", "lr": "4.78586e-05", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "157413"} +[2022-08-01 06:34:24,182][train_inner][INFO] - {"epoch": 11, "update": 10.228, "loss": "2.107", "ppl": "4.31", "wps": "397191", "ups": "3.36", "wpb": "118249", "bsz": "256", "num_updates": "526400", "lr": "4.78384e-05", "gnorm": "2.251", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "157473"} +[2022-08-01 06:35:24,117][train_inner][INFO] - {"epoch": 11, "update": 10.231, "loss": "2.107", "ppl": "4.31", "wps": "394276", "ups": "3.34", "wpb": "118154", "bsz": "256", "num_updates": "526600", "lr": "4.78182e-05", "gnorm": "2.239", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "157533"} +[2022-08-01 06:36:23,455][train_inner][INFO] - {"epoch": 11, "update": 10.235, "loss": "2.106", "ppl": "4.31", "wps": "396210", "ups": "3.37", "wpb": "117552", "bsz": "256", "num_updates": "526800", "lr": "4.7798e-05", "gnorm": "2.235", "loss_scale": "4", "train_wall": "59", "gb_free": "26.3", "wall": "157592"} +[2022-08-01 06:37:08,079][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 06:37:22,958][train_inner][INFO] - {"epoch": 11, "update": 10.239, "loss": "2.1", "ppl": "4.29", "wps": "396813", "ups": "3.36", "wpb": "118056", "bsz": "256", "num_updates": "527000", "lr": "4.77778e-05", "gnorm": "2.236", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "157651"} +[2022-08-01 06:38:22,664][train_inner][INFO] - {"epoch": 11, "update": 10.243, "loss": "2.105", "ppl": "4.3", "wps": "396848", "ups": "3.35", "wpb": "118470", "bsz": "256", "num_updates": "527200", "lr": "4.77576e-05", "gnorm": "2.141", "loss_scale": "2", "train_wall": "59", "gb_free": "26.8", "wall": "157711"} +[2022-08-01 06:39:21,994][train_inner][INFO] - {"epoch": 11, "update": 10.247, "loss": "2.102", "ppl": "4.29", "wps": "397640", "ups": "3.37", "wpb": "117960", "bsz": "256", "num_updates": "527400", "lr": "4.77374e-05", "gnorm": "2.233", "loss_scale": "2", "train_wall": "59", "gb_free": "26.9", "wall": "157770"} +[2022-08-01 06:40:21,622][train_inner][INFO] - {"epoch": 11, "update": 10.251, "loss": "2.093", "ppl": "4.27", "wps": "398769", "ups": "3.35", "wpb": "118887", "bsz": "256", "num_updates": "527600", "lr": "4.77172e-05", "gnorm": "2.113", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "157830"} +[2022-08-01 06:41:21,590][train_inner][INFO] - {"epoch": 11, "update": 10.255, "loss": "2.101", "ppl": "4.29", "wps": "395744", "ups": "3.34", "wpb": "118660", "bsz": "256", "num_updates": "527800", "lr": "4.7697e-05", "gnorm": "2.262", "loss_scale": "2", "train_wall": "60", "gb_free": "22.6", "wall": "157890"} +[2022-08-01 06:42:21,043][train_inner][INFO] - {"epoch": 11, "update": 10.259, "loss": "2.094", "ppl": "4.27", "wps": "399376", "ups": "3.36", "wpb": "118720", "bsz": "256", "num_updates": "528000", "lr": "4.76768e-05", "gnorm": "2.139", "loss_scale": "2", "train_wall": "59", "gb_free": "28.8", "wall": "157949"} +[2022-08-01 06:43:20,394][train_inner][INFO] - {"epoch": 11, "update": 10.263, "loss": "2.102", "ppl": "4.29", "wps": "398461", "ups": "3.37", "wpb": "118243", "bsz": "256", "num_updates": "528200", "lr": "4.76566e-05", "gnorm": "2.095", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "158009"} +[2022-08-01 06:44:19,653][train_inner][INFO] - {"epoch": 11, "update": 10.266, "loss": "2.096", "ppl": "4.28", "wps": "400290", "ups": "3.38", "wpb": "118604", "bsz": "256", "num_updates": "528400", "lr": "4.76364e-05", "gnorm": "2.142", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "158068"} +[2022-08-01 06:45:19,250][train_inner][INFO] - {"epoch": 11, "update": 10.27, "loss": "2.108", "ppl": "4.31", "wps": "396574", "ups": "3.36", "wpb": "118172", "bsz": "256", "num_updates": "528600", "lr": "4.76162e-05", "gnorm": "2.291", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "158128"} +[2022-08-01 06:46:18,769][train_inner][INFO] - {"epoch": 11, "update": 10.274, "loss": "2.103", "ppl": "4.3", "wps": "396843", "ups": "3.36", "wpb": "118098", "bsz": "256", "num_updates": "528800", "lr": "4.7596e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "158187"} +[2022-08-01 06:47:18,145][train_inner][INFO] - {"epoch": 11, "update": 10.278, "loss": "2.097", "ppl": "4.28", "wps": "397519", "ups": "3.37", "wpb": "118015", "bsz": "256", "num_updates": "529000", "lr": "4.75758e-05", "gnorm": "2.412", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "158247"} +[2022-08-01 06:47:32,285][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 06:48:18,001][train_inner][INFO] - {"epoch": 11, "update": 10.282, "loss": "2.097", "ppl": "4.28", "wps": "395704", "ups": "3.34", "wpb": "118425", "bsz": "256", "num_updates": "529200", "lr": "4.75556e-05", "gnorm": "2.217", "loss_scale": "2", "train_wall": "60", "gb_free": "23", "wall": "158306"} +[2022-08-01 06:49:17,686][train_inner][INFO] - {"epoch": 11, "update": 10.286, "loss": "2.098", "ppl": "4.28", "wps": "396874", "ups": "3.35", "wpb": "118436", "bsz": "256", "num_updates": "529400", "lr": "4.75354e-05", "gnorm": "2.236", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "158366"} +[2022-08-01 06:50:17,283][train_inner][INFO] - {"epoch": 11, "update": 10.29, "loss": "2.1", "ppl": "4.29", "wps": "396215", "ups": "3.36", "wpb": "118065", "bsz": "256", "num_updates": "529600", "lr": "4.75152e-05", "gnorm": "2.158", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "158426"} +[2022-08-01 06:51:16,943][train_inner][INFO] - {"epoch": 11, "update": 10.294, "loss": "2.105", "ppl": "4.3", "wps": "395886", "ups": "3.35", "wpb": "118092", "bsz": "256", "num_updates": "529800", "lr": "4.74949e-05", "gnorm": "2.2", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "158485"} +[2022-08-01 06:52:16,373][train_inner][INFO] - {"epoch": 11, "update": 10.298, "loss": "2.098", "ppl": "4.28", "wps": "398530", "ups": "3.37", "wpb": "118423", "bsz": "256", "num_updates": "530000", "lr": "4.74747e-05", "gnorm": "2.367", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "158545"} +[2022-08-01 06:53:15,839][train_inner][INFO] - {"epoch": 11, "update": 10.301, "loss": "2.094", "ppl": "4.27", "wps": "399904", "ups": "3.36", "wpb": "118902", "bsz": "256", "num_updates": "530200", "lr": "4.74545e-05", "gnorm": "2.338", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "158604"} +[2022-08-01 06:54:15,496][train_inner][INFO] - {"epoch": 11, "update": 10.305, "loss": "2.096", "ppl": "4.28", "wps": "395713", "ups": "3.35", "wpb": "118035", "bsz": "256", "num_updates": "530400", "lr": "4.74343e-05", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "158664"} +[2022-08-01 06:55:15,192][train_inner][INFO] - {"epoch": 11, "update": 10.309, "loss": "2.106", "ppl": "4.3", "wps": "393415", "ups": "3.35", "wpb": "117425", "bsz": "256", "num_updates": "530600", "lr": "4.74141e-05", "gnorm": "2.466", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "158724"} +[2022-08-01 06:56:14,514][train_inner][INFO] - {"epoch": 11, "update": 10.313, "loss": "2.094", "ppl": "4.27", "wps": "400641", "ups": "3.37", "wpb": "118834", "bsz": "256", "num_updates": "530800", "lr": "4.73939e-05", "gnorm": "2.2", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "158783"} +[2022-08-01 06:57:14,264][train_inner][INFO] - {"epoch": 11, "update": 10.317, "loss": "2.101", "ppl": "4.29", "wps": "395128", "ups": "3.35", "wpb": "118043", "bsz": "256", "num_updates": "531000", "lr": "4.73737e-05", "gnorm": "2.334", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "158843"} +[2022-08-01 06:57:57,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 06:58:15,378][train_inner][INFO] - {"epoch": 11, "update": 10.321, "loss": "2.1", "ppl": "4.29", "wps": "387241", "ups": "3.27", "wpb": "118329", "bsz": "256", "num_updates": "531200", "lr": "4.73535e-05", "gnorm": "2.366", "loss_scale": "2", "train_wall": "61", "gb_free": "22", "wall": "158904"} +[2022-08-01 06:59:14,954][train_inner][INFO] - {"epoch": 11, "update": 10.325, "loss": "2.099", "ppl": "4.28", "wps": "396254", "ups": "3.36", "wpb": "118035", "bsz": "256", "num_updates": "531400", "lr": "4.73333e-05", "gnorm": "2.497", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "158963"} +[2022-08-01 07:00:14,525][train_inner][INFO] - {"epoch": 11, "update": 10.329, "loss": "2.101", "ppl": "4.29", "wps": "396696", "ups": "3.36", "wpb": "118157", "bsz": "256", "num_updates": "531600", "lr": "4.73131e-05", "gnorm": "2.114", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "159023"} +[2022-08-01 07:01:13,871][train_inner][INFO] - {"epoch": 11, "update": 10.333, "loss": "2.103", "ppl": "4.3", "wps": "397767", "ups": "3.37", "wpb": "118028", "bsz": "256", "num_updates": "531800", "lr": "4.72929e-05", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "159082"} +[2022-08-01 07:02:13,312][train_inner][INFO] - {"epoch": 11, "update": 10.336, "loss": "2.098", "ppl": "4.28", "wps": "399538", "ups": "3.36", "wpb": "118745", "bsz": "256", "num_updates": "532000", "lr": "4.72727e-05", "gnorm": "2.44", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "159142"} +[2022-08-01 07:03:12,903][train_inner][INFO] - {"epoch": 11, "update": 10.34, "loss": "2.097", "ppl": "4.28", "wps": "397776", "ups": "3.36", "wpb": "118519", "bsz": "256", "num_updates": "532200", "lr": "4.72525e-05", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "159201"} +[2022-08-01 07:04:12,157][train_inner][INFO] - {"epoch": 11, "update": 10.344, "loss": "2.099", "ppl": "4.28", "wps": "398842", "ups": "3.38", "wpb": "118164", "bsz": "256", "num_updates": "532400", "lr": "4.72323e-05", "gnorm": "2.349", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "159261"} +[2022-08-01 07:05:11,411][train_inner][INFO] - {"epoch": 11, "update": 10.348, "loss": "2.101", "ppl": "4.29", "wps": "399276", "ups": "3.38", "wpb": "118293", "bsz": "256", "num_updates": "532600", "lr": "4.72121e-05", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "159320"} +[2022-08-01 07:06:10,640][train_inner][INFO] - {"epoch": 11, "update": 10.352, "loss": "2.103", "ppl": "4.3", "wps": "398423", "ups": "3.38", "wpb": "117990", "bsz": "256", "num_updates": "532800", "lr": "4.71919e-05", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "159379"} +[2022-08-01 07:07:10,206][train_inner][INFO] - {"epoch": 11, "update": 10.356, "loss": "2.101", "ppl": "4.29", "wps": "397329", "ups": "3.36", "wpb": "118336", "bsz": "256", "num_updates": "533000", "lr": "4.71717e-05", "gnorm": "2.322", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "159439"} +[2022-08-01 07:08:09,713][train_inner][INFO] - {"epoch": 11, "update": 10.36, "loss": "2.095", "ppl": "4.27", "wps": "400714", "ups": "3.36", "wpb": "119226", "bsz": "256", "num_updates": "533200", "lr": "4.71515e-05", "gnorm": "2.289", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "159498"} +[2022-08-01 07:08:59,888][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 07:09:10,664][train_inner][INFO] - {"epoch": 11, "update": 10.364, "loss": "2.094", "ppl": "4.27", "wps": "391161", "ups": "3.28", "wpb": "119207", "bsz": "256", "num_updates": "533400", "lr": "4.71313e-05", "gnorm": "2.118", "loss_scale": "2", "train_wall": "61", "gb_free": "27.9", "wall": "159559"} +[2022-08-01 07:10:10,004][train_inner][INFO] - {"epoch": 11, "update": 10.368, "loss": "2.099", "ppl": "4.29", "wps": "398301", "ups": "3.37", "wpb": "118174", "bsz": "256", "num_updates": "533600", "lr": "4.71111e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "159618"} +[2022-08-01 07:11:10,425][train_inner][INFO] - {"epoch": 11, "update": 10.371, "loss": "2.097", "ppl": "4.28", "wps": "392002", "ups": "3.31", "wpb": "118427", "bsz": "256", "num_updates": "533800", "lr": "4.70909e-05", "gnorm": "2.209", "loss_scale": "2", "train_wall": "60", "gb_free": "26.4", "wall": "159679"} +[2022-08-01 07:12:09,741][train_inner][INFO] - {"epoch": 11, "update": 10.375, "loss": "2.108", "ppl": "4.31", "wps": "398310", "ups": "3.37", "wpb": "118129", "bsz": "256", "num_updates": "534000", "lr": "4.70707e-05", "gnorm": "2.414", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "159738"} +[2022-08-01 07:13:09,084][train_inner][INFO] - {"epoch": 11, "update": 10.379, "loss": "2.1", "ppl": "4.29", "wps": "397067", "ups": "3.37", "wpb": "117814", "bsz": "256", "num_updates": "534200", "lr": "4.70505e-05", "gnorm": "2.212", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "159798"} +[2022-08-01 07:14:08,729][train_inner][INFO] - {"epoch": 11, "update": 10.383, "loss": "2.096", "ppl": "4.27", "wps": "397041", "ups": "3.35", "wpb": "118407", "bsz": "256", "num_updates": "534400", "lr": "4.70303e-05", "gnorm": "2.294", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "159857"} +[2022-08-01 07:15:08,383][train_inner][INFO] - {"epoch": 11, "update": 10.387, "loss": "2.104", "ppl": "4.3", "wps": "396896", "ups": "3.35", "wpb": "118381", "bsz": "256", "num_updates": "534600", "lr": "4.70101e-05", "gnorm": "2.248", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "159917"} +[2022-08-01 07:16:07,814][train_inner][INFO] - {"epoch": 11, "update": 10.391, "loss": "2.097", "ppl": "4.28", "wps": "395951", "ups": "3.37", "wpb": "117659", "bsz": "256", "num_updates": "534800", "lr": "4.69899e-05", "gnorm": "2.148", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "159976"} +[2022-08-01 07:17:07,492][train_inner][INFO] - {"epoch": 11, "update": 10.395, "loss": "2.103", "ppl": "4.29", "wps": "395332", "ups": "3.35", "wpb": "117963", "bsz": "256", "num_updates": "535000", "lr": "4.69697e-05", "gnorm": "2.199", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "160036"} +[2022-08-01 07:18:07,017][train_inner][INFO] - {"epoch": 11, "update": 10.399, "loss": "2.098", "ppl": "4.28", "wps": "397565", "ups": "3.36", "wpb": "118324", "bsz": "256", "num_updates": "535200", "lr": "4.69495e-05", "gnorm": "2.296", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "160095"} +[2022-08-01 07:19:06,644][train_inner][INFO] - {"epoch": 11, "update": 10.402, "loss": "2.099", "ppl": "4.28", "wps": "396110", "ups": "3.35", "wpb": "118094", "bsz": "256", "num_updates": "535400", "lr": "4.69293e-05", "gnorm": "2.207", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "160155"} +[2022-08-01 07:19:22,061][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 07:20:06,364][train_inner][INFO] - {"epoch": 11, "update": 10.406, "loss": "2.098", "ppl": "4.28", "wps": "395928", "ups": "3.35", "wpb": "118223", "bsz": "256", "num_updates": "535600", "lr": "4.69091e-05", "gnorm": "2.43", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "160215"} +[2022-08-01 07:21:05,833][train_inner][INFO] - {"epoch": 11, "update": 10.41, "loss": "2.097", "ppl": "4.28", "wps": "398525", "ups": "3.36", "wpb": "118497", "bsz": "256", "num_updates": "535800", "lr": "4.68889e-05", "gnorm": "2.229", "loss_scale": "2", "train_wall": "59", "gb_free": "27.6", "wall": "160274"} +[2022-08-01 07:22:05,130][train_inner][INFO] - {"epoch": 11, "update": 10.414, "loss": "2.1", "ppl": "4.29", "wps": "399046", "ups": "3.37", "wpb": "118311", "bsz": "256", "num_updates": "536000", "lr": "4.68687e-05", "gnorm": "2.229", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "160334"} +[2022-08-01 07:23:04,952][train_inner][INFO] - {"epoch": 11, "update": 10.418, "loss": "2.096", "ppl": "4.28", "wps": "396043", "ups": "3.34", "wpb": "118460", "bsz": "256", "num_updates": "536200", "lr": "4.68485e-05", "gnorm": "2.244", "loss_scale": "2", "train_wall": "59", "gb_free": "27.6", "wall": "160393"} +[2022-08-01 07:24:04,440][train_inner][INFO] - {"epoch": 11, "update": 10.422, "loss": "2.098", "ppl": "4.28", "wps": "398968", "ups": "3.36", "wpb": "118667", "bsz": "255.9", "num_updates": "536400", "lr": "4.68283e-05", "gnorm": "2.254", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "160453"} +[2022-08-01 07:25:03,739][train_inner][INFO] - {"epoch": 11, "update": 10.426, "loss": "2.101", "ppl": "4.29", "wps": "398008", "ups": "3.37", "wpb": "118007", "bsz": "256", "num_updates": "536600", "lr": "4.68081e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "59", "gb_free": "25.5", "wall": "160512"} +[2022-08-01 07:26:03,373][train_inner][INFO] - {"epoch": 11, "update": 10.43, "loss": "2.101", "ppl": "4.29", "wps": "396289", "ups": "3.35", "wpb": "118161", "bsz": "256", "num_updates": "536800", "lr": "4.67879e-05", "gnorm": "2.155", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "160572"} +[2022-08-01 07:27:02,770][train_inner][INFO] - {"epoch": 11, "update": 10.434, "loss": "2.105", "ppl": "4.3", "wps": "397252", "ups": "3.37", "wpb": "117978", "bsz": "256", "num_updates": "537000", "lr": "4.67677e-05", "gnorm": "2.434", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "160631"} +[2022-08-01 07:28:02,128][train_inner][INFO] - {"epoch": 11, "update": 10.437, "loss": "2.097", "ppl": "4.28", "wps": "396968", "ups": "3.37", "wpb": "117816", "bsz": "256", "num_updates": "537200", "lr": "4.67475e-05", "gnorm": "2.193", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "160691"} +[2022-08-01 07:29:01,545][train_inner][INFO] - {"epoch": 11, "update": 10.441, "loss": "2.095", "ppl": "4.27", "wps": "396764", "ups": "3.37", "wpb": "117871", "bsz": "256", "num_updates": "537400", "lr": "4.67273e-05", "gnorm": "2.278", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "160750"} +[2022-08-01 07:29:31,643][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 07:30:01,735][train_inner][INFO] - {"epoch": 11, "update": 10.445, "loss": "2.1", "ppl": "4.29", "wps": "394658", "ups": "3.32", "wpb": "118772", "bsz": "256", "num_updates": "537600", "lr": "4.67071e-05", "gnorm": "2.19", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "160810"} +[2022-08-01 07:31:01,648][train_inner][INFO] - {"epoch": 11, "update": 10.449, "loss": "2.097", "ppl": "4.28", "wps": "395053", "ups": "3.34", "wpb": "118342", "bsz": "256", "num_updates": "537800", "lr": "4.66869e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "160870"} +[2022-08-01 07:32:01,340][train_inner][INFO] - {"epoch": 11, "update": 10.453, "loss": "2.094", "ppl": "4.27", "wps": "396559", "ups": "3.35", "wpb": "118357", "bsz": "256", "num_updates": "538000", "lr": "4.66667e-05", "gnorm": "2.186", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "160930"} +[2022-08-01 07:33:00,640][train_inner][INFO] - {"epoch": 11, "update": 10.457, "loss": "2.108", "ppl": "4.31", "wps": "397181", "ups": "3.37", "wpb": "117763", "bsz": "256", "num_updates": "538200", "lr": "4.66465e-05", "gnorm": "2.201", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "160989"} +[2022-08-01 07:34:00,198][train_inner][INFO] - {"epoch": 11, "update": 10.461, "loss": "2.103", "ppl": "4.3", "wps": "396512", "ups": "3.36", "wpb": "118077", "bsz": "256", "num_updates": "538400", "lr": "4.66263e-05", "gnorm": "2.214", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "161049"} +[2022-08-01 07:34:59,905][train_inner][INFO] - {"epoch": 11, "update": 10.465, "loss": "2.101", "ppl": "4.29", "wps": "393964", "ups": "3.35", "wpb": "117611", "bsz": "256", "num_updates": "538600", "lr": "4.66061e-05", "gnorm": "2.241", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "161108"} +[2022-08-01 07:35:59,473][train_inner][INFO] - {"epoch": 11, "update": 10.469, "loss": "2.095", "ppl": "4.27", "wps": "398133", "ups": "3.36", "wpb": "118579", "bsz": "256", "num_updates": "538800", "lr": "4.65859e-05", "gnorm": "2.419", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "161168"} +[2022-08-01 07:36:59,488][train_inner][INFO] - {"epoch": 11, "update": 10.472, "loss": "2.094", "ppl": "4.27", "wps": "395272", "ups": "3.33", "wpb": "118610", "bsz": "256", "num_updates": "539000", "lr": "4.65657e-05", "gnorm": "2.143", "loss_scale": "2", "train_wall": "60", "gb_free": "21.9", "wall": "161228"} +[2022-08-01 07:37:59,005][train_inner][INFO] - {"epoch": 11, "update": 10.476, "loss": "2.097", "ppl": "4.28", "wps": "396437", "ups": "3.36", "wpb": "117974", "bsz": "256", "num_updates": "539200", "lr": "4.65455e-05", "gnorm": "2.227", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "161287"} +[2022-08-01 07:38:58,474][train_inner][INFO] - {"epoch": 11, "update": 10.48, "loss": "2.099", "ppl": "4.28", "wps": "398185", "ups": "3.36", "wpb": "118397", "bsz": "256", "num_updates": "539400", "lr": "4.65253e-05", "gnorm": "2.434", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "161347"} +[2022-08-01 07:39:43,413][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 07:39:58,256][train_inner][INFO] - {"epoch": 11, "update": 10.484, "loss": "2.097", "ppl": "4.28", "wps": "396236", "ups": "3.35", "wpb": "118439", "bsz": "256", "num_updates": "539600", "lr": "4.65051e-05", "gnorm": "2.532", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "161407"} +[2022-08-01 07:40:57,647][train_inner][INFO] - {"epoch": 11, "update": 10.488, "loss": "2.102", "ppl": "4.29", "wps": "397722", "ups": "3.37", "wpb": "118103", "bsz": "256", "num_updates": "539800", "lr": "4.64848e-05", "gnorm": "2.193", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "161466"} +[2022-08-01 07:41:56,996][train_inner][INFO] - {"epoch": 11, "update": 10.492, "loss": "2.094", "ppl": "4.27", "wps": "398120", "ups": "3.37", "wpb": "118141", "bsz": "256", "num_updates": "540000", "lr": "4.64646e-05", "gnorm": "2.455", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "161525"} +[2022-08-01 07:42:56,467][train_inner][INFO] - {"epoch": 11, "update": 10.496, "loss": "2.093", "ppl": "4.27", "wps": "399358", "ups": "3.36", "wpb": "118750", "bsz": "256", "num_updates": "540200", "lr": "4.64444e-05", "gnorm": "2.305", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "161585"} +[2022-08-01 07:43:55,711][train_inner][INFO] - {"epoch": 11, "update": 10.5, "loss": "2.095", "ppl": "4.27", "wps": "398039", "ups": "3.38", "wpb": "117907", "bsz": "256", "num_updates": "540400", "lr": "4.64242e-05", "gnorm": "2.172", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "161644"} +[2022-08-01 07:44:55,298][train_inner][INFO] - {"epoch": 11, "update": 10.504, "loss": "2.104", "ppl": "4.3", "wps": "397333", "ups": "3.36", "wpb": "118377", "bsz": "256", "num_updates": "540600", "lr": "4.6404e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "161704"} +[2022-08-01 07:45:55,127][train_inner][INFO] - {"epoch": 11, "update": 10.507, "loss": "2.094", "ppl": "4.27", "wps": "393627", "ups": "3.34", "wpb": "117752", "bsz": "256", "num_updates": "540800", "lr": "4.63838e-05", "gnorm": "2.381", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "161764"} +[2022-08-01 07:46:54,578][train_inner][INFO] - {"epoch": 11, "update": 10.511, "loss": "2.097", "ppl": "4.28", "wps": "400426", "ups": "3.36", "wpb": "119027", "bsz": "256", "num_updates": "541000", "lr": "4.63636e-05", "gnorm": "2.34", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "161823"} +[2022-08-01 07:47:54,043][train_inner][INFO] - {"epoch": 11, "update": 10.515, "loss": "2.09", "ppl": "4.26", "wps": "396919", "ups": "3.36", "wpb": "118013", "bsz": "256", "num_updates": "541200", "lr": "4.63434e-05", "gnorm": "2.251", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "161882"} +[2022-08-01 07:48:53,490][train_inner][INFO] - {"epoch": 11, "update": 10.519, "loss": "2.095", "ppl": "4.27", "wps": "396506", "ups": "3.36", "wpb": "117855", "bsz": "256", "num_updates": "541400", "lr": "4.63232e-05", "gnorm": "2.185", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "161942"} +[2022-08-01 07:49:52,818][train_inner][INFO] - {"epoch": 11, "update": 10.523, "loss": "2.089", "ppl": "4.25", "wps": "401407", "ups": "3.37", "wpb": "119073", "bsz": "256", "num_updates": "541600", "lr": "4.6303e-05", "gnorm": "2.466", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "162001"} +[2022-08-01 07:50:26,307][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 07:50:52,722][train_inner][INFO] - {"epoch": 11, "update": 10.527, "loss": "2.099", "ppl": "4.29", "wps": "393309", "ups": "3.34", "wpb": "117803", "bsz": "256", "num_updates": "541800", "lr": "4.62828e-05", "gnorm": "2.403", "loss_scale": "2", "train_wall": "60", "gb_free": "23.2", "wall": "162061"} +[2022-08-01 07:51:52,178][train_inner][INFO] - {"epoch": 11, "update": 10.531, "loss": "2.099", "ppl": "4.28", "wps": "398096", "ups": "3.36", "wpb": "118344", "bsz": "256", "num_updates": "542000", "lr": "4.62626e-05", "gnorm": "2.051", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "162121"} +[2022-08-01 07:52:52,004][train_inner][INFO] - {"epoch": 11, "update": 10.535, "loss": "2.095", "ppl": "4.27", "wps": "395242", "ups": "3.34", "wpb": "118229", "bsz": "256", "num_updates": "542200", "lr": "4.62424e-05", "gnorm": "2.422", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "162180"} +[2022-08-01 07:53:51,384][train_inner][INFO] - {"epoch": 11, "update": 10.539, "loss": "2.098", "ppl": "4.28", "wps": "400176", "ups": "3.37", "wpb": "118812", "bsz": "256", "num_updates": "542400", "lr": "4.62222e-05", "gnorm": "2.337", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "162240"} +[2022-08-01 07:54:50,997][train_inner][INFO] - {"epoch": 11, "update": 10.542, "loss": "2.095", "ppl": "4.27", "wps": "395826", "ups": "3.36", "wpb": "117980", "bsz": "256", "num_updates": "542600", "lr": "4.6202e-05", "gnorm": "2.341", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "162299"} +[2022-08-01 07:55:50,676][train_inner][INFO] - {"epoch": 11, "update": 10.546, "loss": "2.09", "ppl": "4.26", "wps": "397569", "ups": "3.35", "wpb": "118632", "bsz": "256", "num_updates": "542800", "lr": "4.61818e-05", "gnorm": "2.257", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "162359"} +[2022-08-01 07:56:49,832][train_inner][INFO] - {"epoch": 11, "update": 10.55, "loss": "2.1", "ppl": "4.29", "wps": "398862", "ups": "3.38", "wpb": "117975", "bsz": "256", "num_updates": "543000", "lr": "4.61616e-05", "gnorm": "2.467", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "162418"} +[2022-08-01 07:57:49,616][train_inner][INFO] - {"epoch": 11, "update": 10.554, "loss": "2.099", "ppl": "4.28", "wps": "394791", "ups": "3.35", "wpb": "118010", "bsz": "256", "num_updates": "543200", "lr": "4.61414e-05", "gnorm": "2.317", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "162478"} +[2022-08-01 07:58:49,056][train_inner][INFO] - {"epoch": 11, "update": 10.558, "loss": "2.095", "ppl": "4.27", "wps": "396811", "ups": "3.36", "wpb": "117931", "bsz": "256", "num_updates": "543400", "lr": "4.61212e-05", "gnorm": "2.435", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "162537"} +[2022-08-01 07:59:48,323][train_inner][INFO] - {"epoch": 11, "update": 10.562, "loss": "2.096", "ppl": "4.27", "wps": "398898", "ups": "3.37", "wpb": "118207", "bsz": "256", "num_updates": "543600", "lr": "4.6101e-05", "gnorm": "2.263", "loss_scale": "2", "train_wall": "59", "gb_free": "25.5", "wall": "162597"} +[2022-08-01 08:00:41,114][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 08:00:48,196][train_inner][INFO] - {"epoch": 11, "update": 10.566, "loss": "2.094", "ppl": "4.27", "wps": "395024", "ups": "3.34", "wpb": "118255", "bsz": "256", "num_updates": "543800", "lr": "4.60808e-05", "gnorm": "2.313", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "162657"} +[2022-08-01 08:01:47,453][train_inner][INFO] - {"epoch": 11, "update": 10.57, "loss": "2.092", "ppl": "4.26", "wps": "398479", "ups": "3.38", "wpb": "118062", "bsz": "256", "num_updates": "544000", "lr": "4.60606e-05", "gnorm": "2.275", "loss_scale": "2", "train_wall": "59", "gb_free": "28", "wall": "162716"} +[2022-08-01 08:02:47,279][train_inner][INFO] - {"epoch": 11, "update": 10.573, "loss": "2.097", "ppl": "4.28", "wps": "396776", "ups": "3.34", "wpb": "118688", "bsz": "256", "num_updates": "544200", "lr": "4.60404e-05", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "162776"} +[2022-08-01 08:03:46,846][train_inner][INFO] - {"epoch": 11, "update": 10.577, "loss": "2.092", "ppl": "4.26", "wps": "398032", "ups": "3.36", "wpb": "118546", "bsz": "256", "num_updates": "544400", "lr": "4.60202e-05", "gnorm": "2.476", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "162835"} +[2022-08-01 08:04:46,273][train_inner][INFO] - {"epoch": 11, "update": 10.581, "loss": "2.094", "ppl": "4.27", "wps": "399187", "ups": "3.37", "wpb": "118612", "bsz": "256", "num_updates": "544600", "lr": "4.6e-05", "gnorm": "2.441", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "162895"} +[2022-08-01 08:05:45,967][train_inner][INFO] - {"epoch": 11, "update": 10.585, "loss": "2.09", "ppl": "4.26", "wps": "395823", "ups": "3.35", "wpb": "118142", "bsz": "256", "num_updates": "544800", "lr": "4.59798e-05", "gnorm": "2.326", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "162954"} +[2022-08-01 08:06:45,394][train_inner][INFO] - {"epoch": 11, "update": 10.589, "loss": "2.095", "ppl": "4.27", "wps": "399475", "ups": "3.37", "wpb": "118697", "bsz": "256", "num_updates": "545000", "lr": "4.59596e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "163014"} +[2022-08-01 08:07:45,100][train_inner][INFO] - {"epoch": 11, "update": 10.593, "loss": "2.092", "ppl": "4.26", "wps": "397683", "ups": "3.35", "wpb": "118720", "bsz": "256", "num_updates": "545200", "lr": "4.59394e-05", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "163074"} +[2022-08-01 08:08:44,959][train_inner][INFO] - {"epoch": 11, "update": 10.597, "loss": "2.088", "ppl": "4.25", "wps": "397391", "ups": "3.34", "wpb": "118935", "bsz": "256", "num_updates": "545400", "lr": "4.59192e-05", "gnorm": "2.367", "loss_scale": "2", "train_wall": "60", "gb_free": "22.3", "wall": "163133"} +[2022-08-01 08:09:44,107][train_inner][INFO] - {"epoch": 11, "update": 10.601, "loss": "2.094", "ppl": "4.27", "wps": "402159", "ups": "3.38", "wpb": "118934", "bsz": "256", "num_updates": "545600", "lr": "4.5899e-05", "gnorm": "2.463", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "163193"} +[2022-08-01 08:10:43,676][train_inner][INFO] - {"epoch": 11, "update": 10.605, "loss": "2.098", "ppl": "4.28", "wps": "398173", "ups": "3.36", "wpb": "118594", "bsz": "256", "num_updates": "545800", "lr": "4.58788e-05", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "163252"} +[2022-08-01 08:11:18,387][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 08:11:43,861][train_inner][INFO] - {"epoch": 11, "update": 10.608, "loss": "2.092", "ppl": "4.26", "wps": "395184", "ups": "3.32", "wpb": "118920", "bsz": "256", "num_updates": "546000", "lr": "4.58586e-05", "gnorm": "2.213", "loss_scale": "2", "train_wall": "60", "gb_free": "26.7", "wall": "163312"} +[2022-08-01 08:12:43,355][train_inner][INFO] - {"epoch": 11, "update": 10.612, "loss": "2.088", "ppl": "4.25", "wps": "399013", "ups": "3.36", "wpb": "118693", "bsz": "256", "num_updates": "546200", "lr": "4.58384e-05", "gnorm": "2.287", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "163372"} +[2022-08-01 08:13:42,936][train_inner][INFO] - {"epoch": 11, "update": 10.616, "loss": "2.096", "ppl": "4.28", "wps": "395726", "ups": "3.36", "wpb": "117888", "bsz": "256", "num_updates": "546400", "lr": "4.58182e-05", "gnorm": "2.284", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "163431"} +[2022-08-01 08:14:42,389][train_inner][INFO] - {"epoch": 11, "update": 10.62, "loss": "2.103", "ppl": "4.29", "wps": "395611", "ups": "3.36", "wpb": "117600", "bsz": "256", "num_updates": "546600", "lr": "4.5798e-05", "gnorm": "2.35", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "163491"} +[2022-08-01 08:15:41,968][train_inner][INFO] - {"epoch": 11, "update": 10.624, "loss": "2.095", "ppl": "4.27", "wps": "397188", "ups": "3.36", "wpb": "118319", "bsz": "256", "num_updates": "546800", "lr": "4.57778e-05", "gnorm": "2.441", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "163550"} +[2022-08-01 08:16:41,412][train_inner][INFO] - {"epoch": 11, "update": 10.628, "loss": "2.099", "ppl": "4.28", "wps": "398095", "ups": "3.36", "wpb": "118322", "bsz": "256", "num_updates": "547000", "lr": "4.57576e-05", "gnorm": "2.33", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "163610"} +[2022-08-01 08:17:40,793][train_inner][INFO] - {"epoch": 11, "update": 10.632, "loss": "2.091", "ppl": "4.26", "wps": "397495", "ups": "3.37", "wpb": "118017", "bsz": "256", "num_updates": "547200", "lr": "4.57374e-05", "gnorm": "2.532", "loss_scale": "2", "train_wall": "59", "gb_free": "28.8", "wall": "163669"} +[2022-08-01 08:18:40,440][train_inner][INFO] - {"epoch": 11, "update": 10.636, "loss": "2.094", "ppl": "4.27", "wps": "394961", "ups": "3.35", "wpb": "117790", "bsz": "256", "num_updates": "547400", "lr": "4.57172e-05", "gnorm": "2.308", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "163729"} +[2022-08-01 08:19:39,598][train_inner][INFO] - {"epoch": 11, "update": 10.64, "loss": "2.092", "ppl": "4.26", "wps": "400041", "ups": "3.38", "wpb": "118328", "bsz": "256", "num_updates": "547600", "lr": "4.5697e-05", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "163788"} +[2022-08-01 08:20:38,803][train_inner][INFO] - {"epoch": 11, "update": 10.643, "loss": "2.095", "ppl": "4.27", "wps": "400851", "ups": "3.38", "wpb": "118660", "bsz": "256", "num_updates": "547800", "lr": "4.56768e-05", "gnorm": "2.467", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "163847"} +[2022-08-01 08:21:38,654][train_inner][INFO] - {"epoch": 11, "update": 10.647, "loss": "2.09", "ppl": "4.26", "wps": "394692", "ups": "3.34", "wpb": "118112", "bsz": "256", "num_updates": "548000", "lr": "4.56566e-05", "gnorm": "2.292", "loss_scale": "4", "train_wall": "60", "gb_free": "22.8", "wall": "163907"} +[2022-08-01 08:21:43,899][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 08:22:38,022][train_inner][INFO] - {"epoch": 11, "update": 10.651, "loss": "2.096", "ppl": "4.28", "wps": "397856", "ups": "3.37", "wpb": "118099", "bsz": "256", "num_updates": "548200", "lr": "4.56364e-05", "gnorm": "2.639", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "163966"} +[2022-08-01 08:23:37,605][train_inner][INFO] - {"epoch": 11, "update": 10.655, "loss": "2.102", "ppl": "4.29", "wps": "395582", "ups": "3.36", "wpb": "117850", "bsz": "256", "num_updates": "548400", "lr": "4.56162e-05", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "164026"} +[2022-08-01 08:24:37,083][train_inner][INFO] - {"epoch": 11, "update": 10.659, "loss": "2.09", "ppl": "4.26", "wps": "398712", "ups": "3.36", "wpb": "118571", "bsz": "256", "num_updates": "548600", "lr": "4.5596e-05", "gnorm": "2.351", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "164086"} +[2022-08-01 08:25:36,432][train_inner][INFO] - {"epoch": 11, "update": 10.663, "loss": "2.09", "ppl": "4.26", "wps": "399527", "ups": "3.37", "wpb": "118557", "bsz": "256", "num_updates": "548800", "lr": "4.55758e-05", "gnorm": "2.492", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "164145"} +[2022-08-01 08:26:35,705][train_inner][INFO] - {"epoch": 11, "update": 10.667, "loss": "2.099", "ppl": "4.29", "wps": "397682", "ups": "3.37", "wpb": "117858", "bsz": "256", "num_updates": "549000", "lr": "4.55556e-05", "gnorm": "2.42", "loss_scale": "2", "train_wall": "59", "gb_free": "27.5", "wall": "164204"} +[2022-08-01 08:27:35,489][train_inner][INFO] - {"epoch": 11, "update": 10.671, "loss": "2.099", "ppl": "4.28", "wps": "395123", "ups": "3.35", "wpb": "118110", "bsz": "256", "num_updates": "549200", "lr": "4.55354e-05", "gnorm": "2.184", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "164264"} +[2022-08-01 08:28:34,844][train_inner][INFO] - {"epoch": 11, "update": 10.675, "loss": "2.092", "ppl": "4.26", "wps": "398314", "ups": "3.37", "wpb": "118210", "bsz": "256", "num_updates": "549400", "lr": "4.55152e-05", "gnorm": "2.489", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "164323"} +[2022-08-01 08:29:34,695][train_inner][INFO] - {"epoch": 11, "update": 10.678, "loss": "2.094", "ppl": "4.27", "wps": "396832", "ups": "3.34", "wpb": "118753", "bsz": "256", "num_updates": "549600", "lr": "4.54949e-05", "gnorm": "2.293", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "164383"} +[2022-08-01 08:30:34,201][train_inner][INFO] - {"epoch": 11, "update": 10.682, "loss": "2.088", "ppl": "4.25", "wps": "396297", "ups": "3.36", "wpb": "117909", "bsz": "256", "num_updates": "549800", "lr": "4.54747e-05", "gnorm": "2.248", "loss_scale": "2", "train_wall": "59", "gb_free": "27.5", "wall": "164443"} +[2022-08-01 08:31:33,393][train_inner][INFO] - {"epoch": 11, "update": 10.686, "loss": "2.091", "ppl": "4.26", "wps": "399997", "ups": "3.38", "wpb": "118382", "bsz": "256", "num_updates": "550000", "lr": "4.54545e-05", "gnorm": "2.321", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "164502"} +[2022-08-01 08:31:33,394][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 08:31:56,155][valid][INFO] - {"epoch": 11, "valid_loss": "1.979", "valid_ppl": "3.94", "valid_wps": "1.59362e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "550000", "valid_best_loss": "1.979"} +[2022-08-01 08:31:56,158][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 11 @ 550000 updates +[2022-08-01 08:31:56,159][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_11_550000.pt +[2022-08-01 08:32:02,530][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_11_550000.pt +[2022-08-01 08:32:23,549][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_11_550000.pt (epoch 11 @ 550000 updates, score 1.979) (writing took 27.390695843845606 seconds) +[2022-08-01 08:32:56,469][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 08:33:23,410][train_inner][INFO] - {"epoch": 11, "update": 10.69, "loss": "2.093", "ppl": "4.27", "wps": "214991", "ups": "1.82", "wpb": "118262", "bsz": "256", "num_updates": "550200", "lr": "4.54343e-05", "gnorm": "2.259", "loss_scale": "2", "train_wall": "60", "gb_free": "22.6", "wall": "164612"} +[2022-08-01 08:34:22,913][train_inner][INFO] - {"epoch": 11, "update": 10.694, "loss": "2.095", "ppl": "4.27", "wps": "397315", "ups": "3.36", "wpb": "118206", "bsz": "256", "num_updates": "550400", "lr": "4.54141e-05", "gnorm": "2.324", "loss_scale": "2", "train_wall": "59", "gb_free": "27.6", "wall": "164671"} +[2022-08-01 08:35:22,470][train_inner][INFO] - {"epoch": 11, "update": 10.698, "loss": "2.097", "ppl": "4.28", "wps": "396990", "ups": "3.36", "wpb": "118218", "bsz": "256", "num_updates": "550600", "lr": "4.53939e-05", "gnorm": "2.337", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "164731"} +[2022-08-01 08:36:21,562][train_inner][INFO] - {"epoch": 11, "update": 10.702, "loss": "2.1", "ppl": "4.29", "wps": "399367", "ups": "3.38", "wpb": "117996", "bsz": "255.9", "num_updates": "550800", "lr": "4.53737e-05", "gnorm": "2.462", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "164790"} +[2022-08-01 08:37:21,161][train_inner][INFO] - {"epoch": 11, "update": 10.706, "loss": "2.092", "ppl": "4.26", "wps": "398327", "ups": "3.36", "wpb": "118699", "bsz": "256", "num_updates": "551000", "lr": "4.53535e-05", "gnorm": "2.412", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "164850"} +[2022-08-01 08:38:20,644][train_inner][INFO] - {"epoch": 11, "update": 10.71, "loss": "2.097", "ppl": "4.28", "wps": "395853", "ups": "3.36", "wpb": "117732", "bsz": "256", "num_updates": "551200", "lr": "4.53333e-05", "gnorm": "2.235", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "164909"} +[2022-08-01 08:39:20,089][train_inner][INFO] - {"epoch": 11, "update": 10.713, "loss": "2.092", "ppl": "4.26", "wps": "398022", "ups": "3.36", "wpb": "118301", "bsz": "256", "num_updates": "551400", "lr": "4.53131e-05", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "164969"} +[2022-08-01 08:40:19,523][train_inner][INFO] - {"epoch": 11, "update": 10.717, "loss": "2.094", "ppl": "4.27", "wps": "397185", "ups": "3.37", "wpb": "118030", "bsz": "256", "num_updates": "551600", "lr": "4.52929e-05", "gnorm": "2.549", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "165028"} +[2022-08-01 08:41:19,239][train_inner][INFO] - {"epoch": 11, "update": 10.721, "loss": "2.09", "ppl": "4.26", "wps": "395370", "ups": "3.35", "wpb": "118049", "bsz": "256", "num_updates": "551800", "lr": "4.52727e-05", "gnorm": "2.377", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "165088"} +[2022-08-01 08:42:18,607][train_inner][INFO] - {"epoch": 11, "update": 10.725, "loss": "2.087", "ppl": "4.25", "wps": "400320", "ups": "3.37", "wpb": "118831", "bsz": "256", "num_updates": "552000", "lr": "4.52525e-05", "gnorm": "2.614", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "165147"} +[2022-08-01 08:43:18,322][train_inner][INFO] - {"epoch": 11, "update": 10.729, "loss": "2.093", "ppl": "4.27", "wps": "396356", "ups": "3.35", "wpb": "118342", "bsz": "256", "num_updates": "552200", "lr": "4.52323e-05", "gnorm": "2.394", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "165207"} +[2022-08-01 08:43:51,356][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 08:44:18,157][train_inner][INFO] - {"epoch": 11, "update": 10.733, "loss": "2.092", "ppl": "4.26", "wps": "397294", "ups": "3.34", "wpb": "118860", "bsz": "256", "num_updates": "552400", "lr": "4.52121e-05", "gnorm": "2.214", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "165267"} +[2022-08-01 08:45:17,758][train_inner][INFO] - {"epoch": 11, "update": 10.737, "loss": "2.092", "ppl": "4.26", "wps": "397745", "ups": "3.36", "wpb": "118529", "bsz": "256", "num_updates": "552600", "lr": "4.51919e-05", "gnorm": "2.248", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "165326"} +[2022-08-01 08:46:17,500][train_inner][INFO] - {"epoch": 11, "update": 10.741, "loss": "2.091", "ppl": "4.26", "wps": "394726", "ups": "3.35", "wpb": "117908", "bsz": "256", "num_updates": "552800", "lr": "4.51717e-05", "gnorm": "2.413", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "165386"} +[2022-08-01 08:47:17,193][train_inner][INFO] - {"epoch": 11, "update": 10.744, "loss": "2.101", "ppl": "4.29", "wps": "394987", "ups": "3.35", "wpb": "117888", "bsz": "256", "num_updates": "553000", "lr": "4.51515e-05", "gnorm": "2.385", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "165446"} +[2022-08-01 08:48:16,457][train_inner][INFO] - {"epoch": 11, "update": 10.748, "loss": "2.091", "ppl": "4.26", "wps": "397838", "ups": "3.37", "wpb": "117886", "bsz": "256", "num_updates": "553200", "lr": "4.51313e-05", "gnorm": "2.239", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "165505"} +[2022-08-01 08:49:15,551][train_inner][INFO] - {"epoch": 11, "update": 10.752, "loss": "2.093", "ppl": "4.27", "wps": "399683", "ups": "3.38", "wpb": "118093", "bsz": "256", "num_updates": "553400", "lr": "4.51111e-05", "gnorm": "2.321", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "165564"} +[2022-08-01 08:50:14,886][train_inner][INFO] - {"epoch": 11, "update": 10.756, "loss": "2.091", "ppl": "4.26", "wps": "399022", "ups": "3.37", "wpb": "118379", "bsz": "256", "num_updates": "553600", "lr": "4.50909e-05", "gnorm": "2.15", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "165623"} +[2022-08-01 08:51:14,443][train_inner][INFO] - {"epoch": 11, "update": 10.76, "loss": "2.09", "ppl": "4.26", "wps": "398797", "ups": "3.36", "wpb": "118755", "bsz": "256", "num_updates": "553800", "lr": "4.50707e-05", "gnorm": "2.35", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "165683"} +[2022-08-01 08:52:13,696][train_inner][INFO] - {"epoch": 11, "update": 10.764, "loss": "2.093", "ppl": "4.27", "wps": "397130", "ups": "3.38", "wpb": "117656", "bsz": "256", "num_updates": "554000", "lr": "4.50505e-05", "gnorm": "2.451", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "165742"} +[2022-08-01 08:53:12,945][train_inner][INFO] - {"epoch": 11, "update": 10.768, "loss": "2.102", "ppl": "4.29", "wps": "398596", "ups": "3.38", "wpb": "118081", "bsz": "256", "num_updates": "554200", "lr": "4.50303e-05", "gnorm": "2.366", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "165801"} +[2022-08-01 08:54:12,321][train_inner][INFO] - {"epoch": 11, "update": 10.772, "loss": "2.092", "ppl": "4.26", "wps": "397684", "ups": "3.37", "wpb": "118064", "bsz": "256", "num_updates": "554400", "lr": "4.50101e-05", "gnorm": "2.605", "loss_scale": "4", "train_wall": "59", "gb_free": "25.7", "wall": "165861"} +[2022-08-01 08:54:14,024][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 08:55:11,628][train_inner][INFO] - {"epoch": 11, "update": 10.776, "loss": "2.093", "ppl": "4.27", "wps": "399635", "ups": "3.37", "wpb": "118504", "bsz": "256", "num_updates": "554600", "lr": "4.49899e-05", "gnorm": "2.402", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "165920"} +[2022-08-01 08:56:10,818][train_inner][INFO] - {"epoch": 11, "update": 10.779, "loss": "2.095", "ppl": "4.27", "wps": "398253", "ups": "3.38", "wpb": "117864", "bsz": "256", "num_updates": "554800", "lr": "4.49697e-05", "gnorm": "2.489", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "165979"} +[2022-08-01 08:57:10,546][train_inner][INFO] - {"epoch": 11, "update": 10.783, "loss": "2.093", "ppl": "4.27", "wps": "398005", "ups": "3.35", "wpb": "118858", "bsz": "256", "num_updates": "555000", "lr": "4.49495e-05", "gnorm": "2.335", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "166039"} +[2022-08-01 08:58:09,727][train_inner][INFO] - {"epoch": 11, "update": 10.787, "loss": "2.086", "ppl": "4.25", "wps": "401429", "ups": "3.38", "wpb": "118786", "bsz": "256", "num_updates": "555200", "lr": "4.49293e-05", "gnorm": "2.556", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "166098"} +[2022-08-01 08:59:09,886][train_inner][INFO] - {"epoch": 11, "update": 10.791, "loss": "2.087", "ppl": "4.25", "wps": "393519", "ups": "3.32", "wpb": "118366", "bsz": "256", "num_updates": "555400", "lr": "4.49091e-05", "gnorm": "2.489", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "166158"} +[2022-08-01 09:00:09,423][train_inner][INFO] - {"epoch": 11, "update": 10.795, "loss": "2.085", "ppl": "4.24", "wps": "398568", "ups": "3.36", "wpb": "118647", "bsz": "256", "num_updates": "555600", "lr": "4.48889e-05", "gnorm": "2.101", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "166218"} +[2022-08-01 09:01:08,566][train_inner][INFO] - {"epoch": 11, "update": 10.799, "loss": "2.089", "ppl": "4.25", "wps": "399791", "ups": "3.38", "wpb": "118224", "bsz": "256", "num_updates": "555800", "lr": "4.48687e-05", "gnorm": "2.369", "loss_scale": "2", "train_wall": "59", "gb_free": "27.5", "wall": "166277"} +[2022-08-01 09:02:07,733][train_inner][INFO] - {"epoch": 11, "update": 10.803, "loss": "2.092", "ppl": "4.26", "wps": "398081", "ups": "3.38", "wpb": "117766", "bsz": "256", "num_updates": "556000", "lr": "4.48485e-05", "gnorm": "2.406", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "166336"} +[2022-08-01 09:03:07,157][train_inner][INFO] - {"epoch": 11, "update": 10.807, "loss": "2.089", "ppl": "4.26", "wps": "398114", "ups": "3.37", "wpb": "118287", "bsz": "256", "num_updates": "556200", "lr": "4.48283e-05", "gnorm": "2.446", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "166396"} +[2022-08-01 09:04:06,918][train_inner][INFO] - {"epoch": 11, "update": 10.811, "loss": "2.084", "ppl": "4.24", "wps": "399419", "ups": "3.35", "wpb": "119347", "bsz": "256", "num_updates": "556400", "lr": "4.48081e-05", "gnorm": "2.453", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "166455"} +[2022-08-01 09:05:05,913][train_inner][INFO] - {"epoch": 11, "update": 10.814, "loss": "2.091", "ppl": "4.26", "wps": "400420", "ups": "3.39", "wpb": "118114", "bsz": "256", "num_updates": "556600", "lr": "4.47879e-05", "gnorm": "2.244", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "166514"} +[2022-08-01 09:05:13,106][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:06:05,731][train_inner][INFO] - {"epoch": 11, "update": 10.818, "loss": "2.092", "ppl": "4.26", "wps": "392966", "ups": "3.34", "wpb": "117530", "bsz": "256", "num_updates": "556800", "lr": "4.47677e-05", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "166574"} +[2022-08-01 09:07:05,134][train_inner][INFO] - {"epoch": 11, "update": 10.822, "loss": "2.084", "ppl": "4.24", "wps": "399341", "ups": "3.37", "wpb": "118610", "bsz": "256", "num_updates": "557000", "lr": "4.47475e-05", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "166634"} +[2022-08-01 09:08:04,332][train_inner][INFO] - {"epoch": 11, "update": 10.826, "loss": "2.1", "ppl": "4.29", "wps": "398185", "ups": "3.38", "wpb": "117857", "bsz": "256", "num_updates": "557200", "lr": "4.47273e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "166693"} +[2022-08-01 09:09:03,564][train_inner][INFO] - {"epoch": 11, "update": 10.83, "loss": "2.093", "ppl": "4.27", "wps": "398765", "ups": "3.38", "wpb": "118098", "bsz": "256", "num_updates": "557400", "lr": "4.47071e-05", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "166752"} +[2022-08-01 09:10:03,308][train_inner][INFO] - {"epoch": 11, "update": 10.834, "loss": "2.084", "ppl": "4.24", "wps": "397473", "ups": "3.35", "wpb": "118733", "bsz": "256", "num_updates": "557600", "lr": "4.46869e-05", "gnorm": "2.202", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "166812"} +[2022-08-01 09:11:02,604][train_inner][INFO] - {"epoch": 11, "update": 10.838, "loss": "2.086", "ppl": "4.25", "wps": "399555", "ups": "3.37", "wpb": "118460", "bsz": "256", "num_updates": "557800", "lr": "4.46667e-05", "gnorm": "2.436", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "166871"} +[2022-08-01 09:12:01,813][train_inner][INFO] - {"epoch": 11, "update": 10.842, "loss": "2.089", "ppl": "4.25", "wps": "399680", "ups": "3.38", "wpb": "118322", "bsz": "256", "num_updates": "558000", "lr": "4.46465e-05", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "166930"} +[2022-08-01 09:13:00,583][train_inner][INFO] - {"epoch": 11, "update": 10.846, "loss": "2.094", "ppl": "4.27", "wps": "400687", "ups": "3.4", "wpb": "117742", "bsz": "256", "num_updates": "558200", "lr": "4.46263e-05", "gnorm": "2.209", "loss_scale": "2", "train_wall": "58", "gb_free": "30.3", "wall": "166989"} +[2022-08-01 09:14:00,229][train_inner][INFO] - {"epoch": 11, "update": 10.849, "loss": "2.09", "ppl": "4.26", "wps": "395825", "ups": "3.35", "wpb": "118046", "bsz": "256", "num_updates": "558400", "lr": "4.46061e-05", "gnorm": "2.552", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "167049"} +[2022-08-01 09:14:59,650][train_inner][INFO] - {"epoch": 11, "update": 10.853, "loss": "2.088", "ppl": "4.25", "wps": "401837", "ups": "3.37", "wpb": "119388", "bsz": "256", "num_updates": "558600", "lr": "4.45859e-05", "gnorm": "2.498", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "167108"} +[2022-08-01 09:15:55,198][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:15:59,064][train_inner][INFO] - {"epoch": 11, "update": 10.857, "loss": "2.095", "ppl": "4.27", "wps": "396246", "ups": "3.37", "wpb": "117711", "bsz": "256", "num_updates": "558800", "lr": "4.45657e-05", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "167167"} +[2022-08-01 09:16:58,754][train_inner][INFO] - {"epoch": 11, "update": 10.861, "loss": "2.084", "ppl": "4.24", "wps": "396678", "ups": "3.35", "wpb": "118388", "bsz": "256", "num_updates": "559000", "lr": "4.45455e-05", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "167227"} +[2022-08-01 09:17:59,709][train_inner][INFO] - {"epoch": 11, "update": 10.865, "loss": "2.091", "ppl": "4.26", "wps": "387514", "ups": "3.28", "wpb": "118105", "bsz": "256", "num_updates": "559200", "lr": "4.45253e-05", "gnorm": "2.496", "loss_scale": "2", "train_wall": "61", "gb_free": "23.6", "wall": "167288"} +[2022-08-01 09:18:59,189][train_inner][INFO] - {"epoch": 11, "update": 10.869, "loss": "2.09", "ppl": "4.26", "wps": "396204", "ups": "3.36", "wpb": "117831", "bsz": "256", "num_updates": "559400", "lr": "4.45051e-05", "gnorm": "2.458", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "167348"} +[2022-08-01 09:19:58,742][train_inner][INFO] - {"epoch": 11, "update": 10.873, "loss": "2.095", "ppl": "4.27", "wps": "395936", "ups": "3.36", "wpb": "117894", "bsz": "256", "num_updates": "559600", "lr": "4.44848e-05", "gnorm": "2.49", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "167407"} +[2022-08-01 09:20:58,209][train_inner][INFO] - {"epoch": 11, "update": 10.877, "loss": "2.086", "ppl": "4.25", "wps": "400387", "ups": "3.36", "wpb": "119048", "bsz": "256", "num_updates": "559800", "lr": "4.44646e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "167467"} +[2022-08-01 09:21:59,146][train_inner][INFO] - {"epoch": 11, "update": 10.88, "loss": "2.092", "ppl": "4.26", "wps": "389668", "ups": "3.28", "wpb": "118727", "bsz": "256", "num_updates": "560000", "lr": "4.44444e-05", "gnorm": "2.355", "loss_scale": "2", "train_wall": "61", "gb_free": "24.7", "wall": "167528"} +[2022-08-01 09:22:58,828][train_inner][INFO] - {"epoch": 11, "update": 10.884, "loss": "2.085", "ppl": "4.24", "wps": "397783", "ups": "3.35", "wpb": "118701", "bsz": "256", "num_updates": "560200", "lr": "4.44242e-05", "gnorm": "2.314", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "167587"} +[2022-08-01 09:23:58,162][train_inner][INFO] - {"epoch": 11, "update": 10.888, "loss": "2.085", "ppl": "4.24", "wps": "400150", "ups": "3.37", "wpb": "118713", "bsz": "256", "num_updates": "560400", "lr": "4.4404e-05", "gnorm": "2.482", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "167647"} +[2022-08-01 09:24:57,810][train_inner][INFO] - {"epoch": 11, "update": 10.892, "loss": "2.087", "ppl": "4.25", "wps": "395213", "ups": "3.35", "wpb": "117867", "bsz": "256", "num_updates": "560600", "lr": "4.43838e-05", "gnorm": "2.281", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "167706"} +[2022-08-01 09:25:57,357][train_inner][INFO] - {"epoch": 11, "update": 10.896, "loss": "2.086", "ppl": "4.25", "wps": "398890", "ups": "3.36", "wpb": "118762", "bsz": "256", "num_updates": "560800", "lr": "4.43636e-05", "gnorm": "2.564", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "167766"} +[2022-08-01 09:26:19,480][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:26:57,152][train_inner][INFO] - {"epoch": 11, "update": 10.9, "loss": "2.085", "ppl": "4.24", "wps": "396593", "ups": "3.34", "wpb": "118572", "bsz": "256", "num_updates": "561000", "lr": "4.43434e-05", "gnorm": "2.384", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "167826"} +[2022-08-01 09:27:56,446][train_inner][INFO] - {"epoch": 11, "update": 10.904, "loss": "2.091", "ppl": "4.26", "wps": "399267", "ups": "3.37", "wpb": "118368", "bsz": "256", "num_updates": "561200", "lr": "4.43232e-05", "gnorm": "2.334", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "167885"} +[2022-08-01 09:28:55,956][train_inner][INFO] - {"epoch": 11, "update": 10.908, "loss": "2.087", "ppl": "4.25", "wps": "397282", "ups": "3.36", "wpb": "118211", "bsz": "256", "num_updates": "561400", "lr": "4.4303e-05", "gnorm": "2.369", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "167944"} +[2022-08-01 09:29:55,460][train_inner][INFO] - {"epoch": 11, "update": 10.912, "loss": "2.089", "ppl": "4.26", "wps": "396977", "ups": "3.36", "wpb": "118108", "bsz": "256", "num_updates": "561600", "lr": "4.42828e-05", "gnorm": "2.303", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "168004"} +[2022-08-01 09:30:54,743][train_inner][INFO] - {"epoch": 11, "update": 10.915, "loss": "2.088", "ppl": "4.25", "wps": "401226", "ups": "3.37", "wpb": "118928", "bsz": "256", "num_updates": "561800", "lr": "4.42626e-05", "gnorm": "2.481", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "168063"} +[2022-08-01 09:31:53,986][train_inner][INFO] - {"epoch": 11, "update": 10.919, "loss": "2.09", "ppl": "4.26", "wps": "397023", "ups": "3.38", "wpb": "117603", "bsz": "256", "num_updates": "562000", "lr": "4.42424e-05", "gnorm": "2.416", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "168122"} +[2022-08-01 09:32:53,663][train_inner][INFO] - {"epoch": 11, "update": 10.923, "loss": "2.088", "ppl": "4.25", "wps": "396973", "ups": "3.35", "wpb": "118451", "bsz": "256", "num_updates": "562200", "lr": "4.42222e-05", "gnorm": "2.452", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "168182"} +[2022-08-01 09:33:53,545][train_inner][INFO] - {"epoch": 11, "update": 10.927, "loss": "2.087", "ppl": "4.25", "wps": "395071", "ups": "3.34", "wpb": "118288", "bsz": "256", "num_updates": "562400", "lr": "4.4202e-05", "gnorm": "2.402", "loss_scale": "2", "train_wall": "60", "gb_free": "23.1", "wall": "168242"} +[2022-08-01 09:34:53,104][train_inner][INFO] - {"epoch": 11, "update": 10.931, "loss": "2.092", "ppl": "4.26", "wps": "397129", "ups": "3.36", "wpb": "118262", "bsz": "256", "num_updates": "562600", "lr": "4.41818e-05", "gnorm": "2.866", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "168302"} +[2022-08-01 09:35:52,333][train_inner][INFO] - {"epoch": 11, "update": 10.935, "loss": "2.095", "ppl": "4.27", "wps": "399556", "ups": "3.38", "wpb": "118324", "bsz": "256", "num_updates": "562800", "lr": "4.41616e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "168361"} +[2022-08-01 09:36:34,645][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:36:51,537][train_inner][INFO] - {"epoch": 11, "update": 10.939, "loss": "2.088", "ppl": "4.25", "wps": "397973", "ups": "3.38", "wpb": "117808", "bsz": "256", "num_updates": "563000", "lr": "4.41414e-05", "gnorm": "2.317", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "168420"} +[2022-08-01 09:37:51,323][train_inner][INFO] - {"epoch": 11, "update": 10.943, "loss": "2.092", "ppl": "4.26", "wps": "394242", "ups": "3.35", "wpb": "117851", "bsz": "256", "num_updates": "563200", "lr": "4.41212e-05", "gnorm": "2.181", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "168480"} +[2022-08-01 09:38:50,931][train_inner][INFO] - {"epoch": 11, "update": 10.947, "loss": "2.089", "ppl": "4.26", "wps": "396338", "ups": "3.36", "wpb": "118124", "bsz": "256", "num_updates": "563400", "lr": "4.4101e-05", "gnorm": "2.304", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "168539"} +[2022-08-01 09:39:50,469][train_inner][INFO] - {"epoch": 11, "update": 10.95, "loss": "2.091", "ppl": "4.26", "wps": "396730", "ups": "3.36", "wpb": "118102", "bsz": "256", "num_updates": "563600", "lr": "4.40808e-05", "gnorm": "2.561", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "168599"} +[2022-08-01 09:40:49,916][train_inner][INFO] - {"epoch": 11, "update": 10.954, "loss": "2.097", "ppl": "4.28", "wps": "395681", "ups": "3.36", "wpb": "117609", "bsz": "256", "num_updates": "563800", "lr": "4.40606e-05", "gnorm": "2.304", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "168658"} +[2022-08-01 09:41:49,841][train_inner][INFO] - {"epoch": 11, "update": 10.958, "loss": "2.091", "ppl": "4.26", "wps": "395444", "ups": "3.34", "wpb": "118484", "bsz": "256", "num_updates": "564000", "lr": "4.40404e-05", "gnorm": "2.467", "loss_scale": "2", "train_wall": "60", "gb_free": "22.6", "wall": "168718"} +[2022-08-01 09:42:49,221][train_inner][INFO] - {"epoch": 11, "update": 10.962, "loss": "2.088", "ppl": "4.25", "wps": "397481", "ups": "3.37", "wpb": "118012", "bsz": "256", "num_updates": "564200", "lr": "4.40202e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "168778"} +[2022-08-01 09:43:48,874][train_inner][INFO] - {"epoch": 11, "update": 10.966, "loss": "2.084", "ppl": "4.24", "wps": "397004", "ups": "3.35", "wpb": "118411", "bsz": "256", "num_updates": "564400", "lr": "4.4e-05", "gnorm": "2.676", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "168837"} +[2022-08-01 09:44:48,434][train_inner][INFO] - {"epoch": 11, "update": 10.97, "loss": "2.092", "ppl": "4.26", "wps": "396958", "ups": "3.36", "wpb": "118215", "bsz": "256", "num_updates": "564600", "lr": "4.39798e-05", "gnorm": "2.309", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "168897"} +[2022-08-01 09:45:47,957][train_inner][INFO] - {"epoch": 11, "update": 10.974, "loss": "2.085", "ppl": "4.24", "wps": "398775", "ups": "3.36", "wpb": "118680", "bsz": "256", "num_updates": "564800", "lr": "4.39596e-05", "gnorm": "2.419", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "168956"} +[2022-08-01 09:46:47,219][train_inner][INFO] - {"epoch": 11, "update": 10.978, "loss": "2.096", "ppl": "4.27", "wps": "399159", "ups": "3.37", "wpb": "118273", "bsz": "256", "num_updates": "565000", "lr": "4.39394e-05", "gnorm": "2.36", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "169016"} +[2022-08-01 09:47:09,347][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:47:46,978][train_inner][INFO] - {"epoch": 11, "update": 10.982, "loss": "2.09", "ppl": "4.26", "wps": "397768", "ups": "3.35", "wpb": "118851", "bsz": "256", "num_updates": "565200", "lr": "4.39192e-05", "gnorm": "2.408", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "169075"} +[2022-08-01 09:48:45,970][train_inner][INFO] - {"epoch": 11, "update": 10.985, "loss": "2.094", "ppl": "4.27", "wps": "401732", "ups": "3.39", "wpb": "118496", "bsz": "256", "num_updates": "565400", "lr": "4.3899e-05", "gnorm": "2.305", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "169134"} +[2022-08-01 09:49:45,529][train_inner][INFO] - {"epoch": 11, "update": 10.989, "loss": "2.089", "ppl": "4.25", "wps": "398763", "ups": "3.36", "wpb": "118749", "bsz": "256", "num_updates": "565600", "lr": "4.38788e-05", "gnorm": "2.418", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "169194"} +[2022-08-01 09:50:44,203][train_inner][INFO] - {"epoch": 11, "update": 10.993, "loss": "2.095", "ppl": "4.27", "wps": "400479", "ups": "3.41", "wpb": "117488", "bsz": "256", "num_updates": "565800", "lr": "4.38586e-05", "gnorm": "2.316", "loss_scale": "2", "train_wall": "58", "gb_free": "22", "wall": "169253"} +[2022-08-01 09:51:43,377][train_inner][INFO] - {"epoch": 11, "update": 10.997, "loss": "2.093", "ppl": "4.27", "wps": "399597", "ups": "3.38", "wpb": "118227", "bsz": "256", "num_updates": "566000", "lr": "4.38384e-05", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "169312"} +[2022-08-01 09:52:27,673][fairseq_cli.train][INFO] - end of epoch 11 (average epoch stats below) +[2022-08-01 09:52:27,673][train][INFO] - {"epoch": 11, "train_loss": "2.097", "train_ppl": "4.28", "train_wps": "395878", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "566150", "train_lr": "4.38232e-05", "train_gnorm": "2.308", "train_loss_scale": "2", "train_train_wall": "15232", "train_gb_free": "26.5", "train_wall": "169356"} +[2022-08-01 09:52:27,782][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 09:52:27,785][fairseq.trainer][INFO] - begin training epoch 12 +[2022-08-01 09:52:27,785][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 09:52:53,607][train_inner][INFO] - {"epoch": 12, "update": 11.001, "loss": "2.086", "ppl": "4.25", "wps": "335992", "ups": "2.85", "wpb": "117982", "bsz": "255.4", "num_updates": "566200", "lr": "4.38182e-05", "gnorm": "2.46", "loss_scale": "2", "train_wall": "60", "gb_free": "24.1", "wall": "169382"} +[2022-08-01 09:53:52,955][train_inner][INFO] - {"epoch": 12, "update": 11.005, "loss": "2.084", "ppl": "4.24", "wps": "398829", "ups": "3.37", "wpb": "118349", "bsz": "256", "num_updates": "566400", "lr": "4.3798e-05", "gnorm": "2.435", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "169441"} +[2022-08-01 09:54:52,376][train_inner][INFO] - {"epoch": 12, "update": 11.009, "loss": "2.085", "ppl": "4.24", "wps": "396798", "ups": "3.37", "wpb": "117890", "bsz": "256", "num_updates": "566600", "lr": "4.37778e-05", "gnorm": "2.299", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "169501"} +[2022-08-01 09:55:51,820][train_inner][INFO] - {"epoch": 12, "update": 11.013, "loss": "2.086", "ppl": "4.25", "wps": "398224", "ups": "3.36", "wpb": "118358", "bsz": "256", "num_updates": "566800", "lr": "4.37576e-05", "gnorm": "2.247", "loss_scale": "2", "train_wall": "59", "gb_free": "30.1", "wall": "169560"} +[2022-08-01 09:56:51,002][train_inner][INFO] - {"epoch": 12, "update": 11.017, "loss": "2.08", "ppl": "4.23", "wps": "400201", "ups": "3.38", "wpb": "118424", "bsz": "256", "num_updates": "567000", "lr": "4.37374e-05", "gnorm": "2.44", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "169619"} +[2022-08-01 09:57:50,553][train_inner][INFO] - {"epoch": 12, "update": 11.02, "loss": "2.088", "ppl": "4.25", "wps": "397195", "ups": "3.36", "wpb": "118267", "bsz": "256", "num_updates": "567200", "lr": "4.37172e-05", "gnorm": "2.304", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "169679"} +[2022-08-01 09:58:19,349][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:58:50,145][train_inner][INFO] - {"epoch": 12, "update": 11.024, "loss": "2.085", "ppl": "4.24", "wps": "395640", "ups": "3.36", "wpb": "117884", "bsz": "256", "num_updates": "567400", "lr": "4.3697e-05", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "26.8", "wall": "169739"} +[2022-08-01 09:59:49,318][train_inner][INFO] - {"epoch": 12, "update": 11.028, "loss": "2.085", "ppl": "4.24", "wps": "399156", "ups": "3.38", "wpb": "118096", "bsz": "256", "num_updates": "567600", "lr": "4.36768e-05", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "169798"} +[2022-08-01 10:00:48,389][train_inner][INFO] - {"epoch": 12, "update": 11.032, "loss": "2.088", "ppl": "4.25", "wps": "398145", "ups": "3.39", "wpb": "117592", "bsz": "256", "num_updates": "567800", "lr": "4.36566e-05", "gnorm": "2.314", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "169857"} +[2022-08-01 10:01:47,792][train_inner][INFO] - {"epoch": 12, "update": 11.036, "loss": "2.087", "ppl": "4.25", "wps": "398062", "ups": "3.37", "wpb": "118230", "bsz": "256", "num_updates": "568000", "lr": "4.36364e-05", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "169916"} +[2022-08-01 10:02:48,201][train_inner][INFO] - {"epoch": 12, "update": 11.04, "loss": "2.087", "ppl": "4.25", "wps": "391144", "ups": "3.31", "wpb": "118141", "bsz": "256", "num_updates": "568200", "lr": "4.36162e-05", "gnorm": "2.307", "loss_scale": "2", "train_wall": "60", "gb_free": "23.6", "wall": "169977"} +[2022-08-01 10:03:47,590][train_inner][INFO] - {"epoch": 12, "update": 11.044, "loss": "2.088", "ppl": "4.25", "wps": "398098", "ups": "3.37", "wpb": "118214", "bsz": "256", "num_updates": "568400", "lr": "4.3596e-05", "gnorm": "2.688", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "170036"} +[2022-08-01 10:04:46,862][train_inner][INFO] - {"epoch": 12, "update": 11.048, "loss": "2.084", "ppl": "4.24", "wps": "400413", "ups": "3.37", "wpb": "118664", "bsz": "256", "num_updates": "568600", "lr": "4.35758e-05", "gnorm": "2.444", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "170095"} +[2022-08-01 10:05:46,498][train_inner][INFO] - {"epoch": 12, "update": 11.051, "loss": "2.084", "ppl": "4.24", "wps": "395186", "ups": "3.35", "wpb": "117837", "bsz": "256", "num_updates": "568800", "lr": "4.35556e-05", "gnorm": "2.635", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "170155"} +[2022-08-01 10:06:46,260][train_inner][INFO] - {"epoch": 12, "update": 11.055, "loss": "2.077", "ppl": "4.22", "wps": "394986", "ups": "3.35", "wpb": "118024", "bsz": "256", "num_updates": "569000", "lr": "4.35354e-05", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "170215"} +[2022-08-01 10:07:45,750][train_inner][INFO] - {"epoch": 12, "update": 11.059, "loss": "2.083", "ppl": "4.24", "wps": "398205", "ups": "3.36", "wpb": "118447", "bsz": "256", "num_updates": "569200", "lr": "4.35152e-05", "gnorm": "2.323", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "170274"} +[2022-08-01 10:08:44,718][train_inner][INFO] - {"epoch": 12, "update": 11.063, "loss": "2.089", "ppl": "4.26", "wps": "401910", "ups": "3.39", "wpb": "118497", "bsz": "256", "num_updates": "569400", "lr": "4.34949e-05", "gnorm": "2.216", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "170333"} +[2022-08-01 10:09:44,068][train_inner][INFO] - {"epoch": 12, "update": 11.067, "loss": "2.082", "ppl": "4.23", "wps": "399869", "ups": "3.37", "wpb": "118660", "bsz": "256", "num_updates": "569600", "lr": "4.34747e-05", "gnorm": "2.213", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "170392"} +[2022-08-01 10:09:46,150][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 10:10:43,579][train_inner][INFO] - {"epoch": 12, "update": 11.071, "loss": "2.086", "ppl": "4.25", "wps": "398693", "ups": "3.36", "wpb": "118632", "bsz": "256", "num_updates": "569800", "lr": "4.34545e-05", "gnorm": "2.659", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "170452"} +[2022-08-01 10:11:43,278][train_inner][INFO] - {"epoch": 12, "update": 11.075, "loss": "2.084", "ppl": "4.24", "wps": "398217", "ups": "3.35", "wpb": "118866", "bsz": "256", "num_updates": "570000", "lr": "4.34343e-05", "gnorm": "2.528", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "170512"} +[2022-08-01 10:12:42,879][train_inner][INFO] - {"epoch": 12, "update": 11.079, "loss": "2.084", "ppl": "4.24", "wps": "397107", "ups": "3.36", "wpb": "118338", "bsz": "256", "num_updates": "570200", "lr": "4.34141e-05", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "170571"} +[2022-08-01 10:13:42,719][train_inner][INFO] - {"epoch": 12, "update": 11.083, "loss": "2.08", "ppl": "4.23", "wps": "396609", "ups": "3.34", "wpb": "118665", "bsz": "256", "num_updates": "570400", "lr": "4.33939e-05", "gnorm": "2.338", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "170631"} +[2022-08-01 10:14:41,988][train_inner][INFO] - {"epoch": 12, "update": 11.086, "loss": "2.087", "ppl": "4.25", "wps": "398311", "ups": "3.37", "wpb": "118036", "bsz": "256", "num_updates": "570600", "lr": "4.33737e-05", "gnorm": "2.368", "loss_scale": "2", "train_wall": "59", "gb_free": "25.8", "wall": "170690"} +[2022-08-01 10:15:41,692][train_inner][INFO] - {"epoch": 12, "update": 11.09, "loss": "2.082", "ppl": "4.24", "wps": "397005", "ups": "3.35", "wpb": "118514", "bsz": "256", "num_updates": "570800", "lr": "4.33535e-05", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "170750"} +[2022-08-01 10:16:41,101][train_inner][INFO] - {"epoch": 12, "update": 11.094, "loss": "2.087", "ppl": "4.25", "wps": "394774", "ups": "3.37", "wpb": "117266", "bsz": "256", "num_updates": "571000", "lr": "4.33333e-05", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "170810"} +[2022-08-01 10:17:41,810][train_inner][INFO] - {"epoch": 12, "update": 11.098, "loss": "2.086", "ppl": "4.25", "wps": "391066", "ups": "3.29", "wpb": "118705", "bsz": "256", "num_updates": "571200", "lr": "4.33131e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "170870"} +[2022-08-01 10:18:41,004][train_inner][INFO] - {"epoch": 12, "update": 11.102, "loss": "2.087", "ppl": "4.25", "wps": "400214", "ups": "3.38", "wpb": "118451", "bsz": "256", "num_updates": "571400", "lr": "4.32929e-05", "gnorm": "2.265", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "170929"} +[2022-08-01 10:19:40,609][train_inner][INFO] - {"epoch": 12, "update": 11.106, "loss": "2.081", "ppl": "4.23", "wps": "397994", "ups": "3.36", "wpb": "118611", "bsz": "256", "num_updates": "571600", "lr": "4.32727e-05", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "170989"} +[2022-08-01 10:20:16,158][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 10:20:40,474][train_inner][INFO] - {"epoch": 12, "update": 11.11, "loss": "2.082", "ppl": "4.23", "wps": "396031", "ups": "3.34", "wpb": "118541", "bsz": "256", "num_updates": "571800", "lr": "4.32525e-05", "gnorm": "2.191", "loss_scale": "2", "train_wall": "60", "gb_free": "23.6", "wall": "171049"} +[2022-08-01 10:21:40,397][train_inner][INFO] - {"epoch": 12, "update": 11.114, "loss": "2.081", "ppl": "4.23", "wps": "396528", "ups": "3.34", "wpb": "118805", "bsz": "256", "num_updates": "572000", "lr": "4.32323e-05", "gnorm": "2.385", "loss_scale": "2", "train_wall": "60", "gb_free": "25", "wall": "171109"} +[2022-08-01 10:22:40,183][train_inner][INFO] - {"epoch": 12, "update": 11.118, "loss": "2.08", "ppl": "4.23", "wps": "394618", "ups": "3.35", "wpb": "117962", "bsz": "256", "num_updates": "572200", "lr": "4.32121e-05", "gnorm": "2.275", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "171169"} +[2022-08-01 10:23:39,722][train_inner][INFO] - {"epoch": 12, "update": 11.121, "loss": "2.081", "ppl": "4.23", "wps": "398463", "ups": "3.36", "wpb": "118621", "bsz": "256", "num_updates": "572400", "lr": "4.31919e-05", "gnorm": "2.271", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "171228"} +[2022-08-01 10:24:38,939][train_inner][INFO] - {"epoch": 12, "update": 11.125, "loss": "2.084", "ppl": "4.24", "wps": "399506", "ups": "3.38", "wpb": "118285", "bsz": "256", "num_updates": "572600", "lr": "4.31717e-05", "gnorm": "2.324", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "171287"} +[2022-08-01 10:25:38,610][train_inner][INFO] - {"epoch": 12, "update": 11.129, "loss": "2.093", "ppl": "4.27", "wps": "398012", "ups": "3.35", "wpb": "118749", "bsz": "256", "num_updates": "572800", "lr": "4.31515e-05", "gnorm": "2.309", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "171347"} +[2022-08-01 10:26:38,142][train_inner][INFO] - {"epoch": 12, "update": 11.133, "loss": "2.086", "ppl": "4.24", "wps": "398501", "ups": "3.36", "wpb": "118616", "bsz": "256", "num_updates": "573000", "lr": "4.31313e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "171407"} +[2022-08-01 10:27:37,834][train_inner][INFO] - {"epoch": 12, "update": 11.137, "loss": "2.081", "ppl": "4.23", "wps": "398237", "ups": "3.35", "wpb": "118858", "bsz": "256", "num_updates": "573200", "lr": "4.31111e-05", "gnorm": "2.42", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "171466"} +[2022-08-01 10:28:37,535][train_inner][INFO] - {"epoch": 12, "update": 11.141, "loss": "2.084", "ppl": "4.24", "wps": "396901", "ups": "3.35", "wpb": "118476", "bsz": "256", "num_updates": "573400", "lr": "4.30909e-05", "gnorm": "2.372", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "171526"} +[2022-08-01 10:29:37,728][train_inner][INFO] - {"epoch": 12, "update": 11.145, "loss": "2.078", "ppl": "4.22", "wps": "391884", "ups": "3.32", "wpb": "117944", "bsz": "256", "num_updates": "573600", "lr": "4.30707e-05", "gnorm": "2.358", "loss_scale": "2", "train_wall": "60", "gb_free": "23.9", "wall": "171586"} +[2022-08-01 10:30:28,736][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 10:30:37,486][train_inner][INFO] - {"epoch": 12, "update": 11.149, "loss": "2.084", "ppl": "4.24", "wps": "396964", "ups": "3.35", "wpb": "118608", "bsz": "256", "num_updates": "573800", "lr": "4.30505e-05", "gnorm": "2.49", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "171646"} +[2022-08-01 10:31:37,093][train_inner][INFO] - {"epoch": 12, "update": 11.153, "loss": "2.082", "ppl": "4.24", "wps": "397239", "ups": "3.36", "wpb": "118380", "bsz": "256", "num_updates": "574000", "lr": "4.30303e-05", "gnorm": "2.545", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "171706"} +[2022-08-01 10:32:36,946][train_inner][INFO] - {"epoch": 12, "update": 11.156, "loss": "2.078", "ppl": "4.22", "wps": "394798", "ups": "3.34", "wpb": "118144", "bsz": "256", "num_updates": "574200", "lr": "4.30101e-05", "gnorm": "2.194", "loss_scale": "2", "train_wall": "60", "gb_free": "27.8", "wall": "171765"} +[2022-08-01 10:33:36,422][train_inner][INFO] - {"epoch": 12, "update": 11.16, "loss": "2.079", "ppl": "4.23", "wps": "398051", "ups": "3.36", "wpb": "118372", "bsz": "256", "num_updates": "574400", "lr": "4.29899e-05", "gnorm": "2.325", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "171825"} +[2022-08-01 10:34:36,055][train_inner][INFO] - {"epoch": 12, "update": 11.164, "loss": "2.083", "ppl": "4.24", "wps": "397413", "ups": "3.35", "wpb": "118494", "bsz": "256", "num_updates": "574600", "lr": "4.29697e-05", "gnorm": "2.433", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "171884"} +[2022-08-01 10:35:35,708][train_inner][INFO] - {"epoch": 12, "update": 11.168, "loss": "2.085", "ppl": "4.24", "wps": "396152", "ups": "3.35", "wpb": "118158", "bsz": "256", "num_updates": "574800", "lr": "4.29495e-05", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "171944"} +[2022-08-01 10:36:34,820][train_inner][INFO] - {"epoch": 12, "update": 11.172, "loss": "2.088", "ppl": "4.25", "wps": "398220", "ups": "3.38", "wpb": "117697", "bsz": "256", "num_updates": "575000", "lr": "4.29293e-05", "gnorm": "2.377", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "172003"} +[2022-08-01 10:37:34,378][train_inner][INFO] - {"epoch": 12, "update": 11.176, "loss": "2.077", "ppl": "4.22", "wps": "399409", "ups": "3.36", "wpb": "118938", "bsz": "256", "num_updates": "575200", "lr": "4.29091e-05", "gnorm": "2.218", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "172063"} +[2022-08-01 10:38:33,756][train_inner][INFO] - {"epoch": 12, "update": 11.18, "loss": "2.079", "ppl": "4.23", "wps": "399412", "ups": "3.37", "wpb": "118580", "bsz": "256", "num_updates": "575400", "lr": "4.28889e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "172122"} +[2022-08-01 10:39:33,062][train_inner][INFO] - {"epoch": 12, "update": 11.184, "loss": "2.078", "ppl": "4.22", "wps": "399786", "ups": "3.37", "wpb": "118549", "bsz": "256", "num_updates": "575600", "lr": "4.28687e-05", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "172181"} +[2022-08-01 10:40:32,649][train_inner][INFO] - {"epoch": 12, "update": 11.188, "loss": "2.08", "ppl": "4.23", "wps": "397022", "ups": "3.36", "wpb": "118286", "bsz": "256", "num_updates": "575800", "lr": "4.28485e-05", "gnorm": "2.378", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "172241"} +[2022-08-01 10:40:50,535][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 10:41:32,651][train_inner][INFO] - {"epoch": 12, "update": 11.191, "loss": "2.079", "ppl": "4.22", "wps": "394917", "ups": "3.33", "wpb": "118477", "bsz": "256", "num_updates": "576000", "lr": "4.28283e-05", "gnorm": "2.271", "loss_scale": "2", "train_wall": "60", "gb_free": "24.2", "wall": "172301"} +[2022-08-01 10:42:31,825][train_inner][INFO] - {"epoch": 12, "update": 11.195, "loss": "2.085", "ppl": "4.24", "wps": "401156", "ups": "3.38", "wpb": "118690", "bsz": "256", "num_updates": "576200", "lr": "4.28081e-05", "gnorm": "2.449", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "172360"} +[2022-08-01 10:43:31,186][train_inner][INFO] - {"epoch": 12, "update": 11.199, "loss": "2.075", "ppl": "4.21", "wps": "399286", "ups": "3.37", "wpb": "118510", "bsz": "256", "num_updates": "576400", "lr": "4.27879e-05", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "172420"} +[2022-08-01 10:44:31,054][train_inner][INFO] - {"epoch": 12, "update": 11.203, "loss": "2.087", "ppl": "4.25", "wps": "395706", "ups": "3.34", "wpb": "118450", "bsz": "256", "num_updates": "576600", "lr": "4.27677e-05", "gnorm": "2.273", "loss_scale": "2", "train_wall": "60", "gb_free": "23.5", "wall": "172479"} +[2022-08-01 10:45:30,142][train_inner][INFO] - {"epoch": 12, "update": 11.207, "loss": "2.083", "ppl": "4.24", "wps": "401761", "ups": "3.38", "wpb": "118694", "bsz": "256", "num_updates": "576800", "lr": "4.27475e-05", "gnorm": "2.706", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "172539"} +[2022-08-01 10:46:29,675][train_inner][INFO] - {"epoch": 12, "update": 11.211, "loss": "2.083", "ppl": "4.24", "wps": "396348", "ups": "3.36", "wpb": "117979", "bsz": "256", "num_updates": "577000", "lr": "4.27273e-05", "gnorm": "2.572", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "172598"} +[2022-08-01 10:47:29,177][train_inner][INFO] - {"epoch": 12, "update": 11.215, "loss": "2.083", "ppl": "4.24", "wps": "399469", "ups": "3.36", "wpb": "118846", "bsz": "256", "num_updates": "577200", "lr": "4.27071e-05", "gnorm": "2.487", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "172658"} +[2022-08-01 10:48:28,672][train_inner][INFO] - {"epoch": 12, "update": 11.219, "loss": "2.088", "ppl": "4.25", "wps": "395522", "ups": "3.36", "wpb": "117657", "bsz": "256", "num_updates": "577400", "lr": "4.26869e-05", "gnorm": "2.456", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "172717"} +[2022-08-01 10:49:27,799][train_inner][INFO] - {"epoch": 12, "update": 11.222, "loss": "2.078", "ppl": "4.22", "wps": "401145", "ups": "3.38", "wpb": "118590", "bsz": "256", "num_updates": "577600", "lr": "4.26667e-05", "gnorm": "2.462", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "172776"} +[2022-08-01 10:50:27,161][train_inner][INFO] - {"epoch": 12, "update": 11.226, "loss": "2.082", "ppl": "4.24", "wps": "398687", "ups": "3.37", "wpb": "118334", "bsz": "256", "num_updates": "577800", "lr": "4.26465e-05", "gnorm": "2.218", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "172836"} +[2022-08-01 10:51:00,891][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 10:51:26,855][train_inner][INFO] - {"epoch": 12, "update": 11.23, "loss": "2.09", "ppl": "4.26", "wps": "395565", "ups": "3.35", "wpb": "118064", "bsz": "256", "num_updates": "578000", "lr": "4.26263e-05", "gnorm": "2.464", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "172895"} +[2022-08-01 10:52:26,337][train_inner][INFO] - {"epoch": 12, "update": 11.234, "loss": "2.085", "ppl": "4.24", "wps": "395391", "ups": "3.36", "wpb": "117593", "bsz": "256", "num_updates": "578200", "lr": "4.26061e-05", "gnorm": "2.457", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "172955"} +[2022-08-01 10:53:25,573][train_inner][INFO] - {"epoch": 12, "update": 11.238, "loss": "2.081", "ppl": "4.23", "wps": "401408", "ups": "3.38", "wpb": "118887", "bsz": "256", "num_updates": "578400", "lr": "4.25859e-05", "gnorm": "2.233", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "173014"} +[2022-08-01 10:54:25,016][train_inner][INFO] - {"epoch": 12, "update": 11.242, "loss": "2.085", "ppl": "4.24", "wps": "397190", "ups": "3.36", "wpb": "118051", "bsz": "256", "num_updates": "578600", "lr": "4.25657e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "173073"} +[2022-08-01 10:55:24,462][train_inner][INFO] - {"epoch": 12, "update": 11.246, "loss": "2.082", "ppl": "4.23", "wps": "398790", "ups": "3.36", "wpb": "118531", "bsz": "256", "num_updates": "578800", "lr": "4.25455e-05", "gnorm": "2.249", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "173133"} +[2022-08-01 10:56:23,902][train_inner][INFO] - {"epoch": 12, "update": 11.25, "loss": "2.081", "ppl": "4.23", "wps": "398260", "ups": "3.36", "wpb": "118362", "bsz": "256", "num_updates": "579000", "lr": "4.25253e-05", "gnorm": "2.139", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "173192"} +[2022-08-01 10:57:23,421][train_inner][INFO] - {"epoch": 12, "update": 11.254, "loss": "2.085", "ppl": "4.24", "wps": "396075", "ups": "3.36", "wpb": "117869", "bsz": "256", "num_updates": "579200", "lr": "4.25051e-05", "gnorm": "2.313", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "173252"} +[2022-08-01 10:58:23,077][train_inner][INFO] - {"epoch": 12, "update": 11.257, "loss": "2.08", "ppl": "4.23", "wps": "398339", "ups": "3.35", "wpb": "118818", "bsz": "256", "num_updates": "579400", "lr": "4.24848e-05", "gnorm": "2.182", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "173312"} +[2022-08-01 10:59:22,319][train_inner][INFO] - {"epoch": 12, "update": 11.261, "loss": "2.086", "ppl": "4.25", "wps": "398225", "ups": "3.38", "wpb": "117956", "bsz": "256", "num_updates": "579600", "lr": "4.24646e-05", "gnorm": "2.255", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "173371"} +[2022-08-01 11:00:21,776][train_inner][INFO] - {"epoch": 12, "update": 11.265, "loss": "2.084", "ppl": "4.24", "wps": "399029", "ups": "3.36", "wpb": "118626", "bsz": "256", "num_updates": "579800", "lr": "4.24444e-05", "gnorm": "2.197", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "173430"} +[2022-08-01 11:01:17,531][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 11:01:21,434][train_inner][INFO] - {"epoch": 12, "update": 11.269, "loss": "2.079", "ppl": "4.23", "wps": "395832", "ups": "3.35", "wpb": "118071", "bsz": "255.9", "num_updates": "580000", "lr": "4.24242e-05", "gnorm": "2.307", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "173490"} +[2022-08-01 11:02:20,700][train_inner][INFO] - {"epoch": 12, "update": 11.273, "loss": "2.084", "ppl": "4.24", "wps": "398996", "ups": "3.37", "wpb": "118234", "bsz": "256", "num_updates": "580200", "lr": "4.2404e-05", "gnorm": "2.363", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "173549"} +[2022-08-01 11:03:20,535][train_inner][INFO] - {"epoch": 12, "update": 11.277, "loss": "2.085", "ppl": "4.24", "wps": "393089", "ups": "3.34", "wpb": "117601", "bsz": "256", "num_updates": "580400", "lr": "4.23838e-05", "gnorm": "2.37", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "173609"} +[2022-08-01 11:04:19,838][train_inner][INFO] - {"epoch": 12, "update": 11.281, "loss": "2.083", "ppl": "4.24", "wps": "397477", "ups": "3.37", "wpb": "117858", "bsz": "256", "num_updates": "580600", "lr": "4.23636e-05", "gnorm": "2.313", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "173668"} +[2022-08-01 11:05:19,581][train_inner][INFO] - {"epoch": 12, "update": 11.285, "loss": "2.082", "ppl": "4.23", "wps": "395555", "ups": "3.35", "wpb": "118158", "bsz": "256", "num_updates": "580800", "lr": "4.23434e-05", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "173728"} +[2022-08-01 11:06:19,323][train_inner][INFO] - {"epoch": 12, "update": 11.289, "loss": "2.076", "ppl": "4.22", "wps": "397213", "ups": "3.35", "wpb": "118651", "bsz": "256", "num_updates": "581000", "lr": "4.23232e-05", "gnorm": "2.098", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "173788"} +[2022-08-01 11:07:19,706][train_inner][INFO] - {"epoch": 12, "update": 11.292, "loss": "2.075", "ppl": "4.21", "wps": "394533", "ups": "3.31", "wpb": "119116", "bsz": "256", "num_updates": "581200", "lr": "4.2303e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "60", "gb_free": "22.8", "wall": "173848"} +[2022-08-01 11:08:19,446][train_inner][INFO] - {"epoch": 12, "update": 11.296, "loss": "2.08", "ppl": "4.23", "wps": "393598", "ups": "3.35", "wpb": "117567", "bsz": "256", "num_updates": "581400", "lr": "4.22828e-05", "gnorm": "2.365", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "173908"} +[2022-08-01 11:09:18,743][train_inner][INFO] - {"epoch": 12, "update": 11.3, "loss": "2.08", "ppl": "4.23", "wps": "399106", "ups": "3.37", "wpb": "118329", "bsz": "256", "num_updates": "581600", "lr": "4.22626e-05", "gnorm": "2.292", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "173967"} +[2022-08-01 11:10:18,585][train_inner][INFO] - {"epoch": 12, "update": 11.304, "loss": "2.083", "ppl": "4.24", "wps": "394796", "ups": "3.34", "wpb": "118125", "bsz": "256", "num_updates": "581800", "lr": "4.22424e-05", "gnorm": "2.357", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "174027"} +[2022-08-01 11:11:18,940][train_inner][INFO] - {"epoch": 12, "update": 11.308, "loss": "2.079", "ppl": "4.22", "wps": "392014", "ups": "3.31", "wpb": "118300", "bsz": "256", "num_updates": "582000", "lr": "4.22222e-05", "gnorm": "2.444", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "174087"} +[2022-08-01 11:11:49,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 11:12:18,938][train_inner][INFO] - {"epoch": 12, "update": 11.312, "loss": "2.075", "ppl": "4.21", "wps": "394686", "ups": "3.33", "wpb": "118400", "bsz": "256", "num_updates": "582200", "lr": "4.2202e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "174147"} +[2022-08-01 11:13:18,382][train_inner][INFO] - {"epoch": 12, "update": 11.316, "loss": "2.081", "ppl": "4.23", "wps": "397544", "ups": "3.36", "wpb": "118158", "bsz": "256", "num_updates": "582400", "lr": "4.21818e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "174207"} +[2022-08-01 11:14:17,557][train_inner][INFO] - {"epoch": 12, "update": 11.32, "loss": "2.076", "ppl": "4.22", "wps": "399660", "ups": "3.38", "wpb": "118249", "bsz": "256", "num_updates": "582600", "lr": "4.21616e-05", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "174266"} +[2022-08-01 11:15:16,776][train_inner][INFO] - {"epoch": 12, "update": 11.324, "loss": "2.082", "ppl": "4.23", "wps": "398783", "ups": "3.38", "wpb": "118077", "bsz": "256", "num_updates": "582800", "lr": "4.21414e-05", "gnorm": "2.159", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "174325"} +[2022-08-01 11:16:16,409][train_inner][INFO] - {"epoch": 12, "update": 11.327, "loss": "2.079", "ppl": "4.22", "wps": "397073", "ups": "3.35", "wpb": "118392", "bsz": "256", "num_updates": "583000", "lr": "4.21212e-05", "gnorm": "2.144", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "174385"} +[2022-08-01 11:17:15,841][train_inner][INFO] - {"epoch": 12, "update": 11.331, "loss": "2.082", "ppl": "4.23", "wps": "397810", "ups": "3.37", "wpb": "118212", "bsz": "256", "num_updates": "583200", "lr": "4.2101e-05", "gnorm": "2.163", "loss_scale": "2", "train_wall": "59", "gb_free": "28.4", "wall": "174444"} +[2022-08-01 11:18:15,858][train_inner][INFO] - {"epoch": 12, "update": 11.335, "loss": "2.078", "ppl": "4.22", "wps": "395008", "ups": "3.33", "wpb": "118535", "bsz": "256", "num_updates": "583400", "lr": "4.20808e-05", "gnorm": "2.419", "loss_scale": "2", "train_wall": "60", "gb_free": "23.9", "wall": "174504"} +[2022-08-01 11:19:15,459][train_inner][INFO] - {"epoch": 12, "update": 11.339, "loss": "2.083", "ppl": "4.24", "wps": "396925", "ups": "3.36", "wpb": "118286", "bsz": "256", "num_updates": "583600", "lr": "4.20606e-05", "gnorm": "2.445", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "174564"} +[2022-08-01 11:20:14,912][train_inner][INFO] - {"epoch": 12, "update": 11.343, "loss": "2.078", "ppl": "4.22", "wps": "398574", "ups": "3.36", "wpb": "118481", "bsz": "256", "num_updates": "583800", "lr": "4.20404e-05", "gnorm": "2.373", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "174623"} +[2022-08-01 11:21:14,455][train_inner][INFO] - {"epoch": 12, "update": 11.347, "loss": "2.081", "ppl": "4.23", "wps": "395872", "ups": "3.36", "wpb": "117857", "bsz": "256", "num_updates": "584000", "lr": "4.20202e-05", "gnorm": "2.452", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "174683"} +[2022-08-01 11:22:13,810][train_inner][INFO] - {"epoch": 12, "update": 11.351, "loss": "2.083", "ppl": "4.24", "wps": "397589", "ups": "3.37", "wpb": "117992", "bsz": "256", "num_updates": "584200", "lr": "4.2e-05", "gnorm": "2.263", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "174742"} +[2022-08-01 11:23:13,341][train_inner][INFO] - {"epoch": 12, "update": 11.355, "loss": "2.081", "ppl": "4.23", "wps": "397578", "ups": "3.36", "wpb": "118342", "bsz": "256", "num_updates": "584400", "lr": "4.19798e-05", "gnorm": "2.296", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "174802"} +[2022-08-01 11:24:12,831][train_inner][INFO] - {"epoch": 12, "update": 11.358, "loss": "2.078", "ppl": "4.22", "wps": "398590", "ups": "3.36", "wpb": "118560", "bsz": "256", "num_updates": "584600", "lr": "4.19596e-05", "gnorm": "2.273", "loss_scale": "4", "train_wall": "59", "gb_free": "32.4", "wall": "174861"} +[2022-08-01 11:24:54,402][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 11:25:12,830][train_inner][INFO] - {"epoch": 12, "update": 11.362, "loss": "2.079", "ppl": "4.22", "wps": "392389", "ups": "3.33", "wpb": "117714", "bsz": "256", "num_updates": "584800", "lr": "4.19394e-05", "gnorm": "2.279", "loss_scale": "2", "train_wall": "60", "gb_free": "22.7", "wall": "174921"} +[2022-08-01 11:26:12,840][train_inner][INFO] - {"epoch": 12, "update": 11.366, "loss": "2.076", "ppl": "4.22", "wps": "394833", "ups": "3.33", "wpb": "118467", "bsz": "256", "num_updates": "585000", "lr": "4.19192e-05", "gnorm": "2.194", "loss_scale": "2", "train_wall": "60", "gb_free": "21.9", "wall": "174981"} +[2022-08-01 11:27:12,010][train_inner][INFO] - {"epoch": 12, "update": 11.37, "loss": "2.082", "ppl": "4.23", "wps": "399750", "ups": "3.38", "wpb": "118266", "bsz": "256", "num_updates": "585200", "lr": "4.1899e-05", "gnorm": "2.569", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "175040"} +[2022-08-01 11:28:11,394][train_inner][INFO] - {"epoch": 12, "update": 11.374, "loss": "2.078", "ppl": "4.22", "wps": "397951", "ups": "3.37", "wpb": "118159", "bsz": "256", "num_updates": "585400", "lr": "4.18788e-05", "gnorm": "2.462", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "175100"} +[2022-08-01 11:29:10,627][train_inner][INFO] - {"epoch": 12, "update": 11.378, "loss": "2.081", "ppl": "4.23", "wps": "398658", "ups": "3.38", "wpb": "118068", "bsz": "256", "num_updates": "585600", "lr": "4.18586e-05", "gnorm": "2.256", "loss_scale": "2", "train_wall": "59", "gb_free": "27.5", "wall": "175159"} +[2022-08-01 11:30:09,760][train_inner][INFO] - {"epoch": 12, "update": 11.382, "loss": "2.075", "ppl": "4.21", "wps": "401900", "ups": "3.38", "wpb": "118826", "bsz": "256", "num_updates": "585800", "lr": "4.18384e-05", "gnorm": "2.148", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "175218"} +[2022-08-01 11:31:08,951][train_inner][INFO] - {"epoch": 12, "update": 11.386, "loss": "2.082", "ppl": "4.23", "wps": "397647", "ups": "3.38", "wpb": "117686", "bsz": "256", "num_updates": "586000", "lr": "4.18182e-05", "gnorm": "2.22", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "175277"} +[2022-08-01 11:32:08,605][train_inner][INFO] - {"epoch": 12, "update": 11.39, "loss": "2.073", "ppl": "4.21", "wps": "396828", "ups": "3.35", "wpb": "118361", "bsz": "256", "num_updates": "586200", "lr": "4.1798e-05", "gnorm": "2.244", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "175337"} +[2022-08-01 11:33:08,125][train_inner][INFO] - {"epoch": 12, "update": 11.393, "loss": "2.074", "ppl": "4.21", "wps": "398024", "ups": "3.36", "wpb": "118452", "bsz": "256", "num_updates": "586400", "lr": "4.17778e-05", "gnorm": "2.241", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "175397"} +[2022-08-01 11:34:07,322][train_inner][INFO] - {"epoch": 12, "update": 11.397, "loss": "2.08", "ppl": "4.23", "wps": "397727", "ups": "3.38", "wpb": "117720", "bsz": "256", "num_updates": "586600", "lr": "4.17576e-05", "gnorm": "2.248", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "175456"} +[2022-08-01 11:35:06,654][train_inner][INFO] - {"epoch": 12, "update": 11.401, "loss": "2.075", "ppl": "4.21", "wps": "398566", "ups": "3.37", "wpb": "118236", "bsz": "256", "num_updates": "586800", "lr": "4.17374e-05", "gnorm": "2.259", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "175515"} +[2022-08-01 11:36:06,016][train_inner][INFO] - {"epoch": 12, "update": 11.405, "loss": "2.077", "ppl": "4.22", "wps": "398627", "ups": "3.37", "wpb": "118317", "bsz": "256", "num_updates": "587000", "lr": "4.17172e-05", "gnorm": "2.224", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "175574"} +[2022-08-01 11:36:15,945][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 11:37:05,531][train_inner][INFO] - {"epoch": 12, "update": 11.409, "loss": "2.082", "ppl": "4.24", "wps": "398013", "ups": "3.36", "wpb": "118437", "bsz": "256", "num_updates": "587200", "lr": "4.1697e-05", "gnorm": "2.292", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "175634"} +[2022-08-01 11:38:04,658][train_inner][INFO] - {"epoch": 12, "update": 11.413, "loss": "2.079", "ppl": "4.23", "wps": "398228", "ups": "3.38", "wpb": "117729", "bsz": "256", "num_updates": "587400", "lr": "4.16768e-05", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "175693"} +[2022-08-01 11:39:04,026][train_inner][INFO] - {"epoch": 12, "update": 11.417, "loss": "2.078", "ppl": "4.22", "wps": "400746", "ups": "3.37", "wpb": "118957", "bsz": "256", "num_updates": "587600", "lr": "4.16566e-05", "gnorm": "2.306", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "175752"} +[2022-08-01 11:40:04,008][train_inner][INFO] - {"epoch": 12, "update": 11.421, "loss": "2.078", "ppl": "4.22", "wps": "395322", "ups": "3.33", "wpb": "118562", "bsz": "256", "num_updates": "587800", "lr": "4.16364e-05", "gnorm": "2.351", "loss_scale": "2", "train_wall": "60", "gb_free": "26.1", "wall": "175812"} +[2022-08-01 11:41:03,155][train_inner][INFO] - {"epoch": 12, "update": 11.425, "loss": "2.083", "ppl": "4.24", "wps": "398957", "ups": "3.38", "wpb": "117984", "bsz": "256", "num_updates": "588000", "lr": "4.16162e-05", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "175872"} +[2022-08-01 11:42:02,310][train_inner][INFO] - {"epoch": 12, "update": 11.428, "loss": "2.081", "ppl": "4.23", "wps": "399680", "ups": "3.38", "wpb": "118214", "bsz": "256", "num_updates": "588200", "lr": "4.1596e-05", "gnorm": "2.251", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "175931"} +[2022-08-01 11:43:01,789][train_inner][INFO] - {"epoch": 12, "update": 11.432, "loss": "2.075", "ppl": "4.21", "wps": "397824", "ups": "3.36", "wpb": "118310", "bsz": "256", "num_updates": "588400", "lr": "4.15758e-05", "gnorm": "2.199", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "175990"} +[2022-08-01 11:44:00,699][train_inner][INFO] - {"epoch": 12, "update": 11.436, "loss": "2.076", "ppl": "4.22", "wps": "401615", "ups": "3.4", "wpb": "118295", "bsz": "256", "num_updates": "588600", "lr": "4.15556e-05", "gnorm": "2.252", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "176049"} +[2022-08-01 11:45:00,047][train_inner][INFO] - {"epoch": 12, "update": 11.44, "loss": "2.076", "ppl": "4.22", "wps": "397504", "ups": "3.37", "wpb": "117954", "bsz": "256", "num_updates": "588800", "lr": "4.15354e-05", "gnorm": "2.169", "loss_scale": "2", "train_wall": "59", "gb_free": "28.4", "wall": "176108"} +[2022-08-01 11:45:59,516][train_inner][INFO] - {"epoch": 12, "update": 11.444, "loss": "2.077", "ppl": "4.22", "wps": "398152", "ups": "3.36", "wpb": "118389", "bsz": "256", "num_updates": "589000", "lr": "4.15152e-05", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "176168"} +[2022-08-01 11:46:29,670][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 11:46:59,119][train_inner][INFO] - {"epoch": 12, "update": 11.448, "loss": "2.08", "ppl": "4.23", "wps": "397030", "ups": "3.36", "wpb": "118318", "bsz": "256", "num_updates": "589200", "lr": "4.14949e-05", "gnorm": "2.388", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "176228"} +[2022-08-01 11:47:58,676][train_inner][INFO] - {"epoch": 12, "update": 11.452, "loss": "2.076", "ppl": "4.22", "wps": "398092", "ups": "3.36", "wpb": "118546", "bsz": "256", "num_updates": "589400", "lr": "4.14747e-05", "gnorm": "2.417", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "176287"} +[2022-08-01 11:48:58,380][train_inner][INFO] - {"epoch": 12, "update": 11.456, "loss": "2.081", "ppl": "4.23", "wps": "395988", "ups": "3.35", "wpb": "118210", "bsz": "256", "num_updates": "589600", "lr": "4.14545e-05", "gnorm": "2.304", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "176347"} +[2022-08-01 11:49:57,501][train_inner][INFO] - {"epoch": 12, "update": 11.46, "loss": "2.072", "ppl": "4.2", "wps": "400815", "ups": "3.38", "wpb": "118482", "bsz": "256", "num_updates": "589800", "lr": "4.14343e-05", "gnorm": "2.249", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "176406"} +[2022-08-01 11:50:56,601][train_inner][INFO] - {"epoch": 12, "update": 11.463, "loss": "2.08", "ppl": "4.23", "wps": "398971", "ups": "3.38", "wpb": "117896", "bsz": "256", "num_updates": "590000", "lr": "4.14141e-05", "gnorm": "2.254", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "176465"} +[2022-08-01 11:51:56,173][train_inner][INFO] - {"epoch": 12, "update": 11.467, "loss": "2.075", "ppl": "4.21", "wps": "399446", "ups": "3.36", "wpb": "118978", "bsz": "256", "num_updates": "590200", "lr": "4.13939e-05", "gnorm": "2.362", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "176525"} +[2022-08-01 11:52:55,566][train_inner][INFO] - {"epoch": 12, "update": 11.471, "loss": "2.075", "ppl": "4.21", "wps": "397131", "ups": "3.37", "wpb": "117932", "bsz": "256", "num_updates": "590400", "lr": "4.13737e-05", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "176584"} +[2022-08-01 11:53:54,782][train_inner][INFO] - {"epoch": 12, "update": 11.475, "loss": "2.076", "ppl": "4.22", "wps": "401091", "ups": "3.38", "wpb": "118755", "bsz": "256", "num_updates": "590600", "lr": "4.13535e-05", "gnorm": "2.309", "loss_scale": "2", "train_wall": "59", "gb_free": "28.8", "wall": "176643"} +[2022-08-01 11:54:54,190][train_inner][INFO] - {"epoch": 12, "update": 11.479, "loss": "2.074", "ppl": "4.21", "wps": "398777", "ups": "3.37", "wpb": "118451", "bsz": "256", "num_updates": "590800", "lr": "4.13333e-05", "gnorm": "2.26", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "176703"} +[2022-08-01 11:55:53,830][train_inner][INFO] - {"epoch": 12, "update": 11.483, "loss": "2.075", "ppl": "4.21", "wps": "396556", "ups": "3.35", "wpb": "118253", "bsz": "256", "num_updates": "591000", "lr": "4.13131e-05", "gnorm": "2.239", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "176762"} +[2022-08-01 11:56:53,021][train_inner][INFO] - {"epoch": 12, "update": 11.487, "loss": "2.078", "ppl": "4.22", "wps": "399009", "ups": "3.38", "wpb": "118086", "bsz": "256", "num_updates": "591200", "lr": "4.12929e-05", "gnorm": "2.373", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "176821"} +[2022-08-01 11:57:52,295][train_inner][INFO] - {"epoch": 12, "update": 11.491, "loss": "2.08", "ppl": "4.23", "wps": "397437", "ups": "3.37", "wpb": "117787", "bsz": "256", "num_updates": "591400", "lr": "4.12727e-05", "gnorm": "2.264", "loss_scale": "4", "train_wall": "59", "gb_free": "32.2", "wall": "176881"} +[2022-08-01 11:58:26,359][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 11:58:52,005][train_inner][INFO] - {"epoch": 12, "update": 11.495, "loss": "2.08", "ppl": "4.23", "wps": "394930", "ups": "3.35", "wpb": "117905", "bsz": "256", "num_updates": "591600", "lr": "4.12525e-05", "gnorm": "2.193", "loss_scale": "2", "train_wall": "59", "gb_free": "28", "wall": "176940"} +[2022-08-01 11:59:51,448][train_inner][INFO] - {"epoch": 12, "update": 11.498, "loss": "2.079", "ppl": "4.23", "wps": "398925", "ups": "3.36", "wpb": "118566", "bsz": "256", "num_updates": "591800", "lr": "4.12323e-05", "gnorm": "2.163", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "177000"} +[2022-08-01 12:00:50,814][train_inner][INFO] - {"epoch": 12, "update": 11.502, "loss": "2.073", "ppl": "4.21", "wps": "400614", "ups": "3.37", "wpb": "118913", "bsz": "256", "num_updates": "592000", "lr": "4.12121e-05", "gnorm": "2.376", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "177059"} +[2022-08-01 12:01:50,194][train_inner][INFO] - {"epoch": 12, "update": 11.506, "loss": "2.079", "ppl": "4.22", "wps": "397422", "ups": "3.37", "wpb": "117993", "bsz": "256", "num_updates": "592200", "lr": "4.11919e-05", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "177119"} +[2022-08-01 12:02:49,721][train_inner][INFO] - {"epoch": 12, "update": 11.51, "loss": "2.079", "ppl": "4.22", "wps": "397461", "ups": "3.36", "wpb": "118298", "bsz": "256", "num_updates": "592400", "lr": "4.11717e-05", "gnorm": "2.282", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "177178"} +[2022-08-01 12:03:50,412][train_inner][INFO] - {"epoch": 12, "update": 11.514, "loss": "2.069", "ppl": "4.2", "wps": "390370", "ups": "3.3", "wpb": "118459", "bsz": "256", "num_updates": "592600", "lr": "4.11515e-05", "gnorm": "2.287", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "177239"} +[2022-08-01 12:04:49,807][train_inner][INFO] - {"epoch": 12, "update": 11.518, "loss": "2.074", "ppl": "4.21", "wps": "395996", "ups": "3.37", "wpb": "117599", "bsz": "256", "num_updates": "592800", "lr": "4.11313e-05", "gnorm": "2.272", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "177298"} +[2022-08-01 12:05:49,030][train_inner][INFO] - {"epoch": 12, "update": 11.522, "loss": "2.068", "ppl": "4.19", "wps": "396912", "ups": "3.38", "wpb": "117532", "bsz": "256", "num_updates": "593000", "lr": "4.11111e-05", "gnorm": "2.204", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "177357"} +[2022-08-01 12:06:48,792][train_inner][INFO] - {"epoch": 12, "update": 11.526, "loss": "2.081", "ppl": "4.23", "wps": "397046", "ups": "3.35", "wpb": "118639", "bsz": "256", "num_updates": "593200", "lr": "4.10909e-05", "gnorm": "2.216", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "177417"} +[2022-08-01 12:07:47,931][train_inner][INFO] - {"epoch": 12, "update": 11.529, "loss": "2.081", "ppl": "4.23", "wps": "400430", "ups": "3.38", "wpb": "118405", "bsz": "256", "num_updates": "593400", "lr": "4.10707e-05", "gnorm": "2.225", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "177476"} +[2022-08-01 12:08:48,709][train_inner][INFO] - {"epoch": 12, "update": 11.533, "loss": "2.074", "ppl": "4.21", "wps": "390802", "ups": "3.29", "wpb": "118760", "bsz": "256", "num_updates": "593600", "lr": "4.10505e-05", "gnorm": "2.25", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "177537"} +[2022-08-01 12:09:48,035][train_inner][INFO] - {"epoch": 12, "update": 11.537, "loss": "2.075", "ppl": "4.21", "wps": "398811", "ups": "3.37", "wpb": "118298", "bsz": "256", "num_updates": "593800", "lr": "4.10303e-05", "gnorm": "2.396", "loss_scale": "4", "train_wall": "59", "gb_free": "26.8", "wall": "177596"} +[2022-08-01 12:10:47,068][train_inner][INFO] - {"epoch": 12, "update": 11.541, "loss": "2.074", "ppl": "4.21", "wps": "401336", "ups": "3.39", "wpb": "118459", "bsz": "256", "num_updates": "594000", "lr": "4.10101e-05", "gnorm": "2.245", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "177655"} +[2022-08-01 12:11:46,623][train_inner][INFO] - {"epoch": 12, "update": 11.545, "loss": "2.075", "ppl": "4.21", "wps": "400420", "ups": "3.36", "wpb": "119235", "bsz": "256", "num_updates": "594200", "lr": "4.09899e-05", "gnorm": "2.129", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "177715"} +[2022-08-01 12:12:44,204][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 12:12:46,322][train_inner][INFO] - {"epoch": 12, "update": 11.549, "loss": "2.07", "ppl": "4.2", "wps": "397407", "ups": "3.35", "wpb": "118622", "bsz": "256", "num_updates": "594400", "lr": "4.09697e-05", "gnorm": "2.47", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "177775"} +[2022-08-01 12:13:45,999][train_inner][INFO] - {"epoch": 12, "update": 11.553, "loss": "2.074", "ppl": "4.21", "wps": "398819", "ups": "3.35", "wpb": "119000", "bsz": "256", "num_updates": "594600", "lr": "4.09495e-05", "gnorm": "2.213", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "177834"} +[2022-08-01 12:14:45,671][train_inner][INFO] - {"epoch": 12, "update": 11.557, "loss": "2.075", "ppl": "4.21", "wps": "395880", "ups": "3.35", "wpb": "118114", "bsz": "256", "num_updates": "594800", "lr": "4.09293e-05", "gnorm": "2.34", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "177894"} +[2022-08-01 12:15:45,524][train_inner][INFO] - {"epoch": 12, "update": 11.561, "loss": "2.074", "ppl": "4.21", "wps": "396447", "ups": "3.34", "wpb": "118643", "bsz": "256", "num_updates": "595000", "lr": "4.09091e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "177954"} +[2022-08-01 12:16:45,048][train_inner][INFO] - {"epoch": 12, "update": 11.564, "loss": "2.073", "ppl": "4.21", "wps": "397040", "ups": "3.36", "wpb": "118166", "bsz": "256", "num_updates": "595200", "lr": "4.08889e-05", "gnorm": "2.224", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "178013"} +[2022-08-01 12:17:44,676][train_inner][INFO] - {"epoch": 12, "update": 11.568, "loss": "2.074", "ppl": "4.21", "wps": "394383", "ups": "3.35", "wpb": "117580", "bsz": "256", "num_updates": "595400", "lr": "4.08687e-05", "gnorm": "2.25", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "178073"} +[2022-08-01 12:18:44,429][train_inner][INFO] - {"epoch": 12, "update": 11.572, "loss": "2.079", "ppl": "4.23", "wps": "396603", "ups": "3.35", "wpb": "118491", "bsz": "256", "num_updates": "595600", "lr": "4.08485e-05", "gnorm": "2.281", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "178133"} +[2022-08-01 12:19:43,396][train_inner][INFO] - {"epoch": 12, "update": 11.576, "loss": "2.08", "ppl": "4.23", "wps": "398994", "ups": "3.39", "wpb": "117636", "bsz": "256", "num_updates": "595800", "lr": "4.08283e-05", "gnorm": "2.158", "loss_scale": "2", "train_wall": "59", "gb_free": "26.8", "wall": "178192"} +[2022-08-01 12:20:43,203][train_inner][INFO] - {"epoch": 12, "update": 11.58, "loss": "2.076", "ppl": "4.22", "wps": "395986", "ups": "3.34", "wpb": "118413", "bsz": "256", "num_updates": "596000", "lr": "4.08081e-05", "gnorm": "2.239", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "178252"} +[2022-08-01 12:21:42,887][train_inner][INFO] - {"epoch": 12, "update": 11.584, "loss": "2.071", "ppl": "4.2", "wps": "398598", "ups": "3.35", "wpb": "118949", "bsz": "256", "num_updates": "596200", "lr": "4.07879e-05", "gnorm": "2.165", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "178311"} +[2022-08-01 12:22:42,464][train_inner][INFO] - {"epoch": 12, "update": 11.588, "loss": "2.073", "ppl": "4.21", "wps": "397302", "ups": "3.36", "wpb": "118350", "bsz": "256", "num_updates": "596400", "lr": "4.07677e-05", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "26.8", "wall": "178371"} +[2022-08-01 12:23:05,713][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 12:23:42,043][train_inner][INFO] - {"epoch": 12, "update": 11.592, "loss": "2.076", "ppl": "4.22", "wps": "396814", "ups": "3.36", "wpb": "118208", "bsz": "256", "num_updates": "596600", "lr": "4.07475e-05", "gnorm": "2.383", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "178430"} +[2022-08-01 12:24:42,533][train_inner][INFO] - {"epoch": 12, "update": 11.596, "loss": "2.078", "ppl": "4.22", "wps": "391383", "ups": "3.31", "wpb": "118373", "bsz": "256", "num_updates": "596800", "lr": "4.07273e-05", "gnorm": "2.349", "loss_scale": "2", "train_wall": "60", "gb_free": "22.5", "wall": "178491"} +[2022-08-01 12:25:41,475][train_inner][INFO] - {"epoch": 12, "update": 11.599, "loss": "2.078", "ppl": "4.22", "wps": "401802", "ups": "3.39", "wpb": "118412", "bsz": "256", "num_updates": "597000", "lr": "4.07071e-05", "gnorm": "2.313", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "178550"} +[2022-08-01 12:26:40,822][train_inner][INFO] - {"epoch": 12, "update": 11.603, "loss": "2.076", "ppl": "4.22", "wps": "398461", "ups": "3.37", "wpb": "118236", "bsz": "256", "num_updates": "597200", "lr": "4.06869e-05", "gnorm": "2.244", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "178609"} +[2022-08-01 12:27:40,434][train_inner][INFO] - {"epoch": 12, "update": 11.607, "loss": "2.071", "ppl": "4.2", "wps": "397534", "ups": "3.36", "wpb": "118490", "bsz": "256", "num_updates": "597400", "lr": "4.06667e-05", "gnorm": "2.223", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "178669"} +[2022-08-01 12:28:40,062][train_inner][INFO] - {"epoch": 12, "update": 11.611, "loss": "2.073", "ppl": "4.21", "wps": "395954", "ups": "3.35", "wpb": "118049", "bsz": "256", "num_updates": "597600", "lr": "4.06465e-05", "gnorm": "2.409", "loss_scale": "2", "train_wall": "59", "gb_free": "28.6", "wall": "178728"} +[2022-08-01 12:29:39,025][train_inner][INFO] - {"epoch": 12, "update": 11.615, "loss": "2.076", "ppl": "4.22", "wps": "400777", "ups": "3.39", "wpb": "118154", "bsz": "256", "num_updates": "597800", "lr": "4.06263e-05", "gnorm": "2.186", "loss_scale": "2", "train_wall": "59", "gb_free": "26.6", "wall": "178787"} +[2022-08-01 12:30:38,267][train_inner][INFO] - {"epoch": 12, "update": 11.619, "loss": "2.076", "ppl": "4.22", "wps": "399446", "ups": "3.38", "wpb": "118319", "bsz": "256", "num_updates": "598000", "lr": "4.06061e-05", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "178847"} +[2022-08-01 12:31:37,882][train_inner][INFO] - {"epoch": 12, "update": 11.623, "loss": "2.075", "ppl": "4.21", "wps": "396385", "ups": "3.35", "wpb": "118151", "bsz": "256", "num_updates": "598200", "lr": "4.05859e-05", "gnorm": "2.195", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "178906"} +[2022-08-01 12:32:37,309][train_inner][INFO] - {"epoch": 12, "update": 11.627, "loss": "2.073", "ppl": "4.21", "wps": "397792", "ups": "3.37", "wpb": "118196", "bsz": "256", "num_updates": "598400", "lr": "4.05657e-05", "gnorm": "2.235", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "178966"} +[2022-08-01 12:33:36,943][train_inner][INFO] - {"epoch": 12, "update": 11.631, "loss": "2.072", "ppl": "4.2", "wps": "397198", "ups": "3.35", "wpb": "118432", "bsz": "256", "num_updates": "598600", "lr": "4.05455e-05", "gnorm": "2.575", "loss_scale": "4", "train_wall": "59", "gb_free": "29.2", "wall": "179025"} +[2022-08-01 12:34:36,110][train_inner][INFO] - {"epoch": 12, "update": 11.634, "loss": "2.072", "ppl": "4.21", "wps": "399725", "ups": "3.38", "wpb": "118253", "bsz": "256", "num_updates": "598800", "lr": "4.05253e-05", "gnorm": "2.18", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "179085"} +[2022-08-01 12:35:35,805][train_inner][INFO] - {"epoch": 12, "update": 11.638, "loss": "2.077", "ppl": "4.22", "wps": "396499", "ups": "3.35", "wpb": "118344", "bsz": "256", "num_updates": "599000", "lr": "4.05051e-05", "gnorm": "2.103", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "179144"} +[2022-08-01 12:36:35,253][train_inner][INFO] - {"epoch": 12, "update": 11.642, "loss": "2.074", "ppl": "4.21", "wps": "397599", "ups": "3.36", "wpb": "118180", "bsz": "256", "num_updates": "599200", "lr": "4.04848e-05", "gnorm": "2.344", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "179204"} +[2022-08-01 12:37:34,930][train_inner][INFO] - {"epoch": 12, "update": 11.646, "loss": "2.072", "ppl": "4.2", "wps": "394498", "ups": "3.35", "wpb": "117713", "bsz": "256", "num_updates": "599400", "lr": "4.04646e-05", "gnorm": "2.285", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "179263"} +[2022-08-01 12:38:34,254][train_inner][INFO] - {"epoch": 12, "update": 11.65, "loss": "2.069", "ppl": "4.2", "wps": "397164", "ups": "3.37", "wpb": "117805", "bsz": "256", "num_updates": "599600", "lr": "4.04444e-05", "gnorm": "2.231", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "179323"} +[2022-08-01 12:39:33,675][train_inner][INFO] - {"epoch": 12, "update": 11.654, "loss": "2.075", "ppl": "4.21", "wps": "396308", "ups": "3.37", "wpb": "117744", "bsz": "256", "num_updates": "599800", "lr": "4.04242e-05", "gnorm": "2.402", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "179382"} +[2022-08-01 12:40:32,859][train_inner][INFO] - {"epoch": 12, "update": 11.658, "loss": "2.076", "ppl": "4.22", "wps": "397367", "ups": "3.38", "wpb": "117588", "bsz": "256", "num_updates": "600000", "lr": "4.0404e-05", "gnorm": "2.279", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "179441"} +[2022-08-01 12:40:32,860][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 12:40:55,603][valid][INFO] - {"epoch": 12, "valid_loss": "1.961", "valid_ppl": "3.89", "valid_wps": "1.57057e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "600000", "valid_best_loss": "1.961"} +[2022-08-01 12:40:55,606][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 12 @ 600000 updates +[2022-08-01 12:40:55,606][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_12_600000.pt +[2022-08-01 12:41:01,703][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_12_600000.pt +[2022-08-01 12:41:22,379][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_12_600000.pt (epoch 12 @ 600000 updates, score 1.961) (writing took 26.7738278452307 seconds) +[2022-08-01 12:42:21,610][train_inner][INFO] - {"epoch": 12, "update": 11.662, "loss": "2.074", "ppl": "4.21", "wps": "218060", "ups": "1.84", "wpb": "118571", "bsz": "256", "num_updates": "600200", "lr": "4.03838e-05", "gnorm": "2.202", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "179550"} +[2022-08-01 12:43:21,479][train_inner][INFO] - {"epoch": 12, "update": 11.665, "loss": "2.073", "ppl": "4.21", "wps": "396231", "ups": "3.34", "wpb": "118609", "bsz": "256", "num_updates": "600400", "lr": "4.03636e-05", "gnorm": "2.212", "loss_scale": "4", "train_wall": "60", "gb_free": "25.8", "wall": "179610"} +[2022-08-01 12:44:14,456][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 12:44:21,617][train_inner][INFO] - {"epoch": 12, "update": 11.669, "loss": "2.073", "ppl": "4.21", "wps": "394173", "ups": "3.33", "wpb": "118524", "bsz": "256", "num_updates": "600600", "lr": "4.03434e-05", "gnorm": "2.054", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "179670"} +[2022-08-01 12:45:21,236][train_inner][INFO] - {"epoch": 12, "update": 11.673, "loss": "2.069", "ppl": "4.2", "wps": "397004", "ups": "3.35", "wpb": "118343", "bsz": "256", "num_updates": "600800", "lr": "4.03232e-05", "gnorm": "2.17", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "179730"} +[2022-08-01 12:46:20,271][train_inner][INFO] - {"epoch": 12, "update": 11.677, "loss": "2.081", "ppl": "4.23", "wps": "397246", "ups": "3.39", "wpb": "117258", "bsz": "256", "num_updates": "601000", "lr": "4.0303e-05", "gnorm": "2.474", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "179789"} +[2022-08-01 12:47:19,926][train_inner][INFO] - {"epoch": 12, "update": 11.681, "loss": "2.068", "ppl": "4.19", "wps": "395773", "ups": "3.35", "wpb": "118047", "bsz": "256", "num_updates": "601200", "lr": "4.02828e-05", "gnorm": "2.143", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "179848"} +[2022-08-01 12:48:19,583][train_inner][INFO] - {"epoch": 12, "update": 11.685, "loss": "2.079", "ppl": "4.22", "wps": "395483", "ups": "3.35", "wpb": "117966", "bsz": "256", "num_updates": "601400", "lr": "4.02626e-05", "gnorm": "2.296", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "179908"} +[2022-08-01 12:49:19,259][train_inner][INFO] - {"epoch": 12, "update": 11.689, "loss": "2.074", "ppl": "4.21", "wps": "396555", "ups": "3.35", "wpb": "118322", "bsz": "256", "num_updates": "601600", "lr": "4.02424e-05", "gnorm": "2.155", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "179968"} +[2022-08-01 12:50:18,500][train_inner][INFO] - {"epoch": 12, "update": 11.693, "loss": "2.078", "ppl": "4.22", "wps": "398420", "ups": "3.38", "wpb": "118015", "bsz": "256", "num_updates": "601800", "lr": "4.02222e-05", "gnorm": "2.296", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "180027"} +[2022-08-01 12:51:18,424][train_inner][INFO] - {"epoch": 12, "update": 11.697, "loss": "2.07", "ppl": "4.2", "wps": "395439", "ups": "3.34", "wpb": "118480", "bsz": "256", "num_updates": "602000", "lr": "4.0202e-05", "gnorm": "2.17", "loss_scale": "4", "train_wall": "60", "gb_free": "22", "wall": "180087"} +[2022-08-01 12:52:17,839][train_inner][INFO] - {"epoch": 12, "update": 11.7, "loss": "2.078", "ppl": "4.22", "wps": "397654", "ups": "3.37", "wpb": "118131", "bsz": "256", "num_updates": "602200", "lr": "4.01818e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "59", "gb_free": "26.6", "wall": "180146"} +[2022-08-01 12:53:17,557][train_inner][INFO] - {"epoch": 12, "update": 11.704, "loss": "2.073", "ppl": "4.21", "wps": "395950", "ups": "3.35", "wpb": "118226", "bsz": "256", "num_updates": "602400", "lr": "4.01616e-05", "gnorm": "2.335", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "180206"} +[2022-08-01 12:54:17,589][train_inner][INFO] - {"epoch": 12, "update": 11.708, "loss": "2.067", "ppl": "4.19", "wps": "395950", "ups": "3.33", "wpb": "118849", "bsz": "256", "num_updates": "602600", "lr": "4.01414e-05", "gnorm": "2.04", "loss_scale": "4", "train_wall": "60", "gb_free": "22.4", "wall": "180266"} +[2022-08-01 12:54:25,275][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 12:55:16,864][train_inner][INFO] - {"epoch": 12, "update": 11.712, "loss": "2.076", "ppl": "4.22", "wps": "396937", "ups": "3.37", "wpb": "117641", "bsz": "256", "num_updates": "602800", "lr": "4.01212e-05", "gnorm": "2.124", "loss_scale": "4", "train_wall": "59", "gb_free": "24.7", "wall": "180325"} +[2022-08-01 12:56:16,280][train_inner][INFO] - {"epoch": 12, "update": 11.716, "loss": "2.073", "ppl": "4.21", "wps": "398648", "ups": "3.37", "wpb": "118430", "bsz": "256", "num_updates": "603000", "lr": "4.0101e-05", "gnorm": "2.522", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "180385"} +[2022-08-01 12:57:15,819][train_inner][INFO] - {"epoch": 12, "update": 11.72, "loss": "2.068", "ppl": "4.19", "wps": "398441", "ups": "3.36", "wpb": "118611", "bsz": "256", "num_updates": "603200", "lr": "4.00808e-05", "gnorm": "2.214", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "180444"} +[2022-08-01 12:58:15,148][train_inner][INFO] - {"epoch": 12, "update": 11.724, "loss": "2.067", "ppl": "4.19", "wps": "398507", "ups": "3.37", "wpb": "118215", "bsz": "256", "num_updates": "603400", "lr": "4.00606e-05", "gnorm": "2.257", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "180504"} +[2022-08-01 12:59:14,521][train_inner][INFO] - {"epoch": 12, "update": 11.728, "loss": "2.07", "ppl": "4.2", "wps": "398096", "ups": "3.37", "wpb": "118179", "bsz": "256", "num_updates": "603600", "lr": "4.00404e-05", "gnorm": "2.284", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "180563"} +[2022-08-01 13:00:13,715][train_inner][INFO] - {"epoch": 12, "update": 11.732, "loss": "2.069", "ppl": "4.2", "wps": "400403", "ups": "3.38", "wpb": "118507", "bsz": "256", "num_updates": "603800", "lr": "4.00202e-05", "gnorm": "2.138", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "180622"} +[2022-08-01 13:01:12,827][train_inner][INFO] - {"epoch": 12, "update": 11.735, "loss": "2.071", "ppl": "4.2", "wps": "401320", "ups": "3.38", "wpb": "118612", "bsz": "256", "num_updates": "604000", "lr": "4e-05", "gnorm": "2.353", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "180681"} +[2022-08-01 13:02:12,384][train_inner][INFO] - {"epoch": 12, "update": 11.739, "loss": "2.071", "ppl": "4.2", "wps": "398077", "ups": "3.36", "wpb": "118542", "bsz": "256", "num_updates": "604200", "lr": "3.99798e-05", "gnorm": "2.339", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "180741"} +[2022-08-01 13:03:11,502][train_inner][INFO] - {"epoch": 12, "update": 11.743, "loss": "2.071", "ppl": "4.2", "wps": "400667", "ups": "3.38", "wpb": "118431", "bsz": "256", "num_updates": "604400", "lr": "3.99596e-05", "gnorm": "2.36", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "180800"} +[2022-08-01 13:04:10,765][train_inner][INFO] - {"epoch": 12, "update": 11.747, "loss": "2.069", "ppl": "4.2", "wps": "400747", "ups": "3.37", "wpb": "118747", "bsz": "256", "num_updates": "604600", "lr": "3.99394e-05", "gnorm": "2.258", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "180859"} +[2022-08-01 13:04:33,300][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 13:05:10,086][train_inner][INFO] - {"epoch": 12, "update": 11.751, "loss": "2.074", "ppl": "4.21", "wps": "396235", "ups": "3.37", "wpb": "117524", "bsz": "256", "num_updates": "604800", "lr": "3.99192e-05", "gnorm": "2.261", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "180919"} +[2022-08-01 13:06:09,416][train_inner][INFO] - {"epoch": 12, "update": 11.755, "loss": "2.071", "ppl": "4.2", "wps": "397319", "ups": "3.37", "wpb": "117863", "bsz": "256", "num_updates": "605000", "lr": "3.9899e-05", "gnorm": "2.089", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "180978"} +[2022-08-01 13:07:08,824][train_inner][INFO] - {"epoch": 12, "update": 11.759, "loss": "2.069", "ppl": "4.19", "wps": "399566", "ups": "3.37", "wpb": "118687", "bsz": "256", "num_updates": "605200", "lr": "3.98788e-05", "gnorm": "2.175", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "181037"} +[2022-08-01 13:08:08,439][train_inner][INFO] - {"epoch": 12, "update": 11.763, "loss": "2.066", "ppl": "4.19", "wps": "399331", "ups": "3.35", "wpb": "119029", "bsz": "256", "num_updates": "605400", "lr": "3.98586e-05", "gnorm": "2.304", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "181097"} +[2022-08-01 13:09:07,878][train_inner][INFO] - {"epoch": 12, "update": 11.767, "loss": "2.074", "ppl": "4.21", "wps": "398866", "ups": "3.36", "wpb": "118540", "bsz": "256", "num_updates": "605600", "lr": "3.98384e-05", "gnorm": "2.34", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "181156"} +[2022-08-01 13:10:07,367][train_inner][INFO] - {"epoch": 12, "update": 11.77, "loss": "2.076", "ppl": "4.22", "wps": "397502", "ups": "3.36", "wpb": "118234", "bsz": "256", "num_updates": "605800", "lr": "3.98182e-05", "gnorm": "2.147", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "181216"} +[2022-08-01 13:11:06,187][train_inner][INFO] - {"epoch": 12, "update": 11.774, "loss": "2.068", "ppl": "4.19", "wps": "402794", "ups": "3.4", "wpb": "118461", "bsz": "256", "num_updates": "606000", "lr": "3.9798e-05", "gnorm": "2.239", "loss_scale": "4", "train_wall": "58", "gb_free": "21.9", "wall": "181275"} +[2022-08-01 13:11:18,287][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 13:12:05,679][train_inner][INFO] - {"epoch": 12, "update": 11.778, "loss": "2.069", "ppl": "4.2", "wps": "401322", "ups": "3.36", "wpb": "119378", "bsz": "256", "num_updates": "606200", "lr": "3.97778e-05", "gnorm": "2.257", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "181334"} +[2022-08-01 13:13:05,431][train_inner][INFO] - {"epoch": 12, "update": 11.782, "loss": "2.074", "ppl": "4.21", "wps": "394173", "ups": "3.35", "wpb": "117761", "bsz": "256", "num_updates": "606400", "lr": "3.97576e-05", "gnorm": "2.175", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "181394"} +[2022-08-01 13:14:04,762][train_inner][INFO] - {"epoch": 12, "update": 11.786, "loss": "2.072", "ppl": "4.2", "wps": "398627", "ups": "3.37", "wpb": "118254", "bsz": "256", "num_updates": "606600", "lr": "3.97374e-05", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "181453"} +[2022-08-01 13:15:04,444][train_inner][INFO] - {"epoch": 12, "update": 11.79, "loss": "2.068", "ppl": "4.19", "wps": "396157", "ups": "3.35", "wpb": "118217", "bsz": "256", "num_updates": "606800", "lr": "3.97172e-05", "gnorm": "2.189", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "181513"} +[2022-08-01 13:16:04,020][train_inner][INFO] - {"epoch": 12, "update": 11.794, "loss": "2.074", "ppl": "4.21", "wps": "396959", "ups": "3.36", "wpb": "118244", "bsz": "256", "num_updates": "607000", "lr": "3.9697e-05", "gnorm": "2.133", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "181572"} +[2022-08-01 13:17:03,408][train_inner][INFO] - {"epoch": 12, "update": 11.798, "loss": "2.073", "ppl": "4.21", "wps": "399936", "ups": "3.37", "wpb": "118757", "bsz": "256", "num_updates": "607200", "lr": "3.96768e-05", "gnorm": "2.151", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "181632"} +[2022-08-01 13:18:03,971][train_inner][INFO] - {"epoch": 12, "update": 11.802, "loss": "2.068", "ppl": "4.19", "wps": "391910", "ups": "3.3", "wpb": "118676", "bsz": "256", "num_updates": "607400", "lr": "3.96566e-05", "gnorm": "2.182", "loss_scale": "2", "train_wall": "60", "gb_free": "26.7", "wall": "181692"} +[2022-08-01 13:19:03,766][train_inner][INFO] - {"epoch": 12, "update": 11.805, "loss": "2.07", "ppl": "4.2", "wps": "395243", "ups": "3.34", "wpb": "118167", "bsz": "256", "num_updates": "607600", "lr": "3.96364e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "181752"} +[2022-08-01 13:20:03,396][train_inner][INFO] - {"epoch": 12, "update": 11.809, "loss": "2.066", "ppl": "4.19", "wps": "395741", "ups": "3.35", "wpb": "117989", "bsz": "256", "num_updates": "607800", "lr": "3.96162e-05", "gnorm": "2.228", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "181812"} +[2022-08-01 13:21:02,716][train_inner][INFO] - {"epoch": 12, "update": 11.813, "loss": "2.074", "ppl": "4.21", "wps": "401002", "ups": "3.37", "wpb": "118936", "bsz": "256", "num_updates": "608000", "lr": "3.9596e-05", "gnorm": "2.221", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "181871"} +[2022-08-01 13:22:01,746][train_inner][INFO] - {"epoch": 12, "update": 11.817, "loss": "2.073", "ppl": "4.21", "wps": "399404", "ups": "3.39", "wpb": "117884", "bsz": "256", "num_updates": "608200", "lr": "3.95758e-05", "gnorm": "2.157", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "181930"} +[2022-08-01 13:23:01,028][train_inner][INFO] - {"epoch": 12, "update": 11.821, "loss": "2.074", "ppl": "4.21", "wps": "398041", "ups": "3.37", "wpb": "117983", "bsz": "256", "num_updates": "608400", "lr": "3.95556e-05", "gnorm": "2.218", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "181989"} +[2022-08-01 13:24:00,340][train_inner][INFO] - {"epoch": 12, "update": 11.825, "loss": "2.073", "ppl": "4.21", "wps": "399114", "ups": "3.37", "wpb": "118360", "bsz": "256", "num_updates": "608600", "lr": "3.95354e-05", "gnorm": "2.224", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "182049"} +[2022-08-01 13:25:00,214][train_inner][INFO] - {"epoch": 12, "update": 11.829, "loss": "2.067", "ppl": "4.19", "wps": "394157", "ups": "3.34", "wpb": "117996", "bsz": "256", "num_updates": "608800", "lr": "3.95152e-05", "gnorm": "2.158", "loss_scale": "4", "train_wall": "60", "gb_free": "23.7", "wall": "182109"} +[2022-08-01 13:25:59,697][train_inner][INFO] - {"epoch": 12, "update": 11.833, "loss": "2.073", "ppl": "4.21", "wps": "396777", "ups": "3.36", "wpb": "118008", "bsz": "256", "num_updates": "609000", "lr": "3.94949e-05", "gnorm": "2.329", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "182168"} +[2022-08-01 13:26:59,121][train_inner][INFO] - {"epoch": 12, "update": 11.836, "loss": "2.078", "ppl": "4.22", "wps": "397284", "ups": "3.37", "wpb": "118040", "bsz": "256", "num_updates": "609200", "lr": "3.94747e-05", "gnorm": "2.269", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "182228"} +[2022-08-01 13:27:49,402][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 13:27:58,496][train_inner][INFO] - {"epoch": 12, "update": 11.84, "loss": "2.071", "ppl": "4.2", "wps": "397686", "ups": "3.37", "wpb": "118062", "bsz": "256", "num_updates": "609400", "lr": "3.94545e-05", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "182287"} +[2022-08-01 13:28:57,968][train_inner][INFO] - {"epoch": 12, "update": 11.844, "loss": "2.073", "ppl": "4.21", "wps": "396514", "ups": "3.36", "wpb": "117906", "bsz": "256", "num_updates": "609600", "lr": "3.94343e-05", "gnorm": "2.18", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "182346"} +[2022-08-01 13:29:57,426][train_inner][INFO] - {"epoch": 12, "update": 11.848, "loss": "2.075", "ppl": "4.21", "wps": "398188", "ups": "3.36", "wpb": "118378", "bsz": "256", "num_updates": "609800", "lr": "3.94141e-05", "gnorm": "2.123", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "182406"} +[2022-08-01 13:30:57,378][train_inner][INFO] - {"epoch": 12, "update": 11.852, "loss": "2.066", "ppl": "4.19", "wps": "396245", "ups": "3.34", "wpb": "118776", "bsz": "256", "num_updates": "610000", "lr": "3.93939e-05", "gnorm": "2.259", "loss_scale": "2", "train_wall": "60", "gb_free": "22.2", "wall": "182466"} +[2022-08-01 13:31:56,751][train_inner][INFO] - {"epoch": 12, "update": 11.856, "loss": "2.078", "ppl": "4.22", "wps": "397980", "ups": "3.37", "wpb": "118147", "bsz": "256", "num_updates": "610200", "lr": "3.93737e-05", "gnorm": "2.193", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "182525"} +[2022-08-01 13:32:56,393][train_inner][INFO] - {"epoch": 12, "update": 11.86, "loss": "2.069", "ppl": "4.19", "wps": "397092", "ups": "3.35", "wpb": "118416", "bsz": "256", "num_updates": "610400", "lr": "3.93535e-05", "gnorm": "2.14", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "182585"} +[2022-08-01 13:33:55,651][train_inner][INFO] - {"epoch": 12, "update": 11.864, "loss": "2.075", "ppl": "4.21", "wps": "396953", "ups": "3.38", "wpb": "117611", "bsz": "256", "num_updates": "610600", "lr": "3.93333e-05", "gnorm": "2.194", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "182644"} +[2022-08-01 13:34:55,113][train_inner][INFO] - {"epoch": 12, "update": 11.868, "loss": "2.073", "ppl": "4.21", "wps": "396958", "ups": "3.36", "wpb": "118020", "bsz": "256", "num_updates": "610800", "lr": "3.93131e-05", "gnorm": "2.16", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "182704"} +[2022-08-01 13:35:54,330][train_inner][INFO] - {"epoch": 12, "update": 11.871, "loss": "2.071", "ppl": "4.2", "wps": "398378", "ups": "3.38", "wpb": "117952", "bsz": "256", "num_updates": "611000", "lr": "3.92929e-05", "gnorm": "2.118", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "182763"} +[2022-08-01 13:36:54,043][train_inner][INFO] - {"epoch": 12, "update": 11.875, "loss": "2.068", "ppl": "4.19", "wps": "397619", "ups": "3.35", "wpb": "118714", "bsz": "256", "num_updates": "611200", "lr": "3.92727e-05", "gnorm": "2.174", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "182822"} +[2022-08-01 13:37:53,499][train_inner][INFO] - {"epoch": 12, "update": 11.879, "loss": "2.066", "ppl": "4.19", "wps": "398211", "ups": "3.36", "wpb": "118380", "bsz": "256", "num_updates": "611400", "lr": "3.92525e-05", "gnorm": "2.166", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "182882"} +[2022-08-01 13:38:53,152][train_inner][INFO] - {"epoch": 12, "update": 11.883, "loss": "2.066", "ppl": "4.19", "wps": "397130", "ups": "3.35", "wpb": "118449", "bsz": "256", "num_updates": "611600", "lr": "3.92323e-05", "gnorm": "2.151", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "182942"} +[2022-08-01 13:39:52,660][train_inner][INFO] - {"epoch": 12, "update": 11.887, "loss": "2.078", "ppl": "4.22", "wps": "398597", "ups": "3.36", "wpb": "118598", "bsz": "256", "num_updates": "611800", "lr": "3.92121e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "26.5", "wall": "183001"} +[2022-08-01 13:40:52,128][train_inner][INFO] - {"epoch": 12, "update": 11.891, "loss": "2.067", "ppl": "4.19", "wps": "397552", "ups": "3.36", "wpb": "118207", "bsz": "256", "num_updates": "612000", "lr": "3.91919e-05", "gnorm": "2.104", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "183061"} +[2022-08-01 13:41:51,111][train_inner][INFO] - {"epoch": 12, "update": 11.895, "loss": "2.068", "ppl": "4.19", "wps": "400850", "ups": "3.39", "wpb": "118216", "bsz": "256", "num_updates": "612200", "lr": "3.91717e-05", "gnorm": "2.112", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "183120"} +[2022-08-01 13:42:50,803][train_inner][INFO] - {"epoch": 12, "update": 11.899, "loss": "2.066", "ppl": "4.19", "wps": "398410", "ups": "3.35", "wpb": "118910", "bsz": "256", "num_updates": "612400", "lr": "3.91515e-05", "gnorm": "2.1", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "183179"} +[2022-08-01 13:43:50,473][train_inner][INFO] - {"epoch": 12, "update": 11.903, "loss": "2.074", "ppl": "4.21", "wps": "397812", "ups": "3.35", "wpb": "118686", "bsz": "256", "num_updates": "612600", "lr": "3.91313e-05", "gnorm": "2.205", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "183239"} +[2022-08-01 13:44:50,196][train_inner][INFO] - {"epoch": 12, "update": 11.906, "loss": "2.075", "ppl": "4.21", "wps": "394326", "ups": "3.35", "wpb": "117750", "bsz": "255.9", "num_updates": "612800", "lr": "3.91111e-05", "gnorm": "2.236", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "183299"} +[2022-08-01 13:45:49,693][train_inner][INFO] - {"epoch": 12, "update": 11.91, "loss": "2.072", "ppl": "4.21", "wps": "395206", "ups": "3.36", "wpb": "117567", "bsz": "256", "num_updates": "613000", "lr": "3.90909e-05", "gnorm": "2.231", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "183358"} +[2022-08-01 13:46:49,137][train_inner][INFO] - {"epoch": 12, "update": 11.914, "loss": "2.069", "ppl": "4.2", "wps": "397590", "ups": "3.36", "wpb": "118170", "bsz": "256", "num_updates": "613200", "lr": "3.90707e-05", "gnorm": "2.188", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "183418"} +[2022-08-01 13:47:48,920][train_inner][INFO] - {"epoch": 12, "update": 11.918, "loss": "2.069", "ppl": "4.2", "wps": "397082", "ups": "3.35", "wpb": "118693", "bsz": "256", "num_updates": "613400", "lr": "3.90505e-05", "gnorm": "2.207", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "183477"} +[2022-08-01 13:48:08,394][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 13:48:48,716][train_inner][INFO] - {"epoch": 12, "update": 11.922, "loss": "2.068", "ppl": "4.19", "wps": "397153", "ups": "3.34", "wpb": "118740", "bsz": "256", "num_updates": "613600", "lr": "3.90303e-05", "gnorm": "2.3", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "183537"} +[2022-08-01 13:49:48,250][train_inner][INFO] - {"epoch": 12, "update": 11.926, "loss": "2.067", "ppl": "4.19", "wps": "398760", "ups": "3.36", "wpb": "118699", "bsz": "256", "num_updates": "613800", "lr": "3.90101e-05", "gnorm": "2.199", "loss_scale": "4", "train_wall": "59", "gb_free": "26.9", "wall": "183597"} +[2022-08-01 13:50:47,622][train_inner][INFO] - {"epoch": 12, "update": 11.93, "loss": "2.071", "ppl": "4.2", "wps": "398610", "ups": "3.37", "wpb": "118330", "bsz": "256", "num_updates": "614000", "lr": "3.89899e-05", "gnorm": "2.106", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "183656"} +[2022-08-01 13:51:47,933][train_inner][INFO] - {"epoch": 12, "update": 11.934, "loss": "2.076", "ppl": "4.22", "wps": "390265", "ups": "3.32", "wpb": "117687", "bsz": "256", "num_updates": "614200", "lr": "3.89697e-05", "gnorm": "2.312", "loss_scale": "4", "train_wall": "60", "gb_free": "22", "wall": "183716"} +[2022-08-01 13:52:47,473][train_inner][INFO] - {"epoch": 12, "update": 11.938, "loss": "2.068", "ppl": "4.19", "wps": "399241", "ups": "3.36", "wpb": "118853", "bsz": "256", "num_updates": "614400", "lr": "3.89495e-05", "gnorm": "2.215", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "183776"} +[2022-08-01 13:53:47,957][train_inner][INFO] - {"epoch": 12, "update": 11.941, "loss": "2.066", "ppl": "4.19", "wps": "391279", "ups": "3.31", "wpb": "118328", "bsz": "256", "num_updates": "614600", "lr": "3.89293e-05", "gnorm": "2.162", "loss_scale": "4", "train_wall": "60", "gb_free": "24.1", "wall": "183836"} +[2022-08-01 13:54:47,496][train_inner][INFO] - {"epoch": 12, "update": 11.945, "loss": "2.068", "ppl": "4.19", "wps": "396686", "ups": "3.36", "wpb": "118092", "bsz": "256", "num_updates": "614800", "lr": "3.89091e-05", "gnorm": "2.203", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "183896"} +[2022-08-01 13:55:46,793][train_inner][INFO] - {"epoch": 12, "update": 11.949, "loss": "2.07", "ppl": "4.2", "wps": "396777", "ups": "3.37", "wpb": "117637", "bsz": "256", "num_updates": "615000", "lr": "3.88889e-05", "gnorm": "2.136", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "183955"} +[2022-08-01 13:56:46,128][train_inner][INFO] - {"epoch": 12, "update": 11.953, "loss": "2.068", "ppl": "4.19", "wps": "400949", "ups": "3.37", "wpb": "118952", "bsz": "256", "num_updates": "615200", "lr": "3.88687e-05", "gnorm": "2.118", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "184015"} +[2022-08-01 13:57:45,867][train_inner][INFO] - {"epoch": 12, "update": 11.957, "loss": "2.068", "ppl": "4.19", "wps": "397540", "ups": "3.35", "wpb": "118743", "bsz": "256", "num_updates": "615400", "lr": "3.88485e-05", "gnorm": "2.199", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "184074"} +[2022-08-01 13:58:23,209][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 13:58:45,367][train_inner][INFO] - {"epoch": 12, "update": 11.961, "loss": "2.072", "ppl": "4.2", "wps": "397627", "ups": "3.36", "wpb": "118293", "bsz": "256", "num_updates": "615600", "lr": "3.88283e-05", "gnorm": "2.227", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "184134"} +[2022-08-01 13:59:44,712][train_inner][INFO] - {"epoch": 12, "update": 11.965, "loss": "2.07", "ppl": "4.2", "wps": "399699", "ups": "3.37", "wpb": "118599", "bsz": "256", "num_updates": "615800", "lr": "3.88081e-05", "gnorm": "2.115", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "184193"} +[2022-08-01 14:00:43,816][train_inner][INFO] - {"epoch": 12, "update": 11.969, "loss": "2.069", "ppl": "4.2", "wps": "397969", "ups": "3.38", "wpb": "117608", "bsz": "256", "num_updates": "616000", "lr": "3.87879e-05", "gnorm": "2.321", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "184252"} +[2022-08-01 14:01:43,512][train_inner][INFO] - {"epoch": 12, "update": 11.972, "loss": "2.063", "ppl": "4.18", "wps": "397451", "ups": "3.35", "wpb": "118630", "bsz": "256", "num_updates": "616200", "lr": "3.87677e-05", "gnorm": "2.219", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "184312"} +[2022-08-01 14:02:42,721][train_inner][INFO] - {"epoch": 12, "update": 11.976, "loss": "2.072", "ppl": "4.21", "wps": "401918", "ups": "3.38", "wpb": "118986", "bsz": "256", "num_updates": "616400", "lr": "3.87475e-05", "gnorm": "2.181", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "184371"} +[2022-08-01 14:03:42,189][train_inner][INFO] - {"epoch": 12, "update": 11.98, "loss": "2.063", "ppl": "4.18", "wps": "398161", "ups": "3.36", "wpb": "118389", "bsz": "256", "num_updates": "616600", "lr": "3.87273e-05", "gnorm": "2.167", "loss_scale": "4", "train_wall": "59", "gb_free": "28.1", "wall": "184431"} +[2022-08-01 14:04:41,702][train_inner][INFO] - {"epoch": 12, "update": 11.984, "loss": "2.073", "ppl": "4.21", "wps": "396638", "ups": "3.36", "wpb": "118024", "bsz": "256", "num_updates": "616800", "lr": "3.87071e-05", "gnorm": "2.195", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "184490"} +[2022-08-01 14:05:41,463][train_inner][INFO] - {"epoch": 12, "update": 11.988, "loss": "2.066", "ppl": "4.19", "wps": "397340", "ups": "3.35", "wpb": "118726", "bsz": "256", "num_updates": "617000", "lr": "3.86869e-05", "gnorm": "2.222", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "184550"} +[2022-08-01 14:06:40,583][train_inner][INFO] - {"epoch": 12, "update": 11.992, "loss": "2.069", "ppl": "4.2", "wps": "399203", "ups": "3.38", "wpb": "118005", "bsz": "256", "num_updates": "617200", "lr": "3.86667e-05", "gnorm": "2.161", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "184609"} +[2022-08-01 14:07:41,245][train_inner][INFO] - {"epoch": 12, "update": 11.996, "loss": "2.071", "ppl": "4.2", "wps": "389675", "ups": "3.3", "wpb": "118192", "bsz": "256", "num_updates": "617400", "lr": "3.86465e-05", "gnorm": "2.153", "loss_scale": "4", "train_wall": "60", "gb_free": "22.9", "wall": "184670"} +[2022-08-01 14:08:40,703][train_inner][INFO] - {"epoch": 12, "update": 12.0, "loss": "2.066", "ppl": "4.19", "wps": "395485", "ups": "3.36", "wpb": "117572", "bsz": "256", "num_updates": "617600", "lr": "3.86263e-05", "gnorm": "2.17", "loss_scale": "8", "train_wall": "59", "gb_free": "22.3", "wall": "184729"} +[2022-08-01 14:08:45,848][fairseq_cli.train][INFO] - end of epoch 12 (average epoch stats below) +[2022-08-01 14:08:45,848][train][INFO] - {"epoch": 12, "train_loss": "2.077", "train_ppl": "4.22", "train_wps": "395915", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "617617", "train_lr": "3.86245e-05", "train_gnorm": "2.28", "train_loss_scale": "8", "train_train_wall": "15228", "train_gb_free": "23.1", "train_wall": "184734"} +[2022-08-01 14:08:45,958][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 14:08:45,961][fairseq.trainer][INFO] - begin training epoch 13 +[2022-08-01 14:08:45,961][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 14:08:57,366][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 14:09:52,027][train_inner][INFO] - {"epoch": 13, "update": 12.004, "loss": "2.065", "ppl": "4.18", "wps": "332330", "ups": "2.8", "wpb": "118514", "bsz": "255.4", "num_updates": "617800", "lr": "3.86061e-05", "gnorm": "2.194", "loss_scale": "4", "train_wall": "61", "gb_free": "21.8", "wall": "184800"} +[2022-08-01 14:10:51,780][train_inner][INFO] - {"epoch": 13, "update": 12.007, "loss": "2.063", "ppl": "4.18", "wps": "397310", "ups": "3.35", "wpb": "118703", "bsz": "255.9", "num_updates": "618000", "lr": "3.85859e-05", "gnorm": "2.014", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "184860"} +[2022-08-01 14:11:51,247][train_inner][INFO] - {"epoch": 13, "update": 12.011, "loss": "2.066", "ppl": "4.19", "wps": "395980", "ups": "3.36", "wpb": "117738", "bsz": "256", "num_updates": "618200", "lr": "3.85657e-05", "gnorm": "2.098", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "184920"} +[2022-08-01 14:12:50,872][train_inner][INFO] - {"epoch": 13, "update": 12.015, "loss": "2.069", "ppl": "4.2", "wps": "395102", "ups": "3.35", "wpb": "117788", "bsz": "256", "num_updates": "618400", "lr": "3.85455e-05", "gnorm": "2.175", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "184979"} +[2022-08-01 14:13:49,859][train_inner][INFO] - {"epoch": 13, "update": 12.019, "loss": "2.069", "ppl": "4.2", "wps": "400177", "ups": "3.39", "wpb": "118026", "bsz": "256", "num_updates": "618600", "lr": "3.85253e-05", "gnorm": "2.185", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "185038"} +[2022-08-01 14:14:50,279][train_inner][INFO] - {"epoch": 13, "update": 12.023, "loss": "2.065", "ppl": "4.18", "wps": "390924", "ups": "3.31", "wpb": "118082", "bsz": "256", "num_updates": "618800", "lr": "3.85051e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "60", "gb_free": "21.5", "wall": "185099"} +[2022-08-01 14:15:49,861][train_inner][INFO] - {"epoch": 13, "update": 12.027, "loss": "2.063", "ppl": "4.18", "wps": "397231", "ups": "3.36", "wpb": "118339", "bsz": "256", "num_updates": "619000", "lr": "3.84848e-05", "gnorm": "2.164", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "185158"} +[2022-08-01 14:16:49,628][train_inner][INFO] - {"epoch": 13, "update": 12.031, "loss": "2.067", "ppl": "4.19", "wps": "395080", "ups": "3.35", "wpb": "118062", "bsz": "256", "num_updates": "619200", "lr": "3.84646e-05", "gnorm": "2.293", "loss_scale": "4", "train_wall": "59", "gb_free": "32.6", "wall": "185218"} +[2022-08-01 14:17:49,337][train_inner][INFO] - {"epoch": 13, "update": 12.035, "loss": "2.064", "ppl": "4.18", "wps": "396343", "ups": "3.35", "wpb": "118325", "bsz": "256", "num_updates": "619400", "lr": "3.84444e-05", "gnorm": "2.272", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "185278"} +[2022-08-01 14:18:48,825][train_inner][INFO] - {"epoch": 13, "update": 12.039, "loss": "2.068", "ppl": "4.19", "wps": "397498", "ups": "3.36", "wpb": "118232", "bsz": "256", "num_updates": "619600", "lr": "3.84242e-05", "gnorm": "2.178", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "185337"} +[2022-08-01 14:19:10,600][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 14:19:48,527][train_inner][INFO] - {"epoch": 13, "update": 12.042, "loss": "2.064", "ppl": "4.18", "wps": "394503", "ups": "3.35", "wpb": "117762", "bsz": "256", "num_updates": "619800", "lr": "3.8404e-05", "gnorm": "2.15", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "185397"} +[2022-08-01 14:20:47,857][train_inner][INFO] - {"epoch": 13, "update": 12.046, "loss": "2.066", "ppl": "4.19", "wps": "400458", "ups": "3.37", "wpb": "118795", "bsz": "256", "num_updates": "620000", "lr": "3.83838e-05", "gnorm": "2.216", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "185456"} +[2022-08-01 14:21:47,540][train_inner][INFO] - {"epoch": 13, "update": 12.05, "loss": "2.064", "ppl": "4.18", "wps": "397671", "ups": "3.35", "wpb": "118670", "bsz": "256", "num_updates": "620200", "lr": "3.83636e-05", "gnorm": "2.12", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "185516"} +[2022-08-01 14:22:46,965][train_inner][INFO] - {"epoch": 13, "update": 12.054, "loss": "2.059", "ppl": "4.17", "wps": "400746", "ups": "3.37", "wpb": "119071", "bsz": "256", "num_updates": "620400", "lr": "3.83434e-05", "gnorm": "2.063", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "185575"} +[2022-08-01 14:23:46,251][train_inner][INFO] - {"epoch": 13, "update": 12.058, "loss": "2.066", "ppl": "4.19", "wps": "399391", "ups": "3.37", "wpb": "118392", "bsz": "256", "num_updates": "620600", "lr": "3.83232e-05", "gnorm": "2.134", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "185635"} +[2022-08-01 14:24:45,954][train_inner][INFO] - {"epoch": 13, "update": 12.062, "loss": "2.068", "ppl": "4.19", "wps": "397120", "ups": "3.35", "wpb": "118545", "bsz": "256", "num_updates": "620800", "lr": "3.8303e-05", "gnorm": "2.176", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "185694"} +[2022-08-01 14:25:45,569][train_inner][INFO] - {"epoch": 13, "update": 12.066, "loss": "2.067", "ppl": "4.19", "wps": "395425", "ups": "3.35", "wpb": "117865", "bsz": "256", "num_updates": "621000", "lr": "3.82828e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "185754"} +[2022-08-01 14:26:45,199][train_inner][INFO] - {"epoch": 13, "update": 12.07, "loss": "2.056", "ppl": "4.16", "wps": "399584", "ups": "3.35", "wpb": "119136", "bsz": "256", "num_updates": "621200", "lr": "3.82626e-05", "gnorm": "2.395", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "185814"} +[2022-08-01 14:27:46,029][train_inner][INFO] - {"epoch": 13, "update": 12.074, "loss": "2.062", "ppl": "4.17", "wps": "388969", "ups": "3.29", "wpb": "118304", "bsz": "256", "num_updates": "621400", "lr": "3.82424e-05", "gnorm": "2.101", "loss_scale": "4", "train_wall": "60", "gb_free": "25.6", "wall": "185874"} +[2022-08-01 14:28:45,469][train_inner][INFO] - {"epoch": 13, "update": 12.077, "loss": "2.065", "ppl": "4.18", "wps": "400532", "ups": "3.36", "wpb": "119037", "bsz": "256", "num_updates": "621600", "lr": "3.82222e-05", "gnorm": "2.233", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "185934"} +[2022-08-01 14:29:26,054][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 14:29:45,387][train_inner][INFO] - {"epoch": 13, "update": 12.081, "loss": "2.062", "ppl": "4.17", "wps": "395247", "ups": "3.34", "wpb": "118412", "bsz": "256", "num_updates": "621800", "lr": "3.8202e-05", "gnorm": "2.057", "loss_scale": "4", "train_wall": "60", "gb_free": "21.5", "wall": "185994"} +[2022-08-01 14:30:45,043][train_inner][INFO] - {"epoch": 13, "update": 12.085, "loss": "2.066", "ppl": "4.19", "wps": "398097", "ups": "3.35", "wpb": "118744", "bsz": "256", "num_updates": "622000", "lr": "3.81818e-05", "gnorm": "2.194", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "186053"} +[2022-08-01 14:31:44,313][train_inner][INFO] - {"epoch": 13, "update": 12.089, "loss": "2.065", "ppl": "4.19", "wps": "398036", "ups": "3.37", "wpb": "117955", "bsz": "256", "num_updates": "622200", "lr": "3.81616e-05", "gnorm": "2.101", "loss_scale": "4", "train_wall": "59", "gb_free": "32.9", "wall": "186113"} +[2022-08-01 14:32:43,550][train_inner][INFO] - {"epoch": 13, "update": 12.093, "loss": "2.069", "ppl": "4.19", "wps": "399680", "ups": "3.38", "wpb": "118378", "bsz": "256", "num_updates": "622400", "lr": "3.81414e-05", "gnorm": "2.156", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "186172"} +[2022-08-01 14:33:43,058][train_inner][INFO] - {"epoch": 13, "update": 12.097, "loss": "2.063", "ppl": "4.18", "wps": "398440", "ups": "3.36", "wpb": "118551", "bsz": "256", "num_updates": "622600", "lr": "3.81212e-05", "gnorm": "2.025", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "186231"} +[2022-08-01 14:34:42,539][train_inner][INFO] - {"epoch": 13, "update": 12.101, "loss": "2.064", "ppl": "4.18", "wps": "396989", "ups": "3.36", "wpb": "118067", "bsz": "256", "num_updates": "622800", "lr": "3.8101e-05", "gnorm": "2.283", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "186291"} +[2022-08-01 14:35:42,065][train_inner][INFO] - {"epoch": 13, "update": 12.105, "loss": "2.07", "ppl": "4.2", "wps": "398724", "ups": "3.36", "wpb": "118672", "bsz": "256", "num_updates": "623000", "lr": "3.80808e-05", "gnorm": "2.153", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "186350"} +[2022-08-01 14:36:41,809][train_inner][INFO] - {"epoch": 13, "update": 12.108, "loss": "2.071", "ppl": "4.2", "wps": "395029", "ups": "3.35", "wpb": "118002", "bsz": "256", "num_updates": "623200", "lr": "3.80606e-05", "gnorm": "2.306", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "186410"} +[2022-08-01 14:37:40,779][train_inner][INFO] - {"epoch": 13, "update": 12.112, "loss": "2.066", "ppl": "4.19", "wps": "397909", "ups": "3.39", "wpb": "117322", "bsz": "256", "num_updates": "623400", "lr": "3.80404e-05", "gnorm": "2.136", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "186469"} +[2022-08-01 14:38:40,094][train_inner][INFO] - {"epoch": 13, "update": 12.116, "loss": "2.066", "ppl": "4.19", "wps": "397018", "ups": "3.37", "wpb": "117746", "bsz": "256", "num_updates": "623600", "lr": "3.80202e-05", "gnorm": "2.132", "loss_scale": "4", "train_wall": "59", "gb_free": "26.4", "wall": "186529"} +[2022-08-01 14:39:39,408][train_inner][INFO] - {"epoch": 13, "update": 12.12, "loss": "2.062", "ppl": "4.18", "wps": "400474", "ups": "3.37", "wpb": "118767", "bsz": "256", "num_updates": "623800", "lr": "3.8e-05", "gnorm": "2.215", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "186588"} +[2022-08-01 14:39:47,563][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 14:40:39,470][train_inner][INFO] - {"epoch": 13, "update": 12.124, "loss": "2.061", "ppl": "4.17", "wps": "396178", "ups": "3.33", "wpb": "118976", "bsz": "256", "num_updates": "624000", "lr": "3.79798e-05", "gnorm": "2.075", "loss_scale": "4", "train_wall": "60", "gb_free": "23.4", "wall": "186648"} +[2022-08-01 14:41:39,058][train_inner][INFO] - {"epoch": 13, "update": 12.128, "loss": "2.06", "ppl": "4.17", "wps": "398163", "ups": "3.36", "wpb": "118628", "bsz": "256", "num_updates": "624200", "lr": "3.79596e-05", "gnorm": "2.258", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "186707"} +[2022-08-01 14:42:38,207][train_inner][INFO] - {"epoch": 13, "update": 12.132, "loss": "2.065", "ppl": "4.18", "wps": "398777", "ups": "3.38", "wpb": "117935", "bsz": "256", "num_updates": "624400", "lr": "3.79394e-05", "gnorm": "2.341", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "186767"} +[2022-08-01 14:43:37,936][train_inner][INFO] - {"epoch": 13, "update": 12.136, "loss": "2.07", "ppl": "4.2", "wps": "394696", "ups": "3.35", "wpb": "117875", "bsz": "256", "num_updates": "624600", "lr": "3.79192e-05", "gnorm": "2.081", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "186826"} +[2022-08-01 14:44:37,090][train_inner][INFO] - {"epoch": 13, "update": 12.14, "loss": "2.07", "ppl": "4.2", "wps": "398046", "ups": "3.38", "wpb": "117728", "bsz": "256", "num_updates": "624800", "lr": "3.7899e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "186886"} +[2022-08-01 14:45:36,593][train_inner][INFO] - {"epoch": 13, "update": 12.143, "loss": "2.067", "ppl": "4.19", "wps": "396873", "ups": "3.36", "wpb": "118076", "bsz": "256", "num_updates": "625000", "lr": "3.78788e-05", "gnorm": "2.195", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "186945"} +[2022-08-01 14:46:36,399][train_inner][INFO] - {"epoch": 13, "update": 12.147, "loss": "2.063", "ppl": "4.18", "wps": "396907", "ups": "3.34", "wpb": "118686", "bsz": "256", "num_updates": "625200", "lr": "3.78586e-05", "gnorm": "2.069", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "187005"} +[2022-08-01 14:47:35,842][train_inner][INFO] - {"epoch": 13, "update": 12.151, "loss": "2.064", "ppl": "4.18", "wps": "399759", "ups": "3.36", "wpb": "118814", "bsz": "256", "num_updates": "625400", "lr": "3.78384e-05", "gnorm": "2.154", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "187064"} +[2022-08-01 14:48:35,466][train_inner][INFO] - {"epoch": 13, "update": 12.155, "loss": "2.061", "ppl": "4.17", "wps": "397724", "ups": "3.35", "wpb": "118568", "bsz": "256", "num_updates": "625600", "lr": "3.78182e-05", "gnorm": "2.161", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "187124"} +[2022-08-01 14:49:34,883][train_inner][INFO] - {"epoch": 13, "update": 12.159, "loss": "2.068", "ppl": "4.19", "wps": "398624", "ups": "3.37", "wpb": "118424", "bsz": "256", "num_updates": "625800", "lr": "3.7798e-05", "gnorm": "2.053", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "187183"} +[2022-08-01 14:50:01,473][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 14:50:34,691][train_inner][INFO] - {"epoch": 13, "update": 12.163, "loss": "2.063", "ppl": "4.18", "wps": "396192", "ups": "3.34", "wpb": "118476", "bsz": "256", "num_updates": "626000", "lr": "3.77778e-05", "gnorm": "2.164", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "187243"} +[2022-08-01 14:51:34,185][train_inner][INFO] - {"epoch": 13, "update": 12.167, "loss": "2.062", "ppl": "4.18", "wps": "397683", "ups": "3.36", "wpb": "118298", "bsz": "256", "num_updates": "626200", "lr": "3.77576e-05", "gnorm": "2.21", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "187303"} +[2022-08-01 14:52:33,061][train_inner][INFO] - {"epoch": 13, "update": 12.171, "loss": "2.071", "ppl": "4.2", "wps": "399541", "ups": "3.4", "wpb": "117617", "bsz": "256", "num_updates": "626400", "lr": "3.77374e-05", "gnorm": "2.284", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "187361"} +[2022-08-01 14:53:32,609][train_inner][INFO] - {"epoch": 13, "update": 12.175, "loss": "2.067", "ppl": "4.19", "wps": "395930", "ups": "3.36", "wpb": "117883", "bsz": "256", "num_updates": "626600", "lr": "3.77172e-05", "gnorm": "2.065", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "187421"} +[2022-08-01 14:54:32,323][train_inner][INFO] - {"epoch": 13, "update": 12.178, "loss": "2.067", "ppl": "4.19", "wps": "395172", "ups": "3.35", "wpb": "117985", "bsz": "256", "num_updates": "626800", "lr": "3.7697e-05", "gnorm": "2.062", "loss_scale": "4", "train_wall": "59", "gb_free": "24.7", "wall": "187481"} +[2022-08-01 14:55:32,272][train_inner][INFO] - {"epoch": 13, "update": 12.182, "loss": "2.061", "ppl": "4.17", "wps": "394720", "ups": "3.34", "wpb": "118316", "bsz": "256", "num_updates": "627000", "lr": "3.76768e-05", "gnorm": "2.095", "loss_scale": "4", "train_wall": "60", "gb_free": "21.8", "wall": "187541"} +[2022-08-01 14:56:31,961][train_inner][INFO] - {"epoch": 13, "update": 12.186, "loss": "2.06", "ppl": "4.17", "wps": "396046", "ups": "3.35", "wpb": "118196", "bsz": "256", "num_updates": "627200", "lr": "3.76566e-05", "gnorm": "2.067", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "187600"} +[2022-08-01 14:57:31,445][train_inner][INFO] - {"epoch": 13, "update": 12.19, "loss": "2.061", "ppl": "4.17", "wps": "397391", "ups": "3.36", "wpb": "118191", "bsz": "256", "num_updates": "627400", "lr": "3.76364e-05", "gnorm": "2.04", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "187660"} +[2022-08-01 14:58:30,486][train_inner][INFO] - {"epoch": 13, "update": 12.194, "loss": "2.065", "ppl": "4.18", "wps": "398240", "ups": "3.39", "wpb": "117561", "bsz": "256", "num_updates": "627600", "lr": "3.76162e-05", "gnorm": "2.114", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "187719"} +[2022-08-01 14:59:29,923][train_inner][INFO] - {"epoch": 13, "update": 12.198, "loss": "2.066", "ppl": "4.19", "wps": "399132", "ups": "3.36", "wpb": "118617", "bsz": "256", "num_updates": "627800", "lr": "3.7596e-05", "gnorm": "2.066", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "187778"} +[2022-08-01 15:00:14,883][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:00:29,819][train_inner][INFO] - {"epoch": 13, "update": 12.202, "loss": "2.064", "ppl": "4.18", "wps": "392896", "ups": "3.34", "wpb": "117663", "bsz": "256", "num_updates": "628000", "lr": "3.75758e-05", "gnorm": "2.179", "loss_scale": "4", "train_wall": "60", "gb_free": "23.6", "wall": "187838"} +[2022-08-01 15:01:29,354][train_inner][INFO] - {"epoch": 13, "update": 12.206, "loss": "2.062", "ppl": "4.17", "wps": "398450", "ups": "3.36", "wpb": "118608", "bsz": "256", "num_updates": "628200", "lr": "3.75556e-05", "gnorm": "2.101", "loss_scale": "4", "train_wall": "59", "gb_free": "26.4", "wall": "187898"} +[2022-08-01 15:02:29,088][train_inner][INFO] - {"epoch": 13, "update": 12.21, "loss": "2.065", "ppl": "4.18", "wps": "394304", "ups": "3.35", "wpb": "117766", "bsz": "256", "num_updates": "628400", "lr": "3.75354e-05", "gnorm": "2.092", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "187958"} +[2022-08-01 15:03:28,603][train_inner][INFO] - {"epoch": 13, "update": 12.213, "loss": "2.061", "ppl": "4.17", "wps": "397899", "ups": "3.36", "wpb": "118404", "bsz": "256", "num_updates": "628600", "lr": "3.75152e-05", "gnorm": "2.17", "loss_scale": "4", "train_wall": "59", "gb_free": "28.8", "wall": "188017"} +[2022-08-01 15:04:27,875][train_inner][INFO] - {"epoch": 13, "update": 12.217, "loss": "2.057", "ppl": "4.16", "wps": "400116", "ups": "3.37", "wpb": "118577", "bsz": "256", "num_updates": "628800", "lr": "3.74949e-05", "gnorm": "2.273", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "188076"} +[2022-08-01 15:05:27,296][train_inner][INFO] - {"epoch": 13, "update": 12.221, "loss": "2.065", "ppl": "4.18", "wps": "397903", "ups": "3.37", "wpb": "118218", "bsz": "256", "num_updates": "629000", "lr": "3.74747e-05", "gnorm": "2.176", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "188136"} +[2022-08-01 15:06:26,737][train_inner][INFO] - {"epoch": 13, "update": 12.225, "loss": "2.067", "ppl": "4.19", "wps": "397580", "ups": "3.36", "wpb": "118162", "bsz": "256", "num_updates": "629200", "lr": "3.74545e-05", "gnorm": "2.161", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "188195"} +[2022-08-01 15:07:25,840][train_inner][INFO] - {"epoch": 13, "update": 12.229, "loss": "2.059", "ppl": "4.17", "wps": "399935", "ups": "3.38", "wpb": "118186", "bsz": "256", "num_updates": "629400", "lr": "3.74343e-05", "gnorm": "2.039", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "188254"} +[2022-08-01 15:08:25,106][train_inner][INFO] - {"epoch": 13, "update": 12.233, "loss": "2.063", "ppl": "4.18", "wps": "398989", "ups": "3.37", "wpb": "118232", "bsz": "256", "num_updates": "629600", "lr": "3.74141e-05", "gnorm": "2.051", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "188314"} +[2022-08-01 15:09:24,152][train_inner][INFO] - {"epoch": 13, "update": 12.237, "loss": "2.063", "ppl": "4.18", "wps": "398887", "ups": "3.39", "wpb": "117762", "bsz": "256", "num_updates": "629800", "lr": "3.73939e-05", "gnorm": "2.061", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "188373"} +[2022-08-01 15:10:23,524][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:10:24,132][train_inner][INFO] - {"epoch": 13, "update": 12.241, "loss": "2.061", "ppl": "4.17", "wps": "394090", "ups": "3.33", "wpb": "118187", "bsz": "256", "num_updates": "630000", "lr": "3.73737e-05", "gnorm": "2.181", "loss_scale": "4", "train_wall": "60", "gb_free": "25.9", "wall": "188433"} +[2022-08-01 15:11:23,516][train_inner][INFO] - {"epoch": 13, "update": 12.245, "loss": "2.066", "ppl": "4.19", "wps": "398248", "ups": "3.37", "wpb": "118248", "bsz": "256", "num_updates": "630200", "lr": "3.73535e-05", "gnorm": "2.115", "loss_scale": "4", "train_wall": "59", "gb_free": "28.1", "wall": "188492"} +[2022-08-01 15:12:22,705][train_inner][INFO] - {"epoch": 13, "update": 12.248, "loss": "2.061", "ppl": "4.17", "wps": "401044", "ups": "3.38", "wpb": "118687", "bsz": "256", "num_updates": "630400", "lr": "3.73333e-05", "gnorm": "2.155", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "188551"} +[2022-08-01 15:13:22,130][train_inner][INFO] - {"epoch": 13, "update": 12.252, "loss": "2.061", "ppl": "4.17", "wps": "399118", "ups": "3.37", "wpb": "118586", "bsz": "256", "num_updates": "630600", "lr": "3.73131e-05", "gnorm": "2.048", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "188611"} +[2022-08-01 15:14:21,708][train_inner][INFO] - {"epoch": 13, "update": 12.256, "loss": "2.059", "ppl": "4.17", "wps": "398234", "ups": "3.36", "wpb": "118630", "bsz": "256", "num_updates": "630800", "lr": "3.72929e-05", "gnorm": "2.038", "loss_scale": "4", "train_wall": "59", "gb_free": "23.9", "wall": "188670"} +[2022-08-01 15:15:21,228][train_inner][INFO] - {"epoch": 13, "update": 12.26, "loss": "2.065", "ppl": "4.18", "wps": "397883", "ups": "3.36", "wpb": "118407", "bsz": "256", "num_updates": "631000", "lr": "3.72727e-05", "gnorm": "2.113", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "188730"} +[2022-08-01 15:16:20,577][train_inner][INFO] - {"epoch": 13, "update": 12.264, "loss": "2.065", "ppl": "4.19", "wps": "398138", "ups": "3.37", "wpb": "118146", "bsz": "256", "num_updates": "631200", "lr": "3.72525e-05", "gnorm": "2.064", "loss_scale": "4", "train_wall": "59", "gb_free": "25.1", "wall": "188789"} +[2022-08-01 15:17:20,031][train_inner][INFO] - {"epoch": 13, "update": 12.268, "loss": "2.057", "ppl": "4.16", "wps": "399268", "ups": "3.36", "wpb": "118689", "bsz": "256", "num_updates": "631400", "lr": "3.72323e-05", "gnorm": "2.203", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "188848"} +[2022-08-01 15:18:20,630][train_inner][INFO] - {"epoch": 13, "update": 12.272, "loss": "2.069", "ppl": "4.2", "wps": "388599", "ups": "3.3", "wpb": "117742", "bsz": "256", "num_updates": "631600", "lr": "3.72121e-05", "gnorm": "2.133", "loss_scale": "4", "train_wall": "60", "gb_free": "21.9", "wall": "188909"} +[2022-08-01 15:19:19,677][train_inner][INFO] - {"epoch": 13, "update": 12.276, "loss": "2.061", "ppl": "4.17", "wps": "401797", "ups": "3.39", "wpb": "118626", "bsz": "256", "num_updates": "631800", "lr": "3.71919e-05", "gnorm": "2.192", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "188968"} +[2022-08-01 15:20:19,158][train_inner][INFO] - {"epoch": 13, "update": 12.279, "loss": "2.061", "ppl": "4.17", "wps": "398022", "ups": "3.36", "wpb": "118372", "bsz": "256", "num_updates": "632000", "lr": "3.71717e-05", "gnorm": "2.16", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "189028"} +[2022-08-01 15:20:33,735][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:21:18,703][train_inner][INFO] - {"epoch": 13, "update": 12.283, "loss": "2.066", "ppl": "4.19", "wps": "398198", "ups": "3.36", "wpb": "118553", "bsz": "256", "num_updates": "632200", "lr": "3.71515e-05", "gnorm": "2.085", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "189087"} +[2022-08-01 15:22:18,038][train_inner][INFO] - {"epoch": 13, "update": 12.287, "loss": "2.065", "ppl": "4.18", "wps": "398251", "ups": "3.37", "wpb": "118150", "bsz": "256", "num_updates": "632400", "lr": "3.71313e-05", "gnorm": "2.101", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "189146"} +[2022-08-01 15:23:17,632][train_inner][INFO] - {"epoch": 13, "update": 12.291, "loss": "2.065", "ppl": "4.18", "wps": "397227", "ups": "3.36", "wpb": "118360", "bsz": "256", "num_updates": "632600", "lr": "3.71111e-05", "gnorm": "2.116", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "189206"} +[2022-08-01 15:24:16,777][train_inner][INFO] - {"epoch": 13, "update": 12.295, "loss": "2.068", "ppl": "4.19", "wps": "398376", "ups": "3.38", "wpb": "117810", "bsz": "256", "num_updates": "632800", "lr": "3.70909e-05", "gnorm": "2.16", "loss_scale": "4", "train_wall": "59", "gb_free": "26.8", "wall": "189265"} +[2022-08-01 15:25:16,299][train_inner][INFO] - {"epoch": 13, "update": 12.299, "loss": "2.059", "ppl": "4.17", "wps": "399598", "ups": "3.36", "wpb": "118924", "bsz": "256", "num_updates": "633000", "lr": "3.70707e-05", "gnorm": "2.111", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "189325"} +[2022-08-01 15:26:15,767][train_inner][INFO] - {"epoch": 13, "update": 12.303, "loss": "2.063", "ppl": "4.18", "wps": "398076", "ups": "3.36", "wpb": "118363", "bsz": "256", "num_updates": "633200", "lr": "3.70505e-05", "gnorm": "2.148", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "189384"} +[2022-08-01 15:27:15,249][train_inner][INFO] - {"epoch": 13, "update": 12.307, "loss": "2.063", "ppl": "4.18", "wps": "399786", "ups": "3.36", "wpb": "118899", "bsz": "256", "num_updates": "633400", "lr": "3.70303e-05", "gnorm": "2.091", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "189444"} +[2022-08-01 15:28:14,635][train_inner][INFO] - {"epoch": 13, "update": 12.311, "loss": "2.06", "ppl": "4.17", "wps": "397627", "ups": "3.37", "wpb": "118067", "bsz": "256", "num_updates": "633600", "lr": "3.70101e-05", "gnorm": "2.063", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "189503"} +[2022-08-01 15:29:14,033][train_inner][INFO] - {"epoch": 13, "update": 12.314, "loss": "2.067", "ppl": "4.19", "wps": "397517", "ups": "3.37", "wpb": "118057", "bsz": "256", "num_updates": "633800", "lr": "3.69899e-05", "gnorm": "2.197", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "189562"} +[2022-08-01 15:30:13,562][train_inner][INFO] - {"epoch": 13, "update": 12.318, "loss": "2.058", "ppl": "4.16", "wps": "397360", "ups": "3.36", "wpb": "118272", "bsz": "256", "num_updates": "634000", "lr": "3.69697e-05", "gnorm": "2.16", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "189622"} +[2022-08-01 15:30:43,079][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:31:13,031][train_inner][INFO] - {"epoch": 13, "update": 12.322, "loss": "2.061", "ppl": "4.17", "wps": "396925", "ups": "3.36", "wpb": "118024", "bsz": "256", "num_updates": "634200", "lr": "3.69495e-05", "gnorm": "2.085", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "189681"} +[2022-08-01 15:32:12,968][train_inner][INFO] - {"epoch": 13, "update": 12.326, "loss": "2.062", "ppl": "4.18", "wps": "395190", "ups": "3.34", "wpb": "118431", "bsz": "256", "num_updates": "634400", "lr": "3.69293e-05", "gnorm": "2.102", "loss_scale": "4", "train_wall": "60", "gb_free": "24.7", "wall": "189741"} +[2022-08-01 15:33:12,062][train_inner][INFO] - {"epoch": 13, "update": 12.33, "loss": "2.063", "ppl": "4.18", "wps": "398756", "ups": "3.38", "wpb": "117821", "bsz": "256", "num_updates": "634600", "lr": "3.69091e-05", "gnorm": "2.127", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "189800"} +[2022-08-01 15:34:11,304][train_inner][INFO] - {"epoch": 13, "update": 12.334, "loss": "2.064", "ppl": "4.18", "wps": "399091", "ups": "3.38", "wpb": "118215", "bsz": "256", "num_updates": "634800", "lr": "3.68889e-05", "gnorm": "2.139", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "189860"} +[2022-08-01 15:35:10,797][train_inner][INFO] - {"epoch": 13, "update": 12.338, "loss": "2.058", "ppl": "4.16", "wps": "398132", "ups": "3.36", "wpb": "118428", "bsz": "256", "num_updates": "635000", "lr": "3.68687e-05", "gnorm": "2.07", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "189919"} +[2022-08-01 15:36:10,379][train_inner][INFO] - {"epoch": 13, "update": 12.342, "loss": "2.068", "ppl": "4.19", "wps": "396781", "ups": "3.36", "wpb": "118204", "bsz": "256", "num_updates": "635200", "lr": "3.68485e-05", "gnorm": "2.093", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "189979"} +[2022-08-01 15:37:10,070][train_inner][INFO] - {"epoch": 13, "update": 12.346, "loss": "2.054", "ppl": "4.15", "wps": "397808", "ups": "3.35", "wpb": "118728", "bsz": "256", "num_updates": "635400", "lr": "3.68283e-05", "gnorm": "2.038", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "190038"} +[2022-08-01 15:38:09,633][train_inner][INFO] - {"epoch": 13, "update": 12.349, "loss": "2.061", "ppl": "4.17", "wps": "398291", "ups": "3.36", "wpb": "118616", "bsz": "256", "num_updates": "635600", "lr": "3.68081e-05", "gnorm": "2.028", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "190098"} +[2022-08-01 15:39:09,179][train_inner][INFO] - {"epoch": 13, "update": 12.353, "loss": "2.064", "ppl": "4.18", "wps": "398639", "ups": "3.36", "wpb": "118684", "bsz": "256", "num_updates": "635800", "lr": "3.67879e-05", "gnorm": "2.315", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "190158"} +[2022-08-01 15:40:08,388][train_inner][INFO] - {"epoch": 13, "update": 12.357, "loss": "2.064", "ppl": "4.18", "wps": "397320", "ups": "3.38", "wpb": "117625", "bsz": "256", "num_updates": "636000", "lr": "3.67677e-05", "gnorm": "2.115", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "190217"} +[2022-08-01 15:41:04,039][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:41:07,841][train_inner][INFO] - {"epoch": 13, "update": 12.361, "loss": "2.062", "ppl": "4.18", "wps": "398058", "ups": "3.36", "wpb": "118328", "bsz": "256", "num_updates": "636200", "lr": "3.67475e-05", "gnorm": "2.168", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "190276"} +[2022-08-01 15:42:07,175][train_inner][INFO] - {"epoch": 13, "update": 12.365, "loss": "2.053", "ppl": "4.15", "wps": "399360", "ups": "3.37", "wpb": "118478", "bsz": "256", "num_updates": "636400", "lr": "3.67273e-05", "gnorm": "2.165", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "190336"} +[2022-08-01 15:43:06,597][train_inner][INFO] - {"epoch": 13, "update": 12.369, "loss": "2.061", "ppl": "4.17", "wps": "398072", "ups": "3.37", "wpb": "118270", "bsz": "256", "num_updates": "636600", "lr": "3.67071e-05", "gnorm": "2.124", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "190395"} +[2022-08-01 15:44:05,899][train_inner][INFO] - {"epoch": 13, "update": 12.373, "loss": "2.064", "ppl": "4.18", "wps": "399645", "ups": "3.37", "wpb": "118498", "bsz": "256", "num_updates": "636800", "lr": "3.66869e-05", "gnorm": "2.225", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "190454"} +[2022-08-01 15:45:05,378][train_inner][INFO] - {"epoch": 13, "update": 12.377, "loss": "2.059", "ppl": "4.17", "wps": "395690", "ups": "3.36", "wpb": "117674", "bsz": "256", "num_updates": "637000", "lr": "3.66667e-05", "gnorm": "2.09", "loss_scale": "4", "train_wall": "59", "gb_free": "27.8", "wall": "190514"} +[2022-08-01 15:46:04,704][train_inner][INFO] - {"epoch": 13, "update": 12.381, "loss": "2.062", "ppl": "4.18", "wps": "397701", "ups": "3.37", "wpb": "117971", "bsz": "256", "num_updates": "637200", "lr": "3.66465e-05", "gnorm": "2.123", "loss_scale": "4", "train_wall": "59", "gb_free": "27.3", "wall": "190573"} +[2022-08-01 15:47:03,992][train_inner][INFO] - {"epoch": 13, "update": 12.384, "loss": "2.057", "ppl": "4.16", "wps": "399018", "ups": "3.37", "wpb": "118283", "bsz": "256", "num_updates": "637400", "lr": "3.66263e-05", "gnorm": "2.077", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "190632"} +[2022-08-01 15:48:03,578][train_inner][INFO] - {"epoch": 13, "update": 12.388, "loss": "2.062", "ppl": "4.17", "wps": "396858", "ups": "3.36", "wpb": "118235", "bsz": "256", "num_updates": "637600", "lr": "3.66061e-05", "gnorm": "2.057", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "190692"} +[2022-08-01 15:49:03,090][train_inner][INFO] - {"epoch": 13, "update": 12.392, "loss": "2.056", "ppl": "4.16", "wps": "397990", "ups": "3.36", "wpb": "118426", "bsz": "256", "num_updates": "637800", "lr": "3.65859e-05", "gnorm": "2.142", "loss_scale": "4", "train_wall": "59", "gb_free": "26.5", "wall": "190752"} +[2022-08-01 15:50:02,843][train_inner][INFO] - {"epoch": 13, "update": 12.396, "loss": "2.054", "ppl": "4.15", "wps": "398222", "ups": "3.35", "wpb": "118974", "bsz": "256", "num_updates": "638000", "lr": "3.65657e-05", "gnorm": "2.065", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "190811"} +[2022-08-01 15:51:02,351][train_inner][INFO] - {"epoch": 13, "update": 12.4, "loss": "2.059", "ppl": "4.17", "wps": "399550", "ups": "3.36", "wpb": "118881", "bsz": "256", "num_updates": "638200", "lr": "3.65455e-05", "gnorm": "2.018", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "190871"} +[2022-08-01 15:51:38,919][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:52:02,315][train_inner][INFO] - {"epoch": 13, "update": 12.404, "loss": "2.063", "ppl": "4.18", "wps": "393592", "ups": "3.34", "wpb": "118006", "bsz": "256", "num_updates": "638400", "lr": "3.65253e-05", "gnorm": "2.137", "loss_scale": "4", "train_wall": "60", "gb_free": "23.5", "wall": "190931"} +[2022-08-01 15:53:01,925][train_inner][INFO] - {"epoch": 13, "update": 12.408, "loss": "2.061", "ppl": "4.17", "wps": "397889", "ups": "3.36", "wpb": "118590", "bsz": "256", "num_updates": "638600", "lr": "3.65051e-05", "gnorm": "2.097", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "190990"} +[2022-08-01 15:54:01,558][train_inner][INFO] - {"epoch": 13, "update": 12.412, "loss": "2.061", "ppl": "4.17", "wps": "396699", "ups": "3.35", "wpb": "118281", "bsz": "256", "num_updates": "638800", "lr": "3.64848e-05", "gnorm": "2.113", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "191050"} +[2022-08-01 15:55:01,066][train_inner][INFO] - {"epoch": 13, "update": 12.416, "loss": "2.059", "ppl": "4.17", "wps": "396680", "ups": "3.36", "wpb": "118028", "bsz": "256", "num_updates": "639000", "lr": "3.64646e-05", "gnorm": "2.098", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "191109"} +[2022-08-01 15:56:00,918][train_inner][INFO] - {"epoch": 13, "update": 12.419, "loss": "2.058", "ppl": "4.16", "wps": "397446", "ups": "3.34", "wpb": "118939", "bsz": "256", "num_updates": "639200", "lr": "3.64444e-05", "gnorm": "2.111", "loss_scale": "4", "train_wall": "60", "gb_free": "25", "wall": "191169"} +[2022-08-01 15:57:00,213][train_inner][INFO] - {"epoch": 13, "update": 12.423, "loss": "2.061", "ppl": "4.17", "wps": "397204", "ups": "3.37", "wpb": "117761", "bsz": "256", "num_updates": "639400", "lr": "3.64242e-05", "gnorm": "2.113", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "191229"} +[2022-08-01 15:57:59,775][train_inner][INFO] - {"epoch": 13, "update": 12.427, "loss": "2.062", "ppl": "4.18", "wps": "398801", "ups": "3.36", "wpb": "118765", "bsz": "256", "num_updates": "639600", "lr": "3.6404e-05", "gnorm": "2.227", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "191288"} +[2022-08-01 15:58:58,990][train_inner][INFO] - {"epoch": 13, "update": 12.431, "loss": "2.06", "ppl": "4.17", "wps": "398730", "ups": "3.38", "wpb": "118054", "bsz": "256", "num_updates": "639800", "lr": "3.63838e-05", "gnorm": "2.038", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "191347"} +[2022-08-01 15:59:58,264][train_inner][INFO] - {"epoch": 13, "update": 12.435, "loss": "2.062", "ppl": "4.17", "wps": "400321", "ups": "3.37", "wpb": "118643", "bsz": "256", "num_updates": "640000", "lr": "3.63636e-05", "gnorm": "2.15", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "191407"} +[2022-08-01 16:00:59,099][train_inner][INFO] - {"epoch": 13, "update": 12.439, "loss": "2.059", "ppl": "4.17", "wps": "386269", "ups": "3.29", "wpb": "117491", "bsz": "256", "num_updates": "640200", "lr": "3.63434e-05", "gnorm": "2.179", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "191468"} +[2022-08-01 16:01:58,362][train_inner][INFO] - {"epoch": 13, "update": 12.443, "loss": "2.064", "ppl": "4.18", "wps": "398473", "ups": "3.37", "wpb": "118074", "bsz": "256", "num_updates": "640400", "lr": "3.63232e-05", "gnorm": "2.093", "loss_scale": "8", "train_wall": "59", "gb_free": "28.2", "wall": "191527"} +[2022-08-01 16:02:09,864][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 16:02:58,190][train_inner][INFO] - {"epoch": 13, "update": 12.447, "loss": "2.064", "ppl": "4.18", "wps": "393377", "ups": "3.34", "wpb": "117673", "bsz": "256", "num_updates": "640600", "lr": "3.6303e-05", "gnorm": "2.126", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "191587"} +[2022-08-01 16:03:57,617][train_inner][INFO] - {"epoch": 13, "update": 12.45, "loss": "2.062", "ppl": "4.17", "wps": "398379", "ups": "3.37", "wpb": "118372", "bsz": "256", "num_updates": "640800", "lr": "3.62828e-05", "gnorm": "2.058", "loss_scale": "4", "train_wall": "59", "gb_free": "28.6", "wall": "191646"} +[2022-08-01 16:04:56,976][train_inner][INFO] - {"epoch": 13, "update": 12.454, "loss": "2.056", "ppl": "4.16", "wps": "400036", "ups": "3.37", "wpb": "118728", "bsz": "256", "num_updates": "641000", "lr": "3.62626e-05", "gnorm": "2.064", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "191705"} +[2022-08-01 16:05:57,730][train_inner][INFO] - {"epoch": 13, "update": 12.458, "loss": "2.066", "ppl": "4.19", "wps": "388685", "ups": "3.29", "wpb": "118070", "bsz": "256", "num_updates": "641200", "lr": "3.62424e-05", "gnorm": "2.158", "loss_scale": "4", "train_wall": "60", "gb_free": "22.3", "wall": "191766"} +[2022-08-01 16:06:57,628][train_inner][INFO] - {"epoch": 13, "update": 12.462, "loss": "2.059", "ppl": "4.17", "wps": "394660", "ups": "3.34", "wpb": "118195", "bsz": "256", "num_updates": "641400", "lr": "3.62222e-05", "gnorm": "1.97", "loss_scale": "4", "train_wall": "60", "gb_free": "21.7", "wall": "191826"} +[2022-08-01 16:07:57,120][train_inner][INFO] - {"epoch": 13, "update": 12.466, "loss": "2.062", "ppl": "4.17", "wps": "396695", "ups": "3.36", "wpb": "118001", "bsz": "256", "num_updates": "641600", "lr": "3.6202e-05", "gnorm": "2.043", "loss_scale": "4", "train_wall": "59", "gb_free": "25", "wall": "191886"} +[2022-08-01 16:08:57,433][train_inner][INFO] - {"epoch": 13, "update": 12.47, "loss": "2.06", "ppl": "4.17", "wps": "391038", "ups": "3.32", "wpb": "117922", "bsz": "256", "num_updates": "641800", "lr": "3.61818e-05", "gnorm": "2.068", "loss_scale": "4", "train_wall": "60", "gb_free": "26.1", "wall": "191946"} +[2022-08-01 16:09:57,137][train_inner][INFO] - {"epoch": 13, "update": 12.474, "loss": "2.064", "ppl": "4.18", "wps": "394939", "ups": "3.35", "wpb": "117897", "bsz": "256", "num_updates": "642000", "lr": "3.61616e-05", "gnorm": "2.039", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "192006"} +[2022-08-01 16:10:56,832][train_inner][INFO] - {"epoch": 13, "update": 12.478, "loss": "2.056", "ppl": "4.16", "wps": "395568", "ups": "3.35", "wpb": "118066", "bsz": "256", "num_updates": "642200", "lr": "3.61414e-05", "gnorm": "2.094", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "192065"} +[2022-08-01 16:11:56,619][train_inner][INFO] - {"epoch": 13, "update": 12.482, "loss": "2.056", "ppl": "4.16", "wps": "394566", "ups": "3.35", "wpb": "117950", "bsz": "256", "num_updates": "642400", "lr": "3.61212e-05", "gnorm": "2.066", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "192125"} +[2022-08-01 16:12:25,718][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 16:12:56,176][train_inner][INFO] - {"epoch": 13, "update": 12.485, "loss": "2.056", "ppl": "4.16", "wps": "397685", "ups": "3.36", "wpb": "118423", "bsz": "256", "num_updates": "642600", "lr": "3.6101e-05", "gnorm": "2.154", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "192185"} +[2022-08-01 16:13:55,686][train_inner][INFO] - {"epoch": 13, "update": 12.489, "loss": "2.057", "ppl": "4.16", "wps": "398792", "ups": "3.36", "wpb": "118660", "bsz": "256", "num_updates": "642800", "lr": "3.60808e-05", "gnorm": "2.111", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "192244"} +[2022-08-01 16:14:55,297][train_inner][INFO] - {"epoch": 13, "update": 12.493, "loss": "2.054", "ppl": "4.15", "wps": "396459", "ups": "3.36", "wpb": "118166", "bsz": "256", "num_updates": "643000", "lr": "3.60606e-05", "gnorm": "2.03", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "192304"} +[2022-08-01 16:15:54,950][train_inner][INFO] - {"epoch": 13, "update": 12.497, "loss": "2.058", "ppl": "4.16", "wps": "398668", "ups": "3.35", "wpb": "118908", "bsz": "256", "num_updates": "643200", "lr": "3.60404e-05", "gnorm": "2.1", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "192363"} +[2022-08-01 16:16:54,568][train_inner][INFO] - {"epoch": 13, "update": 12.501, "loss": "2.06", "ppl": "4.17", "wps": "395923", "ups": "3.35", "wpb": "118019", "bsz": "256", "num_updates": "643400", "lr": "3.60202e-05", "gnorm": "2.095", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "192423"} +[2022-08-01 16:17:54,218][train_inner][INFO] - {"epoch": 13, "update": 12.505, "loss": "2.058", "ppl": "4.17", "wps": "396856", "ups": "3.35", "wpb": "118361", "bsz": "256", "num_updates": "643600", "lr": "3.6e-05", "gnorm": "2.095", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "192483"} +[2022-08-01 16:18:53,796][train_inner][INFO] - {"epoch": 13, "update": 12.509, "loss": "2.056", "ppl": "4.16", "wps": "395862", "ups": "3.36", "wpb": "117923", "bsz": "256", "num_updates": "643800", "lr": "3.59798e-05", "gnorm": "2.136", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "192542"} +[2022-08-01 16:19:53,593][train_inner][INFO] - {"epoch": 13, "update": 12.513, "loss": "2.055", "ppl": "4.15", "wps": "397228", "ups": "3.34", "wpb": "118765", "bsz": "256", "num_updates": "644000", "lr": "3.59596e-05", "gnorm": "2.071", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "192602"} +[2022-08-01 16:20:52,711][train_inner][INFO] - {"epoch": 13, "update": 12.517, "loss": "2.061", "ppl": "4.17", "wps": "402364", "ups": "3.38", "wpb": "118934", "bsz": "256", "num_updates": "644200", "lr": "3.59394e-05", "gnorm": "2.065", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "192661"} +[2022-08-01 16:21:52,367][train_inner][INFO] - {"epoch": 13, "update": 12.52, "loss": "2.061", "ppl": "4.17", "wps": "396656", "ups": "3.35", "wpb": "118313", "bsz": "256", "num_updates": "644400", "lr": "3.59192e-05", "gnorm": "1.988", "loss_scale": "4", "train_wall": "59", "gb_free": "25.9", "wall": "192721"} +[2022-08-01 16:22:51,950][train_inner][INFO] - {"epoch": 13, "update": 12.524, "loss": "2.059", "ppl": "4.17", "wps": "396522", "ups": "3.36", "wpb": "118130", "bsz": "256", "num_updates": "644600", "lr": "3.5899e-05", "gnorm": "2.095", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "192780"} +[2022-08-01 16:23:51,199][train_inner][INFO] - {"epoch": 13, "update": 12.528, "loss": "2.057", "ppl": "4.16", "wps": "399378", "ups": "3.38", "wpb": "118312", "bsz": "256", "num_updates": "644800", "lr": "3.58788e-05", "gnorm": "2.11", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "192840"} +[2022-08-01 16:24:50,480][train_inner][INFO] - {"epoch": 13, "update": 12.532, "loss": "2.056", "ppl": "4.16", "wps": "398823", "ups": "3.37", "wpb": "118214", "bsz": "256", "num_updates": "645000", "lr": "3.58586e-05", "gnorm": "2.139", "loss_scale": "8", "train_wall": "59", "gb_free": "22.5", "wall": "192899"} +[2022-08-01 16:25:49,907][train_inner][INFO] - {"epoch": 13, "update": 12.536, "loss": "2.06", "ppl": "4.17", "wps": "397950", "ups": "3.37", "wpb": "118243", "bsz": "256", "num_updates": "645200", "lr": "3.58384e-05", "gnorm": "1.99", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "192958"} +[2022-08-01 16:26:14,617][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 16:26:49,872][train_inner][INFO] - {"epoch": 13, "update": 12.54, "loss": "2.056", "ppl": "4.16", "wps": "393375", "ups": "3.34", "wpb": "117943", "bsz": "256", "num_updates": "645400", "lr": "3.58182e-05", "gnorm": "2.147", "loss_scale": "4", "train_wall": "60", "gb_free": "26.6", "wall": "193018"} +[2022-08-01 16:27:49,073][train_inner][INFO] - {"epoch": 13, "update": 12.544, "loss": "2.067", "ppl": "4.19", "wps": "397601", "ups": "3.38", "wpb": "117690", "bsz": "256", "num_updates": "645600", "lr": "3.5798e-05", "gnorm": "2.141", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "193078"} +[2022-08-01 16:28:48,421][train_inner][INFO] - {"epoch": 13, "update": 12.548, "loss": "2.057", "ppl": "4.16", "wps": "398474", "ups": "3.37", "wpb": "118244", "bsz": "256", "num_updates": "645800", "lr": "3.57778e-05", "gnorm": "2.058", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "193137"} +[2022-08-01 16:29:48,283][train_inner][INFO] - {"epoch": 13, "update": 12.552, "loss": "2.05", "ppl": "4.14", "wps": "396136", "ups": "3.34", "wpb": "118566", "bsz": "256", "num_updates": "646000", "lr": "3.57576e-05", "gnorm": "2.042", "loss_scale": "4", "train_wall": "60", "gb_free": "21.5", "wall": "193197"} +[2022-08-01 16:30:47,143][train_inner][INFO] - {"epoch": 13, "update": 12.555, "loss": "2.059", "ppl": "4.17", "wps": "398757", "ups": "3.4", "wpb": "117354", "bsz": "256", "num_updates": "646200", "lr": "3.57374e-05", "gnorm": "2.196", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "193256"} +[2022-08-01 16:31:46,573][train_inner][INFO] - {"epoch": 13, "update": 12.559, "loss": "2.053", "ppl": "4.15", "wps": "400562", "ups": "3.37", "wpb": "119026", "bsz": "256", "num_updates": "646400", "lr": "3.57172e-05", "gnorm": "2.231", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "193315"} +[2022-08-01 16:32:46,034][train_inner][INFO] - {"epoch": 13, "update": 12.563, "loss": "2.057", "ppl": "4.16", "wps": "399000", "ups": "3.36", "wpb": "118624", "bsz": "256", "num_updates": "646600", "lr": "3.5697e-05", "gnorm": "2.066", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "193374"} +[2022-08-01 16:33:45,326][train_inner][INFO] - {"epoch": 13, "update": 12.567, "loss": "2.052", "ppl": "4.15", "wps": "400890", "ups": "3.37", "wpb": "118846", "bsz": "256", "num_updates": "646800", "lr": "3.56768e-05", "gnorm": "2.081", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "193434"} +[2022-08-01 16:34:44,620][train_inner][INFO] - {"epoch": 13, "update": 12.571, "loss": "2.056", "ppl": "4.16", "wps": "398084", "ups": "3.37", "wpb": "118019", "bsz": "256", "num_updates": "647000", "lr": "3.56566e-05", "gnorm": "1.995", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "193493"} +[2022-08-01 16:35:43,934][train_inner][INFO] - {"epoch": 13, "update": 12.575, "loss": "2.058", "ppl": "4.17", "wps": "399037", "ups": "3.37", "wpb": "118341", "bsz": "256", "num_updates": "647200", "lr": "3.56364e-05", "gnorm": "2.124", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "193552"} +[2022-08-01 16:36:44,829][train_inner][INFO] - {"epoch": 13, "update": 12.579, "loss": "2.055", "ppl": "4.15", "wps": "388556", "ups": "3.28", "wpb": "118306", "bsz": "256", "num_updates": "647400", "lr": "3.56162e-05", "gnorm": "2.141", "loss_scale": "8", "train_wall": "61", "gb_free": "24.4", "wall": "193613"} +[2022-08-01 16:37:44,327][train_inner][INFO] - {"epoch": 13, "update": 12.583, "loss": "2.058", "ppl": "4.17", "wps": "397657", "ups": "3.36", "wpb": "118297", "bsz": "256", "num_updates": "647600", "lr": "3.5596e-05", "gnorm": "2.085", "loss_scale": "8", "train_wall": "59", "gb_free": "26.5", "wall": "193673"} +[2022-08-01 16:37:57,647][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 16:38:18,709][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 16:38:44,314][train_inner][INFO] - {"epoch": 13, "update": 12.587, "loss": "2.054", "ppl": "4.15", "wps": "395665", "ups": "3.33", "wpb": "118673", "bsz": "256", "num_updates": "647800", "lr": "3.55758e-05", "gnorm": "2.083", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "193733"} +[2022-08-01 16:39:43,744][train_inner][INFO] - {"epoch": 13, "update": 12.59, "loss": "2.058", "ppl": "4.16", "wps": "398831", "ups": "3.37", "wpb": "118512", "bsz": "256", "num_updates": "648000", "lr": "3.55556e-05", "gnorm": "2.101", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "193792"} +[2022-08-01 16:40:43,128][train_inner][INFO] - {"epoch": 13, "update": 12.594, "loss": "2.06", "ppl": "4.17", "wps": "399691", "ups": "3.37", "wpb": "118676", "bsz": "256", "num_updates": "648200", "lr": "3.55354e-05", "gnorm": "2.102", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "193852"} +[2022-08-01 16:41:42,640][train_inner][INFO] - {"epoch": 13, "update": 12.598, "loss": "2.057", "ppl": "4.16", "wps": "397837", "ups": "3.36", "wpb": "118378", "bsz": "256", "num_updates": "648400", "lr": "3.55152e-05", "gnorm": "2.147", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "193911"} +[2022-08-01 16:42:42,270][train_inner][INFO] - {"epoch": 13, "update": 12.602, "loss": "2.054", "ppl": "4.15", "wps": "397949", "ups": "3.35", "wpb": "118648", "bsz": "256", "num_updates": "648600", "lr": "3.54949e-05", "gnorm": "2.046", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "193971"} +[2022-08-01 16:43:41,768][train_inner][INFO] - {"epoch": 13, "update": 12.606, "loss": "2.057", "ppl": "4.16", "wps": "398788", "ups": "3.36", "wpb": "118635", "bsz": "256", "num_updates": "648800", "lr": "3.54747e-05", "gnorm": "2.075", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "194030"} +[2022-08-01 16:44:41,081][train_inner][INFO] - {"epoch": 13, "update": 12.61, "loss": "2.057", "ppl": "4.16", "wps": "398870", "ups": "3.37", "wpb": "118291", "bsz": "256", "num_updates": "649000", "lr": "3.54545e-05", "gnorm": "2.169", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "194090"} +[2022-08-01 16:45:40,477][train_inner][INFO] - {"epoch": 13, "update": 12.614, "loss": "2.053", "ppl": "4.15", "wps": "399219", "ups": "3.37", "wpb": "118558", "bsz": "256", "num_updates": "649200", "lr": "3.54343e-05", "gnorm": "2.009", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "194149"} +[2022-08-01 16:46:39,811][train_inner][INFO] - {"epoch": 13, "update": 12.618, "loss": "2.053", "ppl": "4.15", "wps": "397364", "ups": "3.37", "wpb": "117887", "bsz": "256", "num_updates": "649400", "lr": "3.54141e-05", "gnorm": "2.058", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "194208"} +[2022-08-01 16:47:39,351][train_inner][INFO] - {"epoch": 13, "update": 12.621, "loss": "2.057", "ppl": "4.16", "wps": "396537", "ups": "3.36", "wpb": "118048", "bsz": "256", "num_updates": "649600", "lr": "3.53939e-05", "gnorm": "2.142", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "194268"} +[2022-08-01 16:48:39,204][train_inner][INFO] - {"epoch": 13, "update": 12.625, "loss": "2.057", "ppl": "4.16", "wps": "396600", "ups": "3.34", "wpb": "118688", "bsz": "256", "num_updates": "649800", "lr": "3.53737e-05", "gnorm": "2.118", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "194328"} +[2022-08-01 16:49:38,650][train_inner][INFO] - {"epoch": 13, "update": 12.629, "loss": "2.058", "ppl": "4.17", "wps": "398171", "ups": "3.36", "wpb": "118347", "bsz": "256", "num_updates": "650000", "lr": "3.53535e-05", "gnorm": "2.125", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "194387"} +[2022-08-01 16:49:38,651][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 16:50:01,327][valid][INFO] - {"epoch": 13, "valid_loss": "1.945", "valid_ppl": "3.85", "valid_wps": "1.57992e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "650000", "valid_best_loss": "1.945"} +[2022-08-01 16:50:01,329][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 13 @ 650000 updates +[2022-08-01 16:50:01,330][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_13_650000.pt +[2022-08-01 16:50:07,631][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_13_650000.pt +[2022-08-01 16:50:30,077][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_13_650000.pt (epoch 13 @ 650000 updates, score 1.945) (writing took 28.747987696900964 seconds) +[2022-08-01 16:51:31,082][train_inner][INFO] - {"epoch": 13, "update": 12.633, "loss": "2.061", "ppl": "4.17", "wps": "210046", "ups": "1.78", "wpb": "118079", "bsz": "256", "num_updates": "650200", "lr": "3.53333e-05", "gnorm": "2.113", "loss_scale": "4", "train_wall": "61", "gb_free": "22.6", "wall": "194500"} +[2022-08-01 16:52:30,573][train_inner][INFO] - {"epoch": 13, "update": 12.637, "loss": "2.052", "ppl": "4.15", "wps": "399751", "ups": "3.36", "wpb": "118908", "bsz": "256", "num_updates": "650400", "lr": "3.53131e-05", "gnorm": "2.08", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "194559"} +[2022-08-01 16:53:30,116][train_inner][INFO] - {"epoch": 13, "update": 12.641, "loss": "2.052", "ppl": "4.15", "wps": "398905", "ups": "3.36", "wpb": "118759", "bsz": "256", "num_updates": "650600", "lr": "3.52929e-05", "gnorm": "2.182", "loss_scale": "4", "train_wall": "59", "gb_free": "28.6", "wall": "194619"} +[2022-08-01 16:54:29,667][train_inner][INFO] - {"epoch": 13, "update": 12.645, "loss": "2.059", "ppl": "4.17", "wps": "396274", "ups": "3.36", "wpb": "117992", "bsz": "256", "num_updates": "650800", "lr": "3.52727e-05", "gnorm": "2.004", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "194678"} +[2022-08-01 16:55:29,322][train_inner][INFO] - {"epoch": 13, "update": 12.649, "loss": "2.056", "ppl": "4.16", "wps": "397228", "ups": "3.35", "wpb": "118484", "bsz": "256", "num_updates": "651000", "lr": "3.52525e-05", "gnorm": "2.073", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "194738"} +[2022-08-01 16:56:29,295][train_inner][INFO] - {"epoch": 13, "update": 12.653, "loss": "2.057", "ppl": "4.16", "wps": "394145", "ups": "3.33", "wpb": "118188", "bsz": "256", "num_updates": "651200", "lr": "3.52323e-05", "gnorm": "2.088", "loss_scale": "4", "train_wall": "60", "gb_free": "21.8", "wall": "194798"} +[2022-08-01 16:57:29,021][train_inner][INFO] - {"epoch": 13, "update": 12.656, "loss": "2.056", "ppl": "4.16", "wps": "395546", "ups": "3.35", "wpb": "118122", "bsz": "256", "num_updates": "651400", "lr": "3.52121e-05", "gnorm": "2.086", "loss_scale": "4", "train_wall": "59", "gb_free": "28.4", "wall": "194857"} +[2022-08-01 16:58:28,418][train_inner][INFO] - {"epoch": 13, "update": 12.66, "loss": "2.059", "ppl": "4.17", "wps": "397695", "ups": "3.37", "wpb": "118109", "bsz": "256", "num_updates": "651600", "lr": "3.51919e-05", "gnorm": "2.031", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "194917"} +[2022-08-01 16:59:27,669][train_inner][INFO] - {"epoch": 13, "update": 12.664, "loss": "2.054", "ppl": "4.15", "wps": "399149", "ups": "3.38", "wpb": "118248", "bsz": "256", "num_updates": "651800", "lr": "3.51717e-05", "gnorm": "2.198", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "194976"} +[2022-08-01 17:00:26,891][train_inner][INFO] - {"epoch": 13, "update": 12.668, "loss": "2.055", "ppl": "4.15", "wps": "399910", "ups": "3.38", "wpb": "118416", "bsz": "256", "num_updates": "652000", "lr": "3.51515e-05", "gnorm": "2.167", "loss_scale": "8", "train_wall": "59", "gb_free": "23.9", "wall": "195035"} +[2022-08-01 17:01:26,468][train_inner][INFO] - {"epoch": 13, "update": 12.672, "loss": "2.056", "ppl": "4.16", "wps": "394941", "ups": "3.36", "wpb": "117647", "bsz": "256", "num_updates": "652200", "lr": "3.51313e-05", "gnorm": "2.128", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "195095"} +[2022-08-01 17:02:26,283][train_inner][INFO] - {"epoch": 13, "update": 12.676, "loss": "2.052", "ppl": "4.15", "wps": "396048", "ups": "3.34", "wpb": "118447", "bsz": "256", "num_updates": "652400", "lr": "3.51111e-05", "gnorm": "2.026", "loss_scale": "8", "train_wall": "59", "gb_free": "24", "wall": "195155"} +[2022-08-01 17:03:25,795][train_inner][INFO] - {"epoch": 13, "update": 12.68, "loss": "2.06", "ppl": "4.17", "wps": "397865", "ups": "3.36", "wpb": "118388", "bsz": "256", "num_updates": "652600", "lr": "3.50909e-05", "gnorm": "2.051", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "195214"} +[2022-08-01 17:04:25,107][train_inner][INFO] - {"epoch": 13, "update": 12.684, "loss": "2.055", "ppl": "4.16", "wps": "398355", "ups": "3.37", "wpb": "118137", "bsz": "256", "num_updates": "652800", "lr": "3.50707e-05", "gnorm": "2.051", "loss_scale": "8", "train_wall": "59", "gb_free": "27.2", "wall": "195274"} +[2022-08-01 17:05:24,548][train_inner][INFO] - {"epoch": 13, "update": 12.688, "loss": "2.061", "ppl": "4.17", "wps": "398592", "ups": "3.36", "wpb": "118461", "bsz": "256", "num_updates": "653000", "lr": "3.50505e-05", "gnorm": "2.067", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "195333"} +[2022-08-01 17:06:24,837][train_inner][INFO] - {"epoch": 13, "update": 12.691, "loss": "2.052", "ppl": "4.15", "wps": "392076", "ups": "3.32", "wpb": "118189", "bsz": "255.9", "num_updates": "653200", "lr": "3.50303e-05", "gnorm": "2.173", "loss_scale": "8", "train_wall": "60", "gb_free": "22.1", "wall": "195393"} +[2022-08-01 17:07:24,533][train_inner][INFO] - {"epoch": 13, "update": 12.695, "loss": "2.056", "ppl": "4.16", "wps": "396498", "ups": "3.35", "wpb": "118345", "bsz": "256", "num_updates": "653400", "lr": "3.50101e-05", "gnorm": "2.012", "loss_scale": "8", "train_wall": "59", "gb_free": "21.9", "wall": "195453"} +[2022-08-01 17:07:43,309][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 17:08:24,467][train_inner][INFO] - {"epoch": 13, "update": 12.699, "loss": "2.054", "ppl": "4.15", "wps": "394140", "ups": "3.34", "wpb": "118112", "bsz": "256", "num_updates": "653600", "lr": "3.49899e-05", "gnorm": "2.091", "loss_scale": "4", "train_wall": "60", "gb_free": "21.9", "wall": "195513"} +[2022-08-01 17:09:24,137][train_inner][INFO] - {"epoch": 13, "update": 12.703, "loss": "2.06", "ppl": "4.17", "wps": "395037", "ups": "3.35", "wpb": "117858", "bsz": "256", "num_updates": "653800", "lr": "3.49697e-05", "gnorm": "2.046", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "195573"} +[2022-08-01 17:10:23,832][train_inner][INFO] - {"epoch": 13, "update": 12.707, "loss": "2.055", "ppl": "4.15", "wps": "396461", "ups": "3.35", "wpb": "118334", "bsz": "256", "num_updates": "654000", "lr": "3.49495e-05", "gnorm": "2.118", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "195632"} +[2022-08-01 17:11:23,143][train_inner][INFO] - {"epoch": 13, "update": 12.711, "loss": "2.06", "ppl": "4.17", "wps": "396239", "ups": "3.37", "wpb": "117505", "bsz": "256", "num_updates": "654200", "lr": "3.49293e-05", "gnorm": "2.07", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "195692"} +[2022-08-01 17:12:22,487][train_inner][INFO] - {"epoch": 13, "update": 12.715, "loss": "2.059", "ppl": "4.17", "wps": "397260", "ups": "3.37", "wpb": "117874", "bsz": "256", "num_updates": "654400", "lr": "3.49091e-05", "gnorm": "2.116", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "195751"} +[2022-08-01 17:13:22,133][train_inner][INFO] - {"epoch": 13, "update": 12.719, "loss": "2.053", "ppl": "4.15", "wps": "397420", "ups": "3.35", "wpb": "118523", "bsz": "256", "num_updates": "654600", "lr": "3.48889e-05", "gnorm": "2.161", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "195811"} +[2022-08-01 17:14:21,963][train_inner][INFO] - {"epoch": 13, "update": 12.722, "loss": "2.05", "ppl": "4.14", "wps": "396307", "ups": "3.34", "wpb": "118553", "bsz": "256", "num_updates": "654800", "lr": "3.48687e-05", "gnorm": "2.092", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "195870"} +[2022-08-01 17:15:21,370][train_inner][INFO] - {"epoch": 13, "update": 12.726, "loss": "2.055", "ppl": "4.16", "wps": "397683", "ups": "3.37", "wpb": "118126", "bsz": "256", "num_updates": "655000", "lr": "3.48485e-05", "gnorm": "2.091", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "195930"} +[2022-08-01 17:16:20,962][train_inner][INFO] - {"epoch": 13, "update": 12.73, "loss": "2.053", "ppl": "4.15", "wps": "397845", "ups": "3.36", "wpb": "118542", "bsz": "256", "num_updates": "655200", "lr": "3.48283e-05", "gnorm": "2.17", "loss_scale": "4", "train_wall": "59", "gb_free": "27.6", "wall": "195989"} +[2022-08-01 17:17:20,824][train_inner][INFO] - {"epoch": 13, "update": 12.734, "loss": "2.046", "ppl": "4.13", "wps": "396904", "ups": "3.34", "wpb": "118796", "bsz": "256", "num_updates": "655400", "lr": "3.48081e-05", "gnorm": "2.08", "loss_scale": "4", "train_wall": "60", "gb_free": "23.4", "wall": "196049"} +[2022-08-01 17:18:20,200][train_inner][INFO] - {"epoch": 13, "update": 12.738, "loss": "2.053", "ppl": "4.15", "wps": "399170", "ups": "3.37", "wpb": "118506", "bsz": "256", "num_updates": "655600", "lr": "3.47879e-05", "gnorm": "2.109", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "196109"} +[2022-08-01 17:19:19,697][train_inner][INFO] - {"epoch": 13, "update": 12.742, "loss": "2.057", "ppl": "4.16", "wps": "397583", "ups": "3.36", "wpb": "118274", "bsz": "256", "num_updates": "655800", "lr": "3.47677e-05", "gnorm": "2.095", "loss_scale": "8", "train_wall": "59", "gb_free": "23.2", "wall": "196168"} +[2022-08-01 17:20:19,415][train_inner][INFO] - {"epoch": 13, "update": 12.746, "loss": "2.05", "ppl": "4.14", "wps": "398312", "ups": "3.35", "wpb": "118930", "bsz": "256", "num_updates": "656000", "lr": "3.47475e-05", "gnorm": "2.035", "loss_scale": "8", "train_wall": "59", "gb_free": "26.7", "wall": "196228"} +[2022-08-01 17:21:19,631][train_inner][INFO] - {"epoch": 13, "update": 12.75, "loss": "2.058", "ppl": "4.16", "wps": "393768", "ups": "3.32", "wpb": "118556", "bsz": "256", "num_updates": "656200", "lr": "3.47273e-05", "gnorm": "2.071", "loss_scale": "8", "train_wall": "60", "gb_free": "21.7", "wall": "196288"} +[2022-08-01 17:21:57,951][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 17:22:19,454][train_inner][INFO] - {"epoch": 13, "update": 12.754, "loss": "2.057", "ppl": "4.16", "wps": "396164", "ups": "3.34", "wpb": "118498", "bsz": "256", "num_updates": "656400", "lr": "3.47071e-05", "gnorm": "2.086", "loss_scale": "4", "train_wall": "59", "gb_free": "27.4", "wall": "196348"} +[2022-08-01 17:23:19,337][train_inner][INFO] - {"epoch": 13, "update": 12.757, "loss": "2.054", "ppl": "4.15", "wps": "395518", "ups": "3.34", "wpb": "118423", "bsz": "256", "num_updates": "656600", "lr": "3.46869e-05", "gnorm": "2.001", "loss_scale": "4", "train_wall": "60", "gb_free": "27.6", "wall": "196408"} +[2022-08-01 17:24:18,679][train_inner][INFO] - {"epoch": 13, "update": 12.761, "loss": "2.047", "ppl": "4.13", "wps": "403035", "ups": "3.37", "wpb": "119584", "bsz": "256", "num_updates": "656800", "lr": "3.46667e-05", "gnorm": "2.015", "loss_scale": "4", "train_wall": "59", "gb_free": "27", "wall": "196467"} +[2022-08-01 17:25:18,649][train_inner][INFO] - {"epoch": 13, "update": 12.765, "loss": "2.046", "ppl": "4.13", "wps": "395268", "ups": "3.34", "wpb": "118520", "bsz": "256", "num_updates": "657000", "lr": "3.46465e-05", "gnorm": "2.086", "loss_scale": "4", "train_wall": "60", "gb_free": "26.6", "wall": "196527"} +[2022-08-01 17:26:17,991][train_inner][INFO] - {"epoch": 13, "update": 12.769, "loss": "2.05", "ppl": "4.14", "wps": "400845", "ups": "3.37", "wpb": "118933", "bsz": "256", "num_updates": "657200", "lr": "3.46263e-05", "gnorm": "2.181", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "196586"} +[2022-08-01 17:27:17,671][train_inner][INFO] - {"epoch": 13, "update": 12.773, "loss": "2.054", "ppl": "4.15", "wps": "397812", "ups": "3.35", "wpb": "118706", "bsz": "256", "num_updates": "657400", "lr": "3.46061e-05", "gnorm": "2.038", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "196646"} +[2022-08-01 17:28:17,030][train_inner][INFO] - {"epoch": 13, "update": 12.777, "loss": "2.055", "ppl": "4.16", "wps": "398230", "ups": "3.37", "wpb": "118192", "bsz": "256", "num_updates": "657600", "lr": "3.45859e-05", "gnorm": "2.065", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "196705"} +[2022-08-01 17:29:16,291][train_inner][INFO] - {"epoch": 13, "update": 12.781, "loss": "2.056", "ppl": "4.16", "wps": "398523", "ups": "3.37", "wpb": "118084", "bsz": "256", "num_updates": "657800", "lr": "3.45657e-05", "gnorm": "2.092", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "196765"} +[2022-08-01 17:30:15,762][train_inner][INFO] - {"epoch": 13, "update": 12.785, "loss": "2.055", "ppl": "4.16", "wps": "398797", "ups": "3.36", "wpb": "118585", "bsz": "256", "num_updates": "658000", "lr": "3.45455e-05", "gnorm": "2.001", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "196824"} +[2022-08-01 17:31:15,430][train_inner][INFO] - {"epoch": 13, "update": 12.789, "loss": "2.054", "ppl": "4.15", "wps": "396361", "ups": "3.35", "wpb": "118249", "bsz": "256", "num_updates": "658200", "lr": "3.45253e-05", "gnorm": "2.067", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "196884"} +[2022-08-01 17:32:14,967][train_inner][INFO] - {"epoch": 13, "update": 12.792, "loss": "2.055", "ppl": "4.16", "wps": "395842", "ups": "3.36", "wpb": "117835", "bsz": "256", "num_updates": "658400", "lr": "3.45051e-05", "gnorm": "2.006", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "196943"} +[2022-08-01 17:33:14,644][train_inner][INFO] - {"epoch": 13, "update": 12.796, "loss": "2.053", "ppl": "4.15", "wps": "396425", "ups": "3.35", "wpb": "118286", "bsz": "256", "num_updates": "658600", "lr": "3.44848e-05", "gnorm": "2.098", "loss_scale": "8", "train_wall": "59", "gb_free": "26.1", "wall": "197003"} +[2022-08-01 17:34:14,336][train_inner][INFO] - {"epoch": 13, "update": 12.8, "loss": "2.056", "ppl": "4.16", "wps": "395403", "ups": "3.35", "wpb": "118011", "bsz": "256", "num_updates": "658800", "lr": "3.44646e-05", "gnorm": "2.135", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "197063"} +[2022-08-01 17:35:13,829][train_inner][INFO] - {"epoch": 13, "update": 12.804, "loss": "2.052", "ppl": "4.15", "wps": "396679", "ups": "3.36", "wpb": "117997", "bsz": "256", "num_updates": "659000", "lr": "3.44444e-05", "gnorm": "2.008", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "197122"} +[2022-08-01 17:36:13,362][train_inner][INFO] - {"epoch": 13, "update": 12.808, "loss": "2.056", "ppl": "4.16", "wps": "396379", "ups": "3.36", "wpb": "117988", "bsz": "256", "num_updates": "659200", "lr": "3.44242e-05", "gnorm": "2.028", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "197182"} +[2022-08-01 17:37:12,897][train_inner][INFO] - {"epoch": 13, "update": 12.812, "loss": "2.05", "ppl": "4.14", "wps": "396750", "ups": "3.36", "wpb": "118101", "bsz": "256", "num_updates": "659400", "lr": "3.4404e-05", "gnorm": "2.19", "loss_scale": "8", "train_wall": "59", "gb_free": "25.9", "wall": "197241"} +[2022-08-01 17:38:12,620][train_inner][INFO] - {"epoch": 13, "update": 12.816, "loss": "2.054", "ppl": "4.15", "wps": "395743", "ups": "3.35", "wpb": "118174", "bsz": "256", "num_updates": "659600", "lr": "3.43838e-05", "gnorm": "1.992", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "197301"} +[2022-08-01 17:39:12,307][train_inner][INFO] - {"epoch": 13, "update": 12.82, "loss": "2.051", "ppl": "4.14", "wps": "395928", "ups": "3.35", "wpb": "118158", "bsz": "256", "num_updates": "659800", "lr": "3.43636e-05", "gnorm": "2.07", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "197361"} +[2022-08-01 17:40:11,745][train_inner][INFO] - {"epoch": 13, "update": 12.824, "loss": "2.052", "ppl": "4.15", "wps": "398287", "ups": "3.36", "wpb": "118367", "bsz": "256", "num_updates": "660000", "lr": "3.43434e-05", "gnorm": "2.148", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "197420"} +[2022-08-01 17:41:11,338][train_inner][INFO] - {"epoch": 13, "update": 12.827, "loss": "2.055", "ppl": "4.15", "wps": "399338", "ups": "3.36", "wpb": "118987", "bsz": "256", "num_updates": "660200", "lr": "3.43232e-05", "gnorm": "2.118", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "197480"} +[2022-08-01 17:42:11,221][train_inner][INFO] - {"epoch": 13, "update": 12.831, "loss": "2.054", "ppl": "4.15", "wps": "394411", "ups": "3.34", "wpb": "118091", "bsz": "256", "num_updates": "660400", "lr": "3.4303e-05", "gnorm": "2.092", "loss_scale": "8", "train_wall": "60", "gb_free": "25", "wall": "197540"} +[2022-08-01 17:42:18,990][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 17:43:10,999][train_inner][INFO] - {"epoch": 13, "update": 12.835, "loss": "2.051", "ppl": "4.14", "wps": "396135", "ups": "3.35", "wpb": "118400", "bsz": "256", "num_updates": "660600", "lr": "3.42828e-05", "gnorm": "2.084", "loss_scale": "8", "train_wall": "59", "gb_free": "25.1", "wall": "197599"} +[2022-08-01 17:44:10,306][train_inner][INFO] - {"epoch": 13, "update": 12.839, "loss": "2.06", "ppl": "4.17", "wps": "397529", "ups": "3.37", "wpb": "117881", "bsz": "256", "num_updates": "660800", "lr": "3.42626e-05", "gnorm": "2.059", "loss_scale": "8", "train_wall": "59", "gb_free": "23.2", "wall": "197659"} +[2022-08-01 17:45:10,056][train_inner][INFO] - {"epoch": 13, "update": 12.843, "loss": "2.054", "ppl": "4.15", "wps": "394624", "ups": "3.35", "wpb": "117892", "bsz": "256", "num_updates": "661000", "lr": "3.42424e-05", "gnorm": "2.062", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "197718"} +[2022-08-01 17:46:09,692][train_inner][INFO] - {"epoch": 13, "update": 12.847, "loss": "2.048", "ppl": "4.14", "wps": "398472", "ups": "3.35", "wpb": "118815", "bsz": "256", "num_updates": "661200", "lr": "3.42222e-05", "gnorm": "1.961", "loss_scale": "8", "train_wall": "59", "gb_free": "22.4", "wall": "197778"} +[2022-08-01 17:47:08,917][train_inner][INFO] - {"epoch": 13, "update": 12.851, "loss": "2.057", "ppl": "4.16", "wps": "397408", "ups": "3.38", "wpb": "117682", "bsz": "256", "num_updates": "661400", "lr": "3.4202e-05", "gnorm": "2.058", "loss_scale": "8", "train_wall": "59", "gb_free": "21.6", "wall": "197837"} +[2022-08-01 17:48:08,360][train_inner][INFO] - {"epoch": 13, "update": 12.855, "loss": "2.057", "ppl": "4.16", "wps": "398676", "ups": "3.36", "wpb": "118491", "bsz": "256", "num_updates": "661600", "lr": "3.41818e-05", "gnorm": "2.047", "loss_scale": "8", "train_wall": "59", "gb_free": "23.2", "wall": "197897"} +[2022-08-01 17:49:07,856][train_inner][INFO] - {"epoch": 13, "update": 12.858, "loss": "2.057", "ppl": "4.16", "wps": "395444", "ups": "3.36", "wpb": "117637", "bsz": "256", "num_updates": "661800", "lr": "3.41616e-05", "gnorm": "2.184", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "197956"} +[2022-08-01 17:50:07,774][train_inner][INFO] - {"epoch": 13, "update": 12.862, "loss": "2.056", "ppl": "4.16", "wps": "395527", "ups": "3.34", "wpb": "118493", "bsz": "256", "num_updates": "662000", "lr": "3.41414e-05", "gnorm": "2.043", "loss_scale": "8", "train_wall": "60", "gb_free": "24.3", "wall": "198016"} +[2022-08-01 17:50:10,069][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 17:51:07,524][train_inner][INFO] - {"epoch": 13, "update": 12.866, "loss": "2.053", "ppl": "4.15", "wps": "396528", "ups": "3.35", "wpb": "118463", "bsz": "256", "num_updates": "662200", "lr": "3.41212e-05", "gnorm": "2.045", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "198076"} +[2022-08-01 17:52:07,005][train_inner][INFO] - {"epoch": 13, "update": 12.87, "loss": "2.061", "ppl": "4.17", "wps": "396975", "ups": "3.36", "wpb": "118062", "bsz": "256", "num_updates": "662400", "lr": "3.4101e-05", "gnorm": "2.118", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "198135"} +[2022-08-01 17:53:06,576][train_inner][INFO] - {"epoch": 13, "update": 12.874, "loss": "2.057", "ppl": "4.16", "wps": "398580", "ups": "3.36", "wpb": "118717", "bsz": "256", "num_updates": "662600", "lr": "3.40808e-05", "gnorm": "2.067", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "198195"} +[2022-08-01 17:54:06,409][train_inner][INFO] - {"epoch": 13, "update": 12.878, "loss": "2.045", "ppl": "4.13", "wps": "398247", "ups": "3.34", "wpb": "119141", "bsz": "256", "num_updates": "662800", "lr": "3.40606e-05", "gnorm": "2.048", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "198255"} +[2022-08-01 17:55:06,064][train_inner][INFO] - {"epoch": 13, "update": 12.882, "loss": "2.058", "ppl": "4.16", "wps": "396094", "ups": "3.35", "wpb": "118145", "bsz": "256", "num_updates": "663000", "lr": "3.40404e-05", "gnorm": "2.058", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "198314"} +[2022-08-01 17:56:05,968][train_inner][INFO] - {"epoch": 13, "update": 12.886, "loss": "2.048", "ppl": "4.13", "wps": "396236", "ups": "3.34", "wpb": "118680", "bsz": "256", "num_updates": "663200", "lr": "3.40202e-05", "gnorm": "2.127", "loss_scale": "4", "train_wall": "60", "gb_free": "22.4", "wall": "198374"} +[2022-08-01 17:57:05,364][train_inner][INFO] - {"epoch": 13, "update": 12.89, "loss": "2.053", "ppl": "4.15", "wps": "398708", "ups": "3.37", "wpb": "118407", "bsz": "256", "num_updates": "663400", "lr": "3.4e-05", "gnorm": "2.043", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "198434"} +[2022-08-01 17:58:04,842][train_inner][INFO] - {"epoch": 13, "update": 12.893, "loss": "2.054", "ppl": "4.15", "wps": "397259", "ups": "3.36", "wpb": "118140", "bsz": "256", "num_updates": "663600", "lr": "3.39798e-05", "gnorm": "2.037", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "198493"} +[2022-08-01 17:59:04,367][train_inner][INFO] - {"epoch": 13, "update": 12.897, "loss": "2.047", "ppl": "4.13", "wps": "398983", "ups": "3.36", "wpb": "118746", "bsz": "256", "num_updates": "663800", "lr": "3.39596e-05", "gnorm": "2.083", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "198553"} +[2022-08-01 18:00:03,865][train_inner][INFO] - {"epoch": 13, "update": 12.901, "loss": "2.051", "ppl": "4.14", "wps": "394059", "ups": "3.36", "wpb": "117227", "bsz": "256", "num_updates": "664000", "lr": "3.39394e-05", "gnorm": "2.143", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "198612"} +[2022-08-01 18:01:03,283][train_inner][INFO] - {"epoch": 13, "update": 12.905, "loss": "2.049", "ppl": "4.14", "wps": "397198", "ups": "3.37", "wpb": "118003", "bsz": "256", "num_updates": "664200", "lr": "3.39192e-05", "gnorm": "2.006", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "198672"} +[2022-08-01 18:01:53,230][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 18:02:02,837][train_inner][INFO] - {"epoch": 13, "update": 12.909, "loss": "2.056", "ppl": "4.16", "wps": "396835", "ups": "3.36", "wpb": "118166", "bsz": "256", "num_updates": "664400", "lr": "3.3899e-05", "gnorm": "2.038", "loss_scale": "4", "train_wall": "59", "gb_free": "26.5", "wall": "198731"} +[2022-08-01 18:03:02,176][train_inner][INFO] - {"epoch": 13, "update": 12.913, "loss": "2.052", "ppl": "4.15", "wps": "397059", "ups": "3.37", "wpb": "117805", "bsz": "256", "num_updates": "664600", "lr": "3.38788e-05", "gnorm": "2.063", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "198791"} +[2022-08-01 18:04:01,879][train_inner][INFO] - {"epoch": 13, "update": 12.917, "loss": "2.057", "ppl": "4.16", "wps": "394588", "ups": "3.35", "wpb": "117790", "bsz": "256", "num_updates": "664800", "lr": "3.38586e-05", "gnorm": "2.063", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "198850"} +[2022-08-01 18:05:01,415][train_inner][INFO] - {"epoch": 13, "update": 12.921, "loss": "2.044", "ppl": "4.12", "wps": "398470", "ups": "3.36", "wpb": "118615", "bsz": "256", "num_updates": "665000", "lr": "3.38384e-05", "gnorm": "2.089", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "198910"} +[2022-08-01 18:06:00,797][train_inner][INFO] - {"epoch": 13, "update": 12.925, "loss": "2.053", "ppl": "4.15", "wps": "397953", "ups": "3.37", "wpb": "118156", "bsz": "256", "num_updates": "665200", "lr": "3.38182e-05", "gnorm": "2.117", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "198969"} +[2022-08-01 18:07:00,429][train_inner][INFO] - {"epoch": 13, "update": 12.928, "loss": "2.046", "ppl": "4.13", "wps": "396521", "ups": "3.35", "wpb": "118227", "bsz": "256", "num_updates": "665400", "lr": "3.3798e-05", "gnorm": "2.143", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "199029"} +[2022-08-01 18:08:00,150][train_inner][INFO] - {"epoch": 13, "update": 12.932, "loss": "2.054", "ppl": "4.15", "wps": "397376", "ups": "3.35", "wpb": "118656", "bsz": "256", "num_updates": "665600", "lr": "3.37778e-05", "gnorm": "2.053", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "199089"} +[2022-08-01 18:08:59,858][train_inner][INFO] - {"epoch": 13, "update": 12.936, "loss": "2.05", "ppl": "4.14", "wps": "398697", "ups": "3.35", "wpb": "119027", "bsz": "256", "num_updates": "665800", "lr": "3.37576e-05", "gnorm": "2.077", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "199148"} +[2022-08-01 18:09:59,583][train_inner][INFO] - {"epoch": 13, "update": 12.94, "loss": "2.051", "ppl": "4.14", "wps": "396733", "ups": "3.35", "wpb": "118473", "bsz": "256", "num_updates": "666000", "lr": "3.37374e-05", "gnorm": "2.055", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "199208"} +[2022-08-01 18:10:58,648][train_inner][INFO] - {"epoch": 13, "update": 12.944, "loss": "2.051", "ppl": "4.14", "wps": "400102", "ups": "3.39", "wpb": "118160", "bsz": "256", "num_updates": "666200", "lr": "3.37172e-05", "gnorm": "2.102", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "199267"} +[2022-08-01 18:11:58,082][train_inner][INFO] - {"epoch": 13, "update": 12.948, "loss": "2.049", "ppl": "4.14", "wps": "398070", "ups": "3.37", "wpb": "118292", "bsz": "256", "num_updates": "666400", "lr": "3.3697e-05", "gnorm": "2.138", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "199327"} +[2022-08-01 18:12:57,592][train_inner][INFO] - {"epoch": 13, "update": 12.952, "loss": "2.058", "ppl": "4.16", "wps": "397038", "ups": "3.36", "wpb": "118138", "bsz": "256", "num_updates": "666600", "lr": "3.36768e-05", "gnorm": "2.195", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "199386"} +[2022-08-01 18:13:56,990][train_inner][INFO] - {"epoch": 13, "update": 12.956, "loss": "2.047", "ppl": "4.13", "wps": "400542", "ups": "3.37", "wpb": "118957", "bsz": "256", "num_updates": "666800", "lr": "3.36566e-05", "gnorm": "2.012", "loss_scale": "8", "train_wall": "59", "gb_free": "24.6", "wall": "199445"} +[2022-08-01 18:14:56,395][train_inner][INFO] - {"epoch": 13, "update": 12.96, "loss": "2.058", "ppl": "4.16", "wps": "396746", "ups": "3.37", "wpb": "117842", "bsz": "256", "num_updates": "667000", "lr": "3.36364e-05", "gnorm": "2.055", "loss_scale": "8", "train_wall": "59", "gb_free": "23.7", "wall": "199505"} +[2022-08-01 18:15:56,011][train_inner][INFO] - {"epoch": 13, "update": 12.963, "loss": "2.049", "ppl": "4.14", "wps": "397876", "ups": "3.35", "wpb": "118598", "bsz": "256", "num_updates": "667200", "lr": "3.36162e-05", "gnorm": "2.15", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "199564"} +[2022-08-01 18:16:56,064][train_inner][INFO] - {"epoch": 13, "update": 12.967, "loss": "2.046", "ppl": "4.13", "wps": "394021", "ups": "3.33", "wpb": "118309", "bsz": "256", "num_updates": "667400", "lr": "3.3596e-05", "gnorm": "2.06", "loss_scale": "8", "train_wall": "60", "gb_free": "23.3", "wall": "199624"} +[2022-08-01 18:17:55,130][train_inner][INFO] - {"epoch": 13, "update": 12.971, "loss": "2.054", "ppl": "4.15", "wps": "400104", "ups": "3.39", "wpb": "118162", "bsz": "256", "num_updates": "667600", "lr": "3.35758e-05", "gnorm": "2.016", "loss_scale": "8", "train_wall": "59", "gb_free": "21.3", "wall": "199684"} +[2022-08-01 18:18:54,410][train_inner][INFO] - {"epoch": 13, "update": 12.975, "loss": "2.051", "ppl": "4.14", "wps": "399045", "ups": "3.37", "wpb": "118275", "bsz": "256", "num_updates": "667800", "lr": "3.35556e-05", "gnorm": "2.061", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "199743"} +[2022-08-01 18:19:53,858][train_inner][INFO] - {"epoch": 13, "update": 12.979, "loss": "2.051", "ppl": "4.14", "wps": "395318", "ups": "3.36", "wpb": "117504", "bsz": "256", "num_updates": "668000", "lr": "3.35354e-05", "gnorm": "2.113", "loss_scale": "8", "train_wall": "59", "gb_free": "28.4", "wall": "199802"} +[2022-08-01 18:20:53,118][train_inner][INFO] - {"epoch": 13, "update": 12.983, "loss": "2.052", "ppl": "4.15", "wps": "400084", "ups": "3.38", "wpb": "118543", "bsz": "256", "num_updates": "668200", "lr": "3.35152e-05", "gnorm": "2.091", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "199862"} +[2022-08-01 18:21:53,476][train_inner][INFO] - {"epoch": 13, "update": 12.987, "loss": "2.05", "ppl": "4.14", "wps": "391326", "ups": "3.31", "wpb": "118098", "bsz": "256", "num_updates": "668400", "lr": "3.34949e-05", "gnorm": "2.089", "loss_scale": "8", "train_wall": "60", "gb_free": "21.3", "wall": "199922"} +[2022-08-01 18:22:12,735][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 18:22:52,896][train_inner][INFO] - {"epoch": 13, "update": 12.991, "loss": "2.048", "ppl": "4.13", "wps": "397122", "ups": "3.37", "wpb": "117985", "bsz": "256", "num_updates": "668600", "lr": "3.34747e-05", "gnorm": "2.108", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "199981"} +[2022-08-01 18:23:52,609][train_inner][INFO] - {"epoch": 13, "update": 12.995, "loss": "2.049", "ppl": "4.14", "wps": "394737", "ups": "3.35", "wpb": "117855", "bsz": "256", "num_updates": "668800", "lr": "3.34545e-05", "gnorm": "2.082", "loss_scale": "8", "train_wall": "59", "gb_free": "23.3", "wall": "200041"} +[2022-08-01 18:24:36,918][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 18:24:52,538][train_inner][INFO] - {"epoch": 13, "update": 12.998, "loss": "2.057", "ppl": "4.16", "wps": "394876", "ups": "3.34", "wpb": "118321", "bsz": "256", "num_updates": "669000", "lr": "3.34343e-05", "gnorm": "2.14", "loss_scale": "4", "train_wall": "60", "gb_free": "24", "wall": "200101"} +[2022-08-01 18:25:16,926][fairseq_cli.train][INFO] - end of epoch 13 (average epoch stats below) +[2022-08-01 18:25:16,927][train][INFO] - {"epoch": 13, "train_loss": "2.058", "train_ppl": "4.17", "train_wps": "395566", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "669082", "train_lr": "3.34261e-05", "train_gnorm": "2.108", "train_loss_scale": "4", "train_train_wall": "15240", "train_gb_free": "23.9", "train_wall": "200125"} +[2022-08-01 18:25:17,034][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 18:25:17,037][fairseq.trainer][INFO] - begin training epoch 14 +[2022-08-01 18:25:17,037][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 18:26:04,012][train_inner][INFO] - {"epoch": 14, "update": 13.002, "loss": "2.049", "ppl": "4.14", "wps": "330755", "ups": "2.8", "wpb": "118200", "bsz": "255.4", "num_updates": "669200", "lr": "3.34141e-05", "gnorm": "2.029", "loss_scale": "4", "train_wall": "60", "gb_free": "23.9", "wall": "200172"} +[2022-08-01 18:27:02,824][train_inner][INFO] - {"epoch": 14, "update": 13.006, "loss": "2.049", "ppl": "4.14", "wps": "400756", "ups": "3.4", "wpb": "117846", "bsz": "256", "num_updates": "669400", "lr": "3.33939e-05", "gnorm": "2.018", "loss_scale": "4", "train_wall": "58", "gb_free": "25.8", "wall": "200231"} +[2022-08-01 18:28:02,439][train_inner][INFO] - {"epoch": 14, "update": 13.01, "loss": "2.039", "ppl": "4.11", "wps": "397725", "ups": "3.35", "wpb": "118552", "bsz": "256", "num_updates": "669600", "lr": "3.33737e-05", "gnorm": "2.035", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "200291"} +[2022-08-01 18:29:01,428][train_inner][INFO] - {"epoch": 14, "update": 13.014, "loss": "2.041", "ppl": "4.12", "wps": "402954", "ups": "3.39", "wpb": "118849", "bsz": "256", "num_updates": "669800", "lr": "3.33535e-05", "gnorm": "2.08", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "200350"} +[2022-08-01 18:30:01,811][train_inner][INFO] - {"epoch": 14, "update": 13.018, "loss": "2.047", "ppl": "4.13", "wps": "391717", "ups": "3.31", "wpb": "118263", "bsz": "256", "num_updates": "670000", "lr": "3.33333e-05", "gnorm": "2.041", "loss_scale": "4", "train_wall": "60", "gb_free": "21.8", "wall": "200410"} +[2022-08-01 18:31:01,005][train_inner][INFO] - {"epoch": 14, "update": 13.022, "loss": "2.042", "ppl": "4.12", "wps": "400546", "ups": "3.38", "wpb": "118549", "bsz": "256", "num_updates": "670200", "lr": "3.33131e-05", "gnorm": "2.047", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "200469"} +[2022-08-01 18:32:00,471][train_inner][INFO] - {"epoch": 14, "update": 13.026, "loss": "2.048", "ppl": "4.14", "wps": "396928", "ups": "3.36", "wpb": "118018", "bsz": "256", "num_updates": "670400", "lr": "3.32929e-05", "gnorm": "2.15", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "200529"} +[2022-08-01 18:32:59,631][train_inner][INFO] - {"epoch": 14, "update": 13.029, "loss": "2.046", "ppl": "4.13", "wps": "399052", "ups": "3.38", "wpb": "118038", "bsz": "256", "num_updates": "670600", "lr": "3.32727e-05", "gnorm": "2.183", "loss_scale": "4", "train_wall": "59", "gb_free": "23.9", "wall": "200588"} +[2022-08-01 18:33:58,881][train_inner][INFO] - {"epoch": 14, "update": 13.033, "loss": "2.047", "ppl": "4.13", "wps": "399767", "ups": "3.38", "wpb": "118430", "bsz": "256", "num_updates": "670800", "lr": "3.32525e-05", "gnorm": "2.142", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "200647"} +[2022-08-01 18:34:58,697][train_inner][INFO] - {"epoch": 14, "update": 13.037, "loss": "2.05", "ppl": "4.14", "wps": "398042", "ups": "3.34", "wpb": "119046", "bsz": "256", "num_updates": "671000", "lr": "3.32323e-05", "gnorm": "2.03", "loss_scale": "8", "train_wall": "59", "gb_free": "21.7", "wall": "200707"} +[2022-08-01 18:35:05,880][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 18:35:58,379][train_inner][INFO] - {"epoch": 14, "update": 13.041, "loss": "2.048", "ppl": "4.13", "wps": "397194", "ups": "3.35", "wpb": "118525", "bsz": "256", "num_updates": "671200", "lr": "3.32121e-05", "gnorm": "2.077", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "200767"} +[2022-08-01 18:36:57,415][train_inner][INFO] - {"epoch": 14, "update": 13.045, "loss": "2.054", "ppl": "4.15", "wps": "399978", "ups": "3.39", "wpb": "118066", "bsz": "256", "num_updates": "671400", "lr": "3.31919e-05", "gnorm": "2.072", "loss_scale": "4", "train_wall": "59", "gb_free": "26.9", "wall": "200826"} +[2022-08-01 18:37:56,899][train_inner][INFO] - {"epoch": 14, "update": 13.049, "loss": "2.048", "ppl": "4.14", "wps": "398120", "ups": "3.36", "wpb": "118408", "bsz": "256", "num_updates": "671600", "lr": "3.31717e-05", "gnorm": "2.091", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "200885"} +[2022-08-01 18:38:56,683][train_inner][INFO] - {"epoch": 14, "update": 13.053, "loss": "2.046", "ppl": "4.13", "wps": "396488", "ups": "3.35", "wpb": "118518", "bsz": "256", "num_updates": "671800", "lr": "3.31515e-05", "gnorm": "2.074", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "200945"} +[2022-08-01 18:39:56,055][train_inner][INFO] - {"epoch": 14, "update": 13.057, "loss": "2.053", "ppl": "4.15", "wps": "398812", "ups": "3.37", "wpb": "118391", "bsz": "256", "num_updates": "672000", "lr": "3.31313e-05", "gnorm": "2.145", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "201004"} +[2022-08-01 18:40:55,387][train_inner][INFO] - {"epoch": 14, "update": 13.061, "loss": "2.048", "ppl": "4.14", "wps": "399887", "ups": "3.37", "wpb": "118629", "bsz": "256", "num_updates": "672200", "lr": "3.31111e-05", "gnorm": "2.059", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "201064"} +[2022-08-01 18:41:54,825][train_inner][INFO] - {"epoch": 14, "update": 13.064, "loss": "2.045", "ppl": "4.13", "wps": "397695", "ups": "3.36", "wpb": "118191", "bsz": "256", "num_updates": "672400", "lr": "3.30909e-05", "gnorm": "2.074", "loss_scale": "4", "train_wall": "59", "gb_free": "26.4", "wall": "201123"} +[2022-08-01 18:42:54,442][train_inner][INFO] - {"epoch": 14, "update": 13.068, "loss": "2.047", "ppl": "4.13", "wps": "397053", "ups": "3.35", "wpb": "118355", "bsz": "256", "num_updates": "672600", "lr": "3.30707e-05", "gnorm": "2.07", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "201183"} +[2022-08-01 18:43:53,563][train_inner][INFO] - {"epoch": 14, "update": 13.072, "loss": "2.042", "ppl": "4.12", "wps": "401604", "ups": "3.38", "wpb": "118715", "bsz": "256", "num_updates": "672800", "lr": "3.30505e-05", "gnorm": "2.047", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "201242"} +[2022-08-01 18:44:53,284][train_inner][INFO] - {"epoch": 14, "update": 13.076, "loss": "2.045", "ppl": "4.13", "wps": "395674", "ups": "3.35", "wpb": "118149", "bsz": "256", "num_updates": "673000", "lr": "3.30303e-05", "gnorm": "2.151", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "201302"} +[2022-08-01 18:45:16,146][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 18:45:53,044][train_inner][INFO] - {"epoch": 14, "update": 13.08, "loss": "2.047", "ppl": "4.13", "wps": "395666", "ups": "3.35", "wpb": "118225", "bsz": "256", "num_updates": "673200", "lr": "3.30101e-05", "gnorm": "2.05", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "201361"} +[2022-08-01 18:46:52,690][train_inner][INFO] - {"epoch": 14, "update": 13.084, "loss": "2.042", "ppl": "4.12", "wps": "397032", "ups": "3.35", "wpb": "118405", "bsz": "256", "num_updates": "673400", "lr": "3.29899e-05", "gnorm": "2.13", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "201421"} +[2022-08-01 18:47:51,730][train_inner][INFO] - {"epoch": 14, "update": 13.088, "loss": "2.047", "ppl": "4.13", "wps": "399446", "ups": "3.39", "wpb": "117916", "bsz": "256", "num_updates": "673600", "lr": "3.29697e-05", "gnorm": "2.106", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "201480"} +[2022-08-01 18:48:51,173][train_inner][INFO] - {"epoch": 14, "update": 13.092, "loss": "2.047", "ppl": "4.13", "wps": "398480", "ups": "3.36", "wpb": "118434", "bsz": "256", "num_updates": "673800", "lr": "3.29495e-05", "gnorm": "2.039", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "201540"} +[2022-08-01 18:49:50,694][train_inner][INFO] - {"epoch": 14, "update": 13.096, "loss": "2.046", "ppl": "4.13", "wps": "398441", "ups": "3.36", "wpb": "118577", "bsz": "256", "num_updates": "674000", "lr": "3.29293e-05", "gnorm": "2.115", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "201599"} +[2022-08-01 18:50:50,466][train_inner][INFO] - {"epoch": 14, "update": 13.099, "loss": "2.048", "ppl": "4.13", "wps": "397471", "ups": "3.35", "wpb": "118788", "bsz": "256", "num_updates": "674200", "lr": "3.29091e-05", "gnorm": "2.037", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "201659"} +[2022-08-01 18:51:50,528][train_inner][INFO] - {"epoch": 14, "update": 13.103, "loss": "2.054", "ppl": "4.15", "wps": "394728", "ups": "3.33", "wpb": "118540", "bsz": "256", "num_updates": "674400", "lr": "3.28889e-05", "gnorm": "2.065", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "201719"} +[2022-08-01 18:52:51,047][train_inner][INFO] - {"epoch": 14, "update": 13.107, "loss": "2.045", "ppl": "4.13", "wps": "388870", "ups": "3.3", "wpb": "117669", "bsz": "256", "num_updates": "674600", "lr": "3.28687e-05", "gnorm": "2.074", "loss_scale": "4", "train_wall": "60", "gb_free": "21.9", "wall": "201779"} +[2022-08-01 18:53:50,796][train_inner][INFO] - {"epoch": 14, "update": 13.111, "loss": "2.048", "ppl": "4.13", "wps": "396318", "ups": "3.35", "wpb": "118398", "bsz": "256", "num_updates": "674800", "lr": "3.28485e-05", "gnorm": "2.14", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "201839"} +[2022-08-01 18:54:50,427][train_inner][INFO] - {"epoch": 14, "update": 13.115, "loss": "2.048", "ppl": "4.13", "wps": "395755", "ups": "3.35", "wpb": "117994", "bsz": "256", "num_updates": "675000", "lr": "3.28283e-05", "gnorm": "2.055", "loss_scale": "4", "train_wall": "59", "gb_free": "27.8", "wall": "201899"} +[2022-08-01 18:55:50,066][train_inner][INFO] - {"epoch": 14, "update": 13.119, "loss": "2.055", "ppl": "4.16", "wps": "392529", "ups": "3.35", "wpb": "117051", "bsz": "256", "num_updates": "675200", "lr": "3.28081e-05", "gnorm": "2.155", "loss_scale": "8", "train_wall": "59", "gb_free": "22.1", "wall": "201958"} +[2022-08-01 18:56:49,181][train_inner][INFO] - {"epoch": 14, "update": 13.123, "loss": "2.05", "ppl": "4.14", "wps": "398384", "ups": "3.38", "wpb": "117752", "bsz": "256", "num_updates": "675400", "lr": "3.27879e-05", "gnorm": "2.086", "loss_scale": "8", "train_wall": "59", "gb_free": "24.3", "wall": "202018"} +[2022-08-01 18:57:48,582][train_inner][INFO] - {"epoch": 14, "update": 13.127, "loss": "2.045", "ppl": "4.13", "wps": "397832", "ups": "3.37", "wpb": "118156", "bsz": "256", "num_updates": "675600", "lr": "3.27677e-05", "gnorm": "2.087", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "202077"} +[2022-08-01 18:58:48,050][train_inner][INFO] - {"epoch": 14, "update": 13.131, "loss": "2.047", "ppl": "4.13", "wps": "398461", "ups": "3.36", "wpb": "118478", "bsz": "256", "num_updates": "675800", "lr": "3.27475e-05", "gnorm": "2.052", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "202136"} +[2022-08-01 18:59:48,110][train_inner][INFO] - {"epoch": 14, "update": 13.134, "loss": "2.048", "ppl": "4.14", "wps": "392862", "ups": "3.33", "wpb": "117976", "bsz": "256", "num_updates": "676000", "lr": "3.27273e-05", "gnorm": "2.09", "loss_scale": "8", "train_wall": "60", "gb_free": "24.3", "wall": "202197"} +[2022-08-01 19:00:47,612][train_inner][INFO] - {"epoch": 14, "update": 13.138, "loss": "2.045", "ppl": "4.13", "wps": "397772", "ups": "3.36", "wpb": "118340", "bsz": "256", "num_updates": "676200", "lr": "3.27071e-05", "gnorm": "2.078", "loss_scale": "8", "train_wall": "59", "gb_free": "23.8", "wall": "202256"} +[2022-08-01 19:01:48,209][train_inner][INFO] - {"epoch": 14, "update": 13.142, "loss": "2.043", "ppl": "4.12", "wps": "391041", "ups": "3.3", "wpb": "118479", "bsz": "256", "num_updates": "676400", "lr": "3.26869e-05", "gnorm": "2.134", "loss_scale": "8", "train_wall": "60", "gb_free": "23.6", "wall": "202317"} +[2022-08-01 19:02:47,721][train_inner][INFO] - {"epoch": 14, "update": 13.146, "loss": "2.047", "ppl": "4.13", "wps": "397421", "ups": "3.36", "wpb": "118256", "bsz": "256", "num_updates": "676600", "lr": "3.26667e-05", "gnorm": "2.096", "loss_scale": "8", "train_wall": "59", "gb_free": "27.3", "wall": "202376"} +[2022-08-01 19:03:47,431][train_inner][INFO] - {"epoch": 14, "update": 13.15, "loss": "2.047", "ppl": "4.13", "wps": "397742", "ups": "3.35", "wpb": "118745", "bsz": "256", "num_updates": "676800", "lr": "3.26465e-05", "gnorm": "2.063", "loss_scale": "8", "train_wall": "59", "gb_free": "22.6", "wall": "202436"} +[2022-08-01 19:04:46,787][train_inner][INFO] - {"epoch": 14, "update": 13.154, "loss": "2.041", "ppl": "4.12", "wps": "400320", "ups": "3.37", "wpb": "118807", "bsz": "256", "num_updates": "677000", "lr": "3.26263e-05", "gnorm": "2.08", "loss_scale": "8", "train_wall": "59", "gb_free": "23.9", "wall": "202495"} +[2022-08-01 19:05:39,579][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 19:05:46,414][train_inner][INFO] - {"epoch": 14, "update": 13.158, "loss": "2.05", "ppl": "4.14", "wps": "394786", "ups": "3.35", "wpb": "117698", "bsz": "256", "num_updates": "677200", "lr": "3.26061e-05", "gnorm": "2.066", "loss_scale": "8", "train_wall": "59", "gb_free": "24.1", "wall": "202555"} +[2022-08-01 19:05:54,575][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 19:06:46,021][train_inner][INFO] - {"epoch": 14, "update": 13.162, "loss": "2.045", "ppl": "4.13", "wps": "396224", "ups": "3.36", "wpb": "118088", "bsz": "256", "num_updates": "677400", "lr": "3.25859e-05", "gnorm": "2.092", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "202614"} +[2022-08-01 19:07:45,361][train_inner][INFO] - {"epoch": 14, "update": 13.166, "loss": "2.052", "ppl": "4.15", "wps": "395699", "ups": "3.37", "wpb": "117402", "bsz": "256", "num_updates": "677600", "lr": "3.25657e-05", "gnorm": "2.151", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "202674"} +[2022-08-01 19:08:44,968][train_inner][INFO] - {"epoch": 14, "update": 13.169, "loss": "2.049", "ppl": "4.14", "wps": "397686", "ups": "3.36", "wpb": "118525", "bsz": "256", "num_updates": "677800", "lr": "3.25455e-05", "gnorm": "2.022", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "202733"} +[2022-08-01 19:09:44,433][train_inner][INFO] - {"epoch": 14, "update": 13.173, "loss": "2.042", "ppl": "4.12", "wps": "398765", "ups": "3.36", "wpb": "118561", "bsz": "256", "num_updates": "678000", "lr": "3.25253e-05", "gnorm": "2.072", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "202793"} +[2022-08-01 19:10:43,989][train_inner][INFO] - {"epoch": 14, "update": 13.177, "loss": "2.044", "ppl": "4.12", "wps": "397917", "ups": "3.36", "wpb": "118490", "bsz": "256", "num_updates": "678200", "lr": "3.25051e-05", "gnorm": "2.225", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "202852"} +[2022-08-01 19:11:43,488][train_inner][INFO] - {"epoch": 14, "update": 13.181, "loss": "2.043", "ppl": "4.12", "wps": "397002", "ups": "3.36", "wpb": "118105", "bsz": "256", "num_updates": "678400", "lr": "3.24848e-05", "gnorm": "2.051", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "202912"} +[2022-08-01 19:12:42,956][train_inner][INFO] - {"epoch": 14, "update": 13.185, "loss": "2.049", "ppl": "4.14", "wps": "396657", "ups": "3.36", "wpb": "117942", "bsz": "256", "num_updates": "678600", "lr": "3.24646e-05", "gnorm": "2.093", "loss_scale": "4", "train_wall": "59", "gb_free": "28.9", "wall": "202971"} +[2022-08-01 19:13:43,580][train_inner][INFO] - {"epoch": 14, "update": 13.189, "loss": "2.051", "ppl": "4.14", "wps": "387964", "ups": "3.3", "wpb": "117599", "bsz": "256", "num_updates": "678800", "lr": "3.24444e-05", "gnorm": "2.074", "loss_scale": "4", "train_wall": "60", "gb_free": "23.1", "wall": "203032"} +[2022-08-01 19:14:43,232][train_inner][INFO] - {"epoch": 14, "update": 13.193, "loss": "2.043", "ppl": "4.12", "wps": "396975", "ups": "3.35", "wpb": "118400", "bsz": "256", "num_updates": "679000", "lr": "3.24242e-05", "gnorm": "2.02", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "203092"} +[2022-08-01 19:15:42,971][train_inner][INFO] - {"epoch": 14, "update": 13.197, "loss": "2.043", "ppl": "4.12", "wps": "395040", "ups": "3.35", "wpb": "117997", "bsz": "256", "num_updates": "679200", "lr": "3.2404e-05", "gnorm": "2.059", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "203151"} +[2022-08-01 19:16:42,181][train_inner][INFO] - {"epoch": 14, "update": 13.2, "loss": "2.046", "ppl": "4.13", "wps": "401343", "ups": "3.38", "wpb": "118817", "bsz": "256", "num_updates": "679400", "lr": "3.23838e-05", "gnorm": "2.15", "loss_scale": "8", "train_wall": "59", "gb_free": "21.8", "wall": "203211"} +[2022-08-01 19:17:02,565][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 19:17:42,246][train_inner][INFO] - {"epoch": 14, "update": 13.204, "loss": "2.045", "ppl": "4.13", "wps": "396614", "ups": "3.33", "wpb": "119111", "bsz": "256", "num_updates": "679600", "lr": "3.23636e-05", "gnorm": "2.086", "loss_scale": "4", "train_wall": "60", "gb_free": "24.8", "wall": "203271"} +[2022-08-01 19:18:42,035][train_inner][INFO] - {"epoch": 14, "update": 13.208, "loss": "2.045", "ppl": "4.13", "wps": "396251", "ups": "3.35", "wpb": "118456", "bsz": "256", "num_updates": "679800", "lr": "3.23434e-05", "gnorm": "2.102", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "203330"} +[2022-08-01 19:19:41,405][train_inner][INFO] - {"epoch": 14, "update": 13.212, "loss": "2.043", "ppl": "4.12", "wps": "397410", "ups": "3.37", "wpb": "117972", "bsz": "256", "num_updates": "680000", "lr": "3.23232e-05", "gnorm": "2.114", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "203390"} +[2022-08-01 19:20:41,023][train_inner][INFO] - {"epoch": 14, "update": 13.216, "loss": "2.048", "ppl": "4.14", "wps": "394442", "ups": "3.35", "wpb": "117577", "bsz": "256", "num_updates": "680200", "lr": "3.2303e-05", "gnorm": "2.107", "loss_scale": "4", "train_wall": "59", "gb_free": "24.9", "wall": "203449"} +[2022-08-01 19:21:40,784][train_inner][INFO] - {"epoch": 14, "update": 13.22, "loss": "2.053", "ppl": "4.15", "wps": "396430", "ups": "3.35", "wpb": "118455", "bsz": "256", "num_updates": "680400", "lr": "3.22828e-05", "gnorm": "2.066", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "203509"} +[2022-08-01 19:22:40,129][train_inner][INFO] - {"epoch": 14, "update": 13.224, "loss": "2.042", "ppl": "4.12", "wps": "397467", "ups": "3.37", "wpb": "117939", "bsz": "256", "num_updates": "680600", "lr": "3.22626e-05", "gnorm": "2.12", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "203569"} +[2022-08-01 19:23:39,211][train_inner][INFO] - {"epoch": 14, "update": 13.228, "loss": "2.05", "ppl": "4.14", "wps": "399707", "ups": "3.39", "wpb": "118076", "bsz": "256", "num_updates": "680800", "lr": "3.22424e-05", "gnorm": "2.026", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "203628"} +[2022-08-01 19:24:38,970][train_inner][INFO] - {"epoch": 14, "update": 13.232, "loss": "2.043", "ppl": "4.12", "wps": "396080", "ups": "3.35", "wpb": "118345", "bsz": "256", "num_updates": "681000", "lr": "3.22222e-05", "gnorm": "2.196", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "203687"} +[2022-08-01 19:25:38,430][train_inner][INFO] - {"epoch": 14, "update": 13.235, "loss": "2.042", "ppl": "4.12", "wps": "399139", "ups": "3.36", "wpb": "118664", "bsz": "256", "num_updates": "681200", "lr": "3.2202e-05", "gnorm": "2.051", "loss_scale": "4", "train_wall": "59", "gb_free": "26.6", "wall": "203747"} +[2022-08-01 19:26:38,188][train_inner][INFO] - {"epoch": 14, "update": 13.239, "loss": "2.045", "ppl": "4.13", "wps": "396651", "ups": "3.35", "wpb": "118514", "bsz": "256", "num_updates": "681400", "lr": "3.21818e-05", "gnorm": "2.15", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "203807"} +[2022-08-01 19:27:37,785][train_inner][INFO] - {"epoch": 14, "update": 13.243, "loss": "2.044", "ppl": "4.12", "wps": "397247", "ups": "3.36", "wpb": "118373", "bsz": "256", "num_updates": "681600", "lr": "3.21616e-05", "gnorm": "2.146", "loss_scale": "8", "train_wall": "59", "gb_free": "21.4", "wall": "203866"} +[2022-08-01 19:28:36,933][train_inner][INFO] - {"epoch": 14, "update": 13.247, "loss": "2.042", "ppl": "4.12", "wps": "403245", "ups": "3.38", "wpb": "119255", "bsz": "256", "num_updates": "681800", "lr": "3.21414e-05", "gnorm": "2.037", "loss_scale": "8", "train_wall": "59", "gb_free": "23.4", "wall": "203925"} +[2022-08-01 19:29:36,353][train_inner][INFO] - {"epoch": 14, "update": 13.251, "loss": "2.049", "ppl": "4.14", "wps": "396250", "ups": "3.37", "wpb": "117724", "bsz": "256", "num_updates": "682000", "lr": "3.21212e-05", "gnorm": "2.066", "loss_scale": "8", "train_wall": "59", "gb_free": "31.2", "wall": "203985"} +[2022-08-01 19:29:42,506][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 19:30:35,806][train_inner][INFO] - {"epoch": 14, "update": 13.255, "loss": "2.041", "ppl": "4.11", "wps": "399314", "ups": "3.36", "wpb": "118701", "bsz": "256", "num_updates": "682200", "lr": "3.2101e-05", "gnorm": "2.103", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "204044"} +[2022-08-01 19:31:35,102][train_inner][INFO] - {"epoch": 14, "update": 13.259, "loss": "2.043", "ppl": "4.12", "wps": "400595", "ups": "3.37", "wpb": "118768", "bsz": "256", "num_updates": "682400", "lr": "3.20808e-05", "gnorm": "2.033", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "204104"} +[2022-08-01 19:32:34,709][train_inner][INFO] - {"epoch": 14, "update": 13.263, "loss": "2.047", "ppl": "4.13", "wps": "395613", "ups": "3.36", "wpb": "117906", "bsz": "256", "num_updates": "682600", "lr": "3.20606e-05", "gnorm": "2.209", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "204163"} +[2022-08-01 19:33:34,180][train_inner][INFO] - {"epoch": 14, "update": 13.267, "loss": "2.048", "ppl": "4.14", "wps": "398691", "ups": "3.36", "wpb": "118536", "bsz": "256", "num_updates": "682800", "lr": "3.20404e-05", "gnorm": "2.058", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "204223"} +[2022-08-01 19:34:33,678][train_inner][INFO] - {"epoch": 14, "update": 13.27, "loss": "2.056", "ppl": "4.16", "wps": "395547", "ups": "3.36", "wpb": "117670", "bsz": "256", "num_updates": "683000", "lr": "3.20202e-05", "gnorm": "2.16", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "204282"} +[2022-08-01 19:35:33,146][train_inner][INFO] - {"epoch": 14, "update": 13.274, "loss": "2.05", "ppl": "4.14", "wps": "397043", "ups": "3.36", "wpb": "118056", "bsz": "256", "num_updates": "683200", "lr": "3.2e-05", "gnorm": "2.109", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "204342"} +[2022-08-01 19:36:33,025][train_inner][INFO] - {"epoch": 14, "update": 13.278, "loss": "2.046", "ppl": "4.13", "wps": "394341", "ups": "3.34", "wpb": "118064", "bsz": "256", "num_updates": "683400", "lr": "3.19798e-05", "gnorm": "2.103", "loss_scale": "4", "train_wall": "60", "gb_free": "22.8", "wall": "204401"} +[2022-08-01 19:37:32,791][train_inner][INFO] - {"epoch": 14, "update": 13.282, "loss": "2.043", "ppl": "4.12", "wps": "395718", "ups": "3.35", "wpb": "118251", "bsz": "256", "num_updates": "683600", "lr": "3.19596e-05", "gnorm": "2.059", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "204461"} +[2022-08-01 19:38:32,050][train_inner][INFO] - {"epoch": 14, "update": 13.286, "loss": "2.047", "ppl": "4.13", "wps": "399110", "ups": "3.38", "wpb": "118254", "bsz": "256", "num_updates": "683800", "lr": "3.19394e-05", "gnorm": "2.128", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "204520"} +[2022-08-01 19:39:31,826][train_inner][INFO] - {"epoch": 14, "update": 13.29, "loss": "2.047", "ppl": "4.13", "wps": "396068", "ups": "3.35", "wpb": "118375", "bsz": "256", "num_updates": "684000", "lr": "3.19192e-05", "gnorm": "2.107", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "204580"} +[2022-08-01 19:40:21,553][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 19:40:31,700][train_inner][INFO] - {"epoch": 14, "update": 13.294, "loss": "2.044", "ppl": "4.12", "wps": "394458", "ups": "3.34", "wpb": "118088", "bsz": "256", "num_updates": "684200", "lr": "3.1899e-05", "gnorm": "2.123", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "204640"} +[2022-08-01 19:41:30,822][train_inner][INFO] - {"epoch": 14, "update": 13.298, "loss": "2.047", "ppl": "4.13", "wps": "399466", "ups": "3.38", "wpb": "118087", "bsz": "256", "num_updates": "684400", "lr": "3.18788e-05", "gnorm": "2.105", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "204699"} +[2022-08-01 19:42:30,681][train_inner][INFO] - {"epoch": 14, "update": 13.302, "loss": "2.041", "ppl": "4.12", "wps": "397847", "ups": "3.34", "wpb": "119072", "bsz": "256", "num_updates": "684600", "lr": "3.18586e-05", "gnorm": "2.084", "loss_scale": "4", "train_wall": "60", "gb_free": "21.5", "wall": "204759"} +[2022-08-01 19:43:30,399][train_inner][INFO] - {"epoch": 14, "update": 13.305, "loss": "2.044", "ppl": "4.12", "wps": "396240", "ups": "3.35", "wpb": "118313", "bsz": "255.9", "num_updates": "684800", "lr": "3.18384e-05", "gnorm": "2.238", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "204819"} +[2022-08-01 19:44:30,213][train_inner][INFO] - {"epoch": 14, "update": 13.309, "loss": "2.046", "ppl": "4.13", "wps": "395570", "ups": "3.34", "wpb": "118303", "bsz": "256", "num_updates": "685000", "lr": "3.18182e-05", "gnorm": "2.109", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "204879"} +[2022-08-01 19:45:29,171][train_inner][INFO] - {"epoch": 14, "update": 13.313, "loss": "2.049", "ppl": "4.14", "wps": "399890", "ups": "3.39", "wpb": "117882", "bsz": "256", "num_updates": "685200", "lr": "3.1798e-05", "gnorm": "2.037", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "204938"} +[2022-08-01 19:46:28,537][train_inner][INFO] - {"epoch": 14, "update": 13.317, "loss": "2.044", "ppl": "4.12", "wps": "399185", "ups": "3.37", "wpb": "118489", "bsz": "256", "num_updates": "685400", "lr": "3.17778e-05", "gnorm": "2.14", "loss_scale": "4", "train_wall": "59", "gb_free": "29.2", "wall": "204997"} +[2022-08-01 19:47:27,979][train_inner][INFO] - {"epoch": 14, "update": 13.321, "loss": "2.047", "ppl": "4.13", "wps": "395720", "ups": "3.36", "wpb": "117610", "bsz": "256", "num_updates": "685600", "lr": "3.17576e-05", "gnorm": "2.23", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "205056"} +[2022-08-01 19:48:27,467][train_inner][INFO] - {"epoch": 14, "update": 13.325, "loss": "2.041", "ppl": "4.12", "wps": "397137", "ups": "3.36", "wpb": "118124", "bsz": "256", "num_updates": "685800", "lr": "3.17374e-05", "gnorm": "2.074", "loss_scale": "4", "train_wall": "59", "gb_free": "26.8", "wall": "205116"} +[2022-08-01 19:49:26,850][train_inner][INFO] - {"epoch": 14, "update": 13.329, "loss": "2.04", "ppl": "4.11", "wps": "398058", "ups": "3.37", "wpb": "118190", "bsz": "256", "num_updates": "686000", "lr": "3.17172e-05", "gnorm": "2.145", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "205175"} +[2022-08-01 19:50:26,338][train_inner][INFO] - {"epoch": 14, "update": 13.333, "loss": "2.043", "ppl": "4.12", "wps": "399364", "ups": "3.36", "wpb": "118786", "bsz": "256", "num_updates": "686200", "lr": "3.1697e-05", "gnorm": "2.094", "loss_scale": "4", "train_wall": "59", "gb_free": "26.7", "wall": "205235"} +[2022-08-01 19:51:22,218][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 19:51:26,146][train_inner][INFO] - {"epoch": 14, "update": 13.337, "loss": "2.049", "ppl": "4.14", "wps": "396298", "ups": "3.34", "wpb": "118507", "bsz": "256", "num_updates": "686400", "lr": "3.16768e-05", "gnorm": "2.085", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "205295"} +[2022-08-01 19:52:25,499][train_inner][INFO] - {"epoch": 14, "update": 13.34, "loss": "2.042", "ppl": "4.12", "wps": "399069", "ups": "3.37", "wpb": "118429", "bsz": "256", "num_updates": "686600", "lr": "3.16566e-05", "gnorm": "2.11", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "205354"} +[2022-08-01 19:53:25,111][train_inner][INFO] - {"epoch": 14, "update": 13.344, "loss": "2.04", "ppl": "4.11", "wps": "397022", "ups": "3.36", "wpb": "118336", "bsz": "256", "num_updates": "686800", "lr": "3.16364e-05", "gnorm": "2.11", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "205414"} +[2022-08-01 19:54:24,786][train_inner][INFO] - {"epoch": 14, "update": 13.348, "loss": "2.049", "ppl": "4.14", "wps": "395997", "ups": "3.35", "wpb": "118156", "bsz": "256", "num_updates": "687000", "lr": "3.16162e-05", "gnorm": "2.128", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "205473"} +[2022-08-01 19:55:24,381][train_inner][INFO] - {"epoch": 14, "update": 13.352, "loss": "2.039", "ppl": "4.11", "wps": "399462", "ups": "3.36", "wpb": "119029", "bsz": "256", "num_updates": "687200", "lr": "3.1596e-05", "gnorm": "2.217", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "205533"} +[2022-08-01 19:56:23,666][train_inner][INFO] - {"epoch": 14, "update": 13.356, "loss": "2.043", "ppl": "4.12", "wps": "396135", "ups": "3.37", "wpb": "117423", "bsz": "256", "num_updates": "687400", "lr": "3.15758e-05", "gnorm": "2.125", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "205592"} +[2022-08-01 19:57:23,448][train_inner][INFO] - {"epoch": 14, "update": 13.36, "loss": "2.039", "ppl": "4.11", "wps": "397133", "ups": "3.35", "wpb": "118706", "bsz": "256", "num_updates": "687600", "lr": "3.15556e-05", "gnorm": "2.004", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "205652"} +[2022-08-01 19:58:22,498][train_inner][INFO] - {"epoch": 14, "update": 13.364, "loss": "2.047", "ppl": "4.13", "wps": "400514", "ups": "3.39", "wpb": "118249", "bsz": "256", "num_updates": "687800", "lr": "3.15354e-05", "gnorm": "2.031", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "205711"} +[2022-08-01 19:59:21,988][train_inner][INFO] - {"epoch": 14, "update": 13.368, "loss": "2.047", "ppl": "4.13", "wps": "398010", "ups": "3.36", "wpb": "118388", "bsz": "256", "num_updates": "688000", "lr": "3.15152e-05", "gnorm": "2.141", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "205770"} +[2022-08-01 20:00:21,346][train_inner][INFO] - {"epoch": 14, "update": 13.371, "loss": "2.046", "ppl": "4.13", "wps": "398337", "ups": "3.37", "wpb": "118222", "bsz": "256", "num_updates": "688200", "lr": "3.14949e-05", "gnorm": "2.183", "loss_scale": "4", "train_wall": "59", "gb_free": "30", "wall": "205830"} +[2022-08-01 20:01:21,099][train_inner][INFO] - {"epoch": 14, "update": 13.375, "loss": "2.047", "ppl": "4.13", "wps": "393687", "ups": "3.35", "wpb": "117619", "bsz": "256", "num_updates": "688400", "lr": "3.14747e-05", "gnorm": "2.059", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "205890"} +[2022-08-01 20:02:20,469][train_inner][INFO] - {"epoch": 14, "update": 13.379, "loss": "2.041", "ppl": "4.12", "wps": "399104", "ups": "3.37", "wpb": "118473", "bsz": "256", "num_updates": "688600", "lr": "3.14545e-05", "gnorm": "2.081", "loss_scale": "8", "train_wall": "59", "gb_free": "24.2", "wall": "205949"} +[2022-08-01 20:02:35,464][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:03:20,466][train_inner][INFO] - {"epoch": 14, "update": 13.383, "loss": "2.041", "ppl": "4.11", "wps": "395124", "ups": "3.33", "wpb": "118531", "bsz": "256", "num_updates": "688800", "lr": "3.14343e-05", "gnorm": "2.066", "loss_scale": "4", "train_wall": "60", "gb_free": "24.8", "wall": "206009"} +[2022-08-01 20:04:19,916][train_inner][INFO] - {"epoch": 14, "update": 13.387, "loss": "2.049", "ppl": "4.14", "wps": "395229", "ups": "3.36", "wpb": "117482", "bsz": "256", "num_updates": "689000", "lr": "3.14141e-05", "gnorm": "2.176", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "206068"} +[2022-08-01 20:05:19,117][train_inner][INFO] - {"epoch": 14, "update": 13.391, "loss": "2.044", "ppl": "4.12", "wps": "399082", "ups": "3.38", "wpb": "118128", "bsz": "256", "num_updates": "689200", "lr": "3.13939e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "59", "gb_free": "25.1", "wall": "206128"} +[2022-08-01 20:06:18,526][train_inner][INFO] - {"epoch": 14, "update": 13.395, "loss": "2.04", "ppl": "4.11", "wps": "397858", "ups": "3.37", "wpb": "118181", "bsz": "256", "num_updates": "689400", "lr": "3.13737e-05", "gnorm": "2.144", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "206187"} +[2022-08-01 20:07:17,780][train_inner][INFO] - {"epoch": 14, "update": 13.399, "loss": "2.044", "ppl": "4.12", "wps": "398185", "ups": "3.38", "wpb": "117971", "bsz": "256", "num_updates": "689600", "lr": "3.13535e-05", "gnorm": "2.07", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "206246"} +[2022-08-01 20:08:17,289][train_inner][INFO] - {"epoch": 14, "update": 13.403, "loss": "2.049", "ppl": "4.14", "wps": "397130", "ups": "3.36", "wpb": "118163", "bsz": "256", "num_updates": "689800", "lr": "3.13333e-05", "gnorm": "2.138", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "206306"} +[2022-08-01 20:09:16,543][train_inner][INFO] - {"epoch": 14, "update": 13.406, "loss": "2.039", "ppl": "4.11", "wps": "398485", "ups": "3.38", "wpb": "118057", "bsz": "256", "num_updates": "690000", "lr": "3.13131e-05", "gnorm": "2.086", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "206365"} +[2022-08-01 20:10:17,133][train_inner][INFO] - {"epoch": 14, "update": 13.41, "loss": "2.045", "ppl": "4.13", "wps": "391256", "ups": "3.3", "wpb": "118531", "bsz": "256", "num_updates": "690200", "lr": "3.12929e-05", "gnorm": "2.113", "loss_scale": "4", "train_wall": "60", "gb_free": "23.9", "wall": "206426"} +[2022-08-01 20:11:16,422][train_inner][INFO] - {"epoch": 14, "update": 13.414, "loss": "2.044", "ppl": "4.12", "wps": "397820", "ups": "3.37", "wpb": "117930", "bsz": "256", "num_updates": "690400", "lr": "3.12727e-05", "gnorm": "2.068", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "206485"} +[2022-08-01 20:12:16,042][train_inner][INFO] - {"epoch": 14, "update": 13.418, "loss": "2.039", "ppl": "4.11", "wps": "397043", "ups": "3.35", "wpb": "118357", "bsz": "256", "num_updates": "690600", "lr": "3.12525e-05", "gnorm": "2.14", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "206544"} +[2022-08-01 20:12:59,219][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:13:15,961][train_inner][INFO] - {"epoch": 14, "update": 13.422, "loss": "2.043", "ppl": "4.12", "wps": "394850", "ups": "3.34", "wpb": "118295", "bsz": "256", "num_updates": "690800", "lr": "3.12323e-05", "gnorm": "2.037", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "206604"} +[2022-08-01 20:14:15,449][train_inner][INFO] - {"epoch": 14, "update": 13.426, "loss": "2.037", "ppl": "4.1", "wps": "398412", "ups": "3.36", "wpb": "118503", "bsz": "256", "num_updates": "691000", "lr": "3.12121e-05", "gnorm": "2.072", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "206664"} +[2022-08-01 20:15:14,953][train_inner][INFO] - {"epoch": 14, "update": 13.43, "loss": "2.038", "ppl": "4.11", "wps": "396469", "ups": "3.36", "wpb": "117958", "bsz": "256", "num_updates": "691200", "lr": "3.11919e-05", "gnorm": "2.133", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "206723"} +[2022-08-01 20:16:14,220][train_inner][INFO] - {"epoch": 14, "update": 13.434, "loss": "2.042", "ppl": "4.12", "wps": "399680", "ups": "3.37", "wpb": "118438", "bsz": "256", "num_updates": "691400", "lr": "3.11717e-05", "gnorm": "2.137", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "206783"} +[2022-08-01 20:17:13,584][train_inner][INFO] - {"epoch": 14, "update": 13.438, "loss": "2.048", "ppl": "4.13", "wps": "397566", "ups": "3.37", "wpb": "118004", "bsz": "256", "num_updates": "691600", "lr": "3.11515e-05", "gnorm": "2.107", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "206842"} +[2022-08-01 20:18:13,383][train_inner][INFO] - {"epoch": 14, "update": 13.441, "loss": "2.046", "ppl": "4.13", "wps": "394975", "ups": "3.34", "wpb": "118096", "bsz": "256", "num_updates": "691800", "lr": "3.11313e-05", "gnorm": "2.078", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "206902"} +[2022-08-01 20:19:12,750][train_inner][INFO] - {"epoch": 14, "update": 13.445, "loss": "2.044", "ppl": "4.12", "wps": "399715", "ups": "3.37", "wpb": "118647", "bsz": "256", "num_updates": "692000", "lr": "3.11111e-05", "gnorm": "2.215", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "206961"} +[2022-08-01 20:20:12,422][train_inner][INFO] - {"epoch": 14, "update": 13.449, "loss": "2.043", "ppl": "4.12", "wps": "396780", "ups": "3.35", "wpb": "118382", "bsz": "256", "num_updates": "692200", "lr": "3.10909e-05", "gnorm": "2.122", "loss_scale": "4", "train_wall": "59", "gb_free": "29.1", "wall": "207021"} +[2022-08-01 20:21:12,474][train_inner][INFO] - {"epoch": 14, "update": 13.453, "loss": "2.047", "ppl": "4.13", "wps": "394513", "ups": "3.33", "wpb": "118456", "bsz": "256", "num_updates": "692400", "lr": "3.10707e-05", "gnorm": "2.14", "loss_scale": "4", "train_wall": "60", "gb_free": "22.4", "wall": "207081"} +[2022-08-01 20:22:12,034][train_inner][INFO] - {"epoch": 14, "update": 13.457, "loss": "2.045", "ppl": "4.13", "wps": "395334", "ups": "3.36", "wpb": "117729", "bsz": "256", "num_updates": "692600", "lr": "3.10505e-05", "gnorm": "2.23", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "207140"} +[2022-08-01 20:23:11,856][train_inner][INFO] - {"epoch": 14, "update": 13.461, "loss": "2.037", "ppl": "4.1", "wps": "395625", "ups": "3.34", "wpb": "118335", "bsz": "256", "num_updates": "692800", "lr": "3.10303e-05", "gnorm": "2.073", "loss_scale": "8", "train_wall": "59", "gb_free": "21.5", "wall": "207200"} +[2022-08-01 20:23:21,118][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:24:11,807][train_inner][INFO] - {"epoch": 14, "update": 13.465, "loss": "2.044", "ppl": "4.12", "wps": "394510", "ups": "3.34", "wpb": "118249", "bsz": "256", "num_updates": "693000", "lr": "3.10101e-05", "gnorm": "2.223", "loss_scale": "4", "train_wall": "60", "gb_free": "22.4", "wall": "207260"} +[2022-08-01 20:25:11,571][train_inner][INFO] - {"epoch": 14, "update": 13.469, "loss": "2.044", "ppl": "4.12", "wps": "398035", "ups": "3.35", "wpb": "118940", "bsz": "256", "num_updates": "693200", "lr": "3.09899e-05", "gnorm": "2.095", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "207320"} +[2022-08-01 20:26:11,112][train_inner][INFO] - {"epoch": 14, "update": 13.473, "loss": "2.044", "ppl": "4.12", "wps": "397878", "ups": "3.36", "wpb": "118448", "bsz": "256", "num_updates": "693400", "lr": "3.09697e-05", "gnorm": "2.139", "loss_scale": "4", "train_wall": "59", "gb_free": "25.1", "wall": "207380"} +[2022-08-01 20:27:10,683][train_inner][INFO] - {"epoch": 14, "update": 13.476, "loss": "2.042", "ppl": "4.12", "wps": "397229", "ups": "3.36", "wpb": "118316", "bsz": "256", "num_updates": "693600", "lr": "3.09495e-05", "gnorm": "2.233", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "207439"} +[2022-08-01 20:28:10,066][train_inner][INFO] - {"epoch": 14, "update": 13.48, "loss": "2.043", "ppl": "4.12", "wps": "398254", "ups": "3.37", "wpb": "118246", "bsz": "256", "num_updates": "693800", "lr": "3.09293e-05", "gnorm": "2.121", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "207498"} +[2022-08-01 20:29:09,641][train_inner][INFO] - {"epoch": 14, "update": 13.484, "loss": "2.04", "ppl": "4.11", "wps": "397349", "ups": "3.36", "wpb": "118360", "bsz": "256", "num_updates": "694000", "lr": "3.09091e-05", "gnorm": "2.044", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "207558"} +[2022-08-01 20:30:09,314][train_inner][INFO] - {"epoch": 14, "update": 13.488, "loss": "2.042", "ppl": "4.12", "wps": "396274", "ups": "3.35", "wpb": "118232", "bsz": "256", "num_updates": "694200", "lr": "3.08889e-05", "gnorm": "2.195", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "207618"} +[2022-08-01 20:31:08,756][train_inner][INFO] - {"epoch": 14, "update": 13.492, "loss": "2.039", "ppl": "4.11", "wps": "398159", "ups": "3.36", "wpb": "118336", "bsz": "256", "num_updates": "694400", "lr": "3.08687e-05", "gnorm": "2.139", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "207677"} +[2022-08-01 20:32:07,973][train_inner][INFO] - {"epoch": 14, "update": 13.496, "loss": "2.043", "ppl": "4.12", "wps": "398919", "ups": "3.38", "wpb": "118114", "bsz": "256", "num_updates": "694600", "lr": "3.08485e-05", "gnorm": "2.138", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "207736"} +[2022-08-01 20:33:07,471][train_inner][INFO] - {"epoch": 14, "update": 13.5, "loss": "2.04", "ppl": "4.11", "wps": "398135", "ups": "3.36", "wpb": "118440", "bsz": "256", "num_updates": "694800", "lr": "3.08283e-05", "gnorm": "2.051", "loss_scale": "4", "train_wall": "59", "gb_free": "24.9", "wall": "207796"} +[2022-08-01 20:34:00,538][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:34:07,436][train_inner][INFO] - {"epoch": 14, "update": 13.504, "loss": "2.035", "ppl": "4.1", "wps": "395826", "ups": "3.34", "wpb": "118678", "bsz": "256", "num_updates": "695000", "lr": "3.08081e-05", "gnorm": "2.108", "loss_scale": "4", "train_wall": "60", "gb_free": "23.2", "wall": "207856"} +[2022-08-01 20:35:07,041][train_inner][INFO] - {"epoch": 14, "update": 13.507, "loss": "2.039", "ppl": "4.11", "wps": "398044", "ups": "3.36", "wpb": "118626", "bsz": "256", "num_updates": "695200", "lr": "3.07879e-05", "gnorm": "2.143", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "207915"} +[2022-08-01 20:36:06,349][train_inner][INFO] - {"epoch": 14, "update": 13.511, "loss": "2.035", "ppl": "4.1", "wps": "398105", "ups": "3.37", "wpb": "118055", "bsz": "256", "num_updates": "695400", "lr": "3.07677e-05", "gnorm": "2.156", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "207975"} +[2022-08-01 20:37:06,204][train_inner][INFO] - {"epoch": 14, "update": 13.515, "loss": "2.042", "ppl": "4.12", "wps": "395667", "ups": "3.34", "wpb": "118411", "bsz": "256", "num_updates": "695600", "lr": "3.07475e-05", "gnorm": "2.093", "loss_scale": "4", "train_wall": "60", "gb_free": "22.7", "wall": "208035"} +[2022-08-01 20:38:05,753][train_inner][INFO] - {"epoch": 14, "update": 13.519, "loss": "2.036", "ppl": "4.1", "wps": "398490", "ups": "3.36", "wpb": "118648", "bsz": "256", "num_updates": "695800", "lr": "3.07273e-05", "gnorm": "2.13", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "208094"} +[2022-08-01 20:39:04,861][train_inner][INFO] - {"epoch": 14, "update": 13.523, "loss": "2.041", "ppl": "4.12", "wps": "400389", "ups": "3.38", "wpb": "118331", "bsz": "256", "num_updates": "696000", "lr": "3.07071e-05", "gnorm": "2.038", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "208153"} +[2022-08-01 20:40:04,195][train_inner][INFO] - {"epoch": 14, "update": 13.527, "loss": "2.041", "ppl": "4.12", "wps": "400036", "ups": "3.37", "wpb": "118678", "bsz": "256", "num_updates": "696200", "lr": "3.06869e-05", "gnorm": "2.175", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "208213"} +[2022-08-01 20:41:03,776][train_inner][INFO] - {"epoch": 14, "update": 13.531, "loss": "2.05", "ppl": "4.14", "wps": "398766", "ups": "3.36", "wpb": "118794", "bsz": "256", "num_updates": "696400", "lr": "3.06667e-05", "gnorm": "2.029", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "208272"} +[2022-08-01 20:42:03,353][train_inner][INFO] - {"epoch": 14, "update": 13.535, "loss": "2.038", "ppl": "4.11", "wps": "396527", "ups": "3.36", "wpb": "118118", "bsz": "256", "num_updates": "696600", "lr": "3.06465e-05", "gnorm": "2.072", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "208332"} +[2022-08-01 20:43:04,028][train_inner][INFO] - {"epoch": 14, "update": 13.539, "loss": "2.038", "ppl": "4.11", "wps": "390603", "ups": "3.3", "wpb": "118498", "bsz": "256", "num_updates": "696800", "lr": "3.06263e-05", "gnorm": "2.047", "loss_scale": "4", "train_wall": "60", "gb_free": "24", "wall": "208392"} +[2022-08-01 20:44:03,504][train_inner][INFO] - {"epoch": 14, "update": 13.542, "loss": "2.041", "ppl": "4.12", "wps": "399414", "ups": "3.36", "wpb": "118778", "bsz": "256", "num_updates": "697000", "lr": "3.06061e-05", "gnorm": "2.153", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "208452"} +[2022-08-01 20:44:29,439][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:45:03,223][train_inner][INFO] - {"epoch": 14, "update": 13.546, "loss": "2.039", "ppl": "4.11", "wps": "394580", "ups": "3.35", "wpb": "117818", "bsz": "256", "num_updates": "697200", "lr": "3.05859e-05", "gnorm": "2.183", "loss_scale": "4", "train_wall": "59", "gb_free": "23.9", "wall": "208512"} +[2022-08-01 20:46:04,095][train_inner][INFO] - {"epoch": 14, "update": 13.55, "loss": "2.042", "ppl": "4.12", "wps": "388253", "ups": "3.29", "wpb": "118167", "bsz": "256", "num_updates": "697400", "lr": "3.05657e-05", "gnorm": "2.188", "loss_scale": "4", "train_wall": "61", "gb_free": "24.8", "wall": "208573"} +[2022-08-01 20:47:03,365][train_inner][INFO] - {"epoch": 14, "update": 13.554, "loss": "2.037", "ppl": "4.1", "wps": "399435", "ups": "3.37", "wpb": "118372", "bsz": "256", "num_updates": "697600", "lr": "3.05455e-05", "gnorm": "2.078", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "208632"} +[2022-08-01 20:48:03,209][train_inner][INFO] - {"epoch": 14, "update": 13.558, "loss": "2.031", "ppl": "4.09", "wps": "396041", "ups": "3.34", "wpb": "118503", "bsz": "256", "num_updates": "697800", "lr": "3.05253e-05", "gnorm": "2.079", "loss_scale": "4", "train_wall": "60", "gb_free": "23.5", "wall": "208692"} +[2022-08-01 20:49:02,586][train_inner][INFO] - {"epoch": 14, "update": 13.562, "loss": "2.04", "ppl": "4.11", "wps": "397745", "ups": "3.37", "wpb": "118084", "bsz": "256", "num_updates": "698000", "lr": "3.05051e-05", "gnorm": "2.191", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "208751"} +[2022-08-01 20:50:01,738][train_inner][INFO] - {"epoch": 14, "update": 13.566, "loss": "2.039", "ppl": "4.11", "wps": "400118", "ups": "3.38", "wpb": "118338", "bsz": "256", "num_updates": "698200", "lr": "3.04848e-05", "gnorm": "2.068", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "208810"} +[2022-08-01 20:51:02,301][train_inner][INFO] - {"epoch": 14, "update": 13.57, "loss": "2.041", "ppl": "4.12", "wps": "390566", "ups": "3.3", "wpb": "118269", "bsz": "256", "num_updates": "698400", "lr": "3.04646e-05", "gnorm": "2.209", "loss_scale": "4", "train_wall": "60", "gb_free": "22.5", "wall": "208871"} +[2022-08-01 20:52:01,935][train_inner][INFO] - {"epoch": 14, "update": 13.574, "loss": "2.042", "ppl": "4.12", "wps": "397442", "ups": "3.35", "wpb": "118504", "bsz": "256", "num_updates": "698600", "lr": "3.04444e-05", "gnorm": "2.101", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "208930"} +[2022-08-01 20:53:01,009][train_inner][INFO] - {"epoch": 14, "update": 13.577, "loss": "2.036", "ppl": "4.1", "wps": "399467", "ups": "3.39", "wpb": "117991", "bsz": "256", "num_updates": "698800", "lr": "3.04242e-05", "gnorm": "2.126", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "208989"} +[2022-08-01 20:54:00,857][train_inner][INFO] - {"epoch": 14, "update": 13.581, "loss": "2.036", "ppl": "4.1", "wps": "395448", "ups": "3.34", "wpb": "118332", "bsz": "256", "num_updates": "699000", "lr": "3.0404e-05", "gnorm": "2.099", "loss_scale": "4", "train_wall": "59", "gb_free": "26.7", "wall": "209049"} +[2022-08-01 20:55:00,557][train_inner][INFO] - {"epoch": 14, "update": 13.585, "loss": "2.042", "ppl": "4.12", "wps": "397957", "ups": "3.35", "wpb": "118789", "bsz": "256", "num_updates": "699200", "lr": "3.03838e-05", "gnorm": "2.139", "loss_scale": "8", "train_wall": "59", "gb_free": "22.2", "wall": "209109"} +[2022-08-01 20:55:29,048][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:55:59,906][train_inner][INFO] - {"epoch": 14, "update": 13.589, "loss": "2.046", "ppl": "4.13", "wps": "395684", "ups": "3.37", "wpb": "117418", "bsz": "256", "num_updates": "699400", "lr": "3.03636e-05", "gnorm": "2.233", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "209168"} +[2022-08-01 20:56:59,688][train_inner][INFO] - {"epoch": 14, "update": 13.593, "loss": "2.039", "ppl": "4.11", "wps": "397274", "ups": "3.35", "wpb": "118748", "bsz": "256", "num_updates": "699600", "lr": "3.03434e-05", "gnorm": "2.187", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "209228"} +[2022-08-01 20:57:59,471][train_inner][INFO] - {"epoch": 14, "update": 13.597, "loss": "2.042", "ppl": "4.12", "wps": "396162", "ups": "3.35", "wpb": "118417", "bsz": "256", "num_updates": "699800", "lr": "3.03232e-05", "gnorm": "2.146", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "209288"} +[2022-08-01 20:58:58,738][train_inner][INFO] - {"epoch": 14, "update": 13.601, "loss": "2.045", "ppl": "4.13", "wps": "396600", "ups": "3.37", "wpb": "117527", "bsz": "256", "num_updates": "700000", "lr": "3.0303e-05", "gnorm": "2.289", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "209347"} +[2022-08-01 20:58:58,739][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 20:59:21,501][valid][INFO] - {"epoch": 14, "valid_loss": "1.929", "valid_ppl": "3.81", "valid_wps": "1.58728e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "700000", "valid_best_loss": "1.929"} +[2022-08-01 20:59:21,504][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 14 @ 700000 updates +[2022-08-01 20:59:21,505][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_14_700000.pt +[2022-08-01 20:59:28,549][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_14_700000.pt +[2022-08-01 20:59:47,620][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_14_700000.pt (epoch 14 @ 700000 updates, score 1.929) (writing took 26.1161236371845 seconds) +[2022-08-01 21:00:47,295][train_inner][INFO] - {"epoch": 14, "update": 13.605, "loss": "2.047", "ppl": "4.13", "wps": "217008", "ups": "1.84", "wpb": "117788", "bsz": "256", "num_updates": "700200", "lr": "3.02828e-05", "gnorm": "2.106", "loss_scale": "4", "train_wall": "59", "gb_free": "24.9", "wall": "209456"} +[2022-08-01 21:01:46,701][train_inner][INFO] - {"epoch": 14, "update": 13.609, "loss": "2.043", "ppl": "4.12", "wps": "397114", "ups": "3.37", "wpb": "117955", "bsz": "256", "num_updates": "700400", "lr": "3.02626e-05", "gnorm": "2.168", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "209515"} +[2022-08-01 21:02:47,489][train_inner][INFO] - {"epoch": 14, "update": 13.612, "loss": "2.041", "ppl": "4.11", "wps": "390823", "ups": "3.29", "wpb": "118784", "bsz": "256", "num_updates": "700600", "lr": "3.02424e-05", "gnorm": "2.177", "loss_scale": "4", "train_wall": "60", "gb_free": "23.8", "wall": "209576"} +[2022-08-01 21:03:47,223][train_inner][INFO] - {"epoch": 14, "update": 13.616, "loss": "2.034", "ppl": "4.1", "wps": "395522", "ups": "3.35", "wpb": "118127", "bsz": "256", "num_updates": "700800", "lr": "3.02222e-05", "gnorm": "2.074", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "209636"} +[2022-08-01 21:04:46,757][train_inner][INFO] - {"epoch": 14, "update": 13.62, "loss": "2.05", "ppl": "4.14", "wps": "395164", "ups": "3.36", "wpb": "117627", "bsz": "256", "num_updates": "701000", "lr": "3.0202e-05", "gnorm": "2.076", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "209695"} +[2022-08-01 21:05:46,337][train_inner][INFO] - {"epoch": 14, "update": 13.624, "loss": "2.037", "ppl": "4.1", "wps": "399226", "ups": "3.36", "wpb": "118928", "bsz": "256", "num_updates": "701200", "lr": "3.01818e-05", "gnorm": "2.115", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "209755"} +[2022-08-01 21:06:43,147][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:06:45,769][train_inner][INFO] - {"epoch": 14, "update": 13.628, "loss": "2.04", "ppl": "4.11", "wps": "397456", "ups": "3.37", "wpb": "118108", "bsz": "256", "num_updates": "701400", "lr": "3.01616e-05", "gnorm": "2.135", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "209814"} +[2022-08-01 21:07:45,456][train_inner][INFO] - {"epoch": 14, "update": 13.632, "loss": "2.044", "ppl": "4.12", "wps": "397062", "ups": "3.35", "wpb": "118497", "bsz": "256", "num_updates": "701600", "lr": "3.01414e-05", "gnorm": "2.192", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "209874"} +[2022-08-01 21:08:45,053][train_inner][INFO] - {"epoch": 14, "update": 13.636, "loss": "2.033", "ppl": "4.09", "wps": "396839", "ups": "3.36", "wpb": "118252", "bsz": "256", "num_updates": "701800", "lr": "3.01212e-05", "gnorm": "2.187", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "209933"} +[2022-08-01 21:09:44,510][train_inner][INFO] - {"epoch": 14, "update": 13.64, "loss": "2.044", "ppl": "4.12", "wps": "399301", "ups": "3.36", "wpb": "118704", "bsz": "256", "num_updates": "702000", "lr": "3.0101e-05", "gnorm": "2.097", "loss_scale": "4", "train_wall": "59", "gb_free": "25.7", "wall": "209993"} +[2022-08-01 21:10:43,814][train_inner][INFO] - {"epoch": 14, "update": 13.644, "loss": "2.035", "ppl": "4.1", "wps": "398973", "ups": "3.37", "wpb": "118303", "bsz": "256", "num_updates": "702200", "lr": "3.00808e-05", "gnorm": "2.141", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "210052"} +[2022-08-01 21:11:43,663][train_inner][INFO] - {"epoch": 14, "update": 13.647, "loss": "2.041", "ppl": "4.11", "wps": "395294", "ups": "3.34", "wpb": "118288", "bsz": "256", "num_updates": "702400", "lr": "3.00606e-05", "gnorm": "2.139", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "210112"} +[2022-08-01 21:12:43,487][train_inner][INFO] - {"epoch": 14, "update": 13.651, "loss": "2.034", "ppl": "4.09", "wps": "397552", "ups": "3.34", "wpb": "118916", "bsz": "256", "num_updates": "702600", "lr": "3.00404e-05", "gnorm": "2.088", "loss_scale": "4", "train_wall": "59", "gb_free": "27", "wall": "210172"} +[2022-08-01 21:13:43,124][train_inner][INFO] - {"epoch": 14, "update": 13.655, "loss": "2.038", "ppl": "4.11", "wps": "397287", "ups": "3.35", "wpb": "118464", "bsz": "256", "num_updates": "702800", "lr": "3.00202e-05", "gnorm": "2.208", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "210232"} +[2022-08-01 21:14:42,591][train_inner][INFO] - {"epoch": 14, "update": 13.659, "loss": "2.038", "ppl": "4.11", "wps": "399076", "ups": "3.36", "wpb": "118658", "bsz": "256", "num_updates": "703000", "lr": "3e-05", "gnorm": "2.201", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "210291"} +[2022-08-01 21:15:41,940][train_inner][INFO] - {"epoch": 14, "update": 13.663, "loss": "2.035", "ppl": "4.1", "wps": "400359", "ups": "3.37", "wpb": "118804", "bsz": "256", "num_updates": "703200", "lr": "2.99798e-05", "gnorm": "2.202", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "210350"} +[2022-08-01 21:16:41,485][train_inner][INFO] - {"epoch": 14, "update": 13.667, "loss": "2.039", "ppl": "4.11", "wps": "394378", "ups": "3.36", "wpb": "117416", "bsz": "256", "num_updates": "703400", "lr": "2.99596e-05", "gnorm": "2.172", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "210410"} +[2022-08-01 21:17:02,415][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:17:41,494][train_inner][INFO] - {"epoch": 14, "update": 13.671, "loss": "2.042", "ppl": "4.12", "wps": "393439", "ups": "3.33", "wpb": "118048", "bsz": "255.9", "num_updates": "703600", "lr": "2.99394e-05", "gnorm": "2.216", "loss_scale": "4", "train_wall": "60", "gb_free": "26.8", "wall": "210470"} +[2022-08-01 21:18:41,212][train_inner][INFO] - {"epoch": 14, "update": 13.675, "loss": "2.036", "ppl": "4.1", "wps": "396858", "ups": "3.35", "wpb": "118496", "bsz": "256", "num_updates": "703800", "lr": "2.99192e-05", "gnorm": "2.214", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "210530"} +[2022-08-01 21:19:41,004][train_inner][INFO] - {"epoch": 14, "update": 13.678, "loss": "2.044", "ppl": "4.12", "wps": "395670", "ups": "3.34", "wpb": "118290", "bsz": "256", "num_updates": "704000", "lr": "2.9899e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "210589"} +[2022-08-01 21:20:40,828][train_inner][INFO] - {"epoch": 14, "update": 13.682, "loss": "2.034", "ppl": "4.1", "wps": "396026", "ups": "3.34", "wpb": "118457", "bsz": "256", "num_updates": "704200", "lr": "2.98788e-05", "gnorm": "2.189", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "210649"} +[2022-08-01 21:21:40,539][train_inner][INFO] - {"epoch": 14, "update": 13.686, "loss": "2.037", "ppl": "4.1", "wps": "397085", "ups": "3.35", "wpb": "118551", "bsz": "256", "num_updates": "704400", "lr": "2.98586e-05", "gnorm": "2.141", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "210709"} +[2022-08-01 21:22:40,348][train_inner][INFO] - {"epoch": 14, "update": 13.69, "loss": "2.036", "ppl": "4.1", "wps": "393659", "ups": "3.34", "wpb": "117721", "bsz": "256", "num_updates": "704600", "lr": "2.98384e-05", "gnorm": "2.16", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "210769"} +[2022-08-01 21:23:39,929][train_inner][INFO] - {"epoch": 14, "update": 13.694, "loss": "2.041", "ppl": "4.12", "wps": "398968", "ups": "3.36", "wpb": "118853", "bsz": "256", "num_updates": "704800", "lr": "2.98182e-05", "gnorm": "2.151", "loss_scale": "4", "train_wall": "59", "gb_free": "28.1", "wall": "210828"} +[2022-08-01 21:24:39,318][train_inner][INFO] - {"epoch": 14, "update": 13.698, "loss": "2.042", "ppl": "4.12", "wps": "395589", "ups": "3.37", "wpb": "117469", "bsz": "256", "num_updates": "705000", "lr": "2.9798e-05", "gnorm": "2.29", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "210888"} +[2022-08-01 21:25:38,766][train_inner][INFO] - {"epoch": 14, "update": 13.702, "loss": "2.039", "ppl": "4.11", "wps": "398072", "ups": "3.36", "wpb": "118320", "bsz": "256", "num_updates": "705200", "lr": "2.97778e-05", "gnorm": "2.177", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "210947"} +[2022-08-01 21:26:38,497][train_inner][INFO] - {"epoch": 14, "update": 13.706, "loss": "2.037", "ppl": "4.1", "wps": "396601", "ups": "3.35", "wpb": "118447", "bsz": "256", "num_updates": "705400", "lr": "2.97576e-05", "gnorm": "2.125", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "211007"} +[2022-08-01 21:27:31,433][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:27:38,354][train_inner][INFO] - {"epoch": 14, "update": 13.71, "loss": "2.039", "ppl": "4.11", "wps": "395500", "ups": "3.34", "wpb": "118366", "bsz": "256", "num_updates": "705600", "lr": "2.97374e-05", "gnorm": "2.239", "loss_scale": "4", "train_wall": "60", "gb_free": "23.1", "wall": "211067"} +[2022-08-01 21:28:37,905][train_inner][INFO] - {"epoch": 14, "update": 13.713, "loss": "2.04", "ppl": "4.11", "wps": "397678", "ups": "3.36", "wpb": "118409", "bsz": "256", "num_updates": "705800", "lr": "2.97172e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "211126"} +[2022-08-01 21:29:37,585][train_inner][INFO] - {"epoch": 14, "update": 13.717, "loss": "2.039", "ppl": "4.11", "wps": "398036", "ups": "3.35", "wpb": "118772", "bsz": "256", "num_updates": "706000", "lr": "2.9697e-05", "gnorm": "2.146", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "211186"} +[2022-08-01 21:30:37,019][train_inner][INFO] - {"epoch": 14, "update": 13.721, "loss": "2.038", "ppl": "4.11", "wps": "400078", "ups": "3.37", "wpb": "118891", "bsz": "256", "num_updates": "706200", "lr": "2.96768e-05", "gnorm": "2.138", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "211245"} +[2022-08-01 21:31:36,683][train_inner][INFO] - {"epoch": 14, "update": 13.725, "loss": "2.036", "ppl": "4.1", "wps": "396815", "ups": "3.35", "wpb": "118378", "bsz": "256", "num_updates": "706400", "lr": "2.96566e-05", "gnorm": "2.253", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "211305"} +[2022-08-01 21:32:36,055][train_inner][INFO] - {"epoch": 14, "update": 13.729, "loss": "2.037", "ppl": "4.1", "wps": "397397", "ups": "3.37", "wpb": "117970", "bsz": "256", "num_updates": "706600", "lr": "2.96364e-05", "gnorm": "2.209", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "211364"} +[2022-08-01 21:33:35,535][train_inner][INFO] - {"epoch": 14, "update": 13.733, "loss": "2.033", "ppl": "4.09", "wps": "397774", "ups": "3.36", "wpb": "118297", "bsz": "256", "num_updates": "706800", "lr": "2.96162e-05", "gnorm": "2.2", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "211424"} +[2022-08-01 21:34:35,045][train_inner][INFO] - {"epoch": 14, "update": 13.737, "loss": "2.039", "ppl": "4.11", "wps": "397117", "ups": "3.36", "wpb": "118161", "bsz": "256", "num_updates": "707000", "lr": "2.9596e-05", "gnorm": "2.154", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "211483"} +[2022-08-01 21:35:34,626][train_inner][INFO] - {"epoch": 14, "update": 13.741, "loss": "2.045", "ppl": "4.13", "wps": "400009", "ups": "3.36", "wpb": "119164", "bsz": "256", "num_updates": "707200", "lr": "2.95758e-05", "gnorm": "2.243", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "211543"} +[2022-08-01 21:36:33,786][train_inner][INFO] - {"epoch": 14, "update": 13.745, "loss": "2.042", "ppl": "4.12", "wps": "398566", "ups": "3.38", "wpb": "117895", "bsz": "256", "num_updates": "707400", "lr": "2.95556e-05", "gnorm": "2.207", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "211602"} +[2022-08-01 21:37:33,044][train_inner][INFO] - {"epoch": 14, "update": 13.748, "loss": "2.032", "ppl": "4.09", "wps": "399120", "ups": "3.38", "wpb": "118255", "bsz": "256", "num_updates": "707600", "lr": "2.95354e-05", "gnorm": "2.107", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "211661"} +[2022-08-01 21:37:43,788][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:38:32,845][train_inner][INFO] - {"epoch": 14, "update": 13.752, "loss": "2.028", "ppl": "4.08", "wps": "398124", "ups": "3.34", "wpb": "119040", "bsz": "256", "num_updates": "707800", "lr": "2.95152e-05", "gnorm": "2.083", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "211721"} +[2022-08-01 21:39:32,235][train_inner][INFO] - {"epoch": 14, "update": 13.756, "loss": "2.034", "ppl": "4.1", "wps": "395831", "ups": "3.37", "wpb": "117540", "bsz": "256", "num_updates": "708000", "lr": "2.94949e-05", "gnorm": "2.258", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "211781"} +[2022-08-01 21:40:31,718][train_inner][INFO] - {"epoch": 14, "update": 13.76, "loss": "2.045", "ppl": "4.13", "wps": "394292", "ups": "3.36", "wpb": "117268", "bsz": "256", "num_updates": "708200", "lr": "2.94747e-05", "gnorm": "2.164", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "211840"} +[2022-08-01 21:41:31,311][train_inner][INFO] - {"epoch": 14, "update": 13.764, "loss": "2.038", "ppl": "4.11", "wps": "398494", "ups": "3.36", "wpb": "118736", "bsz": "256", "num_updates": "708400", "lr": "2.94545e-05", "gnorm": "2.162", "loss_scale": "4", "train_wall": "59", "gb_free": "25.8", "wall": "211900"} +[2022-08-01 21:42:30,861][train_inner][INFO] - {"epoch": 14, "update": 13.768, "loss": "2.038", "ppl": "4.11", "wps": "399218", "ups": "3.36", "wpb": "118865", "bsz": "256", "num_updates": "708600", "lr": "2.94343e-05", "gnorm": "2.062", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "211959"} +[2022-08-01 21:43:29,924][train_inner][INFO] - {"epoch": 14, "update": 13.772, "loss": "2.034", "ppl": "4.1", "wps": "402419", "ups": "3.39", "wpb": "118839", "bsz": "256", "num_updates": "708800", "lr": "2.94141e-05", "gnorm": "2.192", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "212018"} +[2022-08-01 21:44:29,515][train_inner][INFO] - {"epoch": 14, "update": 13.776, "loss": "2.04", "ppl": "4.11", "wps": "395518", "ups": "3.36", "wpb": "117846", "bsz": "256", "num_updates": "709000", "lr": "2.93939e-05", "gnorm": "2.216", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "212078"} +[2022-08-01 21:45:29,160][train_inner][INFO] - {"epoch": 14, "update": 13.78, "loss": "2.04", "ppl": "4.11", "wps": "395127", "ups": "3.35", "wpb": "117836", "bsz": "256", "num_updates": "709200", "lr": "2.93737e-05", "gnorm": "2.176", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "212138"} +[2022-08-01 21:46:28,687][train_inner][INFO] - {"epoch": 14, "update": 13.783, "loss": "2.031", "ppl": "4.09", "wps": "398443", "ups": "3.36", "wpb": "118590", "bsz": "256", "num_updates": "709400", "lr": "2.93535e-05", "gnorm": "2.109", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "212197"} +[2022-08-01 21:47:28,316][train_inner][INFO] - {"epoch": 14, "update": 13.787, "loss": "2.037", "ppl": "4.1", "wps": "397098", "ups": "3.35", "wpb": "118392", "bsz": "256", "num_updates": "709600", "lr": "2.93333e-05", "gnorm": "2.108", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "212257"} +[2022-08-01 21:48:10,900][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:48:27,975][train_inner][INFO] - {"epoch": 14, "update": 13.791, "loss": "2.032", "ppl": "4.09", "wps": "397818", "ups": "3.35", "wpb": "118666", "bsz": "256", "num_updates": "709800", "lr": "2.93131e-05", "gnorm": "2.179", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "212316"} +[2022-08-01 21:49:27,419][train_inner][INFO] - {"epoch": 14, "update": 13.795, "loss": "2.043", "ppl": "4.12", "wps": "397141", "ups": "3.36", "wpb": "118038", "bsz": "256", "num_updates": "710000", "lr": "2.92929e-05", "gnorm": "2.157", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "212376"} +[2022-08-01 21:50:26,903][train_inner][INFO] - {"epoch": 14, "update": 13.799, "loss": "2.035", "ppl": "4.1", "wps": "399134", "ups": "3.36", "wpb": "118710", "bsz": "256", "num_updates": "710200", "lr": "2.92727e-05", "gnorm": "2.14", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "212435"} +[2022-08-01 21:51:26,481][train_inner][INFO] - {"epoch": 14, "update": 13.803, "loss": "2.039", "ppl": "4.11", "wps": "395649", "ups": "3.36", "wpb": "117859", "bsz": "256", "num_updates": "710400", "lr": "2.92525e-05", "gnorm": "2.175", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "212495"} +[2022-08-01 21:52:26,007][train_inner][INFO] - {"epoch": 14, "update": 13.807, "loss": "2.035", "ppl": "4.1", "wps": "397928", "ups": "3.36", "wpb": "118433", "bsz": "256", "num_updates": "710600", "lr": "2.92323e-05", "gnorm": "2.06", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "212554"} +[2022-08-01 21:53:25,629][train_inner][INFO] - {"epoch": 14, "update": 13.811, "loss": "2.041", "ppl": "4.11", "wps": "396549", "ups": "3.35", "wpb": "118214", "bsz": "256", "num_updates": "710800", "lr": "2.92121e-05", "gnorm": "2.18", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "212614"} +[2022-08-01 21:54:24,973][train_inner][INFO] - {"epoch": 14, "update": 13.815, "loss": "2.035", "ppl": "4.1", "wps": "398955", "ups": "3.37", "wpb": "118378", "bsz": "256", "num_updates": "711000", "lr": "2.91919e-05", "gnorm": "2.156", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "212673"} +[2022-08-01 21:55:24,368][train_inner][INFO] - {"epoch": 14, "update": 13.818, "loss": "2.041", "ppl": "4.11", "wps": "399363", "ups": "3.37", "wpb": "118601", "bsz": "256", "num_updates": "711200", "lr": "2.91717e-05", "gnorm": "2.135", "loss_scale": "4", "train_wall": "59", "gb_free": "28.2", "wall": "212733"} +[2022-08-01 21:56:23,693][train_inner][INFO] - {"epoch": 14, "update": 13.822, "loss": "2.034", "ppl": "4.1", "wps": "397618", "ups": "3.37", "wpb": "117941", "bsz": "256", "num_updates": "711400", "lr": "2.91515e-05", "gnorm": "2.23", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "212792"} +[2022-08-01 21:57:22,992][train_inner][INFO] - {"epoch": 14, "update": 13.826, "loss": "2.036", "ppl": "4.1", "wps": "399974", "ups": "3.37", "wpb": "118590", "bsz": "256", "num_updates": "711600", "lr": "2.91313e-05", "gnorm": "2.295", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "212851"} +[2022-08-01 21:58:20,302][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:58:22,668][train_inner][INFO] - {"epoch": 14, "update": 13.83, "loss": "2.034", "ppl": "4.09", "wps": "397983", "ups": "3.35", "wpb": "118748", "bsz": "256", "num_updates": "711800", "lr": "2.91111e-05", "gnorm": "2.185", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "212911"} +[2022-08-01 21:59:22,217][train_inner][INFO] - {"epoch": 14, "update": 13.834, "loss": "2.033", "ppl": "4.09", "wps": "397011", "ups": "3.36", "wpb": "118208", "bsz": "256", "num_updates": "712000", "lr": "2.90909e-05", "gnorm": "2.186", "loss_scale": "4", "train_wall": "59", "gb_free": "25.6", "wall": "212971"} +[2022-08-01 22:00:21,115][train_inner][INFO] - {"epoch": 14, "update": 13.838, "loss": "2.039", "ppl": "4.11", "wps": "400118", "ups": "3.4", "wpb": "117831", "bsz": "256", "num_updates": "712200", "lr": "2.90707e-05", "gnorm": "2.053", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "213030"} +[2022-08-01 22:01:20,813][train_inner][INFO] - {"epoch": 14, "update": 13.842, "loss": "2.033", "ppl": "4.09", "wps": "396118", "ups": "3.35", "wpb": "118236", "bsz": "256", "num_updates": "712400", "lr": "2.90505e-05", "gnorm": "2.113", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "213089"} +[2022-08-01 22:02:20,502][train_inner][INFO] - {"epoch": 14, "update": 13.846, "loss": "2.032", "ppl": "4.09", "wps": "395803", "ups": "3.35", "wpb": "118123", "bsz": "256", "num_updates": "712600", "lr": "2.90303e-05", "gnorm": "2.161", "loss_scale": "4", "train_wall": "59", "gb_free": "25.1", "wall": "213149"} +[2022-08-01 22:03:19,670][train_inner][INFO] - {"epoch": 14, "update": 13.849, "loss": "2.04", "ppl": "4.11", "wps": "399673", "ups": "3.38", "wpb": "118239", "bsz": "256", "num_updates": "712800", "lr": "2.90101e-05", "gnorm": "2.141", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "213208"} +[2022-08-01 22:04:18,992][train_inner][INFO] - {"epoch": 14, "update": 13.853, "loss": "2.041", "ppl": "4.12", "wps": "399185", "ups": "3.37", "wpb": "118401", "bsz": "256", "num_updates": "713000", "lr": "2.89899e-05", "gnorm": "2.252", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "213267"} +[2022-08-01 22:05:19,056][train_inner][INFO] - {"epoch": 14, "update": 13.857, "loss": "2.031", "ppl": "4.09", "wps": "393253", "ups": "3.33", "wpb": "118102", "bsz": "256", "num_updates": "713200", "lr": "2.89697e-05", "gnorm": "2.11", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "213327"} +[2022-08-01 22:06:19,895][train_inner][INFO] - {"epoch": 14, "update": 13.861, "loss": "2.034", "ppl": "4.1", "wps": "389733", "ups": "3.29", "wpb": "118554", "bsz": "256", "num_updates": "713400", "lr": "2.89495e-05", "gnorm": "2.175", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "213388"} +[2022-08-01 22:07:19,363][train_inner][INFO] - {"epoch": 14, "update": 13.865, "loss": "2.031", "ppl": "4.09", "wps": "399037", "ups": "3.36", "wpb": "118649", "bsz": "256", "num_updates": "713600", "lr": "2.89293e-05", "gnorm": "2.285", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "213448"} +[2022-08-01 22:08:19,158][train_inner][INFO] - {"epoch": 14, "update": 13.869, "loss": "2.036", "ppl": "4.1", "wps": "395100", "ups": "3.34", "wpb": "118124", "bsz": "256", "num_updates": "713800", "lr": "2.89091e-05", "gnorm": "2.19", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "213508"} +[2022-08-01 22:08:48,204][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 22:09:18,643][train_inner][INFO] - {"epoch": 14, "update": 13.873, "loss": "2.035", "ppl": "4.1", "wps": "396199", "ups": "3.36", "wpb": "117838", "bsz": "256", "num_updates": "714000", "lr": "2.88889e-05", "gnorm": "2.183", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "213567"} +[2022-08-01 22:10:18,285][train_inner][INFO] - {"epoch": 14, "update": 13.877, "loss": "2.03", "ppl": "4.08", "wps": "397725", "ups": "3.35", "wpb": "118604", "bsz": "256", "num_updates": "714200", "lr": "2.88687e-05", "gnorm": "2.186", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "213627"} +[2022-08-01 22:11:17,704][train_inner][INFO] - {"epoch": 14, "update": 13.881, "loss": "2.036", "ppl": "4.1", "wps": "398346", "ups": "3.37", "wpb": "118346", "bsz": "256", "num_updates": "714400", "lr": "2.88485e-05", "gnorm": "2.099", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "213686"} +[2022-08-01 22:12:17,210][train_inner][INFO] - {"epoch": 14, "update": 13.884, "loss": "2.037", "ppl": "4.1", "wps": "397848", "ups": "3.36", "wpb": "118370", "bsz": "256", "num_updates": "714600", "lr": "2.88283e-05", "gnorm": "2.129", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "213746"} +[2022-08-01 22:13:16,985][train_inner][INFO] - {"epoch": 14, "update": 13.888, "loss": "2.038", "ppl": "4.11", "wps": "395354", "ups": "3.35", "wpb": "118162", "bsz": "256", "num_updates": "714800", "lr": "2.88081e-05", "gnorm": "2.196", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "213805"} +[2022-08-01 22:14:16,730][train_inner][INFO] - {"epoch": 14, "update": 13.892, "loss": "2.033", "ppl": "4.09", "wps": "395472", "ups": "3.35", "wpb": "118136", "bsz": "256", "num_updates": "715000", "lr": "2.87879e-05", "gnorm": "2.137", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "213865"} +[2022-08-01 22:15:15,786][train_inner][INFO] - {"epoch": 14, "update": 13.896, "loss": "2.035", "ppl": "4.1", "wps": "398919", "ups": "3.39", "wpb": "117791", "bsz": "256", "num_updates": "715200", "lr": "2.87677e-05", "gnorm": "2.183", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "213924"} +[2022-08-01 22:16:15,440][train_inner][INFO] - {"epoch": 14, "update": 13.9, "loss": "2.043", "ppl": "4.12", "wps": "397000", "ups": "3.35", "wpb": "118413", "bsz": "256", "num_updates": "715400", "lr": "2.87475e-05", "gnorm": "2.177", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "213984"} +[2022-08-01 22:17:14,725][train_inner][INFO] - {"epoch": 14, "update": 13.904, "loss": "2.029", "ppl": "4.08", "wps": "399103", "ups": "3.37", "wpb": "118302", "bsz": "256", "num_updates": "715600", "lr": "2.87273e-05", "gnorm": "2.145", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "214043"} +[2022-08-01 22:18:14,558][train_inner][INFO] - {"epoch": 14, "update": 13.908, "loss": "2.038", "ppl": "4.11", "wps": "396228", "ups": "3.34", "wpb": "118536", "bsz": "256", "num_updates": "715800", "lr": "2.87071e-05", "gnorm": "2.123", "loss_scale": "4", "train_wall": "60", "gb_free": "21.7", "wall": "214103"} +[2022-08-01 22:19:02,519][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 22:19:14,220][train_inner][INFO] - {"epoch": 14, "update": 13.912, "loss": "2.034", "ppl": "4.09", "wps": "397725", "ups": "3.35", "wpb": "118645", "bsz": "256", "num_updates": "716000", "lr": "2.86869e-05", "gnorm": "2.147", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "214163"} +[2022-08-01 22:20:13,295][train_inner][INFO] - {"epoch": 14, "update": 13.916, "loss": "2.036", "ppl": "4.1", "wps": "398189", "ups": "3.39", "wpb": "117614", "bsz": "256", "num_updates": "716200", "lr": "2.86667e-05", "gnorm": "2.197", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "214222"} +[2022-08-01 22:21:12,911][train_inner][INFO] - {"epoch": 14, "update": 13.919, "loss": "2.038", "ppl": "4.11", "wps": "397487", "ups": "3.35", "wpb": "118482", "bsz": "256", "num_updates": "716400", "lr": "2.86465e-05", "gnorm": "2.133", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "214281"} +[2022-08-01 22:22:12,405][train_inner][INFO] - {"epoch": 14, "update": 13.923, "loss": "2.037", "ppl": "4.1", "wps": "396776", "ups": "3.36", "wpb": "118029", "bsz": "256", "num_updates": "716600", "lr": "2.86263e-05", "gnorm": "2.257", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "214341"} +[2022-08-01 22:23:11,924][train_inner][INFO] - {"epoch": 14, "update": 13.927, "loss": "2.036", "ppl": "4.1", "wps": "397601", "ups": "3.36", "wpb": "118322", "bsz": "256", "num_updates": "716800", "lr": "2.86061e-05", "gnorm": "2.249", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "214400"} +[2022-08-01 22:24:11,434][train_inner][INFO] - {"epoch": 14, "update": 13.931, "loss": "2.031", "ppl": "4.09", "wps": "397305", "ups": "3.36", "wpb": "118218", "bsz": "256", "num_updates": "717000", "lr": "2.85859e-05", "gnorm": "2.215", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "214460"} +[2022-08-01 22:25:11,098][train_inner][INFO] - {"epoch": 14, "update": 13.935, "loss": "2.034", "ppl": "4.09", "wps": "396392", "ups": "3.35", "wpb": "118251", "bsz": "256", "num_updates": "717200", "lr": "2.85657e-05", "gnorm": "2.239", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "214520"} +[2022-08-01 22:26:10,840][train_inner][INFO] - {"epoch": 14, "update": 13.939, "loss": "2.034", "ppl": "4.1", "wps": "395249", "ups": "3.35", "wpb": "118063", "bsz": "256", "num_updates": "717400", "lr": "2.85455e-05", "gnorm": "2.241", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "214579"} +[2022-08-01 22:27:10,599][train_inner][INFO] - {"epoch": 14, "update": 13.943, "loss": "2.034", "ppl": "4.09", "wps": "396146", "ups": "3.35", "wpb": "118367", "bsz": "256", "num_updates": "717600", "lr": "2.85253e-05", "gnorm": "2.281", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "214639"} +[2022-08-01 22:28:10,262][train_inner][INFO] - {"epoch": 14, "update": 13.947, "loss": "2.035", "ppl": "4.1", "wps": "398864", "ups": "3.35", "wpb": "118986", "bsz": "256", "num_updates": "717800", "lr": "2.85051e-05", "gnorm": "2.236", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "214699"} +[2022-08-01 22:29:10,015][train_inner][INFO] - {"epoch": 14, "update": 13.951, "loss": "2.033", "ppl": "4.09", "wps": "397206", "ups": "3.35", "wpb": "118670", "bsz": "256", "num_updates": "718000", "lr": "2.84848e-05", "gnorm": "2.108", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "214758"} +[2022-08-01 22:29:14,152][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 22:30:09,960][train_inner][INFO] - {"epoch": 14, "update": 13.954, "loss": "2.033", "ppl": "4.09", "wps": "395867", "ups": "3.34", "wpb": "118650", "bsz": "256", "num_updates": "718200", "lr": "2.84646e-05", "gnorm": "2.24", "loss_scale": "4", "train_wall": "60", "gb_free": "22.3", "wall": "214818"} +[2022-08-01 22:31:09,246][train_inner][INFO] - {"epoch": 14, "update": 13.958, "loss": "2.033", "ppl": "4.09", "wps": "398854", "ups": "3.37", "wpb": "118232", "bsz": "256", "num_updates": "718400", "lr": "2.84444e-05", "gnorm": "2.116", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "214878"} +[2022-08-01 22:32:08,675][train_inner][INFO] - {"epoch": 14, "update": 13.962, "loss": "2.031", "ppl": "4.09", "wps": "395323", "ups": "3.37", "wpb": "117467", "bsz": "256", "num_updates": "718600", "lr": "2.84242e-05", "gnorm": "2.215", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "214937"} +[2022-08-01 22:33:07,945][train_inner][INFO] - {"epoch": 14, "update": 13.966, "loss": "2.025", "ppl": "4.07", "wps": "401085", "ups": "3.37", "wpb": "118861", "bsz": "256", "num_updates": "718800", "lr": "2.8404e-05", "gnorm": "2.23", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "214996"} +[2022-08-01 22:34:07,314][train_inner][INFO] - {"epoch": 14, "update": 13.97, "loss": "2.039", "ppl": "4.11", "wps": "396301", "ups": "3.37", "wpb": "117640", "bsz": "256", "num_updates": "719000", "lr": "2.83838e-05", "gnorm": "2.223", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "215056"} +[2022-08-01 22:35:06,968][train_inner][INFO] - {"epoch": 14, "update": 13.974, "loss": "2.036", "ppl": "4.1", "wps": "393359", "ups": "3.35", "wpb": "117326", "bsz": "256", "num_updates": "719200", "lr": "2.83636e-05", "gnorm": "2.203", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "215115"} +[2022-08-01 22:36:06,367][train_inner][INFO] - {"epoch": 14, "update": 13.978, "loss": "2.037", "ppl": "4.1", "wps": "397152", "ups": "3.37", "wpb": "117951", "bsz": "256", "num_updates": "719400", "lr": "2.83434e-05", "gnorm": "2.191", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "215175"} +[2022-08-01 22:37:05,898][train_inner][INFO] - {"epoch": 14, "update": 13.982, "loss": "2.034", "ppl": "4.1", "wps": "397719", "ups": "3.36", "wpb": "118383", "bsz": "256", "num_updates": "719600", "lr": "2.83232e-05", "gnorm": "2.224", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "215234"} +[2022-08-01 22:38:05,484][train_inner][INFO] - {"epoch": 14, "update": 13.985, "loss": "2.035", "ppl": "4.1", "wps": "397720", "ups": "3.36", "wpb": "118491", "bsz": "256", "num_updates": "719800", "lr": "2.8303e-05", "gnorm": "2.11", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "215294"} +[2022-08-01 22:39:06,357][train_inner][INFO] - {"epoch": 14, "update": 13.989, "loss": "2.035", "ppl": "4.1", "wps": "389462", "ups": "3.29", "wpb": "118539", "bsz": "256", "num_updates": "720000", "lr": "2.82828e-05", "gnorm": "2.2", "loss_scale": "4", "train_wall": "61", "gb_free": "23.6", "wall": "215355"} +[2022-08-01 22:39:26,311][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 22:40:06,041][train_inner][INFO] - {"epoch": 14, "update": 13.993, "loss": "2.035", "ppl": "4.1", "wps": "398004", "ups": "3.35", "wpb": "118771", "bsz": "256", "num_updates": "720200", "lr": "2.82626e-05", "gnorm": "2.191", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "215414"} +[2022-08-01 22:41:05,806][train_inner][INFO] - {"epoch": 14, "update": 13.997, "loss": "2.035", "ppl": "4.1", "wps": "397818", "ups": "3.35", "wpb": "118878", "bsz": "256", "num_updates": "720400", "lr": "2.82424e-05", "gnorm": "2.239", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "215474"} +[2022-08-01 22:41:49,211][fairseq_cli.train][INFO] - end of epoch 14 (average epoch stats below) +[2022-08-01 22:41:49,211][train][INFO] - {"epoch": 14, "train_loss": "2.041", "train_ppl": "4.12", "train_wps": "395530", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "720546", "train_lr": "2.82277e-05", "train_gnorm": "2.134", "train_loss_scale": "4", "train_train_wall": "15243", "train_gb_free": "22.9", "train_wall": "215518"} +[2022-08-01 22:41:49,299][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 22:41:49,302][fairseq.trainer][INFO] - begin training epoch 15 +[2022-08-01 22:41:49,302][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 22:42:16,392][train_inner][INFO] - {"epoch": 15, "update": 14.001, "loss": "2.03", "ppl": "4.08", "wps": "336118", "ups": "2.83", "wpb": "118625", "bsz": "255.4", "num_updates": "720600", "lr": "2.82222e-05", "gnorm": "2.094", "loss_scale": "4", "train_wall": "59", "gb_free": "23.5", "wall": "215545"} +[2022-08-01 22:43:15,757][train_inner][INFO] - {"epoch": 15, "update": 14.005, "loss": "2.034", "ppl": "4.1", "wps": "397585", "ups": "3.37", "wpb": "118012", "bsz": "256", "num_updates": "720800", "lr": "2.8202e-05", "gnorm": "2.219", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "215604"} +[2022-08-01 22:44:15,216][train_inner][INFO] - {"epoch": 15, "update": 14.009, "loss": "2.032", "ppl": "4.09", "wps": "398743", "ups": "3.36", "wpb": "118544", "bsz": "256", "num_updates": "721000", "lr": "2.81818e-05", "gnorm": "2.189", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "215664"} +[2022-08-01 22:45:14,772][train_inner][INFO] - {"epoch": 15, "update": 14.013, "loss": "2.031", "ppl": "4.09", "wps": "397296", "ups": "3.36", "wpb": "118305", "bsz": "256", "num_updates": "721200", "lr": "2.81616e-05", "gnorm": "2.131", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "215723"} +[2022-08-01 22:46:14,010][train_inner][INFO] - {"epoch": 15, "update": 14.017, "loss": "2.029", "ppl": "4.08", "wps": "399662", "ups": "3.38", "wpb": "118376", "bsz": "256", "num_updates": "721400", "lr": "2.81414e-05", "gnorm": "2.215", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "215782"} +[2022-08-01 22:47:13,547][train_inner][INFO] - {"epoch": 15, "update": 14.02, "loss": "2.035", "ppl": "4.1", "wps": "396801", "ups": "3.36", "wpb": "118120", "bsz": "256", "num_updates": "721600", "lr": "2.81212e-05", "gnorm": "2.063", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "215842"} +[2022-08-01 22:48:13,221][train_inner][INFO] - {"epoch": 15, "update": 14.024, "loss": "2.032", "ppl": "4.09", "wps": "397434", "ups": "3.35", "wpb": "118582", "bsz": "256", "num_updates": "721800", "lr": "2.8101e-05", "gnorm": "2.208", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "215902"} +[2022-08-01 22:49:12,330][train_inner][INFO] - {"epoch": 15, "update": 14.028, "loss": "2.031", "ppl": "4.09", "wps": "400219", "ups": "3.38", "wpb": "118283", "bsz": "256", "num_updates": "722000", "lr": "2.80808e-05", "gnorm": "2.166", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "215961"} +[2022-08-01 22:49:52,468][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 22:50:12,383][train_inner][INFO] - {"epoch": 15, "update": 14.032, "loss": "2.028", "ppl": "4.08", "wps": "391381", "ups": "3.33", "wpb": "117517", "bsz": "256", "num_updates": "722200", "lr": "2.80606e-05", "gnorm": "2.294", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "216021"} +[2022-08-01 22:51:12,037][train_inner][INFO] - {"epoch": 15, "update": 14.036, "loss": "2.031", "ppl": "4.09", "wps": "396008", "ups": "3.35", "wpb": "118116", "bsz": "256", "num_updates": "722400", "lr": "2.80404e-05", "gnorm": "2.217", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "216080"} +[2022-08-01 22:52:11,666][train_inner][INFO] - {"epoch": 15, "update": 14.04, "loss": "2.036", "ppl": "4.1", "wps": "395340", "ups": "3.35", "wpb": "117868", "bsz": "256", "num_updates": "722600", "lr": "2.80202e-05", "gnorm": "2.261", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "216140"} +[2022-08-01 22:53:11,603][train_inner][INFO] - {"epoch": 15, "update": 14.044, "loss": "2.033", "ppl": "4.09", "wps": "392670", "ups": "3.34", "wpb": "117677", "bsz": "256", "num_updates": "722800", "lr": "2.8e-05", "gnorm": "2.2", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "216200"} +[2022-08-01 22:54:10,853][train_inner][INFO] - {"epoch": 15, "update": 14.048, "loss": "2.03", "ppl": "4.08", "wps": "399654", "ups": "3.38", "wpb": "118396", "bsz": "256", "num_updates": "723000", "lr": "2.79798e-05", "gnorm": "2.07", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "216259"} +[2022-08-01 22:55:10,583][train_inner][INFO] - {"epoch": 15, "update": 14.052, "loss": "2.028", "ppl": "4.08", "wps": "397324", "ups": "3.35", "wpb": "118660", "bsz": "256", "num_updates": "723200", "lr": "2.79596e-05", "gnorm": "2.203", "loss_scale": "4", "train_wall": "59", "gb_free": "23.9", "wall": "216319"} +[2022-08-01 22:56:09,879][train_inner][INFO] - {"epoch": 15, "update": 14.055, "loss": "2.031", "ppl": "4.09", "wps": "396364", "ups": "3.37", "wpb": "117513", "bsz": "256", "num_updates": "723400", "lr": "2.79394e-05", "gnorm": "2.283", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "216378"} +[2022-08-01 22:57:09,414][train_inner][INFO] - {"epoch": 15, "update": 14.059, "loss": "2.028", "ppl": "4.08", "wps": "398877", "ups": "3.36", "wpb": "118734", "bsz": "256", "num_updates": "723600", "lr": "2.79192e-05", "gnorm": "2.221", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "216438"} +[2022-08-01 22:58:09,024][train_inner][INFO] - {"epoch": 15, "update": 14.063, "loss": "2.022", "ppl": "4.06", "wps": "398119", "ups": "3.36", "wpb": "118660", "bsz": "256", "num_updates": "723800", "lr": "2.7899e-05", "gnorm": "2.177", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "216497"} +[2022-08-01 22:59:09,557][train_inner][INFO] - {"epoch": 15, "update": 14.067, "loss": "2.037", "ppl": "4.1", "wps": "389423", "ups": "3.3", "wpb": "117864", "bsz": "256", "num_updates": "724000", "lr": "2.78788e-05", "gnorm": "2.193", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "216558"} +[2022-08-01 22:59:12,558][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 23:00:09,201][train_inner][INFO] - {"epoch": 15, "update": 14.071, "loss": "2.032", "ppl": "4.09", "wps": "395890", "ups": "3.35", "wpb": "118060", "bsz": "256", "num_updates": "724200", "lr": "2.78586e-05", "gnorm": "2.365", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "216618"} +[2022-08-01 23:01:09,153][train_inner][INFO] - {"epoch": 15, "update": 14.075, "loss": "2.028", "ppl": "4.08", "wps": "394773", "ups": "3.34", "wpb": "118336", "bsz": "256", "num_updates": "724400", "lr": "2.78384e-05", "gnorm": "2.28", "loss_scale": "2", "train_wall": "60", "gb_free": "22.8", "wall": "216678"} +[2022-08-01 23:02:08,810][train_inner][INFO] - {"epoch": 15, "update": 14.079, "loss": "2.037", "ppl": "4.1", "wps": "396297", "ups": "3.35", "wpb": "118209", "bsz": "256", "num_updates": "724600", "lr": "2.78182e-05", "gnorm": "2.304", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "216737"} +[2022-08-01 23:03:08,369][train_inner][INFO] - {"epoch": 15, "update": 14.083, "loss": "2.026", "ppl": "4.07", "wps": "397744", "ups": "3.36", "wpb": "118446", "bsz": "256", "num_updates": "724800", "lr": "2.7798e-05", "gnorm": "2.216", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "216797"} +[2022-08-01 23:04:07,574][train_inner][INFO] - {"epoch": 15, "update": 14.087, "loss": "2.032", "ppl": "4.09", "wps": "398208", "ups": "3.38", "wpb": "117879", "bsz": "256", "num_updates": "725000", "lr": "2.77778e-05", "gnorm": "2.269", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "216856"} +[2022-08-01 23:05:06,885][train_inner][INFO] - {"epoch": 15, "update": 14.09, "loss": "2.028", "ppl": "4.08", "wps": "398729", "ups": "3.37", "wpb": "118243", "bsz": "256", "num_updates": "725200", "lr": "2.77576e-05", "gnorm": "2.155", "loss_scale": "2", "train_wall": "59", "gb_free": "31.2", "wall": "216915"} +[2022-08-01 23:06:06,314][train_inner][INFO] - {"epoch": 15, "update": 14.094, "loss": "2.024", "ppl": "4.07", "wps": "397819", "ups": "3.37", "wpb": "118209", "bsz": "256", "num_updates": "725400", "lr": "2.77374e-05", "gnorm": "2.282", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "216975"} +[2022-08-01 23:07:05,908][train_inner][INFO] - {"epoch": 15, "update": 14.098, "loss": "2.034", "ppl": "4.1", "wps": "395491", "ups": "3.36", "wpb": "117844", "bsz": "256", "num_updates": "725600", "lr": "2.77172e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "217034"} +[2022-08-01 23:08:05,681][train_inner][INFO] - {"epoch": 15, "update": 14.102, "loss": "2.025", "ppl": "4.07", "wps": "396207", "ups": "3.35", "wpb": "118412", "bsz": "256", "num_updates": "725800", "lr": "2.7697e-05", "gnorm": "2.183", "loss_scale": "2", "train_wall": "59", "gb_free": "26.6", "wall": "217094"} +[2022-08-01 23:09:04,905][train_inner][INFO] - {"epoch": 15, "update": 14.106, "loss": "2.036", "ppl": "4.1", "wps": "400382", "ups": "3.38", "wpb": "118561", "bsz": "256", "num_updates": "726000", "lr": "2.76768e-05", "gnorm": "2.103", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "217153"} +[2022-08-01 23:10:04,689][train_inner][INFO] - {"epoch": 15, "update": 14.11, "loss": "2.031", "ppl": "4.09", "wps": "395290", "ups": "3.35", "wpb": "118158", "bsz": "256", "num_updates": "726200", "lr": "2.76566e-05", "gnorm": "2.229", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "217213"} +[2022-08-01 23:11:04,198][train_inner][INFO] - {"epoch": 15, "update": 14.114, "loss": "2.028", "ppl": "4.08", "wps": "398490", "ups": "3.36", "wpb": "118568", "bsz": "256", "num_updates": "726400", "lr": "2.76364e-05", "gnorm": "2.254", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "217273"} +[2022-08-01 23:12:04,955][train_inner][INFO] - {"epoch": 15, "update": 14.118, "loss": "2.029", "ppl": "4.08", "wps": "388054", "ups": "3.29", "wpb": "117885", "bsz": "256", "num_updates": "726600", "lr": "2.76162e-05", "gnorm": "2.295", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "217333"} +[2022-08-01 23:13:04,345][train_inner][INFO] - {"epoch": 15, "update": 14.122, "loss": "2.033", "ppl": "4.09", "wps": "397816", "ups": "3.37", "wpb": "118131", "bsz": "256", "num_updates": "726800", "lr": "2.7596e-05", "gnorm": "2.239", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "217393"} +[2022-08-01 23:14:03,622][train_inner][INFO] - {"epoch": 15, "update": 14.125, "loss": "2.031", "ppl": "4.09", "wps": "398672", "ups": "3.37", "wpb": "118159", "bsz": "256", "num_updates": "727000", "lr": "2.75758e-05", "gnorm": "2.305", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "217452"} +[2022-08-01 23:15:02,670][train_inner][INFO] - {"epoch": 15, "update": 14.129, "loss": "2.031", "ppl": "4.09", "wps": "400450", "ups": "3.39", "wpb": "118228", "bsz": "256", "num_updates": "727200", "lr": "2.75556e-05", "gnorm": "2.363", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "217511"} +[2022-08-01 23:16:02,067][train_inner][INFO] - {"epoch": 15, "update": 14.133, "loss": "2.029", "ppl": "4.08", "wps": "398604", "ups": "3.37", "wpb": "118380", "bsz": "256", "num_updates": "727400", "lr": "2.75354e-05", "gnorm": "2.164", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "217570"} +[2022-08-01 23:17:01,509][train_inner][INFO] - {"epoch": 15, "update": 14.137, "loss": "2.024", "ppl": "4.07", "wps": "397436", "ups": "3.36", "wpb": "118120", "bsz": "256", "num_updates": "727600", "lr": "2.75152e-05", "gnorm": "2.197", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "217630"} +[2022-08-01 23:18:00,820][train_inner][INFO] - {"epoch": 15, "update": 14.141, "loss": "2.031", "ppl": "4.09", "wps": "398899", "ups": "3.37", "wpb": "118295", "bsz": "256", "num_updates": "727800", "lr": "2.74949e-05", "gnorm": "2.221", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "217689"} +[2022-08-01 23:19:00,210][train_inner][INFO] - {"epoch": 15, "update": 14.145, "loss": "2.023", "ppl": "4.06", "wps": "400137", "ups": "3.37", "wpb": "118820", "bsz": "256", "num_updates": "728000", "lr": "2.74747e-05", "gnorm": "2.297", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "217749"} +[2022-08-01 23:19:47,223][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 23:20:00,006][train_inner][INFO] - {"epoch": 15, "update": 14.149, "loss": "2.033", "ppl": "4.09", "wps": "396474", "ups": "3.34", "wpb": "118537", "bsz": "256", "num_updates": "728200", "lr": "2.74545e-05", "gnorm": "2.131", "loss_scale": "4", "train_wall": "59", "gb_free": "25.1", "wall": "217808"} +[2022-08-01 23:21:00,351][train_inner][INFO] - {"epoch": 15, "update": 14.153, "loss": "2.028", "ppl": "4.08", "wps": "392098", "ups": "3.31", "wpb": "118305", "bsz": "256", "num_updates": "728400", "lr": "2.74343e-05", "gnorm": "2.129", "loss_scale": "4", "train_wall": "60", "gb_free": "23", "wall": "217869"} +[2022-08-01 23:21:59,854][train_inner][INFO] - {"epoch": 15, "update": 14.156, "loss": "2.031", "ppl": "4.09", "wps": "398636", "ups": "3.36", "wpb": "118599", "bsz": "256", "num_updates": "728600", "lr": "2.74141e-05", "gnorm": "2.153", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "217928"} +[2022-08-01 23:22:59,524][train_inner][INFO] - {"epoch": 15, "update": 14.16, "loss": "2.027", "ppl": "4.07", "wps": "396196", "ups": "3.35", "wpb": "118205", "bsz": "256", "num_updates": "728800", "lr": "2.73939e-05", "gnorm": "2.406", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "217988"} +[2022-08-01 23:23:59,003][train_inner][INFO] - {"epoch": 15, "update": 14.164, "loss": "2.03", "ppl": "4.08", "wps": "396023", "ups": "3.36", "wpb": "117774", "bsz": "256", "num_updates": "729000", "lr": "2.73737e-05", "gnorm": "2.179", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "218047"} +[2022-08-01 23:24:59,449][train_inner][INFO] - {"epoch": 15, "update": 14.168, "loss": "2.031", "ppl": "4.09", "wps": "392110", "ups": "3.31", "wpb": "118508", "bsz": "256", "num_updates": "729200", "lr": "2.73535e-05", "gnorm": "2.244", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "218108"} +[2022-08-01 23:26:00,039][train_inner][INFO] - {"epoch": 15, "update": 14.172, "loss": "2.029", "ppl": "4.08", "wps": "390324", "ups": "3.3", "wpb": "118246", "bsz": "256", "num_updates": "729400", "lr": "2.73333e-05", "gnorm": "2.196", "loss_scale": "4", "train_wall": "60", "gb_free": "21.7", "wall": "218168"} +[2022-08-01 23:26:59,454][train_inner][INFO] - {"epoch": 15, "update": 14.176, "loss": "2.033", "ppl": "4.09", "wps": "400539", "ups": "3.37", "wpb": "118983", "bsz": "256", "num_updates": "729600", "lr": "2.73131e-05", "gnorm": "2.274", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "218228"} +[2022-08-01 23:27:58,964][train_inner][INFO] - {"epoch": 15, "update": 14.18, "loss": "2.026", "ppl": "4.07", "wps": "398118", "ups": "3.36", "wpb": "118453", "bsz": "256", "num_updates": "729800", "lr": "2.72929e-05", "gnorm": "2.188", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "218287"} +[2022-08-01 23:28:58,729][train_inner][INFO] - {"epoch": 15, "update": 14.184, "loss": "2.028", "ppl": "4.08", "wps": "394515", "ups": "3.35", "wpb": "117888", "bsz": "256", "num_updates": "730000", "lr": "2.72727e-05", "gnorm": "2.271", "loss_scale": "4", "train_wall": "59", "gb_free": "28.2", "wall": "218347"} +[2022-08-01 23:29:59,770][train_inner][INFO] - {"epoch": 15, "update": 14.188, "loss": "2.033", "ppl": "4.09", "wps": "389288", "ups": "3.28", "wpb": "118812", "bsz": "256", "num_updates": "730200", "lr": "2.72525e-05", "gnorm": "2.226", "loss_scale": "4", "train_wall": "61", "gb_free": "23.2", "wall": "218408"} +[2022-08-01 23:30:06,672][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 23:30:59,709][train_inner][INFO] - {"epoch": 15, "update": 14.191, "loss": "2.025", "ppl": "4.07", "wps": "395353", "ups": "3.34", "wpb": "118484", "bsz": "256", "num_updates": "730400", "lr": "2.72323e-05", "gnorm": "2.22", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "218468"} +[2022-08-01 23:31:59,169][train_inner][INFO] - {"epoch": 15, "update": 14.195, "loss": "2.024", "ppl": "4.07", "wps": "397950", "ups": "3.36", "wpb": "118309", "bsz": "256", "num_updates": "730600", "lr": "2.72121e-05", "gnorm": "2.126", "loss_scale": "4", "train_wall": "59", "gb_free": "24.9", "wall": "218528"} +[2022-08-01 23:32:59,493][train_inner][INFO] - {"epoch": 15, "update": 14.199, "loss": "2.032", "ppl": "4.09", "wps": "393046", "ups": "3.32", "wpb": "118550", "bsz": "256", "num_updates": "730800", "lr": "2.71919e-05", "gnorm": "2.178", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "218588"} +[2022-08-01 23:33:59,129][train_inner][INFO] - {"epoch": 15, "update": 14.203, "loss": "2.034", "ppl": "4.1", "wps": "394516", "ups": "3.35", "wpb": "117635", "bsz": "256", "num_updates": "731000", "lr": "2.71717e-05", "gnorm": "2.265", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "218648"} +[2022-08-01 23:34:58,528][train_inner][INFO] - {"epoch": 15, "update": 14.207, "loss": "2.031", "ppl": "4.09", "wps": "399327", "ups": "3.37", "wpb": "118597", "bsz": "256", "num_updates": "731200", "lr": "2.71515e-05", "gnorm": "2.276", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "218707"} +[2022-08-01 23:35:58,082][train_inner][INFO] - {"epoch": 15, "update": 14.211, "loss": "2.02", "ppl": "4.06", "wps": "399465", "ups": "3.36", "wpb": "118949", "bsz": "256", "num_updates": "731400", "lr": "2.71313e-05", "gnorm": "2.274", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "218767"} +[2022-08-01 23:36:57,383][train_inner][INFO] - {"epoch": 15, "update": 14.215, "loss": "2.03", "ppl": "4.08", "wps": "399697", "ups": "3.37", "wpb": "118512", "bsz": "256", "num_updates": "731600", "lr": "2.71111e-05", "gnorm": "2.255", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "218826"} +[2022-08-01 23:37:57,017][train_inner][INFO] - {"epoch": 15, "update": 14.219, "loss": "2.032", "ppl": "4.09", "wps": "394883", "ups": "3.35", "wpb": "117741", "bsz": "256", "num_updates": "731800", "lr": "2.70909e-05", "gnorm": "2.203", "loss_scale": "4", "train_wall": "59", "gb_free": "26.6", "wall": "218885"} +[2022-08-01 23:38:56,526][train_inner][INFO] - {"epoch": 15, "update": 14.223, "loss": "2.03", "ppl": "4.08", "wps": "398286", "ups": "3.36", "wpb": "118506", "bsz": "256", "num_updates": "732000", "lr": "2.70707e-05", "gnorm": "2.294", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "218945"} +[2022-08-01 23:39:56,025][train_inner][INFO] - {"epoch": 15, "update": 14.226, "loss": "2.029", "ppl": "4.08", "wps": "395717", "ups": "3.36", "wpb": "117725", "bsz": "256", "num_updates": "732200", "lr": "2.70505e-05", "gnorm": "2.2", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "219004"} +[2022-08-01 23:40:18,267][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 23:40:55,614][train_inner][INFO] - {"epoch": 15, "update": 14.23, "loss": "2.031", "ppl": "4.09", "wps": "399030", "ups": "3.36", "wpb": "118888", "bsz": "256", "num_updates": "732400", "lr": "2.70303e-05", "gnorm": "2.217", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "219064"} +[2022-08-01 23:41:55,506][train_inner][INFO] - {"epoch": 15, "update": 14.234, "loss": "2.024", "ppl": "4.07", "wps": "395104", "ups": "3.34", "wpb": "118316", "bsz": "256", "num_updates": "732600", "lr": "2.70101e-05", "gnorm": "2.239", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "219124"} +[2022-08-01 23:42:54,948][train_inner][INFO] - {"epoch": 15, "update": 14.238, "loss": "2.035", "ppl": "4.1", "wps": "398661", "ups": "3.36", "wpb": "118486", "bsz": "256", "num_updates": "732800", "lr": "2.69899e-05", "gnorm": "2.249", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "219183"} +[2022-08-01 23:43:54,527][train_inner][INFO] - {"epoch": 15, "update": 14.242, "loss": "2.029", "ppl": "4.08", "wps": "398129", "ups": "3.36", "wpb": "118600", "bsz": "256", "num_updates": "733000", "lr": "2.69697e-05", "gnorm": "2.263", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "219243"} +[2022-08-01 23:44:53,451][train_inner][INFO] - {"epoch": 15, "update": 14.246, "loss": "2.028", "ppl": "4.08", "wps": "402510", "ups": "3.39", "wpb": "118587", "bsz": "256", "num_updates": "733200", "lr": "2.69495e-05", "gnorm": "2.252", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "219302"} +[2022-08-01 23:45:52,550][train_inner][INFO] - {"epoch": 15, "update": 14.25, "loss": "2.027", "ppl": "4.07", "wps": "400233", "ups": "3.38", "wpb": "118266", "bsz": "256", "num_updates": "733400", "lr": "2.69293e-05", "gnorm": "2.294", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "219361"} +[2022-08-01 23:46:52,125][train_inner][INFO] - {"epoch": 15, "update": 14.254, "loss": "2.026", "ppl": "4.07", "wps": "397048", "ups": "3.36", "wpb": "118268", "bsz": "256", "num_updates": "733600", "lr": "2.69091e-05", "gnorm": "2.28", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "219421"} +[2022-08-01 23:47:52,259][train_inner][INFO] - {"epoch": 15, "update": 14.258, "loss": "2.026", "ppl": "4.07", "wps": "394253", "ups": "3.33", "wpb": "118540", "bsz": "256", "num_updates": "733800", "lr": "2.68889e-05", "gnorm": "2.153", "loss_scale": "4", "train_wall": "60", "gb_free": "27.7", "wall": "219481"} +[2022-08-01 23:48:51,229][train_inner][INFO] - {"epoch": 15, "update": 14.261, "loss": "2.026", "ppl": "4.07", "wps": "400464", "ups": "3.39", "wpb": "118077", "bsz": "256", "num_updates": "734000", "lr": "2.68687e-05", "gnorm": "2.142", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "219540"} +[2022-08-01 23:49:50,488][train_inner][INFO] - {"epoch": 15, "update": 14.265, "loss": "2.028", "ppl": "4.08", "wps": "399406", "ups": "3.38", "wpb": "118341", "bsz": "256", "num_updates": "734200", "lr": "2.68485e-05", "gnorm": "2.153", "loss_scale": "4", "train_wall": "59", "gb_free": "26.5", "wall": "219599"} +[2022-08-01 23:50:33,126][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 23:50:50,021][train_inner][INFO] - {"epoch": 15, "update": 14.269, "loss": "2.023", "ppl": "4.06", "wps": "396917", "ups": "3.36", "wpb": "118147", "bsz": "256", "num_updates": "734400", "lr": "2.68283e-05", "gnorm": "2.143", "loss_scale": "4", "train_wall": "59", "gb_free": "23.9", "wall": "219658"} +[2022-08-01 23:51:49,380][train_inner][INFO] - {"epoch": 15, "update": 14.273, "loss": "2.026", "ppl": "4.07", "wps": "398473", "ups": "3.37", "wpb": "118264", "bsz": "256", "num_updates": "734600", "lr": "2.68081e-05", "gnorm": "2.359", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "219718"} +[2022-08-01 23:52:48,856][train_inner][INFO] - {"epoch": 15, "update": 14.277, "loss": "2.025", "ppl": "4.07", "wps": "398108", "ups": "3.36", "wpb": "118387", "bsz": "256", "num_updates": "734800", "lr": "2.67879e-05", "gnorm": "2.249", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "219777"} +[2022-08-01 23:53:48,310][train_inner][INFO] - {"epoch": 15, "update": 14.281, "loss": "2.029", "ppl": "4.08", "wps": "398754", "ups": "3.36", "wpb": "118536", "bsz": "256", "num_updates": "735000", "lr": "2.67677e-05", "gnorm": "2.316", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "219837"} +[2022-08-01 23:54:47,857][train_inner][INFO] - {"epoch": 15, "update": 14.285, "loss": "2.032", "ppl": "4.09", "wps": "398321", "ups": "3.36", "wpb": "118595", "bsz": "256", "num_updates": "735200", "lr": "2.67475e-05", "gnorm": "2.261", "loss_scale": "4", "train_wall": "59", "gb_free": "24.7", "wall": "219896"} +[2022-08-01 23:55:38,656][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 23:55:47,233][train_inner][INFO] - {"epoch": 15, "update": 14.289, "loss": "2.025", "ppl": "4.07", "wps": "398914", "ups": "3.37", "wpb": "118429", "bsz": "256", "num_updates": "735400", "lr": "2.67273e-05", "gnorm": "2.463", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "219956"} +[2022-08-01 23:56:46,806][train_inner][INFO] - {"epoch": 15, "update": 14.293, "loss": "2.025", "ppl": "4.07", "wps": "397339", "ups": "3.36", "wpb": "118352", "bsz": "256", "num_updates": "735600", "lr": "2.67071e-05", "gnorm": "2.23", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "220015"} +[2022-08-01 23:57:45,975][train_inner][INFO] - {"epoch": 15, "update": 14.296, "loss": "2.018", "ppl": "4.05", "wps": "402645", "ups": "3.38", "wpb": "119121", "bsz": "256", "num_updates": "735800", "lr": "2.66869e-05", "gnorm": "2.117", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "220074"} +[2022-08-01 23:58:45,677][train_inner][INFO] - {"epoch": 15, "update": 14.3, "loss": "2.029", "ppl": "4.08", "wps": "395688", "ups": "3.35", "wpb": "118116", "bsz": "256", "num_updates": "736000", "lr": "2.66667e-05", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "220134"} +[2022-08-01 23:59:44,958][train_inner][INFO] - {"epoch": 15, "update": 14.304, "loss": "2.027", "ppl": "4.07", "wps": "399072", "ups": "3.37", "wpb": "118284", "bsz": "256", "num_updates": "736200", "lr": "2.66465e-05", "gnorm": "2.444", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "220193"} +[2022-08-02 00:00:44,071][train_inner][INFO] - {"epoch": 15, "update": 14.308, "loss": "2.035", "ppl": "4.1", "wps": "398906", "ups": "3.38", "wpb": "117903", "bsz": "256", "num_updates": "736400", "lr": "2.66263e-05", "gnorm": "2.298", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "220253"} +[2022-08-02 00:01:43,551][train_inner][INFO] - {"epoch": 15, "update": 14.312, "loss": "2.023", "ppl": "4.06", "wps": "398704", "ups": "3.36", "wpb": "118573", "bsz": "256", "num_updates": "736600", "lr": "2.66061e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "220312"} +[2022-08-02 00:02:43,210][train_inner][INFO] - {"epoch": 15, "update": 14.316, "loss": "2.023", "ppl": "4.06", "wps": "398402", "ups": "3.35", "wpb": "118840", "bsz": "256", "num_updates": "736800", "lr": "2.65859e-05", "gnorm": "2.181", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "220372"} +[2022-08-02 00:03:42,627][train_inner][INFO] - {"epoch": 15, "update": 14.32, "loss": "2.032", "ppl": "4.09", "wps": "400208", "ups": "3.37", "wpb": "118895", "bsz": "256", "num_updates": "737000", "lr": "2.65657e-05", "gnorm": "2.23", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "220431"} +[2022-08-02 00:04:41,804][train_inner][INFO] - {"epoch": 15, "update": 14.324, "loss": "2.026", "ppl": "4.07", "wps": "399676", "ups": "3.38", "wpb": "118257", "bsz": "256", "num_updates": "737200", "lr": "2.65455e-05", "gnorm": "2.221", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "220490"} +[2022-08-02 00:05:41,177][train_inner][INFO] - {"epoch": 15, "update": 14.327, "loss": "2.02", "ppl": "4.05", "wps": "400070", "ups": "3.37", "wpb": "118766", "bsz": "256", "num_updates": "737400", "lr": "2.65253e-05", "gnorm": "2.296", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "220550"} +[2022-08-02 00:06:40,339][train_inner][INFO] - {"epoch": 15, "update": 14.331, "loss": "2.03", "ppl": "4.08", "wps": "397523", "ups": "3.38", "wpb": "117592", "bsz": "256", "num_updates": "737600", "lr": "2.65051e-05", "gnorm": "2.327", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "220609"} +[2022-08-02 00:07:39,957][train_inner][INFO] - {"epoch": 15, "update": 14.335, "loss": "2.02", "ppl": "4.06", "wps": "400029", "ups": "3.35", "wpb": "119244", "bsz": "256", "num_updates": "737800", "lr": "2.64848e-05", "gnorm": "2.195", "loss_scale": "4", "train_wall": "59", "gb_free": "33.6", "wall": "220668"} +[2022-08-02 00:08:39,475][train_inner][INFO] - {"epoch": 15, "update": 14.339, "loss": "2.028", "ppl": "4.08", "wps": "395868", "ups": "3.36", "wpb": "117805", "bsz": "256", "num_updates": "738000", "lr": "2.64646e-05", "gnorm": "2.129", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "220728"} +[2022-08-02 00:09:38,465][train_inner][INFO] - {"epoch": 15, "update": 14.343, "loss": "2.03", "ppl": "4.08", "wps": "399839", "ups": "3.39", "wpb": "117932", "bsz": "256", "num_updates": "738200", "lr": "2.64444e-05", "gnorm": "2.307", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "220787"} +[2022-08-02 00:10:38,073][train_inner][INFO] - {"epoch": 15, "update": 14.347, "loss": "2.027", "ppl": "4.08", "wps": "398080", "ups": "3.36", "wpb": "118642", "bsz": "256", "num_updates": "738400", "lr": "2.64242e-05", "gnorm": "2.273", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "220847"} +[2022-08-02 00:11:37,377][train_inner][INFO] - {"epoch": 15, "update": 14.351, "loss": "2.028", "ppl": "4.08", "wps": "398762", "ups": "3.37", "wpb": "118242", "bsz": "256", "num_updates": "738600", "lr": "2.6404e-05", "gnorm": "2.171", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "220906"} +[2022-08-02 00:12:36,700][train_inner][INFO] - {"epoch": 15, "update": 14.355, "loss": "2.03", "ppl": "4.08", "wps": "397828", "ups": "3.37", "wpb": "118001", "bsz": "256", "num_updates": "738800", "lr": "2.63838e-05", "gnorm": "2.342", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "220965"} +[2022-08-02 00:13:36,077][train_inner][INFO] - {"epoch": 15, "update": 14.359, "loss": "2.028", "ppl": "4.08", "wps": "400429", "ups": "3.37", "wpb": "118879", "bsz": "256", "num_updates": "739000", "lr": "2.63636e-05", "gnorm": "2.201", "loss_scale": "4", "train_wall": "59", "gb_free": "24.2", "wall": "221025"} +[2022-08-02 00:14:35,212][train_inner][INFO] - {"epoch": 15, "update": 14.362, "loss": "2.031", "ppl": "4.09", "wps": "396869", "ups": "3.38", "wpb": "117344", "bsz": "255.9", "num_updates": "739200", "lr": "2.63434e-05", "gnorm": "2.187", "loss_scale": "4", "train_wall": "59", "gb_free": "35.3", "wall": "221084"} +[2022-08-02 00:15:34,534][train_inner][INFO] - {"epoch": 15, "update": 14.366, "loss": "2.021", "ppl": "4.06", "wps": "397149", "ups": "3.37", "wpb": "117798", "bsz": "256", "num_updates": "739400", "lr": "2.63232e-05", "gnorm": "2.293", "loss_scale": "4", "train_wall": "59", "gb_free": "22.7", "wall": "221143"} +[2022-08-02 00:15:55,317][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 00:16:34,191][train_inner][INFO] - {"epoch": 15, "update": 14.37, "loss": "2.025", "ppl": "4.07", "wps": "396084", "ups": "3.35", "wpb": "118145", "bsz": "256", "num_updates": "739600", "lr": "2.6303e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "24", "wall": "221203"} +[2022-08-02 00:17:33,301][train_inner][INFO] - {"epoch": 15, "update": 14.374, "loss": "2.031", "ppl": "4.09", "wps": "398962", "ups": "3.38", "wpb": "117913", "bsz": "256", "num_updates": "739800", "lr": "2.62828e-05", "gnorm": "2.111", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "221262"} +[2022-08-02 00:18:32,963][train_inner][INFO] - {"epoch": 15, "update": 14.378, "loss": "2.028", "ppl": "4.08", "wps": "395792", "ups": "3.35", "wpb": "118068", "bsz": "256", "num_updates": "740000", "lr": "2.62626e-05", "gnorm": "2.182", "loss_scale": "4", "train_wall": "59", "gb_free": "31.1", "wall": "221321"} +[2022-08-02 00:19:32,073][train_inner][INFO] - {"epoch": 15, "update": 14.382, "loss": "2.022", "ppl": "4.06", "wps": "399734", "ups": "3.38", "wpb": "118140", "bsz": "256", "num_updates": "740200", "lr": "2.62424e-05", "gnorm": "2.165", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "221381"} +[2022-08-02 00:20:31,514][train_inner][INFO] - {"epoch": 15, "update": 14.386, "loss": "2.024", "ppl": "4.07", "wps": "397578", "ups": "3.36", "wpb": "118161", "bsz": "256", "num_updates": "740400", "lr": "2.62222e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "59", "gb_free": "25.8", "wall": "221440"} +[2022-08-02 00:21:31,203][train_inner][INFO] - {"epoch": 15, "update": 14.39, "loss": "2.028", "ppl": "4.08", "wps": "395428", "ups": "3.35", "wpb": "118012", "bsz": "256", "num_updates": "740600", "lr": "2.6202e-05", "gnorm": "2.214", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "221500"} +[2022-08-02 00:22:30,683][train_inner][INFO] - {"epoch": 15, "update": 14.394, "loss": "2.025", "ppl": "4.07", "wps": "398276", "ups": "3.36", "wpb": "118446", "bsz": "256", "num_updates": "740800", "lr": "2.61818e-05", "gnorm": "2.171", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "221559"} +[2022-08-02 00:23:30,225][train_inner][INFO] - {"epoch": 15, "update": 14.397, "loss": "2.026", "ppl": "4.07", "wps": "397598", "ups": "3.36", "wpb": "118369", "bsz": "256", "num_updates": "741000", "lr": "2.61616e-05", "gnorm": "2.136", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "221619"} +[2022-08-02 00:24:29,845][train_inner][INFO] - {"epoch": 15, "update": 14.401, "loss": "2.02", "ppl": "4.06", "wps": "397568", "ups": "3.35", "wpb": "118513", "bsz": "256", "num_updates": "741200", "lr": "2.61414e-05", "gnorm": "2.311", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "221678"} +[2022-08-02 00:25:29,116][train_inner][INFO] - {"epoch": 15, "update": 14.405, "loss": "2.026", "ppl": "4.07", "wps": "398885", "ups": "3.37", "wpb": "118211", "bsz": "256", "num_updates": "741400", "lr": "2.61212e-05", "gnorm": "2.253", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "221738"} +[2022-08-02 00:26:07,555][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 00:26:28,988][train_inner][INFO] - {"epoch": 15, "update": 14.409, "loss": "2.02", "ppl": "4.06", "wps": "395886", "ups": "3.34", "wpb": "118512", "bsz": "256", "num_updates": "741600", "lr": "2.6101e-05", "gnorm": "2.218", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "221797"} +[2022-08-02 00:27:28,485][train_inner][INFO] - {"epoch": 15, "update": 14.413, "loss": "2.022", "ppl": "4.06", "wps": "398443", "ups": "3.36", "wpb": "118530", "bsz": "256", "num_updates": "741800", "lr": "2.60808e-05", "gnorm": "2.119", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "221857"} +[2022-08-02 00:28:27,675][train_inner][INFO] - {"epoch": 15, "update": 14.417, "loss": "2.024", "ppl": "4.07", "wps": "398269", "ups": "3.38", "wpb": "117866", "bsz": "256", "num_updates": "742000", "lr": "2.60606e-05", "gnorm": "2.25", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "221916"} +[2022-08-02 00:29:27,350][train_inner][INFO] - {"epoch": 15, "update": 14.421, "loss": "2.026", "ppl": "4.07", "wps": "395809", "ups": "3.35", "wpb": "118099", "bsz": "256", "num_updates": "742200", "lr": "2.60404e-05", "gnorm": "2.373", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "221976"} +[2022-08-02 00:30:27,680][train_inner][INFO] - {"epoch": 15, "update": 14.425, "loss": "2.022", "ppl": "4.06", "wps": "391160", "ups": "3.32", "wpb": "117994", "bsz": "256", "num_updates": "742400", "lr": "2.60202e-05", "gnorm": "2.211", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "222036"} +[2022-08-02 00:31:27,078][train_inner][INFO] - {"epoch": 15, "update": 14.429, "loss": "2.025", "ppl": "4.07", "wps": "397549", "ups": "3.37", "wpb": "118066", "bsz": "256", "num_updates": "742600", "lr": "2.6e-05", "gnorm": "2.195", "loss_scale": "4", "train_wall": "59", "gb_free": "29.7", "wall": "222096"} +[2022-08-02 00:32:26,547][train_inner][INFO] - {"epoch": 15, "update": 14.432, "loss": "2.021", "ppl": "4.06", "wps": "397850", "ups": "3.36", "wpb": "118299", "bsz": "256", "num_updates": "742800", "lr": "2.59798e-05", "gnorm": "2.253", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "222155"} +[2022-08-02 00:33:25,905][train_inner][INFO] - {"epoch": 15, "update": 14.436, "loss": "2.027", "ppl": "4.07", "wps": "399068", "ups": "3.37", "wpb": "118439", "bsz": "256", "num_updates": "743000", "lr": "2.59596e-05", "gnorm": "2.274", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "222214"} +[2022-08-02 00:34:25,671][train_inner][INFO] - {"epoch": 15, "update": 14.44, "loss": "2.028", "ppl": "4.08", "wps": "396700", "ups": "3.35", "wpb": "118543", "bsz": "256", "num_updates": "743200", "lr": "2.59394e-05", "gnorm": "2.264", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "222274"} +[2022-08-02 00:35:26,184][train_inner][INFO] - {"epoch": 15, "update": 14.444, "loss": "2.022", "ppl": "4.06", "wps": "389887", "ups": "3.31", "wpb": "117966", "bsz": "256", "num_updates": "743400", "lr": "2.59192e-05", "gnorm": "2.141", "loss_scale": "4", "train_wall": "60", "gb_free": "31", "wall": "222335"} +[2022-08-02 00:36:19,097][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 00:36:25,967][train_inner][INFO] - {"epoch": 15, "update": 14.448, "loss": "2.026", "ppl": "4.07", "wps": "396733", "ups": "3.35", "wpb": "118589", "bsz": "256", "num_updates": "743600", "lr": "2.5899e-05", "gnorm": "2.227", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "222394"} +[2022-08-02 00:37:25,517][train_inner][INFO] - {"epoch": 15, "update": 14.452, "loss": "2.024", "ppl": "4.07", "wps": "395943", "ups": "3.36", "wpb": "117890", "bsz": "256", "num_updates": "743800", "lr": "2.58788e-05", "gnorm": "2.283", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "222454"} +[2022-08-02 00:38:25,211][train_inner][INFO] - {"epoch": 15, "update": 14.456, "loss": "2.021", "ppl": "4.06", "wps": "395786", "ups": "3.35", "wpb": "118131", "bsz": "256", "num_updates": "744000", "lr": "2.58586e-05", "gnorm": "2.203", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "222514"} +[2022-08-02 00:39:24,173][train_inner][INFO] - {"epoch": 15, "update": 14.46, "loss": "2.028", "ppl": "4.08", "wps": "399923", "ups": "3.39", "wpb": "117900", "bsz": "256", "num_updates": "744200", "lr": "2.58384e-05", "gnorm": "2.205", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "222573"} +[2022-08-02 00:40:23,255][train_inner][INFO] - {"epoch": 15, "update": 14.463, "loss": "2.025", "ppl": "4.07", "wps": "402061", "ups": "3.39", "wpb": "118772", "bsz": "256", "num_updates": "744400", "lr": "2.58182e-05", "gnorm": "2.168", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "222632"} +[2022-08-02 00:41:23,105][train_inner][INFO] - {"epoch": 15, "update": 14.467, "loss": "2.025", "ppl": "4.07", "wps": "395557", "ups": "3.34", "wpb": "118369", "bsz": "256", "num_updates": "744600", "lr": "2.5798e-05", "gnorm": "2.252", "loss_scale": "4", "train_wall": "59", "gb_free": "22.1", "wall": "222692"} +[2022-08-02 00:42:22,526][train_inner][INFO] - {"epoch": 15, "update": 14.471, "loss": "2.026", "ppl": "4.07", "wps": "398142", "ups": "3.37", "wpb": "118290", "bsz": "256", "num_updates": "744800", "lr": "2.57778e-05", "gnorm": "2.229", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "222751"} +[2022-08-02 00:43:21,710][train_inner][INFO] - {"epoch": 15, "update": 14.475, "loss": "2.029", "ppl": "4.08", "wps": "400977", "ups": "3.38", "wpb": "118656", "bsz": "256", "num_updates": "745000", "lr": "2.57576e-05", "gnorm": "2.282", "loss_scale": "4", "train_wall": "59", "gb_free": "26.1", "wall": "222810"} +[2022-08-02 00:44:21,094][train_inner][INFO] - {"epoch": 15, "update": 14.479, "loss": "2.028", "ppl": "4.08", "wps": "399528", "ups": "3.37", "wpb": "118626", "bsz": "256", "num_updates": "745200", "lr": "2.57374e-05", "gnorm": "2.491", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "222870"} +[2022-08-02 00:45:20,425][train_inner][INFO] - {"epoch": 15, "update": 14.483, "loss": "2.022", "ppl": "4.06", "wps": "399892", "ups": "3.37", "wpb": "118629", "bsz": "256", "num_updates": "745400", "lr": "2.57172e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "222929"} +[2022-08-02 00:46:19,765][train_inner][INFO] - {"epoch": 15, "update": 14.487, "loss": "2.03", "ppl": "4.08", "wps": "400509", "ups": "3.37", "wpb": "118831", "bsz": "256", "num_updates": "745600", "lr": "2.5697e-05", "gnorm": "2.192", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "222988"} +[2022-08-02 00:46:28,298][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 00:47:19,329][train_inner][INFO] - {"epoch": 15, "update": 14.491, "loss": "2.024", "ppl": "4.07", "wps": "395105", "ups": "3.36", "wpb": "117670", "bsz": "256", "num_updates": "745800", "lr": "2.56768e-05", "gnorm": "2.461", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "223048"} +[2022-08-02 00:48:18,740][train_inner][INFO] - {"epoch": 15, "update": 14.495, "loss": "2.022", "ppl": "4.06", "wps": "398697", "ups": "3.37", "wpb": "118433", "bsz": "256", "num_updates": "746000", "lr": "2.56566e-05", "gnorm": "2.293", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "223107"} +[2022-08-02 00:49:18,266][train_inner][INFO] - {"epoch": 15, "update": 14.498, "loss": "2.018", "ppl": "4.05", "wps": "397883", "ups": "3.36", "wpb": "118422", "bsz": "256", "num_updates": "746200", "lr": "2.56364e-05", "gnorm": "2.242", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "223167"} +[2022-08-02 00:50:17,230][train_inner][INFO] - {"epoch": 15, "update": 14.502, "loss": "2.022", "ppl": "4.06", "wps": "404434", "ups": "3.39", "wpb": "119232", "bsz": "256", "num_updates": "746400", "lr": "2.56162e-05", "gnorm": "2.133", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "223226"} +[2022-08-02 00:51:17,317][train_inner][INFO] - {"epoch": 15, "update": 14.506, "loss": "2.022", "ppl": "4.06", "wps": "396602", "ups": "3.33", "wpb": "119152", "bsz": "256", "num_updates": "746600", "lr": "2.5596e-05", "gnorm": "2.218", "loss_scale": "4", "train_wall": "60", "gb_free": "27.5", "wall": "223286"} +[2022-08-02 00:52:16,671][train_inner][INFO] - {"epoch": 15, "update": 14.51, "loss": "2.025", "ppl": "4.07", "wps": "397696", "ups": "3.37", "wpb": "118025", "bsz": "255.9", "num_updates": "746800", "lr": "2.55758e-05", "gnorm": "2.249", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "223345"} +[2022-08-02 00:53:16,427][train_inner][INFO] - {"epoch": 15, "update": 14.514, "loss": "2.026", "ppl": "4.07", "wps": "395307", "ups": "3.35", "wpb": "118110", "bsz": "256", "num_updates": "747000", "lr": "2.55556e-05", "gnorm": "2.266", "loss_scale": "4", "train_wall": "59", "gb_free": "24.9", "wall": "223405"} +[2022-08-02 00:54:15,968][train_inner][INFO] - {"epoch": 15, "update": 14.518, "loss": "2.026", "ppl": "4.07", "wps": "396450", "ups": "3.36", "wpb": "118023", "bsz": "256", "num_updates": "747200", "lr": "2.55354e-05", "gnorm": "2.197", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "223464"} +[2022-08-02 00:55:14,893][train_inner][INFO] - {"epoch": 15, "update": 14.522, "loss": "2.025", "ppl": "4.07", "wps": "400668", "ups": "3.39", "wpb": "118046", "bsz": "256", "num_updates": "747400", "lr": "2.55152e-05", "gnorm": "2.221", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "223523"} +[2022-08-02 00:56:14,216][train_inner][INFO] - {"epoch": 15, "update": 14.526, "loss": "2.026", "ppl": "4.07", "wps": "399748", "ups": "3.37", "wpb": "118571", "bsz": "256", "num_updates": "747600", "lr": "2.54949e-05", "gnorm": "2.293", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "223583"} +[2022-08-02 00:56:37,337][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 00:57:13,555][train_inner][INFO] - {"epoch": 15, "update": 14.53, "loss": "2.018", "ppl": "4.05", "wps": "400094", "ups": "3.37", "wpb": "118705", "bsz": "256", "num_updates": "747800", "lr": "2.54747e-05", "gnorm": "2.3", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "223642"} +[2022-08-02 00:58:13,052][train_inner][INFO] - {"epoch": 15, "update": 14.533, "loss": "2.022", "ppl": "4.06", "wps": "398104", "ups": "3.36", "wpb": "118429", "bsz": "256", "num_updates": "748000", "lr": "2.54545e-05", "gnorm": "2.292", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "223701"} +[2022-08-02 00:59:13,844][train_inner][INFO] - {"epoch": 15, "update": 14.537, "loss": "2.022", "ppl": "4.06", "wps": "390196", "ups": "3.29", "wpb": "118603", "bsz": "256", "num_updates": "748200", "lr": "2.54343e-05", "gnorm": "2.496", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "223762"} +[2022-08-02 01:00:12,860][train_inner][INFO] - {"epoch": 15, "update": 14.541, "loss": "2.025", "ppl": "4.07", "wps": "399302", "ups": "3.39", "wpb": "117825", "bsz": "256", "num_updates": "748400", "lr": "2.54141e-05", "gnorm": "2.358", "loss_scale": "4", "train_wall": "59", "gb_free": "28", "wall": "223821"} +[2022-08-02 01:01:12,394][train_inner][INFO] - {"epoch": 15, "update": 14.545, "loss": "2.024", "ppl": "4.07", "wps": "398081", "ups": "3.36", "wpb": "118496", "bsz": "256", "num_updates": "748600", "lr": "2.53939e-05", "gnorm": "2.21", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "223881"} +[2022-08-02 01:02:11,856][train_inner][INFO] - {"epoch": 15, "update": 14.549, "loss": "2.023", "ppl": "4.06", "wps": "397225", "ups": "3.36", "wpb": "118099", "bsz": "256", "num_updates": "748800", "lr": "2.53737e-05", "gnorm": "2.11", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "223940"} +[2022-08-02 01:03:11,059][train_inner][INFO] - {"epoch": 15, "update": 14.553, "loss": "2.025", "ppl": "4.07", "wps": "399515", "ups": "3.38", "wpb": "118261", "bsz": "256", "num_updates": "749000", "lr": "2.53535e-05", "gnorm": "2.3", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "223999"} +[2022-08-02 01:04:10,492][train_inner][INFO] - {"epoch": 15, "update": 14.557, "loss": "2.02", "ppl": "4.05", "wps": "400116", "ups": "3.37", "wpb": "118899", "bsz": "256", "num_updates": "749200", "lr": "2.53333e-05", "gnorm": "2.395", "loss_scale": "4", "train_wall": "59", "gb_free": "26", "wall": "224059"} +[2022-08-02 01:05:09,630][train_inner][INFO] - {"epoch": 15, "update": 14.561, "loss": "2.026", "ppl": "4.07", "wps": "395754", "ups": "3.38", "wpb": "117021", "bsz": "256", "num_updates": "749400", "lr": "2.53131e-05", "gnorm": "2.314", "loss_scale": "4", "train_wall": "59", "gb_free": "29.6", "wall": "224118"} +[2022-08-02 01:06:09,184][train_inner][INFO] - {"epoch": 15, "update": 14.565, "loss": "2.025", "ppl": "4.07", "wps": "396197", "ups": "3.36", "wpb": "117975", "bsz": "256", "num_updates": "749600", "lr": "2.52929e-05", "gnorm": "2.525", "loss_scale": "4", "train_wall": "59", "gb_free": "27.4", "wall": "224178"} +[2022-08-02 01:06:47,979][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 01:07:08,784][train_inner][INFO] - {"epoch": 15, "update": 14.568, "loss": "2.025", "ppl": "4.07", "wps": "397775", "ups": "3.36", "wpb": "118536", "bsz": "256", "num_updates": "749800", "lr": "2.52727e-05", "gnorm": "2.352", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "224237"} +[2022-08-02 01:08:08,195][train_inner][INFO] - {"epoch": 15, "update": 14.572, "loss": "2.025", "ppl": "4.07", "wps": "396372", "ups": "3.37", "wpb": "117743", "bsz": "256", "num_updates": "750000", "lr": "2.52525e-05", "gnorm": "2.388", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "224297"} +[2022-08-02 01:08:08,196][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 01:08:31,153][valid][INFO] - {"epoch": 15, "valid_loss": "1.913", "valid_ppl": "3.77", "valid_wps": "1.59892e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "750000", "valid_best_loss": "1.913"} +[2022-08-02 01:08:31,156][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 750000 updates +[2022-08-02 01:08:31,156][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_15_750000.pt +[2022-08-02 01:08:37,343][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_15_750000.pt +[2022-08-02 01:08:55,194][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_15_750000.pt (epoch 15 @ 750000 updates, score 1.913) (writing took 24.038892585784197 seconds) +[2022-08-02 01:09:54,634][train_inner][INFO] - {"epoch": 15, "update": 14.576, "loss": "2.015", "ppl": "4.04", "wps": "223309", "ups": "1.88", "wpb": "118843", "bsz": "256", "num_updates": "750200", "lr": "2.52323e-05", "gnorm": "2.284", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "224403"} +[2022-08-02 01:10:55,365][train_inner][INFO] - {"epoch": 15, "update": 14.58, "loss": "2.026", "ppl": "4.07", "wps": "389979", "ups": "3.29", "wpb": "118418", "bsz": "256", "num_updates": "750400", "lr": "2.52121e-05", "gnorm": "2.174", "loss_scale": "4", "train_wall": "60", "gb_free": "21.8", "wall": "224464"} +[2022-08-02 01:11:54,617][train_inner][INFO] - {"epoch": 15, "update": 14.584, "loss": "2.02", "ppl": "4.06", "wps": "399431", "ups": "3.38", "wpb": "118335", "bsz": "256", "num_updates": "750600", "lr": "2.51919e-05", "gnorm": "2.293", "loss_scale": "4", "train_wall": "59", "gb_free": "27.9", "wall": "224523"} +[2022-08-02 01:12:53,894][train_inner][INFO] - {"epoch": 15, "update": 14.588, "loss": "2.028", "ppl": "4.08", "wps": "396821", "ups": "3.37", "wpb": "117612", "bsz": "256", "num_updates": "750800", "lr": "2.51717e-05", "gnorm": "2.316", "loss_scale": "4", "train_wall": "59", "gb_free": "23.1", "wall": "224582"} +[2022-08-02 01:13:53,564][train_inner][INFO] - {"epoch": 15, "update": 14.592, "loss": "2.021", "ppl": "4.06", "wps": "396380", "ups": "3.35", "wpb": "118259", "bsz": "256", "num_updates": "751000", "lr": "2.51515e-05", "gnorm": "2.245", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "224642"} +[2022-08-02 01:14:53,162][train_inner][INFO] - {"epoch": 15, "update": 14.596, "loss": "2.031", "ppl": "4.09", "wps": "395319", "ups": "3.36", "wpb": "117801", "bsz": "256", "num_updates": "751200", "lr": "2.51313e-05", "gnorm": "2.191", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "224702"} +[2022-08-02 01:15:53,611][train_inner][INFO] - {"epoch": 15, "update": 14.599, "loss": "2.019", "ppl": "4.05", "wps": "392621", "ups": "3.31", "wpb": "118666", "bsz": "256", "num_updates": "751400", "lr": "2.51111e-05", "gnorm": "2.205", "loss_scale": "4", "train_wall": "60", "gb_free": "27.9", "wall": "224762"} +[2022-08-02 01:16:53,111][train_inner][INFO] - {"epoch": 15, "update": 14.603, "loss": "2.018", "ppl": "4.05", "wps": "397678", "ups": "3.36", "wpb": "118309", "bsz": "256", "num_updates": "751600", "lr": "2.50909e-05", "gnorm": "2.31", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "224822"} +[2022-08-02 01:17:49,033][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 01:17:52,620][train_inner][INFO] - {"epoch": 15, "update": 14.607, "loss": "2.024", "ppl": "4.07", "wps": "396470", "ups": "3.36", "wpb": "117966", "bsz": "256", "num_updates": "751800", "lr": "2.50707e-05", "gnorm": "2.304", "loss_scale": "4", "train_wall": "59", "gb_free": "25.9", "wall": "224881"} +[2022-08-02 01:18:52,238][train_inner][INFO] - {"epoch": 15, "update": 14.611, "loss": "2.025", "ppl": "4.07", "wps": "397869", "ups": "3.35", "wpb": "118601", "bsz": "256", "num_updates": "752000", "lr": "2.50505e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "224941"} +[2022-08-02 01:19:52,135][train_inner][INFO] - {"epoch": 15, "update": 14.615, "loss": "2.027", "ppl": "4.08", "wps": "393953", "ups": "3.34", "wpb": "117981", "bsz": "256", "num_updates": "752200", "lr": "2.50303e-05", "gnorm": "2.315", "loss_scale": "4", "train_wall": "60", "gb_free": "24.5", "wall": "225001"} +[2022-08-02 01:20:51,152][train_inner][INFO] - {"epoch": 15, "update": 14.619, "loss": "2.026", "ppl": "4.07", "wps": "398524", "ups": "3.39", "wpb": "117598", "bsz": "256", "num_updates": "752400", "lr": "2.50101e-05", "gnorm": "2.314", "loss_scale": "4", "train_wall": "59", "gb_free": "22.5", "wall": "225060"} +[2022-08-02 01:21:51,803][train_inner][INFO] - {"epoch": 15, "update": 14.623, "loss": "2.027", "ppl": "4.08", "wps": "389831", "ups": "3.3", "wpb": "118218", "bsz": "256", "num_updates": "752600", "lr": "2.49899e-05", "gnorm": "2.34", "loss_scale": "4", "train_wall": "60", "gb_free": "26.4", "wall": "225120"} +[2022-08-02 01:22:51,117][train_inner][INFO] - {"epoch": 15, "update": 14.627, "loss": "2.023", "ppl": "4.06", "wps": "397406", "ups": "3.37", "wpb": "117858", "bsz": "256", "num_updates": "752800", "lr": "2.49697e-05", "gnorm": "2.236", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "225180"} +[2022-08-02 01:23:49,979][train_inner][INFO] - {"epoch": 15, "update": 14.631, "loss": "2.024", "ppl": "4.07", "wps": "400877", "ups": "3.4", "wpb": "117981", "bsz": "256", "num_updates": "753000", "lr": "2.49495e-05", "gnorm": "2.274", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "225238"} +[2022-08-02 01:24:49,182][train_inner][INFO] - {"epoch": 15, "update": 14.634, "loss": "2.017", "ppl": "4.05", "wps": "399903", "ups": "3.38", "wpb": "118376", "bsz": "256", "num_updates": "753200", "lr": "2.49293e-05", "gnorm": "2.257", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "225298"} +[2022-08-02 01:25:48,668][train_inner][INFO] - {"epoch": 15, "update": 14.638, "loss": "2.021", "ppl": "4.06", "wps": "397135", "ups": "3.36", "wpb": "118119", "bsz": "256", "num_updates": "753400", "lr": "2.49091e-05", "gnorm": "2.292", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "225357"} +[2022-08-02 01:26:48,288][train_inner][INFO] - {"epoch": 15, "update": 14.642, "loss": "2.018", "ppl": "4.05", "wps": "399593", "ups": "3.35", "wpb": "119119", "bsz": "256", "num_updates": "753600", "lr": "2.48889e-05", "gnorm": "2.198", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "225417"} +[2022-08-02 01:27:47,690][train_inner][INFO] - {"epoch": 15, "update": 14.646, "loss": "2.025", "ppl": "4.07", "wps": "397993", "ups": "3.37", "wpb": "118206", "bsz": "256", "num_updates": "753800", "lr": "2.48687e-05", "gnorm": "2.287", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "225476"} +[2022-08-02 01:27:58,602][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 01:28:46,999][train_inner][INFO] - {"epoch": 15, "update": 14.65, "loss": "2.022", "ppl": "4.06", "wps": "400408", "ups": "3.37", "wpb": "118738", "bsz": "256", "num_updates": "754000", "lr": "2.48485e-05", "gnorm": "2.2", "loss_scale": "4", "train_wall": "59", "gb_free": "23.3", "wall": "225535"} +[2022-08-02 01:29:46,011][train_inner][INFO] - {"epoch": 15, "update": 14.654, "loss": "2.029", "ppl": "4.08", "wps": "399953", "ups": "3.39", "wpb": "118010", "bsz": "256", "num_updates": "754200", "lr": "2.48283e-05", "gnorm": "2.262", "loss_scale": "4", "train_wall": "59", "gb_free": "24.4", "wall": "225594"} +[2022-08-02 01:30:45,582][train_inner][INFO] - {"epoch": 15, "update": 14.658, "loss": "2.028", "ppl": "4.08", "wps": "397938", "ups": "3.36", "wpb": "118527", "bsz": "256", "num_updates": "754400", "lr": "2.48081e-05", "gnorm": "2.307", "loss_scale": "4", "train_wall": "59", "gb_free": "24.6", "wall": "225654"} +[2022-08-02 01:31:45,354][train_inner][INFO] - {"epoch": 15, "update": 14.662, "loss": "2.017", "ppl": "4.05", "wps": "396571", "ups": "3.35", "wpb": "118518", "bsz": "256", "num_updates": "754600", "lr": "2.47879e-05", "gnorm": "2.425", "loss_scale": "4", "train_wall": "59", "gb_free": "23", "wall": "225714"} +[2022-08-02 01:32:45,139][train_inner][INFO] - {"epoch": 15, "update": 14.666, "loss": "2.02", "ppl": "4.05", "wps": "396131", "ups": "3.35", "wpb": "118412", "bsz": "256", "num_updates": "754800", "lr": "2.47677e-05", "gnorm": "2.327", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "225774"} +[2022-08-02 01:33:44,539][train_inner][INFO] - {"epoch": 15, "update": 14.669, "loss": "2.02", "ppl": "4.06", "wps": "399072", "ups": "3.37", "wpb": "118525", "bsz": "256", "num_updates": "755000", "lr": "2.47475e-05", "gnorm": "2.227", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "225833"} +[2022-08-02 01:34:43,730][train_inner][INFO] - {"epoch": 15, "update": 14.673, "loss": "2.024", "ppl": "4.07", "wps": "399471", "ups": "3.38", "wpb": "118225", "bsz": "256", "num_updates": "755200", "lr": "2.47273e-05", "gnorm": "2.214", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "225892"} +[2022-08-02 01:35:43,350][train_inner][INFO] - {"epoch": 15, "update": 14.677, "loss": "2.018", "ppl": "4.05", "wps": "398733", "ups": "3.35", "wpb": "118862", "bsz": "256", "num_updates": "755400", "lr": "2.47071e-05", "gnorm": "2.247", "loss_scale": "4", "train_wall": "59", "gb_free": "23.4", "wall": "225952"} +[2022-08-02 01:36:42,488][train_inner][INFO] - {"epoch": 15, "update": 14.681, "loss": "2.025", "ppl": "4.07", "wps": "399393", "ups": "3.38", "wpb": "118096", "bsz": "256", "num_updates": "755600", "lr": "2.46869e-05", "gnorm": "2.247", "loss_scale": "4", "train_wall": "59", "gb_free": "24.7", "wall": "226011"} +[2022-08-02 01:37:41,917][train_inner][INFO] - {"epoch": 15, "update": 14.685, "loss": "2.025", "ppl": "4.07", "wps": "397417", "ups": "3.37", "wpb": "118090", "bsz": "256", "num_updates": "755800", "lr": "2.46667e-05", "gnorm": "2.137", "loss_scale": "4", "train_wall": "59", "gb_free": "24.1", "wall": "226070"} +[2022-08-02 01:38:09,352][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 01:38:41,945][train_inner][INFO] - {"epoch": 15, "update": 14.689, "loss": "2.017", "ppl": "4.05", "wps": "395144", "ups": "3.33", "wpb": "118596", "bsz": "256", "num_updates": "756000", "lr": "2.46465e-05", "gnorm": "2.118", "loss_scale": "4", "train_wall": "60", "gb_free": "21.9", "wall": "226130"} +[2022-08-02 01:39:41,721][train_inner][INFO] - {"epoch": 15, "update": 14.693, "loss": "2.024", "ppl": "4.07", "wps": "396619", "ups": "3.35", "wpb": "118540", "bsz": "256", "num_updates": "756200", "lr": "2.46263e-05", "gnorm": "2.233", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "226190"} +[2022-08-02 01:40:41,049][train_inner][INFO] - {"epoch": 15, "update": 14.697, "loss": "2.016", "ppl": "4.04", "wps": "401402", "ups": "3.37", "wpb": "119071", "bsz": "256", "num_updates": "756400", "lr": "2.46061e-05", "gnorm": "2.393", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "226249"} +[2022-08-02 01:41:40,847][train_inner][INFO] - {"epoch": 15, "update": 14.701, "loss": "2.024", "ppl": "4.07", "wps": "396674", "ups": "3.34", "wpb": "118601", "bsz": "256", "num_updates": "756600", "lr": "2.45859e-05", "gnorm": "2.421", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "226309"} +[2022-08-02 01:42:40,436][train_inner][INFO] - {"epoch": 15, "update": 14.704, "loss": "2.021", "ppl": "4.06", "wps": "398720", "ups": "3.36", "wpb": "118796", "bsz": "256", "num_updates": "756800", "lr": "2.45657e-05", "gnorm": "2.261", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "226369"} +[2022-08-02 01:43:40,174][train_inner][INFO] - {"epoch": 15, "update": 14.708, "loss": "2.02", "ppl": "4.06", "wps": "397362", "ups": "3.35", "wpb": "118688", "bsz": "256", "num_updates": "757000", "lr": "2.45455e-05", "gnorm": "2.422", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "226429"} +[2022-08-02 01:44:39,384][train_inner][INFO] - {"epoch": 15, "update": 14.712, "loss": "2.021", "ppl": "4.06", "wps": "398405", "ups": "3.38", "wpb": "117947", "bsz": "256", "num_updates": "757200", "lr": "2.45253e-05", "gnorm": "2.25", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "226488"} +[2022-08-02 01:45:38,680][train_inner][INFO] - {"epoch": 15, "update": 14.716, "loss": "2.024", "ppl": "4.07", "wps": "399240", "ups": "3.37", "wpb": "118366", "bsz": "256", "num_updates": "757400", "lr": "2.45051e-05", "gnorm": "2.419", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "226547"} +[2022-08-02 01:46:38,182][train_inner][INFO] - {"epoch": 15, "update": 14.72, "loss": "2.013", "ppl": "4.04", "wps": "397927", "ups": "3.36", "wpb": "118386", "bsz": "256", "num_updates": "757600", "lr": "2.44848e-05", "gnorm": "2.301", "loss_scale": "4", "train_wall": "59", "gb_free": "24.7", "wall": "226607"} +[2022-08-02 01:47:37,665][train_inner][INFO] - {"epoch": 15, "update": 14.724, "loss": "2.023", "ppl": "4.06", "wps": "398640", "ups": "3.36", "wpb": "118562", "bsz": "256", "num_updates": "757800", "lr": "2.44646e-05", "gnorm": "2.376", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "226666"} +[2022-08-02 01:48:06,881][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 01:48:37,450][train_inner][INFO] - {"epoch": 15, "update": 14.728, "loss": "2.023", "ppl": "4.06", "wps": "394206", "ups": "3.35", "wpb": "117837", "bsz": "256", "num_updates": "758000", "lr": "2.44444e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "226726"} +[2022-08-02 01:49:36,948][train_inner][INFO] - {"epoch": 15, "update": 14.732, "loss": "2.023", "ppl": "4.06", "wps": "399665", "ups": "3.36", "wpb": "118895", "bsz": "256", "num_updates": "758200", "lr": "2.44242e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "226785"} +[2022-08-02 01:50:36,312][train_inner][INFO] - {"epoch": 15, "update": 14.736, "loss": "2.027", "ppl": "4.08", "wps": "398596", "ups": "3.37", "wpb": "118309", "bsz": "256", "num_updates": "758400", "lr": "2.4404e-05", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "28.6", "wall": "226845"} +[2022-08-02 01:51:35,714][train_inner][INFO] - {"epoch": 15, "update": 14.739, "loss": "2.02", "ppl": "4.06", "wps": "397936", "ups": "3.37", "wpb": "118191", "bsz": "256", "num_updates": "758600", "lr": "2.43838e-05", "gnorm": "2.402", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "226904"} +[2022-08-02 01:52:36,017][train_inner][INFO] - {"epoch": 15, "update": 14.743, "loss": "2.024", "ppl": "4.07", "wps": "392923", "ups": "3.32", "wpb": "118471", "bsz": "256", "num_updates": "758800", "lr": "2.43636e-05", "gnorm": "2.274", "loss_scale": "2", "train_wall": "60", "gb_free": "24.6", "wall": "226964"} +[2022-08-02 01:53:35,497][train_inner][INFO] - {"epoch": 15, "update": 14.747, "loss": "2.022", "ppl": "4.06", "wps": "399400", "ups": "3.36", "wpb": "118781", "bsz": "256", "num_updates": "759000", "lr": "2.43434e-05", "gnorm": "2.319", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "227024"} +[2022-08-02 01:54:34,556][train_inner][INFO] - {"epoch": 15, "update": 14.751, "loss": "2.024", "ppl": "4.07", "wps": "398103", "ups": "3.39", "wpb": "117558", "bsz": "256", "num_updates": "759200", "lr": "2.43232e-05", "gnorm": "2.242", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "227083"} +[2022-08-02 01:55:33,976][train_inner][INFO] - {"epoch": 15, "update": 14.755, "loss": "2.022", "ppl": "4.06", "wps": "396898", "ups": "3.37", "wpb": "117918", "bsz": "256", "num_updates": "759400", "lr": "2.4303e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "227142"} +[2022-08-02 01:56:33,623][train_inner][INFO] - {"epoch": 15, "update": 14.759, "loss": "2.019", "ppl": "4.05", "wps": "396846", "ups": "3.35", "wpb": "118351", "bsz": "256", "num_updates": "759600", "lr": "2.42828e-05", "gnorm": "2.285", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "227202"} +[2022-08-02 01:57:33,008][train_inner][INFO] - {"epoch": 15, "update": 14.763, "loss": "2.018", "ppl": "4.05", "wps": "399331", "ups": "3.37", "wpb": "118571", "bsz": "256", "num_updates": "759800", "lr": "2.42626e-05", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "227261"} +[2022-08-02 01:58:32,487][train_inner][INFO] - {"epoch": 15, "update": 14.767, "loss": "2.024", "ppl": "4.07", "wps": "396338", "ups": "3.36", "wpb": "117870", "bsz": "256", "num_updates": "760000", "lr": "2.42424e-05", "gnorm": "2.407", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "227321"} +[2022-08-02 01:59:31,762][train_inner][INFO] - {"epoch": 15, "update": 14.77, "loss": "2.021", "ppl": "4.06", "wps": "400754", "ups": "3.37", "wpb": "118771", "bsz": "256", "num_updates": "760200", "lr": "2.42222e-05", "gnorm": "2.208", "loss_scale": "4", "train_wall": "59", "gb_free": "23.2", "wall": "227380"} +[2022-08-02 02:00:31,321][train_inner][INFO] - {"epoch": 15, "update": 14.774, "loss": "2.02", "ppl": "4.05", "wps": "399298", "ups": "3.36", "wpb": "118909", "bsz": "256", "num_updates": "760400", "lr": "2.4202e-05", "gnorm": "2.215", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "227440"} +[2022-08-02 02:01:30,871][train_inner][INFO] - {"epoch": 15, "update": 14.778, "loss": "2.02", "ppl": "4.05", "wps": "395960", "ups": "3.36", "wpb": "117896", "bsz": "256", "num_updates": "760600", "lr": "2.41818e-05", "gnorm": "2.175", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "227499"} +[2022-08-02 02:02:30,441][train_inner][INFO] - {"epoch": 15, "update": 14.782, "loss": "2.023", "ppl": "4.06", "wps": "396620", "ups": "3.36", "wpb": "118132", "bsz": "256", "num_updates": "760800", "lr": "2.41616e-05", "gnorm": "2.28", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "227559"} +[2022-08-02 02:03:30,474][train_inner][INFO] - {"epoch": 15, "update": 14.786, "loss": "2.027", "ppl": "4.07", "wps": "393714", "ups": "3.33", "wpb": "118178", "bsz": "256", "num_updates": "761000", "lr": "2.41414e-05", "gnorm": "2.302", "loss_scale": "4", "train_wall": "60", "gb_free": "21.6", "wall": "227619"} +[2022-08-02 02:04:30,262][train_inner][INFO] - {"epoch": 15, "update": 14.79, "loss": "2.017", "ppl": "4.05", "wps": "397482", "ups": "3.35", "wpb": "118823", "bsz": "256", "num_updates": "761200", "lr": "2.41212e-05", "gnorm": "2.381", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "227679"} +[2022-08-02 02:05:15,091][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 02:05:29,840][train_inner][INFO] - {"epoch": 15, "update": 14.794, "loss": "2.023", "ppl": "4.07", "wps": "398558", "ups": "3.36", "wpb": "118725", "bsz": "256", "num_updates": "761400", "lr": "2.4101e-05", "gnorm": "2.336", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "227738"} +[2022-08-02 02:06:29,193][train_inner][INFO] - {"epoch": 15, "update": 14.798, "loss": "2.016", "ppl": "4.04", "wps": "398446", "ups": "3.37", "wpb": "118244", "bsz": "256", "num_updates": "761600", "lr": "2.40808e-05", "gnorm": "2.286", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "227798"} +[2022-08-02 02:07:29,029][train_inner][INFO] - {"epoch": 15, "update": 14.802, "loss": "2.013", "ppl": "4.04", "wps": "396992", "ups": "3.34", "wpb": "118772", "bsz": "256", "num_updates": "761800", "lr": "2.40606e-05", "gnorm": "2.308", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "227857"} +[2022-08-02 02:08:28,265][train_inner][INFO] - {"epoch": 15, "update": 14.805, "loss": "2.013", "ppl": "4.04", "wps": "397824", "ups": "3.38", "wpb": "117827", "bsz": "256", "num_updates": "762000", "lr": "2.40404e-05", "gnorm": "2.245", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "227917"} +[2022-08-02 02:09:27,250][train_inner][INFO] - {"epoch": 15, "update": 14.809, "loss": "2.028", "ppl": "4.08", "wps": "400526", "ups": "3.39", "wpb": "118123", "bsz": "256", "num_updates": "762200", "lr": "2.40202e-05", "gnorm": "2.579", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "227976"} +[2022-08-02 02:10:26,529][train_inner][INFO] - {"epoch": 15, "update": 14.813, "loss": "2.019", "ppl": "4.05", "wps": "400831", "ups": "3.37", "wpb": "118805", "bsz": "256", "num_updates": "762400", "lr": "2.4e-05", "gnorm": "2.403", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "228035"} +[2022-08-02 02:11:26,240][train_inner][INFO] - {"epoch": 15, "update": 14.817, "loss": "2.016", "ppl": "4.05", "wps": "395942", "ups": "3.35", "wpb": "118210", "bsz": "256", "num_updates": "762600", "lr": "2.39798e-05", "gnorm": "2.19", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "228095"} +[2022-08-02 02:12:25,732][train_inner][INFO] - {"epoch": 15, "update": 14.821, "loss": "2.02", "ppl": "4.05", "wps": "399248", "ups": "3.36", "wpb": "118759", "bsz": "256", "num_updates": "762800", "lr": "2.39596e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "59", "gb_free": "28.2", "wall": "228154"} +[2022-08-02 02:13:25,469][train_inner][INFO] - {"epoch": 15, "update": 14.825, "loss": "2.016", "ppl": "4.05", "wps": "394451", "ups": "3.35", "wpb": "117816", "bsz": "256", "num_updates": "763000", "lr": "2.39394e-05", "gnorm": "2.273", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "228214"} +[2022-08-02 02:14:24,751][train_inner][INFO] - {"epoch": 15, "update": 14.829, "loss": "2.016", "ppl": "4.05", "wps": "400560", "ups": "3.37", "wpb": "118728", "bsz": "256", "num_updates": "763200", "lr": "2.39192e-05", "gnorm": "2.468", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "228273"} +[2022-08-02 02:15:24,018][train_inner][INFO] - {"epoch": 15, "update": 14.833, "loss": "2.023", "ppl": "4.06", "wps": "399303", "ups": "3.37", "wpb": "118328", "bsz": "256", "num_updates": "763400", "lr": "2.3899e-05", "gnorm": "2.337", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "228332"} +[2022-08-02 02:15:41,216][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 02:16:23,611][train_inner][INFO] - {"epoch": 15, "update": 14.837, "loss": "2.018", "ppl": "4.05", "wps": "394756", "ups": "3.36", "wpb": "117622", "bsz": "256", "num_updates": "763600", "lr": "2.38788e-05", "gnorm": "2.3", "loss_scale": "2", "train_wall": "59", "gb_free": "28.5", "wall": "228392"} +[2022-08-02 02:17:22,612][train_inner][INFO] - {"epoch": 15, "update": 14.84, "loss": "2.016", "ppl": "4.05", "wps": "400488", "ups": "3.39", "wpb": "118146", "bsz": "256", "num_updates": "763800", "lr": "2.38586e-05", "gnorm": "2.271", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "228451"} +[2022-08-02 02:18:22,080][train_inner][INFO] - {"epoch": 15, "update": 14.844, "loss": "2.018", "ppl": "4.05", "wps": "396386", "ups": "3.36", "wpb": "117859", "bsz": "256", "num_updates": "764000", "lr": "2.38384e-05", "gnorm": "2.167", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "228511"} +[2022-08-02 02:19:21,330][train_inner][INFO] - {"epoch": 15, "update": 14.848, "loss": "2.027", "ppl": "4.07", "wps": "397322", "ups": "3.38", "wpb": "117708", "bsz": "256", "num_updates": "764200", "lr": "2.38182e-05", "gnorm": "2.204", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "228570"} +[2022-08-02 02:20:20,589][train_inner][INFO] - {"epoch": 15, "update": 14.852, "loss": "2.019", "ppl": "4.05", "wps": "398629", "ups": "3.38", "wpb": "118109", "bsz": "256", "num_updates": "764400", "lr": "2.3798e-05", "gnorm": "2.427", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "228629"} +[2022-08-02 02:21:19,765][train_inner][INFO] - {"epoch": 15, "update": 14.856, "loss": "2.018", "ppl": "4.05", "wps": "399685", "ups": "3.38", "wpb": "118258", "bsz": "256", "num_updates": "764600", "lr": "2.37778e-05", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "228688"} +[2022-08-02 02:22:18,992][train_inner][INFO] - {"epoch": 15, "update": 14.86, "loss": "2.019", "ppl": "4.05", "wps": "398319", "ups": "3.38", "wpb": "117956", "bsz": "256", "num_updates": "764800", "lr": "2.37576e-05", "gnorm": "2.367", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "228747"} +[2022-08-02 02:23:18,413][train_inner][INFO] - {"epoch": 15, "update": 14.864, "loss": "2.017", "ppl": "4.05", "wps": "397077", "ups": "3.37", "wpb": "117972", "bsz": "256", "num_updates": "765000", "lr": "2.37374e-05", "gnorm": "2.362", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "228807"} +[2022-08-02 02:24:18,244][train_inner][INFO] - {"epoch": 15, "update": 14.868, "loss": "2.017", "ppl": "4.05", "wps": "394735", "ups": "3.34", "wpb": "118086", "bsz": "256", "num_updates": "765200", "lr": "2.37172e-05", "gnorm": "2.273", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "228867"} +[2022-08-02 02:25:17,715][train_inner][INFO] - {"epoch": 15, "update": 14.872, "loss": "2.023", "ppl": "4.07", "wps": "396689", "ups": "3.36", "wpb": "117957", "bsz": "256", "num_updates": "765400", "lr": "2.3697e-05", "gnorm": "2.36", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "228926"} +[2022-08-02 02:26:17,654][train_inner][INFO] - {"epoch": 15, "update": 14.875, "loss": "2.014", "ppl": "4.04", "wps": "395210", "ups": "3.34", "wpb": "118442", "bsz": "256", "num_updates": "765600", "lr": "2.36768e-05", "gnorm": "2.316", "loss_scale": "4", "train_wall": "60", "gb_free": "23.3", "wall": "228986"} +[2022-08-02 02:27:17,099][train_inner][INFO] - {"epoch": 15, "update": 14.879, "loss": "2.02", "ppl": "4.06", "wps": "397188", "ups": "3.36", "wpb": "118054", "bsz": "256", "num_updates": "765800", "lr": "2.36566e-05", "gnorm": "2.444", "loss_scale": "4", "train_wall": "59", "gb_free": "26.1", "wall": "229046"} +[2022-08-02 02:28:16,739][train_inner][INFO] - {"epoch": 15, "update": 14.883, "loss": "2.018", "ppl": "4.05", "wps": "397242", "ups": "3.35", "wpb": "118455", "bsz": "256", "num_updates": "766000", "lr": "2.36364e-05", "gnorm": "2.473", "loss_scale": "4", "train_wall": "59", "gb_free": "27.6", "wall": "229105"} +[2022-08-02 02:28:48,532][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 02:29:16,551][train_inner][INFO] - {"epoch": 15, "update": 14.887, "loss": "2.016", "ppl": "4.05", "wps": "396209", "ups": "3.34", "wpb": "118491", "bsz": "256", "num_updates": "766200", "lr": "2.36162e-05", "gnorm": "2.226", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "229165"} +[2022-08-02 02:30:16,030][train_inner][INFO] - {"epoch": 15, "update": 14.891, "loss": "2.015", "ppl": "4.04", "wps": "399431", "ups": "3.36", "wpb": "118788", "bsz": "256", "num_updates": "766400", "lr": "2.3596e-05", "gnorm": "2.333", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "229224"} +[2022-08-02 02:31:15,251][train_inner][INFO] - {"epoch": 15, "update": 14.895, "loss": "2.021", "ppl": "4.06", "wps": "398170", "ups": "3.38", "wpb": "117898", "bsz": "256", "num_updates": "766600", "lr": "2.35758e-05", "gnorm": "2.49", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "229284"} +[2022-08-02 02:32:14,688][train_inner][INFO] - {"epoch": 15, "update": 14.899, "loss": "2.018", "ppl": "4.05", "wps": "397700", "ups": "3.36", "wpb": "118189", "bsz": "256", "num_updates": "766800", "lr": "2.35556e-05", "gnorm": "2.267", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "229343"} +[2022-08-02 02:33:14,474][train_inner][INFO] - {"epoch": 15, "update": 14.903, "loss": "2.021", "ppl": "4.06", "wps": "396337", "ups": "3.35", "wpb": "118477", "bsz": "256", "num_updates": "767000", "lr": "2.35354e-05", "gnorm": "2.377", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "229403"} +[2022-08-02 02:34:13,533][train_inner][INFO] - {"epoch": 15, "update": 14.907, "loss": "2.022", "ppl": "4.06", "wps": "401931", "ups": "3.39", "wpb": "118687", "bsz": "256", "num_updates": "767200", "lr": "2.35152e-05", "gnorm": "2.286", "loss_scale": "2", "train_wall": "59", "gb_free": "27.9", "wall": "229462"} +[2022-08-02 02:35:13,144][train_inner][INFO] - {"epoch": 15, "update": 14.91, "loss": "2.015", "ppl": "4.04", "wps": "395233", "ups": "3.36", "wpb": "117800", "bsz": "256", "num_updates": "767400", "lr": "2.34949e-05", "gnorm": "2.327", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "229522"} +[2022-08-02 02:36:12,654][train_inner][INFO] - {"epoch": 15, "update": 14.914, "loss": "2.015", "ppl": "4.04", "wps": "397745", "ups": "3.36", "wpb": "118348", "bsz": "256", "num_updates": "767600", "lr": "2.34747e-05", "gnorm": "2.327", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "229581"} +[2022-08-02 02:37:12,056][train_inner][INFO] - {"epoch": 15, "update": 14.918, "loss": "2.018", "ppl": "4.05", "wps": "397298", "ups": "3.37", "wpb": "118001", "bsz": "256", "num_updates": "767800", "lr": "2.34545e-05", "gnorm": "2.243", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "229640"} +[2022-08-02 02:38:11,551][train_inner][INFO] - {"epoch": 15, "update": 14.922, "loss": "2.02", "ppl": "4.06", "wps": "398162", "ups": "3.36", "wpb": "118442", "bsz": "256", "num_updates": "768000", "lr": "2.34343e-05", "gnorm": "2.438", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "229700"} +[2022-08-02 02:39:10,800][train_inner][INFO] - {"epoch": 15, "update": 14.926, "loss": "2.016", "ppl": "4.05", "wps": "397813", "ups": "3.38", "wpb": "117850", "bsz": "256", "num_updates": "768200", "lr": "2.34141e-05", "gnorm": "2.188", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "229759"} +[2022-08-02 02:40:10,353][train_inner][INFO] - {"epoch": 15, "update": 14.93, "loss": "2.015", "ppl": "4.04", "wps": "397220", "ups": "3.36", "wpb": "118277", "bsz": "256", "num_updates": "768400", "lr": "2.33939e-05", "gnorm": "2.172", "loss_scale": "4", "train_wall": "59", "gb_free": "22.9", "wall": "229819"} +[2022-08-02 02:41:10,285][train_inner][INFO] - {"epoch": 15, "update": 14.934, "loss": "2.012", "ppl": "4.03", "wps": "395065", "ups": "3.34", "wpb": "118385", "bsz": "256", "num_updates": "768600", "lr": "2.33737e-05", "gnorm": "2.319", "loss_scale": "4", "train_wall": "60", "gb_free": "21.8", "wall": "229879"} +[2022-08-02 02:42:09,789][train_inner][INFO] - {"epoch": 15, "update": 14.938, "loss": "2.015", "ppl": "4.04", "wps": "397186", "ups": "3.36", "wpb": "118169", "bsz": "256", "num_updates": "768800", "lr": "2.33535e-05", "gnorm": "2.278", "loss_scale": "4", "train_wall": "59", "gb_free": "23.7", "wall": "229938"} +[2022-08-02 02:43:09,075][train_inner][INFO] - {"epoch": 15, "update": 14.941, "loss": "2.022", "ppl": "4.06", "wps": "397901", "ups": "3.37", "wpb": "117949", "bsz": "256", "num_updates": "769000", "lr": "2.33333e-05", "gnorm": "2.264", "loss_scale": "4", "train_wall": "59", "gb_free": "22.3", "wall": "229998"} +[2022-08-02 02:44:08,241][train_inner][INFO] - {"epoch": 15, "update": 14.945, "loss": "2.016", "ppl": "4.05", "wps": "398366", "ups": "3.38", "wpb": "117849", "bsz": "256", "num_updates": "769200", "lr": "2.33131e-05", "gnorm": "2.396", "loss_scale": "4", "train_wall": "59", "gb_free": "25.4", "wall": "230057"} +[2022-08-02 02:45:07,824][train_inner][INFO] - {"epoch": 15, "update": 14.949, "loss": "2.017", "ppl": "4.05", "wps": "398276", "ups": "3.36", "wpb": "118652", "bsz": "256", "num_updates": "769400", "lr": "2.32929e-05", "gnorm": "2.276", "loss_scale": "4", "train_wall": "59", "gb_free": "22", "wall": "230116"} +[2022-08-02 02:46:07,312][train_inner][INFO] - {"epoch": 15, "update": 14.953, "loss": "2.021", "ppl": "4.06", "wps": "396550", "ups": "3.36", "wpb": "117948", "bsz": "256", "num_updates": "769600", "lr": "2.32727e-05", "gnorm": "2.357", "loss_scale": "4", "train_wall": "59", "gb_free": "24.3", "wall": "230176"} +[2022-08-02 02:47:06,451][train_inner][INFO] - {"epoch": 15, "update": 14.957, "loss": "2.02", "ppl": "4.06", "wps": "398817", "ups": "3.38", "wpb": "117928", "bsz": "256", "num_updates": "769800", "lr": "2.32525e-05", "gnorm": "2.28", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "230235"} +[2022-08-02 02:48:06,006][train_inner][INFO] - {"epoch": 15, "update": 14.961, "loss": "2.023", "ppl": "4.06", "wps": "395897", "ups": "3.36", "wpb": "117887", "bsz": "256", "num_updates": "770000", "lr": "2.32323e-05", "gnorm": "2.416", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "230294"} +[2022-08-02 02:48:55,994][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 02:49:05,864][train_inner][INFO] - {"epoch": 15, "update": 14.965, "loss": "2.019", "ppl": "4.05", "wps": "396682", "ups": "3.34", "wpb": "118721", "bsz": "256", "num_updates": "770200", "lr": "2.32121e-05", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "230354"} +[2022-08-02 02:50:05,582][train_inner][INFO] - {"epoch": 15, "update": 14.969, "loss": "2.014", "ppl": "4.04", "wps": "395501", "ups": "3.35", "wpb": "118091", "bsz": "256", "num_updates": "770400", "lr": "2.31919e-05", "gnorm": "2.334", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "230414"} +[2022-08-02 02:51:05,496][train_inner][INFO] - {"epoch": 15, "update": 14.973, "loss": "2.016", "ppl": "4.04", "wps": "395337", "ups": "3.34", "wpb": "118432", "bsz": "256", "num_updates": "770600", "lr": "2.31717e-05", "gnorm": "2.443", "loss_scale": "2", "train_wall": "60", "gb_free": "27.2", "wall": "230474"} +[2022-08-02 02:52:04,492][train_inner][INFO] - {"epoch": 15, "update": 14.976, "loss": "2.019", "ppl": "4.05", "wps": "399554", "ups": "3.39", "wpb": "117860", "bsz": "256", "num_updates": "770800", "lr": "2.31515e-05", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "230533"} +[2022-08-02 02:53:03,906][train_inner][INFO] - {"epoch": 15, "update": 14.98, "loss": "2.016", "ppl": "4.05", "wps": "398121", "ups": "3.37", "wpb": "118268", "bsz": "256", "num_updates": "771000", "lr": "2.31313e-05", "gnorm": "2.231", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "230592"} +[2022-08-02 02:54:03,444][train_inner][INFO] - {"epoch": 15, "update": 14.984, "loss": "2.021", "ppl": "4.06", "wps": "398307", "ups": "3.36", "wpb": "118573", "bsz": "256", "num_updates": "771200", "lr": "2.31111e-05", "gnorm": "2.214", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "230652"} +[2022-08-02 02:55:02,856][train_inner][INFO] - {"epoch": 15, "update": 14.988, "loss": "2.014", "ppl": "4.04", "wps": "399481", "ups": "3.37", "wpb": "118668", "bsz": "256", "num_updates": "771400", "lr": "2.30909e-05", "gnorm": "2.243", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "230711"} +[2022-08-02 02:56:02,336][train_inner][INFO] - {"epoch": 15, "update": 14.992, "loss": "2.014", "ppl": "4.04", "wps": "398514", "ups": "3.36", "wpb": "118517", "bsz": "256", "num_updates": "771600", "lr": "2.30707e-05", "gnorm": "2.262", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "230771"} +[2022-08-02 02:57:02,078][train_inner][INFO] - {"epoch": 15, "update": 14.996, "loss": "2.011", "ppl": "4.03", "wps": "398501", "ups": "3.35", "wpb": "119036", "bsz": "256", "num_updates": "771800", "lr": "2.30505e-05", "gnorm": "2.385", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "230831"} +[2022-08-02 02:58:01,587][train_inner][INFO] - {"epoch": 15, "update": 15.0, "loss": "2.024", "ppl": "4.07", "wps": "397118", "ups": "3.36", "wpb": "118159", "bsz": "256", "num_updates": "772000", "lr": "2.30303e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "230890"} +[2022-08-02 02:58:05,481][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 02:58:28,181][valid][INFO] - {"epoch": 15, "valid_loss": "1.906", "valid_ppl": "3.75", "valid_wps": "1.5468e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "772013", "valid_best_loss": "1.906"} +[2022-08-02 02:58:28,184][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 772013 updates +[2022-08-02 02:58:28,185][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_best.pt +[2022-08-02 02:58:35,567][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_best.pt +[2022-08-02 02:58:49,143][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_best.pt (epoch 15 @ 772013 updates, score 1.906) (writing took 20.958460787311196 seconds) +[2022-08-02 02:58:49,143][fairseq_cli.train][INFO] - end of epoch 15 (average epoch stats below) +[2022-08-02 02:58:49,144][train][INFO] - {"epoch": 15, "train_loss": "2.024", "train_ppl": "4.07", "train_wps": "394843", "train_ups": "3.34", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "772013", "train_lr": "2.3029e-05", "train_gnorm": "2.269", "train_loss_scale": "2", "train_train_wall": "15228", "train_gb_free": "22.9", "train_wall": "230938"} +[2022-08-02 02:58:49,262][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 02:58:49,265][fairseq.trainer][INFO] - begin training epoch 16 +[2022-08-02 02:58:49,265][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 02:59:53,793][train_inner][INFO] - {"epoch": 16, "update": 15.004, "loss": "2.014", "ppl": "4.04", "wps": "210977", "ups": "1.78", "wpb": "118365", "bsz": "255.4", "num_updates": "772200", "lr": "2.30101e-05", "gnorm": "2.314", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "231002"} +[2022-08-02 03:00:24,486][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 03:00:54,225][train_inner][INFO] - {"epoch": 16, "update": 15.008, "loss": "2.02", "ppl": "4.06", "wps": "390855", "ups": "3.31", "wpb": "118099", "bsz": "256", "num_updates": "772400", "lr": "2.29899e-05", "gnorm": "2.361", "loss_scale": "2", "train_wall": "60", "gb_free": "24.9", "wall": "231063"} +[2022-08-02 03:01:53,493][train_inner][INFO] - {"epoch": 16, "update": 15.011, "loss": "2.014", "ppl": "4.04", "wps": "398574", "ups": "3.37", "wpb": "118111", "bsz": "256", "num_updates": "772600", "lr": "2.29697e-05", "gnorm": "2.399", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "231122"} +[2022-08-02 03:02:53,206][train_inner][INFO] - {"epoch": 16, "update": 15.015, "loss": "2.012", "ppl": "4.03", "wps": "393430", "ups": "3.35", "wpb": "117463", "bsz": "256", "num_updates": "772800", "lr": "2.29495e-05", "gnorm": "2.284", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "231182"} +[2022-08-02 03:03:52,293][train_inner][INFO] - {"epoch": 16, "update": 15.019, "loss": "2.015", "ppl": "4.04", "wps": "400009", "ups": "3.38", "wpb": "118177", "bsz": "256", "num_updates": "773000", "lr": "2.29293e-05", "gnorm": "2.408", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "231241"} +[2022-08-02 03:04:51,764][train_inner][INFO] - {"epoch": 16, "update": 15.023, "loss": "2.022", "ppl": "4.06", "wps": "397553", "ups": "3.36", "wpb": "118213", "bsz": "256", "num_updates": "773200", "lr": "2.29091e-05", "gnorm": "2.452", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "231300"} +[2022-08-02 03:05:51,170][train_inner][INFO] - {"epoch": 16, "update": 15.027, "loss": "2.016", "ppl": "4.04", "wps": "397193", "ups": "3.37", "wpb": "117979", "bsz": "256", "num_updates": "773400", "lr": "2.28889e-05", "gnorm": "2.208", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "231360"} +[2022-08-02 03:06:50,788][train_inner][INFO] - {"epoch": 16, "update": 15.031, "loss": "2.013", "ppl": "4.04", "wps": "395911", "ups": "3.35", "wpb": "118016", "bsz": "256", "num_updates": "773600", "lr": "2.28687e-05", "gnorm": "2.434", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "231419"} +[2022-08-02 03:07:50,476][train_inner][INFO] - {"epoch": 16, "update": 15.035, "loss": "2.011", "ppl": "4.03", "wps": "394947", "ups": "3.35", "wpb": "117866", "bsz": "256", "num_updates": "773800", "lr": "2.28485e-05", "gnorm": "2.518", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "231479"} +[2022-08-02 03:08:50,134][train_inner][INFO] - {"epoch": 16, "update": 15.039, "loss": "2.018", "ppl": "4.05", "wps": "396361", "ups": "3.35", "wpb": "118232", "bsz": "256", "num_updates": "774000", "lr": "2.28283e-05", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "231539"} +[2022-08-02 03:09:49,361][train_inner][INFO] - {"epoch": 16, "update": 15.042, "loss": "2.014", "ppl": "4.04", "wps": "395881", "ups": "3.38", "wpb": "117232", "bsz": "256", "num_updates": "774200", "lr": "2.28081e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "231598"} +[2022-08-02 03:10:48,963][train_inner][INFO] - {"epoch": 16, "update": 15.046, "loss": "2.011", "ppl": "4.03", "wps": "398884", "ups": "3.36", "wpb": "118870", "bsz": "256", "num_updates": "774400", "lr": "2.27879e-05", "gnorm": "2.274", "loss_scale": "4", "train_wall": "59", "gb_free": "26.2", "wall": "231657"} +[2022-08-02 03:11:47,237][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 03:11:48,754][train_inner][INFO] - {"epoch": 16, "update": 15.05, "loss": "2.012", "ppl": "4.03", "wps": "394472", "ups": "3.34", "wpb": "117930", "bsz": "256", "num_updates": "774600", "lr": "2.27677e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "231717"} +[2022-08-02 03:12:48,015][train_inner][INFO] - {"epoch": 16, "update": 15.054, "loss": "2.014", "ppl": "4.04", "wps": "399868", "ups": "3.37", "wpb": "118481", "bsz": "256", "num_updates": "774800", "lr": "2.27475e-05", "gnorm": "2.423", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "231776"} +[2022-08-02 03:13:47,507][train_inner][INFO] - {"epoch": 16, "update": 15.058, "loss": "2.019", "ppl": "4.05", "wps": "397309", "ups": "3.36", "wpb": "118184", "bsz": "256", "num_updates": "775000", "lr": "2.27273e-05", "gnorm": "2.27", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "231836"} +[2022-08-02 03:14:46,743][train_inner][INFO] - {"epoch": 16, "update": 15.062, "loss": "2.02", "ppl": "4.06", "wps": "398353", "ups": "3.38", "wpb": "117982", "bsz": "256", "num_updates": "775200", "lr": "2.27071e-05", "gnorm": "2.393", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "231895"} +[2022-08-02 03:15:46,461][train_inner][INFO] - {"epoch": 16, "update": 15.066, "loss": "2.014", "ppl": "4.04", "wps": "394611", "ups": "3.35", "wpb": "117826", "bsz": "256", "num_updates": "775400", "lr": "2.26869e-05", "gnorm": "2.327", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "231955"} +[2022-08-02 03:16:45,774][train_inner][INFO] - {"epoch": 16, "update": 15.07, "loss": "2.015", "ppl": "4.04", "wps": "399232", "ups": "3.37", "wpb": "118399", "bsz": "256", "num_updates": "775600", "lr": "2.26667e-05", "gnorm": "2.271", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "232014"} +[2022-08-02 03:17:45,269][train_inner][INFO] - {"epoch": 16, "update": 15.074, "loss": "2.014", "ppl": "4.04", "wps": "398821", "ups": "3.36", "wpb": "118638", "bsz": "256", "num_updates": "775800", "lr": "2.26465e-05", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "232074"} +[2022-08-02 03:18:44,732][train_inner][INFO] - {"epoch": 16, "update": 15.077, "loss": "2.018", "ppl": "4.05", "wps": "395372", "ups": "3.36", "wpb": "117549", "bsz": "256", "num_updates": "776000", "lr": "2.26263e-05", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "232133"} +[2022-08-02 03:19:44,021][train_inner][INFO] - {"epoch": 16, "update": 15.081, "loss": "2.012", "ppl": "4.03", "wps": "399931", "ups": "3.37", "wpb": "118557", "bsz": "256", "num_updates": "776200", "lr": "2.26061e-05", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "232192"} +[2022-08-02 03:20:43,917][train_inner][INFO] - {"epoch": 16, "update": 15.085, "loss": "2.01", "ppl": "4.03", "wps": "397316", "ups": "3.34", "wpb": "118986", "bsz": "256", "num_updates": "776400", "lr": "2.25859e-05", "gnorm": "2.193", "loss_scale": "2", "train_wall": "60", "gb_free": "23.4", "wall": "232252"} +[2022-08-02 03:21:43,531][train_inner][INFO] - {"epoch": 16, "update": 15.089, "loss": "2.006", "ppl": "4.02", "wps": "396942", "ups": "3.35", "wpb": "118316", "bsz": "256", "num_updates": "776600", "lr": "2.25657e-05", "gnorm": "2.419", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "232312"} +[2022-08-02 03:22:43,244][train_inner][INFO] - {"epoch": 16, "update": 15.093, "loss": "2.016", "ppl": "4.05", "wps": "397175", "ups": "3.35", "wpb": "118582", "bsz": "256", "num_updates": "776800", "lr": "2.25455e-05", "gnorm": "2.25", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "232372"} +[2022-08-02 03:23:20,205][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 03:23:43,043][train_inner][INFO] - {"epoch": 16, "update": 15.097, "loss": "2.012", "ppl": "4.03", "wps": "394570", "ups": "3.34", "wpb": "117973", "bsz": "256", "num_updates": "777000", "lr": "2.25253e-05", "gnorm": "2.435", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "232431"} +[2022-08-02 03:24:42,493][train_inner][INFO] - {"epoch": 16, "update": 15.101, "loss": "2.019", "ppl": "4.05", "wps": "399690", "ups": "3.36", "wpb": "118806", "bsz": "256", "num_updates": "777200", "lr": "2.25051e-05", "gnorm": "2.175", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "232491"} +[2022-08-02 03:25:41,745][train_inner][INFO] - {"epoch": 16, "update": 15.105, "loss": "2.017", "ppl": "4.05", "wps": "397573", "ups": "3.38", "wpb": "117785", "bsz": "256", "num_updates": "777400", "lr": "2.24848e-05", "gnorm": "2.32", "loss_scale": "2", "train_wall": "59", "gb_free": "27.4", "wall": "232550"} +[2022-08-02 03:26:41,426][train_inner][INFO] - {"epoch": 16, "update": 15.109, "loss": "2.004", "ppl": "4.01", "wps": "395554", "ups": "3.35", "wpb": "118034", "bsz": "256", "num_updates": "777600", "lr": "2.24646e-05", "gnorm": "2.302", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "232610"} +[2022-08-02 03:27:40,810][train_inner][INFO] - {"epoch": 16, "update": 15.112, "loss": "2.014", "ppl": "4.04", "wps": "399370", "ups": "3.37", "wpb": "118580", "bsz": "256", "num_updates": "777800", "lr": "2.24444e-05", "gnorm": "2.423", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "232669"} +[2022-08-02 03:28:40,327][train_inner][INFO] - {"epoch": 16, "update": 15.116, "loss": "2.01", "ppl": "4.03", "wps": "397935", "ups": "3.36", "wpb": "118419", "bsz": "256", "num_updates": "778000", "lr": "2.24242e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "28.4", "wall": "232729"} +[2022-08-02 03:29:40,160][train_inner][INFO] - {"epoch": 16, "update": 15.12, "loss": "2.02", "ppl": "4.06", "wps": "395467", "ups": "3.34", "wpb": "118308", "bsz": "256", "num_updates": "778200", "lr": "2.2404e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "60", "gb_free": "24.2", "wall": "232789"} +[2022-08-02 03:30:39,385][train_inner][INFO] - {"epoch": 16, "update": 15.124, "loss": "2.014", "ppl": "4.04", "wps": "398953", "ups": "3.38", "wpb": "118139", "bsz": "256", "num_updates": "778400", "lr": "2.23838e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "232848"} +[2022-08-02 03:31:38,306][train_inner][INFO] - {"epoch": 16, "update": 15.128, "loss": "2.011", "ppl": "4.03", "wps": "401262", "ups": "3.39", "wpb": "118214", "bsz": "256", "num_updates": "778600", "lr": "2.23636e-05", "gnorm": "2.439", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "232907"} +[2022-08-02 03:32:38,039][train_inner][INFO] - {"epoch": 16, "update": 15.132, "loss": "2.012", "ppl": "4.03", "wps": "396415", "ups": "3.35", "wpb": "118395", "bsz": "256", "num_updates": "778800", "lr": "2.23434e-05", "gnorm": "2.321", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "232966"} +[2022-08-02 03:33:37,192][train_inner][INFO] - {"epoch": 16, "update": 15.136, "loss": "2.021", "ppl": "4.06", "wps": "399305", "ups": "3.38", "wpb": "118099", "bsz": "256", "num_updates": "779000", "lr": "2.23232e-05", "gnorm": "2.306", "loss_scale": "4", "train_wall": "59", "gb_free": "22.8", "wall": "233026"} +[2022-08-02 03:34:36,805][train_inner][INFO] - {"epoch": 16, "update": 15.14, "loss": "2.016", "ppl": "4.04", "wps": "396367", "ups": "3.36", "wpb": "118142", "bsz": "256", "num_updates": "779200", "lr": "2.2303e-05", "gnorm": "2.43", "loss_scale": "4", "train_wall": "59", "gb_free": "26.7", "wall": "233085"} +[2022-08-02 03:35:36,220][train_inner][INFO] - {"epoch": 16, "update": 15.144, "loss": "2.017", "ppl": "4.05", "wps": "396520", "ups": "3.37", "wpb": "117795", "bsz": "256", "num_updates": "779400", "lr": "2.22828e-05", "gnorm": "2.169", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "233145"} +[2022-08-02 03:36:11,870][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 03:36:35,663][train_inner][INFO] - {"epoch": 16, "update": 15.147, "loss": "2.017", "ppl": "4.05", "wps": "398715", "ups": "3.36", "wpb": "118504", "bsz": "256", "num_updates": "779600", "lr": "2.22626e-05", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "233204"} +[2022-08-02 03:37:35,108][train_inner][INFO] - {"epoch": 16, "update": 15.151, "loss": "2.008", "ppl": "4.02", "wps": "399800", "ups": "3.36", "wpb": "118829", "bsz": "256", "num_updates": "779800", "lr": "2.22424e-05", "gnorm": "2.552", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "233264"} +[2022-08-02 03:38:34,306][train_inner][INFO] - {"epoch": 16, "update": 15.155, "loss": "2.017", "ppl": "4.05", "wps": "398567", "ups": "3.38", "wpb": "117971", "bsz": "256", "num_updates": "780000", "lr": "2.22222e-05", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "233323"} +[2022-08-02 03:39:33,838][train_inner][INFO] - {"epoch": 16, "update": 15.159, "loss": "2.009", "ppl": "4.02", "wps": "398595", "ups": "3.36", "wpb": "118645", "bsz": "256", "num_updates": "780200", "lr": "2.2202e-05", "gnorm": "2.254", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "233382"} +[2022-08-02 03:40:33,334][train_inner][INFO] - {"epoch": 16, "update": 15.163, "loss": "2.022", "ppl": "4.06", "wps": "396016", "ups": "3.36", "wpb": "117807", "bsz": "256", "num_updates": "780400", "lr": "2.21818e-05", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "27", "wall": "233442"} +[2022-08-02 03:41:32,588][train_inner][INFO] - {"epoch": 16, "update": 15.167, "loss": "2.021", "ppl": "4.06", "wps": "398996", "ups": "3.38", "wpb": "118210", "bsz": "256", "num_updates": "780600", "lr": "2.21616e-05", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "233501"} +[2022-08-02 03:42:32,506][train_inner][INFO] - {"epoch": 16, "update": 15.171, "loss": "2.012", "ppl": "4.03", "wps": "394986", "ups": "3.34", "wpb": "118333", "bsz": "256", "num_updates": "780800", "lr": "2.21414e-05", "gnorm": "2.475", "loss_scale": "2", "train_wall": "60", "gb_free": "25.7", "wall": "233561"} +[2022-08-02 03:43:31,852][train_inner][INFO] - {"epoch": 16, "update": 15.175, "loss": "2.01", "ppl": "4.03", "wps": "399461", "ups": "3.37", "wpb": "118531", "bsz": "256", "num_updates": "781000", "lr": "2.21212e-05", "gnorm": "2.283", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "233620"} +[2022-08-02 03:44:31,095][train_inner][INFO] - {"epoch": 16, "update": 15.179, "loss": "2.014", "ppl": "4.04", "wps": "398557", "ups": "3.38", "wpb": "118058", "bsz": "256", "num_updates": "781200", "lr": "2.2101e-05", "gnorm": "2.364", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "233680"} +[2022-08-02 03:45:31,032][train_inner][INFO] - {"epoch": 16, "update": 15.182, "loss": "2.009", "ppl": "4.02", "wps": "395233", "ups": "3.34", "wpb": "118444", "bsz": "256", "num_updates": "781400", "lr": "2.20808e-05", "gnorm": "2.242", "loss_scale": "2", "train_wall": "60", "gb_free": "25.1", "wall": "233739"} +[2022-08-02 03:46:30,501][train_inner][INFO] - {"epoch": 16, "update": 15.186, "loss": "2.009", "ppl": "4.03", "wps": "398501", "ups": "3.36", "wpb": "118491", "bsz": "256", "num_updates": "781600", "lr": "2.20606e-05", "gnorm": "2.205", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "233799"} +[2022-08-02 03:47:30,385][train_inner][INFO] - {"epoch": 16, "update": 15.19, "loss": "2.011", "ppl": "4.03", "wps": "396086", "ups": "3.34", "wpb": "118594", "bsz": "256", "num_updates": "781800", "lr": "2.20404e-05", "gnorm": "2.344", "loss_scale": "4", "train_wall": "60", "gb_free": "22.9", "wall": "233859"} +[2022-08-02 03:48:29,458][train_inner][INFO] - {"epoch": 16, "update": 15.194, "loss": "2.007", "ppl": "4.02", "wps": "401948", "ups": "3.39", "wpb": "118721", "bsz": "256", "num_updates": "782000", "lr": "2.20202e-05", "gnorm": "2.226", "loss_scale": "4", "train_wall": "59", "gb_free": "22.2", "wall": "233918"} +[2022-08-02 03:49:28,567][train_inner][INFO] - {"epoch": 16, "update": 15.198, "loss": "2.007", "ppl": "4.02", "wps": "399359", "ups": "3.38", "wpb": "118029", "bsz": "256", "num_updates": "782200", "lr": "2.2e-05", "gnorm": "2.368", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "233977"} +[2022-08-02 03:50:27,754][train_inner][INFO] - {"epoch": 16, "update": 15.202, "loss": "2.013", "ppl": "4.04", "wps": "398899", "ups": "3.38", "wpb": "118047", "bsz": "256", "num_updates": "782400", "lr": "2.19798e-05", "gnorm": "2.186", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "234036"} +[2022-08-02 03:50:51,967][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 03:51:27,566][train_inner][INFO] - {"epoch": 16, "update": 15.206, "loss": "2.019", "ppl": "4.05", "wps": "395468", "ups": "3.34", "wpb": "118267", "bsz": "256", "num_updates": "782600", "lr": "2.19596e-05", "gnorm": "2.53", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "234096"} +[2022-08-02 03:52:26,483][train_inner][INFO] - {"epoch": 16, "update": 15.21, "loss": "2.013", "ppl": "4.04", "wps": "399520", "ups": "3.39", "wpb": "117692", "bsz": "256", "num_updates": "782800", "lr": "2.19394e-05", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "234155"} +[2022-08-02 03:53:25,969][train_inner][INFO] - {"epoch": 16, "update": 15.213, "loss": "2.015", "ppl": "4.04", "wps": "394486", "ups": "3.36", "wpb": "117332", "bsz": "256", "num_updates": "783000", "lr": "2.19192e-05", "gnorm": "2.204", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "234214"} +[2022-08-02 03:54:25,455][train_inner][INFO] - {"epoch": 16, "update": 15.217, "loss": "2.015", "ppl": "4.04", "wps": "397476", "ups": "3.36", "wpb": "118220", "bsz": "256", "num_updates": "783200", "lr": "2.1899e-05", "gnorm": "2.327", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "234274"} +[2022-08-02 03:55:24,741][train_inner][INFO] - {"epoch": 16, "update": 15.221, "loss": "2.009", "ppl": "4.02", "wps": "398944", "ups": "3.37", "wpb": "118259", "bsz": "256", "num_updates": "783400", "lr": "2.18788e-05", "gnorm": "2.524", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "234333"} +[2022-08-02 03:56:24,374][train_inner][INFO] - {"epoch": 16, "update": 15.225, "loss": "2.012", "ppl": "4.03", "wps": "395888", "ups": "3.35", "wpb": "118039", "bsz": "256", "num_updates": "783600", "lr": "2.18586e-05", "gnorm": "2.351", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "234393"} +[2022-08-02 03:57:23,999][train_inner][INFO] - {"epoch": 16, "update": 15.229, "loss": "2.012", "ppl": "4.03", "wps": "398287", "ups": "3.35", "wpb": "118739", "bsz": "256", "num_updates": "783800", "lr": "2.18384e-05", "gnorm": "2.337", "loss_scale": "2", "train_wall": "59", "gb_free": "26.9", "wall": "234452"} +[2022-08-02 03:58:22,982][train_inner][INFO] - {"epoch": 16, "update": 15.233, "loss": "2.012", "ppl": "4.03", "wps": "400274", "ups": "3.39", "wpb": "118046", "bsz": "256", "num_updates": "784000", "lr": "2.18182e-05", "gnorm": "2.335", "loss_scale": "2", "train_wall": "59", "gb_free": "29", "wall": "234511"} +[2022-08-02 03:59:22,509][train_inner][INFO] - {"epoch": 16, "update": 15.237, "loss": "2.006", "ppl": "4.02", "wps": "397404", "ups": "3.36", "wpb": "118281", "bsz": "256", "num_updates": "784200", "lr": "2.1798e-05", "gnorm": "2.303", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "234571"} +[2022-08-02 04:00:22,132][train_inner][INFO] - {"epoch": 16, "update": 15.241, "loss": "2.01", "ppl": "4.03", "wps": "396164", "ups": "3.35", "wpb": "118100", "bsz": "256", "num_updates": "784400", "lr": "2.17778e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "234631"} +[2022-08-02 04:01:04,130][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 04:01:21,902][train_inner][INFO] - {"epoch": 16, "update": 15.245, "loss": "2.013", "ppl": "4.04", "wps": "396250", "ups": "3.35", "wpb": "118418", "bsz": "256", "num_updates": "784600", "lr": "2.17576e-05", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "234690"} +[2022-08-02 04:02:21,481][train_inner][INFO] - {"epoch": 16, "update": 15.248, "loss": "2.014", "ppl": "4.04", "wps": "396210", "ups": "3.36", "wpb": "118029", "bsz": "256", "num_updates": "784800", "lr": "2.17374e-05", "gnorm": "2.386", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "234750"} +[2022-08-02 04:03:20,871][train_inner][INFO] - {"epoch": 16, "update": 15.252, "loss": "2.012", "ppl": "4.03", "wps": "399380", "ups": "3.37", "wpb": "118595", "bsz": "256", "num_updates": "785000", "lr": "2.17172e-05", "gnorm": "2.416", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "234809"} +[2022-08-02 04:04:20,775][train_inner][INFO] - {"epoch": 16, "update": 15.256, "loss": "2.012", "ppl": "4.03", "wps": "395094", "ups": "3.34", "wpb": "118337", "bsz": "256", "num_updates": "785200", "lr": "2.1697e-05", "gnorm": "2.384", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "234869"} +[2022-08-02 04:05:20,103][train_inner][INFO] - {"epoch": 16, "update": 15.26, "loss": "2.008", "ppl": "4.02", "wps": "400360", "ups": "3.37", "wpb": "118763", "bsz": "256", "num_updates": "785400", "lr": "2.16768e-05", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "234929"} +[2022-08-02 04:06:19,569][train_inner][INFO] - {"epoch": 16, "update": 15.264, "loss": "2.019", "ppl": "4.05", "wps": "396090", "ups": "3.36", "wpb": "117768", "bsz": "256", "num_updates": "785600", "lr": "2.16566e-05", "gnorm": "2.367", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "234988"} +[2022-08-02 04:07:18,840][train_inner][INFO] - {"epoch": 16, "update": 15.268, "loss": "2.012", "ppl": "4.03", "wps": "400364", "ups": "3.37", "wpb": "118650", "bsz": "256", "num_updates": "785800", "lr": "2.16364e-05", "gnorm": "2.407", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "235047"} +[2022-08-02 04:08:18,102][train_inner][INFO] - {"epoch": 16, "update": 15.272, "loss": "2.008", "ppl": "4.02", "wps": "399836", "ups": "3.37", "wpb": "118474", "bsz": "256", "num_updates": "786000", "lr": "2.16162e-05", "gnorm": "2.213", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "235107"} +[2022-08-02 04:09:17,327][train_inner][INFO] - {"epoch": 16, "update": 15.276, "loss": "2.008", "ppl": "4.02", "wps": "400886", "ups": "3.38", "wpb": "118713", "bsz": "256", "num_updates": "786200", "lr": "2.1596e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "235166"} +[2022-08-02 04:10:18,079][train_inner][INFO] - {"epoch": 16, "update": 15.28, "loss": "2.004", "ppl": "4.01", "wps": "388897", "ups": "3.29", "wpb": "118131", "bsz": "256", "num_updates": "786400", "lr": "2.15758e-05", "gnorm": "2.212", "loss_scale": "2", "train_wall": "60", "gb_free": "22.9", "wall": "235227"} +[2022-08-02 04:11:17,707][train_inner][INFO] - {"epoch": 16, "update": 15.283, "loss": "2.011", "ppl": "4.03", "wps": "397480", "ups": "3.35", "wpb": "118503", "bsz": "256", "num_updates": "786600", "lr": "2.15556e-05", "gnorm": "2.401", "loss_scale": "4", "train_wall": "59", "gb_free": "23.8", "wall": "235286"} +[2022-08-02 04:12:16,978][train_inner][INFO] - {"epoch": 16, "update": 15.287, "loss": "2.01", "ppl": "4.03", "wps": "398370", "ups": "3.37", "wpb": "118058", "bsz": "256", "num_updates": "786800", "lr": "2.15354e-05", "gnorm": "2.325", "loss_scale": "4", "train_wall": "59", "gb_free": "31", "wall": "235345"} +[2022-08-02 04:13:16,577][train_inner][INFO] - {"epoch": 16, "update": 15.291, "loss": "2.016", "ppl": "4.05", "wps": "395278", "ups": "3.36", "wpb": "117789", "bsz": "256", "num_updates": "787000", "lr": "2.15152e-05", "gnorm": "2.34", "loss_scale": "4", "train_wall": "59", "gb_free": "21.5", "wall": "235405"} +[2022-08-02 04:14:16,207][train_inner][INFO] - {"epoch": 16, "update": 15.295, "loss": "2.01", "ppl": "4.03", "wps": "399365", "ups": "3.35", "wpb": "119070", "bsz": "256", "num_updates": "787200", "lr": "2.14949e-05", "gnorm": "2.428", "loss_scale": "4", "train_wall": "59", "gb_free": "25.8", "wall": "235465"} +[2022-08-02 04:15:15,862][train_inner][INFO] - {"epoch": 16, "update": 15.299, "loss": "2.015", "ppl": "4.04", "wps": "396421", "ups": "3.35", "wpb": "118242", "bsz": "256", "num_updates": "787400", "lr": "2.14747e-05", "gnorm": "2.372", "loss_scale": "4", "train_wall": "59", "gb_free": "25.2", "wall": "235524"} +[2022-08-02 04:16:15,366][train_inner][INFO] - {"epoch": 16, "update": 15.303, "loss": "2.007", "ppl": "4.02", "wps": "396974", "ups": "3.36", "wpb": "118106", "bsz": "256", "num_updates": "787600", "lr": "2.14545e-05", "gnorm": "2.372", "loss_scale": "4", "train_wall": "59", "gb_free": "21.9", "wall": "235584"} +[2022-08-02 04:16:56,603][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 04:17:15,307][train_inner][INFO] - {"epoch": 16, "update": 15.307, "loss": "2.007", "ppl": "4.02", "wps": "397071", "ups": "3.34", "wpb": "119004", "bsz": "256", "num_updates": "787800", "lr": "2.14343e-05", "gnorm": "2.365", "loss_scale": "2", "train_wall": "60", "gb_free": "23", "wall": "235644"} +[2022-08-02 04:18:14,692][train_inner][INFO] - {"epoch": 16, "update": 15.311, "loss": "2.009", "ppl": "4.02", "wps": "397948", "ups": "3.37", "wpb": "118160", "bsz": "256", "num_updates": "788000", "lr": "2.14141e-05", "gnorm": "2.236", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "235703"} +[2022-08-02 04:19:14,494][train_inner][INFO] - {"epoch": 16, "update": 15.315, "loss": "2.008", "ppl": "4.02", "wps": "397268", "ups": "3.34", "wpb": "118785", "bsz": "256", "num_updates": "788200", "lr": "2.13939e-05", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "235763"} +[2022-08-02 04:20:14,071][train_inner][INFO] - {"epoch": 16, "update": 15.318, "loss": "2.014", "ppl": "4.04", "wps": "397420", "ups": "3.36", "wpb": "118384", "bsz": "256", "num_updates": "788400", "lr": "2.13737e-05", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "235823"} +[2022-08-02 04:21:13,670][train_inner][INFO] - {"epoch": 16, "update": 15.322, "loss": "2.012", "ppl": "4.03", "wps": "396478", "ups": "3.36", "wpb": "118148", "bsz": "256", "num_updates": "788600", "lr": "2.13535e-05", "gnorm": "2.313", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "235882"} +[2022-08-02 04:22:13,240][train_inner][INFO] - {"epoch": 16, "update": 15.326, "loss": "2.009", "ppl": "4.03", "wps": "397360", "ups": "3.36", "wpb": "118354", "bsz": "256", "num_updates": "788800", "lr": "2.13333e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "235942"} +[2022-08-02 04:23:12,603][train_inner][INFO] - {"epoch": 16, "update": 15.33, "loss": "2.013", "ppl": "4.04", "wps": "398344", "ups": "3.37", "wpb": "118233", "bsz": "256", "num_updates": "789000", "lr": "2.13131e-05", "gnorm": "2.241", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "236001"} +[2022-08-02 04:24:11,815][train_inner][INFO] - {"epoch": 16, "update": 15.334, "loss": "2.016", "ppl": "4.04", "wps": "398333", "ups": "3.38", "wpb": "117930", "bsz": "256", "num_updates": "789200", "lr": "2.12929e-05", "gnorm": "2.386", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "236060"} +[2022-08-02 04:25:11,484][train_inner][INFO] - {"epoch": 16, "update": 15.338, "loss": "2.007", "ppl": "4.02", "wps": "396982", "ups": "3.35", "wpb": "118436", "bsz": "256", "num_updates": "789400", "lr": "2.12727e-05", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "236120"} +[2022-08-02 04:26:10,712][train_inner][INFO] - {"epoch": 16, "update": 15.342, "loss": "2.006", "ppl": "4.02", "wps": "400632", "ups": "3.38", "wpb": "118641", "bsz": "256", "num_updates": "789600", "lr": "2.12525e-05", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "27.8", "wall": "236179"} +[2022-08-02 04:27:10,212][train_inner][INFO] - {"epoch": 16, "update": 15.346, "loss": "2.01", "ppl": "4.03", "wps": "398489", "ups": "3.36", "wpb": "118549", "bsz": "256", "num_updates": "789800", "lr": "2.12323e-05", "gnorm": "2.489", "loss_scale": "4", "train_wall": "59", "gb_free": "28.8", "wall": "236239"} +[2022-08-02 04:28:09,517][train_inner][INFO] - {"epoch": 16, "update": 15.349, "loss": "2.015", "ppl": "4.04", "wps": "398168", "ups": "3.37", "wpb": "118067", "bsz": "256", "num_updates": "790000", "lr": "2.12121e-05", "gnorm": "2.551", "loss_scale": "4", "train_wall": "59", "gb_free": "23.6", "wall": "236298"} +[2022-08-02 04:28:47,269][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 04:29:09,341][train_inner][INFO] - {"epoch": 16, "update": 15.353, "loss": "2.006", "ppl": "4.02", "wps": "397049", "ups": "3.34", "wpb": "118764", "bsz": "256", "num_updates": "790200", "lr": "2.11919e-05", "gnorm": "2.513", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "236358"} +[2022-08-02 04:30:08,992][train_inner][INFO] - {"epoch": 16, "update": 15.357, "loss": "2.009", "ppl": "4.02", "wps": "398011", "ups": "3.35", "wpb": "118707", "bsz": "256", "num_updates": "790400", "lr": "2.11717e-05", "gnorm": "2.316", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "236417"} +[2022-08-02 04:31:08,532][train_inner][INFO] - {"epoch": 16, "update": 15.361, "loss": "2.006", "ppl": "4.02", "wps": "399426", "ups": "3.36", "wpb": "118908", "bsz": "256", "num_updates": "790600", "lr": "2.11515e-05", "gnorm": "2.38", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "236477"} +[2022-08-02 04:32:08,104][train_inner][INFO] - {"epoch": 16, "update": 15.365, "loss": "2.007", "ppl": "4.02", "wps": "397085", "ups": "3.36", "wpb": "118274", "bsz": "256", "num_updates": "790800", "lr": "2.11313e-05", "gnorm": "2.422", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "236537"} +[2022-08-02 04:33:07,569][train_inner][INFO] - {"epoch": 16, "update": 15.369, "loss": "2.008", "ppl": "4.02", "wps": "396394", "ups": "3.36", "wpb": "117859", "bsz": "256", "num_updates": "791000", "lr": "2.11111e-05", "gnorm": "2.259", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "236596"} +[2022-08-02 04:34:07,227][train_inner][INFO] - {"epoch": 16, "update": 15.373, "loss": "2.006", "ppl": "4.02", "wps": "398951", "ups": "3.35", "wpb": "119001", "bsz": "256", "num_updates": "791200", "lr": "2.10909e-05", "gnorm": "2.453", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "236656"} +[2022-08-02 04:35:06,736][train_inner][INFO] - {"epoch": 16, "update": 15.377, "loss": "2.012", "ppl": "4.03", "wps": "398241", "ups": "3.36", "wpb": "118493", "bsz": "256", "num_updates": "791400", "lr": "2.10707e-05", "gnorm": "2.457", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "236715"} +[2022-08-02 04:36:06,529][train_inner][INFO] - {"epoch": 16, "update": 15.381, "loss": "2.008", "ppl": "4.02", "wps": "396392", "ups": "3.34", "wpb": "118508", "bsz": "256", "num_updates": "791600", "lr": "2.10505e-05", "gnorm": "2.276", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "236775"} +[2022-08-02 04:37:05,965][train_inner][INFO] - {"epoch": 16, "update": 15.384, "loss": "2.009", "ppl": "4.03", "wps": "398534", "ups": "3.37", "wpb": "118434", "bsz": "256", "num_updates": "791800", "lr": "2.10303e-05", "gnorm": "2.279", "loss_scale": "2", "train_wall": "59", "gb_free": "25.5", "wall": "236834"} +[2022-08-02 04:38:05,442][train_inner][INFO] - {"epoch": 16, "update": 15.388, "loss": "2.007", "ppl": "4.02", "wps": "398025", "ups": "3.36", "wpb": "118366", "bsz": "256", "num_updates": "792000", "lr": "2.10101e-05", "gnorm": "2.265", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "236894"} +[2022-08-02 04:38:59,258][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 04:39:04,929][train_inner][INFO] - {"epoch": 16, "update": 15.392, "loss": "2.012", "ppl": "4.03", "wps": "396584", "ups": "3.36", "wpb": "117958", "bsz": "256", "num_updates": "792200", "lr": "2.09899e-05", "gnorm": "2.25", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "236953"} +[2022-08-02 04:40:04,573][train_inner][INFO] - {"epoch": 16, "update": 15.396, "loss": "2.005", "ppl": "4.01", "wps": "396959", "ups": "3.35", "wpb": "118380", "bsz": "256", "num_updates": "792400", "lr": "2.09697e-05", "gnorm": "2.388", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "237013"} +[2022-08-02 04:41:04,331][train_inner][INFO] - {"epoch": 16, "update": 15.4, "loss": "2.012", "ppl": "4.03", "wps": "395915", "ups": "3.35", "wpb": "118295", "bsz": "256", "num_updates": "792600", "lr": "2.09495e-05", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "237073"} +[2022-08-02 04:42:03,843][train_inner][INFO] - {"epoch": 16, "update": 15.404, "loss": "2.008", "ppl": "4.02", "wps": "399098", "ups": "3.36", "wpb": "118755", "bsz": "256", "num_updates": "792800", "lr": "2.09293e-05", "gnorm": "2.221", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "237132"} +[2022-08-02 04:43:03,554][train_inner][INFO] - {"epoch": 16, "update": 15.408, "loss": "2.004", "ppl": "4.01", "wps": "395636", "ups": "3.35", "wpb": "118118", "bsz": "256", "num_updates": "793000", "lr": "2.09091e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "237192"} +[2022-08-02 04:44:02,952][train_inner][INFO] - {"epoch": 16, "update": 15.412, "loss": "2.01", "ppl": "4.03", "wps": "397341", "ups": "3.37", "wpb": "118004", "bsz": "256", "num_updates": "793200", "lr": "2.08889e-05", "gnorm": "2.381", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "237251"} +[2022-08-02 04:45:02,307][train_inner][INFO] - {"epoch": 16, "update": 15.416, "loss": "2.005", "ppl": "4.01", "wps": "401099", "ups": "3.37", "wpb": "119036", "bsz": "256", "num_updates": "793400", "lr": "2.08687e-05", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "237311"} +[2022-08-02 04:46:01,580][train_inner][INFO] - {"epoch": 16, "update": 15.419, "loss": "2.008", "ppl": "4.02", "wps": "398578", "ups": "3.37", "wpb": "118123", "bsz": "256", "num_updates": "793600", "lr": "2.08485e-05", "gnorm": "2.261", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "237370"} +[2022-08-02 04:47:00,990][train_inner][INFO] - {"epoch": 16, "update": 15.423, "loss": "2.01", "ppl": "4.03", "wps": "398169", "ups": "3.37", "wpb": "118276", "bsz": "256", "num_updates": "793800", "lr": "2.08283e-05", "gnorm": "2.237", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "237429"} +[2022-08-02 04:48:00,727][train_inner][INFO] - {"epoch": 16, "update": 15.427, "loss": "2.008", "ppl": "4.02", "wps": "396965", "ups": "3.35", "wpb": "118566", "bsz": "256", "num_updates": "794000", "lr": "2.08081e-05", "gnorm": "2.458", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "237489"} +[2022-08-02 04:49:00,199][train_inner][INFO] - {"epoch": 16, "update": 15.431, "loss": "2.02", "ppl": "4.06", "wps": "397257", "ups": "3.36", "wpb": "118127", "bsz": "256", "num_updates": "794200", "lr": "2.07879e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "237549"} +[2022-08-02 04:49:59,839][train_inner][INFO] - {"epoch": 16, "update": 15.435, "loss": "2.01", "ppl": "4.03", "wps": "397056", "ups": "3.35", "wpb": "118403", "bsz": "256", "num_updates": "794400", "lr": "2.07677e-05", "gnorm": "2.413", "loss_scale": "4", "train_wall": "59", "gb_free": "27.9", "wall": "237608"} +[2022-08-02 04:50:57,414][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 04:50:59,784][train_inner][INFO] - {"epoch": 16, "update": 15.439, "loss": "2.006", "ppl": "4.02", "wps": "395433", "ups": "3.34", "wpb": "118520", "bsz": "256", "num_updates": "794600", "lr": "2.07475e-05", "gnorm": "2.349", "loss_scale": "2", "train_wall": "60", "gb_free": "22.2", "wall": "237668"} +[2022-08-02 04:51:59,209][train_inner][INFO] - {"epoch": 16, "update": 15.443, "loss": "2.015", "ppl": "4.04", "wps": "397335", "ups": "3.37", "wpb": "118057", "bsz": "256", "num_updates": "794800", "lr": "2.07273e-05", "gnorm": "2.388", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "237728"} +[2022-08-02 04:52:58,866][train_inner][INFO] - {"epoch": 16, "update": 15.447, "loss": "2.005", "ppl": "4.01", "wps": "398436", "ups": "3.35", "wpb": "118847", "bsz": "256", "num_updates": "795000", "lr": "2.07071e-05", "gnorm": "2.516", "loss_scale": "2", "train_wall": "59", "gb_free": "30.8", "wall": "237787"} +[2022-08-02 04:53:58,305][train_inner][INFO] - {"epoch": 16, "update": 15.451, "loss": "2.002", "ppl": "4", "wps": "399410", "ups": "3.36", "wpb": "118702", "bsz": "256", "num_updates": "795200", "lr": "2.06869e-05", "gnorm": "2.366", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "237847"} +[2022-08-02 04:54:57,487][train_inner][INFO] - {"epoch": 16, "update": 15.454, "loss": "2.008", "ppl": "4.02", "wps": "400468", "ups": "3.38", "wpb": "118503", "bsz": "256", "num_updates": "795400", "lr": "2.06667e-05", "gnorm": "2.531", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "237906"} +[2022-08-02 04:55:57,047][train_inner][INFO] - {"epoch": 16, "update": 15.458, "loss": "2.013", "ppl": "4.04", "wps": "398089", "ups": "3.36", "wpb": "118549", "bsz": "256", "num_updates": "795600", "lr": "2.06465e-05", "gnorm": "2.387", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "237965"} +[2022-08-02 04:56:56,608][train_inner][INFO] - {"epoch": 16, "update": 15.462, "loss": "2.006", "ppl": "4.02", "wps": "399190", "ups": "3.36", "wpb": "118880", "bsz": "256", "num_updates": "795800", "lr": "2.06263e-05", "gnorm": "2.57", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "238025"} +[2022-08-02 04:57:56,000][train_inner][INFO] - {"epoch": 16, "update": 15.466, "loss": "2.006", "ppl": "4.02", "wps": "397496", "ups": "3.37", "wpb": "118039", "bsz": "256", "num_updates": "796000", "lr": "2.06061e-05", "gnorm": "2.304", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "238084"} +[2022-08-02 04:58:56,575][train_inner][INFO] - {"epoch": 16, "update": 15.47, "loss": "2.01", "ppl": "4.03", "wps": "392136", "ups": "3.3", "wpb": "118768", "bsz": "256", "num_updates": "796200", "lr": "2.05859e-05", "gnorm": "2.538", "loss_scale": "2", "train_wall": "60", "gb_free": "26.7", "wall": "238145"} +[2022-08-02 04:59:55,715][train_inner][INFO] - {"epoch": 16, "update": 15.474, "loss": "2.011", "ppl": "4.03", "wps": "399549", "ups": "3.38", "wpb": "118145", "bsz": "256", "num_updates": "796400", "lr": "2.05657e-05", "gnorm": "2.404", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "238204"} +[2022-08-02 05:00:55,283][train_inner][INFO] - {"epoch": 16, "update": 15.478, "loss": "2.005", "ppl": "4.01", "wps": "397491", "ups": "3.36", "wpb": "118388", "bsz": "256", "num_updates": "796600", "lr": "2.05455e-05", "gnorm": "2.266", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "238264"} +[2022-08-02 05:01:55,057][train_inner][INFO] - {"epoch": 16, "update": 15.482, "loss": "2.004", "ppl": "4.01", "wps": "398737", "ups": "3.35", "wpb": "119170", "bsz": "256", "num_updates": "796800", "lr": "2.05253e-05", "gnorm": "2.317", "loss_scale": "4", "train_wall": "59", "gb_free": "25.3", "wall": "238323"} +[2022-08-02 05:02:17,022][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 05:02:55,327][train_inner][INFO] - {"epoch": 16, "update": 15.486, "loss": "2.005", "ppl": "4.01", "wps": "392154", "ups": "3.32", "wpb": "118175", "bsz": "256", "num_updates": "797000", "lr": "2.05051e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "238384"} +[2022-08-02 05:03:55,181][train_inner][INFO] - {"epoch": 16, "update": 15.489, "loss": "2.007", "ppl": "4.02", "wps": "395422", "ups": "3.34", "wpb": "118338", "bsz": "256", "num_updates": "797200", "lr": "2.04848e-05", "gnorm": "2.283", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "238444"} +[2022-08-02 05:04:54,926][train_inner][INFO] - {"epoch": 16, "update": 15.493, "loss": "2.006", "ppl": "4.02", "wps": "396732", "ups": "3.35", "wpb": "118514", "bsz": "256", "num_updates": "797400", "lr": "2.04646e-05", "gnorm": "2.416", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "238503"} +[2022-08-02 05:05:54,499][train_inner][INFO] - {"epoch": 16, "update": 15.497, "loss": "2", "ppl": "4", "wps": "398317", "ups": "3.36", "wpb": "118643", "bsz": "256", "num_updates": "797600", "lr": "2.04444e-05", "gnorm": "2.351", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "238563"} +[2022-08-02 05:06:54,165][train_inner][INFO] - {"epoch": 16, "update": 15.501, "loss": "2.011", "ppl": "4.03", "wps": "397629", "ups": "3.35", "wpb": "118624", "bsz": "256", "num_updates": "797800", "lr": "2.04242e-05", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "238623"} +[2022-08-02 05:07:53,886][train_inner][INFO] - {"epoch": 16, "update": 15.505, "loss": "2.005", "ppl": "4.02", "wps": "394594", "ups": "3.35", "wpb": "117827", "bsz": "256", "num_updates": "798000", "lr": "2.0404e-05", "gnorm": "2.457", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "238682"} +[2022-08-02 05:08:53,435][train_inner][INFO] - {"epoch": 16, "update": 15.509, "loss": "2.007", "ppl": "4.02", "wps": "398790", "ups": "3.36", "wpb": "118738", "bsz": "256", "num_updates": "798200", "lr": "2.03838e-05", "gnorm": "2.394", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "238742"} +[2022-08-02 05:09:53,160][train_inner][INFO] - {"epoch": 16, "update": 15.513, "loss": "2.013", "ppl": "4.04", "wps": "397524", "ups": "3.35", "wpb": "118709", "bsz": "256", "num_updates": "798400", "lr": "2.03636e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "238802"} +[2022-08-02 05:10:52,420][train_inner][INFO] - {"epoch": 16, "update": 15.517, "loss": "2.006", "ppl": "4.02", "wps": "397898", "ups": "3.37", "wpb": "117896", "bsz": "256", "num_updates": "798600", "lr": "2.03434e-05", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "238861"} +[2022-08-02 05:11:51,652][train_inner][INFO] - {"epoch": 16, "update": 15.52, "loss": "2.007", "ppl": "4.02", "wps": "397538", "ups": "3.38", "wpb": "117735", "bsz": "256", "num_updates": "798800", "lr": "2.03232e-05", "gnorm": "2.291", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "238920"} +[2022-08-02 05:12:39,976][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 05:12:51,103][train_inner][INFO] - {"epoch": 16, "update": 15.524, "loss": "2.004", "ppl": "4.01", "wps": "396928", "ups": "3.36", "wpb": "117989", "bsz": "256", "num_updates": "799000", "lr": "2.0303e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "27.3", "wall": "238980"} +[2022-08-02 05:13:50,483][train_inner][INFO] - {"epoch": 16, "update": 15.528, "loss": "2.009", "ppl": "4.03", "wps": "400059", "ups": "3.37", "wpb": "118775", "bsz": "256", "num_updates": "799200", "lr": "2.02828e-05", "gnorm": "2.309", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "239039"} +[2022-08-02 05:14:50,061][train_inner][INFO] - {"epoch": 16, "update": 15.532, "loss": "2.005", "ppl": "4.01", "wps": "396539", "ups": "3.36", "wpb": "118125", "bsz": "256", "num_updates": "799400", "lr": "2.02626e-05", "gnorm": "2.654", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "239098"} +[2022-08-02 05:15:49,320][train_inner][INFO] - {"epoch": 16, "update": 15.536, "loss": "2.011", "ppl": "4.03", "wps": "397794", "ups": "3.38", "wpb": "117864", "bsz": "256", "num_updates": "799600", "lr": "2.02424e-05", "gnorm": "2.249", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "239158"} +[2022-08-02 05:16:48,920][train_inner][INFO] - {"epoch": 16, "update": 15.54, "loss": "2.008", "ppl": "4.02", "wps": "394840", "ups": "3.36", "wpb": "117661", "bsz": "256", "num_updates": "799800", "lr": "2.02222e-05", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "239217"} +[2022-08-02 05:17:48,639][train_inner][INFO] - {"epoch": 16, "update": 15.544, "loss": "2", "ppl": "4", "wps": "396026", "ups": "3.35", "wpb": "118250", "bsz": "256", "num_updates": "800000", "lr": "2.0202e-05", "gnorm": "2.333", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "239277"} +[2022-08-02 05:17:48,639][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 05:18:11,559][valid][INFO] - {"epoch": 16, "valid_loss": "1.9", "valid_ppl": "3.73", "valid_wps": "1.57557e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "800000", "valid_best_loss": "1.9"} +[2022-08-02 05:18:11,562][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 16 @ 800000 updates +[2022-08-02 05:18:11,563][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_16_800000.pt +[2022-08-02 05:18:18,759][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_16_800000.pt +[2022-08-02 05:18:42,125][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_16_800000.pt (epoch 16 @ 800000 updates, score 1.9) (writing took 30.562791945412755 seconds) +[2022-08-02 05:19:41,464][train_inner][INFO] - {"epoch": 16, "update": 15.548, "loss": "2.014", "ppl": "4.04", "wps": "209182", "ups": "1.77", "wpb": "118005", "bsz": "256", "num_updates": "800200", "lr": "2.01818e-05", "gnorm": "2.345", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "239390"} +[2022-08-02 05:20:41,107][train_inner][INFO] - {"epoch": 16, "update": 15.552, "loss": "2.009", "ppl": "4.02", "wps": "397009", "ups": "3.35", "wpb": "118394", "bsz": "256", "num_updates": "800400", "lr": "2.01616e-05", "gnorm": "2.747", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "239450"} +[2022-08-02 05:21:40,561][train_inner][INFO] - {"epoch": 16, "update": 15.555, "loss": "2.007", "ppl": "4.02", "wps": "399283", "ups": "3.36", "wpb": "118694", "bsz": "256", "num_updates": "800600", "lr": "2.01414e-05", "gnorm": "2.536", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "239509"} +[2022-08-02 05:22:39,827][train_inner][INFO] - {"epoch": 16, "update": 15.559, "loss": "2.013", "ppl": "4.04", "wps": "398100", "ups": "3.37", "wpb": "117967", "bsz": "256", "num_updates": "800800", "lr": "2.01212e-05", "gnorm": "2.564", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "239568"} +[2022-08-02 05:23:39,331][train_inner][INFO] - {"epoch": 16, "update": 15.563, "loss": "2.012", "ppl": "4.03", "wps": "397420", "ups": "3.36", "wpb": "118241", "bsz": "256", "num_updates": "801000", "lr": "2.0101e-05", "gnorm": "2.555", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "239628"} +[2022-08-02 05:24:18,631][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 05:24:38,819][train_inner][INFO] - {"epoch": 16, "update": 15.567, "loss": "2.007", "ppl": "4.02", "wps": "395706", "ups": "3.36", "wpb": "117698", "bsz": "256", "num_updates": "801200", "lr": "2.00808e-05", "gnorm": "2.481", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "239687"} +[2022-08-02 05:25:38,708][train_inner][INFO] - {"epoch": 16, "update": 15.571, "loss": "2.008", "ppl": "4.02", "wps": "395078", "ups": "3.34", "wpb": "118303", "bsz": "256", "num_updates": "801400", "lr": "2.00606e-05", "gnorm": "2.57", "loss_scale": "2", "train_wall": "60", "gb_free": "24.8", "wall": "239747"} +[2022-08-02 05:26:38,365][train_inner][INFO] - {"epoch": 16, "update": 15.575, "loss": "2.011", "ppl": "4.03", "wps": "397756", "ups": "3.35", "wpb": "118645", "bsz": "256", "num_updates": "801600", "lr": "2.00404e-05", "gnorm": "2.527", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "239807"} +[2022-08-02 05:27:37,748][train_inner][INFO] - {"epoch": 16, "update": 15.579, "loss": "2.009", "ppl": "4.02", "wps": "399013", "ups": "3.37", "wpb": "118472", "bsz": "256", "num_updates": "801800", "lr": "2.00202e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "239866"} +[2022-08-02 05:28:37,064][train_inner][INFO] - {"epoch": 16, "update": 15.583, "loss": "2.011", "ppl": "4.03", "wps": "400334", "ups": "3.37", "wpb": "118731", "bsz": "256", "num_updates": "802000", "lr": "2e-05", "gnorm": "2.569", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "239925"} +[2022-08-02 05:29:36,671][train_inner][INFO] - {"epoch": 16, "update": 15.587, "loss": "2.011", "ppl": "4.03", "wps": "396226", "ups": "3.36", "wpb": "118089", "bsz": "256", "num_updates": "802200", "lr": "1.99798e-05", "gnorm": "2.49", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "239985"} +[2022-08-02 05:30:36,236][train_inner][INFO] - {"epoch": 16, "update": 15.59, "loss": "2.007", "ppl": "4.02", "wps": "396661", "ups": "3.36", "wpb": "118134", "bsz": "256", "num_updates": "802400", "lr": "1.99596e-05", "gnorm": "2.284", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "240045"} +[2022-08-02 05:31:35,873][train_inner][INFO] - {"epoch": 16, "update": 15.594, "loss": "2", "ppl": "4", "wps": "396020", "ups": "3.35", "wpb": "118086", "bsz": "256", "num_updates": "802600", "lr": "1.99394e-05", "gnorm": "2.386", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "240104"} +[2022-08-02 05:32:35,267][train_inner][INFO] - {"epoch": 16, "update": 15.598, "loss": "2.002", "ppl": "4.01", "wps": "399383", "ups": "3.37", "wpb": "118605", "bsz": "256", "num_updates": "802800", "lr": "1.99192e-05", "gnorm": "2.402", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "240164"} +[2022-08-02 05:33:35,059][train_inner][INFO] - {"epoch": 16, "update": 15.602, "loss": "2.013", "ppl": "4.04", "wps": "395802", "ups": "3.34", "wpb": "118328", "bsz": "256", "num_updates": "803000", "lr": "1.9899e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "240223"} +[2022-08-02 05:34:34,710][train_inner][INFO] - {"epoch": 16, "update": 15.606, "loss": "2.005", "ppl": "4.01", "wps": "396759", "ups": "3.35", "wpb": "118333", "bsz": "256", "num_updates": "803200", "lr": "1.98788e-05", "gnorm": "2.311", "loss_scale": "4", "train_wall": "59", "gb_free": "21.6", "wall": "240283"} +[2022-08-02 05:35:17,684][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 05:35:34,431][train_inner][INFO] - {"epoch": 16, "update": 15.61, "loss": "2.007", "ppl": "4.02", "wps": "396302", "ups": "3.35", "wpb": "118337", "bsz": "256", "num_updates": "803400", "lr": "1.98586e-05", "gnorm": "2.324", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "240343"} +[2022-08-02 05:36:33,676][train_inner][INFO] - {"epoch": 16, "update": 15.614, "loss": "1.999", "ppl": "4", "wps": "398275", "ups": "3.38", "wpb": "117979", "bsz": "256", "num_updates": "803600", "lr": "1.98384e-05", "gnorm": "2.571", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "240402"} +[2022-08-02 05:37:32,879][train_inner][INFO] - {"epoch": 16, "update": 15.618, "loss": "2.003", "ppl": "4.01", "wps": "401763", "ups": "3.38", "wpb": "118927", "bsz": "256", "num_updates": "803800", "lr": "1.98182e-05", "gnorm": "2.219", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "240461"} +[2022-08-02 05:38:32,065][train_inner][INFO] - {"epoch": 16, "update": 15.622, "loss": "2.005", "ppl": "4.01", "wps": "397834", "ups": "3.38", "wpb": "117730", "bsz": "256", "num_updates": "804000", "lr": "1.9798e-05", "gnorm": "2.533", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "240520"} +[2022-08-02 05:39:31,625][train_inner][INFO] - {"epoch": 16, "update": 15.625, "loss": "2.005", "ppl": "4.01", "wps": "397821", "ups": "3.36", "wpb": "118471", "bsz": "256", "num_updates": "804200", "lr": "1.97778e-05", "gnorm": "2.384", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "240580"} +[2022-08-02 05:40:31,323][train_inner][INFO] - {"epoch": 16, "update": 15.629, "loss": "2.003", "ppl": "4.01", "wps": "397055", "ups": "3.35", "wpb": "118515", "bsz": "256", "num_updates": "804400", "lr": "1.97576e-05", "gnorm": "2.292", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "240640"} +[2022-08-02 05:41:30,562][train_inner][INFO] - {"epoch": 16, "update": 15.633, "loss": "2", "ppl": "4", "wps": "400482", "ups": "3.38", "wpb": "118620", "bsz": "256", "num_updates": "804600", "lr": "1.97374e-05", "gnorm": "2.436", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "240699"} +[2022-08-02 05:42:30,359][train_inner][INFO] - {"epoch": 16, "update": 15.637, "loss": "2.007", "ppl": "4.02", "wps": "396878", "ups": "3.34", "wpb": "118660", "bsz": "255.9", "num_updates": "804800", "lr": "1.97172e-05", "gnorm": "2.409", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "240759"} +[2022-08-02 05:43:29,800][train_inner][INFO] - {"epoch": 16, "update": 15.641, "loss": "2.002", "ppl": "4.01", "wps": "398287", "ups": "3.36", "wpb": "118372", "bsz": "256", "num_updates": "805000", "lr": "1.9697e-05", "gnorm": "2.457", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "240818"} +[2022-08-02 05:44:29,187][train_inner][INFO] - {"epoch": 16, "update": 15.645, "loss": "2", "ppl": "4", "wps": "398082", "ups": "3.37", "wpb": "118203", "bsz": "256", "num_updates": "805200", "lr": "1.96768e-05", "gnorm": "2.433", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "240878"} +[2022-08-02 05:45:27,774][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 05:45:28,364][train_inner][INFO] - {"epoch": 16, "update": 15.649, "loss": "2.005", "ppl": "4.01", "wps": "400400", "ups": "3.38", "wpb": "118473", "bsz": "256", "num_updates": "805400", "lr": "1.96566e-05", "gnorm": "2.346", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "240937"} +[2022-08-02 05:46:27,628][train_inner][INFO] - {"epoch": 16, "update": 15.653, "loss": "2.013", "ppl": "4.03", "wps": "399912", "ups": "3.37", "wpb": "118501", "bsz": "256", "num_updates": "805600", "lr": "1.96364e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "240996"} +[2022-08-02 05:47:26,849][train_inner][INFO] - {"epoch": 16, "update": 15.657, "loss": "2.006", "ppl": "4.02", "wps": "397492", "ups": "3.38", "wpb": "117698", "bsz": "256", "num_updates": "805800", "lr": "1.96162e-05", "gnorm": "2.349", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "241055"} +[2022-08-02 05:48:26,230][train_inner][INFO] - {"epoch": 16, "update": 15.66, "loss": "2.008", "ppl": "4.02", "wps": "397855", "ups": "3.37", "wpb": "118124", "bsz": "256", "num_updates": "806000", "lr": "1.9596e-05", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "241115"} +[2022-08-02 05:49:26,080][train_inner][INFO] - {"epoch": 16, "update": 15.664, "loss": "2.007", "ppl": "4.02", "wps": "395033", "ups": "3.34", "wpb": "118214", "bsz": "256", "num_updates": "806200", "lr": "1.95758e-05", "gnorm": "2.405", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "241175"} +[2022-08-02 05:50:25,545][train_inner][INFO] - {"epoch": 16, "update": 15.668, "loss": "2.008", "ppl": "4.02", "wps": "397394", "ups": "3.36", "wpb": "118153", "bsz": "256", "num_updates": "806400", "lr": "1.95556e-05", "gnorm": "2.326", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "241234"} +[2022-08-02 05:51:25,295][train_inner][INFO] - {"epoch": 16, "update": 15.672, "loss": "2.001", "ppl": "4", "wps": "397456", "ups": "3.35", "wpb": "118739", "bsz": "255.9", "num_updates": "806600", "lr": "1.95354e-05", "gnorm": "2.411", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "241294"} +[2022-08-02 05:52:24,597][train_inner][INFO] - {"epoch": 16, "update": 15.676, "loss": "2.008", "ppl": "4.02", "wps": "399684", "ups": "3.37", "wpb": "118510", "bsz": "256", "num_updates": "806800", "lr": "1.95152e-05", "gnorm": "2.469", "loss_scale": "2", "train_wall": "59", "gb_free": "27.3", "wall": "241353"} +[2022-08-02 05:53:23,809][train_inner][INFO] - {"epoch": 16, "update": 15.68, "loss": "2.006", "ppl": "4.02", "wps": "398667", "ups": "3.38", "wpb": "118028", "bsz": "256", "num_updates": "807000", "lr": "1.94949e-05", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "241412"} +[2022-08-02 05:54:23,170][train_inner][INFO] - {"epoch": 16, "update": 15.684, "loss": "2.006", "ppl": "4.02", "wps": "396172", "ups": "3.37", "wpb": "117585", "bsz": "256", "num_updates": "807200", "lr": "1.94747e-05", "gnorm": "2.579", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "241472"} +[2022-08-02 05:55:22,765][train_inner][INFO] - {"epoch": 16, "update": 15.688, "loss": "2.006", "ppl": "4.02", "wps": "398109", "ups": "3.36", "wpb": "118625", "bsz": "256", "num_updates": "807400", "lr": "1.94545e-05", "gnorm": "2.316", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "241531"} +[2022-08-02 05:55:39,231][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 05:56:22,760][train_inner][INFO] - {"epoch": 16, "update": 15.691, "loss": "1.998", "ppl": "3.99", "wps": "394692", "ups": "3.33", "wpb": "118399", "bsz": "256", "num_updates": "807600", "lr": "1.94343e-05", "gnorm": "2.298", "loss_scale": "2", "train_wall": "60", "gb_free": "26.4", "wall": "241591"} +[2022-08-02 05:57:22,029][train_inner][INFO] - {"epoch": 16, "update": 15.695, "loss": "2.011", "ppl": "4.03", "wps": "398293", "ups": "3.37", "wpb": "118031", "bsz": "256", "num_updates": "807800", "lr": "1.94141e-05", "gnorm": "2.3", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "241650"} +[2022-08-02 05:58:21,548][train_inner][INFO] - {"epoch": 16, "update": 15.699, "loss": "2.013", "ppl": "4.04", "wps": "395793", "ups": "3.36", "wpb": "117785", "bsz": "256", "num_updates": "808000", "lr": "1.93939e-05", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "241710"} +[2022-08-02 05:59:20,755][train_inner][INFO] - {"epoch": 16, "update": 15.703, "loss": "2.006", "ppl": "4.02", "wps": "400299", "ups": "3.38", "wpb": "118502", "bsz": "256", "num_updates": "808200", "lr": "1.93737e-05", "gnorm": "2.21", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "241769"} +[2022-08-02 06:00:20,461][train_inner][INFO] - {"epoch": 16, "update": 15.707, "loss": "2.002", "ppl": "4.01", "wps": "397194", "ups": "3.35", "wpb": "118573", "bsz": "256", "num_updates": "808400", "lr": "1.93535e-05", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "241829"} +[2022-08-02 06:01:20,304][train_inner][INFO] - {"epoch": 16, "update": 15.711, "loss": "2.003", "ppl": "4.01", "wps": "396506", "ups": "3.34", "wpb": "118639", "bsz": "256", "num_updates": "808600", "lr": "1.93333e-05", "gnorm": "2.458", "loss_scale": "2", "train_wall": "60", "gb_free": "24.8", "wall": "241889"} +[2022-08-02 06:02:19,409][train_inner][INFO] - {"epoch": 16, "update": 15.715, "loss": "2", "ppl": "4", "wps": "401072", "ups": "3.38", "wpb": "118527", "bsz": "256", "num_updates": "808800", "lr": "1.93131e-05", "gnorm": "2.363", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "241948"} +[2022-08-02 06:03:18,564][train_inner][INFO] - {"epoch": 16, "update": 15.719, "loss": "2.008", "ppl": "4.02", "wps": "398966", "ups": "3.38", "wpb": "118002", "bsz": "256", "num_updates": "809000", "lr": "1.92929e-05", "gnorm": "2.435", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "242007"} +[2022-08-02 06:04:17,941][train_inner][INFO] - {"epoch": 16, "update": 15.723, "loss": "2.005", "ppl": "4.01", "wps": "398124", "ups": "3.37", "wpb": "118198", "bsz": "256", "num_updates": "809200", "lr": "1.92727e-05", "gnorm": "2.366", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "242066"} +[2022-08-02 06:05:17,404][train_inner][INFO] - {"epoch": 16, "update": 15.726, "loss": "2.003", "ppl": "4.01", "wps": "398615", "ups": "3.36", "wpb": "118513", "bsz": "256", "num_updates": "809400", "lr": "1.92525e-05", "gnorm": "2.639", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "242126"} +[2022-08-02 06:06:16,936][train_inner][INFO] - {"epoch": 16, "update": 15.73, "loss": "2", "ppl": "4", "wps": "398448", "ups": "3.36", "wpb": "118601", "bsz": "256", "num_updates": "809600", "lr": "1.92323e-05", "gnorm": "2.282", "loss_scale": "4", "train_wall": "59", "gb_free": "24.8", "wall": "242185"} +[2022-08-02 06:07:15,886][train_inner][INFO] - {"epoch": 16, "update": 15.734, "loss": "2.006", "ppl": "4.02", "wps": "401050", "ups": "3.39", "wpb": "118209", "bsz": "256", "num_updates": "809800", "lr": "1.92121e-05", "gnorm": "2.317", "loss_scale": "4", "train_wall": "59", "gb_free": "21.3", "wall": "242244"} +[2022-08-02 06:07:21,351][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 06:08:15,600][train_inner][INFO] - {"epoch": 16, "update": 15.738, "loss": "2.008", "ppl": "4.02", "wps": "396607", "ups": "3.35", "wpb": "118414", "bsz": "256", "num_updates": "810000", "lr": "1.91919e-05", "gnorm": "2.287", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "242304"} +[2022-08-02 06:09:16,017][train_inner][INFO] - {"epoch": 16, "update": 15.742, "loss": "2.01", "ppl": "4.03", "wps": "390277", "ups": "3.31", "wpb": "117897", "bsz": "256", "num_updates": "810200", "lr": "1.91717e-05", "gnorm": "2.455", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "242364"} +[2022-08-02 06:10:15,473][train_inner][INFO] - {"epoch": 16, "update": 15.746, "loss": "2.004", "ppl": "4.01", "wps": "399476", "ups": "3.36", "wpb": "118755", "bsz": "256", "num_updates": "810400", "lr": "1.91515e-05", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "242424"} +[2022-08-02 06:11:15,030][train_inner][INFO] - {"epoch": 16, "update": 15.75, "loss": "1.998", "ppl": "3.99", "wps": "397386", "ups": "3.36", "wpb": "118335", "bsz": "256", "num_updates": "810600", "lr": "1.91313e-05", "gnorm": "2.335", "loss_scale": "2", "train_wall": "59", "gb_free": "30", "wall": "242483"} +[2022-08-02 06:12:14,478][train_inner][INFO] - {"epoch": 16, "update": 15.754, "loss": "2.002", "ppl": "4.01", "wps": "397262", "ups": "3.36", "wpb": "118081", "bsz": "256", "num_updates": "810800", "lr": "1.91111e-05", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "29.2", "wall": "242543"} +[2022-08-02 06:13:13,916][train_inner][INFO] - {"epoch": 16, "update": 15.758, "loss": "2.003", "ppl": "4.01", "wps": "397863", "ups": "3.36", "wpb": "118240", "bsz": "256", "num_updates": "811000", "lr": "1.90909e-05", "gnorm": "2.54", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "242602"} +[2022-08-02 06:14:13,320][train_inner][INFO] - {"epoch": 16, "update": 15.761, "loss": "2.011", "ppl": "4.03", "wps": "397398", "ups": "3.37", "wpb": "118033", "bsz": "256", "num_updates": "811200", "lr": "1.90707e-05", "gnorm": "2.732", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "242662"} +[2022-08-02 06:15:12,867][train_inner][INFO] - {"epoch": 16, "update": 15.765, "loss": "2.005", "ppl": "4.01", "wps": "396426", "ups": "3.36", "wpb": "118030", "bsz": "256", "num_updates": "811400", "lr": "1.90505e-05", "gnorm": "2.452", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "242721"} +[2022-08-02 06:16:12,252][train_inner][INFO] - {"epoch": 16, "update": 15.769, "loss": "2.005", "ppl": "4.01", "wps": "397536", "ups": "3.37", "wpb": "118038", "bsz": "256", "num_updates": "811600", "lr": "1.90303e-05", "gnorm": "2.767", "loss_scale": "2", "train_wall": "59", "gb_free": "27.9", "wall": "242781"} +[2022-08-02 06:17:11,652][train_inner][INFO] - {"epoch": 16, "update": 15.773, "loss": "2.011", "ppl": "4.03", "wps": "398009", "ups": "3.37", "wpb": "118208", "bsz": "256", "num_updates": "811800", "lr": "1.90101e-05", "gnorm": "2.537", "loss_scale": "2", "train_wall": "59", "gb_free": "29.4", "wall": "242840"} +[2022-08-02 06:18:06,231][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 06:18:11,202][train_inner][INFO] - {"epoch": 16, "update": 15.777, "loss": "2.001", "ppl": "4", "wps": "397889", "ups": "3.36", "wpb": "118471", "bsz": "256", "num_updates": "812000", "lr": "1.89899e-05", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "242900"} +[2022-08-02 06:19:10,753][train_inner][INFO] - {"epoch": 16, "update": 15.781, "loss": "2.009", "ppl": "4.03", "wps": "395894", "ups": "3.36", "wpb": "117878", "bsz": "256", "num_updates": "812200", "lr": "1.89697e-05", "gnorm": "2.551", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "242959"} +[2022-08-02 06:20:10,041][train_inner][INFO] - {"epoch": 16, "update": 15.785, "loss": "1.999", "ppl": "4", "wps": "399529", "ups": "3.37", "wpb": "118436", "bsz": "256", "num_updates": "812400", "lr": "1.89495e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "243018"} +[2022-08-02 06:21:09,350][train_inner][INFO] - {"epoch": 16, "update": 15.789, "loss": "2.004", "ppl": "4.01", "wps": "399295", "ups": "3.37", "wpb": "118407", "bsz": "256", "num_updates": "812600", "lr": "1.89293e-05", "gnorm": "2.263", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "243078"} +[2022-08-02 06:22:08,617][train_inner][INFO] - {"epoch": 16, "update": 15.793, "loss": "2.003", "ppl": "4.01", "wps": "400305", "ups": "3.37", "wpb": "118624", "bsz": "256", "num_updates": "812800", "lr": "1.89091e-05", "gnorm": "2.311", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "243137"} +[2022-08-02 06:23:08,209][train_inner][INFO] - {"epoch": 16, "update": 15.796, "loss": "2.008", "ppl": "4.02", "wps": "395173", "ups": "3.36", "wpb": "117745", "bsz": "256", "num_updates": "813000", "lr": "1.88889e-05", "gnorm": "2.409", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "243197"} +[2022-08-02 06:24:07,640][train_inner][INFO] - {"epoch": 16, "update": 15.8, "loss": "2.008", "ppl": "4.02", "wps": "396083", "ups": "3.37", "wpb": "117697", "bsz": "256", "num_updates": "813200", "lr": "1.88687e-05", "gnorm": "2.463", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "243256"} +[2022-08-02 06:25:07,171][train_inner][INFO] - {"epoch": 16, "update": 15.804, "loss": "2.001", "ppl": "4", "wps": "398444", "ups": "3.36", "wpb": "118598", "bsz": "256", "num_updates": "813400", "lr": "1.88485e-05", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "243316"} +[2022-08-02 06:26:06,884][train_inner][INFO] - {"epoch": 16, "update": 15.808, "loss": "2.002", "ppl": "4.01", "wps": "395736", "ups": "3.35", "wpb": "118153", "bsz": "256", "num_updates": "813600", "lr": "1.88283e-05", "gnorm": "2.52", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "243375"} +[2022-08-02 06:27:06,263][train_inner][INFO] - {"epoch": 16, "update": 15.812, "loss": "2.006", "ppl": "4.02", "wps": "396116", "ups": "3.37", "wpb": "117604", "bsz": "256", "num_updates": "813800", "lr": "1.88081e-05", "gnorm": "2.468", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "243435"} +[2022-08-02 06:28:05,997][train_inner][INFO] - {"epoch": 16, "update": 15.816, "loss": "2.002", "ppl": "4", "wps": "396675", "ups": "3.35", "wpb": "118474", "bsz": "256", "num_updates": "814000", "lr": "1.87879e-05", "gnorm": "2.27", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "243494"} +[2022-08-02 06:28:41,631][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 06:29:05,823][train_inner][INFO] - {"epoch": 16, "update": 15.82, "loss": "2.002", "ppl": "4", "wps": "395716", "ups": "3.34", "wpb": "118371", "bsz": "256", "num_updates": "814200", "lr": "1.87677e-05", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "243554"} +[2022-08-02 06:30:05,373][train_inner][INFO] - {"epoch": 16, "update": 15.824, "loss": "2.002", "ppl": "4.01", "wps": "397507", "ups": "3.36", "wpb": "118358", "bsz": "256", "num_updates": "814400", "lr": "1.87475e-05", "gnorm": "2.332", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "243614"} +[2022-08-02 06:31:04,976][train_inner][INFO] - {"epoch": 16, "update": 15.827, "loss": "2.005", "ppl": "4.01", "wps": "397858", "ups": "3.36", "wpb": "118566", "bsz": "256", "num_updates": "814600", "lr": "1.87273e-05", "gnorm": "2.546", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "243673"} +[2022-08-02 06:32:04,020][train_inner][INFO] - {"epoch": 16, "update": 15.831, "loss": "2.01", "ppl": "4.03", "wps": "400393", "ups": "3.39", "wpb": "118204", "bsz": "256", "num_updates": "814800", "lr": "1.87071e-05", "gnorm": "2.582", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "243732"} +[2022-08-02 06:33:03,170][train_inner][INFO] - {"epoch": 16, "update": 15.835, "loss": "2.004", "ppl": "4.01", "wps": "400478", "ups": "3.38", "wpb": "118441", "bsz": "256", "num_updates": "815000", "lr": "1.86869e-05", "gnorm": "2.346", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "243792"} +[2022-08-02 06:34:02,429][train_inner][INFO] - {"epoch": 16, "update": 15.839, "loss": "2.003", "ppl": "4.01", "wps": "399405", "ups": "3.38", "wpb": "118340", "bsz": "256", "num_updates": "815200", "lr": "1.86667e-05", "gnorm": "2.377", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "243851"} +[2022-08-02 06:35:01,695][train_inner][INFO] - {"epoch": 16, "update": 15.843, "loss": "1.999", "ppl": "4", "wps": "400242", "ups": "3.37", "wpb": "118602", "bsz": "256", "num_updates": "815400", "lr": "1.86465e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "27", "wall": "243910"} +[2022-08-02 06:36:02,419][train_inner][INFO] - {"epoch": 16, "update": 15.847, "loss": "2.001", "ppl": "4", "wps": "389197", "ups": "3.29", "wpb": "118168", "bsz": "256", "num_updates": "815600", "lr": "1.86263e-05", "gnorm": "2.439", "loss_scale": "2", "train_wall": "60", "gb_free": "22.5", "wall": "243971"} +[2022-08-02 06:37:01,989][train_inner][INFO] - {"epoch": 16, "update": 15.851, "loss": "2", "ppl": "4", "wps": "398176", "ups": "3.36", "wpb": "118595", "bsz": "256", "num_updates": "815800", "lr": "1.86061e-05", "gnorm": "2.355", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "244030"} +[2022-08-02 06:38:02,583][train_inner][INFO] - {"epoch": 16, "update": 15.855, "loss": "2", "ppl": "4", "wps": "389894", "ups": "3.3", "wpb": "118125", "bsz": "256", "num_updates": "816000", "lr": "1.85859e-05", "gnorm": "2.467", "loss_scale": "2", "train_wall": "60", "gb_free": "25.4", "wall": "244091"} +[2022-08-02 06:39:03,309][train_inner][INFO] - {"epoch": 16, "update": 15.859, "loss": "2", "ppl": "4", "wps": "391825", "ups": "3.29", "wpb": "118970", "bsz": "256", "num_updates": "816200", "lr": "1.85657e-05", "gnorm": "2.331", "loss_scale": "4", "train_wall": "60", "gb_free": "21.4", "wall": "244152"} +[2022-08-02 06:39:06,000][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 06:40:03,080][train_inner][INFO] - {"epoch": 16, "update": 15.862, "loss": "2.006", "ppl": "4.02", "wps": "395823", "ups": "3.35", "wpb": "118293", "bsz": "256", "num_updates": "816400", "lr": "1.85455e-05", "gnorm": "2.55", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "244212"} +[2022-08-02 06:41:02,798][train_inner][INFO] - {"epoch": 16, "update": 15.866, "loss": "1.996", "ppl": "3.99", "wps": "397545", "ups": "3.35", "wpb": "118701", "bsz": "256", "num_updates": "816600", "lr": "1.85253e-05", "gnorm": "2.518", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "244271"} +[2022-08-02 06:42:02,376][train_inner][INFO] - {"epoch": 16, "update": 15.87, "loss": "2.002", "ppl": "4.01", "wps": "398510", "ups": "3.36", "wpb": "118713", "bsz": "256", "num_updates": "816800", "lr": "1.85051e-05", "gnorm": "2.514", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "244331"} +[2022-08-02 06:43:01,905][train_inner][INFO] - {"epoch": 16, "update": 15.874, "loss": "2.004", "ppl": "4.01", "wps": "399506", "ups": "3.36", "wpb": "118910", "bsz": "256", "num_updates": "817000", "lr": "1.84848e-05", "gnorm": "2.364", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "244390"} +[2022-08-02 06:44:00,960][train_inner][INFO] - {"epoch": 16, "update": 15.878, "loss": "2.005", "ppl": "4.02", "wps": "400808", "ups": "3.39", "wpb": "118348", "bsz": "256", "num_updates": "817200", "lr": "1.84646e-05", "gnorm": "2.463", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "244449"} +[2022-08-02 06:45:00,057][train_inner][INFO] - {"epoch": 16, "update": 15.882, "loss": "2", "ppl": "4", "wps": "400554", "ups": "3.38", "wpb": "118357", "bsz": "256", "num_updates": "817400", "lr": "1.84444e-05", "gnorm": "2.448", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "244508"} +[2022-08-02 06:45:59,618][train_inner][INFO] - {"epoch": 16, "update": 15.886, "loss": "2.004", "ppl": "4.01", "wps": "396904", "ups": "3.36", "wpb": "118199", "bsz": "256", "num_updates": "817600", "lr": "1.84242e-05", "gnorm": "2.498", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "244568"} +[2022-08-02 06:46:58,983][train_inner][INFO] - {"epoch": 16, "update": 15.89, "loss": "2.003", "ppl": "4.01", "wps": "398334", "ups": "3.37", "wpb": "118235", "bsz": "256", "num_updates": "817800", "lr": "1.8404e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "244627"} +[2022-08-02 06:47:58,368][train_inner][INFO] - {"epoch": 16, "update": 15.894, "loss": "2.004", "ppl": "4.01", "wps": "398060", "ups": "3.37", "wpb": "118193", "bsz": "256", "num_updates": "818000", "lr": "1.83838e-05", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "244687"} +[2022-08-02 06:48:57,496][train_inner][INFO] - {"epoch": 16, "update": 15.897, "loss": "2.001", "ppl": "4", "wps": "400287", "ups": "3.38", "wpb": "118340", "bsz": "256", "num_updates": "818200", "lr": "1.83636e-05", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "244746"} +[2022-08-02 06:49:15,288][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 06:49:57,218][train_inner][INFO] - {"epoch": 16, "update": 15.901, "loss": "1.999", "ppl": "4", "wps": "395815", "ups": "3.35", "wpb": "118193", "bsz": "256", "num_updates": "818400", "lr": "1.83434e-05", "gnorm": "2.366", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "244806"} +[2022-08-02 06:50:56,744][train_inner][INFO] - {"epoch": 16, "update": 15.905, "loss": "1.999", "ppl": "4", "wps": "397604", "ups": "3.36", "wpb": "118339", "bsz": "256", "num_updates": "818600", "lr": "1.83232e-05", "gnorm": "2.591", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "244865"} +[2022-08-02 06:51:57,196][train_inner][INFO] - {"epoch": 16, "update": 15.909, "loss": "1.999", "ppl": "4", "wps": "390193", "ups": "3.31", "wpb": "117938", "bsz": "256", "num_updates": "818800", "lr": "1.8303e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "60", "gb_free": "24.2", "wall": "244926"} +[2022-08-02 06:52:56,546][train_inner][INFO] - {"epoch": 16, "update": 15.913, "loss": "2.006", "ppl": "4.02", "wps": "398218", "ups": "3.37", "wpb": "118172", "bsz": "256", "num_updates": "819000", "lr": "1.82828e-05", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "244985"} +[2022-08-02 06:53:55,681][train_inner][INFO] - {"epoch": 16, "update": 15.917, "loss": "2.001", "ppl": "4", "wps": "401451", "ups": "3.38", "wpb": "118697", "bsz": "256", "num_updates": "819200", "lr": "1.82626e-05", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "245044"} +[2022-08-02 06:54:55,409][train_inner][INFO] - {"epoch": 16, "update": 15.921, "loss": "2.005", "ppl": "4.01", "wps": "394430", "ups": "3.35", "wpb": "117793", "bsz": "256", "num_updates": "819400", "lr": "1.82424e-05", "gnorm": "2.237", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "245104"} +[2022-08-02 06:55:54,216][train_inner][INFO] - {"epoch": 16, "update": 15.925, "loss": "1.998", "ppl": "3.99", "wps": "400358", "ups": "3.4", "wpb": "117717", "bsz": "256", "num_updates": "819600", "lr": "1.82222e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "58", "gb_free": "22.7", "wall": "245163"} +[2022-08-02 06:56:53,849][train_inner][INFO] - {"epoch": 16, "update": 15.929, "loss": "2.005", "ppl": "4.01", "wps": "396052", "ups": "3.35", "wpb": "118088", "bsz": "256", "num_updates": "819800", "lr": "1.8202e-05", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "245222"} +[2022-08-02 06:57:53,432][train_inner][INFO] - {"epoch": 16, "update": 15.932, "loss": "1.998", "ppl": "4", "wps": "396320", "ups": "3.36", "wpb": "118070", "bsz": "256", "num_updates": "820000", "lr": "1.81818e-05", "gnorm": "2.484", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "245282"} +[2022-08-02 06:58:52,642][train_inner][INFO] - {"epoch": 16, "update": 15.936, "loss": "2.007", "ppl": "4.02", "wps": "398551", "ups": "3.38", "wpb": "117990", "bsz": "256", "num_updates": "820200", "lr": "1.81616e-05", "gnorm": "2.415", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "245341"} +[2022-08-02 06:59:52,284][train_inner][INFO] - {"epoch": 16, "update": 15.94, "loss": "2.001", "ppl": "4", "wps": "397655", "ups": "3.35", "wpb": "118583", "bsz": "256", "num_updates": "820400", "lr": "1.81414e-05", "gnorm": "2.296", "loss_scale": "4", "train_wall": "59", "gb_free": "22.4", "wall": "245401"} +[2022-08-02 07:00:06,366][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 07:00:52,059][train_inner][INFO] - {"epoch": 16, "update": 15.944, "loss": "2.003", "ppl": "4.01", "wps": "395895", "ups": "3.35", "wpb": "118324", "bsz": "256", "num_updates": "820600", "lr": "1.81212e-05", "gnorm": "2.361", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "245460"} +[2022-08-02 07:01:51,270][train_inner][INFO] - {"epoch": 16, "update": 15.948, "loss": "2.007", "ppl": "4.02", "wps": "397737", "ups": "3.38", "wpb": "117751", "bsz": "256", "num_updates": "820800", "lr": "1.8101e-05", "gnorm": "2.427", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "245520"} +[2022-08-02 07:02:50,886][train_inner][INFO] - {"epoch": 16, "update": 15.952, "loss": "2.001", "ppl": "4", "wps": "397910", "ups": "3.35", "wpb": "118608", "bsz": "256", "num_updates": "821000", "lr": "1.80808e-05", "gnorm": "2.323", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "245579"} +[2022-08-02 07:03:50,057][train_inner][INFO] - {"epoch": 16, "update": 15.956, "loss": "2.004", "ppl": "4.01", "wps": "398400", "ups": "3.38", "wpb": "117867", "bsz": "256", "num_updates": "821200", "lr": "1.80606e-05", "gnorm": "2.549", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "245638"} +[2022-08-02 07:04:49,206][train_inner][INFO] - {"epoch": 16, "update": 15.96, "loss": "1.998", "ppl": "3.99", "wps": "401729", "ups": "3.38", "wpb": "118809", "bsz": "256", "num_updates": "821400", "lr": "1.80404e-05", "gnorm": "2.546", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "245698"} +[2022-08-02 07:05:48,759][train_inner][INFO] - {"epoch": 16, "update": 15.964, "loss": "2.001", "ppl": "4", "wps": "396702", "ups": "3.36", "wpb": "118123", "bsz": "256", "num_updates": "821600", "lr": "1.80202e-05", "gnorm": "2.434", "loss_scale": "2", "train_wall": "59", "gb_free": "27.9", "wall": "245757"} +[2022-08-02 07:06:48,648][train_inner][INFO] - {"epoch": 16, "update": 15.967, "loss": "2.001", "ppl": "4", "wps": "396356", "ups": "3.34", "wpb": "118685", "bsz": "256", "num_updates": "821800", "lr": "1.8e-05", "gnorm": "2.357", "loss_scale": "2", "train_wall": "60", "gb_free": "22.9", "wall": "245817"} +[2022-08-02 07:07:48,196][train_inner][INFO] - {"epoch": 16, "update": 15.971, "loss": "2.004", "ppl": "4.01", "wps": "396357", "ups": "3.36", "wpb": "118011", "bsz": "256", "num_updates": "822000", "lr": "1.79798e-05", "gnorm": "2.525", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "245877"} +[2022-08-02 07:08:47,473][train_inner][INFO] - {"epoch": 16, "update": 15.975, "loss": "2.002", "ppl": "4.01", "wps": "399091", "ups": "3.37", "wpb": "118285", "bsz": "256", "num_updates": "822200", "lr": "1.79596e-05", "gnorm": "2.477", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "245936"} +[2022-08-02 07:09:46,745][train_inner][INFO] - {"epoch": 16, "update": 15.979, "loss": "2.008", "ppl": "4.02", "wps": "397826", "ups": "3.37", "wpb": "117898", "bsz": "256", "num_updates": "822400", "lr": "1.79394e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "245995"} +[2022-08-02 07:10:19,589][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 07:10:46,479][train_inner][INFO] - {"epoch": 16, "update": 15.983, "loss": "1.997", "ppl": "3.99", "wps": "395801", "ups": "3.35", "wpb": "118214", "bsz": "256", "num_updates": "822600", "lr": "1.79192e-05", "gnorm": "2.255", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "246055"} +[2022-08-02 07:11:45,889][train_inner][INFO] - {"epoch": 16, "update": 15.987, "loss": "1.999", "ppl": "4", "wps": "398755", "ups": "3.37", "wpb": "118449", "bsz": "256", "num_updates": "822800", "lr": "1.7899e-05", "gnorm": "2.482", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "246114"} +[2022-08-02 07:12:45,146][train_inner][INFO] - {"epoch": 16, "update": 15.991, "loss": "1.999", "ppl": "4", "wps": "397140", "ups": "3.38", "wpb": "117666", "bsz": "256", "num_updates": "823000", "lr": "1.78788e-05", "gnorm": "2.291", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "246174"} +[2022-08-02 07:13:44,184][train_inner][INFO] - {"epoch": 16, "update": 15.995, "loss": "1.997", "ppl": "3.99", "wps": "401657", "ups": "3.39", "wpb": "118564", "bsz": "256", "num_updates": "823200", "lr": "1.78586e-05", "gnorm": "2.338", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "246233"} +[2022-08-02 07:14:44,824][train_inner][INFO] - {"epoch": 16, "update": 15.998, "loss": "1.997", "ppl": "3.99", "wps": "390133", "ups": "3.3", "wpb": "118288", "bsz": "256", "num_updates": "823400", "lr": "1.78384e-05", "gnorm": "2.444", "loss_scale": "2", "train_wall": "60", "gb_free": "22.3", "wall": "246293"} +[2022-08-02 07:15:07,952][fairseq_cli.train][INFO] - end of epoch 16 (average epoch stats below) +[2022-08-02 07:15:07,952][train][INFO] - {"epoch": 16, "train_loss": "2.008", "train_ppl": "4.02", "train_wps": "395881", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "823478", "train_lr": "1.78305e-05", "train_gnorm": "2.382", "train_loss_scale": "2", "train_train_wall": "15230", "train_gb_free": "23.7", "train_wall": "246316"} +[2022-08-02 07:15:08,062][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 07:15:08,065][fairseq.trainer][INFO] - begin training epoch 17 +[2022-08-02 07:15:08,065][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 07:15:55,676][train_inner][INFO] - {"epoch": 17, "update": 16.002, "loss": "1.999", "ppl": "4", "wps": "332463", "ups": "2.82", "wpb": "117778", "bsz": "255.4", "num_updates": "823600", "lr": "1.78182e-05", "gnorm": "2.476", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "246364"} +[2022-08-02 07:16:55,185][train_inner][INFO] - {"epoch": 17, "update": 16.006, "loss": "2", "ppl": "4", "wps": "395651", "ups": "3.36", "wpb": "117723", "bsz": "256", "num_updates": "823800", "lr": "1.7798e-05", "gnorm": "2.39", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "246424"} +[2022-08-02 07:17:54,535][train_inner][INFO] - {"epoch": 17, "update": 16.01, "loss": "1.993", "ppl": "3.98", "wps": "401110", "ups": "3.37", "wpb": "119029", "bsz": "256", "num_updates": "824000", "lr": "1.77778e-05", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "246483"} +[2022-08-02 07:18:54,009][train_inner][INFO] - {"epoch": 17, "update": 16.014, "loss": "2.003", "ppl": "4.01", "wps": "398246", "ups": "3.36", "wpb": "118425", "bsz": "256", "num_updates": "824200", "lr": "1.77576e-05", "gnorm": "2.371", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "246542"} +[2022-08-02 07:19:53,289][train_inner][INFO] - {"epoch": 17, "update": 16.018, "loss": "1.999", "ppl": "4", "wps": "400440", "ups": "3.37", "wpb": "118690", "bsz": "256", "num_updates": "824400", "lr": "1.77374e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "246602"} +[2022-08-02 07:20:46,742][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 07:20:53,323][train_inner][INFO] - {"epoch": 17, "update": 16.022, "loss": "2.002", "ppl": "4.01", "wps": "394182", "ups": "3.33", "wpb": "118321", "bsz": "256", "num_updates": "824600", "lr": "1.77172e-05", "gnorm": "2.511", "loss_scale": "2", "train_wall": "60", "gb_free": "25.7", "wall": "246662"} +[2022-08-02 07:21:52,978][train_inner][INFO] - {"epoch": 17, "update": 16.026, "loss": "1.996", "ppl": "3.99", "wps": "396823", "ups": "3.35", "wpb": "118361", "bsz": "256", "num_updates": "824800", "lr": "1.7697e-05", "gnorm": "2.459", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "246721"} +[2022-08-02 07:22:52,116][train_inner][INFO] - {"epoch": 17, "update": 16.03, "loss": "1.995", "ppl": "3.99", "wps": "400672", "ups": "3.38", "wpb": "118475", "bsz": "256", "num_updates": "825000", "lr": "1.76768e-05", "gnorm": "2.454", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "246781"} +[2022-08-02 07:23:51,534][train_inner][INFO] - {"epoch": 17, "update": 16.033, "loss": "1.994", "ppl": "3.98", "wps": "398102", "ups": "3.37", "wpb": "118270", "bsz": "256", "num_updates": "825200", "lr": "1.76566e-05", "gnorm": "2.487", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "246840"} +[2022-08-02 07:24:51,068][train_inner][INFO] - {"epoch": 17, "update": 16.037, "loss": "1.994", "ppl": "3.98", "wps": "401016", "ups": "3.36", "wpb": "119370", "bsz": "256", "num_updates": "825400", "lr": "1.76364e-05", "gnorm": "2.24", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "246899"} +[2022-08-02 07:25:50,481][train_inner][INFO] - {"epoch": 17, "update": 16.041, "loss": "1.997", "ppl": "3.99", "wps": "397566", "ups": "3.37", "wpb": "118103", "bsz": "256", "num_updates": "825600", "lr": "1.76162e-05", "gnorm": "2.45", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "246959"} +[2022-08-02 07:26:50,029][train_inner][INFO] - {"epoch": 17, "update": 16.045, "loss": "1.999", "ppl": "4", "wps": "398951", "ups": "3.36", "wpb": "118783", "bsz": "256", "num_updates": "825800", "lr": "1.7596e-05", "gnorm": "2.502", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "247018"} +[2022-08-02 07:27:49,121][train_inner][INFO] - {"epoch": 17, "update": 16.049, "loss": "2.003", "ppl": "4.01", "wps": "399549", "ups": "3.38", "wpb": "118051", "bsz": "256", "num_updates": "826000", "lr": "1.75758e-05", "gnorm": "2.216", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "247078"} +[2022-08-02 07:28:48,252][train_inner][INFO] - {"epoch": 17, "update": 16.053, "loss": "1.996", "ppl": "3.99", "wps": "397966", "ups": "3.38", "wpb": "117658", "bsz": "256", "num_updates": "826200", "lr": "1.75556e-05", "gnorm": "2.291", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "247137"} +[2022-08-02 07:29:47,508][train_inner][INFO] - {"epoch": 17, "update": 16.057, "loss": "2.001", "ppl": "4", "wps": "399132", "ups": "3.38", "wpb": "118254", "bsz": "256", "num_updates": "826400", "lr": "1.75354e-05", "gnorm": "2.387", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "247196"} +[2022-08-02 07:30:47,240][train_inner][INFO] - {"epoch": 17, "update": 16.061, "loss": "1.999", "ppl": "4", "wps": "396349", "ups": "3.35", "wpb": "118373", "bsz": "256", "num_updates": "826600", "lr": "1.75152e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "247256"} +[2022-08-02 07:30:55,691][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 07:31:46,819][train_inner][INFO] - {"epoch": 17, "update": 16.065, "loss": "2.004", "ppl": "4.01", "wps": "398291", "ups": "3.36", "wpb": "118648", "bsz": "256", "num_updates": "826800", "lr": "1.74949e-05", "gnorm": "2.498", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "247315"} +[2022-08-02 07:32:46,554][train_inner][INFO] - {"epoch": 17, "update": 16.068, "loss": "1.999", "ppl": "4", "wps": "395919", "ups": "3.35", "wpb": "118251", "bsz": "256", "num_updates": "827000", "lr": "1.74747e-05", "gnorm": "2.424", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "247375"} +[2022-08-02 07:33:46,418][train_inner][INFO] - {"epoch": 17, "update": 16.072, "loss": "1.997", "ppl": "3.99", "wps": "396379", "ups": "3.34", "wpb": "118643", "bsz": "256", "num_updates": "827200", "lr": "1.74545e-05", "gnorm": "2.423", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "247435"} +[2022-08-02 07:34:45,733][train_inner][INFO] - {"epoch": 17, "update": 16.076, "loss": "2.002", "ppl": "4.01", "wps": "397046", "ups": "3.37", "wpb": "117754", "bsz": "256", "num_updates": "827400", "lr": "1.74343e-05", "gnorm": "2.656", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "247494"} +[2022-08-02 07:35:45,490][train_inner][INFO] - {"epoch": 17, "update": 16.08, "loss": "1.999", "ppl": "4", "wps": "396304", "ups": "3.35", "wpb": "118409", "bsz": "256", "num_updates": "827600", "lr": "1.74141e-05", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "247554"} +[2022-08-02 07:36:45,189][train_inner][INFO] - {"epoch": 17, "update": 16.084, "loss": "1.996", "ppl": "3.99", "wps": "396565", "ups": "3.35", "wpb": "118372", "bsz": "256", "num_updates": "827800", "lr": "1.73939e-05", "gnorm": "2.817", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "247614"} +[2022-08-02 07:37:44,075][train_inner][INFO] - {"epoch": 17, "update": 16.088, "loss": "1.998", "ppl": "3.99", "wps": "402204", "ups": "3.4", "wpb": "118421", "bsz": "256", "num_updates": "828000", "lr": "1.73737e-05", "gnorm": "2.578", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "247673"} +[2022-08-02 07:38:43,521][train_inner][INFO] - {"epoch": 17, "update": 16.092, "loss": "1.993", "ppl": "3.98", "wps": "400268", "ups": "3.36", "wpb": "118970", "bsz": "256", "num_updates": "828200", "lr": "1.73535e-05", "gnorm": "2.666", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "247732"} +[2022-08-02 07:39:43,285][train_inner][INFO] - {"epoch": 17, "update": 16.096, "loss": "1.992", "ppl": "3.98", "wps": "395529", "ups": "3.35", "wpb": "118193", "bsz": "256", "num_updates": "828400", "lr": "1.73333e-05", "gnorm": "2.56", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "247792"} +[2022-08-02 07:40:42,821][train_inner][INFO] - {"epoch": 17, "update": 16.1, "loss": "1.993", "ppl": "3.98", "wps": "397515", "ups": "3.36", "wpb": "118330", "bsz": "256", "num_updates": "828600", "lr": "1.73131e-05", "gnorm": "2.394", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "247851"} +[2022-08-02 07:41:42,431][train_inner][INFO] - {"epoch": 17, "update": 16.103, "loss": "2.003", "ppl": "4.01", "wps": "395226", "ups": "3.36", "wpb": "117797", "bsz": "256", "num_updates": "828800", "lr": "1.72929e-05", "gnorm": "2.456", "loss_scale": "4", "train_wall": "59", "gb_free": "21.4", "wall": "247911"} +[2022-08-02 07:42:14,945][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 07:42:42,329][train_inner][INFO] - {"epoch": 17, "update": 16.107, "loss": "1.997", "ppl": "3.99", "wps": "395309", "ups": "3.34", "wpb": "118390", "bsz": "256", "num_updates": "829000", "lr": "1.72727e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "247971"} +[2022-08-02 07:43:41,886][train_inner][INFO] - {"epoch": 17, "update": 16.111, "loss": "1.998", "ppl": "3.99", "wps": "397157", "ups": "3.36", "wpb": "118266", "bsz": "256", "num_updates": "829200", "lr": "1.72525e-05", "gnorm": "2.493", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "248030"} +[2022-08-02 07:44:41,305][train_inner][INFO] - {"epoch": 17, "update": 16.115, "loss": "2.004", "ppl": "4.01", "wps": "397822", "ups": "3.37", "wpb": "118190", "bsz": "256", "num_updates": "829400", "lr": "1.72323e-05", "gnorm": "2.342", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "248090"} +[2022-08-02 07:45:41,102][train_inner][INFO] - {"epoch": 17, "update": 16.119, "loss": "2.002", "ppl": "4.01", "wps": "396528", "ups": "3.34", "wpb": "118556", "bsz": "256", "num_updates": "829600", "lr": "1.72121e-05", "gnorm": "2.244", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "248150"} +[2022-08-02 07:46:40,778][train_inner][INFO] - {"epoch": 17, "update": 16.123, "loss": "2", "ppl": "4", "wps": "397708", "ups": "3.35", "wpb": "118667", "bsz": "256", "num_updates": "829800", "lr": "1.71919e-05", "gnorm": "2.221", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "248209"} +[2022-08-02 07:47:40,692][train_inner][INFO] - {"epoch": 17, "update": 16.127, "loss": "1.995", "ppl": "3.99", "wps": "395559", "ups": "3.34", "wpb": "118497", "bsz": "256", "num_updates": "830000", "lr": "1.71717e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "248269"} +[2022-08-02 07:48:40,115][train_inner][INFO] - {"epoch": 17, "update": 16.131, "loss": "2.003", "ppl": "4.01", "wps": "396060", "ups": "3.37", "wpb": "117674", "bsz": "256", "num_updates": "830200", "lr": "1.71515e-05", "gnorm": "2.595", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "248329"} +[2022-08-02 07:49:39,759][train_inner][INFO] - {"epoch": 17, "update": 16.134, "loss": "2.001", "ppl": "4", "wps": "395639", "ups": "3.35", "wpb": "117987", "bsz": "256", "num_updates": "830400", "lr": "1.71313e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "248388"} +[2022-08-02 07:50:39,140][train_inner][INFO] - {"epoch": 17, "update": 16.138, "loss": "1.999", "ppl": "4", "wps": "394711", "ups": "3.37", "wpb": "117191", "bsz": "256", "num_updates": "830600", "lr": "1.71111e-05", "gnorm": "2.412", "loss_scale": "2", "train_wall": "59", "gb_free": "27.8", "wall": "248448"} +[2022-08-02 07:51:39,080][train_inner][INFO] - {"epoch": 17, "update": 16.142, "loss": "1.996", "ppl": "3.99", "wps": "394128", "ups": "3.34", "wpb": "118120", "bsz": "255.9", "num_updates": "830800", "lr": "1.70909e-05", "gnorm": "2.345", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "248508"} +[2022-08-02 07:52:38,622][train_inner][INFO] - {"epoch": 17, "update": 16.146, "loss": "1.992", "ppl": "3.98", "wps": "397930", "ups": "3.36", "wpb": "118467", "bsz": "256", "num_updates": "831000", "lr": "1.70707e-05", "gnorm": "2.368", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "248567"} +[2022-08-02 07:52:49,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 07:53:38,251][train_inner][INFO] - {"epoch": 17, "update": 16.15, "loss": "2.002", "ppl": "4.01", "wps": "396284", "ups": "3.35", "wpb": "118150", "bsz": "256", "num_updates": "831200", "lr": "1.70505e-05", "gnorm": "2.338", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "248627"} +[2022-08-02 07:54:38,011][train_inner][INFO] - {"epoch": 17, "update": 16.154, "loss": "1.995", "ppl": "3.99", "wps": "396724", "ups": "3.35", "wpb": "118539", "bsz": "256", "num_updates": "831400", "lr": "1.70303e-05", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "248686"} +[2022-08-02 07:55:37,370][train_inner][INFO] - {"epoch": 17, "update": 16.158, "loss": "1.995", "ppl": "3.99", "wps": "399534", "ups": "3.37", "wpb": "118579", "bsz": "256", "num_updates": "831600", "lr": "1.70101e-05", "gnorm": "2.402", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "248746"} +[2022-08-02 07:56:36,973][train_inner][INFO] - {"epoch": 17, "update": 16.162, "loss": "1.997", "ppl": "3.99", "wps": "397043", "ups": "3.36", "wpb": "118325", "bsz": "256", "num_updates": "831800", "lr": "1.69899e-05", "gnorm": "2.502", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "248805"} +[2022-08-02 07:57:36,228][train_inner][INFO] - {"epoch": 17, "update": 16.166, "loss": "1.993", "ppl": "3.98", "wps": "398113", "ups": "3.38", "wpb": "117950", "bsz": "256", "num_updates": "832000", "lr": "1.69697e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "27", "wall": "248865"} +[2022-08-02 07:58:35,659][train_inner][INFO] - {"epoch": 17, "update": 16.169, "loss": "1.996", "ppl": "3.99", "wps": "399701", "ups": "3.37", "wpb": "118773", "bsz": "256", "num_updates": "832200", "lr": "1.69495e-05", "gnorm": "2.401", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "248924"} +[2022-08-02 07:59:34,920][train_inner][INFO] - {"epoch": 17, "update": 16.173, "loss": "1.998", "ppl": "4", "wps": "399525", "ups": "3.37", "wpb": "118380", "bsz": "256", "num_updates": "832400", "lr": "1.69293e-05", "gnorm": "2.372", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "248983"} +[2022-08-02 08:00:34,772][train_inner][INFO] - {"epoch": 17, "update": 16.177, "loss": "2", "ppl": "4", "wps": "396475", "ups": "3.34", "wpb": "118648", "bsz": "256", "num_updates": "832600", "lr": "1.69091e-05", "gnorm": "2.488", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "249043"} +[2022-08-02 08:01:34,139][train_inner][INFO] - {"epoch": 17, "update": 16.181, "loss": "1.998", "ppl": "3.99", "wps": "399009", "ups": "3.37", "wpb": "118440", "bsz": "256", "num_updates": "832800", "lr": "1.68889e-05", "gnorm": "2.473", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "249103"} +[2022-08-02 08:02:33,370][train_inner][INFO] - {"epoch": 17, "update": 16.185, "loss": "1.996", "ppl": "3.99", "wps": "401807", "ups": "3.38", "wpb": "118996", "bsz": "256", "num_updates": "833000", "lr": "1.68687e-05", "gnorm": "2.494", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "249162"} +[2022-08-02 08:03:08,448][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:03:33,524][train_inner][INFO] - {"epoch": 17, "update": 16.189, "loss": "1.994", "ppl": "3.98", "wps": "395776", "ups": "3.32", "wpb": "119036", "bsz": "256", "num_updates": "833200", "lr": "1.68485e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "60", "gb_free": "29.6", "wall": "249222"} +[2022-08-02 08:04:32,755][train_inner][INFO] - {"epoch": 17, "update": 16.193, "loss": "1.994", "ppl": "3.98", "wps": "400326", "ups": "3.38", "wpb": "118557", "bsz": "256", "num_updates": "833400", "lr": "1.68283e-05", "gnorm": "2.534", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "249281"} +[2022-08-02 08:05:31,956][train_inner][INFO] - {"epoch": 17, "update": 16.197, "loss": "1.995", "ppl": "3.99", "wps": "398270", "ups": "3.38", "wpb": "117891", "bsz": "256", "num_updates": "833600", "lr": "1.68081e-05", "gnorm": "2.458", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "249340"} +[2022-08-02 08:06:31,649][train_inner][INFO] - {"epoch": 17, "update": 16.201, "loss": "1.998", "ppl": "3.99", "wps": "396011", "ups": "3.35", "wpb": "118193", "bsz": "256", "num_updates": "833800", "lr": "1.67879e-05", "gnorm": "2.329", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "249400"} +[2022-08-02 08:07:30,731][train_inner][INFO] - {"epoch": 17, "update": 16.204, "loss": "1.997", "ppl": "3.99", "wps": "399994", "ups": "3.39", "wpb": "118163", "bsz": "256", "num_updates": "834000", "lr": "1.67677e-05", "gnorm": "2.223", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "249459"} +[2022-08-02 08:08:29,973][train_inner][INFO] - {"epoch": 17, "update": 16.208, "loss": "1.994", "ppl": "3.98", "wps": "400662", "ups": "3.38", "wpb": "118678", "bsz": "256", "num_updates": "834200", "lr": "1.67475e-05", "gnorm": "2.265", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "249518"} +[2022-08-02 08:09:29,278][train_inner][INFO] - {"epoch": 17, "update": 16.212, "loss": "1.998", "ppl": "3.99", "wps": "398310", "ups": "3.37", "wpb": "118107", "bsz": "256", "num_updates": "834400", "lr": "1.67273e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "249578"} +[2022-08-02 08:10:30,005][train_inner][INFO] - {"epoch": 17, "update": 16.216, "loss": "1.994", "ppl": "3.98", "wps": "387946", "ups": "3.29", "wpb": "117794", "bsz": "256", "num_updates": "834600", "lr": "1.67071e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "60", "gb_free": "22.3", "wall": "249638"} +[2022-08-02 08:11:29,611][train_inner][INFO] - {"epoch": 17, "update": 16.22, "loss": "1.999", "ppl": "4", "wps": "396001", "ups": "3.36", "wpb": "118019", "bsz": "256", "num_updates": "834800", "lr": "1.66869e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "249698"} +[2022-08-02 08:12:28,736][train_inner][INFO] - {"epoch": 17, "update": 16.224, "loss": "1.994", "ppl": "3.98", "wps": "399671", "ups": "3.38", "wpb": "118152", "bsz": "256", "num_updates": "835000", "lr": "1.66667e-05", "gnorm": "2.519", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "249757"} +[2022-08-02 08:13:28,504][train_inner][INFO] - {"epoch": 17, "update": 16.228, "loss": "1.992", "ppl": "3.98", "wps": "398372", "ups": "3.35", "wpb": "119048", "bsz": "256", "num_updates": "835200", "lr": "1.66465e-05", "gnorm": "2.277", "loss_scale": "4", "train_wall": "59", "gb_free": "22.6", "wall": "249817"} +[2022-08-02 08:13:30,588][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:14:28,135][train_inner][INFO] - {"epoch": 17, "update": 16.232, "loss": "1.994", "ppl": "3.98", "wps": "395917", "ups": "3.35", "wpb": "118044", "bsz": "256", "num_updates": "835400", "lr": "1.66263e-05", "gnorm": "2.307", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "249877"} +[2022-08-02 08:15:27,154][train_inner][INFO] - {"epoch": 17, "update": 16.236, "loss": "2.002", "ppl": "4", "wps": "399135", "ups": "3.39", "wpb": "117781", "bsz": "256", "num_updates": "835600", "lr": "1.66061e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "249936"} +[2022-08-02 08:16:26,463][train_inner][INFO] - {"epoch": 17, "update": 16.239, "loss": "1.998", "ppl": "3.99", "wps": "400754", "ups": "3.37", "wpb": "118841", "bsz": "256", "num_updates": "835800", "lr": "1.65859e-05", "gnorm": "2.363", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "249995"} +[2022-08-02 08:17:25,804][train_inner][INFO] - {"epoch": 17, "update": 16.243, "loss": "1.995", "ppl": "3.99", "wps": "397867", "ups": "3.37", "wpb": "118049", "bsz": "256", "num_updates": "836000", "lr": "1.65657e-05", "gnorm": "2.382", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "250054"} +[2022-08-02 08:18:25,289][train_inner][INFO] - {"epoch": 17, "update": 16.247, "loss": "1.998", "ppl": "4", "wps": "398740", "ups": "3.36", "wpb": "118595", "bsz": "256", "num_updates": "836200", "lr": "1.65455e-05", "gnorm": "2.38", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "250114"} +[2022-08-02 08:19:24,709][train_inner][INFO] - {"epoch": 17, "update": 16.251, "loss": "1.995", "ppl": "3.99", "wps": "397954", "ups": "3.37", "wpb": "118229", "bsz": "256", "num_updates": "836400", "lr": "1.65253e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "250173"} +[2022-08-02 08:20:24,191][train_inner][INFO] - {"epoch": 17, "update": 16.255, "loss": "2.001", "ppl": "4", "wps": "398430", "ups": "3.36", "wpb": "118497", "bsz": "256", "num_updates": "836600", "lr": "1.65051e-05", "gnorm": "2.429", "loss_scale": "2", "train_wall": "59", "gb_free": "30.4", "wall": "250233"} +[2022-08-02 08:21:23,312][train_inner][INFO] - {"epoch": 17, "update": 16.259, "loss": "1.99", "ppl": "3.97", "wps": "401589", "ups": "3.38", "wpb": "118712", "bsz": "256", "num_updates": "836800", "lr": "1.64848e-05", "gnorm": "2.507", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "250292"} +[2022-08-02 08:22:23,943][train_inner][INFO] - {"epoch": 17, "update": 16.263, "loss": "1.997", "ppl": "3.99", "wps": "389667", "ups": "3.3", "wpb": "118127", "bsz": "256", "num_updates": "837000", "lr": "1.64646e-05", "gnorm": "2.446", "loss_scale": "2", "train_wall": "60", "gb_free": "26.4", "wall": "250352"} +[2022-08-02 08:23:22,774][train_inner][INFO] - {"epoch": 17, "update": 16.267, "loss": "1.994", "ppl": "3.98", "wps": "399658", "ups": "3.4", "wpb": "117561", "bsz": "256", "num_updates": "837200", "lr": "1.64444e-05", "gnorm": "2.282", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "250411"} +[2022-08-02 08:23:42,940][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:24:22,263][train_inner][INFO] - {"epoch": 17, "update": 16.271, "loss": "1.993", "ppl": "3.98", "wps": "399303", "ups": "3.36", "wpb": "118770", "bsz": "256", "num_updates": "837400", "lr": "1.64242e-05", "gnorm": "2.337", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "250471"} +[2022-08-02 08:25:21,379][train_inner][INFO] - {"epoch": 17, "update": 16.274, "loss": "1.998", "ppl": "4", "wps": "399696", "ups": "3.38", "wpb": "118142", "bsz": "256", "num_updates": "837600", "lr": "1.6404e-05", "gnorm": "2.388", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "250530"} +[2022-08-02 08:26:20,778][train_inner][INFO] - {"epoch": 17, "update": 16.278, "loss": "1.991", "ppl": "3.97", "wps": "398449", "ups": "3.37", "wpb": "118335", "bsz": "256", "num_updates": "837800", "lr": "1.63838e-05", "gnorm": "2.412", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "250589"} +[2022-08-02 08:27:20,401][train_inner][INFO] - {"epoch": 17, "update": 16.282, "loss": "1.998", "ppl": "4", "wps": "397681", "ups": "3.35", "wpb": "118554", "bsz": "256", "num_updates": "838000", "lr": "1.63636e-05", "gnorm": "2.435", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "250649"} +[2022-08-02 08:28:19,831][train_inner][INFO] - {"epoch": 17, "update": 16.286, "loss": "1.989", "ppl": "3.97", "wps": "397308", "ups": "3.37", "wpb": "118059", "bsz": "256", "num_updates": "838200", "lr": "1.63434e-05", "gnorm": "2.358", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "250708"} +[2022-08-02 08:29:19,227][train_inner][INFO] - {"epoch": 17, "update": 16.29, "loss": "1.997", "ppl": "3.99", "wps": "397060", "ups": "3.37", "wpb": "117918", "bsz": "256", "num_updates": "838400", "lr": "1.63232e-05", "gnorm": "2.59", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "250768"} +[2022-08-02 08:30:18,842][train_inner][INFO] - {"epoch": 17, "update": 16.294, "loss": "1.994", "ppl": "3.98", "wps": "398860", "ups": "3.35", "wpb": "118889", "bsz": "256", "num_updates": "838600", "lr": "1.6303e-05", "gnorm": "2.533", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "250827"} +[2022-08-02 08:31:18,156][train_inner][INFO] - {"epoch": 17, "update": 16.298, "loss": "1.998", "ppl": "3.99", "wps": "399423", "ups": "3.37", "wpb": "118456", "bsz": "256", "num_updates": "838800", "lr": "1.62828e-05", "gnorm": "2.626", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "250887"} +[2022-08-02 08:32:17,829][train_inner][INFO] - {"epoch": 17, "update": 16.302, "loss": "1.993", "ppl": "3.98", "wps": "395371", "ups": "3.35", "wpb": "117965", "bsz": "256", "num_updates": "839000", "lr": "1.62626e-05", "gnorm": "2.455", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "250946"} +[2022-08-02 08:33:17,085][train_inner][INFO] - {"epoch": 17, "update": 16.305, "loss": "1.993", "ppl": "3.98", "wps": "400376", "ups": "3.38", "wpb": "118622", "bsz": "256", "num_updates": "839200", "lr": "1.62424e-05", "gnorm": "2.43", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "251006"} +[2022-08-02 08:34:00,948][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:34:17,055][train_inner][INFO] - {"epoch": 17, "update": 16.309, "loss": "1.997", "ppl": "3.99", "wps": "394043", "ups": "3.34", "wpb": "118152", "bsz": "256", "num_updates": "839400", "lr": "1.62222e-05", "gnorm": "2.487", "loss_scale": "2", "train_wall": "60", "gb_free": "23.1", "wall": "251065"} +[2022-08-02 08:35:16,398][train_inner][INFO] - {"epoch": 17, "update": 16.313, "loss": "1.99", "ppl": "3.97", "wps": "398504", "ups": "3.37", "wpb": "118242", "bsz": "256", "num_updates": "839600", "lr": "1.6202e-05", "gnorm": "2.378", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "251125"} +[2022-08-02 08:36:16,126][train_inner][INFO] - {"epoch": 17, "update": 16.317, "loss": "1.993", "ppl": "3.98", "wps": "396646", "ups": "3.35", "wpb": "118453", "bsz": "256", "num_updates": "839800", "lr": "1.61818e-05", "gnorm": "2.265", "loss_scale": "2", "train_wall": "59", "gb_free": "28.2", "wall": "251185"} +[2022-08-02 08:37:15,209][train_inner][INFO] - {"epoch": 17, "update": 16.321, "loss": "1.992", "ppl": "3.98", "wps": "398650", "ups": "3.39", "wpb": "117768", "bsz": "256", "num_updates": "840000", "lr": "1.61616e-05", "gnorm": "2.582", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "251244"} +[2022-08-02 08:38:14,150][train_inner][INFO] - {"epoch": 17, "update": 16.325, "loss": "1.995", "ppl": "3.99", "wps": "401586", "ups": "3.39", "wpb": "118347", "bsz": "256", "num_updates": "840200", "lr": "1.61414e-05", "gnorm": "2.256", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "251303"} +[2022-08-02 08:39:13,880][train_inner][INFO] - {"epoch": 17, "update": 16.329, "loss": "1.995", "ppl": "3.99", "wps": "397003", "ups": "3.35", "wpb": "118565", "bsz": "256", "num_updates": "840400", "lr": "1.61212e-05", "gnorm": "2.506", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "251362"} +[2022-08-02 08:40:12,963][train_inner][INFO] - {"epoch": 17, "update": 16.333, "loss": "1.997", "ppl": "3.99", "wps": "401243", "ups": "3.39", "wpb": "118532", "bsz": "256", "num_updates": "840600", "lr": "1.6101e-05", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "251421"} +[2022-08-02 08:41:12,375][train_inner][INFO] - {"epoch": 17, "update": 16.337, "loss": "2.002", "ppl": "4.01", "wps": "398061", "ups": "3.37", "wpb": "118247", "bsz": "256", "num_updates": "840800", "lr": "1.60808e-05", "gnorm": "2.504", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "251481"} +[2022-08-02 08:42:11,425][train_inner][INFO] - {"epoch": 17, "update": 16.34, "loss": "1.998", "ppl": "4", "wps": "397823", "ups": "3.39", "wpb": "117456", "bsz": "256", "num_updates": "841000", "lr": "1.60606e-05", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "251540"} +[2022-08-02 08:43:10,957][train_inner][INFO] - {"epoch": 17, "update": 16.344, "loss": "1.996", "ppl": "3.99", "wps": "398072", "ups": "3.36", "wpb": "118490", "bsz": "256", "num_updates": "841200", "lr": "1.60404e-05", "gnorm": "2.415", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "251599"} +[2022-08-02 08:44:10,835][train_inner][INFO] - {"epoch": 17, "update": 16.348, "loss": "1.999", "ppl": "4", "wps": "395028", "ups": "3.34", "wpb": "118266", "bsz": "256", "num_updates": "841400", "lr": "1.60202e-05", "gnorm": "2.317", "loss_scale": "4", "train_wall": "60", "gb_free": "22.5", "wall": "251659"} +[2022-08-02 08:44:12,023][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:45:10,203][train_inner][INFO] - {"epoch": 17, "update": 16.352, "loss": "1.996", "ppl": "3.99", "wps": "398923", "ups": "3.37", "wpb": "118416", "bsz": "256", "num_updates": "841600", "lr": "1.6e-05", "gnorm": "2.495", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "251719"} +[2022-08-02 08:46:09,587][train_inner][INFO] - {"epoch": 17, "update": 16.356, "loss": "1.996", "ppl": "3.99", "wps": "399461", "ups": "3.37", "wpb": "118607", "bsz": "256", "num_updates": "841800", "lr": "1.59798e-05", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "251778"} +[2022-08-02 08:47:09,178][train_inner][INFO] - {"epoch": 17, "update": 16.36, "loss": "1.997", "ppl": "3.99", "wps": "397661", "ups": "3.36", "wpb": "118483", "bsz": "256", "num_updates": "842000", "lr": "1.59596e-05", "gnorm": "2.565", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "251838"} +[2022-08-02 08:48:09,023][train_inner][INFO] - {"epoch": 17, "update": 16.364, "loss": "1.989", "ppl": "3.97", "wps": "396381", "ups": "3.34", "wpb": "118606", "bsz": "256", "num_updates": "842200", "lr": "1.59394e-05", "gnorm": "2.352", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "251897"} +[2022-08-02 08:49:08,851][train_inner][INFO] - {"epoch": 17, "update": 16.368, "loss": "1.988", "ppl": "3.97", "wps": "395563", "ups": "3.34", "wpb": "118328", "bsz": "256", "num_updates": "842400", "lr": "1.59192e-05", "gnorm": "2.394", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "251957"} +[2022-08-02 08:50:08,284][train_inner][INFO] - {"epoch": 17, "update": 16.372, "loss": "2.001", "ppl": "4", "wps": "396668", "ups": "3.37", "wpb": "117876", "bsz": "256", "num_updates": "842600", "lr": "1.5899e-05", "gnorm": "2.348", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "252017"} +[2022-08-02 08:51:08,018][train_inner][INFO] - {"epoch": 17, "update": 16.375, "loss": "1.99", "ppl": "3.97", "wps": "396713", "ups": "3.35", "wpb": "118486", "bsz": "256", "num_updates": "842800", "lr": "1.58788e-05", "gnorm": "2.47", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "252076"} +[2022-08-02 08:52:07,869][train_inner][INFO] - {"epoch": 17, "update": 16.379, "loss": "1.992", "ppl": "3.98", "wps": "396278", "ups": "3.34", "wpb": "118587", "bsz": "256", "num_updates": "843000", "lr": "1.58586e-05", "gnorm": "2.766", "loss_scale": "2", "train_wall": "60", "gb_free": "23.3", "wall": "252136"} +[2022-08-02 08:53:07,218][train_inner][INFO] - {"epoch": 17, "update": 16.383, "loss": "1.998", "ppl": "4", "wps": "396840", "ups": "3.37", "wpb": "117760", "bsz": "256", "num_updates": "843200", "lr": "1.58384e-05", "gnorm": "2.564", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "252196"} +[2022-08-02 08:54:06,711][train_inner][INFO] - {"epoch": 17, "update": 16.387, "loss": "1.993", "ppl": "3.98", "wps": "396767", "ups": "3.36", "wpb": "118023", "bsz": "256", "num_updates": "843400", "lr": "1.58182e-05", "gnorm": "2.788", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "252255"} +[2022-08-02 08:54:25,667][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:55:06,478][train_inner][INFO] - {"epoch": 17, "update": 16.391, "loss": "1.998", "ppl": "4", "wps": "394639", "ups": "3.35", "wpb": "117931", "bsz": "256", "num_updates": "843600", "lr": "1.5798e-05", "gnorm": "2.528", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "252315"} +[2022-08-02 08:56:05,784][train_inner][INFO] - {"epoch": 17, "update": 16.395, "loss": "2", "ppl": "4", "wps": "396701", "ups": "3.37", "wpb": "117633", "bsz": "256", "num_updates": "843800", "lr": "1.57778e-05", "gnorm": "2.447", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "252374"} +[2022-08-02 08:57:05,663][train_inner][INFO] - {"epoch": 17, "update": 16.399, "loss": "1.991", "ppl": "3.97", "wps": "395587", "ups": "3.34", "wpb": "118434", "bsz": "256", "num_updates": "844000", "lr": "1.57576e-05", "gnorm": "2.388", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "252434"} +[2022-08-02 08:58:05,179][train_inner][INFO] - {"epoch": 17, "update": 16.403, "loss": "1.993", "ppl": "3.98", "wps": "399873", "ups": "3.36", "wpb": "118994", "bsz": "256", "num_updates": "844200", "lr": "1.57374e-05", "gnorm": "2.607", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "252494"} +[2022-08-02 08:59:04,620][train_inner][INFO] - {"epoch": 17, "update": 16.407, "loss": "1.993", "ppl": "3.98", "wps": "398005", "ups": "3.36", "wpb": "118288", "bsz": "256", "num_updates": "844400", "lr": "1.57172e-05", "gnorm": "2.549", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "252553"} +[2022-08-02 09:00:04,326][train_inner][INFO] - {"epoch": 17, "update": 16.41, "loss": "1.994", "ppl": "3.98", "wps": "396407", "ups": "3.35", "wpb": "118339", "bsz": "256", "num_updates": "844600", "lr": "1.5697e-05", "gnorm": "2.412", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "252613"} +[2022-08-02 09:01:03,962][train_inner][INFO] - {"epoch": 17, "update": 16.414, "loss": "1.99", "ppl": "3.97", "wps": "397448", "ups": "3.35", "wpb": "118509", "bsz": "256", "num_updates": "844800", "lr": "1.56768e-05", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "252672"} +[2022-08-02 09:02:03,365][train_inner][INFO] - {"epoch": 17, "update": 16.418, "loss": "1.999", "ppl": "4", "wps": "398090", "ups": "3.37", "wpb": "118238", "bsz": "256", "num_updates": "845000", "lr": "1.56566e-05", "gnorm": "2.498", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "252732"} +[2022-08-02 09:03:02,989][train_inner][INFO] - {"epoch": 17, "update": 16.422, "loss": "1.993", "ppl": "3.98", "wps": "397650", "ups": "3.35", "wpb": "118548", "bsz": "256", "num_updates": "845200", "lr": "1.56364e-05", "gnorm": "2.296", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "252791"} +[2022-08-02 09:04:02,138][train_inner][INFO] - {"epoch": 17, "update": 16.426, "loss": "1.998", "ppl": "3.99", "wps": "401094", "ups": "3.38", "wpb": "118619", "bsz": "256", "num_updates": "845400", "lr": "1.56162e-05", "gnorm": "2.496", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "252851"} +[2022-08-02 09:04:36,108][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 09:05:01,914][train_inner][INFO] - {"epoch": 17, "update": 16.43, "loss": "1.992", "ppl": "3.98", "wps": "395090", "ups": "3.35", "wpb": "118085", "bsz": "256", "num_updates": "845600", "lr": "1.5596e-05", "gnorm": "2.527", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "252910"} +[2022-08-02 09:06:01,284][train_inner][INFO] - {"epoch": 17, "update": 16.434, "loss": "1.993", "ppl": "3.98", "wps": "400422", "ups": "3.37", "wpb": "118865", "bsz": "256", "num_updates": "845800", "lr": "1.55758e-05", "gnorm": "2.38", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "252970"} +[2022-08-02 09:07:00,307][train_inner][INFO] - {"epoch": 17, "update": 16.438, "loss": "1.988", "ppl": "3.97", "wps": "400271", "ups": "3.39", "wpb": "118125", "bsz": "256", "num_updates": "846000", "lr": "1.55556e-05", "gnorm": "2.485", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "253029"} +[2022-08-02 09:08:00,005][train_inner][INFO] - {"epoch": 17, "update": 16.442, "loss": "1.995", "ppl": "3.99", "wps": "396722", "ups": "3.35", "wpb": "118416", "bsz": "256", "num_updates": "846200", "lr": "1.55354e-05", "gnorm": "2.33", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "253088"} +[2022-08-02 09:08:59,920][train_inner][INFO] - {"epoch": 17, "update": 16.445, "loss": "1.993", "ppl": "3.98", "wps": "395205", "ups": "3.34", "wpb": "118393", "bsz": "256", "num_updates": "846400", "lr": "1.55152e-05", "gnorm": "2.533", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "253148"} +[2022-08-02 09:09:59,503][train_inner][INFO] - {"epoch": 17, "update": 16.449, "loss": "1.995", "ppl": "3.99", "wps": "395028", "ups": "3.36", "wpb": "117684", "bsz": "256", "num_updates": "846600", "lr": "1.54949e-05", "gnorm": "2.47", "loss_scale": "2", "train_wall": "59", "gb_free": "32", "wall": "253208"} +[2022-08-02 09:10:58,627][train_inner][INFO] - {"epoch": 17, "update": 16.453, "loss": "1.994", "ppl": "3.98", "wps": "398466", "ups": "3.38", "wpb": "117794", "bsz": "256", "num_updates": "846800", "lr": "1.54747e-05", "gnorm": "2.358", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "253267"} +[2022-08-02 09:11:58,053][train_inner][INFO] - {"epoch": 17, "update": 16.457, "loss": "1.99", "ppl": "3.97", "wps": "396431", "ups": "3.37", "wpb": "117791", "bsz": "256", "num_updates": "847000", "lr": "1.54545e-05", "gnorm": "2.417", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "253326"} +[2022-08-02 09:12:57,229][train_inner][INFO] - {"epoch": 17, "update": 16.461, "loss": "1.994", "ppl": "3.98", "wps": "399432", "ups": "3.38", "wpb": "118182", "bsz": "256", "num_updates": "847200", "lr": "1.54343e-05", "gnorm": "2.392", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "253386"} +[2022-08-02 09:13:57,381][train_inner][INFO] - {"epoch": 17, "update": 16.465, "loss": "1.997", "ppl": "3.99", "wps": "391945", "ups": "3.32", "wpb": "117881", "bsz": "256", "num_updates": "847400", "lr": "1.54141e-05", "gnorm": "2.329", "loss_scale": "2", "train_wall": "60", "gb_free": "26.4", "wall": "253446"} +[2022-08-02 09:14:47,371][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 09:14:57,302][train_inner][INFO] - {"epoch": 17, "update": 16.469, "loss": "1.993", "ppl": "3.98", "wps": "395066", "ups": "3.34", "wpb": "118364", "bsz": "256", "num_updates": "847600", "lr": "1.53939e-05", "gnorm": "2.388", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "253506"} +[2022-08-02 09:15:57,034][train_inner][INFO] - {"epoch": 17, "update": 16.473, "loss": "1.989", "ppl": "3.97", "wps": "397289", "ups": "3.35", "wpb": "118654", "bsz": "256", "num_updates": "847800", "lr": "1.53737e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "253565"} +[2022-08-02 09:16:56,266][train_inner][INFO] - {"epoch": 17, "update": 16.476, "loss": "1.991", "ppl": "3.98", "wps": "400158", "ups": "3.38", "wpb": "118510", "bsz": "256", "num_updates": "848000", "lr": "1.53535e-05", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "253625"} +[2022-08-02 09:17:56,259][train_inner][INFO] - {"epoch": 17, "update": 16.48, "loss": "1.991", "ppl": "3.98", "wps": "396003", "ups": "3.33", "wpb": "118786", "bsz": "256", "num_updates": "848200", "lr": "1.53333e-05", "gnorm": "2.57", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "253685"} +[2022-08-02 09:18:56,080][train_inner][INFO] - {"epoch": 17, "update": 16.484, "loss": "1.987", "ppl": "3.96", "wps": "396440", "ups": "3.34", "wpb": "118576", "bsz": "256", "num_updates": "848400", "lr": "1.53131e-05", "gnorm": "2.887", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "253745"} +[2022-08-02 09:19:55,719][train_inner][INFO] - {"epoch": 17, "update": 16.488, "loss": "1.997", "ppl": "3.99", "wps": "395437", "ups": "3.35", "wpb": "117915", "bsz": "256", "num_updates": "848600", "lr": "1.52929e-05", "gnorm": "2.706", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "253804"} +[2022-08-02 09:20:54,679][train_inner][INFO] - {"epoch": 17, "update": 16.492, "loss": "1.99", "ppl": "3.97", "wps": "401528", "ups": "3.39", "wpb": "118371", "bsz": "256", "num_updates": "848800", "lr": "1.52727e-05", "gnorm": "2.602", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "253863"} +[2022-08-02 09:21:54,377][train_inner][INFO] - {"epoch": 17, "update": 16.496, "loss": "1.995", "ppl": "3.98", "wps": "396270", "ups": "3.35", "wpb": "118280", "bsz": "256", "num_updates": "849000", "lr": "1.52525e-05", "gnorm": "2.443", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "253923"} +[2022-08-02 09:22:53,744][train_inner][INFO] - {"epoch": 17, "update": 16.5, "loss": "1.989", "ppl": "3.97", "wps": "396660", "ups": "3.37", "wpb": "117743", "bsz": "256", "num_updates": "849200", "lr": "1.52323e-05", "gnorm": "2.475", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "253982"} +[2022-08-02 09:23:53,218][train_inner][INFO] - {"epoch": 17, "update": 16.504, "loss": "1.989", "ppl": "3.97", "wps": "397394", "ups": "3.36", "wpb": "118171", "bsz": "256", "num_updates": "849400", "lr": "1.52121e-05", "gnorm": "2.436", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "254042"} +[2022-08-02 09:24:52,835][train_inner][INFO] - {"epoch": 17, "update": 16.508, "loss": "1.99", "ppl": "3.97", "wps": "398210", "ups": "3.35", "wpb": "118700", "bsz": "256", "num_updates": "849600", "lr": "1.51919e-05", "gnorm": "2.445", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "254101"} +[2022-08-02 09:25:02,406][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 09:25:52,676][train_inner][INFO] - {"epoch": 17, "update": 16.511, "loss": "1.992", "ppl": "3.98", "wps": "394468", "ups": "3.34", "wpb": "118025", "bsz": "256", "num_updates": "849800", "lr": "1.51717e-05", "gnorm": "2.413", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "254161"} +[2022-08-02 09:26:52,227][train_inner][INFO] - {"epoch": 17, "update": 16.515, "loss": "1.994", "ppl": "3.98", "wps": "396309", "ups": "3.36", "wpb": "118003", "bsz": "256", "num_updates": "850000", "lr": "1.51515e-05", "gnorm": "2.456", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "254221"} +[2022-08-02 09:26:52,228][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 09:27:15,065][valid][INFO] - {"epoch": 17, "valid_loss": "1.885", "valid_ppl": "3.69", "valid_wps": "1.59128e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "850000", "valid_best_loss": "1.885"} +[2022-08-02 09:27:15,069][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 17 @ 850000 updates +[2022-08-02 09:27:15,070][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_17_850000.pt +[2022-08-02 09:27:22,844][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_17_850000.pt +[2022-08-02 09:27:41,109][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_17_850000.pt (epoch 17 @ 850000 updates, score 1.885) (writing took 26.040314242243767 seconds) +[2022-08-02 09:28:40,087][train_inner][INFO] - {"epoch": 17, "update": 16.519, "loss": "1.993", "ppl": "3.98", "wps": "218724", "ups": "1.85", "wpb": "117958", "bsz": "256", "num_updates": "850200", "lr": "1.51313e-05", "gnorm": "2.362", "loss_scale": "2", "train_wall": "59", "gb_free": "30.3", "wall": "254329"} +[2022-08-02 09:29:39,331][train_inner][INFO] - {"epoch": 17, "update": 16.523, "loss": "1.99", "ppl": "3.97", "wps": "399408", "ups": "3.38", "wpb": "118312", "bsz": "256", "num_updates": "850400", "lr": "1.51111e-05", "gnorm": "2.285", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "254388"} +[2022-08-02 09:30:38,312][train_inner][INFO] - {"epoch": 17, "update": 16.527, "loss": "1.995", "ppl": "3.99", "wps": "400344", "ups": "3.39", "wpb": "118062", "bsz": "256", "num_updates": "850600", "lr": "1.50909e-05", "gnorm": "2.482", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "254447"} +[2022-08-02 09:31:37,817][train_inner][INFO] - {"epoch": 17, "update": 16.531, "loss": "1.99", "ppl": "3.97", "wps": "395832", "ups": "3.36", "wpb": "117769", "bsz": "256", "num_updates": "850800", "lr": "1.50707e-05", "gnorm": "2.405", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "254506"} +[2022-08-02 09:32:37,443][train_inner][INFO] - {"epoch": 17, "update": 16.535, "loss": "1.99", "ppl": "3.97", "wps": "400039", "ups": "3.35", "wpb": "119264", "bsz": "256", "num_updates": "851000", "lr": "1.50505e-05", "gnorm": "2.398", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "254566"} +[2022-08-02 09:33:37,364][train_inner][INFO] - {"epoch": 17, "update": 16.539, "loss": "1.99", "ppl": "3.97", "wps": "396212", "ups": "3.34", "wpb": "118706", "bsz": "256", "num_updates": "851200", "lr": "1.50303e-05", "gnorm": "2.433", "loss_scale": "2", "train_wall": "60", "gb_free": "30.1", "wall": "254626"} +[2022-08-02 09:34:37,084][train_inner][INFO] - {"epoch": 17, "update": 16.543, "loss": "1.997", "ppl": "3.99", "wps": "395929", "ups": "3.35", "wpb": "118224", "bsz": "256", "num_updates": "851400", "lr": "1.50101e-05", "gnorm": "2.387", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "254686"} +[2022-08-02 09:35:36,300][train_inner][INFO] - {"epoch": 17, "update": 16.546, "loss": "1.993", "ppl": "3.98", "wps": "399344", "ups": "3.38", "wpb": "118236", "bsz": "256", "num_updates": "851600", "lr": "1.49899e-05", "gnorm": "2.386", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "254745"} +[2022-08-02 09:36:04,049][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 09:36:36,488][train_inner][INFO] - {"epoch": 17, "update": 16.55, "loss": "1.989", "ppl": "3.97", "wps": "394198", "ups": "3.32", "wpb": "118629", "bsz": "256", "num_updates": "851800", "lr": "1.49697e-05", "gnorm": "2.45", "loss_scale": "2", "train_wall": "60", "gb_free": "26.4", "wall": "254805"} +[2022-08-02 09:37:35,771][train_inner][INFO] - {"epoch": 17, "update": 16.554, "loss": "1.994", "ppl": "3.98", "wps": "399354", "ups": "3.37", "wpb": "118373", "bsz": "256", "num_updates": "852000", "lr": "1.49495e-05", "gnorm": "2.406", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "254864"} +[2022-08-02 09:38:35,399][train_inner][INFO] - {"epoch": 17, "update": 16.558, "loss": "1.992", "ppl": "3.98", "wps": "397687", "ups": "3.35", "wpb": "118566", "bsz": "256", "num_updates": "852200", "lr": "1.49293e-05", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "254924"} +[2022-08-02 09:39:34,776][train_inner][INFO] - {"epoch": 17, "update": 16.562, "loss": "1.99", "ppl": "3.97", "wps": "400739", "ups": "3.37", "wpb": "118974", "bsz": "256", "num_updates": "852400", "lr": "1.49091e-05", "gnorm": "2.232", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "254983"} +[2022-08-02 09:40:34,243][train_inner][INFO] - {"epoch": 17, "update": 16.566, "loss": "1.992", "ppl": "3.98", "wps": "399674", "ups": "3.36", "wpb": "118836", "bsz": "256", "num_updates": "852600", "lr": "1.48889e-05", "gnorm": "2.201", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "255043"} +[2022-08-02 09:41:33,592][train_inner][INFO] - {"epoch": 17, "update": 16.57, "loss": "1.992", "ppl": "3.98", "wps": "395827", "ups": "3.37", "wpb": "117460", "bsz": "256", "num_updates": "852800", "lr": "1.48687e-05", "gnorm": "2.479", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "255102"} +[2022-08-02 09:42:32,559][train_inner][INFO] - {"epoch": 17, "update": 16.574, "loss": "1.989", "ppl": "3.97", "wps": "401663", "ups": "3.39", "wpb": "118423", "bsz": "256", "num_updates": "853000", "lr": "1.48485e-05", "gnorm": "2.441", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "255161"} +[2022-08-02 09:43:32,021][train_inner][INFO] - {"epoch": 17, "update": 16.578, "loss": "1.996", "ppl": "3.99", "wps": "397387", "ups": "3.36", "wpb": "118145", "bsz": "256", "num_updates": "853200", "lr": "1.48283e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "255220"} +[2022-08-02 09:44:31,656][train_inner][INFO] - {"epoch": 17, "update": 16.581, "loss": "1.99", "ppl": "3.97", "wps": "396031", "ups": "3.35", "wpb": "118087", "bsz": "256", "num_updates": "853400", "lr": "1.48081e-05", "gnorm": "2.68", "loss_scale": "2", "train_wall": "59", "gb_free": "26.6", "wall": "255280"} +[2022-08-02 09:45:31,083][train_inner][INFO] - {"epoch": 17, "update": 16.585, "loss": "1.993", "ppl": "3.98", "wps": "394808", "ups": "3.37", "wpb": "117310", "bsz": "256", "num_updates": "853600", "lr": "1.47879e-05", "gnorm": "2.42", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "255340"} +[2022-08-02 09:46:19,357][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 09:46:30,530][train_inner][INFO] - {"epoch": 17, "update": 16.589, "loss": "1.991", "ppl": "3.98", "wps": "396783", "ups": "3.36", "wpb": "117936", "bsz": "256", "num_updates": "853800", "lr": "1.47677e-05", "gnorm": "2.393", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "255399"} +[2022-08-02 09:47:29,603][train_inner][INFO] - {"epoch": 17, "update": 16.593, "loss": "1.992", "ppl": "3.98", "wps": "399720", "ups": "3.39", "wpb": "118063", "bsz": "256", "num_updates": "854000", "lr": "1.47475e-05", "gnorm": "2.548", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "255458"} +[2022-08-02 09:48:28,564][train_inner][INFO] - {"epoch": 17, "update": 16.597, "loss": "1.987", "ppl": "3.96", "wps": "403047", "ups": "3.39", "wpb": "118820", "bsz": "256", "num_updates": "854200", "lr": "1.47273e-05", "gnorm": "2.762", "loss_scale": "2", "train_wall": "59", "gb_free": "25.8", "wall": "255517"} +[2022-08-02 09:49:27,964][train_inner][INFO] - {"epoch": 17, "update": 16.601, "loss": "1.986", "ppl": "3.96", "wps": "398672", "ups": "3.37", "wpb": "118403", "bsz": "256", "num_updates": "854400", "lr": "1.47071e-05", "gnorm": "2.485", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "255576"} +[2022-08-02 09:50:27,291][train_inner][INFO] - {"epoch": 17, "update": 16.605, "loss": "1.988", "ppl": "3.97", "wps": "399428", "ups": "3.37", "wpb": "118484", "bsz": "256", "num_updates": "854600", "lr": "1.46869e-05", "gnorm": "2.81", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "255636"} +[2022-08-02 09:51:26,712][train_inner][INFO] - {"epoch": 17, "update": 16.609, "loss": "1.994", "ppl": "3.98", "wps": "399160", "ups": "3.37", "wpb": "118592", "bsz": "256", "num_updates": "854800", "lr": "1.46667e-05", "gnorm": "2.401", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "255695"} +[2022-08-02 09:52:26,088][train_inner][INFO] - {"epoch": 17, "update": 16.613, "loss": "1.99", "ppl": "3.97", "wps": "398617", "ups": "3.37", "wpb": "118342", "bsz": "256", "num_updates": "855000", "lr": "1.46465e-05", "gnorm": "2.271", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "255755"} +[2022-08-02 09:53:26,413][train_inner][INFO] - {"epoch": 17, "update": 16.616, "loss": "1.982", "ppl": "3.95", "wps": "392207", "ups": "3.32", "wpb": "118298", "bsz": "256", "num_updates": "855200", "lr": "1.46263e-05", "gnorm": "2.441", "loss_scale": "2", "train_wall": "60", "gb_free": "22.4", "wall": "255815"} +[2022-08-02 09:54:26,344][train_inner][INFO] - {"epoch": 17, "update": 16.62, "loss": "1.989", "ppl": "3.97", "wps": "395083", "ups": "3.34", "wpb": "118388", "bsz": "256", "num_updates": "855400", "lr": "1.46061e-05", "gnorm": "2.472", "loss_scale": "2", "train_wall": "60", "gb_free": "27.2", "wall": "255875"} +[2022-08-02 09:55:25,785][train_inner][INFO] - {"epoch": 17, "update": 16.624, "loss": "1.992", "ppl": "3.98", "wps": "398756", "ups": "3.36", "wpb": "118510", "bsz": "256", "num_updates": "855600", "lr": "1.45859e-05", "gnorm": "2.428", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "255934"} +[2022-08-02 09:56:25,200][train_inner][INFO] - {"epoch": 17, "update": 16.628, "loss": "1.985", "ppl": "3.96", "wps": "398131", "ups": "3.37", "wpb": "118275", "bsz": "256", "num_updates": "855800", "lr": "1.45657e-05", "gnorm": "2.403", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "255994"} +[2022-08-02 09:56:31,484][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 09:57:25,018][train_inner][INFO] - {"epoch": 17, "update": 16.632, "loss": "1.991", "ppl": "3.98", "wps": "394474", "ups": "3.34", "wpb": "117982", "bsz": "256", "num_updates": "856000", "lr": "1.45455e-05", "gnorm": "2.321", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "256053"} +[2022-08-02 09:58:24,358][train_inner][INFO] - {"epoch": 17, "update": 16.636, "loss": "1.987", "ppl": "3.96", "wps": "396550", "ups": "3.37", "wpb": "117656", "bsz": "256", "num_updates": "856200", "lr": "1.45253e-05", "gnorm": "2.265", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "256113"} +[2022-08-02 09:59:23,981][train_inner][INFO] - {"epoch": 17, "update": 16.64, "loss": "1.988", "ppl": "3.97", "wps": "396932", "ups": "3.35", "wpb": "118330", "bsz": "256", "num_updates": "856400", "lr": "1.45051e-05", "gnorm": "2.542", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "256172"} +[2022-08-02 10:00:23,457][train_inner][INFO] - {"epoch": 17, "update": 16.644, "loss": "1.982", "ppl": "3.95", "wps": "399712", "ups": "3.36", "wpb": "118866", "bsz": "256", "num_updates": "856600", "lr": "1.44848e-05", "gnorm": "2.416", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "256232"} +[2022-08-02 10:01:22,845][train_inner][INFO] - {"epoch": 17, "update": 16.647, "loss": "1.992", "ppl": "3.98", "wps": "396316", "ups": "3.37", "wpb": "117681", "bsz": "256", "num_updates": "856800", "lr": "1.44646e-05", "gnorm": "2.481", "loss_scale": "2", "train_wall": "59", "gb_free": "31.2", "wall": "256291"} +[2022-08-02 10:02:22,183][train_inner][INFO] - {"epoch": 17, "update": 16.651, "loss": "1.992", "ppl": "3.98", "wps": "396455", "ups": "3.37", "wpb": "117623", "bsz": "256", "num_updates": "857000", "lr": "1.44444e-05", "gnorm": "2.418", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "256351"} +[2022-08-02 10:03:21,714][train_inner][INFO] - {"epoch": 17, "update": 16.655, "loss": "1.989", "ppl": "3.97", "wps": "397698", "ups": "3.36", "wpb": "118376", "bsz": "256", "num_updates": "857200", "lr": "1.44242e-05", "gnorm": "2.394", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "256410"} +[2022-08-02 10:04:21,111][train_inner][INFO] - {"epoch": 17, "update": 16.659, "loss": "1.992", "ppl": "3.98", "wps": "396886", "ups": "3.37", "wpb": "117868", "bsz": "256", "num_updates": "857400", "lr": "1.4404e-05", "gnorm": "2.447", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "256470"} +[2022-08-02 10:05:20,427][train_inner][INFO] - {"epoch": 17, "update": 16.663, "loss": "1.986", "ppl": "3.96", "wps": "398031", "ups": "3.37", "wpb": "118049", "bsz": "256", "num_updates": "857600", "lr": "1.43838e-05", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "256529"} +[2022-08-02 10:06:20,084][train_inner][INFO] - {"epoch": 17, "update": 16.667, "loss": "1.998", "ppl": "3.99", "wps": "395350", "ups": "3.35", "wpb": "117925", "bsz": "256", "num_updates": "857800", "lr": "1.43636e-05", "gnorm": "2.438", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "256589"} +[2022-08-02 10:06:44,463][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 10:07:20,905][train_inner][INFO] - {"epoch": 17, "update": 16.671, "loss": "1.984", "ppl": "3.96", "wps": "386497", "ups": "3.29", "wpb": "117534", "bsz": "256", "num_updates": "858000", "lr": "1.43434e-05", "gnorm": "2.475", "loss_scale": "2", "train_wall": "60", "gb_free": "25.4", "wall": "256649"} +[2022-08-02 10:08:21,681][train_inner][INFO] - {"epoch": 17, "update": 16.675, "loss": "1.987", "ppl": "3.96", "wps": "389665", "ups": "3.29", "wpb": "118410", "bsz": "256", "num_updates": "858200", "lr": "1.43232e-05", "gnorm": "2.38", "loss_scale": "2", "train_wall": "60", "gb_free": "27", "wall": "256710"} +[2022-08-02 10:09:21,252][train_inner][INFO] - {"epoch": 17, "update": 16.679, "loss": "1.991", "ppl": "3.98", "wps": "395910", "ups": "3.36", "wpb": "117923", "bsz": "256", "num_updates": "858400", "lr": "1.4303e-05", "gnorm": "2.323", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "256770"} +[2022-08-02 10:10:20,823][train_inner][INFO] - {"epoch": 17, "update": 16.682, "loss": "1.99", "ppl": "3.97", "wps": "398243", "ups": "3.36", "wpb": "118619", "bsz": "256", "num_updates": "858600", "lr": "1.42828e-05", "gnorm": "2.584", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "256829"} +[2022-08-02 10:11:20,440][train_inner][INFO] - {"epoch": 17, "update": 16.686, "loss": "1.994", "ppl": "3.98", "wps": "397760", "ups": "3.35", "wpb": "118566", "bsz": "256", "num_updates": "858800", "lr": "1.42626e-05", "gnorm": "2.387", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "256889"} +[2022-08-02 10:12:19,961][train_inner][INFO] - {"epoch": 17, "update": 16.69, "loss": "1.984", "ppl": "3.96", "wps": "399495", "ups": "3.36", "wpb": "118891", "bsz": "256", "num_updates": "859000", "lr": "1.42424e-05", "gnorm": "2.462", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "256948"} +[2022-08-02 10:13:19,617][train_inner][INFO] - {"epoch": 17, "update": 16.694, "loss": "1.991", "ppl": "3.97", "wps": "396651", "ups": "3.35", "wpb": "118312", "bsz": "256", "num_updates": "859200", "lr": "1.42222e-05", "gnorm": "2.458", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "257008"} +[2022-08-02 10:14:18,983][train_inner][INFO] - {"epoch": 17, "update": 16.698, "loss": "1.994", "ppl": "3.98", "wps": "398669", "ups": "3.37", "wpb": "118335", "bsz": "256", "num_updates": "859400", "lr": "1.4202e-05", "gnorm": "2.431", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "257067"} +[2022-08-02 10:15:18,522][train_inner][INFO] - {"epoch": 17, "update": 16.702, "loss": "1.995", "ppl": "3.98", "wps": "397389", "ups": "3.36", "wpb": "118300", "bsz": "256", "num_updates": "859600", "lr": "1.41818e-05", "gnorm": "2.551", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "257127"} +[2022-08-02 10:16:17,997][train_inner][INFO] - {"epoch": 17, "update": 16.706, "loss": "1.988", "ppl": "3.97", "wps": "396524", "ups": "3.36", "wpb": "117915", "bsz": "256", "num_updates": "859800", "lr": "1.41616e-05", "gnorm": "2.677", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "257186"} +[2022-08-02 10:16:56,855][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 10:17:17,688][train_inner][INFO] - {"epoch": 17, "update": 16.71, "loss": "1.991", "ppl": "3.97", "wps": "396782", "ups": "3.35", "wpb": "118421", "bsz": "256", "num_updates": "860000", "lr": "1.41414e-05", "gnorm": "2.526", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "257246"} +[2022-08-02 10:18:17,325][train_inner][INFO] - {"epoch": 17, "update": 16.714, "loss": "1.993", "ppl": "3.98", "wps": "398035", "ups": "3.35", "wpb": "118687", "bsz": "256", "num_updates": "860200", "lr": "1.41212e-05", "gnorm": "2.334", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "257306"} +[2022-08-02 10:19:16,895][train_inner][INFO] - {"epoch": 17, "update": 16.717, "loss": "1.984", "ppl": "3.96", "wps": "397812", "ups": "3.36", "wpb": "118489", "bsz": "256", "num_updates": "860400", "lr": "1.4101e-05", "gnorm": "2.644", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "257365"} +[2022-08-02 10:20:16,323][train_inner][INFO] - {"epoch": 17, "update": 16.721, "loss": "1.989", "ppl": "3.97", "wps": "396368", "ups": "3.37", "wpb": "117776", "bsz": "256", "num_updates": "860600", "lr": "1.40808e-05", "gnorm": "2.566", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "257425"} +[2022-08-02 10:21:15,555][train_inner][INFO] - {"epoch": 17, "update": 16.725, "loss": "1.994", "ppl": "3.98", "wps": "397582", "ups": "3.38", "wpb": "117745", "bsz": "256", "num_updates": "860800", "lr": "1.40606e-05", "gnorm": "2.448", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "257484"} +[2022-08-02 10:22:14,991][train_inner][INFO] - {"epoch": 17, "update": 16.729, "loss": "1.983", "ppl": "3.95", "wps": "397812", "ups": "3.36", "wpb": "118223", "bsz": "256", "num_updates": "861000", "lr": "1.40404e-05", "gnorm": "2.501", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "257543"} +[2022-08-02 10:23:14,790][train_inner][INFO] - {"epoch": 17, "update": 16.733, "loss": "1.986", "ppl": "3.96", "wps": "397473", "ups": "3.34", "wpb": "118841", "bsz": "256", "num_updates": "861200", "lr": "1.40202e-05", "gnorm": "2.462", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "257603"} +[2022-08-02 10:24:13,842][train_inner][INFO] - {"epoch": 17, "update": 16.737, "loss": "1.992", "ppl": "3.98", "wps": "396858", "ups": "3.39", "wpb": "117176", "bsz": "256", "num_updates": "861400", "lr": "1.4e-05", "gnorm": "2.489", "loss_scale": "2", "train_wall": "59", "gb_free": "32.9", "wall": "257662"} +[2022-08-02 10:25:14,236][train_inner][INFO] - {"epoch": 17, "update": 16.741, "loss": "1.985", "ppl": "3.96", "wps": "392960", "ups": "3.31", "wpb": "118661", "bsz": "256", "num_updates": "861600", "lr": "1.39798e-05", "gnorm": "2.48", "loss_scale": "2", "train_wall": "60", "gb_free": "24.3", "wall": "257723"} +[2022-08-02 10:26:13,680][train_inner][INFO] - {"epoch": 17, "update": 16.745, "loss": "1.982", "ppl": "3.95", "wps": "400776", "ups": "3.36", "wpb": "119118", "bsz": "256", "num_updates": "861800", "lr": "1.39596e-05", "gnorm": "2.314", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "257782"} +[2022-08-02 10:27:12,018][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 10:27:13,531][train_inner][INFO] - {"epoch": 17, "update": 16.749, "loss": "1.986", "ppl": "3.96", "wps": "396559", "ups": "3.34", "wpb": "118671", "bsz": "256", "num_updates": "862000", "lr": "1.39394e-05", "gnorm": "2.377", "loss_scale": "2", "train_wall": "60", "gb_free": "25.2", "wall": "257842"} +[2022-08-02 10:28:12,818][train_inner][INFO] - {"epoch": 17, "update": 16.752, "loss": "1.985", "ppl": "3.96", "wps": "399104", "ups": "3.37", "wpb": "118308", "bsz": "256", "num_updates": "862200", "lr": "1.39192e-05", "gnorm": "2.578", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "257901"} +[2022-08-02 10:29:12,142][train_inner][INFO] - {"epoch": 17, "update": 16.756, "loss": "1.983", "ppl": "3.95", "wps": "398512", "ups": "3.37", "wpb": "118206", "bsz": "256", "num_updates": "862400", "lr": "1.3899e-05", "gnorm": "2.517", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "257961"} +[2022-08-02 10:30:11,505][train_inner][INFO] - {"epoch": 17, "update": 16.76, "loss": "1.987", "ppl": "3.96", "wps": "398136", "ups": "3.37", "wpb": "118173", "bsz": "256", "num_updates": "862600", "lr": "1.38788e-05", "gnorm": "2.322", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "258020"} +[2022-08-02 10:31:12,203][train_inner][INFO] - {"epoch": 17, "update": 16.764, "loss": "1.989", "ppl": "3.97", "wps": "388942", "ups": "3.3", "wpb": "118039", "bsz": "256", "num_updates": "862800", "lr": "1.38586e-05", "gnorm": "2.564", "loss_scale": "2", "train_wall": "60", "gb_free": "21.8", "wall": "258081"} +[2022-08-02 10:32:11,790][train_inner][INFO] - {"epoch": 17, "update": 16.768, "loss": "1.987", "ppl": "3.96", "wps": "398192", "ups": "3.36", "wpb": "118635", "bsz": "256", "num_updates": "863000", "lr": "1.38384e-05", "gnorm": "2.365", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "258140"} +[2022-08-02 10:33:11,367][train_inner][INFO] - {"epoch": 17, "update": 16.772, "loss": "1.99", "ppl": "3.97", "wps": "398018", "ups": "3.36", "wpb": "118564", "bsz": "256", "num_updates": "863200", "lr": "1.38182e-05", "gnorm": "2.372", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "258200"} +[2022-08-02 10:34:11,036][train_inner][INFO] - {"epoch": 17, "update": 16.776, "loss": "1.99", "ppl": "3.97", "wps": "396767", "ups": "3.35", "wpb": "118373", "bsz": "256", "num_updates": "863400", "lr": "1.3798e-05", "gnorm": "2.341", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "258259"} +[2022-08-02 10:35:11,701][train_inner][INFO] - {"epoch": 17, "update": 16.78, "loss": "1.991", "ppl": "3.97", "wps": "389588", "ups": "3.3", "wpb": "118171", "bsz": "256", "num_updates": "863600", "lr": "1.37778e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "60", "gb_free": "22.4", "wall": "258320"} +[2022-08-02 10:36:11,310][train_inner][INFO] - {"epoch": 17, "update": 16.784, "loss": "1.986", "ppl": "3.96", "wps": "395712", "ups": "3.36", "wpb": "117938", "bsz": "256", "num_updates": "863800", "lr": "1.37576e-05", "gnorm": "2.392", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "258380"} +[2022-08-02 10:37:10,657][train_inner][INFO] - {"epoch": 17, "update": 16.787, "loss": "1.983", "ppl": "3.95", "wps": "400861", "ups": "3.37", "wpb": "118949", "bsz": "256", "num_updates": "864000", "lr": "1.37374e-05", "gnorm": "2.305", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "258439"} +[2022-08-02 10:37:31,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 10:38:10,418][train_inner][INFO] - {"epoch": 17, "update": 16.791, "loss": "1.993", "ppl": "3.98", "wps": "395212", "ups": "3.35", "wpb": "118091", "bsz": "256", "num_updates": "864200", "lr": "1.37172e-05", "gnorm": "2.375", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "258499"} +[2022-08-02 10:39:09,272][train_inner][INFO] - {"epoch": 17, "update": 16.795, "loss": "1.992", "ppl": "3.98", "wps": "398625", "ups": "3.4", "wpb": "117302", "bsz": "256", "num_updates": "864400", "lr": "1.3697e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "258558"} +[2022-08-02 10:40:09,747][train_inner][INFO] - {"epoch": 17, "update": 16.799, "loss": "1.984", "ppl": "3.96", "wps": "391650", "ups": "3.31", "wpb": "118424", "bsz": "256", "num_updates": "864600", "lr": "1.36768e-05", "gnorm": "2.289", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "258618"} +[2022-08-02 10:41:09,541][train_inner][INFO] - {"epoch": 17, "update": 16.803, "loss": "1.988", "ppl": "3.97", "wps": "396475", "ups": "3.34", "wpb": "118534", "bsz": "256", "num_updates": "864800", "lr": "1.36566e-05", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "258678"} +[2022-08-02 10:42:08,833][train_inner][INFO] - {"epoch": 17, "update": 16.807, "loss": "1.992", "ppl": "3.98", "wps": "398575", "ups": "3.37", "wpb": "118162", "bsz": "256", "num_updates": "865000", "lr": "1.36364e-05", "gnorm": "2.464", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "258737"} +[2022-08-02 10:43:08,542][train_inner][INFO] - {"epoch": 17, "update": 16.811, "loss": "1.982", "ppl": "3.95", "wps": "396356", "ups": "3.35", "wpb": "118329", "bsz": "256", "num_updates": "865200", "lr": "1.36162e-05", "gnorm": "2.454", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "258797"} +[2022-08-02 10:44:09,148][train_inner][INFO] - {"epoch": 17, "update": 16.815, "loss": "1.988", "ppl": "3.97", "wps": "392011", "ups": "3.3", "wpb": "118789", "bsz": "256", "num_updates": "865400", "lr": "1.3596e-05", "gnorm": "2.476", "loss_scale": "2", "train_wall": "60", "gb_free": "26.2", "wall": "258858"} +[2022-08-02 10:45:08,425][train_inner][INFO] - {"epoch": 17, "update": 16.818, "loss": "1.989", "ppl": "3.97", "wps": "400577", "ups": "3.37", "wpb": "118725", "bsz": "256", "num_updates": "865600", "lr": "1.35758e-05", "gnorm": "2.528", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "258917"} +[2022-08-02 10:46:07,723][train_inner][INFO] - {"epoch": 17, "update": 16.822, "loss": "1.993", "ppl": "3.98", "wps": "397733", "ups": "3.37", "wpb": "117924", "bsz": "256", "num_updates": "865800", "lr": "1.35556e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "258976"} +[2022-08-02 10:47:07,200][train_inner][INFO] - {"epoch": 17, "update": 16.826, "loss": "1.982", "ppl": "3.95", "wps": "397941", "ups": "3.36", "wpb": "118340", "bsz": "256", "num_updates": "866000", "lr": "1.35354e-05", "gnorm": "2.553", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "259036"} +[2022-08-02 10:47:45,911][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 10:48:06,708][train_inner][INFO] - {"epoch": 17, "update": 16.83, "loss": "1.989", "ppl": "3.97", "wps": "395910", "ups": "3.36", "wpb": "117799", "bsz": "256", "num_updates": "866200", "lr": "1.35152e-05", "gnorm": "2.613", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "259095"} +[2022-08-02 10:49:06,367][train_inner][INFO] - {"epoch": 17, "update": 16.834, "loss": "1.984", "ppl": "3.96", "wps": "396389", "ups": "3.35", "wpb": "118240", "bsz": "256", "num_updates": "866400", "lr": "1.34949e-05", "gnorm": "2.549", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "259155"} +[2022-08-02 10:50:05,795][train_inner][INFO] - {"epoch": 17, "update": 16.838, "loss": "1.983", "ppl": "3.95", "wps": "398963", "ups": "3.37", "wpb": "118547", "bsz": "256", "num_updates": "866600", "lr": "1.34747e-05", "gnorm": "2.673", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "259214"} +[2022-08-02 10:51:04,926][train_inner][INFO] - {"epoch": 17, "update": 16.842, "loss": "1.988", "ppl": "3.97", "wps": "400144", "ups": "3.38", "wpb": "118304", "bsz": "256", "num_updates": "866800", "lr": "1.34545e-05", "gnorm": "2.861", "loss_scale": "2", "train_wall": "59", "gb_free": "28.5", "wall": "259273"} +[2022-08-02 10:52:05,582][train_inner][INFO] - {"epoch": 17, "update": 16.846, "loss": "1.987", "ppl": "3.96", "wps": "389777", "ups": "3.3", "wpb": "118210", "bsz": "256", "num_updates": "867000", "lr": "1.34343e-05", "gnorm": "2.459", "loss_scale": "2", "train_wall": "60", "gb_free": "21.8", "wall": "259334"} +[2022-08-02 10:53:04,815][train_inner][INFO] - {"epoch": 17, "update": 16.85, "loss": "1.987", "ppl": "3.97", "wps": "399730", "ups": "3.38", "wpb": "118387", "bsz": "256", "num_updates": "867200", "lr": "1.34141e-05", "gnorm": "2.497", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "259393"} +[2022-08-02 10:54:03,947][train_inner][INFO] - {"epoch": 17, "update": 16.853, "loss": "1.987", "ppl": "3.97", "wps": "399926", "ups": "3.38", "wpb": "118240", "bsz": "256", "num_updates": "867400", "lr": "1.33939e-05", "gnorm": "2.567", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "259452"} +[2022-08-02 10:55:03,777][train_inner][INFO] - {"epoch": 17, "update": 16.857, "loss": "1.986", "ppl": "3.96", "wps": "399470", "ups": "3.34", "wpb": "119500", "bsz": "256", "num_updates": "867600", "lr": "1.33737e-05", "gnorm": "2.349", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "259512"} +[2022-08-02 10:56:03,269][train_inner][INFO] - {"epoch": 17, "update": 16.861, "loss": "1.983", "ppl": "3.95", "wps": "398080", "ups": "3.36", "wpb": "118411", "bsz": "255.9", "num_updates": "867800", "lr": "1.33535e-05", "gnorm": "2.392", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "259572"} +[2022-08-02 10:57:02,729][train_inner][INFO] - {"epoch": 17, "update": 16.865, "loss": "1.992", "ppl": "3.98", "wps": "397093", "ups": "3.36", "wpb": "118056", "bsz": "256", "num_updates": "868000", "lr": "1.33333e-05", "gnorm": "2.497", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "259631"} +[2022-08-02 10:58:00,550][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 10:58:02,647][train_inner][INFO] - {"epoch": 17, "update": 16.869, "loss": "1.984", "ppl": "3.96", "wps": "396107", "ups": "3.34", "wpb": "118668", "bsz": "256", "num_updates": "868200", "lr": "1.33131e-05", "gnorm": "2.553", "loss_scale": "2", "train_wall": "60", "gb_free": "22", "wall": "259691"} +[2022-08-02 10:59:01,695][train_inner][INFO] - {"epoch": 17, "update": 16.873, "loss": "1.983", "ppl": "3.95", "wps": "399397", "ups": "3.39", "wpb": "117916", "bsz": "256", "num_updates": "868400", "lr": "1.32929e-05", "gnorm": "2.473", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "259750"} +[2022-08-02 11:00:01,280][train_inner][INFO] - {"epoch": 17, "update": 16.877, "loss": "1.984", "ppl": "3.96", "wps": "395919", "ups": "3.36", "wpb": "117955", "bsz": "256", "num_updates": "868600", "lr": "1.32727e-05", "gnorm": "2.644", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "259810"} +[2022-08-02 11:01:00,674][train_inner][INFO] - {"epoch": 17, "update": 16.881, "loss": "1.984", "ppl": "3.96", "wps": "399128", "ups": "3.37", "wpb": "118528", "bsz": "256", "num_updates": "868800", "lr": "1.32525e-05", "gnorm": "2.431", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "259869"} +[2022-08-02 11:02:00,332][train_inner][INFO] - {"epoch": 17, "update": 16.885, "loss": "1.985", "ppl": "3.96", "wps": "396744", "ups": "3.35", "wpb": "118345", "bsz": "256", "num_updates": "869000", "lr": "1.32323e-05", "gnorm": "2.491", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "259929"} +[2022-08-02 11:02:59,637][train_inner][INFO] - {"epoch": 17, "update": 16.888, "loss": "1.979", "ppl": "3.94", "wps": "400194", "ups": "3.37", "wpb": "118666", "bsz": "256", "num_updates": "869200", "lr": "1.32121e-05", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "259988"} +[2022-08-02 11:04:00,391][train_inner][INFO] - {"epoch": 17, "update": 16.892, "loss": "1.99", "ppl": "3.97", "wps": "388396", "ups": "3.29", "wpb": "117983", "bsz": "256", "num_updates": "869400", "lr": "1.31919e-05", "gnorm": "2.443", "loss_scale": "2", "train_wall": "60", "gb_free": "23.2", "wall": "260049"} +[2022-08-02 11:05:00,003][train_inner][INFO] - {"epoch": 17, "update": 16.896, "loss": "1.985", "ppl": "3.96", "wps": "398533", "ups": "3.36", "wpb": "118786", "bsz": "256", "num_updates": "869600", "lr": "1.31717e-05", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "260108"} +[2022-08-02 11:05:59,607][train_inner][INFO] - {"epoch": 17, "update": 16.9, "loss": "1.979", "ppl": "3.94", "wps": "398812", "ups": "3.36", "wpb": "118852", "bsz": "256", "num_updates": "869800", "lr": "1.31515e-05", "gnorm": "2.29", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "260168"} +[2022-08-02 11:06:59,445][train_inner][INFO] - {"epoch": 17, "update": 16.904, "loss": "1.982", "ppl": "3.95", "wps": "396376", "ups": "3.34", "wpb": "118591", "bsz": "256", "num_updates": "870000", "lr": "1.31313e-05", "gnorm": "2.284", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "260228"} +[2022-08-02 11:07:59,310][train_inner][INFO] - {"epoch": 17, "update": 16.908, "loss": "1.984", "ppl": "3.96", "wps": "396407", "ups": "3.34", "wpb": "118653", "bsz": "256", "num_updates": "870200", "lr": "1.31111e-05", "gnorm": "2.391", "loss_scale": "2", "train_wall": "60", "gb_free": "25.4", "wall": "260288"} +[2022-08-02 11:08:31,895][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 11:08:59,169][train_inner][INFO] - {"epoch": 17, "update": 16.912, "loss": "1.984", "ppl": "3.96", "wps": "395101", "ups": "3.34", "wpb": "118253", "bsz": "256", "num_updates": "870400", "lr": "1.30909e-05", "gnorm": "2.196", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "260348"} +[2022-08-02 11:10:00,103][train_inner][INFO] - {"epoch": 17, "update": 16.916, "loss": "1.985", "ppl": "3.96", "wps": "387770", "ups": "3.28", "wpb": "118140", "bsz": "256", "num_updates": "870600", "lr": "1.30707e-05", "gnorm": "2.421", "loss_scale": "2", "train_wall": "61", "gb_free": "21.9", "wall": "260409"} +[2022-08-02 11:10:59,326][train_inner][INFO] - {"epoch": 17, "update": 16.92, "loss": "1.986", "ppl": "3.96", "wps": "397788", "ups": "3.38", "wpb": "117791", "bsz": "256", "num_updates": "870800", "lr": "1.30505e-05", "gnorm": "2.557", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "260468"} +[2022-08-02 11:11:59,300][train_inner][INFO] - {"epoch": 17, "update": 16.923, "loss": "1.985", "ppl": "3.96", "wps": "394299", "ups": "3.33", "wpb": "118237", "bsz": "256", "num_updates": "871000", "lr": "1.30303e-05", "gnorm": "2.392", "loss_scale": "2", "train_wall": "60", "gb_free": "21.5", "wall": "260528"} +[2022-08-02 11:12:58,805][train_inner][INFO] - {"epoch": 17, "update": 16.927, "loss": "1.992", "ppl": "3.98", "wps": "396798", "ups": "3.36", "wpb": "118056", "bsz": "256", "num_updates": "871200", "lr": "1.30101e-05", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "260587"} +[2022-08-02 11:13:58,502][train_inner][INFO] - {"epoch": 17, "update": 16.931, "loss": "1.983", "ppl": "3.95", "wps": "395460", "ups": "3.35", "wpb": "118038", "bsz": "256", "num_updates": "871400", "lr": "1.29899e-05", "gnorm": "2.574", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "260647"} +[2022-08-02 11:14:57,866][train_inner][INFO] - {"epoch": 17, "update": 16.935, "loss": "1.984", "ppl": "3.95", "wps": "398256", "ups": "3.37", "wpb": "118209", "bsz": "256", "num_updates": "871600", "lr": "1.29697e-05", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "260706"} +[2022-08-02 11:15:57,285][train_inner][INFO] - {"epoch": 17, "update": 16.939, "loss": "1.986", "ppl": "3.96", "wps": "397274", "ups": "3.37", "wpb": "118028", "bsz": "256", "num_updates": "871800", "lr": "1.29495e-05", "gnorm": "2.392", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "260766"} +[2022-08-02 11:16:56,701][train_inner][INFO] - {"epoch": 17, "update": 16.943, "loss": "1.984", "ppl": "3.96", "wps": "398055", "ups": "3.37", "wpb": "118253", "bsz": "256", "num_updates": "872000", "lr": "1.29293e-05", "gnorm": "2.368", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "260825"} +[2022-08-02 11:17:56,367][train_inner][INFO] - {"epoch": 17, "update": 16.947, "loss": "1.983", "ppl": "3.95", "wps": "397166", "ups": "3.35", "wpb": "118486", "bsz": "256", "num_updates": "872200", "lr": "1.29091e-05", "gnorm": "2.667", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "260885"} +[2022-08-02 11:18:42,768][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 11:18:55,974][train_inner][INFO] - {"epoch": 17, "update": 16.951, "loss": "1.986", "ppl": "3.96", "wps": "398242", "ups": "3.36", "wpb": "118688", "bsz": "256", "num_updates": "872400", "lr": "1.28889e-05", "gnorm": "2.432", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "260944"} +[2022-08-02 11:19:54,950][train_inner][INFO] - {"epoch": 17, "update": 16.955, "loss": "1.991", "ppl": "3.98", "wps": "398097", "ups": "3.39", "wpb": "117391", "bsz": "256", "num_updates": "872600", "lr": "1.28687e-05", "gnorm": "2.572", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "261003"} +[2022-08-02 11:20:54,465][train_inner][INFO] - {"epoch": 17, "update": 16.958, "loss": "1.987", "ppl": "3.96", "wps": "397343", "ups": "3.36", "wpb": "118239", "bsz": "256", "num_updates": "872800", "lr": "1.28485e-05", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "261063"} +[2022-08-02 11:21:53,942][train_inner][INFO] - {"epoch": 17, "update": 16.962, "loss": "1.991", "ppl": "3.97", "wps": "397443", "ups": "3.36", "wpb": "118193", "bsz": "256", "num_updates": "873000", "lr": "1.28283e-05", "gnorm": "2.527", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "261122"} +[2022-08-02 11:22:53,276][train_inner][INFO] - {"epoch": 17, "update": 16.966, "loss": "1.984", "ppl": "3.96", "wps": "398813", "ups": "3.37", "wpb": "118314", "bsz": "256", "num_updates": "873200", "lr": "1.28081e-05", "gnorm": "2.446", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "261182"} +[2022-08-02 11:23:53,712][train_inner][INFO] - {"epoch": 17, "update": 16.97, "loss": "1.986", "ppl": "3.96", "wps": "392737", "ups": "3.31", "wpb": "118678", "bsz": "256", "num_updates": "873400", "lr": "1.27879e-05", "gnorm": "2.326", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "261242"} +[2022-08-02 11:24:53,096][train_inner][INFO] - {"epoch": 17, "update": 16.974, "loss": "1.985", "ppl": "3.96", "wps": "399198", "ups": "3.37", "wpb": "118528", "bsz": "256", "num_updates": "873600", "lr": "1.27677e-05", "gnorm": "2.416", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "261302"} +[2022-08-02 11:25:52,223][train_inner][INFO] - {"epoch": 17, "update": 16.978, "loss": "1.985", "ppl": "3.96", "wps": "400421", "ups": "3.38", "wpb": "118378", "bsz": "256", "num_updates": "873800", "lr": "1.27475e-05", "gnorm": "2.329", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "261361"} +[2022-08-02 11:26:51,582][train_inner][INFO] - {"epoch": 17, "update": 16.982, "loss": "1.984", "ppl": "3.96", "wps": "398539", "ups": "3.37", "wpb": "118284", "bsz": "256", "num_updates": "874000", "lr": "1.27273e-05", "gnorm": "2.545", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "261420"} +[2022-08-02 11:27:51,133][train_inner][INFO] - {"epoch": 17, "update": 16.986, "loss": "1.981", "ppl": "3.95", "wps": "397692", "ups": "3.36", "wpb": "118414", "bsz": "256", "num_updates": "874200", "lr": "1.27071e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "261480"} +[2022-08-02 11:28:50,717][train_inner][INFO] - {"epoch": 17, "update": 16.989, "loss": "1.979", "ppl": "3.94", "wps": "397481", "ups": "3.36", "wpb": "118416", "bsz": "256", "num_updates": "874400", "lr": "1.26869e-05", "gnorm": "2.439", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "261539"} +[2022-08-02 11:28:53,107][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 11:29:50,012][train_inner][INFO] - {"epoch": 17, "update": 16.993, "loss": "1.985", "ppl": "3.96", "wps": "399504", "ups": "3.37", "wpb": "118444", "bsz": "256", "num_updates": "874600", "lr": "1.26667e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "261598"} +[2022-08-02 11:30:49,611][train_inner][INFO] - {"epoch": 17, "update": 16.997, "loss": "1.985", "ppl": "3.96", "wps": "394250", "ups": "3.36", "wpb": "117484", "bsz": "256", "num_updates": "874800", "lr": "1.26465e-05", "gnorm": "2.517", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "261658"} +[2022-08-02 11:31:32,692][fairseq_cli.train][INFO] - end of epoch 17 (average epoch stats below) +[2022-08-02 11:31:32,692][train][INFO] - {"epoch": 17, "train_loss": "1.992", "train_ppl": "3.98", "train_wps": "395714", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "874941", "train_lr": "1.26322e-05", "train_gnorm": "2.441", "train_loss_scale": "2", "train_train_wall": "15236", "train_gb_free": "22", "train_wall": "261701"} +[2022-08-02 11:31:32,801][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 11:31:32,804][fairseq.trainer][INFO] - begin training epoch 18 +[2022-08-02 11:31:32,805][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 11:32:01,572][train_inner][INFO] - {"epoch": 18, "update": 17.001, "loss": "1.981", "ppl": "3.95", "wps": "328596", "ups": "2.78", "wpb": "118228", "bsz": "255.4", "num_updates": "875000", "lr": "1.26263e-05", "gnorm": "2.28", "loss_scale": "2", "train_wall": "62", "gb_free": "21.9", "wall": "261730"} +[2022-08-02 11:33:01,000][train_inner][INFO] - {"epoch": 18, "update": 17.005, "loss": "1.985", "ppl": "3.96", "wps": "396537", "ups": "3.37", "wpb": "117827", "bsz": "256", "num_updates": "875200", "lr": "1.26061e-05", "gnorm": "2.649", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "261789"} +[2022-08-02 11:34:00,592][train_inner][INFO] - {"epoch": 18, "update": 17.009, "loss": "1.985", "ppl": "3.96", "wps": "398961", "ups": "3.36", "wpb": "118873", "bsz": "256", "num_updates": "875400", "lr": "1.25859e-05", "gnorm": "2.577", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "261849"} +[2022-08-02 11:35:00,245][train_inner][INFO] - {"epoch": 18, "update": 17.013, "loss": "1.981", "ppl": "3.95", "wps": "397367", "ups": "3.35", "wpb": "118520", "bsz": "256", "num_updates": "875600", "lr": "1.25657e-05", "gnorm": "2.317", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "261909"} +[2022-08-02 11:35:59,579][train_inner][INFO] - {"epoch": 18, "update": 17.017, "loss": "1.98", "ppl": "3.94", "wps": "399951", "ups": "3.37", "wpb": "118654", "bsz": "256", "num_updates": "875800", "lr": "1.25455e-05", "gnorm": "2.426", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "261968"} +[2022-08-02 11:36:58,917][train_inner][INFO] - {"epoch": 18, "update": 17.021, "loss": "1.983", "ppl": "3.95", "wps": "398392", "ups": "3.37", "wpb": "118198", "bsz": "256", "num_updates": "876000", "lr": "1.25253e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "262027"} +[2022-08-02 11:37:58,744][train_inner][INFO] - {"epoch": 18, "update": 17.024, "loss": "1.979", "ppl": "3.94", "wps": "396867", "ups": "3.34", "wpb": "118716", "bsz": "256", "num_updates": "876200", "lr": "1.25051e-05", "gnorm": "2.256", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "262087"} +[2022-08-02 11:38:58,580][train_inner][INFO] - {"epoch": 18, "update": 17.028, "loss": "1.977", "ppl": "3.94", "wps": "398988", "ups": "3.34", "wpb": "119368", "bsz": "256", "num_updates": "876400", "lr": "1.24848e-05", "gnorm": "2.354", "loss_scale": "2", "train_wall": "60", "gb_free": "24.5", "wall": "262147"} +[2022-08-02 11:39:16,358][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 11:39:58,083][train_inner][INFO] - {"epoch": 18, "update": 17.032, "loss": "1.983", "ppl": "3.95", "wps": "397923", "ups": "3.36", "wpb": "118387", "bsz": "256", "num_updates": "876600", "lr": "1.24646e-05", "gnorm": "2.644", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "262207"} +[2022-08-02 11:40:57,886][train_inner][INFO] - {"epoch": 18, "update": 17.036, "loss": "1.984", "ppl": "3.96", "wps": "396708", "ups": "3.34", "wpb": "118621", "bsz": "256", "num_updates": "876800", "lr": "1.24444e-05", "gnorm": "2.542", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "262266"} +[2022-08-02 11:41:57,171][train_inner][INFO] - {"epoch": 18, "update": 17.04, "loss": "1.985", "ppl": "3.96", "wps": "398368", "ups": "3.37", "wpb": "118085", "bsz": "256", "num_updates": "877000", "lr": "1.24242e-05", "gnorm": "2.689", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "262326"} +[2022-08-02 11:42:56,440][train_inner][INFO] - {"epoch": 18, "update": 17.044, "loss": "1.986", "ppl": "3.96", "wps": "395984", "ups": "3.37", "wpb": "117348", "bsz": "256", "num_updates": "877200", "lr": "1.2404e-05", "gnorm": "2.785", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "262385"} +[2022-08-02 11:43:57,045][train_inner][INFO] - {"epoch": 18, "update": 17.048, "loss": "1.979", "ppl": "3.94", "wps": "388520", "ups": "3.3", "wpb": "117730", "bsz": "256", "num_updates": "877400", "lr": "1.23838e-05", "gnorm": "2.621", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "262445"} +[2022-08-02 11:44:56,438][train_inner][INFO] - {"epoch": 18, "update": 17.052, "loss": "1.983", "ppl": "3.95", "wps": "398845", "ups": "3.37", "wpb": "118442", "bsz": "256", "num_updates": "877600", "lr": "1.23636e-05", "gnorm": "2.669", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "262505"} +[2022-08-02 11:45:56,206][train_inner][INFO] - {"epoch": 18, "update": 17.056, "loss": "1.985", "ppl": "3.96", "wps": "396771", "ups": "3.35", "wpb": "118570", "bsz": "256", "num_updates": "877800", "lr": "1.23434e-05", "gnorm": "2.376", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "262565"} +[2022-08-02 11:46:55,715][train_inner][INFO] - {"epoch": 18, "update": 17.059, "loss": "1.981", "ppl": "3.95", "wps": "399163", "ups": "3.36", "wpb": "118768", "bsz": "256", "num_updates": "878000", "lr": "1.23232e-05", "gnorm": "2.455", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "262624"} +[2022-08-02 11:47:55,026][train_inner][INFO] - {"epoch": 18, "update": 17.063, "loss": "1.985", "ppl": "3.96", "wps": "398011", "ups": "3.37", "wpb": "118032", "bsz": "256", "num_updates": "878200", "lr": "1.2303e-05", "gnorm": "2.46", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "262683"} +[2022-08-02 11:48:54,911][train_inner][INFO] - {"epoch": 18, "update": 17.067, "loss": "1.981", "ppl": "3.95", "wps": "396238", "ups": "3.34", "wpb": "118642", "bsz": "256", "num_updates": "878400", "lr": "1.22828e-05", "gnorm": "2.503", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "262743"} +[2022-08-02 11:49:35,690][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 11:49:54,722][train_inner][INFO] - {"epoch": 18, "update": 17.071, "loss": "1.98", "ppl": "3.95", "wps": "394997", "ups": "3.34", "wpb": "118126", "bsz": "256", "num_updates": "878600", "lr": "1.22626e-05", "gnorm": "2.33", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "262803"} +[2022-08-02 11:50:54,015][train_inner][INFO] - {"epoch": 18, "update": 17.075, "loss": "1.981", "ppl": "3.95", "wps": "399532", "ups": "3.37", "wpb": "118447", "bsz": "256", "num_updates": "878800", "lr": "1.22424e-05", "gnorm": "2.482", "loss_scale": "2", "train_wall": "59", "gb_free": "27.2", "wall": "262862"} +[2022-08-02 11:51:53,501][train_inner][INFO] - {"epoch": 18, "update": 17.079, "loss": "1.983", "ppl": "3.95", "wps": "398578", "ups": "3.36", "wpb": "118548", "bsz": "256", "num_updates": "879000", "lr": "1.22222e-05", "gnorm": "2.51", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "262922"} +[2022-08-02 11:52:53,016][train_inner][INFO] - {"epoch": 18, "update": 17.083, "loss": "1.977", "ppl": "3.94", "wps": "397971", "ups": "3.36", "wpb": "118426", "bsz": "256", "num_updates": "879200", "lr": "1.2202e-05", "gnorm": "2.324", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "262981"} +[2022-08-02 11:53:52,489][train_inner][INFO] - {"epoch": 18, "update": 17.087, "loss": "1.978", "ppl": "3.94", "wps": "399713", "ups": "3.36", "wpb": "118859", "bsz": "256", "num_updates": "879400", "lr": "1.21818e-05", "gnorm": "2.417", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "263041"} +[2022-08-02 11:54:51,844][train_inner][INFO] - {"epoch": 18, "update": 17.091, "loss": "1.975", "ppl": "3.93", "wps": "399758", "ups": "3.37", "wpb": "118636", "bsz": "256", "num_updates": "879600", "lr": "1.21616e-05", "gnorm": "2.344", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "263100"} +[2022-08-02 11:55:51,239][train_inner][INFO] - {"epoch": 18, "update": 17.094, "loss": "1.981", "ppl": "3.95", "wps": "397424", "ups": "3.37", "wpb": "118025", "bsz": "256", "num_updates": "879800", "lr": "1.21414e-05", "gnorm": "2.521", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "263160"} +[2022-08-02 11:56:50,684][train_inner][INFO] - {"epoch": 18, "update": 17.098, "loss": "1.983", "ppl": "3.95", "wps": "395210", "ups": "3.36", "wpb": "117464", "bsz": "256", "num_updates": "880000", "lr": "1.21212e-05", "gnorm": "2.481", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "263219"} +[2022-08-02 11:57:50,128][train_inner][INFO] - {"epoch": 18, "update": 17.102, "loss": "1.986", "ppl": "3.96", "wps": "397778", "ups": "3.36", "wpb": "118228", "bsz": "256", "num_updates": "880200", "lr": "1.2101e-05", "gnorm": "2.47", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "263279"} +[2022-08-02 11:58:50,592][train_inner][INFO] - {"epoch": 18, "update": 17.106, "loss": "1.982", "ppl": "3.95", "wps": "390062", "ups": "3.31", "wpb": "117922", "bsz": "256", "num_updates": "880400", "lr": "1.20808e-05", "gnorm": "2.344", "loss_scale": "2", "train_wall": "60", "gb_free": "22.7", "wall": "263339"} +[2022-08-02 11:59:47,204][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 11:59:50,478][train_inner][INFO] - {"epoch": 18, "update": 17.11, "loss": "1.981", "ppl": "3.95", "wps": "396413", "ups": "3.34", "wpb": "118698", "bsz": "256", "num_updates": "880600", "lr": "1.20606e-05", "gnorm": "2.307", "loss_scale": "2", "train_wall": "60", "gb_free": "22.2", "wall": "263399"} +[2022-08-02 12:00:50,112][train_inner][INFO] - {"epoch": 18, "update": 17.114, "loss": "1.982", "ppl": "3.95", "wps": "397158", "ups": "3.35", "wpb": "118419", "bsz": "256", "num_updates": "880800", "lr": "1.20404e-05", "gnorm": "2.541", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "263459"} +[2022-08-02 12:01:49,469][train_inner][INFO] - {"epoch": 18, "update": 17.118, "loss": "1.981", "ppl": "3.95", "wps": "398738", "ups": "3.37", "wpb": "118339", "bsz": "256", "num_updates": "881000", "lr": "1.20202e-05", "gnorm": "2.352", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "263518"} +[2022-08-02 12:02:49,720][train_inner][INFO] - {"epoch": 18, "update": 17.122, "loss": "1.981", "ppl": "3.95", "wps": "393443", "ups": "3.32", "wpb": "118526", "bsz": "256", "num_updates": "881200", "lr": "1.2e-05", "gnorm": "2.291", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "263578"} +[2022-08-02 12:03:49,285][train_inner][INFO] - {"epoch": 18, "update": 17.126, "loss": "1.984", "ppl": "3.96", "wps": "397127", "ups": "3.36", "wpb": "118273", "bsz": "256", "num_updates": "881400", "lr": "1.19798e-05", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "263638"} +[2022-08-02 12:04:48,753][train_inner][INFO] - {"epoch": 18, "update": 17.129, "loss": "1.982", "ppl": "3.95", "wps": "397559", "ups": "3.36", "wpb": "118209", "bsz": "256", "num_updates": "881600", "lr": "1.19596e-05", "gnorm": "2.587", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "263697"} +[2022-08-02 12:05:48,153][train_inner][INFO] - {"epoch": 18, "update": 17.133, "loss": "1.986", "ppl": "3.96", "wps": "398238", "ups": "3.37", "wpb": "118276", "bsz": "256", "num_updates": "881800", "lr": "1.19394e-05", "gnorm": "2.62", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "263757"} +[2022-08-02 12:06:46,993][train_inner][INFO] - {"epoch": 18, "update": 17.137, "loss": "1.987", "ppl": "3.96", "wps": "400724", "ups": "3.4", "wpb": "117893", "bsz": "256", "num_updates": "882000", "lr": "1.19192e-05", "gnorm": "2.501", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "263815"} +[2022-08-02 12:07:46,184][train_inner][INFO] - {"epoch": 18, "update": 17.141, "loss": "1.984", "ppl": "3.95", "wps": "398400", "ups": "3.38", "wpb": "117908", "bsz": "256", "num_updates": "882200", "lr": "1.1899e-05", "gnorm": "2.527", "loss_scale": "2", "train_wall": "59", "gb_free": "28.1", "wall": "263875"} +[2022-08-02 12:08:45,473][train_inner][INFO] - {"epoch": 18, "update": 17.145, "loss": "1.98", "ppl": "3.95", "wps": "397806", "ups": "3.37", "wpb": "117926", "bsz": "256", "num_updates": "882400", "lr": "1.18788e-05", "gnorm": "2.588", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "263934"} +[2022-08-02 12:09:44,793][train_inner][INFO] - {"epoch": 18, "update": 17.149, "loss": "1.976", "ppl": "3.93", "wps": "399953", "ups": "3.37", "wpb": "118625", "bsz": "256", "num_updates": "882600", "lr": "1.18586e-05", "gnorm": "2.612", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "263993"} +[2022-08-02 12:09:56,167][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 12:10:44,292][train_inner][INFO] - {"epoch": 18, "update": 17.153, "loss": "1.984", "ppl": "3.95", "wps": "399220", "ups": "3.36", "wpb": "118765", "bsz": "256", "num_updates": "882800", "lr": "1.18384e-05", "gnorm": "2.72", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "264053"} +[2022-08-02 12:11:44,077][train_inner][INFO] - {"epoch": 18, "update": 17.157, "loss": "1.979", "ppl": "3.94", "wps": "396465", "ups": "3.35", "wpb": "118513", "bsz": "256", "num_updates": "883000", "lr": "1.18182e-05", "gnorm": "2.675", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "264113"} +[2022-08-02 12:12:43,400][train_inner][INFO] - {"epoch": 18, "update": 17.16, "loss": "1.982", "ppl": "3.95", "wps": "397892", "ups": "3.37", "wpb": "118020", "bsz": "256", "num_updates": "883200", "lr": "1.1798e-05", "gnorm": "2.525", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "264172"} +[2022-08-02 12:13:42,941][train_inner][INFO] - {"epoch": 18, "update": 17.164, "loss": "1.985", "ppl": "3.96", "wps": "397750", "ups": "3.36", "wpb": "118410", "bsz": "256", "num_updates": "883400", "lr": "1.17778e-05", "gnorm": "2.503", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "264231"} +[2022-08-02 12:14:42,383][train_inner][INFO] - {"epoch": 18, "update": 17.168, "loss": "1.991", "ppl": "3.97", "wps": "395830", "ups": "3.36", "wpb": "117644", "bsz": "256", "num_updates": "883600", "lr": "1.17576e-05", "gnorm": "2.275", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "264291"} +[2022-08-02 12:15:41,722][train_inner][INFO] - {"epoch": 18, "update": 17.172, "loss": "1.982", "ppl": "3.95", "wps": "398732", "ups": "3.37", "wpb": "118302", "bsz": "256", "num_updates": "883800", "lr": "1.17374e-05", "gnorm": "2.558", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "264350"} +[2022-08-02 12:16:41,180][train_inner][INFO] - {"epoch": 18, "update": 17.176, "loss": "1.984", "ppl": "3.96", "wps": "396228", "ups": "3.36", "wpb": "117794", "bsz": "256", "num_updates": "884000", "lr": "1.17172e-05", "gnorm": "2.42", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "264410"} +[2022-08-02 12:17:40,261][train_inner][INFO] - {"epoch": 18, "update": 17.18, "loss": "1.984", "ppl": "3.95", "wps": "399470", "ups": "3.39", "wpb": "118004", "bsz": "256", "num_updates": "884200", "lr": "1.1697e-05", "gnorm": "2.454", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "264469"} +[2022-08-02 12:18:39,875][train_inner][INFO] - {"epoch": 18, "update": 17.184, "loss": "1.985", "ppl": "3.96", "wps": "396264", "ups": "3.35", "wpb": "118114", "bsz": "256", "num_updates": "884400", "lr": "1.16768e-05", "gnorm": "2.349", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "264528"} +[2022-08-02 12:19:39,409][train_inner][INFO] - {"epoch": 18, "update": 17.188, "loss": "1.986", "ppl": "3.96", "wps": "395692", "ups": "3.36", "wpb": "117784", "bsz": "256", "num_updates": "884600", "lr": "1.16566e-05", "gnorm": "2.333", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "264588"} +[2022-08-02 12:20:09,831][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 12:20:39,037][train_inner][INFO] - {"epoch": 18, "update": 17.192, "loss": "1.977", "ppl": "3.94", "wps": "397593", "ups": "3.35", "wpb": "118538", "bsz": "256", "num_updates": "884800", "lr": "1.16364e-05", "gnorm": "2.346", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "264647"} +[2022-08-02 12:21:38,732][train_inner][INFO] - {"epoch": 18, "update": 17.195, "loss": "1.981", "ppl": "3.95", "wps": "398900", "ups": "3.35", "wpb": "119062", "bsz": "256", "num_updates": "885000", "lr": "1.16162e-05", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "264707"} +[2022-08-02 12:22:38,285][train_inner][INFO] - {"epoch": 18, "update": 17.199, "loss": "1.982", "ppl": "3.95", "wps": "398019", "ups": "3.36", "wpb": "118515", "bsz": "256", "num_updates": "885200", "lr": "1.1596e-05", "gnorm": "2.34", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "264767"} +[2022-08-02 12:23:37,624][train_inner][INFO] - {"epoch": 18, "update": 17.203, "loss": "1.984", "ppl": "3.96", "wps": "399202", "ups": "3.37", "wpb": "118440", "bsz": "256", "num_updates": "885400", "lr": "1.15758e-05", "gnorm": "2.429", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "264826"} +[2022-08-02 12:24:37,076][train_inner][INFO] - {"epoch": 18, "update": 17.207, "loss": "1.978", "ppl": "3.94", "wps": "397815", "ups": "3.36", "wpb": "118255", "bsz": "256", "num_updates": "885600", "lr": "1.15556e-05", "gnorm": "2.494", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "264886"} +[2022-08-02 12:25:36,286][train_inner][INFO] - {"epoch": 18, "update": 17.211, "loss": "1.983", "ppl": "3.95", "wps": "399006", "ups": "3.38", "wpb": "118124", "bsz": "256", "num_updates": "885800", "lr": "1.15354e-05", "gnorm": "2.281", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "264945"} +[2022-08-02 12:26:35,620][train_inner][INFO] - {"epoch": 18, "update": 17.215, "loss": "1.982", "ppl": "3.95", "wps": "397942", "ups": "3.37", "wpb": "118056", "bsz": "256", "num_updates": "886000", "lr": "1.15152e-05", "gnorm": "2.475", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "265004"} +[2022-08-02 12:27:34,703][train_inner][INFO] - {"epoch": 18, "update": 17.219, "loss": "1.979", "ppl": "3.94", "wps": "398690", "ups": "3.39", "wpb": "117777", "bsz": "256", "num_updates": "886200", "lr": "1.14949e-05", "gnorm": "2.39", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "265063"} +[2022-08-02 12:28:34,404][train_inner][INFO] - {"epoch": 18, "update": 17.223, "loss": "1.987", "ppl": "3.96", "wps": "394160", "ups": "3.35", "wpb": "117658", "bsz": "256", "num_updates": "886400", "lr": "1.14747e-05", "gnorm": "2.51", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "265123"} +[2022-08-02 12:29:33,828][train_inner][INFO] - {"epoch": 18, "update": 17.227, "loss": "1.98", "ppl": "3.94", "wps": "397266", "ups": "3.37", "wpb": "118034", "bsz": "256", "num_updates": "886600", "lr": "1.14545e-05", "gnorm": "2.328", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "265182"} +[2022-08-02 12:30:20,585][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 12:30:33,463][train_inner][INFO] - {"epoch": 18, "update": 17.23, "loss": "1.981", "ppl": "3.95", "wps": "398069", "ups": "3.35", "wpb": "118694", "bsz": "256", "num_updates": "886800", "lr": "1.14343e-05", "gnorm": "2.303", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "265242"} +[2022-08-02 12:31:32,659][train_inner][INFO] - {"epoch": 18, "update": 17.234, "loss": "1.98", "ppl": "3.95", "wps": "399036", "ups": "3.38", "wpb": "118106", "bsz": "256", "num_updates": "887000", "lr": "1.14141e-05", "gnorm": "2.429", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "265301"} +[2022-08-02 12:32:31,784][train_inner][INFO] - {"epoch": 18, "update": 17.238, "loss": "1.982", "ppl": "3.95", "wps": "399903", "ups": "3.38", "wpb": "118221", "bsz": "256", "num_updates": "887200", "lr": "1.13939e-05", "gnorm": "2.367", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "265360"} +[2022-08-02 12:33:31,115][train_inner][INFO] - {"epoch": 18, "update": 17.242, "loss": "1.987", "ppl": "3.96", "wps": "397388", "ups": "3.37", "wpb": "117886", "bsz": "256", "num_updates": "887400", "lr": "1.13737e-05", "gnorm": "2.409", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "265420"} +[2022-08-02 12:34:30,703][train_inner][INFO] - {"epoch": 18, "update": 17.246, "loss": "1.979", "ppl": "3.94", "wps": "396025", "ups": "3.36", "wpb": "117991", "bsz": "256", "num_updates": "887600", "lr": "1.13535e-05", "gnorm": "2.451", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "265479"} +[2022-08-02 12:35:30,332][train_inner][INFO] - {"epoch": 18, "update": 17.25, "loss": "1.983", "ppl": "3.95", "wps": "394868", "ups": "3.35", "wpb": "117727", "bsz": "256", "num_updates": "887800", "lr": "1.13333e-05", "gnorm": "2.345", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "265539"} +[2022-08-02 12:36:29,456][train_inner][INFO] - {"epoch": 18, "update": 17.254, "loss": "1.987", "ppl": "3.96", "wps": "398360", "ups": "3.38", "wpb": "117764", "bsz": "256", "num_updates": "888000", "lr": "1.13131e-05", "gnorm": "2.573", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "265598"} +[2022-08-02 12:37:29,041][train_inner][INFO] - {"epoch": 18, "update": 17.258, "loss": "1.979", "ppl": "3.94", "wps": "397779", "ups": "3.36", "wpb": "118506", "bsz": "256", "num_updates": "888200", "lr": "1.12929e-05", "gnorm": "2.541", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "265657"} +[2022-08-02 12:38:28,164][train_inner][INFO] - {"epoch": 18, "update": 17.262, "loss": "1.978", "ppl": "3.94", "wps": "401835", "ups": "3.38", "wpb": "118788", "bsz": "256", "num_updates": "888400", "lr": "1.12727e-05", "gnorm": "2.707", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "265717"} +[2022-08-02 12:39:27,530][train_inner][INFO] - {"epoch": 18, "update": 17.265, "loss": "1.977", "ppl": "3.94", "wps": "398968", "ups": "3.37", "wpb": "118426", "bsz": "256", "num_updates": "888600", "lr": "1.12525e-05", "gnorm": "2.782", "loss_scale": "2", "train_wall": "59", "gb_free": "28.5", "wall": "265776"} +[2022-08-02 12:40:27,163][train_inner][INFO] - {"epoch": 18, "update": 17.269, "loss": "1.975", "ppl": "3.93", "wps": "397677", "ups": "3.35", "wpb": "118573", "bsz": "256", "num_updates": "888800", "lr": "1.12323e-05", "gnorm": "2.622", "loss_scale": "2", "train_wall": "59", "gb_free": "25.5", "wall": "265836"} +[2022-08-02 12:40:31,681][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 12:41:26,955][train_inner][INFO] - {"epoch": 18, "update": 17.273, "loss": "1.98", "ppl": "3.94", "wps": "396269", "ups": "3.34", "wpb": "118468", "bsz": "256", "num_updates": "889000", "lr": "1.12121e-05", "gnorm": "2.569", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "265895"} +[2022-08-02 12:42:26,226][train_inner][INFO] - {"epoch": 18, "update": 17.277, "loss": "1.98", "ppl": "3.94", "wps": "398883", "ups": "3.37", "wpb": "118209", "bsz": "256", "num_updates": "889200", "lr": "1.11919e-05", "gnorm": "2.328", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "265955"} +[2022-08-02 12:43:25,413][train_inner][INFO] - {"epoch": 18, "update": 17.281, "loss": "1.986", "ppl": "3.96", "wps": "399824", "ups": "3.38", "wpb": "118322", "bsz": "256", "num_updates": "889400", "lr": "1.11717e-05", "gnorm": "2.678", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "266014"} +[2022-08-02 12:44:24,341][train_inner][INFO] - {"epoch": 18, "update": 17.285, "loss": "1.982", "ppl": "3.95", "wps": "401040", "ups": "3.39", "wpb": "118161", "bsz": "256", "num_updates": "889600", "lr": "1.11515e-05", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "266073"} +[2022-08-02 12:45:23,606][train_inner][INFO] - {"epoch": 18, "update": 17.289, "loss": "1.98", "ppl": "3.95", "wps": "399352", "ups": "3.37", "wpb": "118338", "bsz": "256", "num_updates": "889800", "lr": "1.11313e-05", "gnorm": "2.743", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "266132"} +[2022-08-02 12:46:22,823][train_inner][INFO] - {"epoch": 18, "update": 17.293, "loss": "1.976", "ppl": "3.93", "wps": "398734", "ups": "3.38", "wpb": "118058", "bsz": "256", "num_updates": "890000", "lr": "1.11111e-05", "gnorm": "2.488", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "266191"} +[2022-08-02 12:47:22,149][train_inner][INFO] - {"epoch": 18, "update": 17.296, "loss": "1.981", "ppl": "3.95", "wps": "397254", "ups": "3.37", "wpb": "117837", "bsz": "256", "num_updates": "890200", "lr": "1.10909e-05", "gnorm": "2.271", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "266251"} +[2022-08-02 12:48:21,504][train_inner][INFO] - {"epoch": 18, "update": 17.3, "loss": "1.981", "ppl": "3.95", "wps": "399893", "ups": "3.37", "wpb": "118678", "bsz": "256", "num_updates": "890400", "lr": "1.10707e-05", "gnorm": "2.503", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "266310"} +[2022-08-02 12:49:21,582][train_inner][INFO] - {"epoch": 18, "update": 17.304, "loss": "1.987", "ppl": "3.96", "wps": "390448", "ups": "3.33", "wpb": "117286", "bsz": "256", "num_updates": "890600", "lr": "1.10505e-05", "gnorm": "2.515", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "266370"} +[2022-08-02 12:50:21,108][train_inner][INFO] - {"epoch": 18, "update": 17.308, "loss": "1.977", "ppl": "3.94", "wps": "396372", "ups": "3.36", "wpb": "117971", "bsz": "256", "num_updates": "890800", "lr": "1.10303e-05", "gnorm": "2.476", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "266430"} +[2022-08-02 12:50:51,244][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 12:51:20,407][train_inner][INFO] - {"epoch": 18, "update": 17.312, "loss": "1.981", "ppl": "3.95", "wps": "398993", "ups": "3.37", "wpb": "118298", "bsz": "256", "num_updates": "891000", "lr": "1.10101e-05", "gnorm": "2.396", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "266489"} +[2022-08-02 12:52:19,892][train_inner][INFO] - {"epoch": 18, "update": 17.316, "loss": "1.983", "ppl": "3.95", "wps": "397387", "ups": "3.36", "wpb": "118193", "bsz": "256", "num_updates": "891200", "lr": "1.09899e-05", "gnorm": "2.281", "loss_scale": "2", "train_wall": "59", "gb_free": "29.2", "wall": "266548"} +[2022-08-02 12:53:19,086][train_inner][INFO] - {"epoch": 18, "update": 17.32, "loss": "1.977", "ppl": "3.94", "wps": "399968", "ups": "3.38", "wpb": "118378", "bsz": "256", "num_updates": "891400", "lr": "1.09697e-05", "gnorm": "2.417", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "266608"} +[2022-08-02 12:54:18,372][train_inner][INFO] - {"epoch": 18, "update": 17.324, "loss": "1.982", "ppl": "3.95", "wps": "399239", "ups": "3.37", "wpb": "118344", "bsz": "256", "num_updates": "891600", "lr": "1.09495e-05", "gnorm": "2.304", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "266667"} +[2022-08-02 12:55:17,802][train_inner][INFO] - {"epoch": 18, "update": 17.328, "loss": "1.978", "ppl": "3.94", "wps": "398172", "ups": "3.37", "wpb": "118317", "bsz": "256", "num_updates": "891800", "lr": "1.09293e-05", "gnorm": "2.411", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "266726"} +[2022-08-02 12:56:16,734][train_inner][INFO] - {"epoch": 18, "update": 17.331, "loss": "1.985", "ppl": "3.96", "wps": "398593", "ups": "3.39", "wpb": "117448", "bsz": "256", "num_updates": "892000", "lr": "1.09091e-05", "gnorm": "2.401", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "266785"} +[2022-08-02 12:57:16,202][train_inner][INFO] - {"epoch": 18, "update": 17.335, "loss": "1.967", "ppl": "3.91", "wps": "398306", "ups": "3.36", "wpb": "118430", "bsz": "256", "num_updates": "892200", "lr": "1.08889e-05", "gnorm": "2.464", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "266845"} +[2022-08-02 12:58:15,406][train_inner][INFO] - {"epoch": 18, "update": 17.339, "loss": "1.978", "ppl": "3.94", "wps": "398804", "ups": "3.38", "wpb": "118054", "bsz": "256", "num_updates": "892400", "lr": "1.08687e-05", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "266904"} +[2022-08-02 12:59:14,717][train_inner][INFO] - {"epoch": 18, "update": 17.343, "loss": "1.98", "ppl": "3.95", "wps": "397940", "ups": "3.37", "wpb": "118010", "bsz": "256", "num_updates": "892600", "lr": "1.08485e-05", "gnorm": "2.337", "loss_scale": "2", "train_wall": "59", "gb_free": "31.3", "wall": "266963"} +[2022-08-02 13:00:14,161][train_inner][INFO] - {"epoch": 18, "update": 17.347, "loss": "1.979", "ppl": "3.94", "wps": "398356", "ups": "3.36", "wpb": "118399", "bsz": "256", "num_updates": "892800", "lr": "1.08283e-05", "gnorm": "2.476", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "267023"} +[2022-08-02 13:01:04,757][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 13:01:13,792][train_inner][INFO] - {"epoch": 18, "update": 17.351, "loss": "1.974", "ppl": "3.93", "wps": "396550", "ups": "3.35", "wpb": "118233", "bsz": "256", "num_updates": "893000", "lr": "1.08081e-05", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "267082"} +[2022-08-02 13:02:13,536][train_inner][INFO] - {"epoch": 18, "update": 17.355, "loss": "1.983", "ppl": "3.95", "wps": "396132", "ups": "3.35", "wpb": "118331", "bsz": "256", "num_updates": "893200", "lr": "1.07879e-05", "gnorm": "2.521", "loss_scale": "2", "train_wall": "59", "gb_free": "29.3", "wall": "267142"} +[2022-08-02 13:03:13,132][train_inner][INFO] - {"epoch": 18, "update": 17.359, "loss": "1.974", "ppl": "3.93", "wps": "398722", "ups": "3.36", "wpb": "118810", "bsz": "256", "num_updates": "893400", "lr": "1.07677e-05", "gnorm": "2.541", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "267202"} +[2022-08-02 13:04:12,711][train_inner][INFO] - {"epoch": 18, "update": 17.363, "loss": "1.982", "ppl": "3.95", "wps": "395866", "ups": "3.36", "wpb": "117925", "bsz": "256", "num_updates": "893600", "lr": "1.07475e-05", "gnorm": "2.442", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "267261"} +[2022-08-02 13:05:12,304][train_inner][INFO] - {"epoch": 18, "update": 17.366, "loss": "1.976", "ppl": "3.93", "wps": "399180", "ups": "3.36", "wpb": "118942", "bsz": "256", "num_updates": "893800", "lr": "1.07273e-05", "gnorm": "2.346", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "267321"} +[2022-08-02 13:06:11,909][train_inner][INFO] - {"epoch": 18, "update": 17.37, "loss": "1.983", "ppl": "3.95", "wps": "395835", "ups": "3.36", "wpb": "117967", "bsz": "256", "num_updates": "894000", "lr": "1.07071e-05", "gnorm": "2.339", "loss_scale": "2", "train_wall": "59", "gb_free": "33.6", "wall": "267380"} +[2022-08-02 13:07:11,009][train_inner][INFO] - {"epoch": 18, "update": 17.374, "loss": "1.981", "ppl": "3.95", "wps": "399477", "ups": "3.38", "wpb": "118045", "bsz": "256", "num_updates": "894200", "lr": "1.06869e-05", "gnorm": "2.399", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "267439"} +[2022-08-02 13:08:10,103][train_inner][INFO] - {"epoch": 18, "update": 17.378, "loss": "1.979", "ppl": "3.94", "wps": "399414", "ups": "3.38", "wpb": "118013", "bsz": "256", "num_updates": "894400", "lr": "1.06667e-05", "gnorm": "2.329", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "267499"} +[2022-08-02 13:09:10,784][train_inner][INFO] - {"epoch": 18, "update": 17.382, "loss": "1.976", "ppl": "3.94", "wps": "390113", "ups": "3.3", "wpb": "118361", "bsz": "256", "num_updates": "894600", "lr": "1.06465e-05", "gnorm": "2.66", "loss_scale": "2", "train_wall": "60", "gb_free": "25.6", "wall": "267559"} +[2022-08-02 13:10:09,880][train_inner][INFO] - {"epoch": 18, "update": 17.386, "loss": "1.976", "ppl": "3.93", "wps": "399559", "ups": "3.38", "wpb": "118061", "bsz": "256", "num_updates": "894800", "lr": "1.06263e-05", "gnorm": "2.59", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "267618"} +[2022-08-02 13:11:09,446][train_inner][INFO] - {"epoch": 18, "update": 17.39, "loss": "1.981", "ppl": "3.95", "wps": "397158", "ups": "3.36", "wpb": "118285", "bsz": "256", "num_updates": "895000", "lr": "1.06061e-05", "gnorm": "2.808", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "267678"} +[2022-08-02 13:11:15,379][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 13:12:09,321][train_inner][INFO] - {"epoch": 18, "update": 17.394, "loss": "1.977", "ppl": "3.94", "wps": "394792", "ups": "3.34", "wpb": "118191", "bsz": "256", "num_updates": "895200", "lr": "1.05859e-05", "gnorm": "2.686", "loss_scale": "2", "train_wall": "60", "gb_free": "22.3", "wall": "267738"} +[2022-08-02 13:13:08,753][train_inner][INFO] - {"epoch": 18, "update": 17.398, "loss": "1.983", "ppl": "3.95", "wps": "395082", "ups": "3.37", "wpb": "117402", "bsz": "256", "num_updates": "895400", "lr": "1.05657e-05", "gnorm": "2.848", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "267797"} +[2022-08-02 13:14:08,212][train_inner][INFO] - {"epoch": 18, "update": 17.401, "loss": "1.975", "ppl": "3.93", "wps": "400375", "ups": "3.36", "wpb": "119027", "bsz": "256", "num_updates": "895600", "lr": "1.05455e-05", "gnorm": "2.915", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "267857"} +[2022-08-02 13:15:08,089][train_inner][INFO] - {"epoch": 18, "update": 17.405, "loss": "1.969", "ppl": "3.91", "wps": "394748", "ups": "3.34", "wpb": "118181", "bsz": "256", "num_updates": "895800", "lr": "1.05253e-05", "gnorm": "2.932", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "267917"} +[2022-08-02 13:16:07,462][train_inner][INFO] - {"epoch": 18, "update": 17.409, "loss": "1.978", "ppl": "3.94", "wps": "399210", "ups": "3.37", "wpb": "118510", "bsz": "256", "num_updates": "896000", "lr": "1.05051e-05", "gnorm": "2.903", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "267976"} +[2022-08-02 13:17:06,682][train_inner][INFO] - {"epoch": 18, "update": 17.413, "loss": "1.975", "ppl": "3.93", "wps": "400436", "ups": "3.38", "wpb": "118569", "bsz": "256", "num_updates": "896200", "lr": "1.04848e-05", "gnorm": "2.642", "loss_scale": "2", "train_wall": "59", "gb_free": "29.8", "wall": "268035"} +[2022-08-02 13:18:06,364][train_inner][INFO] - {"epoch": 18, "update": 17.417, "loss": "1.982", "ppl": "3.95", "wps": "394626", "ups": "3.35", "wpb": "117758", "bsz": "256", "num_updates": "896400", "lr": "1.04646e-05", "gnorm": "2.738", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "268095"} +[2022-08-02 13:19:05,318][train_inner][INFO] - {"epoch": 18, "update": 17.421, "loss": "1.981", "ppl": "3.95", "wps": "399800", "ups": "3.39", "wpb": "117848", "bsz": "256", "num_updates": "896600", "lr": "1.04444e-05", "gnorm": "2.668", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "268154"} +[2022-08-02 13:20:04,926][train_inner][INFO] - {"epoch": 18, "update": 17.425, "loss": "1.978", "ppl": "3.94", "wps": "397395", "ups": "3.36", "wpb": "118438", "bsz": "256", "num_updates": "896800", "lr": "1.04242e-05", "gnorm": "2.707", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "268213"} +[2022-08-02 13:21:04,279][train_inner][INFO] - {"epoch": 18, "update": 17.429, "loss": "1.985", "ppl": "3.96", "wps": "397749", "ups": "3.37", "wpb": "118038", "bsz": "256", "num_updates": "897000", "lr": "1.0404e-05", "gnorm": "2.532", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "268273"} +[2022-08-02 13:21:25,744][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 13:22:04,229][train_inner][INFO] - {"epoch": 18, "update": 17.433, "loss": "1.974", "ppl": "3.93", "wps": "395006", "ups": "3.34", "wpb": "118403", "bsz": "256", "num_updates": "897200", "lr": "1.03838e-05", "gnorm": "2.512", "loss_scale": "2", "train_wall": "60", "gb_free": "22.4", "wall": "268333"} +[2022-08-02 13:23:03,894][train_inner][INFO] - {"epoch": 18, "update": 17.436, "loss": "1.976", "ppl": "3.93", "wps": "395984", "ups": "3.35", "wpb": "118130", "bsz": "256", "num_updates": "897400", "lr": "1.03636e-05", "gnorm": "2.542", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "268392"} +[2022-08-02 13:24:03,298][train_inner][INFO] - {"epoch": 18, "update": 17.44, "loss": "1.979", "ppl": "3.94", "wps": "398036", "ups": "3.37", "wpb": "118223", "bsz": "256", "num_updates": "897600", "lr": "1.03434e-05", "gnorm": "2.416", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "268452"} +[2022-08-02 13:25:02,680][train_inner][INFO] - {"epoch": 18, "update": 17.444, "loss": "1.981", "ppl": "3.95", "wps": "397641", "ups": "3.37", "wpb": "118063", "bsz": "256", "num_updates": "897800", "lr": "1.03232e-05", "gnorm": "2.505", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "268511"} +[2022-08-02 13:26:01,961][train_inner][INFO] - {"epoch": 18, "update": 17.448, "loss": "1.975", "ppl": "3.93", "wps": "399533", "ups": "3.37", "wpb": "118423", "bsz": "256", "num_updates": "898000", "lr": "1.0303e-05", "gnorm": "2.276", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "268570"} +[2022-08-02 13:27:01,228][train_inner][INFO] - {"epoch": 18, "update": 17.452, "loss": "1.973", "ppl": "3.93", "wps": "402054", "ups": "3.37", "wpb": "119144", "bsz": "256", "num_updates": "898200", "lr": "1.02828e-05", "gnorm": "2.365", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "268630"} +[2022-08-02 13:28:00,895][train_inner][INFO] - {"epoch": 18, "update": 17.456, "loss": "1.976", "ppl": "3.93", "wps": "396007", "ups": "3.35", "wpb": "118142", "bsz": "256", "num_updates": "898400", "lr": "1.02626e-05", "gnorm": "2.589", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "268689"} +[2022-08-02 13:29:00,517][train_inner][INFO] - {"epoch": 18, "update": 17.46, "loss": "1.974", "ppl": "3.93", "wps": "395348", "ups": "3.35", "wpb": "117856", "bsz": "256", "num_updates": "898600", "lr": "1.02424e-05", "gnorm": "2.502", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "268749"} +[2022-08-02 13:30:00,091][train_inner][INFO] - {"epoch": 18, "update": 17.464, "loss": "1.981", "ppl": "3.95", "wps": "397662", "ups": "3.36", "wpb": "118450", "bsz": "256", "num_updates": "898800", "lr": "1.02222e-05", "gnorm": "2.448", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "268809"} +[2022-08-02 13:30:59,659][train_inner][INFO] - {"epoch": 18, "update": 17.467, "loss": "1.973", "ppl": "3.93", "wps": "396665", "ups": "3.36", "wpb": "118141", "bsz": "256", "num_updates": "899000", "lr": "1.0202e-05", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "268868"} +[2022-08-02 13:31:41,340][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 13:31:59,946][train_inner][INFO] - {"epoch": 18, "update": 17.471, "loss": "1.972", "ppl": "3.92", "wps": "392790", "ups": "3.32", "wpb": "118401", "bsz": "256", "num_updates": "899200", "lr": "1.01818e-05", "gnorm": "2.299", "loss_scale": "2", "train_wall": "60", "gb_free": "22.4", "wall": "268928"} +[2022-08-02 13:32:59,655][train_inner][INFO] - {"epoch": 18, "update": 17.475, "loss": "1.977", "ppl": "3.94", "wps": "396262", "ups": "3.35", "wpb": "118300", "bsz": "256", "num_updates": "899400", "lr": "1.01616e-05", "gnorm": "2.471", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "268988"} +[2022-08-02 13:33:59,210][train_inner][INFO] - {"epoch": 18, "update": 17.479, "loss": "1.983", "ppl": "3.95", "wps": "396010", "ups": "3.36", "wpb": "117921", "bsz": "256", "num_updates": "899600", "lr": "1.01414e-05", "gnorm": "2.381", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "269048"} +[2022-08-02 13:34:58,670][train_inner][INFO] - {"epoch": 18, "update": 17.483, "loss": "1.974", "ppl": "3.93", "wps": "398245", "ups": "3.36", "wpb": "118397", "bsz": "256", "num_updates": "899800", "lr": "1.01212e-05", "gnorm": "2.296", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "269107"} +[2022-08-02 13:35:58,182][train_inner][INFO] - {"epoch": 18, "update": 17.487, "loss": "1.982", "ppl": "3.95", "wps": "396136", "ups": "3.36", "wpb": "117874", "bsz": "256", "num_updates": "900000", "lr": "1.0101e-05", "gnorm": "2.276", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "269167"} +[2022-08-02 13:35:58,183][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 13:36:21,026][valid][INFO] - {"epoch": 18, "valid_loss": "1.87", "valid_ppl": "3.66", "valid_wps": "1.58558e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "900000", "valid_best_loss": "1.87"} +[2022-08-02 13:36:21,029][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 18 @ 900000 updates +[2022-08-02 13:36:21,030][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_18_900000.pt +[2022-08-02 13:36:26,968][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_18_900000.pt +[2022-08-02 13:36:45,820][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_18_900000.pt (epoch 18 @ 900000 updates, score 1.87) (writing took 24.790587527677417 seconds) +[2022-08-02 13:37:45,477][train_inner][INFO] - {"epoch": 18, "update": 17.491, "loss": "1.974", "ppl": "3.93", "wps": "219248", "ups": "1.86", "wpb": "117621", "bsz": "256", "num_updates": "900200", "lr": "1.00808e-05", "gnorm": "2.237", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "269274"} +[2022-08-02 13:38:45,301][train_inner][INFO] - {"epoch": 18, "update": 17.495, "loss": "1.981", "ppl": "3.95", "wps": "394053", "ups": "3.34", "wpb": "117868", "bsz": "256", "num_updates": "900400", "lr": "1.00606e-05", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "269334"} +[2022-08-02 13:39:45,038][train_inner][INFO] - {"epoch": 18, "update": 17.499, "loss": "1.979", "ppl": "3.94", "wps": "396324", "ups": "3.35", "wpb": "118374", "bsz": "256", "num_updates": "900600", "lr": "1.00404e-05", "gnorm": "2.363", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "269393"} +[2022-08-02 13:40:44,312][train_inner][INFO] - {"epoch": 18, "update": 17.502, "loss": "1.973", "ppl": "3.93", "wps": "398250", "ups": "3.37", "wpb": "118027", "bsz": "256", "num_updates": "900800", "lr": "1.00202e-05", "gnorm": "2.433", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "269453"} +[2022-08-02 13:41:43,703][train_inner][INFO] - {"epoch": 18, "update": 17.506, "loss": "1.979", "ppl": "3.94", "wps": "399536", "ups": "3.37", "wpb": "118645", "bsz": "256", "num_updates": "901000", "lr": "1e-05", "gnorm": "2.47", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "269512"} +[2022-08-02 13:42:43,511][train_inner][INFO] - {"epoch": 18, "update": 17.51, "loss": "1.981", "ppl": "3.95", "wps": "395552", "ups": "3.34", "wpb": "118285", "bsz": "256", "num_updates": "901200", "lr": "9.9798e-06", "gnorm": "2.542", "loss_scale": "4", "train_wall": "59", "gb_free": "21.8", "wall": "269572"} +[2022-08-02 13:42:57,415][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 13:43:44,634][train_inner][INFO] - {"epoch": 18, "update": 17.514, "loss": "1.979", "ppl": "3.94", "wps": "385092", "ups": "3.27", "wpb": "117688", "bsz": "256", "num_updates": "901400", "lr": "9.9596e-06", "gnorm": "2.329", "loss_scale": "2", "train_wall": "61", "gb_free": "24.9", "wall": "269633"} +[2022-08-02 13:44:43,847][train_inner][INFO] - {"epoch": 18, "update": 17.518, "loss": "1.975", "ppl": "3.93", "wps": "402243", "ups": "3.38", "wpb": "119091", "bsz": "256", "num_updates": "901600", "lr": "9.93939e-06", "gnorm": "2.448", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "269692"} +[2022-08-02 13:45:42,904][train_inner][INFO] - {"epoch": 18, "update": 17.522, "loss": "1.977", "ppl": "3.94", "wps": "400613", "ups": "3.39", "wpb": "118292", "bsz": "256", "num_updates": "901800", "lr": "9.91919e-06", "gnorm": "2.432", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "269751"} +[2022-08-02 13:46:42,478][train_inner][INFO] - {"epoch": 18, "update": 17.526, "loss": "1.972", "ppl": "3.92", "wps": "397018", "ups": "3.36", "wpb": "118260", "bsz": "256", "num_updates": "902000", "lr": "9.89899e-06", "gnorm": "2.609", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "269811"} +[2022-08-02 13:47:41,854][train_inner][INFO] - {"epoch": 18, "update": 17.53, "loss": "1.973", "ppl": "3.93", "wps": "398716", "ups": "3.37", "wpb": "118369", "bsz": "256", "num_updates": "902200", "lr": "9.87879e-06", "gnorm": "2.449", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "269870"} +[2022-08-02 13:48:41,250][train_inner][INFO] - {"epoch": 18, "update": 17.534, "loss": "1.98", "ppl": "3.95", "wps": "397585", "ups": "3.37", "wpb": "118074", "bsz": "255.9", "num_updates": "902400", "lr": "9.85859e-06", "gnorm": "2.406", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "269930"} +[2022-08-02 13:49:40,958][train_inner][INFO] - {"epoch": 18, "update": 17.537, "loss": "1.972", "ppl": "3.92", "wps": "397288", "ups": "3.35", "wpb": "118605", "bsz": "256", "num_updates": "902600", "lr": "9.83838e-06", "gnorm": "2.334", "loss_scale": "2", "train_wall": "59", "gb_free": "28.6", "wall": "269989"} +[2022-08-02 13:50:40,519][train_inner][INFO] - {"epoch": 18, "update": 17.541, "loss": "1.975", "ppl": "3.93", "wps": "398566", "ups": "3.36", "wpb": "118695", "bsz": "256", "num_updates": "902800", "lr": "9.81818e-06", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "270049"} +[2022-08-02 13:51:40,008][train_inner][INFO] - {"epoch": 18, "update": 17.545, "loss": "1.975", "ppl": "3.93", "wps": "395863", "ups": "3.36", "wpb": "117745", "bsz": "256", "num_updates": "903000", "lr": "9.79798e-06", "gnorm": "2.459", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "270108"} +[2022-08-02 13:52:39,616][train_inner][INFO] - {"epoch": 18, "update": 17.549, "loss": "1.975", "ppl": "3.93", "wps": "396502", "ups": "3.36", "wpb": "118174", "bsz": "256", "num_updates": "903200", "lr": "9.77778e-06", "gnorm": "2.527", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "270168"} +[2022-08-02 13:53:13,878][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 13:53:39,378][train_inner][INFO] - {"epoch": 18, "update": 17.553, "loss": "1.98", "ppl": "3.95", "wps": "394836", "ups": "3.35", "wpb": "117980", "bsz": "256", "num_updates": "903400", "lr": "9.75758e-06", "gnorm": "2.564", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "270228"} +[2022-08-02 13:54:39,169][train_inner][INFO] - {"epoch": 18, "update": 17.557, "loss": "1.979", "ppl": "3.94", "wps": "395638", "ups": "3.35", "wpb": "118277", "bsz": "256", "num_updates": "903600", "lr": "9.73737e-06", "gnorm": "2.367", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "270288"} +[2022-08-02 13:55:38,912][train_inner][INFO] - {"epoch": 18, "update": 17.561, "loss": "1.978", "ppl": "3.94", "wps": "394669", "ups": "3.35", "wpb": "117892", "bsz": "256", "num_updates": "903800", "lr": "9.71717e-06", "gnorm": "2.384", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "270347"} +[2022-08-02 13:56:38,203][train_inner][INFO] - {"epoch": 18, "update": 17.565, "loss": "1.978", "ppl": "3.94", "wps": "399696", "ups": "3.37", "wpb": "118491", "bsz": "256", "num_updates": "904000", "lr": "9.69697e-06", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "270407"} +[2022-08-02 13:57:37,695][train_inner][INFO] - {"epoch": 18, "update": 17.569, "loss": "1.973", "ppl": "3.93", "wps": "398638", "ups": "3.36", "wpb": "118577", "bsz": "256", "num_updates": "904200", "lr": "9.67677e-06", "gnorm": "2.405", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "270466"} +[2022-08-02 13:58:37,220][train_inner][INFO] - {"epoch": 18, "update": 17.572, "loss": "1.975", "ppl": "3.93", "wps": "397121", "ups": "3.36", "wpb": "118193", "bsz": "256", "num_updates": "904400", "lr": "9.65657e-06", "gnorm": "2.514", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "270526"} +[2022-08-02 13:59:36,672][train_inner][INFO] - {"epoch": 18, "update": 17.576, "loss": "1.974", "ppl": "3.93", "wps": "397567", "ups": "3.36", "wpb": "118179", "bsz": "256", "num_updates": "904600", "lr": "9.63636e-06", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "270585"} +[2022-08-02 14:00:35,817][train_inner][INFO] - {"epoch": 18, "update": 17.58, "loss": "1.974", "ppl": "3.93", "wps": "398332", "ups": "3.38", "wpb": "117796", "bsz": "256", "num_updates": "904800", "lr": "9.61616e-06", "gnorm": "2.375", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "270644"} +[2022-08-02 14:01:35,197][train_inner][INFO] - {"epoch": 18, "update": 17.584, "loss": "1.973", "ppl": "3.92", "wps": "398617", "ups": "3.37", "wpb": "118348", "bsz": "256", "num_updates": "905000", "lr": "9.59596e-06", "gnorm": "2.34", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "270704"} +[2022-08-02 14:02:34,562][train_inner][INFO] - {"epoch": 18, "update": 17.588, "loss": "1.976", "ppl": "3.93", "wps": "398534", "ups": "3.37", "wpb": "118293", "bsz": "256", "num_updates": "905200", "lr": "9.57576e-06", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "27.7", "wall": "270763"} +[2022-08-02 14:03:22,923][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 14:03:34,242][train_inner][INFO] - {"epoch": 18, "update": 17.592, "loss": "1.978", "ppl": "3.94", "wps": "396098", "ups": "3.35", "wpb": "118194", "bsz": "256", "num_updates": "905400", "lr": "9.55556e-06", "gnorm": "2.364", "loss_scale": "2", "train_wall": "59", "gb_free": "25.8", "wall": "270823"} +[2022-08-02 14:04:33,970][train_inner][INFO] - {"epoch": 18, "update": 17.596, "loss": "1.981", "ppl": "3.95", "wps": "396686", "ups": "3.35", "wpb": "118466", "bsz": "256", "num_updates": "905600", "lr": "9.53535e-06", "gnorm": "2.515", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "270882"} +[2022-08-02 14:05:33,131][train_inner][INFO] - {"epoch": 18, "update": 17.6, "loss": "1.976", "ppl": "3.93", "wps": "399230", "ups": "3.38", "wpb": "118094", "bsz": "256", "num_updates": "905800", "lr": "9.51515e-06", "gnorm": "2.443", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "270942"} +[2022-08-02 14:06:32,634][train_inner][INFO] - {"epoch": 18, "update": 17.604, "loss": "1.969", "ppl": "3.92", "wps": "401220", "ups": "3.36", "wpb": "119367", "bsz": "256", "num_updates": "906000", "lr": "9.49495e-06", "gnorm": "2.365", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "271001"} +[2022-08-02 14:07:32,050][train_inner][INFO] - {"epoch": 18, "update": 17.607, "loss": "1.975", "ppl": "3.93", "wps": "398286", "ups": "3.37", "wpb": "118324", "bsz": "256", "num_updates": "906200", "lr": "9.47475e-06", "gnorm": "2.359", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "271060"} +[2022-08-02 14:08:31,150][train_inner][INFO] - {"epoch": 18, "update": 17.611, "loss": "1.978", "ppl": "3.94", "wps": "398076", "ups": "3.38", "wpb": "117629", "bsz": "256", "num_updates": "906400", "lr": "9.45455e-06", "gnorm": "2.279", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "271120"} +[2022-08-02 14:09:30,939][train_inner][INFO] - {"epoch": 18, "update": 17.615, "loss": "1.974", "ppl": "3.93", "wps": "396497", "ups": "3.35", "wpb": "118531", "bsz": "256", "num_updates": "906600", "lr": "9.43434e-06", "gnorm": "2.436", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "271179"} +[2022-08-02 14:10:30,106][train_inner][INFO] - {"epoch": 18, "update": 17.619, "loss": "1.976", "ppl": "3.94", "wps": "399448", "ups": "3.38", "wpb": "118168", "bsz": "256", "num_updates": "906800", "lr": "9.41414e-06", "gnorm": "2.357", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "271239"} +[2022-08-02 14:11:29,660][train_inner][INFO] - {"epoch": 18, "update": 17.623, "loss": "1.975", "ppl": "3.93", "wps": "397612", "ups": "3.36", "wpb": "118398", "bsz": "256", "num_updates": "907000", "lr": "9.39394e-06", "gnorm": "2.276", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "271298"} +[2022-08-02 14:12:29,580][train_inner][INFO] - {"epoch": 18, "update": 17.627, "loss": "1.978", "ppl": "3.94", "wps": "395860", "ups": "3.34", "wpb": "118599", "bsz": "256", "num_updates": "907200", "lr": "9.37374e-06", "gnorm": "2.463", "loss_scale": "2", "train_wall": "60", "gb_free": "26.2", "wall": "271358"} +[2022-08-02 14:13:28,961][train_inner][INFO] - {"epoch": 18, "update": 17.631, "loss": "1.973", "ppl": "3.93", "wps": "401534", "ups": "3.37", "wpb": "119215", "bsz": "256", "num_updates": "907400", "lr": "9.35354e-06", "gnorm": "2.509", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "271417"} +[2022-08-02 14:13:33,977][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 14:14:28,721][train_inner][INFO] - {"epoch": 18, "update": 17.635, "loss": "1.972", "ppl": "3.92", "wps": "396352", "ups": "3.35", "wpb": "118429", "bsz": "256", "num_updates": "907600", "lr": "9.33333e-06", "gnorm": "2.396", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "271477"} +[2022-08-02 14:15:28,486][train_inner][INFO] - {"epoch": 18, "update": 17.638, "loss": "1.972", "ppl": "3.92", "wps": "397613", "ups": "3.35", "wpb": "118816", "bsz": "256", "num_updates": "907800", "lr": "9.31313e-06", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "271537"} +[2022-08-02 14:16:28,242][train_inner][INFO] - {"epoch": 18, "update": 17.642, "loss": "1.975", "ppl": "3.93", "wps": "396275", "ups": "3.35", "wpb": "118399", "bsz": "256", "num_updates": "908000", "lr": "9.29293e-06", "gnorm": "2.417", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "271597"} +[2022-08-02 14:17:28,732][train_inner][INFO] - {"epoch": 18, "update": 17.646, "loss": "1.976", "ppl": "3.94", "wps": "392227", "ups": "3.31", "wpb": "118628", "bsz": "256", "num_updates": "908200", "lr": "9.27273e-06", "gnorm": "2.395", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "271657"} +[2022-08-02 14:18:28,465][train_inner][INFO] - {"epoch": 18, "update": 17.65, "loss": "1.979", "ppl": "3.94", "wps": "394881", "ups": "3.35", "wpb": "117936", "bsz": "256", "num_updates": "908400", "lr": "9.25253e-06", "gnorm": "2.409", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "271717"} +[2022-08-02 14:19:27,972][train_inner][INFO] - {"epoch": 18, "update": 17.654, "loss": "1.978", "ppl": "3.94", "wps": "397281", "ups": "3.36", "wpb": "118204", "bsz": "256", "num_updates": "908600", "lr": "9.23232e-06", "gnorm": "2.306", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "271776"} +[2022-08-02 14:20:27,747][train_inner][INFO] - {"epoch": 18, "update": 17.658, "loss": "1.977", "ppl": "3.94", "wps": "396496", "ups": "3.35", "wpb": "118500", "bsz": "256", "num_updates": "908800", "lr": "9.21212e-06", "gnorm": "2.316", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "271836"} +[2022-08-02 14:21:27,522][train_inner][INFO] - {"epoch": 18, "update": 17.662, "loss": "1.976", "ppl": "3.94", "wps": "397290", "ups": "3.35", "wpb": "118739", "bsz": "256", "num_updates": "909000", "lr": "9.19192e-06", "gnorm": "2.361", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "271896"} +[2022-08-02 14:22:26,899][train_inner][INFO] - {"epoch": 18, "update": 17.666, "loss": "1.97", "ppl": "3.92", "wps": "399117", "ups": "3.37", "wpb": "118493", "bsz": "256", "num_updates": "909200", "lr": "9.17172e-06", "gnorm": "2.402", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "271955"} +[2022-08-02 14:23:26,597][train_inner][INFO] - {"epoch": 18, "update": 17.67, "loss": "1.974", "ppl": "3.93", "wps": "397750", "ups": "3.35", "wpb": "118723", "bsz": "256", "num_updates": "909400", "lr": "9.15152e-06", "gnorm": "2.49", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "272015"} +[2022-08-02 14:23:46,390][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 14:24:26,396][train_inner][INFO] - {"epoch": 18, "update": 17.673, "loss": "1.977", "ppl": "3.94", "wps": "394911", "ups": "3.34", "wpb": "118076", "bsz": "256", "num_updates": "909600", "lr": "9.13131e-06", "gnorm": "2.83", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "272075"} +[2022-08-02 14:25:26,077][train_inner][INFO] - {"epoch": 18, "update": 17.677, "loss": "1.976", "ppl": "3.94", "wps": "394892", "ups": "3.35", "wpb": "117836", "bsz": "256", "num_updates": "909800", "lr": "9.11111e-06", "gnorm": "2.713", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "272135"} +[2022-08-02 14:26:25,556][train_inner][INFO] - {"epoch": 18, "update": 17.681, "loss": "1.98", "ppl": "3.94", "wps": "396991", "ups": "3.36", "wpb": "118063", "bsz": "256", "num_updates": "910000", "lr": "9.09091e-06", "gnorm": "2.698", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "272194"} +[2022-08-02 14:27:25,011][train_inner][INFO] - {"epoch": 18, "update": 17.685, "loss": "1.978", "ppl": "3.94", "wps": "396328", "ups": "3.36", "wpb": "117817", "bsz": "256", "num_updates": "910200", "lr": "9.07071e-06", "gnorm": "2.847", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "272253"} +[2022-08-02 14:28:24,333][train_inner][INFO] - {"epoch": 18, "update": 17.689, "loss": "1.968", "ppl": "3.91", "wps": "399773", "ups": "3.37", "wpb": "118576", "bsz": "256", "num_updates": "910400", "lr": "9.05051e-06", "gnorm": "2.64", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "272313"} +[2022-08-02 14:29:23,452][train_inner][INFO] - {"epoch": 18, "update": 17.693, "loss": "1.974", "ppl": "3.93", "wps": "401130", "ups": "3.38", "wpb": "118572", "bsz": "256", "num_updates": "910600", "lr": "9.0303e-06", "gnorm": "2.406", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "272372"} +[2022-08-02 14:30:23,103][train_inner][INFO] - {"epoch": 18, "update": 17.697, "loss": "1.973", "ppl": "3.93", "wps": "395890", "ups": "3.35", "wpb": "118076", "bsz": "256", "num_updates": "910800", "lr": "9.0101e-06", "gnorm": "2.336", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "272432"} +[2022-08-02 14:31:22,646][train_inner][INFO] - {"epoch": 18, "update": 17.701, "loss": "1.976", "ppl": "3.93", "wps": "395902", "ups": "3.36", "wpb": "117863", "bsz": "256", "num_updates": "911000", "lr": "8.9899e-06", "gnorm": "2.504", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "272491"} +[2022-08-02 14:32:22,118][train_inner][INFO] - {"epoch": 18, "update": 17.705, "loss": "1.973", "ppl": "3.93", "wps": "398360", "ups": "3.36", "wpb": "118457", "bsz": "256", "num_updates": "911200", "lr": "8.9697e-06", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "272551"} +[2022-08-02 14:33:21,566][train_inner][INFO] - {"epoch": 18, "update": 17.708, "loss": "1.971", "ppl": "3.92", "wps": "397859", "ups": "3.36", "wpb": "118258", "bsz": "256", "num_updates": "911400", "lr": "8.94949e-06", "gnorm": "2.387", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "272610"} +[2022-08-02 14:34:00,925][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 14:34:21,453][train_inner][INFO] - {"epoch": 18, "update": 17.712, "loss": "1.965", "ppl": "3.9", "wps": "393780", "ups": "3.34", "wpb": "117911", "bsz": "256", "num_updates": "911600", "lr": "8.92929e-06", "gnorm": "2.453", "loss_scale": "2", "train_wall": "60", "gb_free": "24.5", "wall": "272670"} +[2022-08-02 14:35:22,026][train_inner][INFO] - {"epoch": 18, "update": 17.716, "loss": "1.975", "ppl": "3.93", "wps": "389644", "ups": "3.3", "wpb": "118007", "bsz": "256", "num_updates": "911800", "lr": "8.90909e-06", "gnorm": "2.419", "loss_scale": "2", "train_wall": "60", "gb_free": "23.4", "wall": "272730"} +[2022-08-02 14:36:21,601][train_inner][INFO] - {"epoch": 18, "update": 17.72, "loss": "1.976", "ppl": "3.93", "wps": "397860", "ups": "3.36", "wpb": "118512", "bsz": "256", "num_updates": "912000", "lr": "8.88889e-06", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "272790"} +[2022-08-02 14:37:21,230][train_inner][INFO] - {"epoch": 18, "update": 17.724, "loss": "1.974", "ppl": "3.93", "wps": "395886", "ups": "3.35", "wpb": "118032", "bsz": "256", "num_updates": "912200", "lr": "8.86869e-06", "gnorm": "2.249", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "272850"} +[2022-08-02 14:38:20,976][train_inner][INFO] - {"epoch": 18, "update": 17.728, "loss": "1.969", "ppl": "3.91", "wps": "394574", "ups": "3.35", "wpb": "117871", "bsz": "256", "num_updates": "912400", "lr": "8.84848e-06", "gnorm": "2.346", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "272909"} +[2022-08-02 14:39:20,438][train_inner][INFO] - {"epoch": 18, "update": 17.732, "loss": "1.973", "ppl": "3.93", "wps": "399477", "ups": "3.36", "wpb": "118767", "bsz": "256", "num_updates": "912600", "lr": "8.82828e-06", "gnorm": "2.422", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "272969"} +[2022-08-02 14:40:19,311][train_inner][INFO] - {"epoch": 18, "update": 17.736, "loss": "1.975", "ppl": "3.93", "wps": "401697", "ups": "3.4", "wpb": "118245", "bsz": "255.9", "num_updates": "912800", "lr": "8.80808e-06", "gnorm": "2.441", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "273028"} +[2022-08-02 14:41:18,754][train_inner][INFO] - {"epoch": 18, "update": 17.74, "loss": "1.973", "ppl": "3.93", "wps": "395941", "ups": "3.36", "wpb": "117678", "bsz": "256", "num_updates": "913000", "lr": "8.78788e-06", "gnorm": "2.328", "loss_scale": "2", "train_wall": "59", "gb_free": "32.3", "wall": "273087"} +[2022-08-02 14:42:18,166][train_inner][INFO] - {"epoch": 18, "update": 17.743, "loss": "1.973", "ppl": "3.93", "wps": "398297", "ups": "3.37", "wpb": "118318", "bsz": "256", "num_updates": "913200", "lr": "8.76768e-06", "gnorm": "2.221", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "273147"} +[2022-08-02 14:43:17,466][train_inner][INFO] - {"epoch": 18, "update": 17.747, "loss": "1.969", "ppl": "3.92", "wps": "399420", "ups": "3.37", "wpb": "118426", "bsz": "256", "num_updates": "913400", "lr": "8.74747e-06", "gnorm": "2.281", "loss_scale": "2", "train_wall": "59", "gb_free": "30.3", "wall": "273206"} +[2022-08-02 14:44:13,383][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 14:44:17,325][train_inner][INFO] - {"epoch": 18, "update": 17.751, "loss": "1.975", "ppl": "3.93", "wps": "395259", "ups": "3.34", "wpb": "118298", "bsz": "256", "num_updates": "913600", "lr": "8.72727e-06", "gnorm": "2.345", "loss_scale": "2", "train_wall": "60", "gb_free": "22.5", "wall": "273266"} +[2022-08-02 14:45:16,927][train_inner][INFO] - {"epoch": 18, "update": 17.755, "loss": "1.973", "ppl": "3.93", "wps": "397848", "ups": "3.36", "wpb": "118562", "bsz": "256", "num_updates": "913800", "lr": "8.70707e-06", "gnorm": "2.348", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "273325"} +[2022-08-02 14:46:16,711][train_inner][INFO] - {"epoch": 18, "update": 17.759, "loss": "1.983", "ppl": "3.95", "wps": "394362", "ups": "3.35", "wpb": "117883", "bsz": "256", "num_updates": "914000", "lr": "8.68687e-06", "gnorm": "2.323", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "273385"} +[2022-08-02 14:47:16,111][train_inner][INFO] - {"epoch": 18, "update": 17.763, "loss": "1.971", "ppl": "3.92", "wps": "397688", "ups": "3.37", "wpb": "118114", "bsz": "256", "num_updates": "914200", "lr": "8.66667e-06", "gnorm": "2.258", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "273445"} +[2022-08-02 14:48:15,721][train_inner][INFO] - {"epoch": 18, "update": 17.767, "loss": "1.97", "ppl": "3.92", "wps": "397807", "ups": "3.36", "wpb": "118564", "bsz": "256", "num_updates": "914400", "lr": "8.64646e-06", "gnorm": "2.376", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "273504"} +[2022-08-02 14:49:15,343][train_inner][INFO] - {"epoch": 18, "update": 17.771, "loss": "1.976", "ppl": "3.93", "wps": "395048", "ups": "3.35", "wpb": "117768", "bsz": "256", "num_updates": "914600", "lr": "8.62626e-06", "gnorm": "2.385", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "273564"} +[2022-08-02 14:50:14,705][train_inner][INFO] - {"epoch": 18, "update": 17.775, "loss": "1.971", "ppl": "3.92", "wps": "399707", "ups": "3.37", "wpb": "118636", "bsz": "256", "num_updates": "914800", "lr": "8.60606e-06", "gnorm": "2.264", "loss_scale": "2", "train_wall": "59", "gb_free": "26.8", "wall": "273623"} +[2022-08-02 14:51:14,435][train_inner][INFO] - {"epoch": 18, "update": 17.778, "loss": "1.968", "ppl": "3.91", "wps": "397933", "ups": "3.35", "wpb": "118842", "bsz": "256", "num_updates": "915000", "lr": "8.58586e-06", "gnorm": "2.351", "loss_scale": "2", "train_wall": "59", "gb_free": "27.8", "wall": "273683"} +[2022-08-02 14:52:14,046][train_inner][INFO] - {"epoch": 18, "update": 17.782, "loss": "1.978", "ppl": "3.94", "wps": "398314", "ups": "3.36", "wpb": "118719", "bsz": "256", "num_updates": "915200", "lr": "8.56566e-06", "gnorm": "2.267", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "273742"} +[2022-08-02 14:53:13,348][train_inner][INFO] - {"epoch": 18, "update": 17.786, "loss": "1.968", "ppl": "3.91", "wps": "401009", "ups": "3.37", "wpb": "118901", "bsz": "256", "num_updates": "915400", "lr": "8.54545e-06", "gnorm": "2.27", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "273802"} +[2022-08-02 14:54:13,001][train_inner][INFO] - {"epoch": 18, "update": 17.79, "loss": "1.974", "ppl": "3.93", "wps": "397459", "ups": "3.35", "wpb": "118548", "bsz": "256", "num_updates": "915600", "lr": "8.52525e-06", "gnorm": "2.444", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "273861"} +[2022-08-02 14:54:25,887][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 14:55:12,466][train_inner][INFO] - {"epoch": 18, "update": 17.794, "loss": "1.974", "ppl": "3.93", "wps": "398907", "ups": "3.36", "wpb": "118604", "bsz": "256", "num_updates": "915800", "lr": "8.50505e-06", "gnorm": "2.44", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "273921"} +[2022-08-02 14:56:11,876][train_inner][INFO] - {"epoch": 18, "update": 17.798, "loss": "1.977", "ppl": "3.94", "wps": "399031", "ups": "3.37", "wpb": "118530", "bsz": "256", "num_updates": "916000", "lr": "8.48485e-06", "gnorm": "2.296", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "273980"} +[2022-08-02 14:57:11,179][train_inner][INFO] - {"epoch": 18, "update": 17.802, "loss": "1.973", "ppl": "3.93", "wps": "397314", "ups": "3.37", "wpb": "117809", "bsz": "256", "num_updates": "916200", "lr": "8.46465e-06", "gnorm": "2.399", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "274040"} +[2022-08-02 14:58:10,751][train_inner][INFO] - {"epoch": 18, "update": 17.806, "loss": "1.973", "ppl": "3.93", "wps": "396023", "ups": "3.36", "wpb": "117958", "bsz": "256", "num_updates": "916400", "lr": "8.44444e-06", "gnorm": "2.306", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "274099"} +[2022-08-02 14:59:10,243][train_inner][INFO] - {"epoch": 18, "update": 17.809, "loss": "1.975", "ppl": "3.93", "wps": "397491", "ups": "3.36", "wpb": "118238", "bsz": "256", "num_updates": "916600", "lr": "8.42424e-06", "gnorm": "2.561", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "274159"} +[2022-08-02 15:00:09,875][train_inner][INFO] - {"epoch": 18, "update": 17.813, "loss": "1.973", "ppl": "3.93", "wps": "398637", "ups": "3.35", "wpb": "118856", "bsz": "256", "num_updates": "916800", "lr": "8.40404e-06", "gnorm": "2.551", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "274218"} +[2022-08-02 15:01:09,386][train_inner][INFO] - {"epoch": 18, "update": 17.817, "loss": "1.968", "ppl": "3.91", "wps": "399477", "ups": "3.36", "wpb": "118866", "bsz": "256", "num_updates": "917000", "lr": "8.38384e-06", "gnorm": "2.496", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "274278"} +[2022-08-02 15:02:08,790][train_inner][INFO] - {"epoch": 18, "update": 17.821, "loss": "1.968", "ppl": "3.91", "wps": "397969", "ups": "3.37", "wpb": "118204", "bsz": "256", "num_updates": "917200", "lr": "8.36364e-06", "gnorm": "2.624", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "274337"} +[2022-08-02 15:03:08,174][train_inner][INFO] - {"epoch": 18, "update": 17.825, "loss": "1.973", "ppl": "3.92", "wps": "399622", "ups": "3.37", "wpb": "118654", "bsz": "256", "num_updates": "917400", "lr": "8.34343e-06", "gnorm": "2.718", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "274397"} +[2022-08-02 15:04:09,004][train_inner][INFO] - {"epoch": 18, "update": 17.829, "loss": "1.973", "ppl": "3.93", "wps": "390328", "ups": "3.29", "wpb": "118717", "bsz": "256", "num_updates": "917600", "lr": "8.32323e-06", "gnorm": "2.329", "loss_scale": "2", "train_wall": "60", "gb_free": "26", "wall": "274457"} +[2022-08-02 15:04:57,016][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:05:08,966][train_inner][INFO] - {"epoch": 18, "update": 17.833, "loss": "1.971", "ppl": "3.92", "wps": "397170", "ups": "3.34", "wpb": "119076", "bsz": "256", "num_updates": "917800", "lr": "8.30303e-06", "gnorm": "2.352", "loss_scale": "2", "train_wall": "60", "gb_free": "23.3", "wall": "274517"} +[2022-08-02 15:06:08,098][train_inner][INFO] - {"epoch": 18, "update": 17.837, "loss": "1.967", "ppl": "3.91", "wps": "400110", "ups": "3.38", "wpb": "118294", "bsz": "256", "num_updates": "918000", "lr": "8.28283e-06", "gnorm": "2.478", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "274577"} +[2022-08-02 15:07:07,664][train_inner][INFO] - {"epoch": 18, "update": 17.841, "loss": "1.977", "ppl": "3.94", "wps": "396970", "ups": "3.36", "wpb": "118229", "bsz": "256", "num_updates": "918200", "lr": "8.26263e-06", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "274636"} +[2022-08-02 15:08:07,441][train_inner][INFO] - {"epoch": 18, "update": 17.844, "loss": "1.972", "ppl": "3.92", "wps": "395116", "ups": "3.35", "wpb": "118093", "bsz": "256", "num_updates": "918400", "lr": "8.24242e-06", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "274696"} +[2022-08-02 15:09:06,818][train_inner][INFO] - {"epoch": 18, "update": 17.848, "loss": "1.978", "ppl": "3.94", "wps": "397641", "ups": "3.37", "wpb": "118053", "bsz": "256", "num_updates": "918600", "lr": "8.22222e-06", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "274755"} +[2022-08-02 15:10:06,502][train_inner][INFO] - {"epoch": 18, "update": 17.852, "loss": "1.976", "ppl": "3.93", "wps": "394271", "ups": "3.35", "wpb": "117659", "bsz": "256", "num_updates": "918800", "lr": "8.20202e-06", "gnorm": "2.302", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "274815"} +[2022-08-02 15:11:07,004][train_inner][INFO] - {"epoch": 18, "update": 17.856, "loss": "1.977", "ppl": "3.94", "wps": "390994", "ups": "3.31", "wpb": "118278", "bsz": "256", "num_updates": "919000", "lr": "8.18182e-06", "gnorm": "2.348", "loss_scale": "2", "train_wall": "60", "gb_free": "21.9", "wall": "274875"} +[2022-08-02 15:12:06,616][train_inner][INFO] - {"epoch": 18, "update": 17.86, "loss": "1.967", "ppl": "3.91", "wps": "398306", "ups": "3.36", "wpb": "118719", "bsz": "256", "num_updates": "919200", "lr": "8.16162e-06", "gnorm": "2.337", "loss_scale": "2", "train_wall": "59", "gb_free": "27.5", "wall": "274935"} +[2022-08-02 15:13:05,788][train_inner][INFO] - {"epoch": 18, "update": 17.864, "loss": "1.975", "ppl": "3.93", "wps": "397988", "ups": "3.38", "wpb": "117748", "bsz": "256", "num_updates": "919400", "lr": "8.14141e-06", "gnorm": "2.305", "loss_scale": "2", "train_wall": "59", "gb_free": "29.2", "wall": "274994"} +[2022-08-02 15:14:05,375][train_inner][INFO] - {"epoch": 18, "update": 17.868, "loss": "1.973", "ppl": "3.93", "wps": "395516", "ups": "3.36", "wpb": "117837", "bsz": "256", "num_updates": "919600", "lr": "8.12121e-06", "gnorm": "2.361", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "275054"} +[2022-08-02 15:15:04,849][train_inner][INFO] - {"epoch": 18, "update": 17.872, "loss": "1.972", "ppl": "3.92", "wps": "399206", "ups": "3.36", "wpb": "118711", "bsz": "256", "num_updates": "919800", "lr": "8.10101e-06", "gnorm": "2.276", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "275113"} +[2022-08-02 15:15:25,621][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:16:04,745][train_inner][INFO] - {"epoch": 18, "update": 17.876, "loss": "1.968", "ppl": "3.91", "wps": "397057", "ups": "3.34", "wpb": "118910", "bsz": "256", "num_updates": "920000", "lr": "8.08081e-06", "gnorm": "2.29", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "275173"} +[2022-08-02 15:17:03,807][train_inner][INFO] - {"epoch": 18, "update": 17.879, "loss": "1.982", "ppl": "3.95", "wps": "397476", "ups": "3.39", "wpb": "117378", "bsz": "256", "num_updates": "920200", "lr": "8.06061e-06", "gnorm": "2.493", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "275232"} +[2022-08-02 15:18:02,730][train_inner][INFO] - {"epoch": 18, "update": 17.883, "loss": "1.975", "ppl": "3.93", "wps": "400513", "ups": "3.39", "wpb": "117996", "bsz": "256", "num_updates": "920400", "lr": "8.0404e-06", "gnorm": "2.325", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "275291"} +[2022-08-02 15:19:03,374][train_inner][INFO] - {"epoch": 18, "update": 17.887, "loss": "1.975", "ppl": "3.93", "wps": "392272", "ups": "3.3", "wpb": "118942", "bsz": "256", "num_updates": "920600", "lr": "8.0202e-06", "gnorm": "2.243", "loss_scale": "2", "train_wall": "60", "gb_free": "21.8", "wall": "275352"} +[2022-08-02 15:20:02,588][train_inner][INFO] - {"epoch": 18, "update": 17.891, "loss": "1.972", "ppl": "3.92", "wps": "401093", "ups": "3.38", "wpb": "118752", "bsz": "256", "num_updates": "920800", "lr": "8e-06", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "275411"} +[2022-08-02 15:21:02,176][train_inner][INFO] - {"epoch": 18, "update": 17.895, "loss": "1.969", "ppl": "3.92", "wps": "399932", "ups": "3.36", "wpb": "119156", "bsz": "256", "num_updates": "921000", "lr": "7.9798e-06", "gnorm": "2.392", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "275471"} +[2022-08-02 15:22:01,533][train_inner][INFO] - {"epoch": 18, "update": 17.899, "loss": "1.973", "ppl": "3.93", "wps": "399785", "ups": "3.37", "wpb": "118649", "bsz": "256", "num_updates": "921200", "lr": "7.9596e-06", "gnorm": "2.471", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "275530"} +[2022-08-02 15:23:01,171][train_inner][INFO] - {"epoch": 18, "update": 17.903, "loss": "1.972", "ppl": "3.92", "wps": "398129", "ups": "3.35", "wpb": "118717", "bsz": "256", "num_updates": "921400", "lr": "7.93939e-06", "gnorm": "2.249", "loss_scale": "2", "train_wall": "59", "gb_free": "25.2", "wall": "275590"} +[2022-08-02 15:24:00,918][train_inner][INFO] - {"epoch": 18, "update": 17.907, "loss": "1.966", "ppl": "3.91", "wps": "398261", "ups": "3.35", "wpb": "118974", "bsz": "256", "num_updates": "921600", "lr": "7.91919e-06", "gnorm": "2.412", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "275649"} +[2022-08-02 15:25:00,339][train_inner][INFO] - {"epoch": 18, "update": 17.911, "loss": "1.977", "ppl": "3.94", "wps": "396135", "ups": "3.37", "wpb": "117692", "bsz": "256", "num_updates": "921800", "lr": "7.89899e-06", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "275709"} +[2022-08-02 15:25:35,302][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:25:59,955][train_inner][INFO] - {"epoch": 18, "update": 17.914, "loss": "1.974", "ppl": "3.93", "wps": "395765", "ups": "3.35", "wpb": "117970", "bsz": "256", "num_updates": "922000", "lr": "7.87879e-06", "gnorm": "2.411", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "275768"} +[2022-08-02 15:26:59,416][train_inner][INFO] - {"epoch": 18, "update": 17.918, "loss": "1.968", "ppl": "3.91", "wps": "397931", "ups": "3.36", "wpb": "118306", "bsz": "256", "num_updates": "922200", "lr": "7.85859e-06", "gnorm": "2.39", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "275828"} +[2022-08-02 15:27:59,126][train_inner][INFO] - {"epoch": 18, "update": 17.922, "loss": "1.973", "ppl": "3.93", "wps": "397616", "ups": "3.35", "wpb": "118706", "bsz": "256", "num_updates": "922400", "lr": "7.83838e-06", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "275888"} +[2022-08-02 15:28:58,531][train_inner][INFO] - {"epoch": 18, "update": 17.926, "loss": "1.971", "ppl": "3.92", "wps": "398634", "ups": "3.37", "wpb": "118404", "bsz": "256", "num_updates": "922600", "lr": "7.81818e-06", "gnorm": "2.333", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "275947"} +[2022-08-02 15:29:58,932][train_inner][INFO] - {"epoch": 18, "update": 17.93, "loss": "1.976", "ppl": "3.94", "wps": "391006", "ups": "3.31", "wpb": "118084", "bsz": "256", "num_updates": "922800", "lr": "7.79798e-06", "gnorm": "2.328", "loss_scale": "2", "train_wall": "60", "gb_free": "23.5", "wall": "276007"} +[2022-08-02 15:30:58,729][train_inner][INFO] - {"epoch": 18, "update": 17.934, "loss": "1.97", "ppl": "3.92", "wps": "397390", "ups": "3.34", "wpb": "118814", "bsz": "256", "num_updates": "923000", "lr": "7.77778e-06", "gnorm": "2.449", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "276067"} +[2022-08-02 15:31:58,301][train_inner][INFO] - {"epoch": 18, "update": 17.938, "loss": "1.97", "ppl": "3.92", "wps": "397873", "ups": "3.36", "wpb": "118508", "bsz": "256", "num_updates": "923200", "lr": "7.75758e-06", "gnorm": "2.485", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "276127"} +[2022-08-02 15:32:57,586][train_inner][INFO] - {"epoch": 18, "update": 17.942, "loss": "1.973", "ppl": "3.93", "wps": "398226", "ups": "3.37", "wpb": "118043", "bsz": "256", "num_updates": "923400", "lr": "7.73737e-06", "gnorm": "2.581", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "276186"} +[2022-08-02 15:33:56,961][train_inner][INFO] - {"epoch": 18, "update": 17.946, "loss": "1.971", "ppl": "3.92", "wps": "398300", "ups": "3.37", "wpb": "118245", "bsz": "256", "num_updates": "923600", "lr": "7.71717e-06", "gnorm": "2.722", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "276245"} +[2022-08-02 15:34:56,488][train_inner][INFO] - {"epoch": 18, "update": 17.949, "loss": "1.967", "ppl": "3.91", "wps": "400364", "ups": "3.36", "wpb": "119163", "bsz": "256", "num_updates": "923800", "lr": "7.69697e-06", "gnorm": "2.733", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "276305"} +[2022-08-02 15:35:48,114][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:35:56,035][train_inner][INFO] - {"epoch": 18, "update": 17.953, "loss": "1.971", "ppl": "3.92", "wps": "397111", "ups": "3.36", "wpb": "118232", "bsz": "256", "num_updates": "924000", "lr": "7.67677e-06", "gnorm": "2.754", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "276364"} +[2022-08-02 15:36:55,461][train_inner][INFO] - {"epoch": 18, "update": 17.957, "loss": "1.966", "ppl": "3.91", "wps": "399244", "ups": "3.37", "wpb": "118627", "bsz": "256", "num_updates": "924200", "lr": "7.65657e-06", "gnorm": "2.555", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "276424"} +[2022-08-02 15:37:54,908][train_inner][INFO] - {"epoch": 18, "update": 17.961, "loss": "1.97", "ppl": "3.92", "wps": "396525", "ups": "3.36", "wpb": "117860", "bsz": "256", "num_updates": "924400", "lr": "7.63636e-06", "gnorm": "2.67", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "276483"} +[2022-08-02 15:38:54,397][train_inner][INFO] - {"epoch": 18, "update": 17.965, "loss": "1.968", "ppl": "3.91", "wps": "397287", "ups": "3.36", "wpb": "118171", "bsz": "256", "num_updates": "924600", "lr": "7.61616e-06", "gnorm": "2.566", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "276543"} +[2022-08-02 15:39:53,678][train_inner][INFO] - {"epoch": 18, "update": 17.969, "loss": "1.972", "ppl": "3.92", "wps": "398789", "ups": "3.37", "wpb": "118203", "bsz": "256", "num_updates": "924800", "lr": "7.59596e-06", "gnorm": "2.61", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "276602"} +[2022-08-02 15:40:53,992][train_inner][INFO] - {"epoch": 18, "update": 17.973, "loss": "1.973", "ppl": "3.92", "wps": "391695", "ups": "3.32", "wpb": "118122", "bsz": "256", "num_updates": "925000", "lr": "7.57576e-06", "gnorm": "2.316", "loss_scale": "2", "train_wall": "60", "gb_free": "24", "wall": "276662"} +[2022-08-02 15:41:53,482][train_inner][INFO] - {"epoch": 18, "update": 17.977, "loss": "1.969", "ppl": "3.91", "wps": "399725", "ups": "3.36", "wpb": "118898", "bsz": "256", "num_updates": "925200", "lr": "7.55556e-06", "gnorm": "2.319", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "276722"} +[2022-08-02 15:42:52,657][train_inner][INFO] - {"epoch": 18, "update": 17.98, "loss": "1.971", "ppl": "3.92", "wps": "399041", "ups": "3.38", "wpb": "118066", "bsz": "256", "num_updates": "925400", "lr": "7.53535e-06", "gnorm": "2.48", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "276781"} +[2022-08-02 15:43:52,333][train_inner][INFO] - {"epoch": 18, "update": 17.984, "loss": "1.967", "ppl": "3.91", "wps": "398500", "ups": "3.35", "wpb": "118904", "bsz": "256", "num_updates": "925600", "lr": "7.51515e-06", "gnorm": "2.25", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "276841"} +[2022-08-02 15:44:51,986][train_inner][INFO] - {"epoch": 18, "update": 17.988, "loss": "1.962", "ppl": "3.9", "wps": "397485", "ups": "3.35", "wpb": "118554", "bsz": "256", "num_updates": "925800", "lr": "7.49495e-06", "gnorm": "2.467", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "276900"} +[2022-08-02 15:45:51,306][train_inner][INFO] - {"epoch": 18, "update": 17.992, "loss": "1.972", "ppl": "3.92", "wps": "399535", "ups": "3.37", "wpb": "118502", "bsz": "256", "num_updates": "926000", "lr": "7.47475e-06", "gnorm": "2.475", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "276960"} +[2022-08-02 15:46:01,378][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:46:51,283][train_inner][INFO] - {"epoch": 18, "update": 17.996, "loss": "1.973", "ppl": "3.93", "wps": "395005", "ups": "3.33", "wpb": "118454", "bsz": "256", "num_updates": "926200", "lr": "7.45455e-06", "gnorm": "2.435", "loss_scale": "2", "train_wall": "60", "gb_free": "25.2", "wall": "277020"} +[2022-08-02 15:47:50,486][train_inner][INFO] - {"epoch": 18, "update": 18.0, "loss": "1.971", "ppl": "3.92", "wps": "400206", "ups": "3.38", "wpb": "118468", "bsz": "256", "num_updates": "926400", "lr": "7.43434e-06", "gnorm": "2.461", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "277079"} +[2022-08-02 15:47:51,616][fairseq_cli.train][INFO] - end of epoch 18 (average epoch stats below) +[2022-08-02 15:47:51,616][train][INFO] - {"epoch": 18, "train_loss": "1.977", "train_ppl": "3.94", "train_wps": "395865", "train_ups": "3.35", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "926404", "train_lr": "7.43394e-06", "train_gnorm": "2.45", "train_loss_scale": "2", "train_train_wall": "15233", "train_gb_free": "21.7", "train_wall": "277080"} +[2022-08-02 15:47:51,726][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 15:47:51,729][fairseq.trainer][INFO] - begin training epoch 19 +[2022-08-02 15:47:51,729][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 15:49:01,390][train_inner][INFO] - {"epoch": 19, "update": 18.004, "loss": "1.963", "ppl": "3.9", "wps": "332860", "ups": "2.82", "wpb": "118004", "bsz": "255.4", "num_updates": "926600", "lr": "7.41414e-06", "gnorm": "2.262", "loss_scale": "2", "train_wall": "61", "gb_free": "21.7", "wall": "277150"} +[2022-08-02 15:50:00,766][train_inner][INFO] - {"epoch": 19, "update": 18.008, "loss": "1.966", "ppl": "3.91", "wps": "397075", "ups": "3.37", "wpb": "117882", "bsz": "256", "num_updates": "926800", "lr": "7.39394e-06", "gnorm": "2.274", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "277209"} +[2022-08-02 15:50:59,994][train_inner][INFO] - {"epoch": 19, "update": 18.012, "loss": "1.963", "ppl": "3.9", "wps": "399021", "ups": "3.38", "wpb": "118166", "bsz": "256", "num_updates": "927000", "lr": "7.37374e-06", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "277268"} +[2022-08-02 15:51:59,700][train_inner][INFO] - {"epoch": 19, "update": 18.015, "loss": "1.961", "ppl": "3.89", "wps": "396256", "ups": "3.35", "wpb": "118293", "bsz": "256", "num_updates": "927200", "lr": "7.35354e-06", "gnorm": "2.294", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "277328"} +[2022-08-02 15:52:59,134][train_inner][INFO] - {"epoch": 19, "update": 18.019, "loss": "1.969", "ppl": "3.92", "wps": "396463", "ups": "3.37", "wpb": "117816", "bsz": "256", "num_updates": "927400", "lr": "7.33333e-06", "gnorm": "2.471", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "277388"} +[2022-08-02 15:53:59,571][train_inner][INFO] - {"epoch": 19, "update": 18.023, "loss": "1.962", "ppl": "3.9", "wps": "392932", "ups": "3.31", "wpb": "118736", "bsz": "256", "num_updates": "927600", "lr": "7.31313e-06", "gnorm": "2.322", "loss_scale": "2", "train_wall": "60", "gb_free": "22.1", "wall": "277448"} +[2022-08-02 15:54:58,922][train_inner][INFO] - {"epoch": 19, "update": 18.027, "loss": "1.967", "ppl": "3.91", "wps": "398369", "ups": "3.37", "wpb": "118218", "bsz": "256", "num_updates": "927800", "lr": "7.29293e-06", "gnorm": "2.378", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "277507"} +[2022-08-02 15:55:58,240][train_inner][INFO] - {"epoch": 19, "update": 18.031, "loss": "1.97", "ppl": "3.92", "wps": "399371", "ups": "3.37", "wpb": "118450", "bsz": "256", "num_updates": "928000", "lr": "7.27273e-06", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "277567"} +[2022-08-02 15:56:28,609][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:56:58,073][train_inner][INFO] - {"epoch": 19, "update": 18.035, "loss": "1.968", "ppl": "3.91", "wps": "396322", "ups": "3.34", "wpb": "118563", "bsz": "256", "num_updates": "928200", "lr": "7.25253e-06", "gnorm": "2.242", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "277627"} +[2022-08-02 15:57:57,326][train_inner][INFO] - {"epoch": 19, "update": 18.039, "loss": "1.969", "ppl": "3.91", "wps": "398732", "ups": "3.38", "wpb": "118130", "bsz": "256", "num_updates": "928400", "lr": "7.23232e-06", "gnorm": "2.286", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "277686"} +[2022-08-02 15:58:56,544][train_inner][INFO] - {"epoch": 19, "update": 18.043, "loss": "1.971", "ppl": "3.92", "wps": "397298", "ups": "3.38", "wpb": "117634", "bsz": "256", "num_updates": "928600", "lr": "7.21212e-06", "gnorm": "2.343", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "277745"} +[2022-08-02 15:59:55,888][train_inner][INFO] - {"epoch": 19, "update": 18.047, "loss": "1.968", "ppl": "3.91", "wps": "400158", "ups": "3.37", "wpb": "118736", "bsz": "256", "num_updates": "928800", "lr": "7.19192e-06", "gnorm": "2.438", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "277804"} +[2022-08-02 16:00:55,312][train_inner][INFO] - {"epoch": 19, "update": 18.05, "loss": "1.971", "ppl": "3.92", "wps": "399385", "ups": "3.37", "wpb": "118665", "bsz": "256", "num_updates": "929000", "lr": "7.17172e-06", "gnorm": "2.274", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "277864"} +[2022-08-02 16:01:54,475][train_inner][INFO] - {"epoch": 19, "update": 18.054, "loss": "1.967", "ppl": "3.91", "wps": "398636", "ups": "3.38", "wpb": "117921", "bsz": "256", "num_updates": "929200", "lr": "7.15152e-06", "gnorm": "2.423", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "277923"} +[2022-08-02 16:02:54,049][train_inner][INFO] - {"epoch": 19, "update": 18.058, "loss": "1.973", "ppl": "3.93", "wps": "397035", "ups": "3.36", "wpb": "118264", "bsz": "256", "num_updates": "929400", "lr": "7.13131e-06", "gnorm": "2.325", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "277982"} +[2022-08-02 16:03:53,383][train_inner][INFO] - {"epoch": 19, "update": 18.062, "loss": "1.968", "ppl": "3.91", "wps": "397795", "ups": "3.37", "wpb": "118014", "bsz": "256", "num_updates": "929600", "lr": "7.11111e-06", "gnorm": "2.384", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "278042"} +[2022-08-02 16:04:52,764][train_inner][INFO] - {"epoch": 19, "update": 18.066, "loss": "1.969", "ppl": "3.92", "wps": "397287", "ups": "3.37", "wpb": "117955", "bsz": "256", "num_updates": "929800", "lr": "7.09091e-06", "gnorm": "2.407", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "278101"} +[2022-08-02 16:05:52,555][train_inner][INFO] - {"epoch": 19, "update": 18.07, "loss": "1.969", "ppl": "3.91", "wps": "396059", "ups": "3.35", "wpb": "118403", "bsz": "256", "num_updates": "930000", "lr": "7.07071e-06", "gnorm": "2.377", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "278161"} +[2022-08-02 16:06:39,488][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 16:06:52,248][train_inner][INFO] - {"epoch": 19, "update": 18.074, "loss": "1.968", "ppl": "3.91", "wps": "396700", "ups": "3.35", "wpb": "118401", "bsz": "256", "num_updates": "930200", "lr": "7.05051e-06", "gnorm": "2.357", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "278221"} +[2022-08-02 16:07:51,652][train_inner][INFO] - {"epoch": 19, "update": 18.078, "loss": "1.97", "ppl": "3.92", "wps": "397793", "ups": "3.37", "wpb": "118151", "bsz": "256", "num_updates": "930400", "lr": "7.0303e-06", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "278280"} +[2022-08-02 16:08:50,995][train_inner][INFO] - {"epoch": 19, "update": 18.082, "loss": "1.964", "ppl": "3.9", "wps": "396667", "ups": "3.37", "wpb": "117695", "bsz": "256", "num_updates": "930600", "lr": "7.0101e-06", "gnorm": "2.424", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "278339"} +[2022-08-02 16:09:50,332][train_inner][INFO] - {"epoch": 19, "update": 18.085, "loss": "1.964", "ppl": "3.9", "wps": "401063", "ups": "3.37", "wpb": "118988", "bsz": "256", "num_updates": "930800", "lr": "6.9899e-06", "gnorm": "2.419", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "278399"} +[2022-08-02 16:10:49,892][train_inner][INFO] - {"epoch": 19, "update": 18.089, "loss": "1.969", "ppl": "3.92", "wps": "396965", "ups": "3.36", "wpb": "118217", "bsz": "256", "num_updates": "931000", "lr": "6.9697e-06", "gnorm": "2.372", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "278458"} +[2022-08-02 16:11:49,200][train_inner][INFO] - {"epoch": 19, "update": 18.093, "loss": "1.968", "ppl": "3.91", "wps": "399059", "ups": "3.37", "wpb": "118335", "bsz": "256", "num_updates": "931200", "lr": "6.94949e-06", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "278518"} +[2022-08-02 16:12:48,551][train_inner][INFO] - {"epoch": 19, "update": 18.097, "loss": "1.967", "ppl": "3.91", "wps": "398535", "ups": "3.37", "wpb": "118266", "bsz": "256", "num_updates": "931400", "lr": "6.92929e-06", "gnorm": "2.414", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "278577"} +[2022-08-02 16:13:48,147][train_inner][INFO] - {"epoch": 19, "update": 18.101, "loss": "1.974", "ppl": "3.93", "wps": "397164", "ups": "3.36", "wpb": "118346", "bsz": "256", "num_updates": "931600", "lr": "6.90909e-06", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "278637"} +[2022-08-02 16:14:47,753][train_inner][INFO] - {"epoch": 19, "update": 18.105, "loss": "1.976", "ppl": "3.94", "wps": "397290", "ups": "3.36", "wpb": "118404", "bsz": "256", "num_updates": "931800", "lr": "6.88889e-06", "gnorm": "2.303", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "278696"} +[2022-08-02 16:15:46,976][train_inner][INFO] - {"epoch": 19, "update": 18.109, "loss": "1.972", "ppl": "3.92", "wps": "397257", "ups": "3.38", "wpb": "117634", "bsz": "255.9", "num_updates": "932000", "lr": "6.86869e-06", "gnorm": "2.424", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "278755"} +[2022-08-02 16:16:46,405][train_inner][INFO] - {"epoch": 19, "update": 18.113, "loss": "1.968", "ppl": "3.91", "wps": "397024", "ups": "3.37", "wpb": "117972", "bsz": "256", "num_updates": "932200", "lr": "6.84848e-06", "gnorm": "2.654", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "278815"} +[2022-08-02 16:16:59,559][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 16:17:45,936][train_inner][INFO] - {"epoch": 19, "update": 18.117, "loss": "1.965", "ppl": "3.9", "wps": "397283", "ups": "3.36", "wpb": "118253", "bsz": "256", "num_updates": "932400", "lr": "6.82828e-06", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "28.1", "wall": "278874"} +[2022-08-02 16:18:45,668][train_inner][INFO] - {"epoch": 19, "update": 18.12, "loss": "1.966", "ppl": "3.91", "wps": "394989", "ups": "3.35", "wpb": "117967", "bsz": "256", "num_updates": "932600", "lr": "6.80808e-06", "gnorm": "2.449", "loss_scale": "2", "train_wall": "59", "gb_free": "26.6", "wall": "278934"} +[2022-08-02 16:19:45,079][train_inner][INFO] - {"epoch": 19, "update": 18.124, "loss": "1.967", "ppl": "3.91", "wps": "398049", "ups": "3.37", "wpb": "118241", "bsz": "256", "num_updates": "932800", "lr": "6.78788e-06", "gnorm": "2.351", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "278994"} +[2022-08-02 16:20:44,671][train_inner][INFO] - {"epoch": 19, "update": 18.128, "loss": "1.965", "ppl": "3.9", "wps": "399981", "ups": "3.36", "wpb": "119179", "bsz": "256", "num_updates": "933000", "lr": "6.76768e-06", "gnorm": "2.555", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "279053"} +[2022-08-02 16:21:44,256][train_inner][INFO] - {"epoch": 19, "update": 18.132, "loss": "1.966", "ppl": "3.91", "wps": "398901", "ups": "3.36", "wpb": "118841", "bsz": "256", "num_updates": "933200", "lr": "6.74747e-06", "gnorm": "2.494", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "279113"} +[2022-08-02 16:22:43,897][train_inner][INFO] - {"epoch": 19, "update": 18.136, "loss": "1.974", "ppl": "3.93", "wps": "395755", "ups": "3.35", "wpb": "118015", "bsz": "256", "num_updates": "933400", "lr": "6.72727e-06", "gnorm": "2.396", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "279172"} +[2022-08-02 16:23:43,403][train_inner][INFO] - {"epoch": 19, "update": 18.14, "loss": "1.965", "ppl": "3.9", "wps": "397240", "ups": "3.36", "wpb": "118191", "bsz": "256", "num_updates": "933600", "lr": "6.70707e-06", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "279232"} +[2022-08-02 16:24:42,649][train_inner][INFO] - {"epoch": 19, "update": 18.144, "loss": "1.968", "ppl": "3.91", "wps": "399865", "ups": "3.38", "wpb": "118451", "bsz": "256", "num_updates": "933800", "lr": "6.68687e-06", "gnorm": "2.359", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "279291"} +[2022-08-02 16:25:41,938][train_inner][INFO] - {"epoch": 19, "update": 18.148, "loss": "1.971", "ppl": "3.92", "wps": "399116", "ups": "3.37", "wpb": "118314", "bsz": "256", "num_updates": "934000", "lr": "6.66667e-06", "gnorm": "2.39", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "279350"} +[2022-08-02 16:26:40,949][train_inner][INFO] - {"epoch": 19, "update": 18.151, "loss": "1.971", "ppl": "3.92", "wps": "402328", "ups": "3.39", "wpb": "118709", "bsz": "256", "num_updates": "934200", "lr": "6.64646e-06", "gnorm": "2.361", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "279409"} +[2022-08-02 16:27:10,175][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 16:27:41,623][train_inner][INFO] - {"epoch": 19, "update": 18.155, "loss": "1.966", "ppl": "3.91", "wps": "390611", "ups": "3.3", "wpb": "118500", "bsz": "256", "num_updates": "934400", "lr": "6.62626e-06", "gnorm": "2.426", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "279470"} +[2022-08-02 16:28:41,238][train_inner][INFO] - {"epoch": 19, "update": 18.159, "loss": "1.967", "ppl": "3.91", "wps": "396214", "ups": "3.35", "wpb": "118099", "bsz": "256", "num_updates": "934600", "lr": "6.60606e-06", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "279530"} +[2022-08-02 16:29:40,805][train_inner][INFO] - {"epoch": 19, "update": 18.163, "loss": "1.967", "ppl": "3.91", "wps": "397139", "ups": "3.36", "wpb": "118282", "bsz": "256", "num_updates": "934800", "lr": "6.58586e-06", "gnorm": "2.317", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "279589"} +[2022-08-02 16:30:39,991][train_inner][INFO] - {"epoch": 19, "update": 18.167, "loss": "1.965", "ppl": "3.9", "wps": "400084", "ups": "3.38", "wpb": "118395", "bsz": "256", "num_updates": "935000", "lr": "6.56566e-06", "gnorm": "2.356", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "279648"} +[2022-08-02 16:31:40,136][train_inner][INFO] - {"epoch": 19, "update": 18.171, "loss": "1.965", "ppl": "3.91", "wps": "392874", "ups": "3.33", "wpb": "118147", "bsz": "256", "num_updates": "935200", "lr": "6.54545e-06", "gnorm": "2.245", "loss_scale": "2", "train_wall": "60", "gb_free": "21.9", "wall": "279709"} +[2022-08-02 16:32:39,801][train_inner][INFO] - {"epoch": 19, "update": 18.175, "loss": "1.965", "ppl": "3.91", "wps": "398034", "ups": "3.35", "wpb": "118742", "bsz": "256", "num_updates": "935400", "lr": "6.52525e-06", "gnorm": "2.282", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "279768"} +[2022-08-02 16:33:38,947][train_inner][INFO] - {"epoch": 19, "update": 18.179, "loss": "1.966", "ppl": "3.91", "wps": "398889", "ups": "3.38", "wpb": "117963", "bsz": "256", "num_updates": "935600", "lr": "6.50505e-06", "gnorm": "2.272", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "279827"} +[2022-08-02 16:34:38,161][train_inner][INFO] - {"epoch": 19, "update": 18.183, "loss": "1.97", "ppl": "3.92", "wps": "395953", "ups": "3.38", "wpb": "117228", "bsz": "256", "num_updates": "935800", "lr": "6.48485e-06", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "279887"} +[2022-08-02 16:35:37,556][train_inner][INFO] - {"epoch": 19, "update": 18.186, "loss": "1.97", "ppl": "3.92", "wps": "397386", "ups": "3.37", "wpb": "118014", "bsz": "256", "num_updates": "936000", "lr": "6.46465e-06", "gnorm": "2.336", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "279946"} +[2022-08-02 16:36:36,990][train_inner][INFO] - {"epoch": 19, "update": 18.19, "loss": "1.967", "ppl": "3.91", "wps": "397353", "ups": "3.37", "wpb": "118080", "bsz": "256", "num_updates": "936200", "lr": "6.44444e-06", "gnorm": "2.322", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "280005"} +[2022-08-02 16:37:22,608][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 16:37:36,641][train_inner][INFO] - {"epoch": 19, "update": 18.194, "loss": "1.971", "ppl": "3.92", "wps": "396509", "ups": "3.35", "wpb": "118260", "bsz": "256", "num_updates": "936400", "lr": "6.42424e-06", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "280065"} +[2022-08-02 16:38:36,479][train_inner][INFO] - {"epoch": 19, "update": 18.198, "loss": "1.968", "ppl": "3.91", "wps": "395743", "ups": "3.34", "wpb": "118403", "bsz": "256", "num_updates": "936600", "lr": "6.40404e-06", "gnorm": "2.254", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "280125"} +[2022-08-02 16:39:36,052][train_inner][INFO] - {"epoch": 19, "update": 18.202, "loss": "1.968", "ppl": "3.91", "wps": "397256", "ups": "3.36", "wpb": "118328", "bsz": "256", "num_updates": "936800", "lr": "6.38384e-06", "gnorm": "2.377", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "280184"} +[2022-08-02 16:40:35,384][train_inner][INFO] - {"epoch": 19, "update": 18.206, "loss": "1.963", "ppl": "3.9", "wps": "397204", "ups": "3.37", "wpb": "117834", "bsz": "256", "num_updates": "937000", "lr": "6.36364e-06", "gnorm": "2.29", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "280244"} +[2022-08-02 16:41:36,026][train_inner][INFO] - {"epoch": 19, "update": 18.21, "loss": "1.961", "ppl": "3.89", "wps": "393011", "ups": "3.3", "wpb": "119164", "bsz": "256", "num_updates": "937200", "lr": "6.34343e-06", "gnorm": "2.278", "loss_scale": "2", "train_wall": "60", "gb_free": "28.3", "wall": "280304"} +[2022-08-02 16:42:35,756][train_inner][INFO] - {"epoch": 19, "update": 18.214, "loss": "1.965", "ppl": "3.9", "wps": "396923", "ups": "3.35", "wpb": "118540", "bsz": "256", "num_updates": "937400", "lr": "6.32323e-06", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "280364"} +[2022-08-02 16:43:35,487][train_inner][INFO] - {"epoch": 19, "update": 18.218, "loss": "1.966", "ppl": "3.91", "wps": "394595", "ups": "3.35", "wpb": "117847", "bsz": "256", "num_updates": "937600", "lr": "6.30303e-06", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "280424"} +[2022-08-02 16:44:34,825][train_inner][INFO] - {"epoch": 19, "update": 18.221, "loss": "1.971", "ppl": "3.92", "wps": "398650", "ups": "3.37", "wpb": "118274", "bsz": "256", "num_updates": "937800", "lr": "6.28283e-06", "gnorm": "2.322", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "280483"} +[2022-08-02 16:45:34,161][train_inner][INFO] - {"epoch": 19, "update": 18.225, "loss": "1.967", "ppl": "3.91", "wps": "398179", "ups": "3.37", "wpb": "118131", "bsz": "256", "num_updates": "938000", "lr": "6.26263e-06", "gnorm": "2.508", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "280543"} +[2022-08-02 16:46:33,752][train_inner][INFO] - {"epoch": 19, "update": 18.229, "loss": "1.967", "ppl": "3.91", "wps": "397975", "ups": "3.36", "wpb": "118579", "bsz": "256", "num_updates": "938200", "lr": "6.24242e-06", "gnorm": "2.269", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "280602"} +[2022-08-02 16:47:33,310][train_inner][INFO] - {"epoch": 19, "update": 18.233, "loss": "1.97", "ppl": "3.92", "wps": "398472", "ups": "3.36", "wpb": "118659", "bsz": "256", "num_updates": "938400", "lr": "6.22222e-06", "gnorm": "2.418", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "280662"} +[2022-08-02 16:47:45,830][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 16:48:33,040][train_inner][INFO] - {"epoch": 19, "update": 18.237, "loss": "1.97", "ppl": "3.92", "wps": "395873", "ups": "3.35", "wpb": "118228", "bsz": "256", "num_updates": "938600", "lr": "6.20202e-06", "gnorm": "2.398", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "280721"} +[2022-08-02 16:49:32,671][train_inner][INFO] - {"epoch": 19, "update": 18.241, "loss": "1.968", "ppl": "3.91", "wps": "396802", "ups": "3.35", "wpb": "118307", "bsz": "256", "num_updates": "938800", "lr": "6.18182e-06", "gnorm": "2.341", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "280781"} +[2022-08-02 16:50:32,483][train_inner][INFO] - {"epoch": 19, "update": 18.245, "loss": "1.96", "ppl": "3.89", "wps": "399003", "ups": "3.34", "wpb": "119324", "bsz": "256", "num_updates": "939000", "lr": "6.16162e-06", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "280841"} +[2022-08-02 16:51:32,014][train_inner][INFO] - {"epoch": 19, "update": 18.249, "loss": "1.962", "ppl": "3.9", "wps": "399423", "ups": "3.36", "wpb": "118890", "bsz": "256", "num_updates": "939200", "lr": "6.14141e-06", "gnorm": "2.345", "loss_scale": "2", "train_wall": "59", "gb_free": "24.1", "wall": "280900"} +[2022-08-02 16:52:31,725][train_inner][INFO] - {"epoch": 19, "update": 18.253, "loss": "1.964", "ppl": "3.9", "wps": "396041", "ups": "3.35", "wpb": "118240", "bsz": "256", "num_updates": "939400", "lr": "6.12121e-06", "gnorm": "2.237", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "280960"} +[2022-08-02 16:53:31,042][train_inner][INFO] - {"epoch": 19, "update": 18.256, "loss": "1.971", "ppl": "3.92", "wps": "399971", "ups": "3.37", "wpb": "118624", "bsz": "256", "num_updates": "939600", "lr": "6.10101e-06", "gnorm": "2.397", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "281019"} +[2022-08-02 16:54:30,624][train_inner][INFO] - {"epoch": 19, "update": 18.26, "loss": "1.965", "ppl": "3.9", "wps": "397544", "ups": "3.36", "wpb": "118433", "bsz": "256", "num_updates": "939800", "lr": "6.08081e-06", "gnorm": "2.639", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "281079"} +[2022-08-02 16:55:29,938][train_inner][INFO] - {"epoch": 19, "update": 18.264, "loss": "1.969", "ppl": "3.92", "wps": "398813", "ups": "3.37", "wpb": "118273", "bsz": "256", "num_updates": "940000", "lr": "6.06061e-06", "gnorm": "2.327", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "281138"} +[2022-08-02 16:56:29,127][train_inner][INFO] - {"epoch": 19, "update": 18.268, "loss": "1.966", "ppl": "3.91", "wps": "400300", "ups": "3.38", "wpb": "118468", "bsz": "256", "num_updates": "940200", "lr": "6.0404e-06", "gnorm": "2.546", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "281198"} +[2022-08-02 16:57:28,711][train_inner][INFO] - {"epoch": 19, "update": 18.272, "loss": "1.974", "ppl": "3.93", "wps": "396006", "ups": "3.36", "wpb": "117976", "bsz": "256", "num_updates": "940400", "lr": "6.0202e-06", "gnorm": "2.655", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "281257"} +[2022-08-02 16:57:55,841][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 16:58:28,613][train_inner][INFO] - {"epoch": 19, "update": 18.276, "loss": "1.961", "ppl": "3.89", "wps": "394465", "ups": "3.34", "wpb": "118147", "bsz": "256", "num_updates": "940600", "lr": "6e-06", "gnorm": "2.551", "loss_scale": "2", "train_wall": "60", "gb_free": "22.3", "wall": "281317"} +[2022-08-02 16:59:27,993][train_inner][INFO] - {"epoch": 19, "update": 18.28, "loss": "1.971", "ppl": "3.92", "wps": "395797", "ups": "3.37", "wpb": "117510", "bsz": "256", "num_updates": "940800", "lr": "5.9798e-06", "gnorm": "2.678", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "281376"} +[2022-08-02 17:00:27,376][train_inner][INFO] - {"epoch": 19, "update": 18.284, "loss": "1.968", "ppl": "3.91", "wps": "397492", "ups": "3.37", "wpb": "118020", "bsz": "256", "num_updates": "941000", "lr": "5.9596e-06", "gnorm": "2.472", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "281436"} +[2022-08-02 17:01:26,936][train_inner][INFO] - {"epoch": 19, "update": 18.288, "loss": "1.97", "ppl": "3.92", "wps": "396886", "ups": "3.36", "wpb": "118193", "bsz": "256", "num_updates": "941200", "lr": "5.93939e-06", "gnorm": "2.55", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "281495"} +[2022-08-02 17:02:26,692][train_inner][INFO] - {"epoch": 19, "update": 18.291, "loss": "1.969", "ppl": "3.91", "wps": "395869", "ups": "3.35", "wpb": "118276", "bsz": "256", "num_updates": "941400", "lr": "5.91919e-06", "gnorm": "2.479", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "281555"} +[2022-08-02 17:03:26,302][train_inner][INFO] - {"epoch": 19, "update": 18.295, "loss": "1.964", "ppl": "3.9", "wps": "397269", "ups": "3.36", "wpb": "118405", "bsz": "256", "num_updates": "941600", "lr": "5.89899e-06", "gnorm": "2.522", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "281615"} +[2022-08-02 17:04:25,928][train_inner][INFO] - {"epoch": 19, "update": 18.299, "loss": "1.968", "ppl": "3.91", "wps": "396722", "ups": "3.35", "wpb": "118275", "bsz": "256", "num_updates": "941800", "lr": "5.87879e-06", "gnorm": "2.371", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "281674"} +[2022-08-02 17:05:25,167][train_inner][INFO] - {"epoch": 19, "update": 18.303, "loss": "1.962", "ppl": "3.9", "wps": "397857", "ups": "3.38", "wpb": "117843", "bsz": "256", "num_updates": "942000", "lr": "5.85859e-06", "gnorm": "2.322", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "281734"} +[2022-08-02 17:06:24,357][train_inner][INFO] - {"epoch": 19, "update": 18.307, "loss": "1.962", "ppl": "3.9", "wps": "400376", "ups": "3.38", "wpb": "118490", "bsz": "256", "num_updates": "942200", "lr": "5.83838e-06", "gnorm": "2.352", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "281793"} +[2022-08-02 17:07:23,538][train_inner][INFO] - {"epoch": 19, "update": 18.311, "loss": "1.969", "ppl": "3.92", "wps": "400772", "ups": "3.38", "wpb": "118589", "bsz": "256", "num_updates": "942400", "lr": "5.81818e-06", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "281852"} +[2022-08-02 17:08:06,726][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 17:08:23,349][train_inner][INFO] - {"epoch": 19, "update": 18.315, "loss": "1.968", "ppl": "3.91", "wps": "396798", "ups": "3.34", "wpb": "118664", "bsz": "256", "num_updates": "942600", "lr": "5.79798e-06", "gnorm": "2.256", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "281912"} +[2022-08-02 17:09:22,987][train_inner][INFO] - {"epoch": 19, "update": 18.319, "loss": "1.965", "ppl": "3.9", "wps": "396780", "ups": "3.35", "wpb": "118316", "bsz": "256", "num_updates": "942800", "lr": "5.77778e-06", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "281971"} +[2022-08-02 17:10:22,351][train_inner][INFO] - {"epoch": 19, "update": 18.322, "loss": "1.97", "ppl": "3.92", "wps": "397527", "ups": "3.37", "wpb": "117994", "bsz": "256", "num_updates": "943000", "lr": "5.75758e-06", "gnorm": "2.272", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "282031"} +[2022-08-02 17:11:21,473][train_inner][INFO] - {"epoch": 19, "update": 18.326, "loss": "1.968", "ppl": "3.91", "wps": "395989", "ups": "3.38", "wpb": "117057", "bsz": "256", "num_updates": "943200", "lr": "5.73737e-06", "gnorm": "2.36", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "282090"} +[2022-08-02 17:12:20,877][train_inner][INFO] - {"epoch": 19, "update": 18.33, "loss": "1.964", "ppl": "3.9", "wps": "397478", "ups": "3.37", "wpb": "118058", "bsz": "256", "num_updates": "943400", "lr": "5.71717e-06", "gnorm": "2.314", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "282149"} +[2022-08-02 17:13:20,449][train_inner][INFO] - {"epoch": 19, "update": 18.334, "loss": "1.967", "ppl": "3.91", "wps": "397991", "ups": "3.36", "wpb": "118545", "bsz": "256", "num_updates": "943600", "lr": "5.69697e-06", "gnorm": "2.284", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "282209"} +[2022-08-02 17:14:19,495][train_inner][INFO] - {"epoch": 19, "update": 18.338, "loss": "1.969", "ppl": "3.92", "wps": "398900", "ups": "3.39", "wpb": "117766", "bsz": "256", "num_updates": "943800", "lr": "5.67677e-06", "gnorm": "2.414", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "282268"} +[2022-08-02 17:15:18,602][train_inner][INFO] - {"epoch": 19, "update": 18.342, "loss": "1.968", "ppl": "3.91", "wps": "398471", "ups": "3.38", "wpb": "117761", "bsz": "256", "num_updates": "944000", "lr": "5.65657e-06", "gnorm": "2.256", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "282327"} +[2022-08-02 17:16:18,427][train_inner][INFO] - {"epoch": 19, "update": 18.346, "loss": "1.962", "ppl": "3.9", "wps": "394789", "ups": "3.34", "wpb": "118091", "bsz": "256", "num_updates": "944200", "lr": "5.63636e-06", "gnorm": "2.26", "loss_scale": "2", "train_wall": "60", "gb_free": "23.4", "wall": "282387"} +[2022-08-02 17:17:18,258][train_inner][INFO] - {"epoch": 19, "update": 18.35, "loss": "1.964", "ppl": "3.9", "wps": "396459", "ups": "3.34", "wpb": "118603", "bsz": "256", "num_updates": "944400", "lr": "5.61616e-06", "gnorm": "2.259", "loss_scale": "2", "train_wall": "60", "gb_free": "23.2", "wall": "282447"} +[2022-08-02 17:18:18,145][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 17:18:18,420][train_inner][INFO] - {"epoch": 19, "update": 18.354, "loss": "1.97", "ppl": "3.92", "wps": "393568", "ups": "3.32", "wpb": "118389", "bsz": "256", "num_updates": "944600", "lr": "5.59596e-06", "gnorm": "2.282", "loss_scale": "2", "train_wall": "60", "gb_free": "21.9", "wall": "282507"} +[2022-08-02 17:19:17,615][train_inner][INFO] - {"epoch": 19, "update": 18.357, "loss": "1.968", "ppl": "3.91", "wps": "398791", "ups": "3.38", "wpb": "118032", "bsz": "256", "num_updates": "944800", "lr": "5.57576e-06", "gnorm": "2.279", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "282566"} +[2022-08-02 17:20:16,558][train_inner][INFO] - {"epoch": 19, "update": 18.361, "loss": "1.965", "ppl": "3.91", "wps": "400710", "ups": "3.39", "wpb": "118093", "bsz": "256", "num_updates": "945000", "lr": "5.55556e-06", "gnorm": "2.282", "loss_scale": "2", "train_wall": "59", "gb_free": "26.9", "wall": "282625"} +[2022-08-02 17:21:15,631][train_inner][INFO] - {"epoch": 19, "update": 18.365, "loss": "1.969", "ppl": "3.91", "wps": "399850", "ups": "3.39", "wpb": "118102", "bsz": "256", "num_updates": "945200", "lr": "5.53535e-06", "gnorm": "2.267", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "282684"} +[2022-08-02 17:22:15,218][train_inner][INFO] - {"epoch": 19, "update": 18.369, "loss": "1.967", "ppl": "3.91", "wps": "395067", "ups": "3.36", "wpb": "117704", "bsz": "256", "num_updates": "945400", "lr": "5.51515e-06", "gnorm": "2.231", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "282744"} +[2022-08-02 17:23:14,546][train_inner][INFO] - {"epoch": 19, "update": 18.373, "loss": "1.965", "ppl": "3.9", "wps": "398219", "ups": "3.37", "wpb": "118126", "bsz": "256", "num_updates": "945600", "lr": "5.49495e-06", "gnorm": "2.395", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "282803"} +[2022-08-02 17:24:14,185][train_inner][INFO] - {"epoch": 19, "update": 18.377, "loss": "1.961", "ppl": "3.89", "wps": "396784", "ups": "3.35", "wpb": "118320", "bsz": "256", "num_updates": "945800", "lr": "5.47475e-06", "gnorm": "2.36", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "282863"} +[2022-08-02 17:25:13,497][train_inner][INFO] - {"epoch": 19, "update": 18.381, "loss": "1.964", "ppl": "3.9", "wps": "399710", "ups": "3.37", "wpb": "118538", "bsz": "256", "num_updates": "946000", "lr": "5.45455e-06", "gnorm": "2.305", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "282922"} +[2022-08-02 17:26:13,046][train_inner][INFO] - {"epoch": 19, "update": 18.385, "loss": "1.965", "ppl": "3.9", "wps": "396636", "ups": "3.36", "wpb": "118094", "bsz": "255.9", "num_updates": "946200", "lr": "5.43434e-06", "gnorm": "2.218", "loss_scale": "2", "train_wall": "59", "gb_free": "26.3", "wall": "282981"} +[2022-08-02 17:27:12,198][train_inner][INFO] - {"epoch": 19, "update": 18.389, "loss": "1.971", "ppl": "3.92", "wps": "399787", "ups": "3.38", "wpb": "118241", "bsz": "256", "num_updates": "946400", "lr": "5.41414e-06", "gnorm": "2.4", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "283041"} +[2022-08-02 17:28:12,010][train_inner][INFO] - {"epoch": 19, "update": 18.392, "loss": "1.96", "ppl": "3.89", "wps": "396941", "ups": "3.34", "wpb": "118709", "bsz": "256", "num_updates": "946600", "lr": "5.39394e-06", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "283100"} +[2022-08-02 17:28:28,344][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 17:29:11,287][train_inner][INFO] - {"epoch": 19, "update": 18.396, "loss": "1.965", "ppl": "3.91", "wps": "398384", "ups": "3.37", "wpb": "118074", "bsz": "256", "num_updates": "946800", "lr": "5.37374e-06", "gnorm": "2.402", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "283160"} +[2022-08-02 17:30:10,883][train_inner][INFO] - {"epoch": 19, "update": 18.4, "loss": "1.96", "ppl": "3.89", "wps": "397759", "ups": "3.36", "wpb": "118523", "bsz": "256", "num_updates": "947000", "lr": "5.35354e-06", "gnorm": "2.245", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "283219"} +[2022-08-02 17:31:10,961][train_inner][INFO] - {"epoch": 19, "update": 18.404, "loss": "1.964", "ppl": "3.9", "wps": "392502", "ups": "3.33", "wpb": "117903", "bsz": "256", "num_updates": "947200", "lr": "5.33333e-06", "gnorm": "2.198", "loss_scale": "2", "train_wall": "60", "gb_free": "30.7", "wall": "283279"} +[2022-08-02 17:32:10,501][train_inner][INFO] - {"epoch": 19, "update": 18.408, "loss": "1.959", "ppl": "3.89", "wps": "395615", "ups": "3.36", "wpb": "117774", "bsz": "256", "num_updates": "947400", "lr": "5.31313e-06", "gnorm": "2.27", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "283339"} +[2022-08-02 17:33:10,113][train_inner][INFO] - {"epoch": 19, "update": 18.412, "loss": "1.963", "ppl": "3.9", "wps": "397073", "ups": "3.36", "wpb": "118351", "bsz": "256", "num_updates": "947600", "lr": "5.29293e-06", "gnorm": "2.317", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "283399"} +[2022-08-02 17:34:09,836][train_inner][INFO] - {"epoch": 19, "update": 18.416, "loss": "1.966", "ppl": "3.91", "wps": "397731", "ups": "3.35", "wpb": "118767", "bsz": "256", "num_updates": "947800", "lr": "5.27273e-06", "gnorm": "2.314", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "283458"} +[2022-08-02 17:35:09,042][train_inner][INFO] - {"epoch": 19, "update": 18.42, "loss": "1.965", "ppl": "3.9", "wps": "399994", "ups": "3.38", "wpb": "118410", "bsz": "256", "num_updates": "948000", "lr": "5.25253e-06", "gnorm": "2.272", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "283517"} +[2022-08-02 17:36:08,531][train_inner][INFO] - {"epoch": 19, "update": 18.424, "loss": "1.967", "ppl": "3.91", "wps": "395944", "ups": "3.36", "wpb": "117771", "bsz": "256", "num_updates": "948200", "lr": "5.23232e-06", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "283577"} +[2022-08-02 17:37:08,072][train_inner][INFO] - {"epoch": 19, "update": 18.427, "loss": "1.964", "ppl": "3.9", "wps": "396181", "ups": "3.36", "wpb": "117945", "bsz": "256", "num_updates": "948400", "lr": "5.21212e-06", "gnorm": "2.275", "loss_scale": "2", "train_wall": "59", "gb_free": "28.5", "wall": "283637"} +[2022-08-02 17:38:07,587][train_inner][INFO] - {"epoch": 19, "update": 18.431, "loss": "1.963", "ppl": "3.9", "wps": "397758", "ups": "3.36", "wpb": "118362", "bsz": "256", "num_updates": "948600", "lr": "5.19192e-06", "gnorm": "2.315", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "283696"} +[2022-08-02 17:38:48,285][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 17:39:07,372][train_inner][INFO] - {"epoch": 19, "update": 18.435, "loss": "1.96", "ppl": "3.89", "wps": "396398", "ups": "3.35", "wpb": "118492", "bsz": "256", "num_updates": "948800", "lr": "5.17172e-06", "gnorm": "2.253", "loss_scale": "2", "train_wall": "59", "gb_free": "24.4", "wall": "283756"} +[2022-08-02 17:40:06,312][train_inner][INFO] - {"epoch": 19, "update": 18.439, "loss": "1.962", "ppl": "3.9", "wps": "400918", "ups": "3.39", "wpb": "118149", "bsz": "256", "num_updates": "949000", "lr": "5.15152e-06", "gnorm": "2.239", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "283815"} +[2022-08-02 17:41:05,991][train_inner][INFO] - {"epoch": 19, "update": 18.443, "loss": "1.96", "ppl": "3.89", "wps": "397716", "ups": "3.35", "wpb": "118677", "bsz": "256", "num_updates": "949200", "lr": "5.13131e-06", "gnorm": "2.292", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "283874"} +[2022-08-02 17:42:05,434][train_inner][INFO] - {"epoch": 19, "update": 18.447, "loss": "1.962", "ppl": "3.9", "wps": "397120", "ups": "3.36", "wpb": "118029", "bsz": "256", "num_updates": "949400", "lr": "5.11111e-06", "gnorm": "2.358", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "283934"} +[2022-08-02 17:43:05,126][train_inner][INFO] - {"epoch": 19, "update": 18.451, "loss": "1.964", "ppl": "3.9", "wps": "396961", "ups": "3.35", "wpb": "118476", "bsz": "256", "num_updates": "949600", "lr": "5.09091e-06", "gnorm": "2.277", "loss_scale": "2", "train_wall": "59", "gb_free": "26.6", "wall": "283994"} +[2022-08-02 17:44:04,513][train_inner][INFO] - {"epoch": 19, "update": 18.455, "loss": "1.969", "ppl": "3.91", "wps": "397371", "ups": "3.37", "wpb": "117993", "bsz": "256", "num_updates": "949800", "lr": "5.07071e-06", "gnorm": "2.459", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "284053"} +[2022-08-02 17:45:03,938][train_inner][INFO] - {"epoch": 19, "update": 18.458, "loss": "1.968", "ppl": "3.91", "wps": "399582", "ups": "3.37", "wpb": "118725", "bsz": "256", "num_updates": "950000", "lr": "5.05051e-06", "gnorm": "2.607", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "284112"} +[2022-08-02 17:45:03,939][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 17:45:26,803][valid][INFO] - {"epoch": 19, "valid_loss": "1.857", "valid_ppl": "3.62", "valid_wps": "1.57953e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "950000", "valid_best_loss": "1.857"} +[2022-08-02 17:45:26,806][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 19 @ 950000 updates +[2022-08-02 17:45:26,807][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_19_950000.pt +[2022-08-02 17:45:34,752][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_19_950000.pt +[2022-08-02 17:45:54,416][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_19_950000.pt (epoch 19 @ 950000 updates, score 1.857) (writing took 27.609881304204464 seconds) +[2022-08-02 17:46:53,836][train_inner][INFO] - {"epoch": 19, "update": 18.462, "loss": "1.959", "ppl": "3.89", "wps": "216079", "ups": "1.82", "wpb": "118733", "bsz": "256", "num_updates": "950200", "lr": "5.0303e-06", "gnorm": "2.686", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "284222"} +[2022-08-02 17:47:53,338][train_inner][INFO] - {"epoch": 19, "update": 18.466, "loss": "1.968", "ppl": "3.91", "wps": "395408", "ups": "3.36", "wpb": "117638", "bsz": "256", "num_updates": "950400", "lr": "5.0101e-06", "gnorm": "2.59", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "284282"} +[2022-08-02 17:48:52,667][train_inner][INFO] - {"epoch": 19, "update": 18.47, "loss": "1.965", "ppl": "3.9", "wps": "399506", "ups": "3.37", "wpb": "118511", "bsz": "256", "num_updates": "950600", "lr": "4.9899e-06", "gnorm": "2.611", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "284341"} +[2022-08-02 17:49:49,651][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 17:49:52,323][train_inner][INFO] - {"epoch": 19, "update": 18.474, "loss": "1.971", "ppl": "3.92", "wps": "397005", "ups": "3.35", "wpb": "118416", "bsz": "256", "num_updates": "950800", "lr": "4.9697e-06", "gnorm": "2.406", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "284401"} +[2022-08-02 17:50:51,805][train_inner][INFO] - {"epoch": 19, "update": 18.478, "loss": "1.962", "ppl": "3.9", "wps": "397620", "ups": "3.36", "wpb": "118256", "bsz": "256", "num_updates": "951000", "lr": "4.94949e-06", "gnorm": "2.713", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "284460"} +[2022-08-02 17:51:51,270][train_inner][INFO] - {"epoch": 19, "update": 18.482, "loss": "1.96", "ppl": "3.89", "wps": "397474", "ups": "3.36", "wpb": "118178", "bsz": "256", "num_updates": "951200", "lr": "4.92929e-06", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "284520"} +[2022-08-02 17:52:52,137][train_inner][INFO] - {"epoch": 19, "update": 18.486, "loss": "1.961", "ppl": "3.89", "wps": "390585", "ups": "3.29", "wpb": "118868", "bsz": "256", "num_updates": "951400", "lr": "4.90909e-06", "gnorm": "2.614", "loss_scale": "2", "train_wall": "61", "gb_free": "22.4", "wall": "284581"} +[2022-08-02 17:53:51,582][train_inner][INFO] - {"epoch": 19, "update": 18.49, "loss": "1.964", "ppl": "3.9", "wps": "396772", "ups": "3.36", "wpb": "117931", "bsz": "256", "num_updates": "951600", "lr": "4.88889e-06", "gnorm": "2.636", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "284640"} +[2022-08-02 17:54:50,850][train_inner][INFO] - {"epoch": 19, "update": 18.493, "loss": "1.968", "ppl": "3.91", "wps": "397894", "ups": "3.37", "wpb": "117910", "bsz": "256", "num_updates": "951800", "lr": "4.86869e-06", "gnorm": "2.756", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "284699"} +[2022-08-02 17:55:49,925][train_inner][INFO] - {"epoch": 19, "update": 18.497, "loss": "1.969", "ppl": "3.91", "wps": "398662", "ups": "3.39", "wpb": "117755", "bsz": "256", "num_updates": "952000", "lr": "4.84848e-06", "gnorm": "2.308", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "284758"} +[2022-08-02 17:56:49,163][train_inner][INFO] - {"epoch": 19, "update": 18.501, "loss": "1.964", "ppl": "3.9", "wps": "400860", "ups": "3.38", "wpb": "118729", "bsz": "256", "num_updates": "952200", "lr": "4.82828e-06", "gnorm": "2.385", "loss_scale": "2", "train_wall": "59", "gb_free": "24.8", "wall": "284818"} +[2022-08-02 17:57:48,460][train_inner][INFO] - {"epoch": 19, "update": 18.505, "loss": "1.965", "ppl": "3.9", "wps": "397246", "ups": "3.37", "wpb": "117776", "bsz": "256", "num_updates": "952400", "lr": "4.80808e-06", "gnorm": "2.315", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "284877"} +[2022-08-02 17:58:48,112][train_inner][INFO] - {"epoch": 19, "update": 18.509, "loss": "1.963", "ppl": "3.9", "wps": "397398", "ups": "3.35", "wpb": "118529", "bsz": "256", "num_updates": "952600", "lr": "4.78788e-06", "gnorm": "2.262", "loss_scale": "2", "train_wall": "59", "gb_free": "23.5", "wall": "284937"} +[2022-08-02 17:59:47,634][train_inner][INFO] - {"epoch": 19, "update": 18.513, "loss": "1.964", "ppl": "3.9", "wps": "396731", "ups": "3.36", "wpb": "118070", "bsz": "256", "num_updates": "952800", "lr": "4.76768e-06", "gnorm": "2.236", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "284996"} +[2022-08-02 18:00:09,347][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:00:47,292][train_inner][INFO] - {"epoch": 19, "update": 18.517, "loss": "1.962", "ppl": "3.9", "wps": "398808", "ups": "3.35", "wpb": "118958", "bsz": "256", "num_updates": "953000", "lr": "4.74747e-06", "gnorm": "2.279", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "285056"} +[2022-08-02 18:01:46,638][train_inner][INFO] - {"epoch": 19, "update": 18.521, "loss": "1.962", "ppl": "3.9", "wps": "397727", "ups": "3.37", "wpb": "118018", "bsz": "256", "num_updates": "953200", "lr": "4.72727e-06", "gnorm": "2.331", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "285115"} +[2022-08-02 18:02:46,368][train_inner][INFO] - {"epoch": 19, "update": 18.525, "loss": "1.962", "ppl": "3.9", "wps": "396784", "ups": "3.35", "wpb": "118499", "bsz": "256", "num_updates": "953400", "lr": "4.70707e-06", "gnorm": "2.21", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "285175"} +[2022-08-02 18:03:46,717][train_inner][INFO] - {"epoch": 19, "update": 18.528, "loss": "1.964", "ppl": "3.9", "wps": "391880", "ups": "3.31", "wpb": "118246", "bsz": "256", "num_updates": "953600", "lr": "4.68687e-06", "gnorm": "2.314", "loss_scale": "2", "train_wall": "60", "gb_free": "31.8", "wall": "285235"} +[2022-08-02 18:04:46,602][train_inner][INFO] - {"epoch": 19, "update": 18.532, "loss": "1.961", "ppl": "3.89", "wps": "395982", "ups": "3.34", "wpb": "118565", "bsz": "256", "num_updates": "953800", "lr": "4.66667e-06", "gnorm": "2.25", "loss_scale": "2", "train_wall": "60", "gb_free": "24.2", "wall": "285295"} +[2022-08-02 18:05:46,106][train_inner][INFO] - {"epoch": 19, "update": 18.536, "loss": "1.967", "ppl": "3.91", "wps": "397518", "ups": "3.36", "wpb": "118269", "bsz": "256", "num_updates": "954000", "lr": "4.64646e-06", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "285355"} +[2022-08-02 18:06:45,408][train_inner][INFO] - {"epoch": 19, "update": 18.54, "loss": "1.961", "ppl": "3.89", "wps": "399510", "ups": "3.37", "wpb": "118458", "bsz": "256", "num_updates": "954200", "lr": "4.62626e-06", "gnorm": "2.334", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "285414"} +[2022-08-02 18:07:44,884][train_inner][INFO] - {"epoch": 19, "update": 18.544, "loss": "1.961", "ppl": "3.89", "wps": "398031", "ups": "3.36", "wpb": "118367", "bsz": "256", "num_updates": "954400", "lr": "4.60606e-06", "gnorm": "2.243", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "285473"} +[2022-08-02 18:08:45,404][train_inner][INFO] - {"epoch": 19, "update": 18.548, "loss": "1.961", "ppl": "3.89", "wps": "392148", "ups": "3.3", "wpb": "118663", "bsz": "256", "num_updates": "954600", "lr": "4.58586e-06", "gnorm": "2.297", "loss_scale": "2", "train_wall": "60", "gb_free": "22.9", "wall": "285534"} +[2022-08-02 18:09:44,829][train_inner][INFO] - {"epoch": 19, "update": 18.552, "loss": "1.963", "ppl": "3.9", "wps": "397544", "ups": "3.37", "wpb": "118120", "bsz": "256", "num_updates": "954800", "lr": "4.56566e-06", "gnorm": "2.312", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "285593"} +[2022-08-02 18:10:22,317][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:10:44,719][train_inner][INFO] - {"epoch": 19, "update": 18.556, "loss": "1.96", "ppl": "3.89", "wps": "395006", "ups": "3.34", "wpb": "118283", "bsz": "256", "num_updates": "955000", "lr": "4.54545e-06", "gnorm": "2.271", "loss_scale": "2", "train_wall": "60", "gb_free": "22.3", "wall": "285653"} +[2022-08-02 18:11:43,744][train_inner][INFO] - {"epoch": 19, "update": 18.56, "loss": "1.963", "ppl": "3.9", "wps": "400234", "ups": "3.39", "wpb": "118119", "bsz": "256", "num_updates": "955200", "lr": "4.52525e-06", "gnorm": "2.232", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "285712"} +[2022-08-02 18:12:43,082][train_inner][INFO] - {"epoch": 19, "update": 18.563, "loss": "1.967", "ppl": "3.91", "wps": "397453", "ups": "3.37", "wpb": "117919", "bsz": "256", "num_updates": "955400", "lr": "4.50505e-06", "gnorm": "2.259", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "285772"} +[2022-08-02 18:13:42,811][train_inner][INFO] - {"epoch": 19, "update": 18.567, "loss": "1.963", "ppl": "3.9", "wps": "396871", "ups": "3.35", "wpb": "118524", "bsz": "256", "num_updates": "955600", "lr": "4.48485e-06", "gnorm": "2.332", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "285831"} +[2022-08-02 18:14:42,810][train_inner][INFO] - {"epoch": 19, "update": 18.571, "loss": "1.961", "ppl": "3.89", "wps": "397137", "ups": "3.33", "wpb": "119138", "bsz": "256", "num_updates": "955800", "lr": "4.46465e-06", "gnorm": "2.282", "loss_scale": "2", "train_wall": "60", "gb_free": "23.1", "wall": "285891"} +[2022-08-02 18:15:42,545][train_inner][INFO] - {"epoch": 19, "update": 18.575, "loss": "1.962", "ppl": "3.9", "wps": "397539", "ups": "3.35", "wpb": "118734", "bsz": "256", "num_updates": "956000", "lr": "4.44444e-06", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "285951"} +[2022-08-02 18:16:41,902][train_inner][INFO] - {"epoch": 19, "update": 18.579, "loss": "1.965", "ppl": "3.9", "wps": "398689", "ups": "3.37", "wpb": "118324", "bsz": "256", "num_updates": "956200", "lr": "4.42424e-06", "gnorm": "2.385", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "286010"} +[2022-08-02 18:17:41,643][train_inner][INFO] - {"epoch": 19, "update": 18.583, "loss": "1.956", "ppl": "3.88", "wps": "397735", "ups": "3.35", "wpb": "118805", "bsz": "256", "num_updates": "956400", "lr": "4.40404e-06", "gnorm": "2.207", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "286070"} +[2022-08-02 18:18:41,181][train_inner][INFO] - {"epoch": 19, "update": 18.587, "loss": "1.959", "ppl": "3.89", "wps": "397346", "ups": "3.36", "wpb": "118284", "bsz": "256", "num_updates": "956600", "lr": "4.38384e-06", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "286130"} +[2022-08-02 18:19:40,679][train_inner][INFO] - {"epoch": 19, "update": 18.591, "loss": "1.959", "ppl": "3.89", "wps": "397511", "ups": "3.36", "wpb": "118255", "bsz": "256", "num_updates": "956800", "lr": "4.36364e-06", "gnorm": "2.226", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "286189"} +[2022-08-02 18:20:39,847][train_inner][INFO] - {"epoch": 19, "update": 18.595, "loss": "1.967", "ppl": "3.91", "wps": "398824", "ups": "3.38", "wpb": "117988", "bsz": "256", "num_updates": "957000", "lr": "4.34343e-06", "gnorm": "2.294", "loss_scale": "4", "train_wall": "59", "gb_free": "24.5", "wall": "286248"} +[2022-08-02 18:20:48,721][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:21:39,196][train_inner][INFO] - {"epoch": 19, "update": 18.598, "loss": "1.964", "ppl": "3.9", "wps": "397231", "ups": "3.37", "wpb": "117875", "bsz": "256", "num_updates": "957200", "lr": "4.32323e-06", "gnorm": "2.235", "loss_scale": "2", "train_wall": "59", "gb_free": "27.4", "wall": "286308"} +[2022-08-02 18:22:38,502][train_inner][INFO] - {"epoch": 19, "update": 18.602, "loss": "1.96", "ppl": "3.89", "wps": "400171", "ups": "3.37", "wpb": "118663", "bsz": "256", "num_updates": "957400", "lr": "4.30303e-06", "gnorm": "2.287", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "286367"} +[2022-08-02 18:23:38,463][train_inner][INFO] - {"epoch": 19, "update": 18.606, "loss": "1.961", "ppl": "3.89", "wps": "395111", "ups": "3.34", "wpb": "118455", "bsz": "256", "num_updates": "957600", "lr": "4.28283e-06", "gnorm": "2.196", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "286427"} +[2022-08-02 18:24:38,492][train_inner][INFO] - {"epoch": 19, "update": 18.61, "loss": "1.959", "ppl": "3.89", "wps": "394256", "ups": "3.33", "wpb": "118333", "bsz": "256", "num_updates": "957800", "lr": "4.26263e-06", "gnorm": "2.349", "loss_scale": "2", "train_wall": "60", "gb_free": "22.5", "wall": "286487"} +[2022-08-02 18:25:37,893][train_inner][INFO] - {"epoch": 19, "update": 18.614, "loss": "1.957", "ppl": "3.88", "wps": "398457", "ups": "3.37", "wpb": "118344", "bsz": "256", "num_updates": "958000", "lr": "4.24242e-06", "gnorm": "2.389", "loss_scale": "2", "train_wall": "59", "gb_free": "29.9", "wall": "286546"} +[2022-08-02 18:26:38,523][train_inner][INFO] - {"epoch": 19, "update": 18.618, "loss": "1.97", "ppl": "3.92", "wps": "389272", "ups": "3.3", "wpb": "118007", "bsz": "256", "num_updates": "958200", "lr": "4.22222e-06", "gnorm": "2.323", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "286607"} +[2022-08-02 18:27:37,850][train_inner][INFO] - {"epoch": 19, "update": 18.622, "loss": "1.962", "ppl": "3.9", "wps": "397640", "ups": "3.37", "wpb": "117953", "bsz": "256", "num_updates": "958400", "lr": "4.20202e-06", "gnorm": "2.491", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "286666"} +[2022-08-02 18:28:37,424][train_inner][INFO] - {"epoch": 19, "update": 18.626, "loss": "1.961", "ppl": "3.89", "wps": "396274", "ups": "3.36", "wpb": "118037", "bsz": "256", "num_updates": "958600", "lr": "4.18182e-06", "gnorm": "2.286", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "286726"} +[2022-08-02 18:29:36,854][train_inner][INFO] - {"epoch": 19, "update": 18.629, "loss": "1.969", "ppl": "3.92", "wps": "399501", "ups": "3.37", "wpb": "118711", "bsz": "256", "num_updates": "958800", "lr": "4.16162e-06", "gnorm": "2.28", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "286785"} +[2022-08-02 18:30:36,300][train_inner][INFO] - {"epoch": 19, "update": 18.633, "loss": "1.964", "ppl": "3.9", "wps": "398661", "ups": "3.36", "wpb": "118493", "bsz": "256", "num_updates": "959000", "lr": "4.14141e-06", "gnorm": "2.252", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "286845"} +[2022-08-02 18:31:04,512][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:31:36,052][train_inner][INFO] - {"epoch": 19, "update": 18.637, "loss": "1.96", "ppl": "3.89", "wps": "395432", "ups": "3.35", "wpb": "118139", "bsz": "256", "num_updates": "959200", "lr": "4.12121e-06", "gnorm": "2.329", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "286904"} +[2022-08-02 18:32:36,476][train_inner][INFO] - {"epoch": 19, "update": 18.641, "loss": "1.961", "ppl": "3.89", "wps": "391515", "ups": "3.31", "wpb": "118283", "bsz": "256", "num_updates": "959400", "lr": "4.10101e-06", "gnorm": "2.274", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "286965"} +[2022-08-02 18:33:35,711][train_inner][INFO] - {"epoch": 19, "update": 18.645, "loss": "1.961", "ppl": "3.89", "wps": "398098", "ups": "3.38", "wpb": "117905", "bsz": "256", "num_updates": "959600", "lr": "4.08081e-06", "gnorm": "2.332", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "287024"} +[2022-08-02 18:34:35,210][train_inner][INFO] - {"epoch": 19, "update": 18.649, "loss": "1.959", "ppl": "3.89", "wps": "398514", "ups": "3.36", "wpb": "118555", "bsz": "256", "num_updates": "959800", "lr": "4.06061e-06", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "287084"} +[2022-08-02 18:35:34,493][train_inner][INFO] - {"epoch": 19, "update": 18.653, "loss": "1.959", "ppl": "3.89", "wps": "397937", "ups": "3.37", "wpb": "117954", "bsz": "256", "num_updates": "960000", "lr": "4.0404e-06", "gnorm": "2.187", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "287143"} +[2022-08-02 18:36:34,062][train_inner][INFO] - {"epoch": 19, "update": 18.657, "loss": "1.956", "ppl": "3.88", "wps": "399001", "ups": "3.36", "wpb": "118840", "bsz": "256", "num_updates": "960200", "lr": "4.0202e-06", "gnorm": "2.317", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "287202"} +[2022-08-02 18:37:33,566][train_inner][INFO] - {"epoch": 19, "update": 18.661, "loss": "1.964", "ppl": "3.9", "wps": "396132", "ups": "3.36", "wpb": "117858", "bsz": "256", "num_updates": "960400", "lr": "4e-06", "gnorm": "2.285", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "287262"} +[2022-08-02 18:38:33,211][train_inner][INFO] - {"epoch": 19, "update": 18.664, "loss": "1.962", "ppl": "3.9", "wps": "396618", "ups": "3.35", "wpb": "118281", "bsz": "256", "num_updates": "960600", "lr": "3.9798e-06", "gnorm": "2.235", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "287322"} +[2022-08-02 18:39:32,785][train_inner][INFO] - {"epoch": 19, "update": 18.668, "loss": "1.966", "ppl": "3.91", "wps": "398064", "ups": "3.36", "wpb": "118570", "bsz": "256", "num_updates": "960800", "lr": "3.9596e-06", "gnorm": "2.228", "loss_scale": "2", "train_wall": "59", "gb_free": "27.9", "wall": "287381"} +[2022-08-02 18:40:32,361][train_inner][INFO] - {"epoch": 19, "update": 18.672, "loss": "1.962", "ppl": "3.9", "wps": "396195", "ups": "3.36", "wpb": "118019", "bsz": "256", "num_updates": "961000", "lr": "3.93939e-06", "gnorm": "2.217", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "287441"} +[2022-08-02 18:41:15,921][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:41:33,209][train_inner][INFO] - {"epoch": 19, "update": 18.676, "loss": "1.956", "ppl": "3.88", "wps": "389421", "ups": "3.29", "wpb": "118476", "bsz": "256", "num_updates": "961200", "lr": "3.91919e-06", "gnorm": "2.186", "loss_scale": "2", "train_wall": "61", "gb_free": "21.3", "wall": "287502"} +[2022-08-02 18:42:32,566][train_inner][INFO] - {"epoch": 19, "update": 18.68, "loss": "1.961", "ppl": "3.89", "wps": "397114", "ups": "3.37", "wpb": "117858", "bsz": "256", "num_updates": "961400", "lr": "3.89899e-06", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "287561"} +[2022-08-02 18:43:31,665][train_inner][INFO] - {"epoch": 19, "update": 18.684, "loss": "1.963", "ppl": "3.9", "wps": "400536", "ups": "3.38", "wpb": "118355", "bsz": "256", "num_updates": "961600", "lr": "3.87879e-06", "gnorm": "2.294", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "287620"} +[2022-08-02 18:44:31,397][train_inner][INFO] - {"epoch": 19, "update": 18.688, "loss": "1.959", "ppl": "3.89", "wps": "397422", "ups": "3.35", "wpb": "118694", "bsz": "256", "num_updates": "961800", "lr": "3.85859e-06", "gnorm": "2.31", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "287680"} +[2022-08-02 18:45:30,643][train_inner][INFO] - {"epoch": 19, "update": 18.692, "loss": "1.963", "ppl": "3.9", "wps": "398994", "ups": "3.38", "wpb": "118193", "bsz": "256", "num_updates": "962000", "lr": "3.83838e-06", "gnorm": "2.193", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "287739"} +[2022-08-02 18:46:30,244][train_inner][INFO] - {"epoch": 19, "update": 18.696, "loss": "1.962", "ppl": "3.89", "wps": "396622", "ups": "3.36", "wpb": "118196", "bsz": "256", "num_updates": "962200", "lr": "3.81818e-06", "gnorm": "2.396", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "287799"} +[2022-08-02 18:47:29,736][train_inner][INFO] - {"epoch": 19, "update": 18.699, "loss": "1.963", "ppl": "3.9", "wps": "396810", "ups": "3.36", "wpb": "118033", "bsz": "256", "num_updates": "962400", "lr": "3.79798e-06", "gnorm": "2.284", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "287858"} +[2022-08-02 18:48:29,612][train_inner][INFO] - {"epoch": 19, "update": 18.703, "loss": "1.959", "ppl": "3.89", "wps": "394581", "ups": "3.34", "wpb": "118130", "bsz": "256", "num_updates": "962600", "lr": "3.77778e-06", "gnorm": "2.292", "loss_scale": "2", "train_wall": "60", "gb_free": "21.7", "wall": "287918"} +[2022-08-02 18:49:28,794][train_inner][INFO] - {"epoch": 19, "update": 18.707, "loss": "1.958", "ppl": "3.89", "wps": "400200", "ups": "3.38", "wpb": "118422", "bsz": "256", "num_updates": "962800", "lr": "3.75758e-06", "gnorm": "2.222", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "287977"} +[2022-08-02 18:50:28,449][train_inner][INFO] - {"epoch": 19, "update": 18.711, "loss": "1.959", "ppl": "3.89", "wps": "396802", "ups": "3.35", "wpb": "118356", "bsz": "256", "num_updates": "963000", "lr": "3.73737e-06", "gnorm": "2.219", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "288037"} +[2022-08-02 18:51:28,054][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:51:28,358][train_inner][INFO] - {"epoch": 19, "update": 18.715, "loss": "1.968", "ppl": "3.91", "wps": "394662", "ups": "3.34", "wpb": "118217", "bsz": "256", "num_updates": "963200", "lr": "3.71717e-06", "gnorm": "2.291", "loss_scale": "2", "train_wall": "60", "gb_free": "22.5", "wall": "288097"} +[2022-08-02 18:52:28,999][train_inner][INFO] - {"epoch": 19, "update": 18.719, "loss": "1.957", "ppl": "3.88", "wps": "391353", "ups": "3.3", "wpb": "118660", "bsz": "256", "num_updates": "963400", "lr": "3.69697e-06", "gnorm": "2.252", "loss_scale": "2", "train_wall": "60", "gb_free": "23.6", "wall": "288157"} +[2022-08-02 18:53:28,214][train_inner][INFO] - {"epoch": 19, "update": 18.723, "loss": "1.962", "ppl": "3.9", "wps": "400599", "ups": "3.38", "wpb": "118607", "bsz": "256", "num_updates": "963600", "lr": "3.67677e-06", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "288217"} +[2022-08-02 18:54:27,721][train_inner][INFO] - {"epoch": 19, "update": 18.727, "loss": "1.969", "ppl": "3.91", "wps": "396604", "ups": "3.36", "wpb": "118002", "bsz": "256", "num_updates": "963800", "lr": "3.65657e-06", "gnorm": "2.37", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "288276"} +[2022-08-02 18:55:26,845][train_inner][INFO] - {"epoch": 19, "update": 18.731, "loss": "1.962", "ppl": "3.9", "wps": "400235", "ups": "3.38", "wpb": "118317", "bsz": "256", "num_updates": "964000", "lr": "3.63636e-06", "gnorm": "2.434", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "288335"} +[2022-08-02 18:56:26,382][train_inner][INFO] - {"epoch": 19, "update": 18.734, "loss": "1.954", "ppl": "3.88", "wps": "397894", "ups": "3.36", "wpb": "118446", "bsz": "256", "num_updates": "964200", "lr": "3.61616e-06", "gnorm": "2.212", "loss_scale": "2", "train_wall": "59", "gb_free": "27.4", "wall": "288395"} +[2022-08-02 18:57:25,629][train_inner][INFO] - {"epoch": 19, "update": 18.738, "loss": "1.964", "ppl": "3.9", "wps": "398431", "ups": "3.38", "wpb": "118029", "bsz": "256", "num_updates": "964400", "lr": "3.59596e-06", "gnorm": "2.328", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "288454"} +[2022-08-02 18:58:25,227][train_inner][INFO] - {"epoch": 19, "update": 18.742, "loss": "1.961", "ppl": "3.89", "wps": "396566", "ups": "3.36", "wpb": "118172", "bsz": "256", "num_updates": "964600", "lr": "3.57576e-06", "gnorm": "2.391", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "288514"} +[2022-08-02 18:59:25,010][train_inner][INFO] - {"epoch": 19, "update": 18.746, "loss": "1.958", "ppl": "3.89", "wps": "395913", "ups": "3.35", "wpb": "118342", "bsz": "256", "num_updates": "964800", "lr": "3.55556e-06", "gnorm": "2.289", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "288573"} +[2022-08-02 19:00:24,508][train_inner][INFO] - {"epoch": 19, "update": 18.75, "loss": "1.962", "ppl": "3.89", "wps": "399814", "ups": "3.36", "wpb": "118941", "bsz": "256", "num_updates": "965000", "lr": "3.53535e-06", "gnorm": "2.338", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "288633"} +[2022-08-02 19:01:23,971][train_inner][INFO] - {"epoch": 19, "update": 18.754, "loss": "1.962", "ppl": "3.9", "wps": "398654", "ups": "3.36", "wpb": "118525", "bsz": "256", "num_updates": "965200", "lr": "3.51515e-06", "gnorm": "2.447", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "288692"} +[2022-08-02 19:01:40,941][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 19:02:23,703][train_inner][INFO] - {"epoch": 19, "update": 18.758, "loss": "1.961", "ppl": "3.89", "wps": "395964", "ups": "3.35", "wpb": "118258", "bsz": "256", "num_updates": "965400", "lr": "3.49495e-06", "gnorm": "2.379", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "288752"} +[2022-08-02 19:03:23,166][train_inner][INFO] - {"epoch": 19, "update": 18.762, "loss": "1.962", "ppl": "3.9", "wps": "398067", "ups": "3.36", "wpb": "118350", "bsz": "256", "num_updates": "965600", "lr": "3.47475e-06", "gnorm": "2.373", "loss_scale": "2", "train_wall": "59", "gb_free": "27.3", "wall": "288812"} +[2022-08-02 19:04:22,514][train_inner][INFO] - {"epoch": 19, "update": 18.766, "loss": "1.958", "ppl": "3.89", "wps": "399419", "ups": "3.37", "wpb": "118522", "bsz": "256", "num_updates": "965800", "lr": "3.45455e-06", "gnorm": "2.452", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "288871"} +[2022-08-02 19:05:22,131][train_inner][INFO] - {"epoch": 19, "update": 18.769, "loss": "1.968", "ppl": "3.91", "wps": "395765", "ups": "3.35", "wpb": "117972", "bsz": "256", "num_updates": "966000", "lr": "3.43434e-06", "gnorm": "2.34", "loss_scale": "2", "train_wall": "59", "gb_free": "27", "wall": "288931"} +[2022-08-02 19:06:21,726][train_inner][INFO] - {"epoch": 19, "update": 18.773, "loss": "1.96", "ppl": "3.89", "wps": "398270", "ups": "3.36", "wpb": "118673", "bsz": "256", "num_updates": "966200", "lr": "3.41414e-06", "gnorm": "2.271", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "288990"} +[2022-08-02 19:07:21,452][train_inner][INFO] - {"epoch": 19, "update": 18.777, "loss": "1.963", "ppl": "3.9", "wps": "398131", "ups": "3.35", "wpb": "118893", "bsz": "256", "num_updates": "966400", "lr": "3.39394e-06", "gnorm": "2.466", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "289050"} +[2022-08-02 19:08:21,088][train_inner][INFO] - {"epoch": 19, "update": 18.781, "loss": "1.957", "ppl": "3.88", "wps": "394848", "ups": "3.35", "wpb": "117737", "bsz": "256", "num_updates": "966600", "lr": "3.37374e-06", "gnorm": "2.425", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "289110"} +[2022-08-02 19:09:20,397][train_inner][INFO] - {"epoch": 19, "update": 18.785, "loss": "1.96", "ppl": "3.89", "wps": "399303", "ups": "3.37", "wpb": "118409", "bsz": "256", "num_updates": "966800", "lr": "3.35354e-06", "gnorm": "2.25", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "289169"} +[2022-08-02 19:10:19,888][train_inner][INFO] - {"epoch": 19, "update": 18.789, "loss": "1.962", "ppl": "3.9", "wps": "398335", "ups": "3.36", "wpb": "118486", "bsz": "256", "num_updates": "967000", "lr": "3.33333e-06", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "27.9", "wall": "289228"} +[2022-08-02 19:11:19,001][train_inner][INFO] - {"epoch": 19, "update": 18.793, "loss": "1.956", "ppl": "3.88", "wps": "402691", "ups": "3.38", "wpb": "119021", "bsz": "256", "num_updates": "967200", "lr": "3.31313e-06", "gnorm": "2.213", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "289287"} +[2022-08-02 19:11:53,890][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 19:12:18,631][train_inner][INFO] - {"epoch": 19, "update": 18.797, "loss": "1.96", "ppl": "3.89", "wps": "394174", "ups": "3.35", "wpb": "117522", "bsz": "256", "num_updates": "967400", "lr": "3.29293e-06", "gnorm": "2.332", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "289347"} +[2022-08-02 19:13:18,083][train_inner][INFO] - {"epoch": 19, "update": 18.8, "loss": "1.961", "ppl": "3.89", "wps": "399126", "ups": "3.36", "wpb": "118643", "bsz": "256", "num_updates": "967600", "lr": "3.27273e-06", "gnorm": "2.239", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "289407"} +[2022-08-02 19:14:17,488][train_inner][INFO] - {"epoch": 19, "update": 18.804, "loss": "1.963", "ppl": "3.9", "wps": "396624", "ups": "3.37", "wpb": "117806", "bsz": "256", "num_updates": "967800", "lr": "3.25253e-06", "gnorm": "2.267", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "289466"} +[2022-08-02 19:15:16,918][train_inner][INFO] - {"epoch": 19, "update": 18.808, "loss": "1.957", "ppl": "3.88", "wps": "397536", "ups": "3.37", "wpb": "118128", "bsz": "256", "num_updates": "968000", "lr": "3.23232e-06", "gnorm": "2.275", "loss_scale": "2", "train_wall": "59", "gb_free": "26.5", "wall": "289525"} +[2022-08-02 19:16:16,469][train_inner][INFO] - {"epoch": 19, "update": 18.812, "loss": "1.958", "ppl": "3.89", "wps": "397788", "ups": "3.36", "wpb": "118442", "bsz": "256", "num_updates": "968200", "lr": "3.21212e-06", "gnorm": "2.272", "loss_scale": "2", "train_wall": "59", "gb_free": "26.4", "wall": "289585"} +[2022-08-02 19:17:16,761][train_inner][INFO] - {"epoch": 19, "update": 18.816, "loss": "1.961", "ppl": "3.89", "wps": "391340", "ups": "3.32", "wpb": "117973", "bsz": "256", "num_updates": "968400", "lr": "3.19192e-06", "gnorm": "2.198", "loss_scale": "2", "train_wall": "60", "gb_free": "26", "wall": "289645"} +[2022-08-02 19:18:16,163][train_inner][INFO] - {"epoch": 19, "update": 18.82, "loss": "1.959", "ppl": "3.89", "wps": "400487", "ups": "3.37", "wpb": "118949", "bsz": "256", "num_updates": "968600", "lr": "3.17172e-06", "gnorm": "2.2", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "289705"} +[2022-08-02 19:19:15,472][train_inner][INFO] - {"epoch": 19, "update": 18.824, "loss": "1.96", "ppl": "3.89", "wps": "401147", "ups": "3.37", "wpb": "118956", "bsz": "256", "num_updates": "968800", "lr": "3.15152e-06", "gnorm": "2.269", "loss_scale": "2", "train_wall": "59", "gb_free": "28.8", "wall": "289764"} +[2022-08-02 19:20:14,903][train_inner][INFO] - {"epoch": 19, "update": 18.828, "loss": "1.963", "ppl": "3.9", "wps": "398806", "ups": "3.37", "wpb": "118507", "bsz": "256", "num_updates": "969000", "lr": "3.13131e-06", "gnorm": "2.293", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "289823"} +[2022-08-02 19:21:14,786][train_inner][INFO] - {"epoch": 19, "update": 18.832, "loss": "1.964", "ppl": "3.9", "wps": "394915", "ups": "3.34", "wpb": "118241", "bsz": "256", "num_updates": "969200", "lr": "3.11111e-06", "gnorm": "2.327", "loss_scale": "2", "train_wall": "60", "gb_free": "22.9", "wall": "289883"} +[2022-08-02 19:22:05,530][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 19:22:14,939][train_inner][INFO] - {"epoch": 19, "update": 18.835, "loss": "1.956", "ppl": "3.88", "wps": "395664", "ups": "3.32", "wpb": "119001", "bsz": "256", "num_updates": "969400", "lr": "3.09091e-06", "gnorm": "2.228", "loss_scale": "2", "train_wall": "60", "gb_free": "21.6", "wall": "289943"} +[2022-08-02 19:23:14,738][train_inner][INFO] - {"epoch": 19, "update": 18.839, "loss": "1.956", "ppl": "3.88", "wps": "396781", "ups": "3.34", "wpb": "118636", "bsz": "256", "num_updates": "969600", "lr": "3.07071e-06", "gnorm": "2.26", "loss_scale": "2", "train_wall": "59", "gb_free": "27", "wall": "290003"} +[2022-08-02 19:24:14,001][train_inner][INFO] - {"epoch": 19, "update": 18.843, "loss": "1.963", "ppl": "3.9", "wps": "398392", "ups": "3.37", "wpb": "118049", "bsz": "256", "num_updates": "969800", "lr": "3.05051e-06", "gnorm": "2.266", "loss_scale": "2", "train_wall": "59", "gb_free": "25", "wall": "290062"} +[2022-08-02 19:25:13,506][train_inner][INFO] - {"epoch": 19, "update": 18.847, "loss": "1.952", "ppl": "3.87", "wps": "399128", "ups": "3.36", "wpb": "118749", "bsz": "256", "num_updates": "970000", "lr": "3.0303e-06", "gnorm": "2.214", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "290122"} +[2022-08-02 19:26:13,163][train_inner][INFO] - {"epoch": 19, "update": 18.851, "loss": "1.955", "ppl": "3.88", "wps": "396205", "ups": "3.35", "wpb": "118181", "bsz": "256", "num_updates": "970200", "lr": "3.0101e-06", "gnorm": "2.198", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "290182"} +[2022-08-02 19:27:12,359][train_inner][INFO] - {"epoch": 19, "update": 18.855, "loss": "1.959", "ppl": "3.89", "wps": "399023", "ups": "3.38", "wpb": "118103", "bsz": "256", "num_updates": "970400", "lr": "2.9899e-06", "gnorm": "2.308", "loss_scale": "2", "train_wall": "59", "gb_free": "24.9", "wall": "290241"} +[2022-08-02 19:28:11,707][train_inner][INFO] - {"epoch": 19, "update": 18.859, "loss": "1.953", "ppl": "3.87", "wps": "400425", "ups": "3.37", "wpb": "118821", "bsz": "256", "num_updates": "970600", "lr": "2.9697e-06", "gnorm": "2.196", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "290300"} +[2022-08-02 19:29:10,847][train_inner][INFO] - {"epoch": 19, "update": 18.863, "loss": "1.963", "ppl": "3.9", "wps": "398227", "ups": "3.38", "wpb": "117756", "bsz": "256", "num_updates": "970800", "lr": "2.94949e-06", "gnorm": "2.192", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "290359"} +[2022-08-02 19:30:10,381][train_inner][INFO] - {"epoch": 19, "update": 18.867, "loss": "1.958", "ppl": "3.88", "wps": "398445", "ups": "3.36", "wpb": "118603", "bsz": "256", "num_updates": "971000", "lr": "2.92929e-06", "gnorm": "2.251", "loss_scale": "2", "train_wall": "59", "gb_free": "24.7", "wall": "290419"} +[2022-08-02 19:31:09,693][train_inner][INFO] - {"epoch": 19, "update": 18.87, "loss": "1.956", "ppl": "3.88", "wps": "399720", "ups": "3.37", "wpb": "118541", "bsz": "256", "num_updates": "971200", "lr": "2.90909e-06", "gnorm": "2.26", "loss_scale": "2", "train_wall": "59", "gb_free": "27.1", "wall": "290478"} +[2022-08-02 19:32:09,025][train_inner][INFO] - {"epoch": 19, "update": 18.874, "loss": "1.959", "ppl": "3.89", "wps": "399513", "ups": "3.37", "wpb": "118519", "bsz": "256", "num_updates": "971400", "lr": "2.88889e-06", "gnorm": "2.199", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "290537"} +[2022-08-02 19:32:18,912][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 19:33:09,155][train_inner][INFO] - {"epoch": 19, "update": 18.878, "loss": "1.964", "ppl": "3.9", "wps": "394070", "ups": "3.33", "wpb": "118476", "bsz": "256", "num_updates": "971600", "lr": "2.86869e-06", "gnorm": "2.295", "loss_scale": "2", "train_wall": "60", "gb_free": "21.8", "wall": "290598"} +[2022-08-02 19:34:08,597][train_inner][INFO] - {"epoch": 19, "update": 18.882, "loss": "1.956", "ppl": "3.88", "wps": "395283", "ups": "3.36", "wpb": "117481", "bsz": "256", "num_updates": "971800", "lr": "2.84848e-06", "gnorm": "2.295", "loss_scale": "2", "train_wall": "59", "gb_free": "22.8", "wall": "290657"} +[2022-08-02 19:35:08,142][train_inner][INFO] - {"epoch": 19, "update": 18.886, "loss": "1.957", "ppl": "3.88", "wps": "396734", "ups": "3.36", "wpb": "118115", "bsz": "256", "num_updates": "972000", "lr": "2.82828e-06", "gnorm": "2.194", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "290717"} +[2022-08-02 19:36:07,523][train_inner][INFO] - {"epoch": 19, "update": 18.89, "loss": "1.958", "ppl": "3.88", "wps": "397494", "ups": "3.37", "wpb": "118018", "bsz": "256", "num_updates": "972200", "lr": "2.80808e-06", "gnorm": "2.234", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "290776"} +[2022-08-02 19:37:07,129][train_inner][INFO] - {"epoch": 19, "update": 18.894, "loss": "1.959", "ppl": "3.89", "wps": "397265", "ups": "3.36", "wpb": "118397", "bsz": "256", "num_updates": "972400", "lr": "2.78788e-06", "gnorm": "2.261", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "290836"} +[2022-08-02 19:38:06,730][train_inner][INFO] - {"epoch": 19, "update": 18.898, "loss": "1.959", "ppl": "3.89", "wps": "397454", "ups": "3.36", "wpb": "118442", "bsz": "256", "num_updates": "972600", "lr": "2.76768e-06", "gnorm": "2.229", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "290895"} +[2022-08-02 19:39:06,188][train_inner][INFO] - {"epoch": 19, "update": 18.902, "loss": "1.958", "ppl": "3.88", "wps": "398442", "ups": "3.36", "wpb": "118453", "bsz": "256", "num_updates": "972800", "lr": "2.74747e-06", "gnorm": "2.179", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "290955"} +[2022-08-02 19:40:05,754][train_inner][INFO] - {"epoch": 19, "update": 18.905, "loss": "1.955", "ppl": "3.88", "wps": "396649", "ups": "3.36", "wpb": "118134", "bsz": "256", "num_updates": "973000", "lr": "2.72727e-06", "gnorm": "2.222", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "291014"} +[2022-08-02 19:41:05,274][train_inner][INFO] - {"epoch": 19, "update": 18.909, "loss": "1.956", "ppl": "3.88", "wps": "397316", "ups": "3.36", "wpb": "118240", "bsz": "256", "num_updates": "973200", "lr": "2.70707e-06", "gnorm": "2.273", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "291074"} +[2022-08-02 19:42:04,963][train_inner][INFO] - {"epoch": 19, "update": 18.913, "loss": "1.959", "ppl": "3.89", "wps": "397911", "ups": "3.35", "wpb": "118754", "bsz": "256", "num_updates": "973400", "lr": "2.68687e-06", "gnorm": "2.226", "loss_scale": "2", "train_wall": "59", "gb_free": "32.4", "wall": "291133"} +[2022-08-02 19:42:38,445][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 19:43:04,617][train_inner][INFO] - {"epoch": 19, "update": 18.917, "loss": "1.963", "ppl": "3.9", "wps": "396231", "ups": "3.35", "wpb": "118183", "bsz": "256", "num_updates": "973600", "lr": "2.66667e-06", "gnorm": "2.218", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "291193"} +[2022-08-02 19:44:03,982][train_inner][INFO] - {"epoch": 19, "update": 18.921, "loss": "1.955", "ppl": "3.88", "wps": "399882", "ups": "3.37", "wpb": "118693", "bsz": "256", "num_updates": "973800", "lr": "2.64646e-06", "gnorm": "2.25", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "291252"} +[2022-08-02 19:45:03,503][train_inner][INFO] - {"epoch": 19, "update": 18.925, "loss": "1.961", "ppl": "3.89", "wps": "398352", "ups": "3.36", "wpb": "118552", "bsz": "256", "num_updates": "974000", "lr": "2.62626e-06", "gnorm": "2.242", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "291312"} +[2022-08-02 19:46:02,793][train_inner][INFO] - {"epoch": 19, "update": 18.929, "loss": "1.955", "ppl": "3.88", "wps": "397141", "ups": "3.37", "wpb": "117732", "bsz": "256", "num_updates": "974200", "lr": "2.60606e-06", "gnorm": "2.192", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "291371"} +[2022-08-02 19:47:02,600][train_inner][INFO] - {"epoch": 19, "update": 18.933, "loss": "1.957", "ppl": "3.88", "wps": "397146", "ups": "3.34", "wpb": "118759", "bsz": "256", "num_updates": "974400", "lr": "2.58586e-06", "gnorm": "2.196", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "291431"} +[2022-08-02 19:48:01,616][train_inner][INFO] - {"epoch": 19, "update": 18.937, "loss": "1.959", "ppl": "3.89", "wps": "401309", "ups": "3.39", "wpb": "118418", "bsz": "256", "num_updates": "974600", "lr": "2.56566e-06", "gnorm": "2.252", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "291490"} +[2022-08-02 19:49:01,209][train_inner][INFO] - {"epoch": 19, "update": 18.94, "loss": "1.964", "ppl": "3.9", "wps": "394374", "ups": "3.36", "wpb": "117510", "bsz": "256", "num_updates": "974800", "lr": "2.54545e-06", "gnorm": "2.229", "loss_scale": "2", "train_wall": "59", "gb_free": "26", "wall": "291550"} +[2022-08-02 19:50:00,611][train_inner][INFO] - {"epoch": 19, "update": 18.944, "loss": "1.96", "ppl": "3.89", "wps": "398336", "ups": "3.37", "wpb": "118309", "bsz": "256", "num_updates": "975000", "lr": "2.52525e-06", "gnorm": "2.237", "loss_scale": "2", "train_wall": "59", "gb_free": "25.6", "wall": "291609"} +[2022-08-02 19:51:00,156][train_inner][INFO] - {"epoch": 19, "update": 18.948, "loss": "1.959", "ppl": "3.89", "wps": "398048", "ups": "3.36", "wpb": "118508", "bsz": "256", "num_updates": "975200", "lr": "2.50505e-06", "gnorm": "2.248", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "291669"} +[2022-08-02 19:51:59,692][train_inner][INFO] - {"epoch": 19, "update": 18.952, "loss": "1.953", "ppl": "3.87", "wps": "398041", "ups": "3.36", "wpb": "118487", "bsz": "256", "num_updates": "975400", "lr": "2.48485e-06", "gnorm": "2.215", "loss_scale": "2", "train_wall": "59", "gb_free": "25.8", "wall": "291728"} +[2022-08-02 19:52:50,039][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 19:52:59,254][train_inner][INFO] - {"epoch": 19, "update": 18.956, "loss": "1.962", "ppl": "3.9", "wps": "398088", "ups": "3.36", "wpb": "118554", "bsz": "256", "num_updates": "975600", "lr": "2.46465e-06", "gnorm": "2.247", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "291788"} +[2022-08-02 19:53:58,562][train_inner][INFO] - {"epoch": 19, "update": 18.96, "loss": "1.96", "ppl": "3.89", "wps": "398694", "ups": "3.37", "wpb": "118229", "bsz": "256", "num_updates": "975800", "lr": "2.44444e-06", "gnorm": "2.269", "loss_scale": "2", "train_wall": "59", "gb_free": "27.6", "wall": "291847"} +[2022-08-02 19:54:57,528][train_inner][INFO] - {"epoch": 19, "update": 18.964, "loss": "1.957", "ppl": "3.88", "wps": "400699", "ups": "3.39", "wpb": "118136", "bsz": "256", "num_updates": "976000", "lr": "2.42424e-06", "gnorm": "2.213", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "291906"} +[2022-08-02 19:55:57,746][train_inner][INFO] - {"epoch": 19, "update": 18.968, "loss": "1.954", "ppl": "3.87", "wps": "393928", "ups": "3.32", "wpb": "118608", "bsz": "256", "num_updates": "976200", "lr": "2.40404e-06", "gnorm": "2.2", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "291966"} +[2022-08-02 19:56:57,088][train_inner][INFO] - {"epoch": 19, "update": 18.971, "loss": "1.957", "ppl": "3.88", "wps": "400546", "ups": "3.37", "wpb": "118845", "bsz": "256", "num_updates": "976400", "lr": "2.38384e-06", "gnorm": "2.189", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "292026"} +[2022-08-02 19:57:56,730][train_inner][INFO] - {"epoch": 19, "update": 18.975, "loss": "1.957", "ppl": "3.88", "wps": "397364", "ups": "3.35", "wpb": "118497", "bsz": "256", "num_updates": "976600", "lr": "2.36364e-06", "gnorm": "2.241", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "292085"} +[2022-08-02 19:58:55,798][train_inner][INFO] - {"epoch": 19, "update": 18.979, "loss": "1.962", "ppl": "3.9", "wps": "398809", "ups": "3.39", "wpb": "117784", "bsz": "256", "num_updates": "976800", "lr": "2.34343e-06", "gnorm": "2.201", "loss_scale": "2", "train_wall": "59", "gb_free": "30", "wall": "292144"} +[2022-08-02 19:59:55,584][train_inner][INFO] - {"epoch": 19, "update": 18.983, "loss": "1.961", "ppl": "3.89", "wps": "394775", "ups": "3.35", "wpb": "118010", "bsz": "256", "num_updates": "977000", "lr": "2.32323e-06", "gnorm": "2.205", "loss_scale": "2", "train_wall": "59", "gb_free": "24.5", "wall": "292204"} +[2022-08-02 20:00:54,903][train_inner][INFO] - {"epoch": 19, "update": 18.987, "loss": "1.962", "ppl": "3.9", "wps": "399237", "ups": "3.37", "wpb": "118410", "bsz": "256", "num_updates": "977200", "lr": "2.30303e-06", "gnorm": "2.33", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "292263"} +[2022-08-02 20:01:54,390][train_inner][INFO] - {"epoch": 19, "update": 18.991, "loss": "1.956", "ppl": "3.88", "wps": "398021", "ups": "3.36", "wpb": "118386", "bsz": "256", "num_updates": "977400", "lr": "2.28283e-06", "gnorm": "2.288", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "292323"} +[2022-08-02 20:02:53,670][train_inner][INFO] - {"epoch": 19, "update": 18.995, "loss": "1.962", "ppl": "3.89", "wps": "399980", "ups": "3.37", "wpb": "118553", "bsz": "256", "num_updates": "977600", "lr": "2.26263e-06", "gnorm": "2.316", "loss_scale": "2", "train_wall": "59", "gb_free": "29.5", "wall": "292382"} +[2022-08-02 20:03:04,387][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 20:03:53,189][train_inner][INFO] - {"epoch": 19, "update": 18.999, "loss": "1.958", "ppl": "3.88", "wps": "397504", "ups": "3.36", "wpb": "118294", "bsz": "256", "num_updates": "977800", "lr": "2.24242e-06", "gnorm": "2.45", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "292442"} +[2022-08-02 20:04:13,287][fairseq_cli.train][INFO] - end of epoch 19 (average epoch stats below) +[2022-08-02 20:04:13,287][train][INFO] - {"epoch": 19, "train_loss": "1.963", "train_ppl": "3.9", "train_wps": "395790", "train_ups": "3.35", "train_wpb": "118297", "train_bsz": "256", "train_num_updates": "977867", "train_lr": "2.23566e-06", "train_gnorm": "2.327", "train_loss_scale": "2", "train_train_wall": "15237", "train_gb_free": "22.4", "train_wall": "292462"} +[2022-08-02 20:04:13,395][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 20:04:13,398][fairseq.trainer][INFO] - begin training epoch 20 +[2022-08-02 20:04:13,398][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 20:05:03,930][train_inner][INFO] - {"epoch": 20, "update": 19.003, "loss": "1.96", "ppl": "3.89", "wps": "334082", "ups": "2.83", "wpb": "118166", "bsz": "255.4", "num_updates": "978000", "lr": "2.22222e-06", "gnorm": "2.407", "loss_scale": "2", "train_wall": "60", "gb_free": "24.5", "wall": "292512"} +[2022-08-02 20:06:03,803][train_inner][INFO] - {"epoch": 20, "update": 19.006, "loss": "1.953", "ppl": "3.87", "wps": "395387", "ups": "3.34", "wpb": "118364", "bsz": "256", "num_updates": "978200", "lr": "2.20202e-06", "gnorm": "2.441", "loss_scale": "2", "train_wall": "60", "gb_free": "25.5", "wall": "292572"} +[2022-08-02 20:07:03,426][train_inner][INFO] - {"epoch": 20, "update": 19.01, "loss": "1.958", "ppl": "3.89", "wps": "396856", "ups": "3.35", "wpb": "118309", "bsz": "256", "num_updates": "978400", "lr": "2.18182e-06", "gnorm": "2.495", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "292632"} +[2022-08-02 20:08:03,310][train_inner][INFO] - {"epoch": 20, "update": 19.014, "loss": "1.954", "ppl": "3.87", "wps": "396469", "ups": "3.34", "wpb": "118710", "bsz": "256", "num_updates": "978600", "lr": "2.16162e-06", "gnorm": "2.629", "loss_scale": "2", "train_wall": "60", "gb_free": "23.2", "wall": "292692"} +[2022-08-02 20:09:02,556][train_inner][INFO] - {"epoch": 20, "update": 19.018, "loss": "1.961", "ppl": "3.89", "wps": "398773", "ups": "3.38", "wpb": "118127", "bsz": "256", "num_updates": "978800", "lr": "2.14141e-06", "gnorm": "2.599", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "292751"} +[2022-08-02 20:10:02,258][train_inner][INFO] - {"epoch": 20, "update": 19.022, "loss": "1.954", "ppl": "3.88", "wps": "395456", "ups": "3.35", "wpb": "118048", "bsz": "256", "num_updates": "979000", "lr": "2.12121e-06", "gnorm": "2.544", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "292811"} +[2022-08-02 20:11:01,807][train_inner][INFO] - {"epoch": 20, "update": 19.026, "loss": "1.955", "ppl": "3.88", "wps": "398348", "ups": "3.36", "wpb": "118604", "bsz": "256", "num_updates": "979200", "lr": "2.10101e-06", "gnorm": "2.638", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "292870"} +[2022-08-02 20:12:01,462][train_inner][INFO] - {"epoch": 20, "update": 19.03, "loss": "1.955", "ppl": "3.88", "wps": "396608", "ups": "3.35", "wpb": "118298", "bsz": "256", "num_updates": "979400", "lr": "2.08081e-06", "gnorm": "2.437", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "292930"} +[2022-08-02 20:13:00,981][train_inner][INFO] - {"epoch": 20, "update": 19.034, "loss": "1.964", "ppl": "3.9", "wps": "396280", "ups": "3.36", "wpb": "117930", "bsz": "256", "num_updates": "979600", "lr": "2.06061e-06", "gnorm": "2.504", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "292989"} +[2022-08-02 20:13:26,279][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 20:14:00,790][train_inner][INFO] - {"epoch": 20, "update": 19.038, "loss": "1.956", "ppl": "3.88", "wps": "396440", "ups": "3.34", "wpb": "118553", "bsz": "256", "num_updates": "979800", "lr": "2.0404e-06", "gnorm": "2.576", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "293049"} +[2022-08-02 20:15:00,267][train_inner][INFO] - {"epoch": 20, "update": 19.041, "loss": "1.953", "ppl": "3.87", "wps": "399036", "ups": "3.36", "wpb": "118666", "bsz": "256", "num_updates": "980000", "lr": "2.0202e-06", "gnorm": "2.496", "loss_scale": "2", "train_wall": "59", "gb_free": "29.2", "wall": "293109"} +[2022-08-02 20:15:59,891][train_inner][INFO] - {"epoch": 20, "update": 19.045, "loss": "1.957", "ppl": "3.88", "wps": "397310", "ups": "3.35", "wpb": "118444", "bsz": "256", "num_updates": "980200", "lr": "2e-06", "gnorm": "2.531", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "293168"} +[2022-08-02 20:16:59,106][train_inner][INFO] - {"epoch": 20, "update": 19.049, "loss": "1.962", "ppl": "3.9", "wps": "397255", "ups": "3.38", "wpb": "117618", "bsz": "256", "num_updates": "980400", "lr": "1.9798e-06", "gnorm": "2.574", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "293228"} +[2022-08-02 20:17:58,186][train_inner][INFO] - {"epoch": 20, "update": 19.053, "loss": "1.957", "ppl": "3.88", "wps": "399366", "ups": "3.39", "wpb": "117972", "bsz": "256", "num_updates": "980600", "lr": "1.9596e-06", "gnorm": "2.353", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "293287"} +[2022-08-02 20:18:57,426][train_inner][INFO] - {"epoch": 20, "update": 19.057, "loss": "1.959", "ppl": "3.89", "wps": "397459", "ups": "3.38", "wpb": "117725", "bsz": "256", "num_updates": "980800", "lr": "1.93939e-06", "gnorm": "2.472", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "293346"} +[2022-08-02 20:19:56,916][train_inner][INFO] - {"epoch": 20, "update": 19.061, "loss": "1.956", "ppl": "3.88", "wps": "398221", "ups": "3.36", "wpb": "118450", "bsz": "256", "num_updates": "981000", "lr": "1.91919e-06", "gnorm": "2.413", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "293405"} +[2022-08-02 20:20:56,370][train_inner][INFO] - {"epoch": 20, "update": 19.065, "loss": "1.948", "ppl": "3.86", "wps": "400085", "ups": "3.36", "wpb": "118934", "bsz": "256", "num_updates": "981200", "lr": "1.89899e-06", "gnorm": "2.5", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "293465"} +[2022-08-02 20:21:56,127][train_inner][INFO] - {"epoch": 20, "update": 19.069, "loss": "1.96", "ppl": "3.89", "wps": "396344", "ups": "3.35", "wpb": "118420", "bsz": "256", "num_updates": "981400", "lr": "1.87879e-06", "gnorm": "2.347", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "293525"} +[2022-08-02 20:22:55,466][train_inner][INFO] - {"epoch": 20, "update": 19.073, "loss": "1.958", "ppl": "3.88", "wps": "398746", "ups": "3.37", "wpb": "118306", "bsz": "256", "num_updates": "981600", "lr": "1.85859e-06", "gnorm": "2.467", "loss_scale": "2", "train_wall": "59", "gb_free": "30.1", "wall": "293584"} +[2022-08-02 20:23:38,493][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 20:23:55,136][train_inner][INFO] - {"epoch": 20, "update": 19.076, "loss": "1.962", "ppl": "3.9", "wps": "394623", "ups": "3.35", "wpb": "117735", "bsz": "256", "num_updates": "981800", "lr": "1.83838e-06", "gnorm": "2.297", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "293644"} +[2022-08-02 20:24:54,479][train_inner][INFO] - {"epoch": 20, "update": 19.08, "loss": "1.954", "ppl": "3.88", "wps": "399533", "ups": "3.37", "wpb": "118546", "bsz": "256", "num_updates": "982000", "lr": "1.81818e-06", "gnorm": "2.388", "loss_scale": "2", "train_wall": "59", "gb_free": "22.5", "wall": "293703"} +[2022-08-02 20:25:53,906][train_inner][INFO] - {"epoch": 20, "update": 19.084, "loss": "1.956", "ppl": "3.88", "wps": "397945", "ups": "3.37", "wpb": "118243", "bsz": "256", "num_updates": "982200", "lr": "1.79798e-06", "gnorm": "2.21", "loss_scale": "2", "train_wall": "59", "gb_free": "21.8", "wall": "293762"} +[2022-08-02 20:26:53,288][train_inner][INFO] - {"epoch": 20, "update": 19.088, "loss": "1.959", "ppl": "3.89", "wps": "397432", "ups": "3.37", "wpb": "118000", "bsz": "256", "num_updates": "982400", "lr": "1.77778e-06", "gnorm": "2.292", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "293822"} +[2022-08-02 20:27:52,633][train_inner][INFO] - {"epoch": 20, "update": 19.092, "loss": "1.959", "ppl": "3.89", "wps": "396886", "ups": "3.37", "wpb": "117765", "bsz": "256", "num_updates": "982600", "lr": "1.75758e-06", "gnorm": "2.383", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "293881"} +[2022-08-02 20:28:51,723][train_inner][INFO] - {"epoch": 20, "update": 19.096, "loss": "1.958", "ppl": "3.88", "wps": "400812", "ups": "3.38", "wpb": "118419", "bsz": "256", "num_updates": "982800", "lr": "1.73737e-06", "gnorm": "2.268", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "293940"} +[2022-08-02 20:29:51,095][train_inner][INFO] - {"epoch": 20, "update": 19.1, "loss": "1.95", "ppl": "3.86", "wps": "400249", "ups": "3.37", "wpb": "118818", "bsz": "256", "num_updates": "983000", "lr": "1.71717e-06", "gnorm": "2.384", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "294000"} +[2022-08-02 20:30:50,724][train_inner][INFO] - {"epoch": 20, "update": 19.104, "loss": "1.959", "ppl": "3.89", "wps": "397017", "ups": "3.35", "wpb": "118368", "bsz": "256", "num_updates": "983200", "lr": "1.69697e-06", "gnorm": "2.374", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "294059"} +[2022-08-02 20:31:50,245][train_inner][INFO] - {"epoch": 20, "update": 19.108, "loss": "1.956", "ppl": "3.88", "wps": "396668", "ups": "3.36", "wpb": "118048", "bsz": "256", "num_updates": "983400", "lr": "1.67677e-06", "gnorm": "2.164", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "294119"} +[2022-08-02 20:32:49,528][train_inner][INFO] - {"epoch": 20, "update": 19.111, "loss": "1.956", "ppl": "3.88", "wps": "399110", "ups": "3.37", "wpb": "118302", "bsz": "256", "num_updates": "983600", "lr": "1.65657e-06", "gnorm": "2.173", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "294178"} +[2022-08-02 20:33:49,201][train_inner][INFO] - {"epoch": 20, "update": 19.115, "loss": "1.951", "ppl": "3.87", "wps": "397431", "ups": "3.35", "wpb": "118578", "bsz": "256", "num_updates": "983800", "lr": "1.63636e-06", "gnorm": "2.232", "loss_scale": "4", "train_wall": "59", "gb_free": "21.7", "wall": "294238"} +[2022-08-02 20:33:58,617][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 20:34:48,784][train_inner][INFO] - {"epoch": 20, "update": 19.119, "loss": "1.961", "ppl": "3.89", "wps": "396905", "ups": "3.36", "wpb": "118244", "bsz": "256", "num_updates": "984000", "lr": "1.61616e-06", "gnorm": "2.23", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "294297"} +[2022-08-02 20:35:48,673][train_inner][INFO] - {"epoch": 20, "update": 19.123, "loss": "1.946", "ppl": "3.85", "wps": "397069", "ups": "3.34", "wpb": "118900", "bsz": "256", "num_updates": "984200", "lr": "1.59596e-06", "gnorm": "2.203", "loss_scale": "2", "train_wall": "60", "gb_free": "25", "wall": "294357"} +[2022-08-02 20:36:48,027][train_inner][INFO] - {"epoch": 20, "update": 19.127, "loss": "1.956", "ppl": "3.88", "wps": "398478", "ups": "3.37", "wpb": "118254", "bsz": "256", "num_updates": "984400", "lr": "1.57576e-06", "gnorm": "2.178", "loss_scale": "2", "train_wall": "59", "gb_free": "25.5", "wall": "294416"} +[2022-08-02 20:37:47,229][train_inner][INFO] - {"epoch": 20, "update": 19.131, "loss": "1.96", "ppl": "3.89", "wps": "397274", "ups": "3.38", "wpb": "117597", "bsz": "256", "num_updates": "984600", "lr": "1.55556e-06", "gnorm": "2.217", "loss_scale": "2", "train_wall": "59", "gb_free": "26.9", "wall": "294476"} +[2022-08-02 20:38:46,687][train_inner][INFO] - {"epoch": 20, "update": 19.135, "loss": "1.957", "ppl": "3.88", "wps": "397364", "ups": "3.36", "wpb": "118132", "bsz": "256", "num_updates": "984800", "lr": "1.53535e-06", "gnorm": "2.184", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "294535"} +[2022-08-02 20:39:45,940][train_inner][INFO] - {"epoch": 20, "update": 19.139, "loss": "1.956", "ppl": "3.88", "wps": "400189", "ups": "3.38", "wpb": "118560", "bsz": "256", "num_updates": "985000", "lr": "1.51515e-06", "gnorm": "2.154", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "294594"} +[2022-08-02 20:40:45,709][train_inner][INFO] - {"epoch": 20, "update": 19.142, "loss": "1.955", "ppl": "3.88", "wps": "398056", "ups": "3.35", "wpb": "118958", "bsz": "256", "num_updates": "985200", "lr": "1.49495e-06", "gnorm": "2.201", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "294654"} +[2022-08-02 20:41:45,161][train_inner][INFO] - {"epoch": 20, "update": 19.146, "loss": "1.958", "ppl": "3.89", "wps": "396425", "ups": "3.36", "wpb": "117840", "bsz": "256", "num_updates": "985400", "lr": "1.47475e-06", "gnorm": "2.233", "loss_scale": "2", "train_wall": "59", "gb_free": "25.4", "wall": "294714"} +[2022-08-02 20:42:44,181][train_inner][INFO] - {"epoch": 20, "update": 19.15, "loss": "1.952", "ppl": "3.87", "wps": "399545", "ups": "3.39", "wpb": "117904", "bsz": "256", "num_updates": "985600", "lr": "1.45455e-06", "gnorm": "2.174", "loss_scale": "2", "train_wall": "59", "gb_free": "23.2", "wall": "294773"} +[2022-08-02 20:43:43,797][train_inner][INFO] - {"epoch": 20, "update": 19.154, "loss": "1.96", "ppl": "3.89", "wps": "395335", "ups": "3.35", "wpb": "117842", "bsz": "256", "num_updates": "985800", "lr": "1.43434e-06", "gnorm": "2.182", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "294832"} +[2022-08-02 20:44:07,381][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 20:44:43,406][train_inner][INFO] - {"epoch": 20, "update": 19.158, "loss": "1.957", "ppl": "3.88", "wps": "396148", "ups": "3.36", "wpb": "118070", "bsz": "256", "num_updates": "986000", "lr": "1.41414e-06", "gnorm": "2.233", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "294892"} +[2022-08-02 20:45:42,850][train_inner][INFO] - {"epoch": 20, "update": 19.162, "loss": "1.953", "ppl": "3.87", "wps": "398584", "ups": "3.36", "wpb": "118466", "bsz": "256", "num_updates": "986200", "lr": "1.39394e-06", "gnorm": "2.188", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "294951"} +[2022-08-02 20:46:42,545][train_inner][INFO] - {"epoch": 20, "update": 19.166, "loss": "1.956", "ppl": "3.88", "wps": "396988", "ups": "3.35", "wpb": "118490", "bsz": "256", "num_updates": "986400", "lr": "1.37374e-06", "gnorm": "2.185", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "295011"} +[2022-08-02 20:47:42,136][train_inner][INFO] - {"epoch": 20, "update": 19.17, "loss": "1.96", "ppl": "3.89", "wps": "399409", "ups": "3.36", "wpb": "119004", "bsz": "256", "num_updates": "986600", "lr": "1.35354e-06", "gnorm": "2.209", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "295071"} +[2022-08-02 20:48:41,931][train_inner][INFO] - {"epoch": 20, "update": 19.174, "loss": "1.957", "ppl": "3.88", "wps": "396374", "ups": "3.34", "wpb": "118504", "bsz": "256", "num_updates": "986800", "lr": "1.33333e-06", "gnorm": "2.186", "loss_scale": "2", "train_wall": "59", "gb_free": "23.8", "wall": "295130"} +[2022-08-02 20:49:40,971][train_inner][INFO] - {"epoch": 20, "update": 19.177, "loss": "1.955", "ppl": "3.88", "wps": "399618", "ups": "3.39", "wpb": "117967", "bsz": "256", "num_updates": "987000", "lr": "1.31313e-06", "gnorm": "2.197", "loss_scale": "2", "train_wall": "59", "gb_free": "25.3", "wall": "295189"} +[2022-08-02 20:50:40,535][train_inner][INFO] - {"epoch": 20, "update": 19.181, "loss": "1.959", "ppl": "3.89", "wps": "397903", "ups": "3.36", "wpb": "118502", "bsz": "256", "num_updates": "987200", "lr": "1.29293e-06", "gnorm": "2.096", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "295249"} +[2022-08-02 20:51:39,802][train_inner][INFO] - {"epoch": 20, "update": 19.185, "loss": "1.954", "ppl": "3.87", "wps": "397211", "ups": "3.37", "wpb": "117708", "bsz": "256", "num_updates": "987400", "lr": "1.27273e-06", "gnorm": "2.19", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "295308"} +[2022-08-02 20:52:39,017][train_inner][INFO] - {"epoch": 20, "update": 19.189, "loss": "1.959", "ppl": "3.89", "wps": "402496", "ups": "3.38", "wpb": "119167", "bsz": "256", "num_updates": "987600", "lr": "1.25253e-06", "gnorm": "2.13", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "295367"} +[2022-08-02 20:53:37,694][train_inner][INFO] - {"epoch": 20, "update": 19.193, "loss": "1.949", "ppl": "3.86", "wps": "402824", "ups": "3.41", "wpb": "118182", "bsz": "256", "num_updates": "987800", "lr": "1.23232e-06", "gnorm": "2.212", "loss_scale": "2", "train_wall": "58", "gb_free": "21.7", "wall": "295426"} +[2022-08-02 20:54:15,943][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 20:54:36,947][train_inner][INFO] - {"epoch": 20, "update": 19.197, "loss": "1.947", "ppl": "3.85", "wps": "400609", "ups": "3.38", "wpb": "118686", "bsz": "256", "num_updates": "988000", "lr": "1.21212e-06", "gnorm": "2.127", "loss_scale": "2", "train_wall": "59", "gb_free": "21.9", "wall": "295485"} +[2022-08-02 20:55:36,565][train_inner][INFO] - {"epoch": 20, "update": 19.201, "loss": "1.957", "ppl": "3.88", "wps": "397259", "ups": "3.35", "wpb": "118418", "bsz": "256", "num_updates": "988200", "lr": "1.19192e-06", "gnorm": "2.2", "loss_scale": "2", "train_wall": "59", "gb_free": "23.7", "wall": "295545"} +[2022-08-02 20:56:36,061][train_inner][INFO] - {"epoch": 20, "update": 19.205, "loss": "1.956", "ppl": "3.88", "wps": "398718", "ups": "3.36", "wpb": "118610", "bsz": "255.9", "num_updates": "988400", "lr": "1.17172e-06", "gnorm": "2.153", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "295604"} +[2022-08-02 20:57:35,662][train_inner][INFO] - {"epoch": 20, "update": 19.209, "loss": "1.953", "ppl": "3.87", "wps": "395277", "ups": "3.36", "wpb": "117794", "bsz": "256", "num_updates": "988600", "lr": "1.15152e-06", "gnorm": "2.158", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "295664"} +[2022-08-02 20:58:34,955][train_inner][INFO] - {"epoch": 20, "update": 19.212, "loss": "1.957", "ppl": "3.88", "wps": "399013", "ups": "3.37", "wpb": "118293", "bsz": "256", "num_updates": "988800", "lr": "1.13131e-06", "gnorm": "2.163", "loss_scale": "2", "train_wall": "59", "gb_free": "21.4", "wall": "295723"} +[2022-08-02 20:59:34,256][train_inner][INFO] - {"epoch": 20, "update": 19.216, "loss": "1.958", "ppl": "3.89", "wps": "397160", "ups": "3.37", "wpb": "117759", "bsz": "256", "num_updates": "989000", "lr": "1.11111e-06", "gnorm": "2.127", "loss_scale": "2", "train_wall": "59", "gb_free": "22.7", "wall": "295783"} +[2022-08-02 21:00:33,391][train_inner][INFO] - {"epoch": 20, "update": 19.22, "loss": "1.952", "ppl": "3.87", "wps": "400122", "ups": "3.38", "wpb": "118305", "bsz": "256", "num_updates": "989200", "lr": "1.09091e-06", "gnorm": "2.145", "loss_scale": "2", "train_wall": "59", "gb_free": "22", "wall": "295842"} +[2022-08-02 21:01:32,676][train_inner][INFO] - {"epoch": 20, "update": 19.224, "loss": "1.96", "ppl": "3.89", "wps": "400508", "ups": "3.37", "wpb": "118719", "bsz": "256", "num_updates": "989400", "lr": "1.07071e-06", "gnorm": "2.184", "loss_scale": "2", "train_wall": "59", "gb_free": "26.7", "wall": "295901"} +[2022-08-02 21:02:32,091][train_inner][INFO] - {"epoch": 20, "update": 19.228, "loss": "1.956", "ppl": "3.88", "wps": "399936", "ups": "3.37", "wpb": "118811", "bsz": "256", "num_updates": "989600", "lr": "1.05051e-06", "gnorm": "2.145", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "295961"} +[2022-08-02 21:03:31,387][train_inner][INFO] - {"epoch": 20, "update": 19.232, "loss": "1.955", "ppl": "3.88", "wps": "399504", "ups": "3.37", "wpb": "118444", "bsz": "256", "num_updates": "989800", "lr": "1.0303e-06", "gnorm": "2.143", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "296020"} +[2022-08-02 21:04:30,376][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 21:04:30,986][train_inner][INFO] - {"epoch": 20, "update": 19.236, "loss": "1.954", "ppl": "3.88", "wps": "396405", "ups": "3.36", "wpb": "118125", "bsz": "256", "num_updates": "990000", "lr": "1.0101e-06", "gnorm": "2.183", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "296079"} +[2022-08-02 21:05:30,482][train_inner][INFO] - {"epoch": 20, "update": 19.24, "loss": "1.956", "ppl": "3.88", "wps": "397862", "ups": "3.36", "wpb": "118356", "bsz": "256", "num_updates": "990200", "lr": "9.89899e-07", "gnorm": "2.153", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "296139"} +[2022-08-02 21:06:29,741][train_inner][INFO] - {"epoch": 20, "update": 19.244, "loss": "1.957", "ppl": "3.88", "wps": "398087", "ups": "3.38", "wpb": "117952", "bsz": "256", "num_updates": "990400", "lr": "9.69697e-07", "gnorm": "2.142", "loss_scale": "2", "train_wall": "59", "gb_free": "22.1", "wall": "296198"} +[2022-08-02 21:07:30,435][train_inner][INFO] - {"epoch": 20, "update": 19.247, "loss": "1.953", "ppl": "3.87", "wps": "391274", "ups": "3.3", "wpb": "118739", "bsz": "256", "num_updates": "990600", "lr": "9.49495e-07", "gnorm": "2.197", "loss_scale": "2", "train_wall": "60", "gb_free": "22.2", "wall": "296259"} +[2022-08-02 21:08:30,688][train_inner][INFO] - {"epoch": 20, "update": 19.251, "loss": "1.955", "ppl": "3.88", "wps": "391084", "ups": "3.32", "wpb": "117819", "bsz": "256", "num_updates": "990800", "lr": "9.29293e-07", "gnorm": "2.173", "loss_scale": "2", "train_wall": "60", "gb_free": "21.3", "wall": "296319"} +[2022-08-02 21:09:29,724][train_inner][INFO] - {"epoch": 20, "update": 19.255, "loss": "1.95", "ppl": "3.86", "wps": "401070", "ups": "3.39", "wpb": "118387", "bsz": "256", "num_updates": "991000", "lr": "9.09091e-07", "gnorm": "2.131", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "296378"} +[2022-08-02 21:10:29,062][train_inner][INFO] - {"epoch": 20, "update": 19.259, "loss": "1.953", "ppl": "3.87", "wps": "398943", "ups": "3.37", "wpb": "118361", "bsz": "256", "num_updates": "991200", "lr": "8.88889e-07", "gnorm": "2.153", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "296437"} +[2022-08-02 21:11:28,783][train_inner][INFO] - {"epoch": 20, "update": 19.263, "loss": "1.951", "ppl": "3.87", "wps": "398101", "ups": "3.35", "wpb": "118875", "bsz": "256", "num_updates": "991400", "lr": "8.68687e-07", "gnorm": "2.131", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "296497"} +[2022-08-02 21:12:28,641][train_inner][INFO] - {"epoch": 20, "update": 19.267, "loss": "1.954", "ppl": "3.87", "wps": "395901", "ups": "3.34", "wpb": "118488", "bsz": "256", "num_updates": "991600", "lr": "8.48485e-07", "gnorm": "2.155", "loss_scale": "2", "train_wall": "60", "gb_free": "24.1", "wall": "296557"} +[2022-08-02 21:13:27,953][train_inner][INFO] - {"epoch": 20, "update": 19.271, "loss": "1.951", "ppl": "3.87", "wps": "400484", "ups": "3.37", "wpb": "118766", "bsz": "256", "num_updates": "991800", "lr": "8.28283e-07", "gnorm": "2.143", "loss_scale": "2", "train_wall": "59", "gb_free": "23.6", "wall": "296616"} +[2022-08-02 21:14:27,718][train_inner][INFO] - {"epoch": 20, "update": 19.275, "loss": "1.953", "ppl": "3.87", "wps": "395442", "ups": "3.35", "wpb": "118169", "bsz": "256", "num_updates": "992000", "lr": "8.08081e-07", "gnorm": "2.138", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "296676"} +[2022-08-02 21:14:42,428][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 21:15:27,377][train_inner][INFO] - {"epoch": 20, "update": 19.279, "loss": "1.956", "ppl": "3.88", "wps": "397668", "ups": "3.35", "wpb": "118620", "bsz": "256", "num_updates": "992200", "lr": "7.87879e-07", "gnorm": "2.19", "loss_scale": "2", "train_wall": "59", "gb_free": "23.9", "wall": "296736"} +[2022-08-02 21:16:26,846][train_inner][INFO] - {"epoch": 20, "update": 19.282, "loss": "1.954", "ppl": "3.87", "wps": "398214", "ups": "3.36", "wpb": "118407", "bsz": "256", "num_updates": "992400", "lr": "7.67677e-07", "gnorm": "2.088", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "296795"} +[2022-08-02 21:17:26,065][train_inner][INFO] - {"epoch": 20, "update": 19.286, "loss": "1.956", "ppl": "3.88", "wps": "397616", "ups": "3.38", "wpb": "117731", "bsz": "256", "num_updates": "992600", "lr": "7.47475e-07", "gnorm": "2.171", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "296854"} +[2022-08-02 21:18:25,707][train_inner][INFO] - {"epoch": 20, "update": 19.29, "loss": "1.955", "ppl": "3.88", "wps": "396898", "ups": "3.35", "wpb": "118359", "bsz": "256", "num_updates": "992800", "lr": "7.27273e-07", "gnorm": "2.151", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "296914"} +[2022-08-02 21:19:25,387][train_inner][INFO] - {"epoch": 20, "update": 19.294, "loss": "1.95", "ppl": "3.86", "wps": "398128", "ups": "3.35", "wpb": "118800", "bsz": "256", "num_updates": "993000", "lr": "7.07071e-07", "gnorm": "2.136", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "296974"} +[2022-08-02 21:20:24,779][train_inner][INFO] - {"epoch": 20, "update": 19.298, "loss": "1.959", "ppl": "3.89", "wps": "398476", "ups": "3.37", "wpb": "118329", "bsz": "256", "num_updates": "993200", "lr": "6.86869e-07", "gnorm": "2.178", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "297033"} +[2022-08-02 21:21:24,282][train_inner][INFO] - {"epoch": 20, "update": 19.302, "loss": "1.955", "ppl": "3.88", "wps": "396573", "ups": "3.36", "wpb": "117987", "bsz": "256", "num_updates": "993400", "lr": "6.66667e-07", "gnorm": "2.109", "loss_scale": "2", "train_wall": "59", "gb_free": "26.1", "wall": "297093"} +[2022-08-02 21:22:24,076][train_inner][INFO] - {"epoch": 20, "update": 19.306, "loss": "1.951", "ppl": "3.87", "wps": "394231", "ups": "3.34", "wpb": "117862", "bsz": "256", "num_updates": "993600", "lr": "6.46465e-07", "gnorm": "2.162", "loss_scale": "2", "train_wall": "59", "gb_free": "27.7", "wall": "297153"} +[2022-08-02 21:23:23,668][train_inner][INFO] - {"epoch": 20, "update": 19.31, "loss": "1.951", "ppl": "3.87", "wps": "399712", "ups": "3.36", "wpb": "119099", "bsz": "256", "num_updates": "993800", "lr": "6.26263e-07", "gnorm": "2.171", "loss_scale": "2", "train_wall": "59", "gb_free": "24.3", "wall": "297212"} +[2022-08-02 21:24:23,282][train_inner][INFO] - {"epoch": 20, "update": 19.313, "loss": "1.953", "ppl": "3.87", "wps": "395198", "ups": "3.35", "wpb": "117795", "bsz": "256", "num_updates": "994000", "lr": "6.06061e-07", "gnorm": "2.182", "loss_scale": "2", "train_wall": "59", "gb_free": "24.6", "wall": "297272"} +[2022-08-02 21:25:03,575][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 21:25:22,866][train_inner][INFO] - {"epoch": 20, "update": 19.317, "loss": "1.956", "ppl": "3.88", "wps": "399714", "ups": "3.36", "wpb": "119083", "bsz": "256", "num_updates": "994200", "lr": "5.85859e-07", "gnorm": "2.158", "loss_scale": "2", "train_wall": "59", "gb_free": "23.3", "wall": "297331"} +[2022-08-02 21:26:22,207][train_inner][INFO] - {"epoch": 20, "update": 19.321, "loss": "1.955", "ppl": "3.88", "wps": "398660", "ups": "3.37", "wpb": "118284", "bsz": "256", "num_updates": "994400", "lr": "5.65657e-07", "gnorm": "2.158", "loss_scale": "2", "train_wall": "59", "gb_free": "28.3", "wall": "297391"} +[2022-08-02 21:27:21,858][train_inner][INFO] - {"epoch": 20, "update": 19.325, "loss": "1.955", "ppl": "3.88", "wps": "397778", "ups": "3.35", "wpb": "118637", "bsz": "256", "num_updates": "994600", "lr": "5.45455e-07", "gnorm": "2.15", "loss_scale": "2", "train_wall": "59", "gb_free": "24.2", "wall": "297450"} +[2022-08-02 21:28:21,268][train_inner][INFO] - {"epoch": 20, "update": 19.329, "loss": "1.955", "ppl": "3.88", "wps": "397318", "ups": "3.37", "wpb": "118023", "bsz": "256", "num_updates": "994800", "lr": "5.25253e-07", "gnorm": "2.166", "loss_scale": "2", "train_wall": "59", "gb_free": "23.1", "wall": "297510"} +[2022-08-02 21:29:21,046][train_inner][INFO] - {"epoch": 20, "update": 19.333, "loss": "1.955", "ppl": "3.88", "wps": "396064", "ups": "3.35", "wpb": "118380", "bsz": "256", "num_updates": "995000", "lr": "5.05051e-07", "gnorm": "2.151", "loss_scale": "2", "train_wall": "59", "gb_free": "23", "wall": "297569"} +[2022-08-02 21:30:20,402][train_inner][INFO] - {"epoch": 20, "update": 19.337, "loss": "1.952", "ppl": "3.87", "wps": "400307", "ups": "3.37", "wpb": "118801", "bsz": "256", "num_updates": "995200", "lr": "4.84848e-07", "gnorm": "2.151", "loss_scale": "2", "train_wall": "59", "gb_free": "25.7", "wall": "297629"} +[2022-08-02 21:31:19,924][train_inner][INFO] - {"epoch": 20, "update": 19.341, "loss": "1.954", "ppl": "3.87", "wps": "398672", "ups": "3.36", "wpb": "118648", "bsz": "256", "num_updates": "995400", "lr": "4.64646e-07", "gnorm": "2.188", "loss_scale": "2", "train_wall": "59", "gb_free": "32.5", "wall": "297688"} +[2022-08-02 21:32:19,316][train_inner][INFO] - {"epoch": 20, "update": 19.345, "loss": "1.954", "ppl": "3.87", "wps": "398392", "ups": "3.37", "wpb": "118306", "bsz": "256", "num_updates": "995600", "lr": "4.44444e-07", "gnorm": "2.11", "loss_scale": "2", "train_wall": "59", "gb_free": "22.3", "wall": "297748"} +[2022-08-02 21:33:18,884][train_inner][INFO] - {"epoch": 20, "update": 19.348, "loss": "1.957", "ppl": "3.88", "wps": "397046", "ups": "3.36", "wpb": "118256", "bsz": "256", "num_updates": "995800", "lr": "4.24242e-07", "gnorm": "2.189", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "297807"} +[2022-08-02 21:34:18,461][train_inner][INFO] - {"epoch": 20, "update": 19.352, "loss": "1.949", "ppl": "3.86", "wps": "398168", "ups": "3.36", "wpb": "118606", "bsz": "256", "num_updates": "996000", "lr": "4.0404e-07", "gnorm": "2.096", "loss_scale": "2", "train_wall": "59", "gb_free": "21.5", "wall": "297867"} +[2022-08-02 21:35:19,287][train_inner][INFO] - {"epoch": 20, "update": 19.356, "loss": "1.954", "ppl": "3.87", "wps": "389050", "ups": "3.29", "wpb": "118322", "bsz": "256", "num_updates": "996200", "lr": "3.83838e-07", "gnorm": "2.116", "loss_scale": "4", "train_wall": "60", "gb_free": "21.3", "wall": "297928"} +[2022-08-02 21:35:30,668][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 21:36:19,259][train_inner][INFO] - {"epoch": 20, "update": 19.36, "loss": "1.961", "ppl": "3.89", "wps": "394384", "ups": "3.33", "wpb": "118259", "bsz": "256", "num_updates": "996400", "lr": "3.63636e-07", "gnorm": "2.114", "loss_scale": "2", "train_wall": "60", "gb_free": "21.4", "wall": "297988"} +[2022-08-02 21:37:18,747][train_inner][INFO] - {"epoch": 20, "update": 19.364, "loss": "1.952", "ppl": "3.87", "wps": "396781", "ups": "3.36", "wpb": "118017", "bsz": "256", "num_updates": "996600", "lr": "3.43434e-07", "gnorm": "2.141", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "298047"} +[2022-08-02 21:38:18,085][train_inner][INFO] - {"epoch": 20, "update": 19.368, "loss": "1.954", "ppl": "3.87", "wps": "396753", "ups": "3.37", "wpb": "117711", "bsz": "256", "num_updates": "996800", "lr": "3.23232e-07", "gnorm": "2.159", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "298107"} +[2022-08-02 21:39:17,726][train_inner][INFO] - {"epoch": 20, "update": 19.372, "loss": "1.956", "ppl": "3.88", "wps": "396060", "ups": "3.35", "wpb": "118108", "bsz": "256", "num_updates": "997000", "lr": "3.0303e-07", "gnorm": "2.17", "loss_scale": "2", "train_wall": "59", "gb_free": "22.9", "wall": "298166"} +[2022-08-02 21:40:16,973][train_inner][INFO] - {"epoch": 20, "update": 19.376, "loss": "1.958", "ppl": "3.89", "wps": "399208", "ups": "3.38", "wpb": "118258", "bsz": "256", "num_updates": "997200", "lr": "2.82828e-07", "gnorm": "2.128", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "298225"} +[2022-08-02 21:41:16,602][train_inner][INFO] - {"epoch": 20, "update": 19.38, "loss": "1.956", "ppl": "3.88", "wps": "395823", "ups": "3.35", "wpb": "118011", "bsz": "256", "num_updates": "997400", "lr": "2.62626e-07", "gnorm": "2.18", "loss_scale": "2", "train_wall": "59", "gb_free": "21.3", "wall": "298285"} +[2022-08-02 21:42:16,096][train_inner][INFO] - {"epoch": 20, "update": 19.383, "loss": "1.955", "ppl": "3.88", "wps": "397350", "ups": "3.36", "wpb": "118200", "bsz": "256", "num_updates": "997600", "lr": "2.42424e-07", "gnorm": "2.196", "loss_scale": "2", "train_wall": "59", "gb_free": "25.9", "wall": "298345"} +[2022-08-02 21:43:15,041][train_inner][INFO] - {"epoch": 20, "update": 19.387, "loss": "1.952", "ppl": "3.87", "wps": "402086", "ups": "3.39", "wpb": "118503", "bsz": "256", "num_updates": "997800", "lr": "2.22222e-07", "gnorm": "2.124", "loss_scale": "2", "train_wall": "59", "gb_free": "22.2", "wall": "298403"} +[2022-08-02 21:44:15,673][train_inner][INFO] - {"epoch": 20, "update": 19.391, "loss": "1.95", "ppl": "3.86", "wps": "392931", "ups": "3.3", "wpb": "119120", "bsz": "256", "num_updates": "998000", "lr": "2.0202e-07", "gnorm": "2.133", "loss_scale": "2", "train_wall": "60", "gb_free": "23.6", "wall": "298464"} +[2022-08-02 21:45:14,988][train_inner][INFO] - {"epoch": 20, "update": 19.395, "loss": "1.957", "ppl": "3.88", "wps": "397717", "ups": "3.37", "wpb": "117952", "bsz": "256", "num_updates": "998200", "lr": "1.81818e-07", "gnorm": "2.132", "loss_scale": "2", "train_wall": "59", "gb_free": "25.1", "wall": "298523"} +[2022-08-02 21:45:43,867][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 21:46:14,313][train_inner][INFO] - {"epoch": 20, "update": 19.399, "loss": "1.958", "ppl": "3.89", "wps": "398386", "ups": "3.37", "wpb": "118171", "bsz": "256", "num_updates": "998400", "lr": "1.61616e-07", "gnorm": "2.121", "loss_scale": "2", "train_wall": "59", "gb_free": "21.6", "wall": "298583"} +[2022-08-02 21:47:13,988][train_inner][INFO] - {"epoch": 20, "update": 19.403, "loss": "1.954", "ppl": "3.87", "wps": "395609", "ups": "3.35", "wpb": "118037", "bsz": "256", "num_updates": "998600", "lr": "1.41414e-07", "gnorm": "2.131", "loss_scale": "2", "train_wall": "59", "gb_free": "22.6", "wall": "298642"} +[2022-08-02 21:48:13,535][train_inner][INFO] - {"epoch": 20, "update": 19.407, "loss": "1.954", "ppl": "3.87", "wps": "397546", "ups": "3.36", "wpb": "118363", "bsz": "256", "num_updates": "998800", "lr": "1.21212e-07", "gnorm": "2.164", "loss_scale": "2", "train_wall": "59", "gb_free": "24", "wall": "298702"} +[2022-08-02 21:49:13,070][train_inner][INFO] - {"epoch": 20, "update": 19.411, "loss": "1.951", "ppl": "3.87", "wps": "396120", "ups": "3.36", "wpb": "117915", "bsz": "256", "num_updates": "999000", "lr": "1.0101e-07", "gnorm": "2.121", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "298761"} +[2022-08-02 21:50:12,414][train_inner][INFO] - {"epoch": 20, "update": 19.415, "loss": "1.953", "ppl": "3.87", "wps": "399881", "ups": "3.37", "wpb": "118652", "bsz": "256", "num_updates": "999200", "lr": "8.08081e-08", "gnorm": "2.112", "loss_scale": "2", "train_wall": "59", "gb_free": "23.4", "wall": "298821"} +[2022-08-02 21:51:12,087][train_inner][INFO] - {"epoch": 20, "update": 19.418, "loss": "1.958", "ppl": "3.89", "wps": "395932", "ups": "3.35", "wpb": "118131", "bsz": "256", "num_updates": "999400", "lr": "6.06061e-08", "gnorm": "2.101", "loss_scale": "2", "train_wall": "59", "gb_free": "21.7", "wall": "298881"} +[2022-08-02 21:52:11,472][train_inner][INFO] - {"epoch": 20, "update": 19.422, "loss": "1.953", "ppl": "3.87", "wps": "395861", "ups": "3.37", "wpb": "117541", "bsz": "256", "num_updates": "999600", "lr": "4.0404e-08", "gnorm": "2.137", "loss_scale": "2", "train_wall": "59", "gb_free": "22.4", "wall": "298940"} +[2022-08-02 21:53:10,872][train_inner][INFO] - {"epoch": 20, "update": 19.426, "loss": "1.954", "ppl": "3.87", "wps": "398354", "ups": "3.37", "wpb": "118309", "bsz": "256", "num_updates": "999800", "lr": "2.0202e-08", "gnorm": "2.145", "loss_scale": "2", "train_wall": "59", "gb_free": "26.2", "wall": "298999"} +[2022-08-02 21:54:12,119][train_inner][INFO] - {"epoch": 20, "update": 19.43, "loss": "1.955", "ppl": "3.88", "wps": "386142", "ups": "3.27", "wpb": "118251", "bsz": "256", "num_updates": "1e+06", "lr": "0", "gnorm": "2.174", "loss_scale": "2", "train_wall": "61", "gb_free": "22.4", "wall": "299061"} +[2022-08-02 21:54:12,120][fairseq_cli.train][INFO] - Stopping training due to num_updates: 1000000 >= max_update: 1000000 +[2022-08-02 21:54:12,121][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 21:54:34,973][valid][INFO] - {"epoch": 20, "valid_loss": "1.848", "valid_ppl": "3.6", "valid_wps": "1.58864e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "1e+06", "valid_best_loss": "1.848"} +[2022-08-02 21:54:34,976][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 20 @ 1000000 updates +[2022-08-02 21:54:34,977][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_20_1000000.pt +[2022-08-02 21:54:41,357][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-07-30/10-49-09/0/bert/baseline/checkpoint_20_1000000.pt +[2022-08-02 21:55:05,024][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/baseline/checkpoint_20_1000000.pt (epoch 20 @ 1000000 updates, score 1.848) (writing took 30.047691436484456 seconds) +[2022-08-02 21:55:05,024][fairseq_cli.train][INFO] - end of epoch 20 (average epoch stats below) +[2022-08-02 21:55:05,025][train][INFO] - {"epoch": 20, "train_loss": "1.955", "train_ppl": "3.88", "train_wps": "393680", "train_ups": "3.33", "train_wpb": "118315", "train_bsz": "256", "train_num_updates": "1e+06", "train_lr": "0", "train_gnorm": "2.229", "train_loss_scale": "2", "train_train_wall": "6552", "train_gb_free": "22.4", "train_wall": "299113"} +[2022-08-02 21:55:05,025][fairseq_cli.train][INFO] - done training in 299092.2 seconds diff --git a/NLP/BERT/exp_results/pretrain/hydra_train-adan-2.log b/NLP/BERT/exp_results/pretrain/hydra_train-adan-2.log new file mode 100644 index 0000000..ae8fd3f --- /dev/null +++ b/NLP/BERT/exp_results/pretrain/hydra_train-adan-2.log @@ -0,0 +1,1353 @@ +[2022-08-04 15:15:46,666][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 8, 'distributed_num_procs': 8, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': 'tcp://localhost:14718', 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 8, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': True, 'max_tokens': None, 'batch_size': 32, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': True, 'validate_interval': 5, 'validate_interval_updates': 50000, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 32, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 1000000, 'stop_time_hours': 0.0, 'clip_norm': 5.0, 'sentence_avg': False, 'update_freq': [1], 'lr': [0.001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False}, 'checkpoint': {'_name': None, 'save_dir': '/vit-opt/fairseq/bert/adan2/', 'restore_file': '/vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 5, 'save_interval_updates': 50000, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 8}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'roberta', 'max_positions': 512, 'dropout': 0.1, 'attention_dropout': 0.1}, 'task': {'_name': 'masked_lm', 'data': '/dataset/common/bert-corpus-0729/', 'sample_break_mode': complete, 'tokens_per_sample': 512, 'mask_prob': 0.15, 'leave_unmasked_prob': 0.1, 'random_token_prob': 0.1, 'freq_weighted_replacement': False, 'mask_whole_words': False, 'mask_multiple_length': 1, 'mask_stdev': 0.0, 'shorten_method': none, 'shorten_data_split_list': '', 'seed': 1, 'include_target_tokens': False}, 'criterion': {'_name': 'masked_lm', 'tpu': False}, 'optimizer': {'_name': 'adan', 'adan_betas': '(0.98,0.92,0.99)', 'adan_eps': 1e-08, 'weight_decay': 0.02, 'no_prox': False, 'fp16_adan_stats': False, 'tpu': False, 'lr': [0.001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 10000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 1000000.0, 'lr': [0.001]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} +[2022-08-04 15:15:46,732][fairseq.tasks.masked_lm][INFO] - dictionary: 50264 types +[2022-08-04 15:15:51,529][fairseq_cli.train][INFO] - RobertaModel( + (encoder): RobertaEncoder( + (sentence_encoder): TransformerEncoder( + (dropout_module): FairseqDropout() + (embed_tokens): Embedding(50265, 768, padding_idx=1) + (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1) + (layernorm_embedding): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (layers): ModuleList( + (0): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (1): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (2): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (3): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (4): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (5): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (6): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (7): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (8): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (9): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (10): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (11): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (lm_head): RobertaLMHead( + (dense): Linear(in_features=768, out_features=768, bias=True) + (layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + ) + (classification_heads): ModuleDict() +) +[2022-08-04 15:15:51,531][fairseq_cli.train][INFO] - task: MaskedLMTask +[2022-08-04 15:15:51,531][fairseq_cli.train][INFO] - model: RobertaModel +[2022-08-04 15:15:51,531][fairseq_cli.train][INFO] - criterion: MaskedLmLoss +[2022-08-04 15:15:51,533][fairseq_cli.train][INFO] - num. shared model params: 209,714,265 (num. trained: 209,714,265) +[2022-08-04 15:15:51,534][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) +[2022-08-04 15:15:51,593][fairseq.data.data_utils][INFO] - loaded 1,066,112 examples from: /dataset/common/bert-corpus-0729/valid +[2022-08-04 15:15:51,627][fairseq.tasks.masked_lm][INFO] - loaded 67780 blocks from: /dataset/common/bert-corpus-0729/valid +[2022-08-04 15:15:56,332][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:2 to store for rank: 0 +[2022-08-04 15:15:56,419][torch.distributed.distributed_c10d][INFO] - Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 8 nodes. +[2022-08-04 15:15:56,419][fairseq.trainer][INFO] - detected shared parameter: encoder.sentence_encoder.embed_tokens.weight <- encoder.lm_head.weight +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 0: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 1: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 2: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 3: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 4: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 5: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 6: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - rank 7: capabilities = 8.0 ; total memory = 39.586 GB ; name = NVIDIA A100-SXM4-40GB +[2022-08-04 15:16:01,880][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** +[2022-08-04 15:16:01,880][fairseq_cli.train][INFO] - training on 8 devices (GPUs/TPUs) +[2022-08-04 15:16:01,881][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 32 +[2022-08-04 15:16:01,882][fairseq.trainer][INFO] - Preparing to load checkpoint /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_last.pt +[2022-08-04 15:16:39,084][fairseq.trainer][INFO] - Loaded checkpoint /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_last.pt (epoch 16 @ 800000 updates) +[2022-08-04 15:16:39,085][fairseq.trainer][INFO] - loading train data for epoch 16 +[2022-08-04 15:16:58,982][fairseq.data.data_utils][INFO] - loaded 214,960,826 examples from: /dataset/common/bert-corpus-0729/train +[2022-08-04 15:17:05,506][fairseq.tasks.masked_lm][INFO] - loaded 13244396 blocks from: /dataset/common/bert-corpus-0729/train +[2022-08-04 15:17:09,370][fairseq.tasks.fairseq_task][WARNING] - 63,646 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[11407155, 8928673, 3941655, 2058309, 2058002, 11308513, 1003447, 2725530, 13205669, 7271248] +[2022-08-04 15:17:19,496][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-04 15:17:19,499][fairseq.trainer][INFO] - begin training epoch 16 +[2022-08-04 15:17:19,499][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-04 15:17:31,869][fairseq.modules.cross_entropy][INFO] - using fused cross entropy +[2022-08-04 15:18:38,274][train_inner][INFO] - {"epoch": 16, "update": 15.548, "loss": "1.998", "ppl": "4", "wps": "166079", "ups": "1.41", "wpb": "118010", "bsz": "256", "num_updates": "800200", "lr": "0.000201818", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "21", "wall": "0"} +[2022-08-04 15:19:43,969][train_inner][INFO] - {"epoch": 16, "update": 15.552, "loss": "1.993", "ppl": "3.98", "wps": "360530", "ups": "3.04", "wpb": "118423", "bsz": "256", "num_updates": "800400", "lr": "0.000201616", "gnorm": "0.794", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 15:20:49,486][train_inner][INFO] - {"epoch": 16, "update": 15.555, "loss": "1.99", "ppl": "3.97", "wps": "362330", "ups": "3.05", "wpb": "118692", "bsz": "256", "num_updates": "800600", "lr": "0.000201414", "gnorm": "0.793", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 15:21:54,685][train_inner][INFO] - {"epoch": 16, "update": 15.559, "loss": "1.996", "ppl": "3.99", "wps": "362158", "ups": "3.07", "wpb": "118061", "bsz": "256", "num_updates": "800800", "lr": "0.000201212", "gnorm": "0.797", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 15:23:00,221][train_inner][INFO] - {"epoch": 16, "update": 15.563, "loss": "1.996", "ppl": "3.99", "wps": "360573", "ups": "3.05", "wpb": "118150", "bsz": "256", "num_updates": "801000", "lr": "0.00020101", "gnorm": "0.802", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 15:24:05,423][train_inner][INFO] - {"epoch": 16, "update": 15.567, "loss": "1.989", "ppl": "3.97", "wps": "361196", "ups": "3.07", "wpb": "117750", "bsz": "256", "num_updates": "801200", "lr": "0.000200808", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 15:25:11,176][train_inner][INFO] - {"epoch": 16, "update": 15.571, "loss": "1.992", "ppl": "3.98", "wps": "359751", "ups": "3.04", "wpb": "118272", "bsz": "256", "num_updates": "801400", "lr": "0.000200606", "gnorm": "0.799", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 15:26:16,853][train_inner][INFO] - {"epoch": 16, "update": 15.575, "loss": "1.992", "ppl": "3.98", "wps": "361349", "ups": "3.05", "wpb": "118660", "bsz": "256", "num_updates": "801600", "lr": "0.000200404", "gnorm": "0.792", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-04 15:27:22,549][train_inner][INFO] - {"epoch": 16, "update": 15.579, "loss": "1.992", "ppl": "3.98", "wps": "360688", "ups": "3.04", "wpb": "118471", "bsz": "256", "num_updates": "801800", "lr": "0.000200202", "gnorm": "0.793", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26", "wall": "0"} +[2022-08-04 15:28:27,967][train_inner][INFO] - {"epoch": 16, "update": 15.583, "loss": "1.993", "ppl": "3.98", "wps": "363022", "ups": "3.06", "wpb": "118730", "bsz": "256", "num_updates": "802000", "lr": "0.0002", "gnorm": "0.797", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 15:29:33,557][train_inner][INFO] - {"epoch": 16, "update": 15.586, "loss": "1.994", "ppl": "3.98", "wps": "360114", "ups": "3.05", "wpb": "118099", "bsz": "256", "num_updates": "802200", "lr": "0.000199798", "gnorm": "0.802", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 15:30:39,151][train_inner][INFO] - {"epoch": 16, "update": 15.59, "loss": "1.988", "ppl": "3.97", "wps": "360129", "ups": "3.05", "wpb": "118108", "bsz": "256", "num_updates": "802400", "lr": "0.000199596", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 15:31:44,723][train_inner][INFO] - {"epoch": 16, "update": 15.594, "loss": "1.983", "ppl": "3.95", "wps": "360167", "ups": "3.05", "wpb": "118083", "bsz": "256", "num_updates": "802600", "lr": "0.000199394", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 15:32:50,117][train_inner][INFO] - {"epoch": 16, "update": 15.598, "loss": "1.983", "ppl": "3.95", "wps": "362846", "ups": "3.06", "wpb": "118638", "bsz": "256", "num_updates": "802800", "lr": "0.000199192", "gnorm": "0.795", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.5", "wall": "0"} +[2022-08-04 15:33:55,892][train_inner][INFO] - {"epoch": 16, "update": 15.602, "loss": "1.994", "ppl": "3.98", "wps": "359686", "ups": "3.04", "wpb": "118290", "bsz": "256", "num_updates": "803000", "lr": "0.00019899", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 15:35:01,506][train_inner][INFO] - {"epoch": 16, "update": 15.606, "loss": "1.988", "ppl": "3.97", "wps": "360696", "ups": "3.05", "wpb": "118330", "bsz": "256", "num_updates": "803200", "lr": "0.000198788", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-04 15:36:06,905][train_inner][INFO] - {"epoch": 16, "update": 15.61, "loss": "1.99", "ppl": "3.97", "wps": "362056", "ups": "3.06", "wpb": "118390", "bsz": "256", "num_updates": "803400", "lr": "0.000198586", "gnorm": "0.809", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-04 15:37:12,334][train_inner][INFO] - {"epoch": 16, "update": 15.614, "loss": "1.983", "ppl": "3.95", "wps": "360846", "ups": "3.06", "wpb": "118046", "bsz": "256", "num_updates": "803600", "lr": "0.000198384", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 15:38:17,418][train_inner][INFO] - {"epoch": 16, "update": 15.618, "loss": "1.986", "ppl": "3.96", "wps": "365191", "ups": "3.07", "wpb": "118835", "bsz": "256", "num_updates": "803800", "lr": "0.000198182", "gnorm": "0.791", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 15:39:22,546][train_inner][INFO] - {"epoch": 16, "update": 15.621, "loss": "1.986", "ppl": "3.96", "wps": "361490", "ups": "3.07", "wpb": "117704", "bsz": "256", "num_updates": "804000", "lr": "0.00019798", "gnorm": "0.803", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.3", "wall": "0"} +[2022-08-04 15:40:28,070][train_inner][INFO] - {"epoch": 16, "update": 15.625, "loss": "1.986", "ppl": "3.96", "wps": "361812", "ups": "3.05", "wpb": "118534", "bsz": "256", "num_updates": "804200", "lr": "0.000197778", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 15:41:33,670][train_inner][INFO] - {"epoch": 16, "update": 15.629, "loss": "1.986", "ppl": "3.96", "wps": "361088", "ups": "3.05", "wpb": "118436", "bsz": "256", "num_updates": "804400", "lr": "0.000197576", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.9", "wall": "0"} +[2022-08-04 15:42:38,868][train_inner][INFO] - {"epoch": 16, "update": 15.633, "loss": "1.979", "ppl": "3.94", "wps": "364314", "ups": "3.07", "wpb": "118760", "bsz": "256", "num_updates": "804600", "lr": "0.000197374", "gnorm": "0.792", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 15:43:44,744][train_inner][INFO] - {"epoch": 16, "update": 15.637, "loss": "1.991", "ppl": "3.97", "wps": "359875", "ups": "3.04", "wpb": "118534", "bsz": "255.9", "num_updates": "804800", "lr": "0.000197172", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.8", "wall": "0"} +[2022-08-04 15:44:50,242][train_inner][INFO] - {"epoch": 16, "update": 15.641, "loss": "1.982", "ppl": "3.95", "wps": "361548", "ups": "3.05", "wpb": "118400", "bsz": "256", "num_updates": "805000", "lr": "0.00019697", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 15:45:55,306][train_inner][INFO] - {"epoch": 16, "update": 15.645, "loss": "1.982", "ppl": "3.95", "wps": "363908", "ups": "3.07", "wpb": "118385", "bsz": "256", "num_updates": "805200", "lr": "0.000196768", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-04 15:47:00,206][train_inner][INFO] - {"epoch": 16, "update": 15.649, "loss": "1.986", "ppl": "3.96", "wps": "364389", "ups": "3.08", "wpb": "118241", "bsz": "256", "num_updates": "805400", "lr": "0.000196566", "gnorm": "0.8", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "0"} +[2022-08-04 15:48:05,324][train_inner][INFO] - {"epoch": 16, "update": 15.653, "loss": "1.993", "ppl": "3.98", "wps": "364119", "ups": "3.07", "wpb": "118553", "bsz": "256", "num_updates": "805600", "lr": "0.000196364", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 15:49:10,651][train_inner][INFO] - {"epoch": 16, "update": 15.656, "loss": "1.988", "ppl": "3.97", "wps": "360559", "ups": "3.06", "wpb": "117764", "bsz": "256", "num_updates": "805800", "lr": "0.000196162", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 15:50:15,735][train_inner][INFO] - {"epoch": 16, "update": 15.66, "loss": "1.991", "ppl": "3.97", "wps": "362719", "ups": "3.07", "wpb": "118025", "bsz": "256", "num_updates": "806000", "lr": "0.00019596", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 15:51:21,443][train_inner][INFO] - {"epoch": 16, "update": 15.664, "loss": "1.986", "ppl": "3.96", "wps": "359979", "ups": "3.04", "wpb": "118267", "bsz": "256", "num_updates": "806200", "lr": "0.000195758", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "0"} +[2022-08-04 15:52:26,837][train_inner][INFO] - {"epoch": 16, "update": 15.668, "loss": "1.988", "ppl": "3.97", "wps": "361311", "ups": "3.06", "wpb": "118136", "bsz": "256", "num_updates": "806400", "lr": "0.000195556", "gnorm": "0.797", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 15:53:32,471][train_inner][INFO] - {"epoch": 16, "update": 15.672, "loss": "1.983", "ppl": "3.95", "wps": "361732", "ups": "3.05", "wpb": "118707", "bsz": "255.9", "num_updates": "806600", "lr": "0.000195354", "gnorm": "0.795", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 15:54:37,752][train_inner][INFO] - {"epoch": 16, "update": 15.676, "loss": "1.99", "ppl": "3.97", "wps": "363302", "ups": "3.06", "wpb": "118583", "bsz": "256", "num_updates": "806800", "lr": "0.000195152", "gnorm": "0.8", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.3", "wall": "0"} +[2022-08-04 15:55:42,854][train_inner][INFO] - {"epoch": 16, "update": 15.68, "loss": "1.986", "ppl": "3.96", "wps": "362627", "ups": "3.07", "wpb": "118037", "bsz": "256", "num_updates": "807000", "lr": "0.000194949", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 15:56:43,810][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 15:56:48,408][train_inner][INFO] - {"epoch": 16, "update": 15.684, "loss": "1.988", "ppl": "3.97", "wps": "358420", "ups": "3.05", "wpb": "117476", "bsz": "256", "num_updates": "807200", "lr": "0.000194747", "gnorm": "0.804", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 15:57:54,038][train_inner][INFO] - {"epoch": 16, "update": 15.688, "loss": "1.986", "ppl": "3.96", "wps": "361549", "ups": "3.05", "wpb": "118641", "bsz": "256", "num_updates": "807400", "lr": "0.000194545", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 15:58:59,833][train_inner][INFO] - {"epoch": 16, "update": 15.691, "loss": "1.981", "ppl": "3.95", "wps": "360288", "ups": "3.04", "wpb": "118524", "bsz": "256", "num_updates": "807600", "lr": "0.000194343", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 16:00:04,936][train_inner][INFO] - {"epoch": 16, "update": 15.695, "loss": "1.99", "ppl": "3.97", "wps": "362345", "ups": "3.07", "wpb": "117943", "bsz": "256", "num_updates": "807800", "lr": "0.000194141", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-04 16:01:10,450][train_inner][INFO] - {"epoch": 16, "update": 15.699, "loss": "1.993", "ppl": "3.98", "wps": "359926", "ups": "3.05", "wpb": "117874", "bsz": "256", "num_updates": "808000", "lr": "0.000193939", "gnorm": "0.801", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-04 16:02:15,476][train_inner][INFO] - {"epoch": 16, "update": 15.703, "loss": "1.987", "ppl": "3.96", "wps": "364216", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "808200", "lr": "0.000193737", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 16:03:21,140][train_inner][INFO] - {"epoch": 16, "update": 15.707, "loss": "1.982", "ppl": "3.95", "wps": "361209", "ups": "3.05", "wpb": "118589", "bsz": "256", "num_updates": "808400", "lr": "0.000193535", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.7", "wall": "0"} +[2022-08-04 16:04:26,958][train_inner][INFO] - {"epoch": 16, "update": 15.711, "loss": "1.986", "ppl": "3.96", "wps": "360491", "ups": "3.04", "wpb": "118632", "bsz": "256", "num_updates": "808600", "lr": "0.000193333", "gnorm": "0.798", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 16:05:31,808][train_inner][INFO] - {"epoch": 16, "update": 15.715, "loss": "1.978", "ppl": "3.94", "wps": "365928", "ups": "3.08", "wpb": "118651", "bsz": "256", "num_updates": "808800", "lr": "0.000193131", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "0"} +[2022-08-04 16:06:36,856][train_inner][INFO] - {"epoch": 16, "update": 15.719, "loss": "1.988", "ppl": "3.97", "wps": "362396", "ups": "3.07", "wpb": "117863", "bsz": "256", "num_updates": "809000", "lr": "0.000192929", "gnorm": "0.815", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 16:07:42,132][train_inner][INFO] - {"epoch": 16, "update": 15.722, "loss": "1.986", "ppl": "3.96", "wps": "362096", "ups": "3.06", "wpb": "118180", "bsz": "256", "num_updates": "809200", "lr": "0.000192727", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 16:08:47,543][train_inner][INFO] - {"epoch": 16, "update": 15.726, "loss": "1.981", "ppl": "3.95", "wps": "362847", "ups": "3.06", "wpb": "118669", "bsz": "256", "num_updates": "809400", "lr": "0.000192525", "gnorm": "0.798", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-04 16:09:53,092][train_inner][INFO] - {"epoch": 16, "update": 15.73, "loss": "1.982", "ppl": "3.95", "wps": "361728", "ups": "3.05", "wpb": "118551", "bsz": "256", "num_updates": "809600", "lr": "0.000192323", "gnorm": "0.796", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 16:10:57,962][train_inner][INFO] - {"epoch": 16, "update": 15.734, "loss": "1.988", "ppl": "3.97", "wps": "364133", "ups": "3.08", "wpb": "118101", "bsz": "256", "num_updates": "809800", "lr": "0.000192121", "gnorm": "0.803", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 16:12:03,417][train_inner][INFO] - {"epoch": 16, "update": 15.738, "loss": "1.988", "ppl": "3.97", "wps": "361945", "ups": "3.06", "wpb": "118446", "bsz": "256", "num_updates": "810000", "lr": "0.000191919", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 16:13:08,852][train_inner][INFO] - {"epoch": 16, "update": 15.742, "loss": "1.99", "ppl": "3.97", "wps": "360611", "ups": "3.06", "wpb": "117980", "bsz": "256", "num_updates": "810200", "lr": "0.000191717", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-04 16:14:14,191][train_inner][INFO] - {"epoch": 16, "update": 15.746, "loss": "1.984", "ppl": "3.96", "wps": "363233", "ups": "3.06", "wpb": "118664", "bsz": "256", "num_updates": "810400", "lr": "0.000191515", "gnorm": "0.8", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 16:15:19,675][train_inner][INFO] - {"epoch": 16, "update": 15.75, "loss": "1.979", "ppl": "3.94", "wps": "361800", "ups": "3.05", "wpb": "118459", "bsz": "256", "num_updates": "810600", "lr": "0.000191313", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 16:16:24,977][train_inner][INFO] - {"epoch": 16, "update": 15.754, "loss": "1.982", "ppl": "3.95", "wps": "361602", "ups": "3.06", "wpb": "118063", "bsz": "256", "num_updates": "810800", "lr": "0.000191111", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 16:17:30,333][train_inner][INFO] - {"epoch": 16, "update": 15.757, "loss": "1.983", "ppl": "3.95", "wps": "361540", "ups": "3.06", "wpb": "118143", "bsz": "256", "num_updates": "811000", "lr": "0.000190909", "gnorm": "0.802", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 16:18:35,509][train_inner][INFO] - {"epoch": 16, "update": 15.761, "loss": "1.988", "ppl": "3.97", "wps": "362745", "ups": "3.07", "wpb": "118209", "bsz": "256", "num_updates": "811200", "lr": "0.000190707", "gnorm": "0.802", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.4", "wall": "0"} +[2022-08-04 16:19:02,683][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 16:19:41,253][train_inner][INFO] - {"epoch": 16, "update": 15.765, "loss": "1.986", "ppl": "3.96", "wps": "358390", "ups": "3.04", "wpb": "117809", "bsz": "256", "num_updates": "811400", "lr": "0.000190505", "gnorm": "0.804", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 16:20:46,406][train_inner][INFO] - {"epoch": 16, "update": 15.769, "loss": "1.982", "ppl": "3.95", "wps": "362630", "ups": "3.07", "wpb": "118131", "bsz": "256", "num_updates": "811600", "lr": "0.000190303", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 16:21:51,569][train_inner][INFO] - {"epoch": 16, "update": 15.773, "loss": "1.99", "ppl": "3.97", "wps": "362770", "ups": "3.07", "wpb": "118189", "bsz": "256", "num_updates": "811800", "lr": "0.000190101", "gnorm": "0.803", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 16:22:56,646][train_inner][INFO] - {"epoch": 16, "update": 15.777, "loss": "1.98", "ppl": "3.95", "wps": "363888", "ups": "3.07", "wpb": "118395", "bsz": "256", "num_updates": "812000", "lr": "0.000189899", "gnorm": "0.798", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 16:24:01,989][train_inner][INFO] - {"epoch": 16, "update": 15.781, "loss": "1.988", "ppl": "3.97", "wps": "361578", "ups": "3.06", "wpb": "118131", "bsz": "256", "num_updates": "812200", "lr": "0.000189697", "gnorm": "0.804", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 16:25:07,069][train_inner][INFO] - {"epoch": 16, "update": 15.785, "loss": "1.98", "ppl": "3.94", "wps": "363135", "ups": "3.07", "wpb": "118161", "bsz": "256", "num_updates": "812400", "lr": "0.000189495", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "0"} +[2022-08-04 16:26:12,351][train_inner][INFO] - {"epoch": 16, "update": 15.789, "loss": "1.983", "ppl": "3.95", "wps": "363022", "ups": "3.06", "wpb": "118492", "bsz": "256", "num_updates": "812600", "lr": "0.000189293", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 16:27:17,556][train_inner][INFO] - {"epoch": 16, "update": 15.792, "loss": "1.981", "ppl": "3.95", "wps": "363648", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "812800", "lr": "0.000189091", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 16:28:23,099][train_inner][INFO] - {"epoch": 16, "update": 15.796, "loss": "1.986", "ppl": "3.96", "wps": "359573", "ups": "3.05", "wpb": "117835", "bsz": "256", "num_updates": "813000", "lr": "0.000188889", "gnorm": "0.807", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 16:29:28,653][train_inner][INFO] - {"epoch": 16, "update": 15.8, "loss": "1.989", "ppl": "3.97", "wps": "358976", "ups": "3.05", "wpb": "117660", "bsz": "256", "num_updates": "813200", "lr": "0.000188687", "gnorm": "0.809", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 16:30:11,856][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 16:30:34,464][train_inner][INFO] - {"epoch": 16, "update": 15.804, "loss": "1.978", "ppl": "3.94", "wps": "360631", "ups": "3.04", "wpb": "118665", "bsz": "256", "num_updates": "813400", "lr": "0.000188485", "gnorm": "0.8", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 16:31:40,004][train_inner][INFO] - {"epoch": 16, "update": 15.808, "loss": "1.982", "ppl": "3.95", "wps": "360547", "ups": "3.05", "wpb": "118151", "bsz": "256", "num_updates": "813600", "lr": "0.000188283", "gnorm": "0.802", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 16:32:45,264][train_inner][INFO] - {"epoch": 16, "update": 15.812, "loss": "1.985", "ppl": "3.96", "wps": "360327", "ups": "3.06", "wpb": "117569", "bsz": "256", "num_updates": "813800", "lr": "0.000188081", "gnorm": "0.807", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.3", "wall": "0"} +[2022-08-04 16:33:51,123][train_inner][INFO] - {"epoch": 16, "update": 15.816, "loss": "1.98", "ppl": "3.95", "wps": "359854", "ups": "3.04", "wpb": "118486", "bsz": "256", "num_updates": "814000", "lr": "0.000187879", "gnorm": "0.798", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 16:34:50,013][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 16:34:56,981][train_inner][INFO] - {"epoch": 16, "update": 15.82, "loss": "1.979", "ppl": "3.94", "wps": "359800", "ups": "3.04", "wpb": "118476", "bsz": "256", "num_updates": "814200", "lr": "0.000187677", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.1", "wall": "0"} +[2022-08-04 16:36:02,595][train_inner][INFO] - {"epoch": 16, "update": 15.824, "loss": "1.981", "ppl": "3.95", "wps": "360390", "ups": "3.05", "wpb": "118232", "bsz": "256", "num_updates": "814400", "lr": "0.000187475", "gnorm": "0.806", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 16:37:08,205][train_inner][INFO] - {"epoch": 16, "update": 15.827, "loss": "1.983", "ppl": "3.95", "wps": "361872", "ups": "3.05", "wpb": "118710", "bsz": "256", "num_updates": "814600", "lr": "0.000187273", "gnorm": "0.8", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "0"} +[2022-08-04 16:38:13,160][train_inner][INFO] - {"epoch": 16, "update": 15.831, "loss": "1.988", "ppl": "3.97", "wps": "363819", "ups": "3.08", "wpb": "118158", "bsz": "256", "num_updates": "814800", "lr": "0.000187071", "gnorm": "0.806", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-04 16:39:18,124][train_inner][INFO] - {"epoch": 16, "update": 15.835, "loss": "1.983", "ppl": "3.95", "wps": "364849", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "815000", "lr": "0.000186869", "gnorm": "0.806", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 16:40:23,235][train_inner][INFO] - {"epoch": 16, "update": 15.839, "loss": "1.981", "ppl": "3.95", "wps": "363256", "ups": "3.07", "wpb": "118259", "bsz": "256", "num_updates": "815200", "lr": "0.000186667", "gnorm": "0.806", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 16:40:50,942][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 16:41:28,628][train_inner][INFO] - {"epoch": 16, "update": 15.843, "loss": "1.978", "ppl": "3.94", "wps": "362924", "ups": "3.06", "wpb": "118660", "bsz": "256", "num_updates": "815400", "lr": "0.000186465", "gnorm": "0.803", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 16:42:34,097][train_inner][INFO] - {"epoch": 16, "update": 15.847, "loss": "1.98", "ppl": "3.94", "wps": "360981", "ups": "3.05", "wpb": "118165", "bsz": "256", "num_updates": "815600", "lr": "0.000186263", "gnorm": "0.808", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.6", "wall": "0"} +[2022-08-04 16:43:39,542][train_inner][INFO] - {"epoch": 16, "update": 15.851, "loss": "1.978", "ppl": "3.94", "wps": "362252", "ups": "3.06", "wpb": "118530", "bsz": "256", "num_updates": "815800", "lr": "0.000186061", "gnorm": "0.806", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 16:44:45,087][train_inner][INFO] - {"epoch": 16, "update": 15.855, "loss": "1.979", "ppl": "3.94", "wps": "360715", "ups": "3.05", "wpb": "118203", "bsz": "256", "num_updates": "816000", "lr": "0.000185859", "gnorm": "0.803", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 16:45:50,669][train_inner][INFO] - {"epoch": 16, "update": 15.858, "loss": "1.979", "ppl": "3.94", "wps": "362788", "ups": "3.05", "wpb": "118960", "bsz": "256", "num_updates": "816200", "lr": "0.000185657", "gnorm": "0.801", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 16:46:56,165][train_inner][INFO] - {"epoch": 16, "update": 15.862, "loss": "1.984", "ppl": "3.96", "wps": "361123", "ups": "3.05", "wpb": "118259", "bsz": "256", "num_updates": "816400", "lr": "0.000185455", "gnorm": "0.805", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 16:48:01,782][train_inner][INFO] - {"epoch": 16, "update": 15.866, "loss": "1.973", "ppl": "3.93", "wps": "362098", "ups": "3.05", "wpb": "118796", "bsz": "256", "num_updates": "816600", "lr": "0.000185253", "gnorm": "0.802", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 16:49:07,248][train_inner][INFO] - {"epoch": 16, "update": 15.87, "loss": "1.981", "ppl": "3.95", "wps": "362398", "ups": "3.06", "wpb": "118622", "bsz": "256", "num_updates": "816800", "lr": "0.000185051", "gnorm": "0.805", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-04 16:50:12,833][train_inner][INFO] - {"epoch": 16, "update": 15.874, "loss": "1.981", "ppl": "3.95", "wps": "362898", "ups": "3.05", "wpb": "118997", "bsz": "256", "num_updates": "817000", "lr": "0.000184848", "gnorm": "0.804", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 16:51:17,775][train_inner][INFO] - {"epoch": 16, "update": 15.878, "loss": "1.983", "ppl": "3.95", "wps": "364540", "ups": "3.08", "wpb": "118368", "bsz": "256", "num_updates": "817200", "lr": "0.000184646", "gnorm": "0.801", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.5", "wall": "0"} +[2022-08-04 16:52:22,975][train_inner][INFO] - {"epoch": 16, "update": 15.882, "loss": "1.978", "ppl": "3.94", "wps": "363005", "ups": "3.07", "wpb": "118337", "bsz": "256", "num_updates": "817400", "lr": "0.000184444", "gnorm": "0.804", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 16:53:28,387][train_inner][INFO] - {"epoch": 16, "update": 15.886, "loss": "1.98", "ppl": "3.95", "wps": "361218", "ups": "3.06", "wpb": "118139", "bsz": "256", "num_updates": "817600", "lr": "0.000184242", "gnorm": "0.805", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 16:54:33,492][train_inner][INFO] - {"epoch": 16, "update": 15.89, "loss": "1.981", "ppl": "3.95", "wps": "363164", "ups": "3.07", "wpb": "118213", "bsz": "256", "num_updates": "817800", "lr": "0.00018404", "gnorm": "0.812", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 16:55:38,891][train_inner][INFO] - {"epoch": 16, "update": 15.893, "loss": "1.98", "ppl": "3.94", "wps": "361665", "ups": "3.06", "wpb": "118250", "bsz": "256", "num_updates": "818000", "lr": "0.000183838", "gnorm": "0.808", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.7", "wall": "0"} +[2022-08-04 16:56:44,090][train_inner][INFO] - {"epoch": 16, "update": 15.897, "loss": "1.979", "ppl": "3.94", "wps": "362879", "ups": "3.07", "wpb": "118295", "bsz": "256", "num_updates": "818200", "lr": "0.000183636", "gnorm": "0.808", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 16:57:49,419][train_inner][INFO] - {"epoch": 16, "update": 15.901, "loss": "1.977", "ppl": "3.94", "wps": "362147", "ups": "3.06", "wpb": "118292", "bsz": "256", "num_updates": "818400", "lr": "0.000183434", "gnorm": "0.809", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 16:58:54,949][train_inner][INFO] - {"epoch": 16, "update": 15.905, "loss": "1.976", "ppl": "3.93", "wps": "361017", "ups": "3.05", "wpb": "118285", "bsz": "256", "num_updates": "818600", "lr": "0.000183232", "gnorm": "0.805", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-04 17:00:00,317][train_inner][INFO] - {"epoch": 16, "update": 15.909, "loss": "1.979", "ppl": "3.94", "wps": "360732", "ups": "3.06", "wpb": "117899", "bsz": "256", "num_updates": "818800", "lr": "0.00018303", "gnorm": "0.81", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-04 17:01:05,663][train_inner][INFO] - {"epoch": 16, "update": 15.913, "loss": "1.982", "ppl": "3.95", "wps": "361796", "ups": "3.06", "wpb": "118209", "bsz": "256", "num_updates": "819000", "lr": "0.000182828", "gnorm": "0.807", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:02:10,568][train_inner][INFO] - {"epoch": 16, "update": 15.917, "loss": "1.98", "ppl": "3.94", "wps": "365718", "ups": "3.08", "wpb": "118681", "bsz": "256", "num_updates": "819200", "lr": "0.000182626", "gnorm": "0.808", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 17:02:36,769][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 17:03:16,550][train_inner][INFO] - {"epoch": 16, "update": 15.921, "loss": "1.981", "ppl": "3.95", "wps": "357115", "ups": "3.03", "wpb": "117815", "bsz": "256", "num_updates": "819400", "lr": "0.000182424", "gnorm": "0.806", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "27.6", "wall": "0"} +[2022-08-04 17:04:21,168][train_inner][INFO] - {"epoch": 16, "update": 15.925, "loss": "1.976", "ppl": "3.93", "wps": "364213", "ups": "3.1", "wpb": "117673", "bsz": "256", "num_updates": "819600", "lr": "0.000182222", "gnorm": "0.812", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "0"} +[2022-08-04 17:05:26,375][train_inner][INFO] - {"epoch": 16, "update": 15.928, "loss": "1.983", "ppl": "3.95", "wps": "362315", "ups": "3.07", "wpb": "118121", "bsz": "256", "num_updates": "819800", "lr": "0.00018202", "gnorm": "0.811", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.3", "wall": "0"} +[2022-08-04 17:06:31,739][train_inner][INFO] - {"epoch": 16, "update": 15.932, "loss": "1.977", "ppl": "3.94", "wps": "361208", "ups": "3.06", "wpb": "118039", "bsz": "256", "num_updates": "820000", "lr": "0.000181818", "gnorm": "0.812", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:07:36,784][train_inner][INFO] - {"epoch": 16, "update": 15.936, "loss": "1.983", "ppl": "3.95", "wps": "362798", "ups": "3.07", "wpb": "117988", "bsz": "256", "num_updates": "820200", "lr": "0.000181616", "gnorm": "0.808", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 17:08:42,259][train_inner][INFO] - {"epoch": 16, "update": 15.94, "loss": "1.979", "ppl": "3.94", "wps": "362114", "ups": "3.05", "wpb": "118545", "bsz": "256", "num_updates": "820400", "lr": "0.000181414", "gnorm": "0.806", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-04 17:09:47,581][train_inner][INFO] - {"epoch": 16, "update": 15.944, "loss": "1.981", "ppl": "3.95", "wps": "362279", "ups": "3.06", "wpb": "118321", "bsz": "256", "num_updates": "820600", "lr": "0.000181212", "gnorm": "0.808", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-04 17:10:52,675][train_inner][INFO] - {"epoch": 16, "update": 15.948, "loss": "1.983", "ppl": "3.95", "wps": "361983", "ups": "3.07", "wpb": "117812", "bsz": "256", "num_updates": "820800", "lr": "0.00018101", "gnorm": "0.814", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 17:11:58,072][train_inner][INFO] - {"epoch": 16, "update": 15.952, "loss": "1.978", "ppl": "3.94", "wps": "362615", "ups": "3.06", "wpb": "118569", "bsz": "256", "num_updates": "821000", "lr": "0.000180808", "gnorm": "0.808", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 17:13:02,961][train_inner][INFO] - {"epoch": 16, "update": 15.956, "loss": "1.98", "ppl": "3.95", "wps": "363600", "ups": "3.08", "wpb": "117908", "bsz": "256", "num_updates": "821200", "lr": "0.000180606", "gnorm": "0.81", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "0"} +[2022-08-04 17:14:07,845][train_inner][INFO] - {"epoch": 16, "update": 15.96, "loss": "1.975", "ppl": "3.93", "wps": "366077", "ups": "3.08", "wpb": "118761", "bsz": "256", "num_updates": "821400", "lr": "0.000180404", "gnorm": "0.804", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 17:15:13,327][train_inner][INFO] - {"epoch": 16, "update": 15.963, "loss": "1.976", "ppl": "3.93", "wps": "361281", "ups": "3.05", "wpb": "118285", "bsz": "256", "num_updates": "821600", "lr": "0.000180202", "gnorm": "0.807", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-04 17:16:19,050][train_inner][INFO] - {"epoch": 16, "update": 15.967, "loss": "1.979", "ppl": "3.94", "wps": "360858", "ups": "3.04", "wpb": "118577", "bsz": "256", "num_updates": "821800", "lr": "0.00018", "gnorm": "0.805", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "0"} +[2022-08-04 17:17:24,351][train_inner][INFO] - {"epoch": 16, "update": 15.971, "loss": "1.98", "ppl": "3.94", "wps": "361431", "ups": "3.06", "wpb": "117998", "bsz": "256", "num_updates": "822000", "lr": "0.000179798", "gnorm": "0.809", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.6", "wall": "0"} +[2022-08-04 17:18:29,629][train_inner][INFO] - {"epoch": 16, "update": 15.975, "loss": "1.979", "ppl": "3.94", "wps": "362650", "ups": "3.06", "wpb": "118363", "bsz": "256", "num_updates": "822200", "lr": "0.000179596", "gnorm": "0.811", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 17:19:34,610][train_inner][INFO] - {"epoch": 16, "update": 15.979, "loss": "1.985", "ppl": "3.96", "wps": "362748", "ups": "3.08", "wpb": "117856", "bsz": "256", "num_updates": "822400", "lr": "0.000179394", "gnorm": "0.814", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 17:19:53,400][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 17:20:40,272][train_inner][INFO] - {"epoch": 16, "update": 15.983, "loss": "1.973", "ppl": "3.93", "wps": "360288", "ups": "3.05", "wpb": "118285", "bsz": "256", "num_updates": "822600", "lr": "0.000179192", "gnorm": "0.807", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 17:21:45,703][train_inner][INFO] - {"epoch": 16, "update": 15.987, "loss": "1.976", "ppl": "3.93", "wps": "362048", "ups": "3.06", "wpb": "118444", "bsz": "256", "num_updates": "822800", "lr": "0.00017899", "gnorm": "0.806", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:22:50,721][train_inner][INFO] - {"epoch": 16, "update": 15.991, "loss": "1.975", "ppl": "3.93", "wps": "362088", "ups": "3.08", "wpb": "117709", "bsz": "256", "num_updates": "823000", "lr": "0.000178788", "gnorm": "0.816", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.6", "wall": "0"} +[2022-08-04 17:23:55,524][train_inner][INFO] - {"epoch": 16, "update": 15.994, "loss": "1.975", "ppl": "3.93", "wps": "365515", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "823200", "lr": "0.000178586", "gnorm": "0.806", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.8", "wall": "0"} +[2022-08-04 17:25:00,970][train_inner][INFO] - {"epoch": 16, "update": 15.998, "loss": "1.973", "ppl": "3.93", "wps": "361716", "ups": "3.06", "wpb": "118363", "bsz": "256", "num_updates": "823400", "lr": "0.000178384", "gnorm": "0.807", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 17:25:28,243][fairseq_cli.train][INFO] - end of epoch 16 (average epoch stats below) +[2022-08-04 17:25:28,244][train][INFO] - {"epoch": 16, "train_loss": "1.991", "train_ppl": "3.98", "train_wps": "361239", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "823484", "train_lr": "0.000178299", "train_gnorm": "0.794", "train_clip": "0", "train_loss_scale": "4", "train_train_wall": "16674", "train_gb_free": "22.2", "train_wall": "0"} +[2022-08-04 17:25:28,340][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-04 17:25:28,343][fairseq.trainer][INFO] - begin training epoch 17 +[2022-08-04 17:25:28,343][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-04 17:26:18,393][train_inner][INFO] - {"epoch": 17, "update": 16.002, "loss": "1.974", "ppl": "3.93", "wps": "304398", "ups": "2.58", "wpb": "117835", "bsz": "255.4", "num_updates": "823600", "lr": "0.000178182", "gnorm": "0.808", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 17:27:23,762][train_inner][INFO] - {"epoch": 17, "update": 16.006, "loss": "1.975", "ppl": "3.93", "wps": "359637", "ups": "3.06", "wpb": "117543", "bsz": "256", "num_updates": "823800", "lr": "0.00017798", "gnorm": "0.812", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 17:28:29,149][train_inner][INFO] - {"epoch": 17, "update": 16.01, "loss": "1.969", "ppl": "3.91", "wps": "364361", "ups": "3.06", "wpb": "119111", "bsz": "256", "num_updates": "824000", "lr": "0.000177778", "gnorm": "0.802", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "0"} +[2022-08-04 17:29:34,425][train_inner][INFO] - {"epoch": 17, "update": 16.014, "loss": "1.975", "ppl": "3.93", "wps": "362928", "ups": "3.06", "wpb": "118415", "bsz": "256", "num_updates": "824200", "lr": "0.000177576", "gnorm": "0.81", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:30:39,672][train_inner][INFO] - {"epoch": 17, "update": 16.018, "loss": "1.973", "ppl": "3.93", "wps": "363737", "ups": "3.07", "wpb": "118662", "bsz": "256", "num_updates": "824400", "lr": "0.000177374", "gnorm": "0.808", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:31:45,313][train_inner][INFO] - {"epoch": 17, "update": 16.022, "loss": "1.977", "ppl": "3.94", "wps": "360834", "ups": "3.05", "wpb": "118425", "bsz": "256", "num_updates": "824600", "lr": "0.000177172", "gnorm": "0.808", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-04 17:32:50,899][train_inner][INFO] - {"epoch": 17, "update": 16.026, "loss": "1.97", "ppl": "3.92", "wps": "360879", "ups": "3.05", "wpb": "118341", "bsz": "256", "num_updates": "824800", "lr": "0.00017697", "gnorm": "0.811", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "0"} +[2022-08-04 17:33:55,912][train_inner][INFO] - {"epoch": 17, "update": 16.029, "loss": "1.971", "ppl": "3.92", "wps": "364377", "ups": "3.08", "wpb": "118444", "bsz": "256", "num_updates": "825000", "lr": "0.000176768", "gnorm": "0.81", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 17:35:01,185][train_inner][INFO] - {"epoch": 17, "update": 16.033, "loss": "1.969", "ppl": "3.91", "wps": "362418", "ups": "3.06", "wpb": "118278", "bsz": "256", "num_updates": "825200", "lr": "0.000176566", "gnorm": "0.814", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 17:36:06,907][train_inner][INFO] - {"epoch": 17, "update": 16.037, "loss": "1.97", "ppl": "3.92", "wps": "363160", "ups": "3.04", "wpb": "119337", "bsz": "256", "num_updates": "825400", "lr": "0.000176364", "gnorm": "0.807", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-04 17:37:12,215][train_inner][INFO] - {"epoch": 17, "update": 16.041, "loss": "1.97", "ppl": "3.92", "wps": "361534", "ups": "3.06", "wpb": "118049", "bsz": "256", "num_updates": "825600", "lr": "0.000176162", "gnorm": "0.81", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 17:38:17,723][train_inner][INFO] - {"epoch": 17, "update": 16.045, "loss": "1.975", "ppl": "3.93", "wps": "362872", "ups": "3.05", "wpb": "118852", "bsz": "256", "num_updates": "825800", "lr": "0.00017596", "gnorm": "0.806", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 17:39:22,622][train_inner][INFO] - {"epoch": 17, "update": 16.049, "loss": "1.977", "ppl": "3.94", "wps": "363789", "ups": "3.08", "wpb": "118040", "bsz": "256", "num_updates": "826000", "lr": "0.000175758", "gnorm": "0.811", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-04 17:40:27,688][train_inner][INFO] - {"epoch": 17, "update": 16.053, "loss": "1.971", "ppl": "3.92", "wps": "361748", "ups": "3.07", "wpb": "117685", "bsz": "256", "num_updates": "826200", "lr": "0.000175556", "gnorm": "0.812", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 17:41:32,920][train_inner][INFO] - {"epoch": 17, "update": 16.057, "loss": "1.973", "ppl": "3.93", "wps": "362675", "ups": "3.07", "wpb": "118287", "bsz": "256", "num_updates": "826400", "lr": "0.000175354", "gnorm": "0.813", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.2", "wall": "0"} +[2022-08-04 17:42:38,480][train_inner][INFO] - {"epoch": 17, "update": 16.061, "loss": "1.974", "ppl": "3.93", "wps": "361284", "ups": "3.05", "wpb": "118428", "bsz": "256", "num_updates": "826600", "lr": "0.000175152", "gnorm": "0.812", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:43:43,649][train_inner][INFO] - {"epoch": 17, "update": 16.064, "loss": "1.978", "ppl": "3.94", "wps": "364016", "ups": "3.07", "wpb": "118611", "bsz": "256", "num_updates": "826800", "lr": "0.000174949", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-04 17:44:49,021][train_inner][INFO] - {"epoch": 17, "update": 16.068, "loss": "1.972", "ppl": "3.92", "wps": "361479", "ups": "3.06", "wpb": "118151", "bsz": "256", "num_updates": "827000", "lr": "0.000174747", "gnorm": "0.813", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 17:45:54,650][train_inner][INFO] - {"epoch": 17, "update": 16.072, "loss": "1.97", "ppl": "3.92", "wps": "361566", "ups": "3.05", "wpb": "118643", "bsz": "256", "num_updates": "827200", "lr": "0.000174545", "gnorm": "0.812", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.8", "wall": "0"} +[2022-08-04 17:46:59,830][train_inner][INFO] - {"epoch": 17, "update": 16.076, "loss": "1.976", "ppl": "3.94", "wps": "361720", "ups": "3.07", "wpb": "117882", "bsz": "256", "num_updates": "827400", "lr": "0.000174343", "gnorm": "0.816", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-04 17:48:05,528][train_inner][INFO] - {"epoch": 17, "update": 16.08, "loss": "1.973", "ppl": "3.93", "wps": "360210", "ups": "3.04", "wpb": "118320", "bsz": "256", "num_updates": "827600", "lr": "0.000174141", "gnorm": "0.806", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-04 17:49:10,925][train_inner][INFO] - {"epoch": 17, "update": 16.084, "loss": "1.97", "ppl": "3.92", "wps": "362181", "ups": "3.06", "wpb": "118426", "bsz": "256", "num_updates": "827800", "lr": "0.000173939", "gnorm": "0.812", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 17:50:15,697][train_inner][INFO] - {"epoch": 17, "update": 16.088, "loss": "1.972", "ppl": "3.92", "wps": "365563", "ups": "3.09", "wpb": "118379", "bsz": "256", "num_updates": "828000", "lr": "0.000173737", "gnorm": "0.808", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "0"} +[2022-08-04 17:51:20,960][train_inner][INFO] - {"epoch": 17, "update": 16.092, "loss": "1.967", "ppl": "3.91", "wps": "364617", "ups": "3.06", "wpb": "118978", "bsz": "256", "num_updates": "828200", "lr": "0.000173535", "gnorm": "0.808", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 17:52:26,542][train_inner][INFO] - {"epoch": 17, "update": 16.095, "loss": "1.966", "ppl": "3.91", "wps": "360538", "ups": "3.05", "wpb": "118221", "bsz": "256", "num_updates": "828400", "lr": "0.000173333", "gnorm": "0.813", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 17:53:31,947][train_inner][INFO] - {"epoch": 17, "update": 16.099, "loss": "1.967", "ppl": "3.91", "wps": "361800", "ups": "3.06", "wpb": "118316", "bsz": "256", "num_updates": "828600", "lr": "0.000173131", "gnorm": "0.813", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 17:53:34,210][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 17:54:37,913][train_inner][INFO] - {"epoch": 17, "update": 16.103, "loss": "1.977", "ppl": "3.94", "wps": "357031", "ups": "3.03", "wpb": "117758", "bsz": "256", "num_updates": "828800", "lr": "0.000172929", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "0"} +[2022-08-04 17:55:43,499][train_inner][INFO] - {"epoch": 17, "update": 16.107, "loss": "1.971", "ppl": "3.92", "wps": "361061", "ups": "3.05", "wpb": "118399", "bsz": "256", "num_updates": "829000", "lr": "0.000172727", "gnorm": "0.813", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 17:56:49,004][train_inner][INFO] - {"epoch": 17, "update": 16.111, "loss": "1.97", "ppl": "3.92", "wps": "361629", "ups": "3.05", "wpb": "118442", "bsz": "256", "num_updates": "829200", "lr": "0.000172525", "gnorm": "0.812", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.4", "wall": "0"} +[2022-08-04 17:57:54,245][train_inner][INFO] - {"epoch": 17, "update": 16.115, "loss": "1.978", "ppl": "3.94", "wps": "361816", "ups": "3.07", "wpb": "118024", "bsz": "256", "num_updates": "829400", "lr": "0.000172323", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 17:58:59,879][train_inner][INFO] - {"epoch": 17, "update": 16.119, "loss": "1.975", "ppl": "3.93", "wps": "361188", "ups": "3.05", "wpb": "118525", "bsz": "256", "num_updates": "829600", "lr": "0.000172121", "gnorm": "0.811", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-04 18:00:05,432][train_inner][INFO] - {"epoch": 17, "update": 16.123, "loss": "1.973", "ppl": "3.93", "wps": "362371", "ups": "3.05", "wpb": "118771", "bsz": "256", "num_updates": "829800", "lr": "0.000171919", "gnorm": "0.811", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "0"} +[2022-08-04 18:00:12,605][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 18:01:11,426][train_inner][INFO] - {"epoch": 17, "update": 16.127, "loss": "1.969", "ppl": "3.91", "wps": "358943", "ups": "3.03", "wpb": "118430", "bsz": "256", "num_updates": "830000", "lr": "0.000171717", "gnorm": "0.814", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.1", "wall": "0"} +[2022-08-04 18:02:16,630][train_inner][INFO] - {"epoch": 17, "update": 16.13, "loss": "1.976", "ppl": "3.93", "wps": "360777", "ups": "3.07", "wpb": "117620", "bsz": "256", "num_updates": "830200", "lr": "0.000171515", "gnorm": "0.818", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "0"} +[2022-08-04 18:03:22,260][train_inner][INFO] - {"epoch": 17, "update": 16.134, "loss": "1.973", "ppl": "3.93", "wps": "359657", "ups": "3.05", "wpb": "118020", "bsz": "256", "num_updates": "830400", "lr": "0.000171313", "gnorm": "0.814", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-04 18:04:27,577][train_inner][INFO] - {"epoch": 17, "update": 16.138, "loss": "1.971", "ppl": "3.92", "wps": "358961", "ups": "3.06", "wpb": "117230", "bsz": "256", "num_updates": "830600", "lr": "0.000171111", "gnorm": "0.819", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 18:05:33,277][train_inner][INFO] - {"epoch": 17, "update": 16.142, "loss": "1.97", "ppl": "3.92", "wps": "359767", "ups": "3.04", "wpb": "118180", "bsz": "255.9", "num_updates": "830800", "lr": "0.000170909", "gnorm": "0.816", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 18:06:38,612][train_inner][INFO] - {"epoch": 17, "update": 16.146, "loss": "1.965", "ppl": "3.9", "wps": "362383", "ups": "3.06", "wpb": "118379", "bsz": "256", "num_updates": "831000", "lr": "0.000170707", "gnorm": "0.814", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 18:07:43,892][train_inner][INFO] - {"epoch": 17, "update": 16.15, "loss": "1.974", "ppl": "3.93", "wps": "362231", "ups": "3.06", "wpb": "118230", "bsz": "256", "num_updates": "831200", "lr": "0.000170505", "gnorm": "0.819", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 18:08:49,754][train_inner][INFO] - {"epoch": 17, "update": 16.154, "loss": "1.97", "ppl": "3.92", "wps": "360165", "ups": "3.04", "wpb": "118605", "bsz": "256", "num_updates": "831400", "lr": "0.000170303", "gnorm": "0.811", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-04 18:09:55,035][train_inner][INFO] - {"epoch": 17, "update": 16.158, "loss": "1.967", "ppl": "3.91", "wps": "363168", "ups": "3.06", "wpb": "118533", "bsz": "256", "num_updates": "831600", "lr": "0.000170101", "gnorm": "0.811", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 18:11:00,219][train_inner][INFO] - {"epoch": 17, "update": 16.162, "loss": "1.97", "ppl": "3.92", "wps": "362592", "ups": "3.07", "wpb": "118174", "bsz": "256", "num_updates": "831800", "lr": "0.000169899", "gnorm": "0.832", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 18:12:05,292][train_inner][INFO] - {"epoch": 17, "update": 16.165, "loss": "1.967", "ppl": "3.91", "wps": "362804", "ups": "3.07", "wpb": "118033", "bsz": "256", "num_updates": "832000", "lr": "0.000169697", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 18:13:10,635][train_inner][INFO] - {"epoch": 17, "update": 16.169, "loss": "1.969", "ppl": "3.91", "wps": "363293", "ups": "3.06", "wpb": "118691", "bsz": "256", "num_updates": "832200", "lr": "0.000169495", "gnorm": "0.817", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 18:14:15,777][train_inner][INFO] - {"epoch": 17, "update": 16.173, "loss": "1.971", "ppl": "3.92", "wps": "363975", "ups": "3.07", "wpb": "118547", "bsz": "256", "num_updates": "832400", "lr": "0.000169293", "gnorm": "0.814", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-04 18:15:21,593][train_inner][INFO] - {"epoch": 17, "update": 16.177, "loss": "1.974", "ppl": "3.93", "wps": "360534", "ups": "3.04", "wpb": "118643", "bsz": "256", "num_updates": "832600", "lr": "0.000169091", "gnorm": "0.815", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-04 18:16:26,812][train_inner][INFO] - {"epoch": 17, "update": 16.181, "loss": "1.97", "ppl": "3.92", "wps": "363020", "ups": "3.07", "wpb": "118377", "bsz": "256", "num_updates": "832800", "lr": "0.000168889", "gnorm": "0.817", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.2", "wall": "0"} +[2022-08-04 18:17:31,952][train_inner][INFO] - {"epoch": 17, "update": 16.185, "loss": "1.968", "ppl": "3.91", "wps": "365292", "ups": "3.07", "wpb": "118975", "bsz": "256", "num_updates": "833000", "lr": "0.000168687", "gnorm": "0.813", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 18:18:37,742][train_inner][INFO] - {"epoch": 17, "update": 16.189, "loss": "1.965", "ppl": "3.9", "wps": "361820", "ups": "3.04", "wpb": "119017", "bsz": "256", "num_updates": "833200", "lr": "0.000168485", "gnorm": "0.812", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 18:19:42,824][train_inner][INFO] - {"epoch": 17, "update": 16.193, "loss": "1.966", "ppl": "3.91", "wps": "364638", "ups": "3.07", "wpb": "118655", "bsz": "256", "num_updates": "833400", "lr": "0.000168283", "gnorm": "0.814", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-04 18:20:47,974][train_inner][INFO] - {"epoch": 17, "update": 16.197, "loss": "1.966", "ppl": "3.91", "wps": "361971", "ups": "3.07", "wpb": "117907", "bsz": "256", "num_updates": "833600", "lr": "0.000168081", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 18:21:53,497][train_inner][INFO] - {"epoch": 17, "update": 16.2, "loss": "1.972", "ppl": "3.92", "wps": "360541", "ups": "3.05", "wpb": "118117", "bsz": "256", "num_updates": "833800", "lr": "0.000167879", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 18:22:31,761][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 18:22:58,756][train_inner][INFO] - {"epoch": 17, "update": 16.204, "loss": "1.968", "ppl": "3.91", "wps": "362434", "ups": "3.07", "wpb": "118248", "bsz": "256", "num_updates": "834000", "lr": "0.000167677", "gnorm": "0.815", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-04 18:24:03,892][train_inner][INFO] - {"epoch": 17, "update": 16.208, "loss": "1.966", "ppl": "3.91", "wps": "363979", "ups": "3.07", "wpb": "118540", "bsz": "256", "num_updates": "834200", "lr": "0.000167475", "gnorm": "0.815", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 18:25:09,093][train_inner][INFO] - {"epoch": 17, "update": 16.212, "loss": "1.972", "ppl": "3.92", "wps": "361882", "ups": "3.07", "wpb": "117974", "bsz": "256", "num_updates": "834400", "lr": "0.000167273", "gnorm": "0.814", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 18:26:14,745][train_inner][INFO] - {"epoch": 17, "update": 16.216, "loss": "1.965", "ppl": "3.91", "wps": "359326", "ups": "3.05", "wpb": "117950", "bsz": "256", "num_updates": "834600", "lr": "0.000167071", "gnorm": "0.817", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 18:27:20,210][train_inner][INFO] - {"epoch": 17, "update": 16.22, "loss": "1.969", "ppl": "3.92", "wps": "360910", "ups": "3.06", "wpb": "118134", "bsz": "256", "num_updates": "834800", "lr": "0.000166869", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-04 18:28:25,382][train_inner][INFO] - {"epoch": 17, "update": 16.224, "loss": "1.969", "ppl": "3.92", "wps": "362384", "ups": "3.07", "wpb": "118083", "bsz": "256", "num_updates": "835000", "lr": "0.000166667", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "0"} +[2022-08-04 18:29:31,070][train_inner][INFO] - {"epoch": 17, "update": 16.228, "loss": "1.964", "ppl": "3.9", "wps": "362291", "ups": "3.04", "wpb": "118989", "bsz": "256", "num_updates": "835200", "lr": "0.000166465", "gnorm": "0.813", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 18:30:36,317][train_inner][INFO] - {"epoch": 17, "update": 16.231, "loss": "1.967", "ppl": "3.91", "wps": "361611", "ups": "3.07", "wpb": "117969", "bsz": "256", "num_updates": "835400", "lr": "0.000166263", "gnorm": "0.815", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 18:31:41,243][train_inner][INFO] - {"epoch": 17, "update": 16.235, "loss": "1.974", "ppl": "3.93", "wps": "363541", "ups": "3.08", "wpb": "118011", "bsz": "256", "num_updates": "835600", "lr": "0.000166061", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 18:32:46,694][train_inner][INFO] - {"epoch": 17, "update": 16.239, "loss": "1.969", "ppl": "3.91", "wps": "362699", "ups": "3.06", "wpb": "118693", "bsz": "256", "num_updates": "835800", "lr": "0.000165859", "gnorm": "0.819", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 18:33:42,150][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 18:33:52,136][train_inner][INFO] - {"epoch": 17, "update": 16.243, "loss": "1.965", "ppl": "3.91", "wps": "361448", "ups": "3.06", "wpb": "118258", "bsz": "256", "num_updates": "836000", "lr": "0.000165657", "gnorm": "0.817", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 18:34:57,353][train_inner][INFO] - {"epoch": 17, "update": 16.247, "loss": "1.971", "ppl": "3.92", "wps": "363246", "ups": "3.07", "wpb": "118450", "bsz": "256", "num_updates": "836200", "lr": "0.000165455", "gnorm": "0.816", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 18:36:02,710][train_inner][INFO] - {"epoch": 17, "update": 16.251, "loss": "1.967", "ppl": "3.91", "wps": "361272", "ups": "3.06", "wpb": "118056", "bsz": "256", "num_updates": "836400", "lr": "0.000165253", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 18:37:08,063][train_inner][INFO] - {"epoch": 17, "update": 16.255, "loss": "1.972", "ppl": "3.92", "wps": "362972", "ups": "3.06", "wpb": "118604", "bsz": "256", "num_updates": "836600", "lr": "0.000165051", "gnorm": "0.819", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 18:38:13,097][train_inner][INFO] - {"epoch": 17, "update": 16.259, "loss": "1.962", "ppl": "3.9", "wps": "365256", "ups": "3.08", "wpb": "118769", "bsz": "256", "num_updates": "836800", "lr": "0.000164848", "gnorm": "0.817", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "0"} +[2022-08-04 18:39:18,747][train_inner][INFO] - {"epoch": 17, "update": 16.263, "loss": "1.967", "ppl": "3.91", "wps": "360156", "ups": "3.05", "wpb": "118219", "bsz": "256", "num_updates": "837000", "lr": "0.000164646", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 18:40:23,247][train_inner][INFO] - {"epoch": 17, "update": 16.266, "loss": "1.968", "ppl": "3.91", "wps": "364414", "ups": "3.1", "wpb": "117522", "bsz": "256", "num_updates": "837200", "lr": "0.000164444", "gnorm": "0.823", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "0"} +[2022-08-04 18:41:28,336][train_inner][INFO] - {"epoch": 17, "update": 16.27, "loss": "1.962", "ppl": "3.9", "wps": "364628", "ups": "3.07", "wpb": "118665", "bsz": "256", "num_updates": "837400", "lr": "0.000164242", "gnorm": "0.812", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 18:42:33,424][train_inner][INFO] - {"epoch": 17, "update": 16.274, "loss": "1.971", "ppl": "3.92", "wps": "362611", "ups": "3.07", "wpb": "118002", "bsz": "256", "num_updates": "837600", "lr": "0.00016404", "gnorm": "0.827", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 18:43:38,646][train_inner][INFO] - {"epoch": 17, "update": 16.278, "loss": "1.963", "ppl": "3.9", "wps": "363362", "ups": "3.07", "wpb": "118493", "bsz": "256", "num_updates": "837800", "lr": "0.000163838", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 18:44:43,756][train_inner][INFO] - {"epoch": 17, "update": 16.282, "loss": "1.968", "ppl": "3.91", "wps": "364161", "ups": "3.07", "wpb": "118542", "bsz": "256", "num_updates": "838000", "lr": "0.000163636", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 18:44:50,144][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 18:45:49,086][train_inner][INFO] - {"epoch": 17, "update": 16.286, "loss": "1.96", "ppl": "3.89", "wps": "361931", "ups": "3.06", "wpb": "118223", "bsz": "256", "num_updates": "838200", "lr": "0.000163434", "gnorm": "0.817", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 18:46:54,285][train_inner][INFO] - {"epoch": 17, "update": 16.29, "loss": "1.968", "ppl": "3.91", "wps": "361345", "ups": "3.07", "wpb": "117796", "bsz": "256", "num_updates": "838400", "lr": "0.000163232", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 18:47:59,614][train_inner][INFO] - {"epoch": 17, "update": 16.294, "loss": "1.966", "ppl": "3.91", "wps": "363712", "ups": "3.06", "wpb": "118802", "bsz": "256", "num_updates": "838600", "lr": "0.00016303", "gnorm": "0.819", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.5", "wall": "0"} +[2022-08-04 18:49:04,728][train_inner][INFO] - {"epoch": 17, "update": 16.298, "loss": "1.969", "ppl": "3.92", "wps": "363853", "ups": "3.07", "wpb": "118459", "bsz": "256", "num_updates": "838800", "lr": "0.000162828", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "0"} +[2022-08-04 18:50:10,144][train_inner][INFO] - {"epoch": 17, "update": 16.301, "loss": "1.964", "ppl": "3.9", "wps": "361248", "ups": "3.06", "wpb": "118154", "bsz": "256", "num_updates": "839000", "lr": "0.000162626", "gnorm": "0.822", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.1", "wall": "0"} +[2022-08-04 18:51:15,278][train_inner][INFO] - {"epoch": 17, "update": 16.305, "loss": "1.963", "ppl": "3.9", "wps": "363865", "ups": "3.07", "wpb": "118500", "bsz": "256", "num_updates": "839200", "lr": "0.000162424", "gnorm": "0.819", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 18:52:20,685][train_inner][INFO] - {"epoch": 17, "update": 16.309, "loss": "1.966", "ppl": "3.91", "wps": "361558", "ups": "3.06", "wpb": "118239", "bsz": "256", "num_updates": "839400", "lr": "0.000162222", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-04 18:52:40,088][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 18:53:25,931][train_inner][INFO] - {"epoch": 17, "update": 16.313, "loss": "1.963", "ppl": "3.9", "wps": "362508", "ups": "3.07", "wpb": "118255", "bsz": "256", "num_updates": "839600", "lr": "0.00016202", "gnorm": "0.824", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-04 18:54:31,462][train_inner][INFO] - {"epoch": 17, "update": 16.317, "loss": "1.963", "ppl": "3.9", "wps": "361315", "ups": "3.05", "wpb": "118385", "bsz": "256", "num_updates": "839800", "lr": "0.000161818", "gnorm": "0.819", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 18:55:36,417][train_inner][INFO] - {"epoch": 17, "update": 16.321, "loss": "1.962", "ppl": "3.9", "wps": "362956", "ups": "3.08", "wpb": "117871", "bsz": "256", "num_updates": "840000", "lr": "0.000161616", "gnorm": "0.826", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-04 18:56:41,145][train_inner][INFO] - {"epoch": 17, "update": 16.325, "loss": "1.964", "ppl": "3.9", "wps": "365885", "ups": "3.09", "wpb": "118413", "bsz": "256", "num_updates": "840200", "lr": "0.000161414", "gnorm": "0.82", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "0"} +[2022-08-04 18:57:46,612][train_inner][INFO] - {"epoch": 17, "update": 16.329, "loss": "1.968", "ppl": "3.91", "wps": "361810", "ups": "3.06", "wpb": "118430", "bsz": "256", "num_updates": "840400", "lr": "0.000161212", "gnorm": "0.819", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.5", "wall": "0"} +[2022-08-04 18:58:51,501][train_inner][INFO] - {"epoch": 17, "update": 16.333, "loss": "1.966", "ppl": "3.91", "wps": "364870", "ups": "3.08", "wpb": "118378", "bsz": "256", "num_updates": "840600", "lr": "0.00016101", "gnorm": "0.822", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 18:59:56,829][train_inner][INFO] - {"epoch": 17, "update": 16.336, "loss": "1.974", "ppl": "3.93", "wps": "362752", "ups": "3.06", "wpb": "118487", "bsz": "256", "num_updates": "840800", "lr": "0.000160808", "gnorm": "0.825", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 19:01:01,630][train_inner][INFO] - {"epoch": 17, "update": 16.34, "loss": "1.966", "ppl": "3.91", "wps": "362567", "ups": "3.09", "wpb": "117472", "bsz": "256", "num_updates": "841000", "lr": "0.000160606", "gnorm": "0.823", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "0"} +[2022-08-04 19:02:06,977][train_inner][INFO] - {"epoch": 17, "update": 16.344, "loss": "1.967", "ppl": "3.91", "wps": "362655", "ups": "3.06", "wpb": "118490", "bsz": "256", "num_updates": "841200", "lr": "0.000160404", "gnorm": "0.821", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 19:03:12,685][train_inner][INFO] - {"epoch": 17, "update": 16.348, "loss": "1.968", "ppl": "3.91", "wps": "359805", "ups": "3.04", "wpb": "118209", "bsz": "256", "num_updates": "841400", "lr": "0.000160202", "gnorm": "0.819", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 19:04:17,654][train_inner][INFO] - {"epoch": 17, "update": 16.352, "loss": "1.966", "ppl": "3.91", "wps": "364489", "ups": "3.08", "wpb": "118395", "bsz": "256", "num_updates": "841600", "lr": "0.00016", "gnorm": "0.822", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 19:05:22,686][train_inner][INFO] - {"epoch": 17, "update": 16.356, "loss": "1.967", "ppl": "3.91", "wps": "364615", "ups": "3.08", "wpb": "118556", "bsz": "256", "num_updates": "841800", "lr": "0.000159798", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 19:06:28,155][train_inner][INFO] - {"epoch": 17, "update": 16.36, "loss": "1.966", "ppl": "3.91", "wps": "362098", "ups": "3.06", "wpb": "118521", "bsz": "256", "num_updates": "842000", "lr": "0.000159596", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 19:07:33,855][train_inner][INFO] - {"epoch": 17, "update": 16.364, "loss": "1.959", "ppl": "3.89", "wps": "360807", "ups": "3.04", "wpb": "118523", "bsz": "256", "num_updates": "842200", "lr": "0.000159394", "gnorm": "0.819", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 19:08:39,524][train_inner][INFO] - {"epoch": 17, "update": 16.368, "loss": "1.958", "ppl": "3.89", "wps": "360118", "ups": "3.05", "wpb": "118241", "bsz": "256", "num_updates": "842400", "lr": "0.000159192", "gnorm": "0.823", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 19:09:44,881][train_inner][INFO] - {"epoch": 17, "update": 16.371, "loss": "1.967", "ppl": "3.91", "wps": "361311", "ups": "3.06", "wpb": "118068", "bsz": "256", "num_updates": "842600", "lr": "0.00015899", "gnorm": "0.826", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 19:10:50,173][train_inner][INFO] - {"epoch": 17, "update": 16.375, "loss": "1.961", "ppl": "3.89", "wps": "362883", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "842800", "lr": "0.000158788", "gnorm": "0.818", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 19:11:55,754][train_inner][INFO] - {"epoch": 17, "update": 16.379, "loss": "1.962", "ppl": "3.9", "wps": "361477", "ups": "3.05", "wpb": "118527", "bsz": "256", "num_updates": "843000", "lr": "0.000158586", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 19:13:01,002][train_inner][INFO] - {"epoch": 17, "update": 16.383, "loss": "1.968", "ppl": "3.91", "wps": "361215", "ups": "3.07", "wpb": "117842", "bsz": "256", "num_updates": "843200", "lr": "0.000158384", "gnorm": "0.827", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 19:14:06,238][train_inner][INFO] - {"epoch": 17, "update": 16.387, "loss": "1.962", "ppl": "3.9", "wps": "361400", "ups": "3.07", "wpb": "117879", "bsz": "256", "num_updates": "843400", "lr": "0.000158182", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 19:14:58,524][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 19:15:11,729][train_inner][INFO] - {"epoch": 17, "update": 16.391, "loss": "1.966", "ppl": "3.91", "wps": "360166", "ups": "3.05", "wpb": "117932", "bsz": "256", "num_updates": "843600", "lr": "0.00015798", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 19:16:16,862][train_inner][INFO] - {"epoch": 17, "update": 16.395, "loss": "1.969", "ppl": "3.92", "wps": "361611", "ups": "3.07", "wpb": "117763", "bsz": "256", "num_updates": "843800", "lr": "0.000157778", "gnorm": "0.824", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 19:17:22,370][train_inner][INFO] - {"epoch": 17, "update": 16.399, "loss": "1.96", "ppl": "3.89", "wps": "361692", "ups": "3.05", "wpb": "118457", "bsz": "256", "num_updates": "844000", "lr": "0.000157576", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 19:18:27,753][train_inner][INFO] - {"epoch": 17, "update": 16.402, "loss": "1.964", "ppl": "3.9", "wps": "363614", "ups": "3.06", "wpb": "118868", "bsz": "256", "num_updates": "844200", "lr": "0.000157374", "gnorm": "0.823", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.8", "wall": "0"} +[2022-08-04 19:19:33,003][train_inner][INFO] - {"epoch": 17, "update": 16.406, "loss": "1.963", "ppl": "3.9", "wps": "362323", "ups": "3.07", "wpb": "118208", "bsz": "256", "num_updates": "844400", "lr": "0.000157172", "gnorm": "0.823", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 19:20:38,535][train_inner][INFO] - {"epoch": 17, "update": 16.41, "loss": "1.963", "ppl": "3.9", "wps": "361182", "ups": "3.05", "wpb": "118342", "bsz": "256", "num_updates": "844600", "lr": "0.00015697", "gnorm": "0.826", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 19:21:44,023][train_inner][INFO] - {"epoch": 17, "update": 16.414, "loss": "1.956", "ppl": "3.88", "wps": "362820", "ups": "3.05", "wpb": "118799", "bsz": "256", "num_updates": "844800", "lr": "0.000156768", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-04 19:22:49,415][train_inner][INFO] - {"epoch": 17, "update": 16.418, "loss": "1.967", "ppl": "3.91", "wps": "361470", "ups": "3.06", "wpb": "118185", "bsz": "256", "num_updates": "845000", "lr": "0.000156566", "gnorm": "0.828", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 19:23:54,855][train_inner][INFO] - {"epoch": 17, "update": 16.422, "loss": "1.963", "ppl": "3.9", "wps": "361981", "ups": "3.06", "wpb": "118438", "bsz": "256", "num_updates": "845200", "lr": "0.000156364", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-04 19:24:59,895][train_inner][INFO] - {"epoch": 17, "update": 16.426, "loss": "1.966", "ppl": "3.91", "wps": "364796", "ups": "3.08", "wpb": "118630", "bsz": "256", "num_updates": "845400", "lr": "0.000156162", "gnorm": "0.823", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 19:26:05,288][train_inner][INFO] - {"epoch": 17, "update": 16.43, "loss": "1.963", "ppl": "3.9", "wps": "361130", "ups": "3.06", "wpb": "118070", "bsz": "256", "num_updates": "845600", "lr": "0.00015596", "gnorm": "0.826", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 19:26:07,828][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 19:27:10,692][train_inner][INFO] - {"epoch": 17, "update": 16.434, "loss": "1.962", "ppl": "3.9", "wps": "363257", "ups": "3.06", "wpb": "118790", "bsz": "256", "num_updates": "845800", "lr": "0.000155758", "gnorm": "0.821", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 19:28:15,482][train_inner][INFO] - {"epoch": 17, "update": 16.437, "loss": "1.955", "ppl": "3.88", "wps": "364805", "ups": "3.09", "wpb": "118170", "bsz": "256", "num_updates": "846000", "lr": "0.000155556", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "0"} +[2022-08-04 19:29:21,255][train_inner][INFO] - {"epoch": 17, "update": 16.441, "loss": "1.964", "ppl": "3.9", "wps": "359646", "ups": "3.04", "wpb": "118274", "bsz": "256", "num_updates": "846200", "lr": "0.000155354", "gnorm": "0.824", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "0"} +[2022-08-04 19:30:27,131][train_inner][INFO] - {"epoch": 17, "update": 16.445, "loss": "1.963", "ppl": "3.9", "wps": "359853", "ups": "3.04", "wpb": "118526", "bsz": "256", "num_updates": "846400", "lr": "0.000155152", "gnorm": "0.82", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.4", "wall": "0"} +[2022-08-04 19:30:30,084][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 19:31:32,948][train_inner][INFO] - {"epoch": 17, "update": 16.449, "loss": "1.963", "ppl": "3.9", "wps": "358303", "ups": "3.04", "wpb": "117909", "bsz": "256", "num_updates": "846600", "lr": "0.000154949", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 19:32:37,806][train_inner][INFO] - {"epoch": 17, "update": 16.453, "loss": "1.963", "ppl": "3.9", "wps": "362992", "ups": "3.08", "wpb": "117712", "bsz": "256", "num_updates": "846800", "lr": "0.000154747", "gnorm": "0.827", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 19:33:43,265][train_inner][INFO] - {"epoch": 17, "update": 16.457, "loss": "1.958", "ppl": "3.88", "wps": "359996", "ups": "3.06", "wpb": "117824", "bsz": "256", "num_updates": "847000", "lr": "0.000154545", "gnorm": "0.821", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 19:34:48,147][train_inner][INFO] - {"epoch": 17, "update": 16.461, "loss": "1.963", "ppl": "3.9", "wps": "363672", "ups": "3.08", "wpb": "117976", "bsz": "256", "num_updates": "847200", "lr": "0.000154343", "gnorm": "0.824", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 19:35:53,273][train_inner][INFO] - {"epoch": 17, "update": 16.465, "loss": "1.964", "ppl": "3.9", "wps": "362277", "ups": "3.07", "wpb": "117966", "bsz": "256", "num_updates": "847400", "lr": "0.000154141", "gnorm": "0.824", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-04 19:36:58,748][train_inner][INFO] - {"epoch": 17, "update": 16.469, "loss": "1.961", "ppl": "3.89", "wps": "361114", "ups": "3.05", "wpb": "118214", "bsz": "256", "num_updates": "847600", "lr": "0.000153939", "gnorm": "0.824", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.9", "wall": "0"} +[2022-08-04 19:38:04,228][train_inner][INFO] - {"epoch": 17, "update": 16.472, "loss": "1.958", "ppl": "3.88", "wps": "362888", "ups": "3.05", "wpb": "118807", "bsz": "256", "num_updates": "847800", "lr": "0.000153737", "gnorm": "0.82", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 19:39:09,260][train_inner][INFO] - {"epoch": 17, "update": 16.476, "loss": "1.958", "ppl": "3.88", "wps": "364209", "ups": "3.08", "wpb": "118415", "bsz": "256", "num_updates": "848000", "lr": "0.000153535", "gnorm": "0.823", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 19:40:15,126][train_inner][INFO] - {"epoch": 17, "update": 16.48, "loss": "1.958", "ppl": "3.88", "wps": "360470", "ups": "3.04", "wpb": "118713", "bsz": "256", "num_updates": "848200", "lr": "0.000153333", "gnorm": "0.826", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "0"} +[2022-08-04 19:41:20,816][train_inner][INFO] - {"epoch": 17, "update": 16.484, "loss": "1.953", "ppl": "3.87", "wps": "361686", "ups": "3.04", "wpb": "118793", "bsz": "256", "num_updates": "848400", "lr": "0.000153131", "gnorm": "0.827", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-04 19:42:26,366][train_inner][INFO] - {"epoch": 17, "update": 16.488, "loss": "1.968", "ppl": "3.91", "wps": "359327", "ups": "3.05", "wpb": "117767", "bsz": "256", "num_updates": "848600", "lr": "0.000152929", "gnorm": "0.836", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "0"} +[2022-08-04 19:43:31,144][train_inner][INFO] - {"epoch": 17, "update": 16.492, "loss": "1.957", "ppl": "3.88", "wps": "365640", "ups": "3.09", "wpb": "118426", "bsz": "256", "num_updates": "848800", "lr": "0.000152727", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "0"} +[2022-08-04 19:44:36,765][train_inner][INFO] - {"epoch": 17, "update": 16.496, "loss": "1.962", "ppl": "3.89", "wps": "360238", "ups": "3.05", "wpb": "118195", "bsz": "256", "num_updates": "849000", "lr": "0.000152525", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 19:44:40,651][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 19:45:42,387][train_inner][INFO] - {"epoch": 17, "update": 16.5, "loss": "1.956", "ppl": "3.88", "wps": "359478", "ups": "3.05", "wpb": "117946", "bsz": "256", "num_updates": "849200", "lr": "0.000152323", "gnorm": "0.826", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 19:46:47,526][train_inner][INFO] - {"epoch": 17, "update": 16.504, "loss": "1.957", "ppl": "3.88", "wps": "362559", "ups": "3.07", "wpb": "118082", "bsz": "256", "num_updates": "849400", "lr": "0.000152121", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 19:47:53,001][train_inner][INFO] - {"epoch": 17, "update": 16.507, "loss": "1.956", "ppl": "3.88", "wps": "362715", "ups": "3.05", "wpb": "118736", "bsz": "256", "num_updates": "849600", "lr": "0.000151919", "gnorm": "0.821", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 19:48:58,276][train_inner][INFO] - {"epoch": 17, "update": 16.511, "loss": "1.962", "ppl": "3.9", "wps": "361557", "ups": "3.06", "wpb": "118001", "bsz": "256", "num_updates": "849800", "lr": "0.000151717", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 19:50:03,604][train_inner][INFO] - {"epoch": 17, "update": 16.515, "loss": "1.961", "ppl": "3.89", "wps": "361158", "ups": "3.06", "wpb": "117959", "bsz": "256", "num_updates": "850000", "lr": "0.000151515", "gnorm": "0.825", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-04 19:50:03,606][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-04 19:50:03,614][fairseq.tasks.fairseq_task][WARNING] - 576 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[2086, 18643, 32310, 47209, 49755, 37591, 48266, 19397, 19415, 38832] +[2022-08-04 19:50:26,988][valid][INFO] - {"epoch": 17, "valid_loss": "1.871", "valid_ppl": "3.66", "valid_wps": "1.57391e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "850000", "valid_best_loss": "1.871"} +[2022-08-04 19:50:26,991][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 17 @ 850000 updates +[2022-08-04 19:50:26,992][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_17_850000.pt +[2022-08-04 19:50:37,513][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_17_850000.pt +[2022-08-04 19:51:05,710][fairseq.checkpoint_utils][INFO] - Saved checkpoint /vit-opt/fairseq/bert/adan2/checkpoint_17_850000.pt (epoch 17 @ 850000 updates, score 1.871) (writing took 38.71909079598845 seconds) +[2022-08-04 19:52:10,341][train_inner][INFO] - {"epoch": 17, "update": 16.519, "loss": "1.959", "ppl": "3.89", "wps": "186444", "ups": "1.58", "wpb": "118145", "bsz": "256", "num_updates": "850200", "lr": "0.000151313", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.8", "wall": "0"} +[2022-08-04 19:53:15,210][train_inner][INFO] - {"epoch": 17, "update": 16.523, "loss": "1.959", "ppl": "3.89", "wps": "363804", "ups": "3.08", "wpb": "117996", "bsz": "256", "num_updates": "850400", "lr": "0.000151111", "gnorm": "0.829", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "0"} +[2022-08-04 19:54:20,008][train_inner][INFO] - {"epoch": 17, "update": 16.527, "loss": "1.962", "ppl": "3.9", "wps": "364739", "ups": "3.09", "wpb": "118170", "bsz": "256", "num_updates": "850600", "lr": "0.000150909", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.8", "wall": "0"} +[2022-08-04 19:55:25,280][train_inner][INFO] - {"epoch": 17, "update": 16.531, "loss": "1.957", "ppl": "3.88", "wps": "360862", "ups": "3.06", "wpb": "117769", "bsz": "256", "num_updates": "850800", "lr": "0.000150707", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 19:56:30,546][train_inner][INFO] - {"epoch": 17, "update": 16.535, "loss": "1.958", "ppl": "3.89", "wps": "365598", "ups": "3.06", "wpb": "119305", "bsz": "256", "num_updates": "851000", "lr": "0.000150505", "gnorm": "0.827", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 19:57:36,167][train_inner][INFO] - {"epoch": 17, "update": 16.538, "loss": "1.955", "ppl": "3.88", "wps": "361713", "ups": "3.05", "wpb": "118677", "bsz": "256", "num_updates": "851200", "lr": "0.000150303", "gnorm": "0.825", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-04 19:58:41,759][train_inner][INFO] - {"epoch": 17, "update": 16.542, "loss": "1.963", "ppl": "3.9", "wps": "360747", "ups": "3.05", "wpb": "118306", "bsz": "256", "num_updates": "851400", "lr": "0.000150101", "gnorm": "0.827", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 19:59:46,923][train_inner][INFO] - {"epoch": 17, "update": 16.546, "loss": "1.961", "ppl": "3.89", "wps": "362964", "ups": "3.07", "wpb": "118258", "bsz": "256", "num_updates": "851600", "lr": "0.000149899", "gnorm": "0.829", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.4", "wall": "0"} +[2022-08-04 20:00:52,559][train_inner][INFO] - {"epoch": 17, "update": 16.55, "loss": "1.953", "ppl": "3.87", "wps": "361419", "ups": "3.05", "wpb": "118600", "bsz": "256", "num_updates": "851800", "lr": "0.000149697", "gnorm": "0.828", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.7", "wall": "0"} +[2022-08-04 20:01:57,633][train_inner][INFO] - {"epoch": 17, "update": 16.554, "loss": "1.964", "ppl": "3.9", "wps": "363294", "ups": "3.07", "wpb": "118204", "bsz": "256", "num_updates": "852000", "lr": "0.000149495", "gnorm": "0.829", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 20:03:03,110][train_inner][INFO] - {"epoch": 17, "update": 16.558, "loss": "1.957", "ppl": "3.88", "wps": "362489", "ups": "3.05", "wpb": "118671", "bsz": "256", "num_updates": "852200", "lr": "0.000149293", "gnorm": "0.824", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 20:04:08,071][train_inner][INFO] - {"epoch": 17, "update": 16.562, "loss": "1.956", "ppl": "3.88", "wps": "366566", "ups": "3.08", "wpb": "119060", "bsz": "256", "num_updates": "852400", "lr": "0.000149091", "gnorm": "0.826", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 20:05:13,384][train_inner][INFO] - {"epoch": 17, "update": 16.566, "loss": "1.96", "ppl": "3.89", "wps": "363525", "ups": "3.06", "wpb": "118712", "bsz": "256", "num_updates": "852600", "lr": "0.000148889", "gnorm": "0.833", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "0"} +[2022-08-04 20:06:18,714][train_inner][INFO] - {"epoch": 17, "update": 16.57, "loss": "1.957", "ppl": "3.88", "wps": "359963", "ups": "3.06", "wpb": "117581", "bsz": "256", "num_updates": "852800", "lr": "0.000148687", "gnorm": "0.83", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "0"} +[2022-08-04 20:07:23,794][train_inner][INFO] - {"epoch": 17, "update": 16.573, "loss": "1.956", "ppl": "3.88", "wps": "364154", "ups": "3.07", "wpb": "118493", "bsz": "256", "num_updates": "853000", "lr": "0.000148485", "gnorm": "0.826", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 20:07:58,760][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 20:08:29,065][train_inner][INFO] - {"epoch": 17, "update": 16.577, "loss": "1.963", "ppl": "3.9", "wps": "361734", "ups": "3.06", "wpb": "118052", "bsz": "256", "num_updates": "853200", "lr": "0.000148283", "gnorm": "0.832", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 20:09:34,532][train_inner][INFO] - {"epoch": 17, "update": 16.581, "loss": "1.956", "ppl": "3.88", "wps": "360761", "ups": "3.06", "wpb": "118088", "bsz": "256", "num_updates": "853400", "lr": "0.000148081", "gnorm": "0.831", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 20:10:39,758][train_inner][INFO] - {"epoch": 17, "update": 16.585, "loss": "1.958", "ppl": "3.89", "wps": "359622", "ups": "3.07", "wpb": "117277", "bsz": "256", "num_updates": "853600", "lr": "0.000147879", "gnorm": "0.835", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 20:11:05,137][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 20:11:45,151][train_inner][INFO] - {"epoch": 17, "update": 16.589, "loss": "1.958", "ppl": "3.88", "wps": "360844", "ups": "3.06", "wpb": "117971", "bsz": "256", "num_updates": "853800", "lr": "0.000147677", "gnorm": "0.829", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 20:12:50,169][train_inner][INFO] - {"epoch": 17, "update": 16.593, "loss": "1.958", "ppl": "3.88", "wps": "362945", "ups": "3.08", "wpb": "117989", "bsz": "256", "num_updates": "854000", "lr": "0.000147475", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26", "wall": "0"} +[2022-08-04 20:13:55,011][train_inner][INFO] - {"epoch": 17, "update": 16.597, "loss": "1.954", "ppl": "3.87", "wps": "366718", "ups": "3.08", "wpb": "118892", "bsz": "256", "num_updates": "854200", "lr": "0.000147273", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "0"} +[2022-08-04 20:15:00,180][train_inner][INFO] - {"epoch": 17, "update": 16.601, "loss": "1.951", "ppl": "3.87", "wps": "363269", "ups": "3.07", "wpb": "118368", "bsz": "256", "num_updates": "854400", "lr": "0.000147071", "gnorm": "0.831", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 20:16:05,361][train_inner][INFO] - {"epoch": 17, "update": 16.605, "loss": "1.954", "ppl": "3.87", "wps": "363215", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "854600", "lr": "0.000146869", "gnorm": "0.829", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-04 20:17:10,679][train_inner][INFO] - {"epoch": 17, "update": 16.608, "loss": "1.96", "ppl": "3.89", "wps": "363230", "ups": "3.06", "wpb": "118625", "bsz": "256", "num_updates": "854800", "lr": "0.000146667", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "0"} +[2022-08-04 20:18:15,882][train_inner][INFO] - {"epoch": 17, "update": 16.612, "loss": "1.953", "ppl": "3.87", "wps": "363124", "ups": "3.07", "wpb": "118384", "bsz": "256", "num_updates": "855000", "lr": "0.000146465", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 20:19:21,017][train_inner][INFO] - {"epoch": 17, "update": 16.616, "loss": "1.951", "ppl": "3.87", "wps": "363639", "ups": "3.07", "wpb": "118424", "bsz": "256", "num_updates": "855200", "lr": "0.000146263", "gnorm": "0.833", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.1", "wall": "0"} +[2022-08-04 20:20:26,752][train_inner][INFO] - {"epoch": 17, "update": 16.62, "loss": "1.953", "ppl": "3.87", "wps": "360112", "ups": "3.04", "wpb": "118359", "bsz": "256", "num_updates": "855400", "lr": "0.000146061", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-04 20:21:32,182][train_inner][INFO] - {"epoch": 17, "update": 16.624, "loss": "1.956", "ppl": "3.88", "wps": "361872", "ups": "3.06", "wpb": "118380", "bsz": "256", "num_updates": "855600", "lr": "0.000145859", "gnorm": "0.832", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "0"} +[2022-08-04 20:22:14,476][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 20:22:37,733][train_inner][INFO] - {"epoch": 17, "update": 16.628, "loss": "1.952", "ppl": "3.87", "wps": "361048", "ups": "3.05", "wpb": "118324", "bsz": "256", "num_updates": "855800", "lr": "0.000145657", "gnorm": "0.829", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27", "wall": "0"} +[2022-08-04 20:23:43,035][train_inner][INFO] - {"epoch": 17, "update": 16.632, "loss": "1.956", "ppl": "3.88", "wps": "361507", "ups": "3.06", "wpb": "118036", "bsz": "256", "num_updates": "856000", "lr": "0.000145455", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 20:24:48,048][train_inner][INFO] - {"epoch": 17, "update": 16.636, "loss": "1.955", "ppl": "3.88", "wps": "361445", "ups": "3.08", "wpb": "117489", "bsz": "256", "num_updates": "856200", "lr": "0.000145253", "gnorm": "0.831", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "0"} +[2022-08-04 20:25:53,512][train_inner][INFO] - {"epoch": 17, "update": 16.64, "loss": "1.953", "ppl": "3.87", "wps": "361738", "ups": "3.06", "wpb": "118402", "bsz": "256", "num_updates": "856400", "lr": "0.000145051", "gnorm": "0.829", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 20:26:58,736][train_inner][INFO] - {"epoch": 17, "update": 16.643, "loss": "1.945", "ppl": "3.85", "wps": "364537", "ups": "3.07", "wpb": "118880", "bsz": "256", "num_updates": "856600", "lr": "0.000144848", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-04 20:28:03,924][train_inner][INFO] - {"epoch": 17, "update": 16.647, "loss": "1.958", "ppl": "3.89", "wps": "361130", "ups": "3.07", "wpb": "117706", "bsz": "256", "num_updates": "856800", "lr": "0.000144646", "gnorm": "0.834", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 20:29:08,907][train_inner][INFO] - {"epoch": 17, "update": 16.651, "loss": "1.957", "ppl": "3.88", "wps": "361721", "ups": "3.08", "wpb": "117526", "bsz": "256", "num_updates": "857000", "lr": "0.000144444", "gnorm": "0.835", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28", "wall": "0"} +[2022-08-04 20:30:14,154][train_inner][INFO] - {"epoch": 17, "update": 16.655, "loss": "1.955", "ppl": "3.88", "wps": "363011", "ups": "3.07", "wpb": "118424", "bsz": "256", "num_updates": "857200", "lr": "0.000144242", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 20:31:19,304][train_inner][INFO] - {"epoch": 17, "update": 16.659, "loss": "1.954", "ppl": "3.87", "wps": "362222", "ups": "3.07", "wpb": "117993", "bsz": "256", "num_updates": "857400", "lr": "0.00014404", "gnorm": "0.832", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.1", "wall": "0"} +[2022-08-04 20:32:24,276][train_inner][INFO] - {"epoch": 17, "update": 16.663, "loss": "1.953", "ppl": "3.87", "wps": "363329", "ups": "3.08", "wpb": "118024", "bsz": "256", "num_updates": "857600", "lr": "0.000143838", "gnorm": "0.83", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 20:33:29,660][train_inner][INFO] - {"epoch": 17, "update": 16.667, "loss": "1.96", "ppl": "3.89", "wps": "360590", "ups": "3.06", "wpb": "117872", "bsz": "256", "num_updates": "857800", "lr": "0.000143636", "gnorm": "0.831", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 20:33:46,353][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 20:34:35,275][train_inner][INFO] - {"epoch": 17, "update": 16.671, "loss": "1.951", "ppl": "3.87", "wps": "358878", "ups": "3.05", "wpb": "117738", "bsz": "256", "num_updates": "858000", "lr": "0.000143434", "gnorm": "0.831", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-04 20:35:40,683][train_inner][INFO] - {"epoch": 17, "update": 16.675, "loss": "1.95", "ppl": "3.86", "wps": "361908", "ups": "3.06", "wpb": "118357", "bsz": "256", "num_updates": "858200", "lr": "0.000143232", "gnorm": "0.833", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "0"} +[2022-08-04 20:36:45,877][train_inner][INFO] - {"epoch": 17, "update": 16.678, "loss": "1.957", "ppl": "3.88", "wps": "361335", "ups": "3.07", "wpb": "117782", "bsz": "256", "num_updates": "858400", "lr": "0.00014303", "gnorm": "0.832", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 20:37:51,331][train_inner][INFO] - {"epoch": 17, "update": 16.682, "loss": "1.952", "ppl": "3.87", "wps": "362585", "ups": "3.06", "wpb": "118661", "bsz": "256", "num_updates": "858600", "lr": "0.000142828", "gnorm": "0.831", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 20:38:56,758][train_inner][INFO] - {"epoch": 17, "update": 16.686, "loss": "1.96", "ppl": "3.89", "wps": "362216", "ups": "3.06", "wpb": "118490", "bsz": "256", "num_updates": "858800", "lr": "0.000142626", "gnorm": "0.836", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "0"} +[2022-08-04 20:40:02,000][train_inner][INFO] - {"epoch": 17, "update": 16.69, "loss": "1.949", "ppl": "3.86", "wps": "364768", "ups": "3.07", "wpb": "118989", "bsz": "256", "num_updates": "859000", "lr": "0.000142424", "gnorm": "0.832", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 20:41:07,511][train_inner][INFO] - {"epoch": 17, "update": 16.694, "loss": "1.952", "ppl": "3.87", "wps": "361422", "ups": "3.05", "wpb": "118384", "bsz": "256", "num_updates": "859200", "lr": "0.000142222", "gnorm": "0.831", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-04 20:42:12,693][train_inner][INFO] - {"epoch": 17, "update": 16.698, "loss": "1.958", "ppl": "3.89", "wps": "363038", "ups": "3.07", "wpb": "118316", "bsz": "256", "num_updates": "859400", "lr": "0.00014202", "gnorm": "0.836", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 20:43:18,056][train_inner][INFO] - {"epoch": 17, "update": 16.702, "loss": "1.959", "ppl": "3.89", "wps": "362125", "ups": "3.06", "wpb": "118341", "bsz": "256", "num_updates": "859600", "lr": "0.000141818", "gnorm": "0.833", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 20:44:23,282][train_inner][INFO] - {"epoch": 17, "update": 16.706, "loss": "1.952", "ppl": "3.87", "wps": "361049", "ups": "3.07", "wpb": "117739", "bsz": "256", "num_updates": "859800", "lr": "0.000141616", "gnorm": "0.835", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 20:45:28,507][train_inner][INFO] - {"epoch": 17, "update": 16.709, "loss": "1.954", "ppl": "3.88", "wps": "363887", "ups": "3.07", "wpb": "118670", "bsz": "256", "num_updates": "860000", "lr": "0.000141414", "gnorm": "0.83", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 20:46:33,932][train_inner][INFO] - {"epoch": 17, "update": 16.713, "loss": "1.958", "ppl": "3.88", "wps": "362072", "ups": "3.06", "wpb": "118441", "bsz": "256", "num_updates": "860200", "lr": "0.000141212", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 20:47:39,529][train_inner][INFO] - {"epoch": 17, "update": 16.717, "loss": "1.948", "ppl": "3.86", "wps": "361511", "ups": "3.05", "wpb": "118569", "bsz": "256", "num_updates": "860400", "lr": "0.00014101", "gnorm": "0.833", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 20:48:44,602][train_inner][INFO] - {"epoch": 17, "update": 16.721, "loss": "1.954", "ppl": "3.88", "wps": "361974", "ups": "3.07", "wpb": "117770", "bsz": "256", "num_updates": "860600", "lr": "0.000140808", "gnorm": "0.838", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.5", "wall": "0"} +[2022-08-04 20:49:49,592][train_inner][INFO] - {"epoch": 17, "update": 16.725, "loss": "1.958", "ppl": "3.88", "wps": "362409", "ups": "3.08", "wpb": "117764", "bsz": "256", "num_updates": "860800", "lr": "0.000140606", "gnorm": "0.837", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-04 20:50:54,801][train_inner][INFO] - {"epoch": 17, "update": 16.729, "loss": "1.947", "ppl": "3.86", "wps": "362279", "ups": "3.07", "wpb": "118117", "bsz": "256", "num_updates": "861000", "lr": "0.000140404", "gnorm": "0.832", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "0"} +[2022-08-04 20:52:00,218][train_inner][INFO] - {"epoch": 17, "update": 16.733, "loss": "1.95", "ppl": "3.86", "wps": "363479", "ups": "3.06", "wpb": "118886", "bsz": "256", "num_updates": "861200", "lr": "0.000140202", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 20:52:21,295][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 20:53:05,478][train_inner][INFO] - {"epoch": 17, "update": 16.737, "loss": "1.954", "ppl": "3.87", "wps": "359624", "ups": "3.06", "wpb": "117343", "bsz": "256", "num_updates": "861400", "lr": "0.00014", "gnorm": "0.837", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-04 20:54:10,598][train_inner][INFO] - {"epoch": 17, "update": 16.741, "loss": "1.949", "ppl": "3.86", "wps": "363713", "ups": "3.07", "wpb": "118425", "bsz": "256", "num_updates": "861600", "lr": "0.000139798", "gnorm": "0.836", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 20:55:15,992][train_inner][INFO] - {"epoch": 17, "update": 16.744, "loss": "1.948", "ppl": "3.86", "wps": "364722", "ups": "3.06", "wpb": "119237", "bsz": "256", "num_updates": "861800", "lr": "0.000139596", "gnorm": "0.828", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 20:56:21,449][train_inner][INFO] - {"epoch": 17, "update": 16.748, "loss": "1.946", "ppl": "3.85", "wps": "362698", "ups": "3.06", "wpb": "118704", "bsz": "256", "num_updates": "862000", "lr": "0.000139394", "gnorm": "0.833", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 20:57:26,518][train_inner][INFO] - {"epoch": 17, "update": 16.752, "loss": "1.949", "ppl": "3.86", "wps": "363789", "ups": "3.07", "wpb": "118356", "bsz": "256", "num_updates": "862200", "lr": "0.000139192", "gnorm": "0.835", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 20:58:31,636][train_inner][INFO] - {"epoch": 17, "update": 16.756, "loss": "1.946", "ppl": "3.85", "wps": "362551", "ups": "3.07", "wpb": "118040", "bsz": "256", "num_updates": "862400", "lr": "0.00013899", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 20:59:36,706][train_inner][INFO] - {"epoch": 17, "update": 16.76, "loss": "1.95", "ppl": "3.86", "wps": "363676", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "862600", "lr": "0.000138788", "gnorm": "0.834", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.3", "wall": "0"} +[2022-08-04 21:00:42,178][train_inner][INFO] - {"epoch": 17, "update": 16.764, "loss": "1.952", "ppl": "3.87", "wps": "360406", "ups": "3.05", "wpb": "117981", "bsz": "256", "num_updates": "862800", "lr": "0.000138586", "gnorm": "0.837", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.9", "wall": "0"} +[2022-08-04 21:01:47,371][train_inner][INFO] - {"epoch": 17, "update": 16.768, "loss": "1.948", "ppl": "3.86", "wps": "363995", "ups": "3.07", "wpb": "118647", "bsz": "256", "num_updates": "863000", "lr": "0.000138384", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 21:02:52,604][train_inner][INFO] - {"epoch": 17, "update": 16.772, "loss": "1.954", "ppl": "3.87", "wps": "363330", "ups": "3.07", "wpb": "118503", "bsz": "256", "num_updates": "863200", "lr": "0.000138182", "gnorm": "0.834", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 21:03:57,983][train_inner][INFO] - {"epoch": 17, "update": 16.776, "loss": "1.953", "ppl": "3.87", "wps": "361853", "ups": "3.06", "wpb": "118286", "bsz": "256", "num_updates": "863400", "lr": "0.00013798", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 21:05:03,312][train_inner][INFO] - {"epoch": 17, "update": 16.779, "loss": "1.95", "ppl": "3.86", "wps": "361929", "ups": "3.06", "wpb": "118222", "bsz": "256", "num_updates": "863600", "lr": "0.000137778", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 21:06:08,727][train_inner][INFO] - {"epoch": 17, "update": 16.783, "loss": "1.948", "ppl": "3.86", "wps": "360986", "ups": "3.06", "wpb": "118055", "bsz": "256", "num_updates": "863800", "lr": "0.000137576", "gnorm": "0.835", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 21:07:13,917][train_inner][INFO] - {"epoch": 17, "update": 16.787, "loss": "1.949", "ppl": "3.86", "wps": "364327", "ups": "3.07", "wpb": "118750", "bsz": "256", "num_updates": "864000", "lr": "0.000137374", "gnorm": "0.832", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 21:08:19,088][train_inner][INFO] - {"epoch": 17, "update": 16.791, "loss": "1.954", "ppl": "3.88", "wps": "362553", "ups": "3.07", "wpb": "118138", "bsz": "256", "num_updates": "864200", "lr": "0.000137172", "gnorm": "0.839", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 21:09:23,807][train_inner][INFO] - {"epoch": 17, "update": 16.795, "loss": "1.952", "ppl": "3.87", "wps": "363499", "ups": "3.09", "wpb": "117623", "bsz": "256", "num_updates": "864400", "lr": "0.00013697", "gnorm": "0.838", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "0"} +[2022-08-04 21:10:29,108][train_inner][INFO] - {"epoch": 17, "update": 16.799, "loss": "1.95", "ppl": "3.87", "wps": "362241", "ups": "3.06", "wpb": "118273", "bsz": "256", "num_updates": "864600", "lr": "0.000136768", "gnorm": "0.835", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 21:11:34,669][train_inner][INFO] - {"epoch": 17, "update": 16.803, "loss": "1.948", "ppl": "3.86", "wps": "361440", "ups": "3.05", "wpb": "118479", "bsz": "256", "num_updates": "864800", "lr": "0.000136566", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 21:12:39,783][train_inner][INFO] - {"epoch": 17, "update": 16.807, "loss": "1.956", "ppl": "3.88", "wps": "362683", "ups": "3.07", "wpb": "118078", "bsz": "256", "num_updates": "865000", "lr": "0.000136364", "gnorm": "0.84", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 21:13:45,250][train_inner][INFO] - {"epoch": 17, "update": 16.81, "loss": "1.945", "ppl": "3.85", "wps": "361482", "ups": "3.06", "wpb": "118323", "bsz": "256", "num_updates": "865200", "lr": "0.000136162", "gnorm": "0.835", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 21:14:37,818][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 21:14:50,933][train_inner][INFO] - {"epoch": 17, "update": 16.814, "loss": "1.948", "ppl": "3.86", "wps": "361856", "ups": "3.04", "wpb": "118837", "bsz": "256", "num_updates": "865400", "lr": "0.00013596", "gnorm": "0.836", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 21:15:55,943][train_inner][INFO] - {"epoch": 17, "update": 16.818, "loss": "1.953", "ppl": "3.87", "wps": "365031", "ups": "3.08", "wpb": "118653", "bsz": "256", "num_updates": "865600", "lr": "0.000135758", "gnorm": "0.84", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 21:17:01,034][train_inner][INFO] - {"epoch": 17, "update": 16.822, "loss": "1.952", "ppl": "3.87", "wps": "362474", "ups": "3.07", "wpb": "117954", "bsz": "256", "num_updates": "865800", "lr": "0.000135556", "gnorm": "0.837", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 21:18:06,245][train_inner][INFO] - {"epoch": 17, "update": 16.826, "loss": "1.944", "ppl": "3.85", "wps": "363277", "ups": "3.07", "wpb": "118445", "bsz": "256", "num_updates": "866000", "lr": "0.000135354", "gnorm": "0.835", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 21:19:11,336][train_inner][INFO] - {"epoch": 17, "update": 16.83, "loss": "1.95", "ppl": "3.86", "wps": "361877", "ups": "3.07", "wpb": "117773", "bsz": "256", "num_updates": "866200", "lr": "0.000135152", "gnorm": "0.839", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.1", "wall": "0"} +[2022-08-04 21:20:16,819][train_inner][INFO] - {"epoch": 17, "update": 16.834, "loss": "1.946", "ppl": "3.85", "wps": "361611", "ups": "3.05", "wpb": "118395", "bsz": "256", "num_updates": "866400", "lr": "0.000134949", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 21:21:22,028][train_inner][INFO] - {"epoch": 17, "update": 16.838, "loss": "1.944", "ppl": "3.85", "wps": "363202", "ups": "3.07", "wpb": "118419", "bsz": "256", "num_updates": "866600", "lr": "0.000134747", "gnorm": "0.835", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 21:22:27,084][train_inner][INFO] - {"epoch": 17, "update": 16.842, "loss": "1.95", "ppl": "3.86", "wps": "363536", "ups": "3.07", "wpb": "118248", "bsz": "256", "num_updates": "866800", "lr": "0.000134545", "gnorm": "0.837", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 21:23:32,255][train_inner][INFO] - {"epoch": 17, "update": 16.845, "loss": "1.945", "ppl": "3.85", "wps": "363199", "ups": "3.07", "wpb": "118349", "bsz": "256", "num_updates": "867000", "lr": "0.000134343", "gnorm": "0.837", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-04 21:24:37,235][train_inner][INFO] - {"epoch": 17, "update": 16.849, "loss": "1.948", "ppl": "3.86", "wps": "364198", "ups": "3.08", "wpb": "118326", "bsz": "256", "num_updates": "867200", "lr": "0.000134141", "gnorm": "0.842", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 21:25:42,319][train_inner][INFO] - {"epoch": 17, "update": 16.853, "loss": "1.951", "ppl": "3.87", "wps": "363406", "ups": "3.07", "wpb": "118258", "bsz": "256", "num_updates": "867400", "lr": "0.000133939", "gnorm": "0.84", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-04 21:25:47,769][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 21:26:48,207][train_inner][INFO] - {"epoch": 17, "update": 16.857, "loss": "1.949", "ppl": "3.86", "wps": "362156", "ups": "3.04", "wpb": "119307", "bsz": "256", "num_updates": "867600", "lr": "0.000133737", "gnorm": "0.833", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "22.7", "wall": "0"} +[2022-08-04 21:27:53,673][train_inner][INFO] - {"epoch": 17, "update": 16.861, "loss": "1.942", "ppl": "3.84", "wps": "362472", "ups": "3.06", "wpb": "118646", "bsz": "255.9", "num_updates": "867800", "lr": "0.000133535", "gnorm": "0.833", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 21:28:41,105][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 21:28:59,425][train_inner][INFO] - {"epoch": 17, "update": 16.865, "loss": "1.951", "ppl": "3.87", "wps": "359179", "ups": "3.04", "wpb": "118069", "bsz": "256", "num_updates": "868000", "lr": "0.000133333", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 21:30:04,920][train_inner][INFO] - {"epoch": 17, "update": 16.869, "loss": "1.948", "ppl": "3.86", "wps": "361912", "ups": "3.05", "wpb": "118516", "bsz": "256", "num_updates": "868200", "lr": "0.000133131", "gnorm": "0.836", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 21:31:09,910][train_inner][INFO] - {"epoch": 17, "update": 16.873, "loss": "1.945", "ppl": "3.85", "wps": "362886", "ups": "3.08", "wpb": "117918", "bsz": "256", "num_updates": "868400", "lr": "0.000132929", "gnorm": "0.839", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-04 21:32:15,493][train_inner][INFO] - {"epoch": 17, "update": 16.877, "loss": "1.944", "ppl": "3.85", "wps": "360172", "ups": "3.05", "wpb": "118104", "bsz": "256", "num_updates": "868600", "lr": "0.000132727", "gnorm": "0.837", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 21:33:20,712][train_inner][INFO] - {"epoch": 17, "update": 16.88, "loss": "1.944", "ppl": "3.85", "wps": "363285", "ups": "3.07", "wpb": "118464", "bsz": "256", "num_updates": "868800", "lr": "0.000132525", "gnorm": "0.838", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 21:34:26,153][train_inner][INFO] - {"epoch": 17, "update": 16.884, "loss": "1.946", "ppl": "3.85", "wps": "361366", "ups": "3.06", "wpb": "118238", "bsz": "256", "num_updates": "869000", "lr": "0.000132323", "gnorm": "0.841", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 21:35:31,333][train_inner][INFO] - {"epoch": 17, "update": 16.888, "loss": "1.941", "ppl": "3.84", "wps": "364509", "ups": "3.07", "wpb": "118792", "bsz": "256", "num_updates": "869200", "lr": "0.000132121", "gnorm": "0.835", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 21:36:36,742][train_inner][INFO] - {"epoch": 17, "update": 16.892, "loss": "1.95", "ppl": "3.86", "wps": "360480", "ups": "3.06", "wpb": "117890", "bsz": "256", "num_updates": "869400", "lr": "0.000131919", "gnorm": "0.843", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-04 21:37:42,133][train_inner][INFO] - {"epoch": 17, "update": 16.896, "loss": "1.947", "ppl": "3.86", "wps": "363382", "ups": "3.06", "wpb": "118809", "bsz": "256", "num_updates": "869600", "lr": "0.000131717", "gnorm": "0.838", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 21:38:47,476][train_inner][INFO] - {"epoch": 17, "update": 16.9, "loss": "1.938", "ppl": "3.83", "wps": "364138", "ups": "3.06", "wpb": "118967", "bsz": "256", "num_updates": "869800", "lr": "0.000131515", "gnorm": "0.834", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-04 21:39:53,110][train_inner][INFO] - {"epoch": 17, "update": 16.904, "loss": "1.944", "ppl": "3.85", "wps": "361208", "ups": "3.05", "wpb": "118524", "bsz": "256", "num_updates": "870000", "lr": "0.000131313", "gnorm": "0.841", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 21:40:58,804][train_inner][INFO] - {"epoch": 17, "update": 16.908, "loss": "1.943", "ppl": "3.84", "wps": "361245", "ups": "3.04", "wpb": "118657", "bsz": "256", "num_updates": "870200", "lr": "0.000131111", "gnorm": "0.839", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 21:42:04,187][train_inner][INFO] - {"epoch": 17, "update": 16.912, "loss": "1.942", "ppl": "3.84", "wps": "361596", "ups": "3.06", "wpb": "118209", "bsz": "256", "num_updates": "870400", "lr": "0.000130909", "gnorm": "0.834", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.3", "wall": "0"} +[2022-08-04 21:43:09,732][train_inner][INFO] - {"epoch": 17, "update": 16.915, "loss": "1.947", "ppl": "3.86", "wps": "360438", "ups": "3.05", "wpb": "118123", "bsz": "256", "num_updates": "870600", "lr": "0.000130707", "gnorm": "0.839", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-04 21:44:14,725][train_inner][INFO] - {"epoch": 17, "update": 16.919, "loss": "1.945", "ppl": "3.85", "wps": "362911", "ups": "3.08", "wpb": "117930", "bsz": "256", "num_updates": "870800", "lr": "0.000130505", "gnorm": "0.843", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.1", "wall": "0"} +[2022-08-04 21:45:19,525][train_inner][INFO] - {"epoch": 17, "update": 16.923, "loss": "1.946", "ppl": "3.85", "wps": "364589", "ups": "3.09", "wpb": "118125", "bsz": "256", "num_updates": "871000", "lr": "0.000130303", "gnorm": "0.843", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "0"} +[2022-08-04 21:46:24,712][train_inner][INFO] - {"epoch": 17, "update": 16.927, "loss": "1.951", "ppl": "3.87", "wps": "361846", "ups": "3.07", "wpb": "117936", "bsz": "256", "num_updates": "871200", "lr": "0.000130101", "gnorm": "0.844", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-04 21:47:30,199][train_inner][INFO] - {"epoch": 17, "update": 16.931, "loss": "1.944", "ppl": "3.85", "wps": "360610", "ups": "3.05", "wpb": "118075", "bsz": "256", "num_updates": "871400", "lr": "0.000129899", "gnorm": "0.841", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 21:47:30,494][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 21:48:35,752][train_inner][INFO] - {"epoch": 17, "update": 16.935, "loss": "1.943", "ppl": "3.84", "wps": "360653", "ups": "3.05", "wpb": "118207", "bsz": "256", "num_updates": "871600", "lr": "0.000129697", "gnorm": "0.84", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 21:49:40,962][train_inner][INFO] - {"epoch": 17, "update": 16.939, "loss": "1.945", "ppl": "3.85", "wps": "362356", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "871800", "lr": "0.000129495", "gnorm": "0.844", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 21:50:45,956][train_inner][INFO] - {"epoch": 17, "update": 16.943, "loss": "1.946", "ppl": "3.85", "wps": "363839", "ups": "3.08", "wpb": "118222", "bsz": "256", "num_updates": "872000", "lr": "0.000129293", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 21:51:51,506][train_inner][INFO] - {"epoch": 17, "update": 16.947, "loss": "1.941", "ppl": "3.84", "wps": "361639", "ups": "3.05", "wpb": "118525", "bsz": "256", "num_updates": "872200", "lr": "0.000129091", "gnorm": "0.839", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 21:52:56,645][train_inner][INFO] - {"epoch": 17, "update": 16.95, "loss": "1.945", "ppl": "3.85", "wps": "364338", "ups": "3.07", "wpb": "118661", "bsz": "256", "num_updates": "872400", "lr": "0.000128889", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-04 21:54:01,651][train_inner][INFO] - {"epoch": 17, "update": 16.954, "loss": "1.95", "ppl": "3.86", "wps": "361465", "ups": "3.08", "wpb": "117486", "bsz": "256", "num_updates": "872600", "lr": "0.000128687", "gnorm": "0.851", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 21:55:06,842][train_inner][INFO] - {"epoch": 17, "update": 16.958, "loss": "1.948", "ppl": "3.86", "wps": "362144", "ups": "3.07", "wpb": "118041", "bsz": "256", "num_updates": "872800", "lr": "0.000128485", "gnorm": "0.841", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 21:55:08,164][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 21:56:12,583][train_inner][INFO] - {"epoch": 17, "update": 16.962, "loss": "1.95", "ppl": "3.86", "wps": "359900", "ups": "3.04", "wpb": "118297", "bsz": "256", "num_updates": "873000", "lr": "0.000128283", "gnorm": "0.845", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 21:57:17,708][train_inner][INFO] - {"epoch": 17, "update": 16.966, "loss": "1.941", "ppl": "3.84", "wps": "363441", "ups": "3.07", "wpb": "118344", "bsz": "256", "num_updates": "873200", "lr": "0.000128081", "gnorm": "0.845", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 21:58:23,057][train_inner][INFO] - {"epoch": 17, "update": 16.97, "loss": "1.945", "ppl": "3.85", "wps": "362937", "ups": "3.06", "wpb": "118586", "bsz": "256", "num_updates": "873400", "lr": "0.000127879", "gnorm": "0.842", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-04 21:59:28,183][train_inner][INFO] - {"epoch": 17, "update": 16.974, "loss": "1.945", "ppl": "3.85", "wps": "363985", "ups": "3.07", "wpb": "118522", "bsz": "256", "num_updates": "873600", "lr": "0.000127677", "gnorm": "0.845", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-04 22:00:33,162][train_inner][INFO] - {"epoch": 17, "update": 16.978, "loss": "1.946", "ppl": "3.85", "wps": "364449", "ups": "3.08", "wpb": "118406", "bsz": "256", "num_updates": "873800", "lr": "0.000127475", "gnorm": "0.843", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 22:01:38,384][train_inner][INFO] - {"epoch": 17, "update": 16.982, "loss": "1.943", "ppl": "3.84", "wps": "363055", "ups": "3.07", "wpb": "118376", "bsz": "256", "num_updates": "874000", "lr": "0.000127273", "gnorm": "0.844", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-04 22:02:43,746][train_inner][INFO] - {"epoch": 17, "update": 16.985, "loss": "1.939", "ppl": "3.83", "wps": "362374", "ups": "3.06", "wpb": "118425", "bsz": "256", "num_updates": "874200", "lr": "0.000127071", "gnorm": "0.846", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-04 22:03:49,041][train_inner][INFO] - {"epoch": 17, "update": 16.989, "loss": "1.94", "ppl": "3.84", "wps": "362543", "ups": "3.06", "wpb": "118361", "bsz": "256", "num_updates": "874400", "lr": "0.000126869", "gnorm": "0.841", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 22:04:53,904][train_inner][INFO] - {"epoch": 17, "update": 16.993, "loss": "1.944", "ppl": "3.85", "wps": "365151", "ups": "3.08", "wpb": "118421", "bsz": "256", "num_updates": "874600", "lr": "0.000126667", "gnorm": "0.843", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:05:59,076][train_inner][INFO] - {"epoch": 17, "update": 16.997, "loss": "1.941", "ppl": "3.84", "wps": "360615", "ups": "3.07", "wpb": "117509", "bsz": "256", "num_updates": "874800", "lr": "0.000126465", "gnorm": "0.849", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-04 22:06:48,735][fairseq_cli.train][INFO] - end of epoch 17 (average epoch stats below) +[2022-08-04 22:06:48,736][train][INFO] - {"epoch": 17, "train_loss": "1.959", "train_ppl": "3.89", "train_wps": "360688", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "874952", "train_lr": "0.000126311", "train_gnorm": "0.827", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16712", "train_gb_free": "20.5", "train_wall": "0"} +[2022-08-04 22:06:48,832][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-04 22:06:48,835][fairseq.trainer][INFO] - begin training epoch 18 +[2022-08-04 22:06:48,835][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-04 22:07:16,341][train_inner][INFO] - {"epoch": 18, "update": 17.001, "loss": "1.94", "ppl": "3.84", "wps": "305842", "ups": "2.59", "wpb": "118153", "bsz": "255.4", "num_updates": "875000", "lr": "0.000126263", "gnorm": "0.847", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 22:08:21,640][train_inner][INFO] - {"epoch": 18, "update": 17.005, "loss": "1.943", "ppl": "3.85", "wps": "361150", "ups": "3.06", "wpb": "117912", "bsz": "256", "num_updates": "875200", "lr": "0.000126061", "gnorm": "0.848", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 22:09:26,777][train_inner][INFO] - {"epoch": 18, "update": 17.009, "loss": "1.942", "ppl": "3.84", "wps": "364674", "ups": "3.07", "wpb": "118766", "bsz": "256", "num_updates": "875400", "lr": "0.000125859", "gnorm": "0.84", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-04 22:10:32,109][train_inner][INFO] - {"epoch": 18, "update": 17.013, "loss": "1.939", "ppl": "3.83", "wps": "363149", "ups": "3.06", "wpb": "118625", "bsz": "256", "num_updates": "875600", "lr": "0.000125657", "gnorm": "0.847", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:11:37,234][train_inner][INFO] - {"epoch": 18, "update": 17.016, "loss": "1.938", "ppl": "3.83", "wps": "364080", "ups": "3.07", "wpb": "118552", "bsz": "256", "num_updates": "875800", "lr": "0.000125455", "gnorm": "0.84", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 22:12:42,152][train_inner][INFO] - {"epoch": 18, "update": 17.02, "loss": "1.937", "ppl": "3.83", "wps": "364400", "ups": "3.08", "wpb": "118268", "bsz": "256", "num_updates": "876000", "lr": "0.000125253", "gnorm": "0.846", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-04 22:13:47,619][train_inner][INFO] - {"epoch": 18, "update": 17.024, "loss": "1.939", "ppl": "3.83", "wps": "362617", "ups": "3.06", "wpb": "118695", "bsz": "256", "num_updates": "876200", "lr": "0.000125051", "gnorm": "0.846", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 22:14:53,172][train_inner][INFO] - {"epoch": 18, "update": 17.028, "loss": "1.935", "ppl": "3.82", "wps": "364574", "ups": "3.05", "wpb": "119492", "bsz": "256", "num_updates": "876400", "lr": "0.000124848", "gnorm": "0.839", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.1", "wall": "0"} +[2022-08-04 22:15:58,271][train_inner][INFO] - {"epoch": 18, "update": 17.032, "loss": "1.939", "ppl": "3.84", "wps": "363283", "ups": "3.07", "wpb": "118245", "bsz": "256", "num_updates": "876600", "lr": "0.000124646", "gnorm": "0.844", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 22:17:03,774][train_inner][INFO] - {"epoch": 18, "update": 17.036, "loss": "1.942", "ppl": "3.84", "wps": "362171", "ups": "3.05", "wpb": "118614", "bsz": "256", "num_updates": "876800", "lr": "0.000124444", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.2", "wall": "0"} +[2022-08-04 22:18:08,966][train_inner][INFO] - {"epoch": 18, "update": 17.04, "loss": "1.943", "ppl": "3.85", "wps": "362244", "ups": "3.07", "wpb": "118076", "bsz": "256", "num_updates": "877000", "lr": "0.000124242", "gnorm": "0.848", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 22:19:13,939][train_inner][INFO] - {"epoch": 18, "update": 17.044, "loss": "1.941", "ppl": "3.84", "wps": "361673", "ups": "3.08", "wpb": "117493", "bsz": "256", "num_updates": "877200", "lr": "0.00012404", "gnorm": "0.855", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-04 22:20:19,600][train_inner][INFO] - {"epoch": 18, "update": 17.048, "loss": "1.938", "ppl": "3.83", "wps": "358623", "ups": "3.05", "wpb": "117735", "bsz": "256", "num_updates": "877400", "lr": "0.000123838", "gnorm": "0.845", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 22:21:24,553][train_inner][INFO] - {"epoch": 18, "update": 17.051, "loss": "1.94", "ppl": "3.84", "wps": "364348", "ups": "3.08", "wpb": "118326", "bsz": "256", "num_updates": "877600", "lr": "0.000123636", "gnorm": "0.848", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 22:22:27,504][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 22:22:30,412][train_inner][INFO] - {"epoch": 18, "update": 17.055, "loss": "1.941", "ppl": "3.84", "wps": "360249", "ups": "3.04", "wpb": "118627", "bsz": "256", "num_updates": "877800", "lr": "0.000123434", "gnorm": "0.841", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.1", "wall": "0"} +[2022-08-04 22:23:35,573][train_inner][INFO] - {"epoch": 18, "update": 17.059, "loss": "1.938", "ppl": "3.83", "wps": "364535", "ups": "3.07", "wpb": "118765", "bsz": "256", "num_updates": "878000", "lr": "0.000123232", "gnorm": "0.843", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 22:24:40,709][train_inner][INFO] - {"epoch": 18, "update": 17.063, "loss": "1.94", "ppl": "3.84", "wps": "362349", "ups": "3.07", "wpb": "118008", "bsz": "256", "num_updates": "878200", "lr": "0.00012303", "gnorm": "0.845", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-04 22:25:45,476][train_inner][INFO] - {"epoch": 18, "update": 17.067, "loss": "1.938", "ppl": "3.83", "wps": "367067", "ups": "3.09", "wpb": "118868", "bsz": "256", "num_updates": "878400", "lr": "0.000122828", "gnorm": "0.844", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "0"} +[2022-08-04 22:26:50,663][train_inner][INFO] - {"epoch": 18, "update": 17.071, "loss": "1.938", "ppl": "3.83", "wps": "361821", "ups": "3.07", "wpb": "117928", "bsz": "256", "num_updates": "878600", "lr": "0.000122626", "gnorm": "0.846", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:27:55,848][train_inner][INFO] - {"epoch": 18, "update": 17.075, "loss": "1.938", "ppl": "3.83", "wps": "363350", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "878800", "lr": "0.000122424", "gnorm": "0.844", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 22:29:01,047][train_inner][INFO] - {"epoch": 18, "update": 17.079, "loss": "1.938", "ppl": "3.83", "wps": "363289", "ups": "3.07", "wpb": "118427", "bsz": "256", "num_updates": "879000", "lr": "0.000122222", "gnorm": "0.848", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:30:06,540][train_inner][INFO] - {"epoch": 18, "update": 17.083, "loss": "1.934", "ppl": "3.82", "wps": "362260", "ups": "3.05", "wpb": "118626", "bsz": "256", "num_updates": "879200", "lr": "0.00012202", "gnorm": "0.843", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 22:31:11,665][train_inner][INFO] - {"epoch": 18, "update": 17.086, "loss": "1.936", "ppl": "3.83", "wps": "364852", "ups": "3.07", "wpb": "118803", "bsz": "256", "num_updates": "879400", "lr": "0.000121818", "gnorm": "0.847", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-04 22:32:16,925][train_inner][INFO] - {"epoch": 18, "update": 17.09, "loss": "1.933", "ppl": "3.82", "wps": "363236", "ups": "3.06", "wpb": "118521", "bsz": "256", "num_updates": "879600", "lr": "0.000121616", "gnorm": "0.842", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-04 22:33:22,170][train_inner][INFO] - {"epoch": 18, "update": 17.094, "loss": "1.935", "ppl": "3.83", "wps": "361840", "ups": "3.07", "wpb": "118040", "bsz": "256", "num_updates": "879800", "lr": "0.000121414", "gnorm": "0.849", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27", "wall": "0"} +[2022-08-04 22:34:27,504][train_inner][INFO] - {"epoch": 18, "update": 17.098, "loss": "1.94", "ppl": "3.84", "wps": "360325", "ups": "3.06", "wpb": "117693", "bsz": "256", "num_updates": "880000", "lr": "0.000121212", "gnorm": "0.857", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-04 22:35:32,709][train_inner][INFO] - {"epoch": 18, "update": 17.102, "loss": "1.942", "ppl": "3.84", "wps": "361966", "ups": "3.07", "wpb": "118008", "bsz": "256", "num_updates": "880200", "lr": "0.00012101", "gnorm": "0.849", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:36:37,890][train_inner][INFO] - {"epoch": 18, "update": 17.106, "loss": "1.938", "ppl": "3.83", "wps": "361495", "ups": "3.07", "wpb": "117810", "bsz": "256", "num_updates": "880400", "lr": "0.000120808", "gnorm": "0.846", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-04 22:37:43,266][train_inner][INFO] - {"epoch": 18, "update": 17.11, "loss": "1.937", "ppl": "3.83", "wps": "363442", "ups": "3.06", "wpb": "118799", "bsz": "256", "num_updates": "880600", "lr": "0.000120606", "gnorm": "0.846", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 22:38:48,755][train_inner][INFO] - {"epoch": 18, "update": 17.114, "loss": "1.936", "ppl": "3.83", "wps": "361570", "ups": "3.05", "wpb": "118393", "bsz": "256", "num_updates": "880800", "lr": "0.000120404", "gnorm": "0.844", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-04 22:39:53,867][train_inner][INFO] - {"epoch": 18, "update": 17.117, "loss": "1.936", "ppl": "3.83", "wps": "363788", "ups": "3.07", "wpb": "118433", "bsz": "256", "num_updates": "881000", "lr": "0.000120202", "gnorm": "0.846", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 22:40:59,015][train_inner][INFO] - {"epoch": 18, "update": 17.121, "loss": "1.936", "ppl": "3.83", "wps": "364422", "ups": "3.07", "wpb": "118705", "bsz": "256", "num_updates": "881200", "lr": "0.00012", "gnorm": "0.842", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 22:42:04,050][train_inner][INFO] - {"epoch": 18, "update": 17.125, "loss": "1.939", "ppl": "3.83", "wps": "363203", "ups": "3.08", "wpb": "118101", "bsz": "256", "num_updates": "881400", "lr": "0.000119798", "gnorm": "0.848", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 22:43:09,388][train_inner][INFO] - {"epoch": 18, "update": 17.129, "loss": "1.939", "ppl": "3.83", "wps": "362111", "ups": "3.06", "wpb": "118297", "bsz": "256", "num_updates": "881600", "lr": "0.000119596", "gnorm": "0.848", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 22:44:14,565][train_inner][INFO] - {"epoch": 18, "update": 17.133, "loss": "1.941", "ppl": "3.84", "wps": "363021", "ups": "3.07", "wpb": "118301", "bsz": "256", "num_updates": "881800", "lr": "0.000119394", "gnorm": "0.849", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:44:43,329][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 22:45:19,462][train_inner][INFO] - {"epoch": 18, "update": 17.137, "loss": "1.942", "ppl": "3.84", "wps": "362844", "ups": "3.08", "wpb": "117723", "bsz": "256", "num_updates": "882000", "lr": "0.000119192", "gnorm": "0.85", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:46:24,413][train_inner][INFO] - {"epoch": 18, "update": 17.141, "loss": "1.94", "ppl": "3.84", "wps": "363113", "ups": "3.08", "wpb": "117921", "bsz": "256", "num_updates": "882200", "lr": "0.00011899", "gnorm": "0.854", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 22:47:29,566][train_inner][INFO] - {"epoch": 18, "update": 17.145, "loss": "1.934", "ppl": "3.82", "wps": "362304", "ups": "3.07", "wpb": "118025", "bsz": "256", "num_updates": "882400", "lr": "0.000118788", "gnorm": "0.848", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-04 22:48:34,792][train_inner][INFO] - {"epoch": 18, "update": 17.149, "loss": "1.931", "ppl": "3.81", "wps": "363649", "ups": "3.07", "wpb": "118594", "bsz": "256", "num_updates": "882600", "lr": "0.000118586", "gnorm": "0.846", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.4", "wall": "0"} +[2022-08-04 22:49:39,780][train_inner][INFO] - {"epoch": 18, "update": 17.152, "loss": "1.938", "ppl": "3.83", "wps": "365408", "ups": "3.08", "wpb": "118735", "bsz": "256", "num_updates": "882800", "lr": "0.000118384", "gnorm": "0.863", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 22:50:45,152][train_inner][INFO] - {"epoch": 18, "update": 17.156, "loss": "1.937", "ppl": "3.83", "wps": "362442", "ups": "3.06", "wpb": "118464", "bsz": "256", "num_updates": "883000", "lr": "0.000118182", "gnorm": "0.847", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-04 22:51:50,484][train_inner][INFO] - {"epoch": 18, "update": 17.16, "loss": "1.935", "ppl": "3.82", "wps": "361748", "ups": "3.06", "wpb": "118168", "bsz": "256", "num_updates": "883200", "lr": "0.00011798", "gnorm": "0.847", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 22:52:55,685][train_inner][INFO] - {"epoch": 18, "update": 17.164, "loss": "1.941", "ppl": "3.84", "wps": "362591", "ups": "3.07", "wpb": "118204", "bsz": "256", "num_updates": "883400", "lr": "0.000117778", "gnorm": "0.851", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 22:54:00,772][train_inner][INFO] - {"epoch": 18, "update": 17.168, "loss": "1.947", "ppl": "3.86", "wps": "362006", "ups": "3.07", "wpb": "117808", "bsz": "256", "num_updates": "883600", "lr": "0.000117576", "gnorm": "0.854", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 22:55:06,105][train_inner][INFO] - {"epoch": 18, "update": 17.172, "loss": "1.936", "ppl": "3.83", "wps": "361988", "ups": "3.06", "wpb": "118246", "bsz": "256", "num_updates": "883800", "lr": "0.000117374", "gnorm": "0.849", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 22:55:52,473][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 22:56:11,605][train_inner][INFO] - {"epoch": 18, "update": 17.176, "loss": "1.937", "ppl": "3.83", "wps": "359665", "ups": "3.05", "wpb": "117775", "bsz": "256", "num_updates": "884000", "lr": "0.000117172", "gnorm": "0.853", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-04 22:57:16,361][train_inner][INFO] - {"epoch": 18, "update": 17.18, "loss": "1.937", "ppl": "3.83", "wps": "364996", "ups": "3.09", "wpb": "118179", "bsz": "256", "num_updates": "884200", "lr": "0.00011697", "gnorm": "0.85", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "0"} +[2022-08-04 22:58:21,617][train_inner][INFO] - {"epoch": 18, "update": 17.184, "loss": "1.94", "ppl": "3.84", "wps": "361831", "ups": "3.06", "wpb": "118055", "bsz": "256", "num_updates": "884400", "lr": "0.000116768", "gnorm": "0.85", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 22:59:26,823][train_inner][INFO] - {"epoch": 18, "update": 17.187, "loss": "1.94", "ppl": "3.84", "wps": "361743", "ups": "3.07", "wpb": "117937", "bsz": "256", "num_updates": "884600", "lr": "0.000116566", "gnorm": "0.855", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-04 23:00:31,966][train_inner][INFO] - {"epoch": 18, "update": 17.191, "loss": "1.933", "ppl": "3.82", "wps": "362740", "ups": "3.07", "wpb": "118149", "bsz": "256", "num_updates": "884800", "lr": "0.000116364", "gnorm": "0.852", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-04 23:01:05,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 23:01:37,783][train_inner][INFO] - {"epoch": 18, "update": 17.195, "loss": "1.934", "ppl": "3.82", "wps": "362125", "ups": "3.04", "wpb": "119167", "bsz": "256", "num_updates": "885000", "lr": "0.000116162", "gnorm": "0.845", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 23:02:42,974][train_inner][INFO] - {"epoch": 18, "update": 17.199, "loss": "1.936", "ppl": "3.83", "wps": "364026", "ups": "3.07", "wpb": "118656", "bsz": "256", "num_updates": "885200", "lr": "0.00011596", "gnorm": "0.85", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.1", "wall": "0"} +[2022-08-04 23:03:48,051][train_inner][INFO] - {"epoch": 18, "update": 17.203, "loss": "1.938", "ppl": "3.83", "wps": "363585", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "885400", "lr": "0.000115758", "gnorm": "0.854", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 23:04:53,440][train_inner][INFO] - {"epoch": 18, "update": 17.207, "loss": "1.933", "ppl": "3.82", "wps": "361786", "ups": "3.06", "wpb": "118283", "bsz": "256", "num_updates": "885600", "lr": "0.000115556", "gnorm": "0.85", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-04 23:05:58,477][train_inner][INFO] - {"epoch": 18, "update": 17.211, "loss": "1.937", "ppl": "3.83", "wps": "363890", "ups": "3.08", "wpb": "118329", "bsz": "256", "num_updates": "885800", "lr": "0.000115354", "gnorm": "0.852", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 23:07:03,510][train_inner][INFO] - {"epoch": 18, "update": 17.215, "loss": "1.938", "ppl": "3.83", "wps": "362496", "ups": "3.08", "wpb": "117859", "bsz": "256", "num_updates": "886000", "lr": "0.000115152", "gnorm": "0.854", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-04 23:08:08,416][train_inner][INFO] - {"epoch": 18, "update": 17.219, "loss": "1.934", "ppl": "3.82", "wps": "363076", "ups": "3.08", "wpb": "117826", "bsz": "256", "num_updates": "886200", "lr": "0.000114949", "gnorm": "0.855", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-04 23:09:13,850][train_inner][INFO] - {"epoch": 18, "update": 17.222, "loss": "1.94", "ppl": "3.84", "wps": "359647", "ups": "3.06", "wpb": "117662", "bsz": "256", "num_updates": "886400", "lr": "0.000114747", "gnorm": "0.855", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 23:10:19,101][train_inner][INFO] - {"epoch": 18, "update": 17.226, "loss": "1.935", "ppl": "3.82", "wps": "361698", "ups": "3.07", "wpb": "118004", "bsz": "256", "num_updates": "886600", "lr": "0.000114545", "gnorm": "0.853", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 23:11:24,310][train_inner][INFO] - {"epoch": 18, "update": 17.23, "loss": "1.936", "ppl": "3.83", "wps": "363697", "ups": "3.07", "wpb": "118579", "bsz": "256", "num_updates": "886800", "lr": "0.000114343", "gnorm": "0.854", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-04 23:12:29,307][train_inner][INFO] - {"epoch": 18, "update": 17.234, "loss": "1.933", "ppl": "3.82", "wps": "363591", "ups": "3.08", "wpb": "118160", "bsz": "256", "num_updates": "887000", "lr": "0.000114141", "gnorm": "0.851", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-04 23:13:34,332][train_inner][INFO] - {"epoch": 18, "update": 17.238, "loss": "1.933", "ppl": "3.82", "wps": "363878", "ups": "3.08", "wpb": "118303", "bsz": "256", "num_updates": "887200", "lr": "0.000113939", "gnorm": "0.854", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "0"} +[2022-08-04 23:14:39,577][train_inner][INFO] - {"epoch": 18, "update": 17.242, "loss": "1.94", "ppl": "3.84", "wps": "361882", "ups": "3.07", "wpb": "118052", "bsz": "256", "num_updates": "887400", "lr": "0.000113737", "gnorm": "0.855", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 23:15:44,975][train_inner][INFO] - {"epoch": 18, "update": 17.246, "loss": "1.935", "ppl": "3.82", "wps": "360233", "ups": "3.06", "wpb": "117792", "bsz": "256", "num_updates": "887600", "lr": "0.000113535", "gnorm": "0.855", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 23:16:50,478][train_inner][INFO] - {"epoch": 18, "update": 17.25, "loss": "1.935", "ppl": "3.82", "wps": "359279", "ups": "3.05", "wpb": "117666", "bsz": "256", "num_updates": "887800", "lr": "0.000113333", "gnorm": "0.853", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 23:17:55,365][train_inner][INFO] - {"epoch": 18, "update": 17.253, "loss": "1.941", "ppl": "3.84", "wps": "362714", "ups": "3.08", "wpb": "117664", "bsz": "256", "num_updates": "888000", "lr": "0.000113131", "gnorm": "0.86", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.2", "wall": "0"} +[2022-08-04 23:19:00,993][train_inner][INFO] - {"epoch": 18, "update": 17.257, "loss": "1.936", "ppl": "3.83", "wps": "360981", "ups": "3.05", "wpb": "118449", "bsz": "256", "num_updates": "888200", "lr": "0.000112929", "gnorm": "0.852", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-04 23:20:06,019][train_inner][INFO] - {"epoch": 18, "update": 17.261, "loss": "1.932", "ppl": "3.82", "wps": "365972", "ups": "3.08", "wpb": "118988", "bsz": "256", "num_updates": "888400", "lr": "0.000112727", "gnorm": "0.848", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 23:20:11,971][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 23:21:11,624][train_inner][INFO] - {"epoch": 18, "update": 17.265, "loss": "1.928", "ppl": "3.81", "wps": "361148", "ups": "3.05", "wpb": "118463", "bsz": "256", "num_updates": "888600", "lr": "0.000112525", "gnorm": "0.851", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.2", "wall": "0"} +[2022-08-04 23:22:17,061][train_inner][INFO] - {"epoch": 18, "update": 17.269, "loss": "1.927", "ppl": "3.8", "wps": "362603", "ups": "3.06", "wpb": "118637", "bsz": "256", "num_updates": "888800", "lr": "0.000112323", "gnorm": "0.846", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-04 23:23:22,495][train_inner][INFO] - {"epoch": 18, "update": 17.273, "loss": "1.934", "ppl": "3.82", "wps": "361840", "ups": "3.06", "wpb": "118380", "bsz": "256", "num_updates": "889000", "lr": "0.000112121", "gnorm": "0.855", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-04 23:24:27,631][train_inner][INFO] - {"epoch": 18, "update": 17.277, "loss": "1.929", "ppl": "3.81", "wps": "362960", "ups": "3.07", "wpb": "118207", "bsz": "256", "num_updates": "889200", "lr": "0.000111919", "gnorm": "0.853", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-04 23:25:32,866][train_inner][INFO] - {"epoch": 18, "update": 17.281, "loss": "1.941", "ppl": "3.84", "wps": "362491", "ups": "3.07", "wpb": "118233", "bsz": "256", "num_updates": "889400", "lr": "0.000111717", "gnorm": "0.859", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 23:26:37,537][train_inner][INFO] - {"epoch": 18, "update": 17.285, "loss": "1.932", "ppl": "3.82", "wps": "365620", "ups": "3.09", "wpb": "118224", "bsz": "256", "num_updates": "889600", "lr": "0.000111515", "gnorm": "0.855", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "0"} +[2022-08-04 23:27:42,719][train_inner][INFO] - {"epoch": 18, "update": 17.288, "loss": "1.935", "ppl": "3.82", "wps": "363195", "ups": "3.07", "wpb": "118367", "bsz": "256", "num_updates": "889800", "lr": "0.000111313", "gnorm": "0.852", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-04 23:28:48,009][train_inner][INFO] - {"epoch": 18, "update": 17.292, "loss": "1.93", "ppl": "3.81", "wps": "361435", "ups": "3.06", "wpb": "117976", "bsz": "256", "num_updates": "890000", "lr": "0.000111111", "gnorm": "0.855", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 23:29:53,433][train_inner][INFO] - {"epoch": 18, "update": 17.296, "loss": "1.935", "ppl": "3.82", "wps": "360578", "ups": "3.06", "wpb": "117951", "bsz": "256", "num_updates": "890200", "lr": "0.000110909", "gnorm": "0.857", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 23:30:58,703][train_inner][INFO] - {"epoch": 18, "update": 17.3, "loss": "1.933", "ppl": "3.82", "wps": "363273", "ups": "3.06", "wpb": "118551", "bsz": "256", "num_updates": "890400", "lr": "0.000110707", "gnorm": "0.866", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-04 23:32:03,396][train_inner][INFO] - {"epoch": 18, "update": 17.304, "loss": "1.937", "ppl": "3.83", "wps": "362785", "ups": "3.09", "wpb": "117348", "bsz": "256", "num_updates": "890600", "lr": "0.000110505", "gnorm": "0.866", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "0"} +[2022-08-04 23:33:08,916][train_inner][INFO] - {"epoch": 18, "update": 17.308, "loss": "1.933", "ppl": "3.82", "wps": "360179", "ups": "3.05", "wpb": "117992", "bsz": "256", "num_updates": "890800", "lr": "0.000110303", "gnorm": "0.863", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 23:33:19,724][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 23:34:14,443][train_inner][INFO] - {"epoch": 18, "update": 17.312, "loss": "1.934", "ppl": "3.82", "wps": "360886", "ups": "3.05", "wpb": "118236", "bsz": "256", "num_updates": "891000", "lr": "0.000110101", "gnorm": "0.852", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "0"} +[2022-08-04 23:34:55,534][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 23:35:20,209][train_inner][INFO] - {"epoch": 18, "update": 17.316, "loss": "1.935", "ppl": "3.82", "wps": "359575", "ups": "3.04", "wpb": "118238", "bsz": "256", "num_updates": "891200", "lr": "0.000109899", "gnorm": "0.854", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-04 23:36:25,319][train_inner][INFO] - {"epoch": 18, "update": 17.32, "loss": "1.93", "ppl": "3.81", "wps": "363381", "ups": "3.07", "wpb": "118297", "bsz": "256", "num_updates": "891400", "lr": "0.000109697", "gnorm": "0.856", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 23:37:30,539][train_inner][INFO] - {"epoch": 18, "update": 17.323, "loss": "1.934", "ppl": "3.82", "wps": "363211", "ups": "3.07", "wpb": "118442", "bsz": "256", "num_updates": "891600", "lr": "0.000109495", "gnorm": "0.856", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 23:38:36,076][train_inner][INFO] - {"epoch": 18, "update": 17.327, "loss": "1.931", "ppl": "3.81", "wps": "361098", "ups": "3.05", "wpb": "118325", "bsz": "256", "num_updates": "891800", "lr": "0.000109293", "gnorm": "0.855", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-04 23:39:40,720][train_inner][INFO] - {"epoch": 18, "update": 17.331, "loss": "1.934", "ppl": "3.82", "wps": "363704", "ups": "3.09", "wpb": "117545", "bsz": "256", "num_updates": "892000", "lr": "0.000109091", "gnorm": "0.854", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "0"} +[2022-08-04 23:40:46,097][train_inner][INFO] - {"epoch": 18, "update": 17.335, "loss": "1.922", "ppl": "3.79", "wps": "361839", "ups": "3.06", "wpb": "118276", "bsz": "256", "num_updates": "892200", "lr": "0.000108889", "gnorm": "0.859", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-04 23:41:51,125][train_inner][INFO] - {"epoch": 18, "update": 17.339, "loss": "1.931", "ppl": "3.81", "wps": "362893", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "892400", "lr": "0.000108687", "gnorm": "0.857", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-04 23:42:56,292][train_inner][INFO] - {"epoch": 18, "update": 17.343, "loss": "1.931", "ppl": "3.81", "wps": "362339", "ups": "3.07", "wpb": "118061", "bsz": "256", "num_updates": "892600", "lr": "0.000108485", "gnorm": "0.855", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-04 23:44:01,343][train_inner][INFO] - {"epoch": 18, "update": 17.347, "loss": "1.931", "ppl": "3.81", "wps": "363956", "ups": "3.07", "wpb": "118377", "bsz": "256", "num_updates": "892800", "lr": "0.000108283", "gnorm": "0.853", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-04 23:45:06,563][train_inner][INFO] - {"epoch": 18, "update": 17.351, "loss": "1.926", "ppl": "3.8", "wps": "362649", "ups": "3.07", "wpb": "118257", "bsz": "256", "num_updates": "893000", "lr": "0.000108081", "gnorm": "0.854", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-04 23:46:12,163][train_inner][INFO] - {"epoch": 18, "update": 17.355, "loss": "1.936", "ppl": "3.83", "wps": "360689", "ups": "3.05", "wpb": "118304", "bsz": "256", "num_updates": "893200", "lr": "0.000107879", "gnorm": "0.857", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-04 23:47:17,473][train_inner][INFO] - {"epoch": 18, "update": 17.358, "loss": "1.927", "ppl": "3.8", "wps": "363639", "ups": "3.06", "wpb": "118745", "bsz": "256", "num_updates": "893400", "lr": "0.000107677", "gnorm": "0.852", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-04 23:48:22,824][train_inner][INFO] - {"epoch": 18, "update": 17.362, "loss": "1.932", "ppl": "3.81", "wps": "361650", "ups": "3.06", "wpb": "118169", "bsz": "256", "num_updates": "893600", "lr": "0.000107475", "gnorm": "0.857", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-04 23:49:28,129][train_inner][INFO] - {"epoch": 18, "update": 17.366, "loss": "1.929", "ppl": "3.81", "wps": "363442", "ups": "3.06", "wpb": "118672", "bsz": "256", "num_updates": "893800", "lr": "0.000107273", "gnorm": "0.849", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-04 23:50:33,414][train_inner][INFO] - {"epoch": 18, "update": 17.37, "loss": "1.931", "ppl": "3.81", "wps": "362273", "ups": "3.06", "wpb": "118238", "bsz": "256", "num_updates": "894000", "lr": "0.000107071", "gnorm": "0.857", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.6", "wall": "0"} +[2022-08-04 23:51:38,200][train_inner][INFO] - {"epoch": 18, "update": 17.374, "loss": "1.933", "ppl": "3.82", "wps": "363377", "ups": "3.09", "wpb": "117708", "bsz": "256", "num_updates": "894200", "lr": "0.000106869", "gnorm": "0.857", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "0"} +[2022-08-04 23:52:06,128][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 23:52:08,336][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-04 23:52:43,868][train_inner][INFO] - {"epoch": 18, "update": 17.378, "loss": "1.931", "ppl": "3.81", "wps": "360352", "ups": "3.05", "wpb": "118316", "bsz": "256", "num_updates": "894400", "lr": "0.000106667", "gnorm": "0.893", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 23:53:49,154][train_inner][INFO] - {"epoch": 18, "update": 17.382, "loss": "1.929", "ppl": "3.81", "wps": "362406", "ups": "3.06", "wpb": "118298", "bsz": "256", "num_updates": "894600", "lr": "0.000106465", "gnorm": "0.858", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-04 23:54:54,196][train_inner][INFO] - {"epoch": 18, "update": 17.386, "loss": "1.928", "ppl": "3.8", "wps": "363151", "ups": "3.07", "wpb": "118098", "bsz": "256", "num_updates": "894800", "lr": "0.000106263", "gnorm": "0.852", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-04 23:55:59,492][train_inner][INFO] - {"epoch": 18, "update": 17.39, "loss": "1.928", "ppl": "3.81", "wps": "362177", "ups": "3.06", "wpb": "118241", "bsz": "256", "num_updates": "895000", "lr": "0.000106061", "gnorm": "0.859", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-04 23:57:04,882][train_inner][INFO] - {"epoch": 18, "update": 17.393, "loss": "1.928", "ppl": "3.81", "wps": "361050", "ups": "3.06", "wpb": "118043", "bsz": "256", "num_updates": "895200", "lr": "0.000105859", "gnorm": "0.858", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-04 23:58:10,273][train_inner][INFO] - {"epoch": 18, "update": 17.397, "loss": "1.935", "ppl": "3.82", "wps": "359386", "ups": "3.06", "wpb": "117501", "bsz": "256", "num_updates": "895400", "lr": "0.000105657", "gnorm": "0.862", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-04 23:59:15,715][train_inner][INFO] - {"epoch": 18, "update": 17.401, "loss": "1.925", "ppl": "3.8", "wps": "364234", "ups": "3.06", "wpb": "119180", "bsz": "256", "num_updates": "895600", "lr": "0.000105455", "gnorm": "0.854", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 00:00:21,457][train_inner][INFO] - {"epoch": 18, "update": 17.405, "loss": "1.918", "ppl": "3.78", "wps": "359161", "ups": "3.04", "wpb": "118058", "bsz": "256", "num_updates": "895800", "lr": "0.000105253", "gnorm": "0.855", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 00:01:26,597][train_inner][INFO] - {"epoch": 18, "update": 17.409, "loss": "1.93", "ppl": "3.81", "wps": "364125", "ups": "3.07", "wpb": "118579", "bsz": "256", "num_updates": "896000", "lr": "0.000105051", "gnorm": "0.856", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 00:02:31,624][train_inner][INFO] - {"epoch": 18, "update": 17.413, "loss": "1.927", "ppl": "3.8", "wps": "364356", "ups": "3.08", "wpb": "118464", "bsz": "256", "num_updates": "896200", "lr": "0.000104848", "gnorm": "0.857", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 00:03:37,193][train_inner][INFO] - {"epoch": 18, "update": 17.417, "loss": "1.931", "ppl": "3.81", "wps": "359378", "ups": "3.05", "wpb": "117818", "bsz": "256", "num_updates": "896400", "lr": "0.000104646", "gnorm": "0.862", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 00:04:41,880][train_inner][INFO] - {"epoch": 18, "update": 17.421, "loss": "1.932", "ppl": "3.82", "wps": "364736", "ups": "3.09", "wpb": "117967", "bsz": "256", "num_updates": "896600", "lr": "0.000104444", "gnorm": "0.865", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "0"} +[2022-08-05 00:05:47,251][train_inner][INFO] - {"epoch": 18, "update": 17.425, "loss": "1.927", "ppl": "3.8", "wps": "362394", "ups": "3.06", "wpb": "118447", "bsz": "256", "num_updates": "896800", "lr": "0.000104242", "gnorm": "0.861", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 00:06:52,426][train_inner][INFO] - {"epoch": 18, "update": 17.428, "loss": "1.935", "ppl": "3.82", "wps": "361703", "ups": "3.07", "wpb": "117868", "bsz": "256", "num_updates": "897000", "lr": "0.00010404", "gnorm": "0.863", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 00:07:57,745][train_inner][INFO] - {"epoch": 18, "update": 17.432, "loss": "1.927", "ppl": "3.8", "wps": "362750", "ups": "3.06", "wpb": "118469", "bsz": "256", "num_updates": "897200", "lr": "0.000103838", "gnorm": "0.856", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 00:09:03,071][train_inner][INFO] - {"epoch": 18, "update": 17.436, "loss": "1.925", "ppl": "3.8", "wps": "361342", "ups": "3.06", "wpb": "118024", "bsz": "256", "num_updates": "897400", "lr": "0.000103636", "gnorm": "0.857", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.7", "wall": "0"} +[2022-08-05 00:10:08,301][train_inner][INFO] - {"epoch": 18, "update": 17.44, "loss": "1.93", "ppl": "3.81", "wps": "362588", "ups": "3.07", "wpb": "118255", "bsz": "256", "num_updates": "897600", "lr": "0.000103434", "gnorm": "0.865", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 00:11:13,600][train_inner][INFO] - {"epoch": 18, "update": 17.444, "loss": "1.932", "ppl": "3.82", "wps": "361692", "ups": "3.06", "wpb": "118089", "bsz": "256", "num_updates": "897800", "lr": "0.000103232", "gnorm": "0.861", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 00:12:18,711][train_inner][INFO] - {"epoch": 18, "update": 17.448, "loss": "1.924", "ppl": "3.8", "wps": "363767", "ups": "3.07", "wpb": "118412", "bsz": "256", "num_updates": "898000", "lr": "0.00010303", "gnorm": "0.858", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 00:13:23,835][train_inner][INFO] - {"epoch": 18, "update": 17.452, "loss": "1.925", "ppl": "3.8", "wps": "365498", "ups": "3.07", "wpb": "119011", "bsz": "256", "num_updates": "898200", "lr": "0.000102828", "gnorm": "0.854", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 00:14:29,055][train_inner][INFO] - {"epoch": 18, "update": 17.456, "loss": "1.925", "ppl": "3.8", "wps": "362496", "ups": "3.07", "wpb": "118208", "bsz": "256", "num_updates": "898400", "lr": "0.000102626", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "0"} +[2022-08-05 00:15:34,478][train_inner][INFO] - {"epoch": 18, "update": 17.459, "loss": "1.924", "ppl": "3.8", "wps": "360588", "ups": "3.06", "wpb": "117952", "bsz": "256", "num_updates": "898600", "lr": "0.000102424", "gnorm": "0.861", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 00:16:39,819][train_inner][INFO] - {"epoch": 18, "update": 17.463, "loss": "1.931", "ppl": "3.81", "wps": "362657", "ups": "3.06", "wpb": "118480", "bsz": "256", "num_updates": "898800", "lr": "0.000102222", "gnorm": "0.861", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 00:17:45,115][train_inner][INFO] - {"epoch": 18, "update": 17.467, "loss": "1.922", "ppl": "3.79", "wps": "361814", "ups": "3.06", "wpb": "118124", "bsz": "256", "num_updates": "899000", "lr": "0.00010202", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 00:18:50,850][train_inner][INFO] - {"epoch": 18, "update": 17.471, "loss": "1.923", "ppl": "3.79", "wps": "359732", "ups": "3.04", "wpb": "118232", "bsz": "256", "num_updates": "899200", "lr": "0.000101818", "gnorm": "0.861", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-05 00:19:56,279][train_inner][INFO] - {"epoch": 18, "update": 17.475, "loss": "1.927", "ppl": "3.8", "wps": "361618", "ups": "3.06", "wpb": "118299", "bsz": "256", "num_updates": "899400", "lr": "0.000101616", "gnorm": "0.863", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 00:21:01,867][train_inner][INFO] - {"epoch": 18, "update": 17.479, "loss": "1.933", "ppl": "3.82", "wps": "359950", "ups": "3.05", "wpb": "118042", "bsz": "256", "num_updates": "899600", "lr": "0.000101414", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 00:22:07,064][train_inner][INFO] - {"epoch": 18, "update": 17.483, "loss": "1.924", "ppl": "3.79", "wps": "363359", "ups": "3.07", "wpb": "118447", "bsz": "256", "num_updates": "899800", "lr": "0.000101212", "gnorm": "0.859", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 00:23:12,286][train_inner][INFO] - {"epoch": 18, "update": 17.487, "loss": "1.931", "ppl": "3.81", "wps": "361730", "ups": "3.07", "wpb": "117950", "bsz": "256", "num_updates": "900000", "lr": "0.00010101", "gnorm": "0.862", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-05 00:23:12,288][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-05 00:23:35,119][valid][INFO] - {"epoch": 18, "valid_loss": "1.838", "valid_ppl": "3.58", "valid_wps": "1.59483e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "900000", "valid_best_loss": "1.838"} +[2022-08-05 00:23:35,122][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 18 @ 900000 updates +[2022-08-05 00:23:35,123][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_18_900000.pt +[2022-08-05 00:23:48,202][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_18_900000.pt +[2022-08-05 00:24:14,151][fairseq.checkpoint_utils][INFO] - Saved checkpoint /vit-opt/fairseq/bert/adan2/checkpoint_18_900000.pt (epoch 18 @ 900000 updates, score 1.838) (writing took 39.028598554985365 seconds) +[2022-08-05 00:25:19,410][train_inner][INFO] - {"epoch": 18, "update": 17.491, "loss": "1.926", "ppl": "3.8", "wps": "184786", "ups": "1.57", "wpb": "117453", "bsz": "256", "num_updates": "900200", "lr": "0.000100808", "gnorm": "0.864", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 00:26:24,966][train_inner][INFO] - {"epoch": 18, "update": 17.494, "loss": "1.928", "ppl": "3.81", "wps": "359975", "ups": "3.05", "wpb": "117990", "bsz": "256", "num_updates": "900400", "lr": "0.000100606", "gnorm": "0.866", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-05 00:27:30,291][train_inner][INFO] - {"epoch": 18, "update": 17.498, "loss": "1.926", "ppl": "3.8", "wps": "362312", "ups": "3.06", "wpb": "118338", "bsz": "256", "num_updates": "900600", "lr": "0.000100404", "gnorm": "0.861", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 00:28:35,346][train_inner][INFO] - {"epoch": 18, "update": 17.502, "loss": "1.924", "ppl": "3.79", "wps": "362842", "ups": "3.07", "wpb": "118022", "bsz": "256", "num_updates": "900800", "lr": "0.000100202", "gnorm": "0.863", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 00:28:53,812][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 00:29:40,737][train_inner][INFO] - {"epoch": 18, "update": 17.506, "loss": "1.928", "ppl": "3.81", "wps": "362607", "ups": "3.06", "wpb": "118554", "bsz": "256", "num_updates": "901000", "lr": "0.0001", "gnorm": "0.859", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.8", "wall": "0"} +[2022-08-05 00:30:46,249][train_inner][INFO] - {"epoch": 18, "update": 17.51, "loss": "1.93", "ppl": "3.81", "wps": "361271", "ups": "3.05", "wpb": "118337", "bsz": "256", "num_updates": "901200", "lr": "9.9798e-05", "gnorm": "0.864", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 00:31:51,678][train_inner][INFO] - {"epoch": 18, "update": 17.514, "loss": "1.927", "ppl": "3.8", "wps": "359527", "ups": "3.06", "wpb": "117616", "bsz": "256", "num_updates": "901400", "lr": "9.9596e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 00:32:56,565][train_inner][INFO] - {"epoch": 18, "update": 17.518, "loss": "1.926", "ppl": "3.8", "wps": "366345", "ups": "3.08", "wpb": "118852", "bsz": "256", "num_updates": "901600", "lr": "9.93939e-05", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 00:34:01,720][train_inner][INFO] - {"epoch": 18, "update": 17.522, "loss": "1.924", "ppl": "3.79", "wps": "364122", "ups": "3.07", "wpb": "118608", "bsz": "256", "num_updates": "901800", "lr": "9.91919e-05", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "0"} +[2022-08-05 00:35:06,891][train_inner][INFO] - {"epoch": 18, "update": 17.526, "loss": "1.922", "ppl": "3.79", "wps": "362945", "ups": "3.07", "wpb": "118266", "bsz": "256", "num_updates": "902000", "lr": "9.89899e-05", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "0"} +[2022-08-05 00:36:11,962][train_inner][INFO] - {"epoch": 18, "update": 17.529, "loss": "1.92", "ppl": "3.79", "wps": "363604", "ups": "3.07", "wpb": "118297", "bsz": "256", "num_updates": "902200", "lr": "9.87879e-05", "gnorm": "0.865", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 00:37:17,190][train_inner][INFO] - {"epoch": 18, "update": 17.533, "loss": "1.93", "ppl": "3.81", "wps": "361815", "ups": "3.07", "wpb": "118001", "bsz": "255.9", "num_updates": "902400", "lr": "9.85859e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 00:38:22,719][train_inner][INFO] - {"epoch": 18, "update": 17.537, "loss": "1.921", "ppl": "3.79", "wps": "362063", "ups": "3.05", "wpb": "118624", "bsz": "256", "num_updates": "902600", "lr": "9.83838e-05", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 00:39:28,077][train_inner][INFO] - {"epoch": 18, "update": 17.541, "loss": "1.925", "ppl": "3.8", "wps": "363378", "ups": "3.06", "wpb": "118748", "bsz": "256", "num_updates": "902800", "lr": "9.81818e-05", "gnorm": "0.859", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 00:40:33,235][train_inner][INFO] - {"epoch": 18, "update": 17.545, "loss": "1.923", "ppl": "3.79", "wps": "361371", "ups": "3.07", "wpb": "117728", "bsz": "256", "num_updates": "903000", "lr": "9.79798e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 00:41:38,628][train_inner][INFO] - {"epoch": 18, "update": 17.549, "loss": "1.923", "ppl": "3.79", "wps": "361614", "ups": "3.06", "wpb": "118234", "bsz": "256", "num_updates": "903200", "lr": "9.77778e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 00:42:44,031][train_inner][INFO] - {"epoch": 18, "update": 17.553, "loss": "1.928", "ppl": "3.8", "wps": "360923", "ups": "3.06", "wpb": "118026", "bsz": "256", "num_updates": "903400", "lr": "9.75758e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 00:43:49,519][train_inner][INFO] - {"epoch": 18, "update": 17.557, "loss": "1.925", "ppl": "3.8", "wps": "361594", "ups": "3.05", "wpb": "118398", "bsz": "256", "num_updates": "903600", "lr": "9.73737e-05", "gnorm": "0.861", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 00:44:54,991][train_inner][INFO] - {"epoch": 18, "update": 17.56, "loss": "1.926", "ppl": "3.8", "wps": "359160", "ups": "3.06", "wpb": "117560", "bsz": "256", "num_updates": "903800", "lr": "9.71717e-05", "gnorm": "0.865", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-05 00:46:00,017][train_inner][INFO] - {"epoch": 18, "update": 17.564, "loss": "1.925", "ppl": "3.8", "wps": "364513", "ups": "3.08", "wpb": "118513", "bsz": "256", "num_updates": "904000", "lr": "9.69697e-05", "gnorm": "0.865", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-05 00:47:05,183][train_inner][INFO] - {"epoch": 18, "update": 17.568, "loss": "1.921", "ppl": "3.79", "wps": "364527", "ups": "3.07", "wpb": "118772", "bsz": "256", "num_updates": "904200", "lr": "9.67677e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 00:48:10,509][train_inner][INFO] - {"epoch": 18, "update": 17.572, "loss": "1.923", "ppl": "3.79", "wps": "361922", "ups": "3.06", "wpb": "118211", "bsz": "256", "num_updates": "904400", "lr": "9.65657e-05", "gnorm": "0.864", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 00:49:15,820][train_inner][INFO] - {"epoch": 18, "update": 17.576, "loss": "1.924", "ppl": "3.79", "wps": "361759", "ups": "3.06", "wpb": "118132", "bsz": "256", "num_updates": "904600", "lr": "9.63636e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 00:50:20,738][train_inner][INFO] - {"epoch": 18, "update": 17.58, "loss": "1.92", "ppl": "3.78", "wps": "362874", "ups": "3.08", "wpb": "117785", "bsz": "256", "num_updates": "904800", "lr": "9.61616e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 00:51:10,274][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-05 00:51:26,090][train_inner][INFO] - {"epoch": 18, "update": 17.584, "loss": "1.919", "ppl": "3.78", "wps": "362664", "ups": "3.06", "wpb": "118501", "bsz": "256", "num_updates": "905000", "lr": "9.59596e-05", "gnorm": "0.864", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-05 00:52:31,098][train_inner][INFO] - {"epoch": 18, "update": 17.588, "loss": "1.922", "ppl": "3.79", "wps": "363365", "ups": "3.08", "wpb": "118107", "bsz": "256", "num_updates": "905200", "lr": "9.57576e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-05 00:53:13,327][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 00:53:36,463][train_inner][INFO] - {"epoch": 18, "update": 17.592, "loss": "1.925", "ppl": "3.8", "wps": "362223", "ups": "3.06", "wpb": "118382", "bsz": "256", "num_updates": "905400", "lr": "9.55556e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 00:54:41,861][train_inner][INFO] - {"epoch": 18, "update": 17.595, "loss": "1.928", "ppl": "3.8", "wps": "361910", "ups": "3.06", "wpb": "118340", "bsz": "256", "num_updates": "905600", "lr": "9.53535e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-05 00:55:46,951][train_inner][INFO] - {"epoch": 18, "update": 17.599, "loss": "1.922", "ppl": "3.79", "wps": "363377", "ups": "3.07", "wpb": "118245", "bsz": "256", "num_updates": "905800", "lr": "9.51515e-05", "gnorm": "0.865", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-05 00:56:52,153][train_inner][INFO] - {"epoch": 18, "update": 17.603, "loss": "1.915", "ppl": "3.77", "wps": "366004", "ups": "3.07", "wpb": "119319", "bsz": "256", "num_updates": "906000", "lr": "9.49495e-05", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-05 00:57:57,384][train_inner][INFO] - {"epoch": 18, "update": 17.607, "loss": "1.923", "ppl": "3.79", "wps": "362793", "ups": "3.07", "wpb": "118325", "bsz": "256", "num_updates": "906200", "lr": "9.47475e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 00:59:02,294][train_inner][INFO] - {"epoch": 18, "update": 17.611, "loss": "1.925", "ppl": "3.8", "wps": "362996", "ups": "3.08", "wpb": "117808", "bsz": "256", "num_updates": "906400", "lr": "9.45455e-05", "gnorm": "0.864", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26", "wall": "0"} +[2022-08-05 01:00:07,853][train_inner][INFO] - {"epoch": 18, "update": 17.615, "loss": "1.919", "ppl": "3.78", "wps": "361312", "ups": "3.05", "wpb": "118434", "bsz": "256", "num_updates": "906600", "lr": "9.43434e-05", "gnorm": "0.863", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 01:01:12,938][train_inner][INFO] - {"epoch": 18, "update": 17.619, "loss": "1.924", "ppl": "3.8", "wps": "363016", "ups": "3.07", "wpb": "118133", "bsz": "256", "num_updates": "906800", "lr": "9.41414e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 01:02:18,229][train_inner][INFO] - {"epoch": 18, "update": 17.623, "loss": "1.92", "ppl": "3.78", "wps": "362876", "ups": "3.06", "wpb": "118460", "bsz": "256", "num_updates": "907000", "lr": "9.39394e-05", "gnorm": "0.86", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 01:03:23,746][train_inner][INFO] - {"epoch": 18, "update": 17.627, "loss": "1.922", "ppl": "3.79", "wps": "362364", "ups": "3.05", "wpb": "118703", "bsz": "256", "num_updates": "907200", "lr": "9.37374e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 01:04:28,852][train_inner][INFO] - {"epoch": 18, "update": 17.63, "loss": "1.92", "ppl": "3.78", "wps": "365765", "ups": "3.07", "wpb": "119065", "bsz": "256", "num_updates": "907400", "lr": "9.35354e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "0"} +[2022-08-05 01:05:34,059][train_inner][INFO] - {"epoch": 18, "update": 17.634, "loss": "1.919", "ppl": "3.78", "wps": "363403", "ups": "3.07", "wpb": "118480", "bsz": "256", "num_updates": "907600", "lr": "9.33333e-05", "gnorm": "0.865", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 01:06:39,526][train_inner][INFO] - {"epoch": 18, "update": 17.638, "loss": "1.92", "ppl": "3.78", "wps": "362658", "ups": "3.06", "wpb": "118695", "bsz": "256", "num_updates": "907800", "lr": "9.31313e-05", "gnorm": "0.865", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-05 01:07:45,043][train_inner][INFO] - {"epoch": 18, "update": 17.642, "loss": "1.919", "ppl": "3.78", "wps": "361463", "ups": "3.05", "wpb": "118407", "bsz": "256", "num_updates": "908000", "lr": "9.29293e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 01:08:50,379][train_inner][INFO] - {"epoch": 18, "update": 17.646, "loss": "1.92", "ppl": "3.78", "wps": "363548", "ups": "3.06", "wpb": "118762", "bsz": "256", "num_updates": "908200", "lr": "9.27273e-05", "gnorm": "0.862", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 01:09:55,764][train_inner][INFO] - {"epoch": 18, "update": 17.65, "loss": "1.926", "ppl": "3.8", "wps": "360522", "ups": "3.06", "wpb": "117861", "bsz": "256", "num_updates": "908400", "lr": "9.25253e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-05 01:11:00,975][train_inner][INFO] - {"epoch": 18, "update": 17.654, "loss": "1.925", "ppl": "3.8", "wps": "362549", "ups": "3.07", "wpb": "118209", "bsz": "256", "num_updates": "908600", "lr": "9.23232e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 01:12:06,498][train_inner][INFO] - {"epoch": 18, "update": 17.658, "loss": "1.925", "ppl": "3.8", "wps": "361126", "ups": "3.05", "wpb": "118309", "bsz": "256", "num_updates": "908800", "lr": "9.21212e-05", "gnorm": "0.864", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 01:13:12,157][train_inner][INFO] - {"epoch": 18, "update": 17.662, "loss": "1.922", "ppl": "3.79", "wps": "362017", "ups": "3.05", "wpb": "118846", "bsz": "256", "num_updates": "909000", "lr": "9.19192e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 01:14:17,660][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 01:14:17,983][train_inner][INFO] - {"epoch": 18, "update": 17.665, "loss": "1.915", "ppl": "3.77", "wps": "359726", "ups": "3.04", "wpb": "118393", "bsz": "256", "num_updates": "909200", "lr": "9.17172e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.7", "wall": "0"} +[2022-08-05 01:15:23,702][train_inner][INFO] - {"epoch": 18, "update": 17.669, "loss": "1.919", "ppl": "3.78", "wps": "361942", "ups": "3.04", "wpb": "118931", "bsz": "256", "num_updates": "909400", "lr": "9.15152e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 01:16:28,875][train_inner][INFO] - {"epoch": 18, "update": 17.673, "loss": "1.921", "ppl": "3.79", "wps": "362320", "ups": "3.07", "wpb": "118066", "bsz": "256", "num_updates": "909600", "lr": "9.13131e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 01:17:34,186][train_inner][INFO] - {"epoch": 18, "update": 17.677, "loss": "1.922", "ppl": "3.79", "wps": "360731", "ups": "3.06", "wpb": "117784", "bsz": "256", "num_updates": "909800", "lr": "9.11111e-05", "gnorm": "0.873", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 01:18:39,312][train_inner][INFO] - {"epoch": 18, "update": 17.681, "loss": "1.925", "ppl": "3.8", "wps": "362788", "ups": "3.07", "wpb": "118133", "bsz": "256", "num_updates": "910000", "lr": "9.09091e-05", "gnorm": "0.868", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 01:19:44,612][train_inner][INFO] - {"epoch": 18, "update": 17.685, "loss": "1.922", "ppl": "3.79", "wps": "360725", "ups": "3.06", "wpb": "117776", "bsz": "256", "num_updates": "910200", "lr": "9.07071e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.1", "wall": "0"} +[2022-08-05 01:20:49,866][train_inner][INFO] - {"epoch": 18, "update": 17.689, "loss": "1.915", "ppl": "3.77", "wps": "363252", "ups": "3.06", "wpb": "118517", "bsz": "256", "num_updates": "910400", "lr": "9.05051e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 01:21:54,981][train_inner][INFO] - {"epoch": 18, "update": 17.693, "loss": "1.919", "ppl": "3.78", "wps": "364523", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "910600", "lr": "9.0303e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 01:23:00,336][train_inner][INFO] - {"epoch": 18, "update": 17.696, "loss": "1.915", "ppl": "3.77", "wps": "360819", "ups": "3.06", "wpb": "117904", "bsz": "256", "num_updates": "910800", "lr": "9.0101e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.6", "wall": "0"} +[2022-08-05 01:24:05,643][train_inner][INFO] - {"epoch": 18, "update": 17.7, "loss": "1.921", "ppl": "3.79", "wps": "360832", "ups": "3.06", "wpb": "117822", "bsz": "256", "num_updates": "911000", "lr": "8.9899e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.2", "wall": "0"} +[2022-08-05 01:25:10,871][train_inner][INFO] - {"epoch": 18, "update": 17.704, "loss": "1.915", "ppl": "3.77", "wps": "364088", "ups": "3.07", "wpb": "118743", "bsz": "256", "num_updates": "911200", "lr": "8.9697e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "0"} +[2022-08-05 01:26:16,277][train_inner][INFO] - {"epoch": 18, "update": 17.708, "loss": "1.918", "ppl": "3.78", "wps": "361379", "ups": "3.06", "wpb": "118179", "bsz": "256", "num_updates": "911400", "lr": "8.94949e-05", "gnorm": "0.868", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 01:27:21,881][train_inner][INFO] - {"epoch": 18, "update": 17.712, "loss": "1.911", "ppl": "3.76", "wps": "359620", "ups": "3.05", "wpb": "117961", "bsz": "256", "num_updates": "911600", "lr": "8.92929e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 01:28:27,077][train_inner][INFO] - {"epoch": 18, "update": 17.716, "loss": "1.917", "ppl": "3.78", "wps": "361919", "ups": "3.07", "wpb": "117963", "bsz": "256", "num_updates": "911800", "lr": "8.90909e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 01:29:32,451][train_inner][INFO] - {"epoch": 18, "update": 17.72, "loss": "1.921", "ppl": "3.79", "wps": "362008", "ups": "3.06", "wpb": "118329", "bsz": "256", "num_updates": "912000", "lr": "8.88889e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-05 01:30:37,898][train_inner][INFO] - {"epoch": 18, "update": 17.724, "loss": "1.918", "ppl": "3.78", "wps": "361716", "ups": "3.06", "wpb": "118364", "bsz": "256", "num_updates": "912200", "lr": "8.86869e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 01:30:55,765][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 01:31:43,551][train_inner][INFO] - {"epoch": 18, "update": 17.728, "loss": "1.915", "ppl": "3.77", "wps": "358609", "ups": "3.05", "wpb": "117718", "bsz": "256", "num_updates": "912400", "lr": "8.84848e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.4", "wall": "0"} +[2022-08-05 01:32:48,901][train_inner][INFO] - {"epoch": 18, "update": 17.731, "loss": "1.916", "ppl": "3.77", "wps": "363405", "ups": "3.06", "wpb": "118740", "bsz": "256", "num_updates": "912600", "lr": "8.82828e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-05 01:33:53,637][train_inner][INFO] - {"epoch": 18, "update": 17.735, "loss": "1.919", "ppl": "3.78", "wps": "364762", "ups": "3.09", "wpb": "118064", "bsz": "255.9", "num_updates": "912800", "lr": "8.80808e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "0"} +[2022-08-05 01:34:58,892][train_inner][INFO] - {"epoch": 18, "update": 17.739, "loss": "1.918", "ppl": "3.78", "wps": "361785", "ups": "3.06", "wpb": "118040", "bsz": "256", "num_updates": "913000", "lr": "8.78788e-05", "gnorm": "0.868", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 01:36:04,159][train_inner][INFO] - {"epoch": 18, "update": 17.743, "loss": "1.918", "ppl": "3.78", "wps": "361757", "ups": "3.06", "wpb": "118053", "bsz": "256", "num_updates": "913200", "lr": "8.76768e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 01:37:09,276][train_inner][INFO] - {"epoch": 18, "update": 17.747, "loss": "1.914", "ppl": "3.77", "wps": "363760", "ups": "3.07", "wpb": "118432", "bsz": "256", "num_updates": "913400", "lr": "8.74747e-05", "gnorm": "0.868", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 01:38:14,700][train_inner][INFO] - {"epoch": 18, "update": 17.751, "loss": "1.918", "ppl": "3.78", "wps": "361755", "ups": "3.06", "wpb": "118337", "bsz": "256", "num_updates": "913600", "lr": "8.72727e-05", "gnorm": "0.868", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 01:39:20,358][train_inner][INFO] - {"epoch": 18, "update": 17.755, "loss": "1.917", "ppl": "3.78", "wps": "360774", "ups": "3.05", "wpb": "118424", "bsz": "256", "num_updates": "913800", "lr": "8.70707e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "0"} +[2022-08-05 01:40:25,791][train_inner][INFO] - {"epoch": 18, "update": 17.759, "loss": "1.925", "ppl": "3.8", "wps": "361067", "ups": "3.06", "wpb": "118122", "bsz": "256", "num_updates": "914000", "lr": "8.68687e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-05 01:41:31,266][train_inner][INFO] - {"epoch": 18, "update": 17.763, "loss": "1.916", "ppl": "3.77", "wps": "360817", "ups": "3.05", "wpb": "118119", "bsz": "256", "num_updates": "914200", "lr": "8.66667e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-05 01:42:36,663][train_inner][INFO] - {"epoch": 18, "update": 17.766, "loss": "1.915", "ppl": "3.77", "wps": "362543", "ups": "3.06", "wpb": "118544", "bsz": "256", "num_updates": "914400", "lr": "8.64646e-05", "gnorm": "0.872", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 01:43:41,961][train_inner][INFO] - {"epoch": 18, "update": 17.77, "loss": "1.922", "ppl": "3.79", "wps": "360072", "ups": "3.06", "wpb": "117559", "bsz": "256", "num_updates": "914600", "lr": "8.62626e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 01:44:47,229][train_inner][INFO] - {"epoch": 18, "update": 17.774, "loss": "1.913", "ppl": "3.77", "wps": "364124", "ups": "3.06", "wpb": "118825", "bsz": "256", "num_updates": "914800", "lr": "8.60606e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 01:45:52,655][train_inner][INFO] - {"epoch": 18, "update": 17.778, "loss": "1.91", "ppl": "3.76", "wps": "363115", "ups": "3.06", "wpb": "118784", "bsz": "256", "num_updates": "915000", "lr": "8.58586e-05", "gnorm": "0.866", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 01:46:58,217][train_inner][INFO] - {"epoch": 18, "update": 17.782, "loss": "1.921", "ppl": "3.79", "wps": "362193", "ups": "3.05", "wpb": "118729", "bsz": "256", "num_updates": "915200", "lr": "8.56566e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 01:48:03,264][train_inner][INFO] - {"epoch": 18, "update": 17.786, "loss": "1.913", "ppl": "3.77", "wps": "365483", "ups": "3.07", "wpb": "118865", "bsz": "256", "num_updates": "915400", "lr": "8.54545e-05", "gnorm": "0.884", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "0"} +[2022-08-05 01:49:08,856][train_inner][INFO] - {"epoch": 18, "update": 17.79, "loss": "1.916", "ppl": "3.77", "wps": "361750", "ups": "3.05", "wpb": "118638", "bsz": "256", "num_updates": "915600", "lr": "8.52525e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 01:50:13,798][train_inner][INFO] - {"epoch": 18, "update": 17.794, "loss": "1.916", "ppl": "3.77", "wps": "365114", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "915800", "lr": "8.50505e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 01:51:19,209][train_inner][INFO] - {"epoch": 18, "update": 17.798, "loss": "1.92", "ppl": "3.78", "wps": "362052", "ups": "3.06", "wpb": "118409", "bsz": "256", "num_updates": "916000", "lr": "8.48485e-05", "gnorm": "0.872", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 01:52:24,485][train_inner][INFO] - {"epoch": 18, "update": 17.801, "loss": "1.918", "ppl": "3.78", "wps": "361154", "ups": "3.06", "wpb": "117873", "bsz": "256", "num_updates": "916200", "lr": "8.46465e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 01:53:14,696][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-05 01:53:30,327][train_inner][INFO] - {"epoch": 18, "update": 17.805, "loss": "1.914", "ppl": "3.77", "wps": "358708", "ups": "3.04", "wpb": "118087", "bsz": "256", "num_updates": "916400", "lr": "8.44444e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 01:54:35,677][train_inner][INFO] - {"epoch": 18, "update": 17.809, "loss": "1.918", "ppl": "3.78", "wps": "361653", "ups": "3.06", "wpb": "118170", "bsz": "256", "num_updates": "916600", "lr": "8.42424e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 01:55:40,974][train_inner][INFO] - {"epoch": 18, "update": 17.813, "loss": "1.914", "ppl": "3.77", "wps": "364198", "ups": "3.06", "wpb": "118904", "bsz": "256", "num_updates": "916800", "lr": "8.40404e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.8", "wall": "0"} +[2022-08-05 01:56:46,319][train_inner][INFO] - {"epoch": 18, "update": 17.817, "loss": "1.911", "ppl": "3.76", "wps": "363660", "ups": "3.06", "wpb": "118814", "bsz": "256", "num_updates": "917000", "lr": "8.38384e-05", "gnorm": "0.869", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 01:57:51,517][train_inner][INFO] - {"epoch": 18, "update": 17.821, "loss": "1.909", "ppl": "3.76", "wps": "363179", "ups": "3.07", "wpb": "118390", "bsz": "256", "num_updates": "917200", "lr": "8.36364e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 01:58:56,894][train_inner][INFO] - {"epoch": 18, "update": 17.825, "loss": "1.915", "ppl": "3.77", "wps": "362286", "ups": "3.06", "wpb": "118425", "bsz": "256", "num_updates": "917400", "lr": "8.34343e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 01:59:31,903][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 02:00:02,884][train_inner][INFO] - {"epoch": 18, "update": 17.829, "loss": "1.915", "ppl": "3.77", "wps": "359993", "ups": "3.03", "wpb": "118776", "bsz": "256", "num_updates": "917600", "lr": "8.32323e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.4", "wall": "0"} +[2022-08-05 02:01:08,506][train_inner][INFO] - {"epoch": 18, "update": 17.833, "loss": "1.913", "ppl": "3.77", "wps": "362617", "ups": "3.05", "wpb": "118978", "bsz": "256", "num_updates": "917800", "lr": "8.30303e-05", "gnorm": "0.867", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 02:02:13,468][train_inner][INFO] - {"epoch": 18, "update": 17.836, "loss": "1.91", "ppl": "3.76", "wps": "364940", "ups": "3.08", "wpb": "118522", "bsz": "256", "num_updates": "918000", "lr": "8.28283e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 02:03:18,748][train_inner][INFO] - {"epoch": 18, "update": 17.84, "loss": "1.92", "ppl": "3.78", "wps": "361602", "ups": "3.06", "wpb": "118025", "bsz": "256", "num_updates": "918200", "lr": "8.26263e-05", "gnorm": "0.872", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 02:04:24,385][train_inner][INFO] - {"epoch": 18, "update": 17.844, "loss": "1.914", "ppl": "3.77", "wps": "360506", "ups": "3.05", "wpb": "118310", "bsz": "256", "num_updates": "918400", "lr": "8.24242e-05", "gnorm": "0.873", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 02:05:29,560][train_inner][INFO] - {"epoch": 18, "update": 17.848, "loss": "1.918", "ppl": "3.78", "wps": "361775", "ups": "3.07", "wpb": "117892", "bsz": "256", "num_updates": "918600", "lr": "8.22222e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 02:06:35,217][train_inner][INFO] - {"epoch": 18, "update": 17.852, "loss": "1.918", "ppl": "3.78", "wps": "359305", "ups": "3.05", "wpb": "117952", "bsz": "256", "num_updates": "918800", "lr": "8.20202e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 02:07:40,539][train_inner][INFO] - {"epoch": 18, "update": 17.856, "loss": "1.92", "ppl": "3.78", "wps": "361776", "ups": "3.06", "wpb": "118159", "bsz": "256", "num_updates": "919000", "lr": "8.18182e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 02:08:46,118][train_inner][INFO] - {"epoch": 18, "update": 17.86, "loss": "1.91", "ppl": "3.76", "wps": "361845", "ups": "3.05", "wpb": "118645", "bsz": "256", "num_updates": "919200", "lr": "8.16162e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 02:09:50,918][train_inner][INFO] - {"epoch": 18, "update": 17.864, "loss": "1.916", "ppl": "3.77", "wps": "363983", "ups": "3.09", "wpb": "117928", "bsz": "256", "num_updates": "919400", "lr": "8.14141e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "0"} +[2022-08-05 02:10:56,179][train_inner][INFO] - {"epoch": 18, "update": 17.867, "loss": "1.916", "ppl": "3.77", "wps": "360626", "ups": "3.06", "wpb": "117672", "bsz": "256", "num_updates": "919600", "lr": "8.12121e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 02:11:57,275][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 02:12:01,779][train_inner][INFO] - {"epoch": 18, "update": 17.871, "loss": "1.912", "ppl": "3.76", "wps": "362287", "ups": "3.05", "wpb": "118829", "bsz": "256", "num_updates": "919800", "lr": "8.10101e-05", "gnorm": "0.873", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "0"} +[2022-08-05 02:13:07,218][train_inner][INFO] - {"epoch": 18, "update": 17.875, "loss": "1.911", "ppl": "3.76", "wps": "362491", "ups": "3.06", "wpb": "118605", "bsz": "256", "num_updates": "920000", "lr": "8.08081e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 02:14:12,216][train_inner][INFO] - {"epoch": 18, "update": 17.879, "loss": "1.922", "ppl": "3.79", "wps": "361804", "ups": "3.08", "wpb": "117579", "bsz": "256", "num_updates": "920200", "lr": "8.06061e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 02:15:16,684][train_inner][INFO] - {"epoch": 18, "update": 17.883, "loss": "1.916", "ppl": "3.77", "wps": "366085", "ups": "3.1", "wpb": "118003", "bsz": "256", "num_updates": "920400", "lr": "8.0404e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.1", "wall": "0"} +[2022-08-05 02:16:22,148][train_inner][INFO] - {"epoch": 18, "update": 17.887, "loss": "1.914", "ppl": "3.77", "wps": "363087", "ups": "3.06", "wpb": "118844", "bsz": "256", "num_updates": "920600", "lr": "8.0202e-05", "gnorm": "0.868", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 02:17:27,805][train_inner][INFO] - {"epoch": 18, "update": 17.891, "loss": "1.913", "ppl": "3.77", "wps": "362202", "ups": "3.05", "wpb": "118903", "bsz": "256", "num_updates": "920800", "lr": "8e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 02:18:32,976][train_inner][INFO] - {"epoch": 18, "update": 17.895, "loss": "1.912", "ppl": "3.76", "wps": "364737", "ups": "3.07", "wpb": "118848", "bsz": "256", "num_updates": "921000", "lr": "7.9798e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 02:19:38,102][train_inner][INFO] - {"epoch": 18, "update": 17.899, "loss": "1.913", "ppl": "3.77", "wps": "364999", "ups": "3.07", "wpb": "118853", "bsz": "256", "num_updates": "921200", "lr": "7.9596e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 02:20:43,664][train_inner][INFO] - {"epoch": 18, "update": 17.902, "loss": "1.912", "ppl": "3.76", "wps": "361822", "ups": "3.05", "wpb": "118606", "bsz": "256", "num_updates": "921400", "lr": "7.93939e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 02:21:49,132][train_inner][INFO] - {"epoch": 18, "update": 17.906, "loss": "1.906", "ppl": "3.75", "wps": "362870", "ups": "3.05", "wpb": "118781", "bsz": "256", "num_updates": "921600", "lr": "7.91919e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 02:22:54,437][train_inner][INFO] - {"epoch": 18, "update": 17.91, "loss": "1.917", "ppl": "3.78", "wps": "361015", "ups": "3.06", "wpb": "117877", "bsz": "256", "num_updates": "921800", "lr": "7.89899e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.2", "wall": "0"} +[2022-08-05 02:23:59,513][train_inner][INFO] - {"epoch": 18, "update": 17.914, "loss": "1.915", "ppl": "3.77", "wps": "362547", "ups": "3.07", "wpb": "117952", "bsz": "256", "num_updates": "922000", "lr": "7.87879e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "0"} +[2022-08-05 02:25:04,594][train_inner][INFO] - {"epoch": 18, "update": 17.918, "loss": "1.909", "ppl": "3.76", "wps": "363747", "ups": "3.07", "wpb": "118363", "bsz": "256", "num_updates": "922200", "lr": "7.85859e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.4", "wall": "0"} +[2022-08-05 02:26:10,273][train_inner][INFO] - {"epoch": 18, "update": 17.922, "loss": "1.91", "ppl": "3.76", "wps": "361740", "ups": "3.05", "wpb": "118792", "bsz": "256", "num_updates": "922400", "lr": "7.83838e-05", "gnorm": "0.871", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 02:27:15,514][train_inner][INFO] - {"epoch": 18, "update": 17.926, "loss": "1.913", "ppl": "3.77", "wps": "363279", "ups": "3.07", "wpb": "118501", "bsz": "256", "num_updates": "922600", "lr": "7.81818e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 02:28:20,771][train_inner][INFO] - {"epoch": 18, "update": 17.93, "loss": "1.915", "ppl": "3.77", "wps": "361907", "ups": "3.06", "wpb": "118082", "bsz": "256", "num_updates": "922800", "lr": "7.79798e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "0"} +[2022-08-05 02:29:26,270][train_inner][INFO] - {"epoch": 18, "update": 17.934, "loss": "1.911", "ppl": "3.76", "wps": "362419", "ups": "3.05", "wpb": "118689", "bsz": "256", "num_updates": "923000", "lr": "7.77778e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 02:30:31,401][train_inner][INFO] - {"epoch": 18, "update": 17.937, "loss": "1.908", "ppl": "3.75", "wps": "363583", "ups": "3.07", "wpb": "118400", "bsz": "256", "num_updates": "923200", "lr": "7.75758e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 02:31:36,452][train_inner][INFO] - {"epoch": 18, "update": 17.941, "loss": "1.913", "ppl": "3.77", "wps": "363022", "ups": "3.07", "wpb": "118073", "bsz": "256", "num_updates": "923400", "lr": "7.73737e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 02:32:41,402][train_inner][INFO] - {"epoch": 18, "update": 17.945, "loss": "1.911", "ppl": "3.76", "wps": "364227", "ups": "3.08", "wpb": "118279", "bsz": "256", "num_updates": "923600", "lr": "7.71717e-05", "gnorm": "0.88", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 02:33:46,568][train_inner][INFO] - {"epoch": 18, "update": 17.949, "loss": "1.906", "ppl": "3.75", "wps": "365712", "ups": "3.07", "wpb": "119159", "bsz": "256", "num_updates": "923800", "lr": "7.69697e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 02:34:20,457][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-05 02:34:52,038][train_inner][INFO] - {"epoch": 18, "update": 17.953, "loss": "1.909", "ppl": "3.76", "wps": "361084", "ups": "3.06", "wpb": "118186", "bsz": "256", "num_updates": "924000", "lr": "7.67677e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 02:35:57,090][train_inner][INFO] - {"epoch": 18, "update": 17.957, "loss": "1.906", "ppl": "3.75", "wps": "364645", "ups": "3.07", "wpb": "118604", "bsz": "256", "num_updates": "924200", "lr": "7.65657e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-05 02:37:02,317][train_inner][INFO] - {"epoch": 18, "update": 17.961, "loss": "1.909", "ppl": "3.76", "wps": "362034", "ups": "3.07", "wpb": "118069", "bsz": "256", "num_updates": "924400", "lr": "7.63636e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 02:38:07,476][train_inner][INFO] - {"epoch": 18, "update": 17.965, "loss": "1.91", "ppl": "3.76", "wps": "362341", "ups": "3.07", "wpb": "118047", "bsz": "256", "num_updates": "924600", "lr": "7.61616e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 02:39:12,450][train_inner][INFO] - {"epoch": 18, "update": 17.968, "loss": "1.91", "ppl": "3.76", "wps": "364436", "ups": "3.08", "wpb": "118393", "bsz": "256", "num_updates": "924800", "lr": "7.59596e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 02:40:17,411][train_inner][INFO] - {"epoch": 18, "update": 17.972, "loss": "1.913", "ppl": "3.77", "wps": "363307", "ups": "3.08", "wpb": "118002", "bsz": "256", "num_updates": "925000", "lr": "7.57576e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 02:41:22,785][train_inner][INFO] - {"epoch": 18, "update": 17.976, "loss": "1.907", "ppl": "3.75", "wps": "363146", "ups": "3.06", "wpb": "118700", "bsz": "256", "num_updates": "925200", "lr": "7.55556e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-05 02:42:27,935][train_inner][INFO] - {"epoch": 18, "update": 17.98, "loss": "1.908", "ppl": "3.75", "wps": "362891", "ups": "3.07", "wpb": "118210", "bsz": "256", "num_updates": "925400", "lr": "7.53535e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 02:43:33,618][train_inner][INFO] - {"epoch": 18, "update": 17.984, "loss": "1.906", "ppl": "3.75", "wps": "362184", "ups": "3.05", "wpb": "118943", "bsz": "256", "num_updates": "925600", "lr": "7.51515e-05", "gnorm": "0.874", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 02:44:38,840][train_inner][INFO] - {"epoch": 18, "update": 17.988, "loss": "1.902", "ppl": "3.74", "wps": "362990", "ups": "3.07", "wpb": "118373", "bsz": "256", "num_updates": "925800", "lr": "7.49495e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 02:45:31,006][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-05 02:45:44,285][train_inner][INFO] - {"epoch": 18, "update": 17.992, "loss": "1.908", "ppl": "3.75", "wps": "362719", "ups": "3.06", "wpb": "118675", "bsz": "256", "num_updates": "926000", "lr": "7.47475e-05", "gnorm": "0.87", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-05 02:46:49,716][train_inner][INFO] - {"epoch": 18, "update": 17.996, "loss": "1.911", "ppl": "3.76", "wps": "362156", "ups": "3.06", "wpb": "118480", "bsz": "256", "num_updates": "926200", "lr": "7.45455e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 02:47:54,815][train_inner][INFO] - {"epoch": 18, "update": 18.0, "loss": "1.91", "ppl": "3.76", "wps": "364049", "ups": "3.07", "wpb": "118494", "bsz": "256", "num_updates": "926400", "lr": "7.43434e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-05 02:48:01,437][fairseq_cli.train][INFO] - end of epoch 18 (average epoch stats below) +[2022-08-05 02:48:01,438][train][INFO] - {"epoch": 18, "train_loss": "1.926", "train_ppl": "3.8", "train_wps": "360862", "train_ups": "3.05", "train_wpb": "118299", "train_bsz": "256", "train_num_updates": "926421", "train_lr": "7.43222e-05", "train_gnorm": "0.862", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16706", "train_gb_free": "20.2", "train_wall": "0"} +[2022-08-05 02:48:01,530][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-05 02:48:01,533][fairseq.trainer][INFO] - begin training epoch 19 +[2022-08-05 02:48:01,534][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-05 02:49:12,131][train_inner][INFO] - {"epoch": 19, "update": 18.003, "loss": "1.901", "ppl": "3.74", "wps": "305169", "ups": "2.59", "wpb": "117971", "bsz": "255.4", "num_updates": "926600", "lr": "7.41414e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 02:50:17,300][train_inner][INFO] - {"epoch": 19, "update": 18.007, "loss": "1.904", "ppl": "3.74", "wps": "362625", "ups": "3.07", "wpb": "118156", "bsz": "256", "num_updates": "926800", "lr": "7.39394e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 02:51:22,461][train_inner][INFO] - {"epoch": 19, "update": 18.011, "loss": "1.9", "ppl": "3.73", "wps": "362016", "ups": "3.07", "wpb": "117944", "bsz": "256", "num_updates": "927000", "lr": "7.37374e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "0"} +[2022-08-05 02:52:28,157][train_inner][INFO] - {"epoch": 19, "update": 18.015, "loss": "1.899", "ppl": "3.73", "wps": "359797", "ups": "3.04", "wpb": "118184", "bsz": "256", "num_updates": "927200", "lr": "7.35354e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 02:53:33,457][train_inner][INFO] - {"epoch": 19, "update": 18.019, "loss": "1.906", "ppl": "3.75", "wps": "360964", "ups": "3.06", "wpb": "117853", "bsz": "256", "num_updates": "927400", "lr": "7.33333e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 02:54:38,722][train_inner][INFO] - {"epoch": 19, "update": 18.023, "loss": "1.9", "ppl": "3.73", "wps": "363876", "ups": "3.06", "wpb": "118740", "bsz": "256", "num_updates": "927600", "lr": "7.31313e-05", "gnorm": "0.873", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 02:55:43,909][train_inner][INFO] - {"epoch": 19, "update": 18.027, "loss": "1.905", "ppl": "3.74", "wps": "362413", "ups": "3.07", "wpb": "118110", "bsz": "256", "num_updates": "927800", "lr": "7.29293e-05", "gnorm": "0.878", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 02:56:49,182][train_inner][INFO] - {"epoch": 19, "update": 18.031, "loss": "1.908", "ppl": "3.75", "wps": "363082", "ups": "3.06", "wpb": "118494", "bsz": "256", "num_updates": "928000", "lr": "7.27273e-05", "gnorm": "0.875", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "0"} +[2022-08-05 02:56:52,313][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-05 02:57:54,890][train_inner][INFO] - {"epoch": 19, "update": 18.035, "loss": "1.905", "ppl": "3.75", "wps": "360754", "ups": "3.04", "wpb": "118520", "bsz": "256", "num_updates": "928200", "lr": "7.25253e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 02:59:00,006][train_inner][INFO] - {"epoch": 19, "update": 18.038, "loss": "1.904", "ppl": "3.74", "wps": "363468", "ups": "3.07", "wpb": "118336", "bsz": "256", "num_updates": "928400", "lr": "7.23232e-05", "gnorm": "0.88", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "0"} +[2022-08-05 03:00:05,316][train_inner][INFO] - {"epoch": 19, "update": 18.042, "loss": "1.908", "ppl": "3.75", "wps": "359880", "ups": "3.06", "wpb": "117516", "bsz": "256", "num_updates": "928600", "lr": "7.21212e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 03:01:10,677][train_inner][INFO] - {"epoch": 19, "update": 18.046, "loss": "1.902", "ppl": "3.74", "wps": "363423", "ups": "3.06", "wpb": "118767", "bsz": "256", "num_updates": "928800", "lr": "7.19192e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 03:02:15,977][train_inner][INFO] - {"epoch": 19, "update": 18.05, "loss": "1.909", "ppl": "3.75", "wps": "363485", "ups": "3.06", "wpb": "118674", "bsz": "256", "num_updates": "929000", "lr": "7.17172e-05", "gnorm": "0.88", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-05 03:03:20,989][train_inner][INFO] - {"epoch": 19, "update": 18.054, "loss": "1.904", "ppl": "3.74", "wps": "363142", "ups": "3.08", "wpb": "118042", "bsz": "256", "num_updates": "929200", "lr": "7.15152e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 03:04:26,371][train_inner][INFO] - {"epoch": 19, "update": 18.058, "loss": "1.909", "ppl": "3.75", "wps": "361476", "ups": "3.06", "wpb": "118167", "bsz": "256", "num_updates": "929400", "lr": "7.13131e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 03:05:31,604][train_inner][INFO] - {"epoch": 19, "update": 18.062, "loss": "1.904", "ppl": "3.74", "wps": "362188", "ups": "3.07", "wpb": "118131", "bsz": "256", "num_updates": "929600", "lr": "7.11111e-05", "gnorm": "0.88", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 03:05:43,046][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 03:06:37,384][train_inner][INFO] - {"epoch": 19, "update": 18.066, "loss": "1.905", "ppl": "3.74", "wps": "358338", "ups": "3.04", "wpb": "117843", "bsz": "256", "num_updates": "929800", "lr": "7.09091e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 03:07:42,981][train_inner][INFO] - {"epoch": 19, "update": 18.07, "loss": "1.908", "ppl": "3.75", "wps": "360650", "ups": "3.05", "wpb": "118287", "bsz": "256", "num_updates": "930000", "lr": "7.07071e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 03:08:48,565][train_inner][INFO] - {"epoch": 19, "update": 18.073, "loss": "1.901", "ppl": "3.73", "wps": "361362", "ups": "3.05", "wpb": "118496", "bsz": "256", "num_updates": "930200", "lr": "7.05051e-05", "gnorm": "0.877", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 03:09:53,728][train_inner][INFO] - {"epoch": 19, "update": 18.077, "loss": "1.905", "ppl": "3.74", "wps": "362934", "ups": "3.07", "wpb": "118246", "bsz": "256", "num_updates": "930400", "lr": "7.0303e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-05 03:10:58,721][train_inner][INFO] - {"epoch": 19, "update": 18.081, "loss": "1.9", "ppl": "3.73", "wps": "361967", "ups": "3.08", "wpb": "117626", "bsz": "256", "num_updates": "930600", "lr": "7.0101e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 03:12:03,909][train_inner][INFO] - {"epoch": 19, "update": 18.085, "loss": "1.901", "ppl": "3.74", "wps": "364768", "ups": "3.07", "wpb": "118889", "bsz": "256", "num_updates": "930800", "lr": "6.9899e-05", "gnorm": "0.876", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 03:13:09,077][train_inner][INFO] - {"epoch": 19, "update": 18.089, "loss": "1.903", "ppl": "3.74", "wps": "362893", "ups": "3.07", "wpb": "118245", "bsz": "256", "num_updates": "931000", "lr": "6.9697e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 03:14:14,338][train_inner][INFO] - {"epoch": 19, "update": 18.093, "loss": "1.903", "ppl": "3.74", "wps": "362644", "ups": "3.06", "wpb": "118329", "bsz": "256", "num_updates": "931200", "lr": "6.94949e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 03:15:19,340][train_inner][INFO] - {"epoch": 19, "update": 18.097, "loss": "1.906", "ppl": "3.75", "wps": "363819", "ups": "3.08", "wpb": "118243", "bsz": "256", "num_updates": "931400", "lr": "6.92929e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 03:16:24,737][train_inner][INFO] - {"epoch": 19, "update": 18.101, "loss": "1.905", "ppl": "3.75", "wps": "361932", "ups": "3.06", "wpb": "118345", "bsz": "256", "num_updates": "931600", "lr": "6.90909e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-05 03:17:30,208][train_inner][INFO] - {"epoch": 19, "update": 18.105, "loss": "1.912", "ppl": "3.76", "wps": "361927", "ups": "3.06", "wpb": "118464", "bsz": "256", "num_updates": "931800", "lr": "6.88889e-05", "gnorm": "0.88", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-05 03:18:35,186][train_inner][INFO] - {"epoch": 19, "update": 18.108, "loss": "1.907", "ppl": "3.75", "wps": "362129", "ups": "3.08", "wpb": "117651", "bsz": "255.9", "num_updates": "932000", "lr": "6.86869e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 03:19:13,881][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 03:19:40,694][train_inner][INFO] - {"epoch": 19, "update": 18.112, "loss": "1.902", "ppl": "3.74", "wps": "359842", "ups": "3.05", "wpb": "117860", "bsz": "256", "num_updates": "932200", "lr": "6.84848e-05", "gnorm": "0.886", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 03:20:45,769][train_inner][INFO] - {"epoch": 19, "update": 18.116, "loss": "1.903", "ppl": "3.74", "wps": "364090", "ups": "3.07", "wpb": "118465", "bsz": "256", "num_updates": "932400", "lr": "6.82828e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 03:21:51,223][train_inner][INFO] - {"epoch": 19, "update": 18.12, "loss": "1.902", "ppl": "3.74", "wps": "360688", "ups": "3.06", "wpb": "118041", "bsz": "256", "num_updates": "932600", "lr": "6.80808e-05", "gnorm": "0.884", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 03:22:56,573][train_inner][INFO] - {"epoch": 19, "update": 18.124, "loss": "1.901", "ppl": "3.74", "wps": "361272", "ups": "3.06", "wpb": "118044", "bsz": "256", "num_updates": "932800", "lr": "6.78788e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 03:24:01,939][train_inner][INFO] - {"epoch": 19, "update": 18.128, "loss": "1.903", "ppl": "3.74", "wps": "364843", "ups": "3.06", "wpb": "119240", "bsz": "256", "num_updates": "933000", "lr": "6.76768e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 03:25:07,341][train_inner][INFO] - {"epoch": 19, "update": 18.132, "loss": "1.9", "ppl": "3.73", "wps": "363309", "ups": "3.06", "wpb": "118804", "bsz": "256", "num_updates": "933200", "lr": "6.74747e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-05 03:26:12,983][train_inner][INFO] - {"epoch": 19, "update": 18.136, "loss": "1.907", "ppl": "3.75", "wps": "359975", "ups": "3.05", "wpb": "118145", "bsz": "256", "num_updates": "933400", "lr": "6.72727e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28", "wall": "0"} +[2022-08-05 03:27:18,144][train_inner][INFO] - {"epoch": 19, "update": 18.139, "loss": "1.901", "ppl": "3.74", "wps": "362838", "ups": "3.07", "wpb": "118212", "bsz": "256", "num_updates": "933600", "lr": "6.70707e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 03:28:23,359][train_inner][INFO] - {"epoch": 19, "update": 18.143, "loss": "1.902", "ppl": "3.74", "wps": "362854", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "933800", "lr": "6.68687e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "0"} +[2022-08-05 03:29:28,648][train_inner][INFO] - {"epoch": 19, "update": 18.147, "loss": "1.906", "ppl": "3.75", "wps": "363183", "ups": "3.06", "wpb": "118557", "bsz": "256", "num_updates": "934000", "lr": "6.66667e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 03:30:33,178][train_inner][INFO] - {"epoch": 19, "update": 18.151, "loss": "1.902", "ppl": "3.74", "wps": "367301", "ups": "3.1", "wpb": "118507", "bsz": "256", "num_updates": "934200", "lr": "6.64646e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "0"} +[2022-08-05 03:31:38,210][train_inner][INFO] - {"epoch": 19, "update": 18.155, "loss": "1.903", "ppl": "3.74", "wps": "364501", "ups": "3.08", "wpb": "118519", "bsz": "256", "num_updates": "934400", "lr": "6.62626e-05", "gnorm": "0.884", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 03:31:55,491][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 03:32:28,120][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-05 03:32:44,168][train_inner][INFO] - {"epoch": 19, "update": 18.159, "loss": "1.903", "ppl": "3.74", "wps": "358583", "ups": "3.03", "wpb": "118255", "bsz": "256", "num_updates": "934600", "lr": "6.60606e-05", "gnorm": "0.884", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "19.9", "wall": "0"} +[2022-08-05 03:33:49,583][train_inner][INFO] - {"epoch": 19, "update": 18.163, "loss": "1.9", "ppl": "3.73", "wps": "361321", "ups": "3.06", "wpb": "118178", "bsz": "256", "num_updates": "934800", "lr": "6.58586e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 03:34:54,719][train_inner][INFO] - {"epoch": 19, "update": 18.167, "loss": "1.902", "ppl": "3.74", "wps": "363077", "ups": "3.07", "wpb": "118246", "bsz": "256", "num_updates": "935000", "lr": "6.56566e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 03:35:59,438][train_inner][INFO] - {"epoch": 19, "update": 18.171, "loss": "1.898", "ppl": "3.73", "wps": "365135", "ups": "3.09", "wpb": "118153", "bsz": "256", "num_updates": "935200", "lr": "6.54545e-05", "gnorm": "0.883", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.3", "wall": "0"} +[2022-08-05 03:37:04,940][train_inner][INFO] - {"epoch": 19, "update": 18.174, "loss": "1.901", "ppl": "3.73", "wps": "362755", "ups": "3.05", "wpb": "118804", "bsz": "256", "num_updates": "935400", "lr": "6.52525e-05", "gnorm": "0.879", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 03:38:10,312][train_inner][INFO] - {"epoch": 19, "update": 18.178, "loss": "1.898", "ppl": "3.73", "wps": "360758", "ups": "3.06", "wpb": "117916", "bsz": "256", "num_updates": "935600", "lr": "6.50505e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-05 03:39:15,286][train_inner][INFO] - {"epoch": 19, "update": 18.182, "loss": "1.904", "ppl": "3.74", "wps": "361062", "ups": "3.08", "wpb": "117282", "bsz": "256", "num_updates": "935800", "lr": "6.48485e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 03:40:20,193][train_inner][INFO] - {"epoch": 19, "update": 18.186, "loss": "1.902", "ppl": "3.74", "wps": "363876", "ups": "3.08", "wpb": "118088", "bsz": "256", "num_updates": "936000", "lr": "6.46465e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-05 03:41:25,361][train_inner][INFO] - {"epoch": 19, "update": 18.19, "loss": "1.904", "ppl": "3.74", "wps": "361933", "ups": "3.07", "wpb": "117932", "bsz": "256", "num_updates": "936200", "lr": "6.44444e-05", "gnorm": "0.886", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-05 03:42:30,351][train_inner][INFO] - {"epoch": 19, "update": 18.194, "loss": "1.901", "ppl": "3.73", "wps": "363406", "ups": "3.08", "wpb": "118086", "bsz": "256", "num_updates": "936400", "lr": "6.42424e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 03:43:35,977][train_inner][INFO] - {"epoch": 19, "update": 18.198, "loss": "1.904", "ppl": "3.74", "wps": "361174", "ups": "3.05", "wpb": "118510", "bsz": "256", "num_updates": "936600", "lr": "6.40404e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "0"} +[2022-08-05 03:44:41,308][train_inner][INFO] - {"epoch": 19, "update": 18.202, "loss": "1.901", "ppl": "3.73", "wps": "363002", "ups": "3.06", "wpb": "118575", "bsz": "256", "num_updates": "936800", "lr": "6.38384e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 03:45:46,154][train_inner][INFO] - {"epoch": 19, "update": 18.206, "loss": "1.897", "ppl": "3.72", "wps": "363041", "ups": "3.08", "wpb": "117707", "bsz": "256", "num_updates": "937000", "lr": "6.36364e-05", "gnorm": "0.893", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "0"} +[2022-08-05 03:46:51,437][train_inner][INFO] - {"epoch": 19, "update": 18.209, "loss": "1.893", "ppl": "3.72", "wps": "364881", "ups": "3.06", "wpb": "119100", "bsz": "256", "num_updates": "937200", "lr": "6.34343e-05", "gnorm": "0.881", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 03:47:56,777][train_inner][INFO] - {"epoch": 19, "update": 18.213, "loss": "1.901", "ppl": "3.74", "wps": "362678", "ups": "3.06", "wpb": "118485", "bsz": "256", "num_updates": "937400", "lr": "6.32323e-05", "gnorm": "0.884", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "0"} +[2022-08-05 03:49:02,109][train_inner][INFO] - {"epoch": 19, "update": 18.217, "loss": "1.898", "ppl": "3.73", "wps": "361393", "ups": "3.06", "wpb": "118052", "bsz": "256", "num_updates": "937600", "lr": "6.30303e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 03:50:07,252][train_inner][INFO] - {"epoch": 19, "update": 18.221, "loss": "1.905", "ppl": "3.74", "wps": "362937", "ups": "3.07", "wpb": "118198", "bsz": "256", "num_updates": "937800", "lr": "6.28283e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 03:51:12,413][train_inner][INFO] - {"epoch": 19, "update": 18.225, "loss": "1.896", "ppl": "3.72", "wps": "362571", "ups": "3.07", "wpb": "118126", "bsz": "256", "num_updates": "938000", "lr": "6.26263e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.5", "wall": "0"} +[2022-08-05 03:52:17,736][train_inner][INFO] - {"epoch": 19, "update": 18.229, "loss": "1.902", "ppl": "3.74", "wps": "362683", "ups": "3.06", "wpb": "118456", "bsz": "256", "num_updates": "938200", "lr": "6.24242e-05", "gnorm": "0.886", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 03:53:23,001][train_inner][INFO] - {"epoch": 19, "update": 18.233, "loss": "1.901", "ppl": "3.74", "wps": "364478", "ups": "3.06", "wpb": "118937", "bsz": "256", "num_updates": "938400", "lr": "6.22222e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 03:54:28,077][train_inner][INFO] - {"epoch": 19, "update": 18.237, "loss": "1.905", "ppl": "3.75", "wps": "362716", "ups": "3.07", "wpb": "118019", "bsz": "256", "num_updates": "938600", "lr": "6.20202e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-05 03:55:33,559][train_inner][INFO] - {"epoch": 19, "update": 18.241, "loss": "1.899", "ppl": "3.73", "wps": "361600", "ups": "3.05", "wpb": "118388", "bsz": "256", "num_updates": "938800", "lr": "6.18182e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 03:55:46,668][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 03:56:39,291][train_inner][INFO] - {"epoch": 19, "update": 18.244, "loss": "1.893", "ppl": "3.71", "wps": "362757", "ups": "3.04", "wpb": "119223", "bsz": "256", "num_updates": "939000", "lr": "6.16162e-05", "gnorm": "0.882", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 03:57:44,630][train_inner][INFO] - {"epoch": 19, "update": 18.248, "loss": "1.894", "ppl": "3.72", "wps": "364420", "ups": "3.06", "wpb": "119053", "bsz": "256", "num_updates": "939200", "lr": "6.14141e-05", "gnorm": "0.884", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-05 03:58:50,159][train_inner][INFO] - {"epoch": 19, "update": 18.252, "loss": "1.897", "ppl": "3.72", "wps": "361022", "ups": "3.05", "wpb": "118282", "bsz": "256", "num_updates": "939400", "lr": "6.12121e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 03:59:55,304][train_inner][INFO] - {"epoch": 19, "update": 18.256, "loss": "1.903", "ppl": "3.74", "wps": "363932", "ups": "3.07", "wpb": "118541", "bsz": "256", "num_updates": "939600", "lr": "6.10101e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 04:01:00,582][train_inner][INFO] - {"epoch": 19, "update": 18.26, "loss": "1.9", "ppl": "3.73", "wps": "362926", "ups": "3.06", "wpb": "118436", "bsz": "256", "num_updates": "939800", "lr": "6.08081e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-05 04:02:05,689][train_inner][INFO] - {"epoch": 19, "update": 18.264, "loss": "1.903", "ppl": "3.74", "wps": "363058", "ups": "3.07", "wpb": "118186", "bsz": "256", "num_updates": "940000", "lr": "6.06061e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 04:03:10,698][train_inner][INFO] - {"epoch": 19, "update": 18.268, "loss": "1.895", "ppl": "3.72", "wps": "364351", "ups": "3.08", "wpb": "118429", "bsz": "256", "num_updates": "940200", "lr": "6.0404e-05", "gnorm": "0.885", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.7", "wall": "0"} +[2022-08-05 04:04:15,755][train_inner][INFO] - {"epoch": 19, "update": 18.272, "loss": "1.906", "ppl": "3.75", "wps": "363574", "ups": "3.07", "wpb": "118262", "bsz": "256", "num_updates": "940400", "lr": "6.0202e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 04:05:21,144][train_inner][INFO] - {"epoch": 19, "update": 18.276, "loss": "1.896", "ppl": "3.72", "wps": "360580", "ups": "3.06", "wpb": "117889", "bsz": "256", "num_updates": "940600", "lr": "6e-05", "gnorm": "0.89", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 04:06:26,496][train_inner][INFO] - {"epoch": 19, "update": 18.279, "loss": "1.9", "ppl": "3.73", "wps": "360409", "ups": "3.06", "wpb": "117765", "bsz": "256", "num_updates": "940800", "lr": "5.9798e-05", "gnorm": "0.89", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-05 04:07:31,648][train_inner][INFO] - {"epoch": 19, "update": 18.283, "loss": "1.9", "ppl": "3.73", "wps": "361683", "ups": "3.07", "wpb": "117819", "bsz": "256", "num_updates": "941000", "lr": "5.9596e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.7", "wall": "0"} +[2022-08-05 04:08:36,930][train_inner][INFO] - {"epoch": 19, "update": 18.287, "loss": "1.903", "ppl": "3.74", "wps": "361989", "ups": "3.06", "wpb": "118155", "bsz": "256", "num_updates": "941200", "lr": "5.93939e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-05 04:08:46,382][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 04:09:42,688][train_inner][INFO] - {"epoch": 19, "update": 18.291, "loss": "1.901", "ppl": "3.73", "wps": "359927", "ups": "3.04", "wpb": "118338", "bsz": "256", "num_updates": "941400", "lr": "5.91919e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 04:10:48,008][train_inner][INFO] - {"epoch": 19, "update": 18.295, "loss": "1.897", "ppl": "3.73", "wps": "362097", "ups": "3.06", "wpb": "118260", "bsz": "256", "num_updates": "941600", "lr": "5.89899e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 04:11:53,499][train_inner][INFO] - {"epoch": 19, "update": 18.299, "loss": "1.9", "ppl": "3.73", "wps": "361846", "ups": "3.05", "wpb": "118473", "bsz": "256", "num_updates": "941800", "lr": "5.87879e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 04:12:58,594][train_inner][INFO] - {"epoch": 19, "update": 18.303, "loss": "1.893", "ppl": "3.71", "wps": "361882", "ups": "3.07", "wpb": "117782", "bsz": "256", "num_updates": "942000", "lr": "5.85859e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 04:14:03,527][train_inner][INFO] - {"epoch": 19, "update": 18.307, "loss": "1.894", "ppl": "3.72", "wps": "365383", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "942200", "lr": "5.83838e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 04:15:08,396][train_inner][INFO] - {"epoch": 19, "update": 18.31, "loss": "1.898", "ppl": "3.73", "wps": "365736", "ups": "3.08", "wpb": "118623", "bsz": "256", "num_updates": "942400", "lr": "5.81818e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 04:16:13,617][train_inner][INFO] - {"epoch": 19, "update": 18.314, "loss": "1.9", "ppl": "3.73", "wps": "363761", "ups": "3.07", "wpb": "118622", "bsz": "256", "num_updates": "942600", "lr": "5.79798e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 04:17:19,233][train_inner][INFO] - {"epoch": 19, "update": 18.318, "loss": "1.894", "ppl": "3.72", "wps": "360904", "ups": "3.05", "wpb": "118404", "bsz": "256", "num_updates": "942800", "lr": "5.77778e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "0"} +[2022-08-05 04:18:24,313][train_inner][INFO] - {"epoch": 19, "update": 18.322, "loss": "1.901", "ppl": "3.73", "wps": "362148", "ups": "3.07", "wpb": "117841", "bsz": "256", "num_updates": "943000", "lr": "5.75758e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 04:19:29,564][train_inner][INFO] - {"epoch": 19, "update": 18.326, "loss": "1.899", "ppl": "3.73", "wps": "359018", "ups": "3.07", "wpb": "117129", "bsz": "256", "num_updates": "943200", "lr": "5.73737e-05", "gnorm": "0.897", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 04:20:34,615][train_inner][INFO] - {"epoch": 19, "update": 18.33, "loss": "1.895", "ppl": "3.72", "wps": "362560", "ups": "3.07", "wpb": "117924", "bsz": "256", "num_updates": "943400", "lr": "5.71717e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 04:21:39,816][train_inner][INFO] - {"epoch": 19, "update": 18.334, "loss": "1.9", "ppl": "3.73", "wps": "363906", "ups": "3.07", "wpb": "118632", "bsz": "256", "num_updates": "943600", "lr": "5.69697e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 04:21:48,990][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 04:22:44,999][train_inner][INFO] - {"epoch": 19, "update": 18.338, "loss": "1.898", "ppl": "3.73", "wps": "361965", "ups": "3.07", "wpb": "117956", "bsz": "256", "num_updates": "943800", "lr": "5.67677e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "0"} +[2022-08-05 04:23:49,836][train_inner][INFO] - {"epoch": 19, "update": 18.342, "loss": "1.9", "ppl": "3.73", "wps": "362691", "ups": "3.08", "wpb": "117578", "bsz": "256", "num_updates": "944000", "lr": "5.65657e-05", "gnorm": "0.89", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "0"} +[2022-08-05 04:24:55,396][train_inner][INFO] - {"epoch": 19, "update": 18.345, "loss": "1.892", "ppl": "3.71", "wps": "360250", "ups": "3.05", "wpb": "118087", "bsz": "256", "num_updates": "944200", "lr": "5.63636e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 04:26:01,030][train_inner][INFO] - {"epoch": 19, "update": 18.349, "loss": "1.893", "ppl": "3.72", "wps": "361281", "ups": "3.05", "wpb": "118561", "bsz": "256", "num_updates": "944400", "lr": "5.61616e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 04:27:06,637][train_inner][INFO] - {"epoch": 19, "update": 18.353, "loss": "1.901", "ppl": "3.73", "wps": "360698", "ups": "3.05", "wpb": "118318", "bsz": "256", "num_updates": "944600", "lr": "5.59596e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 04:28:11,514][train_inner][INFO] - {"epoch": 19, "update": 18.357, "loss": "1.9", "ppl": "3.73", "wps": "363608", "ups": "3.08", "wpb": "117948", "bsz": "256", "num_updates": "944800", "lr": "5.57576e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 04:29:16,333][train_inner][INFO] - {"epoch": 19, "update": 18.361, "loss": "1.895", "ppl": "3.72", "wps": "365073", "ups": "3.09", "wpb": "118316", "bsz": "256", "num_updates": "945000", "lr": "5.55556e-05", "gnorm": "0.887", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "0"} +[2022-08-05 04:30:21,195][train_inner][INFO] - {"epoch": 19, "update": 18.365, "loss": "1.901", "ppl": "3.74", "wps": "363643", "ups": "3.08", "wpb": "117931", "bsz": "256", "num_updates": "945200", "lr": "5.53535e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.3", "wall": "0"} +[2022-08-05 04:31:26,654][train_inner][INFO] - {"epoch": 19, "update": 18.369, "loss": "1.897", "ppl": "3.72", "wps": "360146", "ups": "3.06", "wpb": "117871", "bsz": "256", "num_updates": "945400", "lr": "5.51515e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 04:32:31,958][train_inner][INFO] - {"epoch": 19, "update": 18.373, "loss": "1.896", "ppl": "3.72", "wps": "361058", "ups": "3.06", "wpb": "117890", "bsz": "256", "num_updates": "945600", "lr": "5.49495e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 04:33:37,234][train_inner][INFO] - {"epoch": 19, "update": 18.377, "loss": "1.891", "ppl": "3.71", "wps": "363177", "ups": "3.06", "wpb": "118520", "bsz": "256", "num_updates": "945800", "lr": "5.47475e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 04:34:42,477][train_inner][INFO] - {"epoch": 19, "update": 18.38, "loss": "1.895", "ppl": "3.72", "wps": "363024", "ups": "3.07", "wpb": "118421", "bsz": "256", "num_updates": "946000", "lr": "5.45455e-05", "gnorm": "0.89", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 04:35:30,172][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 04:35:47,772][train_inner][INFO] - {"epoch": 19, "update": 18.384, "loss": "1.895", "ppl": "3.72", "wps": "362366", "ups": "3.06", "wpb": "118303", "bsz": "255.9", "num_updates": "946200", "lr": "5.43434e-05", "gnorm": "0.893", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 04:36:52,670][train_inner][INFO] - {"epoch": 19, "update": 18.388, "loss": "1.901", "ppl": "3.73", "wps": "363393", "ups": "3.08", "wpb": "117916", "bsz": "256", "num_updates": "946400", "lr": "5.41414e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 04:37:57,988][train_inner][INFO] - {"epoch": 19, "update": 18.392, "loss": "1.89", "ppl": "3.71", "wps": "364070", "ups": "3.06", "wpb": "118900", "bsz": "256", "num_updates": "946600", "lr": "5.39394e-05", "gnorm": "0.886", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 04:39:02,853][train_inner][INFO] - {"epoch": 19, "update": 18.396, "loss": "1.894", "ppl": "3.72", "wps": "364150", "ups": "3.08", "wpb": "118100", "bsz": "256", "num_updates": "946800", "lr": "5.37374e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 04:40:08,008][train_inner][INFO] - {"epoch": 19, "update": 18.4, "loss": "1.889", "ppl": "3.7", "wps": "363454", "ups": "3.07", "wpb": "118402", "bsz": "256", "num_updates": "947000", "lr": "5.35354e-05", "gnorm": "0.886", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 04:41:12,811][train_inner][INFO] - {"epoch": 19, "update": 18.404, "loss": "1.896", "ppl": "3.72", "wps": "363684", "ups": "3.09", "wpb": "117837", "bsz": "256", "num_updates": "947200", "lr": "5.33333e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.7", "wall": "0"} +[2022-08-05 04:42:18,162][train_inner][INFO] - {"epoch": 19, "update": 18.408, "loss": "1.887", "ppl": "3.7", "wps": "360827", "ups": "3.06", "wpb": "117899", "bsz": "256", "num_updates": "947400", "lr": "5.31313e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 04:43:23,464][train_inner][INFO] - {"epoch": 19, "update": 18.412, "loss": "1.894", "ppl": "3.72", "wps": "361896", "ups": "3.06", "wpb": "118161", "bsz": "256", "num_updates": "947600", "lr": "5.29293e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 04:44:29,089][train_inner][INFO] - {"epoch": 19, "update": 18.415, "loss": "1.893", "ppl": "3.71", "wps": "362239", "ups": "3.05", "wpb": "118843", "bsz": "256", "num_updates": "947800", "lr": "5.27273e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 04:45:33,954][train_inner][INFO] - {"epoch": 19, "update": 18.419, "loss": "1.895", "ppl": "3.72", "wps": "365712", "ups": "3.08", "wpb": "118608", "bsz": "256", "num_updates": "948000", "lr": "5.25253e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25", "wall": "0"} +[2022-08-05 04:46:39,307][train_inner][INFO] - {"epoch": 19, "update": 18.423, "loss": "1.894", "ppl": "3.72", "wps": "360478", "ups": "3.06", "wpb": "117790", "bsz": "256", "num_updates": "948200", "lr": "5.23232e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 04:47:44,660][train_inner][INFO] - {"epoch": 19, "update": 18.427, "loss": "1.89", "ppl": "3.71", "wps": "360670", "ups": "3.06", "wpb": "117851", "bsz": "256", "num_updates": "948400", "lr": "5.21212e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 04:48:49,865][train_inner][INFO] - {"epoch": 19, "update": 18.431, "loss": "1.894", "ppl": "3.72", "wps": "363267", "ups": "3.07", "wpb": "118432", "bsz": "256", "num_updates": "948600", "lr": "5.19192e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-05 04:49:55,235][train_inner][INFO] - {"epoch": 19, "update": 18.435, "loss": "1.887", "ppl": "3.7", "wps": "362530", "ups": "3.06", "wpb": "118492", "bsz": "256", "num_updates": "948800", "lr": "5.17172e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 04:50:59,900][train_inner][INFO] - {"epoch": 19, "update": 18.439, "loss": "1.891", "ppl": "3.71", "wps": "365326", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "949000", "lr": "5.15152e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "0"} +[2022-08-05 04:52:05,125][train_inner][INFO] - {"epoch": 19, "update": 18.443, "loss": "1.889", "ppl": "3.7", "wps": "363556", "ups": "3.07", "wpb": "118562", "bsz": "256", "num_updates": "949200", "lr": "5.13131e-05", "gnorm": "0.888", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 04:52:39,175][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 04:53:10,501][train_inner][INFO] - {"epoch": 19, "update": 18.446, "loss": "1.89", "ppl": "3.71", "wps": "361091", "ups": "3.06", "wpb": "118032", "bsz": "256", "num_updates": "949400", "lr": "5.11111e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-05 04:54:15,937][train_inner][INFO] - {"epoch": 19, "update": 18.45, "loss": "1.892", "ppl": "3.71", "wps": "362649", "ups": "3.06", "wpb": "118650", "bsz": "256", "num_updates": "949600", "lr": "5.09091e-05", "gnorm": "0.89", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 04:55:21,180][train_inner][INFO] - {"epoch": 19, "update": 18.454, "loss": "1.894", "ppl": "3.72", "wps": "361946", "ups": "3.07", "wpb": "118056", "bsz": "256", "num_updates": "949800", "lr": "5.07071e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 04:56:26,057][train_inner][INFO] - {"epoch": 19, "update": 18.458, "loss": "1.897", "ppl": "3.72", "wps": "365659", "ups": "3.08", "wpb": "118613", "bsz": "256", "num_updates": "950000", "lr": "5.05051e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 04:56:26,059][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-05 04:56:49,007][valid][INFO] - {"epoch": 19, "valid_loss": "1.806", "valid_ppl": "3.5", "valid_wps": "1.59658e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "950000", "valid_best_loss": "1.806"} +[2022-08-05 04:56:49,010][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 19 @ 950000 updates +[2022-08-05 04:56:49,012][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_19_950000.pt +[2022-08-05 04:56:58,610][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_19_950000.pt +[2022-08-05 04:57:29,482][fairseq.checkpoint_utils][INFO] - Saved checkpoint /vit-opt/fairseq/bert/adan2/checkpoint_19_950000.pt (epoch 19 @ 950000 updates, score 1.806) (writing took 40.47174836200429 seconds) +[2022-08-05 04:58:34,643][train_inner][INFO] - {"epoch": 19, "update": 18.462, "loss": "1.888", "ppl": "3.7", "wps": "184614", "ups": "1.56", "wpb": "118693", "bsz": "256", "num_updates": "950200", "lr": "5.0303e-05", "gnorm": "0.889", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-05 04:59:40,044][train_inner][INFO] - {"epoch": 19, "update": 18.466, "loss": "1.896", "ppl": "3.72", "wps": "359494", "ups": "3.06", "wpb": "117554", "bsz": "256", "num_updates": "950400", "lr": "5.0101e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 05:00:45,426][train_inner][INFO] - {"epoch": 19, "update": 18.47, "loss": "1.891", "ppl": "3.71", "wps": "363027", "ups": "3.06", "wpb": "118674", "bsz": "256", "num_updates": "950600", "lr": "4.9899e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "0"} +[2022-08-05 05:01:50,428][train_inner][INFO] - {"epoch": 19, "update": 18.474, "loss": "1.897", "ppl": "3.72", "wps": "364185", "ups": "3.08", "wpb": "118362", "bsz": "256", "num_updates": "950800", "lr": "4.9697e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.7", "wall": "0"} +[2022-08-05 05:02:55,649][train_inner][INFO] - {"epoch": 19, "update": 18.478, "loss": "1.892", "ppl": "3.71", "wps": "362572", "ups": "3.07", "wpb": "118234", "bsz": "256", "num_updates": "951000", "lr": "4.94949e-05", "gnorm": "0.897", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 05:04:00,931][train_inner][INFO] - {"epoch": 19, "update": 18.481, "loss": "1.886", "ppl": "3.7", "wps": "361856", "ups": "3.06", "wpb": "118112", "bsz": "256", "num_updates": "951200", "lr": "4.92929e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 05:05:06,516][train_inner][INFO] - {"epoch": 19, "update": 18.485, "loss": "1.887", "ppl": "3.7", "wps": "362477", "ups": "3.05", "wpb": "118862", "bsz": "256", "num_updates": "951400", "lr": "4.90909e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 05:06:11,651][train_inner][INFO] - {"epoch": 19, "update": 18.489, "loss": "1.893", "ppl": "3.71", "wps": "362324", "ups": "3.07", "wpb": "117985", "bsz": "256", "num_updates": "951600", "lr": "4.88889e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 05:07:16,672][train_inner][INFO] - {"epoch": 19, "update": 18.493, "loss": "1.896", "ppl": "3.72", "wps": "362707", "ups": "3.08", "wpb": "117917", "bsz": "256", "num_updates": "951800", "lr": "4.86869e-05", "gnorm": "0.897", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 05:08:21,417][train_inner][INFO] - {"epoch": 19, "update": 18.497, "loss": "1.895", "ppl": "3.72", "wps": "363799", "ups": "3.09", "wpb": "117769", "bsz": "256", "num_updates": "952000", "lr": "4.84848e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "0"} +[2022-08-05 05:09:26,535][train_inner][INFO] - {"epoch": 19, "update": 18.501, "loss": "1.888", "ppl": "3.7", "wps": "364948", "ups": "3.07", "wpb": "118821", "bsz": "256", "num_updates": "952200", "lr": "4.82828e-05", "gnorm": "0.89", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-05 05:10:31,561][train_inner][INFO] - {"epoch": 19, "update": 18.505, "loss": "1.892", "ppl": "3.71", "wps": "362654", "ups": "3.08", "wpb": "117908", "bsz": "256", "num_updates": "952400", "lr": "4.80808e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 05:11:36,997][train_inner][INFO] - {"epoch": 19, "update": 18.509, "loss": "1.89", "ppl": "3.71", "wps": "362117", "ups": "3.06", "wpb": "118474", "bsz": "256", "num_updates": "952600", "lr": "4.78788e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 05:12:42,389][train_inner][INFO] - {"epoch": 19, "update": 18.513, "loss": "1.892", "ppl": "3.71", "wps": "361056", "ups": "3.06", "wpb": "118050", "bsz": "256", "num_updates": "952800", "lr": "4.76768e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "0"} +[2022-08-05 05:13:47,511][train_inner][INFO] - {"epoch": 19, "update": 18.516, "loss": "1.891", "ppl": "3.71", "wps": "364987", "ups": "3.07", "wpb": "118842", "bsz": "256", "num_updates": "953000", "lr": "4.74747e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 05:14:52,610][train_inner][INFO] - {"epoch": 19, "update": 18.52, "loss": "1.889", "ppl": "3.7", "wps": "362479", "ups": "3.07", "wpb": "117983", "bsz": "256", "num_updates": "953200", "lr": "4.72727e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 05:15:57,919][train_inner][INFO] - {"epoch": 19, "update": 18.524, "loss": "1.89", "ppl": "3.71", "wps": "362590", "ups": "3.06", "wpb": "118399", "bsz": "256", "num_updates": "953400", "lr": "4.70707e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 05:15:59,524][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-05 05:16:35,364][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 05:17:03,594][train_inner][INFO] - {"epoch": 19, "update": 18.528, "loss": "1.891", "ppl": "3.71", "wps": "360523", "ups": "3.05", "wpb": "118372", "bsz": "256", "num_updates": "953600", "lr": "4.68687e-05", "gnorm": "0.897", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 05:18:09,224][train_inner][INFO] - {"epoch": 19, "update": 18.532, "loss": "1.887", "ppl": "3.7", "wps": "361381", "ups": "3.05", "wpb": "118587", "bsz": "256", "num_updates": "953800", "lr": "4.66667e-05", "gnorm": "0.893", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 05:19:14,521][train_inner][INFO] - {"epoch": 19, "update": 18.536, "loss": "1.891", "ppl": "3.71", "wps": "362107", "ups": "3.06", "wpb": "118220", "bsz": "256", "num_updates": "954000", "lr": "4.64646e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 05:20:19,769][train_inner][INFO] - {"epoch": 19, "update": 18.54, "loss": "1.892", "ppl": "3.71", "wps": "362968", "ups": "3.07", "wpb": "118412", "bsz": "256", "num_updates": "954200", "lr": "4.62626e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 05:21:24,965][train_inner][INFO] - {"epoch": 19, "update": 18.544, "loss": "1.885", "ppl": "3.69", "wps": "363366", "ups": "3.07", "wpb": "118449", "bsz": "256", "num_updates": "954400", "lr": "4.60606e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 05:22:30,317][train_inner][INFO] - {"epoch": 19, "update": 18.548, "loss": "1.89", "ppl": "3.71", "wps": "363770", "ups": "3.06", "wpb": "118863", "bsz": "256", "num_updates": "954600", "lr": "4.58586e-05", "gnorm": "0.892", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 05:23:35,616][train_inner][INFO] - {"epoch": 19, "update": 18.551, "loss": "1.891", "ppl": "3.71", "wps": "361541", "ups": "3.06", "wpb": "118039", "bsz": "256", "num_updates": "954800", "lr": "4.56566e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 05:24:40,984][train_inner][INFO] - {"epoch": 19, "update": 18.555, "loss": "1.886", "ppl": "3.7", "wps": "361653", "ups": "3.06", "wpb": "118201", "bsz": "256", "num_updates": "955000", "lr": "4.54545e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 05:25:45,924][train_inner][INFO] - {"epoch": 19, "update": 18.559, "loss": "1.888", "ppl": "3.7", "wps": "363905", "ups": "3.08", "wpb": "118157", "bsz": "256", "num_updates": "955200", "lr": "4.52525e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-05 05:26:51,203][train_inner][INFO] - {"epoch": 19, "update": 18.563, "loss": "1.894", "ppl": "3.72", "wps": "360688", "ups": "3.06", "wpb": "117725", "bsz": "256", "num_updates": "955400", "lr": "4.50505e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "0"} +[2022-08-05 05:27:56,655][train_inner][INFO] - {"epoch": 19, "update": 18.567, "loss": "1.889", "ppl": "3.7", "wps": "363014", "ups": "3.06", "wpb": "118783", "bsz": "256", "num_updates": "955600", "lr": "4.48485e-05", "gnorm": "0.893", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-05 05:29:02,545][train_inner][INFO] - {"epoch": 19, "update": 18.571, "loss": "1.888", "ppl": "3.7", "wps": "361223", "ups": "3.04", "wpb": "119004", "bsz": "256", "num_updates": "955800", "lr": "4.46465e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "22.8", "wall": "0"} +[2022-08-05 05:30:07,855][train_inner][INFO] - {"epoch": 19, "update": 18.575, "loss": "1.888", "ppl": "3.7", "wps": "363908", "ups": "3.06", "wpb": "118832", "bsz": "256", "num_updates": "956000", "lr": "4.44444e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-05 05:31:13,100][train_inner][INFO] - {"epoch": 19, "update": 18.579, "loss": "1.894", "ppl": "3.72", "wps": "362182", "ups": "3.07", "wpb": "118150", "bsz": "256", "num_updates": "956200", "lr": "4.42424e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 05:31:59,841][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 05:32:18,724][train_inner][INFO] - {"epoch": 19, "update": 18.583, "loss": "1.881", "ppl": "3.68", "wps": "362763", "ups": "3.05", "wpb": "119028", "bsz": "256", "num_updates": "956400", "lr": "4.40404e-05", "gnorm": "0.891", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 05:33:24,357][train_inner][INFO] - {"epoch": 19, "update": 18.586, "loss": "1.883", "ppl": "3.69", "wps": "360397", "ups": "3.05", "wpb": "118269", "bsz": "256", "num_updates": "956600", "lr": "4.38384e-05", "gnorm": "0.897", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 05:34:29,632][train_inner][INFO] - {"epoch": 19, "update": 18.59, "loss": "1.885", "ppl": "3.69", "wps": "362232", "ups": "3.06", "wpb": "118222", "bsz": "256", "num_updates": "956800", "lr": "4.36364e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 05:35:34,731][train_inner][INFO] - {"epoch": 19, "update": 18.594, "loss": "1.89", "ppl": "3.71", "wps": "362586", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "957000", "lr": "4.34343e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "0"} +[2022-08-05 05:36:39,365][train_inner][INFO] - {"epoch": 19, "update": 18.598, "loss": "1.891", "ppl": "3.71", "wps": "364208", "ups": "3.09", "wpb": "117699", "bsz": "256", "num_updates": "957200", "lr": "4.32323e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "0"} +[2022-08-05 05:37:44,678][train_inner][INFO] - {"epoch": 19, "update": 18.602, "loss": "1.885", "ppl": "3.69", "wps": "362945", "ups": "3.06", "wpb": "118523", "bsz": "256", "num_updates": "957400", "lr": "4.30303e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 05:38:50,516][train_inner][INFO] - {"epoch": 19, "update": 18.606, "loss": "1.886", "ppl": "3.7", "wps": "359911", "ups": "3.04", "wpb": "118465", "bsz": "256", "num_updates": "957600", "lr": "4.28283e-05", "gnorm": "0.894", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.7", "wall": "0"} +[2022-08-05 05:39:56,408][train_inner][INFO] - {"epoch": 19, "update": 18.61, "loss": "1.882", "ppl": "3.69", "wps": "359609", "ups": "3.04", "wpb": "118475", "bsz": "256", "num_updates": "957800", "lr": "4.26263e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "27.7", "wall": "0"} +[2022-08-05 05:41:01,698][train_inner][INFO] - {"epoch": 19, "update": 18.614, "loss": "1.882", "ppl": "3.69", "wps": "362749", "ups": "3.06", "wpb": "118416", "bsz": "256", "num_updates": "958000", "lr": "4.24242e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 05:42:06,951][train_inner][INFO] - {"epoch": 19, "update": 18.617, "loss": "1.892", "ppl": "3.71", "wps": "361976", "ups": "3.07", "wpb": "118099", "bsz": "256", "num_updates": "958200", "lr": "4.22222e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 05:43:11,841][train_inner][INFO] - {"epoch": 19, "update": 18.621, "loss": "1.889", "ppl": "3.7", "wps": "363619", "ups": "3.08", "wpb": "117974", "bsz": "256", "num_updates": "958400", "lr": "4.20202e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 05:44:17,036][train_inner][INFO] - {"epoch": 19, "update": 18.625, "loss": "1.885", "ppl": "3.69", "wps": "361930", "ups": "3.07", "wpb": "117977", "bsz": "256", "num_updates": "958600", "lr": "4.18182e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-05 05:45:22,366][train_inner][INFO] - {"epoch": 19, "update": 18.629, "loss": "1.895", "ppl": "3.72", "wps": "363200", "ups": "3.06", "wpb": "118637", "bsz": "256", "num_updates": "958800", "lr": "4.16162e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 05:46:27,595][train_inner][INFO] - {"epoch": 19, "update": 18.633, "loss": "1.885", "ppl": "3.69", "wps": "362923", "ups": "3.07", "wpb": "118363", "bsz": "256", "num_updates": "959000", "lr": "4.14141e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-05 05:47:32,800][train_inner][INFO] - {"epoch": 19, "update": 18.637, "loss": "1.887", "ppl": "3.7", "wps": "362702", "ups": "3.07", "wpb": "118248", "bsz": "256", "num_updates": "959200", "lr": "4.12121e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 05:48:37,992][train_inner][INFO] - {"epoch": 19, "update": 18.641, "loss": "1.883", "ppl": "3.69", "wps": "363594", "ups": "3.07", "wpb": "118515", "bsz": "256", "num_updates": "959400", "lr": "4.10101e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-05 05:49:43,194][train_inner][INFO] - {"epoch": 19, "update": 18.645, "loss": "1.886", "ppl": "3.7", "wps": "361777", "ups": "3.07", "wpb": "117921", "bsz": "256", "num_updates": "959600", "lr": "4.08081e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 05:49:47,354][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 05:50:48,714][train_inner][INFO] - {"epoch": 19, "update": 18.649, "loss": "1.885", "ppl": "3.69", "wps": "361053", "ups": "3.05", "wpb": "118279", "bsz": "256", "num_updates": "959800", "lr": "4.06061e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 05:51:53,843][train_inner][INFO] - {"epoch": 19, "update": 18.652, "loss": "1.884", "ppl": "3.69", "wps": "362527", "ups": "3.07", "wpb": "118053", "bsz": "256", "num_updates": "960000", "lr": "4.0404e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 05:52:59,068][train_inner][INFO] - {"epoch": 19, "update": 18.656, "loss": "1.882", "ppl": "3.69", "wps": "364079", "ups": "3.07", "wpb": "118733", "bsz": "256", "num_updates": "960200", "lr": "4.0202e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 05:54:04,572][train_inner][INFO] - {"epoch": 19, "update": 18.66, "loss": "1.886", "ppl": "3.7", "wps": "360491", "ups": "3.05", "wpb": "118066", "bsz": "256", "num_updates": "960400", "lr": "4e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.8", "wall": "0"} +[2022-08-05 05:55:09,866][train_inner][INFO] - {"epoch": 19, "update": 18.664, "loss": "1.887", "ppl": "3.7", "wps": "361987", "ups": "3.06", "wpb": "118176", "bsz": "256", "num_updates": "960600", "lr": "3.9798e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-05 05:56:15,292][train_inner][INFO] - {"epoch": 19, "update": 18.668, "loss": "1.888", "ppl": "3.7", "wps": "362265", "ups": "3.06", "wpb": "118506", "bsz": "256", "num_updates": "960800", "lr": "3.9596e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 05:57:20,658][train_inner][INFO] - {"epoch": 19, "update": 18.672, "loss": "1.885", "ppl": "3.69", "wps": "361436", "ups": "3.06", "wpb": "118127", "bsz": "256", "num_updates": "961000", "lr": "3.93939e-05", "gnorm": "0.895", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 05:58:25,971][train_inner][INFO] - {"epoch": 19, "update": 18.676, "loss": "1.88", "ppl": "3.68", "wps": "362666", "ups": "3.06", "wpb": "118431", "bsz": "256", "num_updates": "961200", "lr": "3.91919e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 05:59:31,361][train_inner][INFO] - {"epoch": 19, "update": 18.68, "loss": "1.884", "ppl": "3.69", "wps": "360909", "ups": "3.06", "wpb": "117998", "bsz": "256", "num_updates": "961400", "lr": "3.89899e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 06:00:36,221][train_inner][INFO] - {"epoch": 19, "update": 18.684, "loss": "1.887", "ppl": "3.7", "wps": "364614", "ups": "3.08", "wpb": "118228", "bsz": "256", "num_updates": "961600", "lr": "3.87879e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "0"} +[2022-08-05 06:01:41,856][train_inner][INFO] - {"epoch": 19, "update": 18.687, "loss": "1.881", "ppl": "3.68", "wps": "361421", "ups": "3.05", "wpb": "118608", "bsz": "256", "num_updates": "961800", "lr": "3.85859e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 06:02:46,905][train_inner][INFO] - {"epoch": 19, "update": 18.691, "loss": "1.886", "ppl": "3.7", "wps": "363581", "ups": "3.07", "wpb": "118250", "bsz": "256", "num_updates": "962000", "lr": "3.83838e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 06:03:52,329][train_inner][INFO] - {"epoch": 19, "update": 18.695, "loss": "1.885", "ppl": "3.69", "wps": "361049", "ups": "3.06", "wpb": "118104", "bsz": "256", "num_updates": "962200", "lr": "3.81818e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 06:04:57,554][train_inner][INFO] - {"epoch": 19, "update": 18.699, "loss": "1.885", "ppl": "3.69", "wps": "362518", "ups": "3.07", "wpb": "118225", "bsz": "256", "num_updates": "962400", "lr": "3.79798e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 06:05:36,111][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 06:06:03,526][train_inner][INFO] - {"epoch": 19, "update": 18.703, "loss": "1.883", "ppl": "3.69", "wps": "357958", "ups": "3.03", "wpb": "118074", "bsz": "256", "num_updates": "962600", "lr": "3.77778e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21", "wall": "0"} +[2022-08-05 06:07:08,646][train_inner][INFO] - {"epoch": 19, "update": 18.707, "loss": "1.882", "ppl": "3.68", "wps": "364034", "ups": "3.07", "wpb": "118530", "bsz": "256", "num_updates": "962800", "lr": "3.75758e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 06:08:13,890][train_inner][INFO] - {"epoch": 19, "update": 18.711, "loss": "1.883", "ppl": "3.69", "wps": "362328", "ups": "3.07", "wpb": "118195", "bsz": "256", "num_updates": "963000", "lr": "3.73737e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 06:09:19,260][train_inner][INFO] - {"epoch": 19, "update": 18.715, "loss": "1.889", "ppl": "3.7", "wps": "361600", "ups": "3.06", "wpb": "118187", "bsz": "256", "num_updates": "963200", "lr": "3.71717e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 06:10:24,761][train_inner][INFO] - {"epoch": 19, "update": 18.718, "loss": "1.881", "ppl": "3.68", "wps": "362062", "ups": "3.05", "wpb": "118576", "bsz": "256", "num_updates": "963400", "lr": "3.69697e-05", "gnorm": "0.893", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 06:11:29,926][train_inner][INFO] - {"epoch": 19, "update": 18.722, "loss": "1.886", "ppl": "3.7", "wps": "364644", "ups": "3.07", "wpb": "118790", "bsz": "256", "num_updates": "963600", "lr": "3.67677e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 06:12:35,337][train_inner][INFO] - {"epoch": 19, "update": 18.726, "loss": "1.888", "ppl": "3.7", "wps": "360748", "ups": "3.06", "wpb": "117982", "bsz": "256", "num_updates": "963800", "lr": "3.65657e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 06:13:40,437][train_inner][INFO] - {"epoch": 19, "update": 18.73, "loss": "1.884", "ppl": "3.69", "wps": "363174", "ups": "3.07", "wpb": "118211", "bsz": "256", "num_updates": "964000", "lr": "3.63636e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-05 06:13:43,319][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-05 06:14:46,145][train_inner][INFO] - {"epoch": 19, "update": 18.734, "loss": "1.876", "ppl": "3.67", "wps": "360762", "ups": "3.04", "wpb": "118522", "bsz": "256", "num_updates": "964200", "lr": "3.61616e-05", "gnorm": "0.897", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 06:15:51,332][train_inner][INFO] - {"epoch": 19, "update": 18.738, "loss": "1.887", "ppl": "3.7", "wps": "361401", "ups": "3.07", "wpb": "117791", "bsz": "256", "num_updates": "964400", "lr": "3.59596e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 06:16:56,851][train_inner][INFO] - {"epoch": 19, "update": 18.742, "loss": "1.882", "ppl": "3.68", "wps": "362302", "ups": "3.05", "wpb": "118687", "bsz": "256", "num_updates": "964600", "lr": "3.57576e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-05 06:18:02,419][train_inner][INFO] - {"epoch": 19, "update": 18.746, "loss": "1.882", "ppl": "3.69", "wps": "359618", "ups": "3.05", "wpb": "117895", "bsz": "256", "num_updates": "964800", "lr": "3.55556e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 06:19:07,737][train_inner][INFO] - {"epoch": 19, "update": 18.75, "loss": "1.884", "ppl": "3.69", "wps": "363525", "ups": "3.06", "wpb": "118721", "bsz": "256", "num_updates": "965000", "lr": "3.53535e-05", "gnorm": "0.896", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 06:20:13,224][train_inner][INFO] - {"epoch": 19, "update": 18.753, "loss": "1.881", "ppl": "3.68", "wps": "363592", "ups": "3.05", "wpb": "119051", "bsz": "256", "num_updates": "965200", "lr": "3.51515e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 06:21:18,520][train_inner][INFO] - {"epoch": 19, "update": 18.757, "loss": "1.883", "ppl": "3.69", "wps": "361991", "ups": "3.06", "wpb": "118180", "bsz": "256", "num_updates": "965400", "lr": "3.49495e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 06:22:23,851][train_inner][INFO] - {"epoch": 19, "update": 18.761, "loss": "1.882", "ppl": "3.69", "wps": "362645", "ups": "3.06", "wpb": "118444", "bsz": "256", "num_updates": "965600", "lr": "3.47475e-05", "gnorm": "0.898", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-05 06:23:28,960][train_inner][INFO] - {"epoch": 19, "update": 18.765, "loss": "1.879", "ppl": "3.68", "wps": "363768", "ups": "3.07", "wpb": "118423", "bsz": "256", "num_updates": "965800", "lr": "3.45455e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 06:24:34,388][train_inner][INFO] - {"epoch": 19, "update": 18.769, "loss": "1.888", "ppl": "3.7", "wps": "360801", "ups": "3.06", "wpb": "118030", "bsz": "256", "num_updates": "966000", "lr": "3.43434e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 06:25:39,811][train_inner][INFO] - {"epoch": 19, "update": 18.773, "loss": "1.881", "ppl": "3.68", "wps": "362228", "ups": "3.06", "wpb": "118488", "bsz": "256", "num_updates": "966200", "lr": "3.41414e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 06:26:45,472][train_inner][INFO] - {"epoch": 19, "update": 18.777, "loss": "1.886", "ppl": "3.7", "wps": "362654", "ups": "3.05", "wpb": "119059", "bsz": "256", "num_updates": "966400", "lr": "3.39394e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-05 06:27:51,007][train_inner][INFO] - {"epoch": 19, "update": 18.781, "loss": "1.876", "ppl": "3.67", "wps": "359274", "ups": "3.05", "wpb": "117723", "bsz": "256", "num_updates": "966600", "lr": "3.37374e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-05 06:28:56,271][train_inner][INFO] - {"epoch": 19, "update": 18.785, "loss": "1.881", "ppl": "3.68", "wps": "362191", "ups": "3.06", "wpb": "118188", "bsz": "256", "num_updates": "966800", "lr": "3.35354e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 06:30:01,427][train_inner][INFO] - {"epoch": 19, "update": 18.788, "loss": "1.884", "ppl": "3.69", "wps": "365014", "ups": "3.07", "wpb": "118912", "bsz": "256", "num_updates": "967000", "lr": "3.33333e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 06:31:06,510][train_inner][INFO] - {"epoch": 19, "update": 18.792, "loss": "1.876", "ppl": "3.67", "wps": "364793", "ups": "3.07", "wpb": "118707", "bsz": "256", "num_updates": "967200", "lr": "3.31313e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 06:32:11,579][train_inner][INFO] - {"epoch": 19, "update": 18.796, "loss": "1.879", "ppl": "3.68", "wps": "361270", "ups": "3.07", "wpb": "117536", "bsz": "256", "num_updates": "967400", "lr": "3.29293e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 06:33:16,890][train_inner][INFO] - {"epoch": 19, "update": 18.8, "loss": "1.883", "ppl": "3.69", "wps": "363592", "ups": "3.06", "wpb": "118719", "bsz": "256", "num_updates": "967600", "lr": "3.27273e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-05 06:34:21,982][train_inner][INFO] - {"epoch": 19, "update": 18.804, "loss": "1.883", "ppl": "3.69", "wps": "361639", "ups": "3.07", "wpb": "117696", "bsz": "256", "num_updates": "967800", "lr": "3.25253e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 06:35:27,066][train_inner][INFO] - {"epoch": 19, "update": 18.808, "loss": "1.876", "ppl": "3.67", "wps": "363538", "ups": "3.07", "wpb": "118301", "bsz": "256", "num_updates": "968000", "lr": "3.23232e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-05 06:36:32,428][train_inner][INFO] - {"epoch": 19, "update": 18.812, "loss": "1.881", "ppl": "3.68", "wps": "362743", "ups": "3.06", "wpb": "118546", "bsz": "256", "num_updates": "968200", "lr": "3.21212e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 06:37:37,702][train_inner][INFO] - {"epoch": 19, "update": 18.816, "loss": "1.88", "ppl": "3.68", "wps": "361407", "ups": "3.06", "wpb": "117951", "bsz": "256", "num_updates": "968400", "lr": "3.19192e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 06:38:43,069][train_inner][INFO] - {"epoch": 19, "update": 18.82, "loss": "1.88", "ppl": "3.68", "wps": "363400", "ups": "3.06", "wpb": "118770", "bsz": "256", "num_updates": "968600", "lr": "3.17172e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 06:39:48,283][train_inner][INFO] - {"epoch": 19, "update": 18.823, "loss": "1.878", "ppl": "3.68", "wps": "364663", "ups": "3.07", "wpb": "118904", "bsz": "256", "num_updates": "968800", "lr": "3.15152e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 06:40:53,495][train_inner][INFO] - {"epoch": 19, "update": 18.827, "loss": "1.882", "ppl": "3.69", "wps": "364376", "ups": "3.07", "wpb": "118806", "bsz": "256", "num_updates": "969000", "lr": "3.13131e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 06:41:58,968][train_inner][INFO] - {"epoch": 19, "update": 18.831, "loss": "1.885", "ppl": "3.69", "wps": "360590", "ups": "3.05", "wpb": "118042", "bsz": "256", "num_updates": "969200", "lr": "3.11111e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 06:43:04,540][train_inner][INFO] - {"epoch": 19, "update": 18.835, "loss": "1.878", "ppl": "3.68", "wps": "362341", "ups": "3.05", "wpb": "118796", "bsz": "256", "num_updates": "969400", "lr": "3.09091e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "0"} +[2022-08-05 06:44:10,252][train_inner][INFO] - {"epoch": 19, "update": 18.839, "loss": "1.875", "ppl": "3.67", "wps": "361242", "ups": "3.04", "wpb": "118676", "bsz": "256", "num_updates": "969600", "lr": "3.07071e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 06:44:52,316][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 06:45:15,798][train_inner][INFO] - {"epoch": 19, "update": 18.843, "loss": "1.879", "ppl": "3.68", "wps": "361408", "ups": "3.05", "wpb": "118442", "bsz": "256", "num_updates": "969800", "lr": "3.05051e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 06:46:21,094][train_inner][INFO] - {"epoch": 19, "update": 18.847, "loss": "1.875", "ppl": "3.67", "wps": "362713", "ups": "3.06", "wpb": "118418", "bsz": "256", "num_updates": "970000", "lr": "3.0303e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "0"} +[2022-08-05 06:47:26,458][train_inner][INFO] - {"epoch": 19, "update": 18.851, "loss": "1.875", "ppl": "3.67", "wps": "361947", "ups": "3.06", "wpb": "118290", "bsz": "256", "num_updates": "970200", "lr": "3.0101e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 06:48:31,486][train_inner][INFO] - {"epoch": 19, "update": 18.854, "loss": "1.876", "ppl": "3.67", "wps": "362736", "ups": "3.08", "wpb": "117937", "bsz": "256", "num_updates": "970400", "lr": "2.9899e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 06:49:36,539][train_inner][INFO] - {"epoch": 19, "update": 18.858, "loss": "1.872", "ppl": "3.66", "wps": "364914", "ups": "3.07", "wpb": "118693", "bsz": "256", "num_updates": "970600", "lr": "2.9697e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 06:50:41,575][train_inner][INFO] - {"epoch": 19, "update": 18.862, "loss": "1.882", "ppl": "3.69", "wps": "363317", "ups": "3.08", "wpb": "118140", "bsz": "256", "num_updates": "970800", "lr": "2.94949e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 06:51:47,120][train_inner][INFO] - {"epoch": 19, "update": 18.866, "loss": "1.877", "ppl": "3.67", "wps": "360877", "ups": "3.05", "wpb": "118265", "bsz": "256", "num_updates": "971000", "lr": "2.92929e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 06:52:52,405][train_inner][INFO] - {"epoch": 19, "update": 18.87, "loss": "1.875", "ppl": "3.67", "wps": "363386", "ups": "3.06", "wpb": "118617", "bsz": "256", "num_updates": "971200", "lr": "2.90909e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 06:53:57,600][train_inner][INFO] - {"epoch": 19, "update": 18.874, "loss": "1.877", "ppl": "3.67", "wps": "363888", "ups": "3.07", "wpb": "118616", "bsz": "256", "num_updates": "971400", "lr": "2.88889e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 06:55:03,288][train_inner][INFO] - {"epoch": 19, "update": 18.878, "loss": "1.884", "ppl": "3.69", "wps": "360972", "ups": "3.05", "wpb": "118545", "bsz": "256", "num_updates": "971600", "lr": "2.86869e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 06:56:08,422][train_inner][INFO] - {"epoch": 19, "update": 18.882, "loss": "1.878", "ppl": "3.68", "wps": "360254", "ups": "3.07", "wpb": "117321", "bsz": "256", "num_updates": "971800", "lr": "2.84848e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27", "wall": "0"} +[2022-08-05 06:57:13,666][train_inner][INFO] - {"epoch": 19, "update": 18.886, "loss": "1.872", "ppl": "3.66", "wps": "362686", "ups": "3.07", "wpb": "118312", "bsz": "256", "num_updates": "972000", "lr": "2.82828e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 06:58:18,595][train_inner][INFO] - {"epoch": 19, "update": 18.889, "loss": "1.88", "ppl": "3.68", "wps": "362958", "ups": "3.08", "wpb": "117832", "bsz": "256", "num_updates": "972200", "lr": "2.80808e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 06:59:23,967][train_inner][INFO] - {"epoch": 19, "update": 18.893, "loss": "1.874", "ppl": "3.67", "wps": "362608", "ups": "3.06", "wpb": "118518", "bsz": "256", "num_updates": "972400", "lr": "2.78788e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 07:00:29,299][train_inner][INFO] - {"epoch": 19, "update": 18.897, "loss": "1.877", "ppl": "3.67", "wps": "362280", "ups": "3.06", "wpb": "118341", "bsz": "256", "num_updates": "972600", "lr": "2.76768e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:01:34,863][train_inner][INFO] - {"epoch": 19, "update": 18.901, "loss": "1.879", "ppl": "3.68", "wps": "361982", "ups": "3.05", "wpb": "118663", "bsz": "256", "num_updates": "972800", "lr": "2.74747e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 07:02:40,109][train_inner][INFO] - {"epoch": 19, "update": 18.905, "loss": "1.874", "ppl": "3.66", "wps": "361885", "ups": "3.07", "wpb": "118055", "bsz": "256", "num_updates": "973000", "lr": "2.72727e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 07:03:45,426][train_inner][INFO] - {"epoch": 19, "update": 18.909, "loss": "1.878", "ppl": "3.68", "wps": "361889", "ups": "3.06", "wpb": "118186", "bsz": "256", "num_updates": "973200", "lr": "2.70707e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 07:04:05,736][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 07:04:51,243][train_inner][INFO] - {"epoch": 19, "update": 18.913, "loss": "1.874", "ppl": "3.66", "wps": "360665", "ups": "3.04", "wpb": "118688", "bsz": "256", "num_updates": "973400", "lr": "2.68687e-05", "gnorm": "0.899", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 07:05:56,298][train_inner][INFO] - {"epoch": 19, "update": 18.917, "loss": "1.883", "ppl": "3.69", "wps": "363060", "ups": "3.07", "wpb": "118092", "bsz": "256", "num_updates": "973600", "lr": "2.66667e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:07:01,592][train_inner][INFO] - {"epoch": 19, "update": 18.921, "loss": "1.874", "ppl": "3.66", "wps": "363296", "ups": "3.06", "wpb": "118590", "bsz": "256", "num_updates": "973800", "lr": "2.64646e-05", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 07:08:06,887][train_inner][INFO] - {"epoch": 19, "update": 18.924, "loss": "1.877", "ppl": "3.67", "wps": "363984", "ups": "3.06", "wpb": "118830", "bsz": "256", "num_updates": "974000", "lr": "2.62626e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "0"} +[2022-08-05 07:09:12,071][train_inner][INFO] - {"epoch": 19, "update": 18.928, "loss": "1.871", "ppl": "3.66", "wps": "361193", "ups": "3.07", "wpb": "117717", "bsz": "256", "num_updates": "974200", "lr": "2.60606e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:10:17,625][train_inner][INFO] - {"epoch": 19, "update": 18.932, "loss": "1.877", "ppl": "3.67", "wps": "361636", "ups": "3.05", "wpb": "118531", "bsz": "256", "num_updates": "974400", "lr": "2.58586e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 07:11:22,520][train_inner][INFO] - {"epoch": 19, "update": 18.936, "loss": "1.875", "ppl": "3.67", "wps": "365537", "ups": "3.08", "wpb": "118605", "bsz": "256", "num_updates": "974600", "lr": "2.56566e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "0"} +[2022-08-05 07:12:27,892][train_inner][INFO] - {"epoch": 19, "update": 18.94, "loss": "1.878", "ppl": "3.68", "wps": "359627", "ups": "3.06", "wpb": "117547", "bsz": "256", "num_updates": "974800", "lr": "2.54545e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "0"} +[2022-08-05 07:12:37,129][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-05 07:13:33,562][train_inner][INFO] - {"epoch": 19, "update": 18.944, "loss": "1.878", "ppl": "3.68", "wps": "360407", "ups": "3.05", "wpb": "118336", "bsz": "256", "num_updates": "975000", "lr": "2.52525e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 07:14:38,771][train_inner][INFO] - {"epoch": 19, "update": 18.948, "loss": "1.878", "ppl": "3.68", "wps": "363205", "ups": "3.07", "wpb": "118421", "bsz": "256", "num_updates": "975200", "lr": "2.50505e-05", "gnorm": "0.913", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 07:15:44,038][train_inner][INFO] - {"epoch": 19, "update": 18.952, "loss": "1.872", "ppl": "3.66", "wps": "362599", "ups": "3.06", "wpb": "118326", "bsz": "256", "num_updates": "975400", "lr": "2.48485e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 07:16:49,228][train_inner][INFO] - {"epoch": 19, "update": 18.956, "loss": "1.877", "ppl": "3.67", "wps": "364383", "ups": "3.07", "wpb": "118768", "bsz": "256", "num_updates": "975600", "lr": "2.46465e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:17:54,652][train_inner][INFO] - {"epoch": 19, "update": 18.959, "loss": "1.88", "ppl": "3.68", "wps": "361353", "ups": "3.06", "wpb": "118192", "bsz": "256", "num_updates": "975800", "lr": "2.44444e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "0"} +[2022-08-05 07:18:59,607][train_inner][INFO] - {"epoch": 19, "update": 18.963, "loss": "1.873", "ppl": "3.66", "wps": "363779", "ups": "3.08", "wpb": "118143", "bsz": "256", "num_updates": "976000", "lr": "2.42424e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 07:20:04,725][train_inner][INFO] - {"epoch": 19, "update": 18.967, "loss": "1.871", "ppl": "3.66", "wps": "364714", "ups": "3.07", "wpb": "118746", "bsz": "256", "num_updates": "976200", "lr": "2.40404e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 07:21:10,031][train_inner][INFO] - {"epoch": 19, "update": 18.971, "loss": "1.875", "ppl": "3.67", "wps": "363466", "ups": "3.06", "wpb": "118681", "bsz": "256", "num_updates": "976400", "lr": "2.38384e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 07:22:15,313][train_inner][INFO] - {"epoch": 19, "update": 18.975, "loss": "1.873", "ppl": "3.66", "wps": "362765", "ups": "3.06", "wpb": "118407", "bsz": "256", "num_updates": "976600", "lr": "2.36364e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 07:23:20,171][train_inner][INFO] - {"epoch": 19, "update": 18.979, "loss": "1.878", "ppl": "3.68", "wps": "363617", "ups": "3.08", "wpb": "117915", "bsz": "256", "num_updates": "976800", "lr": "2.34343e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:24:25,577][train_inner][INFO] - {"epoch": 19, "update": 18.983, "loss": "1.875", "ppl": "3.67", "wps": "361154", "ups": "3.06", "wpb": "118107", "bsz": "256", "num_updates": "977000", "lr": "2.32323e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 07:25:30,724][train_inner][INFO] - {"epoch": 19, "update": 18.987, "loss": "1.878", "ppl": "3.67", "wps": "363364", "ups": "3.07", "wpb": "118358", "bsz": "256", "num_updates": "977200", "lr": "2.30303e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 07:26:35,876][train_inner][INFO] - {"epoch": 19, "update": 18.99, "loss": "1.874", "ppl": "3.67", "wps": "363465", "ups": "3.07", "wpb": "118400", "bsz": "256", "num_updates": "977400", "lr": "2.28283e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-05 07:27:20,504][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-05 07:27:41,300][train_inner][INFO] - {"epoch": 19, "update": 18.994, "loss": "1.876", "ppl": "3.67", "wps": "362645", "ups": "3.06", "wpb": "118627", "bsz": "256", "num_updates": "977600", "lr": "2.26263e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 07:28:46,413][train_inner][INFO] - {"epoch": 19, "update": 18.998, "loss": "1.873", "ppl": "3.66", "wps": "363197", "ups": "3.07", "wpb": "118242", "bsz": "256", "num_updates": "977800", "lr": "2.24242e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 07:29:15,609][fairseq_cli.train][INFO] - end of epoch 19 (average epoch stats below) +[2022-08-05 07:29:15,610][train][INFO] - {"epoch": 19, "train_loss": "1.891", "train_ppl": "3.71", "train_wps": "360821", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "977889", "train_lr": "2.23343e-05", "train_gnorm": "0.894", "train_clip": "0", "train_loss_scale": "4", "train_train_wall": "16705", "train_gb_free": "20.9", "train_wall": "0"} +[2022-08-05 07:29:15,702][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-05 07:29:15,705][fairseq.trainer][INFO] - begin training epoch 20 +[2022-08-05 07:29:15,705][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-05 07:30:03,205][train_inner][INFO] - {"epoch": 20, "update": 19.002, "loss": "1.876", "ppl": "3.67", "wps": "307181", "ups": "2.6", "wpb": "117933", "bsz": "255.4", "num_updates": "978000", "lr": "2.22222e-05", "gnorm": "0.912", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 07:31:08,739][train_inner][INFO] - {"epoch": 20, "update": 19.006, "loss": "1.868", "ppl": "3.65", "wps": "362788", "ups": "3.05", "wpb": "118874", "bsz": "256", "num_updates": "978200", "lr": "2.20202e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "0"} +[2022-08-05 07:32:13,951][train_inner][INFO] - {"epoch": 20, "update": 19.01, "loss": "1.871", "ppl": "3.66", "wps": "361508", "ups": "3.07", "wpb": "117872", "bsz": "256", "num_updates": "978400", "lr": "2.18182e-05", "gnorm": "0.921", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "0"} +[2022-08-05 07:33:19,556][train_inner][INFO] - {"epoch": 20, "update": 19.014, "loss": "1.87", "ppl": "3.66", "wps": "362117", "ups": "3.05", "wpb": "118779", "bsz": "256", "num_updates": "978600", "lr": "2.16162e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 07:34:24,554][train_inner][INFO] - {"epoch": 20, "update": 19.018, "loss": "1.877", "ppl": "3.67", "wps": "363399", "ups": "3.08", "wpb": "118101", "bsz": "256", "num_updates": "978800", "lr": "2.14141e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "0"} +[2022-08-05 07:35:29,850][train_inner][INFO] - {"epoch": 20, "update": 19.022, "loss": "1.868", "ppl": "3.65", "wps": "361586", "ups": "3.06", "wpb": "118047", "bsz": "256", "num_updates": "979000", "lr": "2.12121e-05", "gnorm": "0.915", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.7", "wall": "0"} +[2022-08-05 07:36:35,295][train_inner][INFO] - {"epoch": 20, "update": 19.025, "loss": "1.869", "ppl": "3.65", "wps": "362692", "ups": "3.06", "wpb": "118680", "bsz": "256", "num_updates": "979200", "lr": "2.10101e-05", "gnorm": "0.911", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:37:40,877][train_inner][INFO] - {"epoch": 20, "update": 19.029, "loss": "1.871", "ppl": "3.66", "wps": "360223", "ups": "3.05", "wpb": "118120", "bsz": "256", "num_updates": "979400", "lr": "2.08081e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "0"} +[2022-08-05 07:38:46,125][train_inner][INFO] - {"epoch": 20, "update": 19.033, "loss": "1.874", "ppl": "3.67", "wps": "362500", "ups": "3.07", "wpb": "118259", "bsz": "256", "num_updates": "979600", "lr": "2.06061e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.9", "wall": "0"} +[2022-08-05 07:39:51,417][train_inner][INFO] - {"epoch": 20, "update": 19.037, "loss": "1.873", "ppl": "3.66", "wps": "363541", "ups": "3.06", "wpb": "118668", "bsz": "256", "num_updates": "979800", "lr": "2.0404e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.7", "wall": "0"} +[2022-08-05 07:40:56,578][train_inner][INFO] - {"epoch": 20, "update": 19.041, "loss": "1.87", "ppl": "3.66", "wps": "363643", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "980000", "lr": "2.0202e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "0"} +[2022-08-05 07:42:01,886][train_inner][INFO] - {"epoch": 20, "update": 19.045, "loss": "1.871", "ppl": "3.66", "wps": "362495", "ups": "3.06", "wpb": "118366", "bsz": "256", "num_updates": "980200", "lr": "2e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 07:43:07,080][train_inner][INFO] - {"epoch": 20, "update": 19.049, "loss": "1.874", "ppl": "3.67", "wps": "361172", "ups": "3.07", "wpb": "117729", "bsz": "256", "num_updates": "980400", "lr": "1.9798e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 07:44:11,944][train_inner][INFO] - {"epoch": 20, "update": 19.053, "loss": "1.873", "ppl": "3.66", "wps": "363068", "ups": "3.08", "wpb": "117749", "bsz": "256", "num_updates": "980600", "lr": "1.9596e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "0"} +[2022-08-05 07:45:16,960][train_inner][INFO] - {"epoch": 20, "update": 19.057, "loss": "1.87", "ppl": "3.66", "wps": "362649", "ups": "3.08", "wpb": "117887", "bsz": "256", "num_updates": "980800", "lr": "1.93939e-05", "gnorm": "0.912", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 07:46:22,575][train_inner][INFO] - {"epoch": 20, "update": 19.06, "loss": "1.872", "ppl": "3.66", "wps": "360550", "ups": "3.05", "wpb": "118286", "bsz": "256", "num_updates": "981000", "lr": "1.91919e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 07:47:27,759][train_inner][INFO] - {"epoch": 20, "update": 19.064, "loss": "1.861", "ppl": "3.63", "wps": "365368", "ups": "3.07", "wpb": "119078", "bsz": "256", "num_updates": "981200", "lr": "1.89899e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.9", "wall": "0"} +[2022-08-05 07:48:33,552][train_inner][INFO] - {"epoch": 20, "update": 19.068, "loss": "1.872", "ppl": "3.66", "wps": "360248", "ups": "3.04", "wpb": "118508", "bsz": "256", "num_updates": "981400", "lr": "1.87879e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 07:49:38,631][train_inner][INFO] - {"epoch": 20, "update": 19.072, "loss": "1.873", "ppl": "3.66", "wps": "363146", "ups": "3.07", "wpb": "118163", "bsz": "256", "num_updates": "981600", "lr": "1.85859e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 07:50:44,125][train_inner][INFO] - {"epoch": 20, "update": 19.076, "loss": "1.875", "ppl": "3.67", "wps": "359322", "ups": "3.05", "wpb": "117655", "bsz": "256", "num_updates": "981800", "lr": "1.83838e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-05 07:51:40,995][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 07:51:49,771][train_inner][INFO] - {"epoch": 20, "update": 19.08, "loss": "1.869", "ppl": "3.65", "wps": "361474", "ups": "3.05", "wpb": "118645", "bsz": "256", "num_updates": "982000", "lr": "1.81818e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 07:52:54,885][train_inner][INFO] - {"epoch": 20, "update": 19.084, "loss": "1.867", "ppl": "3.65", "wps": "363348", "ups": "3.07", "wpb": "118294", "bsz": "256", "num_updates": "982200", "lr": "1.79798e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 07:54:00,183][train_inner][INFO] - {"epoch": 20, "update": 19.088, "loss": "1.873", "ppl": "3.66", "wps": "362122", "ups": "3.06", "wpb": "118227", "bsz": "256", "num_updates": "982400", "lr": "1.77778e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "0"} +[2022-08-05 07:55:05,432][train_inner][INFO] - {"epoch": 20, "update": 19.092, "loss": "1.871", "ppl": "3.66", "wps": "360720", "ups": "3.07", "wpb": "117682", "bsz": "256", "num_updates": "982600", "lr": "1.75758e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-05 07:56:10,386][train_inner][INFO] - {"epoch": 20, "update": 19.095, "loss": "1.871", "ppl": "3.66", "wps": "364358", "ups": "3.08", "wpb": "118331", "bsz": "256", "num_updates": "982800", "lr": "1.73737e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.7", "wall": "0"} +[2022-08-05 07:57:15,524][train_inner][INFO] - {"epoch": 20, "update": 19.099, "loss": "1.864", "ppl": "3.64", "wps": "364774", "ups": "3.07", "wpb": "118802", "bsz": "256", "num_updates": "983000", "lr": "1.71717e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 07:58:21,026][train_inner][INFO] - {"epoch": 20, "update": 19.103, "loss": "1.871", "ppl": "3.66", "wps": "361233", "ups": "3.05", "wpb": "118304", "bsz": "256", "num_updates": "983200", "lr": "1.69697e-05", "gnorm": "0.911", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 07:59:26,466][train_inner][INFO] - {"epoch": 20, "update": 19.107, "loss": "1.87", "ppl": "3.66", "wps": "360502", "ups": "3.06", "wpb": "117955", "bsz": "256", "num_updates": "983400", "lr": "1.67677e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 08:00:31,684][train_inner][INFO] - {"epoch": 20, "update": 19.111, "loss": "1.868", "ppl": "3.65", "wps": "362559", "ups": "3.07", "wpb": "118223", "bsz": "256", "num_updates": "983600", "lr": "1.65657e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.5", "wall": "0"} +[2022-08-05 08:01:36,784][train_inner][INFO] - {"epoch": 20, "update": 19.115, "loss": "1.861", "ppl": "3.63", "wps": "364534", "ups": "3.07", "wpb": "118641", "bsz": "256", "num_updates": "983800", "lr": "1.63636e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "0"} +[2022-08-05 08:02:42,049][train_inner][INFO] - {"epoch": 20, "update": 19.119, "loss": "1.874", "ppl": "3.67", "wps": "363171", "ups": "3.06", "wpb": "118511", "bsz": "256", "num_updates": "984000", "lr": "1.61616e-05", "gnorm": "0.913", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 08:03:47,664][train_inner][INFO] - {"epoch": 20, "update": 19.123, "loss": "1.861", "ppl": "3.63", "wps": "361865", "ups": "3.05", "wpb": "118716", "bsz": "256", "num_updates": "984200", "lr": "1.59596e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-05 08:03:56,492][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 08:04:53,153][train_inner][INFO] - {"epoch": 20, "update": 19.126, "loss": "1.865", "ppl": "3.64", "wps": "361670", "ups": "3.05", "wpb": "118425", "bsz": "256", "num_updates": "984400", "lr": "1.57576e-05", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 08:05:58,186][train_inner][INFO] - {"epoch": 20, "update": 19.13, "loss": "1.875", "ppl": "3.67", "wps": "361003", "ups": "3.08", "wpb": "117385", "bsz": "256", "num_updates": "984600", "lr": "1.55556e-05", "gnorm": "0.913", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-05 08:07:03,418][train_inner][INFO] - {"epoch": 20, "update": 19.134, "loss": "1.87", "ppl": "3.66", "wps": "362370", "ups": "3.07", "wpb": "118189", "bsz": "256", "num_updates": "984800", "lr": "1.53535e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 08:08:08,483][train_inner][INFO] - {"epoch": 20, "update": 19.138, "loss": "1.867", "ppl": "3.65", "wps": "363951", "ups": "3.07", "wpb": "118400", "bsz": "256", "num_updates": "985000", "lr": "1.51515e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "0"} +[2022-08-05 08:09:14,136][train_inner][INFO] - {"epoch": 20, "update": 19.142, "loss": "1.868", "ppl": "3.65", "wps": "362341", "ups": "3.05", "wpb": "118943", "bsz": "256", "num_updates": "985200", "lr": "1.49495e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 08:10:19,662][train_inner][INFO] - {"epoch": 20, "update": 19.146, "loss": "1.869", "ppl": "3.65", "wps": "360290", "ups": "3.05", "wpb": "118039", "bsz": "256", "num_updates": "985400", "lr": "1.47475e-05", "gnorm": "0.911", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 08:11:24,505][train_inner][INFO] - {"epoch": 20, "update": 19.15, "loss": "1.865", "ppl": "3.64", "wps": "363669", "ups": "3.08", "wpb": "117905", "bsz": "256", "num_updates": "985600", "lr": "1.45455e-05", "gnorm": "0.911", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "0"} +[2022-08-05 08:12:29,970][train_inner][INFO] - {"epoch": 20, "update": 19.154, "loss": "1.87", "ppl": "3.66", "wps": "360046", "ups": "3.06", "wpb": "117836", "bsz": "256", "num_updates": "985800", "lr": "1.43434e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "0"} +[2022-08-05 08:13:35,004][train_inner][INFO] - {"epoch": 20, "update": 19.158, "loss": "1.871", "ppl": "3.66", "wps": "362337", "ups": "3.08", "wpb": "117819", "bsz": "256", "num_updates": "986000", "lr": "1.41414e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "0"} +[2022-08-05 08:14:40,304][train_inner][INFO] - {"epoch": 20, "update": 19.161, "loss": "1.864", "ppl": "3.64", "wps": "362820", "ups": "3.06", "wpb": "118460", "bsz": "256", "num_updates": "986200", "lr": "1.39394e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "0"} +[2022-08-05 08:15:27,578][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 08:15:46,323][train_inner][INFO] - {"epoch": 20, "update": 19.165, "loss": "1.868", "ppl": "3.65", "wps": "359700", "ups": "3.03", "wpb": "118734", "bsz": "256", "num_updates": "986400", "lr": "1.37374e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.6", "wall": "0"} +[2022-08-05 08:16:51,739][train_inner][INFO] - {"epoch": 20, "update": 19.169, "loss": "1.87", "ppl": "3.65", "wps": "363029", "ups": "3.06", "wpb": "118738", "bsz": "256", "num_updates": "986600", "lr": "1.35354e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 08:17:57,584][train_inner][INFO] - {"epoch": 20, "update": 19.173, "loss": "1.867", "ppl": "3.65", "wps": "360767", "ups": "3.04", "wpb": "118772", "bsz": "256", "num_updates": "986800", "lr": "1.33333e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "0"} +[2022-08-05 08:19:02,567][train_inner][INFO] - {"epoch": 20, "update": 19.177, "loss": "1.868", "ppl": "3.65", "wps": "363432", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "987000", "lr": "1.31313e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 08:20:07,892][train_inner][INFO] - {"epoch": 20, "update": 19.181, "loss": "1.869", "ppl": "3.65", "wps": "361816", "ups": "3.06", "wpb": "118175", "bsz": "256", "num_updates": "987200", "lr": "1.29293e-05", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 08:21:12,989][train_inner][INFO] - {"epoch": 20, "update": 19.185, "loss": "1.867", "ppl": "3.65", "wps": "362489", "ups": "3.07", "wpb": "117984", "bsz": "256", "num_updates": "987400", "lr": "1.27273e-05", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-05 08:22:18,188][train_inner][INFO] - {"epoch": 20, "update": 19.189, "loss": "1.87", "ppl": "3.66", "wps": "364509", "ups": "3.07", "wpb": "118826", "bsz": "256", "num_updates": "987600", "lr": "1.25253e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.7", "wall": "0"} +[2022-08-05 08:23:22,869][train_inner][INFO] - {"epoch": 20, "update": 19.193, "loss": "1.859", "ppl": "3.63", "wps": "366283", "ups": "3.09", "wpb": "118444", "bsz": "256", "num_updates": "987800", "lr": "1.23232e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "0"} +[2022-08-05 08:24:27,660][train_inner][INFO] - {"epoch": 20, "update": 19.196, "loss": "1.859", "ppl": "3.63", "wps": "366275", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "988000", "lr": "1.21212e-05", "gnorm": "0.902", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "0"} +[2022-08-05 08:25:33,115][train_inner][INFO] - {"epoch": 20, "update": 19.2, "loss": "1.867", "ppl": "3.65", "wps": "361221", "ups": "3.06", "wpb": "118216", "bsz": "256", "num_updates": "988200", "lr": "1.19192e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 08:26:38,401][train_inner][INFO] - {"epoch": 20, "update": 19.204, "loss": "1.867", "ppl": "3.65", "wps": "364038", "ups": "3.06", "wpb": "118832", "bsz": "256", "num_updates": "988400", "lr": "1.17172e-05", "gnorm": "0.922", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 08:27:43,870][train_inner][INFO] - {"epoch": 20, "update": 19.208, "loss": "1.864", "ppl": "3.64", "wps": "359469", "ups": "3.05", "wpb": "117669", "bsz": "255.9", "num_updates": "988600", "lr": "1.15152e-05", "gnorm": "0.912", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 08:28:49,062][train_inner][INFO] - {"epoch": 20, "update": 19.212, "loss": "1.866", "ppl": "3.65", "wps": "363030", "ups": "3.07", "wpb": "118330", "bsz": "256", "num_updates": "988800", "lr": "1.13131e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-05 08:29:54,220][train_inner][INFO] - {"epoch": 20, "update": 19.216, "loss": "1.871", "ppl": "3.66", "wps": "361220", "ups": "3.07", "wpb": "117680", "bsz": "256", "num_updates": "989000", "lr": "1.11111e-05", "gnorm": "0.908", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 08:30:59,331][train_inner][INFO] - {"epoch": 20, "update": 19.22, "loss": "1.861", "ppl": "3.63", "wps": "364181", "ups": "3.07", "wpb": "118560", "bsz": "256", "num_updates": "989200", "lr": "1.09091e-05", "gnorm": "0.91", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 08:32:04,272][train_inner][INFO] - {"epoch": 20, "update": 19.224, "loss": "1.871", "ppl": "3.66", "wps": "364307", "ups": "3.08", "wpb": "118289", "bsz": "256", "num_updates": "989400", "lr": "1.07071e-05", "gnorm": "0.906", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 08:33:09,622][train_inner][INFO] - {"epoch": 20, "update": 19.228, "loss": "1.863", "ppl": "3.64", "wps": "364367", "ups": "3.06", "wpb": "119057", "bsz": "256", "num_updates": "989600", "lr": "1.05051e-05", "gnorm": "0.901", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 08:33:21,323][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 08:34:15,266][train_inner][INFO] - {"epoch": 20, "update": 19.231, "loss": "1.867", "ppl": "3.65", "wps": "360860", "ups": "3.05", "wpb": "118426", "bsz": "256", "num_updates": "989800", "lr": "1.0303e-05", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 08:35:20,534][train_inner][INFO] - {"epoch": 20, "update": 19.235, "loss": "1.865", "ppl": "3.64", "wps": "362268", "ups": "3.06", "wpb": "118222", "bsz": "256", "num_updates": "990000", "lr": "1.0101e-05", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "0"} +[2022-08-05 08:36:26,138][train_inner][INFO] - {"epoch": 20, "update": 19.239, "loss": "1.864", "ppl": "3.64", "wps": "360536", "ups": "3.05", "wpb": "118260", "bsz": "256", "num_updates": "990200", "lr": "9.89899e-06", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 08:37:31,178][train_inner][INFO] - {"epoch": 20, "update": 19.243, "loss": "1.868", "ppl": "3.65", "wps": "363146", "ups": "3.08", "wpb": "118093", "bsz": "256", "num_updates": "990400", "lr": "9.69697e-06", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "0"} +[2022-08-05 08:38:36,707][train_inner][INFO] - {"epoch": 20, "update": 19.247, "loss": "1.864", "ppl": "3.64", "wps": "362500", "ups": "3.05", "wpb": "118770", "bsz": "256", "num_updates": "990600", "lr": "9.49495e-06", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 08:39:41,770][train_inner][INFO] - {"epoch": 20, "update": 19.251, "loss": "1.864", "ppl": "3.64", "wps": "361425", "ups": "3.07", "wpb": "117574", "bsz": "256", "num_updates": "990800", "lr": "9.29293e-06", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "0"} +[2022-08-05 08:40:46,599][train_inner][INFO] - {"epoch": 20, "update": 19.255, "loss": "1.86", "ppl": "3.63", "wps": "365298", "ups": "3.09", "wpb": "118408", "bsz": "256", "num_updates": "991000", "lr": "9.09091e-06", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "0"} +[2022-08-05 08:41:51,496][train_inner][INFO] - {"epoch": 20, "update": 19.259, "loss": "1.862", "ppl": "3.64", "wps": "365305", "ups": "3.08", "wpb": "118534", "bsz": "256", "num_updates": "991200", "lr": "8.88889e-06", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 08:42:57,082][train_inner][INFO] - {"epoch": 20, "update": 19.262, "loss": "1.861", "ppl": "3.63", "wps": "362209", "ups": "3.05", "wpb": "118777", "bsz": "256", "num_updates": "991400", "lr": "8.68687e-06", "gnorm": "0.899", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 08:44:02,749][train_inner][INFO] - {"epoch": 20, "update": 19.266, "loss": "1.864", "ppl": "3.64", "wps": "361359", "ups": "3.05", "wpb": "118645", "bsz": "256", "num_updates": "991600", "lr": "8.48485e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 08:45:07,939][train_inner][INFO] - {"epoch": 20, "update": 19.27, "loss": "1.862", "ppl": "3.63", "wps": "363742", "ups": "3.07", "wpb": "118549", "bsz": "256", "num_updates": "991800", "lr": "8.28283e-06", "gnorm": "0.902", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 08:46:11,313][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 08:46:13,907][train_inner][INFO] - {"epoch": 20, "update": 19.274, "loss": "1.865", "ppl": "3.64", "wps": "358319", "ups": "3.03", "wpb": "118186", "bsz": "256", "num_updates": "992000", "lr": "8.08081e-06", "gnorm": "0.911", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "0"} +[2022-08-05 08:47:19,279][train_inner][INFO] - {"epoch": 20, "update": 19.278, "loss": "1.866", "ppl": "3.65", "wps": "362714", "ups": "3.06", "wpb": "118555", "bsz": "256", "num_updates": "992200", "lr": "7.87879e-06", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 08:48:24,672][train_inner][INFO] - {"epoch": 20, "update": 19.282, "loss": "1.862", "ppl": "3.63", "wps": "362665", "ups": "3.06", "wpb": "118578", "bsz": "256", "num_updates": "992400", "lr": "7.67677e-06", "gnorm": "0.902", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 08:49:29,807][train_inner][INFO] - {"epoch": 20, "update": 19.286, "loss": "1.864", "ppl": "3.64", "wps": "361500", "ups": "3.07", "wpb": "117728", "bsz": "256", "num_updates": "992600", "lr": "7.47475e-06", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 08:50:35,396][train_inner][INFO] - {"epoch": 20, "update": 19.29, "loss": "1.866", "ppl": "3.65", "wps": "360721", "ups": "3.05", "wpb": "118295", "bsz": "256", "num_updates": "992800", "lr": "7.27273e-06", "gnorm": "0.908", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "0"} +[2022-08-05 08:51:40,954][train_inner][INFO] - {"epoch": 20, "update": 19.294, "loss": "1.859", "ppl": "3.63", "wps": "362687", "ups": "3.05", "wpb": "118882", "bsz": "256", "num_updates": "993000", "lr": "7.07071e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "0"} +[2022-08-05 08:52:46,157][train_inner][INFO] - {"epoch": 20, "update": 19.297, "loss": "1.866", "ppl": "3.65", "wps": "362765", "ups": "3.07", "wpb": "118266", "bsz": "256", "num_updates": "993200", "lr": "6.86869e-06", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "0"} +[2022-08-05 08:53:51,509][train_inner][INFO] - {"epoch": 20, "update": 19.301, "loss": "1.864", "ppl": "3.64", "wps": "361427", "ups": "3.06", "wpb": "118096", "bsz": "256", "num_updates": "993400", "lr": "6.66667e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "0"} +[2022-08-05 08:54:57,014][train_inner][INFO] - {"epoch": 20, "update": 19.305, "loss": "1.862", "ppl": "3.63", "wps": "360062", "ups": "3.05", "wpb": "117928", "bsz": "256", "num_updates": "993600", "lr": "6.46465e-06", "gnorm": "0.909", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-05 08:56:02,422][train_inner][INFO] - {"epoch": 20, "update": 19.309, "loss": "1.858", "ppl": "3.62", "wps": "363637", "ups": "3.06", "wpb": "118909", "bsz": "256", "num_updates": "993800", "lr": "6.26263e-06", "gnorm": "0.91", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 08:57:07,751][train_inner][INFO] - {"epoch": 20, "update": 19.313, "loss": "1.861", "ppl": "3.63", "wps": "361249", "ups": "3.06", "wpb": "117998", "bsz": "256", "num_updates": "994000", "lr": "6.06061e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 08:57:45,732][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 08:58:13,022][train_inner][INFO] - {"epoch": 20, "update": 19.317, "loss": "1.865", "ppl": "3.64", "wps": "363849", "ups": "3.06", "wpb": "118742", "bsz": "256", "num_updates": "994200", "lr": "5.85859e-06", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 08:59:18,107][train_inner][INFO] - {"epoch": 20, "update": 19.321, "loss": "1.862", "ppl": "3.64", "wps": "363636", "ups": "3.07", "wpb": "118335", "bsz": "256", "num_updates": "994400", "lr": "5.65657e-06", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "0"} +[2022-08-05 09:00:23,548][train_inner][INFO] - {"epoch": 20, "update": 19.325, "loss": "1.865", "ppl": "3.64", "wps": "362590", "ups": "3.06", "wpb": "118640", "bsz": "256", "num_updates": "994600", "lr": "5.45455e-06", "gnorm": "0.907", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "0"} +[2022-08-05 09:01:28,755][train_inner][INFO] - {"epoch": 20, "update": 19.329, "loss": "1.862", "ppl": "3.64", "wps": "362796", "ups": "3.07", "wpb": "118282", "bsz": "256", "num_updates": "994800", "lr": "5.25253e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "0"} +[2022-08-05 09:02:34,302][train_inner][INFO] - {"epoch": 20, "update": 19.332, "loss": "1.861", "ppl": "3.63", "wps": "360287", "ups": "3.05", "wpb": "118076", "bsz": "256", "num_updates": "995000", "lr": "5.05051e-06", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.6", "wall": "0"} +[2022-08-05 09:03:39,269][train_inner][INFO] - {"epoch": 20, "update": 19.336, "loss": "1.864", "ppl": "3.64", "wps": "365434", "ups": "3.08", "wpb": "118703", "bsz": "256", "num_updates": "995200", "lr": "4.84848e-06", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "0"} +[2022-08-05 09:04:44,770][train_inner][INFO] - {"epoch": 20, "update": 19.34, "loss": "1.861", "ppl": "3.63", "wps": "362785", "ups": "3.05", "wpb": "118812", "bsz": "256", "num_updates": "995400", "lr": "4.64646e-06", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 09:05:49,931][train_inner][INFO] - {"epoch": 20, "update": 19.344, "loss": "1.864", "ppl": "3.64", "wps": "363249", "ups": "3.07", "wpb": "118347", "bsz": "256", "num_updates": "995600", "lr": "4.44444e-06", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 09:06:55,161][train_inner][INFO] - {"epoch": 20, "update": 19.348, "loss": "1.864", "ppl": "3.64", "wps": "362181", "ups": "3.07", "wpb": "118108", "bsz": "256", "num_updates": "995800", "lr": "4.24242e-06", "gnorm": "0.917", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 09:08:00,693][train_inner][INFO] - {"epoch": 20, "update": 19.352, "loss": "1.856", "ppl": "3.62", "wps": "363042", "ups": "3.05", "wpb": "118952", "bsz": "256", "num_updates": "996000", "lr": "4.0404e-06", "gnorm": "0.898", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "0"} +[2022-08-05 09:09:06,269][train_inner][INFO] - {"epoch": 20, "update": 19.356, "loss": "1.861", "ppl": "3.63", "wps": "360395", "ups": "3.05", "wpb": "118165", "bsz": "256", "num_updates": "996200", "lr": "3.83838e-06", "gnorm": "0.914", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.2", "wall": "0"} +[2022-08-05 09:10:11,935][train_inner][INFO] - {"epoch": 20, "update": 19.36, "loss": "1.866", "ppl": "3.64", "wps": "360281", "ups": "3.05", "wpb": "118288", "bsz": "256", "num_updates": "996400", "lr": "3.63636e-06", "gnorm": "0.904", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "0"} +[2022-08-05 09:11:17,189][train_inner][INFO] - {"epoch": 20, "update": 19.364, "loss": "1.864", "ppl": "3.64", "wps": "361820", "ups": "3.07", "wpb": "118049", "bsz": "256", "num_updates": "996600", "lr": "3.43434e-06", "gnorm": "0.909", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "0"} +[2022-08-05 09:12:22,500][train_inner][INFO] - {"epoch": 20, "update": 19.367, "loss": "1.861", "ppl": "3.63", "wps": "360653", "ups": "3.06", "wpb": "117772", "bsz": "256", "num_updates": "996800", "lr": "3.23232e-06", "gnorm": "0.91", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 09:13:27,938][train_inner][INFO] - {"epoch": 20, "update": 19.371, "loss": "1.866", "ppl": "3.64", "wps": "359825", "ups": "3.06", "wpb": "117728", "bsz": "256", "num_updates": "997000", "lr": "3.0303e-06", "gnorm": "0.91", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "0"} +[2022-08-05 09:14:33,011][train_inner][INFO] - {"epoch": 20, "update": 19.375, "loss": "1.865", "ppl": "3.64", "wps": "364623", "ups": "3.07", "wpb": "118634", "bsz": "256", "num_updates": "997200", "lr": "2.82828e-06", "gnorm": "0.904", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "0"} +[2022-08-05 09:15:38,304][train_inner][INFO] - {"epoch": 20, "update": 19.379, "loss": "1.862", "ppl": "3.63", "wps": "361216", "ups": "3.06", "wpb": "117922", "bsz": "256", "num_updates": "997400", "lr": "2.62626e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "0"} +[2022-08-05 09:16:43,472][train_inner][INFO] - {"epoch": 20, "update": 19.383, "loss": "1.864", "ppl": "3.64", "wps": "363171", "ups": "3.07", "wpb": "118333", "bsz": "256", "num_updates": "997600", "lr": "2.42424e-06", "gnorm": "0.911", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 09:17:48,381][train_inner][INFO] - {"epoch": 20, "update": 19.387, "loss": "1.856", "ppl": "3.62", "wps": "364428", "ups": "3.08", "wpb": "118258", "bsz": "256", "num_updates": "997800", "lr": "2.22222e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "0"} +[2022-08-05 09:18:35,361][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-05 09:18:53,863][train_inner][INFO] - {"epoch": 20, "update": 19.391, "loss": "1.859", "ppl": "3.63", "wps": "363769", "ups": "3.05", "wpb": "119100", "bsz": "256", "num_updates": "998000", "lr": "2.0202e-06", "gnorm": "0.9", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "0"} +[2022-08-05 09:19:59,140][train_inner][INFO] - {"epoch": 20, "update": 19.395, "loss": "1.863", "ppl": "3.64", "wps": "361491", "ups": "3.06", "wpb": "117984", "bsz": "256", "num_updates": "998200", "lr": "1.81818e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 09:21:04,002][train_inner][INFO] - {"epoch": 20, "update": 19.399, "loss": "1.864", "ppl": "3.64", "wps": "364666", "ups": "3.08", "wpb": "118262", "bsz": "256", "num_updates": "998400", "lr": "1.61616e-06", "gnorm": "0.911", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "0"} +[2022-08-05 09:22:09,547][train_inner][INFO] - {"epoch": 20, "update": 19.402, "loss": "1.862", "ppl": "3.64", "wps": "360092", "ups": "3.05", "wpb": "118009", "bsz": "256", "num_updates": "998600", "lr": "1.41414e-06", "gnorm": "0.905", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "0"} +[2022-08-05 09:23:14,701][train_inner][INFO] - {"epoch": 20, "update": 19.406, "loss": "1.862", "ppl": "3.64", "wps": "362795", "ups": "3.07", "wpb": "118186", "bsz": "256", "num_updates": "998800", "lr": "1.21212e-06", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.1", "wall": "0"} +[2022-08-05 09:24:20,023][train_inner][INFO] - {"epoch": 20, "update": 19.41, "loss": "1.86", "ppl": "3.63", "wps": "361091", "ups": "3.06", "wpb": "117935", "bsz": "256", "num_updates": "999000", "lr": "1.0101e-06", "gnorm": "0.903", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "0"} +[2022-08-05 09:25:24,906][train_inner][INFO] - {"epoch": 20, "update": 19.414, "loss": "1.859", "ppl": "3.63", "wps": "365361", "ups": "3.08", "wpb": "118526", "bsz": "256", "num_updates": "999200", "lr": "8.08081e-07", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "0"} +[2022-08-05 09:26:30,316][train_inner][INFO] - {"epoch": 20, "update": 19.418, "loss": "1.863", "ppl": "3.64", "wps": "362444", "ups": "3.06", "wpb": "118536", "bsz": "256", "num_updates": "999400", "lr": "6.06061e-07", "gnorm": "0.901", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "0"} +[2022-08-05 09:27:35,755][train_inner][INFO] - {"epoch": 20, "update": 19.422, "loss": "1.859", "ppl": "3.63", "wps": "360540", "ups": "3.06", "wpb": "117966", "bsz": "256", "num_updates": "999600", "lr": "4.0404e-07", "gnorm": "0.904", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "0"} +[2022-08-05 09:28:40,987][train_inner][INFO] - {"epoch": 20, "update": 19.426, "loss": "1.864", "ppl": "3.64", "wps": "360975", "ups": "3.07", "wpb": "117721", "bsz": "256", "num_updates": "999800", "lr": "2.0202e-07", "gnorm": "0.906", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "0"} +[2022-08-05 09:29:46,754][train_inner][INFO] - {"epoch": 20, "update": 19.43, "loss": "1.859", "ppl": "3.63", "wps": "359378", "ups": "3.04", "wpb": "118174", "bsz": "256", "num_updates": "1e+06", "lr": "0", "gnorm": "0.911", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "0"} +[2022-08-05 09:29:46,755][fairseq_cli.train][INFO] - Stopping training due to num_updates: 1000000 >= max_update: 1000000 +[2022-08-05 09:29:46,755][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-05 09:30:09,903][valid][INFO] - {"epoch": 20, "valid_loss": "1.777", "valid_ppl": "3.43", "valid_wps": "1.56335e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "1e+06", "valid_best_loss": "1.777"} +[2022-08-05 09:30:09,906][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 20 @ 1000000 updates +[2022-08-05 09:30:09,907][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_20_1000000.pt +[2022-08-05 09:30:19,083][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/bert/adan2/checkpoint_20_1000000.pt +[2022-08-05 09:30:50,453][fairseq.checkpoint_utils][INFO] - Saved checkpoint /vit-opt/fairseq/bert/adan2/checkpoint_20_1000000.pt (epoch 20 @ 1000000 updates, score 1.777) (writing took 40.54718010898796 seconds) +[2022-08-05 09:30:50,454][fairseq_cli.train][INFO] - end of epoch 20 (average epoch stats below) +[2022-08-05 09:30:50,455][train][INFO] - {"epoch": 20, "train_loss": "1.866", "train_ppl": "3.64", "train_wps": "358612", "train_ups": "3.03", "train_wpb": "118313", "train_bsz": "256", "train_num_updates": "1e+06", "train_lr": "0", "train_gnorm": "0.907", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "7179", "train_gb_free": "20", "train_wall": "0"} +[2022-08-05 09:30:50,456][fairseq_cli.train][INFO] - done training in 65611.0 seconds diff --git a/NLP/BERT/exp_results/pretrain/hydra_train-adan.log b/NLP/BERT/exp_results/pretrain/hydra_train-adan.log new file mode 100644 index 0000000..26921d4 --- /dev/null +++ b/NLP/BERT/exp_results/pretrain/hydra_train-adan.log @@ -0,0 +1,4776 @@ +[2022-08-01 01:27:38,652][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 8, 'distributed_num_procs': 8, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': 'tcp://localhost:19896', 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 8, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': True, 'max_tokens': None, 'batch_size': 32, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': True, 'validate_interval': 5, 'validate_interval_updates': 50000, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 32, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 1000000, 'stop_time_hours': 0.0, 'clip_norm': 5.0, 'sentence_avg': False, 'update_freq': [1], 'lr': [0.001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False}, 'checkpoint': {'_name': None, 'save_dir': 'bert/adan2/', 'restore_file': 'checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 5, 'save_interval_updates': 50000, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 8}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'roberta', 'max_positions': 512, 'dropout': 0.1, 'attention_dropout': 0.1}, 'task': {'_name': 'masked_lm', 'data': '/dataset/common/bert-corpus-0729/', 'sample_break_mode': complete, 'tokens_per_sample': 512, 'mask_prob': 0.15, 'leave_unmasked_prob': 0.1, 'random_token_prob': 0.1, 'freq_weighted_replacement': False, 'mask_whole_words': False, 'mask_multiple_length': 1, 'mask_stdev': 0.0, 'shorten_method': none, 'shorten_data_split_list': '', 'seed': 1, 'include_target_tokens': False}, 'criterion': {'_name': 'masked_lm', 'tpu': False}, 'optimizer': {'_name': 'adan', 'adan_betas': '(0.98,0.92,0.99)', 'adan_eps': 1e-08, 'weight_decay': 0.02, 'no_prox': False, 'fp16_adan_stats': False, 'tpu': False, 'lr': [0.001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 10000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 1000000.0, 'lr': [0.001]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} +[2022-08-01 01:27:38,705][fairseq.tasks.masked_lm][INFO] - dictionary: 50264 types +[2022-08-01 01:27:47,160][fairseq_cli.train][INFO] - RobertaModel( + (encoder): RobertaEncoder( + (sentence_encoder): TransformerEncoder( + (dropout_module): FairseqDropout() + (embed_tokens): Embedding(50265, 768, padding_idx=1) + (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1) + (layernorm_embedding): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (layers): ModuleList( + (0): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (1): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (2): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (3): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (4): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (5): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (6): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (7): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (8): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (9): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (10): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + (11): TransformerEncoderLayerBase( + (self_attn): MultiheadAttention( + (dropout_module): FairseqDropout() + (k_proj): Linear(in_features=768, out_features=768, bias=True) + (v_proj): Linear(in_features=768, out_features=768, bias=True) + (q_proj): Linear(in_features=768, out_features=768, bias=True) + (out_proj): Linear(in_features=768, out_features=768, bias=True) + ) + (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + (dropout_module): FairseqDropout() + (activation_dropout_module): FairseqDropout() + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (fc2): Linear(in_features=3072, out_features=768, bias=True) + (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (lm_head): RobertaLMHead( + (dense): Linear(in_features=768, out_features=768, bias=True) + (layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) + ) + ) + (classification_heads): ModuleDict() +) +[2022-08-01 01:27:47,162][fairseq_cli.train][INFO] - task: MaskedLMTask +[2022-08-01 01:27:47,162][fairseq_cli.train][INFO] - model: RobertaModel +[2022-08-01 01:27:47,162][fairseq_cli.train][INFO] - criterion: MaskedLmLoss +[2022-08-01 01:27:47,163][fairseq_cli.train][INFO] - num. shared model params: 209,714,265 (num. trained: 209,714,265) +[2022-08-01 01:27:47,164][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) +[2022-08-01 01:27:47,252][fairseq.data.data_utils][INFO] - loaded 1,066,112 examples from: /dataset/common/bert-corpus-0729/valid +[2022-08-01 01:27:47,284][fairseq.tasks.masked_lm][INFO] - loaded 67780 blocks from: /dataset/common/bert-corpus-0729/valid +[2022-08-01 01:28:02,749][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:2 to store for rank: 0 +[2022-08-01 01:28:02,770][torch.distributed.distributed_c10d][INFO] - Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 8 nodes. +[2022-08-01 01:28:02,771][fairseq.trainer][INFO] - detected shared parameter: encoder.sentence_encoder.embed_tokens.weight <- encoder.lm_head.weight +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 0: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 1: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 2: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 3: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 4: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 5: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 6: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 7: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB +[2022-08-01 01:28:07,678][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** +[2022-08-01 01:28:07,678][fairseq_cli.train][INFO] - training on 8 devices (GPUs/TPUs) +[2022-08-01 01:28:07,678][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 32 +[2022-08-01 01:28:07,680][fairseq.trainer][INFO] - Preparing to load checkpoint bert/adan2/checkpoint_last.pt +[2022-08-01 01:28:07,680][fairseq.trainer][INFO] - No existing checkpoint found bert/adan2/checkpoint_last.pt +[2022-08-01 01:28:07,680][fairseq.trainer][INFO] - loading train data for epoch 1 +[2022-08-01 01:28:10,423][fairseq.data.data_utils][INFO] - loaded 214,960,826 examples from: /dataset/common/bert-corpus-0729/train +[2022-08-01 01:28:14,723][fairseq.tasks.masked_lm][INFO] - loaded 13244396 blocks from: /dataset/common/bert-corpus-0729/train +[2022-08-01 01:28:19,456][fairseq.tasks.fairseq_task][WARNING] - 63,646 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[11407155, 8928673, 3941655, 2058309, 2058002, 11308513, 1003447, 2725530, 13205669, 7271248] +[2022-08-01 01:28:28,999][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 01:28:29,002][fairseq.trainer][INFO] - begin training epoch 1 +[2022-08-01 01:28:29,002][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 01:28:38,650][fairseq.modules.cross_entropy][INFO] - using fused cross entropy +[2022-08-01 01:28:59,309][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-08-01 01:29:52,104][train_inner][INFO] - {"epoch": 1, "update": 0.004, "loss": "14.125", "ppl": "17863.8", "wps": "360757", "ups": "3.05", "wpb": "118068", "bsz": "256", "num_updates": "200", "lr": "2e-05", "gnorm": "2.326", "clip": "0", "loss_scale": "64", "train_wall": "73", "gb_free": "19.9", "wall": "104"} +[2022-08-01 01:30:57,103][train_inner][INFO] - {"epoch": 1, "update": 0.008, "loss": "11.592", "ppl": "3087.5", "wps": "363489", "ups": "3.08", "wpb": "118132", "bsz": "256", "num_updates": "400", "lr": "4e-05", "gnorm": "0.807", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.9", "wall": "169"} +[2022-08-01 01:32:02,116][train_inner][INFO] - {"epoch": 1, "update": 0.012, "loss": "10.661", "ppl": "1618.86", "wps": "364315", "ups": "3.08", "wpb": "118423", "bsz": "256", "num_updates": "600", "lr": "6e-05", "gnorm": "0.546", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "26.3", "wall": "234"} +[2022-08-01 01:33:06,831][train_inner][INFO] - {"epoch": 1, "update": 0.016, "loss": "10.338", "ppl": "1294.13", "wps": "363299", "ups": "3.09", "wpb": "117553", "bsz": "256", "num_updates": "800", "lr": "8e-05", "gnorm": "0.604", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "19.8", "wall": "299"} +[2022-08-01 01:34:11,556][train_inner][INFO] - {"epoch": 1, "update": 0.019, "loss": "10.123", "ppl": "1115.05", "wps": "366500", "ups": "3.09", "wpb": "118607", "bsz": "256", "num_updates": "1000", "lr": "0.0001", "gnorm": "0.648", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.1", "wall": "364"} +[2022-08-01 01:35:16,599][train_inner][INFO] - {"epoch": 1, "update": 0.023, "loss": "9.956", "ppl": "993.35", "wps": "363026", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "1200", "lr": "0.00012", "gnorm": "0.689", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.2", "wall": "429"} +[2022-08-01 01:36:22,235][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "9.822", "ppl": "905.4", "wps": "359906", "ups": "3.05", "wpb": "118112", "bsz": "256", "num_updates": "1400", "lr": "0.00014", "gnorm": "0.735", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.8", "wall": "495"} +[2022-08-01 01:37:26,957][train_inner][INFO] - {"epoch": 1, "update": 0.031, "loss": "9.719", "ppl": "842.51", "wps": "365887", "ups": "3.09", "wpb": "118403", "bsz": "256", "num_updates": "1600", "lr": "0.00016", "gnorm": "0.755", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "26.8", "wall": "559"} +[2022-08-01 01:38:32,064][train_inner][INFO] - {"epoch": 1, "update": 0.035, "loss": "9.622", "ppl": "788.07", "wps": "362895", "ups": "3.07", "wpb": "118133", "bsz": "256", "num_updates": "1800", "lr": "0.00018", "gnorm": "0.78", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21", "wall": "624"} +[2022-08-01 01:39:37,046][train_inner][INFO] - {"epoch": 1, "update": 0.039, "loss": "9.546", "ppl": "747.46", "wps": "365269", "ups": "3.08", "wpb": "118677", "bsz": "256", "num_updates": "2000", "lr": "0.0002", "gnorm": "0.771", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.7", "wall": "689"} +[2022-08-01 01:40:41,879][train_inner][INFO] - {"epoch": 1, "update": 0.043, "loss": "9.482", "ppl": "715.2", "wps": "363413", "ups": "3.08", "wpb": "117804", "bsz": "256", "num_updates": "2200", "lr": "0.00022", "gnorm": "0.791", "clip": "0", "loss_scale": "128", "train_wall": "64", "gb_free": "21", "wall": "754"} +[2022-08-01 01:41:46,995][train_inner][INFO] - {"epoch": 1, "update": 0.047, "loss": "9.422", "ppl": "685.78", "wps": "364787", "ups": "3.07", "wpb": "118766", "bsz": "256", "num_updates": "2400", "lr": "0.00024", "gnorm": "0.79", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "23.3", "wall": "819"} +[2022-08-01 01:42:51,832][train_inner][INFO] - {"epoch": 1, "update": 0.051, "loss": "9.368", "ppl": "660.99", "wps": "364640", "ups": "3.08", "wpb": "118210", "bsz": "256", "num_updates": "2600", "lr": "0.00026", "gnorm": "0.81", "clip": "0", "loss_scale": "128", "train_wall": "64", "gb_free": "19.7", "wall": "884"} +[2022-08-01 01:43:56,819][train_inner][INFO] - {"epoch": 1, "update": 0.054, "loss": "9.219", "ppl": "596", "wps": "362311", "ups": "3.08", "wpb": "117726", "bsz": "256", "num_updates": "2800", "lr": "0.00028", "gnorm": "0.908", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "19.9", "wall": "949"} +[2022-08-01 01:44:08,096][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-08-01 01:45:02,131][train_inner][INFO] - {"epoch": 1, "update": 0.058, "loss": "9.106", "ppl": "551.07", "wps": "363424", "ups": "3.06", "wpb": "118677", "bsz": "256", "num_updates": "3000", "lr": "0.0003", "gnorm": "1.065", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.3", "wall": "1014"} +[2022-08-01 01:46:07,222][train_inner][INFO] - {"epoch": 1, "update": 0.062, "loss": "9.014", "ppl": "517.14", "wps": "365921", "ups": "3.07", "wpb": "119090", "bsz": "256", "num_updates": "3200", "lr": "0.00032", "gnorm": "1.186", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "27.9", "wall": "1080"} +[2022-08-01 01:47:12,058][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "8.936", "ppl": "489.75", "wps": "365284", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "3400", "lr": "0.00034", "gnorm": "1.253", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.8", "wall": "1144"} +[2022-08-01 01:48:16,904][train_inner][INFO] - {"epoch": 1, "update": 0.07, "loss": "8.856", "ppl": "463.33", "wps": "366286", "ups": "3.08", "wpb": "118758", "bsz": "256", "num_updates": "3600", "lr": "0.00036", "gnorm": "1.347", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.7", "wall": "1209"} +[2022-08-01 01:49:22,903][train_inner][INFO] - {"epoch": 1, "update": 0.074, "loss": "8.759", "ppl": "433.3", "wps": "357149", "ups": "3.03", "wpb": "117857", "bsz": "256", "num_updates": "3800", "lr": "0.00038", "gnorm": "1.436", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "20.3", "wall": "1275"} +[2022-08-01 01:50:27,582][train_inner][INFO] - {"epoch": 1, "update": 0.078, "loss": "8.653", "ppl": "402.42", "wps": "367566", "ups": "3.09", "wpb": "118866", "bsz": "256", "num_updates": "4000", "lr": "0.0004", "gnorm": "1.581", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "23.1", "wall": "1340"} +[2022-08-01 01:51:33,516][train_inner][INFO] - {"epoch": 1, "update": 0.082, "loss": "8.525", "ppl": "368.4", "wps": "359342", "ups": "3.03", "wpb": "118462", "bsz": "256", "num_updates": "4200", "lr": "0.00042", "gnorm": "1.715", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "19.9", "wall": "1406"} +[2022-08-01 01:52:38,588][train_inner][INFO] - {"epoch": 1, "update": 0.085, "loss": "8.361", "ppl": "328.71", "wps": "363183", "ups": "3.07", "wpb": "118164", "bsz": "256", "num_updates": "4400", "lr": "0.00044", "gnorm": "1.935", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "26.1", "wall": "1471"} +[2022-08-01 01:53:04,858][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 01:53:43,888][train_inner][INFO] - {"epoch": 1, "update": 0.089, "loss": "8.007", "ppl": "257.24", "wps": "362078", "ups": "3.06", "wpb": "118216", "bsz": "256", "num_updates": "4600", "lr": "0.00046", "gnorm": "2.246", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.3", "wall": "1536"} +[2022-08-01 01:54:49,049][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "7.418", "ppl": "170.98", "wps": "363305", "ups": "3.07", "wpb": "118367", "bsz": "256", "num_updates": "4800", "lr": "0.00048", "gnorm": "2.344", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "1601"} +[2022-08-01 01:55:53,662][train_inner][INFO] - {"epoch": 1, "update": 0.097, "loss": "6.793", "ppl": "110.92", "wps": "366968", "ups": "3.1", "wpb": "118553", "bsz": "256", "num_updates": "5000", "lr": "0.0005", "gnorm": "1.904", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "1666"} +[2022-08-01 01:56:58,565][train_inner][INFO] - {"epoch": 1, "update": 0.101, "loss": "5.971", "ppl": "62.74", "wps": "364983", "ups": "3.08", "wpb": "118440", "bsz": "256", "num_updates": "5200", "lr": "0.00052", "gnorm": "1.458", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.9", "wall": "1731"} +[2022-08-01 01:58:03,416][train_inner][INFO] - {"epoch": 1, "update": 0.105, "loss": "5.461", "ppl": "44.04", "wps": "363252", "ups": "3.08", "wpb": "117785", "bsz": "256", "num_updates": "5400", "lr": "0.00054", "gnorm": "1.353", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.5", "wall": "1796"} +[2022-08-01 01:59:08,586][train_inner][INFO] - {"epoch": 1, "update": 0.109, "loss": "5.127", "ppl": "34.94", "wps": "363059", "ups": "3.07", "wpb": "118300", "bsz": "256", "num_updates": "5600", "lr": "0.00056", "gnorm": "1.246", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21", "wall": "1861"} +[2022-08-01 02:00:15,025][train_inner][INFO] - {"epoch": 1, "update": 0.113, "loss": "4.887", "ppl": "29.6", "wps": "357821", "ups": "3.01", "wpb": "118863", "bsz": "256", "num_updates": "5800", "lr": "0.00058", "gnorm": "1.17", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "1927"} +[2022-08-01 02:01:20,318][train_inner][INFO] - {"epoch": 1, "update": 0.117, "loss": "4.723", "ppl": "26.41", "wps": "361443", "ups": "3.06", "wpb": "117997", "bsz": "256", "num_updates": "6000", "lr": "0.0006", "gnorm": "1.116", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.3", "wall": "1993"} +[2022-08-01 02:02:25,331][train_inner][INFO] - {"epoch": 1, "update": 0.12, "loss": "4.588", "ppl": "24.05", "wps": "364245", "ups": "3.08", "wpb": "118402", "bsz": "256", "num_updates": "6200", "lr": "0.00062", "gnorm": "1.079", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "2058"} +[2022-08-01 02:03:30,606][train_inner][INFO] - {"epoch": 1, "update": 0.124, "loss": "4.46", "ppl": "22.01", "wps": "362584", "ups": "3.06", "wpb": "118336", "bsz": "256", "num_updates": "6400", "lr": "0.00064", "gnorm": "1.044", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.7", "wall": "2123"} +[2022-08-01 02:04:35,712][train_inner][INFO] - {"epoch": 1, "update": 0.128, "loss": "4.371", "ppl": "20.7", "wps": "363604", "ups": "3.07", "wpb": "118362", "bsz": "256", "num_updates": "6600", "lr": "0.00066", "gnorm": "1.016", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "23.2", "wall": "2188"} +[2022-08-01 02:05:40,591][train_inner][INFO] - {"epoch": 1, "update": 0.132, "loss": "4.282", "ppl": "19.45", "wps": "363942", "ups": "3.08", "wpb": "118060", "bsz": "256", "num_updates": "6800", "lr": "0.00068", "gnorm": "0.987", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "25.4", "wall": "2253"} +[2022-08-01 02:06:45,664][train_inner][INFO] - {"epoch": 1, "update": 0.136, "loss": "4.202", "ppl": "18.4", "wps": "363267", "ups": "3.07", "wpb": "118192", "bsz": "256", "num_updates": "7000", "lr": "0.0007", "gnorm": "0.968", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.3", "wall": "2318"} +[2022-08-01 02:07:50,591][train_inner][INFO] - {"epoch": 1, "update": 0.14, "loss": "4.138", "ppl": "17.6", "wps": "364663", "ups": "3.08", "wpb": "118381", "bsz": "256", "num_updates": "7200", "lr": "0.00072", "gnorm": "0.947", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20", "wall": "2383"} +[2022-08-01 02:08:55,941][train_inner][INFO] - {"epoch": 1, "update": 0.144, "loss": "4.074", "ppl": "16.85", "wps": "362326", "ups": "3.06", "wpb": "118388", "bsz": "256", "num_updates": "7400", "lr": "0.00074", "gnorm": "0.933", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.5", "wall": "2448"} +[2022-08-01 02:10:01,077][train_inner][INFO] - {"epoch": 1, "update": 0.148, "loss": "4.023", "ppl": "16.26", "wps": "365803", "ups": "3.07", "wpb": "119133", "bsz": "256", "num_updates": "7600", "lr": "0.00076", "gnorm": "0.907", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.8", "wall": "2513"} +[2022-08-01 02:11:05,800][train_inner][INFO] - {"epoch": 1, "update": 0.152, "loss": "3.982", "ppl": "15.81", "wps": "364478", "ups": "3.09", "wpb": "117943", "bsz": "256", "num_updates": "7800", "lr": "0.00078", "gnorm": "0.902", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "24.3", "wall": "2578"} +[2022-08-01 02:12:11,192][train_inner][INFO] - {"epoch": 1, "update": 0.155, "loss": "3.931", "ppl": "15.25", "wps": "361977", "ups": "3.06", "wpb": "118350", "bsz": "256", "num_updates": "8000", "lr": "0.0008", "gnorm": "0.881", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.4", "wall": "2644"} +[2022-08-01 02:13:16,096][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "3.891", "ppl": "14.84", "wps": "364880", "ups": "3.08", "wpb": "118409", "bsz": "256", "num_updates": "8200", "lr": "0.00082", "gnorm": "0.877", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.1", "wall": "2708"} +[2022-08-01 02:14:21,136][train_inner][INFO] - {"epoch": 1, "update": 0.163, "loss": "3.861", "ppl": "14.53", "wps": "361933", "ups": "3.08", "wpb": "117698", "bsz": "256", "num_updates": "8400", "lr": "0.00084", "gnorm": "0.86", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.3", "wall": "2773"} +[2022-08-01 02:15:26,472][train_inner][INFO] - {"epoch": 1, "update": 0.167, "loss": "3.819", "ppl": "14.11", "wps": "363868", "ups": "3.06", "wpb": "118866", "bsz": "256", "num_updates": "8600", "lr": "0.00086", "gnorm": "0.847", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "21.9", "wall": "2839"} +[2022-08-01 02:16:31,524][train_inner][INFO] - {"epoch": 1, "update": 0.171, "loss": "3.795", "ppl": "13.88", "wps": "363190", "ups": "3.07", "wpb": "118125", "bsz": "256", "num_updates": "8800", "lr": "0.00088", "gnorm": "0.84", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "26.5", "wall": "2904"} +[2022-08-01 02:17:36,473][train_inner][INFO] - {"epoch": 1, "update": 0.175, "loss": "3.765", "ppl": "13.6", "wps": "365548", "ups": "3.08", "wpb": "118708", "bsz": "256", "num_updates": "9000", "lr": "0.0009", "gnorm": "0.83", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "19.9", "wall": "2969"} +[2022-08-01 02:18:41,597][train_inner][INFO] - {"epoch": 1, "update": 0.179, "loss": "3.736", "ppl": "13.32", "wps": "364441", "ups": "3.07", "wpb": "118668", "bsz": "256", "num_updates": "9200", "lr": "0.00092", "gnorm": "0.831", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "22", "wall": "3034"} +[2022-08-01 02:19:46,540][train_inner][INFO] - {"epoch": 1, "update": 0.183, "loss": "3.709", "ppl": "13.08", "wps": "363658", "ups": "3.08", "wpb": "118083", "bsz": "256", "num_updates": "9400", "lr": "0.00094", "gnorm": "0.819", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "20", "wall": "3099"} +[2022-08-01 02:20:52,340][train_inner][INFO] - {"epoch": 1, "update": 0.187, "loss": "3.685", "ppl": "12.86", "wps": "358726", "ups": "3.04", "wpb": "118018", "bsz": "256", "num_updates": "9600", "lr": "0.00096", "gnorm": "0.816", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "22.6", "wall": "3165"} +[2022-08-01 02:21:58,374][train_inner][INFO] - {"epoch": 1, "update": 0.19, "loss": "3.658", "ppl": "12.62", "wps": "358406", "ups": "3.03", "wpb": "118334", "bsz": "256", "num_updates": "9800", "lr": "0.00098", "gnorm": "0.806", "clip": "0", "loss_scale": "128", "train_wall": "66", "gb_free": "25.9", "wall": "3231"} +[2022-08-01 02:23:04,138][train_inner][INFO] - {"epoch": 1, "update": 0.194, "loss": "3.646", "ppl": "12.52", "wps": "361683", "ups": "3.04", "wpb": "118927", "bsz": "256", "num_updates": "10000", "lr": "0.001", "gnorm": "0.801", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "26.9", "wall": "3296"} +[2022-08-01 02:24:09,151][train_inner][INFO] - {"epoch": 1, "update": 0.198, "loss": "3.626", "ppl": "12.35", "wps": "364313", "ups": "3.08", "wpb": "118424", "bsz": "256", "num_updates": "10200", "lr": "0.000999798", "gnorm": "0.794", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "21.1", "wall": "3361"} +[2022-08-01 02:24:59,671][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 +[2022-08-01 02:25:14,067][train_inner][INFO] - {"epoch": 1, "update": 0.202, "loss": "3.61", "ppl": "12.21", "wps": "362650", "ups": "3.08", "wpb": "117707", "bsz": "256", "num_updates": "10400", "lr": "0.000999596", "gnorm": "0.793", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.1", "wall": "3426"} +[2022-08-01 02:26:19,932][train_inner][INFO] - {"epoch": 1, "update": 0.206, "loss": "3.584", "ppl": "11.99", "wps": "359211", "ups": "3.04", "wpb": "118297", "bsz": "256", "num_updates": "10600", "lr": "0.000999394", "gnorm": "0.784", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20", "wall": "3492"} +[2022-08-01 02:27:25,207][train_inner][INFO] - {"epoch": 1, "update": 0.21, "loss": "3.557", "ppl": "11.77", "wps": "361972", "ups": "3.06", "wpb": "118137", "bsz": "256", "num_updates": "10800", "lr": "0.000999192", "gnorm": "0.775", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.1", "wall": "3558"} +[2022-08-01 02:28:29,867][train_inner][INFO] - {"epoch": 1, "update": 0.214, "loss": "3.536", "ppl": "11.6", "wps": "367024", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "11000", "lr": "0.00099899", "gnorm": "0.771", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.2", "wall": "3622"} +[2022-08-01 02:29:34,915][train_inner][INFO] - {"epoch": 1, "update": 0.218, "loss": "3.528", "ppl": "11.54", "wps": "363294", "ups": "3.07", "wpb": "118157", "bsz": "256", "num_updates": "11200", "lr": "0.000998788", "gnorm": "0.772", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.6", "wall": "3687"} +[2022-08-01 02:30:39,998][train_inner][INFO] - {"epoch": 1, "update": 0.221, "loss": "3.508", "ppl": "11.37", "wps": "362272", "ups": "3.07", "wpb": "117886", "bsz": "256", "num_updates": "11400", "lr": "0.000998586", "gnorm": "0.766", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "24.3", "wall": "3752"} +[2022-08-01 02:31:44,763][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "3.473", "ppl": "11.1", "wps": "366090", "ups": "3.09", "wpb": "118548", "bsz": "256", "num_updates": "11600", "lr": "0.000998384", "gnorm": "0.76", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "21", "wall": "3817"} +[2022-08-01 02:32:49,468][train_inner][INFO] - {"epoch": 1, "update": 0.229, "loss": "3.47", "ppl": "11.08", "wps": "366042", "ups": "3.09", "wpb": "118422", "bsz": "256", "num_updates": "11800", "lr": "0.000998182", "gnorm": "0.759", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.3", "wall": "3882"} +[2022-08-01 02:33:54,422][train_inner][INFO] - {"epoch": 1, "update": 0.233, "loss": "3.451", "ppl": "10.93", "wps": "364707", "ups": "3.08", "wpb": "118445", "bsz": "256", "num_updates": "12000", "lr": "0.00099798", "gnorm": "0.756", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "23.9", "wall": "3947"} +[2022-08-01 02:34:40,225][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 02:35:00,221][train_inner][INFO] - {"epoch": 1, "update": 0.237, "loss": "3.432", "ppl": "10.79", "wps": "359656", "ups": "3.04", "wpb": "118323", "bsz": "256", "num_updates": "12200", "lr": "0.000997778", "gnorm": "0.768", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "4013"} +[2022-08-01 02:36:05,287][train_inner][INFO] - {"epoch": 1, "update": 0.241, "loss": "3.408", "ppl": "10.61", "wps": "364885", "ups": "3.07", "wpb": "118707", "bsz": "256", "num_updates": "12400", "lr": "0.000997576", "gnorm": "0.758", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.7", "wall": "4078"} +[2022-08-01 02:37:10,239][train_inner][INFO] - {"epoch": 1, "update": 0.245, "loss": "3.404", "ppl": "10.59", "wps": "365271", "ups": "3.08", "wpb": "118623", "bsz": "256", "num_updates": "12600", "lr": "0.000997374", "gnorm": "0.752", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "4143"} +[2022-08-01 02:38:15,090][train_inner][INFO] - {"epoch": 1, "update": 0.249, "loss": "3.379", "ppl": "10.4", "wps": "365640", "ups": "3.08", "wpb": "118558", "bsz": "256", "num_updates": "12800", "lr": "0.000997172", "gnorm": "0.746", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "4207"} +[2022-08-01 02:39:20,229][train_inner][INFO] - {"epoch": 1, "update": 0.253, "loss": "3.37", "ppl": "10.34", "wps": "364435", "ups": "3.07", "wpb": "118694", "bsz": "256", "num_updates": "13000", "lr": "0.00099697", "gnorm": "0.747", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "4273"} +[2022-08-01 02:40:25,298][train_inner][INFO] - {"epoch": 1, "update": 0.256, "loss": "3.358", "ppl": "10.25", "wps": "364627", "ups": "3.07", "wpb": "118627", "bsz": "256", "num_updates": "13200", "lr": "0.000996768", "gnorm": "0.747", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "4338"} +[2022-08-01 02:41:30,600][train_inner][INFO] - {"epoch": 1, "update": 0.26, "loss": "3.342", "ppl": "10.14", "wps": "363175", "ups": "3.06", "wpb": "118579", "bsz": "256", "num_updates": "13400", "lr": "0.000996566", "gnorm": "0.743", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "4403"} +[2022-08-01 02:42:35,453][train_inner][INFO] - {"epoch": 1, "update": 0.264, "loss": "3.326", "ppl": "10.03", "wps": "366632", "ups": "3.08", "wpb": "118884", "bsz": "256", "num_updates": "13600", "lr": "0.000996364", "gnorm": "0.746", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "4468"} +[2022-08-01 02:43:39,936][train_inner][INFO] - {"epoch": 1, "update": 0.268, "loss": "3.318", "ppl": "9.97", "wps": "366951", "ups": "3.1", "wpb": "118308", "bsz": "256", "num_updates": "13800", "lr": "0.000996162", "gnorm": "0.744", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23", "wall": "4532"} +[2022-08-01 02:44:44,731][train_inner][INFO] - {"epoch": 1, "update": 0.272, "loss": "3.307", "ppl": "9.9", "wps": "363851", "ups": "3.09", "wpb": "117876", "bsz": "256", "num_updates": "14000", "lr": "0.00099596", "gnorm": "0.745", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "4597"} +[2022-08-01 02:45:49,941][train_inner][INFO] - {"epoch": 1, "update": 0.276, "loss": "3.299", "ppl": "9.84", "wps": "363765", "ups": "3.07", "wpb": "118604", "bsz": "256", "num_updates": "14200", "lr": "0.000995758", "gnorm": "0.743", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "23.7", "wall": "4662"} +[2022-08-01 02:46:54,916][train_inner][INFO] - {"epoch": 1, "update": 0.28, "loss": "3.282", "ppl": "9.72", "wps": "365032", "ups": "3.08", "wpb": "118588", "bsz": "256", "num_updates": "14400", "lr": "0.000995556", "gnorm": "0.74", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21", "wall": "4727"} +[2022-08-01 02:46:56,797][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 02:48:00,086][train_inner][INFO] - {"epoch": 1, "update": 0.284, "loss": "3.272", "ppl": "9.66", "wps": "361694", "ups": "3.07", "wpb": "117856", "bsz": "256", "num_updates": "14600", "lr": "0.000995354", "gnorm": "0.74", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "4792"} +[2022-08-01 02:49:05,447][train_inner][INFO] - {"epoch": 1, "update": 0.288, "loss": "3.26", "ppl": "9.58", "wps": "364066", "ups": "3.06", "wpb": "118976", "bsz": "256", "num_updates": "14800", "lr": "0.000995152", "gnorm": "0.738", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.8", "wall": "4858"} +[2022-08-01 02:50:10,199][train_inner][INFO] - {"epoch": 1, "update": 0.291, "loss": "3.256", "ppl": "9.55", "wps": "367160", "ups": "3.09", "wpb": "118871", "bsz": "256", "num_updates": "15000", "lr": "0.000994949", "gnorm": "0.737", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "26.9", "wall": "4923"} +[2022-08-01 02:51:15,132][train_inner][INFO] - {"epoch": 1, "update": 0.295, "loss": "3.245", "ppl": "9.48", "wps": "364320", "ups": "3.08", "wpb": "118279", "bsz": "256", "num_updates": "15200", "lr": "0.000994747", "gnorm": "0.735", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.4", "wall": "4987"} +[2022-08-01 02:52:20,309][train_inner][INFO] - {"epoch": 1, "update": 0.299, "loss": "3.238", "ppl": "9.43", "wps": "362960", "ups": "3.07", "wpb": "118282", "bsz": "256", "num_updates": "15400", "lr": "0.000994545", "gnorm": "0.734", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "5053"} +[2022-08-01 02:53:24,456][train_inner][INFO] - {"epoch": 1, "update": 0.303, "loss": "3.229", "ppl": "9.37", "wps": "368870", "ups": "3.12", "wpb": "118307", "bsz": "256", "num_updates": "15600", "lr": "0.000994343", "gnorm": "0.737", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.4", "wall": "5117"} +[2022-08-01 02:54:29,288][train_inner][INFO] - {"epoch": 1, "update": 0.307, "loss": "3.218", "ppl": "9.31", "wps": "366493", "ups": "3.08", "wpb": "118800", "bsz": "256", "num_updates": "15800", "lr": "0.000994141", "gnorm": "0.732", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "5182"} +[2022-08-01 02:55:34,014][train_inner][INFO] - {"epoch": 1, "update": 0.311, "loss": "3.209", "ppl": "9.25", "wps": "365178", "ups": "3.09", "wpb": "118181", "bsz": "256", "num_updates": "16000", "lr": "0.000993939", "gnorm": "0.736", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "5246"} +[2022-08-01 02:56:38,770][train_inner][INFO] - {"epoch": 1, "update": 0.315, "loss": "3.199", "ppl": "9.19", "wps": "366690", "ups": "3.09", "wpb": "118725", "bsz": "256", "num_updates": "16200", "lr": "0.000993737", "gnorm": "0.735", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "5311"} +[2022-08-01 02:57:43,710][train_inner][INFO] - {"epoch": 1, "update": 0.319, "loss": "3.202", "ppl": "9.2", "wps": "360894", "ups": "3.08", "wpb": "117180", "bsz": "256", "num_updates": "16400", "lr": "0.000993535", "gnorm": "0.738", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "5376"} +[2022-08-01 02:58:48,710][train_inner][INFO] - {"epoch": 1, "update": 0.323, "loss": "3.182", "ppl": "9.07", "wps": "365526", "ups": "3.08", "wpb": "118794", "bsz": "256", "num_updates": "16600", "lr": "0.000993333", "gnorm": "0.731", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.9", "wall": "5441"} +[2022-08-01 02:59:01,684][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 02:59:54,100][train_inner][INFO] - {"epoch": 1, "update": 0.326, "loss": "3.183", "ppl": "9.08", "wps": "359853", "ups": "3.06", "wpb": "117653", "bsz": "256", "num_updates": "16800", "lr": "0.000993131", "gnorm": "0.735", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "5506"} +[2022-08-01 03:00:58,610][train_inner][INFO] - {"epoch": 1, "update": 0.33, "loss": "3.17", "ppl": "9", "wps": "365707", "ups": "3.1", "wpb": "117957", "bsz": "256", "num_updates": "17000", "lr": "0.000992929", "gnorm": "0.728", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.8", "wall": "5571"} +[2022-08-01 03:02:03,338][train_inner][INFO] - {"epoch": 1, "update": 0.334, "loss": "3.155", "ppl": "8.91", "wps": "364933", "ups": "3.09", "wpb": "118104", "bsz": "256", "num_updates": "17200", "lr": "0.000992727", "gnorm": "0.733", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.4", "wall": "5636"} +[2022-08-01 03:03:08,531][train_inner][INFO] - {"epoch": 1, "update": 0.338, "loss": "3.148", "ppl": "8.86", "wps": "363216", "ups": "3.07", "wpb": "118394", "bsz": "256", "num_updates": "17400", "lr": "0.000992525", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "5701"} +[2022-08-01 03:04:13,658][train_inner][INFO] - {"epoch": 1, "update": 0.342, "loss": "3.143", "ppl": "8.84", "wps": "363554", "ups": "3.07", "wpb": "118385", "bsz": "256", "num_updates": "17600", "lr": "0.000992323", "gnorm": "0.726", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "5766"} +[2022-08-01 03:05:18,568][train_inner][INFO] - {"epoch": 1, "update": 0.346, "loss": "3.133", "ppl": "8.77", "wps": "364258", "ups": "3.08", "wpb": "118218", "bsz": "256", "num_updates": "17800", "lr": "0.000992121", "gnorm": "0.73", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.7", "wall": "5831"} +[2022-08-01 03:06:23,065][train_inner][INFO] - {"epoch": 1, "update": 0.35, "loss": "3.136", "ppl": "8.79", "wps": "364792", "ups": "3.1", "wpb": "117638", "bsz": "256", "num_updates": "18000", "lr": "0.000991919", "gnorm": "0.736", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "5895"} +[2022-08-01 03:07:28,092][train_inner][INFO] - {"epoch": 1, "update": 0.354, "loss": "3.126", "ppl": "8.73", "wps": "364173", "ups": "3.08", "wpb": "118404", "bsz": "256", "num_updates": "18200", "lr": "0.000991717", "gnorm": "0.726", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "5960"} +[2022-08-01 03:08:33,179][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.116", "ppl": "8.67", "wps": "364669", "ups": "3.07", "wpb": "118674", "bsz": "256", "num_updates": "18400", "lr": "0.000991515", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "6026"} +[2022-08-01 03:09:38,037][train_inner][INFO] - {"epoch": 1, "update": 0.361, "loss": "3.115", "ppl": "8.67", "wps": "365815", "ups": "3.08", "wpb": "118629", "bsz": "256", "num_updates": "18600", "lr": "0.000991313", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.4", "wall": "6090"} +[2022-08-01 03:10:38,330][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 03:10:43,263][train_inner][INFO] - {"epoch": 1, "update": 0.365, "loss": "3.108", "ppl": "8.62", "wps": "362499", "ups": "3.07", "wpb": "118218", "bsz": "256", "num_updates": "18800", "lr": "0.000991111", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "6156"} +[2022-08-01 03:11:48,239][train_inner][INFO] - {"epoch": 1, "update": 0.369, "loss": "3.098", "ppl": "8.56", "wps": "365594", "ups": "3.08", "wpb": "118774", "bsz": "256", "num_updates": "19000", "lr": "0.000990909", "gnorm": "0.725", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.8", "wall": "6221"} +[2022-08-01 03:12:52,791][train_inner][INFO] - {"epoch": 1, "update": 0.373, "loss": "3.085", "ppl": "8.48", "wps": "366524", "ups": "3.1", "wpb": "118297", "bsz": "256", "num_updates": "19200", "lr": "0.000990707", "gnorm": "0.718", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "6285"} +[2022-08-01 03:13:57,490][train_inner][INFO] - {"epoch": 1, "update": 0.377, "loss": "3.09", "ppl": "8.51", "wps": "364214", "ups": "3.09", "wpb": "117820", "bsz": "256", "num_updates": "19400", "lr": "0.000990505", "gnorm": "0.717", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "6350"} +[2022-08-01 03:15:02,391][train_inner][INFO] - {"epoch": 1, "update": 0.381, "loss": "3.084", "ppl": "8.48", "wps": "364037", "ups": "3.08", "wpb": "118130", "bsz": "256", "num_updates": "19600", "lr": "0.000990303", "gnorm": "0.718", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "6415"} +[2022-08-01 03:16:07,196][train_inner][INFO] - {"epoch": 1, "update": 0.385, "loss": "3.083", "ppl": "8.48", "wps": "363889", "ups": "3.09", "wpb": "117908", "bsz": "256", "num_updates": "19800", "lr": "0.000990101", "gnorm": "0.717", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "6480"} +[2022-08-01 03:17:12,182][train_inner][INFO] - {"epoch": 1, "update": 0.389, "loss": "3.067", "ppl": "8.38", "wps": "365063", "ups": "3.08", "wpb": "118617", "bsz": "256", "num_updates": "20000", "lr": "0.000989899", "gnorm": "0.712", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.3", "wall": "6545"} +[2022-08-01 03:18:17,329][train_inner][INFO] - {"epoch": 1, "update": 0.392, "loss": "3.064", "ppl": "8.37", "wps": "362315", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "20200", "lr": "0.000989697", "gnorm": "0.713", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "6610"} +[2022-08-01 03:19:22,442][train_inner][INFO] - {"epoch": 1, "update": 0.396, "loss": "3.057", "ppl": "8.33", "wps": "363082", "ups": "3.07", "wpb": "118207", "bsz": "256", "num_updates": "20400", "lr": "0.000989495", "gnorm": "0.719", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "6675"} +[2022-08-01 03:20:27,714][train_inner][INFO] - {"epoch": 1, "update": 0.4, "loss": "3.05", "ppl": "8.28", "wps": "362843", "ups": "3.06", "wpb": "118414", "bsz": "256", "num_updates": "20600", "lr": "0.000989293", "gnorm": "0.718", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "28.2", "wall": "6740"} +[2022-08-01 03:21:32,570][train_inner][INFO] - {"epoch": 1, "update": 0.404, "loss": "3.048", "ppl": "8.27", "wps": "366310", "ups": "3.08", "wpb": "118785", "bsz": "256", "num_updates": "20800", "lr": "0.000989091", "gnorm": "0.711", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.9", "wall": "6805"} +[2022-08-01 03:22:38,762][train_inner][INFO] - {"epoch": 1, "update": 0.408, "loss": "3.043", "ppl": "8.24", "wps": "358502", "ups": "3.02", "wpb": "118648", "bsz": "256", "num_updates": "21000", "lr": "0.000988889", "gnorm": "0.711", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "20.3", "wall": "6871"} +[2022-08-01 03:23:43,695][train_inner][INFO] - {"epoch": 1, "update": 0.412, "loss": "3.038", "ppl": "8.21", "wps": "364573", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "21200", "lr": "0.000988687", "gnorm": "0.708", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.9", "wall": "6936"} +[2022-08-01 03:24:02,937][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 03:24:49,056][train_inner][INFO] - {"epoch": 1, "update": 0.416, "loss": "3.038", "ppl": "8.21", "wps": "360575", "ups": "3.06", "wpb": "117836", "bsz": "256", "num_updates": "21400", "lr": "0.000988485", "gnorm": "0.708", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22", "wall": "7001"} +[2022-08-01 03:25:53,640][train_inner][INFO] - {"epoch": 1, "update": 0.42, "loss": "3.036", "ppl": "8.2", "wps": "365382", "ups": "3.1", "wpb": "117988", "bsz": "256", "num_updates": "21600", "lr": "0.000988283", "gnorm": "0.71", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.5", "wall": "7066"} +[2022-08-01 03:26:58,821][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "3.022", "ppl": "8.12", "wps": "364667", "ups": "3.07", "wpb": "118845", "bsz": "256", "num_updates": "21800", "lr": "0.000988081", "gnorm": "0.706", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.2", "wall": "7131"} +[2022-08-01 03:28:03,811][train_inner][INFO] - {"epoch": 1, "update": 0.427, "loss": "3.022", "ppl": "8.13", "wps": "362993", "ups": "3.08", "wpb": "117953", "bsz": "256", "num_updates": "22000", "lr": "0.000987879", "gnorm": "0.704", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "7196"} +[2022-08-01 03:29:08,578][train_inner][INFO] - {"epoch": 1, "update": 0.431, "loss": "3.009", "ppl": "8.05", "wps": "365777", "ups": "3.09", "wpb": "118450", "bsz": "256", "num_updates": "22200", "lr": "0.000987677", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "7261"} +[2022-08-01 03:30:13,239][train_inner][INFO] - {"epoch": 1, "update": 0.435, "loss": "3.007", "ppl": "8.04", "wps": "365155", "ups": "3.09", "wpb": "118055", "bsz": "256", "num_updates": "22400", "lr": "0.000987475", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "7326"} +[2022-08-01 03:31:18,031][train_inner][INFO] - {"epoch": 1, "update": 0.439, "loss": "3.003", "ppl": "8.02", "wps": "366872", "ups": "3.09", "wpb": "118851", "bsz": "256", "num_updates": "22600", "lr": "0.000987273", "gnorm": "0.698", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "7390"} +[2022-08-01 03:32:23,020][train_inner][INFO] - {"epoch": 1, "update": 0.443, "loss": "2.997", "ppl": "7.98", "wps": "363770", "ups": "3.08", "wpb": "118204", "bsz": "256", "num_updates": "22800", "lr": "0.000987071", "gnorm": "0.694", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "7455"} +[2022-08-01 03:33:27,835][train_inner][INFO] - {"epoch": 1, "update": 0.447, "loss": "2.988", "ppl": "7.93", "wps": "366242", "ups": "3.09", "wpb": "118687", "bsz": "256", "num_updates": "23000", "lr": "0.000986869", "gnorm": "0.697", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.5", "wall": "7520"} +[2022-08-01 03:34:32,856][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "2.989", "ppl": "7.94", "wps": "364354", "ups": "3.08", "wpb": "118452", "bsz": "256", "num_updates": "23200", "lr": "0.000986667", "gnorm": "0.697", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.2", "wall": "7585"} +[2022-08-01 03:35:37,960][train_inner][INFO] - {"epoch": 1, "update": 0.455, "loss": "2.985", "ppl": "7.92", "wps": "364288", "ups": "3.07", "wpb": "118582", "bsz": "256", "num_updates": "23400", "lr": "0.000986465", "gnorm": "0.692", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.7", "wall": "7650"} +[2022-08-01 03:36:43,047][train_inner][INFO] - {"epoch": 1, "update": 0.459, "loss": "2.98", "ppl": "7.89", "wps": "364196", "ups": "3.07", "wpb": "118520", "bsz": "256", "num_updates": "23600", "lr": "0.000986263", "gnorm": "0.696", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.8", "wall": "7715"} +[2022-08-01 03:37:14,594][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 03:37:48,269][train_inner][INFO] - {"epoch": 1, "update": 0.462, "loss": "2.973", "ppl": "7.85", "wps": "363912", "ups": "3.07", "wpb": "118674", "bsz": "256", "num_updates": "23800", "lr": "0.000986061", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26.7", "wall": "7781"} +[2022-08-01 03:38:54,158][train_inner][INFO] - {"epoch": 1, "update": 0.466, "loss": "2.974", "ppl": "7.86", "wps": "358955", "ups": "3.04", "wpb": "118254", "bsz": "256", "num_updates": "24000", "lr": "0.000985859", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "19.8", "wall": "7846"} +[2022-08-01 03:39:58,991][train_inner][INFO] - {"epoch": 1, "update": 0.47, "loss": "2.971", "ppl": "7.84", "wps": "364458", "ups": "3.08", "wpb": "118143", "bsz": "255.9", "num_updates": "24200", "lr": "0.000985657", "gnorm": "0.687", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "7911"} +[2022-08-01 03:41:03,671][train_inner][INFO] - {"epoch": 1, "update": 0.474, "loss": "2.968", "ppl": "7.82", "wps": "365722", "ups": "3.09", "wpb": "118273", "bsz": "256", "num_updates": "24400", "lr": "0.000985455", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "28.2", "wall": "7976"} +[2022-08-01 03:42:08,447][train_inner][INFO] - {"epoch": 1, "update": 0.478, "loss": "2.966", "ppl": "7.81", "wps": "364137", "ups": "3.09", "wpb": "117934", "bsz": "256", "num_updates": "24600", "lr": "0.000985253", "gnorm": "0.7", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "8041"} +[2022-08-01 03:43:13,386][train_inner][INFO] - {"epoch": 1, "update": 0.482, "loss": "2.955", "ppl": "7.75", "wps": "364784", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "24800", "lr": "0.000985051", "gnorm": "0.686", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.7", "wall": "8106"} +[2022-08-01 03:44:19,247][train_inner][INFO] - {"epoch": 1, "update": 0.486, "loss": "2.951", "ppl": "7.73", "wps": "359210", "ups": "3.04", "wpb": "118289", "bsz": "256", "num_updates": "25000", "lr": "0.000984848", "gnorm": "0.683", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.2", "wall": "8172"} +[2022-08-01 03:45:25,238][train_inner][INFO] - {"epoch": 1, "update": 0.49, "loss": "2.951", "ppl": "7.73", "wps": "357625", "ups": "3.03", "wpb": "117998", "bsz": "256", "num_updates": "25200", "lr": "0.000984646", "gnorm": "0.683", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "25.6", "wall": "8238"} +[2022-08-01 03:46:30,142][train_inner][INFO] - {"epoch": 1, "update": 0.494, "loss": "2.949", "ppl": "7.72", "wps": "363780", "ups": "3.08", "wpb": "118051", "bsz": "256", "num_updates": "25400", "lr": "0.000984444", "gnorm": "0.685", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "8302"} +[2022-08-01 03:47:35,064][train_inner][INFO] - {"epoch": 1, "update": 0.497, "loss": "2.942", "ppl": "7.68", "wps": "365141", "ups": "3.08", "wpb": "118528", "bsz": "256", "num_updates": "25600", "lr": "0.000984242", "gnorm": "0.687", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.9", "wall": "8367"} +[2022-08-01 03:48:41,037][train_inner][INFO] - {"epoch": 1, "update": 0.501, "loss": "2.941", "ppl": "7.68", "wps": "357528", "ups": "3.03", "wpb": "117933", "bsz": "256", "num_updates": "25800", "lr": "0.00098404", "gnorm": "0.685", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "21.7", "wall": "8433"} +[2022-08-01 03:49:45,572][train_inner][INFO] - {"epoch": 1, "update": 0.505, "loss": "2.937", "ppl": "7.66", "wps": "365847", "ups": "3.1", "wpb": "118049", "bsz": "256", "num_updates": "26000", "lr": "0.000983838", "gnorm": "0.681", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "22", "wall": "8498"} +[2022-08-01 03:50:41,271][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 03:50:50,730][train_inner][INFO] - {"epoch": 1, "update": 0.509, "loss": "2.935", "ppl": "7.65", "wps": "362514", "ups": "3.07", "wpb": "118101", "bsz": "256", "num_updates": "26200", "lr": "0.000983636", "gnorm": "0.677", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "8563"} +[2022-08-01 03:51:55,818][train_inner][INFO] - {"epoch": 1, "update": 0.513, "loss": "2.929", "ppl": "7.61", "wps": "364139", "ups": "3.07", "wpb": "118505", "bsz": "256", "num_updates": "26400", "lr": "0.000983434", "gnorm": "0.678", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.6", "wall": "8628"} +[2022-08-01 03:53:00,516][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "2.923", "ppl": "7.59", "wps": "365396", "ups": "3.09", "wpb": "118200", "bsz": "256", "num_updates": "26600", "lr": "0.000983232", "gnorm": "0.677", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.8", "wall": "8693"} +[2022-08-01 03:54:05,300][train_inner][INFO] - {"epoch": 1, "update": 0.521, "loss": "2.916", "ppl": "7.55", "wps": "364843", "ups": "3.09", "wpb": "118177", "bsz": "256", "num_updates": "26800", "lr": "0.00098303", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.2", "wall": "8758"} +[2022-08-01 03:55:10,320][train_inner][INFO] - {"epoch": 1, "update": 0.525, "loss": "2.926", "ppl": "7.6", "wps": "363429", "ups": "3.08", "wpb": "118150", "bsz": "256", "num_updates": "27000", "lr": "0.000982828", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "8823"} +[2022-08-01 03:56:15,765][train_inner][INFO] - {"epoch": 1, "update": 0.528, "loss": "2.917", "ppl": "7.55", "wps": "361701", "ups": "3.06", "wpb": "118356", "bsz": "256", "num_updates": "27200", "lr": "0.000982626", "gnorm": "0.672", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.9", "wall": "8888"} +[2022-08-01 03:57:21,196][train_inner][INFO] - {"epoch": 1, "update": 0.532, "loss": "2.913", "ppl": "7.53", "wps": "361064", "ups": "3.06", "wpb": "118122", "bsz": "256", "num_updates": "27400", "lr": "0.000982424", "gnorm": "0.673", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.5", "wall": "8954"} +[2022-08-01 03:58:26,150][train_inner][INFO] - {"epoch": 1, "update": 0.536, "loss": "2.906", "ppl": "7.5", "wps": "363081", "ups": "3.08", "wpb": "117915", "bsz": "256", "num_updates": "27600", "lr": "0.000982222", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.3", "wall": "9018"} +[2022-08-01 03:59:31,344][train_inner][INFO] - {"epoch": 1, "update": 0.54, "loss": "2.896", "ppl": "7.44", "wps": "365156", "ups": "3.07", "wpb": "119029", "bsz": "256", "num_updates": "27800", "lr": "0.00098202", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "9084"} +[2022-08-01 04:00:37,306][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "2.897", "ppl": "7.45", "wps": "357880", "ups": "3.03", "wpb": "118029", "bsz": "256", "num_updates": "28000", "lr": "0.000981818", "gnorm": "0.665", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "9150"} +[2022-08-01 04:01:42,562][train_inner][INFO] - {"epoch": 1, "update": 0.548, "loss": "2.888", "ppl": "7.4", "wps": "363964", "ups": "3.07", "wpb": "118745", "bsz": "256", "num_updates": "28200", "lr": "0.000981616", "gnorm": "0.663", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "9215"} +[2022-08-01 04:02:47,260][train_inner][INFO] - {"epoch": 1, "update": 0.552, "loss": "2.893", "ppl": "7.43", "wps": "366210", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "28400", "lr": "0.000981414", "gnorm": "0.663", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.6", "wall": "9280"} +[2022-08-01 04:03:53,436][train_inner][INFO] - {"epoch": 1, "update": 0.556, "loss": "2.892", "ppl": "7.42", "wps": "357328", "ups": "3.02", "wpb": "118230", "bsz": "256", "num_updates": "28600", "lr": "0.000981212", "gnorm": "0.658", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "26.7", "wall": "9346"} +[2022-08-01 04:04:24,997][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 04:04:58,651][train_inner][INFO] - {"epoch": 1, "update": 0.56, "loss": "2.891", "ppl": "7.42", "wps": "362865", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "28800", "lr": "0.00098101", "gnorm": "0.662", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "9411"} +[2022-08-01 04:06:03,854][train_inner][INFO] - {"epoch": 1, "update": 0.563, "loss": "2.881", "ppl": "7.37", "wps": "362225", "ups": "3.07", "wpb": "118090", "bsz": "256", "num_updates": "29000", "lr": "0.000980808", "gnorm": "0.661", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.8", "wall": "9476"} +[2022-08-01 04:07:01,207][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 04:07:08,929][train_inner][INFO] - {"epoch": 1, "update": 0.567, "loss": "2.877", "ppl": "7.35", "wps": "362262", "ups": "3.07", "wpb": "117869", "bsz": "256", "num_updates": "29200", "lr": "0.000980606", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "9541"} +[2022-08-01 04:08:13,834][train_inner][INFO] - {"epoch": 1, "update": 0.571, "loss": "2.884", "ppl": "7.38", "wps": "363391", "ups": "3.08", "wpb": "117928", "bsz": "256", "num_updates": "29400", "lr": "0.000980404", "gnorm": "0.663", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "9606"} +[2022-08-01 04:09:18,585][train_inner][INFO] - {"epoch": 1, "update": 0.575, "loss": "2.869", "ppl": "7.3", "wps": "366461", "ups": "3.09", "wpb": "118642", "bsz": "256", "num_updates": "29600", "lr": "0.000980202", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "9671"} +[2022-08-01 04:10:23,632][train_inner][INFO] - {"epoch": 1, "update": 0.579, "loss": "2.873", "ppl": "7.32", "wps": "363877", "ups": "3.07", "wpb": "118343", "bsz": "256", "num_updates": "29800", "lr": "0.00098", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "9736"} +[2022-08-01 04:11:28,594][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "2.87", "ppl": "7.31", "wps": "365756", "ups": "3.08", "wpb": "118800", "bsz": "256", "num_updates": "30000", "lr": "0.000979798", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "9801"} +[2022-08-01 04:12:33,635][train_inner][INFO] - {"epoch": 1, "update": 0.587, "loss": "2.865", "ppl": "7.29", "wps": "362940", "ups": "3.08", "wpb": "118028", "bsz": "256", "num_updates": "30200", "lr": "0.000979596", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "9866"} +[2022-08-01 04:13:38,492][train_inner][INFO] - {"epoch": 1, "update": 0.591, "loss": "2.875", "ppl": "7.33", "wps": "364018", "ups": "3.08", "wpb": "118044", "bsz": "256", "num_updates": "30400", "lr": "0.000979394", "gnorm": "0.653", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "9931"} +[2022-08-01 04:14:43,306][train_inner][INFO] - {"epoch": 1, "update": 0.595, "loss": "2.868", "ppl": "7.3", "wps": "364588", "ups": "3.09", "wpb": "118151", "bsz": "256", "num_updates": "30600", "lr": "0.000979192", "gnorm": "0.657", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "9996"} +[2022-08-01 04:15:49,120][train_inner][INFO] - {"epoch": 1, "update": 0.598, "loss": "2.851", "ppl": "7.22", "wps": "361280", "ups": "3.04", "wpb": "118884", "bsz": "256", "num_updates": "30800", "lr": "0.00097899", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "10061"} +[2022-08-01 04:16:54,330][train_inner][INFO] - {"epoch": 1, "update": 0.602, "loss": "2.861", "ppl": "7.26", "wps": "363370", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "31000", "lr": "0.000978788", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "10127"} +[2022-08-01 04:17:58,862][train_inner][INFO] - {"epoch": 1, "update": 0.606, "loss": "2.851", "ppl": "7.21", "wps": "365286", "ups": "3.1", "wpb": "117860", "bsz": "256", "num_updates": "31200", "lr": "0.000978586", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "10191"} +[2022-08-01 04:19:03,684][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "2.852", "ppl": "7.22", "wps": "364354", "ups": "3.09", "wpb": "118090", "bsz": "256", "num_updates": "31400", "lr": "0.000978384", "gnorm": "0.649", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.4", "wall": "10256"} +[2022-08-01 04:20:08,600][train_inner][INFO] - {"epoch": 1, "update": 0.614, "loss": "2.85", "ppl": "7.21", "wps": "363322", "ups": "3.08", "wpb": "117927", "bsz": "256", "num_updates": "31600", "lr": "0.000978182", "gnorm": "0.648", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "10321"} +[2022-08-01 04:21:14,614][train_inner][INFO] - {"epoch": 1, "update": 0.618, "loss": "2.845", "ppl": "7.19", "wps": "357344", "ups": "3.03", "wpb": "117946", "bsz": "256", "num_updates": "31800", "lr": "0.00097798", "gnorm": "0.645", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "10387"} +[2022-08-01 04:22:19,551][train_inner][INFO] - {"epoch": 1, "update": 0.622, "loss": "2.836", "ppl": "7.14", "wps": "366321", "ups": "3.08", "wpb": "118938", "bsz": "256", "num_updates": "32000", "lr": "0.000977778", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "10452"} +[2022-08-01 04:23:24,647][train_inner][INFO] - {"epoch": 1, "update": 0.626, "loss": "2.841", "ppl": "7.17", "wps": "365214", "ups": "3.07", "wpb": "118868", "bsz": "256", "num_updates": "32200", "lr": "0.000977576", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.9", "wall": "10517"} +[2022-08-01 04:24:30,501][train_inner][INFO] - {"epoch": 1, "update": 0.63, "loss": "2.838", "ppl": "7.15", "wps": "357703", "ups": "3.04", "wpb": "117780", "bsz": "256", "num_updates": "32400", "lr": "0.000977374", "gnorm": "0.64", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "21.3", "wall": "10583"} +[2022-08-01 04:25:35,211][train_inner][INFO] - {"epoch": 1, "update": 0.633, "loss": "2.835", "ppl": "7.13", "wps": "365928", "ups": "3.09", "wpb": "118392", "bsz": "256", "num_updates": "32600", "lr": "0.000977172", "gnorm": "0.644", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "26.5", "wall": "10648"} +[2022-08-01 04:26:39,986][train_inner][INFO] - {"epoch": 1, "update": 0.637, "loss": "2.826", "ppl": "7.09", "wps": "364437", "ups": "3.09", "wpb": "118032", "bsz": "256", "num_updates": "32800", "lr": "0.00097697", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23", "wall": "10712"} +[2022-08-01 04:27:45,778][train_inner][INFO] - {"epoch": 1, "update": 0.641, "loss": "2.833", "ppl": "7.13", "wps": "359856", "ups": "3.04", "wpb": "118376", "bsz": "256", "num_updates": "33000", "lr": "0.000976768", "gnorm": "0.636", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.5", "wall": "10778"} +[2022-08-01 04:28:50,339][train_inner][INFO] - {"epoch": 1, "update": 0.645, "loss": "2.829", "ppl": "7.11", "wps": "365848", "ups": "3.1", "wpb": "118095", "bsz": "256", "num_updates": "33200", "lr": "0.000976566", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.8", "wall": "10843"} +[2022-08-01 04:29:31,239][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 04:29:55,836][train_inner][INFO] - {"epoch": 1, "update": 0.649, "loss": "2.82", "ppl": "7.06", "wps": "360395", "ups": "3.05", "wpb": "118022", "bsz": "256", "num_updates": "33400", "lr": "0.000976364", "gnorm": "0.634", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23", "wall": "10908"} +[2022-08-01 04:31:00,298][train_inner][INFO] - {"epoch": 1, "update": 0.653, "loss": "2.825", "ppl": "7.08", "wps": "366254", "ups": "3.1", "wpb": "118048", "bsz": "256", "num_updates": "33600", "lr": "0.000976162", "gnorm": "0.632", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "10973"} +[2022-08-01 04:32:05,355][train_inner][INFO] - {"epoch": 1, "update": 0.657, "loss": "2.82", "ppl": "7.06", "wps": "365442", "ups": "3.07", "wpb": "118871", "bsz": "256", "num_updates": "33800", "lr": "0.00097596", "gnorm": "0.632", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "11038"} +[2022-08-01 04:33:10,454][train_inner][INFO] - {"epoch": 1, "update": 0.661, "loss": "2.822", "ppl": "7.07", "wps": "363173", "ups": "3.07", "wpb": "118208", "bsz": "256", "num_updates": "34000", "lr": "0.000975758", "gnorm": "0.631", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "11103"} +[2022-08-01 04:34:15,389][train_inner][INFO] - {"epoch": 1, "update": 0.665, "loss": "2.82", "ppl": "7.06", "wps": "363641", "ups": "3.08", "wpb": "118064", "bsz": "256", "num_updates": "34200", "lr": "0.000975556", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "27.4", "wall": "11168"} +[2022-08-01 04:35:20,068][train_inner][INFO] - {"epoch": 1, "update": 0.668, "loss": "2.813", "ppl": "7.03", "wps": "365465", "ups": "3.09", "wpb": "118187", "bsz": "256", "num_updates": "34400", "lr": "0.000975354", "gnorm": "0.632", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "26.5", "wall": "11232"} +[2022-08-01 04:36:24,776][train_inner][INFO] - {"epoch": 1, "update": 0.672, "loss": "2.808", "ppl": "7", "wps": "364928", "ups": "3.09", "wpb": "118067", "bsz": "256", "num_updates": "34600", "lr": "0.000975152", "gnorm": "0.63", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "11297"} +[2022-08-01 04:37:29,694][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "2.804", "ppl": "6.98", "wps": "365627", "ups": "3.08", "wpb": "118676", "bsz": "256", "num_updates": "34800", "lr": "0.000974949", "gnorm": "0.629", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "11362"} +[2022-08-01 04:38:34,327][train_inner][INFO] - {"epoch": 1, "update": 0.68, "loss": "2.807", "ppl": "7", "wps": "363080", "ups": "3.09", "wpb": "117334", "bsz": "256", "num_updates": "35000", "lr": "0.000974747", "gnorm": "0.634", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.8", "wall": "11427"} +[2022-08-01 04:39:39,464][train_inner][INFO] - {"epoch": 1, "update": 0.684, "loss": "2.799", "ppl": "6.96", "wps": "365199", "ups": "3.07", "wpb": "118936", "bsz": "256", "num_updates": "35200", "lr": "0.000974545", "gnorm": "0.622", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "11492"} +[2022-08-01 04:40:44,942][train_inner][INFO] - {"epoch": 1, "update": 0.688, "loss": "2.8", "ppl": "6.96", "wps": "359993", "ups": "3.05", "wpb": "117858", "bsz": "256", "num_updates": "35400", "lr": "0.000974343", "gnorm": "0.627", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "25.6", "wall": "11557"} +[2022-08-01 04:41:49,732][train_inner][INFO] - {"epoch": 1, "update": 0.692, "loss": "2.798", "ppl": "6.96", "wps": "367557", "ups": "3.09", "wpb": "119068", "bsz": "256", "num_updates": "35600", "lr": "0.000974141", "gnorm": "0.624", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.6", "wall": "11622"} +[2022-08-01 04:41:57,123][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-01 04:42:55,343][train_inner][INFO] - {"epoch": 1, "update": 0.696, "loss": "2.799", "ppl": "6.96", "wps": "361820", "ups": "3.05", "wpb": "118694", "bsz": "256", "num_updates": "35800", "lr": "0.000973939", "gnorm": "0.626", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "28.2", "wall": "11688"} +[2022-08-01 04:44:00,366][train_inner][INFO] - {"epoch": 1, "update": 0.699, "loss": "2.8", "ppl": "6.96", "wps": "363132", "ups": "3.08", "wpb": "118059", "bsz": "256", "num_updates": "36000", "lr": "0.000973737", "gnorm": "0.625", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "11753"} +[2022-08-01 04:45:05,218][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "2.795", "ppl": "6.94", "wps": "364988", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "36200", "lr": "0.000973535", "gnorm": "0.624", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "11818"} +[2022-08-01 04:46:10,315][train_inner][INFO] - {"epoch": 1, "update": 0.707, "loss": "2.79", "ppl": "6.91", "wps": "365211", "ups": "3.07", "wpb": "118869", "bsz": "256", "num_updates": "36400", "lr": "0.000973333", "gnorm": "0.619", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "11883"} +[2022-08-01 04:47:14,836][train_inner][INFO] - {"epoch": 1, "update": 0.711, "loss": "2.783", "ppl": "6.88", "wps": "367250", "ups": "3.1", "wpb": "118475", "bsz": "256", "num_updates": "36600", "lr": "0.000973131", "gnorm": "0.621", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.4", "wall": "11947"} +[2022-08-01 04:47:38,407][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 04:48:20,121][train_inner][INFO] - {"epoch": 1, "update": 0.715, "loss": "2.782", "ppl": "6.88", "wps": "362329", "ups": "3.06", "wpb": "118272", "bsz": "256", "num_updates": "36800", "lr": "0.000972929", "gnorm": "0.618", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "12012"} +[2022-08-01 04:49:24,785][train_inner][INFO] - {"epoch": 1, "update": 0.719, "loss": "2.792", "ppl": "6.93", "wps": "364407", "ups": "3.09", "wpb": "117820", "bsz": "256", "num_updates": "37000", "lr": "0.000972727", "gnorm": "0.621", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "12077"} +[2022-08-01 04:50:29,917][train_inner][INFO] - {"epoch": 1, "update": 0.723, "loss": "2.786", "ppl": "6.9", "wps": "362397", "ups": "3.07", "wpb": "118016", "bsz": "256", "num_updates": "37200", "lr": "0.000972525", "gnorm": "0.621", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "12142"} +[2022-08-01 04:51:34,631][train_inner][INFO] - {"epoch": 1, "update": 0.727, "loss": "2.78", "ppl": "6.87", "wps": "366528", "ups": "3.09", "wpb": "118594", "bsz": "256", "num_updates": "37400", "lr": "0.000972323", "gnorm": "0.618", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "12207"} +[2022-08-01 04:52:39,743][train_inner][INFO] - {"epoch": 1, "update": 0.731, "loss": "2.776", "ppl": "6.85", "wps": "362510", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "37600", "lr": "0.000972121", "gnorm": "0.618", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "12272"} +[2022-08-01 04:53:44,996][train_inner][INFO] - {"epoch": 1, "update": 0.734, "loss": "2.776", "ppl": "6.85", "wps": "364390", "ups": "3.07", "wpb": "118886", "bsz": "256", "num_updates": "37800", "lr": "0.000971919", "gnorm": "0.62", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "12337"} +[2022-08-01 04:54:45,235][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 04:54:50,807][train_inner][INFO] - {"epoch": 1, "update": 0.738, "loss": "2.785", "ppl": "6.89", "wps": "358905", "ups": "3.04", "wpb": "118099", "bsz": "256", "num_updates": "38000", "lr": "0.000971717", "gnorm": "0.621", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "12403"} +[2022-08-01 04:55:55,491][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "2.775", "ppl": "6.84", "wps": "366347", "ups": "3.09", "wpb": "118483", "bsz": "256", "num_updates": "38200", "lr": "0.000971515", "gnorm": "0.62", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "12468"} +[2022-08-01 04:57:00,689][train_inner][INFO] - {"epoch": 1, "update": 0.746, "loss": "2.771", "ppl": "6.83", "wps": "363887", "ups": "3.07", "wpb": "118621", "bsz": "256", "num_updates": "38400", "lr": "0.000971313", "gnorm": "0.615", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "12533"} +[2022-08-01 04:58:05,015][train_inner][INFO] - {"epoch": 1, "update": 0.75, "loss": "2.774", "ppl": "6.84", "wps": "365856", "ups": "3.11", "wpb": "117669", "bsz": "256", "num_updates": "38600", "lr": "0.000971111", "gnorm": "0.626", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "12597"} +[2022-08-01 04:59:09,866][train_inner][INFO] - {"epoch": 1, "update": 0.754, "loss": "2.77", "ppl": "6.82", "wps": "363516", "ups": "3.08", "wpb": "117869", "bsz": "256", "num_updates": "38800", "lr": "0.000970909", "gnorm": "0.62", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.4", "wall": "12662"} +[2022-08-01 05:00:15,020][train_inner][INFO] - {"epoch": 1, "update": 0.758, "loss": "2.77", "ppl": "6.82", "wps": "364870", "ups": "3.07", "wpb": "118862", "bsz": "255.9", "num_updates": "39000", "lr": "0.000970707", "gnorm": "0.612", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "12727"} +[2022-08-01 05:01:19,394][train_inner][INFO] - {"epoch": 1, "update": 0.762, "loss": "2.774", "ppl": "6.84", "wps": "365249", "ups": "3.11", "wpb": "117560", "bsz": "256", "num_updates": "39200", "lr": "0.000970505", "gnorm": "0.625", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "12792"} +[2022-08-01 05:02:24,164][train_inner][INFO] - {"epoch": 1, "update": 0.766, "loss": "2.762", "ppl": "6.79", "wps": "362437", "ups": "3.09", "wpb": "117372", "bsz": "256", "num_updates": "39400", "lr": "0.000970303", "gnorm": "0.613", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "12856"} +[2022-08-01 05:03:28,845][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.766", "ppl": "6.8", "wps": "363354", "ups": "3.09", "wpb": "117509", "bsz": "256", "num_updates": "39600", "lr": "0.000970101", "gnorm": "0.615", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "12921"} +[2022-08-01 05:04:34,151][train_inner][INFO] - {"epoch": 1, "update": 0.773, "loss": "2.749", "ppl": "6.72", "wps": "361769", "ups": "3.06", "wpb": "118127", "bsz": "256", "num_updates": "39800", "lr": "0.000969899", "gnorm": "0.611", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "12986"} +[2022-08-01 05:05:38,644][train_inner][INFO] - {"epoch": 1, "update": 0.777, "loss": "2.763", "ppl": "6.79", "wps": "366563", "ups": "3.1", "wpb": "118202", "bsz": "256", "num_updates": "40000", "lr": "0.000969697", "gnorm": "0.615", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "13051"} +[2022-08-01 05:06:43,170][train_inner][INFO] - {"epoch": 1, "update": 0.781, "loss": "2.765", "ppl": "6.8", "wps": "366018", "ups": "3.1", "wpb": "118086", "bsz": "256", "num_updates": "40200", "lr": "0.000969495", "gnorm": "0.609", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "13115"} +[2022-08-01 05:07:47,947][train_inner][INFO] - {"epoch": 1, "update": 0.785, "loss": "2.758", "ppl": "6.76", "wps": "363980", "ups": "3.09", "wpb": "117887", "bsz": "256", "num_updates": "40400", "lr": "0.000969293", "gnorm": "0.611", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "13180"} +[2022-08-01 05:08:52,455][train_inner][INFO] - {"epoch": 1, "update": 0.789, "loss": "2.754", "ppl": "6.74", "wps": "366622", "ups": "3.1", "wpb": "118249", "bsz": "256", "num_updates": "40600", "lr": "0.000969091", "gnorm": "0.609", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "13245"} +[2022-08-01 05:09:57,272][train_inner][INFO] - {"epoch": 1, "update": 0.793, "loss": "2.749", "ppl": "6.72", "wps": "364363", "ups": "3.09", "wpb": "118083", "bsz": "256", "num_updates": "40800", "lr": "0.000968889", "gnorm": "0.61", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "13310"} +[2022-08-01 05:11:02,467][train_inner][INFO] - {"epoch": 1, "update": 0.797, "loss": "2.754", "ppl": "6.75", "wps": "362213", "ups": "3.07", "wpb": "118071", "bsz": "256", "num_updates": "41000", "lr": "0.000968687", "gnorm": "0.607", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "13375"} +[2022-08-01 05:12:07,607][train_inner][INFO] - {"epoch": 1, "update": 0.801, "loss": "2.752", "ppl": "6.74", "wps": "361983", "ups": "3.07", "wpb": "117895", "bsz": "256", "num_updates": "41200", "lr": "0.000968485", "gnorm": "0.616", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "13440"} +[2022-08-01 05:13:12,448][train_inner][INFO] - {"epoch": 1, "update": 0.804, "loss": "2.738", "ppl": "6.67", "wps": "364532", "ups": "3.08", "wpb": "118182", "bsz": "256", "num_updates": "41400", "lr": "0.000968283", "gnorm": "0.604", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.1", "wall": "13505"} +[2022-08-01 05:14:17,363][train_inner][INFO] - {"epoch": 1, "update": 0.808, "loss": "2.737", "ppl": "6.67", "wps": "365332", "ups": "3.08", "wpb": "118574", "bsz": "256", "num_updates": "41600", "lr": "0.000968081", "gnorm": "0.603", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "13570"} +[2022-08-01 05:15:22,260][train_inner][INFO] - {"epoch": 1, "update": 0.812, "loss": "2.741", "ppl": "6.69", "wps": "364164", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "41800", "lr": "0.000967879", "gnorm": "0.604", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "13635"} +[2022-08-01 05:16:27,352][train_inner][INFO] - {"epoch": 1, "update": 0.816, "loss": "2.747", "ppl": "6.71", "wps": "362769", "ups": "3.07", "wpb": "118064", "bsz": "256", "num_updates": "42000", "lr": "0.000967677", "gnorm": "0.602", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "13700"} +[2022-08-01 05:17:32,474][train_inner][INFO] - {"epoch": 1, "update": 0.82, "loss": "2.745", "ppl": "6.7", "wps": "361393", "ups": "3.07", "wpb": "117672", "bsz": "256", "num_updates": "42200", "lr": "0.000967475", "gnorm": "0.609", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.6", "wall": "13765"} +[2022-08-01 05:18:37,402][train_inner][INFO] - {"epoch": 1, "update": 0.824, "loss": "2.741", "ppl": "6.68", "wps": "364074", "ups": "3.08", "wpb": "118191", "bsz": "256", "num_updates": "42400", "lr": "0.000967273", "gnorm": "0.602", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "13830"} +[2022-08-01 05:19:42,553][train_inner][INFO] - {"epoch": 1, "update": 0.828, "loss": "2.735", "ppl": "6.66", "wps": "361989", "ups": "3.07", "wpb": "117918", "bsz": "256", "num_updates": "42600", "lr": "0.000967071", "gnorm": "0.602", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "13895"} +[2022-08-01 05:20:12,599][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 05:20:47,779][train_inner][INFO] - {"epoch": 1, "update": 0.832, "loss": "2.735", "ppl": "6.66", "wps": "362147", "ups": "3.07", "wpb": "118105", "bsz": "256", "num_updates": "42800", "lr": "0.000966869", "gnorm": "0.605", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "13960"} +[2022-08-01 05:21:52,854][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.729", "ppl": "6.63", "wps": "364339", "ups": "3.07", "wpb": "118546", "bsz": "256", "num_updates": "43000", "lr": "0.000966667", "gnorm": "0.602", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "14025"} +[2022-08-01 05:22:57,882][train_inner][INFO] - {"epoch": 1, "update": 0.839, "loss": "2.734", "ppl": "6.66", "wps": "364818", "ups": "3.08", "wpb": "118617", "bsz": "256", "num_updates": "43200", "lr": "0.000966465", "gnorm": "0.607", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "14090"} +[2022-08-01 05:24:02,692][train_inner][INFO] - {"epoch": 1, "update": 0.843, "loss": "2.723", "ppl": "6.6", "wps": "366654", "ups": "3.09", "wpb": "118812", "bsz": "256", "num_updates": "43400", "lr": "0.000966263", "gnorm": "0.605", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "14155"} +[2022-08-01 05:25:07,268][train_inner][INFO] - {"epoch": 1, "update": 0.847, "loss": "2.724", "ppl": "6.61", "wps": "368516", "ups": "3.1", "wpb": "118984", "bsz": "256", "num_updates": "43600", "lr": "0.000966061", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "14220"} +[2022-08-01 05:26:12,284][train_inner][INFO] - {"epoch": 1, "update": 0.851, "loss": "2.727", "ppl": "6.62", "wps": "364200", "ups": "3.08", "wpb": "118393", "bsz": "256", "num_updates": "43800", "lr": "0.000965859", "gnorm": "0.599", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "14285"} +[2022-08-01 05:27:17,105][train_inner][INFO] - {"epoch": 1, "update": 0.855, "loss": "2.726", "ppl": "6.62", "wps": "364202", "ups": "3.09", "wpb": "118038", "bsz": "256", "num_updates": "44000", "lr": "0.000965657", "gnorm": "0.601", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "14349"} +[2022-08-01 05:28:21,942][train_inner][INFO] - {"epoch": 1, "update": 0.859, "loss": "2.723", "ppl": "6.6", "wps": "365282", "ups": "3.08", "wpb": "118417", "bsz": "256", "num_updates": "44200", "lr": "0.000965455", "gnorm": "0.6", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.3", "wall": "14414"} +[2022-08-01 05:29:26,801][train_inner][INFO] - {"epoch": 1, "update": 0.863, "loss": "2.73", "ppl": "6.63", "wps": "364988", "ups": "3.08", "wpb": "118361", "bsz": "256", "num_updates": "44400", "lr": "0.000965253", "gnorm": "0.597", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "14479"} +[2022-08-01 05:30:31,463][train_inner][INFO] - {"epoch": 1, "update": 0.867, "loss": "2.717", "ppl": "6.58", "wps": "365176", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "44600", "lr": "0.000965051", "gnorm": "0.599", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "14544"} +[2022-08-01 05:31:36,295][train_inner][INFO] - {"epoch": 1, "update": 0.87, "loss": "2.718", "ppl": "6.58", "wps": "365488", "ups": "3.08", "wpb": "118475", "bsz": "256", "num_updates": "44800", "lr": "0.000964848", "gnorm": "0.599", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.3", "wall": "14609"} +[2022-08-01 05:32:06,658][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 05:32:41,320][train_inner][INFO] - {"epoch": 1, "update": 0.874, "loss": "2.722", "ppl": "6.6", "wps": "364470", "ups": "3.08", "wpb": "118495", "bsz": "256", "num_updates": "45000", "lr": "0.000964646", "gnorm": "0.598", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "14674"} +[2022-08-01 05:33:46,128][train_inner][INFO] - {"epoch": 1, "update": 0.878, "loss": "2.709", "ppl": "6.54", "wps": "366899", "ups": "3.09", "wpb": "118890", "bsz": "256", "num_updates": "45200", "lr": "0.000964444", "gnorm": "0.598", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "14738"} +[2022-08-01 05:34:50,499][train_inner][INFO] - {"epoch": 1, "update": 0.882, "loss": "2.713", "ppl": "6.56", "wps": "367221", "ups": "3.11", "wpb": "118190", "bsz": "256", "num_updates": "45400", "lr": "0.000964242", "gnorm": "0.6", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "14803"} +[2022-08-01 05:35:55,285][train_inner][INFO] - {"epoch": 1, "update": 0.886, "loss": "2.708", "ppl": "6.54", "wps": "366760", "ups": "3.09", "wpb": "118802", "bsz": "256", "num_updates": "45600", "lr": "0.00096404", "gnorm": "0.597", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "14868"} +[2022-08-01 05:37:00,197][train_inner][INFO] - {"epoch": 1, "update": 0.89, "loss": "2.709", "ppl": "6.54", "wps": "365361", "ups": "3.08", "wpb": "118580", "bsz": "256", "num_updates": "45800", "lr": "0.000963838", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "14933"} +[2022-08-01 05:38:05,372][train_inner][INFO] - {"epoch": 1, "update": 0.894, "loss": "2.71", "ppl": "6.54", "wps": "364238", "ups": "3.07", "wpb": "118693", "bsz": "256", "num_updates": "46000", "lr": "0.000963636", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "14998"} +[2022-08-01 05:39:10,433][train_inner][INFO] - {"epoch": 1, "update": 0.898, "loss": "2.71", "ppl": "6.54", "wps": "364881", "ups": "3.07", "wpb": "118697", "bsz": "256", "num_updates": "46200", "lr": "0.000963434", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.3", "wall": "15063"} +[2022-08-01 05:40:15,260][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "2.701", "ppl": "6.5", "wps": "365900", "ups": "3.09", "wpb": "118598", "bsz": "256", "num_updates": "46400", "lr": "0.000963232", "gnorm": "0.597", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "15128"} +[2022-08-01 05:41:19,775][train_inner][INFO] - {"epoch": 1, "update": 0.905, "loss": "2.707", "ppl": "6.53", "wps": "366495", "ups": "3.1", "wpb": "118222", "bsz": "256", "num_updates": "46600", "lr": "0.00096303", "gnorm": "0.594", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "15192"} +[2022-08-01 05:42:24,695][train_inner][INFO] - {"epoch": 1, "update": 0.909, "loss": "2.705", "ppl": "6.52", "wps": "363773", "ups": "3.08", "wpb": "118078", "bsz": "256", "num_updates": "46800", "lr": "0.000962828", "gnorm": "0.595", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "15257"} +[2022-08-01 05:43:29,632][train_inner][INFO] - {"epoch": 1, "update": 0.913, "loss": "2.709", "ppl": "6.54", "wps": "363754", "ups": "3.08", "wpb": "118104", "bsz": "256", "num_updates": "47000", "lr": "0.000962626", "gnorm": "0.601", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "15322"} +[2022-08-01 05:44:34,359][train_inner][INFO] - {"epoch": 1, "update": 0.917, "loss": "2.702", "ppl": "6.51", "wps": "364418", "ups": "3.09", "wpb": "117936", "bsz": "256", "num_updates": "47200", "lr": "0.000962424", "gnorm": "0.596", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "15387"} +[2022-08-01 05:45:39,370][train_inner][INFO] - {"epoch": 1, "update": 0.921, "loss": "2.705", "ppl": "6.52", "wps": "363650", "ups": "3.08", "wpb": "118205", "bsz": "256", "num_updates": "47400", "lr": "0.000962222", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "15452"} +[2022-08-01 05:46:43,550][train_inner][INFO] - {"epoch": 1, "update": 0.925, "loss": "2.707", "ppl": "6.53", "wps": "366931", "ups": "3.12", "wpb": "117748", "bsz": "256", "num_updates": "47600", "lr": "0.00096202", "gnorm": "0.595", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.9", "wall": "15516"} +[2022-08-01 05:47:48,709][train_inner][INFO] - {"epoch": 1, "update": 0.929, "loss": "2.696", "ppl": "6.48", "wps": "363664", "ups": "3.07", "wpb": "118477", "bsz": "256", "num_updates": "47800", "lr": "0.000961818", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "15581"} +[2022-08-01 05:48:53,823][train_inner][INFO] - {"epoch": 1, "update": 0.933, "loss": "2.689", "ppl": "6.45", "wps": "364724", "ups": "3.07", "wpb": "118741", "bsz": "256", "num_updates": "48000", "lr": "0.000961616", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "15646"} +[2022-08-01 05:49:58,778][train_inner][INFO] - {"epoch": 1, "update": 0.937, "loss": "2.696", "ppl": "6.48", "wps": "363485", "ups": "3.08", "wpb": "118048", "bsz": "256", "num_updates": "48200", "lr": "0.000961414", "gnorm": "0.592", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "15711"} +[2022-08-01 05:51:03,602][train_inner][INFO] - {"epoch": 1, "update": 0.94, "loss": "2.704", "ppl": "6.52", "wps": "364041", "ups": "3.09", "wpb": "117982", "bsz": "256", "num_updates": "48400", "lr": "0.000961212", "gnorm": "0.596", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "15776"} +[2022-08-01 05:51:47,961][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 05:51:52,437][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 05:52:08,678][train_inner][INFO] - {"epoch": 1, "update": 0.944, "loss": "2.69", "ppl": "6.45", "wps": "363203", "ups": "3.07", "wpb": "118177", "bsz": "256", "num_updates": "48600", "lr": "0.00096101", "gnorm": "0.612", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "15841"} +[2022-08-01 05:53:13,378][train_inner][INFO] - {"epoch": 1, "update": 0.948, "loss": "2.694", "ppl": "6.47", "wps": "366091", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "48800", "lr": "0.000960808", "gnorm": "0.589", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "15906"} +[2022-08-01 05:54:18,607][train_inner][INFO] - {"epoch": 1, "update": 0.952, "loss": "2.687", "ppl": "6.44", "wps": "363076", "ups": "3.07", "wpb": "118413", "bsz": "256", "num_updates": "49000", "lr": "0.000960606", "gnorm": "0.594", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "15971"} +[2022-08-01 05:55:22,885][train_inner][INFO] - {"epoch": 1, "update": 0.956, "loss": "2.687", "ppl": "6.44", "wps": "366915", "ups": "3.11", "wpb": "117920", "bsz": "256", "num_updates": "49200", "lr": "0.000960404", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "16035"} +[2022-08-01 05:56:27,829][train_inner][INFO] - {"epoch": 1, "update": 0.96, "loss": "2.689", "ppl": "6.45", "wps": "365616", "ups": "3.08", "wpb": "118722", "bsz": "256", "num_updates": "49400", "lr": "0.000960202", "gnorm": "0.589", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "16100"} +[2022-08-01 05:57:32,897][train_inner][INFO] - {"epoch": 1, "update": 0.964, "loss": "2.689", "ppl": "6.45", "wps": "361562", "ups": "3.07", "wpb": "117629", "bsz": "256", "num_updates": "49600", "lr": "0.00096", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "16165"} +[2022-08-01 05:58:37,776][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "2.683", "ppl": "6.42", "wps": "363716", "ups": "3.08", "wpb": "117986", "bsz": "256", "num_updates": "49800", "lr": "0.000959798", "gnorm": "0.592", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "16230"} +[2022-08-01 05:59:42,386][train_inner][INFO] - {"epoch": 1, "update": 0.972, "loss": "2.682", "ppl": "6.42", "wps": "366933", "ups": "3.1", "wpb": "118537", "bsz": "256", "num_updates": "50000", "lr": "0.000959596", "gnorm": "0.587", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "16295"} +[2022-08-01 05:59:42,388][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 05:59:42,395][fairseq.tasks.fairseq_task][WARNING] - 576 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[2086, 18643, 32310, 47209, 49755, 37591, 48266, 19397, 19415, 38832] +[2022-08-01 06:00:04,987][valid][INFO] - {"epoch": 1, "valid_loss": "2.572", "valid_ppl": "5.95", "valid_wps": "1.61125e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "50000"} +[2022-08-01 06:00:04,991][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 50000 updates +[2022-08-01 06:00:04,992][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_1_50000.pt +[2022-08-01 06:00:20,027][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_1_50000.pt +[2022-08-01 06:00:51,853][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_1_50000.pt (epoch 1 @ 50000 updates, score 2.572) (writing took 46.862448069266975 seconds) +[2022-08-01 06:01:56,965][train_inner][INFO] - {"epoch": 1, "update": 0.975, "loss": "2.686", "ppl": "6.43", "wps": "175494", "ups": "1.49", "wpb": "118088", "bsz": "256", "num_updates": "50200", "lr": "0.000959394", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "16429"} +[2022-08-01 06:03:01,973][train_inner][INFO] - {"epoch": 1, "update": 0.979, "loss": "2.681", "ppl": "6.41", "wps": "363864", "ups": "3.08", "wpb": "118268", "bsz": "256", "num_updates": "50400", "lr": "0.000959192", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "16494"} +[2022-08-01 06:04:06,738][train_inner][INFO] - {"epoch": 1, "update": 0.983, "loss": "2.677", "ppl": "6.4", "wps": "364088", "ups": "3.09", "wpb": "117900", "bsz": "256", "num_updates": "50600", "lr": "0.00095899", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "16559"} +[2022-08-01 06:05:12,408][train_inner][INFO] - {"epoch": 1, "update": 0.987, "loss": "2.686", "ppl": "6.44", "wps": "360770", "ups": "3.05", "wpb": "118457", "bsz": "256", "num_updates": "50800", "lr": "0.000958788", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "16625"} +[2022-08-01 06:06:17,255][train_inner][INFO] - {"epoch": 1, "update": 0.991, "loss": "2.683", "ppl": "6.42", "wps": "364123", "ups": "3.08", "wpb": "118060", "bsz": "256", "num_updates": "51000", "lr": "0.000958586", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "16690"} +[2022-08-01 06:07:23,352][train_inner][INFO] - {"epoch": 1, "update": 0.995, "loss": "2.678", "ppl": "6.4", "wps": "359124", "ups": "3.03", "wpb": "118684", "bsz": "256", "num_updates": "51200", "lr": "0.000958384", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.9", "wall": "16756"} +[2022-08-01 06:08:28,291][train_inner][INFO] - {"epoch": 1, "update": 0.999, "loss": "2.679", "ppl": "6.4", "wps": "364100", "ups": "3.08", "wpb": "118219", "bsz": "256", "num_updates": "51400", "lr": "0.000958182", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "16821"} +[2022-08-01 06:08:50,004][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below) +[2022-08-01 06:08:50,005][train][INFO] - {"epoch": 1, "train_loss": "3.688", "train_ppl": "12.89", "train_wps": "362334", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "51467", "train_lr": "0.000958114", "train_gnorm": "0.744", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16649", "train_gb_free": "22.8", "train_wall": "16842"} +[2022-08-01 06:08:50,093][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 06:08:50,096][fairseq.trainer][INFO] - begin training epoch 2 +[2022-08-01 06:08:50,096][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 06:09:43,489][train_inner][INFO] - {"epoch": 2, "update": 1.003, "loss": "2.674", "ppl": "6.38", "wps": "312913", "ups": "2.66", "wpb": "117650", "bsz": "255.4", "num_updates": "51600", "lr": "0.00095798", "gnorm": "0.594", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "16896"} +[2022-08-01 06:10:48,189][train_inner][INFO] - {"epoch": 2, "update": 1.006, "loss": "2.673", "ppl": "6.38", "wps": "365726", "ups": "3.09", "wpb": "118311", "bsz": "256", "num_updates": "51800", "lr": "0.000957778", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "16961"} +[2022-08-01 06:11:52,801][train_inner][INFO] - {"epoch": 2, "update": 1.01, "loss": "2.672", "ppl": "6.38", "wps": "364625", "ups": "3.1", "wpb": "117793", "bsz": "256", "num_updates": "52000", "lr": "0.000957576", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "17025"} +[2022-08-01 06:12:57,721][train_inner][INFO] - {"epoch": 2, "update": 1.014, "loss": "2.664", "ppl": "6.34", "wps": "365380", "ups": "3.08", "wpb": "118602", "bsz": "256", "num_updates": "52200", "lr": "0.000957374", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.7", "wall": "17090"} +[2022-08-01 06:14:02,828][train_inner][INFO] - {"epoch": 2, "update": 1.018, "loss": "2.66", "ppl": "6.32", "wps": "363574", "ups": "3.07", "wpb": "118354", "bsz": "256", "num_updates": "52400", "lr": "0.000957172", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "17155"} +[2022-08-01 06:15:07,520][train_inner][INFO] - {"epoch": 2, "update": 1.022, "loss": "2.666", "ppl": "6.35", "wps": "364571", "ups": "3.09", "wpb": "117923", "bsz": "256", "num_updates": "52600", "lr": "0.00095697", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "17220"} +[2022-08-01 06:16:12,548][train_inner][INFO] - {"epoch": 2, "update": 1.026, "loss": "2.667", "ppl": "6.35", "wps": "362542", "ups": "3.08", "wpb": "117876", "bsz": "256", "num_updates": "52800", "lr": "0.000956768", "gnorm": "0.589", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.8", "wall": "17285"} +[2022-08-01 06:17:17,295][train_inner][INFO] - {"epoch": 2, "update": 1.03, "loss": "2.657", "ppl": "6.31", "wps": "366310", "ups": "3.09", "wpb": "118584", "bsz": "256", "num_updates": "53000", "lr": "0.000956566", "gnorm": "0.586", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.2", "wall": "17350"} +[2022-08-01 06:18:22,004][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "2.659", "ppl": "6.31", "wps": "366596", "ups": "3.09", "wpb": "118610", "bsz": "256", "num_updates": "53200", "lr": "0.000956364", "gnorm": "0.583", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.2", "wall": "17414"} +[2022-08-01 06:19:27,932][train_inner][INFO] - {"epoch": 2, "update": 1.038, "loss": "2.666", "ppl": "6.35", "wps": "360244", "ups": "3.03", "wpb": "118746", "bsz": "256", "num_updates": "53400", "lr": "0.000956162", "gnorm": "0.587", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "21.6", "wall": "17480"} +[2022-08-01 06:20:32,781][train_inner][INFO] - {"epoch": 2, "update": 1.041, "loss": "2.662", "ppl": "6.33", "wps": "362976", "ups": "3.08", "wpb": "117681", "bsz": "256", "num_updates": "53600", "lr": "0.00095596", "gnorm": "0.589", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "17545"} +[2022-08-01 06:20:40,154][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 06:21:20,235][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 06:21:38,163][train_inner][INFO] - {"epoch": 2, "update": 1.045, "loss": "2.66", "ppl": "6.32", "wps": "362479", "ups": "3.06", "wpb": "118498", "bsz": "256", "num_updates": "53800", "lr": "0.000955758", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "17610"} +[2022-08-01 06:22:42,712][train_inner][INFO] - {"epoch": 2, "update": 1.049, "loss": "2.662", "ppl": "6.33", "wps": "366301", "ups": "3.1", "wpb": "118220", "bsz": "256", "num_updates": "54000", "lr": "0.000955556", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "17675"} +[2022-08-01 06:23:47,830][train_inner][INFO] - {"epoch": 2, "update": 1.053, "loss": "2.653", "ppl": "6.29", "wps": "363954", "ups": "3.07", "wpb": "118498", "bsz": "256", "num_updates": "54200", "lr": "0.000955354", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "17740"} +[2022-08-01 06:24:52,296][train_inner][INFO] - {"epoch": 2, "update": 1.057, "loss": "2.651", "ppl": "6.28", "wps": "368792", "ups": "3.1", "wpb": "118871", "bsz": "256", "num_updates": "54400", "lr": "0.000955152", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.5", "wall": "17805"} +[2022-08-01 06:25:57,353][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "2.656", "ppl": "6.3", "wps": "364467", "ups": "3.07", "wpb": "118554", "bsz": "256", "num_updates": "54600", "lr": "0.000954949", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "17870"} +[2022-08-01 06:27:02,071][train_inner][INFO] - {"epoch": 2, "update": 1.065, "loss": "2.659", "ppl": "6.31", "wps": "367567", "ups": "3.09", "wpb": "118939", "bsz": "256", "num_updates": "54800", "lr": "0.000954747", "gnorm": "0.58", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "17934"} +[2022-08-01 06:28:07,076][train_inner][INFO] - {"epoch": 2, "update": 1.069, "loss": "2.653", "ppl": "6.29", "wps": "363708", "ups": "3.08", "wpb": "118213", "bsz": "256", "num_updates": "55000", "lr": "0.000954545", "gnorm": "0.581", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "17999"} +[2022-08-01 06:29:11,707][train_inner][INFO] - {"epoch": 2, "update": 1.073, "loss": "2.651", "ppl": "6.28", "wps": "364310", "ups": "3.09", "wpb": "117726", "bsz": "256", "num_updates": "55200", "lr": "0.000954343", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "18064"} +[2022-08-01 06:30:16,308][train_inner][INFO] - {"epoch": 2, "update": 1.076, "loss": "2.651", "ppl": "6.28", "wps": "365568", "ups": "3.1", "wpb": "118080", "bsz": "256", "num_updates": "55400", "lr": "0.000954141", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "18129"} +[2022-08-01 06:31:20,859][train_inner][INFO] - {"epoch": 2, "update": 1.08, "loss": "2.653", "ppl": "6.29", "wps": "367066", "ups": "3.1", "wpb": "118471", "bsz": "256", "num_updates": "55600", "lr": "0.000953939", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "18193"} +[2022-08-01 06:32:25,588][train_inner][INFO] - {"epoch": 2, "update": 1.084, "loss": "2.661", "ppl": "6.32", "wps": "364839", "ups": "3.09", "wpb": "118076", "bsz": "256", "num_updates": "55800", "lr": "0.000953737", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "18258"} +[2022-08-01 06:33:29,902][train_inner][INFO] - {"epoch": 2, "update": 1.088, "loss": "2.652", "ppl": "6.29", "wps": "367557", "ups": "3.11", "wpb": "118194", "bsz": "256", "num_updates": "56000", "lr": "0.000953535", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.9", "wall": "18322"} +[2022-08-01 06:34:34,717][train_inner][INFO] - {"epoch": 2, "update": 1.092, "loss": "2.641", "ppl": "6.24", "wps": "366871", "ups": "3.09", "wpb": "118891", "bsz": "256", "num_updates": "56200", "lr": "0.000953333", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "18387"} +[2022-08-01 06:35:39,781][train_inner][INFO] - {"epoch": 2, "update": 1.096, "loss": "2.644", "ppl": "6.25", "wps": "363573", "ups": "3.07", "wpb": "118275", "bsz": "256", "num_updates": "56400", "lr": "0.000953131", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "18452"} +[2022-08-01 06:36:44,694][train_inner][INFO] - {"epoch": 2, "update": 1.1, "loss": "2.639", "ppl": "6.23", "wps": "364582", "ups": "3.08", "wpb": "118330", "bsz": "256", "num_updates": "56600", "lr": "0.000952929", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "18517"} +[2022-08-01 06:37:49,982][train_inner][INFO] - {"epoch": 2, "update": 1.104, "loss": "2.647", "ppl": "6.26", "wps": "362119", "ups": "3.06", "wpb": "118207", "bsz": "256", "num_updates": "56800", "lr": "0.000952727", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "18582"} +[2022-08-01 06:38:54,678][train_inner][INFO] - {"epoch": 2, "update": 1.108, "loss": "2.639", "ppl": "6.23", "wps": "366008", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "57000", "lr": "0.000952525", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "18647"} +[2022-08-01 06:39:59,351][train_inner][INFO] - {"epoch": 2, "update": 1.111, "loss": "2.651", "ppl": "6.28", "wps": "364708", "ups": "3.09", "wpb": "117931", "bsz": "256", "num_updates": "57200", "lr": "0.000952323", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "18712"} +[2022-08-01 06:41:03,989][train_inner][INFO] - {"epoch": 2, "update": 1.115, "loss": "2.642", "ppl": "6.24", "wps": "365393", "ups": "3.09", "wpb": "118090", "bsz": "256", "num_updates": "57400", "lr": "0.000952121", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "18776"} +[2022-08-01 06:42:08,986][train_inner][INFO] - {"epoch": 2, "update": 1.119, "loss": "2.636", "ppl": "6.22", "wps": "366180", "ups": "3.08", "wpb": "119001", "bsz": "256", "num_updates": "57600", "lr": "0.000951919", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "18841"} +[2022-08-01 06:43:13,943][train_inner][INFO] - {"epoch": 2, "update": 1.123, "loss": "2.635", "ppl": "6.21", "wps": "367540", "ups": "3.08", "wpb": "119370", "bsz": "256", "num_updates": "57800", "lr": "0.000951717", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "18906"} +[2022-08-01 06:43:57,774][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 06:44:19,256][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "2.639", "ppl": "6.23", "wps": "362031", "ups": "3.06", "wpb": "118225", "bsz": "256", "num_updates": "58000", "lr": "0.000951515", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "18972"} +[2022-08-01 06:45:24,460][train_inner][INFO] - {"epoch": 2, "update": 1.131, "loss": "2.637", "ppl": "6.22", "wps": "361222", "ups": "3.07", "wpb": "117766", "bsz": "256", "num_updates": "58200", "lr": "0.000951313", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "19037"} +[2022-08-01 06:46:29,111][train_inner][INFO] - {"epoch": 2, "update": 1.135, "loss": "2.641", "ppl": "6.24", "wps": "364936", "ups": "3.09", "wpb": "117965", "bsz": "256", "num_updates": "58400", "lr": "0.000951111", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "19101"} +[2022-08-01 06:47:33,982][train_inner][INFO] - {"epoch": 2, "update": 1.139, "loss": "2.634", "ppl": "6.21", "wps": "364570", "ups": "3.08", "wpb": "118250", "bsz": "256", "num_updates": "58600", "lr": "0.000950909", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "19166"} +[2022-08-01 06:48:38,839][train_inner][INFO] - {"epoch": 2, "update": 1.142, "loss": "2.635", "ppl": "6.21", "wps": "363855", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "58800", "lr": "0.000950707", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "19231"} +[2022-08-01 06:49:30,424][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 06:49:43,424][train_inner][INFO] - {"epoch": 2, "update": 1.146, "loss": "2.637", "ppl": "6.22", "wps": "364492", "ups": "3.1", "wpb": "117702", "bsz": "256", "num_updates": "59000", "lr": "0.000950505", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "19296"} +[2022-08-01 06:50:48,418][train_inner][INFO] - {"epoch": 2, "update": 1.15, "loss": "2.633", "ppl": "6.2", "wps": "364987", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "59200", "lr": "0.000950303", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "19361"} +[2022-08-01 06:51:16,563][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 06:51:53,594][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "2.63", "ppl": "6.19", "wps": "362574", "ups": "3.07", "wpb": "118153", "bsz": "256", "num_updates": "59400", "lr": "0.000950101", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "19426"} +[2022-08-01 06:52:58,453][train_inner][INFO] - {"epoch": 2, "update": 1.158, "loss": "2.63", "ppl": "6.19", "wps": "365884", "ups": "3.08", "wpb": "118654", "bsz": "256", "num_updates": "59600", "lr": "0.000949899", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "19491"} +[2022-08-01 06:54:03,075][train_inner][INFO] - {"epoch": 2, "update": 1.162, "loss": "2.629", "ppl": "6.18", "wps": "367569", "ups": "3.09", "wpb": "118763", "bsz": "256", "num_updates": "59800", "lr": "0.000949697", "gnorm": "0.583", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "19555"} +[2022-08-01 06:55:07,762][train_inner][INFO] - {"epoch": 2, "update": 1.166, "loss": "2.628", "ppl": "6.18", "wps": "365643", "ups": "3.09", "wpb": "118261", "bsz": "256", "num_updates": "60000", "lr": "0.000949495", "gnorm": "0.585", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "19620"} +[2022-08-01 06:56:12,744][train_inner][INFO] - {"epoch": 2, "update": 1.17, "loss": "2.623", "ppl": "6.16", "wps": "367475", "ups": "3.08", "wpb": "119394", "bsz": "256", "num_updates": "60200", "lr": "0.000949293", "gnorm": "0.583", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "19685"} +[2022-08-01 06:57:17,968][train_inner][INFO] - {"epoch": 2, "update": 1.174, "loss": "2.623", "ppl": "6.16", "wps": "364274", "ups": "3.07", "wpb": "118796", "bsz": "256", "num_updates": "60400", "lr": "0.000949091", "gnorm": "0.58", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.3", "wall": "19750"} +[2022-08-01 06:58:22,817][train_inner][INFO] - {"epoch": 2, "update": 1.177, "loss": "2.633", "ppl": "6.2", "wps": "363967", "ups": "3.08", "wpb": "118013", "bsz": "256", "num_updates": "60600", "lr": "0.000948889", "gnorm": "0.582", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "19815"} +[2022-08-01 06:59:27,341][train_inner][INFO] - {"epoch": 2, "update": 1.181, "loss": "2.626", "ppl": "6.17", "wps": "367055", "ups": "3.1", "wpb": "118417", "bsz": "256", "num_updates": "60800", "lr": "0.000948687", "gnorm": "0.583", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "19880"} +[2022-08-01 07:00:31,888][train_inner][INFO] - {"epoch": 2, "update": 1.185, "loss": "2.624", "ppl": "6.17", "wps": "366518", "ups": "3.1", "wpb": "118287", "bsz": "256", "num_updates": "61000", "lr": "0.000948485", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "19944"} +[2022-08-01 07:01:36,853][train_inner][INFO] - {"epoch": 2, "update": 1.189, "loss": "2.628", "ppl": "6.18", "wps": "363608", "ups": "3.08", "wpb": "118107", "bsz": "256", "num_updates": "61200", "lr": "0.000948283", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "20009"} +[2022-08-01 07:02:41,453][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "2.629", "ppl": "6.18", "wps": "364928", "ups": "3.1", "wpb": "117870", "bsz": "256", "num_updates": "61400", "lr": "0.000948081", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "20074"} +[2022-08-01 07:03:46,362][train_inner][INFO] - {"epoch": 2, "update": 1.197, "loss": "2.617", "ppl": "6.14", "wps": "364661", "ups": "3.08", "wpb": "118349", "bsz": "256", "num_updates": "61600", "lr": "0.000947879", "gnorm": "0.58", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "20139"} +[2022-08-01 07:04:50,932][train_inner][INFO] - {"epoch": 2, "update": 1.201, "loss": "2.625", "ppl": "6.17", "wps": "366632", "ups": "3.1", "wpb": "118365", "bsz": "256", "num_updates": "61800", "lr": "0.000947677", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "20203"} +[2022-08-01 07:05:55,992][train_inner][INFO] - {"epoch": 2, "update": 1.205, "loss": "2.618", "ppl": "6.14", "wps": "363784", "ups": "3.07", "wpb": "118338", "bsz": "256", "num_updates": "62000", "lr": "0.000947475", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "20268"} +[2022-08-01 07:07:00,700][train_inner][INFO] - {"epoch": 2, "update": 1.209, "loss": "2.624", "ppl": "6.16", "wps": "367470", "ups": "3.09", "wpb": "118888", "bsz": "256", "num_updates": "62200", "lr": "0.000947273", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "20333"} +[2022-08-01 07:08:05,395][train_inner][INFO] - {"epoch": 2, "update": 1.212, "loss": "2.614", "ppl": "6.12", "wps": "367091", "ups": "3.09", "wpb": "118744", "bsz": "256", "num_updates": "62400", "lr": "0.000947071", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "20398"} +[2022-08-01 07:09:10,088][train_inner][INFO] - {"epoch": 2, "update": 1.216, "loss": "2.615", "ppl": "6.12", "wps": "366082", "ups": "3.09", "wpb": "118413", "bsz": "256", "num_updates": "62600", "lr": "0.000946869", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "20462"} +[2022-08-01 07:10:15,069][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "2.618", "ppl": "6.14", "wps": "366199", "ups": "3.08", "wpb": "118977", "bsz": "256", "num_updates": "62800", "lr": "0.000946667", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "20527"} +[2022-08-01 07:11:20,222][train_inner][INFO] - {"epoch": 2, "update": 1.224, "loss": "2.614", "ppl": "6.12", "wps": "364782", "ups": "3.07", "wpb": "118833", "bsz": "255.9", "num_updates": "63000", "lr": "0.000946465", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "20593"} +[2022-08-01 07:12:25,181][train_inner][INFO] - {"epoch": 2, "update": 1.228, "loss": "2.618", "ppl": "6.14", "wps": "363641", "ups": "3.08", "wpb": "118106", "bsz": "256", "num_updates": "63200", "lr": "0.000946263", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "20658"} +[2022-08-01 07:13:29,981][train_inner][INFO] - {"epoch": 2, "update": 1.232, "loss": "2.612", "ppl": "6.11", "wps": "366254", "ups": "3.09", "wpb": "118665", "bsz": "256", "num_updates": "63400", "lr": "0.000946061", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "20722"} +[2022-08-01 07:14:34,610][train_inner][INFO] - {"epoch": 2, "update": 1.236, "loss": "2.609", "ppl": "6.1", "wps": "366563", "ups": "3.09", "wpb": "118450", "bsz": "256", "num_updates": "63600", "lr": "0.000945859", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.2", "wall": "20787"} +[2022-08-01 07:15:39,563][train_inner][INFO] - {"epoch": 2, "update": 1.24, "loss": "2.615", "ppl": "6.13", "wps": "363531", "ups": "3.08", "wpb": "118062", "bsz": "256", "num_updates": "63800", "lr": "0.000945657", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "20852"} +[2022-08-01 07:16:44,496][train_inner][INFO] - {"epoch": 2, "update": 1.244, "loss": "2.614", "ppl": "6.12", "wps": "364089", "ups": "3.08", "wpb": "118205", "bsz": "256", "num_updates": "64000", "lr": "0.000945455", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "20917"} +[2022-08-01 07:17:49,616][train_inner][INFO] - {"epoch": 2, "update": 1.247, "loss": "2.611", "ppl": "6.11", "wps": "361215", "ups": "3.07", "wpb": "117608", "bsz": "256", "num_updates": "64200", "lr": "0.000945253", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "20982"} +[2022-08-01 07:18:54,460][train_inner][INFO] - {"epoch": 2, "update": 1.251, "loss": "2.614", "ppl": "6.12", "wps": "361416", "ups": "3.08", "wpb": "117178", "bsz": "256", "num_updates": "64400", "lr": "0.000945051", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "21047"} +[2022-08-01 07:19:59,374][train_inner][INFO] - {"epoch": 2, "update": 1.255, "loss": "2.609", "ppl": "6.1", "wps": "363558", "ups": "3.08", "wpb": "117997", "bsz": "256", "num_updates": "64600", "lr": "0.000944848", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21112"} +[2022-08-01 07:21:04,133][train_inner][INFO] - {"epoch": 2, "update": 1.259, "loss": "2.615", "ppl": "6.13", "wps": "363300", "ups": "3.09", "wpb": "117634", "bsz": "256", "num_updates": "64800", "lr": "0.000944646", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.9", "wall": "21176"} +[2022-08-01 07:22:09,746][train_inner][INFO] - {"epoch": 2, "update": 1.263, "loss": "2.607", "ppl": "6.09", "wps": "361010", "ups": "3.05", "wpb": "118432", "bsz": "256", "num_updates": "65000", "lr": "0.000944444", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "21242"} +[2022-08-01 07:23:14,979][train_inner][INFO] - {"epoch": 2, "update": 1.267, "loss": "2.606", "ppl": "6.09", "wps": "364255", "ups": "3.07", "wpb": "118806", "bsz": "256", "num_updates": "65200", "lr": "0.000944242", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21307"} +[2022-08-01 07:24:19,313][train_inner][INFO] - {"epoch": 2, "update": 1.271, "loss": "2.601", "ppl": "6.07", "wps": "367759", "ups": "3.11", "wpb": "118296", "bsz": "256", "num_updates": "65400", "lr": "0.00094404", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "21372"} +[2022-08-01 07:25:24,275][train_inner][INFO] - {"epoch": 2, "update": 1.275, "loss": "2.608", "ppl": "6.1", "wps": "364452", "ups": "3.08", "wpb": "118376", "bsz": "256", "num_updates": "65600", "lr": "0.000943838", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.3", "wall": "21437"} +[2022-08-01 07:25:58,757][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 07:26:29,411][train_inner][INFO] - {"epoch": 2, "update": 1.278, "loss": "2.601", "ppl": "6.07", "wps": "363952", "ups": "3.07", "wpb": "118530", "bsz": "256", "num_updates": "65800", "lr": "0.000943636", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21502"} +[2022-08-01 07:27:34,175][train_inner][INFO] - {"epoch": 2, "update": 1.282, "loss": "2.606", "ppl": "6.09", "wps": "364663", "ups": "3.09", "wpb": "118085", "bsz": "256", "num_updates": "66000", "lr": "0.000943434", "gnorm": "0.58", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "21566"} +[2022-08-01 07:28:39,278][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "2.601", "ppl": "6.07", "wps": "364681", "ups": "3.07", "wpb": "118706", "bsz": "256", "num_updates": "66200", "lr": "0.000943232", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "21632"} +[2022-08-01 07:29:45,180][train_inner][INFO] - {"epoch": 2, "update": 1.29, "loss": "2.605", "ppl": "6.08", "wps": "357707", "ups": "3.03", "wpb": "117867", "bsz": "256", "num_updates": "66400", "lr": "0.00094303", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.1", "wall": "21698"} +[2022-08-01 07:30:50,076][train_inner][INFO] - {"epoch": 2, "update": 1.294, "loss": "2.602", "ppl": "6.07", "wps": "362944", "ups": "3.08", "wpb": "117766", "bsz": "256", "num_updates": "66600", "lr": "0.000942828", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21762"} +[2022-08-01 07:31:54,776][train_inner][INFO] - {"epoch": 2, "update": 1.298, "loss": "2.602", "ppl": "6.07", "wps": "364216", "ups": "3.09", "wpb": "117822", "bsz": "256", "num_updates": "66800", "lr": "0.000942626", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "21827"} +[2022-08-01 07:32:59,685][train_inner][INFO] - {"epoch": 2, "update": 1.302, "loss": "2.596", "ppl": "6.04", "wps": "367756", "ups": "3.08", "wpb": "119352", "bsz": "256", "num_updates": "67000", "lr": "0.000942424", "gnorm": "0.579", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.2", "wall": "21892"} +[2022-08-01 07:34:04,425][train_inner][INFO] - {"epoch": 2, "update": 1.306, "loss": "2.594", "ppl": "6.04", "wps": "365476", "ups": "3.09", "wpb": "118302", "bsz": "256", "num_updates": "67200", "lr": "0.000942222", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "21957"} +[2022-08-01 07:35:09,263][train_inner][INFO] - {"epoch": 2, "update": 1.31, "loss": "2.591", "ppl": "6.03", "wps": "366092", "ups": "3.08", "wpb": "118683", "bsz": "256", "num_updates": "67400", "lr": "0.00094202", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.3", "wall": "22022"} +[2022-08-01 07:36:14,075][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "2.596", "ppl": "6.05", "wps": "364662", "ups": "3.09", "wpb": "118170", "bsz": "256", "num_updates": "67600", "lr": "0.000941818", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "22086"} +[2022-08-01 07:36:26,576][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 07:37:18,908][train_inner][INFO] - {"epoch": 2, "update": 1.317, "loss": "2.604", "ppl": "6.08", "wps": "364984", "ups": "3.08", "wpb": "118313", "bsz": "256", "num_updates": "67800", "lr": "0.000941616", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.8", "wall": "22151"} +[2022-08-01 07:38:23,212][train_inner][INFO] - {"epoch": 2, "update": 1.321, "loss": "2.592", "ppl": "6.03", "wps": "369343", "ups": "3.11", "wpb": "118752", "bsz": "256", "num_updates": "68000", "lr": "0.000941414", "gnorm": "0.581", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "22216"} +[2022-08-01 07:39:28,308][train_inner][INFO] - {"epoch": 2, "update": 1.325, "loss": "2.597", "ppl": "6.05", "wps": "363637", "ups": "3.07", "wpb": "118353", "bsz": "256", "num_updates": "68200", "lr": "0.000941212", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "22281"} +[2022-08-01 07:40:33,588][train_inner][INFO] - {"epoch": 2, "update": 1.329, "loss": "2.596", "ppl": "6.04", "wps": "363641", "ups": "3.06", "wpb": "118692", "bsz": "256", "num_updates": "68400", "lr": "0.00094101", "gnorm": "0.58", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "22346"} +[2022-08-01 07:41:39,238][train_inner][INFO] - {"epoch": 2, "update": 1.333, "loss": "2.595", "ppl": "6.04", "wps": "359568", "ups": "3.05", "wpb": "118026", "bsz": "256", "num_updates": "68600", "lr": "0.000940808", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "22412"} +[2022-08-01 07:42:44,035][train_inner][INFO] - {"epoch": 2, "update": 1.337, "loss": "2.588", "ppl": "6.01", "wps": "365914", "ups": "3.09", "wpb": "118539", "bsz": "256", "num_updates": "68800", "lr": "0.000940606", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "22476"} +[2022-08-01 07:43:49,363][train_inner][INFO] - {"epoch": 2, "update": 1.341, "loss": "2.59", "ppl": "6.02", "wps": "360753", "ups": "3.06", "wpb": "117834", "bsz": "256", "num_updates": "69000", "lr": "0.000940404", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "22542"} +[2022-08-01 07:44:54,389][train_inner][INFO] - {"epoch": 2, "update": 1.345, "loss": "2.583", "ppl": "5.99", "wps": "365542", "ups": "3.08", "wpb": "118847", "bsz": "256", "num_updates": "69200", "lr": "0.000940202", "gnorm": "0.591", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "22607"} +[2022-08-01 07:45:59,592][train_inner][INFO] - {"epoch": 2, "update": 1.348, "loss": "2.588", "ppl": "6.01", "wps": "365367", "ups": "3.07", "wpb": "119113", "bsz": "256", "num_updates": "69400", "lr": "0.00094", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "22672"} +[2022-08-01 07:47:04,848][train_inner][INFO] - {"epoch": 2, "update": 1.352, "loss": "2.597", "ppl": "6.05", "wps": "362663", "ups": "3.06", "wpb": "118328", "bsz": "256", "num_updates": "69600", "lr": "0.000939798", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "22737"} +[2022-08-01 07:48:09,936][train_inner][INFO] - {"epoch": 2, "update": 1.356, "loss": "2.588", "ppl": "6.01", "wps": "364810", "ups": "3.07", "wpb": "118722", "bsz": "256", "num_updates": "69800", "lr": "0.000939596", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "22802"} +[2022-08-01 07:49:14,508][train_inner][INFO] - {"epoch": 2, "update": 1.36, "loss": "2.588", "ppl": "6.01", "wps": "365987", "ups": "3.1", "wpb": "118162", "bsz": "256", "num_updates": "70000", "lr": "0.000939394", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "22867"} +[2022-08-01 07:49:38,908][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 07:50:19,521][train_inner][INFO] - {"epoch": 2, "update": 1.364, "loss": "2.588", "ppl": "6.01", "wps": "364807", "ups": "3.08", "wpb": "118584", "bsz": "256", "num_updates": "70200", "lr": "0.000939192", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "22932"} +[2022-08-01 07:51:24,361][train_inner][INFO] - {"epoch": 2, "update": 1.368, "loss": "2.579", "ppl": "5.97", "wps": "364875", "ups": "3.08", "wpb": "118292", "bsz": "256", "num_updates": "70400", "lr": "0.00093899", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "22997"} +[2022-08-01 07:52:29,678][train_inner][INFO] - {"epoch": 2, "update": 1.372, "loss": "2.593", "ppl": "6.03", "wps": "362199", "ups": "3.06", "wpb": "118287", "bsz": "256", "num_updates": "70600", "lr": "0.000938788", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "23062"} +[2022-08-01 07:53:34,652][train_inner][INFO] - {"epoch": 2, "update": 1.376, "loss": "2.58", "ppl": "5.98", "wps": "363378", "ups": "3.08", "wpb": "118049", "bsz": "256", "num_updates": "70800", "lr": "0.000938586", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "23127"} +[2022-08-01 07:54:39,485][train_inner][INFO] - {"epoch": 2, "update": 1.38, "loss": "2.591", "ppl": "6.03", "wps": "363717", "ups": "3.08", "wpb": "117902", "bsz": "256", "num_updates": "71000", "lr": "0.000938384", "gnorm": "0.592", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "23192"} +[2022-08-01 07:55:44,382][train_inner][INFO] - {"epoch": 2, "update": 1.383, "loss": "2.586", "ppl": "6.01", "wps": "363498", "ups": "3.08", "wpb": "117949", "bsz": "256", "num_updates": "71200", "lr": "0.000938182", "gnorm": "0.592", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "23257"} +[2022-08-01 07:56:49,245][train_inner][INFO] - {"epoch": 2, "update": 1.387, "loss": "2.589", "ppl": "6.02", "wps": "362436", "ups": "3.08", "wpb": "117540", "bsz": "256", "num_updates": "71400", "lr": "0.00093798", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "23322"} +[2022-08-01 07:57:53,816][train_inner][INFO] - {"epoch": 2, "update": 1.391, "loss": "2.586", "ppl": "6", "wps": "365958", "ups": "3.1", "wpb": "118150", "bsz": "256", "num_updates": "71600", "lr": "0.000937778", "gnorm": "0.587", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "23386"} +[2022-08-01 07:58:59,582][train_inner][INFO] - {"epoch": 2, "update": 1.395, "loss": "2.58", "ppl": "5.98", "wps": "359777", "ups": "3.04", "wpb": "118304", "bsz": "256", "num_updates": "71800", "lr": "0.000937576", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "23452"} +[2022-08-01 08:00:05,390][train_inner][INFO] - {"epoch": 2, "update": 1.399, "loss": "2.587", "ppl": "6.01", "wps": "359149", "ups": "3.04", "wpb": "118174", "bsz": "256", "num_updates": "72000", "lr": "0.000937374", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "23518"} +[2022-08-01 08:01:09,957][train_inner][INFO] - {"epoch": 2, "update": 1.403, "loss": "2.582", "ppl": "5.99", "wps": "365965", "ups": "3.1", "wpb": "118145", "bsz": "256", "num_updates": "72200", "lr": "0.000937172", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "23582"} +[2022-08-01 08:02:14,849][train_inner][INFO] - {"epoch": 2, "update": 1.407, "loss": "2.584", "ppl": "6", "wps": "364448", "ups": "3.08", "wpb": "118247", "bsz": "256", "num_updates": "72400", "lr": "0.00093697", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "23647"} +[2022-08-01 08:02:18,952][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 08:03:20,148][train_inner][INFO] - {"epoch": 2, "update": 1.411, "loss": "2.578", "ppl": "5.97", "wps": "363675", "ups": "3.06", "wpb": "118736", "bsz": "256", "num_updates": "72600", "lr": "0.000936768", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "23712"} +[2022-08-01 08:04:25,206][train_inner][INFO] - {"epoch": 2, "update": 1.415, "loss": "2.58", "ppl": "5.98", "wps": "363871", "ups": "3.07", "wpb": "118362", "bsz": "256", "num_updates": "72800", "lr": "0.000936566", "gnorm": "0.587", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "23778"} +[2022-08-01 08:05:30,237][train_inner][INFO] - {"epoch": 2, "update": 1.418, "loss": "2.579", "ppl": "5.97", "wps": "363089", "ups": "3.08", "wpb": "118058", "bsz": "256", "num_updates": "73000", "lr": "0.000936364", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "23843"} +[2022-08-01 08:06:34,958][train_inner][INFO] - {"epoch": 2, "update": 1.422, "loss": "2.57", "ppl": "5.94", "wps": "366351", "ups": "3.09", "wpb": "118552", "bsz": "256", "num_updates": "73200", "lr": "0.000936162", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "23907"} +[2022-08-01 08:07:39,882][train_inner][INFO] - {"epoch": 2, "update": 1.426, "loss": "2.571", "ppl": "5.94", "wps": "364598", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "73400", "lr": "0.00093596", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "23972"} +[2022-08-01 08:08:45,369][train_inner][INFO] - {"epoch": 2, "update": 1.43, "loss": "2.569", "ppl": "5.93", "wps": "363060", "ups": "3.05", "wpb": "118876", "bsz": "256", "num_updates": "73600", "lr": "0.000935758", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "24038"} +[2022-08-01 08:09:50,210][train_inner][INFO] - {"epoch": 2, "update": 1.434, "loss": "2.576", "ppl": "5.96", "wps": "363203", "ups": "3.08", "wpb": "117752", "bsz": "256", "num_updates": "73800", "lr": "0.000935556", "gnorm": "0.591", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.1", "wall": "24103"} +[2022-08-01 08:10:54,827][train_inner][INFO] - {"epoch": 2, "update": 1.438, "loss": "2.575", "ppl": "5.96", "wps": "366350", "ups": "3.1", "wpb": "118361", "bsz": "256", "num_updates": "74000", "lr": "0.000935354", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "24167"} +[2022-08-01 08:12:00,567][train_inner][INFO] - {"epoch": 2, "update": 1.442, "loss": "2.571", "ppl": "5.94", "wps": "359076", "ups": "3.04", "wpb": "118026", "bsz": "256", "num_updates": "74200", "lr": "0.000935152", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "24233"} +[2022-08-01 08:13:06,239][train_inner][INFO] - {"epoch": 2, "update": 1.446, "loss": "2.577", "ppl": "5.96", "wps": "359472", "ups": "3.05", "wpb": "118035", "bsz": "256", "num_updates": "74400", "lr": "0.000934949", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "24299"} +[2022-08-01 08:14:11,248][train_inner][INFO] - {"epoch": 2, "update": 1.449, "loss": "2.573", "ppl": "5.95", "wps": "363409", "ups": "3.08", "wpb": "118123", "bsz": "256", "num_updates": "74600", "lr": "0.000934747", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "24364"} +[2022-08-01 08:15:16,399][train_inner][INFO] - {"epoch": 2, "update": 1.453, "loss": "2.568", "ppl": "5.93", "wps": "363771", "ups": "3.07", "wpb": "118498", "bsz": "256", "num_updates": "74800", "lr": "0.000934545", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "24429"} +[2022-08-01 08:16:21,505][train_inner][INFO] - {"epoch": 2, "update": 1.457, "loss": "2.567", "ppl": "5.93", "wps": "365674", "ups": "3.07", "wpb": "119036", "bsz": "256", "num_updates": "75000", "lr": "0.000934343", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "24494"} +[2022-08-01 08:17:26,172][train_inner][INFO] - {"epoch": 2, "update": 1.461, "loss": "2.569", "ppl": "5.93", "wps": "365356", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "75200", "lr": "0.000934141", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "24558"} +[2022-08-01 08:18:30,887][train_inner][INFO] - {"epoch": 2, "update": 1.465, "loss": "2.57", "ppl": "5.94", "wps": "364874", "ups": "3.09", "wpb": "118062", "bsz": "255.9", "num_updates": "75400", "lr": "0.000933939", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "24623"} +[2022-08-01 08:19:35,823][train_inner][INFO] - {"epoch": 2, "update": 1.469, "loss": "2.57", "ppl": "5.94", "wps": "365448", "ups": "3.08", "wpb": "118653", "bsz": "256", "num_updates": "75600", "lr": "0.000933737", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "24688"} +[2022-08-01 08:20:40,779][train_inner][INFO] - {"epoch": 2, "update": 1.473, "loss": "2.569", "ppl": "5.93", "wps": "362488", "ups": "3.08", "wpb": "117726", "bsz": "256", "num_updates": "75800", "lr": "0.000933535", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "24753"} +[2022-08-01 08:21:45,604][train_inner][INFO] - {"epoch": 2, "update": 1.477, "loss": "2.565", "ppl": "5.92", "wps": "366040", "ups": "3.09", "wpb": "118643", "bsz": "256", "num_updates": "76000", "lr": "0.000933333", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "24818"} +[2022-08-01 08:22:50,618][train_inner][INFO] - {"epoch": 2, "update": 1.481, "loss": "2.561", "ppl": "5.9", "wps": "365922", "ups": "3.08", "wpb": "118947", "bsz": "256", "num_updates": "76200", "lr": "0.000933131", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "24883"} +[2022-08-01 08:23:55,494][train_inner][INFO] - {"epoch": 2, "update": 1.484, "loss": "2.572", "ppl": "5.95", "wps": "363344", "ups": "3.08", "wpb": "117861", "bsz": "256", "num_updates": "76400", "lr": "0.000932929", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "24948"} +[2022-08-01 08:25:00,161][train_inner][INFO] - {"epoch": 2, "update": 1.488, "loss": "2.577", "ppl": "5.97", "wps": "363490", "ups": "3.09", "wpb": "117526", "bsz": "256", "num_updates": "76600", "lr": "0.000932727", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.9", "wall": "25012"} +[2022-08-01 08:26:05,153][train_inner][INFO] - {"epoch": 2, "update": 1.492, "loss": "2.565", "ppl": "5.92", "wps": "364308", "ups": "3.08", "wpb": "118384", "bsz": "256", "num_updates": "76800", "lr": "0.000932525", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22", "wall": "25077"} +[2022-08-01 08:27:10,147][train_inner][INFO] - {"epoch": 2, "update": 1.496, "loss": "2.556", "ppl": "5.88", "wps": "365484", "ups": "3.08", "wpb": "118771", "bsz": "256", "num_updates": "77000", "lr": "0.000932323", "gnorm": "0.587", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "25142"} +[2022-08-01 08:28:15,304][train_inner][INFO] - {"epoch": 2, "update": 1.5, "loss": "2.562", "ppl": "5.91", "wps": "364590", "ups": "3.07", "wpb": "118776", "bsz": "256", "num_updates": "77200", "lr": "0.000932121", "gnorm": "0.584", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.1", "wall": "25208"} +[2022-08-01 08:29:20,295][train_inner][INFO] - {"epoch": 2, "update": 1.504, "loss": "2.561", "ppl": "5.9", "wps": "365514", "ups": "3.08", "wpb": "118774", "bsz": "256", "num_updates": "77400", "lr": "0.000931919", "gnorm": "0.584", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "25273"} +[2022-08-01 08:30:25,341][train_inner][INFO] - {"epoch": 2, "update": 1.508, "loss": "2.557", "ppl": "5.88", "wps": "364122", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "77600", "lr": "0.000931717", "gnorm": "0.585", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.2", "wall": "25338"} +[2022-08-01 08:31:30,269][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "2.564", "ppl": "5.91", "wps": "362522", "ups": "3.08", "wpb": "117687", "bsz": "256", "num_updates": "77800", "lr": "0.000931515", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.9", "wall": "25403"} +[2022-08-01 08:32:06,578][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 08:32:35,233][train_inner][INFO] - {"epoch": 2, "update": 1.516, "loss": "2.564", "ppl": "5.92", "wps": "363412", "ups": "3.08", "wpb": "118043", "bsz": "256", "num_updates": "78000", "lr": "0.000931313", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.9", "wall": "25468"} +[2022-08-01 08:33:40,343][train_inner][INFO] - {"epoch": 2, "update": 1.519, "loss": "2.561", "ppl": "5.9", "wps": "363092", "ups": "3.07", "wpb": "118203", "bsz": "256", "num_updates": "78200", "lr": "0.000931111", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.7", "wall": "25533"} +[2022-08-01 08:34:45,158][train_inner][INFO] - {"epoch": 2, "update": 1.523, "loss": "2.561", "ppl": "5.9", "wps": "364713", "ups": "3.09", "wpb": "118193", "bsz": "256", "num_updates": "78400", "lr": "0.000930909", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "25597"} +[2022-08-01 08:35:50,240][train_inner][INFO] - {"epoch": 2, "update": 1.527, "loss": "2.557", "ppl": "5.89", "wps": "363994", "ups": "3.07", "wpb": "118445", "bsz": "256", "num_updates": "78600", "lr": "0.000930707", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "25663"} +[2022-08-01 08:36:54,708][train_inner][INFO] - {"epoch": 2, "update": 1.531, "loss": "2.56", "ppl": "5.9", "wps": "366525", "ups": "3.1", "wpb": "118144", "bsz": "256", "num_updates": "78800", "lr": "0.000930505", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "25727"} +[2022-08-01 08:37:59,549][train_inner][INFO] - {"epoch": 2, "update": 1.535, "loss": "2.555", "ppl": "5.88", "wps": "365402", "ups": "3.08", "wpb": "118463", "bsz": "256", "num_updates": "79000", "lr": "0.000930303", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "25792"} +[2022-08-01 08:39:04,235][train_inner][INFO] - {"epoch": 2, "update": 1.539, "loss": "2.556", "ppl": "5.88", "wps": "366662", "ups": "3.09", "wpb": "118587", "bsz": "256", "num_updates": "79200", "lr": "0.000930101", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "25857"} +[2022-08-01 08:40:08,950][train_inner][INFO] - {"epoch": 2, "update": 1.543, "loss": "2.556", "ppl": "5.88", "wps": "365273", "ups": "3.09", "wpb": "118191", "bsz": "256", "num_updates": "79400", "lr": "0.000929899", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.2", "wall": "25921"} +[2022-08-01 08:41:13,777][train_inner][INFO] - {"epoch": 2, "update": 1.547, "loss": "2.562", "ppl": "5.91", "wps": "365768", "ups": "3.09", "wpb": "118558", "bsz": "256", "num_updates": "79600", "lr": "0.000929697", "gnorm": "0.603", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "25986"} +[2022-08-01 08:42:18,784][train_inner][INFO] - {"epoch": 2, "update": 1.55, "loss": "2.557", "ppl": "5.88", "wps": "364318", "ups": "3.08", "wpb": "118413", "bsz": "256", "num_updates": "79800", "lr": "0.000929495", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "26051"} +[2022-08-01 08:43:23,416][train_inner][INFO] - {"epoch": 2, "update": 1.554, "loss": "2.547", "ppl": "5.85", "wps": "366833", "ups": "3.09", "wpb": "118544", "bsz": "256", "num_updates": "80000", "lr": "0.000929293", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "26116"} +[2022-08-01 08:44:28,210][train_inner][INFO] - {"epoch": 2, "update": 1.558, "loss": "2.557", "ppl": "5.88", "wps": "364566", "ups": "3.09", "wpb": "118107", "bsz": "256", "num_updates": "80200", "lr": "0.000929091", "gnorm": "0.586", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "26181"} +[2022-08-01 08:45:33,354][train_inner][INFO] - {"epoch": 2, "update": 1.562, "loss": "2.558", "ppl": "5.89", "wps": "362727", "ups": "3.07", "wpb": "118147", "bsz": "256", "num_updates": "80400", "lr": "0.000928889", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "26246"} +[2022-08-01 08:46:38,611][train_inner][INFO] - {"epoch": 2, "update": 1.566, "loss": "2.549", "ppl": "5.85", "wps": "363237", "ups": "3.06", "wpb": "118516", "bsz": "256", "num_updates": "80600", "lr": "0.000928687", "gnorm": "0.586", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26.1", "wall": "26311"} +[2022-08-01 08:47:43,947][train_inner][INFO] - {"epoch": 2, "update": 1.57, "loss": "2.543", "ppl": "5.83", "wps": "363959", "ups": "3.06", "wpb": "118897", "bsz": "256", "num_updates": "80800", "lr": "0.000928485", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "26376"} +[2022-08-01 08:48:48,951][train_inner][INFO] - {"epoch": 2, "update": 1.574, "loss": "2.559", "ppl": "5.89", "wps": "365121", "ups": "3.08", "wpb": "118669", "bsz": "256", "num_updates": "81000", "lr": "0.000928283", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "26441"} +[2022-08-01 08:49:53,947][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "2.547", "ppl": "5.84", "wps": "362969", "ups": "3.08", "wpb": "117956", "bsz": "256", "num_updates": "81200", "lr": "0.000928081", "gnorm": "0.593", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.8", "wall": "26506"} +[2022-08-01 08:50:58,882][train_inner][INFO] - {"epoch": 2, "update": 1.582, "loss": "2.552", "ppl": "5.86", "wps": "363379", "ups": "3.08", "wpb": "117979", "bsz": "256", "num_updates": "81400", "lr": "0.000927879", "gnorm": "0.589", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "26571"} +[2022-08-01 08:51:25,575][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 08:52:03,772][train_inner][INFO] - {"epoch": 2, "update": 1.585, "loss": "2.55", "ppl": "5.86", "wps": "364672", "ups": "3.08", "wpb": "118316", "bsz": "256", "num_updates": "81600", "lr": "0.000927677", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "26636"} +[2022-08-01 08:53:08,654][train_inner][INFO] - {"epoch": 2, "update": 1.589, "loss": "2.555", "ppl": "5.88", "wps": "362901", "ups": "3.08", "wpb": "117728", "bsz": "256", "num_updates": "81800", "lr": "0.000927475", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "26701"} +[2022-08-01 08:54:13,588][train_inner][INFO] - {"epoch": 2, "update": 1.593, "loss": "2.548", "ppl": "5.85", "wps": "363675", "ups": "3.08", "wpb": "118072", "bsz": "256", "num_updates": "82000", "lr": "0.000927273", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "26766"} +[2022-08-01 08:55:18,314][train_inner][INFO] - {"epoch": 2, "update": 1.597, "loss": "2.547", "ppl": "5.84", "wps": "366099", "ups": "3.09", "wpb": "118478", "bsz": "256", "num_updates": "82200", "lr": "0.000927071", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "26831"} +[2022-08-01 08:56:23,364][train_inner][INFO] - {"epoch": 2, "update": 1.601, "loss": "2.554", "ppl": "5.87", "wps": "362264", "ups": "3.07", "wpb": "117825", "bsz": "256", "num_updates": "82400", "lr": "0.000926869", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "26896"} +[2022-08-01 08:57:27,837][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "2.548", "ppl": "5.85", "wps": "366118", "ups": "3.1", "wpb": "118021", "bsz": "256", "num_updates": "82600", "lr": "0.000926667", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "26960"} +[2022-08-01 08:58:33,068][train_inner][INFO] - {"epoch": 2, "update": 1.609, "loss": "2.545", "ppl": "5.83", "wps": "362488", "ups": "3.07", "wpb": "118226", "bsz": "256", "num_updates": "82800", "lr": "0.000926465", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "27025"} +[2022-08-01 08:59:38,205][train_inner][INFO] - {"epoch": 2, "update": 1.613, "loss": "2.543", "ppl": "5.83", "wps": "364113", "ups": "3.07", "wpb": "118584", "bsz": "256", "num_updates": "83000", "lr": "0.000926263", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "27091"} +[2022-08-01 09:00:43,154][train_inner][INFO] - {"epoch": 2, "update": 1.617, "loss": "2.542", "ppl": "5.82", "wps": "365641", "ups": "3.08", "wpb": "118739", "bsz": "256", "num_updates": "83200", "lr": "0.000926061", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "27155"} +[2022-08-01 09:01:47,835][train_inner][INFO] - {"epoch": 2, "update": 1.62, "loss": "2.545", "ppl": "5.83", "wps": "365988", "ups": "3.09", "wpb": "118361", "bsz": "256", "num_updates": "83400", "lr": "0.000925859", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "27220"} +[2022-08-01 09:02:52,678][train_inner][INFO] - {"epoch": 2, "update": 1.624, "loss": "2.546", "ppl": "5.84", "wps": "366841", "ups": "3.08", "wpb": "118934", "bsz": "256", "num_updates": "83600", "lr": "0.000925657", "gnorm": "0.587", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.8", "wall": "27285"} +[2022-08-01 09:03:57,090][train_inner][INFO] - {"epoch": 2, "update": 1.628, "loss": "2.552", "ppl": "5.86", "wps": "365447", "ups": "3.11", "wpb": "117694", "bsz": "256", "num_updates": "83800", "lr": "0.000925455", "gnorm": "0.592", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.9", "wall": "27349"} +[2022-08-01 09:04:46,483][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 09:05:01,989][train_inner][INFO] - {"epoch": 2, "update": 1.632, "loss": "2.547", "ppl": "5.85", "wps": "361633", "ups": "3.08", "wpb": "117347", "bsz": "256", "num_updates": "84000", "lr": "0.000925253", "gnorm": "0.593", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "27414"} +[2022-08-01 09:06:06,785][train_inner][INFO] - {"epoch": 2, "update": 1.636, "loss": "2.539", "ppl": "5.81", "wps": "367432", "ups": "3.09", "wpb": "119039", "bsz": "256", "num_updates": "84200", "lr": "0.000925051", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "27479"} +[2022-08-01 09:07:11,365][train_inner][INFO] - {"epoch": 2, "update": 1.64, "loss": "2.546", "ppl": "5.84", "wps": "364692", "ups": "3.1", "wpb": "117711", "bsz": "256", "num_updates": "84400", "lr": "0.000924848", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "27544"} +[2022-08-01 09:08:15,890][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "2.539", "ppl": "5.81", "wps": "367411", "ups": "3.1", "wpb": "118533", "bsz": "256", "num_updates": "84600", "lr": "0.000924646", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "27608"} +[2022-08-01 09:09:21,782][train_inner][INFO] - {"epoch": 2, "update": 1.648, "loss": "2.536", "ppl": "5.8", "wps": "358888", "ups": "3.04", "wpb": "118238", "bsz": "256", "num_updates": "84800", "lr": "0.000924444", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.1", "wall": "27674"} +[2022-08-01 09:10:26,863][train_inner][INFO] - {"epoch": 2, "update": 1.652, "loss": "2.541", "ppl": "5.82", "wps": "365117", "ups": "3.07", "wpb": "118810", "bsz": "256", "num_updates": "85000", "lr": "0.000924242", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "27739"} +[2022-08-01 09:11:31,830][train_inner][INFO] - {"epoch": 2, "update": 1.655, "loss": "2.544", "ppl": "5.83", "wps": "363156", "ups": "3.08", "wpb": "117963", "bsz": "256", "num_updates": "85200", "lr": "0.00092404", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.6", "wall": "27804"} +[2022-08-01 09:12:36,231][train_inner][INFO] - {"epoch": 2, "update": 1.659, "loss": "2.541", "ppl": "5.82", "wps": "366058", "ups": "3.11", "wpb": "117872", "bsz": "256", "num_updates": "85400", "lr": "0.000923838", "gnorm": "0.595", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "27869"} +[2022-08-01 09:13:41,028][train_inner][INFO] - {"epoch": 2, "update": 1.663, "loss": "2.537", "ppl": "5.8", "wps": "364099", "ups": "3.09", "wpb": "117959", "bsz": "256", "num_updates": "85600", "lr": "0.000923636", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "27933"} +[2022-08-01 09:14:45,675][train_inner][INFO] - {"epoch": 2, "update": 1.667, "loss": "2.543", "ppl": "5.83", "wps": "363570", "ups": "3.09", "wpb": "117517", "bsz": "256", "num_updates": "85800", "lr": "0.000923434", "gnorm": "0.593", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "27998"} +[2022-08-01 09:15:50,304][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "2.537", "ppl": "5.8", "wps": "365349", "ups": "3.09", "wpb": "118059", "bsz": "256", "num_updates": "86000", "lr": "0.000923232", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.7", "wall": "28063"} +[2022-08-01 09:16:02,411][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 09:16:03,368][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 09:16:56,147][train_inner][INFO] - {"epoch": 2, "update": 1.675, "loss": "2.536", "ppl": "5.8", "wps": "361400", "ups": "3.04", "wpb": "118978", "bsz": "256", "num_updates": "86200", "lr": "0.00092303", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "28128"} +[2022-08-01 09:18:00,844][train_inner][INFO] - {"epoch": 2, "update": 1.679, "loss": "2.535", "ppl": "5.79", "wps": "366150", "ups": "3.09", "wpb": "118443", "bsz": "256", "num_updates": "86400", "lr": "0.000922828", "gnorm": "0.589", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "28193"} +[2022-08-01 09:19:05,714][train_inner][INFO] - {"epoch": 2, "update": 1.683, "loss": "2.54", "ppl": "5.82", "wps": "363101", "ups": "3.08", "wpb": "117770", "bsz": "256", "num_updates": "86600", "lr": "0.000922626", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "28258"} +[2022-08-01 09:19:56,022][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 09:20:10,644][train_inner][INFO] - {"epoch": 2, "update": 1.687, "loss": "2.536", "ppl": "5.8", "wps": "363920", "ups": "3.08", "wpb": "118146", "bsz": "256", "num_updates": "86800", "lr": "0.000922424", "gnorm": "0.606", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "28323"} +[2022-08-01 09:21:15,850][train_inner][INFO] - {"epoch": 2, "update": 1.69, "loss": "2.529", "ppl": "5.77", "wps": "365823", "ups": "3.07", "wpb": "119269", "bsz": "256", "num_updates": "87000", "lr": "0.000922222", "gnorm": "0.586", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "28388"} +[2022-08-01 09:22:20,981][train_inner][INFO] - {"epoch": 2, "update": 1.694, "loss": "2.542", "ppl": "5.83", "wps": "362970", "ups": "3.07", "wpb": "118201", "bsz": "256", "num_updates": "87200", "lr": "0.00092202", "gnorm": "0.61", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "28453"} +[2022-08-01 09:23:25,423][train_inner][INFO] - {"epoch": 2, "update": 1.698, "loss": "2.537", "ppl": "5.8", "wps": "367176", "ups": "3.1", "wpb": "118305", "bsz": "256", "num_updates": "87400", "lr": "0.000921818", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.3", "wall": "28518"} +[2022-08-01 09:24:30,368][train_inner][INFO] - {"epoch": 2, "update": 1.702, "loss": "2.534", "ppl": "5.79", "wps": "364656", "ups": "3.08", "wpb": "118412", "bsz": "256", "num_updates": "87600", "lr": "0.000921616", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "28583"} +[2022-08-01 09:25:35,399][train_inner][INFO] - {"epoch": 2, "update": 1.706, "loss": "2.532", "ppl": "5.78", "wps": "363644", "ups": "3.08", "wpb": "118238", "bsz": "256", "num_updates": "87800", "lr": "0.000921414", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "28648"} +[2022-08-01 09:26:40,163][train_inner][INFO] - {"epoch": 2, "update": 1.71, "loss": "2.533", "ppl": "5.79", "wps": "364227", "ups": "3.09", "wpb": "117942", "bsz": "256", "num_updates": "88000", "lr": "0.000921212", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "28712"} +[2022-08-01 09:27:45,225][train_inner][INFO] - {"epoch": 2, "update": 1.714, "loss": "2.533", "ppl": "5.79", "wps": "362805", "ups": "3.07", "wpb": "118023", "bsz": "256", "num_updates": "88200", "lr": "0.00092101", "gnorm": "0.593", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "28778"} +[2022-08-01 09:28:49,854][train_inner][INFO] - {"epoch": 2, "update": 1.718, "loss": "2.531", "ppl": "5.78", "wps": "366160", "ups": "3.09", "wpb": "118320", "bsz": "256", "num_updates": "88400", "lr": "0.000920808", "gnorm": "0.597", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "28842"} +[2022-08-01 09:29:54,976][train_inner][INFO] - {"epoch": 2, "update": 1.721, "loss": "2.537", "ppl": "5.8", "wps": "364949", "ups": "3.07", "wpb": "118829", "bsz": "256", "num_updates": "88600", "lr": "0.000920606", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "28907"} +[2022-08-01 09:31:00,288][train_inner][INFO] - {"epoch": 2, "update": 1.725, "loss": "2.521", "ppl": "5.74", "wps": "364738", "ups": "3.06", "wpb": "119107", "bsz": "256", "num_updates": "88800", "lr": "0.000920404", "gnorm": "0.587", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "28973"} +[2022-08-01 09:32:05,222][train_inner][INFO] - {"epoch": 2, "update": 1.729, "loss": "2.538", "ppl": "5.81", "wps": "363083", "ups": "3.08", "wpb": "117881", "bsz": "256", "num_updates": "89000", "lr": "0.000920202", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "29038"} +[2022-08-01 09:33:10,417][train_inner][INFO] - {"epoch": 2, "update": 1.733, "loss": "2.531", "ppl": "5.78", "wps": "362186", "ups": "3.07", "wpb": "118063", "bsz": "256", "num_updates": "89200", "lr": "0.00092", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "29103"} +[2022-08-01 09:34:14,964][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "2.541", "ppl": "5.82", "wps": "364733", "ups": "3.1", "wpb": "117709", "bsz": "256", "num_updates": "89400", "lr": "0.000919798", "gnorm": "0.596", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "29167"} +[2022-08-01 09:34:54,822][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 09:35:19,987][train_inner][INFO] - {"epoch": 2, "update": 1.741, "loss": "2.535", "ppl": "5.79", "wps": "362625", "ups": "3.08", "wpb": "117893", "bsz": "256", "num_updates": "89600", "lr": "0.000919596", "gnorm": "0.592", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "29232"} +[2022-08-01 09:36:24,890][train_inner][INFO] - {"epoch": 2, "update": 1.745, "loss": "2.532", "ppl": "5.78", "wps": "362330", "ups": "3.08", "wpb": "117582", "bsz": "256", "num_updates": "89800", "lr": "0.000919394", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "29297"} +[2022-08-01 09:37:29,835][train_inner][INFO] - {"epoch": 2, "update": 1.749, "loss": "2.528", "ppl": "5.77", "wps": "363643", "ups": "3.08", "wpb": "118081", "bsz": "256", "num_updates": "90000", "lr": "0.000919192", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "29362"} +[2022-08-01 09:38:34,932][train_inner][INFO] - {"epoch": 2, "update": 1.753, "loss": "2.524", "ppl": "5.75", "wps": "363712", "ups": "3.07", "wpb": "118381", "bsz": "256", "num_updates": "90200", "lr": "0.00091899", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "29427"} +[2022-08-01 09:39:40,250][train_inner][INFO] - {"epoch": 2, "update": 1.756, "loss": "2.522", "ppl": "5.74", "wps": "362500", "ups": "3.06", "wpb": "118387", "bsz": "256", "num_updates": "90400", "lr": "0.000918788", "gnorm": "0.592", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "29493"} +[2022-08-01 09:40:44,980][train_inner][INFO] - {"epoch": 2, "update": 1.76, "loss": "2.522", "ppl": "5.75", "wps": "365738", "ups": "3.09", "wpb": "118370", "bsz": "256", "num_updates": "90600", "lr": "0.000918586", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "29557"} +[2022-08-01 09:41:49,535][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "2.524", "ppl": "5.75", "wps": "364682", "ups": "3.1", "wpb": "117709", "bsz": "256", "num_updates": "90800", "lr": "0.000918384", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "29622"} +[2022-08-01 09:42:54,407][train_inner][INFO] - {"epoch": 2, "update": 1.768, "loss": "2.525", "ppl": "5.76", "wps": "364777", "ups": "3.08", "wpb": "118317", "bsz": "256", "num_updates": "91000", "lr": "0.000918182", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "29687"} +[2022-08-01 09:43:59,313][train_inner][INFO] - {"epoch": 2, "update": 1.772, "loss": "2.529", "ppl": "5.77", "wps": "364279", "ups": "3.08", "wpb": "118219", "bsz": "256", "num_updates": "91200", "lr": "0.00091798", "gnorm": "0.606", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "29752"} +[2022-08-01 09:45:04,430][train_inner][INFO] - {"epoch": 2, "update": 1.776, "loss": "2.519", "ppl": "5.73", "wps": "364850", "ups": "3.07", "wpb": "118788", "bsz": "256", "num_updates": "91400", "lr": "0.000917778", "gnorm": "0.589", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.4", "wall": "29817"} +[2022-08-01 09:46:09,294][train_inner][INFO] - {"epoch": 2, "update": 1.78, "loss": "2.526", "ppl": "5.76", "wps": "364856", "ups": "3.08", "wpb": "118328", "bsz": "256", "num_updates": "91600", "lr": "0.000917576", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "29882"} +[2022-08-01 09:47:14,281][train_inner][INFO] - {"epoch": 2, "update": 1.784, "loss": "2.526", "ppl": "5.76", "wps": "363981", "ups": "3.08", "wpb": "118269", "bsz": "256", "num_updates": "91800", "lr": "0.000917374", "gnorm": "0.594", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "29947"} +[2022-08-01 09:48:19,186][train_inner][INFO] - {"epoch": 2, "update": 1.788, "loss": "2.516", "ppl": "5.72", "wps": "365327", "ups": "3.08", "wpb": "118556", "bsz": "256", "num_updates": "92000", "lr": "0.000917172", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "30012"} +[2022-08-01 09:48:59,107][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 09:48:59,397][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 09:48:59,695][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 09:49:24,927][train_inner][INFO] - {"epoch": 2, "update": 1.791, "loss": "2.522", "ppl": "5.74", "wps": "360157", "ups": "3.04", "wpb": "118384", "bsz": "256", "num_updates": "92200", "lr": "0.00091697", "gnorm": "0.759", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "23.2", "wall": "30077"} +[2022-08-01 09:50:30,183][train_inner][INFO] - {"epoch": 2, "update": 1.795, "loss": "2.515", "ppl": "5.72", "wps": "362003", "ups": "3.06", "wpb": "118112", "bsz": "256", "num_updates": "92400", "lr": "0.000916768", "gnorm": "0.602", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "30143"} +[2022-08-01 09:51:35,207][train_inner][INFO] - {"epoch": 2, "update": 1.799, "loss": "2.528", "ppl": "5.77", "wps": "361620", "ups": "3.08", "wpb": "117570", "bsz": "256", "num_updates": "92600", "lr": "0.000916566", "gnorm": "0.615", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.4", "wall": "30208"} +[2022-08-01 09:52:39,819][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "2.519", "ppl": "5.73", "wps": "365528", "ups": "3.1", "wpb": "118086", "bsz": "256", "num_updates": "92800", "lr": "0.000916364", "gnorm": "0.592", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "30272"} +[2022-08-01 09:53:44,380][train_inner][INFO] - {"epoch": 2, "update": 1.807, "loss": "2.517", "ppl": "5.72", "wps": "368288", "ups": "3.1", "wpb": "118883", "bsz": "256", "num_updates": "93000", "lr": "0.000916162", "gnorm": "0.591", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.3", "wall": "30337"} +[2022-08-01 09:54:49,023][train_inner][INFO] - {"epoch": 2, "update": 1.811, "loss": "2.517", "ppl": "5.73", "wps": "366920", "ups": "3.09", "wpb": "118592", "bsz": "256", "num_updates": "93200", "lr": "0.00091596", "gnorm": "0.611", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "30401"} +[2022-08-01 09:55:53,764][train_inner][INFO] - {"epoch": 2, "update": 1.815, "loss": "2.521", "ppl": "5.74", "wps": "364705", "ups": "3.09", "wpb": "118055", "bsz": "256", "num_updates": "93400", "lr": "0.000915758", "gnorm": "0.593", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "30466"} +[2022-08-01 09:56:58,868][train_inner][INFO] - {"epoch": 2, "update": 1.819, "loss": "2.521", "ppl": "5.74", "wps": "362654", "ups": "3.07", "wpb": "118051", "bsz": "256", "num_updates": "93600", "lr": "0.000915556", "gnorm": "0.597", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "30531"} +[2022-08-01 09:58:03,309][train_inner][INFO] - {"epoch": 2, "update": 1.823, "loss": "2.518", "ppl": "5.73", "wps": "368470", "ups": "3.1", "wpb": "118721", "bsz": "256", "num_updates": "93800", "lr": "0.000915354", "gnorm": "0.592", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.8", "wall": "30596"} +[2022-08-01 09:59:08,329][train_inner][INFO] - {"epoch": 2, "update": 1.826, "loss": "2.516", "ppl": "5.72", "wps": "363549", "ups": "3.08", "wpb": "118189", "bsz": "256", "num_updates": "94000", "lr": "0.000915152", "gnorm": "0.596", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "30661"} +[2022-08-01 10:00:13,494][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "2.519", "ppl": "5.73", "wps": "364395", "ups": "3.07", "wpb": "118726", "bsz": "256", "num_updates": "94200", "lr": "0.000914949", "gnorm": "0.594", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "30726"} +[2022-08-01 10:01:18,454][train_inner][INFO] - {"epoch": 2, "update": 1.834, "loss": "2.509", "ppl": "5.69", "wps": "366053", "ups": "3.08", "wpb": "118894", "bsz": "256", "num_updates": "94400", "lr": "0.000914747", "gnorm": "0.588", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "30791"} +[2022-08-01 10:02:23,852][train_inner][INFO] - {"epoch": 2, "update": 1.838, "loss": "2.518", "ppl": "5.73", "wps": "362933", "ups": "3.06", "wpb": "118672", "bsz": "256", "num_updates": "94600", "lr": "0.000914545", "gnorm": "0.607", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "30856"} +[2022-08-01 10:03:28,799][train_inner][INFO] - {"epoch": 2, "update": 1.842, "loss": "2.516", "ppl": "5.72", "wps": "363959", "ups": "3.08", "wpb": "118189", "bsz": "256", "num_updates": "94800", "lr": "0.000914343", "gnorm": "0.592", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.1", "wall": "30921"} +[2022-08-01 10:04:34,548][train_inner][INFO] - {"epoch": 2, "update": 1.846, "loss": "2.521", "ppl": "5.74", "wps": "359623", "ups": "3.04", "wpb": "118222", "bsz": "256", "num_updates": "95000", "lr": "0.000914141", "gnorm": "0.594", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "30987"} +[2022-08-01 10:05:39,305][train_inner][INFO] - {"epoch": 2, "update": 1.85, "loss": "2.52", "ppl": "5.74", "wps": "365359", "ups": "3.09", "wpb": "118298", "bsz": "256", "num_updates": "95200", "lr": "0.000913939", "gnorm": "0.599", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "31052"} +[2022-08-01 10:06:44,321][train_inner][INFO] - {"epoch": 2, "update": 1.854, "loss": "2.519", "ppl": "5.73", "wps": "363019", "ups": "3.08", "wpb": "118007", "bsz": "256", "num_updates": "95400", "lr": "0.000913737", "gnorm": "0.596", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.9", "wall": "31117"} +[2022-08-01 10:07:49,157][train_inner][INFO] - {"epoch": 2, "update": 1.858, "loss": "2.51", "ppl": "5.7", "wps": "365628", "ups": "3.08", "wpb": "118529", "bsz": "256", "num_updates": "95600", "lr": "0.000913535", "gnorm": "0.592", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.2", "wall": "31181"} +[2022-08-01 10:08:53,845][train_inner][INFO] - {"epoch": 2, "update": 1.861, "loss": "2.509", "ppl": "5.69", "wps": "365311", "ups": "3.09", "wpb": "118154", "bsz": "256", "num_updates": "95800", "lr": "0.000913333", "gnorm": "0.618", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.7", "wall": "31246"} +[2022-08-01 10:09:58,438][train_inner][INFO] - {"epoch": 2, "update": 1.865, "loss": "2.51", "ppl": "5.7", "wps": "366046", "ups": "3.1", "wpb": "118217", "bsz": "256", "num_updates": "96000", "lr": "0.000913131", "gnorm": "0.598", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "31311"} +[2022-08-01 10:11:03,052][train_inner][INFO] - {"epoch": 2, "update": 1.869, "loss": "2.517", "ppl": "5.72", "wps": "365471", "ups": "3.1", "wpb": "118072", "bsz": "256", "num_updates": "96200", "lr": "0.000912929", "gnorm": "0.595", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.9", "wall": "31375"} +[2022-08-01 10:12:07,358][train_inner][INFO] - {"epoch": 2, "update": 1.873, "loss": "2.516", "ppl": "5.72", "wps": "366208", "ups": "3.11", "wpb": "117745", "bsz": "256", "num_updates": "96400", "lr": "0.000912727", "gnorm": "0.602", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "31440"} +[2022-08-01 10:13:12,279][train_inner][INFO] - {"epoch": 2, "update": 1.877, "loss": "2.515", "ppl": "5.72", "wps": "364214", "ups": "3.08", "wpb": "118224", "bsz": "256", "num_updates": "96600", "lr": "0.000912525", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.7", "wall": "31505"} +[2022-08-01 10:14:17,378][train_inner][INFO] - {"epoch": 2, "update": 1.881, "loss": "2.512", "ppl": "5.7", "wps": "363472", "ups": "3.07", "wpb": "118306", "bsz": "256", "num_updates": "96800", "lr": "0.000912323", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "31570"} +[2022-08-01 10:15:22,497][train_inner][INFO] - {"epoch": 2, "update": 1.885, "loss": "2.516", "ppl": "5.72", "wps": "362784", "ups": "3.07", "wpb": "118119", "bsz": "256", "num_updates": "97000", "lr": "0.000912121", "gnorm": "0.597", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "31635"} +[2022-08-01 10:16:26,834][train_inner][INFO] - {"epoch": 2, "update": 1.889, "loss": "2.521", "ppl": "5.74", "wps": "366178", "ups": "3.11", "wpb": "117794", "bsz": "256", "num_updates": "97200", "lr": "0.000911919", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.6", "wall": "31699"} +[2022-08-01 10:17:31,515][train_inner][INFO] - {"epoch": 2, "update": 1.892, "loss": "2.51", "ppl": "5.7", "wps": "365395", "ups": "3.09", "wpb": "118168", "bsz": "256", "num_updates": "97400", "lr": "0.000911717", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "31764"} +[2022-08-01 10:18:36,291][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "2.51", "ppl": "5.7", "wps": "366071", "ups": "3.09", "wpb": "118561", "bsz": "256", "num_updates": "97600", "lr": "0.000911515", "gnorm": "0.593", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "31829"} +[2022-08-01 10:19:41,211][train_inner][INFO] - {"epoch": 2, "update": 1.9, "loss": "2.503", "ppl": "5.67", "wps": "364143", "ups": "3.08", "wpb": "118200", "bsz": "256", "num_updates": "97800", "lr": "0.000911313", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.5", "wall": "31894"} +[2022-08-01 10:20:47,037][train_inner][INFO] - {"epoch": 2, "update": 1.904, "loss": "2.514", "ppl": "5.71", "wps": "359252", "ups": "3.04", "wpb": "118238", "bsz": "256", "num_updates": "98000", "lr": "0.000911111", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "31959"} +[2022-08-01 10:21:52,224][train_inner][INFO] - {"epoch": 2, "update": 1.908, "loss": "2.506", "ppl": "5.68", "wps": "364909", "ups": "3.07", "wpb": "118936", "bsz": "256", "num_updates": "98200", "lr": "0.000910909", "gnorm": "0.604", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.7", "wall": "32025"} +[2022-08-01 10:22:56,917][train_inner][INFO] - {"epoch": 2, "update": 1.912, "loss": "2.506", "ppl": "5.68", "wps": "365818", "ups": "3.09", "wpb": "118326", "bsz": "256", "num_updates": "98400", "lr": "0.000910707", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.5", "wall": "32089"} +[2022-08-01 10:24:02,844][train_inner][INFO] - {"epoch": 2, "update": 1.916, "loss": "2.513", "ppl": "5.71", "wps": "358190", "ups": "3.03", "wpb": "118071", "bsz": "256", "num_updates": "98600", "lr": "0.000910505", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "32155"} +[2022-08-01 10:24:18,785][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 10:25:07,851][train_inner][INFO] - {"epoch": 2, "update": 1.92, "loss": "2.506", "ppl": "5.68", "wps": "364086", "ups": "3.08", "wpb": "118339", "bsz": "256", "num_updates": "98800", "lr": "0.000910303", "gnorm": "0.6", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "32220"} +[2022-08-01 10:26:12,680][train_inner][INFO] - {"epoch": 2, "update": 1.924, "loss": "2.51", "ppl": "5.7", "wps": "363755", "ups": "3.09", "wpb": "117908", "bsz": "256", "num_updates": "99000", "lr": "0.000910101", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "32285"} +[2022-08-01 10:27:17,364][train_inner][INFO] - {"epoch": 2, "update": 1.927, "loss": "2.51", "ppl": "5.7", "wps": "364835", "ups": "3.09", "wpb": "117994", "bsz": "256", "num_updates": "99200", "lr": "0.000909899", "gnorm": "0.603", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "32350"} +[2022-08-01 10:28:22,328][train_inner][INFO] - {"epoch": 2, "update": 1.931, "loss": "2.502", "ppl": "5.67", "wps": "364380", "ups": "3.08", "wpb": "118357", "bsz": "256", "num_updates": "99400", "lr": "0.000909697", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "32415"} +[2022-08-01 10:29:26,856][train_inner][INFO] - {"epoch": 2, "update": 1.935, "loss": "2.511", "ppl": "5.7", "wps": "365606", "ups": "3.1", "wpb": "117958", "bsz": "256", "num_updates": "99600", "lr": "0.000909495", "gnorm": "0.618", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "32479"} +[2022-08-01 10:30:31,701][train_inner][INFO] - {"epoch": 2, "update": 1.939, "loss": "2.509", "ppl": "5.69", "wps": "366086", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "99800", "lr": "0.000909293", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.7", "wall": "32544"} +[2022-08-01 10:31:36,493][train_inner][INFO] - {"epoch": 2, "update": 1.943, "loss": "2.502", "ppl": "5.67", "wps": "363825", "ups": "3.09", "wpb": "117864", "bsz": "256", "num_updates": "100000", "lr": "0.000909091", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.6", "wall": "32609"} +[2022-08-01 10:31:36,495][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 10:31:59,089][valid][INFO] - {"epoch": 2, "valid_loss": "2.395", "valid_ppl": "5.26", "valid_wps": "1.56106e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "100000", "valid_best_loss": "2.395"} +[2022-08-01 10:31:59,092][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 100000 updates +[2022-08-01 10:31:59,093][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_2_100000.pt +[2022-08-01 10:32:09,733][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_2_100000.pt +[2022-08-01 10:32:41,346][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_2_100000.pt (epoch 2 @ 100000 updates, score 2.395) (writing took 42.25377578288317 seconds) +[2022-08-01 10:33:45,958][train_inner][INFO] - {"epoch": 2, "update": 1.947, "loss": "2.501", "ppl": "5.66", "wps": "182320", "ups": "1.54", "wpb": "118019", "bsz": "256", "num_updates": "100200", "lr": "0.000908889", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.8", "wall": "32738"} +[2022-08-01 10:34:51,120][train_inner][INFO] - {"epoch": 2, "update": 1.951, "loss": "2.505", "ppl": "5.68", "wps": "363674", "ups": "3.07", "wpb": "118487", "bsz": "256", "num_updates": "100400", "lr": "0.000908687", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "32803"} +[2022-08-01 10:35:55,492][train_inner][INFO] - {"epoch": 2, "update": 1.955, "loss": "2.506", "ppl": "5.68", "wps": "366376", "ups": "3.11", "wpb": "117920", "bsz": "256", "num_updates": "100600", "lr": "0.000908485", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "32868"} +[2022-08-01 10:37:00,416][train_inner][INFO] - {"epoch": 2, "update": 1.959, "loss": "2.505", "ppl": "5.68", "wps": "364490", "ups": "3.08", "wpb": "118319", "bsz": "256", "num_updates": "100800", "lr": "0.000908283", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "32933"} +[2022-08-01 10:38:05,263][train_inner][INFO] - {"epoch": 2, "update": 1.962, "loss": "2.5", "ppl": "5.66", "wps": "365392", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "101000", "lr": "0.000908081", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "32998"} +[2022-08-01 10:39:09,771][train_inner][INFO] - {"epoch": 2, "update": 1.966, "loss": "2.496", "ppl": "5.64", "wps": "367455", "ups": "3.1", "wpb": "118517", "bsz": "256", "num_updates": "101200", "lr": "0.000907879", "gnorm": "0.598", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "33062"} +[2022-08-01 10:40:14,512][train_inner][INFO] - {"epoch": 2, "update": 1.97, "loss": "2.506", "ppl": "5.68", "wps": "365253", "ups": "3.09", "wpb": "118234", "bsz": "256", "num_updates": "101400", "lr": "0.000907677", "gnorm": "0.596", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "33127"} +[2022-08-01 10:41:19,451][train_inner][INFO] - {"epoch": 2, "update": 1.974, "loss": "2.501", "ppl": "5.66", "wps": "364842", "ups": "3.08", "wpb": "118461", "bsz": "256", "num_updates": "101600", "lr": "0.000907475", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "33192"} +[2022-08-01 10:42:24,035][train_inner][INFO] - {"epoch": 2, "update": 1.978, "loss": "2.496", "ppl": "5.64", "wps": "369076", "ups": "3.1", "wpb": "119179", "bsz": "256", "num_updates": "101800", "lr": "0.000907273", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.5", "wall": "33256"} +[2022-08-01 10:42:33,714][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 10:43:29,268][train_inner][INFO] - {"epoch": 2, "update": 1.982, "loss": "2.503", "ppl": "5.67", "wps": "363910", "ups": "3.07", "wpb": "118694", "bsz": "256", "num_updates": "102000", "lr": "0.000907071", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "33322"} +[2022-08-01 10:44:33,761][train_inner][INFO] - {"epoch": 2, "update": 1.986, "loss": "2.498", "ppl": "5.65", "wps": "366765", "ups": "3.1", "wpb": "118268", "bsz": "256", "num_updates": "102200", "lr": "0.000906869", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "33386"} +[2022-08-01 10:45:03,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 10:45:38,685][train_inner][INFO] - {"epoch": 2, "update": 1.99, "loss": "2.501", "ppl": "5.66", "wps": "363851", "ups": "3.08", "wpb": "118112", "bsz": "256", "num_updates": "102400", "lr": "0.000906667", "gnorm": "0.6", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.3", "wall": "33451"} +[2022-08-01 10:46:43,644][train_inner][INFO] - {"epoch": 2, "update": 1.994, "loss": "2.509", "ppl": "5.69", "wps": "363544", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "102600", "lr": "0.000906465", "gnorm": "0.598", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.7", "wall": "33516"} +[2022-08-01 10:47:48,400][train_inner][INFO] - {"epoch": 2, "update": 1.997, "loss": "2.501", "ppl": "5.66", "wps": "365799", "ups": "3.09", "wpb": "118436", "bsz": "256", "num_updates": "102800", "lr": "0.000906263", "gnorm": "0.628", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "33581"} +[2022-08-01 10:48:31,410][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below) +[2022-08-01 10:48:31,410][train][INFO] - {"epoch": 2, "train_loss": "2.571", "train_ppl": "5.94", "train_wps": "362805", "train_ups": "3.07", "train_wpb": "118299", "train_bsz": "256", "train_num_updates": "102933", "train_lr": "0.000906128", "train_gnorm": "0.591", "train_clip": "0", "train_loss_scale": "2", "train_train_wall": "16616", "train_gb_free": "24.3", "train_wall": "33624"} +[2022-08-01 10:48:31,496][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 10:48:31,499][fairseq.trainer][INFO] - begin training epoch 3 +[2022-08-01 10:48:31,499][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 10:49:03,480][train_inner][INFO] - {"epoch": 3, "update": 2.001, "loss": "2.502", "ppl": "5.67", "wps": "313018", "ups": "2.66", "wpb": "117505", "bsz": "255.4", "num_updates": "103000", "lr": "0.000906061", "gnorm": "0.601", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "24.3", "wall": "33656"} +[2022-08-01 10:50:08,385][train_inner][INFO] - {"epoch": 3, "update": 2.005, "loss": "2.501", "ppl": "5.66", "wps": "364367", "ups": "3.08", "wpb": "118245", "bsz": "256", "num_updates": "103200", "lr": "0.000905859", "gnorm": "0.597", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "33721"} +[2022-08-01 10:51:13,107][train_inner][INFO] - {"epoch": 3, "update": 2.009, "loss": "2.501", "ppl": "5.66", "wps": "364309", "ups": "3.09", "wpb": "117892", "bsz": "256", "num_updates": "103400", "lr": "0.000905657", "gnorm": "0.597", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "33785"} +[2022-08-01 10:52:18,314][train_inner][INFO] - {"epoch": 3, "update": 2.013, "loss": "2.489", "ppl": "5.62", "wps": "362558", "ups": "3.07", "wpb": "118205", "bsz": "256", "num_updates": "103600", "lr": "0.000905455", "gnorm": "0.597", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "33851"} +[2022-08-01 10:53:23,474][train_inner][INFO] - {"epoch": 3, "update": 2.017, "loss": "2.498", "ppl": "5.65", "wps": "364216", "ups": "3.07", "wpb": "118660", "bsz": "256", "num_updates": "103800", "lr": "0.000905253", "gnorm": "0.598", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "33916"} +[2022-08-01 10:53:51,941][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 10:53:52,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 10:53:53,155][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-01 10:54:28,989][train_inner][INFO] - {"epoch": 3, "update": 2.021, "loss": "2.588", "ppl": "6.01", "wps": "359404", "ups": "3.05", "wpb": "117730", "bsz": "256", "num_updates": "104000", "lr": "0.000905051", "gnorm": "1.373", "clip": "1", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.1", "wall": "33981"} +[2022-08-01 10:55:34,246][train_inner][INFO] - {"epoch": 3, "update": 2.025, "loss": "2.502", "ppl": "5.67", "wps": "362296", "ups": "3.06", "wpb": "118210", "bsz": "256", "num_updates": "104200", "lr": "0.000904848", "gnorm": "0.707", "clip": "0.5", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.9", "wall": "34047"} +[2022-08-01 10:56:40,105][train_inner][INFO] - {"epoch": 3, "update": 2.029, "loss": "2.5", "ppl": "5.66", "wps": "358775", "ups": "3.04", "wpb": "118142", "bsz": "256", "num_updates": "104400", "lr": "0.000904646", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "34112"} +[2022-08-01 10:57:45,153][train_inner][INFO] - {"epoch": 3, "update": 2.032, "loss": "2.497", "ppl": "5.65", "wps": "364337", "ups": "3.07", "wpb": "118494", "bsz": "256", "num_updates": "104600", "lr": "0.000904444", "gnorm": "0.598", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "26.1", "wall": "34177"} +[2022-08-01 10:58:26,175][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 +[2022-08-01 10:58:50,380][train_inner][INFO] - {"epoch": 3, "update": 2.036, "loss": "2.496", "ppl": "5.64", "wps": "362480", "ups": "3.07", "wpb": "118215", "bsz": "256", "num_updates": "104800", "lr": "0.000904242", "gnorm": "1.175", "clip": "0.5", "loss_scale": "0.125", "train_wall": "65", "gb_free": "23.7", "wall": "34243"} +[2022-08-01 10:59:55,048][train_inner][INFO] - {"epoch": 3, "update": 2.04, "loss": "2.504", "ppl": "5.67", "wps": "365693", "ups": "3.09", "wpb": "118242", "bsz": "256", "num_updates": "105000", "lr": "0.00090404", "gnorm": "0.932", "clip": "1.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.1", "wall": "34307"} +[2022-08-01 11:00:59,880][train_inner][INFO] - {"epoch": 3, "update": 2.044, "loss": "2.488", "ppl": "5.61", "wps": "365421", "ups": "3.08", "wpb": "118453", "bsz": "256", "num_updates": "105200", "lr": "0.000903838", "gnorm": "0.597", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.4", "wall": "34372"} +[2022-08-01 11:02:04,868][train_inner][INFO] - {"epoch": 3, "update": 2.048, "loss": "2.487", "ppl": "5.61", "wps": "364463", "ups": "3.08", "wpb": "118428", "bsz": "256", "num_updates": "105400", "lr": "0.000903636", "gnorm": "0.595", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "24.4", "wall": "34437"} +[2022-08-01 11:03:09,514][train_inner][INFO] - {"epoch": 3, "update": 2.052, "loss": "2.487", "ppl": "5.61", "wps": "367293", "ups": "3.09", "wpb": "118718", "bsz": "256", "num_updates": "105600", "lr": "0.000903434", "gnorm": "0.597", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "23.5", "wall": "34502"} +[2022-08-01 11:04:14,371][train_inner][INFO] - {"epoch": 3, "update": 2.056, "loss": "2.491", "ppl": "5.62", "wps": "363877", "ups": "3.08", "wpb": "117999", "bsz": "256", "num_updates": "105800", "lr": "0.000903232", "gnorm": "0.6", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "26.4", "wall": "34567"} +[2022-08-01 11:05:20,489][train_inner][INFO] - {"epoch": 3, "update": 2.06, "loss": "2.488", "ppl": "5.61", "wps": "358229", "ups": "3.02", "wpb": "118425", "bsz": "256", "num_updates": "106000", "lr": "0.00090303", "gnorm": "0.597", "clip": "0", "loss_scale": "0.125", "train_wall": "66", "gb_free": "28.2", "wall": "34633"} +[2022-08-01 11:06:25,390][train_inner][INFO] - {"epoch": 3, "update": 2.064, "loss": "2.49", "ppl": "5.62", "wps": "365509", "ups": "3.08", "wpb": "118608", "bsz": "256", "num_updates": "106200", "lr": "0.000902828", "gnorm": "0.598", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "21.6", "wall": "34698"} +[2022-08-01 11:07:30,310][train_inner][INFO] - {"epoch": 3, "update": 2.067, "loss": "2.493", "ppl": "5.63", "wps": "363900", "ups": "3.08", "wpb": "118120", "bsz": "256", "num_updates": "106400", "lr": "0.000902626", "gnorm": "0.599", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "21.7", "wall": "34763"} +[2022-08-01 11:08:35,391][train_inner][INFO] - {"epoch": 3, "update": 2.071, "loss": "2.497", "ppl": "5.65", "wps": "362177", "ups": "3.07", "wpb": "117854", "bsz": "256", "num_updates": "106600", "lr": "0.000902424", "gnorm": "0.603", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "34828"} +[2022-08-01 11:09:40,216][train_inner][INFO] - {"epoch": 3, "update": 2.075, "loss": "2.484", "ppl": "5.6", "wps": "366131", "ups": "3.09", "wpb": "118670", "bsz": "256", "num_updates": "106800", "lr": "0.000902222", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21", "wall": "34893"} +[2022-08-01 11:10:45,122][train_inner][INFO] - {"epoch": 3, "update": 2.079, "loss": "2.484", "ppl": "5.59", "wps": "363443", "ups": "3.08", "wpb": "117946", "bsz": "256", "num_updates": "107000", "lr": "0.00090202", "gnorm": "0.601", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "27.1", "wall": "34957"} +[2022-08-01 11:11:50,029][train_inner][INFO] - {"epoch": 3, "update": 2.083, "loss": "2.489", "ppl": "5.62", "wps": "366610", "ups": "3.08", "wpb": "118975", "bsz": "256", "num_updates": "107200", "lr": "0.000901818", "gnorm": "0.599", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.6", "wall": "35022"} +[2022-08-01 11:12:54,891][train_inner][INFO] - {"epoch": 3, "update": 2.087, "loss": "2.484", "ppl": "5.59", "wps": "363795", "ups": "3.08", "wpb": "117982", "bsz": "256", "num_updates": "107400", "lr": "0.000901616", "gnorm": "0.603", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "35087"} +[2022-08-01 11:13:59,401][train_inner][INFO] - {"epoch": 3, "update": 2.091, "loss": "2.492", "ppl": "5.63", "wps": "364546", "ups": "3.1", "wpb": "117582", "bsz": "256", "num_updates": "107600", "lr": "0.000901414", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.8", "wall": "35152"} +[2022-08-01 11:15:04,444][train_inner][INFO] - {"epoch": 3, "update": 2.095, "loss": "2.492", "ppl": "5.62", "wps": "362518", "ups": "3.07", "wpb": "117895", "bsz": "256", "num_updates": "107800", "lr": "0.000901212", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.3", "wall": "35217"} +[2022-08-01 11:16:09,391][train_inner][INFO] - {"epoch": 3, "update": 2.098, "loss": "2.482", "ppl": "5.59", "wps": "364481", "ups": "3.08", "wpb": "118359", "bsz": "256", "num_updates": "108000", "lr": "0.00090101", "gnorm": "0.599", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.1", "wall": "35282"} +[2022-08-01 11:17:14,394][train_inner][INFO] - {"epoch": 3, "update": 2.102, "loss": "2.484", "ppl": "5.59", "wps": "362609", "ups": "3.08", "wpb": "117851", "bsz": "256", "num_updates": "108200", "lr": "0.000900808", "gnorm": "0.599", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.1", "wall": "35347"} +[2022-08-01 11:18:19,153][train_inner][INFO] - {"epoch": 3, "update": 2.106, "loss": "2.484", "ppl": "5.59", "wps": "366457", "ups": "3.09", "wpb": "118657", "bsz": "256", "num_updates": "108400", "lr": "0.000900606", "gnorm": "0.6", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "24.9", "wall": "35411"} +[2022-08-01 11:19:23,750][train_inner][INFO] - {"epoch": 3, "update": 2.11, "loss": "2.481", "ppl": "5.58", "wps": "366603", "ups": "3.1", "wpb": "118404", "bsz": "256", "num_updates": "108600", "lr": "0.000900404", "gnorm": "0.603", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "35476"} +[2022-08-01 11:20:28,245][train_inner][INFO] - {"epoch": 3, "update": 2.114, "loss": "2.483", "ppl": "5.59", "wps": "368644", "ups": "3.1", "wpb": "118877", "bsz": "256", "num_updates": "108800", "lr": "0.000900202", "gnorm": "0.598", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.3", "wall": "35541"} +[2022-08-01 11:21:32,914][train_inner][INFO] - {"epoch": 3, "update": 2.118, "loss": "2.478", "ppl": "5.57", "wps": "364762", "ups": "3.09", "wpb": "117942", "bsz": "256", "num_updates": "109000", "lr": "0.0009", "gnorm": "0.602", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "35605"} +[2022-08-01 11:22:37,670][train_inner][INFO] - {"epoch": 3, "update": 2.122, "loss": "2.49", "ppl": "5.62", "wps": "365929", "ups": "3.09", "wpb": "118479", "bsz": "256", "num_updates": "109200", "lr": "0.000899798", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.4", "wall": "35670"} +[2022-08-01 11:23:42,030][train_inner][INFO] - {"epoch": 3, "update": 2.126, "loss": "2.488", "ppl": "5.61", "wps": "366895", "ups": "3.11", "wpb": "118064", "bsz": "256", "num_updates": "109400", "lr": "0.000899596", "gnorm": "0.606", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "35734"} +[2022-08-01 11:24:47,036][train_inner][INFO] - {"epoch": 3, "update": 2.13, "loss": "2.482", "ppl": "5.59", "wps": "366171", "ups": "3.08", "wpb": "119015", "bsz": "256", "num_updates": "109600", "lr": "0.000899394", "gnorm": "0.601", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "35799"} +[2022-08-01 11:25:51,976][train_inner][INFO] - {"epoch": 3, "update": 2.133, "loss": "2.49", "ppl": "5.62", "wps": "362875", "ups": "3.08", "wpb": "117824", "bsz": "256", "num_updates": "109800", "lr": "0.000899192", "gnorm": "0.607", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "35864"} +[2022-08-01 11:26:56,866][train_inner][INFO] - {"epoch": 3, "update": 2.137, "loss": "2.477", "ppl": "5.57", "wps": "364342", "ups": "3.08", "wpb": "118209", "bsz": "256", "num_updates": "110000", "lr": "0.00089899", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "35929"} +[2022-08-01 11:28:01,769][train_inner][INFO] - {"epoch": 3, "update": 2.141, "loss": "2.477", "ppl": "5.57", "wps": "365186", "ups": "3.08", "wpb": "118506", "bsz": "256", "num_updates": "110200", "lr": "0.000898788", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "35994"} +[2022-08-01 11:29:06,668][train_inner][INFO] - {"epoch": 3, "update": 2.145, "loss": "2.475", "ppl": "5.56", "wps": "364725", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "110400", "lr": "0.000898586", "gnorm": "0.604", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.9", "wall": "36059"} +[2022-08-01 11:30:11,980][train_inner][INFO] - {"epoch": 3, "update": 2.149, "loss": "2.478", "ppl": "5.57", "wps": "362818", "ups": "3.06", "wpb": "118479", "bsz": "256", "num_updates": "110600", "lr": "0.000898384", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "36124"} +[2022-08-01 11:31:16,939][train_inner][INFO] - {"epoch": 3, "update": 2.153, "loss": "2.479", "ppl": "5.57", "wps": "364714", "ups": "3.08", "wpb": "118456", "bsz": "256", "num_updates": "110800", "lr": "0.000898182", "gnorm": "0.605", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.1", "wall": "36189"} +[2022-08-01 11:32:22,010][train_inner][INFO] - {"epoch": 3, "update": 2.157, "loss": "2.478", "ppl": "5.57", "wps": "364400", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "111000", "lr": "0.00089798", "gnorm": "0.618", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "36254"} +[2022-08-01 11:33:27,014][train_inner][INFO] - {"epoch": 3, "update": 2.161, "loss": "2.48", "ppl": "5.58", "wps": "362542", "ups": "3.08", "wpb": "117832", "bsz": "256", "num_updates": "111200", "lr": "0.000897778", "gnorm": "0.606", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24", "wall": "36319"} +[2022-08-01 11:34:32,069][train_inner][INFO] - {"epoch": 3, "update": 2.165, "loss": "2.482", "ppl": "5.59", "wps": "364070", "ups": "3.07", "wpb": "118421", "bsz": "256", "num_updates": "111400", "lr": "0.000897576", "gnorm": "0.604", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22", "wall": "36384"} +[2022-08-01 11:35:37,854][train_inner][INFO] - {"epoch": 3, "update": 2.168, "loss": "2.486", "ppl": "5.6", "wps": "358534", "ups": "3.04", "wpb": "117929", "bsz": "256", "num_updates": "111600", "lr": "0.000897374", "gnorm": "0.605", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "36450"} +[2022-08-01 11:36:43,380][train_inner][INFO] - {"epoch": 3, "update": 2.172, "loss": "2.477", "ppl": "5.57", "wps": "361498", "ups": "3.05", "wpb": "118437", "bsz": "256", "num_updates": "111800", "lr": "0.000897172", "gnorm": "0.604", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.5", "wall": "36516"} +[2022-08-01 11:37:48,459][train_inner][INFO] - {"epoch": 3, "update": 2.176, "loss": "2.482", "ppl": "5.59", "wps": "362201", "ups": "3.07", "wpb": "117856", "bsz": "256", "num_updates": "112000", "lr": "0.00089697", "gnorm": "0.62", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.4", "wall": "36581"} +[2022-08-01 11:38:53,152][train_inner][INFO] - {"epoch": 3, "update": 2.18, "loss": "2.48", "ppl": "5.58", "wps": "365525", "ups": "3.09", "wpb": "118234", "bsz": "256", "num_updates": "112200", "lr": "0.000896768", "gnorm": "0.603", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.9", "wall": "36645"} +[2022-08-01 11:39:58,187][train_inner][INFO] - {"epoch": 3, "update": 2.184, "loss": "2.475", "ppl": "5.56", "wps": "364353", "ups": "3.08", "wpb": "118477", "bsz": "256", "num_updates": "112400", "lr": "0.000896566", "gnorm": "0.606", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "36711"} +[2022-08-01 11:41:02,664][train_inner][INFO] - {"epoch": 3, "update": 2.188, "loss": "2.478", "ppl": "5.57", "wps": "365613", "ups": "3.1", "wpb": "117865", "bsz": "256", "num_updates": "112600", "lr": "0.000896364", "gnorm": "0.606", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.8", "wall": "36775"} +[2022-08-01 11:42:07,773][train_inner][INFO] - {"epoch": 3, "update": 2.192, "loss": "2.48", "ppl": "5.58", "wps": "362926", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "112800", "lr": "0.000896162", "gnorm": "0.612", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "36840"} +[2022-08-01 11:43:12,954][train_inner][INFO] - {"epoch": 3, "update": 2.196, "loss": "2.487", "ppl": "5.61", "wps": "361650", "ups": "3.07", "wpb": "117861", "bsz": "256", "num_updates": "113000", "lr": "0.00089596", "gnorm": "0.626", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "36905"} +[2022-08-01 11:44:17,631][train_inner][INFO] - {"epoch": 3, "update": 2.199, "loss": "2.479", "ppl": "5.57", "wps": "365457", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "113200", "lr": "0.000895758", "gnorm": "0.61", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.5", "wall": "36970"} +[2022-08-01 11:45:11,940][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 11:45:12,230][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 11:45:12,840][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-01 11:45:13,121][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 +[2022-08-01 11:45:23,523][train_inner][INFO] - {"epoch": 3, "update": 2.203, "loss": "2.515", "ppl": "5.72", "wps": "360268", "ups": "3.04", "wpb": "118691", "bsz": "256", "num_updates": "113400", "lr": "0.000895556", "gnorm": "1.209", "clip": "1", "loss_scale": "0.125", "train_wall": "66", "gb_free": "20.4", "wall": "37036"} +[2022-08-01 11:46:28,097][train_inner][INFO] - {"epoch": 3, "update": 2.207, "loss": "2.482", "ppl": "5.58", "wps": "365575", "ups": "3.1", "wpb": "118033", "bsz": "256", "num_updates": "113600", "lr": "0.000895354", "gnorm": "0.612", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.2", "wall": "37100"} +[2022-08-01 11:47:32,945][train_inner][INFO] - {"epoch": 3, "update": 2.211, "loss": "2.482", "ppl": "5.59", "wps": "363016", "ups": "3.08", "wpb": "117701", "bsz": "256", "num_updates": "113800", "lr": "0.000895152", "gnorm": "0.607", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.7", "wall": "37165"} +[2022-08-01 11:48:37,853][train_inner][INFO] - {"epoch": 3, "update": 2.215, "loss": "2.479", "ppl": "5.57", "wps": "364335", "ups": "3.08", "wpb": "118241", "bsz": "256", "num_updates": "114000", "lr": "0.000894949", "gnorm": "0.613", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "37230"} +[2022-08-01 11:49:42,880][train_inner][INFO] - {"epoch": 3, "update": 2.219, "loss": "2.478", "ppl": "5.57", "wps": "362811", "ups": "3.08", "wpb": "117959", "bsz": "256", "num_updates": "114200", "lr": "0.000894747", "gnorm": "0.611", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.8", "wall": "37295"} +[2022-08-01 11:50:47,847][train_inner][INFO] - {"epoch": 3, "update": 2.223, "loss": "2.472", "ppl": "5.55", "wps": "364122", "ups": "3.08", "wpb": "118278", "bsz": "256", "num_updates": "114400", "lr": "0.000894545", "gnorm": "0.605", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "37360"} +[2022-08-01 11:51:52,527][train_inner][INFO] - {"epoch": 3, "update": 2.227, "loss": "2.471", "ppl": "5.54", "wps": "366565", "ups": "3.09", "wpb": "118545", "bsz": "256", "num_updates": "114600", "lr": "0.000894343", "gnorm": "0.608", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "24", "wall": "37425"} +[2022-08-01 11:52:57,370][train_inner][INFO] - {"epoch": 3, "update": 2.231, "loss": "2.477", "ppl": "5.57", "wps": "365223", "ups": "3.08", "wpb": "118410", "bsz": "256", "num_updates": "114800", "lr": "0.000894141", "gnorm": "0.608", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.8", "wall": "37490"} +[2022-08-01 11:54:02,133][train_inner][INFO] - {"epoch": 3, "update": 2.235, "loss": "2.476", "ppl": "5.56", "wps": "365608", "ups": "3.09", "wpb": "118387", "bsz": "256", "num_updates": "115000", "lr": "0.000893939", "gnorm": "0.611", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.5", "wall": "37554"} +[2022-08-01 11:55:07,059][train_inner][INFO] - {"epoch": 3, "update": 2.238, "loss": "2.479", "ppl": "5.57", "wps": "365535", "ups": "3.08", "wpb": "118662", "bsz": "256", "num_updates": "115200", "lr": "0.000893737", "gnorm": "0.61", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.8", "wall": "37619"} +[2022-08-01 11:56:12,102][train_inner][INFO] - {"epoch": 3, "update": 2.242, "loss": "2.473", "ppl": "5.55", "wps": "362685", "ups": "3.07", "wpb": "117948", "bsz": "255.9", "num_updates": "115400", "lr": "0.000893535", "gnorm": "0.611", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.7", "wall": "37684"} +[2022-08-01 11:57:17,211][train_inner][INFO] - {"epoch": 3, "update": 2.246, "loss": "2.471", "ppl": "5.55", "wps": "364470", "ups": "3.07", "wpb": "118649", "bsz": "256", "num_updates": "115600", "lr": "0.000893333", "gnorm": "0.615", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24", "wall": "37750"} +[2022-08-01 11:58:22,094][train_inner][INFO] - {"epoch": 3, "update": 2.25, "loss": "2.466", "ppl": "5.52", "wps": "366981", "ups": "3.08", "wpb": "119053", "bsz": "256", "num_updates": "115800", "lr": "0.000893131", "gnorm": "0.609", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "37814"} +[2022-08-01 11:59:27,146][train_inner][INFO] - {"epoch": 3, "update": 2.254, "loss": "2.469", "ppl": "5.54", "wps": "363453", "ups": "3.07", "wpb": "118215", "bsz": "256", "num_updates": "116000", "lr": "0.000892929", "gnorm": "0.619", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "25.2", "wall": "37879"} +[2022-08-01 12:00:32,246][train_inner][INFO] - {"epoch": 3, "update": 2.258, "loss": "2.472", "ppl": "5.55", "wps": "363236", "ups": "3.07", "wpb": "118232", "bsz": "256", "num_updates": "116200", "lr": "0.000892727", "gnorm": "0.611", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.6", "wall": "37945"} +[2022-08-01 12:01:37,201][train_inner][INFO] - {"epoch": 3, "update": 2.262, "loss": "2.473", "ppl": "5.55", "wps": "365851", "ups": "3.08", "wpb": "118818", "bsz": "256", "num_updates": "116400", "lr": "0.000892525", "gnorm": "0.612", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "38010"} +[2022-08-01 12:02:41,903][train_inner][INFO] - {"epoch": 3, "update": 2.266, "loss": "2.476", "ppl": "5.56", "wps": "365040", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "116600", "lr": "0.000892323", "gnorm": "0.613", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.2", "wall": "38074"} +[2022-08-01 12:03:47,120][train_inner][INFO] - {"epoch": 3, "update": 2.269, "loss": "2.472", "ppl": "5.55", "wps": "362454", "ups": "3.07", "wpb": "118189", "bsz": "256", "num_updates": "116800", "lr": "0.000892121", "gnorm": "0.612", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.9", "wall": "38139"} +[2022-08-01 12:04:51,938][train_inner][INFO] - {"epoch": 3, "update": 2.273, "loss": "2.476", "ppl": "5.56", "wps": "363713", "ups": "3.09", "wpb": "117874", "bsz": "256", "num_updates": "117000", "lr": "0.000891919", "gnorm": "0.617", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "25.5", "wall": "38204"} +[2022-08-01 12:05:56,538][train_inner][INFO] - {"epoch": 3, "update": 2.277, "loss": "2.477", "ppl": "5.57", "wps": "364520", "ups": "3.1", "wpb": "117738", "bsz": "256", "num_updates": "117200", "lr": "0.000891717", "gnorm": "0.62", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.4", "wall": "38269"} +[2022-08-01 12:07:01,544][train_inner][INFO] - {"epoch": 3, "update": 2.281, "loss": "2.467", "ppl": "5.53", "wps": "365234", "ups": "3.08", "wpb": "118710", "bsz": "256", "num_updates": "117400", "lr": "0.000891515", "gnorm": "0.614", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.3", "wall": "38334"} +[2022-08-01 12:08:06,262][train_inner][INFO] - {"epoch": 3, "update": 2.285, "loss": "2.469", "ppl": "5.54", "wps": "364709", "ups": "3.09", "wpb": "118012", "bsz": "256", "num_updates": "117600", "lr": "0.000891313", "gnorm": "0.616", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "38399"} +[2022-08-01 12:09:10,972][train_inner][INFO] - {"epoch": 3, "update": 2.289, "loss": "2.472", "ppl": "5.55", "wps": "366764", "ups": "3.09", "wpb": "118666", "bsz": "256", "num_updates": "117800", "lr": "0.000891111", "gnorm": "0.613", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "38463"} +[2022-08-01 12:10:16,243][train_inner][INFO] - {"epoch": 3, "update": 2.293, "loss": "2.465", "ppl": "5.52", "wps": "363663", "ups": "3.06", "wpb": "118680", "bsz": "256", "num_updates": "118000", "lr": "0.000890909", "gnorm": "0.613", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "38529"} +[2022-08-01 12:11:21,345][train_inner][INFO] - {"epoch": 3, "update": 2.297, "loss": "2.47", "ppl": "5.54", "wps": "364575", "ups": "3.07", "wpb": "118671", "bsz": "256", "num_updates": "118200", "lr": "0.000890707", "gnorm": "0.615", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "38594"} +[2022-08-01 12:12:26,062][train_inner][INFO] - {"epoch": 3, "update": 2.301, "loss": "2.466", "ppl": "5.52", "wps": "365970", "ups": "3.09", "wpb": "118422", "bsz": "256", "num_updates": "118400", "lr": "0.000890505", "gnorm": "0.614", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23", "wall": "38658"} +[2022-08-01 12:13:30,926][train_inner][INFO] - {"epoch": 3, "update": 2.304, "loss": "2.468", "ppl": "5.53", "wps": "365667", "ups": "3.08", "wpb": "118590", "bsz": "256", "num_updates": "118600", "lr": "0.000890303", "gnorm": "0.611", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.6", "wall": "38723"} +[2022-08-01 12:14:35,801][train_inner][INFO] - {"epoch": 3, "update": 2.308, "loss": "2.466", "ppl": "5.53", "wps": "365708", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "118800", "lr": "0.000890101", "gnorm": "0.616", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.5", "wall": "38788"} +[2022-08-01 12:15:41,137][train_inner][INFO] - {"epoch": 3, "update": 2.312, "loss": "2.463", "ppl": "5.52", "wps": "363718", "ups": "3.06", "wpb": "118817", "bsz": "256", "num_updates": "119000", "lr": "0.000889899", "gnorm": "0.615", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.2", "wall": "38853"} +[2022-08-01 12:16:46,102][train_inner][INFO] - {"epoch": 3, "update": 2.316, "loss": "2.468", "ppl": "5.53", "wps": "363967", "ups": "3.08", "wpb": "118224", "bsz": "256", "num_updates": "119200", "lr": "0.000889697", "gnorm": "0.635", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "38918"} +[2022-08-01 12:17:50,926][train_inner][INFO] - {"epoch": 3, "update": 2.32, "loss": "2.468", "ppl": "5.53", "wps": "365942", "ups": "3.09", "wpb": "118608", "bsz": "256", "num_updates": "119400", "lr": "0.000889495", "gnorm": "0.617", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "38983"} +[2022-08-01 12:18:55,747][train_inner][INFO] - {"epoch": 3, "update": 2.324, "loss": "2.474", "ppl": "5.56", "wps": "363580", "ups": "3.09", "wpb": "117836", "bsz": "256", "num_updates": "119600", "lr": "0.000889293", "gnorm": "0.62", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.3", "wall": "39048"} +[2022-08-01 12:20:00,766][train_inner][INFO] - {"epoch": 3, "update": 2.328, "loss": "2.47", "ppl": "5.54", "wps": "363184", "ups": "3.08", "wpb": "118067", "bsz": "256", "num_updates": "119800", "lr": "0.000889091", "gnorm": "0.618", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "39113"} +[2022-08-01 12:21:05,423][train_inner][INFO] - {"epoch": 3, "update": 2.332, "loss": "2.473", "ppl": "5.55", "wps": "364172", "ups": "3.09", "wpb": "117730", "bsz": "256", "num_updates": "120000", "lr": "0.000888889", "gnorm": "0.617", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "39178"} +[2022-08-01 12:22:10,492][train_inner][INFO] - {"epoch": 3, "update": 2.336, "loss": "2.465", "ppl": "5.52", "wps": "362946", "ups": "3.07", "wpb": "118081", "bsz": "256", "num_updates": "120200", "lr": "0.000888687", "gnorm": "0.626", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.7", "wall": "39243"} +[2022-08-01 12:23:14,877][train_inner][INFO] - {"epoch": 3, "update": 2.339, "loss": "2.466", "ppl": "5.52", "wps": "366543", "ups": "3.11", "wpb": "117998", "bsz": "256", "num_updates": "120400", "lr": "0.000888485", "gnorm": "0.62", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.3", "wall": "39307"} +[2022-08-01 12:24:20,291][train_inner][INFO] - {"epoch": 3, "update": 2.343, "loss": "2.461", "ppl": "5.51", "wps": "362328", "ups": "3.06", "wpb": "118504", "bsz": "256", "num_updates": "120600", "lr": "0.000888283", "gnorm": "0.622", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26.8", "wall": "39373"} +[2022-08-01 12:25:24,842][train_inner][INFO] - {"epoch": 3, "update": 2.347, "loss": "2.468", "ppl": "5.53", "wps": "365696", "ups": "3.1", "wpb": "118030", "bsz": "256", "num_updates": "120800", "lr": "0.000888081", "gnorm": "0.622", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.7", "wall": "39437"} +[2022-08-01 12:26:30,113][train_inner][INFO] - {"epoch": 3, "update": 2.351, "loss": "2.458", "ppl": "5.5", "wps": "363602", "ups": "3.06", "wpb": "118662", "bsz": "256", "num_updates": "121000", "lr": "0.000887879", "gnorm": "0.619", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "39502"} +[2022-08-01 12:27:35,320][train_inner][INFO] - {"epoch": 3, "update": 2.355, "loss": "2.464", "ppl": "5.52", "wps": "363579", "ups": "3.07", "wpb": "118538", "bsz": "256", "num_updates": "121200", "lr": "0.000887677", "gnorm": "0.623", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22", "wall": "39568"} +[2022-08-01 12:28:40,013][train_inner][INFO] - {"epoch": 3, "update": 2.359, "loss": "2.475", "ppl": "5.56", "wps": "365325", "ups": "3.09", "wpb": "118167", "bsz": "256", "num_updates": "121400", "lr": "0.000887475", "gnorm": "0.625", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "39632"} +[2022-08-01 12:29:44,781][train_inner][INFO] - {"epoch": 3, "update": 2.363, "loss": "2.459", "ppl": "5.5", "wps": "364842", "ups": "3.09", "wpb": "118149", "bsz": "256", "num_updates": "121600", "lr": "0.000887273", "gnorm": "0.621", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23", "wall": "39697"} +[2022-08-01 12:30:49,674][train_inner][INFO] - {"epoch": 3, "update": 2.367, "loss": "2.462", "ppl": "5.51", "wps": "364878", "ups": "3.08", "wpb": "118390", "bsz": "256", "num_updates": "121800", "lr": "0.000887071", "gnorm": "0.626", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "39762"} +[2022-08-01 12:31:54,369][train_inner][INFO] - {"epoch": 3, "update": 2.37, "loss": "2.461", "ppl": "5.5", "wps": "366016", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "122000", "lr": "0.000886869", "gnorm": "0.628", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.1", "wall": "39827"} +[2022-08-01 12:32:59,149][train_inner][INFO] - {"epoch": 3, "update": 2.374, "loss": "2.463", "ppl": "5.51", "wps": "365483", "ups": "3.09", "wpb": "118378", "bsz": "256", "num_updates": "122200", "lr": "0.000886667", "gnorm": "0.625", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.3", "wall": "39891"} +[2022-08-01 12:34:03,790][train_inner][INFO] - {"epoch": 3, "update": 2.378, "loss": "2.46", "ppl": "5.5", "wps": "365235", "ups": "3.09", "wpb": "118045", "bsz": "256", "num_updates": "122400", "lr": "0.000886465", "gnorm": "0.623", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "39956"} +[2022-08-01 12:35:08,682][train_inner][INFO] - {"epoch": 3, "update": 2.382, "loss": "2.458", "ppl": "5.49", "wps": "364070", "ups": "3.08", "wpb": "118125", "bsz": "255.9", "num_updates": "122600", "lr": "0.000886263", "gnorm": "0.627", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.2", "wall": "40021"} +[2022-08-01 12:36:13,473][train_inner][INFO] - {"epoch": 3, "update": 2.386, "loss": "2.464", "ppl": "5.52", "wps": "362520", "ups": "3.09", "wpb": "117439", "bsz": "256", "num_updates": "122800", "lr": "0.000886061", "gnorm": "0.631", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.3", "wall": "40086"} +[2022-08-01 12:37:18,277][train_inner][INFO] - {"epoch": 3, "update": 2.39, "loss": "2.463", "ppl": "5.51", "wps": "364659", "ups": "3.09", "wpb": "118154", "bsz": "256", "num_updates": "123000", "lr": "0.000885859", "gnorm": "0.629", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.7", "wall": "40151"} +[2022-08-01 12:38:23,278][train_inner][INFO] - {"epoch": 3, "update": 2.394, "loss": "2.457", "ppl": "5.49", "wps": "363852", "ups": "3.08", "wpb": "118252", "bsz": "256", "num_updates": "123200", "lr": "0.000885657", "gnorm": "0.626", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.2", "wall": "40216"} +[2022-08-01 12:39:28,254][train_inner][INFO] - {"epoch": 3, "update": 2.398, "loss": "2.457", "ppl": "5.49", "wps": "365312", "ups": "3.08", "wpb": "118680", "bsz": "256", "num_updates": "123400", "lr": "0.000885455", "gnorm": "0.623", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "40281"} +[2022-08-01 12:40:32,892][train_inner][INFO] - {"epoch": 3, "update": 2.402, "loss": "2.457", "ppl": "5.49", "wps": "365320", "ups": "3.09", "wpb": "118066", "bsz": "256", "num_updates": "123600", "lr": "0.000885253", "gnorm": "0.63", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "40345"} +[2022-08-01 12:41:37,888][train_inner][INFO] - {"epoch": 3, "update": 2.405, "loss": "2.457", "ppl": "5.49", "wps": "365419", "ups": "3.08", "wpb": "118753", "bsz": "256", "num_updates": "123800", "lr": "0.000885051", "gnorm": "0.63", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "40410"} +[2022-08-01 12:42:42,834][train_inner][INFO] - {"epoch": 3, "update": 2.409, "loss": "2.459", "ppl": "5.5", "wps": "362359", "ups": "3.08", "wpb": "117667", "bsz": "256", "num_updates": "124000", "lr": "0.000884848", "gnorm": "0.628", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "40475"} +[2022-08-01 12:43:48,011][train_inner][INFO] - {"epoch": 3, "update": 2.413, "loss": "2.467", "ppl": "5.53", "wps": "360417", "ups": "3.07", "wpb": "117453", "bsz": "256", "num_updates": "124200", "lr": "0.000884646", "gnorm": "0.629", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.4", "wall": "40540"} +[2022-08-01 12:44:52,875][train_inner][INFO] - {"epoch": 3, "update": 2.417, "loss": "2.458", "ppl": "5.49", "wps": "366859", "ups": "3.08", "wpb": "118977", "bsz": "256", "num_updates": "124400", "lr": "0.000884444", "gnorm": "0.623", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "40605"} +[2022-08-01 12:45:57,536][train_inner][INFO] - {"epoch": 3, "update": 2.421, "loss": "2.462", "ppl": "5.51", "wps": "366324", "ups": "3.09", "wpb": "118433", "bsz": "256", "num_updates": "124600", "lr": "0.000884242", "gnorm": "0.624", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.2", "wall": "40670"} +[2022-08-01 12:47:02,194][train_inner][INFO] - {"epoch": 3, "update": 2.425, "loss": "2.457", "ppl": "5.49", "wps": "366877", "ups": "3.09", "wpb": "118606", "bsz": "256", "num_updates": "124800", "lr": "0.00088404", "gnorm": "0.634", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.8", "wall": "40735"} +[2022-08-01 12:48:06,986][train_inner][INFO] - {"epoch": 3, "update": 2.429, "loss": "2.453", "ppl": "5.48", "wps": "365159", "ups": "3.09", "wpb": "118294", "bsz": "256", "num_updates": "125000", "lr": "0.000883838", "gnorm": "0.625", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "40799"} +[2022-08-01 12:49:12,197][train_inner][INFO] - {"epoch": 3, "update": 2.433, "loss": "2.452", "ppl": "5.47", "wps": "366723", "ups": "3.07", "wpb": "119570", "bsz": "256", "num_updates": "125200", "lr": "0.000883636", "gnorm": "0.625", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "40865"} +[2022-08-01 12:50:17,190][train_inner][INFO] - {"epoch": 3, "update": 2.437, "loss": "2.452", "ppl": "5.47", "wps": "364539", "ups": "3.08", "wpb": "118461", "bsz": "256", "num_updates": "125400", "lr": "0.000883434", "gnorm": "0.63", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "40930"} +[2022-08-01 12:51:22,411][train_inner][INFO] - {"epoch": 3, "update": 2.44, "loss": "2.459", "ppl": "5.5", "wps": "363345", "ups": "3.07", "wpb": "118487", "bsz": "256", "num_updates": "125600", "lr": "0.000883232", "gnorm": "0.624", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24", "wall": "40995"} +[2022-08-01 12:52:27,038][train_inner][INFO] - {"epoch": 3, "update": 2.444, "loss": "2.463", "ppl": "5.51", "wps": "364014", "ups": "3.09", "wpb": "117623", "bsz": "256", "num_updates": "125800", "lr": "0.00088303", "gnorm": "0.635", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "41059"} +[2022-08-01 12:53:31,878][train_inner][INFO] - {"epoch": 3, "update": 2.448, "loss": "2.459", "ppl": "5.5", "wps": "364919", "ups": "3.08", "wpb": "118305", "bsz": "256", "num_updates": "126000", "lr": "0.000882828", "gnorm": "0.625", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "41124"} +[2022-08-01 12:54:37,186][train_inner][INFO] - {"epoch": 3, "update": 2.452, "loss": "2.454", "ppl": "5.48", "wps": "363431", "ups": "3.06", "wpb": "118673", "bsz": "256", "num_updates": "126200", "lr": "0.000882626", "gnorm": "0.627", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "41190"} +[2022-08-01 12:55:41,917][train_inner][INFO] - {"epoch": 3, "update": 2.456, "loss": "2.462", "ppl": "5.51", "wps": "363484", "ups": "3.09", "wpb": "117642", "bsz": "256", "num_updates": "126400", "lr": "0.000882424", "gnorm": "0.636", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "41254"} +[2022-08-01 12:56:45,741][train_inner][INFO] - {"epoch": 3, "update": 2.46, "loss": "2.454", "ppl": "5.48", "wps": "370140", "ups": "3.13", "wpb": "118118", "bsz": "256", "num_updates": "126600", "lr": "0.000882222", "gnorm": "0.626", "clip": "0", "loss_scale": "8", "train_wall": "63", "gb_free": "22.1", "wall": "41318"} +[2022-08-01 12:57:50,394][train_inner][INFO] - {"epoch": 3, "update": 2.464, "loss": "2.459", "ppl": "5.5", "wps": "364485", "ups": "3.09", "wpb": "117824", "bsz": "256", "num_updates": "126800", "lr": "0.00088202", "gnorm": "0.636", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "41383"} +[2022-08-01 12:58:56,265][train_inner][INFO] - {"epoch": 3, "update": 2.468, "loss": "2.453", "ppl": "5.47", "wps": "360250", "ups": "3.04", "wpb": "118648", "bsz": "256", "num_updates": "127000", "lr": "0.000881818", "gnorm": "0.631", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.9", "wall": "41449"} +[2022-08-01 13:00:00,979][train_inner][INFO] - {"epoch": 3, "update": 2.471, "loss": "2.452", "ppl": "5.47", "wps": "366514", "ups": "3.09", "wpb": "118590", "bsz": "256", "num_updates": "127200", "lr": "0.000881616", "gnorm": "0.629", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "41513"} +[2022-08-01 13:01:05,739][train_inner][INFO] - {"epoch": 3, "update": 2.475, "loss": "2.46", "ppl": "5.5", "wps": "364049", "ups": "3.09", "wpb": "117878", "bsz": "256", "num_updates": "127400", "lr": "0.000881414", "gnorm": "0.633", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "41578"} +[2022-08-01 13:02:10,397][train_inner][INFO] - {"epoch": 3, "update": 2.479, "loss": "2.451", "ppl": "5.47", "wps": "366429", "ups": "3.09", "wpb": "118461", "bsz": "256", "num_updates": "127600", "lr": "0.000881212", "gnorm": "0.629", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "41643"} +[2022-08-01 13:03:15,214][train_inner][INFO] - {"epoch": 3, "update": 2.483, "loss": "2.456", "ppl": "5.49", "wps": "363294", "ups": "3.09", "wpb": "117736", "bsz": "256", "num_updates": "127800", "lr": "0.00088101", "gnorm": "0.634", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "41708"} +[2022-08-01 13:04:20,192][train_inner][INFO] - {"epoch": 3, "update": 2.487, "loss": "2.451", "ppl": "5.47", "wps": "365423", "ups": "3.08", "wpb": "118721", "bsz": "256", "num_updates": "128000", "lr": "0.000880808", "gnorm": "0.629", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "41773"} +[2022-08-01 13:05:24,934][train_inner][INFO] - {"epoch": 3, "update": 2.491, "loss": "2.457", "ppl": "5.49", "wps": "365927", "ups": "3.09", "wpb": "118451", "bsz": "256", "num_updates": "128200", "lr": "0.000880606", "gnorm": "0.631", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "41837"} +[2022-08-01 13:06:29,486][train_inner][INFO] - {"epoch": 3, "update": 2.495, "loss": "2.445", "ppl": "5.44", "wps": "367012", "ups": "3.1", "wpb": "118456", "bsz": "256", "num_updates": "128400", "lr": "0.000880404", "gnorm": "0.628", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "41902"} +[2022-08-01 13:07:34,395][train_inner][INFO] - {"epoch": 3, "update": 2.499, "loss": "2.45", "ppl": "5.46", "wps": "366682", "ups": "3.08", "wpb": "119003", "bsz": "256", "num_updates": "128600", "lr": "0.000880202", "gnorm": "0.63", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "41967"} +[2022-08-01 13:08:39,362][train_inner][INFO] - {"epoch": 3, "update": 2.503, "loss": "2.444", "ppl": "5.44", "wps": "363612", "ups": "3.08", "wpb": "118112", "bsz": "256", "num_updates": "128800", "lr": "0.00088", "gnorm": "0.632", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "42032"} +[2022-08-01 13:09:44,262][train_inner][INFO] - {"epoch": 3, "update": 2.506, "loss": "2.445", "ppl": "5.44", "wps": "364237", "ups": "3.08", "wpb": "118192", "bsz": "256", "num_updates": "129000", "lr": "0.000879798", "gnorm": "0.63", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "42097"} +[2022-08-01 13:10:49,063][train_inner][INFO] - {"epoch": 3, "update": 2.51, "loss": "2.459", "ppl": "5.5", "wps": "364854", "ups": "3.09", "wpb": "118213", "bsz": "256", "num_updates": "129200", "lr": "0.000879596", "gnorm": "0.636", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "42161"} +[2022-08-01 13:11:53,770][train_inner][INFO] - {"epoch": 3, "update": 2.514, "loss": "2.451", "ppl": "5.47", "wps": "365174", "ups": "3.09", "wpb": "118144", "bsz": "256", "num_updates": "129400", "lr": "0.000879394", "gnorm": "0.633", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "42226"} +[2022-08-01 13:12:58,776][train_inner][INFO] - {"epoch": 3, "update": 2.518, "loss": "2.447", "ppl": "5.45", "wps": "364760", "ups": "3.08", "wpb": "118557", "bsz": "256", "num_updates": "129600", "lr": "0.000879192", "gnorm": "0.631", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "42291"} +[2022-08-01 13:13:52,926][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 13:14:03,925][train_inner][INFO] - {"epoch": 3, "update": 2.522, "loss": "2.447", "ppl": "5.45", "wps": "361118", "ups": "3.07", "wpb": "117630", "bsz": "256", "num_updates": "129800", "lr": "0.00087899", "gnorm": "0.631", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "42356"} +[2022-08-01 13:15:08,812][train_inner][INFO] - {"epoch": 3, "update": 2.526, "loss": "2.458", "ppl": "5.49", "wps": "364326", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "130000", "lr": "0.000878788", "gnorm": "0.634", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "42421"} +[2022-08-01 13:16:13,497][train_inner][INFO] - {"epoch": 3, "update": 2.53, "loss": "2.448", "ppl": "5.46", "wps": "365893", "ups": "3.09", "wpb": "118337", "bsz": "256", "num_updates": "130200", "lr": "0.000878586", "gnorm": "0.629", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "42486"} +[2022-08-01 13:17:18,093][train_inner][INFO] - {"epoch": 3, "update": 2.534, "loss": "2.449", "ppl": "5.46", "wps": "364837", "ups": "3.1", "wpb": "117834", "bsz": "256", "num_updates": "130400", "lr": "0.000878384", "gnorm": "0.634", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "42550"} +[2022-08-01 13:17:23,519][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 13:18:24,361][train_inner][INFO] - {"epoch": 3, "update": 2.538, "loss": "2.443", "ppl": "5.44", "wps": "357690", "ups": "3.02", "wpb": "118516", "bsz": "256", "num_updates": "130600", "lr": "0.000878182", "gnorm": "0.64", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.7", "wall": "42617"} +[2022-08-01 13:19:29,072][train_inner][INFO] - {"epoch": 3, "update": 2.541, "loss": "2.445", "ppl": "5.45", "wps": "365136", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "130800", "lr": "0.00087798", "gnorm": "0.64", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "42681"} +[2022-08-01 13:20:34,061][train_inner][INFO] - {"epoch": 3, "update": 2.545, "loss": "2.445", "ppl": "5.44", "wps": "364871", "ups": "3.08", "wpb": "118562", "bsz": "256", "num_updates": "131000", "lr": "0.000877778", "gnorm": "0.635", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "42746"} +[2022-08-01 13:21:39,161][train_inner][INFO] - {"epoch": 3, "update": 2.549, "loss": "2.445", "ppl": "5.44", "wps": "365121", "ups": "3.07", "wpb": "118844", "bsz": "256", "num_updates": "131200", "lr": "0.000877576", "gnorm": "0.632", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "42811"} +[2022-08-01 13:22:44,010][train_inner][INFO] - {"epoch": 3, "update": 2.553, "loss": "2.448", "ppl": "5.46", "wps": "364805", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "131400", "lr": "0.000877374", "gnorm": "0.634", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "42876"} +[2022-08-01 13:23:49,148][train_inner][INFO] - {"epoch": 3, "update": 2.557, "loss": "2.45", "ppl": "5.47", "wps": "363588", "ups": "3.07", "wpb": "118415", "bsz": "256", "num_updates": "131600", "lr": "0.000877172", "gnorm": "0.636", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "42941"} +[2022-08-01 13:24:53,844][train_inner][INFO] - {"epoch": 3, "update": 2.561, "loss": "2.447", "ppl": "5.45", "wps": "366328", "ups": "3.09", "wpb": "118496", "bsz": "256", "num_updates": "131800", "lr": "0.00087697", "gnorm": "0.638", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "43006"} +[2022-08-01 13:25:59,075][train_inner][INFO] - {"epoch": 3, "update": 2.565, "loss": "2.443", "ppl": "5.44", "wps": "364066", "ups": "3.07", "wpb": "118741", "bsz": "256", "num_updates": "132000", "lr": "0.000876768", "gnorm": "0.634", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "43071"} +[2022-08-01 13:26:04,195][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 13:27:04,438][train_inner][INFO] - {"epoch": 3, "update": 2.569, "loss": "2.45", "ppl": "5.47", "wps": "361260", "ups": "3.06", "wpb": "118064", "bsz": "256", "num_updates": "132200", "lr": "0.000876566", "gnorm": "0.638", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.3", "wall": "43137"} +[2022-08-01 13:28:09,433][train_inner][INFO] - {"epoch": 3, "update": 2.573, "loss": "2.451", "ppl": "5.47", "wps": "362710", "ups": "3.08", "wpb": "117871", "bsz": "256", "num_updates": "132400", "lr": "0.000876364", "gnorm": "0.635", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "43202"} +[2022-08-01 13:29:14,618][train_inner][INFO] - {"epoch": 3, "update": 2.576, "loss": "2.445", "ppl": "5.44", "wps": "363595", "ups": "3.07", "wpb": "118504", "bsz": "256", "num_updates": "132600", "lr": "0.000876162", "gnorm": "0.631", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "43267"} +[2022-08-01 13:30:19,657][train_inner][INFO] - {"epoch": 3, "update": 2.58, "loss": "2.444", "ppl": "5.44", "wps": "364038", "ups": "3.08", "wpb": "118380", "bsz": "256", "num_updates": "132800", "lr": "0.00087596", "gnorm": "0.634", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.1", "wall": "43332"} +[2022-08-01 13:31:24,532][train_inner][INFO] - {"epoch": 3, "update": 2.584, "loss": "2.441", "ppl": "5.43", "wps": "364504", "ups": "3.08", "wpb": "118235", "bsz": "256", "num_updates": "133000", "lr": "0.000875758", "gnorm": "0.636", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "43397"} +[2022-08-01 13:31:33,444][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 13:31:33,732][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 13:32:29,715][train_inner][INFO] - {"epoch": 3, "update": 2.588, "loss": "2.45", "ppl": "5.46", "wps": "362923", "ups": "3.07", "wpb": "118279", "bsz": "256", "num_updates": "133200", "lr": "0.000875556", "gnorm": "0.667", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "20.9", "wall": "43462"} +[2022-08-01 13:33:34,646][train_inner][INFO] - {"epoch": 3, "update": 2.592, "loss": "2.441", "ppl": "5.43", "wps": "364434", "ups": "3.08", "wpb": "118315", "bsz": "256", "num_updates": "133400", "lr": "0.000875354", "gnorm": "0.635", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "43527"} +[2022-08-01 13:33:51,703][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 13:34:39,940][train_inner][INFO] - {"epoch": 3, "update": 2.596, "loss": "2.441", "ppl": "5.43", "wps": "362998", "ups": "3.06", "wpb": "118508", "bsz": "256", "num_updates": "133600", "lr": "0.000875152", "gnorm": "0.635", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "43592"} +[2022-08-01 13:35:44,463][train_inner][INFO] - {"epoch": 3, "update": 2.6, "loss": "2.443", "ppl": "5.44", "wps": "365987", "ups": "3.1", "wpb": "118071", "bsz": "256", "num_updates": "133800", "lr": "0.000874949", "gnorm": "0.633", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.4", "wall": "43657"} +[2022-08-01 13:36:49,252][train_inner][INFO] - {"epoch": 3, "update": 2.604, "loss": "2.437", "ppl": "5.42", "wps": "366477", "ups": "3.09", "wpb": "118717", "bsz": "256", "num_updates": "134000", "lr": "0.000874747", "gnorm": "0.632", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24.3", "wall": "43722"} +[2022-08-01 13:37:54,147][train_inner][INFO] - {"epoch": 3, "update": 2.608, "loss": "2.443", "ppl": "5.44", "wps": "363709", "ups": "3.08", "wpb": "118013", "bsz": "256", "num_updates": "134200", "lr": "0.000874545", "gnorm": "0.636", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "43786"} +[2022-08-01 13:38:58,764][train_inner][INFO] - {"epoch": 3, "update": 2.611, "loss": "2.434", "ppl": "5.4", "wps": "368940", "ups": "3.1", "wpb": "119198", "bsz": "256", "num_updates": "134400", "lr": "0.000874343", "gnorm": "0.631", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "28.2", "wall": "43851"} +[2022-08-01 13:40:03,355][train_inner][INFO] - {"epoch": 3, "update": 2.615, "loss": "2.442", "ppl": "5.43", "wps": "366190", "ups": "3.1", "wpb": "118261", "bsz": "256", "num_updates": "134600", "lr": "0.000874141", "gnorm": "0.635", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24.8", "wall": "43916"} +[2022-08-01 13:41:08,270][train_inner][INFO] - {"epoch": 3, "update": 2.619, "loss": "2.435", "ppl": "5.41", "wps": "365682", "ups": "3.08", "wpb": "118689", "bsz": "256", "num_updates": "134800", "lr": "0.000873939", "gnorm": "0.634", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.8", "wall": "43981"} +[2022-08-01 13:42:13,404][train_inner][INFO] - {"epoch": 3, "update": 2.623, "loss": "2.445", "ppl": "5.45", "wps": "361176", "ups": "3.07", "wpb": "117623", "bsz": "256", "num_updates": "135000", "lr": "0.000873737", "gnorm": "0.642", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.5", "wall": "44046"} +[2022-08-01 13:43:18,233][train_inner][INFO] - {"epoch": 3, "update": 2.627, "loss": "2.437", "ppl": "5.41", "wps": "367594", "ups": "3.09", "wpb": "119152", "bsz": "256", "num_updates": "135200", "lr": "0.000873535", "gnorm": "0.636", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.4", "wall": "44111"} +[2022-08-01 13:44:23,464][train_inner][INFO] - {"epoch": 3, "update": 2.631, "loss": "2.446", "ppl": "5.45", "wps": "361460", "ups": "3.07", "wpb": "117889", "bsz": "256", "num_updates": "135400", "lr": "0.000873333", "gnorm": "0.639", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.9", "wall": "44176"} +[2022-08-01 13:45:28,167][train_inner][INFO] - {"epoch": 3, "update": 2.635, "loss": "2.445", "ppl": "5.44", "wps": "367410", "ups": "3.09", "wpb": "118861", "bsz": "256", "num_updates": "135600", "lr": "0.000873131", "gnorm": "0.695", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "44240"} +[2022-08-01 13:46:33,232][train_inner][INFO] - {"epoch": 3, "update": 2.639, "loss": "2.445", "ppl": "5.45", "wps": "363221", "ups": "3.07", "wpb": "118164", "bsz": "256", "num_updates": "135800", "lr": "0.000872929", "gnorm": "0.889", "clip": "1.5", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "44306"} +[2022-08-01 13:47:38,620][train_inner][INFO] - {"epoch": 3, "update": 2.642, "loss": "2.431", "ppl": "5.39", "wps": "364042", "ups": "3.06", "wpb": "119019", "bsz": "256", "num_updates": "136000", "lr": "0.000872727", "gnorm": "0.632", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "44371"} +[2022-08-01 13:48:43,765][train_inner][INFO] - {"epoch": 3, "update": 2.646, "loss": "2.438", "ppl": "5.42", "wps": "362984", "ups": "3.07", "wpb": "118232", "bsz": "256", "num_updates": "136200", "lr": "0.000872525", "gnorm": "0.636", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "44436"} +[2022-08-01 13:49:48,717][train_inner][INFO] - {"epoch": 3, "update": 2.65, "loss": "2.437", "ppl": "5.41", "wps": "363888", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "136400", "lr": "0.000872323", "gnorm": "0.634", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "44501"} +[2022-08-01 13:50:54,991][train_inner][INFO] - {"epoch": 3, "update": 2.654, "loss": "2.436", "ppl": "5.41", "wps": "357269", "ups": "3.02", "wpb": "118386", "bsz": "256", "num_updates": "136600", "lr": "0.000872121", "gnorm": "0.637", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "21", "wall": "44567"} +[2022-08-01 13:52:00,078][train_inner][INFO] - {"epoch": 3, "update": 2.658, "loss": "2.435", "ppl": "5.41", "wps": "363296", "ups": "3.07", "wpb": "118228", "bsz": "256", "num_updates": "136800", "lr": "0.000871919", "gnorm": "0.639", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "44632"} +[2022-08-01 13:53:04,827][train_inner][INFO] - {"epoch": 3, "update": 2.662, "loss": "2.437", "ppl": "5.42", "wps": "365341", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "137000", "lr": "0.000871717", "gnorm": "0.651", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "44697"} +[2022-08-01 13:54:09,953][train_inner][INFO] - {"epoch": 3, "update": 2.666, "loss": "2.437", "ppl": "5.42", "wps": "363681", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "137200", "lr": "0.000871515", "gnorm": "0.637", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.3", "wall": "44762"} +[2022-08-01 13:55:14,906][train_inner][INFO] - {"epoch": 3, "update": 2.67, "loss": "2.439", "ppl": "5.42", "wps": "362640", "ups": "3.08", "wpb": "117772", "bsz": "256", "num_updates": "137400", "lr": "0.000871313", "gnorm": "0.644", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "44827"} +[2022-08-01 13:56:19,048][train_inner][INFO] - {"epoch": 3, "update": 2.674, "loss": "2.433", "ppl": "5.4", "wps": "368882", "ups": "3.12", "wpb": "118303", "bsz": "256", "num_updates": "137600", "lr": "0.000871111", "gnorm": "0.639", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "44891"} +[2022-08-01 13:57:12,055][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 13:57:23,834][train_inner][INFO] - {"epoch": 3, "update": 2.677, "loss": "2.496", "ppl": "5.64", "wps": "364370", "ups": "3.09", "wpb": "118029", "bsz": "256", "num_updates": "137800", "lr": "0.000870909", "gnorm": "0.917", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.7", "wall": "44956"} +[2022-08-01 13:58:05,766][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 13:58:29,181][train_inner][INFO] - {"epoch": 3, "update": 2.681, "loss": "2.445", "ppl": "5.45", "wps": "360716", "ups": "3.06", "wpb": "117857", "bsz": "256", "num_updates": "138000", "lr": "0.000870707", "gnorm": "0.806", "clip": "1.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "45022"} +[2022-08-01 13:58:47,630][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-01 13:59:34,451][train_inner][INFO] - {"epoch": 3, "update": 2.685, "loss": "2.44", "ppl": "5.43", "wps": "361940", "ups": "3.06", "wpb": "118118", "bsz": "256", "num_updates": "138200", "lr": "0.000870505", "gnorm": "0.657", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.6", "wall": "45087"} +[2022-08-01 14:00:39,014][train_inner][INFO] - {"epoch": 3, "update": 2.689, "loss": "2.442", "ppl": "5.43", "wps": "365534", "ups": "3.1", "wpb": "117999", "bsz": "256", "num_updates": "138400", "lr": "0.000870303", "gnorm": "0.703", "clip": "0.5", "loss_scale": "0.25", "train_wall": "64", "gb_free": "23.8", "wall": "45151"} +[2022-08-01 14:01:44,155][train_inner][INFO] - {"epoch": 3, "update": 2.693, "loss": "2.444", "ppl": "5.44", "wps": "363284", "ups": "3.07", "wpb": "118323", "bsz": "256", "num_updates": "138600", "lr": "0.000870101", "gnorm": "0.734", "clip": "1", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "45216"} +[2022-08-01 14:02:49,333][train_inner][INFO] - {"epoch": 3, "update": 2.697, "loss": "2.435", "ppl": "5.41", "wps": "364171", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "138800", "lr": "0.000869899", "gnorm": "0.636", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.5", "wall": "45282"} +[2022-08-01 14:03:54,651][train_inner][INFO] - {"epoch": 3, "update": 2.701, "loss": "2.443", "ppl": "5.44", "wps": "363496", "ups": "3.06", "wpb": "118714", "bsz": "256", "num_updates": "139000", "lr": "0.000869697", "gnorm": "0.636", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "45347"} +[2022-08-01 14:04:59,706][train_inner][INFO] - {"epoch": 3, "update": 2.705, "loss": "2.438", "ppl": "5.42", "wps": "363894", "ups": "3.07", "wpb": "118364", "bsz": "256", "num_updates": "139200", "lr": "0.000869495", "gnorm": "0.64", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "45412"} +[2022-08-01 14:06:04,681][train_inner][INFO] - {"epoch": 3, "update": 2.709, "loss": "2.433", "ppl": "5.4", "wps": "364747", "ups": "3.08", "wpb": "118496", "bsz": "256", "num_updates": "139400", "lr": "0.000869293", "gnorm": "0.639", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "27.5", "wall": "45477"} +[2022-08-01 14:07:09,916][train_inner][INFO] - {"epoch": 3, "update": 2.712, "loss": "2.433", "ppl": "5.4", "wps": "362887", "ups": "3.07", "wpb": "118362", "bsz": "256", "num_updates": "139600", "lr": "0.000869091", "gnorm": "0.639", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "45542"} +[2022-08-01 14:08:14,930][train_inner][INFO] - {"epoch": 3, "update": 2.716, "loss": "2.432", "ppl": "5.39", "wps": "362968", "ups": "3.08", "wpb": "117989", "bsz": "256", "num_updates": "139800", "lr": "0.000868889", "gnorm": "0.64", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "45607"} +[2022-08-01 14:09:19,466][train_inner][INFO] - {"epoch": 3, "update": 2.72, "loss": "2.426", "ppl": "5.37", "wps": "366413", "ups": "3.1", "wpb": "118231", "bsz": "256", "num_updates": "140000", "lr": "0.000868687", "gnorm": "0.64", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.9", "wall": "45672"} +[2022-08-01 14:10:24,699][train_inner][INFO] - {"epoch": 3, "update": 2.724, "loss": "2.429", "ppl": "5.39", "wps": "362974", "ups": "3.07", "wpb": "118388", "bsz": "256", "num_updates": "140200", "lr": "0.000868485", "gnorm": "0.641", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.1", "wall": "45737"} +[2022-08-01 14:11:29,716][train_inner][INFO] - {"epoch": 3, "update": 2.728, "loss": "2.43", "ppl": "5.39", "wps": "364825", "ups": "3.08", "wpb": "118598", "bsz": "256", "num_updates": "140400", "lr": "0.000868283", "gnorm": "0.641", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "45802"} +[2022-08-01 14:12:35,380][train_inner][INFO] - {"epoch": 3, "update": 2.732, "loss": "2.434", "ppl": "5.4", "wps": "359599", "ups": "3.05", "wpb": "118062", "bsz": "256", "num_updates": "140600", "lr": "0.000868081", "gnorm": "0.645", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "45868"} +[2022-08-01 14:13:41,516][train_inner][INFO] - {"epoch": 3, "update": 2.736, "loss": "2.433", "ppl": "5.4", "wps": "359012", "ups": "3.02", "wpb": "118715", "bsz": "256", "num_updates": "140800", "lr": "0.000867879", "gnorm": "0.641", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "23.9", "wall": "45934"} +[2022-08-01 14:14:46,334][train_inner][INFO] - {"epoch": 3, "update": 2.74, "loss": "2.431", "ppl": "5.39", "wps": "366010", "ups": "3.09", "wpb": "118618", "bsz": "256", "num_updates": "141000", "lr": "0.000867677", "gnorm": "0.638", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "45999"} +[2022-08-01 14:15:51,263][train_inner][INFO] - {"epoch": 3, "update": 2.744, "loss": "2.439", "ppl": "5.42", "wps": "364486", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "141200", "lr": "0.000867475", "gnorm": "0.655", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.4", "wall": "46064"} +[2022-08-01 14:16:57,066][train_inner][INFO] - {"epoch": 3, "update": 2.747, "loss": "2.435", "ppl": "5.41", "wps": "360111", "ups": "3.04", "wpb": "118480", "bsz": "256", "num_updates": "141400", "lr": "0.000867273", "gnorm": "0.642", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "46129"} +[2022-08-01 14:18:01,905][train_inner][INFO] - {"epoch": 3, "update": 2.751, "loss": "2.436", "ppl": "5.41", "wps": "363560", "ups": "3.08", "wpb": "117863", "bsz": "256", "num_updates": "141600", "lr": "0.000867071", "gnorm": "0.643", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "25.4", "wall": "46194"} +[2022-08-01 14:19:06,754][train_inner][INFO] - {"epoch": 3, "update": 2.755, "loss": "2.427", "ppl": "5.38", "wps": "366215", "ups": "3.08", "wpb": "118742", "bsz": "256", "num_updates": "141800", "lr": "0.000866869", "gnorm": "0.645", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "46259"} +[2022-08-01 14:20:11,826][train_inner][INFO] - {"epoch": 3, "update": 2.759, "loss": "2.428", "ppl": "5.38", "wps": "363447", "ups": "3.07", "wpb": "118249", "bsz": "256", "num_updates": "142000", "lr": "0.000866667", "gnorm": "0.642", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "46324"} +[2022-08-01 14:21:16,933][train_inner][INFO] - {"epoch": 3, "update": 2.763, "loss": "2.427", "ppl": "5.38", "wps": "364856", "ups": "3.07", "wpb": "118772", "bsz": "256", "num_updates": "142200", "lr": "0.000866465", "gnorm": "0.639", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "46389"} +[2022-08-01 14:22:21,886][train_inner][INFO] - {"epoch": 3, "update": 2.767, "loss": "2.43", "ppl": "5.39", "wps": "364821", "ups": "3.08", "wpb": "118478", "bsz": "256", "num_updates": "142400", "lr": "0.000866263", "gnorm": "0.638", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "46454"} +[2022-08-01 14:23:26,874][train_inner][INFO] - {"epoch": 3, "update": 2.771, "loss": "2.437", "ppl": "5.41", "wps": "364267", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "142600", "lr": "0.000866061", "gnorm": "0.649", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "46519"} +[2022-08-01 14:24:31,694][train_inner][INFO] - {"epoch": 3, "update": 2.775, "loss": "2.431", "ppl": "5.39", "wps": "364127", "ups": "3.09", "wpb": "118011", "bsz": "256", "num_updates": "142800", "lr": "0.000865859", "gnorm": "0.646", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.4", "wall": "46584"} +[2022-08-01 14:25:36,673][train_inner][INFO] - {"epoch": 3, "update": 2.779, "loss": "2.438", "ppl": "5.42", "wps": "364030", "ups": "3.08", "wpb": "118271", "bsz": "256", "num_updates": "143000", "lr": "0.000865657", "gnorm": "0.642", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "46649"} +[2022-08-01 14:26:41,287][train_inner][INFO] - {"epoch": 3, "update": 2.782, "loss": "2.431", "ppl": "5.39", "wps": "366367", "ups": "3.1", "wpb": "118360", "bsz": "256", "num_updates": "143200", "lr": "0.000865455", "gnorm": "0.638", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "46714"} +[2022-08-01 14:27:46,221][train_inner][INFO] - {"epoch": 3, "update": 2.786, "loss": "2.434", "ppl": "5.4", "wps": "363073", "ups": "3.08", "wpb": "117877", "bsz": "256", "num_updates": "143400", "lr": "0.000865253", "gnorm": "0.643", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21", "wall": "46779"} +[2022-08-01 14:28:50,913][train_inner][INFO] - {"epoch": 3, "update": 2.79, "loss": "2.423", "ppl": "5.36", "wps": "367796", "ups": "3.09", "wpb": "118966", "bsz": "256", "num_updates": "143600", "lr": "0.000865051", "gnorm": "0.643", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "25.3", "wall": "46843"} +[2022-08-01 14:29:55,541][train_inner][INFO] - {"epoch": 3, "update": 2.794, "loss": "2.433", "ppl": "5.4", "wps": "365733", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "143800", "lr": "0.000864848", "gnorm": "0.667", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "46908"} +[2022-08-01 14:31:01,514][train_inner][INFO] - {"epoch": 3, "update": 2.798, "loss": "2.436", "ppl": "5.41", "wps": "358875", "ups": "3.03", "wpb": "118379", "bsz": "256", "num_updates": "144000", "lr": "0.000864646", "gnorm": "0.644", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "20.8", "wall": "46974"} +[2022-08-01 14:32:06,420][train_inner][INFO] - {"epoch": 3, "update": 2.802, "loss": "2.426", "ppl": "5.37", "wps": "364341", "ups": "3.08", "wpb": "118231", "bsz": "256", "num_updates": "144200", "lr": "0.000864444", "gnorm": "0.639", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "47039"} +[2022-08-01 14:33:11,345][train_inner][INFO] - {"epoch": 3, "update": 2.806, "loss": "2.431", "ppl": "5.39", "wps": "363626", "ups": "3.08", "wpb": "118040", "bsz": "256", "num_updates": "144400", "lr": "0.000864242", "gnorm": "0.645", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "47104"} +[2022-08-01 14:34:16,419][train_inner][INFO] - {"epoch": 3, "update": 2.81, "loss": "2.43", "ppl": "5.39", "wps": "363792", "ups": "3.07", "wpb": "118365", "bsz": "256", "num_updates": "144600", "lr": "0.00086404", "gnorm": "0.646", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "47169"} +[2022-08-01 14:35:21,258][train_inner][INFO] - {"epoch": 3, "update": 2.813, "loss": "2.428", "ppl": "5.38", "wps": "364466", "ups": "3.08", "wpb": "118156", "bsz": "256", "num_updates": "144800", "lr": "0.000863838", "gnorm": "0.643", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "47234"} +[2022-08-01 14:36:27,140][train_inner][INFO] - {"epoch": 3, "update": 2.817, "loss": "2.424", "ppl": "5.37", "wps": "357303", "ups": "3.04", "wpb": "117698", "bsz": "256", "num_updates": "145000", "lr": "0.000863636", "gnorm": "0.641", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "21.8", "wall": "47299"} +[2022-08-01 14:37:32,029][train_inner][INFO] - {"epoch": 3, "update": 2.821, "loss": "2.423", "ppl": "5.36", "wps": "363686", "ups": "3.08", "wpb": "117994", "bsz": "256", "num_updates": "145200", "lr": "0.000863434", "gnorm": "0.643", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "47364"} +[2022-08-01 14:38:37,198][train_inner][INFO] - {"epoch": 3, "update": 2.825, "loss": "2.419", "ppl": "5.35", "wps": "365148", "ups": "3.07", "wpb": "118980", "bsz": "256", "num_updates": "145400", "lr": "0.000863232", "gnorm": "0.638", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "27.6", "wall": "47430"} +[2022-08-01 14:39:42,415][train_inner][INFO] - {"epoch": 3, "update": 2.829, "loss": "2.422", "ppl": "5.36", "wps": "363520", "ups": "3.07", "wpb": "118537", "bsz": "256", "num_updates": "145600", "lr": "0.00086303", "gnorm": "0.647", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.4", "wall": "47495"} +[2022-08-01 14:40:47,619][train_inner][INFO] - {"epoch": 3, "update": 2.833, "loss": "2.429", "ppl": "5.38", "wps": "362146", "ups": "3.07", "wpb": "118065", "bsz": "256", "num_updates": "145800", "lr": "0.000862828", "gnorm": "0.645", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "47560"} +[2022-08-01 14:41:52,406][train_inner][INFO] - {"epoch": 3, "update": 2.837, "loss": "2.428", "ppl": "5.38", "wps": "365627", "ups": "3.09", "wpb": "118438", "bsz": "256", "num_updates": "146000", "lr": "0.000862626", "gnorm": "0.646", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "47625"} +[2022-08-01 14:42:57,456][train_inner][INFO] - {"epoch": 3, "update": 2.841, "loss": "2.427", "ppl": "5.38", "wps": "363498", "ups": "3.07", "wpb": "118226", "bsz": "256", "num_updates": "146200", "lr": "0.000862424", "gnorm": "0.641", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "47690"} +[2022-08-01 14:44:02,643][train_inner][INFO] - {"epoch": 3, "update": 2.845, "loss": "2.43", "ppl": "5.39", "wps": "363064", "ups": "3.07", "wpb": "118332", "bsz": "256", "num_updates": "146400", "lr": "0.000862222", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "47755"} +[2022-08-01 14:45:07,605][train_inner][INFO] - {"epoch": 3, "update": 2.848, "loss": "2.43", "ppl": "5.39", "wps": "363107", "ups": "3.08", "wpb": "117940", "bsz": "256", "num_updates": "146600", "lr": "0.00086202", "gnorm": "0.648", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "47820"} +[2022-08-01 14:46:12,344][train_inner][INFO] - {"epoch": 3, "update": 2.852, "loss": "2.431", "ppl": "5.39", "wps": "366638", "ups": "3.09", "wpb": "118677", "bsz": "256", "num_updates": "146800", "lr": "0.000861818", "gnorm": "0.641", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "47885"} +[2022-08-01 14:47:17,142][train_inner][INFO] - {"epoch": 3, "update": 2.856, "loss": "2.42", "ppl": "5.35", "wps": "364993", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "147000", "lr": "0.000861616", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.5", "wall": "47949"} +[2022-08-01 14:48:21,600][train_inner][INFO] - {"epoch": 3, "update": 2.86, "loss": "2.427", "ppl": "5.38", "wps": "366280", "ups": "3.1", "wpb": "118046", "bsz": "256", "num_updates": "147200", "lr": "0.000861414", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "48014"} +[2022-08-01 14:49:26,550][train_inner][INFO] - {"epoch": 3, "update": 2.864, "loss": "2.432", "ppl": "5.39", "wps": "363529", "ups": "3.08", "wpb": "118055", "bsz": "256", "num_updates": "147400", "lr": "0.000861212", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "48079"} +[2022-08-01 14:50:31,444][train_inner][INFO] - {"epoch": 3, "update": 2.868, "loss": "2.422", "ppl": "5.36", "wps": "365114", "ups": "3.08", "wpb": "118467", "bsz": "256", "num_updates": "147600", "lr": "0.00086101", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.6", "wall": "48144"} +[2022-08-01 14:51:36,246][train_inner][INFO] - {"epoch": 3, "update": 2.872, "loss": "2.424", "ppl": "5.37", "wps": "364788", "ups": "3.09", "wpb": "118194", "bsz": "256", "num_updates": "147800", "lr": "0.000860808", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "48209"} +[2022-08-01 14:52:41,022][train_inner][INFO] - {"epoch": 3, "update": 2.876, "loss": "2.426", "ppl": "5.37", "wps": "365044", "ups": "3.09", "wpb": "118228", "bsz": "256", "num_updates": "148000", "lr": "0.000860606", "gnorm": "0.699", "clip": "0.5", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "48273"} +[2022-08-01 14:53:45,935][train_inner][INFO] - {"epoch": 3, "update": 2.88, "loss": "2.414", "ppl": "5.33", "wps": "365277", "ups": "3.08", "wpb": "118555", "bsz": "256", "num_updates": "148200", "lr": "0.000860404", "gnorm": "0.643", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "48338"} +[2022-08-01 14:54:50,512][train_inner][INFO] - {"epoch": 3, "update": 2.883, "loss": "2.427", "ppl": "5.38", "wps": "366605", "ups": "3.1", "wpb": "118368", "bsz": "256", "num_updates": "148400", "lr": "0.000860202", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25", "wall": "48403"} +[2022-08-01 14:55:55,537][train_inner][INFO] - {"epoch": 3, "update": 2.887, "loss": "2.422", "ppl": "5.36", "wps": "363144", "ups": "3.08", "wpb": "118066", "bsz": "256", "num_updates": "148600", "lr": "0.00086", "gnorm": "0.644", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "48468"} +[2022-08-01 14:57:00,239][train_inner][INFO] - {"epoch": 3, "update": 2.891, "loss": "2.417", "ppl": "5.34", "wps": "366078", "ups": "3.09", "wpb": "118428", "bsz": "256", "num_updates": "148800", "lr": "0.000859798", "gnorm": "0.641", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "48533"} +[2022-08-01 14:58:05,242][train_inner][INFO] - {"epoch": 3, "update": 2.895, "loss": "2.42", "ppl": "5.35", "wps": "364926", "ups": "3.08", "wpb": "118604", "bsz": "256", "num_updates": "149000", "lr": "0.000859596", "gnorm": "0.646", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "48598"} +[2022-08-01 14:59:10,376][train_inner][INFO] - {"epoch": 3, "update": 2.899, "loss": "2.423", "ppl": "5.36", "wps": "364244", "ups": "3.07", "wpb": "118623", "bsz": "256", "num_updates": "149200", "lr": "0.000859394", "gnorm": "0.646", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "48663"} +[2022-08-01 15:00:15,116][train_inner][INFO] - {"epoch": 3, "update": 2.903, "loss": "2.424", "ppl": "5.37", "wps": "365498", "ups": "3.09", "wpb": "118310", "bsz": "256", "num_updates": "149400", "lr": "0.000859192", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "48727"} +[2022-08-01 15:01:19,907][train_inner][INFO] - {"epoch": 3, "update": 2.907, "loss": "2.424", "ppl": "5.37", "wps": "366358", "ups": "3.09", "wpb": "118682", "bsz": "256", "num_updates": "149600", "lr": "0.00085899", "gnorm": "0.645", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "48792"} +[2022-08-01 15:02:25,014][train_inner][INFO] - {"epoch": 3, "update": 2.911, "loss": "2.42", "ppl": "5.35", "wps": "363435", "ups": "3.07", "wpb": "118310", "bsz": "256", "num_updates": "149800", "lr": "0.000858788", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "48857"} +[2022-08-01 15:03:29,865][train_inner][INFO] - {"epoch": 3, "update": 2.914, "loss": "2.425", "ppl": "5.37", "wps": "363904", "ups": "3.08", "wpb": "117997", "bsz": "256", "num_updates": "150000", "lr": "0.000858586", "gnorm": "0.649", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "48922"} +[2022-08-01 15:03:29,867][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 15:03:52,464][valid][INFO] - {"epoch": 3, "valid_loss": "2.314", "valid_ppl": "4.97", "valid_wps": "1.60236e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "150000", "valid_best_loss": "2.314"} +[2022-08-01 15:03:52,467][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 150000 updates +[2022-08-01 15:03:52,468][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_3_150000.pt +[2022-08-01 15:04:02,469][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_3_150000.pt +[2022-08-01 15:04:33,690][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_3_150000.pt (epoch 3 @ 150000 updates, score 2.314) (writing took 41.22356229927391 seconds) +[2022-08-01 15:05:38,294][train_inner][INFO] - {"epoch": 3, "update": 2.918, "loss": "2.418", "ppl": "5.34", "wps": "183907", "ups": "1.56", "wpb": "118093", "bsz": "256", "num_updates": "150200", "lr": "0.000858384", "gnorm": "0.644", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "49051"} +[2022-08-01 15:06:44,541][train_inner][INFO] - {"epoch": 3, "update": 2.922, "loss": "2.418", "ppl": "5.35", "wps": "357204", "ups": "3.02", "wpb": "118318", "bsz": "256", "num_updates": "150400", "lr": "0.000858182", "gnorm": "0.644", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.8", "wall": "49117"} +[2022-08-01 15:07:49,968][train_inner][INFO] - {"epoch": 3, "update": 2.926, "loss": "2.427", "ppl": "5.38", "wps": "359429", "ups": "3.06", "wpb": "117580", "bsz": "256", "num_updates": "150600", "lr": "0.00085798", "gnorm": "0.645", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "49182"} +[2022-08-01 15:08:54,706][train_inner][INFO] - {"epoch": 3, "update": 2.93, "loss": "2.42", "ppl": "5.35", "wps": "366363", "ups": "3.09", "wpb": "118586", "bsz": "256", "num_updates": "150800", "lr": "0.000857778", "gnorm": "0.645", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "49247"} +[2022-08-01 15:09:59,939][train_inner][INFO] - {"epoch": 3, "update": 2.934, "loss": "2.416", "ppl": "5.34", "wps": "363593", "ups": "3.07", "wpb": "118589", "bsz": "256", "num_updates": "151000", "lr": "0.000857576", "gnorm": "0.646", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "49312"} +[2022-08-01 15:11:04,732][train_inner][INFO] - {"epoch": 3, "update": 2.938, "loss": "2.421", "ppl": "5.36", "wps": "364449", "ups": "3.09", "wpb": "118068", "bsz": "256", "num_updates": "151200", "lr": "0.000857374", "gnorm": "0.646", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "49377"} +[2022-08-01 15:11:55,536][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 15:12:09,849][train_inner][INFO] - {"epoch": 3, "update": 2.942, "loss": "2.418", "ppl": "5.34", "wps": "362583", "ups": "3.07", "wpb": "118049", "bsz": "256", "num_updates": "151400", "lr": "0.000857172", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "49442"} +[2022-08-01 15:13:14,883][train_inner][INFO] - {"epoch": 3, "update": 2.946, "loss": "2.414", "ppl": "5.33", "wps": "365483", "ups": "3.08", "wpb": "118842", "bsz": "256", "num_updates": "151600", "lr": "0.00085697", "gnorm": "0.646", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "49507"} +[2022-08-01 15:14:19,784][train_inner][INFO] - {"epoch": 3, "update": 2.949, "loss": "2.419", "ppl": "5.35", "wps": "364321", "ups": "3.08", "wpb": "118223", "bsz": "256", "num_updates": "151800", "lr": "0.000856768", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "49572"} +[2022-08-01 15:15:24,559][train_inner][INFO] - {"epoch": 3, "update": 2.953, "loss": "2.419", "ppl": "5.35", "wps": "364797", "ups": "3.09", "wpb": "118147", "bsz": "256", "num_updates": "152000", "lr": "0.000856566", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "49637"} +[2022-08-01 15:15:48,728][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:16:30,021][train_inner][INFO] - {"epoch": 3, "update": 2.957, "loss": "2.418", "ppl": "5.34", "wps": "363505", "ups": "3.06", "wpb": "118978", "bsz": "256", "num_updates": "152200", "lr": "0.000856364", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "49702"} +[2022-08-01 15:17:34,977][train_inner][INFO] - {"epoch": 3, "update": 2.961, "loss": "2.415", "ppl": "5.33", "wps": "364485", "ups": "3.08", "wpb": "118375", "bsz": "256", "num_updates": "152400", "lr": "0.000856162", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "49767"} +[2022-08-01 15:18:40,813][train_inner][INFO] - {"epoch": 3, "update": 2.965, "loss": "2.416", "ppl": "5.34", "wps": "360823", "ups": "3.04", "wpb": "118774", "bsz": "256", "num_updates": "152600", "lr": "0.00085596", "gnorm": "0.644", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "49833"} +[2022-08-01 15:19:45,901][train_inner][INFO] - {"epoch": 3, "update": 2.969, "loss": "2.426", "ppl": "5.37", "wps": "362250", "ups": "3.07", "wpb": "117889", "bsz": "256", "num_updates": "152800", "lr": "0.000855758", "gnorm": "0.651", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "49898"} +[2022-08-01 15:20:50,585][train_inner][INFO] - {"epoch": 3, "update": 2.973, "loss": "2.422", "ppl": "5.36", "wps": "364682", "ups": "3.09", "wpb": "117943", "bsz": "256", "num_updates": "153000", "lr": "0.000855556", "gnorm": "0.646", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.4", "wall": "49963"} +[2022-08-01 15:21:55,769][train_inner][INFO] - {"epoch": 3, "update": 2.977, "loss": "2.415", "ppl": "5.33", "wps": "362934", "ups": "3.07", "wpb": "118287", "bsz": "256", "num_updates": "153200", "lr": "0.000855354", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "50028"} +[2022-08-01 15:23:00,788][train_inner][INFO] - {"epoch": 3, "update": 2.981, "loss": "2.41", "ppl": "5.32", "wps": "364460", "ups": "3.08", "wpb": "118481", "bsz": "256", "num_updates": "153400", "lr": "0.000855152", "gnorm": "0.646", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "50093"} +[2022-08-01 15:24:05,680][train_inner][INFO] - {"epoch": 3, "update": 2.984, "loss": "2.416", "ppl": "5.34", "wps": "364175", "ups": "3.08", "wpb": "118158", "bsz": "256", "num_updates": "153600", "lr": "0.000854949", "gnorm": "0.648", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "50158"} +[2022-08-01 15:25:10,662][train_inner][INFO] - {"epoch": 3, "update": 2.988, "loss": "2.419", "ppl": "5.35", "wps": "363418", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "153800", "lr": "0.000854747", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "50223"} +[2022-08-01 15:26:15,319][train_inner][INFO] - {"epoch": 3, "update": 2.992, "loss": "2.424", "ppl": "5.37", "wps": "364511", "ups": "3.09", "wpb": "117840", "bsz": "256", "num_updates": "154000", "lr": "0.000854545", "gnorm": "0.651", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "50288"} +[2022-08-01 15:27:19,790][train_inner][INFO] - {"epoch": 3, "update": 2.996, "loss": "2.415", "ppl": "5.33", "wps": "366014", "ups": "3.1", "wpb": "117984", "bsz": "256", "num_updates": "154200", "lr": "0.000854343", "gnorm": "0.649", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "50352"} +[2022-08-01 15:28:24,182][train_inner][INFO] - {"epoch": 3, "update": 3.0, "loss": "2.409", "ppl": "5.31", "wps": "367256", "ups": "3.11", "wpb": "118241", "bsz": "256", "num_updates": "154400", "lr": "0.000854141", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.7", "wall": "50417"} +[2022-08-01 15:28:24,792][fairseq_cli.train][INFO] - end of epoch 3 (average epoch stats below) +[2022-08-01 15:28:24,792][train][INFO] - {"epoch": 3, "train_loss": "2.454", "train_ppl": "5.48", "train_wps": "362564", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "154402", "train_lr": "0.000854139", "train_gnorm": "0.641", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16628", "train_gb_free": "19.7", "train_wall": "50417"} +[2022-08-01 15:28:24,884][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 15:28:24,887][fairseq.trainer][INFO] - begin training epoch 4 +[2022-08-01 15:28:24,887][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 15:29:40,152][train_inner][INFO] - {"epoch": 4, "update": 3.004, "loss": "2.413", "ppl": "5.33", "wps": "310648", "ups": "2.63", "wpb": "117997", "bsz": "255.4", "num_updates": "154600", "lr": "0.000853939", "gnorm": "0.651", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "50492"} +[2022-08-01 15:30:45,288][train_inner][INFO] - {"epoch": 4, "update": 3.008, "loss": "2.417", "ppl": "5.34", "wps": "362762", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "154800", "lr": "0.000853737", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "50558"} +[2022-08-01 15:31:50,074][train_inner][INFO] - {"epoch": 4, "update": 3.012, "loss": "2.408", "ppl": "5.31", "wps": "364749", "ups": "3.09", "wpb": "118150", "bsz": "256", "num_updates": "155000", "lr": "0.000853535", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "50622"} +[2022-08-01 15:32:55,075][train_inner][INFO] - {"epoch": 4, "update": 3.015, "loss": "2.412", "ppl": "5.32", "wps": "364393", "ups": "3.08", "wpb": "118428", "bsz": "256", "num_updates": "155200", "lr": "0.000853333", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "50687"} +[2022-08-01 15:34:00,305][train_inner][INFO] - {"epoch": 4, "update": 3.019, "loss": "2.407", "ppl": "5.31", "wps": "363522", "ups": "3.07", "wpb": "118562", "bsz": "256", "num_updates": "155400", "lr": "0.000853131", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "50753"} +[2022-08-01 15:34:40,243][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 15:35:05,441][train_inner][INFO] - {"epoch": 4, "update": 3.023, "loss": "2.408", "ppl": "5.31", "wps": "363704", "ups": "3.07", "wpb": "118449", "bsz": "256", "num_updates": "155600", "lr": "0.000852929", "gnorm": "0.659", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26", "wall": "50818"} +[2022-08-01 15:36:10,346][train_inner][INFO] - {"epoch": 4, "update": 3.027, "loss": "2.414", "ppl": "5.33", "wps": "364192", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "155800", "lr": "0.000852727", "gnorm": "0.649", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "50883"} +[2022-08-01 15:37:15,357][train_inner][INFO] - {"epoch": 4, "update": 3.031, "loss": "2.411", "ppl": "5.32", "wps": "365680", "ups": "3.08", "wpb": "118865", "bsz": "256", "num_updates": "156000", "lr": "0.000852525", "gnorm": "0.658", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "50948"} +[2022-08-01 15:38:20,325][train_inner][INFO] - {"epoch": 4, "update": 3.035, "loss": "2.405", "ppl": "5.3", "wps": "365202", "ups": "3.08", "wpb": "118631", "bsz": "256", "num_updates": "156200", "lr": "0.000852323", "gnorm": "0.649", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "51013"} +[2022-08-01 15:39:25,394][train_inner][INFO] - {"epoch": 4, "update": 3.039, "loss": "2.411", "ppl": "5.32", "wps": "363022", "ups": "3.07", "wpb": "118105", "bsz": "256", "num_updates": "156400", "lr": "0.000852121", "gnorm": "0.649", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "51078"} +[2022-08-01 15:40:30,135][train_inner][INFO] - {"epoch": 4, "update": 3.043, "loss": "2.405", "ppl": "5.3", "wps": "368103", "ups": "3.09", "wpb": "119156", "bsz": "256", "num_updates": "156600", "lr": "0.000851919", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26", "wall": "51142"} +[2022-08-01 15:41:35,173][train_inner][INFO] - {"epoch": 4, "update": 3.047, "loss": "2.411", "ppl": "5.32", "wps": "361267", "ups": "3.08", "wpb": "117479", "bsz": "256", "num_updates": "156800", "lr": "0.000851717", "gnorm": "0.651", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "51207"} +[2022-08-01 15:42:40,078][train_inner][INFO] - {"epoch": 4, "update": 3.05, "loss": "2.41", "ppl": "5.31", "wps": "364133", "ups": "3.08", "wpb": "118167", "bsz": "256", "num_updates": "157000", "lr": "0.000851515", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.2", "wall": "51272"} +[2022-08-01 15:43:45,307][train_inner][INFO] - {"epoch": 4, "update": 3.054, "loss": "2.412", "ppl": "5.32", "wps": "366186", "ups": "3.07", "wpb": "119429", "bsz": "256", "num_updates": "157200", "lr": "0.000851313", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "51338"} +[2022-08-01 15:44:50,124][train_inner][INFO] - {"epoch": 4, "update": 3.058, "loss": "2.408", "ppl": "5.31", "wps": "365609", "ups": "3.09", "wpb": "118486", "bsz": "256", "num_updates": "157400", "lr": "0.000851111", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "51402"} +[2022-08-01 15:45:54,813][train_inner][INFO] - {"epoch": 4, "update": 3.062, "loss": "2.403", "ppl": "5.29", "wps": "366880", "ups": "3.09", "wpb": "118664", "bsz": "256", "num_updates": "157600", "lr": "0.000850909", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "51467"} +[2022-08-01 15:46:59,882][train_inner][INFO] - {"epoch": 4, "update": 3.066, "loss": "2.411", "ppl": "5.32", "wps": "364479", "ups": "3.07", "wpb": "118581", "bsz": "256", "num_updates": "157800", "lr": "0.000850707", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "51532"} +[2022-08-01 15:48:05,212][train_inner][INFO] - {"epoch": 4, "update": 3.07, "loss": "2.408", "ppl": "5.31", "wps": "362087", "ups": "3.06", "wpb": "118274", "bsz": "256", "num_updates": "158000", "lr": "0.000850505", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.9", "wall": "51598"} +[2022-08-01 15:49:10,174][train_inner][INFO] - {"epoch": 4, "update": 3.074, "loss": "2.413", "ppl": "5.33", "wps": "361554", "ups": "3.08", "wpb": "117434", "bsz": "256", "num_updates": "158200", "lr": "0.000850303", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "51662"} +[2022-08-01 15:50:15,000][train_inner][INFO] - {"epoch": 4, "update": 3.078, "loss": "2.415", "ppl": "5.33", "wps": "364932", "ups": "3.09", "wpb": "118285", "bsz": "256", "num_updates": "158400", "lr": "0.000850101", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "51727"} +[2022-08-01 15:51:19,966][train_inner][INFO] - {"epoch": 4, "update": 3.082, "loss": "2.414", "ppl": "5.33", "wps": "362867", "ups": "3.08", "wpb": "117868", "bsz": "256", "num_updates": "158600", "lr": "0.000849899", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "51792"} +[2022-08-01 15:52:24,597][train_inner][INFO] - {"epoch": 4, "update": 3.085, "loss": "2.408", "ppl": "5.31", "wps": "366190", "ups": "3.09", "wpb": "118333", "bsz": "256", "num_updates": "158800", "lr": "0.000849697", "gnorm": "0.649", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "51857"} +[2022-08-01 15:53:29,326][train_inner][INFO] - {"epoch": 4, "update": 3.089, "loss": "2.405", "ppl": "5.3", "wps": "366110", "ups": "3.09", "wpb": "118488", "bsz": "256", "num_updates": "159000", "lr": "0.000849495", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "51922"} +[2022-08-01 15:54:34,215][train_inner][INFO] - {"epoch": 4, "update": 3.093, "loss": "2.406", "ppl": "5.3", "wps": "365678", "ups": "3.08", "wpb": "118641", "bsz": "256", "num_updates": "159200", "lr": "0.000849293", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "51987"} +[2022-08-01 15:55:38,866][train_inner][INFO] - {"epoch": 4, "update": 3.097, "loss": "2.413", "ppl": "5.32", "wps": "364508", "ups": "3.09", "wpb": "117826", "bsz": "256", "num_updates": "159400", "lr": "0.000849091", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "52051"} +[2022-08-01 15:56:43,566][train_inner][INFO] - {"epoch": 4, "update": 3.101, "loss": "2.41", "ppl": "5.32", "wps": "365181", "ups": "3.09", "wpb": "118136", "bsz": "256", "num_updates": "159600", "lr": "0.000848889", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.7", "wall": "52116"} +[2022-08-01 15:57:48,401][train_inner][INFO] - {"epoch": 4, "update": 3.105, "loss": "2.402", "ppl": "5.28", "wps": "365171", "ups": "3.08", "wpb": "118377", "bsz": "256", "num_updates": "159800", "lr": "0.000848687", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "52181"} +[2022-08-01 15:58:53,048][train_inner][INFO] - {"epoch": 4, "update": 3.109, "loss": "2.404", "ppl": "5.29", "wps": "366782", "ups": "3.09", "wpb": "118556", "bsz": "256", "num_updates": "160000", "lr": "0.000848485", "gnorm": "0.649", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "52245"} +[2022-08-01 15:59:57,788][train_inner][INFO] - {"epoch": 4, "update": 3.113, "loss": "2.409", "ppl": "5.31", "wps": "364420", "ups": "3.09", "wpb": "117961", "bsz": "256", "num_updates": "160200", "lr": "0.000848283", "gnorm": "0.659", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25", "wall": "52310"} +[2022-08-01 16:01:02,478][train_inner][INFO] - {"epoch": 4, "update": 3.117, "loss": "2.402", "ppl": "5.28", "wps": "365482", "ups": "3.09", "wpb": "118214", "bsz": "256", "num_updates": "160400", "lr": "0.000848081", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "52375"} +[2022-08-01 16:02:06,777][train_inner][INFO] - {"epoch": 4, "update": 3.12, "loss": "2.41", "ppl": "5.31", "wps": "367467", "ups": "3.11", "wpb": "118136", "bsz": "256", "num_updates": "160600", "lr": "0.000847879", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "52439"} +[2022-08-01 16:03:11,797][train_inner][INFO] - {"epoch": 4, "update": 3.124, "loss": "2.406", "ppl": "5.3", "wps": "363572", "ups": "3.08", "wpb": "118197", "bsz": "256", "num_updates": "160800", "lr": "0.000847677", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "52504"} +[2022-08-01 16:04:17,463][train_inner][INFO] - {"epoch": 4, "update": 3.128, "loss": "2.41", "ppl": "5.32", "wps": "359702", "ups": "3.05", "wpb": "118098", "bsz": "256", "num_updates": "161000", "lr": "0.000847475", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "52570"} +[2022-08-01 16:05:22,271][train_inner][INFO] - {"epoch": 4, "update": 3.132, "loss": "2.402", "ppl": "5.29", "wps": "365474", "ups": "3.09", "wpb": "118428", "bsz": "256", "num_updates": "161200", "lr": "0.000847273", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "52635"} +[2022-08-01 16:06:27,239][train_inner][INFO] - {"epoch": 4, "update": 3.136, "loss": "2.404", "ppl": "5.29", "wps": "363539", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "161400", "lr": "0.000847071", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "52700"} +[2022-08-01 16:07:32,110][train_inner][INFO] - {"epoch": 4, "update": 3.14, "loss": "2.406", "ppl": "5.3", "wps": "365331", "ups": "3.08", "wpb": "118494", "bsz": "256", "num_updates": "161600", "lr": "0.000846869", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "52764"} +[2022-08-01 16:08:37,340][train_inner][INFO] - {"epoch": 4, "update": 3.144, "loss": "2.406", "ppl": "5.3", "wps": "364837", "ups": "3.07", "wpb": "118990", "bsz": "256", "num_updates": "161800", "lr": "0.000846667", "gnorm": "0.651", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.7", "wall": "52830"} +[2022-08-01 16:08:48,474][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 16:09:42,709][train_inner][INFO] - {"epoch": 4, "update": 3.148, "loss": "2.403", "ppl": "5.29", "wps": "362916", "ups": "3.06", "wpb": "118615", "bsz": "256", "num_updates": "162000", "lr": "0.000846465", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "52895"} +[2022-08-01 16:10:47,175][train_inner][INFO] - {"epoch": 4, "update": 3.151, "loss": "2.413", "ppl": "5.32", "wps": "365875", "ups": "3.1", "wpb": "117932", "bsz": "256", "num_updates": "162200", "lr": "0.000846263", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "52959"} +[2022-08-01 16:11:52,323][train_inner][INFO] - {"epoch": 4, "update": 3.155, "loss": "2.404", "ppl": "5.29", "wps": "364015", "ups": "3.07", "wpb": "118572", "bsz": "256", "num_updates": "162400", "lr": "0.000846061", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "53025"} +[2022-08-01 16:12:57,065][train_inner][INFO] - {"epoch": 4, "update": 3.159, "loss": "2.402", "ppl": "5.28", "wps": "366173", "ups": "3.09", "wpb": "118533", "bsz": "256", "num_updates": "162600", "lr": "0.000845859", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.6", "wall": "53089"} +[2022-08-01 16:13:53,874][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 16:14:02,354][train_inner][INFO] - {"epoch": 4, "update": 3.163, "loss": "2.4", "ppl": "5.28", "wps": "362538", "ups": "3.06", "wpb": "118346", "bsz": "256", "num_updates": "162800", "lr": "0.000845657", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "53155"} +[2022-08-01 16:15:07,491][train_inner][INFO] - {"epoch": 4, "update": 3.167, "loss": "2.4", "ppl": "5.28", "wps": "362233", "ups": "3.07", "wpb": "117973", "bsz": "256", "num_updates": "163000", "lr": "0.000845455", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "53220"} +[2022-08-01 16:16:12,582][train_inner][INFO] - {"epoch": 4, "update": 3.171, "loss": "2.4", "ppl": "5.28", "wps": "363003", "ups": "3.07", "wpb": "118140", "bsz": "256", "num_updates": "163200", "lr": "0.000845253", "gnorm": "0.653", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "53285"} +[2022-08-01 16:17:18,619][train_inner][INFO] - {"epoch": 4, "update": 3.175, "loss": "2.397", "ppl": "5.27", "wps": "357314", "ups": "3.03", "wpb": "117978", "bsz": "256", "num_updates": "163400", "lr": "0.000845051", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.1", "wall": "53351"} +[2022-08-01 16:18:23,676][train_inner][INFO] - {"epoch": 4, "update": 3.179, "loss": "2.401", "ppl": "5.28", "wps": "363920", "ups": "3.07", "wpb": "118376", "bsz": "256", "num_updates": "163600", "lr": "0.000844848", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "53416"} +[2022-08-01 16:19:28,693][train_inner][INFO] - {"epoch": 4, "update": 3.183, "loss": "2.405", "ppl": "5.3", "wps": "362741", "ups": "3.08", "wpb": "117919", "bsz": "256", "num_updates": "163800", "lr": "0.000844646", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "53481"} +[2022-08-01 16:20:34,018][train_inner][INFO] - {"epoch": 4, "update": 3.186, "loss": "2.406", "ppl": "5.3", "wps": "361393", "ups": "3.06", "wpb": "118039", "bsz": "256", "num_updates": "164000", "lr": "0.000844444", "gnorm": "0.653", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.1", "wall": "53546"} +[2022-08-01 16:21:39,280][train_inner][INFO] - {"epoch": 4, "update": 3.19, "loss": "2.399", "ppl": "5.27", "wps": "364190", "ups": "3.06", "wpb": "118837", "bsz": "256", "num_updates": "164200", "lr": "0.000844242", "gnorm": "0.652", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "53612"} +[2022-08-01 16:22:44,114][train_inner][INFO] - {"epoch": 4, "update": 3.194, "loss": "2.399", "ppl": "5.28", "wps": "365102", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "164400", "lr": "0.00084404", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "53676"} +[2022-08-01 16:23:49,389][train_inner][INFO] - {"epoch": 4, "update": 3.198, "loss": "2.403", "ppl": "5.29", "wps": "363121", "ups": "3.06", "wpb": "118513", "bsz": "256", "num_updates": "164600", "lr": "0.000843838", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "53742"} +[2022-08-01 16:24:53,813][train_inner][INFO] - {"epoch": 4, "update": 3.202, "loss": "2.397", "ppl": "5.27", "wps": "366766", "ups": "3.1", "wpb": "118141", "bsz": "256", "num_updates": "164800", "lr": "0.000843636", "gnorm": "0.655", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "53806"} +[2022-08-01 16:25:58,535][train_inner][INFO] - {"epoch": 4, "update": 3.206, "loss": "2.403", "ppl": "5.29", "wps": "364039", "ups": "3.09", "wpb": "117805", "bsz": "256", "num_updates": "165000", "lr": "0.000843434", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25", "wall": "53871"} +[2022-08-01 16:27:03,608][train_inner][INFO] - {"epoch": 4, "update": 3.21, "loss": "2.407", "ppl": "5.3", "wps": "363678", "ups": "3.07", "wpb": "118326", "bsz": "256", "num_updates": "165200", "lr": "0.000843232", "gnorm": "0.657", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "53936"} +[2022-08-01 16:28:08,585][train_inner][INFO] - {"epoch": 4, "update": 3.214, "loss": "2.393", "ppl": "5.25", "wps": "364389", "ups": "3.08", "wpb": "118384", "bsz": "256", "num_updates": "165400", "lr": "0.00084303", "gnorm": "0.653", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.1", "wall": "54001"} +[2022-08-01 16:29:13,402][train_inner][INFO] - {"epoch": 4, "update": 3.218, "loss": "2.404", "ppl": "5.29", "wps": "365848", "ups": "3.09", "wpb": "118563", "bsz": "256", "num_updates": "165600", "lr": "0.000842828", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "54066"} +[2022-08-01 16:30:17,889][train_inner][INFO] - {"epoch": 4, "update": 3.221, "loss": "2.398", "ppl": "5.27", "wps": "367013", "ups": "3.1", "wpb": "118337", "bsz": "256", "num_updates": "165800", "lr": "0.000842626", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "54130"} +[2022-08-01 16:31:22,694][train_inner][INFO] - {"epoch": 4, "update": 3.225, "loss": "2.397", "ppl": "5.27", "wps": "366561", "ups": "3.09", "wpb": "118773", "bsz": "256", "num_updates": "166000", "lr": "0.000842424", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "54195"} +[2022-08-01 16:32:27,375][train_inner][INFO] - {"epoch": 4, "update": 3.229, "loss": "2.405", "ppl": "5.3", "wps": "363429", "ups": "3.09", "wpb": "117533", "bsz": "256", "num_updates": "166200", "lr": "0.000842222", "gnorm": "0.663", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "54260"} +[2022-08-01 16:33:32,199][train_inner][INFO] - {"epoch": 4, "update": 3.233, "loss": "2.398", "ppl": "5.27", "wps": "366367", "ups": "3.09", "wpb": "118745", "bsz": "256", "num_updates": "166400", "lr": "0.00084202", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "54325"} +[2022-08-01 16:34:36,999][train_inner][INFO] - {"epoch": 4, "update": 3.237, "loss": "2.401", "ppl": "5.28", "wps": "364960", "ups": "3.09", "wpb": "118244", "bsz": "256", "num_updates": "166600", "lr": "0.000841818", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "54389"} +[2022-08-01 16:34:41,819][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 16:35:42,096][train_inner][INFO] - {"epoch": 4, "update": 3.241, "loss": "2.403", "ppl": "5.29", "wps": "363070", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "166800", "lr": "0.000841616", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "54454"} +[2022-08-01 16:36:47,016][train_inner][INFO] - {"epoch": 4, "update": 3.245, "loss": "2.398", "ppl": "5.27", "wps": "365165", "ups": "3.08", "wpb": "118531", "bsz": "255.9", "num_updates": "167000", "lr": "0.000841414", "gnorm": "0.655", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "54519"} +[2022-08-01 16:37:51,754][train_inner][INFO] - {"epoch": 4, "update": 3.249, "loss": "2.396", "ppl": "5.26", "wps": "366560", "ups": "3.09", "wpb": "118650", "bsz": "256", "num_updates": "167200", "lr": "0.000841212", "gnorm": "0.653", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "54584"} +[2022-08-01 16:38:56,452][train_inner][INFO] - {"epoch": 4, "update": 3.253, "loss": "2.4", "ppl": "5.28", "wps": "365648", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "167400", "lr": "0.00084101", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "54649"} +[2022-08-01 16:40:01,515][train_inner][INFO] - {"epoch": 4, "update": 3.256, "loss": "2.397", "ppl": "5.27", "wps": "363700", "ups": "3.07", "wpb": "118314", "bsz": "256", "num_updates": "167600", "lr": "0.000840808", "gnorm": "0.657", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "54714"} +[2022-08-01 16:41:05,976][train_inner][INFO] - {"epoch": 4, "update": 3.26, "loss": "2.402", "ppl": "5.29", "wps": "366754", "ups": "3.1", "wpb": "118205", "bsz": "256", "num_updates": "167800", "lr": "0.000840606", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.9", "wall": "54778"} +[2022-08-01 16:42:10,775][train_inner][INFO] - {"epoch": 4, "update": 3.264, "loss": "2.395", "ppl": "5.26", "wps": "364703", "ups": "3.09", "wpb": "118160", "bsz": "256", "num_updates": "168000", "lr": "0.000840404", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "54843"} +[2022-08-01 16:43:15,178][train_inner][INFO] - {"epoch": 4, "update": 3.268, "loss": "2.399", "ppl": "5.28", "wps": "367045", "ups": "3.11", "wpb": "118192", "bsz": "256", "num_updates": "168200", "lr": "0.000840202", "gnorm": "0.655", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "54907"} +[2022-08-01 16:44:20,115][train_inner][INFO] - {"epoch": 4, "update": 3.272, "loss": "2.404", "ppl": "5.29", "wps": "363168", "ups": "3.08", "wpb": "117915", "bsz": "256", "num_updates": "168400", "lr": "0.00084", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "54972"} +[2022-08-01 16:45:24,918][train_inner][INFO] - {"epoch": 4, "update": 3.276, "loss": "2.402", "ppl": "5.29", "wps": "364855", "ups": "3.09", "wpb": "118216", "bsz": "256", "num_updates": "168600", "lr": "0.000839798", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "55037"} +[2022-08-01 16:46:29,991][train_inner][INFO] - {"epoch": 4, "update": 3.28, "loss": "2.388", "ppl": "5.24", "wps": "363352", "ups": "3.07", "wpb": "118221", "bsz": "256", "num_updates": "168800", "lr": "0.000839596", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "55102"} +[2022-08-01 16:47:34,856][train_inner][INFO] - {"epoch": 4, "update": 3.284, "loss": "2.387", "ppl": "5.23", "wps": "365862", "ups": "3.08", "wpb": "118656", "bsz": "256", "num_updates": "169000", "lr": "0.000839394", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "55167"} +[2022-08-01 16:48:39,502][train_inner][INFO] - {"epoch": 4, "update": 3.287, "loss": "2.394", "ppl": "5.25", "wps": "366136", "ups": "3.09", "wpb": "118345", "bsz": "256", "num_updates": "169200", "lr": "0.000839192", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "55232"} +[2022-08-01 16:49:44,021][train_inner][INFO] - {"epoch": 4, "update": 3.291, "loss": "2.398", "ppl": "5.27", "wps": "366969", "ups": "3.1", "wpb": "118379", "bsz": "256", "num_updates": "169400", "lr": "0.00083899", "gnorm": "0.658", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "55296"} +[2022-08-01 16:50:48,948][train_inner][INFO] - {"epoch": 4, "update": 3.295, "loss": "2.394", "ppl": "5.26", "wps": "363266", "ups": "3.08", "wpb": "117927", "bsz": "256", "num_updates": "169600", "lr": "0.000838788", "gnorm": "0.658", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "55361"} +[2022-08-01 16:51:53,817][train_inner][INFO] - {"epoch": 4, "update": 3.299, "loss": "2.394", "ppl": "5.25", "wps": "364316", "ups": "3.08", "wpb": "118164", "bsz": "256", "num_updates": "169800", "lr": "0.000838586", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "55426"} +[2022-08-01 16:52:58,589][train_inner][INFO] - {"epoch": 4, "update": 3.303, "loss": "2.397", "ppl": "5.27", "wps": "366416", "ups": "3.09", "wpb": "118665", "bsz": "256", "num_updates": "170000", "lr": "0.000838384", "gnorm": "0.66", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "55491"} +[2022-08-01 16:54:03,646][train_inner][INFO] - {"epoch": 4, "update": 3.307, "loss": "2.393", "ppl": "5.25", "wps": "364267", "ups": "3.07", "wpb": "118488", "bsz": "256", "num_updates": "170200", "lr": "0.000838182", "gnorm": "0.658", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "55556"} +[2022-08-01 16:54:15,162][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 16:55:08,929][train_inner][INFO] - {"epoch": 4, "update": 3.311, "loss": "2.392", "ppl": "5.25", "wps": "363740", "ups": "3.06", "wpb": "118730", "bsz": "256", "num_updates": "170400", "lr": "0.00083798", "gnorm": "0.657", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "55621"} +[2022-08-01 16:56:13,967][train_inner][INFO] - {"epoch": 4, "update": 3.315, "loss": "2.397", "ppl": "5.27", "wps": "363417", "ups": "3.08", "wpb": "118178", "bsz": "256", "num_updates": "170600", "lr": "0.000837778", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "55686"} +[2022-08-01 16:57:18,858][train_inner][INFO] - {"epoch": 4, "update": 3.319, "loss": "2.393", "ppl": "5.25", "wps": "361463", "ups": "3.08", "wpb": "117277", "bsz": "256", "num_updates": "170800", "lr": "0.000837576", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "55751"} +[2022-08-01 16:58:23,656][train_inner][INFO] - {"epoch": 4, "update": 3.322, "loss": "2.399", "ppl": "5.27", "wps": "364081", "ups": "3.09", "wpb": "117958", "bsz": "256", "num_updates": "171000", "lr": "0.000837374", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "55816"} +[2022-08-01 16:59:28,403][train_inner][INFO] - {"epoch": 4, "update": 3.326, "loss": "2.399", "ppl": "5.27", "wps": "364829", "ups": "3.09", "wpb": "118106", "bsz": "256", "num_updates": "171200", "lr": "0.000837172", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "55881"} +[2022-08-01 17:00:33,207][train_inner][INFO] - {"epoch": 4, "update": 3.33, "loss": "2.39", "ppl": "5.24", "wps": "366318", "ups": "3.09", "wpb": "118693", "bsz": "256", "num_updates": "171400", "lr": "0.00083697", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "55946"} +[2022-08-01 17:01:38,227][train_inner][INFO] - {"epoch": 4, "update": 3.334, "loss": "2.398", "ppl": "5.27", "wps": "362437", "ups": "3.08", "wpb": "117826", "bsz": "256", "num_updates": "171600", "lr": "0.000836768", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "56011"} +[2022-08-01 17:02:43,115][train_inner][INFO] - {"epoch": 4, "update": 3.338, "loss": "2.391", "ppl": "5.24", "wps": "363963", "ups": "3.08", "wpb": "118083", "bsz": "256", "num_updates": "171800", "lr": "0.000836566", "gnorm": "0.661", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "56075"} +[2022-08-01 17:03:48,311][train_inner][INFO] - {"epoch": 4, "update": 3.342, "loss": "2.395", "ppl": "5.26", "wps": "359581", "ups": "3.07", "wpb": "117216", "bsz": "256", "num_updates": "172000", "lr": "0.000836364", "gnorm": "0.661", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "56141"} +[2022-08-01 17:04:53,343][train_inner][INFO] - {"epoch": 4, "update": 3.346, "loss": "2.393", "ppl": "5.25", "wps": "363695", "ups": "3.08", "wpb": "118257", "bsz": "256", "num_updates": "172200", "lr": "0.000836162", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.6", "wall": "56206"} +[2022-08-01 17:05:58,431][train_inner][INFO] - {"epoch": 4, "update": 3.35, "loss": "2.394", "ppl": "5.26", "wps": "363104", "ups": "3.07", "wpb": "118166", "bsz": "256", "num_updates": "172400", "lr": "0.00083596", "gnorm": "0.659", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "56271"} +[2022-08-01 17:07:03,231][train_inner][INFO] - {"epoch": 4, "update": 3.354, "loss": "2.393", "ppl": "5.25", "wps": "366052", "ups": "3.09", "wpb": "118599", "bsz": "256", "num_updates": "172600", "lr": "0.000835758", "gnorm": "0.661", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "56336"} +[2022-08-01 17:07:54,180][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 17:08:08,217][train_inner][INFO] - {"epoch": 4, "update": 3.357, "loss": "2.391", "ppl": "5.25", "wps": "364488", "ups": "3.08", "wpb": "118433", "bsz": "256", "num_updates": "172800", "lr": "0.000835556", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "56401"} +[2022-08-01 17:09:13,156][train_inner][INFO] - {"epoch": 4, "update": 3.361, "loss": "2.399", "ppl": "5.27", "wps": "364823", "ups": "3.08", "wpb": "118454", "bsz": "256", "num_updates": "173000", "lr": "0.000835354", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "56465"} +[2022-08-01 17:10:18,641][train_inner][INFO] - {"epoch": 4, "update": 3.365, "loss": "2.389", "ppl": "5.24", "wps": "361810", "ups": "3.05", "wpb": "118465", "bsz": "256", "num_updates": "173200", "lr": "0.000835152", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "56531"} +[2022-08-01 17:11:23,339][train_inner][INFO] - {"epoch": 4, "update": 3.369, "loss": "2.388", "ppl": "5.23", "wps": "365094", "ups": "3.09", "wpb": "118103", "bsz": "256", "num_updates": "173400", "lr": "0.000834949", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.8", "wall": "56596"} +[2022-08-01 17:12:28,028][train_inner][INFO] - {"epoch": 4, "update": 3.373, "loss": "2.393", "ppl": "5.25", "wps": "366419", "ups": "3.09", "wpb": "118515", "bsz": "256", "num_updates": "173600", "lr": "0.000834747", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "56660"} +[2022-08-01 17:13:32,802][train_inner][INFO] - {"epoch": 4, "update": 3.377, "loss": "2.386", "ppl": "5.23", "wps": "367155", "ups": "3.09", "wpb": "118908", "bsz": "256", "num_updates": "173800", "lr": "0.000834545", "gnorm": "0.657", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "56725"} +[2022-08-01 17:14:37,596][train_inner][INFO] - {"epoch": 4, "update": 3.381, "loss": "2.387", "ppl": "5.23", "wps": "366171", "ups": "3.09", "wpb": "118627", "bsz": "256", "num_updates": "174000", "lr": "0.000834343", "gnorm": "0.661", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "56790"} +[2022-08-01 17:15:42,700][train_inner][INFO] - {"epoch": 4, "update": 3.385, "loss": "2.395", "ppl": "5.26", "wps": "362916", "ups": "3.07", "wpb": "118134", "bsz": "256", "num_updates": "174200", "lr": "0.000834141", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "56855"} +[2022-08-01 17:16:47,199][train_inner][INFO] - {"epoch": 4, "update": 3.389, "loss": "2.386", "ppl": "5.23", "wps": "364762", "ups": "3.1", "wpb": "117633", "bsz": "256", "num_updates": "174400", "lr": "0.000833939", "gnorm": "0.662", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "56920"} +[2022-08-01 17:17:51,828][train_inner][INFO] - {"epoch": 4, "update": 3.392, "loss": "2.396", "ppl": "5.26", "wps": "363746", "ups": "3.09", "wpb": "117540", "bsz": "256", "num_updates": "174600", "lr": "0.000833737", "gnorm": "0.662", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "56984"} +[2022-08-01 17:18:56,253][train_inner][INFO] - {"epoch": 4, "update": 3.396, "loss": "2.392", "ppl": "5.25", "wps": "367028", "ups": "3.1", "wpb": "118229", "bsz": "256", "num_updates": "174800", "lr": "0.000833535", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "57049"} +[2022-08-01 17:20:01,173][train_inner][INFO] - {"epoch": 4, "update": 3.4, "loss": "2.393", "ppl": "5.25", "wps": "365820", "ups": "3.08", "wpb": "118742", "bsz": "256", "num_updates": "175000", "lr": "0.000833333", "gnorm": "0.657", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "57113"} +[2022-08-01 17:21:06,043][train_inner][INFO] - {"epoch": 4, "update": 3.404, "loss": "2.39", "ppl": "5.24", "wps": "364063", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "175200", "lr": "0.000833131", "gnorm": "0.663", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "57178"} +[2022-08-01 17:21:31,021][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 17:21:51,929][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 17:22:11,419][train_inner][INFO] - {"epoch": 4, "update": 3.408, "loss": "2.391", "ppl": "5.24", "wps": "361346", "ups": "3.06", "wpb": "118116", "bsz": "256", "num_updates": "175400", "lr": "0.000832929", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.7", "wall": "57244"} +[2022-08-01 17:23:16,368][train_inner][INFO] - {"epoch": 4, "update": 3.412, "loss": "2.393", "ppl": "5.25", "wps": "362175", "ups": "3.08", "wpb": "117614", "bsz": "256", "num_updates": "175600", "lr": "0.000832727", "gnorm": "0.661", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "57309"} +[2022-08-01 17:24:21,262][train_inner][INFO] - {"epoch": 4, "update": 3.416, "loss": "2.388", "ppl": "5.23", "wps": "364573", "ups": "3.08", "wpb": "118290", "bsz": "256", "num_updates": "175800", "lr": "0.000832525", "gnorm": "0.66", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "57374"} +[2022-08-01 17:24:56,562][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 17:25:26,549][train_inner][INFO] - {"epoch": 4, "update": 3.42, "loss": "2.379", "ppl": "5.2", "wps": "362879", "ups": "3.06", "wpb": "118455", "bsz": "256", "num_updates": "176000", "lr": "0.000832323", "gnorm": "0.659", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.1", "wall": "57439"} +[2022-08-01 17:26:31,343][train_inner][INFO] - {"epoch": 4, "update": 3.424, "loss": "2.39", "ppl": "5.24", "wps": "365928", "ups": "3.09", "wpb": "118548", "bsz": "256", "num_updates": "176200", "lr": "0.000832121", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.5", "wall": "57504"} +[2022-08-01 17:27:08,751][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 17:27:36,466][train_inner][INFO] - {"epoch": 4, "update": 3.427, "loss": "2.393", "ppl": "5.25", "wps": "362488", "ups": "3.07", "wpb": "118031", "bsz": "256", "num_updates": "176400", "lr": "0.000831919", "gnorm": "0.663", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "57569"} +[2022-08-01 17:28:41,275][train_inner][INFO] - {"epoch": 4, "update": 3.431, "loss": "2.388", "ppl": "5.23", "wps": "365801", "ups": "3.09", "wpb": "118534", "bsz": "256", "num_updates": "176600", "lr": "0.000831717", "gnorm": "0.661", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.9", "wall": "57634"} +[2022-08-01 17:29:46,158][train_inner][INFO] - {"epoch": 4, "update": 3.435, "loss": "2.384", "ppl": "5.22", "wps": "365286", "ups": "3.08", "wpb": "118503", "bsz": "256", "num_updates": "176800", "lr": "0.000831515", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.4", "wall": "57698"} +[2022-08-01 17:30:50,933][train_inner][INFO] - {"epoch": 4, "update": 3.439, "loss": "2.386", "ppl": "5.23", "wps": "366134", "ups": "3.09", "wpb": "118581", "bsz": "256", "num_updates": "177000", "lr": "0.000831313", "gnorm": "0.657", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "57763"} +[2022-08-01 17:31:55,499][train_inner][INFO] - {"epoch": 4, "update": 3.443, "loss": "2.389", "ppl": "5.24", "wps": "365103", "ups": "3.1", "wpb": "117864", "bsz": "256", "num_updates": "177200", "lr": "0.000831111", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "57828"} +[2022-08-01 17:33:01,550][train_inner][INFO] - {"epoch": 4, "update": 3.447, "loss": "2.39", "ppl": "5.24", "wps": "358267", "ups": "3.03", "wpb": "118317", "bsz": "256", "num_updates": "177400", "lr": "0.000830909", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "23.4", "wall": "57894"} +[2022-08-01 17:34:07,583][train_inner][INFO] - {"epoch": 4, "update": 3.451, "loss": "2.39", "ppl": "5.24", "wps": "359552", "ups": "3.03", "wpb": "118711", "bsz": "256", "num_updates": "177600", "lr": "0.000830707", "gnorm": "0.657", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "20.3", "wall": "57960"} +[2022-08-01 17:35:12,534][train_inner][INFO] - {"epoch": 4, "update": 3.455, "loss": "2.382", "ppl": "5.21", "wps": "363968", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "177800", "lr": "0.000830505", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "58025"} +[2022-08-01 17:36:17,227][train_inner][INFO] - {"epoch": 4, "update": 3.459, "loss": "2.386", "ppl": "5.23", "wps": "365684", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "178000", "lr": "0.000830303", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "58090"} +[2022-08-01 17:37:22,538][train_inner][INFO] - {"epoch": 4, "update": 3.462, "loss": "2.383", "ppl": "5.22", "wps": "360574", "ups": "3.06", "wpb": "117747", "bsz": "256", "num_updates": "178200", "lr": "0.000830101", "gnorm": "0.663", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "58155"} +[2022-08-01 17:38:27,322][train_inner][INFO] - {"epoch": 4, "update": 3.466, "loss": "2.383", "ppl": "5.22", "wps": "365577", "ups": "3.09", "wpb": "118415", "bsz": "256", "num_updates": "178400", "lr": "0.000829899", "gnorm": "0.663", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.2", "wall": "58220"} +[2022-08-01 17:39:32,108][train_inner][INFO] - {"epoch": 4, "update": 3.47, "loss": "2.383", "ppl": "5.22", "wps": "366019", "ups": "3.09", "wpb": "118562", "bsz": "256", "num_updates": "178600", "lr": "0.000829697", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "58284"} +[2022-08-01 17:40:36,932][train_inner][INFO] - {"epoch": 4, "update": 3.474, "loss": "2.381", "ppl": "5.21", "wps": "366968", "ups": "3.09", "wpb": "118942", "bsz": "256", "num_updates": "178800", "lr": "0.000829495", "gnorm": "0.66", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "58349"} +[2022-08-01 17:41:41,779][train_inner][INFO] - {"epoch": 4, "update": 3.478, "loss": "2.381", "ppl": "5.21", "wps": "365196", "ups": "3.08", "wpb": "118407", "bsz": "256", "num_updates": "179000", "lr": "0.000829293", "gnorm": "0.662", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "58414"} +[2022-08-01 17:42:46,834][train_inner][INFO] - {"epoch": 4, "update": 3.482, "loss": "2.387", "ppl": "5.23", "wps": "364891", "ups": "3.07", "wpb": "118688", "bsz": "256", "num_updates": "179200", "lr": "0.000829091", "gnorm": "0.66", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "58479"} +[2022-08-01 17:43:51,655][train_inner][INFO] - {"epoch": 4, "update": 3.486, "loss": "2.392", "ppl": "5.25", "wps": "364839", "ups": "3.09", "wpb": "118244", "bsz": "256", "num_updates": "179400", "lr": "0.000828889", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.9", "wall": "58544"} +[2022-08-01 17:44:56,511][train_inner][INFO] - {"epoch": 4, "update": 3.49, "loss": "2.378", "ppl": "5.2", "wps": "365803", "ups": "3.08", "wpb": "118622", "bsz": "256", "num_updates": "179600", "lr": "0.000828687", "gnorm": "0.66", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "58609"} +[2022-08-01 17:46:01,527][train_inner][INFO] - {"epoch": 4, "update": 3.493, "loss": "2.381", "ppl": "5.21", "wps": "364480", "ups": "3.08", "wpb": "118483", "bsz": "256", "num_updates": "179800", "lr": "0.000828485", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "58674"} +[2022-08-01 17:47:06,266][train_inner][INFO] - {"epoch": 4, "update": 3.497, "loss": "2.384", "ppl": "5.22", "wps": "365315", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "180000", "lr": "0.000828283", "gnorm": "0.662", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "58739"} +[2022-08-01 17:47:30,165][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 17:48:11,230][train_inner][INFO] - {"epoch": 4, "update": 3.501, "loss": "2.41", "ppl": "5.31", "wps": "364302", "ups": "3.08", "wpb": "118329", "bsz": "256", "num_updates": "180200", "lr": "0.000828081", "gnorm": "0.839", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "58804"} +[2022-08-01 17:49:15,791][train_inner][INFO] - {"epoch": 4, "update": 3.505, "loss": "2.383", "ppl": "5.22", "wps": "367613", "ups": "3.1", "wpb": "118668", "bsz": "256", "num_updates": "180400", "lr": "0.000827879", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "58868"} +[2022-08-01 17:50:20,551][train_inner][INFO] - {"epoch": 4, "update": 3.509, "loss": "2.389", "ppl": "5.24", "wps": "365582", "ups": "3.09", "wpb": "118373", "bsz": "256", "num_updates": "180600", "lr": "0.000827677", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.8", "wall": "58933"} +[2022-08-01 17:51:25,541][train_inner][INFO] - {"epoch": 4, "update": 3.513, "loss": "2.386", "ppl": "5.23", "wps": "363391", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "180800", "lr": "0.000827475", "gnorm": "0.665", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.9", "wall": "58998"} +[2022-08-01 17:52:30,968][train_inner][INFO] - {"epoch": 4, "update": 3.517, "loss": "2.382", "ppl": "5.21", "wps": "362889", "ups": "3.06", "wpb": "118712", "bsz": "256", "num_updates": "181000", "lr": "0.000827273", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.7", "wall": "59063"} +[2022-08-01 17:53:35,795][train_inner][INFO] - {"epoch": 4, "update": 3.521, "loss": "2.38", "ppl": "5.21", "wps": "365438", "ups": "3.09", "wpb": "118449", "bsz": "256", "num_updates": "181200", "lr": "0.000827071", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.7", "wall": "59128"} +[2022-08-01 17:54:40,977][train_inner][INFO] - {"epoch": 4, "update": 3.525, "loss": "2.383", "ppl": "5.22", "wps": "361843", "ups": "3.07", "wpb": "117925", "bsz": "256", "num_updates": "181400", "lr": "0.000826869", "gnorm": "0.659", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "59193"} +[2022-08-01 17:55:45,766][train_inner][INFO] - {"epoch": 4, "update": 3.528, "loss": "2.385", "ppl": "5.23", "wps": "366282", "ups": "3.09", "wpb": "118653", "bsz": "256", "num_updates": "181600", "lr": "0.000826667", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "59258"} +[2022-08-01 17:56:50,877][train_inner][INFO] - {"epoch": 4, "update": 3.532, "loss": "2.385", "ppl": "5.22", "wps": "360740", "ups": "3.07", "wpb": "117438", "bsz": "256", "num_updates": "181800", "lr": "0.000826465", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "59323"} +[2022-08-01 17:57:55,820][train_inner][INFO] - {"epoch": 4, "update": 3.536, "loss": "2.385", "ppl": "5.22", "wps": "365284", "ups": "3.08", "wpb": "118611", "bsz": "256", "num_updates": "182000", "lr": "0.000826263", "gnorm": "0.661", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.4", "wall": "59388"} +[2022-08-01 17:59:00,897][train_inner][INFO] - {"epoch": 4, "update": 3.54, "loss": "2.381", "ppl": "5.21", "wps": "363492", "ups": "3.07", "wpb": "118272", "bsz": "256", "num_updates": "182200", "lr": "0.000826061", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "59453"} +[2022-08-01 18:00:05,577][train_inner][INFO] - {"epoch": 4, "update": 3.544, "loss": "2.377", "ppl": "5.19", "wps": "366398", "ups": "3.09", "wpb": "118492", "bsz": "256", "num_updates": "182400", "lr": "0.000825859", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.7", "wall": "59518"} +[2022-08-01 18:01:10,464][train_inner][INFO] - {"epoch": 4, "update": 3.548, "loss": "2.385", "ppl": "5.22", "wps": "364513", "ups": "3.08", "wpb": "118259", "bsz": "256", "num_updates": "182600", "lr": "0.000825657", "gnorm": "0.663", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "59583"} +[2022-08-01 18:02:15,590][train_inner][INFO] - {"epoch": 4, "update": 3.552, "loss": "2.382", "ppl": "5.21", "wps": "363318", "ups": "3.07", "wpb": "118306", "bsz": "256", "num_updates": "182800", "lr": "0.000825455", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "59648"} +[2022-08-01 18:03:20,693][train_inner][INFO] - {"epoch": 4, "update": 3.556, "loss": "2.378", "ppl": "5.2", "wps": "365050", "ups": "3.07", "wpb": "118828", "bsz": "256", "num_updates": "183000", "lr": "0.000825253", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.5", "wall": "59713"} +[2022-08-01 18:04:25,643][train_inner][INFO] - {"epoch": 4, "update": 3.56, "loss": "2.383", "ppl": "5.22", "wps": "365296", "ups": "3.08", "wpb": "118627", "bsz": "256", "num_updates": "183200", "lr": "0.000825051", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "59778"} +[2022-08-01 18:05:30,435][train_inner][INFO] - {"epoch": 4, "update": 3.563, "loss": "2.391", "ppl": "5.24", "wps": "365274", "ups": "3.09", "wpb": "118332", "bsz": "256", "num_updates": "183400", "lr": "0.000824848", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "59843"} +[2022-08-01 18:06:35,883][train_inner][INFO] - {"epoch": 4, "update": 3.567, "loss": "2.376", "ppl": "5.19", "wps": "363549", "ups": "3.06", "wpb": "118966", "bsz": "256", "num_updates": "183600", "lr": "0.000824646", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "59908"} +[2022-08-01 18:07:40,846][train_inner][INFO] - {"epoch": 4, "update": 3.571, "loss": "2.384", "ppl": "5.22", "wps": "365557", "ups": "3.08", "wpb": "118732", "bsz": "256", "num_updates": "183800", "lr": "0.000824444", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "59973"} +[2022-08-01 18:08:45,592][train_inner][INFO] - {"epoch": 4, "update": 3.575, "loss": "2.378", "ppl": "5.2", "wps": "366412", "ups": "3.09", "wpb": "118616", "bsz": "256", "num_updates": "184000", "lr": "0.000824242", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.8", "wall": "60038"} +[2022-08-01 18:09:50,097][train_inner][INFO] - {"epoch": 4, "update": 3.579, "loss": "2.384", "ppl": "5.22", "wps": "365839", "ups": "3.1", "wpb": "117990", "bsz": "256", "num_updates": "184200", "lr": "0.00082404", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.4", "wall": "60102"} +[2022-08-01 18:10:55,015][train_inner][INFO] - {"epoch": 4, "update": 3.583, "loss": "2.386", "ppl": "5.23", "wps": "364076", "ups": "3.08", "wpb": "118175", "bsz": "256", "num_updates": "184400", "lr": "0.000823838", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.1", "wall": "60167"} +[2022-08-01 18:12:00,224][train_inner][INFO] - {"epoch": 4, "update": 3.587, "loss": "2.385", "ppl": "5.23", "wps": "362817", "ups": "3.07", "wpb": "118292", "bsz": "256", "num_updates": "184600", "lr": "0.000823636", "gnorm": "0.663", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "60233"} +[2022-08-01 18:13:05,229][train_inner][INFO] - {"epoch": 4, "update": 3.591, "loss": "2.379", "ppl": "5.2", "wps": "365028", "ups": "3.08", "wpb": "118641", "bsz": "256", "num_updates": "184800", "lr": "0.000823434", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "60298"} +[2022-08-01 18:14:09,942][train_inner][INFO] - {"epoch": 4, "update": 3.594, "loss": "2.384", "ppl": "5.22", "wps": "364749", "ups": "3.09", "wpb": "118018", "bsz": "256", "num_updates": "185000", "lr": "0.000823232", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "60362"} +[2022-08-01 18:15:14,866][train_inner][INFO] - {"epoch": 4, "update": 3.598, "loss": "2.375", "ppl": "5.19", "wps": "364396", "ups": "3.08", "wpb": "118289", "bsz": "256", "num_updates": "185200", "lr": "0.00082303", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "60427"} +[2022-08-01 18:16:19,641][train_inner][INFO] - {"epoch": 4, "update": 3.602, "loss": "2.381", "ppl": "5.21", "wps": "365362", "ups": "3.09", "wpb": "118329", "bsz": "256", "num_updates": "185400", "lr": "0.000822828", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "60492"} +[2022-08-01 18:17:24,310][train_inner][INFO] - {"epoch": 4, "update": 3.606, "loss": "2.386", "ppl": "5.23", "wps": "364248", "ups": "3.09", "wpb": "117777", "bsz": "256", "num_updates": "185600", "lr": "0.000822626", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.1", "wall": "60557"} +[2022-08-01 18:18:28,921][train_inner][INFO] - {"epoch": 4, "update": 3.61, "loss": "2.387", "ppl": "5.23", "wps": "365360", "ups": "3.1", "wpb": "118030", "bsz": "256", "num_updates": "185800", "lr": "0.000822424", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.3", "wall": "60621"} +[2022-08-01 18:19:35,060][train_inner][INFO] - {"epoch": 4, "update": 3.614, "loss": "2.379", "ppl": "5.2", "wps": "356832", "ups": "3.02", "wpb": "118000", "bsz": "256", "num_updates": "186000", "lr": "0.000822222", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "19.8", "wall": "60687"} +[2022-08-01 18:20:40,045][train_inner][INFO] - {"epoch": 4, "update": 3.618, "loss": "2.373", "ppl": "5.18", "wps": "365374", "ups": "3.08", "wpb": "118716", "bsz": "256", "num_updates": "186200", "lr": "0.00082202", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "60752"} +[2022-08-01 18:21:45,559][train_inner][INFO] - {"epoch": 4, "update": 3.622, "loss": "2.378", "ppl": "5.2", "wps": "358447", "ups": "3.05", "wpb": "117414", "bsz": "256", "num_updates": "186400", "lr": "0.000821818", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "60818"} +[2022-08-01 18:22:51,478][train_inner][INFO] - {"epoch": 4, "update": 3.626, "loss": "2.375", "ppl": "5.19", "wps": "359188", "ups": "3.03", "wpb": "118386", "bsz": "256", "num_updates": "186600", "lr": "0.000821616", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.1", "wall": "60884"} +[2022-08-01 18:23:56,230][train_inner][INFO] - {"epoch": 4, "update": 3.629, "loss": "2.374", "ppl": "5.18", "wps": "367359", "ups": "3.09", "wpb": "118934", "bsz": "256", "num_updates": "186800", "lr": "0.000821414", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "60949"} +[2022-08-01 18:25:02,519][train_inner][INFO] - {"epoch": 4, "update": 3.633, "loss": "2.374", "ppl": "5.18", "wps": "357850", "ups": "3.02", "wpb": "118605", "bsz": "256", "num_updates": "187000", "lr": "0.000821212", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.9", "wall": "61015"} +[2022-08-01 18:26:07,554][train_inner][INFO] - {"epoch": 4, "update": 3.637, "loss": "2.377", "ppl": "5.19", "wps": "364972", "ups": "3.08", "wpb": "118678", "bsz": "256", "num_updates": "187200", "lr": "0.00082101", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "61080"} +[2022-08-01 18:27:12,782][train_inner][INFO] - {"epoch": 4, "update": 3.641, "loss": "2.379", "ppl": "5.2", "wps": "363651", "ups": "3.07", "wpb": "118599", "bsz": "256", "num_updates": "187400", "lr": "0.000820808", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "61145"} +[2022-08-01 18:28:18,965][train_inner][INFO] - {"epoch": 4, "update": 3.645, "loss": "2.378", "ppl": "5.2", "wps": "356468", "ups": "3.02", "wpb": "117958", "bsz": "256", "num_updates": "187600", "lr": "0.000820606", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "26.4", "wall": "61211"} +[2022-08-01 18:29:24,049][train_inner][INFO] - {"epoch": 4, "update": 3.649, "loss": "2.377", "ppl": "5.19", "wps": "364576", "ups": "3.07", "wpb": "118638", "bsz": "256", "num_updates": "187800", "lr": "0.000820404", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "61276"} +[2022-08-01 18:30:29,200][train_inner][INFO] - {"epoch": 4, "update": 3.653, "loss": "2.372", "ppl": "5.18", "wps": "364062", "ups": "3.07", "wpb": "118594", "bsz": "256", "num_updates": "188000", "lr": "0.000820202", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "61342"} +[2022-08-01 18:31:34,011][train_inner][INFO] - {"epoch": 4, "update": 3.657, "loss": "2.378", "ppl": "5.2", "wps": "366088", "ups": "3.09", "wpb": "118630", "bsz": "256", "num_updates": "188200", "lr": "0.00082", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "61406"} +[2022-08-01 18:32:38,882][train_inner][INFO] - {"epoch": 4, "update": 3.661, "loss": "2.38", "ppl": "5.2", "wps": "365505", "ups": "3.08", "wpb": "118553", "bsz": "256", "num_updates": "188400", "lr": "0.000819798", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "61471"} +[2022-08-01 18:33:43,878][train_inner][INFO] - {"epoch": 4, "update": 3.664, "loss": "2.371", "ppl": "5.17", "wps": "363879", "ups": "3.08", "wpb": "118251", "bsz": "256", "num_updates": "188600", "lr": "0.000819596", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "61536"} +[2022-08-01 18:34:48,593][train_inner][INFO] - {"epoch": 4, "update": 3.668, "loss": "2.377", "ppl": "5.2", "wps": "365530", "ups": "3.09", "wpb": "118276", "bsz": "256", "num_updates": "188800", "lr": "0.000819394", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "61601"} +[2022-08-01 18:35:53,802][train_inner][INFO] - {"epoch": 4, "update": 3.672, "loss": "2.375", "ppl": "5.19", "wps": "363943", "ups": "3.07", "wpb": "118659", "bsz": "256", "num_updates": "189000", "lr": "0.000819192", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "61666"} +[2022-08-01 18:36:58,641][train_inner][INFO] - {"epoch": 4, "update": 3.676, "loss": "2.375", "ppl": "5.19", "wps": "364008", "ups": "3.08", "wpb": "118009", "bsz": "256", "num_updates": "189200", "lr": "0.00081899", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "61731"} +[2022-08-01 18:38:00,458][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 18:38:03,940][train_inner][INFO] - {"epoch": 4, "update": 3.68, "loss": "2.377", "ppl": "5.2", "wps": "364214", "ups": "3.06", "wpb": "118911", "bsz": "256", "num_updates": "189400", "lr": "0.000818788", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "61796"} +[2022-08-01 18:39:08,663][train_inner][INFO] - {"epoch": 4, "update": 3.684, "loss": "2.366", "ppl": "5.15", "wps": "365910", "ups": "3.09", "wpb": "118415", "bsz": "256", "num_updates": "189600", "lr": "0.000818586", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "61861"} +[2022-08-01 18:40:13,317][train_inner][INFO] - {"epoch": 4, "update": 3.688, "loss": "2.366", "ppl": "5.16", "wps": "367761", "ups": "3.09", "wpb": "118884", "bsz": "256", "num_updates": "189800", "lr": "0.000818384", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "61926"} +[2022-08-01 18:41:18,139][train_inner][INFO] - {"epoch": 4, "update": 3.692, "loss": "2.369", "ppl": "5.17", "wps": "365622", "ups": "3.09", "wpb": "118500", "bsz": "256", "num_updates": "190000", "lr": "0.000818182", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "61990"} +[2022-08-01 18:42:23,333][train_inner][INFO] - {"epoch": 4, "update": 3.696, "loss": "2.37", "ppl": "5.17", "wps": "363025", "ups": "3.07", "wpb": "118332", "bsz": "256", "num_updates": "190200", "lr": "0.00081798", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "62056"} +[2022-08-01 18:43:27,972][train_inner][INFO] - {"epoch": 4, "update": 3.699, "loss": "2.378", "ppl": "5.2", "wps": "365993", "ups": "3.09", "wpb": "118287", "bsz": "256", "num_updates": "190400", "lr": "0.000817778", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.2", "wall": "62120"} +[2022-08-01 18:44:32,356][train_inner][INFO] - {"epoch": 4, "update": 3.703, "loss": "2.373", "ppl": "5.18", "wps": "367234", "ups": "3.11", "wpb": "118217", "bsz": "256", "num_updates": "190600", "lr": "0.000817576", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "62185"} +[2022-08-01 18:45:37,437][train_inner][INFO] - {"epoch": 4, "update": 3.707, "loss": "2.375", "ppl": "5.19", "wps": "363238", "ups": "3.07", "wpb": "118198", "bsz": "256", "num_updates": "190800", "lr": "0.000817374", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "62250"} +[2022-08-01 18:45:39,661][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 18:45:40,602][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 18:45:41,202][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 18:45:41,496][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 18:45:41,795][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-01 18:46:44,145][train_inner][INFO] - {"epoch": 4, "update": 3.711, "loss": "2.387", "ppl": "5.23", "wps": "356060", "ups": "3", "wpb": "118759", "bsz": "256", "num_updates": "191000", "lr": "0.000817172", "gnorm": "0.973", "clip": "0.5", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.5", "wall": "62316"} +[2022-08-01 18:47:49,019][train_inner][INFO] - {"epoch": 4, "update": 3.715, "loss": "2.375", "ppl": "5.19", "wps": "364366", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "191200", "lr": "0.00081697", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "62381"} +[2022-08-01 18:48:53,703][train_inner][INFO] - {"epoch": 4, "update": 3.719, "loss": "2.374", "ppl": "5.19", "wps": "366708", "ups": "3.09", "wpb": "118598", "bsz": "256", "num_updates": "191400", "lr": "0.000816768", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "62446"} +[2022-08-01 18:49:58,310][train_inner][INFO] - {"epoch": 4, "update": 3.723, "loss": "2.369", "ppl": "5.17", "wps": "366659", "ups": "3.1", "wpb": "118442", "bsz": "256", "num_updates": "191600", "lr": "0.000816566", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "27.5", "wall": "62511"} +[2022-08-01 18:51:03,550][train_inner][INFO] - {"epoch": 4, "update": 3.727, "loss": "2.378", "ppl": "5.2", "wps": "362286", "ups": "3.07", "wpb": "118174", "bsz": "256", "num_updates": "191800", "lr": "0.000816364", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "62576"} +[2022-08-01 18:52:09,235][train_inner][INFO] - {"epoch": 4, "update": 3.731, "loss": "2.378", "ppl": "5.2", "wps": "359861", "ups": "3.04", "wpb": "118187", "bsz": "256", "num_updates": "192000", "lr": "0.000816162", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "27.5", "wall": "62642"} +[2022-08-01 18:53:14,083][train_inner][INFO] - {"epoch": 4, "update": 3.734, "loss": "2.382", "ppl": "5.21", "wps": "362126", "ups": "3.08", "wpb": "117413", "bsz": "256", "num_updates": "192200", "lr": "0.00081596", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.1", "wall": "62706"} +[2022-08-01 18:54:19,237][train_inner][INFO] - {"epoch": 4, "update": 3.738, "loss": "2.372", "ppl": "5.18", "wps": "363480", "ups": "3.07", "wpb": "118408", "bsz": "256", "num_updates": "192400", "lr": "0.000815758", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.9", "wall": "62772"} +[2022-08-01 18:55:24,052][train_inner][INFO] - {"epoch": 4, "update": 3.742, "loss": "2.37", "ppl": "5.17", "wps": "365631", "ups": "3.09", "wpb": "118492", "bsz": "256", "num_updates": "192600", "lr": "0.000815556", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.4", "wall": "62836"} +[2022-08-01 18:56:29,864][train_inner][INFO] - {"epoch": 4, "update": 3.746, "loss": "2.369", "ppl": "5.17", "wps": "359840", "ups": "3.04", "wpb": "118406", "bsz": "256", "num_updates": "192800", "lr": "0.000815354", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.5", "wall": "62902"} +[2022-08-01 18:57:34,477][train_inner][INFO] - {"epoch": 4, "update": 3.75, "loss": "2.372", "ppl": "5.18", "wps": "364922", "ups": "3.1", "wpb": "117893", "bsz": "256", "num_updates": "193000", "lr": "0.000815152", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "62967"} +[2022-08-01 18:58:39,224][train_inner][INFO] - {"epoch": 4, "update": 3.754, "loss": "2.365", "ppl": "5.15", "wps": "365732", "ups": "3.09", "wpb": "118398", "bsz": "256", "num_updates": "193200", "lr": "0.000814949", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "63032"} +[2022-08-01 18:59:43,691][train_inner][INFO] - {"epoch": 4, "update": 3.758, "loss": "2.377", "ppl": "5.2", "wps": "366553", "ups": "3.1", "wpb": "118152", "bsz": "256", "num_updates": "193400", "lr": "0.000814747", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "63096"} +[2022-08-01 19:00:48,443][train_inner][INFO] - {"epoch": 4, "update": 3.762, "loss": "2.373", "ppl": "5.18", "wps": "366528", "ups": "3.09", "wpb": "118664", "bsz": "256", "num_updates": "193600", "lr": "0.000814545", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.4", "wall": "63161"} +[2022-08-01 19:01:54,163][train_inner][INFO] - {"epoch": 4, "update": 3.766, "loss": "2.369", "ppl": "5.17", "wps": "359318", "ups": "3.04", "wpb": "118071", "bsz": "256", "num_updates": "193800", "lr": "0.000814343", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "63226"} +[2022-08-01 19:02:58,655][train_inner][INFO] - {"epoch": 4, "update": 3.769, "loss": "2.377", "ppl": "5.19", "wps": "365323", "ups": "3.1", "wpb": "117801", "bsz": "256", "num_updates": "194000", "lr": "0.000814141", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.5", "wall": "63291"} +[2022-08-01 19:04:03,796][train_inner][INFO] - {"epoch": 4, "update": 3.773, "loss": "2.37", "ppl": "5.17", "wps": "362484", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "194200", "lr": "0.000813939", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.1", "wall": "63356"} +[2022-08-01 19:05:08,819][train_inner][INFO] - {"epoch": 4, "update": 3.777, "loss": "2.37", "ppl": "5.17", "wps": "363182", "ups": "3.08", "wpb": "118075", "bsz": "256", "num_updates": "194400", "lr": "0.000813737", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "63421"} +[2022-08-01 19:06:13,588][train_inner][INFO] - {"epoch": 4, "update": 3.781, "loss": "2.371", "ppl": "5.17", "wps": "364293", "ups": "3.09", "wpb": "117972", "bsz": "256", "num_updates": "194600", "lr": "0.000813535", "gnorm": "0.696", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.5", "wall": "63486"} +[2022-08-01 19:07:17,994][train_inner][INFO] - {"epoch": 4, "update": 3.785, "loss": "2.371", "ppl": "5.17", "wps": "368137", "ups": "3.11", "wpb": "118549", "bsz": "256", "num_updates": "194800", "lr": "0.000813333", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.3", "wall": "63550"} +[2022-08-01 19:08:22,669][train_inner][INFO] - {"epoch": 4, "update": 3.789, "loss": "2.365", "ppl": "5.15", "wps": "365994", "ups": "3.09", "wpb": "118352", "bsz": "256", "num_updates": "195000", "lr": "0.000813131", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "63615"} +[2022-08-01 19:09:27,809][train_inner][INFO] - {"epoch": 4, "update": 3.793, "loss": "2.374", "ppl": "5.19", "wps": "362489", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "195200", "lr": "0.000812929", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "63680"} +[2022-08-01 19:10:32,542][train_inner][INFO] - {"epoch": 4, "update": 3.797, "loss": "2.364", "ppl": "5.15", "wps": "366742", "ups": "3.09", "wpb": "118700", "bsz": "256", "num_updates": "195400", "lr": "0.000812727", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "63745"} +[2022-08-01 19:11:37,570][train_inner][INFO] - {"epoch": 4, "update": 3.8, "loss": "2.371", "ppl": "5.17", "wps": "363933", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "195600", "lr": "0.000812525", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "63810"} +[2022-08-01 19:12:42,341][train_inner][INFO] - {"epoch": 4, "update": 3.804, "loss": "2.365", "ppl": "5.15", "wps": "364030", "ups": "3.09", "wpb": "117892", "bsz": "256", "num_updates": "195800", "lr": "0.000812323", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.3", "wall": "63875"} +[2022-08-01 19:13:47,019][train_inner][INFO] - {"epoch": 4, "update": 3.808, "loss": "2.366", "ppl": "5.16", "wps": "367765", "ups": "3.09", "wpb": "118930", "bsz": "256", "num_updates": "196000", "lr": "0.000812121", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "63939"} +[2022-08-01 19:14:52,385][train_inner][INFO] - {"epoch": 4, "update": 3.812, "loss": "2.372", "ppl": "5.18", "wps": "361210", "ups": "3.06", "wpb": "118053", "bsz": "256", "num_updates": "196200", "lr": "0.000811919", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "64005"} +[2022-08-01 19:15:58,329][train_inner][INFO] - {"epoch": 4, "update": 3.816, "loss": "2.37", "ppl": "5.17", "wps": "359463", "ups": "3.03", "wpb": "118520", "bsz": "256", "num_updates": "196400", "lr": "0.000811717", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "22.8", "wall": "64071"} +[2022-08-01 19:17:03,021][train_inner][INFO] - {"epoch": 4, "update": 3.82, "loss": "2.372", "ppl": "5.18", "wps": "364479", "ups": "3.09", "wpb": "117893", "bsz": "256", "num_updates": "196600", "lr": "0.000811515", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22", "wall": "64135"} +[2022-08-01 19:18:07,873][train_inner][INFO] - {"epoch": 4, "update": 3.824, "loss": "2.366", "ppl": "5.15", "wps": "363811", "ups": "3.08", "wpb": "117968", "bsz": "256", "num_updates": "196800", "lr": "0.000811313", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23", "wall": "64200"} +[2022-08-01 19:19:12,461][train_inner][INFO] - {"epoch": 4, "update": 3.828, "loss": "2.367", "ppl": "5.16", "wps": "365961", "ups": "3.1", "wpb": "118181", "bsz": "256", "num_updates": "197000", "lr": "0.000811111", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "64265"} +[2022-08-01 19:20:17,420][train_inner][INFO] - {"epoch": 4, "update": 3.832, "loss": "2.364", "ppl": "5.15", "wps": "364709", "ups": "3.08", "wpb": "118454", "bsz": "256", "num_updates": "197200", "lr": "0.000810909", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "64330"} +[2022-08-01 19:20:31,394][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 19:20:31,692][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 19:21:23,044][train_inner][INFO] - {"epoch": 4, "update": 3.835, "loss": "2.371", "ppl": "5.17", "wps": "360997", "ups": "3.05", "wpb": "118449", "bsz": "256", "num_updates": "197400", "lr": "0.000810707", "gnorm": "0.79", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.5", "wall": "64395"} +[2022-08-01 19:22:28,151][train_inner][INFO] - {"epoch": 4, "update": 3.839, "loss": "2.373", "ppl": "5.18", "wps": "364201", "ups": "3.07", "wpb": "118559", "bsz": "256", "num_updates": "197600", "lr": "0.000810505", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.3", "wall": "64460"} +[2022-08-01 19:23:32,871][train_inner][INFO] - {"epoch": 4, "update": 3.843, "loss": "2.368", "ppl": "5.16", "wps": "365460", "ups": "3.09", "wpb": "118260", "bsz": "256", "num_updates": "197800", "lr": "0.000810303", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "64525"} +[2022-08-01 19:24:37,596][train_inner][INFO] - {"epoch": 4, "update": 3.847, "loss": "2.366", "ppl": "5.16", "wps": "366162", "ups": "3.09", "wpb": "118498", "bsz": "256", "num_updates": "198000", "lr": "0.000810101", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "64590"} +[2022-08-01 19:25:42,503][train_inner][INFO] - {"epoch": 4, "update": 3.851, "loss": "2.367", "ppl": "5.16", "wps": "365250", "ups": "3.08", "wpb": "118535", "bsz": "256", "num_updates": "198200", "lr": "0.000809899", "gnorm": "0.718", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.6", "wall": "64655"} +[2022-08-01 19:26:47,354][train_inner][INFO] - {"epoch": 4, "update": 3.855, "loss": "2.374", "ppl": "5.18", "wps": "363855", "ups": "3.08", "wpb": "117981", "bsz": "256", "num_updates": "198400", "lr": "0.000809697", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.2", "wall": "64720"} +[2022-08-01 19:27:52,472][train_inner][INFO] - {"epoch": 4, "update": 3.859, "loss": "2.367", "ppl": "5.16", "wps": "362735", "ups": "3.07", "wpb": "118101", "bsz": "256", "num_updates": "198600", "lr": "0.000809495", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.5", "wall": "64785"} +[2022-08-01 19:28:57,518][train_inner][INFO] - {"epoch": 4, "update": 3.863, "loss": "2.365", "ppl": "5.15", "wps": "363968", "ups": "3.07", "wpb": "118371", "bsz": "256", "num_updates": "198800", "lr": "0.000809293", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "64850"} +[2022-08-01 19:30:02,000][train_inner][INFO] - {"epoch": 4, "update": 3.867, "loss": "2.362", "ppl": "5.14", "wps": "366510", "ups": "3.1", "wpb": "118166", "bsz": "256", "num_updates": "199000", "lr": "0.000809091", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "64914"} +[2022-08-01 19:31:06,465][train_inner][INFO] - {"epoch": 4, "update": 3.87, "loss": "2.364", "ppl": "5.15", "wps": "365672", "ups": "3.1", "wpb": "117863", "bsz": "256", "num_updates": "199200", "lr": "0.000808889", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "64979"} +[2022-08-01 19:32:11,304][train_inner][INFO] - {"epoch": 4, "update": 3.874, "loss": "2.371", "ppl": "5.17", "wps": "362642", "ups": "3.08", "wpb": "117566", "bsz": "256", "num_updates": "199400", "lr": "0.000808687", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.9", "wall": "65044"} +[2022-08-01 19:33:16,035][train_inner][INFO] - {"epoch": 4, "update": 3.878, "loss": "2.368", "ppl": "5.16", "wps": "363892", "ups": "3.09", "wpb": "117772", "bsz": "256", "num_updates": "199600", "lr": "0.000808485", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "65108"} +[2022-08-01 19:34:20,804][train_inner][INFO] - {"epoch": 4, "update": 3.882, "loss": "2.363", "ppl": "5.14", "wps": "364194", "ups": "3.09", "wpb": "117941", "bsz": "256", "num_updates": "199800", "lr": "0.000808283", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.6", "wall": "65173"} +[2022-08-01 19:35:25,668][train_inner][INFO] - {"epoch": 4, "update": 3.886, "loss": "2.368", "ppl": "5.16", "wps": "365748", "ups": "3.08", "wpb": "118619", "bsz": "256", "num_updates": "200000", "lr": "0.000808081", "gnorm": "0.686", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "65238"} +[2022-08-01 19:35:25,670][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-01 19:35:48,423][valid][INFO] - {"epoch": 4, "valid_loss": "2.265", "valid_ppl": "4.81", "valid_wps": "1.61012e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "200000", "valid_best_loss": "2.265"} +[2022-08-01 19:35:48,427][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 4 @ 200000 updates +[2022-08-01 19:35:48,427][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_4_200000.pt +[2022-08-01 19:35:58,298][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_4_200000.pt +[2022-08-01 19:36:27,074][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_4_200000.pt (epoch 4 @ 200000 updates, score 2.265) (writing took 38.64764930214733 seconds) +[2022-08-01 19:37:31,949][train_inner][INFO] - {"epoch": 4, "update": 3.89, "loss": "2.364", "ppl": "5.15", "wps": "187928", "ups": "1.58", "wpb": "118658", "bsz": "256", "num_updates": "200200", "lr": "0.000807879", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "65364"} +[2022-08-01 19:38:37,026][train_inner][INFO] - {"epoch": 4, "update": 3.894, "loss": "2.362", "ppl": "5.14", "wps": "363952", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "200400", "lr": "0.000807677", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "65429"} +[2022-08-01 19:39:43,481][train_inner][INFO] - {"epoch": 4, "update": 3.898, "loss": "2.365", "ppl": "5.15", "wps": "356713", "ups": "3.01", "wpb": "118524", "bsz": "256", "num_updates": "200600", "lr": "0.000807475", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "20.3", "wall": "65496"} +[2022-08-01 19:40:48,837][train_inner][INFO] - {"epoch": 4, "update": 3.902, "loss": "2.358", "ppl": "5.13", "wps": "362642", "ups": "3.06", "wpb": "118502", "bsz": "256", "num_updates": "200800", "lr": "0.000807273", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "65561"} +[2022-08-01 19:41:53,564][train_inner][INFO] - {"epoch": 4, "update": 3.905, "loss": "2.363", "ppl": "5.14", "wps": "367279", "ups": "3.09", "wpb": "118863", "bsz": "256", "num_updates": "201000", "lr": "0.000807071", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "65626"} +[2022-08-01 19:42:58,610][train_inner][INFO] - {"epoch": 4, "update": 3.909, "loss": "2.369", "ppl": "5.16", "wps": "363921", "ups": "3.07", "wpb": "118358", "bsz": "256", "num_updates": "201200", "lr": "0.000806869", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.9", "wall": "65691"} +[2022-08-01 19:44:03,296][train_inner][INFO] - {"epoch": 4, "update": 3.913, "loss": "2.367", "ppl": "5.16", "wps": "364948", "ups": "3.09", "wpb": "118032", "bsz": "256", "num_updates": "201400", "lr": "0.000806667", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.7", "wall": "65756"} +[2022-08-01 19:45:08,221][train_inner][INFO] - {"epoch": 4, "update": 3.917, "loss": "2.354", "ppl": "5.11", "wps": "365148", "ups": "3.08", "wpb": "118535", "bsz": "256", "num_updates": "201600", "lr": "0.000806465", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "65821"} +[2022-08-01 19:46:13,063][train_inner][INFO] - {"epoch": 4, "update": 3.921, "loss": "2.369", "ppl": "5.16", "wps": "364444", "ups": "3.08", "wpb": "118154", "bsz": "255.9", "num_updates": "201800", "lr": "0.000806263", "gnorm": "0.715", "clip": "0.5", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "65885"} +[2022-08-01 19:47:17,817][train_inner][INFO] - {"epoch": 4, "update": 3.925, "loss": "2.364", "ppl": "5.15", "wps": "365316", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "202000", "lr": "0.000806061", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "65950"} +[2022-08-01 19:48:22,541][train_inner][INFO] - {"epoch": 4, "update": 3.929, "loss": "2.358", "ppl": "5.13", "wps": "365423", "ups": "3.09", "wpb": "118256", "bsz": "256", "num_updates": "202200", "lr": "0.000805859", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "66015"} +[2022-08-01 19:49:27,350][train_inner][INFO] - {"epoch": 4, "update": 3.933, "loss": "2.365", "ppl": "5.15", "wps": "365453", "ups": "3.09", "wpb": "118422", "bsz": "256", "num_updates": "202400", "lr": "0.000805657", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "66080"} +[2022-08-01 19:50:31,730][train_inner][INFO] - {"epoch": 4, "update": 3.936, "loss": "2.365", "ppl": "5.15", "wps": "366934", "ups": "3.11", "wpb": "118114", "bsz": "256", "num_updates": "202600", "lr": "0.000805455", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.9", "wall": "66144"} +[2022-08-01 19:51:36,430][train_inner][INFO] - {"epoch": 4, "update": 3.94, "loss": "2.364", "ppl": "5.15", "wps": "364818", "ups": "3.09", "wpb": "118016", "bsz": "256", "num_updates": "202800", "lr": "0.000805253", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "66209"} +[2022-08-01 19:52:17,076][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 19:52:17,699][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 19:52:23,505][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-01 19:52:42,387][train_inner][INFO] - {"epoch": 4, "update": 3.944, "loss": "2.431", "ppl": "5.39", "wps": "358262", "ups": "3.03", "wpb": "118148", "bsz": "256", "num_updates": "203000", "lr": "0.000805051", "gnorm": "1.64", "clip": "1", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.2", "wall": "66275"} +[2022-08-01 19:53:47,361][train_inner][INFO] - {"epoch": 4, "update": 3.948, "loss": "2.355", "ppl": "5.12", "wps": "364176", "ups": "3.08", "wpb": "118310", "bsz": "256", "num_updates": "203200", "lr": "0.000804848", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "66340"} +[2022-08-01 19:54:52,063][train_inner][INFO] - {"epoch": 4, "update": 3.952, "loss": "2.367", "ppl": "5.16", "wps": "364254", "ups": "3.09", "wpb": "117837", "bsz": "256", "num_updates": "203400", "lr": "0.000804646", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.2", "wall": "66404"} +[2022-08-01 19:55:56,763][train_inner][INFO] - {"epoch": 4, "update": 3.956, "loss": "2.364", "ppl": "5.15", "wps": "364266", "ups": "3.09", "wpb": "117839", "bsz": "256", "num_updates": "203600", "lr": "0.000804444", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.6", "wall": "66469"} +[2022-08-01 19:57:01,722][train_inner][INFO] - {"epoch": 4, "update": 3.96, "loss": "2.364", "ppl": "5.15", "wps": "362563", "ups": "3.08", "wpb": "117757", "bsz": "256", "num_updates": "203800", "lr": "0.000804242", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.7", "wall": "66534"} +[2022-08-01 19:58:06,304][train_inner][INFO] - {"epoch": 4, "update": 3.964, "loss": "2.362", "ppl": "5.14", "wps": "365646", "ups": "3.1", "wpb": "118069", "bsz": "256", "num_updates": "204000", "lr": "0.00080404", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "66599"} +[2022-08-01 19:59:11,283][train_inner][INFO] - {"epoch": 4, "update": 3.968, "loss": "2.364", "ppl": "5.15", "wps": "363847", "ups": "3.08", "wpb": "118210", "bsz": "256", "num_updates": "204200", "lr": "0.000803838", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.2", "wall": "66664"} +[2022-08-01 20:00:16,552][train_inner][INFO] - {"epoch": 4, "update": 3.971, "loss": "2.358", "ppl": "5.12", "wps": "364114", "ups": "3.06", "wpb": "118824", "bsz": "256", "num_updates": "204400", "lr": "0.000803636", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24", "wall": "66729"} +[2022-08-01 20:01:21,476][train_inner][INFO] - {"epoch": 4, "update": 3.975, "loss": "2.362", "ppl": "5.14", "wps": "363275", "ups": "3.08", "wpb": "117924", "bsz": "256", "num_updates": "204600", "lr": "0.000803434", "gnorm": "0.684", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.7", "wall": "66794"} +[2022-08-01 20:02:26,221][train_inner][INFO] - {"epoch": 4, "update": 3.979, "loss": "2.364", "ppl": "5.15", "wps": "363988", "ups": "3.09", "wpb": "117830", "bsz": "256", "num_updates": "204800", "lr": "0.000803232", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.4", "wall": "66859"} +[2022-08-01 20:03:30,880][train_inner][INFO] - {"epoch": 4, "update": 3.983, "loss": "2.37", "ppl": "5.17", "wps": "364321", "ups": "3.09", "wpb": "117781", "bsz": "256", "num_updates": "205000", "lr": "0.00080303", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "66923"} +[2022-08-01 20:04:35,759][train_inner][INFO] - {"epoch": 4, "update": 3.987, "loss": "2.358", "ppl": "5.13", "wps": "366074", "ups": "3.08", "wpb": "118751", "bsz": "256", "num_updates": "205200", "lr": "0.000802828", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.3", "wall": "66988"} +[2022-08-01 20:05:40,751][train_inner][INFO] - {"epoch": 4, "update": 3.991, "loss": "2.363", "ppl": "5.14", "wps": "365216", "ups": "3.08", "wpb": "118680", "bsz": "256", "num_updates": "205400", "lr": "0.000802626", "gnorm": "0.684", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.9", "wall": "67053"} +[2022-08-01 20:06:45,504][train_inner][INFO] - {"epoch": 4, "update": 3.995, "loss": "2.362", "ppl": "5.14", "wps": "364780", "ups": "3.09", "wpb": "118101", "bsz": "256", "num_updates": "205600", "lr": "0.000802424", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "67118"} +[2022-08-01 20:07:50,695][train_inner][INFO] - {"epoch": 4, "update": 3.999, "loss": "2.358", "ppl": "5.13", "wps": "362212", "ups": "3.07", "wpb": "118063", "bsz": "256", "num_updates": "205800", "lr": "0.000802222", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "67183"} +[2022-08-01 20:08:12,550][fairseq_cli.train][INFO] - end of epoch 4 (average epoch stats below) +[2022-08-01 20:08:12,551][train][INFO] - {"epoch": 4, "train_loss": "2.386", "train_ppl": "5.23", "train_wps": "362664", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "205868", "train_lr": "0.000802154", "train_gnorm": "0.67", "train_clip": "0", "train_loss_scale": "0.5", "train_train_wall": "16624", "train_gb_free": "22.2", "train_wall": "67205"} +[2022-08-01 20:08:12,663][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-01 20:08:12,666][fairseq.trainer][INFO] - begin training epoch 5 +[2022-08-01 20:08:12,667][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-01 20:09:05,828][train_inner][INFO] - {"epoch": 5, "update": 4.003, "loss": "2.364", "ppl": "5.15", "wps": "312709", "ups": "2.66", "wpb": "117472", "bsz": "255.4", "num_updates": "206000", "lr": "0.00080202", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "67258"} +[2022-08-01 20:10:11,949][train_inner][INFO] - {"epoch": 5, "update": 4.006, "loss": "2.363", "ppl": "5.15", "wps": "356929", "ups": "3.02", "wpb": "118001", "bsz": "256", "num_updates": "206200", "lr": "0.000801818", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "21.3", "wall": "67324"} +[2022-08-01 20:11:16,975][train_inner][INFO] - {"epoch": 5, "update": 4.01, "loss": "2.348", "ppl": "5.09", "wps": "363790", "ups": "3.08", "wpb": "118276", "bsz": "256", "num_updates": "206400", "lr": "0.000801616", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.7", "wall": "67389"} +[2022-08-01 20:12:21,859][train_inner][INFO] - {"epoch": 5, "update": 4.014, "loss": "2.358", "ppl": "5.13", "wps": "364282", "ups": "3.08", "wpb": "118180", "bsz": "256", "num_updates": "206600", "lr": "0.000801414", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.9", "wall": "67454"} +[2022-08-01 20:13:26,880][train_inner][INFO] - {"epoch": 5, "update": 4.018, "loss": "2.358", "ppl": "5.13", "wps": "364525", "ups": "3.08", "wpb": "118507", "bsz": "256", "num_updates": "206800", "lr": "0.000801212", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "67519"} +[2022-08-01 20:14:31,749][train_inner][INFO] - {"epoch": 5, "update": 4.022, "loss": "2.357", "ppl": "5.12", "wps": "365286", "ups": "3.08", "wpb": "118477", "bsz": "256", "num_updates": "207000", "lr": "0.00080101", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.3", "wall": "67584"} +[2022-08-01 20:15:36,467][train_inner][INFO] - {"epoch": 5, "update": 4.026, "loss": "2.357", "ppl": "5.12", "wps": "366058", "ups": "3.09", "wpb": "118451", "bsz": "256", "num_updates": "207200", "lr": "0.000800808", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "67649"} +[2022-08-01 20:16:41,675][train_inner][INFO] - {"epoch": 5, "update": 4.03, "loss": "2.349", "ppl": "5.1", "wps": "364819", "ups": "3.07", "wpb": "118944", "bsz": "256", "num_updates": "207400", "lr": "0.000800606", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.1", "wall": "67714"} +[2022-08-01 20:17:46,576][train_inner][INFO] - {"epoch": 5, "update": 4.034, "loss": "2.353", "ppl": "5.11", "wps": "365061", "ups": "3.08", "wpb": "118462", "bsz": "256", "num_updates": "207600", "lr": "0.000800404", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "27", "wall": "67779"} +[2022-08-01 20:18:52,440][train_inner][INFO] - {"epoch": 5, "update": 4.038, "loss": "2.354", "ppl": "5.11", "wps": "360777", "ups": "3.04", "wpb": "118808", "bsz": "256", "num_updates": "207800", "lr": "0.000800202", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.7", "wall": "67845"} +[2022-08-01 20:19:57,426][train_inner][INFO] - {"epoch": 5, "update": 4.041, "loss": "2.355", "ppl": "5.12", "wps": "364688", "ups": "3.08", "wpb": "118497", "bsz": "256", "num_updates": "208000", "lr": "0.0008", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "67910"} +[2022-08-01 20:21:02,564][train_inner][INFO] - {"epoch": 5, "update": 4.045, "loss": "2.36", "ppl": "5.13", "wps": "363056", "ups": "3.07", "wpb": "118244", "bsz": "256", "num_updates": "208200", "lr": "0.000799798", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "67975"} +[2022-08-01 20:22:07,767][train_inner][INFO] - {"epoch": 5, "update": 4.049, "loss": "2.353", "ppl": "5.11", "wps": "362853", "ups": "3.07", "wpb": "118293", "bsz": "256", "num_updates": "208400", "lr": "0.000799596", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.4", "wall": "68040"} +[2022-08-01 20:23:12,284][train_inner][INFO] - {"epoch": 5, "update": 4.053, "loss": "2.361", "ppl": "5.14", "wps": "365884", "ups": "3.1", "wpb": "118027", "bsz": "256", "num_updates": "208600", "lr": "0.000799394", "gnorm": "0.686", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "68105"} +[2022-08-01 20:24:17,222][train_inner][INFO] - {"epoch": 5, "update": 4.057, "loss": "2.357", "ppl": "5.12", "wps": "363501", "ups": "3.08", "wpb": "118023", "bsz": "256", "num_updates": "208800", "lr": "0.000799192", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "68170"} +[2022-08-01 20:25:22,537][train_inner][INFO] - {"epoch": 5, "update": 4.061, "loss": "2.354", "ppl": "5.11", "wps": "362500", "ups": "3.06", "wpb": "118382", "bsz": "256", "num_updates": "209000", "lr": "0.00079899", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "68235"} +[2022-08-01 20:26:27,886][train_inner][INFO] - {"epoch": 5, "update": 4.065, "loss": "2.357", "ppl": "5.12", "wps": "361368", "ups": "3.06", "wpb": "118074", "bsz": "256", "num_updates": "209200", "lr": "0.000798788", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "68300"} +[2022-08-01 20:27:32,657][train_inner][INFO] - {"epoch": 5, "update": 4.069, "loss": "2.355", "ppl": "5.12", "wps": "363904", "ups": "3.09", "wpb": "117851", "bsz": "256", "num_updates": "209400", "lr": "0.000798586", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "68365"} +[2022-08-01 20:28:37,820][train_inner][INFO] - {"epoch": 5, "update": 4.072, "loss": "2.354", "ppl": "5.11", "wps": "364569", "ups": "3.07", "wpb": "118780", "bsz": "256", "num_updates": "209600", "lr": "0.000798384", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "68430"} +[2022-08-01 20:29:42,515][train_inner][INFO] - {"epoch": 5, "update": 4.076, "loss": "2.355", "ppl": "5.12", "wps": "366124", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "209800", "lr": "0.000798182", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "68495"} +[2022-08-01 20:30:47,779][train_inner][INFO] - {"epoch": 5, "update": 4.08, "loss": "2.353", "ppl": "5.11", "wps": "361648", "ups": "3.06", "wpb": "118011", "bsz": "256", "num_updates": "210000", "lr": "0.00079798", "gnorm": "0.701", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "20.9", "wall": "68560"} +[2022-08-01 20:31:52,487][train_inner][INFO] - {"epoch": 5, "update": 4.084, "loss": "2.353", "ppl": "5.11", "wps": "365766", "ups": "3.09", "wpb": "118338", "bsz": "255.9", "num_updates": "210200", "lr": "0.000797778", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "68625"} +[2022-08-01 20:32:57,541][train_inner][INFO] - {"epoch": 5, "update": 4.088, "loss": "2.354", "ppl": "5.11", "wps": "365862", "ups": "3.07", "wpb": "119002", "bsz": "256", "num_updates": "210400", "lr": "0.000797576", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "68690"} +[2022-08-01 20:34:02,502][train_inner][INFO] - {"epoch": 5, "update": 4.092, "loss": "2.352", "ppl": "5.1", "wps": "363970", "ups": "3.08", "wpb": "118217", "bsz": "256", "num_updates": "210600", "lr": "0.000797374", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.8", "wall": "68755"} +[2022-08-01 20:35:07,184][train_inner][INFO] - {"epoch": 5, "update": 4.096, "loss": "2.356", "ppl": "5.12", "wps": "365226", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "210800", "lr": "0.000797172", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "68820"} +[2022-08-01 20:36:12,216][train_inner][INFO] - {"epoch": 5, "update": 4.1, "loss": "2.352", "ppl": "5.1", "wps": "363609", "ups": "3.08", "wpb": "118230", "bsz": "256", "num_updates": "211000", "lr": "0.00079697", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "68885"} +[2022-08-01 20:37:17,186][train_inner][INFO] - {"epoch": 5, "update": 4.104, "loss": "2.359", "ppl": "5.13", "wps": "363387", "ups": "3.08", "wpb": "118044", "bsz": "256", "num_updates": "211200", "lr": "0.000796768", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "68950"} +[2022-08-01 20:38:22,412][train_inner][INFO] - {"epoch": 5, "update": 4.107, "loss": "2.358", "ppl": "5.13", "wps": "363377", "ups": "3.07", "wpb": "118507", "bsz": "256", "num_updates": "211400", "lr": "0.000796566", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "69015"} +[2022-08-01 20:39:27,313][train_inner][INFO] - {"epoch": 5, "update": 4.111, "loss": "2.355", "ppl": "5.12", "wps": "365473", "ups": "3.08", "wpb": "118596", "bsz": "256", "num_updates": "211600", "lr": "0.000796364", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "69080"} +[2022-08-01 20:40:32,620][train_inner][INFO] - {"epoch": 5, "update": 4.115, "loss": "2.352", "ppl": "5.11", "wps": "361888", "ups": "3.06", "wpb": "118167", "bsz": "256", "num_updates": "211800", "lr": "0.000796162", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.4", "wall": "69145"} +[2022-08-01 20:41:37,412][train_inner][INFO] - {"epoch": 5, "update": 4.119, "loss": "2.353", "ppl": "5.11", "wps": "364862", "ups": "3.09", "wpb": "118200", "bsz": "256", "num_updates": "212000", "lr": "0.00079596", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "69210"} +[2022-08-01 20:42:42,479][train_inner][INFO] - {"epoch": 5, "update": 4.123, "loss": "2.345", "ppl": "5.08", "wps": "364478", "ups": "3.07", "wpb": "118576", "bsz": "256", "num_updates": "212200", "lr": "0.000795758", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "69275"} +[2022-08-01 20:43:47,313][train_inner][INFO] - {"epoch": 5, "update": 4.127, "loss": "2.355", "ppl": "5.12", "wps": "366131", "ups": "3.08", "wpb": "118687", "bsz": "256", "num_updates": "212400", "lr": "0.000795556", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "69340"} +[2022-08-01 20:44:52,219][train_inner][INFO] - {"epoch": 5, "update": 4.131, "loss": "2.349", "ppl": "5.1", "wps": "363878", "ups": "3.08", "wpb": "118088", "bsz": "256", "num_updates": "212600", "lr": "0.000795354", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "69405"} +[2022-08-01 20:45:57,552][train_inner][INFO] - {"epoch": 5, "update": 4.135, "loss": "2.348", "ppl": "5.09", "wps": "362324", "ups": "3.06", "wpb": "118357", "bsz": "256", "num_updates": "212800", "lr": "0.000795152", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "69470"} +[2022-08-01 20:47:02,418][train_inner][INFO] - {"epoch": 5, "update": 4.139, "loss": "2.353", "ppl": "5.11", "wps": "363305", "ups": "3.08", "wpb": "117828", "bsz": "256", "num_updates": "213000", "lr": "0.000794949", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "69535"} +[2022-08-01 20:48:08,342][train_inner][INFO] - {"epoch": 5, "update": 4.142, "loss": "2.347", "ppl": "5.09", "wps": "358966", "ups": "3.03", "wpb": "118321", "bsz": "256", "num_updates": "213200", "lr": "0.000794747", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.1", "wall": "69601"} +[2022-08-01 20:49:13,491][train_inner][INFO] - {"epoch": 5, "update": 4.146, "loss": "2.349", "ppl": "5.09", "wps": "362430", "ups": "3.07", "wpb": "118057", "bsz": "256", "num_updates": "213400", "lr": "0.000794545", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "69666"} +[2022-08-01 20:50:18,552][train_inner][INFO] - {"epoch": 5, "update": 4.15, "loss": "2.351", "ppl": "5.1", "wps": "364586", "ups": "3.07", "wpb": "118601", "bsz": "256", "num_updates": "213600", "lr": "0.000794343", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "69731"} +[2022-08-01 20:51:24,242][train_inner][INFO] - {"epoch": 5, "update": 4.154, "loss": "2.352", "ppl": "5.1", "wps": "358643", "ups": "3.04", "wpb": "117795", "bsz": "256", "num_updates": "213800", "lr": "0.000794141", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "69797"} +[2022-08-01 20:52:29,346][train_inner][INFO] - {"epoch": 5, "update": 4.158, "loss": "2.356", "ppl": "5.12", "wps": "363151", "ups": "3.07", "wpb": "118210", "bsz": "256", "num_updates": "214000", "lr": "0.000793939", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "69862"} +[2022-08-01 20:53:34,216][train_inner][INFO] - {"epoch": 5, "update": 4.162, "loss": "2.351", "ppl": "5.1", "wps": "362279", "ups": "3.08", "wpb": "117502", "bsz": "256", "num_updates": "214200", "lr": "0.000793737", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "69927"} +[2022-08-01 20:54:39,031][train_inner][INFO] - {"epoch": 5, "update": 4.166, "loss": "2.351", "ppl": "5.1", "wps": "363963", "ups": "3.09", "wpb": "117951", "bsz": "256", "num_updates": "214400", "lr": "0.000793535", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "69991"} +[2022-08-01 20:55:07,857][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 20:55:44,689][train_inner][INFO] - {"epoch": 5, "update": 4.17, "loss": "2.353", "ppl": "5.11", "wps": "358570", "ups": "3.05", "wpb": "117714", "bsz": "256", "num_updates": "214600", "lr": "0.000793333", "gnorm": "0.696", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.2", "wall": "70057"} +[2022-08-01 20:56:49,974][train_inner][INFO] - {"epoch": 5, "update": 4.173, "loss": "2.351", "ppl": "5.1", "wps": "363830", "ups": "3.06", "wpb": "118761", "bsz": "256", "num_updates": "214800", "lr": "0.000793131", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "70122"} +[2022-08-01 20:57:55,159][train_inner][INFO] - {"epoch": 5, "update": 4.177, "loss": "2.354", "ppl": "5.11", "wps": "362479", "ups": "3.07", "wpb": "118140", "bsz": "256", "num_updates": "215000", "lr": "0.000792929", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "70187"} +[2022-08-01 20:58:59,824][train_inner][INFO] - {"epoch": 5, "update": 4.181, "loss": "2.353", "ppl": "5.11", "wps": "366408", "ups": "3.09", "wpb": "118466", "bsz": "256", "num_updates": "215200", "lr": "0.000792727", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.9", "wall": "70252"} +[2022-08-01 21:00:04,635][train_inner][INFO] - {"epoch": 5, "update": 4.185, "loss": "2.356", "ppl": "5.12", "wps": "364282", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "215400", "lr": "0.000792525", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "70317"} +[2022-08-01 21:01:09,819][train_inner][INFO] - {"epoch": 5, "update": 4.189, "loss": "2.354", "ppl": "5.11", "wps": "362815", "ups": "3.07", "wpb": "118246", "bsz": "256", "num_updates": "215600", "lr": "0.000792323", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "70382"} +[2022-08-01 21:02:14,966][train_inner][INFO] - {"epoch": 5, "update": 4.193, "loss": "2.347", "ppl": "5.09", "wps": "363189", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "215800", "lr": "0.000792121", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "70447"} +[2022-08-01 21:03:19,818][train_inner][INFO] - {"epoch": 5, "update": 4.197, "loss": "2.345", "ppl": "5.08", "wps": "366314", "ups": "3.08", "wpb": "118779", "bsz": "256", "num_updates": "216000", "lr": "0.000791919", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "70512"} +[2022-08-01 21:04:25,075][train_inner][INFO] - {"epoch": 5, "update": 4.201, "loss": "2.347", "ppl": "5.09", "wps": "363815", "ups": "3.06", "wpb": "118706", "bsz": "256", "num_updates": "216200", "lr": "0.000791717", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "70577"} +[2022-08-01 21:05:29,725][train_inner][INFO] - {"epoch": 5, "update": 4.205, "loss": "2.348", "ppl": "5.09", "wps": "365564", "ups": "3.09", "wpb": "118167", "bsz": "256", "num_updates": "216400", "lr": "0.000791515", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "70642"} +[2022-08-01 21:06:34,736][train_inner][INFO] - {"epoch": 5, "update": 4.208, "loss": "2.353", "ppl": "5.11", "wps": "362704", "ups": "3.08", "wpb": "117897", "bsz": "256", "num_updates": "216600", "lr": "0.000791313", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "70707"} +[2022-08-01 21:07:39,925][train_inner][INFO] - {"epoch": 5, "update": 4.212, "loss": "2.346", "ppl": "5.08", "wps": "363733", "ups": "3.07", "wpb": "118556", "bsz": "256", "num_updates": "216800", "lr": "0.000791111", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "70772"} +[2022-08-01 21:08:44,950][train_inner][INFO] - {"epoch": 5, "update": 4.216, "loss": "2.353", "ppl": "5.11", "wps": "364276", "ups": "3.08", "wpb": "118434", "bsz": "256", "num_updates": "217000", "lr": "0.000790909", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "70837"} +[2022-08-01 21:09:50,006][train_inner][INFO] - {"epoch": 5, "update": 4.22, "loss": "2.348", "ppl": "5.09", "wps": "363826", "ups": "3.07", "wpb": "118344", "bsz": "256", "num_updates": "217200", "lr": "0.000790707", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "70902"} +[2022-08-01 21:10:56,067][train_inner][INFO] - {"epoch": 5, "update": 4.224, "loss": "2.349", "ppl": "5.09", "wps": "357307", "ups": "3.03", "wpb": "118017", "bsz": "256", "num_updates": "217400", "lr": "0.000790505", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.3", "wall": "70968"} +[2022-08-01 21:12:00,748][train_inner][INFO] - {"epoch": 5, "update": 4.228, "loss": "2.356", "ppl": "5.12", "wps": "365595", "ups": "3.09", "wpb": "118234", "bsz": "256", "num_updates": "217600", "lr": "0.000790303", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "71033"} +[2022-08-01 21:13:05,731][train_inner][INFO] - {"epoch": 5, "update": 4.232, "loss": "2.352", "ppl": "5.11", "wps": "364375", "ups": "3.08", "wpb": "118389", "bsz": "256", "num_updates": "217800", "lr": "0.000790101", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "71098"} +[2022-08-01 21:14:10,860][train_inner][INFO] - {"epoch": 5, "update": 4.236, "loss": "2.349", "ppl": "5.1", "wps": "364160", "ups": "3.07", "wpb": "118585", "bsz": "256", "num_updates": "218000", "lr": "0.000789899", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "71163"} +[2022-08-01 21:15:15,311][train_inner][INFO] - {"epoch": 5, "update": 4.24, "loss": "2.347", "ppl": "5.09", "wps": "367651", "ups": "3.1", "wpb": "118475", "bsz": "256", "num_updates": "218200", "lr": "0.000789697", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "71228"} +[2022-08-01 21:16:20,267][train_inner][INFO] - {"epoch": 5, "update": 4.243, "loss": "2.343", "ppl": "5.07", "wps": "365263", "ups": "3.08", "wpb": "118628", "bsz": "256", "num_updates": "218400", "lr": "0.000789495", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "71293"} +[2022-08-01 21:17:24,788][train_inner][INFO] - {"epoch": 5, "update": 4.247, "loss": "2.354", "ppl": "5.11", "wps": "365044", "ups": "3.1", "wpb": "117763", "bsz": "256", "num_updates": "218600", "lr": "0.000789293", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "71357"} +[2022-08-01 21:18:29,736][train_inner][INFO] - {"epoch": 5, "update": 4.251, "loss": "2.351", "ppl": "5.1", "wps": "365902", "ups": "3.08", "wpb": "118820", "bsz": "256", "num_updates": "218800", "lr": "0.000789091", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "71422"} +[2022-08-01 21:19:34,388][train_inner][INFO] - {"epoch": 5, "update": 4.255, "loss": "2.344", "ppl": "5.08", "wps": "366495", "ups": "3.09", "wpb": "118471", "bsz": "256", "num_updates": "219000", "lr": "0.000788889", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "71487"} +[2022-08-01 21:20:40,252][train_inner][INFO] - {"epoch": 5, "update": 4.259, "loss": "2.35", "ppl": "5.1", "wps": "359108", "ups": "3.04", "wpb": "118261", "bsz": "256", "num_updates": "219200", "lr": "0.000788687", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "71553"} +[2022-08-01 21:21:45,218][train_inner][INFO] - {"epoch": 5, "update": 4.263, "loss": "2.344", "ppl": "5.08", "wps": "364858", "ups": "3.08", "wpb": "118514", "bsz": "256", "num_updates": "219400", "lr": "0.000788485", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "71618"} +[2022-08-01 21:22:50,796][train_inner][INFO] - {"epoch": 5, "update": 4.267, "loss": "2.345", "ppl": "5.08", "wps": "362250", "ups": "3.05", "wpb": "118776", "bsz": "256", "num_updates": "219600", "lr": "0.000788283", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "71683"} +[2022-08-01 21:23:55,898][train_inner][INFO] - {"epoch": 5, "update": 4.271, "loss": "2.353", "ppl": "5.11", "wps": "364194", "ups": "3.07", "wpb": "118548", "bsz": "256", "num_updates": "219800", "lr": "0.000788081", "gnorm": "0.683", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "71748"} +[2022-08-01 21:25:00,492][train_inner][INFO] - {"epoch": 5, "update": 4.274, "loss": "2.344", "ppl": "5.08", "wps": "368116", "ups": "3.1", "wpb": "118888", "bsz": "256", "num_updates": "220000", "lr": "0.000787879", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "71813"} +[2022-08-01 21:26:05,406][train_inner][INFO] - {"epoch": 5, "update": 4.278, "loss": "2.345", "ppl": "5.08", "wps": "364811", "ups": "3.08", "wpb": "118405", "bsz": "256", "num_updates": "220200", "lr": "0.000787677", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "71878"} +[2022-08-01 21:27:10,368][train_inner][INFO] - {"epoch": 5, "update": 4.282, "loss": "2.344", "ppl": "5.08", "wps": "363277", "ups": "3.08", "wpb": "117994", "bsz": "256", "num_updates": "220400", "lr": "0.000787475", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "71943"} +[2022-08-01 21:28:14,902][train_inner][INFO] - {"epoch": 5, "update": 4.286, "loss": "2.352", "ppl": "5.11", "wps": "364882", "ups": "3.1", "wpb": "117735", "bsz": "256", "num_updates": "220600", "lr": "0.000787273", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "72007"} +[2022-08-01 21:28:27,136][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-01 21:29:04,928][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-01 21:29:20,609][train_inner][INFO] - {"epoch": 5, "update": 4.29, "loss": "2.343", "ppl": "5.07", "wps": "359915", "ups": "3.04", "wpb": "118243", "bsz": "256", "num_updates": "220800", "lr": "0.000787071", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "72073"} +[2022-08-01 21:30:25,238][train_inner][INFO] - {"epoch": 5, "update": 4.294, "loss": "2.344", "ppl": "5.08", "wps": "363420", "ups": "3.09", "wpb": "117436", "bsz": "256", "num_updates": "221000", "lr": "0.000786869", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "72138"} +[2022-08-01 21:31:30,171][train_inner][INFO] - {"epoch": 5, "update": 4.298, "loss": "2.341", "ppl": "5.07", "wps": "364132", "ups": "3.08", "wpb": "118220", "bsz": "256", "num_updates": "221200", "lr": "0.000786667", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "72202"} +[2022-08-01 21:32:34,945][train_inner][INFO] - {"epoch": 5, "update": 4.302, "loss": "2.339", "ppl": "5.06", "wps": "365476", "ups": "3.09", "wpb": "118365", "bsz": "256", "num_updates": "221400", "lr": "0.000786465", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "72267"} +[2022-08-01 21:33:40,163][train_inner][INFO] - {"epoch": 5, "update": 4.306, "loss": "2.343", "ppl": "5.07", "wps": "362907", "ups": "3.07", "wpb": "118338", "bsz": "256", "num_updates": "221600", "lr": "0.000786263", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "72332"} +[2022-08-01 21:34:44,976][train_inner][INFO] - {"epoch": 5, "update": 4.309, "loss": "2.351", "ppl": "5.1", "wps": "364781", "ups": "3.09", "wpb": "118212", "bsz": "256", "num_updates": "221800", "lr": "0.000786061", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "72397"} +[2022-08-01 21:35:50,091][train_inner][INFO] - {"epoch": 5, "update": 4.313, "loss": "2.343", "ppl": "5.07", "wps": "362468", "ups": "3.07", "wpb": "118008", "bsz": "256", "num_updates": "222000", "lr": "0.000785859", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "72462"} +[2022-08-01 21:36:16,433][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 21:36:17,389][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 21:36:17,681][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 21:36:17,979][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 21:36:56,343][train_inner][INFO] - {"epoch": 5, "update": 4.317, "loss": "2.368", "ppl": "5.16", "wps": "355985", "ups": "3.02", "wpb": "117922", "bsz": "256", "num_updates": "222200", "lr": "0.000785657", "gnorm": "1.024", "clip": "2.5", "loss_scale": "0.5", "train_wall": "66", "gb_free": "19.8", "wall": "72529"} +[2022-08-01 21:38:01,326][train_inner][INFO] - {"epoch": 5, "update": 4.321, "loss": "2.347", "ppl": "5.09", "wps": "365468", "ups": "3.08", "wpb": "118745", "bsz": "256", "num_updates": "222400", "lr": "0.000785455", "gnorm": "0.687", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.2", "wall": "72594"} +[2022-08-01 21:39:06,367][train_inner][INFO] - {"epoch": 5, "update": 4.325, "loss": "2.344", "ppl": "5.08", "wps": "365798", "ups": "3.08", "wpb": "118958", "bsz": "256", "num_updates": "222600", "lr": "0.000785253", "gnorm": "0.694", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.8", "wall": "72659"} +[2022-08-01 21:40:11,609][train_inner][INFO] - {"epoch": 5, "update": 4.329, "loss": "2.348", "ppl": "5.09", "wps": "362464", "ups": "3.07", "wpb": "118237", "bsz": "256", "num_updates": "222800", "lr": "0.000785051", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "72724"} +[2022-08-01 21:41:16,314][train_inner][INFO] - {"epoch": 5, "update": 4.333, "loss": "2.339", "ppl": "5.06", "wps": "367767", "ups": "3.09", "wpb": "118980", "bsz": "256", "num_updates": "223000", "lr": "0.000784848", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.3", "wall": "72789"} +[2022-08-01 21:42:20,664][train_inner][INFO] - {"epoch": 5, "update": 4.337, "loss": "2.349", "ppl": "5.09", "wps": "367275", "ups": "3.11", "wpb": "118169", "bsz": "256", "num_updates": "223200", "lr": "0.000784646", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.8", "wall": "72853"} +[2022-08-01 21:43:25,275][train_inner][INFO] - {"epoch": 5, "update": 4.341, "loss": "2.348", "ppl": "5.09", "wps": "365962", "ups": "3.1", "wpb": "118224", "bsz": "256", "num_updates": "223400", "lr": "0.000784444", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22", "wall": "72918"} +[2022-08-01 21:44:29,874][train_inner][INFO] - {"epoch": 5, "update": 4.345, "loss": "2.345", "ppl": "5.08", "wps": "365506", "ups": "3.1", "wpb": "118054", "bsz": "256", "num_updates": "223600", "lr": "0.000784242", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "72982"} +[2022-08-01 21:45:34,936][train_inner][INFO] - {"epoch": 5, "update": 4.348, "loss": "2.344", "ppl": "5.08", "wps": "365469", "ups": "3.07", "wpb": "118889", "bsz": "256", "num_updates": "223800", "lr": "0.00078404", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.7", "wall": "73047"} +[2022-08-01 21:46:39,656][train_inner][INFO] - {"epoch": 5, "update": 4.352, "loss": "2.334", "ppl": "5.04", "wps": "365814", "ups": "3.09", "wpb": "118376", "bsz": "256", "num_updates": "224000", "lr": "0.000783838", "gnorm": "0.698", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.7", "wall": "73112"} +[2022-08-01 21:47:44,626][train_inner][INFO] - {"epoch": 5, "update": 4.356, "loss": "2.347", "ppl": "5.09", "wps": "363535", "ups": "3.08", "wpb": "118093", "bsz": "256", "num_updates": "224200", "lr": "0.000783636", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "73177"} +[2022-08-01 21:48:49,789][train_inner][INFO] - {"epoch": 5, "update": 4.36, "loss": "2.336", "ppl": "5.05", "wps": "364238", "ups": "3.07", "wpb": "118673", "bsz": "256", "num_updates": "224400", "lr": "0.000783434", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "73242"} +[2022-08-01 21:49:54,779][train_inner][INFO] - {"epoch": 5, "update": 4.364, "loss": "2.344", "ppl": "5.08", "wps": "363280", "ups": "3.08", "wpb": "118046", "bsz": "256", "num_updates": "224600", "lr": "0.000783232", "gnorm": "0.687", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "73307"} +[2022-08-01 21:50:59,978][train_inner][INFO] - {"epoch": 5, "update": 4.368, "loss": "2.349", "ppl": "5.09", "wps": "359573", "ups": "3.07", "wpb": "117217", "bsz": "256", "num_updates": "224800", "lr": "0.00078303", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "25.9", "wall": "73372"} +[2022-08-01 21:52:04,824][train_inner][INFO] - {"epoch": 5, "update": 4.372, "loss": "2.341", "ppl": "5.07", "wps": "365967", "ups": "3.08", "wpb": "118655", "bsz": "256", "num_updates": "225000", "lr": "0.000782828", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "73437"} +[2022-08-01 21:53:10,011][train_inner][INFO] - {"epoch": 5, "update": 4.376, "loss": "2.337", "ppl": "5.05", "wps": "362874", "ups": "3.07", "wpb": "118273", "bsz": "256", "num_updates": "225200", "lr": "0.000782626", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "73502"} +[2022-08-01 21:54:14,998][train_inner][INFO] - {"epoch": 5, "update": 4.379, "loss": "2.343", "ppl": "5.07", "wps": "364660", "ups": "3.08", "wpb": "118488", "bsz": "256", "num_updates": "225400", "lr": "0.000782424", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "73567"} +[2022-08-01 21:55:19,899][train_inner][INFO] - {"epoch": 5, "update": 4.383, "loss": "2.336", "ppl": "5.05", "wps": "364368", "ups": "3.08", "wpb": "118238", "bsz": "256", "num_updates": "225600", "lr": "0.000782222", "gnorm": "0.686", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "27.7", "wall": "73632"} +[2022-08-01 21:56:24,920][train_inner][INFO] - {"epoch": 5, "update": 4.387, "loss": "2.338", "ppl": "5.06", "wps": "364260", "ups": "3.08", "wpb": "118422", "bsz": "256", "num_updates": "225800", "lr": "0.00078202", "gnorm": "0.683", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.8", "wall": "73697"} +[2022-08-01 21:57:30,154][train_inner][INFO] - {"epoch": 5, "update": 4.391, "loss": "2.345", "ppl": "5.08", "wps": "362382", "ups": "3.07", "wpb": "118196", "bsz": "256", "num_updates": "226000", "lr": "0.000781818", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "73762"} +[2022-08-01 21:58:35,157][train_inner][INFO] - {"epoch": 5, "update": 4.395, "loss": "2.34", "ppl": "5.06", "wps": "363055", "ups": "3.08", "wpb": "117996", "bsz": "256", "num_updates": "226200", "lr": "0.000781616", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "73827"} +[2022-08-01 21:59:39,819][train_inner][INFO] - {"epoch": 5, "update": 4.399, "loss": "2.342", "ppl": "5.07", "wps": "365797", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "226400", "lr": "0.000781414", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "73892"} +[2022-08-01 22:00:44,631][train_inner][INFO] - {"epoch": 5, "update": 4.403, "loss": "2.342", "ppl": "5.07", "wps": "365950", "ups": "3.09", "wpb": "118587", "bsz": "256", "num_updates": "226600", "lr": "0.000781212", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "73957"} +[2022-08-01 22:01:36,794][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 22:01:39,056][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 22:01:50,533][train_inner][INFO] - {"epoch": 5, "update": 4.407, "loss": "2.589", "ppl": "6.02", "wps": "359474", "ups": "3.03", "wpb": "118448", "bsz": "256", "num_updates": "226800", "lr": "0.00078101", "gnorm": "1.246", "clip": "4.5", "loss_scale": "0.5", "train_wall": "66", "gb_free": "22.2", "wall": "74023"} +[2022-08-01 22:02:55,444][train_inner][INFO] - {"epoch": 5, "update": 4.411, "loss": "2.345", "ppl": "5.08", "wps": "365307", "ups": "3.08", "wpb": "118562", "bsz": "256", "num_updates": "227000", "lr": "0.000780808", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "74088"} +[2022-08-01 22:04:00,084][train_inner][INFO] - {"epoch": 5, "update": 4.414, "loss": "2.341", "ppl": "5.07", "wps": "367782", "ups": "3.09", "wpb": "118865", "bsz": "256", "num_updates": "227200", "lr": "0.000780606", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.7", "wall": "74152"} +[2022-08-01 22:05:05,104][train_inner][INFO] - {"epoch": 5, "update": 4.418, "loss": "2.343", "ppl": "5.07", "wps": "364309", "ups": "3.08", "wpb": "118435", "bsz": "256", "num_updates": "227400", "lr": "0.000780404", "gnorm": "0.692", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.6", "wall": "74217"} +[2022-08-01 22:06:09,708][train_inner][INFO] - {"epoch": 5, "update": 4.422, "loss": "2.34", "ppl": "5.06", "wps": "365639", "ups": "3.1", "wpb": "118108", "bsz": "256", "num_updates": "227600", "lr": "0.000780202", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.3", "wall": "74282"} +[2022-08-01 22:07:14,581][train_inner][INFO] - {"epoch": 5, "update": 4.426, "loss": "2.345", "ppl": "5.08", "wps": "365896", "ups": "3.08", "wpb": "118682", "bsz": "256", "num_updates": "227800", "lr": "0.00078", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.4", "wall": "74347"} +[2022-08-01 22:08:19,485][train_inner][INFO] - {"epoch": 5, "update": 4.43, "loss": "2.341", "ppl": "5.07", "wps": "366091", "ups": "3.08", "wpb": "118802", "bsz": "256", "num_updates": "228000", "lr": "0.000779798", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "74412"} +[2022-08-01 22:09:23,882][train_inner][INFO] - {"epoch": 5, "update": 4.434, "loss": "2.336", "ppl": "5.05", "wps": "367060", "ups": "3.11", "wpb": "118187", "bsz": "256", "num_updates": "228200", "lr": "0.000779596", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "25.8", "wall": "74476"} +[2022-08-01 22:10:28,752][train_inner][INFO] - {"epoch": 5, "update": 4.438, "loss": "2.339", "ppl": "5.06", "wps": "363760", "ups": "3.08", "wpb": "117983", "bsz": "256", "num_updates": "228400", "lr": "0.000779394", "gnorm": "0.694", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.4", "wall": "74541"} +[2022-08-01 22:11:33,286][train_inner][INFO] - {"epoch": 5, "update": 4.442, "loss": "2.336", "ppl": "5.05", "wps": "366003", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "228600", "lr": "0.000779192", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "74606"} +[2022-08-01 22:12:38,134][train_inner][INFO] - {"epoch": 5, "update": 4.446, "loss": "2.342", "ppl": "5.07", "wps": "365085", "ups": "3.08", "wpb": "118375", "bsz": "256", "num_updates": "228800", "lr": "0.00077899", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21", "wall": "74670"} +[2022-08-01 22:13:43,149][train_inner][INFO] - {"epoch": 5, "update": 4.449, "loss": "2.339", "ppl": "5.06", "wps": "363608", "ups": "3.08", "wpb": "118197", "bsz": "256", "num_updates": "229000", "lr": "0.000778788", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "74735"} +[2022-08-01 22:14:48,150][train_inner][INFO] - {"epoch": 5, "update": 4.453, "loss": "2.34", "ppl": "5.06", "wps": "364814", "ups": "3.08", "wpb": "118565", "bsz": "256", "num_updates": "229200", "lr": "0.000778586", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "74800"} +[2022-08-01 22:15:52,943][train_inner][INFO] - {"epoch": 5, "update": 4.457, "loss": "2.343", "ppl": "5.07", "wps": "364234", "ups": "3.09", "wpb": "117998", "bsz": "256", "num_updates": "229400", "lr": "0.000778384", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "74865"} +[2022-08-01 22:16:57,979][train_inner][INFO] - {"epoch": 5, "update": 4.461, "loss": "2.337", "ppl": "5.05", "wps": "364736", "ups": "3.08", "wpb": "118604", "bsz": "256", "num_updates": "229600", "lr": "0.000778182", "gnorm": "0.691", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "74930"} +[2022-08-01 22:18:02,485][train_inner][INFO] - {"epoch": 5, "update": 4.465, "loss": "2.338", "ppl": "5.06", "wps": "365758", "ups": "3.1", "wpb": "117965", "bsz": "256", "num_updates": "229800", "lr": "0.00077798", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.5", "wall": "74995"} +[2022-08-01 22:19:07,792][train_inner][INFO] - {"epoch": 5, "update": 4.469, "loss": "2.333", "ppl": "5.04", "wps": "363802", "ups": "3.06", "wpb": "118793", "bsz": "256", "num_updates": "230000", "lr": "0.000777778", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "75060"} +[2022-08-01 22:20:12,487][train_inner][INFO] - {"epoch": 5, "update": 4.473, "loss": "2.342", "ppl": "5.07", "wps": "364140", "ups": "3.09", "wpb": "117789", "bsz": "256", "num_updates": "230200", "lr": "0.000777576", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.4", "wall": "75125"} +[2022-08-01 22:21:17,394][train_inner][INFO] - {"epoch": 5, "update": 4.477, "loss": "2.341", "ppl": "5.07", "wps": "365448", "ups": "3.08", "wpb": "118598", "bsz": "256", "num_updates": "230400", "lr": "0.000777374", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "75190"} +[2022-08-01 22:22:22,459][train_inner][INFO] - {"epoch": 5, "update": 4.481, "loss": "2.335", "ppl": "5.05", "wps": "362539", "ups": "3.07", "wpb": "117941", "bsz": "256", "num_updates": "230600", "lr": "0.000777172", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "75255"} +[2022-08-01 22:23:27,158][train_inner][INFO] - {"epoch": 5, "update": 4.484, "loss": "2.338", "ppl": "5.05", "wps": "365032", "ups": "3.09", "wpb": "118086", "bsz": "256", "num_updates": "230800", "lr": "0.00077697", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.4", "wall": "75319"} +[2022-08-01 22:24:31,963][train_inner][INFO] - {"epoch": 5, "update": 4.488, "loss": "2.335", "ppl": "5.05", "wps": "364028", "ups": "3.09", "wpb": "117951", "bsz": "256", "num_updates": "231000", "lr": "0.000776768", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.3", "wall": "75384"} +[2022-08-01 22:25:36,585][train_inner][INFO] - {"epoch": 5, "update": 4.492, "loss": "2.334", "ppl": "5.04", "wps": "365710", "ups": "3.09", "wpb": "118162", "bsz": "256", "num_updates": "231200", "lr": "0.000776566", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "75449"} +[2022-08-01 22:26:41,572][train_inner][INFO] - {"epoch": 5, "update": 4.496, "loss": "2.34", "ppl": "5.06", "wps": "363858", "ups": "3.08", "wpb": "118229", "bsz": "256", "num_updates": "231400", "lr": "0.000776364", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "75514"} +[2022-08-01 22:27:46,688][train_inner][INFO] - {"epoch": 5, "update": 4.5, "loss": "2.337", "ppl": "5.05", "wps": "363442", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "231600", "lr": "0.000776162", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.4", "wall": "75579"} +[2022-08-01 22:28:51,640][train_inner][INFO] - {"epoch": 5, "update": 4.504, "loss": "2.33", "ppl": "5.03", "wps": "365171", "ups": "3.08", "wpb": "118590", "bsz": "256", "num_updates": "231800", "lr": "0.00077596", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "75644"} +[2022-08-01 22:29:55,691][train_inner][INFO] - {"epoch": 5, "update": 4.508, "loss": "2.335", "ppl": "5.04", "wps": "369209", "ups": "3.12", "wpb": "118241", "bsz": "256", "num_updates": "232000", "lr": "0.000775758", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.6", "wall": "75708"} +[2022-08-01 22:31:00,827][train_inner][INFO] - {"epoch": 5, "update": 4.512, "loss": "2.335", "ppl": "5.05", "wps": "363754", "ups": "3.07", "wpb": "118465", "bsz": "256", "num_updates": "232200", "lr": "0.000775556", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "75773"} +[2022-08-01 22:32:05,807][train_inner][INFO] - {"epoch": 5, "update": 4.515, "loss": "2.33", "ppl": "5.03", "wps": "365640", "ups": "3.08", "wpb": "118795", "bsz": "256", "num_updates": "232400", "lr": "0.000775354", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "75838"} +[2022-08-01 22:33:10,526][train_inner][INFO] - {"epoch": 5, "update": 4.519, "loss": "2.328", "ppl": "5.02", "wps": "369114", "ups": "3.09", "wpb": "119442", "bsz": "256", "num_updates": "232600", "lr": "0.000775152", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "75903"} +[2022-08-01 22:34:00,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-01 22:34:00,849][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-01 22:34:15,894][train_inner][INFO] - {"epoch": 5, "update": 4.523, "loss": "2.35", "ppl": "5.1", "wps": "362770", "ups": "3.06", "wpb": "118565", "bsz": "256", "num_updates": "232800", "lr": "0.000774949", "gnorm": "0.883", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "75968"} +[2022-08-01 22:35:20,793][train_inner][INFO] - {"epoch": 5, "update": 4.527, "loss": "2.343", "ppl": "5.07", "wps": "366480", "ups": "3.08", "wpb": "118921", "bsz": "256", "num_updates": "233000", "lr": "0.000774747", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "76033"} +[2022-08-01 22:36:26,079][train_inner][INFO] - {"epoch": 5, "update": 4.531, "loss": "2.337", "ppl": "5.05", "wps": "361842", "ups": "3.06", "wpb": "118115", "bsz": "256", "num_updates": "233200", "lr": "0.000774545", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "76098"} +[2022-08-01 22:37:31,178][train_inner][INFO] - {"epoch": 5, "update": 4.535, "loss": "2.336", "ppl": "5.05", "wps": "364352", "ups": "3.07", "wpb": "118592", "bsz": "256", "num_updates": "233400", "lr": "0.000774343", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "27.2", "wall": "76163"} +[2022-08-01 22:38:36,309][train_inner][INFO] - {"epoch": 5, "update": 4.539, "loss": "2.329", "ppl": "5.02", "wps": "364574", "ups": "3.07", "wpb": "118724", "bsz": "256", "num_updates": "233600", "lr": "0.000774141", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.9", "wall": "76229"} +[2022-08-01 22:39:41,247][train_inner][INFO] - {"epoch": 5, "update": 4.543, "loss": "2.336", "ppl": "5.05", "wps": "364342", "ups": "3.08", "wpb": "118295", "bsz": "256", "num_updates": "233800", "lr": "0.000773939", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "76294"} +[2022-08-01 22:40:45,906][train_inner][INFO] - {"epoch": 5, "update": 4.547, "loss": "2.336", "ppl": "5.05", "wps": "366804", "ups": "3.09", "wpb": "118585", "bsz": "256", "num_updates": "234000", "lr": "0.000773737", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.5", "wall": "76358"} +[2022-08-01 22:41:50,601][train_inner][INFO] - {"epoch": 5, "update": 4.55, "loss": "2.335", "ppl": "5.05", "wps": "366380", "ups": "3.09", "wpb": "118513", "bsz": "256", "num_updates": "234200", "lr": "0.000773535", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.5", "wall": "76423"} +[2022-08-01 22:42:55,529][train_inner][INFO] - {"epoch": 5, "update": 4.554, "loss": "2.34", "ppl": "5.06", "wps": "364509", "ups": "3.08", "wpb": "118333", "bsz": "256", "num_updates": "234400", "lr": "0.000773333", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "76488"} +[2022-08-01 22:44:00,580][train_inner][INFO] - {"epoch": 5, "update": 4.558, "loss": "2.335", "ppl": "5.05", "wps": "364634", "ups": "3.07", "wpb": "118598", "bsz": "256", "num_updates": "234600", "lr": "0.000773131", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "76553"} +[2022-08-01 22:45:05,495][train_inner][INFO] - {"epoch": 5, "update": 4.562, "loss": "2.332", "ppl": "5.04", "wps": "364676", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "234800", "lr": "0.000772929", "gnorm": "0.699", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.4", "wall": "76618"} +[2022-08-01 22:46:10,382][train_inner][INFO] - {"epoch": 5, "update": 4.566, "loss": "2.329", "ppl": "5.03", "wps": "365144", "ups": "3.08", "wpb": "118464", "bsz": "256", "num_updates": "235000", "lr": "0.000772727", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "76683"} +[2022-08-01 22:47:15,343][train_inner][INFO] - {"epoch": 5, "update": 4.57, "loss": "2.333", "ppl": "5.04", "wps": "363018", "ups": "3.08", "wpb": "117908", "bsz": "256", "num_updates": "235200", "lr": "0.000772525", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "76748"} +[2022-08-01 22:48:20,079][train_inner][INFO] - {"epoch": 5, "update": 4.574, "loss": "2.343", "ppl": "5.07", "wps": "364627", "ups": "3.09", "wpb": "118021", "bsz": "256", "num_updates": "235400", "lr": "0.000772323", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "76812"} +[2022-08-01 22:49:25,036][train_inner][INFO] - {"epoch": 5, "update": 4.578, "loss": "2.339", "ppl": "5.06", "wps": "362495", "ups": "3.08", "wpb": "117730", "bsz": "256", "num_updates": "235600", "lr": "0.000772121", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "76877"} +[2022-08-01 22:50:29,629][train_inner][INFO] - {"epoch": 5, "update": 4.582, "loss": "2.333", "ppl": "5.04", "wps": "365651", "ups": "3.1", "wpb": "118090", "bsz": "256", "num_updates": "235800", "lr": "0.000771919", "gnorm": "0.694", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.1", "wall": "76942"} +[2022-08-01 22:51:34,277][train_inner][INFO] - {"epoch": 5, "update": 4.585, "loss": "2.334", "ppl": "5.04", "wps": "364133", "ups": "3.09", "wpb": "117702", "bsz": "255.9", "num_updates": "236000", "lr": "0.000771717", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.5", "wall": "77007"} +[2022-08-01 22:52:39,198][train_inner][INFO] - {"epoch": 5, "update": 4.589, "loss": "2.334", "ppl": "5.04", "wps": "362024", "ups": "3.08", "wpb": "117513", "bsz": "256", "num_updates": "236200", "lr": "0.000771515", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "77072"} +[2022-08-01 22:53:44,329][train_inner][INFO] - {"epoch": 5, "update": 4.593, "loss": "2.333", "ppl": "5.04", "wps": "362917", "ups": "3.07", "wpb": "118184", "bsz": "256", "num_updates": "236400", "lr": "0.000771313", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "77137"} +[2022-08-01 22:54:49,519][train_inner][INFO] - {"epoch": 5, "update": 4.597, "loss": "2.327", "ppl": "5.02", "wps": "363628", "ups": "3.07", "wpb": "118522", "bsz": "256", "num_updates": "236600", "lr": "0.000771111", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24", "wall": "77202"} +[2022-08-01 22:55:54,566][train_inner][INFO] - {"epoch": 5, "update": 4.601, "loss": "2.334", "ppl": "5.04", "wps": "363199", "ups": "3.07", "wpb": "118124", "bsz": "256", "num_updates": "236800", "lr": "0.000770909", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "77267"} +[2022-08-01 22:56:59,395][train_inner][INFO] - {"epoch": 5, "update": 4.605, "loss": "2.33", "ppl": "5.03", "wps": "365398", "ups": "3.09", "wpb": "118440", "bsz": "256", "num_updates": "237000", "lr": "0.000770707", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.9", "wall": "77332"} +[2022-08-01 22:58:04,228][train_inner][INFO] - {"epoch": 5, "update": 4.609, "loss": "2.337", "ppl": "5.05", "wps": "364048", "ups": "3.08", "wpb": "118009", "bsz": "256", "num_updates": "237200", "lr": "0.000770505", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.3", "wall": "77397"} +[2022-08-01 22:59:08,930][train_inner][INFO] - {"epoch": 5, "update": 4.613, "loss": "2.33", "ppl": "5.03", "wps": "366884", "ups": "3.09", "wpb": "118690", "bsz": "256", "num_updates": "237400", "lr": "0.000770303", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "77461"} +[2022-08-01 23:00:13,730][train_inner][INFO] - {"epoch": 5, "update": 4.617, "loss": "2.333", "ppl": "5.04", "wps": "364280", "ups": "3.09", "wpb": "118024", "bsz": "256", "num_updates": "237600", "lr": "0.000770101", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "77526"} +[2022-08-01 23:01:18,216][train_inner][INFO] - {"epoch": 5, "update": 4.62, "loss": "2.325", "ppl": "5.01", "wps": "367333", "ups": "3.1", "wpb": "118438", "bsz": "256", "num_updates": "237800", "lr": "0.000769899", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.8", "wall": "77591"} +[2022-08-01 23:02:22,710][train_inner][INFO] - {"epoch": 5, "update": 4.624, "loss": "2.328", "ppl": "5.02", "wps": "366051", "ups": "3.1", "wpb": "118038", "bsz": "256", "num_updates": "238000", "lr": "0.000769697", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.9", "wall": "77655"} +[2022-08-01 23:03:27,302][train_inner][INFO] - {"epoch": 5, "update": 4.628, "loss": "2.332", "ppl": "5.04", "wps": "366700", "ups": "3.1", "wpb": "118428", "bsz": "256", "num_updates": "238200", "lr": "0.000769495", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "77720"} +[2022-08-01 23:04:31,729][train_inner][INFO] - {"epoch": 5, "update": 4.632, "loss": "2.334", "ppl": "5.04", "wps": "363827", "ups": "3.1", "wpb": "117201", "bsz": "256", "num_updates": "238400", "lr": "0.000769293", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.9", "wall": "77784"} +[2022-08-01 23:05:36,827][train_inner][INFO] - {"epoch": 5, "update": 4.636, "loss": "2.335", "ppl": "5.05", "wps": "362792", "ups": "3.07", "wpb": "118083", "bsz": "256", "num_updates": "238600", "lr": "0.000769091", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.2", "wall": "77849"} +[2022-08-01 23:06:41,244][train_inner][INFO] - {"epoch": 5, "update": 4.64, "loss": "2.334", "ppl": "5.04", "wps": "368798", "ups": "3.1", "wpb": "118782", "bsz": "256", "num_updates": "238800", "lr": "0.000768889", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "77914"} +[2022-08-01 23:07:45,967][train_inner][INFO] - {"epoch": 5, "update": 4.644, "loss": "2.326", "ppl": "5.01", "wps": "366624", "ups": "3.09", "wpb": "118644", "bsz": "256", "num_updates": "239000", "lr": "0.000768687", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "77978"} +[2022-08-01 23:08:51,066][train_inner][INFO] - {"epoch": 5, "update": 4.648, "loss": "2.331", "ppl": "5.03", "wps": "364426", "ups": "3.07", "wpb": "118616", "bsz": "256", "num_updates": "239200", "lr": "0.000768485", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.1", "wall": "78043"} +[2022-08-01 23:09:55,865][train_inner][INFO] - {"epoch": 5, "update": 4.651, "loss": "2.332", "ppl": "5.03", "wps": "366014", "ups": "3.09", "wpb": "118584", "bsz": "256", "num_updates": "239400", "lr": "0.000768283", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.5", "wall": "78108"} +[2022-08-01 23:11:00,765][train_inner][INFO] - {"epoch": 5, "update": 4.655, "loss": "2.328", "ppl": "5.02", "wps": "365329", "ups": "3.08", "wpb": "118548", "bsz": "256", "num_updates": "239600", "lr": "0.000768081", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "78173"} +[2022-08-01 23:12:05,420][train_inner][INFO] - {"epoch": 5, "update": 4.659, "loss": "2.331", "ppl": "5.03", "wps": "364183", "ups": "3.09", "wpb": "117731", "bsz": "256", "num_updates": "239800", "lr": "0.000767879", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "78238"} +[2022-08-01 23:13:10,341][train_inner][INFO] - {"epoch": 5, "update": 4.663, "loss": "2.332", "ppl": "5.04", "wps": "364242", "ups": "3.08", "wpb": "118233", "bsz": "256", "num_updates": "240000", "lr": "0.000767677", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "78303"} +[2022-08-01 23:14:15,290][train_inner][INFO] - {"epoch": 5, "update": 4.667, "loss": "2.335", "ppl": "5.05", "wps": "362772", "ups": "3.08", "wpb": "117805", "bsz": "256", "num_updates": "240200", "lr": "0.000767475", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "78368"} +[2022-08-01 23:15:20,381][train_inner][INFO] - {"epoch": 5, "update": 4.671, "loss": "2.328", "ppl": "5.02", "wps": "365480", "ups": "3.07", "wpb": "118946", "bsz": "256", "num_updates": "240400", "lr": "0.000767273", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "78433"} +[2022-08-01 23:16:24,868][train_inner][INFO] - {"epoch": 5, "update": 4.675, "loss": "2.327", "ppl": "5.02", "wps": "366039", "ups": "3.1", "wpb": "118023", "bsz": "256", "num_updates": "240600", "lr": "0.000767071", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "78497"} +[2022-08-01 23:17:29,493][train_inner][INFO] - {"epoch": 5, "update": 4.679, "loss": "2.324", "ppl": "5.01", "wps": "364466", "ups": "3.09", "wpb": "117766", "bsz": "256", "num_updates": "240800", "lr": "0.000766869", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "78562"} +[2022-08-01 23:18:34,477][train_inner][INFO] - {"epoch": 5, "update": 4.683, "loss": "2.33", "ppl": "5.03", "wps": "363308", "ups": "3.08", "wpb": "118044", "bsz": "256", "num_updates": "241000", "lr": "0.000766667", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "78627"} +[2022-08-01 23:19:39,590][train_inner][INFO] - {"epoch": 5, "update": 4.686, "loss": "2.33", "ppl": "5.03", "wps": "361820", "ups": "3.07", "wpb": "117795", "bsz": "256", "num_updates": "241200", "lr": "0.000766465", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "78692"} +[2022-08-01 23:20:43,866][train_inner][INFO] - {"epoch": 5, "update": 4.69, "loss": "2.323", "ppl": "5.01", "wps": "368331", "ups": "3.11", "wpb": "118373", "bsz": "256", "num_updates": "241400", "lr": "0.000766263", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "78756"} +[2022-08-01 23:21:48,927][train_inner][INFO] - {"epoch": 5, "update": 4.694, "loss": "2.323", "ppl": "5", "wps": "366382", "ups": "3.07", "wpb": "119183", "bsz": "256", "num_updates": "241600", "lr": "0.000766061", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.5", "wall": "78821"} +[2022-08-01 23:22:54,843][train_inner][INFO] - {"epoch": 5, "update": 4.698, "loss": "2.322", "ppl": "5", "wps": "360946", "ups": "3.03", "wpb": "118959", "bsz": "256", "num_updates": "241800", "lr": "0.000765859", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.6", "wall": "78887"} +[2022-08-01 23:23:59,620][train_inner][INFO] - {"epoch": 5, "update": 4.702, "loss": "2.328", "ppl": "5.02", "wps": "365960", "ups": "3.09", "wpb": "118527", "bsz": "256", "num_updates": "242000", "lr": "0.000765657", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "78952"} +[2022-08-01 23:25:04,750][train_inner][INFO] - {"epoch": 5, "update": 4.706, "loss": "2.326", "ppl": "5.01", "wps": "364192", "ups": "3.07", "wpb": "118598", "bsz": "256", "num_updates": "242200", "lr": "0.000765455", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "79017"} +[2022-08-01 23:25:06,302][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-01 23:25:53,783][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 23:26:09,995][train_inner][INFO] - {"epoch": 5, "update": 4.71, "loss": "2.326", "ppl": "5.01", "wps": "362236", "ups": "3.07", "wpb": "118168", "bsz": "256", "num_updates": "242400", "lr": "0.000765253", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "79082"} +[2022-08-01 23:27:14,707][train_inner][INFO] - {"epoch": 5, "update": 4.714, "loss": "2.328", "ppl": "5.02", "wps": "366599", "ups": "3.09", "wpb": "118617", "bsz": "256", "num_updates": "242600", "lr": "0.000765051", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "79147"} +[2022-08-01 23:28:19,630][train_inner][INFO] - {"epoch": 5, "update": 4.718, "loss": "2.333", "ppl": "5.04", "wps": "364556", "ups": "3.08", "wpb": "118338", "bsz": "256", "num_updates": "242800", "lr": "0.000764848", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "79212"} +[2022-08-01 23:29:24,728][train_inner][INFO] - {"epoch": 5, "update": 4.721, "loss": "2.334", "ppl": "5.04", "wps": "362516", "ups": "3.07", "wpb": "117994", "bsz": "256", "num_updates": "243000", "lr": "0.000764646", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "79277"} +[2022-08-01 23:30:29,558][train_inner][INFO] - {"epoch": 5, "update": 4.725, "loss": "2.328", "ppl": "5.02", "wps": "364876", "ups": "3.09", "wpb": "118272", "bsz": "256", "num_updates": "243200", "lr": "0.000764444", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "79342"} +[2022-08-01 23:31:34,414][train_inner][INFO] - {"epoch": 5, "update": 4.729, "loss": "2.322", "ppl": "5", "wps": "365770", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "243400", "lr": "0.000764242", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "79407"} +[2022-08-01 23:32:39,213][train_inner][INFO] - {"epoch": 5, "update": 4.733, "loss": "2.33", "ppl": "5.03", "wps": "366254", "ups": "3.09", "wpb": "118664", "bsz": "256", "num_updates": "243600", "lr": "0.00076404", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.9", "wall": "79472"} +[2022-08-01 23:33:43,846][train_inner][INFO] - {"epoch": 5, "update": 4.737, "loss": "2.334", "ppl": "5.04", "wps": "362649", "ups": "3.09", "wpb": "117193", "bsz": "256", "num_updates": "243800", "lr": "0.000763838", "gnorm": "0.684", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.1", "wall": "79536"} +[2022-08-01 23:34:48,654][train_inner][INFO] - {"epoch": 5, "update": 4.741, "loss": "2.324", "ppl": "5.01", "wps": "364802", "ups": "3.09", "wpb": "118207", "bsz": "256", "num_updates": "244000", "lr": "0.000763636", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21", "wall": "79601"} +[2022-08-01 23:35:53,959][train_inner][INFO] - {"epoch": 5, "update": 4.745, "loss": "2.331", "ppl": "5.03", "wps": "361623", "ups": "3.06", "wpb": "118078", "bsz": "256", "num_updates": "244200", "lr": "0.000763434", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.2", "wall": "79666"} +[2022-08-01 23:36:59,191][train_inner][INFO] - {"epoch": 5, "update": 4.749, "loss": "2.321", "ppl": "5", "wps": "363649", "ups": "3.07", "wpb": "118606", "bsz": "256", "num_updates": "244400", "lr": "0.000763232", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "79732"} +[2022-08-01 23:38:04,250][train_inner][INFO] - {"epoch": 5, "update": 4.753, "loss": "2.319", "ppl": "4.99", "wps": "363740", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "244600", "lr": "0.00076303", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.5", "wall": "79797"} +[2022-08-01 23:39:09,101][train_inner][INFO] - {"epoch": 5, "update": 4.756, "loss": "2.326", "ppl": "5.02", "wps": "362599", "ups": "3.08", "wpb": "117573", "bsz": "256", "num_updates": "244800", "lr": "0.000762828", "gnorm": "0.696", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "79861"} +[2022-08-01 23:40:14,049][train_inner][INFO] - {"epoch": 5, "update": 4.76, "loss": "2.325", "ppl": "5.01", "wps": "363820", "ups": "3.08", "wpb": "118146", "bsz": "256", "num_updates": "245000", "lr": "0.000762626", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "79926"} +[2022-08-01 23:41:19,077][train_inner][INFO] - {"epoch": 5, "update": 4.764, "loss": "2.331", "ppl": "5.03", "wps": "363110", "ups": "3.08", "wpb": "118058", "bsz": "256", "num_updates": "245200", "lr": "0.000762424", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.9", "wall": "79991"} +[2022-08-01 23:42:24,074][train_inner][INFO] - {"epoch": 5, "update": 4.768, "loss": "2.329", "ppl": "5.03", "wps": "363338", "ups": "3.08", "wpb": "118078", "bsz": "256", "num_updates": "245400", "lr": "0.000762222", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "80056"} +[2022-08-01 23:43:28,852][train_inner][INFO] - {"epoch": 5, "update": 4.772, "loss": "2.329", "ppl": "5.03", "wps": "364681", "ups": "3.09", "wpb": "118115", "bsz": "256", "num_updates": "245600", "lr": "0.00076202", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "80121"} +[2022-08-01 23:44:33,918][train_inner][INFO] - {"epoch": 5, "update": 4.776, "loss": "2.327", "ppl": "5.02", "wps": "363604", "ups": "3.07", "wpb": "118290", "bsz": "256", "num_updates": "245800", "lr": "0.000761818", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "80186"} +[2022-08-01 23:45:00,349][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-01 23:45:39,544][train_inner][INFO] - {"epoch": 5, "update": 4.78, "loss": "2.318", "ppl": "4.99", "wps": "360854", "ups": "3.05", "wpb": "118403", "bsz": "256", "num_updates": "246000", "lr": "0.000761616", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.9", "wall": "80252"} +[2022-08-01 23:46:44,757][train_inner][INFO] - {"epoch": 5, "update": 4.784, "loss": "2.326", "ppl": "5.01", "wps": "363224", "ups": "3.07", "wpb": "118434", "bsz": "256", "num_updates": "246200", "lr": "0.000761414", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "80317"} +[2022-08-01 23:47:49,513][train_inner][INFO] - {"epoch": 5, "update": 4.787, "loss": "2.326", "ppl": "5.01", "wps": "366038", "ups": "3.09", "wpb": "118514", "bsz": "256", "num_updates": "246400", "lr": "0.000761212", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "80382"} +[2022-08-01 23:48:54,477][train_inner][INFO] - {"epoch": 5, "update": 4.791, "loss": "2.317", "ppl": "4.98", "wps": "366148", "ups": "3.08", "wpb": "118930", "bsz": "256", "num_updates": "246600", "lr": "0.00076101", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "80447"} +[2022-08-01 23:49:59,289][train_inner][INFO] - {"epoch": 5, "update": 4.795, "loss": "2.328", "ppl": "5.02", "wps": "365284", "ups": "3.09", "wpb": "118372", "bsz": "256", "num_updates": "246800", "lr": "0.000760808", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.3", "wall": "80512"} +[2022-08-01 23:51:03,878][train_inner][INFO] - {"epoch": 5, "update": 4.799, "loss": "2.324", "ppl": "5.01", "wps": "366032", "ups": "3.1", "wpb": "118207", "bsz": "256", "num_updates": "247000", "lr": "0.000760606", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.6", "wall": "80576"} +[2022-08-01 23:52:08,782][train_inner][INFO] - {"epoch": 5, "update": 4.803, "loss": "2.321", "ppl": "5", "wps": "363846", "ups": "3.08", "wpb": "118073", "bsz": "256", "num_updates": "247200", "lr": "0.000760404", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "80641"} +[2022-08-01 23:53:14,229][train_inner][INFO] - {"epoch": 5, "update": 4.807, "loss": "2.32", "ppl": "4.99", "wps": "362256", "ups": "3.06", "wpb": "118541", "bsz": "256", "num_updates": "247400", "lr": "0.000760202", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "80707"} +[2022-08-01 23:54:20,091][train_inner][INFO] - {"epoch": 5, "update": 4.811, "loss": "2.321", "ppl": "5", "wps": "360176", "ups": "3.04", "wpb": "118608", "bsz": "256", "num_updates": "247600", "lr": "0.00076", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.5", "wall": "80772"} +[2022-08-01 23:55:25,023][train_inner][INFO] - {"epoch": 5, "update": 4.815, "loss": "2.33", "ppl": "5.03", "wps": "362323", "ups": "3.08", "wpb": "117629", "bsz": "256", "num_updates": "247800", "lr": "0.000759798", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "80837"} +[2022-08-01 23:56:30,054][train_inner][INFO] - {"epoch": 5, "update": 4.819, "loss": "2.326", "ppl": "5.01", "wps": "363716", "ups": "3.08", "wpb": "118262", "bsz": "256", "num_updates": "248000", "lr": "0.000759596", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "80902"} +[2022-08-01 23:57:34,943][train_inner][INFO] - {"epoch": 5, "update": 4.822, "loss": "2.321", "ppl": "5", "wps": "364976", "ups": "3.08", "wpb": "118412", "bsz": "256", "num_updates": "248200", "lr": "0.000759394", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "80967"} +[2022-08-01 23:58:39,849][train_inner][INFO] - {"epoch": 5, "update": 4.826, "loss": "2.319", "ppl": "4.99", "wps": "364718", "ups": "3.08", "wpb": "118361", "bsz": "256", "num_updates": "248400", "lr": "0.000759192", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "81032"} +[2022-08-01 23:59:44,992][train_inner][INFO] - {"epoch": 5, "update": 4.83, "loss": "2.327", "ppl": "5.02", "wps": "361627", "ups": "3.07", "wpb": "117785", "bsz": "256", "num_updates": "248600", "lr": "0.00075899", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "81097"} +[2022-08-02 00:00:49,225][train_inner][INFO] - {"epoch": 5, "update": 4.834, "loss": "2.321", "ppl": "5", "wps": "369719", "ups": "3.11", "wpb": "118739", "bsz": "256", "num_updates": "248800", "lr": "0.000758788", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "81162"} +[2022-08-02 00:01:54,144][train_inner][INFO] - {"epoch": 5, "update": 4.838, "loss": "2.32", "ppl": "4.99", "wps": "364595", "ups": "3.08", "wpb": "118344", "bsz": "256", "num_updates": "249000", "lr": "0.000758586", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "81226"} +[2022-08-02 00:02:58,979][train_inner][INFO] - {"epoch": 5, "update": 4.842, "loss": "2.323", "ppl": "5", "wps": "365298", "ups": "3.08", "wpb": "118418", "bsz": "256", "num_updates": "249200", "lr": "0.000758384", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "81291"} +[2022-08-02 00:04:03,760][train_inner][INFO] - {"epoch": 5, "update": 4.846, "loss": "2.331", "ppl": "5.03", "wps": "364536", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "249400", "lr": "0.000758182", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "81356"} +[2022-08-02 00:05:08,912][train_inner][INFO] - {"epoch": 5, "update": 4.85, "loss": "2.315", "ppl": "4.98", "wps": "365097", "ups": "3.07", "wpb": "118932", "bsz": "256", "num_updates": "249600", "lr": "0.00075798", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "81421"} +[2022-08-02 00:06:13,732][train_inner][INFO] - {"epoch": 5, "update": 4.854, "loss": "2.322", "ppl": "5", "wps": "364946", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "249800", "lr": "0.000757778", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23", "wall": "81486"} +[2022-08-02 00:07:18,412][train_inner][INFO] - {"epoch": 5, "update": 4.857, "loss": "2.321", "ppl": "5", "wps": "365201", "ups": "3.09", "wpb": "118103", "bsz": "256", "num_updates": "250000", "lr": "0.000757576", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "81551"} +[2022-08-02 00:07:18,413][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 00:07:40,991][valid][INFO] - {"epoch": 5, "valid_loss": "2.223", "valid_ppl": "4.67", "valid_wps": "1.61597e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "250000", "valid_best_loss": "2.223"} +[2022-08-02 00:07:40,994][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 250000 updates +[2022-08-02 00:07:40,994][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_5_250000.pt +[2022-08-02 00:07:55,779][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_5_250000.pt +[2022-08-02 00:08:23,482][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_5_250000.pt (epoch 5 @ 250000 updates, score 2.223) (writing took 42.48845534957945 seconds) +[2022-08-02 00:09:28,051][train_inner][INFO] - {"epoch": 5, "update": 4.861, "loss": "2.319", "ppl": "4.99", "wps": "182658", "ups": "1.54", "wpb": "118398", "bsz": "256", "num_updates": "250200", "lr": "0.000757374", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "81680"} +[2022-08-02 00:10:32,855][train_inner][INFO] - {"epoch": 5, "update": 4.865, "loss": "2.323", "ppl": "5", "wps": "365916", "ups": "3.09", "wpb": "118561", "bsz": "256", "num_updates": "250400", "lr": "0.000757172", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "81745"} +[2022-08-02 00:11:38,990][train_inner][INFO] - {"epoch": 5, "update": 4.869, "loss": "2.327", "ppl": "5.02", "wps": "356789", "ups": "3.02", "wpb": "117979", "bsz": "256", "num_updates": "250600", "lr": "0.00075697", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.7", "wall": "81811"} +[2022-08-02 00:12:43,747][train_inner][INFO] - {"epoch": 5, "update": 4.873, "loss": "2.317", "ppl": "4.98", "wps": "366654", "ups": "3.09", "wpb": "118716", "bsz": "256", "num_updates": "250800", "lr": "0.000756768", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "81876"} +[2022-08-02 00:13:48,661][train_inner][INFO] - {"epoch": 5, "update": 4.877, "loss": "2.32", "ppl": "4.99", "wps": "364284", "ups": "3.08", "wpb": "118232", "bsz": "256", "num_updates": "251000", "lr": "0.000756566", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "81941"} +[2022-08-02 00:14:53,447][train_inner][INFO] - {"epoch": 5, "update": 4.881, "loss": "2.323", "ppl": "5", "wps": "364950", "ups": "3.09", "wpb": "118214", "bsz": "256", "num_updates": "251200", "lr": "0.000756364", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "82006"} +[2022-08-02 00:15:58,160][train_inner][INFO] - {"epoch": 5, "update": 4.885, "loss": "2.326", "ppl": "5.01", "wps": "366101", "ups": "3.09", "wpb": "118445", "bsz": "256", "num_updates": "251400", "lr": "0.000756162", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.5", "wall": "82070"} +[2022-08-02 00:17:02,887][train_inner][INFO] - {"epoch": 5, "update": 4.888, "loss": "2.323", "ppl": "5", "wps": "364190", "ups": "3.09", "wpb": "117864", "bsz": "256", "num_updates": "251600", "lr": "0.00075596", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "82135"} +[2022-08-02 00:18:07,998][train_inner][INFO] - {"epoch": 5, "update": 4.892, "loss": "2.315", "ppl": "4.98", "wps": "363739", "ups": "3.07", "wpb": "118414", "bsz": "256", "num_updates": "251800", "lr": "0.000755758", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "82200"} +[2022-08-02 00:19:12,898][train_inner][INFO] - {"epoch": 5, "update": 4.896, "loss": "2.321", "ppl": "5", "wps": "363160", "ups": "3.08", "wpb": "117844", "bsz": "256", "num_updates": "252000", "lr": "0.000755556", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "82265"} +[2022-08-02 00:20:17,457][train_inner][INFO] - {"epoch": 5, "update": 4.9, "loss": "2.318", "ppl": "4.98", "wps": "363360", "ups": "3.1", "wpb": "117289", "bsz": "256", "num_updates": "252200", "lr": "0.000755354", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "82330"} +[2022-08-02 00:21:23,085][train_inner][INFO] - {"epoch": 5, "update": 4.904, "loss": "2.32", "ppl": "4.99", "wps": "359546", "ups": "3.05", "wpb": "117979", "bsz": "256", "num_updates": "252400", "lr": "0.000755152", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "82395"} +[2022-08-02 00:22:27,837][train_inner][INFO] - {"epoch": 5, "update": 4.908, "loss": "2.33", "ppl": "5.03", "wps": "365497", "ups": "3.09", "wpb": "118325", "bsz": "256", "num_updates": "252600", "lr": "0.000754949", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "82460"} +[2022-08-02 00:23:32,951][train_inner][INFO] - {"epoch": 5, "update": 4.912, "loss": "2.316", "ppl": "4.98", "wps": "363266", "ups": "3.07", "wpb": "118266", "bsz": "256", "num_updates": "252800", "lr": "0.000754747", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "82525"} +[2022-08-02 00:24:37,652][train_inner][INFO] - {"epoch": 5, "update": 4.916, "loss": "2.32", "ppl": "4.99", "wps": "366103", "ups": "3.09", "wpb": "118434", "bsz": "256", "num_updates": "253000", "lr": "0.000754545", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "82590"} +[2022-08-02 00:25:42,067][train_inner][INFO] - {"epoch": 5, "update": 4.92, "loss": "2.319", "ppl": "4.99", "wps": "366091", "ups": "3.1", "wpb": "117908", "bsz": "256", "num_updates": "253200", "lr": "0.000754343", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "82654"} +[2022-08-02 00:26:47,027][train_inner][INFO] - {"epoch": 5, "update": 4.923, "loss": "2.319", "ppl": "4.99", "wps": "364898", "ups": "3.08", "wpb": "118517", "bsz": "256", "num_updates": "253400", "lr": "0.000754141", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.8", "wall": "82719"} +[2022-08-02 00:27:52,213][train_inner][INFO] - {"epoch": 5, "update": 4.927, "loss": "2.319", "ppl": "4.99", "wps": "362722", "ups": "3.07", "wpb": "118220", "bsz": "256", "num_updates": "253600", "lr": "0.000753939", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "82785"} +[2022-08-02 00:28:57,485][train_inner][INFO] - {"epoch": 5, "update": 4.931, "loss": "2.312", "ppl": "4.97", "wps": "362342", "ups": "3.06", "wpb": "118251", "bsz": "256", "num_updates": "253800", "lr": "0.000753737", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "82850"} +[2022-08-02 00:30:02,279][train_inner][INFO] - {"epoch": 5, "update": 4.935, "loss": "2.321", "ppl": "5", "wps": "364979", "ups": "3.09", "wpb": "118241", "bsz": "256", "num_updates": "254000", "lr": "0.000753535", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "82915"} +[2022-08-02 00:30:39,976][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 00:31:07,674][train_inner][INFO] - {"epoch": 5, "update": 4.939, "loss": "2.316", "ppl": "4.98", "wps": "363690", "ups": "3.06", "wpb": "118916", "bsz": "256", "num_updates": "254200", "lr": "0.000753333", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "82980"} +[2022-08-02 00:32:13,264][train_inner][INFO] - {"epoch": 5, "update": 4.943, "loss": "2.318", "ppl": "4.99", "wps": "360756", "ups": "3.05", "wpb": "118308", "bsz": "256", "num_updates": "254400", "lr": "0.000753131", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "83046"} +[2022-08-02 00:32:22,313][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 00:33:18,270][train_inner][INFO] - {"epoch": 5, "update": 4.947, "loss": "2.319", "ppl": "4.99", "wps": "364283", "ups": "3.08", "wpb": "118401", "bsz": "256", "num_updates": "254600", "lr": "0.000752929", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "83111"} +[2022-08-02 00:34:23,547][train_inner][INFO] - {"epoch": 5, "update": 4.951, "loss": "2.32", "ppl": "4.99", "wps": "361784", "ups": "3.06", "wpb": "118081", "bsz": "256", "num_updates": "254800", "lr": "0.000752727", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "83176"} +[2022-08-02 00:35:28,252][train_inner][INFO] - {"epoch": 5, "update": 4.955, "loss": "2.323", "ppl": "5", "wps": "365148", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "255000", "lr": "0.000752525", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "83241"} +[2022-08-02 00:36:32,681][train_inner][INFO] - {"epoch": 5, "update": 4.958, "loss": "2.324", "ppl": "5.01", "wps": "366312", "ups": "3.1", "wpb": "118004", "bsz": "256", "num_updates": "255200", "lr": "0.000752323", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "83305"} +[2022-08-02 00:37:37,452][train_inner][INFO] - {"epoch": 5, "update": 4.962, "loss": "2.321", "ppl": "5", "wps": "366263", "ups": "3.09", "wpb": "118614", "bsz": "256", "num_updates": "255400", "lr": "0.000752121", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "83370"} +[2022-08-02 00:38:42,438][train_inner][INFO] - {"epoch": 5, "update": 4.966, "loss": "2.311", "ppl": "4.96", "wps": "364052", "ups": "3.08", "wpb": "118290", "bsz": "256", "num_updates": "255600", "lr": "0.000751919", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "83435"} +[2022-08-02 00:39:47,170][train_inner][INFO] - {"epoch": 5, "update": 4.97, "loss": "2.318", "ppl": "4.99", "wps": "362062", "ups": "3.09", "wpb": "117183", "bsz": "256", "num_updates": "255800", "lr": "0.000751717", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.1", "wall": "83499"} +[2022-08-02 00:40:52,265][train_inner][INFO] - {"epoch": 5, "update": 4.974, "loss": "2.314", "ppl": "4.97", "wps": "365103", "ups": "3.07", "wpb": "118831", "bsz": "256", "num_updates": "256000", "lr": "0.000751515", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "83565"} +[2022-08-02 00:41:57,257][train_inner][INFO] - {"epoch": 5, "update": 4.978, "loss": "2.319", "ppl": "4.99", "wps": "364242", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "256200", "lr": "0.000751313", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "83630"} +[2022-08-02 00:43:02,267][train_inner][INFO] - {"epoch": 5, "update": 4.982, "loss": "2.318", "ppl": "4.99", "wps": "364856", "ups": "3.08", "wpb": "118594", "bsz": "256", "num_updates": "256400", "lr": "0.000751111", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "83695"} +[2022-08-02 00:44:06,826][train_inner][INFO] - {"epoch": 5, "update": 4.986, "loss": "2.323", "ppl": "5", "wps": "365493", "ups": "3.1", "wpb": "117978", "bsz": "256", "num_updates": "256600", "lr": "0.000750909", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "83759"} +[2022-08-02 00:44:55,969][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 00:45:11,980][train_inner][INFO] - {"epoch": 5, "update": 4.99, "loss": "2.324", "ppl": "5.01", "wps": "363439", "ups": "3.07", "wpb": "118394", "bsz": "256", "num_updates": "256800", "lr": "0.000750707", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "83824"} +[2022-08-02 00:46:16,814][train_inner][INFO] - {"epoch": 5, "update": 4.993, "loss": "2.319", "ppl": "4.99", "wps": "364520", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "257000", "lr": "0.000750505", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "83889"} +[2022-08-02 00:47:21,868][train_inner][INFO] - {"epoch": 5, "update": 4.997, "loss": "2.317", "ppl": "4.98", "wps": "364231", "ups": "3.07", "wpb": "118470", "bsz": "256", "num_updates": "257200", "lr": "0.000750303", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "83954"} +[2022-08-02 00:48:07,828][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 00:48:30,655][valid][INFO] - {"epoch": 5, "valid_loss": "2.218", "valid_ppl": "4.65", "valid_wps": "1.61793e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "257339", "valid_best_loss": "2.218"} +[2022-08-02 00:48:30,657][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 257339 updates +[2022-08-02 00:48:30,658][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt +[2022-08-02 00:48:42,410][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt +[2022-08-02 00:49:01,671][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_best.pt (epoch 5 @ 257339 updates, score 2.218) (writing took 31.013541775755584 seconds) +[2022-08-02 00:49:01,671][fairseq_cli.train][INFO] - end of epoch 5 (average epoch stats below) +[2022-08-02 00:49:01,672][train][INFO] - {"epoch": 5, "train_loss": "2.338", "train_ppl": "5.06", "train_wps": "361380", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "257339", "train_lr": "0.000750163", "train_gnorm": "0.685", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16627", "train_gb_free": "20.7", "train_wall": "84054"} +[2022-08-02 00:49:01,783][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 00:49:01,786][fairseq.trainer][INFO] - begin training epoch 6 +[2022-08-02 00:49:01,787][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 00:49:31,142][train_inner][INFO] - {"epoch": 6, "update": 5.001, "loss": "2.309", "ppl": "4.96", "wps": "182371", "ups": "1.55", "wpb": "117879", "bsz": "255.4", "num_updates": "257400", "lr": "0.000750101", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.8", "wall": "84083"} +[2022-08-02 00:50:36,165][train_inner][INFO] - {"epoch": 6, "update": 5.005, "loss": "2.315", "ppl": "4.98", "wps": "363794", "ups": "3.08", "wpb": "118274", "bsz": "256", "num_updates": "257600", "lr": "0.000749899", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "84148"} +[2022-08-02 00:51:40,769][train_inner][INFO] - {"epoch": 6, "update": 5.009, "loss": "2.314", "ppl": "4.97", "wps": "365924", "ups": "3.1", "wpb": "118198", "bsz": "256", "num_updates": "257800", "lr": "0.000749697", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "84213"} +[2022-08-02 00:52:45,668][train_inner][INFO] - {"epoch": 6, "update": 5.013, "loss": "2.311", "ppl": "4.96", "wps": "365611", "ups": "3.08", "wpb": "118637", "bsz": "256", "num_updates": "258000", "lr": "0.000749495", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "84278"} +[2022-08-02 00:53:51,603][train_inner][INFO] - {"epoch": 6, "update": 5.017, "loss": "2.31", "ppl": "4.96", "wps": "359883", "ups": "3.03", "wpb": "118643", "bsz": "256", "num_updates": "258200", "lr": "0.000749293", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.3", "wall": "84344"} +[2022-08-02 00:54:57,350][train_inner][INFO] - {"epoch": 6, "update": 5.021, "loss": "2.32", "ppl": "4.99", "wps": "359358", "ups": "3.04", "wpb": "118132", "bsz": "256", "num_updates": "258400", "lr": "0.000749091", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "84410"} +[2022-08-02 00:56:02,098][train_inner][INFO] - {"epoch": 6, "update": 5.024, "loss": "2.316", "ppl": "4.98", "wps": "364207", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "258600", "lr": "0.000748889", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "84474"} +[2022-08-02 00:57:06,864][train_inner][INFO] - {"epoch": 6, "update": 5.028, "loss": "2.318", "ppl": "4.99", "wps": "362007", "ups": "3.09", "wpb": "117228", "bsz": "256", "num_updates": "258800", "lr": "0.000748687", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "84539"} +[2022-08-02 00:58:11,886][train_inner][INFO] - {"epoch": 6, "update": 5.032, "loss": "2.31", "ppl": "4.96", "wps": "365670", "ups": "3.08", "wpb": "118881", "bsz": "256", "num_updates": "259000", "lr": "0.000748485", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "84604"} +[2022-08-02 00:59:16,227][train_inner][INFO] - {"epoch": 6, "update": 5.036, "loss": "2.307", "ppl": "4.95", "wps": "369115", "ups": "3.11", "wpb": "118744", "bsz": "256", "num_updates": "259200", "lr": "0.000748283", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "84669"} +[2022-08-02 01:00:21,097][train_inner][INFO] - {"epoch": 6, "update": 5.04, "loss": "2.31", "ppl": "4.96", "wps": "365846", "ups": "3.08", "wpb": "118660", "bsz": "256", "num_updates": "259400", "lr": "0.000748081", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "84733"} +[2022-08-02 01:01:25,587][train_inner][INFO] - {"epoch": 6, "update": 5.044, "loss": "2.318", "ppl": "4.99", "wps": "366266", "ups": "3.1", "wpb": "118101", "bsz": "256", "num_updates": "259600", "lr": "0.000747879", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "84798"} +[2022-08-02 01:02:30,325][train_inner][INFO] - {"epoch": 6, "update": 5.048, "loss": "2.32", "ppl": "4.99", "wps": "364778", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "259800", "lr": "0.000747677", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "84863"} +[2022-08-02 01:03:35,278][train_inner][INFO] - {"epoch": 6, "update": 5.052, "loss": "2.309", "ppl": "4.96", "wps": "364883", "ups": "3.08", "wpb": "118499", "bsz": "256", "num_updates": "260000", "lr": "0.000747475", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "84928"} +[2022-08-02 01:04:39,982][train_inner][INFO] - {"epoch": 6, "update": 5.056, "loss": "2.307", "ppl": "4.95", "wps": "367270", "ups": "3.09", "wpb": "118816", "bsz": "256", "num_updates": "260200", "lr": "0.000747273", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "84992"} +[2022-08-02 01:05:44,585][train_inner][INFO] - {"epoch": 6, "update": 5.059, "loss": "2.305", "ppl": "4.94", "wps": "368106", "ups": "3.1", "wpb": "118902", "bsz": "256", "num_updates": "260400", "lr": "0.000747071", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "85057"} +[2022-08-02 01:06:37,358][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 01:06:49,756][train_inner][INFO] - {"epoch": 6, "update": 5.063, "loss": "2.32", "ppl": "4.99", "wps": "362312", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "260600", "lr": "0.000746869", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "85122"} +[2022-08-02 01:07:54,231][train_inner][INFO] - {"epoch": 6, "update": 5.067, "loss": "2.309", "ppl": "4.95", "wps": "366348", "ups": "3.1", "wpb": "118100", "bsz": "256", "num_updates": "260800", "lr": "0.000746667", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "85187"} +[2022-08-02 01:08:59,226][train_inner][INFO] - {"epoch": 6, "update": 5.071, "loss": "2.305", "ppl": "4.94", "wps": "366091", "ups": "3.08", "wpb": "118969", "bsz": "256", "num_updates": "261000", "lr": "0.000746465", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "85252"} +[2022-08-02 01:10:04,214][train_inner][INFO] - {"epoch": 6, "update": 5.075, "loss": "2.308", "ppl": "4.95", "wps": "363850", "ups": "3.08", "wpb": "118227", "bsz": "256", "num_updates": "261200", "lr": "0.000746263", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.2", "wall": "85317"} +[2022-08-02 01:11:09,000][train_inner][INFO] - {"epoch": 6, "update": 5.079, "loss": "2.312", "ppl": "4.97", "wps": "366292", "ups": "3.09", "wpb": "118651", "bsz": "256", "num_updates": "261400", "lr": "0.000746061", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.1", "wall": "85381"} +[2022-08-02 01:12:13,663][train_inner][INFO] - {"epoch": 6, "update": 5.083, "loss": "2.309", "ppl": "4.95", "wps": "365662", "ups": "3.09", "wpb": "118222", "bsz": "256", "num_updates": "261600", "lr": "0.000745859", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "85446"} +[2022-08-02 01:13:18,093][train_inner][INFO] - {"epoch": 6, "update": 5.087, "loss": "2.313", "ppl": "4.97", "wps": "366371", "ups": "3.1", "wpb": "118024", "bsz": "256", "num_updates": "261800", "lr": "0.000745657", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "85510"} +[2022-08-02 01:14:11,128][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 01:14:22,992][train_inner][INFO] - {"epoch": 6, "update": 5.091, "loss": "2.31", "ppl": "4.96", "wps": "364857", "ups": "3.08", "wpb": "118392", "bsz": "256", "num_updates": "262000", "lr": "0.000745455", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.9", "wall": "85575"} +[2022-08-02 01:14:32,642][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 01:14:32,941][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 01:14:33,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 01:15:28,690][train_inner][INFO] - {"epoch": 6, "update": 5.095, "loss": "2.312", "ppl": "4.97", "wps": "360879", "ups": "3.04", "wpb": "118544", "bsz": "256", "num_updates": "262200", "lr": "0.000745253", "gnorm": "0.734", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "85641"} +[2022-08-02 01:16:33,707][train_inner][INFO] - {"epoch": 6, "update": 5.098, "loss": "2.314", "ppl": "4.97", "wps": "364336", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "262400", "lr": "0.000745051", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "25.8", "wall": "85706"} +[2022-08-02 01:17:38,722][train_inner][INFO] - {"epoch": 6, "update": 5.102, "loss": "2.314", "ppl": "4.97", "wps": "362931", "ups": "3.08", "wpb": "117978", "bsz": "256", "num_updates": "262600", "lr": "0.000744848", "gnorm": "0.742", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.7", "wall": "85771"} +[2022-08-02 01:18:43,825][train_inner][INFO] - {"epoch": 6, "update": 5.106, "loss": "2.309", "ppl": "4.96", "wps": "362281", "ups": "3.07", "wpb": "117925", "bsz": "256", "num_updates": "262800", "lr": "0.000744646", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.2", "wall": "85836"} +[2022-08-02 01:19:48,658][train_inner][INFO] - {"epoch": 6, "update": 5.11, "loss": "2.309", "ppl": "4.96", "wps": "364046", "ups": "3.08", "wpb": "118011", "bsz": "256", "num_updates": "263000", "lr": "0.000744444", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.1", "wall": "85901"} +[2022-08-02 01:20:53,420][train_inner][INFO] - {"epoch": 6, "update": 5.114, "loss": "2.309", "ppl": "4.96", "wps": "364665", "ups": "3.09", "wpb": "118080", "bsz": "256", "num_updates": "263200", "lr": "0.000744242", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.3", "wall": "85966"} +[2022-08-02 01:21:51,923][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-02 01:21:58,184][train_inner][INFO] - {"epoch": 6, "update": 5.118, "loss": "2.355", "ppl": "5.12", "wps": "363249", "ups": "3.09", "wpb": "117625", "bsz": "256", "num_updates": "263400", "lr": "0.00074404", "gnorm": "1.675", "clip": "2.5", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.5", "wall": "86031"} +[2022-08-02 01:23:03,061][train_inner][INFO] - {"epoch": 6, "update": 5.122, "loss": "2.315", "ppl": "4.98", "wps": "362232", "ups": "3.08", "wpb": "117502", "bsz": "256", "num_updates": "263600", "lr": "0.000743838", "gnorm": "0.724", "clip": "0.5", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.8", "wall": "86095"} +[2022-08-02 01:23:58,653][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 +[2022-08-02 01:24:08,000][train_inner][INFO] - {"epoch": 6, "update": 5.126, "loss": "2.351", "ppl": "5.1", "wps": "364698", "ups": "3.08", "wpb": "118413", "bsz": "256", "num_updates": "263800", "lr": "0.000743636", "gnorm": "1.227", "clip": "3", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.9", "wall": "86160"} +[2022-08-02 01:25:12,828][train_inner][INFO] - {"epoch": 6, "update": 5.13, "loss": "2.314", "ppl": "4.97", "wps": "364541", "ups": "3.09", "wpb": "118163", "bsz": "256", "num_updates": "264000", "lr": "0.000743434", "gnorm": "0.675", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.2", "wall": "86225"} +[2022-08-02 01:26:17,624][train_inner][INFO] - {"epoch": 6, "update": 5.133, "loss": "2.314", "ppl": "4.97", "wps": "364036", "ups": "3.09", "wpb": "117938", "bsz": "256", "num_updates": "264200", "lr": "0.000743232", "gnorm": "0.701", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "19.9", "wall": "86290"} +[2022-08-02 01:27:22,314][train_inner][INFO] - {"epoch": 6, "update": 5.137, "loss": "2.314", "ppl": "4.97", "wps": "365451", "ups": "3.09", "wpb": "118203", "bsz": "256", "num_updates": "264400", "lr": "0.00074303", "gnorm": "0.675", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.6", "wall": "86355"} +[2022-08-02 01:28:27,198][train_inner][INFO] - {"epoch": 6, "update": 5.141, "loss": "2.32", "ppl": "4.99", "wps": "362560", "ups": "3.08", "wpb": "117619", "bsz": "256", "num_updates": "264600", "lr": "0.000742828", "gnorm": "0.702", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.7", "wall": "86420"} +[2022-08-02 01:29:32,435][train_inner][INFO] - {"epoch": 6, "update": 5.145, "loss": "2.309", "ppl": "4.96", "wps": "363076", "ups": "3.07", "wpb": "118428", "bsz": "256", "num_updates": "264800", "lr": "0.000742626", "gnorm": "0.675", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.5", "wall": "86485"} +[2022-08-02 01:30:18,151][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.0625 +[2022-08-02 01:30:37,552][train_inner][INFO] - {"epoch": 6, "update": 5.149, "loss": "2.323", "ppl": "5", "wps": "361908", "ups": "3.07", "wpb": "117831", "bsz": "256", "num_updates": "265000", "lr": "0.000742424", "gnorm": "1.374", "clip": "0.5", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "28", "wall": "86550"} +[2022-08-02 01:31:42,323][train_inner][INFO] - {"epoch": 6, "update": 5.153, "loss": "2.314", "ppl": "4.97", "wps": "365684", "ups": "3.09", "wpb": "118427", "bsz": "256", "num_updates": "265200", "lr": "0.000742222", "gnorm": "0.675", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "27.2", "wall": "86615"} +[2022-08-02 01:32:47,406][train_inner][INFO] - {"epoch": 6, "update": 5.157, "loss": "2.309", "ppl": "4.96", "wps": "363142", "ups": "3.07", "wpb": "118171", "bsz": "256", "num_updates": "265400", "lr": "0.00074202", "gnorm": "0.676", "clip": "0", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "19.8", "wall": "86680"} +[2022-08-02 01:33:52,220][train_inner][INFO] - {"epoch": 6, "update": 5.161, "loss": "2.314", "ppl": "4.97", "wps": "366393", "ups": "3.09", "wpb": "118735", "bsz": "256", "num_updates": "265600", "lr": "0.000741818", "gnorm": "0.688", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "21.3", "wall": "86745"} +[2022-08-02 01:34:57,123][train_inner][INFO] - {"epoch": 6, "update": 5.164, "loss": "2.313", "ppl": "4.97", "wps": "364790", "ups": "3.08", "wpb": "118377", "bsz": "256", "num_updates": "265800", "lr": "0.000741616", "gnorm": "0.679", "clip": "0", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "22.6", "wall": "86809"} +[2022-08-02 01:36:01,940][train_inner][INFO] - {"epoch": 6, "update": 5.168, "loss": "2.313", "ppl": "4.97", "wps": "366026", "ups": "3.09", "wpb": "118623", "bsz": "256", "num_updates": "266000", "lr": "0.000741414", "gnorm": "0.701", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "20.1", "wall": "86874"} +[2022-08-02 01:37:06,533][train_inner][INFO] - {"epoch": 6, "update": 5.172, "loss": "2.314", "ppl": "4.97", "wps": "366484", "ups": "3.1", "wpb": "118358", "bsz": "256", "num_updates": "266200", "lr": "0.000741212", "gnorm": "0.678", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "21.2", "wall": "86939"} +[2022-08-02 01:38:11,197][train_inner][INFO] - {"epoch": 6, "update": 5.176, "loss": "2.308", "ppl": "4.95", "wps": "366350", "ups": "3.09", "wpb": "118447", "bsz": "256", "num_updates": "266400", "lr": "0.00074101", "gnorm": "0.677", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "20.4", "wall": "87004"} +[2022-08-02 01:39:16,347][train_inner][INFO] - {"epoch": 6, "update": 5.18, "loss": "2.314", "ppl": "4.97", "wps": "362160", "ups": "3.07", "wpb": "117972", "bsz": "256", "num_updates": "266600", "lr": "0.000740808", "gnorm": "0.681", "clip": "0", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "20.3", "wall": "87069"} +[2022-08-02 01:40:21,066][train_inner][INFO] - {"epoch": 6, "update": 5.184, "loss": "2.315", "ppl": "4.98", "wps": "366006", "ups": "3.09", "wpb": "118436", "bsz": "256", "num_updates": "266800", "lr": "0.000740606", "gnorm": "0.743", "clip": "0.5", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "23.4", "wall": "87133"} +[2022-08-02 01:41:26,121][train_inner][INFO] - {"epoch": 6, "update": 5.188, "loss": "2.317", "ppl": "4.98", "wps": "362739", "ups": "3.07", "wpb": "117987", "bsz": "256", "num_updates": "267000", "lr": "0.000740404", "gnorm": "0.682", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.9", "wall": "87198"} +[2022-08-02 01:42:30,806][train_inner][INFO] - {"epoch": 6, "update": 5.192, "loss": "2.313", "ppl": "4.97", "wps": "365350", "ups": "3.09", "wpb": "118162", "bsz": "256", "num_updates": "267200", "lr": "0.000740202", "gnorm": "0.679", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.5", "wall": "87263"} +[2022-08-02 01:43:35,789][train_inner][INFO] - {"epoch": 6, "update": 5.196, "loss": "2.314", "ppl": "4.97", "wps": "362681", "ups": "3.08", "wpb": "117840", "bsz": "256", "num_updates": "267400", "lr": "0.00074", "gnorm": "0.689", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.2", "wall": "87328"} +[2022-08-02 01:44:40,364][train_inner][INFO] - {"epoch": 6, "update": 5.199, "loss": "2.315", "ppl": "4.98", "wps": "367093", "ups": "3.1", "wpb": "118524", "bsz": "256", "num_updates": "267600", "lr": "0.000739798", "gnorm": "0.692", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "23.1", "wall": "87393"} +[2022-08-02 01:45:45,430][train_inner][INFO] - {"epoch": 6, "update": 5.203, "loss": "2.31", "ppl": "4.96", "wps": "363237", "ups": "3.07", "wpb": "118169", "bsz": "256", "num_updates": "267800", "lr": "0.000739596", "gnorm": "0.678", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.8", "wall": "87458"} +[2022-08-02 01:46:51,500][train_inner][INFO] - {"epoch": 6, "update": 5.207, "loss": "2.309", "ppl": "4.96", "wps": "357714", "ups": "3.03", "wpb": "118168", "bsz": "256", "num_updates": "268000", "lr": "0.000739394", "gnorm": "0.68", "clip": "0", "loss_scale": "0.125", "train_wall": "66", "gb_free": "22.5", "wall": "87524"} +[2022-08-02 01:47:56,273][train_inner][INFO] - {"epoch": 6, "update": 5.211, "loss": "2.311", "ppl": "4.96", "wps": "363573", "ups": "3.09", "wpb": "117748", "bsz": "256", "num_updates": "268200", "lr": "0.000739192", "gnorm": "0.697", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.9", "wall": "87589"} +[2022-08-02 01:49:01,014][train_inner][INFO] - {"epoch": 6, "update": 5.215, "loss": "2.309", "ppl": "4.95", "wps": "365702", "ups": "3.09", "wpb": "118377", "bsz": "256", "num_updates": "268400", "lr": "0.00073899", "gnorm": "0.68", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.2", "wall": "87653"} +[2022-08-02 01:50:05,994][train_inner][INFO] - {"epoch": 6, "update": 5.219, "loss": "2.307", "ppl": "4.95", "wps": "365307", "ups": "3.08", "wpb": "118688", "bsz": "255.9", "num_updates": "268600", "lr": "0.000738788", "gnorm": "0.677", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.3", "wall": "87718"} +[2022-08-02 01:51:11,504][train_inner][INFO] - {"epoch": 6, "update": 5.223, "loss": "2.314", "ppl": "4.97", "wps": "360591", "ups": "3.05", "wpb": "118109", "bsz": "256", "num_updates": "268800", "lr": "0.000738586", "gnorm": "0.694", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.9", "wall": "87784"} +[2022-08-02 01:52:16,393][train_inner][INFO] - {"epoch": 6, "update": 5.227, "loss": "2.308", "ppl": "4.95", "wps": "365634", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "269000", "lr": "0.000738384", "gnorm": "0.674", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "23.6", "wall": "87849"} +[2022-08-02 01:53:20,995][train_inner][INFO] - {"epoch": 6, "update": 5.231, "loss": "2.313", "ppl": "4.97", "wps": "367442", "ups": "3.1", "wpb": "118686", "bsz": "256", "num_updates": "269200", "lr": "0.000738182", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "87913"} +[2022-08-02 01:54:26,971][train_inner][INFO] - {"epoch": 6, "update": 5.234, "loss": "2.313", "ppl": "4.97", "wps": "357884", "ups": "3.03", "wpb": "118056", "bsz": "255.9", "num_updates": "269400", "lr": "0.00073798", "gnorm": "0.68", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.2", "wall": "87979"} +[2022-08-02 01:55:31,367][train_inner][INFO] - {"epoch": 6, "update": 5.238, "loss": "2.31", "ppl": "4.96", "wps": "366921", "ups": "3.11", "wpb": "118140", "bsz": "256", "num_updates": "269600", "lr": "0.000737778", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.4", "wall": "88044"} +[2022-08-02 01:56:36,329][train_inner][INFO] - {"epoch": 6, "update": 5.242, "loss": "2.311", "ppl": "4.96", "wps": "361509", "ups": "3.08", "wpb": "117419", "bsz": "256", "num_updates": "269800", "lr": "0.000737576", "gnorm": "0.682", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "88109"} +[2022-08-02 01:57:40,920][train_inner][INFO] - {"epoch": 6, "update": 5.246, "loss": "2.311", "ppl": "4.96", "wps": "366516", "ups": "3.1", "wpb": "118367", "bsz": "256", "num_updates": "270000", "lr": "0.000737374", "gnorm": "0.679", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.1", "wall": "88173"} +[2022-08-02 01:58:46,064][train_inner][INFO] - {"epoch": 6, "update": 5.25, "loss": "2.312", "ppl": "4.97", "wps": "364549", "ups": "3.07", "wpb": "118738", "bsz": "256", "num_updates": "270200", "lr": "0.000737172", "gnorm": "0.799", "clip": "1", "loss_scale": "0.25", "train_wall": "65", "gb_free": "25.1", "wall": "88238"} +[2022-08-02 01:59:50,486][train_inner][INFO] - {"epoch": 6, "update": 5.254, "loss": "2.3", "ppl": "4.93", "wps": "367425", "ups": "3.1", "wpb": "118350", "bsz": "256", "num_updates": "270400", "lr": "0.00073697", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "88303"} +[2022-08-02 02:00:55,287][train_inner][INFO] - {"epoch": 6, "update": 5.258, "loss": "2.307", "ppl": "4.95", "wps": "364927", "ups": "3.09", "wpb": "118236", "bsz": "256", "num_updates": "270600", "lr": "0.000736768", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "25.3", "wall": "88368"} +[2022-08-02 02:02:00,920][train_inner][INFO] - {"epoch": 6, "update": 5.262, "loss": "2.31", "ppl": "4.96", "wps": "359351", "ups": "3.05", "wpb": "117926", "bsz": "256", "num_updates": "270800", "lr": "0.000736566", "gnorm": "0.696", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "88433"} +[2022-08-02 02:03:07,083][train_inner][INFO] - {"epoch": 6, "update": 5.265, "loss": "2.306", "ppl": "4.95", "wps": "359921", "ups": "3.02", "wpb": "119065", "bsz": "256", "num_updates": "271000", "lr": "0.000736364", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "25", "wall": "88499"} +[2022-08-02 02:04:12,339][train_inner][INFO] - {"epoch": 6, "update": 5.269, "loss": "2.311", "ppl": "4.96", "wps": "362485", "ups": "3.06", "wpb": "118270", "bsz": "256", "num_updates": "271200", "lr": "0.000736162", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "88565"} +[2022-08-02 02:05:18,505][train_inner][INFO] - {"epoch": 6, "update": 5.273, "loss": "2.312", "ppl": "4.96", "wps": "357074", "ups": "3.02", "wpb": "118128", "bsz": "256", "num_updates": "271400", "lr": "0.00073596", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "20.2", "wall": "88631"} +[2022-08-02 02:06:23,376][train_inner][INFO] - {"epoch": 6, "update": 5.277, "loss": "2.305", "ppl": "4.94", "wps": "365093", "ups": "3.08", "wpb": "118419", "bsz": "256", "num_updates": "271600", "lr": "0.000735758", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.3", "wall": "88696"} +[2022-08-02 02:07:28,295][train_inner][INFO] - {"epoch": 6, "update": 5.281, "loss": "2.304", "ppl": "4.94", "wps": "363466", "ups": "3.08", "wpb": "117977", "bsz": "256", "num_updates": "271800", "lr": "0.000735556", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.7", "wall": "88761"} +[2022-08-02 02:08:33,218][train_inner][INFO] - {"epoch": 6, "update": 5.285, "loss": "2.306", "ppl": "4.94", "wps": "363108", "ups": "3.08", "wpb": "117868", "bsz": "256", "num_updates": "272000", "lr": "0.000735354", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.1", "wall": "88826"} +[2022-08-02 02:09:37,992][train_inner][INFO] - {"epoch": 6, "update": 5.289, "loss": "2.304", "ppl": "4.94", "wps": "364269", "ups": "3.09", "wpb": "117973", "bsz": "256", "num_updates": "272200", "lr": "0.000735152", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.4", "wall": "88890"} +[2022-08-02 02:10:42,825][train_inner][INFO] - {"epoch": 6, "update": 5.293, "loss": "2.306", "ppl": "4.94", "wps": "365619", "ups": "3.08", "wpb": "118519", "bsz": "256", "num_updates": "272400", "lr": "0.000734949", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "88955"} +[2022-08-02 02:11:47,470][train_inner][INFO] - {"epoch": 6, "update": 5.297, "loss": "2.307", "ppl": "4.95", "wps": "367063", "ups": "3.09", "wpb": "118643", "bsz": "256", "num_updates": "272600", "lr": "0.000734747", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "26.2", "wall": "89020"} +[2022-08-02 02:12:52,197][train_inner][INFO] - {"epoch": 6, "update": 5.3, "loss": "2.306", "ppl": "4.95", "wps": "366503", "ups": "3.09", "wpb": "118610", "bsz": "256", "num_updates": "272800", "lr": "0.000734545", "gnorm": "0.689", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "89085"} +[2022-08-02 02:13:56,759][train_inner][INFO] - {"epoch": 6, "update": 5.304, "loss": "2.296", "ppl": "4.91", "wps": "365965", "ups": "3.1", "wpb": "118137", "bsz": "256", "num_updates": "273000", "lr": "0.000734343", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "89149"} +[2022-08-02 02:15:01,520][train_inner][INFO] - {"epoch": 6, "update": 5.308, "loss": "2.305", "ppl": "4.94", "wps": "364355", "ups": "3.09", "wpb": "117978", "bsz": "256", "num_updates": "273200", "lr": "0.000734141", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "89214"} +[2022-08-02 02:16:05,963][train_inner][INFO] - {"epoch": 6, "update": 5.312, "loss": "2.305", "ppl": "4.94", "wps": "366827", "ups": "3.1", "wpb": "118195", "bsz": "256", "num_updates": "273400", "lr": "0.000733939", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.7", "wall": "89278"} +[2022-08-02 02:17:10,920][train_inner][INFO] - {"epoch": 6, "update": 5.316, "loss": "2.309", "ppl": "4.95", "wps": "364106", "ups": "3.08", "wpb": "118255", "bsz": "256", "num_updates": "273600", "lr": "0.000733737", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.6", "wall": "89343"} +[2022-08-02 02:18:16,088][train_inner][INFO] - {"epoch": 6, "update": 5.32, "loss": "2.307", "ppl": "4.95", "wps": "362300", "ups": "3.07", "wpb": "118049", "bsz": "256", "num_updates": "273800", "lr": "0.000733535", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "89408"} +[2022-08-02 02:19:20,773][train_inner][INFO] - {"epoch": 6, "update": 5.324, "loss": "2.305", "ppl": "4.94", "wps": "366668", "ups": "3.09", "wpb": "118588", "bsz": "256", "num_updates": "274000", "lr": "0.000733333", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "27.6", "wall": "89473"} +[2022-08-02 02:20:25,410][train_inner][INFO] - {"epoch": 6, "update": 5.328, "loss": "2.298", "ppl": "4.92", "wps": "366832", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "274200", "lr": "0.000733131", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.9", "wall": "89538"} +[2022-08-02 02:21:30,331][train_inner][INFO] - {"epoch": 6, "update": 5.332, "loss": "2.302", "ppl": "4.93", "wps": "365336", "ups": "3.08", "wpb": "118588", "bsz": "256", "num_updates": "274400", "lr": "0.000732929", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "89603"} +[2022-08-02 02:22:35,105][train_inner][INFO] - {"epoch": 6, "update": 5.335, "loss": "2.302", "ppl": "4.93", "wps": "365739", "ups": "3.09", "wpb": "118449", "bsz": "256", "num_updates": "274600", "lr": "0.000732727", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "89667"} +[2022-08-02 02:23:40,961][train_inner][INFO] - {"epoch": 6, "update": 5.339, "loss": "2.304", "ppl": "4.94", "wps": "358217", "ups": "3.04", "wpb": "117953", "bsz": "256", "num_updates": "274800", "lr": "0.000732525", "gnorm": "0.687", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "89733"} +[2022-08-02 02:24:45,559][train_inner][INFO] - {"epoch": 6, "update": 5.343, "loss": "2.303", "ppl": "4.94", "wps": "366597", "ups": "3.1", "wpb": "118405", "bsz": "256", "num_updates": "275000", "lr": "0.000732323", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "89798"} +[2022-08-02 02:25:50,633][train_inner][INFO] - {"epoch": 6, "update": 5.347, "loss": "2.302", "ppl": "4.93", "wps": "363847", "ups": "3.07", "wpb": "118383", "bsz": "256", "num_updates": "275200", "lr": "0.000732121", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "89863"} +[2022-08-02 02:26:55,337][train_inner][INFO] - {"epoch": 6, "update": 5.351, "loss": "2.298", "ppl": "4.92", "wps": "365833", "ups": "3.09", "wpb": "118352", "bsz": "256", "num_updates": "275400", "lr": "0.000731919", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "89928"} +[2022-08-02 02:28:00,305][train_inner][INFO] - {"epoch": 6, "update": 5.355, "loss": "2.304", "ppl": "4.94", "wps": "362486", "ups": "3.08", "wpb": "117749", "bsz": "256", "num_updates": "275600", "lr": "0.000731717", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "89993"} +[2022-08-02 02:29:05,272][train_inner][INFO] - {"epoch": 6, "update": 5.359, "loss": "2.3", "ppl": "4.93", "wps": "365140", "ups": "3.08", "wpb": "118609", "bsz": "256", "num_updates": "275800", "lr": "0.000731515", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.1", "wall": "90058"} +[2022-08-02 02:29:08,831][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 02:29:09,084][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 02:29:21,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-02 02:30:10,667][train_inner][INFO] - {"epoch": 6, "update": 5.363, "loss": "2.321", "ppl": "5", "wps": "362487", "ups": "3.06", "wpb": "118521", "bsz": "256", "num_updates": "276000", "lr": "0.000731313", "gnorm": "0.916", "clip": "2", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "90123"} +[2022-08-02 02:31:15,528][train_inner][INFO] - {"epoch": 6, "update": 5.367, "loss": "2.306", "ppl": "4.94", "wps": "362753", "ups": "3.08", "wpb": "117642", "bsz": "256", "num_updates": "276200", "lr": "0.000731111", "gnorm": "0.68", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.4", "wall": "90188"} +[2022-08-02 02:32:20,427][train_inner][INFO] - {"epoch": 6, "update": 5.37, "loss": "2.304", "ppl": "4.94", "wps": "364043", "ups": "3.08", "wpb": "118128", "bsz": "256", "num_updates": "276400", "lr": "0.000730909", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.5", "wall": "90253"} +[2022-08-02 02:33:25,469][train_inner][INFO] - {"epoch": 6, "update": 5.374, "loss": "2.302", "ppl": "4.93", "wps": "365447", "ups": "3.07", "wpb": "118846", "bsz": "256", "num_updates": "276600", "lr": "0.000730707", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.1", "wall": "90318"} +[2022-08-02 02:34:30,282][train_inner][INFO] - {"epoch": 6, "update": 5.378, "loss": "2.298", "ppl": "4.92", "wps": "365848", "ups": "3.09", "wpb": "118557", "bsz": "256", "num_updates": "276800", "lr": "0.000730505", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "90383"} +[2022-08-02 02:35:34,924][train_inner][INFO] - {"epoch": 6, "update": 5.382, "loss": "2.304", "ppl": "4.94", "wps": "365526", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "277000", "lr": "0.000730303", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "28", "wall": "90447"} +[2022-08-02 02:36:41,029][train_inner][INFO] - {"epoch": 6, "update": 5.386, "loss": "2.292", "ppl": "4.9", "wps": "358967", "ups": "3.03", "wpb": "118646", "bsz": "256", "num_updates": "277200", "lr": "0.000730101", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.9", "wall": "90513"} +[2022-08-02 02:37:46,017][train_inner][INFO] - {"epoch": 6, "update": 5.39, "loss": "2.304", "ppl": "4.94", "wps": "362906", "ups": "3.08", "wpb": "117920", "bsz": "256", "num_updates": "277400", "lr": "0.000729899", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.8", "wall": "90578"} +[2022-08-02 02:38:50,991][train_inner][INFO] - {"epoch": 6, "update": 5.394, "loss": "2.292", "ppl": "4.9", "wps": "365662", "ups": "3.08", "wpb": "118791", "bsz": "256", "num_updates": "277600", "lr": "0.000729697", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.9", "wall": "90643"} +[2022-08-02 02:39:56,089][train_inner][INFO] - {"epoch": 6, "update": 5.398, "loss": "2.299", "ppl": "4.92", "wps": "363877", "ups": "3.07", "wpb": "118435", "bsz": "256", "num_updates": "277800", "lr": "0.000729495", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "25.7", "wall": "90708"} +[2022-08-02 02:41:00,662][train_inner][INFO] - {"epoch": 6, "update": 5.401, "loss": "2.302", "ppl": "4.93", "wps": "365872", "ups": "3.1", "wpb": "118127", "bsz": "256", "num_updates": "278000", "lr": "0.000729293", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.1", "wall": "90773"} +[2022-08-02 02:42:05,348][train_inner][INFO] - {"epoch": 6, "update": 5.405, "loss": "2.302", "ppl": "4.93", "wps": "365804", "ups": "3.09", "wpb": "118310", "bsz": "256", "num_updates": "278200", "lr": "0.000729091", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "90838"} +[2022-08-02 02:43:11,384][train_inner][INFO] - {"epoch": 6, "update": 5.409, "loss": "2.299", "ppl": "4.92", "wps": "358367", "ups": "3.03", "wpb": "118323", "bsz": "256", "num_updates": "278400", "lr": "0.000728889", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "20.2", "wall": "90904"} +[2022-08-02 02:44:16,220][train_inner][INFO] - {"epoch": 6, "update": 5.413, "loss": "2.303", "ppl": "4.93", "wps": "366392", "ups": "3.08", "wpb": "118777", "bsz": "256", "num_updates": "278600", "lr": "0.000728687", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.4", "wall": "90969"} +[2022-08-02 02:45:20,339][train_inner][INFO] - {"epoch": 6, "update": 5.417, "loss": "2.302", "ppl": "4.93", "wps": "366615", "ups": "3.12", "wpb": "117532", "bsz": "256", "num_updates": "278800", "lr": "0.000728485", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "91033"} +[2022-08-02 02:46:25,123][train_inner][INFO] - {"epoch": 6, "update": 5.421, "loss": "2.304", "ppl": "4.94", "wps": "365676", "ups": "3.09", "wpb": "118448", "bsz": "256", "num_updates": "279000", "lr": "0.000728283", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "91097"} +[2022-08-02 02:47:30,098][train_inner][INFO] - {"epoch": 6, "update": 5.425, "loss": "2.306", "ppl": "4.94", "wps": "363234", "ups": "3.08", "wpb": "118005", "bsz": "256", "num_updates": "279200", "lr": "0.000728081", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.8", "wall": "91162"} +[2022-08-02 02:48:35,196][train_inner][INFO] - {"epoch": 6, "update": 5.429, "loss": "2.292", "ppl": "4.9", "wps": "365588", "ups": "3.07", "wpb": "118993", "bsz": "256", "num_updates": "279400", "lr": "0.000727879", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "91228"} +[2022-08-02 02:49:40,030][train_inner][INFO] - {"epoch": 6, "update": 5.433, "loss": "2.3", "ppl": "4.93", "wps": "366615", "ups": "3.08", "wpb": "118843", "bsz": "256", "num_updates": "279600", "lr": "0.000727677", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "91292"} +[2022-08-02 02:50:45,066][train_inner][INFO] - {"epoch": 6, "update": 5.436, "loss": "2.301", "ppl": "4.93", "wps": "363565", "ups": "3.08", "wpb": "118222", "bsz": "256", "num_updates": "279800", "lr": "0.000727475", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "91357"} +[2022-08-02 02:51:49,867][train_inner][INFO] - {"epoch": 6, "update": 5.44, "loss": "2.297", "ppl": "4.91", "wps": "365257", "ups": "3.09", "wpb": "118344", "bsz": "256", "num_updates": "280000", "lr": "0.000727273", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.3", "wall": "91422"} +[2022-08-02 02:52:55,016][train_inner][INFO] - {"epoch": 6, "update": 5.444, "loss": "2.301", "ppl": "4.93", "wps": "363492", "ups": "3.07", "wpb": "118405", "bsz": "256", "num_updates": "280200", "lr": "0.000727071", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "25.8", "wall": "91487"} +[2022-08-02 02:54:00,212][train_inner][INFO] - {"epoch": 6, "update": 5.448, "loss": "2.293", "ppl": "4.9", "wps": "365716", "ups": "3.07", "wpb": "119214", "bsz": "256", "num_updates": "280400", "lr": "0.000726869", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "91553"} +[2022-08-02 02:55:05,196][train_inner][INFO] - {"epoch": 6, "update": 5.452, "loss": "2.292", "ppl": "4.9", "wps": "365013", "ups": "3.08", "wpb": "118598", "bsz": "256", "num_updates": "280600", "lr": "0.000726667", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.5", "wall": "91618"} +[2022-08-02 02:56:10,241][train_inner][INFO] - {"epoch": 6, "update": 5.456, "loss": "2.299", "ppl": "4.92", "wps": "365128", "ups": "3.07", "wpb": "118747", "bsz": "256", "num_updates": "280800", "lr": "0.000726465", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "91683"} +[2022-08-02 02:57:15,034][train_inner][INFO] - {"epoch": 6, "update": 5.46, "loss": "2.297", "ppl": "4.92", "wps": "365702", "ups": "3.09", "wpb": "118472", "bsz": "256", "num_updates": "281000", "lr": "0.000726263", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "91747"} +[2022-08-02 02:58:20,044][train_inner][INFO] - {"epoch": 6, "update": 5.464, "loss": "2.3", "ppl": "4.93", "wps": "361969", "ups": "3.08", "wpb": "117658", "bsz": "256", "num_updates": "281200", "lr": "0.000726061", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.8", "wall": "91812"} +[2022-08-02 02:59:25,238][train_inner][INFO] - {"epoch": 6, "update": 5.468, "loss": "2.296", "ppl": "4.91", "wps": "363710", "ups": "3.07", "wpb": "118556", "bsz": "256", "num_updates": "281400", "lr": "0.000725859", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "91878"} +[2022-08-02 03:00:30,020][train_inner][INFO] - {"epoch": 6, "update": 5.471, "loss": "2.302", "ppl": "4.93", "wps": "364670", "ups": "3.09", "wpb": "118118", "bsz": "256", "num_updates": "281600", "lr": "0.000725657", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "91942"} +[2022-08-02 03:01:34,638][train_inner][INFO] - {"epoch": 6, "update": 5.475, "loss": "2.301", "ppl": "4.93", "wps": "365386", "ups": "3.1", "wpb": "118051", "bsz": "256", "num_updates": "281800", "lr": "0.000725455", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "92007"} +[2022-08-02 03:02:39,706][train_inner][INFO] - {"epoch": 6, "update": 5.479, "loss": "2.296", "ppl": "4.91", "wps": "363723", "ups": "3.07", "wpb": "118332", "bsz": "256", "num_updates": "282000", "lr": "0.000725253", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.6", "wall": "92072"} +[2022-08-02 03:03:44,676][train_inner][INFO] - {"epoch": 6, "update": 5.483, "loss": "2.294", "ppl": "4.9", "wps": "365128", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "282200", "lr": "0.000725051", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.1", "wall": "92137"} +[2022-08-02 03:04:49,657][train_inner][INFO] - {"epoch": 6, "update": 5.487, "loss": "2.296", "ppl": "4.91", "wps": "364437", "ups": "3.08", "wpb": "118405", "bsz": "256", "num_updates": "282400", "lr": "0.000724848", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "92202"} +[2022-08-02 03:05:13,516][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 03:05:54,606][train_inner][INFO] - {"epoch": 6, "update": 5.491, "loss": "2.306", "ppl": "4.95", "wps": "365092", "ups": "3.08", "wpb": "118561", "bsz": "256", "num_updates": "282600", "lr": "0.000724646", "gnorm": "0.818", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "92267"} +[2022-08-02 03:06:58,856][train_inner][INFO] - {"epoch": 6, "update": 5.495, "loss": "2.305", "ppl": "4.94", "wps": "368294", "ups": "3.11", "wpb": "118312", "bsz": "256", "num_updates": "282800", "lr": "0.000724444", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.4", "wall": "92331"} +[2022-08-02 03:08:03,439][train_inner][INFO] - {"epoch": 6, "update": 5.499, "loss": "2.297", "ppl": "4.91", "wps": "363715", "ups": "3.1", "wpb": "117447", "bsz": "256", "num_updates": "283000", "lr": "0.000724242", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "92396"} +[2022-08-02 03:09:08,096][train_inner][INFO] - {"epoch": 6, "update": 5.503, "loss": "2.294", "ppl": "4.91", "wps": "366280", "ups": "3.09", "wpb": "118410", "bsz": "256", "num_updates": "283200", "lr": "0.00072404", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.5", "wall": "92460"} +[2022-08-02 03:10:12,992][train_inner][INFO] - {"epoch": 6, "update": 5.506, "loss": "2.297", "ppl": "4.91", "wps": "362378", "ups": "3.08", "wpb": "117583", "bsz": "256", "num_updates": "283400", "lr": "0.000723838", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26", "wall": "92525"} +[2022-08-02 03:11:17,721][train_inner][INFO] - {"epoch": 6, "update": 5.51, "loss": "2.304", "ppl": "4.94", "wps": "365850", "ups": "3.09", "wpb": "118403", "bsz": "256", "num_updates": "283600", "lr": "0.000723636", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "92590"} +[2022-08-02 03:12:21,979][train_inner][INFO] - {"epoch": 6, "update": 5.514, "loss": "2.299", "ppl": "4.92", "wps": "367906", "ups": "3.11", "wpb": "118203", "bsz": "256", "num_updates": "283800", "lr": "0.000723434", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "92654"} +[2022-08-02 03:13:26,779][train_inner][INFO] - {"epoch": 6, "update": 5.518, "loss": "2.297", "ppl": "4.91", "wps": "364385", "ups": "3.09", "wpb": "118059", "bsz": "256", "num_updates": "284000", "lr": "0.000723232", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.4", "wall": "92719"} +[2022-08-02 03:14:31,517][train_inner][INFO] - {"epoch": 6, "update": 5.522, "loss": "2.295", "ppl": "4.91", "wps": "365669", "ups": "3.09", "wpb": "118363", "bsz": "256", "num_updates": "284200", "lr": "0.00072303", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "92784"} +[2022-08-02 03:15:36,505][train_inner][INFO] - {"epoch": 6, "update": 5.526, "loss": "2.303", "ppl": "4.93", "wps": "364438", "ups": "3.08", "wpb": "118418", "bsz": "256", "num_updates": "284400", "lr": "0.000722828", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "92849"} +[2022-08-02 03:16:41,146][train_inner][INFO] - {"epoch": 6, "update": 5.53, "loss": "2.298", "ppl": "4.92", "wps": "365924", "ups": "3.09", "wpb": "118266", "bsz": "256", "num_updates": "284600", "lr": "0.000722626", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "92913"} +[2022-08-02 03:17:45,722][train_inner][INFO] - {"epoch": 6, "update": 5.534, "loss": "2.296", "ppl": "4.91", "wps": "367179", "ups": "3.1", "wpb": "118553", "bsz": "256", "num_updates": "284800", "lr": "0.000722424", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "92978"} +[2022-08-02 03:18:50,670][train_inner][INFO] - {"epoch": 6, "update": 5.537, "loss": "2.295", "ppl": "4.91", "wps": "363140", "ups": "3.08", "wpb": "117924", "bsz": "256", "num_updates": "285000", "lr": "0.000722222", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "93043"} +[2022-08-02 03:19:55,478][train_inner][INFO] - {"epoch": 6, "update": 5.541, "loss": "2.296", "ppl": "4.91", "wps": "365520", "ups": "3.09", "wpb": "118442", "bsz": "256", "num_updates": "285200", "lr": "0.00072202", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "93108"} +[2022-08-02 03:21:00,038][train_inner][INFO] - {"epoch": 6, "update": 5.545, "loss": "2.289", "ppl": "4.89", "wps": "368128", "ups": "3.1", "wpb": "118829", "bsz": "256", "num_updates": "285400", "lr": "0.000721818", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "93172"} +[2022-08-02 03:22:05,105][train_inner][INFO] - {"epoch": 6, "update": 5.549, "loss": "2.29", "ppl": "4.89", "wps": "362469", "ups": "3.07", "wpb": "117923", "bsz": "256", "num_updates": "285600", "lr": "0.000721616", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "93237"} +[2022-08-02 03:23:10,526][train_inner][INFO] - {"epoch": 6, "update": 5.553, "loss": "2.296", "ppl": "4.91", "wps": "362538", "ups": "3.06", "wpb": "118585", "bsz": "256", "num_updates": "285800", "lr": "0.000721414", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "93303"} +[2022-08-02 03:24:15,735][train_inner][INFO] - {"epoch": 6, "update": 5.557, "loss": "2.298", "ppl": "4.92", "wps": "362976", "ups": "3.07", "wpb": "118345", "bsz": "256", "num_updates": "286000", "lr": "0.000721212", "gnorm": "0.74", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "93368"} +[2022-08-02 03:25:20,537][train_inner][INFO] - {"epoch": 6, "update": 5.561, "loss": "2.299", "ppl": "4.92", "wps": "364418", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "286200", "lr": "0.00072101", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "93433"} +[2022-08-02 03:26:24,943][train_inner][INFO] - {"epoch": 6, "update": 5.565, "loss": "2.293", "ppl": "4.9", "wps": "368688", "ups": "3.11", "wpb": "118727", "bsz": "256", "num_updates": "286400", "lr": "0.000720808", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.5", "wall": "93497"} +[2022-08-02 03:27:29,519][train_inner][INFO] - {"epoch": 6, "update": 5.569, "loss": "2.294", "ppl": "4.9", "wps": "363503", "ups": "3.1", "wpb": "117366", "bsz": "256", "num_updates": "286600", "lr": "0.000720606", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "93562"} +[2022-08-02 03:28:34,520][train_inner][INFO] - {"epoch": 6, "update": 5.572, "loss": "2.295", "ppl": "4.91", "wps": "365030", "ups": "3.08", "wpb": "118634", "bsz": "256", "num_updates": "286800", "lr": "0.000720404", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "93627"} +[2022-08-02 03:29:39,467][train_inner][INFO] - {"epoch": 6, "update": 5.576, "loss": "2.295", "ppl": "4.91", "wps": "365176", "ups": "3.08", "wpb": "118583", "bsz": "256", "num_updates": "287000", "lr": "0.000720202", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24", "wall": "93692"} +[2022-08-02 03:30:44,517][train_inner][INFO] - {"epoch": 6, "update": 5.58, "loss": "2.3", "ppl": "4.92", "wps": "363733", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "287200", "lr": "0.00072", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "93757"} +[2022-08-02 03:31:49,577][train_inner][INFO] - {"epoch": 6, "update": 5.584, "loss": "2.295", "ppl": "4.91", "wps": "362961", "ups": "3.07", "wpb": "118069", "bsz": "256", "num_updates": "287400", "lr": "0.000719798", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "93822"} +[2022-08-02 03:32:54,148][train_inner][INFO] - {"epoch": 6, "update": 5.588, "loss": "2.289", "ppl": "4.89", "wps": "366909", "ups": "3.1", "wpb": "118458", "bsz": "256", "num_updates": "287600", "lr": "0.000719596", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "93886"} +[2022-08-02 03:34:00,388][train_inner][INFO] - {"epoch": 6, "update": 5.592, "loss": "2.293", "ppl": "4.9", "wps": "356126", "ups": "3.02", "wpb": "117947", "bsz": "256", "num_updates": "287800", "lr": "0.000719394", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "22.7", "wall": "93953"} +[2022-08-02 03:35:05,382][train_inner][INFO] - {"epoch": 6, "update": 5.596, "loss": "2.288", "ppl": "4.88", "wps": "364117", "ups": "3.08", "wpb": "118324", "bsz": "256", "num_updates": "288000", "lr": "0.000719192", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "94018"} +[2022-08-02 03:36:09,965][train_inner][INFO] - {"epoch": 6, "update": 5.6, "loss": "2.294", "ppl": "4.9", "wps": "365440", "ups": "3.1", "wpb": "118004", "bsz": "256", "num_updates": "288200", "lr": "0.00071899", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "94082"} +[2022-08-02 03:37:15,224][train_inner][INFO] - {"epoch": 6, "update": 5.603, "loss": "2.289", "ppl": "4.89", "wps": "363012", "ups": "3.06", "wpb": "118447", "bsz": "256", "num_updates": "288400", "lr": "0.000718788", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "94148"} +[2022-08-02 03:38:19,896][train_inner][INFO] - {"epoch": 6, "update": 5.607, "loss": "2.298", "ppl": "4.92", "wps": "365280", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "288600", "lr": "0.000718586", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.9", "wall": "94212"} +[2022-08-02 03:39:24,898][train_inner][INFO] - {"epoch": 6, "update": 5.611, "loss": "2.295", "ppl": "4.91", "wps": "364664", "ups": "3.08", "wpb": "118517", "bsz": "256", "num_updates": "288800", "lr": "0.000718384", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "94277"} +[2022-08-02 03:40:29,827][train_inner][INFO] - {"epoch": 6, "update": 5.615, "loss": "2.297", "ppl": "4.92", "wps": "365408", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "289000", "lr": "0.000718182", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "94342"} +[2022-08-02 03:41:34,381][train_inner][INFO] - {"epoch": 6, "update": 5.619, "loss": "2.289", "ppl": "4.89", "wps": "367721", "ups": "3.1", "wpb": "118689", "bsz": "256", "num_updates": "289200", "lr": "0.00071798", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "94407"} +[2022-08-02 03:42:39,336][train_inner][INFO] - {"epoch": 6, "update": 5.623, "loss": "2.293", "ppl": "4.9", "wps": "363873", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "289400", "lr": "0.000717778", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "94472"} +[2022-08-02 03:43:44,397][train_inner][INFO] - {"epoch": 6, "update": 5.627, "loss": "2.298", "ppl": "4.92", "wps": "364060", "ups": "3.07", "wpb": "118429", "bsz": "256", "num_updates": "289600", "lr": "0.000717576", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "94537"} +[2022-08-02 03:44:49,003][train_inner][INFO] - {"epoch": 6, "update": 5.631, "loss": "2.297", "ppl": "4.91", "wps": "365225", "ups": "3.1", "wpb": "117977", "bsz": "256", "num_updates": "289800", "lr": "0.000717374", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.8", "wall": "94601"} +[2022-08-02 03:45:53,899][train_inner][INFO] - {"epoch": 6, "update": 5.635, "loss": "2.293", "ppl": "4.9", "wps": "363909", "ups": "3.08", "wpb": "118079", "bsz": "256", "num_updates": "290000", "lr": "0.000717172", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "94666"} +[2022-08-02 03:46:59,171][train_inner][INFO] - {"epoch": 6, "update": 5.638, "loss": "2.292", "ppl": "4.9", "wps": "363496", "ups": "3.06", "wpb": "118629", "bsz": "256", "num_updates": "290200", "lr": "0.00071697", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "94731"} +[2022-08-02 03:48:04,216][train_inner][INFO] - {"epoch": 6, "update": 5.642, "loss": "2.292", "ppl": "4.9", "wps": "364063", "ups": "3.07", "wpb": "118400", "bsz": "256", "num_updates": "290400", "lr": "0.000716768", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "94797"} +[2022-08-02 03:49:09,069][train_inner][INFO] - {"epoch": 6, "update": 5.646, "loss": "2.29", "ppl": "4.89", "wps": "365349", "ups": "3.08", "wpb": "118469", "bsz": "256", "num_updates": "290600", "lr": "0.000716566", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "94861"} +[2022-08-02 03:50:14,077][train_inner][INFO] - {"epoch": 6, "update": 5.65, "loss": "2.291", "ppl": "4.89", "wps": "363353", "ups": "3.08", "wpb": "118102", "bsz": "256", "num_updates": "290800", "lr": "0.000716364", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "94926"} +[2022-08-02 03:51:18,572][train_inner][INFO] - {"epoch": 6, "update": 5.654, "loss": "2.299", "ppl": "4.92", "wps": "365146", "ups": "3.1", "wpb": "117749", "bsz": "256", "num_updates": "291000", "lr": "0.000716162", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "94991"} +[2022-08-02 03:52:23,480][train_inner][INFO] - {"epoch": 6, "update": 5.658, "loss": "2.291", "ppl": "4.89", "wps": "365084", "ups": "3.08", "wpb": "118483", "bsz": "256", "num_updates": "291200", "lr": "0.00071596", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "95056"} +[2022-08-02 03:53:28,784][train_inner][INFO] - {"epoch": 6, "update": 5.662, "loss": "2.288", "ppl": "4.88", "wps": "362560", "ups": "3.06", "wpb": "118380", "bsz": "256", "num_updates": "291400", "lr": "0.000715758", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "95121"} +[2022-08-02 03:53:53,280][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 03:54:34,055][train_inner][INFO] - {"epoch": 6, "update": 5.666, "loss": "2.287", "ppl": "4.88", "wps": "361660", "ups": "3.06", "wpb": "118028", "bsz": "256", "num_updates": "291600", "lr": "0.000715556", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "95186"} +[2022-08-02 03:55:38,955][train_inner][INFO] - {"epoch": 6, "update": 5.67, "loss": "2.299", "ppl": "4.92", "wps": "362994", "ups": "3.08", "wpb": "117792", "bsz": "256", "num_updates": "291800", "lr": "0.000715354", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "95251"} +[2022-08-02 03:56:44,914][train_inner][INFO] - {"epoch": 6, "update": 5.673, "loss": "2.29", "ppl": "4.89", "wps": "359589", "ups": "3.03", "wpb": "118588", "bsz": "256", "num_updates": "292000", "lr": "0.000715152", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.7", "wall": "95317"} +[2022-08-02 03:57:49,779][train_inner][INFO] - {"epoch": 6, "update": 5.677, "loss": "2.29", "ppl": "4.89", "wps": "364269", "ups": "3.08", "wpb": "118140", "bsz": "256", "num_updates": "292200", "lr": "0.000714949", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "95382"} +[2022-08-02 03:58:54,675][train_inner][INFO] - {"epoch": 6, "update": 5.681, "loss": "2.289", "ppl": "4.89", "wps": "365565", "ups": "3.08", "wpb": "118616", "bsz": "256", "num_updates": "292400", "lr": "0.000714747", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "95447"} +[2022-08-02 03:59:40,880][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 03:59:41,825][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 03:59:42,448][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 04:00:01,309][train_inner][INFO] - {"epoch": 6, "update": 5.685, "loss": "2.291", "ppl": "4.9", "wps": "356187", "ups": "3", "wpb": "118669", "bsz": "256", "num_updates": "292600", "lr": "0.000714545", "gnorm": "0.758", "clip": "0.5", "loss_scale": "1", "train_wall": "66", "gb_free": "20", "wall": "95514"} +[2022-08-02 04:01:06,034][train_inner][INFO] - {"epoch": 6, "update": 5.689, "loss": "2.293", "ppl": "4.9", "wps": "365645", "ups": "3.09", "wpb": "118331", "bsz": "256", "num_updates": "292800", "lr": "0.000714343", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "95578"} +[2022-08-02 04:02:12,041][train_inner][INFO] - {"epoch": 6, "update": 5.693, "loss": "2.29", "ppl": "4.89", "wps": "358062", "ups": "3.03", "wpb": "118171", "bsz": "256", "num_updates": "293000", "lr": "0.000714141", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "21.5", "wall": "95644"} +[2022-08-02 04:03:17,233][train_inner][INFO] - {"epoch": 6, "update": 5.697, "loss": "2.289", "ppl": "4.89", "wps": "361786", "ups": "3.07", "wpb": "117927", "bsz": "256", "num_updates": "293200", "lr": "0.000713939", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "95710"} +[2022-08-02 04:04:22,248][train_inner][INFO] - {"epoch": 6, "update": 5.701, "loss": "2.289", "ppl": "4.89", "wps": "364552", "ups": "3.08", "wpb": "118504", "bsz": "256", "num_updates": "293400", "lr": "0.000713737", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26.2", "wall": "95775"} +[2022-08-02 04:04:37,036][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 04:04:37,317][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-02 04:05:28,040][train_inner][INFO] - {"epoch": 6, "update": 5.705, "loss": "2.311", "ppl": "4.96", "wps": "358679", "ups": "3.04", "wpb": "117989", "bsz": "256", "num_updates": "293600", "lr": "0.000713535", "gnorm": "1.006", "clip": "0.5", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.7", "wall": "95840"} +[2022-08-02 04:06:32,973][train_inner][INFO] - {"epoch": 6, "update": 5.708, "loss": "2.287", "ppl": "4.88", "wps": "363848", "ups": "3.08", "wpb": "118129", "bsz": "256", "num_updates": "293800", "lr": "0.000713333", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.1", "wall": "95905"} +[2022-08-02 04:07:37,723][train_inner][INFO] - {"epoch": 6, "update": 5.712, "loss": "2.286", "ppl": "4.88", "wps": "366194", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "294000", "lr": "0.000713131", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.2", "wall": "95970"} +[2022-08-02 04:08:43,869][train_inner][INFO] - {"epoch": 6, "update": 5.716, "loss": "2.287", "ppl": "4.88", "wps": "358531", "ups": "3.02", "wpb": "118576", "bsz": "256", "num_updates": "294200", "lr": "0.000712929", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "19.7", "wall": "96036"} +[2022-08-02 04:09:49,238][train_inner][INFO] - {"epoch": 6, "update": 5.72, "loss": "2.294", "ppl": "4.9", "wps": "362462", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "294400", "lr": "0.000712727", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.1", "wall": "96102"} +[2022-08-02 04:10:54,029][train_inner][INFO] - {"epoch": 6, "update": 5.724, "loss": "2.293", "ppl": "4.9", "wps": "362650", "ups": "3.09", "wpb": "117481", "bsz": "256", "num_updates": "294600", "lr": "0.000712525", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "23.4", "wall": "96166"} +[2022-08-02 04:11:59,191][train_inner][INFO] - {"epoch": 6, "update": 5.728, "loss": "2.289", "ppl": "4.89", "wps": "363438", "ups": "3.07", "wpb": "118409", "bsz": "256", "num_updates": "294800", "lr": "0.000712323", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.7", "wall": "96232"} +[2022-08-02 04:13:04,287][train_inner][INFO] - {"epoch": 6, "update": 5.732, "loss": "2.291", "ppl": "4.89", "wps": "364647", "ups": "3.07", "wpb": "118684", "bsz": "256", "num_updates": "295000", "lr": "0.000712121", "gnorm": "0.681", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "96297"} +[2022-08-02 04:14:10,309][train_inner][INFO] - {"epoch": 6, "update": 5.736, "loss": "2.285", "ppl": "4.87", "wps": "360146", "ups": "3.03", "wpb": "118885", "bsz": "256", "num_updates": "295200", "lr": "0.000711919", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "21.7", "wall": "96363"} +[2022-08-02 04:15:15,300][train_inner][INFO] - {"epoch": 6, "update": 5.74, "loss": "2.29", "ppl": "4.89", "wps": "364150", "ups": "3.08", "wpb": "118331", "bsz": "256", "num_updates": "295400", "lr": "0.000711717", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20", "wall": "96428"} +[2022-08-02 04:16:20,080][train_inner][INFO] - {"epoch": 6, "update": 5.743, "loss": "2.29", "ppl": "4.89", "wps": "362796", "ups": "3.09", "wpb": "117510", "bsz": "256", "num_updates": "295600", "lr": "0.000711515", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "96492"} +[2022-08-02 04:17:24,461][train_inner][INFO] - {"epoch": 6, "update": 5.747, "loss": "2.286", "ppl": "4.88", "wps": "367996", "ups": "3.11", "wpb": "118458", "bsz": "256", "num_updates": "295800", "lr": "0.000711313", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.3", "wall": "96557"} +[2022-08-02 04:18:29,334][train_inner][INFO] - {"epoch": 6, "update": 5.751, "loss": "2.285", "ppl": "4.87", "wps": "364090", "ups": "3.08", "wpb": "118096", "bsz": "256", "num_updates": "296000", "lr": "0.000711111", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23", "wall": "96622"} +[2022-08-02 04:19:34,128][train_inner][INFO] - {"epoch": 6, "update": 5.755, "loss": "2.293", "ppl": "4.9", "wps": "365290", "ups": "3.09", "wpb": "118341", "bsz": "256", "num_updates": "296200", "lr": "0.000710909", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "96686"} +[2022-08-02 04:20:40,117][train_inner][INFO] - {"epoch": 6, "update": 5.759, "loss": "2.289", "ppl": "4.89", "wps": "358294", "ups": "3.03", "wpb": "118217", "bsz": "256", "num_updates": "296400", "lr": "0.000710707", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "26.8", "wall": "96752"} +[2022-08-02 04:21:45,080][train_inner][INFO] - {"epoch": 6, "update": 5.763, "loss": "2.281", "ppl": "4.86", "wps": "366023", "ups": "3.08", "wpb": "118887", "bsz": "256", "num_updates": "296600", "lr": "0.000710505", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "96817"} +[2022-08-02 04:22:50,231][train_inner][INFO] - {"epoch": 6, "update": 5.767, "loss": "2.287", "ppl": "4.88", "wps": "364320", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "296800", "lr": "0.000710303", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "96883"} +[2022-08-02 04:23:54,827][train_inner][INFO] - {"epoch": 6, "update": 5.771, "loss": "2.292", "ppl": "4.9", "wps": "366270", "ups": "3.1", "wpb": "118296", "bsz": "256", "num_updates": "297000", "lr": "0.000710101", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "96947"} +[2022-08-02 04:24:59,635][train_inner][INFO] - {"epoch": 6, "update": 5.775, "loss": "2.286", "ppl": "4.88", "wps": "364227", "ups": "3.09", "wpb": "118022", "bsz": "256", "num_updates": "297200", "lr": "0.000709899", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "97012"} +[2022-08-02 04:26:04,727][train_inner][INFO] - {"epoch": 6, "update": 5.778, "loss": "2.282", "ppl": "4.86", "wps": "365210", "ups": "3.07", "wpb": "118861", "bsz": "256", "num_updates": "297400", "lr": "0.000709697", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "25.4", "wall": "97077"} +[2022-08-02 04:26:54,233][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 04:26:54,513][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-02 04:26:55,093][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 +[2022-08-02 04:27:10,720][train_inner][INFO] - {"epoch": 6, "update": 5.782, "loss": "2.339", "ppl": "5.06", "wps": "359490", "ups": "3.03", "wpb": "118617", "bsz": "256", "num_updates": "297600", "lr": "0.000709495", "gnorm": "1.014", "clip": "1.5", "loss_scale": "0.125", "train_wall": "66", "gb_free": "19.9", "wall": "97143"} +[2022-08-02 04:28:15,140][train_inner][INFO] - {"epoch": 6, "update": 5.786, "loss": "2.295", "ppl": "4.91", "wps": "366438", "ups": "3.1", "wpb": "118028", "bsz": "256", "num_updates": "297800", "lr": "0.000709293", "gnorm": "0.925", "clip": "1", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.7", "wall": "97207"} +[2022-08-02 04:29:20,099][train_inner][INFO] - {"epoch": 6, "update": 5.79, "loss": "2.281", "ppl": "4.86", "wps": "364980", "ups": "3.08", "wpb": "118542", "bsz": "256", "num_updates": "298000", "lr": "0.000709091", "gnorm": "0.671", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.4", "wall": "97272"} +[2022-08-02 04:30:24,765][train_inner][INFO] - {"epoch": 6, "update": 5.794, "loss": "2.283", "ppl": "4.87", "wps": "366722", "ups": "3.09", "wpb": "118570", "bsz": "256", "num_updates": "298200", "lr": "0.000708889", "gnorm": "0.674", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20", "wall": "97337"} +[2022-08-02 04:31:29,532][train_inner][INFO] - {"epoch": 6, "update": 5.798, "loss": "2.306", "ppl": "4.94", "wps": "365363", "ups": "3.09", "wpb": "118316", "bsz": "256", "num_updates": "298400", "lr": "0.000708687", "gnorm": "0.954", "clip": "1.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "19.9", "wall": "97402"} +[2022-08-02 04:32:34,138][train_inner][INFO] - {"epoch": 6, "update": 5.802, "loss": "2.297", "ppl": "4.91", "wps": "364073", "ups": "3.1", "wpb": "117605", "bsz": "256", "num_updates": "298600", "lr": "0.000708485", "gnorm": "0.734", "clip": "0.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "19.9", "wall": "97466"} +[2022-08-02 04:33:39,014][train_inner][INFO] - {"epoch": 6, "update": 5.806, "loss": "2.297", "ppl": "4.92", "wps": "363690", "ups": "3.08", "wpb": "117971", "bsz": "256", "num_updates": "298800", "lr": "0.000708283", "gnorm": "0.72", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.9", "wall": "97531"} +[2022-08-02 04:34:43,739][train_inner][INFO] - {"epoch": 6, "update": 5.81, "loss": "2.29", "ppl": "4.89", "wps": "366264", "ups": "3.09", "wpb": "118531", "bsz": "256", "num_updates": "299000", "lr": "0.000708081", "gnorm": "0.749", "clip": "0.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.5", "wall": "97596"} +[2022-08-02 04:35:48,656][train_inner][INFO] - {"epoch": 6, "update": 5.813, "loss": "2.293", "ppl": "4.9", "wps": "363353", "ups": "3.08", "wpb": "117937", "bsz": "256", "num_updates": "299200", "lr": "0.000707879", "gnorm": "0.75", "clip": "0.5", "loss_scale": "0.125", "train_wall": "65", "gb_free": "24.1", "wall": "97661"} +[2022-08-02 04:36:53,514][train_inner][INFO] - {"epoch": 6, "update": 5.817, "loss": "2.296", "ppl": "4.91", "wps": "363572", "ups": "3.08", "wpb": "117902", "bsz": "256", "num_updates": "299400", "lr": "0.000707677", "gnorm": "0.685", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "26.8", "wall": "97726"} +[2022-08-02 04:37:58,207][train_inner][INFO] - {"epoch": 6, "update": 5.821, "loss": "2.287", "ppl": "4.88", "wps": "366247", "ups": "3.09", "wpb": "118467", "bsz": "256", "num_updates": "299600", "lr": "0.000707475", "gnorm": "0.681", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "97791"} +[2022-08-02 04:39:02,986][train_inner][INFO] - {"epoch": 6, "update": 5.825, "loss": "2.294", "ppl": "4.9", "wps": "365145", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "299800", "lr": "0.000707273", "gnorm": "0.731", "clip": "0.5", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "97855"} +[2022-08-02 04:40:07,792][train_inner][INFO] - {"epoch": 6, "update": 5.829, "loss": "2.283", "ppl": "4.87", "wps": "366536", "ups": "3.09", "wpb": "118767", "bsz": "256", "num_updates": "300000", "lr": "0.000707071", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "27.3", "wall": "97920"} +[2022-08-02 04:40:07,793][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 04:40:30,472][valid][INFO] - {"epoch": 6, "valid_loss": "2.185", "valid_ppl": "4.55", "valid_wps": "1.61614e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "300000", "valid_best_loss": "2.185"} +[2022-08-02 04:40:30,475][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 6 @ 300000 updates +[2022-08-02 04:40:30,476][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_6_300000.pt +[2022-08-02 04:40:41,615][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_6_300000.pt +[2022-08-02 04:41:13,516][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_6_300000.pt (epoch 6 @ 300000 updates, score 2.185) (writing took 43.04120281152427 seconds) +[2022-08-02 04:42:18,295][train_inner][INFO] - {"epoch": 6, "update": 5.833, "loss": "2.287", "ppl": "4.88", "wps": "181080", "ups": "1.53", "wpb": "118157", "bsz": "256", "num_updates": "300200", "lr": "0.000706869", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "25.8", "wall": "98051"} +[2022-08-02 04:43:23,552][train_inner][INFO] - {"epoch": 6, "update": 5.837, "loss": "2.29", "ppl": "4.89", "wps": "362730", "ups": "3.06", "wpb": "118352", "bsz": "256", "num_updates": "300400", "lr": "0.000706667", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.7", "wall": "98116"} +[2022-08-02 04:44:28,268][train_inner][INFO] - {"epoch": 6, "update": 5.841, "loss": "2.286", "ppl": "4.88", "wps": "366224", "ups": "3.09", "wpb": "118501", "bsz": "256", "num_updates": "300600", "lr": "0.000706465", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "26.3", "wall": "98181"} +[2022-08-02 04:45:34,130][train_inner][INFO] - {"epoch": 6, "update": 5.845, "loss": "2.287", "ppl": "4.88", "wps": "359623", "ups": "3.04", "wpb": "118424", "bsz": "256", "num_updates": "300800", "lr": "0.000706263", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "98246"} +[2022-08-02 04:46:38,651][train_inner][INFO] - {"epoch": 6, "update": 5.848, "loss": "2.288", "ppl": "4.88", "wps": "368329", "ups": "3.1", "wpb": "118823", "bsz": "256", "num_updates": "301000", "lr": "0.000706061", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.7", "wall": "98311"} +[2022-08-02 04:47:44,547][train_inner][INFO] - {"epoch": 6, "update": 5.852, "loss": "2.29", "ppl": "4.89", "wps": "359052", "ups": "3.04", "wpb": "118300", "bsz": "256", "num_updates": "301200", "lr": "0.000705859", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.8", "wall": "98377"} +[2022-08-02 04:48:49,205][train_inner][INFO] - {"epoch": 6, "update": 5.856, "loss": "2.302", "ppl": "4.93", "wps": "362865", "ups": "3.09", "wpb": "117308", "bsz": "256", "num_updates": "301400", "lr": "0.000705657", "gnorm": "0.679", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.1", "wall": "98442"} +[2022-08-02 04:49:54,162][train_inner][INFO] - {"epoch": 6, "update": 5.86, "loss": "2.29", "ppl": "4.89", "wps": "365419", "ups": "3.08", "wpb": "118682", "bsz": "256", "num_updates": "301600", "lr": "0.000705455", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.9", "wall": "98506"} +[2022-08-02 04:50:59,075][train_inner][INFO] - {"epoch": 6, "update": 5.864, "loss": "2.284", "ppl": "4.87", "wps": "366510", "ups": "3.08", "wpb": "118955", "bsz": "256", "num_updates": "301800", "lr": "0.000705253", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.3", "wall": "98571"} +[2022-08-02 04:52:04,162][train_inner][INFO] - {"epoch": 6, "update": 5.868, "loss": "2.286", "ppl": "4.88", "wps": "362494", "ups": "3.07", "wpb": "117965", "bsz": "256", "num_updates": "302000", "lr": "0.000705051", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.9", "wall": "98636"} +[2022-08-02 04:53:08,858][train_inner][INFO] - {"epoch": 6, "update": 5.872, "loss": "2.285", "ppl": "4.88", "wps": "366696", "ups": "3.09", "wpb": "118617", "bsz": "256", "num_updates": "302200", "lr": "0.000704848", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.8", "wall": "98701"} +[2022-08-02 04:54:13,658][train_inner][INFO] - {"epoch": 6, "update": 5.876, "loss": "2.291", "ppl": "4.9", "wps": "364712", "ups": "3.09", "wpb": "118165", "bsz": "256", "num_updates": "302400", "lr": "0.000704646", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "98766"} +[2022-08-02 04:55:18,333][train_inner][INFO] - {"epoch": 6, "update": 5.879, "loss": "2.29", "ppl": "4.89", "wps": "365901", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "302600", "lr": "0.000704444", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.1", "wall": "98831"} +[2022-08-02 04:56:22,921][train_inner][INFO] - {"epoch": 6, "update": 5.883, "loss": "2.288", "ppl": "4.89", "wps": "366070", "ups": "3.1", "wpb": "118218", "bsz": "256", "num_updates": "302800", "lr": "0.000704242", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.5", "wall": "98895"} +[2022-08-02 04:57:27,809][train_inner][INFO] - {"epoch": 6, "update": 5.887, "loss": "2.286", "ppl": "4.88", "wps": "364240", "ups": "3.08", "wpb": "118172", "bsz": "256", "num_updates": "303000", "lr": "0.00070404", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "98960"} +[2022-08-02 04:58:33,095][train_inner][INFO] - {"epoch": 6, "update": 5.891, "loss": "2.288", "ppl": "4.88", "wps": "362741", "ups": "3.06", "wpb": "118407", "bsz": "256", "num_updates": "303200", "lr": "0.000703838", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22", "wall": "99025"} +[2022-08-02 04:59:37,787][train_inner][INFO] - {"epoch": 6, "update": 5.895, "loss": "2.294", "ppl": "4.9", "wps": "364957", "ups": "3.09", "wpb": "118048", "bsz": "256", "num_updates": "303400", "lr": "0.000703636", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.4", "wall": "99090"} +[2022-08-02 05:00:42,684][train_inner][INFO] - {"epoch": 6, "update": 5.899, "loss": "2.289", "ppl": "4.89", "wps": "365223", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "303600", "lr": "0.000703434", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "99155"} +[2022-08-02 05:01:47,024][train_inner][INFO] - {"epoch": 6, "update": 5.903, "loss": "2.291", "ppl": "4.89", "wps": "365180", "ups": "3.11", "wpb": "117476", "bsz": "256", "num_updates": "303800", "lr": "0.000703232", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24", "wall": "99219"} +[2022-08-02 05:02:52,141][train_inner][INFO] - {"epoch": 6, "update": 5.907, "loss": "2.284", "ppl": "4.87", "wps": "363973", "ups": "3.07", "wpb": "118503", "bsz": "256", "num_updates": "304000", "lr": "0.00070303", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "99284"} +[2022-08-02 05:03:57,065][train_inner][INFO] - {"epoch": 6, "update": 5.911, "loss": "2.282", "ppl": "4.86", "wps": "365939", "ups": "3.08", "wpb": "118789", "bsz": "256", "num_updates": "304200", "lr": "0.000702828", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "99349"} +[2022-08-02 05:05:03,209][train_inner][INFO] - {"epoch": 6, "update": 5.914, "loss": "2.285", "ppl": "4.87", "wps": "358382", "ups": "3.02", "wpb": "118521", "bsz": "256", "num_updates": "304400", "lr": "0.000702626", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "19.7", "wall": "99416"} +[2022-08-02 05:06:08,039][train_inner][INFO] - {"epoch": 6, "update": 5.918, "loss": "2.284", "ppl": "4.87", "wps": "365280", "ups": "3.09", "wpb": "118404", "bsz": "256", "num_updates": "304600", "lr": "0.000702424", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "99480"} +[2022-08-02 05:07:12,780][train_inner][INFO] - {"epoch": 6, "update": 5.922, "loss": "2.283", "ppl": "4.87", "wps": "365972", "ups": "3.09", "wpb": "118465", "bsz": "256", "num_updates": "304800", "lr": "0.000702222", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.3", "wall": "99545"} +[2022-08-02 05:08:17,526][train_inner][INFO] - {"epoch": 6, "update": 5.926, "loss": "2.282", "ppl": "4.86", "wps": "367125", "ups": "3.09", "wpb": "118848", "bsz": "256", "num_updates": "305000", "lr": "0.00070202", "gnorm": "0.669", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.3", "wall": "99610"} +[2022-08-02 05:09:22,543][train_inner][INFO] - {"epoch": 6, "update": 5.93, "loss": "2.287", "ppl": "4.88", "wps": "364185", "ups": "3.08", "wpb": "118390", "bsz": "256", "num_updates": "305200", "lr": "0.000701818", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26.6", "wall": "99675"} +[2022-08-02 05:10:27,316][train_inner][INFO] - {"epoch": 6, "update": 5.934, "loss": "2.284", "ppl": "4.87", "wps": "364926", "ups": "3.09", "wpb": "118184", "bsz": "256", "num_updates": "305400", "lr": "0.000701616", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "25.3", "wall": "99740"} +[2022-08-02 05:11:32,367][train_inner][INFO] - {"epoch": 6, "update": 5.938, "loss": "2.281", "ppl": "4.86", "wps": "364360", "ups": "3.07", "wpb": "118509", "bsz": "256", "num_updates": "305600", "lr": "0.000701414", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "99805"} +[2022-08-02 05:12:37,328][train_inner][INFO] - {"epoch": 6, "update": 5.942, "loss": "2.288", "ppl": "4.89", "wps": "363779", "ups": "3.08", "wpb": "118154", "bsz": "256", "num_updates": "305800", "lr": "0.000701212", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.9", "wall": "99870"} +[2022-08-02 05:13:41,893][train_inner][INFO] - {"epoch": 6, "update": 5.946, "loss": "2.285", "ppl": "4.87", "wps": "365108", "ups": "3.1", "wpb": "117864", "bsz": "256", "num_updates": "306000", "lr": "0.00070101", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "99934"} +[2022-08-02 05:14:46,662][train_inner][INFO] - {"epoch": 6, "update": 5.949, "loss": "2.286", "ppl": "4.88", "wps": "366574", "ups": "3.09", "wpb": "118712", "bsz": "256", "num_updates": "306200", "lr": "0.000700808", "gnorm": "0.692", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.5", "wall": "99999"} +[2022-08-02 05:15:51,180][train_inner][INFO] - {"epoch": 6, "update": 5.953, "loss": "2.295", "ppl": "4.91", "wps": "365365", "ups": "3.1", "wpb": "117861", "bsz": "256", "num_updates": "306400", "lr": "0.000700606", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "26", "wall": "100064"} +[2022-08-02 05:16:05,805][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 05:16:06,070][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 05:16:56,399][train_inner][INFO] - {"epoch": 6, "update": 5.957, "loss": "2.287", "ppl": "4.88", "wps": "362660", "ups": "3.07", "wpb": "118260", "bsz": "256", "num_updates": "306600", "lr": "0.000700404", "gnorm": "0.703", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.7", "wall": "100129"} +[2022-08-02 05:18:01,044][train_inner][INFO] - {"epoch": 6, "update": 5.961, "loss": "2.286", "ppl": "4.88", "wps": "364919", "ups": "3.09", "wpb": "117949", "bsz": "256", "num_updates": "306800", "lr": "0.000700202", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "100193"} +[2022-08-02 05:19:05,851][train_inner][INFO] - {"epoch": 6, "update": 5.965, "loss": "2.282", "ppl": "4.86", "wps": "366750", "ups": "3.09", "wpb": "118838", "bsz": "256", "num_updates": "307000", "lr": "0.0007", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "100258"} +[2022-08-02 05:20:10,983][train_inner][INFO] - {"epoch": 6, "update": 5.969, "loss": "2.281", "ppl": "4.86", "wps": "364613", "ups": "3.07", "wpb": "118738", "bsz": "256", "num_updates": "307200", "lr": "0.000699798", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.3", "wall": "100323"} +[2022-08-02 05:21:15,881][train_inner][INFO] - {"epoch": 6, "update": 5.973, "loss": "2.287", "ppl": "4.88", "wps": "365020", "ups": "3.08", "wpb": "118443", "bsz": "256", "num_updates": "307400", "lr": "0.000699596", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.9", "wall": "100388"} +[2022-08-02 05:22:20,789][train_inner][INFO] - {"epoch": 6, "update": 5.977, "loss": "2.285", "ppl": "4.87", "wps": "364993", "ups": "3.08", "wpb": "118453", "bsz": "256", "num_updates": "307600", "lr": "0.000699394", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22", "wall": "100453"} +[2022-08-02 05:23:25,874][train_inner][INFO] - {"epoch": 6, "update": 5.981, "loss": "2.285", "ppl": "4.87", "wps": "363231", "ups": "3.07", "wpb": "118204", "bsz": "256", "num_updates": "307800", "lr": "0.000699192", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "100518"} +[2022-08-02 05:24:30,623][train_inner][INFO] - {"epoch": 6, "update": 5.984, "loss": "2.283", "ppl": "4.87", "wps": "365226", "ups": "3.09", "wpb": "118237", "bsz": "256", "num_updates": "308000", "lr": "0.00069899", "gnorm": "0.686", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.3", "wall": "100583"} +[2022-08-02 05:25:35,733][train_inner][INFO] - {"epoch": 6, "update": 5.988, "loss": "2.277", "ppl": "4.85", "wps": "362683", "ups": "3.07", "wpb": "118070", "bsz": "256", "num_updates": "308200", "lr": "0.000698788", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "100648"} +[2022-08-02 05:26:40,822][train_inner][INFO] - {"epoch": 6, "update": 5.992, "loss": "2.28", "ppl": "4.86", "wps": "362825", "ups": "3.07", "wpb": "118079", "bsz": "256", "num_updates": "308400", "lr": "0.000698586", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "100713"} +[2022-08-02 05:27:45,879][train_inner][INFO] - {"epoch": 6, "update": 5.996, "loss": "2.283", "ppl": "4.87", "wps": "363742", "ups": "3.07", "wpb": "118317", "bsz": "256", "num_updates": "308600", "lr": "0.000698384", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.9", "wall": "100778"} +[2022-08-02 05:28:50,938][train_inner][INFO] - {"epoch": 6, "update": 6.0, "loss": "2.28", "ppl": "4.86", "wps": "363290", "ups": "3.07", "wpb": "118174", "bsz": "256", "num_updates": "308800", "lr": "0.000698182", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.4", "wall": "100843"} +[2022-08-02 05:28:52,181][fairseq_cli.train][INFO] - end of epoch 6 (average epoch stats below) +[2022-08-02 05:28:52,182][train][INFO] - {"epoch": 6, "train_loss": "2.299", "train_ppl": "4.92", "train_wps": "362602", "train_ups": "3.07", "train_wpb": "118299", "train_bsz": "256", "train_num_updates": "308804", "train_lr": "0.000698178", "train_gnorm": "0.695", "train_clip": "0.1", "train_loss_scale": "1", "train_train_wall": "16621", "train_gb_free": "27.4", "train_wall": "100845"} +[2022-08-02 05:28:52,271][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 05:28:52,274][fairseq.trainer][INFO] - begin training epoch 7 +[2022-08-02 05:28:52,275][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 05:30:06,429][train_inner][INFO] - {"epoch": 7, "update": 6.004, "loss": "2.274", "ppl": "4.84", "wps": "312561", "ups": "2.65", "wpb": "117976", "bsz": "255.4", "num_updates": "309000", "lr": "0.00069798", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "22.3", "wall": "100919"} +[2022-08-02 05:31:11,126][train_inner][INFO] - {"epoch": 7, "update": 6.008, "loss": "2.28", "ppl": "4.86", "wps": "364552", "ups": "3.09", "wpb": "117926", "bsz": "256", "num_updates": "309200", "lr": "0.000697778", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.1", "wall": "100983"} +[2022-08-02 05:32:16,853][train_inner][INFO] - {"epoch": 7, "update": 6.012, "loss": "2.274", "ppl": "4.84", "wps": "359457", "ups": "3.04", "wpb": "118129", "bsz": "256", "num_updates": "309400", "lr": "0.000697576", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "101049"} +[2022-08-02 05:33:21,334][train_inner][INFO] - {"epoch": 7, "update": 6.015, "loss": "2.282", "ppl": "4.86", "wps": "368073", "ups": "3.1", "wpb": "118666", "bsz": "256", "num_updates": "309600", "lr": "0.000697374", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23", "wall": "101114"} +[2022-08-02 05:34:26,114][train_inner][INFO] - {"epoch": 7, "update": 6.019, "loss": "2.274", "ppl": "4.84", "wps": "364828", "ups": "3.09", "wpb": "118166", "bsz": "256", "num_updates": "309800", "lr": "0.000697172", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.7", "wall": "101178"} +[2022-08-02 05:35:30,957][train_inner][INFO] - {"epoch": 7, "update": 6.023, "loss": "2.281", "ppl": "4.86", "wps": "365229", "ups": "3.08", "wpb": "118411", "bsz": "256", "num_updates": "310000", "lr": "0.00069697", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "101243"} +[2022-08-02 05:36:36,768][train_inner][INFO] - {"epoch": 7, "update": 6.027, "loss": "2.285", "ppl": "4.87", "wps": "356909", "ups": "3.04", "wpb": "117441", "bsz": "256", "num_updates": "310200", "lr": "0.000696768", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "101309"} +[2022-08-02 05:37:42,121][train_inner][INFO] - {"epoch": 7, "update": 6.031, "loss": "2.27", "ppl": "4.82", "wps": "362745", "ups": "3.06", "wpb": "118530", "bsz": "256", "num_updates": "310400", "lr": "0.000696566", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26", "wall": "101374"} +[2022-08-02 05:38:47,238][train_inner][INFO] - {"epoch": 7, "update": 6.035, "loss": "2.277", "ppl": "4.85", "wps": "361975", "ups": "3.07", "wpb": "117853", "bsz": "256", "num_updates": "310600", "lr": "0.000696364", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.1", "wall": "101440"} +[2022-08-02 05:39:52,031][train_inner][INFO] - {"epoch": 7, "update": 6.039, "loss": "2.284", "ppl": "4.87", "wps": "365298", "ups": "3.09", "wpb": "118342", "bsz": "256", "num_updates": "310800", "lr": "0.000696162", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.9", "wall": "101504"} +[2022-08-02 05:40:56,849][train_inner][INFO] - {"epoch": 7, "update": 6.043, "loss": "2.274", "ppl": "4.84", "wps": "364731", "ups": "3.09", "wpb": "118204", "bsz": "256", "num_updates": "311000", "lr": "0.00069596", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "101569"} +[2022-08-02 05:42:03,178][train_inner][INFO] - {"epoch": 7, "update": 6.047, "loss": "2.267", "ppl": "4.81", "wps": "359552", "ups": "3.02", "wpb": "119241", "bsz": "256", "num_updates": "311200", "lr": "0.000695758", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "22.4", "wall": "101635"} +[2022-08-02 05:43:08,072][train_inner][INFO] - {"epoch": 7, "update": 6.05, "loss": "2.276", "ppl": "4.84", "wps": "365658", "ups": "3.08", "wpb": "118644", "bsz": "256", "num_updates": "311400", "lr": "0.000695556", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23", "wall": "101700"} +[2022-08-02 05:44:12,837][train_inner][INFO] - {"epoch": 7, "update": 6.054, "loss": "2.282", "ppl": "4.86", "wps": "365910", "ups": "3.09", "wpb": "118488", "bsz": "256", "num_updates": "311600", "lr": "0.000695354", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.6", "wall": "101765"} +[2022-08-02 05:45:17,619][train_inner][INFO] - {"epoch": 7, "update": 6.058, "loss": "2.274", "ppl": "4.84", "wps": "366114", "ups": "3.09", "wpb": "118587", "bsz": "256", "num_updates": "311800", "lr": "0.000695152", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "101830"} +[2022-08-02 05:46:22,554][train_inner][INFO] - {"epoch": 7, "update": 6.062, "loss": "2.278", "ppl": "4.85", "wps": "363852", "ups": "3.08", "wpb": "118131", "bsz": "256", "num_updates": "312000", "lr": "0.000694949", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "101895"} +[2022-08-02 05:47:27,530][train_inner][INFO] - {"epoch": 7, "update": 6.066, "loss": "2.273", "ppl": "4.83", "wps": "364354", "ups": "3.08", "wpb": "118370", "bsz": "256", "num_updates": "312200", "lr": "0.000694747", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "101960"} +[2022-08-02 05:48:32,664][train_inner][INFO] - {"epoch": 7, "update": 6.07, "loss": "2.278", "ppl": "4.85", "wps": "362822", "ups": "3.07", "wpb": "118158", "bsz": "256", "num_updates": "312400", "lr": "0.000694545", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "102025"} +[2022-08-02 05:49:39,742][train_inner][INFO] - {"epoch": 7, "update": 6.074, "loss": "2.274", "ppl": "4.84", "wps": "354332", "ups": "2.98", "wpb": "118837", "bsz": "256", "num_updates": "312600", "lr": "0.000694343", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "67", "gb_free": "20", "wall": "102092"} +[2022-08-02 05:50:45,581][train_inner][INFO] - {"epoch": 7, "update": 6.078, "loss": "2.274", "ppl": "4.84", "wps": "359641", "ups": "3.04", "wpb": "118391", "bsz": "256", "num_updates": "312800", "lr": "0.000694141", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "102158"} +[2022-08-02 05:51:50,687][train_inner][INFO] - {"epoch": 7, "update": 6.081, "loss": "2.273", "ppl": "4.83", "wps": "362359", "ups": "3.07", "wpb": "117956", "bsz": "256", "num_updates": "313000", "lr": "0.000693939", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.5", "wall": "102223"} +[2022-08-02 05:52:55,845][train_inner][INFO] - {"epoch": 7, "update": 6.085, "loss": "2.281", "ppl": "4.86", "wps": "364042", "ups": "3.07", "wpb": "118600", "bsz": "256", "num_updates": "313200", "lr": "0.000693737", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "102288"} +[2022-08-02 05:54:00,856][train_inner][INFO] - {"epoch": 7, "update": 6.089, "loss": "2.281", "ppl": "4.86", "wps": "363807", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "313400", "lr": "0.000693535", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "102353"} +[2022-08-02 05:55:05,520][train_inner][INFO] - {"epoch": 7, "update": 6.093, "loss": "2.274", "ppl": "4.84", "wps": "366444", "ups": "3.09", "wpb": "118477", "bsz": "256", "num_updates": "313600", "lr": "0.000693333", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.1", "wall": "102418"} +[2022-08-02 05:56:10,627][train_inner][INFO] - {"epoch": 7, "update": 6.097, "loss": "2.274", "ppl": "4.84", "wps": "365248", "ups": "3.07", "wpb": "118901", "bsz": "256", "num_updates": "313800", "lr": "0.000693131", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "102483"} +[2022-08-02 05:57:15,876][train_inner][INFO] - {"epoch": 7, "update": 6.101, "loss": "2.275", "ppl": "4.84", "wps": "363588", "ups": "3.07", "wpb": "118616", "bsz": "256", "num_updates": "314000", "lr": "0.000692929", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "102548"} +[2022-08-02 05:58:21,715][train_inner][INFO] - {"epoch": 7, "update": 6.105, "loss": "2.276", "ppl": "4.84", "wps": "357848", "ups": "3.04", "wpb": "117800", "bsz": "256", "num_updates": "314200", "lr": "0.000692727", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.3", "wall": "102614"} +[2022-08-02 05:59:26,583][train_inner][INFO] - {"epoch": 7, "update": 6.109, "loss": "2.274", "ppl": "4.84", "wps": "363412", "ups": "3.08", "wpb": "117868", "bsz": "256", "num_updates": "314400", "lr": "0.000692525", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "102679"} +[2022-08-02 06:00:31,800][train_inner][INFO] - {"epoch": 7, "update": 6.113, "loss": "2.285", "ppl": "4.87", "wps": "363714", "ups": "3.07", "wpb": "118600", "bsz": "256", "num_updates": "314600", "lr": "0.000692323", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "102744"} +[2022-08-02 06:01:36,193][train_inner][INFO] - {"epoch": 7, "update": 6.116, "loss": "2.271", "ppl": "4.83", "wps": "367267", "ups": "3.11", "wpb": "118247", "bsz": "256", "num_updates": "314800", "lr": "0.000692121", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "102809"} +[2022-08-02 06:02:41,162][train_inner][INFO] - {"epoch": 7, "update": 6.12, "loss": "2.269", "ppl": "4.82", "wps": "364002", "ups": "3.08", "wpb": "118242", "bsz": "256", "num_updates": "315000", "lr": "0.000691919", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "102873"} +[2022-08-02 06:03:46,087][train_inner][INFO] - {"epoch": 7, "update": 6.124, "loss": "2.271", "ppl": "4.83", "wps": "363781", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "315200", "lr": "0.000691717", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "102938"} +[2022-08-02 06:04:51,107][train_inner][INFO] - {"epoch": 7, "update": 6.128, "loss": "2.274", "ppl": "4.84", "wps": "364100", "ups": "3.08", "wpb": "118368", "bsz": "256", "num_updates": "315400", "lr": "0.000691515", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "103003"} +[2022-08-02 06:05:55,899][train_inner][INFO] - {"epoch": 7, "update": 6.132, "loss": "2.28", "ppl": "4.86", "wps": "364541", "ups": "3.09", "wpb": "118096", "bsz": "256", "num_updates": "315600", "lr": "0.000691313", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "103068"} +[2022-08-02 06:07:00,579][train_inner][INFO] - {"epoch": 7, "update": 6.136, "loss": "2.273", "ppl": "4.83", "wps": "365241", "ups": "3.09", "wpb": "118117", "bsz": "256", "num_updates": "315800", "lr": "0.000691111", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "103133"} +[2022-08-02 06:08:05,199][train_inner][INFO] - {"epoch": 7, "update": 6.14, "loss": "2.279", "ppl": "4.85", "wps": "364409", "ups": "3.1", "wpb": "117738", "bsz": "256", "num_updates": "316000", "lr": "0.000690909", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "103198"} +[2022-08-02 06:09:10,033][train_inner][INFO] - {"epoch": 7, "update": 6.144, "loss": "2.277", "ppl": "4.85", "wps": "365317", "ups": "3.08", "wpb": "118424", "bsz": "256", "num_updates": "316200", "lr": "0.000690707", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.1", "wall": "103262"} +[2022-08-02 06:10:14,960][train_inner][INFO] - {"epoch": 7, "update": 6.148, "loss": "2.27", "ppl": "4.82", "wps": "366180", "ups": "3.08", "wpb": "118873", "bsz": "256", "num_updates": "316400", "lr": "0.000690505", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "103327"} +[2022-08-02 06:11:19,764][train_inner][INFO] - {"epoch": 7, "update": 6.151, "loss": "2.274", "ppl": "4.84", "wps": "364767", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "316600", "lr": "0.000690303", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "103392"} +[2022-08-02 06:12:24,240][train_inner][INFO] - {"epoch": 7, "update": 6.155, "loss": "2.28", "ppl": "4.86", "wps": "366890", "ups": "3.1", "wpb": "118276", "bsz": "256", "num_updates": "316800", "lr": "0.000690101", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "103457"} +[2022-08-02 06:13:29,250][train_inner][INFO] - {"epoch": 7, "update": 6.159, "loss": "2.273", "ppl": "4.83", "wps": "363815", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "317000", "lr": "0.000689899", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "103522"} +[2022-08-02 06:14:30,451][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 06:14:33,884][train_inner][INFO] - {"epoch": 7, "update": 6.163, "loss": "2.271", "ppl": "4.83", "wps": "367002", "ups": "3.09", "wpb": "118602", "bsz": "256", "num_updates": "317200", "lr": "0.000689697", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "103586"} +[2022-08-02 06:15:38,440][train_inner][INFO] - {"epoch": 7, "update": 6.167, "loss": "2.27", "ppl": "4.82", "wps": "366873", "ups": "3.1", "wpb": "118418", "bsz": "256", "num_updates": "317400", "lr": "0.000689495", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.3", "wall": "103651"} +[2022-08-02 06:16:42,954][train_inner][INFO] - {"epoch": 7, "update": 6.171, "loss": "2.273", "ppl": "4.83", "wps": "367706", "ups": "3.1", "wpb": "118610", "bsz": "256", "num_updates": "317600", "lr": "0.000689293", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "103715"} +[2022-08-02 06:17:47,479][train_inner][INFO] - {"epoch": 7, "update": 6.175, "loss": "2.276", "ppl": "4.84", "wps": "366048", "ups": "3.1", "wpb": "118095", "bsz": "256", "num_updates": "317800", "lr": "0.000689091", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "103780"} +[2022-08-02 06:18:52,487][train_inner][INFO] - {"epoch": 7, "update": 6.179, "loss": "2.271", "ppl": "4.83", "wps": "363318", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "318000", "lr": "0.000688889", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "103845"} +[2022-08-02 06:19:57,375][train_inner][INFO] - {"epoch": 7, "update": 6.183, "loss": "2.272", "ppl": "4.83", "wps": "364604", "ups": "3.08", "wpb": "118290", "bsz": "256", "num_updates": "318200", "lr": "0.000688687", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "103910"} +[2022-08-02 06:21:02,423][train_inner][INFO] - {"epoch": 7, "update": 6.186, "loss": "2.276", "ppl": "4.84", "wps": "362410", "ups": "3.07", "wpb": "117868", "bsz": "256", "num_updates": "318400", "lr": "0.000688485", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "103975"} +[2022-08-02 06:22:07,106][train_inner][INFO] - {"epoch": 7, "update": 6.19, "loss": "2.271", "ppl": "4.83", "wps": "366344", "ups": "3.09", "wpb": "118479", "bsz": "256", "num_updates": "318600", "lr": "0.000688283", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "104039"} +[2022-08-02 06:23:11,879][train_inner][INFO] - {"epoch": 7, "update": 6.194, "loss": "2.274", "ppl": "4.84", "wps": "361876", "ups": "3.09", "wpb": "117199", "bsz": "256", "num_updates": "318800", "lr": "0.000688081", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "104104"} +[2022-08-02 06:24:17,730][train_inner][INFO] - {"epoch": 7, "update": 6.198, "loss": "2.266", "ppl": "4.81", "wps": "359589", "ups": "3.04", "wpb": "118395", "bsz": "256", "num_updates": "319000", "lr": "0.000687879", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "104170"} +[2022-08-02 06:25:22,904][train_inner][INFO] - {"epoch": 7, "update": 6.202, "loss": "2.276", "ppl": "4.84", "wps": "364193", "ups": "3.07", "wpb": "118678", "bsz": "256", "num_updates": "319200", "lr": "0.000687677", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "104235"} +[2022-08-02 06:26:27,740][train_inner][INFO] - {"epoch": 7, "update": 6.206, "loss": "2.273", "ppl": "4.83", "wps": "363970", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "319400", "lr": "0.000687475", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "104300"} +[2022-08-02 06:27:32,785][train_inner][INFO] - {"epoch": 7, "update": 6.21, "loss": "2.267", "ppl": "4.81", "wps": "364700", "ups": "3.07", "wpb": "118608", "bsz": "256", "num_updates": "319600", "lr": "0.000687273", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "104365"} +[2022-08-02 06:28:37,804][train_inner][INFO] - {"epoch": 7, "update": 6.214, "loss": "2.269", "ppl": "4.82", "wps": "365759", "ups": "3.08", "wpb": "118904", "bsz": "256", "num_updates": "319800", "lr": "0.000687071", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "104430"} +[2022-08-02 06:29:42,547][train_inner][INFO] - {"epoch": 7, "update": 6.217, "loss": "2.269", "ppl": "4.82", "wps": "364257", "ups": "3.09", "wpb": "117915", "bsz": "256", "num_updates": "320000", "lr": "0.000686869", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "104495"} +[2022-08-02 06:30:47,353][train_inner][INFO] - {"epoch": 7, "update": 6.221, "loss": "2.272", "ppl": "4.83", "wps": "365211", "ups": "3.09", "wpb": "118338", "bsz": "256", "num_updates": "320200", "lr": "0.000686667", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "104560"} +[2022-08-02 06:31:52,288][train_inner][INFO] - {"epoch": 7, "update": 6.225, "loss": "2.278", "ppl": "4.85", "wps": "361896", "ups": "3.08", "wpb": "117496", "bsz": "256", "num_updates": "320400", "lr": "0.000686465", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "104625"} +[2022-08-02 06:32:57,564][train_inner][INFO] - {"epoch": 7, "update": 6.229, "loss": "2.264", "ppl": "4.8", "wps": "363494", "ups": "3.06", "wpb": "118636", "bsz": "256", "num_updates": "320600", "lr": "0.000686263", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "104690"} +[2022-08-02 06:34:02,782][train_inner][INFO] - {"epoch": 7, "update": 6.233, "loss": "2.271", "ppl": "4.83", "wps": "363123", "ups": "3.07", "wpb": "118409", "bsz": "256", "num_updates": "320800", "lr": "0.000686061", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "104755"} +[2022-08-02 06:35:07,658][train_inner][INFO] - {"epoch": 7, "update": 6.237, "loss": "2.272", "ppl": "4.83", "wps": "363481", "ups": "3.08", "wpb": "117903", "bsz": "256", "num_updates": "321000", "lr": "0.000685859", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "104820"} +[2022-08-02 06:36:12,988][train_inner][INFO] - {"epoch": 7, "update": 6.241, "loss": "2.273", "ppl": "4.83", "wps": "360650", "ups": "3.06", "wpb": "117805", "bsz": "256", "num_updates": "321200", "lr": "0.000685657", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26", "wall": "104885"} +[2022-08-02 06:37:17,853][train_inner][INFO] - {"epoch": 7, "update": 6.245, "loss": "2.274", "ppl": "4.84", "wps": "363125", "ups": "3.08", "wpb": "117770", "bsz": "256", "num_updates": "321400", "lr": "0.000685455", "gnorm": "0.672", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.6", "wall": "104950"} +[2022-08-02 06:38:22,483][train_inner][INFO] - {"epoch": 7, "update": 6.249, "loss": "2.268", "ppl": "4.82", "wps": "366382", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "321600", "lr": "0.000685253", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.7", "wall": "105015"} +[2022-08-02 06:38:43,596][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 06:39:27,630][train_inner][INFO] - {"epoch": 7, "update": 6.252, "loss": "2.269", "ppl": "4.82", "wps": "362991", "ups": "3.07", "wpb": "118237", "bsz": "256", "num_updates": "321800", "lr": "0.000685051", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "105080"} +[2022-08-02 06:40:32,664][train_inner][INFO] - {"epoch": 7, "update": 6.256, "loss": "2.27", "ppl": "4.82", "wps": "362392", "ups": "3.08", "wpb": "117837", "bsz": "256", "num_updates": "322000", "lr": "0.000684848", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "105145"} +[2022-08-02 06:41:38,009][train_inner][INFO] - {"epoch": 7, "update": 6.26, "loss": "2.267", "ppl": "4.81", "wps": "362419", "ups": "3.06", "wpb": "118410", "bsz": "256", "num_updates": "322200", "lr": "0.000684646", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "105210"} +[2022-08-02 06:42:43,002][train_inner][INFO] - {"epoch": 7, "update": 6.264, "loss": "2.27", "ppl": "4.82", "wps": "361948", "ups": "3.08", "wpb": "117619", "bsz": "256", "num_updates": "322400", "lr": "0.000684444", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.8", "wall": "105275"} +[2022-08-02 06:43:47,886][train_inner][INFO] - {"epoch": 7, "update": 6.268, "loss": "2.272", "ppl": "4.83", "wps": "364944", "ups": "3.08", "wpb": "118392", "bsz": "256", "num_updates": "322600", "lr": "0.000684242", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "105340"} +[2022-08-02 06:44:52,746][train_inner][INFO] - {"epoch": 7, "update": 6.272, "loss": "2.265", "ppl": "4.81", "wps": "366778", "ups": "3.08", "wpb": "118945", "bsz": "256", "num_updates": "322800", "lr": "0.00068404", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "105405"} +[2022-08-02 06:45:57,607][train_inner][INFO] - {"epoch": 7, "update": 6.276, "loss": "2.271", "ppl": "4.83", "wps": "364118", "ups": "3.08", "wpb": "118084", "bsz": "256", "num_updates": "323000", "lr": "0.000683838", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "105470"} +[2022-08-02 06:47:02,361][train_inner][INFO] - {"epoch": 7, "update": 6.28, "loss": "2.269", "ppl": "4.82", "wps": "364393", "ups": "3.09", "wpb": "117977", "bsz": "256", "num_updates": "323200", "lr": "0.000683636", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "105535"} +[2022-08-02 06:48:07,644][train_inner][INFO] - {"epoch": 7, "update": 6.284, "loss": "2.267", "ppl": "4.81", "wps": "362269", "ups": "3.06", "wpb": "118248", "bsz": "256", "num_updates": "323400", "lr": "0.000683434", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "105600"} +[2022-08-02 06:49:12,888][train_inner][INFO] - {"epoch": 7, "update": 6.287, "loss": "2.271", "ppl": "4.83", "wps": "363426", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "323600", "lr": "0.000683232", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "105665"} +[2022-08-02 06:49:35,663][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 06:49:35,955][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 06:50:18,425][train_inner][INFO] - {"epoch": 7, "update": 6.291, "loss": "2.267", "ppl": "4.81", "wps": "361089", "ups": "3.05", "wpb": "118321", "bsz": "256", "num_updates": "323800", "lr": "0.00068303", "gnorm": "0.733", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "105731"} +[2022-08-02 06:51:22,814][train_inner][INFO] - {"epoch": 7, "update": 6.295, "loss": "2.27", "ppl": "4.82", "wps": "366212", "ups": "3.11", "wpb": "117899", "bsz": "256", "num_updates": "324000", "lr": "0.000682828", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "105795"} +[2022-08-02 06:52:27,684][train_inner][INFO] - {"epoch": 7, "update": 6.299, "loss": "2.267", "ppl": "4.81", "wps": "365032", "ups": "3.08", "wpb": "118396", "bsz": "256", "num_updates": "324200", "lr": "0.000682626", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "105860"} +[2022-08-02 06:53:32,842][train_inner][INFO] - {"epoch": 7, "update": 6.303, "loss": "2.268", "ppl": "4.82", "wps": "362264", "ups": "3.07", "wpb": "118021", "bsz": "256", "num_updates": "324400", "lr": "0.000682424", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "105925"} +[2022-08-02 06:54:38,011][train_inner][INFO] - {"epoch": 7, "update": 6.307, "loss": "2.269", "ppl": "4.82", "wps": "363269", "ups": "3.07", "wpb": "118369", "bsz": "256", "num_updates": "324600", "lr": "0.000682222", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "105990"} +[2022-08-02 06:55:42,512][train_inner][INFO] - {"epoch": 7, "update": 6.311, "loss": "2.274", "ppl": "4.84", "wps": "368728", "ups": "3.1", "wpb": "118914", "bsz": "256", "num_updates": "324800", "lr": "0.00068202", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26.4", "wall": "106055"} +[2022-08-02 06:56:47,163][train_inner][INFO] - {"epoch": 7, "update": 6.315, "loss": "2.266", "ppl": "4.81", "wps": "364630", "ups": "3.09", "wpb": "117867", "bsz": "256", "num_updates": "325000", "lr": "0.000681818", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24", "wall": "106119"} +[2022-08-02 06:57:52,163][train_inner][INFO] - {"epoch": 7, "update": 6.319, "loss": "2.266", "ppl": "4.81", "wps": "363781", "ups": "3.08", "wpb": "118228", "bsz": "256", "num_updates": "325200", "lr": "0.000681616", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "106184"} +[2022-08-02 06:58:57,206][train_inner][INFO] - {"epoch": 7, "update": 6.322, "loss": "2.268", "ppl": "4.82", "wps": "363883", "ups": "3.07", "wpb": "118338", "bsz": "256", "num_updates": "325400", "lr": "0.000681414", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "106250"} +[2022-08-02 07:00:01,764][train_inner][INFO] - {"epoch": 7, "update": 6.326, "loss": "2.27", "ppl": "4.82", "wps": "366503", "ups": "3.1", "wpb": "118301", "bsz": "256", "num_updates": "325600", "lr": "0.000681212", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "106314"} +[2022-08-02 07:01:06,567][train_inner][INFO] - {"epoch": 7, "update": 6.33, "loss": "2.268", "ppl": "4.82", "wps": "365138", "ups": "3.09", "wpb": "118308", "bsz": "256", "num_updates": "325800", "lr": "0.00068101", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.2", "wall": "106379"} +[2022-08-02 07:02:11,508][train_inner][INFO] - {"epoch": 7, "update": 6.334, "loss": "2.263", "ppl": "4.8", "wps": "365750", "ups": "3.08", "wpb": "118760", "bsz": "256", "num_updates": "326000", "lr": "0.000680808", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.3", "wall": "106444"} +[2022-08-02 07:03:16,230][train_inner][INFO] - {"epoch": 7, "update": 6.338, "loss": "2.264", "ppl": "4.8", "wps": "365760", "ups": "3.09", "wpb": "118362", "bsz": "256", "num_updates": "326200", "lr": "0.000680606", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "106509"} +[2022-08-02 07:04:21,198][train_inner][INFO] - {"epoch": 7, "update": 6.342, "loss": "2.272", "ppl": "4.83", "wps": "363304", "ups": "3.08", "wpb": "118014", "bsz": "256", "num_updates": "326400", "lr": "0.000680404", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "106574"} +[2022-08-02 07:05:26,062][train_inner][INFO] - {"epoch": 7, "update": 6.346, "loss": "2.26", "ppl": "4.79", "wps": "366064", "ups": "3.08", "wpb": "118720", "bsz": "256", "num_updates": "326600", "lr": "0.000680202", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "106638"} +[2022-08-02 07:06:31,372][train_inner][INFO] - {"epoch": 7, "update": 6.35, "loss": "2.26", "ppl": "4.79", "wps": "361721", "ups": "3.06", "wpb": "118118", "bsz": "256", "num_updates": "326800", "lr": "0.00068", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "106704"} +[2022-08-02 07:07:36,153][train_inner][INFO] - {"epoch": 7, "update": 6.353, "loss": "2.263", "ppl": "4.8", "wps": "366104", "ups": "3.09", "wpb": "118581", "bsz": "256", "num_updates": "327000", "lr": "0.000679798", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "106768"} +[2022-08-02 07:08:42,057][train_inner][INFO] - {"epoch": 7, "update": 6.357, "loss": "2.267", "ppl": "4.81", "wps": "359374", "ups": "3.03", "wpb": "118421", "bsz": "256", "num_updates": "327200", "lr": "0.000679596", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.4", "wall": "106834"} +[2022-08-02 07:09:46,816][train_inner][INFO] - {"epoch": 7, "update": 6.361, "loss": "2.265", "ppl": "4.81", "wps": "366204", "ups": "3.09", "wpb": "118573", "bsz": "256", "num_updates": "327400", "lr": "0.000679394", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "106899"} +[2022-08-02 07:10:51,556][train_inner][INFO] - {"epoch": 7, "update": 6.365, "loss": "2.271", "ppl": "4.83", "wps": "362704", "ups": "3.09", "wpb": "117406", "bsz": "256", "num_updates": "327600", "lr": "0.000679192", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "106964"} +[2022-08-02 07:11:56,170][train_inner][INFO] - {"epoch": 7, "update": 6.369, "loss": "2.264", "ppl": "4.8", "wps": "364765", "ups": "3.1", "wpb": "117844", "bsz": "256", "num_updates": "327800", "lr": "0.00067899", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "107028"} +[2022-08-02 07:13:01,204][train_inner][INFO] - {"epoch": 7, "update": 6.373, "loss": "2.266", "ppl": "4.81", "wps": "364445", "ups": "3.08", "wpb": "118505", "bsz": "256", "num_updates": "328000", "lr": "0.000678788", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "107094"} +[2022-08-02 07:13:24,544][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 07:14:06,381][train_inner][INFO] - {"epoch": 7, "update": 6.377, "loss": "2.268", "ppl": "4.82", "wps": "363296", "ups": "3.07", "wpb": "118390", "bsz": "256", "num_updates": "328200", "lr": "0.000678586", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "107159"} +[2022-08-02 07:15:11,118][train_inner][INFO] - {"epoch": 7, "update": 6.381, "loss": "2.264", "ppl": "4.8", "wps": "365310", "ups": "3.09", "wpb": "118245", "bsz": "256", "num_updates": "328400", "lr": "0.000678384", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.2", "wall": "107223"} +[2022-08-02 07:16:15,465][train_inner][INFO] - {"epoch": 7, "update": 6.385, "loss": "2.27", "ppl": "4.82", "wps": "367258", "ups": "3.11", "wpb": "118158", "bsz": "256", "num_updates": "328600", "lr": "0.000678182", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "107288"} +[2022-08-02 07:17:20,098][train_inner][INFO] - {"epoch": 7, "update": 6.388, "loss": "2.267", "ppl": "4.81", "wps": "363754", "ups": "3.09", "wpb": "117550", "bsz": "256", "num_updates": "328800", "lr": "0.00067798", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "107352"} +[2022-08-02 07:18:25,104][train_inner][INFO] - {"epoch": 7, "update": 6.392, "loss": "2.268", "ppl": "4.82", "wps": "362250", "ups": "3.08", "wpb": "117741", "bsz": "256", "num_updates": "329000", "lr": "0.000677778", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "107417"} +[2022-08-02 07:19:29,870][train_inner][INFO] - {"epoch": 7, "update": 6.396, "loss": "2.268", "ppl": "4.82", "wps": "364312", "ups": "3.09", "wpb": "117975", "bsz": "256", "num_updates": "329200", "lr": "0.000677576", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "107482"} +[2022-08-02 07:20:34,818][train_inner][INFO] - {"epoch": 7, "update": 6.4, "loss": "2.265", "ppl": "4.81", "wps": "362858", "ups": "3.08", "wpb": "117832", "bsz": "256", "num_updates": "329400", "lr": "0.000677374", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "107547"} +[2022-08-02 07:21:39,904][train_inner][INFO] - {"epoch": 7, "update": 6.404, "loss": "2.261", "ppl": "4.79", "wps": "363823", "ups": "3.07", "wpb": "118397", "bsz": "256", "num_updates": "329600", "lr": "0.000677172", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "107612"} +[2022-08-02 07:22:44,610][train_inner][INFO] - {"epoch": 7, "update": 6.408, "loss": "2.259", "ppl": "4.79", "wps": "364927", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "329800", "lr": "0.00067697", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "107677"} +[2022-08-02 07:23:49,658][train_inner][INFO] - {"epoch": 7, "update": 6.412, "loss": "2.258", "ppl": "4.78", "wps": "365190", "ups": "3.07", "wpb": "118771", "bsz": "256", "num_updates": "330000", "lr": "0.000676768", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "107742"} +[2022-08-02 07:24:54,796][train_inner][INFO] - {"epoch": 7, "update": 6.416, "loss": "2.26", "ppl": "4.79", "wps": "363978", "ups": "3.07", "wpb": "118544", "bsz": "256", "num_updates": "330200", "lr": "0.000676566", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "107807"} +[2022-08-02 07:25:59,706][train_inner][INFO] - {"epoch": 7, "update": 6.42, "loss": "2.27", "ppl": "4.82", "wps": "364564", "ups": "3.08", "wpb": "118317", "bsz": "256", "num_updates": "330400", "lr": "0.000676364", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "107872"} +[2022-08-02 07:27:04,283][train_inner][INFO] - {"epoch": 7, "update": 6.423, "loss": "2.265", "ppl": "4.81", "wps": "364994", "ups": "3.1", "wpb": "117849", "bsz": "256", "num_updates": "330600", "lr": "0.000676162", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "107937"} +[2022-08-02 07:27:28,744][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 07:28:09,177][train_inner][INFO] - {"epoch": 7, "update": 6.427, "loss": "2.263", "ppl": "4.8", "wps": "363755", "ups": "3.08", "wpb": "118027", "bsz": "256", "num_updates": "330800", "lr": "0.00067596", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "108001"} +[2022-08-02 07:29:15,011][train_inner][INFO] - {"epoch": 7, "update": 6.431, "loss": "2.263", "ppl": "4.8", "wps": "359080", "ups": "3.04", "wpb": "118196", "bsz": "256", "num_updates": "331000", "lr": "0.000675758", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "108067"} +[2022-08-02 07:30:20,333][train_inner][INFO] - {"epoch": 7, "update": 6.435, "loss": "2.268", "ppl": "4.82", "wps": "361067", "ups": "3.06", "wpb": "117928", "bsz": "256", "num_updates": "331200", "lr": "0.000675556", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.8", "wall": "108133"} +[2022-08-02 07:31:25,874][train_inner][INFO] - {"epoch": 7, "update": 6.439, "loss": "2.259", "ppl": "4.79", "wps": "360981", "ups": "3.05", "wpb": "118293", "bsz": "256", "num_updates": "331400", "lr": "0.000675354", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "108198"} +[2022-08-02 07:32:31,128][train_inner][INFO] - {"epoch": 7, "update": 6.443, "loss": "2.256", "ppl": "4.78", "wps": "363597", "ups": "3.06", "wpb": "118629", "bsz": "256", "num_updates": "331600", "lr": "0.000675152", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "108263"} +[2022-08-02 07:33:36,123][train_inner][INFO] - {"epoch": 7, "update": 6.447, "loss": "2.261", "ppl": "4.79", "wps": "363361", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "331800", "lr": "0.000674949", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.7", "wall": "108328"} +[2022-08-02 07:34:40,611][train_inner][INFO] - {"epoch": 7, "update": 6.451, "loss": "2.265", "ppl": "4.81", "wps": "365506", "ups": "3.1", "wpb": "117852", "bsz": "256", "num_updates": "332000", "lr": "0.000674747", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "108393"} +[2022-08-02 07:35:45,603][train_inner][INFO] - {"epoch": 7, "update": 6.455, "loss": "2.263", "ppl": "4.8", "wps": "364496", "ups": "3.08", "wpb": "118443", "bsz": "256", "num_updates": "332200", "lr": "0.000674545", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "108458"} +[2022-08-02 07:36:50,106][train_inner][INFO] - {"epoch": 7, "update": 6.458, "loss": "2.267", "ppl": "4.81", "wps": "365102", "ups": "3.1", "wpb": "117749", "bsz": "256", "num_updates": "332400", "lr": "0.000674343", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.7", "wall": "108522"} +[2022-08-02 07:37:54,566][train_inner][INFO] - {"epoch": 7, "update": 6.462, "loss": "2.263", "ppl": "4.8", "wps": "367073", "ups": "3.1", "wpb": "118306", "bsz": "256", "num_updates": "332600", "lr": "0.000674141", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "108587"} +[2022-08-02 07:38:59,263][train_inner][INFO] - {"epoch": 7, "update": 6.466, "loss": "2.268", "ppl": "4.82", "wps": "365386", "ups": "3.09", "wpb": "118194", "bsz": "256", "num_updates": "332800", "lr": "0.000673939", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "108652"} +[2022-08-02 07:40:02,320][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 07:40:04,241][train_inner][INFO] - {"epoch": 7, "update": 6.47, "loss": "2.261", "ppl": "4.79", "wps": "362156", "ups": "3.08", "wpb": "117660", "bsz": "256", "num_updates": "333000", "lr": "0.000673737", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "108717"} +[2022-08-02 07:41:08,871][train_inner][INFO] - {"epoch": 7, "update": 6.474, "loss": "2.252", "ppl": "4.76", "wps": "367728", "ups": "3.09", "wpb": "118831", "bsz": "255.9", "num_updates": "333200", "lr": "0.000673535", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26", "wall": "108781"} +[2022-08-02 07:42:13,873][train_inner][INFO] - {"epoch": 7, "update": 6.478, "loss": "2.266", "ppl": "4.81", "wps": "363261", "ups": "3.08", "wpb": "118062", "bsz": "256", "num_updates": "333400", "lr": "0.000673333", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "108846"} +[2022-08-02 07:43:18,374][train_inner][INFO] - {"epoch": 7, "update": 6.482, "loss": "2.263", "ppl": "4.8", "wps": "368396", "ups": "3.1", "wpb": "118807", "bsz": "256", "num_updates": "333600", "lr": "0.000673131", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "108911"} +[2022-08-02 07:43:52,100][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 07:44:23,394][train_inner][INFO] - {"epoch": 7, "update": 6.486, "loss": "2.251", "ppl": "4.76", "wps": "365554", "ups": "3.08", "wpb": "118840", "bsz": "256", "num_updates": "333800", "lr": "0.000672929", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "108976"} +[2022-08-02 07:45:27,839][train_inner][INFO] - {"epoch": 7, "update": 6.49, "loss": "2.266", "ppl": "4.81", "wps": "366509", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "334000", "lr": "0.000672727", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.5", "wall": "109040"} +[2022-08-02 07:46:32,631][train_inner][INFO] - {"epoch": 7, "update": 6.493, "loss": "2.269", "ppl": "4.82", "wps": "364152", "ups": "3.09", "wpb": "117969", "bsz": "256", "num_updates": "334200", "lr": "0.000672525", "gnorm": "0.689", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.3", "wall": "109105"} +[2022-08-02 07:47:37,495][train_inner][INFO] - {"epoch": 7, "update": 6.497, "loss": "2.266", "ppl": "4.81", "wps": "365746", "ups": "3.08", "wpb": "118618", "bsz": "256", "num_updates": "334400", "lr": "0.000672323", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.5", "wall": "109170"} +[2022-08-02 07:48:41,949][train_inner][INFO] - {"epoch": 7, "update": 6.501, "loss": "2.262", "ppl": "4.8", "wps": "366396", "ups": "3.1", "wpb": "118077", "bsz": "256", "num_updates": "334600", "lr": "0.000672121", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "109234"} +[2022-08-02 07:49:46,706][train_inner][INFO] - {"epoch": 7, "update": 6.505, "loss": "2.259", "ppl": "4.79", "wps": "365771", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "334800", "lr": "0.000671919", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.8", "wall": "109299"} +[2022-08-02 07:50:51,864][train_inner][INFO] - {"epoch": 7, "update": 6.509, "loss": "2.259", "ppl": "4.79", "wps": "362959", "ups": "3.07", "wpb": "118247", "bsz": "256", "num_updates": "335000", "lr": "0.000671717", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "109364"} +[2022-08-02 07:51:56,836][train_inner][INFO] - {"epoch": 7, "update": 6.513, "loss": "2.262", "ppl": "4.8", "wps": "363481", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "335200", "lr": "0.000671515", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "109429"} +[2022-08-02 07:53:02,041][train_inner][INFO] - {"epoch": 7, "update": 6.517, "loss": "2.264", "ppl": "4.8", "wps": "361667", "ups": "3.07", "wpb": "117912", "bsz": "256", "num_updates": "335400", "lr": "0.000671313", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "109494"} +[2022-08-02 07:54:06,527][train_inner][INFO] - {"epoch": 7, "update": 6.521, "loss": "2.266", "ppl": "4.81", "wps": "364002", "ups": "3.1", "wpb": "117363", "bsz": "256", "num_updates": "335600", "lr": "0.000671111", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "109559"} +[2022-08-02 07:55:11,129][train_inner][INFO] - {"epoch": 7, "update": 6.524, "loss": "2.258", "ppl": "4.78", "wps": "367398", "ups": "3.1", "wpb": "118671", "bsz": "256", "num_updates": "335800", "lr": "0.000670909", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "109623"} +[2022-08-02 07:56:16,024][train_inner][INFO] - {"epoch": 7, "update": 6.528, "loss": "2.257", "ppl": "4.78", "wps": "366088", "ups": "3.08", "wpb": "118786", "bsz": "256", "num_updates": "336000", "lr": "0.000670707", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "109688"} +[2022-08-02 07:57:20,982][train_inner][INFO] - {"epoch": 7, "update": 6.532, "loss": "2.263", "ppl": "4.8", "wps": "363043", "ups": "3.08", "wpb": "117910", "bsz": "256", "num_updates": "336200", "lr": "0.000670505", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "109753"} +[2022-08-02 07:58:25,812][train_inner][INFO] - {"epoch": 7, "update": 6.536, "loss": "2.265", "ppl": "4.81", "wps": "363392", "ups": "3.09", "wpb": "117792", "bsz": "256", "num_updates": "336400", "lr": "0.000670303", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "109818"} +[2022-08-02 07:59:30,762][train_inner][INFO] - {"epoch": 7, "update": 6.54, "loss": "2.253", "ppl": "4.77", "wps": "364593", "ups": "3.08", "wpb": "118400", "bsz": "256", "num_updates": "336600", "lr": "0.000670101", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "109883"} +[2022-08-02 08:00:36,025][train_inner][INFO] - {"epoch": 7, "update": 6.544, "loss": "2.262", "ppl": "4.8", "wps": "364395", "ups": "3.06", "wpb": "118906", "bsz": "256", "num_updates": "336800", "lr": "0.000669899", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "109948"} +[2022-08-02 08:01:40,442][train_inner][INFO] - {"epoch": 7, "update": 6.548, "loss": "2.26", "ppl": "4.79", "wps": "366553", "ups": "3.1", "wpb": "118059", "bsz": "256", "num_updates": "337000", "lr": "0.000669697", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "110013"} +[2022-08-02 08:02:45,738][train_inner][INFO] - {"epoch": 7, "update": 6.552, "loss": "2.256", "ppl": "4.78", "wps": "363085", "ups": "3.06", "wpb": "118537", "bsz": "256", "num_updates": "337200", "lr": "0.000669495", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "110078"} +[2022-08-02 08:03:50,606][train_inner][INFO] - {"epoch": 7, "update": 6.556, "loss": "2.26", "ppl": "4.79", "wps": "365975", "ups": "3.08", "wpb": "118699", "bsz": "256", "num_updates": "337400", "lr": "0.000669293", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "110143"} +[2022-08-02 08:04:55,299][train_inner][INFO] - {"epoch": 7, "update": 6.559, "loss": "2.255", "ppl": "4.77", "wps": "365240", "ups": "3.09", "wpb": "118141", "bsz": "256", "num_updates": "337600", "lr": "0.000669091", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "110208"} +[2022-08-02 08:06:00,149][train_inner][INFO] - {"epoch": 7, "update": 6.563, "loss": "2.257", "ppl": "4.78", "wps": "365540", "ups": "3.08", "wpb": "118524", "bsz": "256", "num_updates": "337800", "lr": "0.000668889", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "110272"} +[2022-08-02 08:07:04,915][train_inner][INFO] - {"epoch": 7, "update": 6.567, "loss": "2.265", "ppl": "4.81", "wps": "364439", "ups": "3.09", "wpb": "118015", "bsz": "256", "num_updates": "338000", "lr": "0.000668687", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.1", "wall": "110337"} +[2022-08-02 08:08:10,049][train_inner][INFO] - {"epoch": 7, "update": 6.571, "loss": "2.255", "ppl": "4.77", "wps": "364334", "ups": "3.07", "wpb": "118650", "bsz": "256", "num_updates": "338200", "lr": "0.000668485", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "110402"} +[2022-08-02 08:09:15,375][train_inner][INFO] - {"epoch": 7, "update": 6.575, "loss": "2.256", "ppl": "4.78", "wps": "362696", "ups": "3.06", "wpb": "118467", "bsz": "256", "num_updates": "338400", "lr": "0.000668283", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "110468"} +[2022-08-02 08:10:20,178][train_inner][INFO] - {"epoch": 7, "update": 6.579, "loss": "2.259", "ppl": "4.79", "wps": "364296", "ups": "3.09", "wpb": "118034", "bsz": "256", "num_updates": "338600", "lr": "0.000668081", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "110532"} +[2022-08-02 08:10:38,406][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 08:11:25,079][train_inner][INFO] - {"epoch": 7, "update": 6.583, "loss": "2.259", "ppl": "4.79", "wps": "362278", "ups": "3.08", "wpb": "117560", "bsz": "256", "num_updates": "338800", "lr": "0.000667879", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "110597"} +[2022-08-02 08:12:30,007][train_inner][INFO] - {"epoch": 7, "update": 6.587, "loss": "2.264", "ppl": "4.8", "wps": "362212", "ups": "3.08", "wpb": "117588", "bsz": "256", "num_updates": "339000", "lr": "0.000667677", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "110662"} +[2022-08-02 08:13:35,105][train_inner][INFO] - {"epoch": 7, "update": 6.591, "loss": "2.258", "ppl": "4.78", "wps": "363955", "ups": "3.07", "wpb": "118461", "bsz": "256", "num_updates": "339200", "lr": "0.000667475", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.1", "wall": "110727"} +[2022-08-02 08:14:39,702][train_inner][INFO] - {"epoch": 7, "update": 6.594, "loss": "2.253", "ppl": "4.77", "wps": "368769", "ups": "3.1", "wpb": "119106", "bsz": "256", "num_updates": "339400", "lr": "0.000667273", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "110792"} +[2022-08-02 08:15:45,017][train_inner][INFO] - {"epoch": 7, "update": 6.598, "loss": "2.259", "ppl": "4.79", "wps": "361306", "ups": "3.06", "wpb": "117991", "bsz": "256", "num_updates": "339600", "lr": "0.000667071", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "110857"} +[2022-08-02 08:16:49,583][train_inner][INFO] - {"epoch": 7, "update": 6.602, "loss": "2.259", "ppl": "4.79", "wps": "366766", "ups": "3.1", "wpb": "118401", "bsz": "256", "num_updates": "339800", "lr": "0.000666869", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "110922"} +[2022-08-02 08:17:54,308][train_inner][INFO] - {"epoch": 7, "update": 6.606, "loss": "2.257", "ppl": "4.78", "wps": "365978", "ups": "3.09", "wpb": "118438", "bsz": "256", "num_updates": "340000", "lr": "0.000666667", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "110987"} +[2022-08-02 08:18:59,575][train_inner][INFO] - {"epoch": 7, "update": 6.61, "loss": "2.255", "ppl": "4.77", "wps": "364542", "ups": "3.06", "wpb": "118961", "bsz": "256", "num_updates": "340200", "lr": "0.000666465", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "111052"} +[2022-08-02 08:20:04,579][train_inner][INFO] - {"epoch": 7, "update": 6.614, "loss": "2.259", "ppl": "4.79", "wps": "364725", "ups": "3.08", "wpb": "118541", "bsz": "256", "num_updates": "340400", "lr": "0.000666263", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "111117"} +[2022-08-02 08:21:09,733][train_inner][INFO] - {"epoch": 7, "update": 6.618, "loss": "2.254", "ppl": "4.77", "wps": "364370", "ups": "3.07", "wpb": "118699", "bsz": "256", "num_updates": "340600", "lr": "0.000666061", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "111182"} +[2022-08-02 08:22:14,306][train_inner][INFO] - {"epoch": 7, "update": 6.622, "loss": "2.257", "ppl": "4.78", "wps": "366857", "ups": "3.1", "wpb": "118443", "bsz": "256", "num_updates": "340800", "lr": "0.000665859", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "111247"} +[2022-08-02 08:23:19,125][train_inner][INFO] - {"epoch": 7, "update": 6.625, "loss": "2.26", "ppl": "4.79", "wps": "365224", "ups": "3.09", "wpb": "118366", "bsz": "256", "num_updates": "341000", "lr": "0.000665657", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "111311"} +[2022-08-02 08:24:24,137][train_inner][INFO] - {"epoch": 7, "update": 6.629, "loss": "2.257", "ppl": "4.78", "wps": "363184", "ups": "3.08", "wpb": "118056", "bsz": "256", "num_updates": "341200", "lr": "0.000665455", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "111376"} +[2022-08-02 08:25:28,928][train_inner][INFO] - {"epoch": 7, "update": 6.633, "loss": "2.265", "ppl": "4.81", "wps": "363526", "ups": "3.09", "wpb": "117764", "bsz": "256", "num_updates": "341400", "lr": "0.000665253", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "111441"} +[2022-08-02 08:26:34,007][train_inner][INFO] - {"epoch": 7, "update": 6.637, "loss": "2.251", "ppl": "4.76", "wps": "365555", "ups": "3.07", "wpb": "118948", "bsz": "256", "num_updates": "341600", "lr": "0.000665051", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "111506"} +[2022-08-02 08:27:38,880][train_inner][INFO] - {"epoch": 7, "update": 6.641, "loss": "2.253", "ppl": "4.77", "wps": "366004", "ups": "3.08", "wpb": "118718", "bsz": "256", "num_updates": "341800", "lr": "0.000664848", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "111571"} +[2022-08-02 08:28:43,549][train_inner][INFO] - {"epoch": 7, "update": 6.645, "loss": "2.26", "ppl": "4.79", "wps": "365596", "ups": "3.09", "wpb": "118211", "bsz": "256", "num_updates": "342000", "lr": "0.000664646", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "111636"} +[2022-08-02 08:29:48,595][train_inner][INFO] - {"epoch": 7, "update": 6.649, "loss": "2.254", "ppl": "4.77", "wps": "364255", "ups": "3.07", "wpb": "118466", "bsz": "256", "num_updates": "342200", "lr": "0.000664444", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.4", "wall": "111701"} +[2022-08-02 08:30:53,169][train_inner][INFO] - {"epoch": 7, "update": 6.653, "loss": "2.261", "ppl": "4.79", "wps": "363920", "ups": "3.1", "wpb": "117497", "bsz": "256", "num_updates": "342400", "lr": "0.000664242", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "111765"} +[2022-08-02 08:31:58,147][train_inner][INFO] - {"epoch": 7, "update": 6.657, "loss": "2.256", "ppl": "4.78", "wps": "367091", "ups": "3.08", "wpb": "119261", "bsz": "256", "num_updates": "342600", "lr": "0.00066404", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "111830"} +[2022-08-02 08:32:50,749][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 08:33:03,321][train_inner][INFO] - {"epoch": 7, "update": 6.66, "loss": "2.263", "ppl": "4.8", "wps": "361323", "ups": "3.07", "wpb": "117743", "bsz": "256", "num_updates": "342800", "lr": "0.000663838", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.9", "wall": "111896"} +[2022-08-02 08:33:34,980][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 08:34:07,361][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 08:34:08,672][train_inner][INFO] - {"epoch": 7, "update": 6.664, "loss": "2.257", "ppl": "4.78", "wps": "362072", "ups": "3.06", "wpb": "118308", "bsz": "256", "num_updates": "343000", "lr": "0.000663636", "gnorm": "0.745", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "111961"} +[2022-08-02 08:34:16,692][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 08:34:18,293][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 08:34:23,047][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 08:34:41,783][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-02 08:34:42,080][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 +[2022-08-02 08:35:14,826][train_inner][INFO] - {"epoch": 7, "update": 6.668, "loss": "2.283", "ppl": "4.87", "wps": "358054", "ups": "3.02", "wpb": "118433", "bsz": "256", "num_updates": "343200", "lr": "0.000663434", "gnorm": "1.374", "clip": "2.5", "loss_scale": "0.125", "train_wall": "66", "gb_free": "24.3", "wall": "112027"} +[2022-08-02 08:36:19,904][train_inner][INFO] - {"epoch": 7, "update": 6.672, "loss": "2.247", "ppl": "4.75", "wps": "362676", "ups": "3.07", "wpb": "118011", "bsz": "256", "num_updates": "343400", "lr": "0.000663232", "gnorm": "0.669", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.2", "wall": "112092"} +[2022-08-02 08:37:24,830][train_inner][INFO] - {"epoch": 7, "update": 6.676, "loss": "2.248", "ppl": "4.75", "wps": "366303", "ups": "3.08", "wpb": "118912", "bsz": "256", "num_updates": "343600", "lr": "0.00066303", "gnorm": "0.665", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.7", "wall": "112157"} +[2022-08-02 08:38:29,677][train_inner][INFO] - {"epoch": 7, "update": 6.68, "loss": "2.26", "ppl": "4.79", "wps": "366230", "ups": "3.08", "wpb": "118739", "bsz": "256", "num_updates": "343800", "lr": "0.000662828", "gnorm": "0.667", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "25", "wall": "112222"} +[2022-08-02 08:39:34,405][train_inner][INFO] - {"epoch": 7, "update": 6.684, "loss": "2.259", "ppl": "4.79", "wps": "367266", "ups": "3.09", "wpb": "118860", "bsz": "256", "num_updates": "344000", "lr": "0.000662626", "gnorm": "0.667", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.1", "wall": "112287"} +[2022-08-02 08:40:39,033][train_inner][INFO] - {"epoch": 7, "update": 6.688, "loss": "2.261", "ppl": "4.79", "wps": "366037", "ups": "3.09", "wpb": "118279", "bsz": "256", "num_updates": "344200", "lr": "0.000662424", "gnorm": "0.67", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.2", "wall": "112351"} +[2022-08-02 08:41:43,734][train_inner][INFO] - {"epoch": 7, "update": 6.692, "loss": "2.252", "ppl": "4.76", "wps": "365918", "ups": "3.09", "wpb": "118375", "bsz": "256", "num_updates": "344400", "lr": "0.000662222", "gnorm": "0.669", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.4", "wall": "112416"} +[2022-08-02 08:42:48,721][train_inner][INFO] - {"epoch": 7, "update": 6.696, "loss": "2.25", "ppl": "4.76", "wps": "363850", "ups": "3.08", "wpb": "118226", "bsz": "256", "num_updates": "344600", "lr": "0.00066202", "gnorm": "0.67", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "112481"} +[2022-08-02 08:43:53,589][train_inner][INFO] - {"epoch": 7, "update": 6.699, "loss": "2.257", "ppl": "4.78", "wps": "364483", "ups": "3.08", "wpb": "118216", "bsz": "256", "num_updates": "344800", "lr": "0.000661818", "gnorm": "0.671", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.2", "wall": "112546"} +[2022-08-02 08:44:58,168][train_inner][INFO] - {"epoch": 7, "update": 6.703, "loss": "2.26", "ppl": "4.79", "wps": "364490", "ups": "3.1", "wpb": "117690", "bsz": "256", "num_updates": "345000", "lr": "0.000661616", "gnorm": "0.672", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20", "wall": "112610"} +[2022-08-02 08:46:03,026][train_inner][INFO] - {"epoch": 7, "update": 6.707, "loss": "2.257", "ppl": "4.78", "wps": "365334", "ups": "3.08", "wpb": "118472", "bsz": "256", "num_updates": "345200", "lr": "0.000661414", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "112675"} +[2022-08-02 08:47:07,945][train_inner][INFO] - {"epoch": 7, "update": 6.711, "loss": "2.255", "ppl": "4.77", "wps": "364376", "ups": "3.08", "wpb": "118274", "bsz": "256", "num_updates": "345400", "lr": "0.000661212", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "26.6", "wall": "112740"} +[2022-08-02 08:48:12,715][train_inner][INFO] - {"epoch": 7, "update": 6.715, "loss": "2.259", "ppl": "4.79", "wps": "363657", "ups": "3.09", "wpb": "117768", "bsz": "256", "num_updates": "345600", "lr": "0.00066101", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20", "wall": "112805"} +[2022-08-02 08:49:18,673][train_inner][INFO] - {"epoch": 7, "update": 6.719, "loss": "2.257", "ppl": "4.78", "wps": "358272", "ups": "3.03", "wpb": "118154", "bsz": "256", "num_updates": "345800", "lr": "0.000660808", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "22.8", "wall": "112871"} +[2022-08-02 08:50:23,775][train_inner][INFO] - {"epoch": 7, "update": 6.723, "loss": "2.257", "ppl": "4.78", "wps": "362867", "ups": "3.07", "wpb": "118114", "bsz": "256", "num_updates": "346000", "lr": "0.000660606", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.9", "wall": "112936"} +[2022-08-02 08:51:28,460][train_inner][INFO] - {"epoch": 7, "update": 6.727, "loss": "2.251", "ppl": "4.76", "wps": "367298", "ups": "3.09", "wpb": "118793", "bsz": "256", "num_updates": "346200", "lr": "0.000660404", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "23.7", "wall": "113001"} +[2022-08-02 08:52:32,934][train_inner][INFO] - {"epoch": 7, "update": 6.731, "loss": "2.259", "ppl": "4.79", "wps": "367133", "ups": "3.1", "wpb": "118351", "bsz": "256", "num_updates": "346400", "lr": "0.000660202", "gnorm": "0.839", "clip": "1", "loss_scale": "0.25", "train_wall": "64", "gb_free": "27", "wall": "113065"} +[2022-08-02 08:53:37,923][train_inner][INFO] - {"epoch": 7, "update": 6.734, "loss": "2.259", "ppl": "4.79", "wps": "364073", "ups": "3.08", "wpb": "118302", "bsz": "256", "num_updates": "346600", "lr": "0.00066", "gnorm": "0.683", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21", "wall": "113130"} +[2022-08-02 08:54:42,906][train_inner][INFO] - {"epoch": 7, "update": 6.738, "loss": "2.26", "ppl": "4.79", "wps": "364111", "ups": "3.08", "wpb": "118304", "bsz": "256", "num_updates": "346800", "lr": "0.000659798", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.2", "wall": "113195"} +[2022-08-02 08:55:47,900][train_inner][INFO] - {"epoch": 7, "update": 6.742, "loss": "2.25", "ppl": "4.76", "wps": "363447", "ups": "3.08", "wpb": "118106", "bsz": "256", "num_updates": "347000", "lr": "0.000659596", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.3", "wall": "113260"} +[2022-08-02 08:56:53,819][train_inner][INFO] - {"epoch": 7, "update": 6.746, "loss": "2.255", "ppl": "4.77", "wps": "359142", "ups": "3.03", "wpb": "118370", "bsz": "256", "num_updates": "347200", "lr": "0.000659394", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "28.2", "wall": "113326"} +[2022-08-02 08:57:58,924][train_inner][INFO] - {"epoch": 7, "update": 6.75, "loss": "2.252", "ppl": "4.76", "wps": "364544", "ups": "3.07", "wpb": "118667", "bsz": "256", "num_updates": "347400", "lr": "0.000659192", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.3", "wall": "113391"} +[2022-08-02 08:59:03,436][train_inner][INFO] - {"epoch": 7, "update": 6.754, "loss": "2.272", "ppl": "4.83", "wps": "366060", "ups": "3.1", "wpb": "118074", "bsz": "256", "num_updates": "347600", "lr": "0.00065899", "gnorm": "0.88", "clip": "0.5", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.6", "wall": "113456"} +[2022-08-02 09:00:07,919][train_inner][INFO] - {"epoch": 7, "update": 6.758, "loss": "2.251", "ppl": "4.76", "wps": "366605", "ups": "3.1", "wpb": "118197", "bsz": "256", "num_updates": "347800", "lr": "0.000658788", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24.1", "wall": "113520"} +[2022-08-02 09:01:12,962][train_inner][INFO] - {"epoch": 7, "update": 6.762, "loss": "2.254", "ppl": "4.77", "wps": "363629", "ups": "3.07", "wpb": "118254", "bsz": "256", "num_updates": "348000", "lr": "0.000658586", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.3", "wall": "113585"} +[2022-08-02 09:02:17,831][train_inner][INFO] - {"epoch": 7, "update": 6.765, "loss": "2.252", "ppl": "4.76", "wps": "364770", "ups": "3.08", "wpb": "118309", "bsz": "256", "num_updates": "348200", "lr": "0.000658384", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.8", "wall": "113650"} +[2022-08-02 09:03:22,594][train_inner][INFO] - {"epoch": 7, "update": 6.769, "loss": "2.256", "ppl": "4.78", "wps": "365708", "ups": "3.09", "wpb": "118421", "bsz": "256", "num_updates": "348400", "lr": "0.000658182", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "113715"} +[2022-08-02 09:04:27,816][train_inner][INFO] - {"epoch": 7, "update": 6.773, "loss": "2.251", "ppl": "4.76", "wps": "363921", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "348600", "lr": "0.00065798", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.9", "wall": "113780"} +[2022-08-02 09:05:32,777][train_inner][INFO] - {"epoch": 7, "update": 6.777, "loss": "2.248", "ppl": "4.75", "wps": "363279", "ups": "3.08", "wpb": "117992", "bsz": "256", "num_updates": "348800", "lr": "0.000657778", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "113845"} +[2022-08-02 09:06:38,605][train_inner][INFO] - {"epoch": 7, "update": 6.781, "loss": "2.252", "ppl": "4.76", "wps": "358769", "ups": "3.04", "wpb": "118084", "bsz": "256", "num_updates": "349000", "lr": "0.000657576", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "113911"} +[2022-08-02 09:07:43,254][train_inner][INFO] - {"epoch": 7, "update": 6.785, "loss": "2.25", "ppl": "4.76", "wps": "367928", "ups": "3.09", "wpb": "118928", "bsz": "256", "num_updates": "349200", "lr": "0.000657374", "gnorm": "0.803", "clip": "0.5", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "113976"} +[2022-08-02 09:08:48,126][train_inner][INFO] - {"epoch": 7, "update": 6.789, "loss": "2.253", "ppl": "4.77", "wps": "364894", "ups": "3.08", "wpb": "118355", "bsz": "256", "num_updates": "349400", "lr": "0.000657172", "gnorm": "0.738", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "22.9", "wall": "114040"} +[2022-08-02 09:09:53,128][train_inner][INFO] - {"epoch": 7, "update": 6.793, "loss": "2.25", "ppl": "4.76", "wps": "362301", "ups": "3.08", "wpb": "117750", "bsz": "256", "num_updates": "349600", "lr": "0.00065697", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.7", "wall": "114105"} +[2022-08-02 09:10:58,314][train_inner][INFO] - {"epoch": 7, "update": 6.797, "loss": "2.258", "ppl": "4.78", "wps": "363041", "ups": "3.07", "wpb": "118323", "bsz": "256", "num_updates": "349800", "lr": "0.000656768", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "114171"} +[2022-08-02 09:12:03,490][train_inner][INFO] - {"epoch": 7, "update": 6.8, "loss": "2.249", "ppl": "4.75", "wps": "363076", "ups": "3.07", "wpb": "118317", "bsz": "256", "num_updates": "350000", "lr": "0.000656566", "gnorm": "0.667", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.1", "wall": "114236"} +[2022-08-02 09:12:03,491][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 09:12:26,104][valid][INFO] - {"epoch": 7, "valid_loss": "2.156", "valid_ppl": "4.46", "valid_wps": "1.61428e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "350000", "valid_best_loss": "2.156"} +[2022-08-02 09:12:26,107][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 7 @ 350000 updates +[2022-08-02 09:12:26,108][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_7_350000.pt +[2022-08-02 09:12:35,852][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_7_350000.pt +[2022-08-02 09:13:10,129][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_7_350000.pt (epoch 7 @ 350000 updates, score 2.156) (writing took 44.022107725963 seconds) +[2022-08-02 09:14:14,772][train_inner][INFO] - {"epoch": 7, "update": 6.804, "loss": "2.246", "ppl": "4.74", "wps": "180791", "ups": "1.52", "wpb": "118673", "bsz": "256", "num_updates": "350200", "lr": "0.000656364", "gnorm": "0.667", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.3", "wall": "114367"} +[2022-08-02 09:15:19,631][train_inner][INFO] - {"epoch": 7, "update": 6.808, "loss": "2.252", "ppl": "4.76", "wps": "366209", "ups": "3.08", "wpb": "118758", "bsz": "255.9", "num_updates": "350400", "lr": "0.000656162", "gnorm": "0.669", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.2", "wall": "114432"} +[2022-08-02 09:16:24,415][train_inner][INFO] - {"epoch": 7, "update": 6.812, "loss": "2.256", "ppl": "4.78", "wps": "364319", "ups": "3.09", "wpb": "118007", "bsz": "256", "num_updates": "350600", "lr": "0.00065596", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.7", "wall": "114497"} +[2022-08-02 09:17:28,770][train_inner][INFO] - {"epoch": 7, "update": 6.816, "loss": "2.256", "ppl": "4.78", "wps": "366979", "ups": "3.11", "wpb": "118084", "bsz": "256", "num_updates": "350800", "lr": "0.000655758", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "114561"} +[2022-08-02 09:18:33,494][train_inner][INFO] - {"epoch": 7, "update": 6.82, "loss": "2.257", "ppl": "4.78", "wps": "364630", "ups": "3.09", "wpb": "117999", "bsz": "256", "num_updates": "351000", "lr": "0.000655556", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23", "wall": "114626"} +[2022-08-02 09:19:38,484][train_inner][INFO] - {"epoch": 7, "update": 6.824, "loss": "2.248", "ppl": "4.75", "wps": "364988", "ups": "3.08", "wpb": "118600", "bsz": "256", "num_updates": "351200", "lr": "0.000655354", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.1", "wall": "114691"} +[2022-08-02 09:20:43,180][train_inner][INFO] - {"epoch": 7, "update": 6.828, "loss": "2.253", "ppl": "4.77", "wps": "364834", "ups": "3.09", "wpb": "118016", "bsz": "256", "num_updates": "351400", "lr": "0.000655152", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.5", "wall": "114756"} +[2022-08-02 09:21:48,069][train_inner][INFO] - {"epoch": 7, "update": 6.832, "loss": "2.25", "ppl": "4.76", "wps": "365634", "ups": "3.08", "wpb": "118625", "bsz": "256", "num_updates": "351600", "lr": "0.000654949", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.4", "wall": "114820"} +[2022-08-02 09:22:53,967][train_inner][INFO] - {"epoch": 7, "update": 6.835, "loss": "2.25", "ppl": "4.76", "wps": "358651", "ups": "3.04", "wpb": "118170", "bsz": "256", "num_updates": "351800", "lr": "0.000654747", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "26.1", "wall": "114886"} +[2022-08-02 09:23:59,376][train_inner][INFO] - {"epoch": 7, "update": 6.839, "loss": "2.251", "ppl": "4.76", "wps": "361600", "ups": "3.06", "wpb": "118258", "bsz": "256", "num_updates": "352000", "lr": "0.000654545", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.3", "wall": "114952"} +[2022-08-02 09:25:03,904][train_inner][INFO] - {"epoch": 7, "update": 6.843, "loss": "2.254", "ppl": "4.77", "wps": "366366", "ups": "3.1", "wpb": "118202", "bsz": "256", "num_updates": "352200", "lr": "0.000654343", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "115016"} +[2022-08-02 09:26:08,615][train_inner][INFO] - {"epoch": 7, "update": 6.847, "loss": "2.251", "ppl": "4.76", "wps": "364998", "ups": "3.09", "wpb": "118096", "bsz": "256", "num_updates": "352400", "lr": "0.000654141", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.8", "wall": "115081"} +[2022-08-02 09:27:13,810][train_inner][INFO] - {"epoch": 7, "update": 6.851, "loss": "2.25", "ppl": "4.76", "wps": "365282", "ups": "3.07", "wpb": "119070", "bsz": "256", "num_updates": "352600", "lr": "0.000653939", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.7", "wall": "115146"} +[2022-08-02 09:28:18,368][train_inner][INFO] - {"epoch": 7, "update": 6.855, "loss": "2.25", "ppl": "4.76", "wps": "364923", "ups": "3.1", "wpb": "117792", "bsz": "256", "num_updates": "352800", "lr": "0.000653737", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "115211"} +[2022-08-02 09:29:23,182][train_inner][INFO] - {"epoch": 7, "update": 6.859, "loss": "2.256", "ppl": "4.78", "wps": "363480", "ups": "3.09", "wpb": "117791", "bsz": "256", "num_updates": "353000", "lr": "0.000653535", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.6", "wall": "115276"} +[2022-08-02 09:30:28,327][train_inner][INFO] - {"epoch": 7, "update": 6.863, "loss": "2.25", "ppl": "4.76", "wps": "363746", "ups": "3.07", "wpb": "118479", "bsz": "256", "num_updates": "353200", "lr": "0.000653333", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.2", "wall": "115341"} +[2022-08-02 09:31:33,432][train_inner][INFO] - {"epoch": 7, "update": 6.866, "loss": "2.251", "ppl": "4.76", "wps": "363797", "ups": "3.07", "wpb": "118424", "bsz": "256", "num_updates": "353400", "lr": "0.000653131", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "115406"} +[2022-08-02 09:32:38,763][train_inner][INFO] - {"epoch": 7, "update": 6.87, "loss": "2.249", "ppl": "4.75", "wps": "365366", "ups": "3.06", "wpb": "119348", "bsz": "256", "num_updates": "353600", "lr": "0.000652929", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "115471"} +[2022-08-02 09:33:43,813][train_inner][INFO] - {"epoch": 7, "update": 6.874, "loss": "2.247", "ppl": "4.75", "wps": "363992", "ups": "3.07", "wpb": "118386", "bsz": "256", "num_updates": "353800", "lr": "0.000652727", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.9", "wall": "115536"} +[2022-08-02 09:34:48,868][train_inner][INFO] - {"epoch": 7, "update": 6.878, "loss": "2.249", "ppl": "4.75", "wps": "363538", "ups": "3.07", "wpb": "118247", "bsz": "256", "num_updates": "354000", "lr": "0.000652525", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "115601"} +[2022-08-02 09:35:54,223][train_inner][INFO] - {"epoch": 7, "update": 6.882, "loss": "2.249", "ppl": "4.75", "wps": "363239", "ups": "3.06", "wpb": "118697", "bsz": "256", "num_updates": "354200", "lr": "0.000652323", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "115667"} +[2022-08-02 09:37:00,261][train_inner][INFO] - {"epoch": 7, "update": 6.886, "loss": "2.25", "ppl": "4.76", "wps": "359934", "ups": "3.03", "wpb": "118844", "bsz": "256", "num_updates": "354400", "lr": "0.000652121", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "23", "wall": "115733"} +[2022-08-02 09:38:04,916][train_inner][INFO] - {"epoch": 7, "update": 6.89, "loss": "2.248", "ppl": "4.75", "wps": "366315", "ups": "3.09", "wpb": "118420", "bsz": "256", "num_updates": "354600", "lr": "0.000651919", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "115797"} +[2022-08-02 09:39:09,859][train_inner][INFO] - {"epoch": 7, "update": 6.894, "loss": "2.247", "ppl": "4.75", "wps": "364479", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "354800", "lr": "0.000651717", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "115862"} +[2022-08-02 09:40:14,810][train_inner][INFO] - {"epoch": 7, "update": 6.898, "loss": "2.246", "ppl": "4.74", "wps": "364723", "ups": "3.08", "wpb": "118444", "bsz": "256", "num_updates": "355000", "lr": "0.000651515", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "115927"} +[2022-08-02 09:41:19,685][train_inner][INFO] - {"epoch": 7, "update": 6.901, "loss": "2.255", "ppl": "4.77", "wps": "366395", "ups": "3.08", "wpb": "118848", "bsz": "256", "num_updates": "355200", "lr": "0.000651313", "gnorm": "0.72", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "115992"} +[2022-08-02 09:42:24,692][train_inner][INFO] - {"epoch": 7, "update": 6.905, "loss": "2.25", "ppl": "4.76", "wps": "363366", "ups": "3.08", "wpb": "118103", "bsz": "256", "num_updates": "355400", "lr": "0.000651111", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "116057"} +[2022-08-02 09:43:29,860][train_inner][INFO] - {"epoch": 7, "update": 6.909, "loss": "2.243", "ppl": "4.73", "wps": "363731", "ups": "3.07", "wpb": "118516", "bsz": "256", "num_updates": "355600", "lr": "0.000650909", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "116122"} +[2022-08-02 09:44:34,766][train_inner][INFO] - {"epoch": 7, "update": 6.913, "loss": "2.247", "ppl": "4.75", "wps": "363896", "ups": "3.08", "wpb": "118095", "bsz": "256", "num_updates": "355800", "lr": "0.000650707", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "116187"} +[2022-08-02 09:45:39,605][train_inner][INFO] - {"epoch": 7, "update": 6.917, "loss": "2.249", "ppl": "4.75", "wps": "363372", "ups": "3.08", "wpb": "117800", "bsz": "256", "num_updates": "356000", "lr": "0.000650505", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.8", "wall": "116252"} +[2022-08-02 09:46:44,471][train_inner][INFO] - {"epoch": 7, "update": 6.921, "loss": "2.246", "ppl": "4.74", "wps": "364839", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "356200", "lr": "0.000650303", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "116317"} +[2022-08-02 09:47:49,526][train_inner][INFO] - {"epoch": 7, "update": 6.925, "loss": "2.245", "ppl": "4.74", "wps": "363529", "ups": "3.07", "wpb": "118244", "bsz": "256", "num_updates": "356400", "lr": "0.000650101", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "116382"} +[2022-08-02 09:48:54,337][train_inner][INFO] - {"epoch": 7, "update": 6.929, "loss": "2.247", "ppl": "4.75", "wps": "365434", "ups": "3.09", "wpb": "118421", "bsz": "256", "num_updates": "356600", "lr": "0.000649899", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "116447"} +[2022-08-02 09:50:00,157][train_inner][INFO] - {"epoch": 7, "update": 6.933, "loss": "2.251", "ppl": "4.76", "wps": "358068", "ups": "3.04", "wpb": "117838", "bsz": "256", "num_updates": "356800", "lr": "0.000649697", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "116512"} +[2022-08-02 09:51:05,038][train_inner][INFO] - {"epoch": 7, "update": 6.936, "loss": "2.244", "ppl": "4.74", "wps": "364814", "ups": "3.08", "wpb": "118345", "bsz": "256", "num_updates": "357000", "lr": "0.000649495", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "116577"} +[2022-08-02 09:52:10,249][train_inner][INFO] - {"epoch": 7, "update": 6.94, "loss": "2.248", "ppl": "4.75", "wps": "363998", "ups": "3.07", "wpb": "118681", "bsz": "256", "num_updates": "357200", "lr": "0.000649293", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "116643"} +[2022-08-02 09:53:14,984][train_inner][INFO] - {"epoch": 7, "update": 6.944, "loss": "2.246", "ppl": "4.74", "wps": "365318", "ups": "3.09", "wpb": "118242", "bsz": "256", "num_updates": "357400", "lr": "0.000649091", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "116707"} +[2022-08-02 09:54:20,131][train_inner][INFO] - {"epoch": 7, "update": 6.948, "loss": "2.247", "ppl": "4.75", "wps": "364978", "ups": "3.07", "wpb": "118886", "bsz": "256", "num_updates": "357600", "lr": "0.000648889", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "116772"} +[2022-08-02 09:55:24,831][train_inner][INFO] - {"epoch": 7, "update": 6.952, "loss": "2.24", "ppl": "4.72", "wps": "366792", "ups": "3.09", "wpb": "118654", "bsz": "256", "num_updates": "357800", "lr": "0.000648687", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "116837"} +[2022-08-02 09:56:29,608][train_inner][INFO] - {"epoch": 7, "update": 6.956, "loss": "2.242", "ppl": "4.73", "wps": "367052", "ups": "3.09", "wpb": "118882", "bsz": "256", "num_updates": "358000", "lr": "0.000648485", "gnorm": "0.683", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "116902"} +[2022-08-02 09:57:34,739][train_inner][INFO] - {"epoch": 7, "update": 6.96, "loss": "2.243", "ppl": "4.73", "wps": "363703", "ups": "3.07", "wpb": "118438", "bsz": "256", "num_updates": "358200", "lr": "0.000648283", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "116967"} +[2022-08-02 09:58:39,465][train_inner][INFO] - {"epoch": 7, "update": 6.964, "loss": "2.246", "ppl": "4.74", "wps": "365227", "ups": "3.09", "wpb": "118198", "bsz": "256", "num_updates": "358400", "lr": "0.000648081", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "117032"} +[2022-08-02 09:59:44,206][train_inner][INFO] - {"epoch": 7, "update": 6.967, "loss": "2.247", "ppl": "4.75", "wps": "367064", "ups": "3.09", "wpb": "118818", "bsz": "256", "num_updates": "358600", "lr": "0.000647879", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "117097"} +[2022-08-02 10:00:05,683][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 10:00:14,087][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 10:00:49,499][train_inner][INFO] - {"epoch": 7, "update": 6.971, "loss": "2.245", "ppl": "4.74", "wps": "364347", "ups": "3.06", "wpb": "118946", "bsz": "256", "num_updates": "358800", "lr": "0.000647677", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "117162"} +[2022-08-02 10:01:54,436][train_inner][INFO] - {"epoch": 7, "update": 6.975, "loss": "2.254", "ppl": "4.77", "wps": "362577", "ups": "3.08", "wpb": "117721", "bsz": "256", "num_updates": "359000", "lr": "0.000647475", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.5", "wall": "117227"} +[2022-08-02 10:02:59,078][train_inner][INFO] - {"epoch": 7, "update": 6.979, "loss": "2.241", "ppl": "4.73", "wps": "367020", "ups": "3.09", "wpb": "118624", "bsz": "256", "num_updates": "359200", "lr": "0.000647273", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "117291"} +[2022-08-02 10:04:04,069][train_inner][INFO] - {"epoch": 7, "update": 6.983, "loss": "2.246", "ppl": "4.74", "wps": "362349", "ups": "3.08", "wpb": "117745", "bsz": "256", "num_updates": "359400", "lr": "0.000647071", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "117356"} +[2022-08-02 10:05:08,622][train_inner][INFO] - {"epoch": 7, "update": 6.987, "loss": "2.238", "ppl": "4.72", "wps": "369710", "ups": "3.1", "wpb": "119327", "bsz": "256", "num_updates": "359600", "lr": "0.000646869", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "117421"} +[2022-08-02 10:06:13,434][train_inner][INFO] - {"epoch": 7, "update": 6.991, "loss": "2.251", "ppl": "4.76", "wps": "363168", "ups": "3.09", "wpb": "117686", "bsz": "256", "num_updates": "359800", "lr": "0.000646667", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "117486"} +[2022-08-02 10:07:18,356][train_inner][INFO] - {"epoch": 7, "update": 6.995, "loss": "2.245", "ppl": "4.74", "wps": "365030", "ups": "3.08", "wpb": "118491", "bsz": "256", "num_updates": "360000", "lr": "0.000646465", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.6", "wall": "117551"} +[2022-08-02 10:08:23,021][train_inner][INFO] - {"epoch": 7, "update": 6.999, "loss": "2.247", "ppl": "4.75", "wps": "365725", "ups": "3.09", "wpb": "118246", "bsz": "256", "num_updates": "360200", "lr": "0.000646263", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "117615"} +[2022-08-02 10:08:46,673][fairseq_cli.train][INFO] - end of epoch 7 (average epoch stats below) +[2022-08-02 10:08:46,674][train][INFO] - {"epoch": 7, "train_loss": "2.262", "train_ppl": "4.8", "train_wps": "362538", "train_ups": "3.06", "train_wpb": "118297", "train_bsz": "256", "train_num_updates": "360273", "train_lr": "0.000646189", "train_gnorm": "0.677", "train_clip": "0", "train_loss_scale": "4", "train_train_wall": "16628", "train_gb_free": "20.5", "train_wall": "117639"} +[2022-08-02 10:08:46,787][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 10:08:46,790][fairseq.trainer][INFO] - begin training epoch 8 +[2022-08-02 10:08:46,790][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 10:09:42,204][train_inner][INFO] - {"epoch": 8, "update": 7.002, "loss": "2.248", "ppl": "4.75", "wps": "297619", "ups": "2.53", "wpb": "117831", "bsz": "255.4", "num_updates": "360400", "lr": "0.000646061", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "67", "gb_free": "20.1", "wall": "117695"} +[2022-08-02 10:10:47,103][train_inner][INFO] - {"epoch": 8, "update": 7.006, "loss": "2.244", "ppl": "4.74", "wps": "362919", "ups": "3.08", "wpb": "117765", "bsz": "256", "num_updates": "360600", "lr": "0.000645859", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "117759"} +[2022-08-02 10:11:51,790][train_inner][INFO] - {"epoch": 8, "update": 7.01, "loss": "2.246", "ppl": "4.75", "wps": "366016", "ups": "3.09", "wpb": "118380", "bsz": "256", "num_updates": "360800", "lr": "0.000645657", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "117824"} +[2022-08-02 10:12:56,650][train_inner][INFO] - {"epoch": 8, "update": 7.014, "loss": "2.24", "ppl": "4.72", "wps": "362696", "ups": "3.08", "wpb": "117621", "bsz": "256", "num_updates": "361000", "lr": "0.000645455", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "117889"} +[2022-08-02 10:14:01,753][train_inner][INFO] - {"epoch": 8, "update": 7.018, "loss": "2.243", "ppl": "4.73", "wps": "364172", "ups": "3.07", "wpb": "118541", "bsz": "255.9", "num_updates": "361200", "lr": "0.000645253", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "117954"} +[2022-08-02 10:15:06,903][train_inner][INFO] - {"epoch": 8, "update": 7.022, "loss": "2.243", "ppl": "4.73", "wps": "361486", "ups": "3.07", "wpb": "117752", "bsz": "256", "num_updates": "361400", "lr": "0.000645051", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "118019"} +[2022-08-02 10:16:11,961][train_inner][INFO] - {"epoch": 8, "update": 7.026, "loss": "2.242", "ppl": "4.73", "wps": "362869", "ups": "3.07", "wpb": "118035", "bsz": "256", "num_updates": "361600", "lr": "0.000644848", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "118084"} +[2022-08-02 10:17:16,781][train_inner][INFO] - {"epoch": 8, "update": 7.03, "loss": "2.242", "ppl": "4.73", "wps": "365451", "ups": "3.09", "wpb": "118442", "bsz": "256", "num_updates": "361800", "lr": "0.000644646", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "118149"} +[2022-08-02 10:17:42,933][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 10:18:22,100][train_inner][INFO] - {"epoch": 8, "update": 7.034, "loss": "2.245", "ppl": "4.74", "wps": "362025", "ups": "3.06", "wpb": "118234", "bsz": "256", "num_updates": "362000", "lr": "0.000644444", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "118214"} +[2022-08-02 10:19:26,831][train_inner][INFO] - {"epoch": 8, "update": 7.037, "loss": "2.236", "ppl": "4.71", "wps": "367702", "ups": "3.09", "wpb": "119008", "bsz": "256", "num_updates": "362200", "lr": "0.000644242", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "118279"} +[2022-08-02 10:20:31,905][train_inner][INFO] - {"epoch": 8, "update": 7.041, "loss": "2.242", "ppl": "4.73", "wps": "364066", "ups": "3.07", "wpb": "118456", "bsz": "256", "num_updates": "362400", "lr": "0.00064404", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.9", "wall": "118344"} +[2022-08-02 10:21:36,409][train_inner][INFO] - {"epoch": 8, "update": 7.045, "loss": "2.249", "ppl": "4.76", "wps": "363919", "ups": "3.1", "wpb": "117368", "bsz": "256", "num_updates": "362600", "lr": "0.000643838", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.7", "wall": "118409"} +[2022-08-02 10:22:41,627][train_inner][INFO] - {"epoch": 8, "update": 7.049, "loss": "2.247", "ppl": "4.75", "wps": "361353", "ups": "3.07", "wpb": "117832", "bsz": "256", "num_updates": "362800", "lr": "0.000643636", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "118474"} +[2022-08-02 10:23:46,684][train_inner][INFO] - {"epoch": 8, "update": 7.053, "loss": "2.244", "ppl": "4.74", "wps": "365819", "ups": "3.07", "wpb": "118995", "bsz": "256", "num_updates": "363000", "lr": "0.000643434", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.7", "wall": "118539"} +[2022-08-02 10:24:51,365][train_inner][INFO] - {"epoch": 8, "update": 7.057, "loss": "2.245", "ppl": "4.74", "wps": "365470", "ups": "3.09", "wpb": "118192", "bsz": "256", "num_updates": "363200", "lr": "0.000643232", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "118604"} +[2022-08-02 10:25:57,091][train_inner][INFO] - {"epoch": 8, "update": 7.061, "loss": "2.247", "ppl": "4.75", "wps": "360772", "ups": "3.04", "wpb": "118559", "bsz": "256", "num_updates": "363400", "lr": "0.00064303", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "118669"} +[2022-08-02 10:27:02,423][train_inner][INFO] - {"epoch": 8, "update": 7.065, "loss": "2.239", "ppl": "4.72", "wps": "364553", "ups": "3.06", "wpb": "119083", "bsz": "256", "num_updates": "363600", "lr": "0.000642828", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "118735"} +[2022-08-02 10:28:07,330][train_inner][INFO] - {"epoch": 8, "update": 7.069, "loss": "2.238", "ppl": "4.72", "wps": "364757", "ups": "3.08", "wpb": "118374", "bsz": "256", "num_updates": "363800", "lr": "0.000642626", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.7", "wall": "118800"} +[2022-08-02 10:29:12,264][train_inner][INFO] - {"epoch": 8, "update": 7.072, "loss": "2.244", "ppl": "4.74", "wps": "364808", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "364000", "lr": "0.000642424", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "118865"} +[2022-08-02 10:30:16,776][train_inner][INFO] - {"epoch": 8, "update": 7.076, "loss": "2.238", "ppl": "4.72", "wps": "366773", "ups": "3.1", "wpb": "118303", "bsz": "256", "num_updates": "364200", "lr": "0.000642222", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "118929"} +[2022-08-02 10:31:21,720][train_inner][INFO] - {"epoch": 8, "update": 7.08, "loss": "2.242", "ppl": "4.73", "wps": "364398", "ups": "3.08", "wpb": "118326", "bsz": "256", "num_updates": "364400", "lr": "0.00064202", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "118994"} +[2022-08-02 10:32:26,944][train_inner][INFO] - {"epoch": 8, "update": 7.084, "loss": "2.236", "ppl": "4.71", "wps": "364279", "ups": "3.07", "wpb": "118797", "bsz": "256", "num_updates": "364600", "lr": "0.000641818", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "119059"} +[2022-08-02 10:33:31,597][train_inner][INFO] - {"epoch": 8, "update": 7.088, "loss": "2.239", "ppl": "4.72", "wps": "365903", "ups": "3.09", "wpb": "118282", "bsz": "256", "num_updates": "364800", "lr": "0.000641616", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "119124"} +[2022-08-02 10:34:36,324][train_inner][INFO] - {"epoch": 8, "update": 7.092, "loss": "2.241", "ppl": "4.73", "wps": "363436", "ups": "3.09", "wpb": "117619", "bsz": "256", "num_updates": "365000", "lr": "0.000641414", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "119189"} +[2022-08-02 10:35:41,304][train_inner][INFO] - {"epoch": 8, "update": 7.096, "loss": "2.236", "ppl": "4.71", "wps": "365073", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "365200", "lr": "0.000641212", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "119254"} +[2022-08-02 10:36:46,187][train_inner][INFO] - {"epoch": 8, "update": 7.1, "loss": "2.239", "ppl": "4.72", "wps": "365202", "ups": "3.08", "wpb": "118475", "bsz": "256", "num_updates": "365400", "lr": "0.00064101", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "119319"} +[2022-08-02 10:37:51,291][train_inner][INFO] - {"epoch": 8, "update": 7.103, "loss": "2.24", "ppl": "4.72", "wps": "362046", "ups": "3.07", "wpb": "117851", "bsz": "256", "num_updates": "365600", "lr": "0.000640808", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "119384"} +[2022-08-02 10:38:56,312][train_inner][INFO] - {"epoch": 8, "update": 7.107, "loss": "2.232", "ppl": "4.7", "wps": "363555", "ups": "3.08", "wpb": "118192", "bsz": "256", "num_updates": "365800", "lr": "0.000640606", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "119449"} +[2022-08-02 10:40:00,820][train_inner][INFO] - {"epoch": 8, "update": 7.111, "loss": "2.248", "ppl": "4.75", "wps": "365384", "ups": "3.1", "wpb": "117850", "bsz": "256", "num_updates": "366000", "lr": "0.000640404", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "119513"} +[2022-08-02 10:41:05,818][train_inner][INFO] - {"epoch": 8, "update": 7.115, "loss": "2.238", "ppl": "4.72", "wps": "364503", "ups": "3.08", "wpb": "118459", "bsz": "256", "num_updates": "366200", "lr": "0.000640202", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "119578"} +[2022-08-02 10:42:10,689][train_inner][INFO] - {"epoch": 8, "update": 7.119, "loss": "2.246", "ppl": "4.74", "wps": "364242", "ups": "3.08", "wpb": "118141", "bsz": "256", "num_updates": "366400", "lr": "0.00064", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "119643"} +[2022-08-02 10:43:15,540][train_inner][INFO] - {"epoch": 8, "update": 7.123, "loss": "2.238", "ppl": "4.72", "wps": "363514", "ups": "3.08", "wpb": "117871", "bsz": "256", "num_updates": "366600", "lr": "0.000639798", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "119708"} +[2022-08-02 10:44:20,366][train_inner][INFO] - {"epoch": 8, "update": 7.127, "loss": "2.24", "ppl": "4.72", "wps": "364497", "ups": "3.09", "wpb": "118142", "bsz": "256", "num_updates": "366800", "lr": "0.000639596", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "119773"} +[2022-08-02 10:45:25,057][train_inner][INFO] - {"epoch": 8, "update": 7.131, "loss": "2.242", "ppl": "4.73", "wps": "364134", "ups": "3.09", "wpb": "117778", "bsz": "256", "num_updates": "367000", "lr": "0.000639394", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "119837"} +[2022-08-02 10:46:30,202][train_inner][INFO] - {"epoch": 8, "update": 7.135, "loss": "2.244", "ppl": "4.74", "wps": "363816", "ups": "3.07", "wpb": "118504", "bsz": "256", "num_updates": "367200", "lr": "0.000639192", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "119903"} +[2022-08-02 10:47:34,990][train_inner][INFO] - {"epoch": 8, "update": 7.138, "loss": "2.247", "ppl": "4.75", "wps": "363841", "ups": "3.09", "wpb": "117860", "bsz": "256", "num_updates": "367400", "lr": "0.00063899", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "119967"} +[2022-08-02 10:48:40,231][train_inner][INFO] - {"epoch": 8, "update": 7.142, "loss": "2.24", "ppl": "4.72", "wps": "362273", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "367600", "lr": "0.000638788", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "120033"} +[2022-08-02 10:49:44,986][train_inner][INFO] - {"epoch": 8, "update": 7.146, "loss": "2.237", "ppl": "4.71", "wps": "366262", "ups": "3.09", "wpb": "118586", "bsz": "256", "num_updates": "367800", "lr": "0.000638586", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "120097"} +[2022-08-02 10:50:49,573][train_inner][INFO] - {"epoch": 8, "update": 7.15, "loss": "2.234", "ppl": "4.71", "wps": "368034", "ups": "3.1", "wpb": "118849", "bsz": "256", "num_updates": "368000", "lr": "0.000638384", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.8", "wall": "120162"} +[2022-08-02 10:51:54,535][train_inner][INFO] - {"epoch": 8, "update": 7.154, "loss": "2.237", "ppl": "4.72", "wps": "364844", "ups": "3.08", "wpb": "118502", "bsz": "256", "num_updates": "368200", "lr": "0.000638182", "gnorm": "0.665", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.7", "wall": "120227"} +[2022-08-02 10:53:00,944][train_inner][INFO] - {"epoch": 8, "update": 7.158, "loss": "2.235", "ppl": "4.71", "wps": "356166", "ups": "3.01", "wpb": "118261", "bsz": "256", "num_updates": "368400", "lr": "0.00063798", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "19.9", "wall": "120293"} +[2022-08-02 10:54:05,802][train_inner][INFO] - {"epoch": 8, "update": 7.162, "loss": "2.242", "ppl": "4.73", "wps": "362295", "ups": "3.08", "wpb": "117488", "bsz": "256", "num_updates": "368600", "lr": "0.000637778", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "120358"} +[2022-08-02 10:55:10,655][train_inner][INFO] - {"epoch": 8, "update": 7.166, "loss": "2.239", "ppl": "4.72", "wps": "363290", "ups": "3.08", "wpb": "117801", "bsz": "256", "num_updates": "368800", "lr": "0.000637576", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "120423"} +[2022-08-02 10:55:22,162][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 10:56:16,957][train_inner][INFO] - {"epoch": 8, "update": 7.17, "loss": "2.238", "ppl": "4.72", "wps": "357094", "ups": "3.02", "wpb": "118378", "bsz": "256", "num_updates": "369000", "lr": "0.000637374", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20", "wall": "120489"} +[2022-08-02 10:57:21,153][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 10:57:21,732][train_inner][INFO] - {"epoch": 8, "update": 7.173, "loss": "2.229", "ppl": "4.69", "wps": "365071", "ups": "3.09", "wpb": "118237", "bsz": "256", "num_updates": "369200", "lr": "0.000637172", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "120554"} +[2022-08-02 10:57:33,778][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 10:58:27,114][train_inner][INFO] - {"epoch": 8, "update": 7.177, "loss": "2.244", "ppl": "4.74", "wps": "362040", "ups": "3.06", "wpb": "118354", "bsz": "256", "num_updates": "369400", "lr": "0.00063697", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "120619"} +[2022-08-02 10:59:31,810][train_inner][INFO] - {"epoch": 8, "update": 7.181, "loss": "2.241", "ppl": "4.73", "wps": "363960", "ups": "3.09", "wpb": "117733", "bsz": "256", "num_updates": "369600", "lr": "0.000636768", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "120684"} +[2022-08-02 11:00:36,347][train_inner][INFO] - {"epoch": 8, "update": 7.185, "loss": "2.243", "ppl": "4.73", "wps": "366402", "ups": "3.1", "wpb": "118230", "bsz": "256", "num_updates": "369800", "lr": "0.000636566", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "120749"} +[2022-08-02 11:01:41,414][train_inner][INFO] - {"epoch": 8, "update": 7.189, "loss": "2.237", "ppl": "4.71", "wps": "363842", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "370000", "lr": "0.000636364", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.1", "wall": "120814"} +[2022-08-02 11:02:46,283][train_inner][INFO] - {"epoch": 8, "update": 7.193, "loss": "2.238", "ppl": "4.72", "wps": "363014", "ups": "3.08", "wpb": "117739", "bsz": "256", "num_updates": "370200", "lr": "0.000636162", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "120879"} +[2022-08-02 11:03:51,388][train_inner][INFO] - {"epoch": 8, "update": 7.197, "loss": "2.24", "ppl": "4.73", "wps": "362909", "ups": "3.07", "wpb": "118134", "bsz": "256", "num_updates": "370400", "lr": "0.00063596", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24", "wall": "120944"} +[2022-08-02 11:04:56,059][train_inner][INFO] - {"epoch": 8, "update": 7.201, "loss": "2.232", "ppl": "4.7", "wps": "367052", "ups": "3.09", "wpb": "118686", "bsz": "256", "num_updates": "370600", "lr": "0.000635758", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "121008"} +[2022-08-02 11:06:00,754][train_inner][INFO] - {"epoch": 8, "update": 7.205, "loss": "2.246", "ppl": "4.74", "wps": "364930", "ups": "3.09", "wpb": "118045", "bsz": "256", "num_updates": "370800", "lr": "0.000635556", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "121073"} +[2022-08-02 11:07:05,385][train_inner][INFO] - {"epoch": 8, "update": 7.208, "loss": "2.239", "ppl": "4.72", "wps": "367103", "ups": "3.09", "wpb": "118629", "bsz": "256", "num_updates": "371000", "lr": "0.000635354", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "121138"} +[2022-08-02 11:08:09,942][train_inner][INFO] - {"epoch": 8, "update": 7.212, "loss": "2.235", "ppl": "4.71", "wps": "366362", "ups": "3.1", "wpb": "118254", "bsz": "256", "num_updates": "371200", "lr": "0.000635152", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "121202"} +[2022-08-02 11:09:07,751][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 11:09:14,922][train_inner][INFO] - {"epoch": 8, "update": 7.216, "loss": "2.244", "ppl": "4.74", "wps": "363420", "ups": "3.08", "wpb": "118075", "bsz": "256", "num_updates": "371400", "lr": "0.000634949", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.8", "wall": "121267"} +[2022-08-02 11:10:19,848][train_inner][INFO] - {"epoch": 8, "update": 7.22, "loss": "2.24", "ppl": "4.73", "wps": "364150", "ups": "3.08", "wpb": "118212", "bsz": "256", "num_updates": "371600", "lr": "0.000634747", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "121332"} +[2022-08-02 11:11:24,321][train_inner][INFO] - {"epoch": 8, "update": 7.224, "loss": "2.237", "ppl": "4.72", "wps": "367252", "ups": "3.1", "wpb": "118387", "bsz": "256", "num_updates": "371800", "lr": "0.000634545", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "121397"} +[2022-08-02 11:12:29,507][train_inner][INFO] - {"epoch": 8, "update": 7.228, "loss": "2.24", "ppl": "4.73", "wps": "363982", "ups": "3.07", "wpb": "118630", "bsz": "256", "num_updates": "372000", "lr": "0.000634343", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "121462"} +[2022-08-02 11:13:34,692][train_inner][INFO] - {"epoch": 8, "update": 7.232, "loss": "2.239", "ppl": "4.72", "wps": "364091", "ups": "3.07", "wpb": "118665", "bsz": "256", "num_updates": "372200", "lr": "0.000634141", "gnorm": "0.713", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.2", "wall": "121527"} +[2022-08-02 11:14:39,207][train_inner][INFO] - {"epoch": 8, "update": 7.236, "loss": "2.239", "ppl": "4.72", "wps": "364685", "ups": "3.1", "wpb": "117637", "bsz": "256", "num_updates": "372400", "lr": "0.000633939", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "121592"} +[2022-08-02 11:15:43,978][train_inner][INFO] - {"epoch": 8, "update": 7.24, "loss": "2.229", "ppl": "4.69", "wps": "364323", "ups": "3.09", "wpb": "117984", "bsz": "256", "num_updates": "372600", "lr": "0.000633737", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23", "wall": "121656"} +[2022-08-02 11:16:48,591][train_inner][INFO] - {"epoch": 8, "update": 7.243, "loss": "2.233", "ppl": "4.7", "wps": "367080", "ups": "3.1", "wpb": "118590", "bsz": "256", "num_updates": "372800", "lr": "0.000633535", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "121721"} +[2022-08-02 11:17:53,327][train_inner][INFO] - {"epoch": 8, "update": 7.247, "loss": "2.237", "ppl": "4.72", "wps": "364741", "ups": "3.09", "wpb": "118057", "bsz": "256", "num_updates": "373000", "lr": "0.000633333", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "121786"} +[2022-08-02 11:18:58,154][train_inner][INFO] - {"epoch": 8, "update": 7.251, "loss": "2.237", "ppl": "4.71", "wps": "364039", "ups": "3.09", "wpb": "117995", "bsz": "256", "num_updates": "373200", "lr": "0.000633131", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "121850"} +[2022-08-02 11:20:02,908][train_inner][INFO] - {"epoch": 8, "update": 7.255, "loss": "2.241", "ppl": "4.73", "wps": "365073", "ups": "3.09", "wpb": "118199", "bsz": "256", "num_updates": "373400", "lr": "0.000632929", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "121915"} +[2022-08-02 11:21:08,212][train_inner][INFO] - {"epoch": 8, "update": 7.259, "loss": "2.239", "ppl": "4.72", "wps": "360473", "ups": "3.06", "wpb": "117699", "bsz": "256", "num_updates": "373600", "lr": "0.000632727", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "121981"} +[2022-08-02 11:22:13,386][train_inner][INFO] - {"epoch": 8, "update": 7.263, "loss": "2.239", "ppl": "4.72", "wps": "362402", "ups": "3.07", "wpb": "118094", "bsz": "256", "num_updates": "373800", "lr": "0.000632525", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "122046"} +[2022-08-02 11:23:18,236][train_inner][INFO] - {"epoch": 8, "update": 7.267, "loss": "2.24", "ppl": "4.72", "wps": "364593", "ups": "3.08", "wpb": "118219", "bsz": "256", "num_updates": "374000", "lr": "0.000632323", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "122111"} +[2022-08-02 11:24:22,896][train_inner][INFO] - {"epoch": 8, "update": 7.271, "loss": "2.233", "ppl": "4.7", "wps": "366963", "ups": "3.09", "wpb": "118637", "bsz": "256", "num_updates": "374200", "lr": "0.000632121", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "122175"} +[2022-08-02 11:25:27,904][train_inner][INFO] - {"epoch": 8, "update": 7.274, "loss": "2.239", "ppl": "4.72", "wps": "364185", "ups": "3.08", "wpb": "118372", "bsz": "256", "num_updates": "374400", "lr": "0.000631919", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "122240"} +[2022-08-02 11:26:32,531][train_inner][INFO] - {"epoch": 8, "update": 7.278, "loss": "2.238", "ppl": "4.72", "wps": "365163", "ups": "3.09", "wpb": "117995", "bsz": "256", "num_updates": "374600", "lr": "0.000631717", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "122305"} +[2022-08-02 11:27:37,431][train_inner][INFO] - {"epoch": 8, "update": 7.282, "loss": "2.235", "ppl": "4.71", "wps": "364421", "ups": "3.08", "wpb": "118252", "bsz": "256", "num_updates": "374800", "lr": "0.000631515", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "122370"} +[2022-08-02 11:28:42,451][train_inner][INFO] - {"epoch": 8, "update": 7.286, "loss": "2.239", "ppl": "4.72", "wps": "365376", "ups": "3.08", "wpb": "118781", "bsz": "256", "num_updates": "375000", "lr": "0.000631313", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "122435"} +[2022-08-02 11:29:47,489][train_inner][INFO] - {"epoch": 8, "update": 7.29, "loss": "2.232", "ppl": "4.7", "wps": "364336", "ups": "3.08", "wpb": "118478", "bsz": "256", "num_updates": "375200", "lr": "0.000631111", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "122500"} +[2022-08-02 11:30:52,364][train_inner][INFO] - {"epoch": 8, "update": 7.294, "loss": "2.231", "ppl": "4.7", "wps": "366170", "ups": "3.08", "wpb": "118774", "bsz": "256", "num_updates": "375400", "lr": "0.000630909", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "122565"} +[2022-08-02 11:31:57,189][train_inner][INFO] - {"epoch": 8, "update": 7.298, "loss": "2.24", "ppl": "4.72", "wps": "364146", "ups": "3.09", "wpb": "118027", "bsz": "256", "num_updates": "375600", "lr": "0.000630707", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "122630"} +[2022-08-02 11:33:02,034][train_inner][INFO] - {"epoch": 8, "update": 7.302, "loss": "2.237", "ppl": "4.72", "wps": "364490", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "375800", "lr": "0.000630505", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "122694"} +[2022-08-02 11:33:28,293][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 11:34:07,351][train_inner][INFO] - {"epoch": 8, "update": 7.306, "loss": "2.234", "ppl": "4.71", "wps": "361594", "ups": "3.06", "wpb": "118090", "bsz": "256", "num_updates": "376000", "lr": "0.000630303", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "122760"} +[2022-08-02 11:35:06,870][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 11:35:12,459][train_inner][INFO] - {"epoch": 8, "update": 7.309, "loss": "2.234", "ppl": "4.7", "wps": "363751", "ups": "3.07", "wpb": "118415", "bsz": "256", "num_updates": "376200", "lr": "0.000630101", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "122825"} +[2022-08-02 11:36:17,490][train_inner][INFO] - {"epoch": 8, "update": 7.313, "loss": "2.235", "ppl": "4.71", "wps": "363832", "ups": "3.08", "wpb": "118301", "bsz": "256", "num_updates": "376400", "lr": "0.000629899", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "122890"} +[2022-08-02 11:37:22,384][train_inner][INFO] - {"epoch": 8, "update": 7.317, "loss": "2.241", "ppl": "4.73", "wps": "364448", "ups": "3.08", "wpb": "118251", "bsz": "256", "num_updates": "376600", "lr": "0.000629697", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.5", "wall": "122955"} +[2022-08-02 11:38:27,303][train_inner][INFO] - {"epoch": 8, "update": 7.321, "loss": "2.233", "ppl": "4.7", "wps": "365570", "ups": "3.08", "wpb": "118660", "bsz": "256", "num_updates": "376800", "lr": "0.000629495", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "123020"} +[2022-08-02 11:39:32,239][train_inner][INFO] - {"epoch": 8, "update": 7.325, "loss": "2.232", "ppl": "4.7", "wps": "363851", "ups": "3.08", "wpb": "118134", "bsz": "256", "num_updates": "377000", "lr": "0.000629293", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "123085"} +[2022-08-02 11:40:37,038][train_inner][INFO] - {"epoch": 8, "update": 7.329, "loss": "2.232", "ppl": "4.7", "wps": "363624", "ups": "3.09", "wpb": "117811", "bsz": "256", "num_updates": "377200", "lr": "0.000629091", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "123149"} +[2022-08-02 11:41:41,836][train_inner][INFO] - {"epoch": 8, "update": 7.333, "loss": "2.236", "ppl": "4.71", "wps": "365469", "ups": "3.09", "wpb": "118406", "bsz": "256", "num_updates": "377400", "lr": "0.000628889", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "123214"} +[2022-08-02 11:42:46,839][train_inner][INFO] - {"epoch": 8, "update": 7.337, "loss": "2.235", "ppl": "4.71", "wps": "364604", "ups": "3.08", "wpb": "118498", "bsz": "256", "num_updates": "377600", "lr": "0.000628687", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.6", "wall": "123279"} +[2022-08-02 11:43:51,864][train_inner][INFO] - {"epoch": 8, "update": 7.341, "loss": "2.231", "ppl": "4.69", "wps": "363729", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "377800", "lr": "0.000628485", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "123344"} +[2022-08-02 11:44:56,960][train_inner][INFO] - {"epoch": 8, "update": 7.344, "loss": "2.233", "ppl": "4.7", "wps": "365092", "ups": "3.07", "wpb": "118829", "bsz": "256", "num_updates": "378000", "lr": "0.000628283", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "123409"} +[2022-08-02 11:46:02,165][train_inner][INFO] - {"epoch": 8, "update": 7.348, "loss": "2.233", "ppl": "4.7", "wps": "362270", "ups": "3.07", "wpb": "118106", "bsz": "256", "num_updates": "378200", "lr": "0.000628081", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "123474"} +[2022-08-02 11:47:06,853][train_inner][INFO] - {"epoch": 8, "update": 7.352, "loss": "2.233", "ppl": "4.7", "wps": "367548", "ups": "3.09", "wpb": "118878", "bsz": "256", "num_updates": "378400", "lr": "0.000627879", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "123539"} +[2022-08-02 11:48:12,259][train_inner][INFO] - {"epoch": 8, "update": 7.356, "loss": "2.23", "ppl": "4.69", "wps": "362229", "ups": "3.06", "wpb": "118459", "bsz": "256", "num_updates": "378600", "lr": "0.000627677", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "123605"} +[2022-08-02 11:49:16,839][train_inner][INFO] - {"epoch": 8, "update": 7.36, "loss": "2.232", "ppl": "4.7", "wps": "365480", "ups": "3.1", "wpb": "118009", "bsz": "256", "num_updates": "378800", "lr": "0.000627475", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "123669"} +[2022-08-02 11:50:22,048][train_inner][INFO] - {"epoch": 8, "update": 7.364, "loss": "2.23", "ppl": "4.69", "wps": "362618", "ups": "3.07", "wpb": "118228", "bsz": "256", "num_updates": "379000", "lr": "0.000627273", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "123734"} +[2022-08-02 11:51:26,704][train_inner][INFO] - {"epoch": 8, "update": 7.368, "loss": "2.238", "ppl": "4.72", "wps": "364570", "ups": "3.09", "wpb": "117856", "bsz": "256", "num_updates": "379200", "lr": "0.000627071", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "123799"} +[2022-08-02 11:52:31,172][train_inner][INFO] - {"epoch": 8, "update": 7.372, "loss": "2.238", "ppl": "4.72", "wps": "365930", "ups": "3.1", "wpb": "117952", "bsz": "256", "num_updates": "379400", "lr": "0.000626869", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "123863"} +[2022-08-02 11:53:35,797][train_inner][INFO] - {"epoch": 8, "update": 7.376, "loss": "2.233", "ppl": "4.7", "wps": "365968", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "379600", "lr": "0.000626667", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "123928"} +[2022-08-02 11:54:40,334][train_inner][INFO] - {"epoch": 8, "update": 7.379, "loss": "2.232", "ppl": "4.7", "wps": "367219", "ups": "3.1", "wpb": "118494", "bsz": "256", "num_updates": "379800", "lr": "0.000626465", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "123993"} +[2022-08-02 11:55:45,396][train_inner][INFO] - {"epoch": 8, "update": 7.383, "loss": "2.233", "ppl": "4.7", "wps": "365257", "ups": "3.07", "wpb": "118819", "bsz": "256", "num_updates": "380000", "lr": "0.000626263", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "124058"} +[2022-08-02 11:56:51,423][train_inner][INFO] - {"epoch": 8, "update": 7.387, "loss": "2.237", "ppl": "4.71", "wps": "358438", "ups": "3.03", "wpb": "118332", "bsz": "256", "num_updates": "380200", "lr": "0.000626061", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "124124"} +[2022-08-02 11:57:56,509][train_inner][INFO] - {"epoch": 8, "update": 7.391, "loss": "2.228", "ppl": "4.69", "wps": "364620", "ups": "3.07", "wpb": "118655", "bsz": "256", "num_updates": "380400", "lr": "0.000625859", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "124189"} +[2022-08-02 11:59:01,541][train_inner][INFO] - {"epoch": 8, "update": 7.395, "loss": "2.231", "ppl": "4.7", "wps": "364460", "ups": "3.08", "wpb": "118507", "bsz": "256", "num_updates": "380600", "lr": "0.000625657", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "124254"} +[2022-08-02 12:00:06,348][train_inner][INFO] - {"epoch": 8, "update": 7.399, "loss": "2.232", "ppl": "4.7", "wps": "364752", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "380800", "lr": "0.000625455", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "124319"} +[2022-08-02 12:01:11,019][train_inner][INFO] - {"epoch": 8, "update": 7.403, "loss": "2.235", "ppl": "4.71", "wps": "364552", "ups": "3.09", "wpb": "117878", "bsz": "256", "num_updates": "381000", "lr": "0.000625253", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "124383"} +[2022-08-02 12:02:15,918][train_inner][INFO] - {"epoch": 8, "update": 7.407, "loss": "2.233", "ppl": "4.7", "wps": "363529", "ups": "3.08", "wpb": "117962", "bsz": "256", "num_updates": "381200", "lr": "0.000625051", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "124448"} +[2022-08-02 12:03:20,475][train_inner][INFO] - {"epoch": 8, "update": 7.41, "loss": "2.233", "ppl": "4.7", "wps": "366839", "ups": "3.1", "wpb": "118409", "bsz": "256", "num_updates": "381400", "lr": "0.000624848", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "124513"} +[2022-08-02 12:04:25,576][train_inner][INFO] - {"epoch": 8, "update": 7.414, "loss": "2.23", "ppl": "4.69", "wps": "364698", "ups": "3.07", "wpb": "118708", "bsz": "256", "num_updates": "381600", "lr": "0.000624646", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "124578"} +[2022-08-02 12:05:30,419][train_inner][INFO] - {"epoch": 8, "update": 7.418, "loss": "2.236", "ppl": "4.71", "wps": "365304", "ups": "3.08", "wpb": "118436", "bsz": "256", "num_updates": "381800", "lr": "0.000624444", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "124643"} +[2022-08-02 12:06:35,288][train_inner][INFO] - {"epoch": 8, "update": 7.422, "loss": "2.235", "ppl": "4.71", "wps": "364476", "ups": "3.08", "wpb": "118214", "bsz": "256", "num_updates": "382000", "lr": "0.000624242", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.7", "wall": "124708"} +[2022-08-02 12:07:40,162][train_inner][INFO] - {"epoch": 8, "update": 7.426, "loss": "2.234", "ppl": "4.7", "wps": "365899", "ups": "3.08", "wpb": "118685", "bsz": "256", "num_updates": "382200", "lr": "0.00062404", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "124772"} +[2022-08-02 12:08:44,948][train_inner][INFO] - {"epoch": 8, "update": 7.43, "loss": "2.231", "ppl": "4.69", "wps": "365052", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "382400", "lr": "0.000623838", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.9", "wall": "124837"} +[2022-08-02 12:09:50,006][train_inner][INFO] - {"epoch": 8, "update": 7.434, "loss": "2.223", "ppl": "4.67", "wps": "366722", "ups": "3.07", "wpb": "119289", "bsz": "256", "num_updates": "382600", "lr": "0.000623636", "gnorm": "0.663", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "124902"} +[2022-08-02 12:10:55,069][train_inner][INFO] - {"epoch": 8, "update": 7.438, "loss": "2.231", "ppl": "4.69", "wps": "362669", "ups": "3.07", "wpb": "117980", "bsz": "256", "num_updates": "382800", "lr": "0.000623434", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "124967"} +[2022-08-02 12:11:56,465][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 12:12:00,398][train_inner][INFO] - {"epoch": 8, "update": 7.442, "loss": "2.23", "ppl": "4.69", "wps": "362072", "ups": "3.06", "wpb": "118269", "bsz": "256", "num_updates": "383000", "lr": "0.000623232", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "125033"} +[2022-08-02 12:12:05,239][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 12:13:05,897][train_inner][INFO] - {"epoch": 8, "update": 7.445, "loss": "2.222", "ppl": "4.67", "wps": "361917", "ups": "3.05", "wpb": "118525", "bsz": "256", "num_updates": "383200", "lr": "0.00062303", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "125098"} +[2022-08-02 12:14:10,659][train_inner][INFO] - {"epoch": 8, "update": 7.449, "loss": "2.231", "ppl": "4.69", "wps": "366476", "ups": "3.09", "wpb": "118667", "bsz": "256", "num_updates": "383400", "lr": "0.000622828", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "125163"} +[2022-08-02 12:15:15,252][train_inner][INFO] - {"epoch": 8, "update": 7.453, "loss": "2.232", "ppl": "4.7", "wps": "364998", "ups": "3.1", "wpb": "117880", "bsz": "256", "num_updates": "383600", "lr": "0.000622626", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "125228"} +[2022-08-02 12:16:20,225][train_inner][INFO] - {"epoch": 8, "update": 7.457, "loss": "2.235", "ppl": "4.71", "wps": "363554", "ups": "3.08", "wpb": "118104", "bsz": "256", "num_updates": "383800", "lr": "0.000622424", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "125293"} +[2022-08-02 12:17:25,027][train_inner][INFO] - {"epoch": 8, "update": 7.461, "loss": "2.226", "ppl": "4.68", "wps": "364150", "ups": "3.09", "wpb": "117987", "bsz": "256", "num_updates": "384000", "lr": "0.000622222", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "125357"} +[2022-08-02 12:18:29,581][train_inner][INFO] - {"epoch": 8, "update": 7.465, "loss": "2.229", "ppl": "4.69", "wps": "366682", "ups": "3.1", "wpb": "118353", "bsz": "256", "num_updates": "384200", "lr": "0.00062202", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "125422"} +[2022-08-02 12:19:34,049][train_inner][INFO] - {"epoch": 8, "update": 7.469, "loss": "2.229", "ppl": "4.69", "wps": "367156", "ups": "3.1", "wpb": "118347", "bsz": "256", "num_updates": "384400", "lr": "0.000621818", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "125486"} +[2022-08-02 12:20:38,869][train_inner][INFO] - {"epoch": 8, "update": 7.473, "loss": "2.236", "ppl": "4.71", "wps": "363337", "ups": "3.09", "wpb": "117755", "bsz": "256", "num_updates": "384600", "lr": "0.000621616", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "125551"} +[2022-08-02 12:21:43,859][train_inner][INFO] - {"epoch": 8, "update": 7.477, "loss": "2.233", "ppl": "4.7", "wps": "364782", "ups": "3.08", "wpb": "118535", "bsz": "256", "num_updates": "384800", "lr": "0.000621414", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "125616"} +[2022-08-02 12:22:48,847][train_inner][INFO] - {"epoch": 8, "update": 7.48, "loss": "2.235", "ppl": "4.71", "wps": "364790", "ups": "3.08", "wpb": "118533", "bsz": "256", "num_updates": "385000", "lr": "0.000621212", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "125681"} +[2022-08-02 12:23:53,521][train_inner][INFO] - {"epoch": 8, "update": 7.484, "loss": "2.232", "ppl": "4.7", "wps": "364344", "ups": "3.09", "wpb": "117816", "bsz": "256", "num_updates": "385200", "lr": "0.00062101", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "125746"} +[2022-08-02 12:24:58,044][train_inner][INFO] - {"epoch": 8, "update": 7.488, "loss": "2.228", "ppl": "4.68", "wps": "364943", "ups": "3.1", "wpb": "117734", "bsz": "256", "num_updates": "385400", "lr": "0.000620808", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "125810"} +[2022-08-02 12:26:02,767][train_inner][INFO] - {"epoch": 8, "update": 7.492, "loss": "2.222", "ppl": "4.66", "wps": "365988", "ups": "3.09", "wpb": "118437", "bsz": "256", "num_updates": "385600", "lr": "0.000620606", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "125875"} +[2022-08-02 12:27:07,523][train_inner][INFO] - {"epoch": 8, "update": 7.496, "loss": "2.227", "ppl": "4.68", "wps": "366166", "ups": "3.09", "wpb": "118556", "bsz": "256", "num_updates": "385800", "lr": "0.000620404", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "125940"} +[2022-08-02 12:28:12,131][train_inner][INFO] - {"epoch": 8, "update": 7.5, "loss": "2.224", "ppl": "4.67", "wps": "367433", "ups": "3.1", "wpb": "118695", "bsz": "256", "num_updates": "386000", "lr": "0.000620202", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "126004"} +[2022-08-02 12:29:17,952][train_inner][INFO] - {"epoch": 8, "update": 7.504, "loss": "2.225", "ppl": "4.67", "wps": "359508", "ups": "3.04", "wpb": "118313", "bsz": "256", "num_updates": "386200", "lr": "0.00062", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "126070"} +[2022-08-02 12:30:22,754][train_inner][INFO] - {"epoch": 8, "update": 7.508, "loss": "2.23", "ppl": "4.69", "wps": "365521", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "386400", "lr": "0.000619798", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "126135"} +[2022-08-02 12:31:28,413][train_inner][INFO] - {"epoch": 8, "update": 7.511, "loss": "2.229", "ppl": "4.69", "wps": "360514", "ups": "3.05", "wpb": "118354", "bsz": "256", "num_updates": "386600", "lr": "0.000619596", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "126201"} +[2022-08-02 12:32:33,343][train_inner][INFO] - {"epoch": 8, "update": 7.515, "loss": "2.228", "ppl": "4.68", "wps": "364967", "ups": "3.08", "wpb": "118484", "bsz": "256", "num_updates": "386800", "lr": "0.000619394", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "126266"} +[2022-08-02 12:33:38,702][train_inner][INFO] - {"epoch": 8, "update": 7.519, "loss": "2.226", "ppl": "4.68", "wps": "361765", "ups": "3.06", "wpb": "118223", "bsz": "256", "num_updates": "387000", "lr": "0.000619192", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "126331"} +[2022-08-02 12:34:43,579][train_inner][INFO] - {"epoch": 8, "update": 7.523, "loss": "2.224", "ppl": "4.67", "wps": "365929", "ups": "3.08", "wpb": "118700", "bsz": "256", "num_updates": "387200", "lr": "0.00061899", "gnorm": "0.663", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.9", "wall": "126396"} +[2022-08-02 12:35:09,190][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 12:35:21,248][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 12:35:37,337][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 12:35:49,052][train_inner][INFO] - {"epoch": 8, "update": 7.527, "loss": "2.229", "ppl": "4.69", "wps": "361202", "ups": "3.05", "wpb": "118243", "bsz": "256", "num_updates": "387400", "lr": "0.000618788", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "126461"} +[2022-08-02 12:36:53,737][train_inner][INFO] - {"epoch": 8, "update": 7.531, "loss": "2.236", "ppl": "4.71", "wps": "364207", "ups": "3.09", "wpb": "117792", "bsz": "256", "num_updates": "387600", "lr": "0.000618586", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "126526"} +[2022-08-02 12:37:58,435][train_inner][INFO] - {"epoch": 8, "update": 7.535, "loss": "2.227", "ppl": "4.68", "wps": "366062", "ups": "3.09", "wpb": "118415", "bsz": "256", "num_updates": "387800", "lr": "0.000618384", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "126591"} +[2022-08-02 12:39:03,255][train_inner][INFO] - {"epoch": 8, "update": 7.539, "loss": "2.227", "ppl": "4.68", "wps": "364755", "ups": "3.09", "wpb": "118216", "bsz": "256", "num_updates": "388000", "lr": "0.000618182", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.9", "wall": "126656"} +[2022-08-02 12:40:07,935][train_inner][INFO] - {"epoch": 8, "update": 7.543, "loss": "2.229", "ppl": "4.69", "wps": "363413", "ups": "3.09", "wpb": "117527", "bsz": "256", "num_updates": "388200", "lr": "0.00061798", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "126720"} +[2022-08-02 12:41:12,580][train_inner][INFO] - {"epoch": 8, "update": 7.547, "loss": "2.226", "ppl": "4.68", "wps": "365587", "ups": "3.09", "wpb": "118166", "bsz": "256", "num_updates": "388400", "lr": "0.000617778", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "126785"} +[2022-08-02 12:42:17,611][train_inner][INFO] - {"epoch": 8, "update": 7.55, "loss": "2.224", "ppl": "4.67", "wps": "364273", "ups": "3.08", "wpb": "118443", "bsz": "256", "num_updates": "388600", "lr": "0.000617576", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "126850"} +[2022-08-02 12:42:22,957][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 12:43:22,498][train_inner][INFO] - {"epoch": 8, "update": 7.554, "loss": "2.224", "ppl": "4.67", "wps": "364719", "ups": "3.08", "wpb": "118326", "bsz": "256", "num_updates": "388800", "lr": "0.000617374", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "126915"} +[2022-08-02 12:44:26,994][train_inner][INFO] - {"epoch": 8, "update": 7.558, "loss": "2.228", "ppl": "4.68", "wps": "366917", "ups": "3.1", "wpb": "118321", "bsz": "256", "num_updates": "389000", "lr": "0.000617172", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "126979"} +[2022-08-02 12:45:31,821][train_inner][INFO] - {"epoch": 8, "update": 7.562, "loss": "2.231", "ppl": "4.69", "wps": "364008", "ups": "3.09", "wpb": "117986", "bsz": "256", "num_updates": "389200", "lr": "0.00061697", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.4", "wall": "127044"} +[2022-08-02 12:46:38,746][train_inner][INFO] - {"epoch": 8, "update": 7.566, "loss": "2.229", "ppl": "4.69", "wps": "353890", "ups": "2.99", "wpb": "118420", "bsz": "256", "num_updates": "389400", "lr": "0.000616768", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "67", "gb_free": "20", "wall": "127111"} +[2022-08-02 12:47:43,667][train_inner][INFO] - {"epoch": 8, "update": 7.57, "loss": "2.227", "ppl": "4.68", "wps": "363691", "ups": "3.08", "wpb": "118054", "bsz": "256", "num_updates": "389600", "lr": "0.000616566", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.7", "wall": "127176"} +[2022-08-02 12:48:48,196][train_inner][INFO] - {"epoch": 8, "update": 7.574, "loss": "2.223", "ppl": "4.67", "wps": "367975", "ups": "3.1", "wpb": "118724", "bsz": "256", "num_updates": "389800", "lr": "0.000616364", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21", "wall": "127241"} +[2022-08-02 12:49:53,072][train_inner][INFO] - {"epoch": 8, "update": 7.578, "loss": "2.23", "ppl": "4.69", "wps": "364203", "ups": "3.08", "wpb": "118138", "bsz": "256", "num_updates": "390000", "lr": "0.000616162", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "127305"} +[2022-08-02 12:50:57,774][train_inner][INFO] - {"epoch": 8, "update": 7.581, "loss": "2.225", "ppl": "4.67", "wps": "364978", "ups": "3.09", "wpb": "118073", "bsz": "256", "num_updates": "390200", "lr": "0.00061596", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.1", "wall": "127370"} +[2022-08-02 12:52:02,553][train_inner][INFO] - {"epoch": 8, "update": 7.585, "loss": "2.219", "ppl": "4.66", "wps": "365916", "ups": "3.09", "wpb": "118515", "bsz": "256", "num_updates": "390400", "lr": "0.000615758", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.8", "wall": "127435"} +[2022-08-02 12:53:07,516][train_inner][INFO] - {"epoch": 8, "update": 7.589, "loss": "2.223", "ppl": "4.67", "wps": "364967", "ups": "3.08", "wpb": "118546", "bsz": "256", "num_updates": "390600", "lr": "0.000615556", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "127500"} +[2022-08-02 12:54:12,656][train_inner][INFO] - {"epoch": 8, "update": 7.593, "loss": "2.225", "ppl": "4.68", "wps": "363579", "ups": "3.07", "wpb": "118414", "bsz": "256", "num_updates": "390800", "lr": "0.000615354", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27", "wall": "127565"} +[2022-08-02 12:55:17,799][train_inner][INFO] - {"epoch": 8, "update": 7.597, "loss": "2.232", "ppl": "4.7", "wps": "363388", "ups": "3.07", "wpb": "118360", "bsz": "256", "num_updates": "391000", "lr": "0.000615152", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "127630"} +[2022-08-02 12:56:22,662][train_inner][INFO] - {"epoch": 8, "update": 7.601, "loss": "2.227", "ppl": "4.68", "wps": "364844", "ups": "3.08", "wpb": "118324", "bsz": "256", "num_updates": "391200", "lr": "0.000614949", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "127695"} +[2022-08-02 12:57:27,380][train_inner][INFO] - {"epoch": 8, "update": 7.605, "loss": "2.227", "ppl": "4.68", "wps": "367353", "ups": "3.09", "wpb": "118870", "bsz": "256", "num_updates": "391400", "lr": "0.000614747", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "127760"} +[2022-08-02 12:58:32,375][train_inner][INFO] - {"epoch": 8, "update": 7.609, "loss": "2.226", "ppl": "4.68", "wps": "365630", "ups": "3.08", "wpb": "118819", "bsz": "256", "num_updates": "391600", "lr": "0.000614545", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "127825"} +[2022-08-02 12:59:37,331][train_inner][INFO] - {"epoch": 8, "update": 7.613, "loss": "2.225", "ppl": "4.68", "wps": "366118", "ups": "3.08", "wpb": "118906", "bsz": "256", "num_updates": "391800", "lr": "0.000614343", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "127890"} +[2022-08-02 13:00:42,946][train_inner][INFO] - {"epoch": 8, "update": 7.616, "loss": "2.231", "ppl": "4.7", "wps": "360694", "ups": "3.05", "wpb": "118333", "bsz": "256", "num_updates": "392000", "lr": "0.000614141", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.2", "wall": "127955"} +[2022-08-02 13:01:47,309][train_inner][INFO] - {"epoch": 8, "update": 7.62, "loss": "2.23", "ppl": "4.69", "wps": "366088", "ups": "3.11", "wpb": "117811", "bsz": "256", "num_updates": "392200", "lr": "0.000613939", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "128020"} +[2022-08-02 13:02:51,568][train_inner][INFO] - {"epoch": 8, "update": 7.624, "loss": "2.228", "ppl": "4.69", "wps": "367198", "ups": "3.11", "wpb": "117976", "bsz": "256", "num_updates": "392400", "lr": "0.000613737", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "128084"} +[2022-08-02 13:03:56,393][train_inner][INFO] - {"epoch": 8, "update": 7.628, "loss": "2.227", "ppl": "4.68", "wps": "365841", "ups": "3.09", "wpb": "118578", "bsz": "256", "num_updates": "392600", "lr": "0.000613535", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "128149"} +[2022-08-02 13:05:01,129][train_inner][INFO] - {"epoch": 8, "update": 7.632, "loss": "2.224", "ppl": "4.67", "wps": "366439", "ups": "3.09", "wpb": "118606", "bsz": "256", "num_updates": "392800", "lr": "0.000613333", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "128213"} +[2022-08-02 13:06:05,164][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 13:06:06,152][train_inner][INFO] - {"epoch": 8, "update": 7.636, "loss": "2.222", "ppl": "4.66", "wps": "363518", "ups": "3.08", "wpb": "118184", "bsz": "256", "num_updates": "393000", "lr": "0.000613131", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "128278"} +[2022-08-02 13:07:11,224][train_inner][INFO] - {"epoch": 8, "update": 7.64, "loss": "2.227", "ppl": "4.68", "wps": "365497", "ups": "3.07", "wpb": "118917", "bsz": "256", "num_updates": "393200", "lr": "0.000612929", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "128344"} +[2022-08-02 13:08:16,247][train_inner][INFO] - {"epoch": 8, "update": 7.644, "loss": "2.22", "ppl": "4.66", "wps": "364793", "ups": "3.08", "wpb": "118597", "bsz": "256", "num_updates": "393400", "lr": "0.000612727", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "128409"} +[2022-08-02 13:09:21,190][train_inner][INFO] - {"epoch": 8, "update": 7.648, "loss": "2.223", "ppl": "4.67", "wps": "363898", "ups": "3.08", "wpb": "118162", "bsz": "256", "num_updates": "393600", "lr": "0.000612525", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "128474"} +[2022-08-02 13:10:25,703][train_inner][INFO] - {"epoch": 8, "update": 7.651, "loss": "2.225", "ppl": "4.68", "wps": "365876", "ups": "3.1", "wpb": "118016", "bsz": "256", "num_updates": "393800", "lr": "0.000612323", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.2", "wall": "128538"} +[2022-08-02 13:11:30,293][train_inner][INFO] - {"epoch": 8, "update": 7.655, "loss": "2.229", "ppl": "4.69", "wps": "367136", "ups": "3.1", "wpb": "118565", "bsz": "256", "num_updates": "394000", "lr": "0.000612121", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "128603"} +[2022-08-02 13:12:37,448][train_inner][INFO] - {"epoch": 8, "update": 7.659, "loss": "2.226", "ppl": "4.68", "wps": "351703", "ups": "2.98", "wpb": "118092", "bsz": "256", "num_updates": "394200", "lr": "0.000611919", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "67", "gb_free": "28.2", "wall": "128670"} +[2022-08-02 13:13:42,547][train_inner][INFO] - {"epoch": 8, "update": 7.663, "loss": "2.218", "ppl": "4.65", "wps": "364886", "ups": "3.07", "wpb": "118760", "bsz": "256", "num_updates": "394400", "lr": "0.000611717", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "128735"} +[2022-08-02 13:14:47,374][train_inner][INFO] - {"epoch": 8, "update": 7.667, "loss": "2.224", "ppl": "4.67", "wps": "364408", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "394600", "lr": "0.000611515", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.8", "wall": "128800"} +[2022-08-02 13:15:52,275][train_inner][INFO] - {"epoch": 8, "update": 7.671, "loss": "2.214", "ppl": "4.64", "wps": "366254", "ups": "3.08", "wpb": "118849", "bsz": "256", "num_updates": "394800", "lr": "0.000611313", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "128865"} +[2022-08-02 13:16:56,979][train_inner][INFO] - {"epoch": 8, "update": 7.675, "loss": "2.223", "ppl": "4.67", "wps": "366232", "ups": "3.09", "wpb": "118481", "bsz": "256", "num_updates": "395000", "lr": "0.000611111", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "128929"} +[2022-08-02 13:18:01,777][train_inner][INFO] - {"epoch": 8, "update": 7.679, "loss": "2.225", "ppl": "4.68", "wps": "364884", "ups": "3.09", "wpb": "118218", "bsz": "256", "num_updates": "395200", "lr": "0.000610909", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "128994"} +[2022-08-02 13:19:06,630][train_inner][INFO] - {"epoch": 8, "update": 7.683, "loss": "2.219", "ppl": "4.66", "wps": "364530", "ups": "3.08", "wpb": "118202", "bsz": "256", "num_updates": "395400", "lr": "0.000610707", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "129059"} +[2022-08-02 13:20:11,036][train_inner][INFO] - {"epoch": 8, "update": 7.686, "loss": "2.222", "ppl": "4.66", "wps": "368801", "ups": "3.11", "wpb": "118763", "bsz": "256", "num_updates": "395600", "lr": "0.000610505", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "129123"} +[2022-08-02 13:21:15,902][train_inner][INFO] - {"epoch": 8, "update": 7.69, "loss": "2.219", "ppl": "4.66", "wps": "366158", "ups": "3.08", "wpb": "118755", "bsz": "256", "num_updates": "395800", "lr": "0.000610303", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "129188"} +[2022-08-02 13:22:20,673][train_inner][INFO] - {"epoch": 8, "update": 7.694, "loss": "2.226", "ppl": "4.68", "wps": "365332", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "396000", "lr": "0.000610101", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.1", "wall": "129253"} +[2022-08-02 13:23:25,581][train_inner][INFO] - {"epoch": 8, "update": 7.698, "loss": "2.229", "ppl": "4.69", "wps": "364094", "ups": "3.08", "wpb": "118160", "bsz": "256", "num_updates": "396200", "lr": "0.000609899", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "129318"} +[2022-08-02 13:24:30,673][train_inner][INFO] - {"epoch": 8, "update": 7.702, "loss": "2.22", "ppl": "4.66", "wps": "364101", "ups": "3.07", "wpb": "118500", "bsz": "256", "num_updates": "396400", "lr": "0.000609697", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "129383"} +[2022-08-02 13:25:36,029][train_inner][INFO] - {"epoch": 8, "update": 7.706, "loss": "2.219", "ppl": "4.66", "wps": "362744", "ups": "3.06", "wpb": "118536", "bsz": "256", "num_updates": "396600", "lr": "0.000609495", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "129448"} +[2022-08-02 13:26:40,896][train_inner][INFO] - {"epoch": 8, "update": 7.71, "loss": "2.22", "ppl": "4.66", "wps": "365015", "ups": "3.08", "wpb": "118385", "bsz": "256", "num_updates": "396800", "lr": "0.000609293", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "129513"} +[2022-08-02 13:27:45,673][train_inner][INFO] - {"epoch": 8, "update": 7.714, "loss": "2.223", "ppl": "4.67", "wps": "364110", "ups": "3.09", "wpb": "117929", "bsz": "256", "num_updates": "397000", "lr": "0.000609091", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.4", "wall": "129578"} +[2022-08-02 13:28:50,239][train_inner][INFO] - {"epoch": 8, "update": 7.717, "loss": "2.226", "ppl": "4.68", "wps": "365181", "ups": "3.1", "wpb": "117889", "bsz": "256", "num_updates": "397200", "lr": "0.000608889", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "129643"} +[2022-08-02 13:29:55,142][train_inner][INFO] - {"epoch": 8, "update": 7.721, "loss": "2.227", "ppl": "4.68", "wps": "364299", "ups": "3.08", "wpb": "118218", "bsz": "256", "num_updates": "397400", "lr": "0.000608687", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.7", "wall": "129707"} +[2022-08-02 13:30:59,816][train_inner][INFO] - {"epoch": 8, "update": 7.725, "loss": "2.219", "ppl": "4.66", "wps": "366403", "ups": "3.09", "wpb": "118482", "bsz": "256", "num_updates": "397600", "lr": "0.000608485", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "129772"} +[2022-08-02 13:31:46,455][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 13:32:01,531][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 13:32:05,007][train_inner][INFO] - {"epoch": 8, "update": 7.729, "loss": "2.221", "ppl": "4.66", "wps": "362574", "ups": "3.07", "wpb": "118181", "bsz": "256", "num_updates": "397800", "lr": "0.000608283", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "129837"} +[2022-08-02 13:33:09,940][train_inner][INFO] - {"epoch": 8, "update": 7.733, "loss": "2.218", "ppl": "4.65", "wps": "365943", "ups": "3.08", "wpb": "118809", "bsz": "256", "num_updates": "398000", "lr": "0.000608081", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "129902"} +[2022-08-02 13:34:15,451][train_inner][INFO] - {"epoch": 8, "update": 7.737, "loss": "2.221", "ppl": "4.66", "wps": "360883", "ups": "3.05", "wpb": "118208", "bsz": "256", "num_updates": "398200", "lr": "0.000607879", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "129968"} +[2022-08-02 13:35:21,209][train_inner][INFO] - {"epoch": 8, "update": 7.741, "loss": "2.22", "ppl": "4.66", "wps": "359926", "ups": "3.04", "wpb": "118337", "bsz": "256", "num_updates": "398400", "lr": "0.000607677", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "130034"} +[2022-08-02 13:36:25,903][train_inner][INFO] - {"epoch": 8, "update": 7.745, "loss": "2.221", "ppl": "4.66", "wps": "365975", "ups": "3.09", "wpb": "118381", "bsz": "256", "num_updates": "398600", "lr": "0.000607475", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "130098"} +[2022-08-02 13:37:30,605][train_inner][INFO] - {"epoch": 8, "update": 7.749, "loss": "2.224", "ppl": "4.67", "wps": "363727", "ups": "3.09", "wpb": "117666", "bsz": "256", "num_updates": "398800", "lr": "0.000607273", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "130163"} +[2022-08-02 13:38:36,444][train_inner][INFO] - {"epoch": 8, "update": 7.752, "loss": "2.223", "ppl": "4.67", "wps": "359439", "ups": "3.04", "wpb": "118325", "bsz": "256", "num_updates": "399000", "lr": "0.000607071", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.1", "wall": "130229"} +[2022-08-02 13:39:41,485][train_inner][INFO] - {"epoch": 8, "update": 7.756, "loss": "2.222", "ppl": "4.66", "wps": "364110", "ups": "3.08", "wpb": "118408", "bsz": "256", "num_updates": "399200", "lr": "0.000606869", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "130294"} +[2022-08-02 13:40:46,392][train_inner][INFO] - {"epoch": 8, "update": 7.76, "loss": "2.221", "ppl": "4.66", "wps": "365943", "ups": "3.08", "wpb": "118760", "bsz": "256", "num_updates": "399400", "lr": "0.000606667", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "130359"} +[2022-08-02 13:41:51,157][train_inner][INFO] - {"epoch": 8, "update": 7.764, "loss": "2.217", "ppl": "4.65", "wps": "365545", "ups": "3.09", "wpb": "118370", "bsz": "256", "num_updates": "399600", "lr": "0.000606465", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "130423"} +[2022-08-02 13:42:55,953][train_inner][INFO] - {"epoch": 8, "update": 7.768, "loss": "2.219", "ppl": "4.66", "wps": "364951", "ups": "3.09", "wpb": "118236", "bsz": "256", "num_updates": "399800", "lr": "0.000606263", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "130488"} +[2022-08-02 13:44:00,709][train_inner][INFO] - {"epoch": 8, "update": 7.772, "loss": "2.218", "ppl": "4.65", "wps": "366009", "ups": "3.09", "wpb": "118504", "bsz": "256", "num_updates": "400000", "lr": "0.000606061", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "130553"} +[2022-08-02 13:44:00,710][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 13:44:23,413][valid][INFO] - {"epoch": 8, "valid_loss": "2.122", "valid_ppl": "4.35", "valid_wps": "1.52207e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "400000", "valid_best_loss": "2.122"} +[2022-08-02 13:44:23,415][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 8 @ 400000 updates +[2022-08-02 13:44:23,416][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_8_400000.pt +[2022-08-02 13:44:32,906][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_8_400000.pt +[2022-08-02 13:44:59,303][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_8_400000.pt (epoch 8 @ 400000 updates, score 2.122) (writing took 35.88700763322413 seconds) +[2022-08-02 13:46:04,127][train_inner][INFO] - {"epoch": 8, "update": 7.776, "loss": "2.221", "ppl": "4.66", "wps": "191311", "ups": "1.62", "wpb": "118055", "bsz": "256", "num_updates": "400200", "lr": "0.000605859", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "130676"} +[2022-08-02 13:47:08,817][train_inner][INFO] - {"epoch": 8, "update": 7.78, "loss": "2.229", "ppl": "4.69", "wps": "363458", "ups": "3.09", "wpb": "117558", "bsz": "256", "num_updates": "400400", "lr": "0.000605657", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "130741"} +[2022-08-02 13:48:13,619][train_inner][INFO] - {"epoch": 8, "update": 7.784, "loss": "2.226", "ppl": "4.68", "wps": "363757", "ups": "3.09", "wpb": "117861", "bsz": "256", "num_updates": "400600", "lr": "0.000605455", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "130806"} +[2022-08-02 13:49:18,339][train_inner][INFO] - {"epoch": 8, "update": 7.787, "loss": "2.214", "ppl": "4.64", "wps": "366756", "ups": "3.09", "wpb": "118680", "bsz": "256", "num_updates": "400800", "lr": "0.000605253", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "130871"} +[2022-08-02 13:50:24,206][train_inner][INFO] - {"epoch": 8, "update": 7.791, "loss": "2.212", "ppl": "4.63", "wps": "359174", "ups": "3.04", "wpb": "118287", "bsz": "256", "num_updates": "401000", "lr": "0.000605051", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.8", "wall": "130937"} +[2022-08-02 13:51:28,840][train_inner][INFO] - {"epoch": 8, "update": 7.795, "loss": "2.218", "ppl": "4.65", "wps": "366153", "ups": "3.09", "wpb": "118328", "bsz": "256", "num_updates": "401200", "lr": "0.000604848", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "131001"} +[2022-08-02 13:52:33,535][train_inner][INFO] - {"epoch": 8, "update": 7.799, "loss": "2.219", "ppl": "4.66", "wps": "365731", "ups": "3.09", "wpb": "118304", "bsz": "256", "num_updates": "401400", "lr": "0.000604646", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "131066"} +[2022-08-02 13:53:38,040][train_inner][INFO] - {"epoch": 8, "update": 7.803, "loss": "2.221", "ppl": "4.66", "wps": "366489", "ups": "3.1", "wpb": "118198", "bsz": "256", "num_updates": "401600", "lr": "0.000604444", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "131130"} +[2022-08-02 13:54:43,053][train_inner][INFO] - {"epoch": 8, "update": 7.807, "loss": "2.215", "ppl": "4.64", "wps": "365586", "ups": "3.08", "wpb": "118838", "bsz": "256", "num_updates": "401800", "lr": "0.000604242", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "131195"} +[2022-08-02 13:55:47,529][train_inner][INFO] - {"epoch": 8, "update": 7.811, "loss": "2.222", "ppl": "4.67", "wps": "367715", "ups": "3.1", "wpb": "118543", "bsz": "256", "num_updates": "402000", "lr": "0.00060404", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "131260"} +[2022-08-02 13:56:52,233][train_inner][INFO] - {"epoch": 8, "update": 7.815, "loss": "2.221", "ppl": "4.66", "wps": "364437", "ups": "3.09", "wpb": "117902", "bsz": "256", "num_updates": "402200", "lr": "0.000603838", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "131325"} +[2022-08-02 13:57:56,986][train_inner][INFO] - {"epoch": 8, "update": 7.819, "loss": "2.218", "ppl": "4.65", "wps": "365143", "ups": "3.09", "wpb": "118219", "bsz": "256", "num_updates": "402400", "lr": "0.000603636", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "131389"} +[2022-08-02 13:58:48,917][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 13:59:03,395][train_inner][INFO] - {"epoch": 8, "update": 7.822, "loss": "2.218", "ppl": "4.65", "wps": "357153", "ups": "3.01", "wpb": "118589", "bsz": "256", "num_updates": "402600", "lr": "0.000603434", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.2", "wall": "131456"} +[2022-08-02 14:00:08,565][train_inner][INFO] - {"epoch": 8, "update": 7.826, "loss": "2.223", "ppl": "4.67", "wps": "362936", "ups": "3.07", "wpb": "118261", "bsz": "256", "num_updates": "402800", "lr": "0.000603232", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "131521"} +[2022-08-02 14:01:14,553][train_inner][INFO] - {"epoch": 8, "update": 7.83, "loss": "2.218", "ppl": "4.65", "wps": "359479", "ups": "3.03", "wpb": "118605", "bsz": "256", "num_updates": "403000", "lr": "0.00060303", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "131587"} +[2022-08-02 14:02:19,006][train_inner][INFO] - {"epoch": 8, "update": 7.834, "loss": "2.221", "ppl": "4.66", "wps": "367151", "ups": "3.1", "wpb": "118318", "bsz": "256", "num_updates": "403200", "lr": "0.000602828", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "131651"} +[2022-08-02 14:03:23,914][train_inner][INFO] - {"epoch": 8, "update": 7.838, "loss": "2.22", "ppl": "4.66", "wps": "363558", "ups": "3.08", "wpb": "117987", "bsz": "256", "num_updates": "403400", "lr": "0.000602626", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "131716"} +[2022-08-02 14:04:28,861][train_inner][INFO] - {"epoch": 8, "update": 7.842, "loss": "2.218", "ppl": "4.65", "wps": "364802", "ups": "3.08", "wpb": "118462", "bsz": "256", "num_updates": "403600", "lr": "0.000602424", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "131781"} +[2022-08-02 14:05:33,591][train_inner][INFO] - {"epoch": 8, "update": 7.846, "loss": "2.218", "ppl": "4.65", "wps": "364883", "ups": "3.09", "wpb": "118092", "bsz": "256", "num_updates": "403800", "lr": "0.000602222", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "131846"} +[2022-08-02 14:06:38,417][train_inner][INFO] - {"epoch": 8, "update": 7.85, "loss": "2.216", "ppl": "4.65", "wps": "365391", "ups": "3.09", "wpb": "118433", "bsz": "256", "num_updates": "404000", "lr": "0.00060202", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "131911"} +[2022-08-02 14:07:42,862][train_inner][INFO] - {"epoch": 8, "update": 7.853, "loss": "2.209", "ppl": "4.62", "wps": "367185", "ups": "3.1", "wpb": "118315", "bsz": "256", "num_updates": "404200", "lr": "0.000601818", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "131975"} +[2022-08-02 14:08:48,078][train_inner][INFO] - {"epoch": 8, "update": 7.857, "loss": "2.217", "ppl": "4.65", "wps": "364706", "ups": "3.07", "wpb": "118922", "bsz": "256", "num_updates": "404400", "lr": "0.000601616", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "132040"} +[2022-08-02 14:09:34,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 14:09:53,498][train_inner][INFO] - {"epoch": 8, "update": 7.861, "loss": "2.215", "ppl": "4.64", "wps": "360913", "ups": "3.06", "wpb": "118052", "bsz": "256", "num_updates": "404600", "lr": "0.000601414", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "132106"} +[2022-08-02 14:10:58,678][train_inner][INFO] - {"epoch": 8, "update": 7.865, "loss": "2.218", "ppl": "4.65", "wps": "363683", "ups": "3.07", "wpb": "118524", "bsz": "256", "num_updates": "404800", "lr": "0.000601212", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "132171"} +[2022-08-02 14:12:03,572][train_inner][INFO] - {"epoch": 8, "update": 7.869, "loss": "2.219", "ppl": "4.65", "wps": "366199", "ups": "3.08", "wpb": "118819", "bsz": "256", "num_updates": "405000", "lr": "0.00060101", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "132236"} +[2022-08-02 14:13:08,488][train_inner][INFO] - {"epoch": 8, "update": 7.873, "loss": "2.209", "ppl": "4.62", "wps": "365097", "ups": "3.08", "wpb": "118501", "bsz": "256", "num_updates": "405200", "lr": "0.000600808", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.5", "wall": "132301"} +[2022-08-02 14:14:13,782][train_inner][INFO] - {"epoch": 8, "update": 7.877, "loss": "2.214", "ppl": "4.64", "wps": "364816", "ups": "3.06", "wpb": "119100", "bsz": "256", "num_updates": "405400", "lr": "0.000600606", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "132366"} +[2022-08-02 14:15:18,642][train_inner][INFO] - {"epoch": 8, "update": 7.881, "loss": "2.215", "ppl": "4.64", "wps": "365022", "ups": "3.08", "wpb": "118374", "bsz": "256", "num_updates": "405600", "lr": "0.000600404", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.5", "wall": "132431"} +[2022-08-02 14:16:23,437][train_inner][INFO] - {"epoch": 8, "update": 7.885, "loss": "2.213", "ppl": "4.64", "wps": "364630", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "405800", "lr": "0.000600202", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "132496"} +[2022-08-02 14:17:28,027][train_inner][INFO] - {"epoch": 8, "update": 7.888, "loss": "2.221", "ppl": "4.66", "wps": "366672", "ups": "3.1", "wpb": "118415", "bsz": "256", "num_updates": "406000", "lr": "0.0006", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.2", "wall": "132560"} +[2022-08-02 14:18:32,841][train_inner][INFO] - {"epoch": 8, "update": 7.892, "loss": "2.222", "ppl": "4.66", "wps": "364821", "ups": "3.09", "wpb": "118225", "bsz": "255.9", "num_updates": "406200", "lr": "0.000599798", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "132625"} +[2022-08-02 14:19:36,934][train_inner][INFO] - {"epoch": 8, "update": 7.896, "loss": "2.219", "ppl": "4.66", "wps": "368212", "ups": "3.12", "wpb": "117998", "bsz": "256", "num_updates": "406400", "lr": "0.000599596", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "132689"} +[2022-08-02 14:20:42,054][train_inner][INFO] - {"epoch": 8, "update": 7.9, "loss": "2.21", "ppl": "4.63", "wps": "364589", "ups": "3.07", "wpb": "118708", "bsz": "256", "num_updates": "406600", "lr": "0.000599394", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "132754"} +[2022-08-02 14:21:46,698][train_inner][INFO] - {"epoch": 8, "update": 7.904, "loss": "2.219", "ppl": "4.65", "wps": "365228", "ups": "3.09", "wpb": "118048", "bsz": "256", "num_updates": "406800", "lr": "0.000599192", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "132819"} +[2022-08-02 14:22:51,366][train_inner][INFO] - {"epoch": 8, "update": 7.908, "loss": "2.218", "ppl": "4.65", "wps": "365562", "ups": "3.09", "wpb": "118199", "bsz": "256", "num_updates": "407000", "lr": "0.00059899", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "132884"} +[2022-08-02 14:23:56,257][train_inner][INFO] - {"epoch": 8, "update": 7.912, "loss": "2.214", "ppl": "4.64", "wps": "365564", "ups": "3.08", "wpb": "118606", "bsz": "256", "num_updates": "407200", "lr": "0.000598788", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "132949"} +[2022-08-02 14:25:00,934][train_inner][INFO] - {"epoch": 8, "update": 7.916, "loss": "2.213", "ppl": "4.64", "wps": "366211", "ups": "3.09", "wpb": "118426", "bsz": "256", "num_updates": "407400", "lr": "0.000598586", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "133013"} +[2022-08-02 14:26:05,740][train_inner][INFO] - {"epoch": 8, "update": 7.92, "loss": "2.218", "ppl": "4.65", "wps": "364373", "ups": "3.09", "wpb": "118065", "bsz": "256", "num_updates": "407600", "lr": "0.000598384", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "133078"} +[2022-08-02 14:27:10,567][train_inner][INFO] - {"epoch": 8, "update": 7.923, "loss": "2.212", "ppl": "4.63", "wps": "366632", "ups": "3.09", "wpb": "118837", "bsz": "256", "num_updates": "407800", "lr": "0.000598182", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "133143"} +[2022-08-02 14:28:15,365][train_inner][INFO] - {"epoch": 8, "update": 7.927, "loss": "2.218", "ppl": "4.65", "wps": "363981", "ups": "3.09", "wpb": "117924", "bsz": "256", "num_updates": "408000", "lr": "0.00059798", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "133208"} +[2022-08-02 14:29:21,009][train_inner][INFO] - {"epoch": 8, "update": 7.931, "loss": "2.212", "ppl": "4.63", "wps": "360428", "ups": "3.05", "wpb": "118299", "bsz": "256", "num_updates": "408200", "lr": "0.000597778", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "133273"} +[2022-08-02 14:30:26,006][train_inner][INFO] - {"epoch": 8, "update": 7.935, "loss": "2.215", "ppl": "4.64", "wps": "363324", "ups": "3.08", "wpb": "118073", "bsz": "256", "num_updates": "408400", "lr": "0.000597576", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "133338"} +[2022-08-02 14:31:30,573][train_inner][INFO] - {"epoch": 8, "update": 7.939, "loss": "2.211", "ppl": "4.63", "wps": "365356", "ups": "3.1", "wpb": "117948", "bsz": "256", "num_updates": "408600", "lr": "0.000597374", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "133403"} +[2022-08-02 14:32:35,565][train_inner][INFO] - {"epoch": 8, "update": 7.943, "loss": "2.214", "ppl": "4.64", "wps": "364112", "ups": "3.08", "wpb": "118321", "bsz": "256", "num_updates": "408800", "lr": "0.000597172", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "133468"} +[2022-08-02 14:33:40,574][train_inner][INFO] - {"epoch": 8, "update": 7.947, "loss": "2.21", "ppl": "4.63", "wps": "364249", "ups": "3.08", "wpb": "118395", "bsz": "256", "num_updates": "409000", "lr": "0.00059697", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "133533"} +[2022-08-02 14:34:45,336][train_inner][INFO] - {"epoch": 8, "update": 7.951, "loss": "2.218", "ppl": "4.65", "wps": "364701", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "409200", "lr": "0.000596768", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "133598"} +[2022-08-02 14:35:50,169][train_inner][INFO] - {"epoch": 8, "update": 7.954, "loss": "2.213", "ppl": "4.64", "wps": "364850", "ups": "3.08", "wpb": "118270", "bsz": "256", "num_updates": "409400", "lr": "0.000596566", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "133662"} +[2022-08-02 14:36:55,274][train_inner][INFO] - {"epoch": 8, "update": 7.958, "loss": "2.212", "ppl": "4.63", "wps": "362228", "ups": "3.07", "wpb": "117913", "bsz": "256", "num_updates": "409600", "lr": "0.000596364", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "133728"} +[2022-08-02 14:38:00,229][train_inner][INFO] - {"epoch": 8, "update": 7.962, "loss": "2.21", "ppl": "4.63", "wps": "364150", "ups": "3.08", "wpb": "118262", "bsz": "256", "num_updates": "409800", "lr": "0.000596162", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "133793"} +[2022-08-02 14:39:05,310][train_inner][INFO] - {"epoch": 8, "update": 7.966, "loss": "2.206", "ppl": "4.61", "wps": "366302", "ups": "3.07", "wpb": "119185", "bsz": "256", "num_updates": "410000", "lr": "0.00059596", "gnorm": "0.664", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "133858"} +[2022-08-02 14:40:10,023][train_inner][INFO] - {"epoch": 8, "update": 7.97, "loss": "2.209", "ppl": "4.62", "wps": "365927", "ups": "3.09", "wpb": "118400", "bsz": "256", "num_updates": "410200", "lr": "0.000595758", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "133922"} +[2022-08-02 14:40:39,110][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 14:41:14,929][train_inner][INFO] - {"epoch": 8, "update": 7.974, "loss": "2.213", "ppl": "4.64", "wps": "361649", "ups": "3.08", "wpb": "117364", "bsz": "256", "num_updates": "410400", "lr": "0.000595556", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "133987"} +[2022-08-02 14:42:19,609][train_inner][INFO] - {"epoch": 8, "update": 7.978, "loss": "2.217", "ppl": "4.65", "wps": "366552", "ups": "3.09", "wpb": "118543", "bsz": "256", "num_updates": "410600", "lr": "0.000595354", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "134052"} +[2022-08-02 14:43:24,445][train_inner][INFO] - {"epoch": 8, "update": 7.982, "loss": "2.218", "ppl": "4.65", "wps": "363426", "ups": "3.08", "wpb": "117814", "bsz": "256", "num_updates": "410800", "lr": "0.000595152", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "134117"} +[2022-08-02 14:44:29,271][train_inner][INFO] - {"epoch": 8, "update": 7.986, "loss": "2.217", "ppl": "4.65", "wps": "364605", "ups": "3.09", "wpb": "118177", "bsz": "256", "num_updates": "411000", "lr": "0.000594949", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "134182"} +[2022-08-02 14:45:34,100][train_inner][INFO] - {"epoch": 8, "update": 7.989, "loss": "2.215", "ppl": "4.64", "wps": "364440", "ups": "3.09", "wpb": "118129", "bsz": "256", "num_updates": "411200", "lr": "0.000594747", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "134246"} +[2022-08-02 14:46:39,124][train_inner][INFO] - {"epoch": 8, "update": 7.993, "loss": "2.214", "ppl": "4.64", "wps": "366656", "ups": "3.08", "wpb": "119206", "bsz": "256", "num_updates": "411400", "lr": "0.000594545", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "134311"} +[2022-08-02 14:47:43,963][train_inner][INFO] - {"epoch": 8, "update": 7.997, "loss": "2.214", "ppl": "4.64", "wps": "362809", "ups": "3.08", "wpb": "117618", "bsz": "256", "num_updates": "411600", "lr": "0.000594343", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.8", "wall": "134376"} +[2022-08-02 14:48:29,816][fairseq_cli.train][INFO] - end of epoch 8 (average epoch stats below) +[2022-08-02 14:48:29,816][train][INFO] - {"epoch": 8, "train_loss": "2.229", "train_ppl": "4.69", "train_wps": "362786", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "411742", "train_lr": "0.0005942", "train_gnorm": "0.669", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16619", "train_gb_free": "21.4", "train_wall": "134422"} +[2022-08-02 14:48:29,907][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 14:48:29,910][fairseq.trainer][INFO] - begin training epoch 9 +[2022-08-02 14:48:29,910][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 14:48:58,989][train_inner][INFO] - {"epoch": 9, "update": 8.001, "loss": "2.219", "ppl": "4.66", "wps": "312441", "ups": "2.67", "wpb": "117204", "bsz": "255.4", "num_updates": "411800", "lr": "0.000594141", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "134451"} +[2022-08-02 14:50:03,881][train_inner][INFO] - {"epoch": 9, "update": 8.005, "loss": "2.213", "ppl": "4.64", "wps": "365932", "ups": "3.08", "wpb": "118730", "bsz": "256", "num_updates": "412000", "lr": "0.000593939", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "134516"} +[2022-08-02 14:51:08,617][train_inner][INFO] - {"epoch": 9, "update": 8.009, "loss": "2.203", "ppl": "4.6", "wps": "368276", "ups": "3.09", "wpb": "119201", "bsz": "256", "num_updates": "412200", "lr": "0.000593737", "gnorm": "0.662", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "134581"} +[2022-08-02 14:52:13,596][train_inner][INFO] - {"epoch": 9, "update": 8.013, "loss": "2.208", "ppl": "4.62", "wps": "365087", "ups": "3.08", "wpb": "118613", "bsz": "256", "num_updates": "412400", "lr": "0.000593535", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "134646"} +[2022-08-02 14:53:18,400][train_inner][INFO] - {"epoch": 9, "update": 8.017, "loss": "2.21", "ppl": "4.63", "wps": "364883", "ups": "3.09", "wpb": "118227", "bsz": "256", "num_updates": "412600", "lr": "0.000593333", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "134711"} +[2022-08-02 14:54:23,271][train_inner][INFO] - {"epoch": 9, "update": 8.021, "loss": "2.219", "ppl": "4.65", "wps": "363210", "ups": "3.08", "wpb": "117808", "bsz": "256", "num_updates": "412800", "lr": "0.000593131", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "134776"} +[2022-08-02 14:55:28,118][train_inner][INFO] - {"epoch": 9, "update": 8.024, "loss": "2.207", "ppl": "4.62", "wps": "365322", "ups": "3.08", "wpb": "118449", "bsz": "256", "num_updates": "413000", "lr": "0.000592929", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "134840"} +[2022-08-02 14:55:56,463][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 14:56:33,763][train_inner][INFO] - {"epoch": 9, "update": 8.028, "loss": "2.21", "ppl": "4.63", "wps": "361094", "ups": "3.05", "wpb": "118518", "bsz": "256", "num_updates": "413200", "lr": "0.000592727", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "134906"} +[2022-08-02 14:57:04,839][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 14:57:38,511][train_inner][INFO] - {"epoch": 9, "update": 8.032, "loss": "2.208", "ppl": "4.62", "wps": "366209", "ups": "3.09", "wpb": "118556", "bsz": "256", "num_updates": "413400", "lr": "0.000592525", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.6", "wall": "134971"} +[2022-08-02 14:58:43,787][train_inner][INFO] - {"epoch": 9, "update": 8.036, "loss": "2.204", "ppl": "4.61", "wps": "365175", "ups": "3.06", "wpb": "119185", "bsz": "256", "num_updates": "413600", "lr": "0.000592323", "gnorm": "0.697", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "135036"} +[2022-08-02 14:59:48,570][train_inner][INFO] - {"epoch": 9, "update": 8.04, "loss": "2.213", "ppl": "4.64", "wps": "363816", "ups": "3.09", "wpb": "117844", "bsz": "256", "num_updates": "413800", "lr": "0.000592121", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "135101"} +[2022-08-02 15:00:53,478][train_inner][INFO] - {"epoch": 9, "update": 8.044, "loss": "2.209", "ppl": "4.62", "wps": "365564", "ups": "3.08", "wpb": "118639", "bsz": "256", "num_updates": "414000", "lr": "0.000591919", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "135166"} +[2022-08-02 15:01:58,627][train_inner][INFO] - {"epoch": 9, "update": 8.048, "loss": "2.213", "ppl": "4.64", "wps": "364813", "ups": "3.07", "wpb": "118833", "bsz": "256", "num_updates": "414200", "lr": "0.000591717", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "135231"} +[2022-08-02 15:03:03,559][train_inner][INFO] - {"epoch": 9, "update": 8.052, "loss": "2.21", "ppl": "4.63", "wps": "364390", "ups": "3.08", "wpb": "118301", "bsz": "256", "num_updates": "414400", "lr": "0.000591515", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "135296"} +[2022-08-02 15:04:08,432][train_inner][INFO] - {"epoch": 9, "update": 8.056, "loss": "2.208", "ppl": "4.62", "wps": "364683", "ups": "3.08", "wpb": "118289", "bsz": "256", "num_updates": "414600", "lr": "0.000591313", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "135361"} +[2022-08-02 15:05:13,452][train_inner][INFO] - {"epoch": 9, "update": 8.059, "loss": "2.212", "ppl": "4.63", "wps": "365586", "ups": "3.08", "wpb": "118849", "bsz": "256", "num_updates": "414800", "lr": "0.000591111", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "135426"} +[2022-08-02 15:06:18,512][train_inner][INFO] - {"epoch": 9, "update": 8.063, "loss": "2.211", "ppl": "4.63", "wps": "364207", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "415000", "lr": "0.000590909", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "135491"} +[2022-08-02 15:07:24,643][train_inner][INFO] - {"epoch": 9, "update": 8.067, "loss": "2.219", "ppl": "4.66", "wps": "356970", "ups": "3.02", "wpb": "118031", "bsz": "256", "num_updates": "415200", "lr": "0.000590707", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "20.1", "wall": "135557"} +[2022-08-02 15:08:29,171][train_inner][INFO] - {"epoch": 9, "update": 8.071, "loss": "2.206", "ppl": "4.62", "wps": "366416", "ups": "3.1", "wpb": "118221", "bsz": "256", "num_updates": "415400", "lr": "0.000590505", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "135621"} +[2022-08-02 15:09:34,959][train_inner][INFO] - {"epoch": 9, "update": 8.075, "loss": "2.208", "ppl": "4.62", "wps": "359706", "ups": "3.04", "wpb": "118319", "bsz": "256", "num_updates": "415600", "lr": "0.000590303", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "135687"} +[2022-08-02 15:10:39,775][train_inner][INFO] - {"epoch": 9, "update": 8.079, "loss": "2.212", "ppl": "4.63", "wps": "365891", "ups": "3.09", "wpb": "118577", "bsz": "256", "num_updates": "415800", "lr": "0.000590101", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.7", "wall": "135752"} +[2022-08-02 15:11:44,627][train_inner][INFO] - {"epoch": 9, "update": 8.083, "loss": "2.21", "ppl": "4.63", "wps": "364336", "ups": "3.08", "wpb": "118137", "bsz": "256", "num_updates": "416000", "lr": "0.000589899", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "135817"} +[2022-08-02 15:12:49,702][train_inner][INFO] - {"epoch": 9, "update": 8.087, "loss": "2.216", "ppl": "4.65", "wps": "363688", "ups": "3.07", "wpb": "118334", "bsz": "256", "num_updates": "416200", "lr": "0.000589697", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.9", "wall": "135882"} +[2022-08-02 15:13:54,550][train_inner][INFO] - {"epoch": 9, "update": 8.091, "loss": "2.21", "ppl": "4.63", "wps": "363861", "ups": "3.08", "wpb": "117977", "bsz": "256", "num_updates": "416400", "lr": "0.000589495", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "135947"} +[2022-08-02 15:14:59,352][train_inner][INFO] - {"epoch": 9, "update": 8.094, "loss": "2.204", "ppl": "4.61", "wps": "366122", "ups": "3.09", "wpb": "118625", "bsz": "256", "num_updates": "416600", "lr": "0.000589293", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "136012"} +[2022-08-02 15:16:04,448][train_inner][INFO] - {"epoch": 9, "update": 8.098, "loss": "2.208", "ppl": "4.62", "wps": "363756", "ups": "3.07", "wpb": "118393", "bsz": "256", "num_updates": "416800", "lr": "0.000589091", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "136077"} +[2022-08-02 15:17:08,989][train_inner][INFO] - {"epoch": 9, "update": 8.102, "loss": "2.216", "ppl": "4.65", "wps": "365519", "ups": "3.1", "wpb": "117954", "bsz": "256", "num_updates": "417000", "lr": "0.000588889", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.7", "wall": "136141"} +[2022-08-02 15:18:14,166][train_inner][INFO] - {"epoch": 9, "update": 8.106, "loss": "2.204", "ppl": "4.61", "wps": "363889", "ups": "3.07", "wpb": "118583", "bsz": "256", "num_updates": "417200", "lr": "0.000588687", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "136206"} +[2022-08-02 15:19:19,235][train_inner][INFO] - {"epoch": 9, "update": 8.11, "loss": "2.209", "ppl": "4.62", "wps": "363143", "ups": "3.07", "wpb": "118145", "bsz": "256", "num_updates": "417400", "lr": "0.000588485", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "136272"} +[2022-08-02 15:19:28,606][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 15:20:24,383][train_inner][INFO] - {"epoch": 9, "update": 8.114, "loss": "2.207", "ppl": "4.62", "wps": "363602", "ups": "3.07", "wpb": "118438", "bsz": "256", "num_updates": "417600", "lr": "0.000588283", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "136337"} +[2022-08-02 15:21:30,021][train_inner][INFO] - {"epoch": 9, "update": 8.118, "loss": "2.208", "ppl": "4.62", "wps": "360001", "ups": "3.05", "wpb": "118149", "bsz": "256", "num_updates": "417800", "lr": "0.000588081", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "136402"} +[2022-08-02 15:22:34,546][train_inner][INFO] - {"epoch": 9, "update": 8.122, "loss": "2.217", "ppl": "4.65", "wps": "363146", "ups": "3.1", "wpb": "117157", "bsz": "256", "num_updates": "418000", "lr": "0.000587879", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "136467"} +[2022-08-02 15:23:39,433][train_inner][INFO] - {"epoch": 9, "update": 8.125, "loss": "2.207", "ppl": "4.62", "wps": "363229", "ups": "3.08", "wpb": "117843", "bsz": "256", "num_updates": "418200", "lr": "0.000587677", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "136532"} +[2022-08-02 15:24:44,331][train_inner][INFO] - {"epoch": 9, "update": 8.129, "loss": "2.21", "ppl": "4.63", "wps": "364274", "ups": "3.08", "wpb": "118201", "bsz": "256", "num_updates": "418400", "lr": "0.000587475", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "136597"} +[2022-08-02 15:25:49,177][train_inner][INFO] - {"epoch": 9, "update": 8.133, "loss": "2.197", "ppl": "4.59", "wps": "366848", "ups": "3.08", "wpb": "118942", "bsz": "256", "num_updates": "418600", "lr": "0.000587273", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.9", "wall": "136661"} +[2022-08-02 15:26:54,085][train_inner][INFO] - {"epoch": 9, "update": 8.137, "loss": "2.207", "ppl": "4.62", "wps": "364259", "ups": "3.08", "wpb": "118214", "bsz": "256", "num_updates": "418800", "lr": "0.000587071", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "136726"} +[2022-08-02 15:27:59,044][train_inner][INFO] - {"epoch": 9, "update": 8.141, "loss": "2.207", "ppl": "4.62", "wps": "361512", "ups": "3.08", "wpb": "117416", "bsz": "256", "num_updates": "419000", "lr": "0.000586869", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "136791"} +[2022-08-02 15:29:04,079][train_inner][INFO] - {"epoch": 9, "update": 8.145, "loss": "2.209", "ppl": "4.62", "wps": "364317", "ups": "3.08", "wpb": "118465", "bsz": "256", "num_updates": "419200", "lr": "0.000586667", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "136856"} +[2022-08-02 15:30:08,846][train_inner][INFO] - {"epoch": 9, "update": 8.149, "loss": "2.208", "ppl": "4.62", "wps": "364174", "ups": "3.09", "wpb": "117929", "bsz": "256", "num_updates": "419400", "lr": "0.000586465", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "136921"} +[2022-08-02 15:30:32,004][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 15:31:14,053][train_inner][INFO] - {"epoch": 9, "update": 8.153, "loss": "2.212", "ppl": "4.63", "wps": "361224", "ups": "3.07", "wpb": "117771", "bsz": "256", "num_updates": "419600", "lr": "0.000586263", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "136986"} +[2022-08-02 15:31:16,297][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 15:32:19,481][train_inner][INFO] - {"epoch": 9, "update": 8.157, "loss": "2.213", "ppl": "4.64", "wps": "362591", "ups": "3.06", "wpb": "118617", "bsz": "256", "num_updates": "419800", "lr": "0.000586061", "gnorm": "0.818", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "137052"} +[2022-08-02 15:33:24,367][train_inner][INFO] - {"epoch": 9, "update": 8.16, "loss": "2.206", "ppl": "4.61", "wps": "365146", "ups": "3.08", "wpb": "118463", "bsz": "256", "num_updates": "420000", "lr": "0.000585859", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "137117"} +[2022-08-02 15:34:30,182][train_inner][INFO] - {"epoch": 9, "update": 8.164, "loss": "2.201", "ppl": "4.6", "wps": "359218", "ups": "3.04", "wpb": "118208", "bsz": "256", "num_updates": "420200", "lr": "0.000585657", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.1", "wall": "137183"} +[2022-08-02 15:35:35,217][train_inner][INFO] - {"epoch": 9, "update": 8.168, "loss": "2.203", "ppl": "4.6", "wps": "364374", "ups": "3.08", "wpb": "118483", "bsz": "256", "num_updates": "420400", "lr": "0.000585455", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "137248"} +[2022-08-02 15:36:40,281][train_inner][INFO] - {"epoch": 9, "update": 8.172, "loss": "2.209", "ppl": "4.62", "wps": "363141", "ups": "3.07", "wpb": "118135", "bsz": "256", "num_updates": "420600", "lr": "0.000585253", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "137313"} +[2022-08-02 15:37:44,876][train_inner][INFO] - {"epoch": 9, "update": 8.176, "loss": "2.206", "ppl": "4.61", "wps": "366965", "ups": "3.1", "wpb": "118520", "bsz": "256", "num_updates": "420800", "lr": "0.000585051", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "137377"} +[2022-08-02 15:38:49,835][train_inner][INFO] - {"epoch": 9, "update": 8.18, "loss": "2.206", "ppl": "4.61", "wps": "364810", "ups": "3.08", "wpb": "118486", "bsz": "256", "num_updates": "421000", "lr": "0.000584848", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "137442"} +[2022-08-02 15:39:55,136][train_inner][INFO] - {"epoch": 9, "update": 8.184, "loss": "2.21", "ppl": "4.63", "wps": "361254", "ups": "3.06", "wpb": "117950", "bsz": "256", "num_updates": "421200", "lr": "0.000584646", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.6", "wall": "137507"} +[2022-08-02 15:40:59,973][train_inner][INFO] - {"epoch": 9, "update": 8.188, "loss": "2.208", "ppl": "4.62", "wps": "363834", "ups": "3.08", "wpb": "117948", "bsz": "256", "num_updates": "421400", "lr": "0.000584444", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "137572"} +[2022-08-02 15:42:02,015][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 15:42:05,244][train_inner][INFO] - {"epoch": 9, "update": 8.192, "loss": "2.221", "ppl": "4.66", "wps": "361395", "ups": "3.06", "wpb": "117940", "bsz": "256", "num_updates": "421600", "lr": "0.000584242", "gnorm": "0.712", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "137638"} +[2022-08-02 15:43:09,867][train_inner][INFO] - {"epoch": 9, "update": 8.195, "loss": "2.209", "ppl": "4.62", "wps": "366630", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "421800", "lr": "0.00058404", "gnorm": "0.718", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "23.7", "wall": "137702"} +[2022-08-02 15:44:14,621][train_inner][INFO] - {"epoch": 9, "update": 8.199, "loss": "2.212", "ppl": "4.63", "wps": "365024", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "422000", "lr": "0.000583838", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.8", "wall": "137767"} +[2022-08-02 15:45:19,598][train_inner][INFO] - {"epoch": 9, "update": 8.203, "loss": "2.211", "ppl": "4.63", "wps": "363881", "ups": "3.08", "wpb": "118216", "bsz": "256", "num_updates": "422200", "lr": "0.000583636", "gnorm": "0.669", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "137832"} +[2022-08-02 15:46:24,886][train_inner][INFO] - {"epoch": 9, "update": 8.207, "loss": "2.207", "ppl": "4.62", "wps": "363051", "ups": "3.06", "wpb": "118514", "bsz": "256", "num_updates": "422400", "lr": "0.000583434", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.1", "wall": "137897"} +[2022-08-02 15:47:29,961][train_inner][INFO] - {"epoch": 9, "update": 8.211, "loss": "2.205", "ppl": "4.61", "wps": "362130", "ups": "3.07", "wpb": "117825", "bsz": "256", "num_updates": "422600", "lr": "0.000583232", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "137962"} +[2022-08-02 15:48:35,147][train_inner][INFO] - {"epoch": 9, "update": 8.215, "loss": "2.206", "ppl": "4.62", "wps": "361756", "ups": "3.07", "wpb": "117906", "bsz": "256", "num_updates": "422800", "lr": "0.00058303", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "138027"} +[2022-08-02 15:49:40,107][train_inner][INFO] - {"epoch": 9, "update": 8.219, "loss": "2.211", "ppl": "4.63", "wps": "362248", "ups": "3.08", "wpb": "117656", "bsz": "256", "num_updates": "423000", "lr": "0.000582828", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "138092"} +[2022-08-02 15:50:44,911][train_inner][INFO] - {"epoch": 9, "update": 8.223, "loss": "2.203", "ppl": "4.6", "wps": "364414", "ups": "3.09", "wpb": "118076", "bsz": "255.9", "num_updates": "423200", "lr": "0.000582626", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "138157"} +[2022-08-02 15:51:49,933][train_inner][INFO] - {"epoch": 9, "update": 8.227, "loss": "2.206", "ppl": "4.61", "wps": "364549", "ups": "3.08", "wpb": "118517", "bsz": "256", "num_updates": "423400", "lr": "0.000582424", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "138222"} +[2022-08-02 15:52:54,751][train_inner][INFO] - {"epoch": 9, "update": 8.23, "loss": "2.203", "ppl": "4.61", "wps": "366127", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "423600", "lr": "0.000582222", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.9", "wall": "138287"} +[2022-08-02 15:53:59,886][train_inner][INFO] - {"epoch": 9, "update": 8.234, "loss": "2.202", "ppl": "4.6", "wps": "365430", "ups": "3.07", "wpb": "119010", "bsz": "256", "num_updates": "423800", "lr": "0.00058202", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "138352"} +[2022-08-02 15:55:04,717][train_inner][INFO] - {"epoch": 9, "update": 8.238, "loss": "2.213", "ppl": "4.64", "wps": "363095", "ups": "3.08", "wpb": "117697", "bsz": "256", "num_updates": "424000", "lr": "0.000581818", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "138417"} +[2022-08-02 15:56:10,507][train_inner][INFO] - {"epoch": 9, "update": 8.242, "loss": "2.207", "ppl": "4.62", "wps": "361308", "ups": "3.04", "wpb": "118852", "bsz": "256", "num_updates": "424200", "lr": "0.000581616", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "138483"} +[2022-08-02 15:57:15,093][train_inner][INFO] - {"epoch": 9, "update": 8.246, "loss": "2.201", "ppl": "4.6", "wps": "364664", "ups": "3.1", "wpb": "117758", "bsz": "256", "num_updates": "424400", "lr": "0.000581414", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "138547"} +[2022-08-02 15:58:19,475][train_inner][INFO] - {"epoch": 9, "update": 8.25, "loss": "2.213", "ppl": "4.64", "wps": "367937", "ups": "3.11", "wpb": "118442", "bsz": "256", "num_updates": "424600", "lr": "0.000581212", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "138612"} +[2022-08-02 15:59:23,912][train_inner][INFO] - {"epoch": 9, "update": 8.254, "loss": "2.201", "ppl": "4.6", "wps": "365440", "ups": "3.1", "wpb": "117737", "bsz": "256", "num_updates": "424800", "lr": "0.00058101", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.4", "wall": "138676"} +[2022-08-02 16:00:28,472][train_inner][INFO] - {"epoch": 9, "update": 8.258, "loss": "2.204", "ppl": "4.61", "wps": "366665", "ups": "3.1", "wpb": "118357", "bsz": "256", "num_updates": "425000", "lr": "0.000580808", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "138741"} +[2022-08-02 16:01:33,386][train_inner][INFO] - {"epoch": 9, "update": 8.261, "loss": "2.209", "ppl": "4.62", "wps": "365329", "ups": "3.08", "wpb": "118574", "bsz": "256", "num_updates": "425200", "lr": "0.000580606", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.7", "wall": "138806"} +[2022-08-02 16:02:37,504][train_inner][INFO] - {"epoch": 9, "update": 8.265, "loss": "2.202", "ppl": "4.6", "wps": "368884", "ups": "3.12", "wpb": "118258", "bsz": "256", "num_updates": "425400", "lr": "0.000580404", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "138870"} +[2022-08-02 16:03:42,250][train_inner][INFO] - {"epoch": 9, "update": 8.269, "loss": "2.21", "ppl": "4.63", "wps": "365366", "ups": "3.09", "wpb": "118280", "bsz": "256", "num_updates": "425600", "lr": "0.000580202", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "138935"} +[2022-08-02 16:04:46,808][train_inner][INFO] - {"epoch": 9, "update": 8.273, "loss": "2.205", "ppl": "4.61", "wps": "365050", "ups": "3.1", "wpb": "117833", "bsz": "256", "num_updates": "425800", "lr": "0.00058", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "138999"} +[2022-08-02 16:05:51,746][train_inner][INFO] - {"epoch": 9, "update": 8.277, "loss": "2.204", "ppl": "4.61", "wps": "364847", "ups": "3.08", "wpb": "118460", "bsz": "256", "num_updates": "426000", "lr": "0.000579798", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.1", "wall": "139064"} +[2022-08-02 16:06:56,475][train_inner][INFO] - {"epoch": 9, "update": 8.281, "loss": "2.206", "ppl": "4.61", "wps": "364796", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "426200", "lr": "0.000579596", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "139129"} +[2022-08-02 16:08:01,609][train_inner][INFO] - {"epoch": 9, "update": 8.285, "loss": "2.207", "ppl": "4.62", "wps": "362787", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "426400", "lr": "0.000579394", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "139194"} +[2022-08-02 16:09:06,180][train_inner][INFO] - {"epoch": 9, "update": 8.289, "loss": "2.205", "ppl": "4.61", "wps": "366550", "ups": "3.1", "wpb": "118340", "bsz": "256", "num_updates": "426600", "lr": "0.000579192", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "139259"} +[2022-08-02 16:10:10,862][train_inner][INFO] - {"epoch": 9, "update": 8.293, "loss": "2.206", "ppl": "4.61", "wps": "363886", "ups": "3.09", "wpb": "117683", "bsz": "256", "num_updates": "426800", "lr": "0.00057899", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.2", "wall": "139323"} +[2022-08-02 16:11:15,587][train_inner][INFO] - {"epoch": 9, "update": 8.296, "loss": "2.206", "ppl": "4.61", "wps": "366445", "ups": "3.09", "wpb": "118588", "bsz": "256", "num_updates": "427000", "lr": "0.000578788", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "139388"} +[2022-08-02 16:12:20,284][train_inner][INFO] - {"epoch": 9, "update": 8.3, "loss": "2.204", "ppl": "4.61", "wps": "365513", "ups": "3.09", "wpb": "118237", "bsz": "256", "num_updates": "427200", "lr": "0.000578586", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.1", "wall": "139453"} +[2022-08-02 16:13:25,313][train_inner][INFO] - {"epoch": 9, "update": 8.304, "loss": "2.2", "ppl": "4.59", "wps": "364509", "ups": "3.08", "wpb": "118516", "bsz": "256", "num_updates": "427400", "lr": "0.000578384", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "139518"} +[2022-08-02 16:14:29,918][train_inner][INFO] - {"epoch": 9, "update": 8.308, "loss": "2.206", "ppl": "4.61", "wps": "366807", "ups": "3.1", "wpb": "118486", "bsz": "256", "num_updates": "427600", "lr": "0.000578182", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "139582"} +[2022-08-02 16:15:35,838][train_inner][INFO] - {"epoch": 9, "update": 8.312, "loss": "2.202", "ppl": "4.6", "wps": "358820", "ups": "3.03", "wpb": "118265", "bsz": "256", "num_updates": "427800", "lr": "0.00057798", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "139648"} +[2022-08-02 16:16:40,731][train_inner][INFO] - {"epoch": 9, "update": 8.316, "loss": "2.196", "ppl": "4.58", "wps": "365120", "ups": "3.08", "wpb": "118466", "bsz": "256", "num_updates": "428000", "lr": "0.000577778", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "139713"} +[2022-08-02 16:17:45,584][train_inner][INFO] - {"epoch": 9, "update": 8.32, "loss": "2.206", "ppl": "4.61", "wps": "365032", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "428200", "lr": "0.000577576", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "139778"} +[2022-08-02 16:18:50,251][train_inner][INFO] - {"epoch": 9, "update": 8.324, "loss": "2.202", "ppl": "4.6", "wps": "364811", "ups": "3.09", "wpb": "117955", "bsz": "256", "num_updates": "428400", "lr": "0.000577374", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "139843"} +[2022-08-02 16:19:54,909][train_inner][INFO] - {"epoch": 9, "update": 8.328, "loss": "2.205", "ppl": "4.61", "wps": "365081", "ups": "3.09", "wpb": "118026", "bsz": "256", "num_updates": "428600", "lr": "0.000577172", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "139907"} +[2022-08-02 16:20:59,738][train_inner][INFO] - {"epoch": 9, "update": 8.331, "loss": "2.205", "ppl": "4.61", "wps": "363333", "ups": "3.09", "wpb": "117771", "bsz": "256", "num_updates": "428800", "lr": "0.00057697", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26", "wall": "139972"} +[2022-08-02 16:22:04,736][train_inner][INFO] - {"epoch": 9, "update": 8.335, "loss": "2.208", "ppl": "4.62", "wps": "363670", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "429000", "lr": "0.000576768", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "140037"} +[2022-08-02 16:23:09,776][train_inner][INFO] - {"epoch": 9, "update": 8.339, "loss": "2.208", "ppl": "4.62", "wps": "361939", "ups": "3.08", "wpb": "117700", "bsz": "256", "num_updates": "429200", "lr": "0.000576566", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "140102"} +[2022-08-02 16:24:14,488][train_inner][INFO] - {"epoch": 9, "update": 8.343, "loss": "2.196", "ppl": "4.58", "wps": "366257", "ups": "3.09", "wpb": "118504", "bsz": "256", "num_updates": "429400", "lr": "0.000576364", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "140167"} +[2022-08-02 16:25:19,366][train_inner][INFO] - {"epoch": 9, "update": 8.347, "loss": "2.209", "ppl": "4.62", "wps": "365042", "ups": "3.08", "wpb": "118415", "bsz": "256", "num_updates": "429600", "lr": "0.000576162", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "140232"} +[2022-08-02 16:26:24,365][train_inner][INFO] - {"epoch": 9, "update": 8.351, "loss": "2.202", "ppl": "4.6", "wps": "363037", "ups": "3.08", "wpb": "117984", "bsz": "255.9", "num_updates": "429800", "lr": "0.00057596", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.8", "wall": "140297"} +[2022-08-02 16:27:29,123][train_inner][INFO] - {"epoch": 9, "update": 8.355, "loss": "2.205", "ppl": "4.61", "wps": "365147", "ups": "3.09", "wpb": "118229", "bsz": "256", "num_updates": "430000", "lr": "0.000575758", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "140361"} +[2022-08-02 16:28:34,245][train_inner][INFO] - {"epoch": 9, "update": 8.359, "loss": "2.203", "ppl": "4.6", "wps": "362782", "ups": "3.07", "wpb": "118124", "bsz": "256", "num_updates": "430200", "lr": "0.000575556", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "140427"} +[2022-08-02 16:29:38,662][train_inner][INFO] - {"epoch": 9, "update": 8.362, "loss": "2.207", "ppl": "4.62", "wps": "365953", "ups": "3.1", "wpb": "117866", "bsz": "256", "num_updates": "430400", "lr": "0.000575354", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "140491"} +[2022-08-02 16:30:43,528][train_inner][INFO] - {"epoch": 9, "update": 8.366, "loss": "2.205", "ppl": "4.61", "wps": "365447", "ups": "3.08", "wpb": "118524", "bsz": "256", "num_updates": "430600", "lr": "0.000575152", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.7", "wall": "140556"} +[2022-08-02 16:31:48,534][train_inner][INFO] - {"epoch": 9, "update": 8.37, "loss": "2.202", "ppl": "4.6", "wps": "363286", "ups": "3.08", "wpb": "118076", "bsz": "256", "num_updates": "430800", "lr": "0.000574949", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "140621"} +[2022-08-02 16:32:53,202][train_inner][INFO] - {"epoch": 9, "update": 8.374, "loss": "2.206", "ppl": "4.61", "wps": "365526", "ups": "3.09", "wpb": "118188", "bsz": "256", "num_updates": "431000", "lr": "0.000574747", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "140686"} +[2022-08-02 16:33:18,622][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 16:33:58,257][train_inner][INFO] - {"epoch": 9, "update": 8.378, "loss": "2.204", "ppl": "4.61", "wps": "363936", "ups": "3.07", "wpb": "118378", "bsz": "256", "num_updates": "431200", "lr": "0.000574545", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "140751"} +[2022-08-02 16:35:03,558][train_inner][INFO] - {"epoch": 9, "update": 8.382, "loss": "2.204", "ppl": "4.61", "wps": "363039", "ups": "3.06", "wpb": "118533", "bsz": "256", "num_updates": "431400", "lr": "0.000574343", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "140816"} +[2022-08-02 16:36:08,100][train_inner][INFO] - {"epoch": 9, "update": 8.386, "loss": "2.202", "ppl": "4.6", "wps": "367070", "ups": "3.1", "wpb": "118455", "bsz": "256", "num_updates": "431600", "lr": "0.000574141", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "140880"} +[2022-08-02 16:37:12,912][train_inner][INFO] - {"epoch": 9, "update": 8.39, "loss": "2.197", "ppl": "4.59", "wps": "366297", "ups": "3.09", "wpb": "118701", "bsz": "256", "num_updates": "431800", "lr": "0.000573939", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "140945"} +[2022-08-02 16:38:17,893][train_inner][INFO] - {"epoch": 9, "update": 8.394, "loss": "2.21", "ppl": "4.63", "wps": "362383", "ups": "3.08", "wpb": "117738", "bsz": "256", "num_updates": "432000", "lr": "0.000573737", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "141010"} +[2022-08-02 16:39:22,598][train_inner][INFO] - {"epoch": 9, "update": 8.397, "loss": "2.202", "ppl": "4.6", "wps": "366667", "ups": "3.09", "wpb": "118624", "bsz": "256", "num_updates": "432200", "lr": "0.000573535", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "141075"} +[2022-08-02 16:40:27,490][train_inner][INFO] - {"epoch": 9, "update": 8.401, "loss": "2.197", "ppl": "4.58", "wps": "363653", "ups": "3.08", "wpb": "117989", "bsz": "256", "num_updates": "432400", "lr": "0.000573333", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "141140"} +[2022-08-02 16:41:32,177][train_inner][INFO] - {"epoch": 9, "update": 8.405, "loss": "2.204", "ppl": "4.61", "wps": "363873", "ups": "3.09", "wpb": "117689", "bsz": "256", "num_updates": "432600", "lr": "0.000573131", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "141204"} +[2022-08-02 16:42:36,643][train_inner][INFO] - {"epoch": 9, "update": 8.409, "loss": "2.203", "ppl": "4.6", "wps": "367303", "ups": "3.1", "wpb": "118390", "bsz": "256", "num_updates": "432800", "lr": "0.000572929", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "141269"} +[2022-08-02 16:43:41,616][train_inner][INFO] - {"epoch": 9, "update": 8.413, "loss": "2.198", "ppl": "4.59", "wps": "364257", "ups": "3.08", "wpb": "118334", "bsz": "256", "num_updates": "433000", "lr": "0.000572727", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "141334"} +[2022-08-02 16:44:46,825][train_inner][INFO] - {"epoch": 9, "update": 8.417, "loss": "2.196", "ppl": "4.58", "wps": "363403", "ups": "3.07", "wpb": "118484", "bsz": "256", "num_updates": "433200", "lr": "0.000572525", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "141399"} +[2022-08-02 16:45:51,744][train_inner][INFO] - {"epoch": 9, "update": 8.421, "loss": "2.202", "ppl": "4.6", "wps": "363652", "ups": "3.08", "wpb": "118036", "bsz": "256", "num_updates": "433400", "lr": "0.000572323", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "141464"} +[2022-08-02 16:46:55,839][train_inner][INFO] - {"epoch": 9, "update": 8.425, "loss": "2.205", "ppl": "4.61", "wps": "368641", "ups": "3.12", "wpb": "118139", "bsz": "256", "num_updates": "433600", "lr": "0.000572121", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "141528"} +[2022-08-02 16:48:00,157][train_inner][INFO] - {"epoch": 9, "update": 8.429, "loss": "2.198", "ppl": "4.59", "wps": "366729", "ups": "3.11", "wpb": "117935", "bsz": "256", "num_updates": "433800", "lr": "0.000571919", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "141592"} +[2022-08-02 16:49:04,521][train_inner][INFO] - {"epoch": 9, "update": 8.432, "loss": "2.195", "ppl": "4.58", "wps": "366125", "ups": "3.11", "wpb": "117825", "bsz": "256", "num_updates": "434000", "lr": "0.000571717", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "141657"} +[2022-08-02 16:50:09,385][train_inner][INFO] - {"epoch": 9, "update": 8.436, "loss": "2.202", "ppl": "4.6", "wps": "364608", "ups": "3.08", "wpb": "118248", "bsz": "256", "num_updates": "434200", "lr": "0.000571515", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "141722"} +[2022-08-02 16:51:13,590][train_inner][INFO] - {"epoch": 9, "update": 8.44, "loss": "2.202", "ppl": "4.6", "wps": "368410", "ups": "3.12", "wpb": "118268", "bsz": "256", "num_updates": "434400", "lr": "0.000571313", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "141786"} +[2022-08-02 16:52:18,724][train_inner][INFO] - {"epoch": 9, "update": 8.444, "loss": "2.196", "ppl": "4.58", "wps": "363728", "ups": "3.07", "wpb": "118453", "bsz": "256", "num_updates": "434600", "lr": "0.000571111", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "141851"} +[2022-08-02 16:53:23,971][train_inner][INFO] - {"epoch": 9, "update": 8.448, "loss": "2.195", "ppl": "4.58", "wps": "361899", "ups": "3.07", "wpb": "118063", "bsz": "256", "num_updates": "434800", "lr": "0.000570909", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25", "wall": "141916"} +[2022-08-02 16:54:29,468][train_inner][INFO] - {"epoch": 9, "update": 8.452, "loss": "2.2", "ppl": "4.59", "wps": "360879", "ups": "3.05", "wpb": "118179", "bsz": "256", "num_updates": "435000", "lr": "0.000570707", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "141982"} +[2022-08-02 16:55:34,309][train_inner][INFO] - {"epoch": 9, "update": 8.456, "loss": "2.197", "ppl": "4.58", "wps": "365632", "ups": "3.08", "wpb": "118538", "bsz": "256", "num_updates": "435200", "lr": "0.000570505", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.8", "wall": "142047"} +[2022-08-02 16:56:39,209][train_inner][INFO] - {"epoch": 9, "update": 8.46, "loss": "2.192", "ppl": "4.57", "wps": "366444", "ups": "3.08", "wpb": "118909", "bsz": "256", "num_updates": "435400", "lr": "0.000570303", "gnorm": "0.664", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "142112"} +[2022-08-02 16:57:44,271][train_inner][INFO] - {"epoch": 9, "update": 8.464, "loss": "2.204", "ppl": "4.61", "wps": "362980", "ups": "3.07", "wpb": "118080", "bsz": "256", "num_updates": "435600", "lr": "0.000570101", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "142177"} +[2022-08-02 16:58:48,942][train_inner][INFO] - {"epoch": 9, "update": 8.467, "loss": "2.195", "ppl": "4.58", "wps": "366853", "ups": "3.09", "wpb": "118623", "bsz": "256", "num_updates": "435800", "lr": "0.000569899", "gnorm": "0.667", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.4", "wall": "142241"} +[2022-08-02 16:59:53,763][train_inner][INFO] - {"epoch": 9, "update": 8.471, "loss": "2.198", "ppl": "4.59", "wps": "365233", "ups": "3.09", "wpb": "118371", "bsz": "256", "num_updates": "436000", "lr": "0.000569697", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "142306"} +[2022-08-02 17:00:58,373][train_inner][INFO] - {"epoch": 9, "update": 8.475, "loss": "2.194", "ppl": "4.58", "wps": "365341", "ups": "3.1", "wpb": "118018", "bsz": "256", "num_updates": "436200", "lr": "0.000569495", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "142371"} +[2022-08-02 17:02:03,779][train_inner][INFO] - {"epoch": 9, "update": 8.479, "loss": "2.2", "ppl": "4.6", "wps": "361522", "ups": "3.06", "wpb": "118227", "bsz": "256", "num_updates": "436400", "lr": "0.000569293", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "142436"} +[2022-08-02 17:03:08,725][train_inner][INFO] - {"epoch": 9, "update": 8.483, "loss": "2.19", "ppl": "4.56", "wps": "365762", "ups": "3.08", "wpb": "118771", "bsz": "256", "num_updates": "436600", "lr": "0.000569091", "gnorm": "0.665", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "142501"} +[2022-08-02 17:03:23,041][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 17:04:13,873][train_inner][INFO] - {"epoch": 9, "update": 8.487, "loss": "2.199", "ppl": "4.59", "wps": "363337", "ups": "3.07", "wpb": "118353", "bsz": "256", "num_updates": "436800", "lr": "0.000568889", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "142566"} +[2022-08-02 17:05:18,100][train_inner][INFO] - {"epoch": 9, "update": 8.491, "loss": "2.202", "ppl": "4.6", "wps": "368161", "ups": "3.11", "wpb": "118228", "bsz": "256", "num_updates": "437000", "lr": "0.000568687", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "142630"} +[2022-08-02 17:06:23,051][train_inner][INFO] - {"epoch": 9, "update": 8.495, "loss": "2.195", "ppl": "4.58", "wps": "364475", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "437200", "lr": "0.000568485", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "142695"} +[2022-08-02 17:07:27,935][train_inner][INFO] - {"epoch": 9, "update": 8.498, "loss": "2.195", "ppl": "4.58", "wps": "364828", "ups": "3.08", "wpb": "118355", "bsz": "256", "num_updates": "437400", "lr": "0.000568283", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "142760"} +[2022-08-02 17:08:32,552][train_inner][INFO] - {"epoch": 9, "update": 8.502, "loss": "2.196", "ppl": "4.58", "wps": "365648", "ups": "3.1", "wpb": "118134", "bsz": "256", "num_updates": "437600", "lr": "0.000568081", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "142825"} +[2022-08-02 17:09:37,460][train_inner][INFO] - {"epoch": 9, "update": 8.506, "loss": "2.203", "ppl": "4.6", "wps": "364211", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "437800", "lr": "0.000567879", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "142890"} +[2022-08-02 17:10:42,133][train_inner][INFO] - {"epoch": 9, "update": 8.51, "loss": "2.195", "ppl": "4.58", "wps": "365994", "ups": "3.09", "wpb": "118350", "bsz": "256", "num_updates": "438000", "lr": "0.000567677", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "142954"} +[2022-08-02 17:11:47,003][train_inner][INFO] - {"epoch": 9, "update": 8.514, "loss": "2.197", "ppl": "4.59", "wps": "362776", "ups": "3.08", "wpb": "117663", "bsz": "256", "num_updates": "438200", "lr": "0.000567475", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "143019"} +[2022-08-02 17:12:51,757][train_inner][INFO] - {"epoch": 9, "update": 8.518, "loss": "2.192", "ppl": "4.57", "wps": "366338", "ups": "3.09", "wpb": "118609", "bsz": "256", "num_updates": "438400", "lr": "0.000567273", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "143084"} +[2022-08-02 17:13:56,367][train_inner][INFO] - {"epoch": 9, "update": 8.522, "loss": "2.198", "ppl": "4.59", "wps": "364919", "ups": "3.1", "wpb": "117884", "bsz": "256", "num_updates": "438600", "lr": "0.000567071", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "143149"} +[2022-08-02 17:15:01,174][train_inner][INFO] - {"epoch": 9, "update": 8.526, "loss": "2.198", "ppl": "4.59", "wps": "365515", "ups": "3.09", "wpb": "118438", "bsz": "256", "num_updates": "438800", "lr": "0.000566869", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.3", "wall": "143213"} +[2022-08-02 17:16:01,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 17:16:06,095][train_inner][INFO] - {"epoch": 9, "update": 8.53, "loss": "2.198", "ppl": "4.59", "wps": "363680", "ups": "3.08", "wpb": "118051", "bsz": "256", "num_updates": "439000", "lr": "0.000566667", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "143278"} +[2022-08-02 17:16:37,227][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 17:17:11,290][train_inner][INFO] - {"epoch": 9, "update": 8.533, "loss": "2.191", "ppl": "4.57", "wps": "362863", "ups": "3.07", "wpb": "118284", "bsz": "256", "num_updates": "439200", "lr": "0.000566465", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "143344"} +[2022-08-02 17:18:16,569][train_inner][INFO] - {"epoch": 9, "update": 8.537, "loss": "2.198", "ppl": "4.59", "wps": "362206", "ups": "3.06", "wpb": "118221", "bsz": "256", "num_updates": "439400", "lr": "0.000566263", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "143409"} +[2022-08-02 17:19:21,430][train_inner][INFO] - {"epoch": 9, "update": 8.541, "loss": "2.192", "ppl": "4.57", "wps": "365726", "ups": "3.08", "wpb": "118605", "bsz": "256", "num_updates": "439600", "lr": "0.000566061", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "143474"} +[2022-08-02 17:20:26,242][train_inner][INFO] - {"epoch": 9, "update": 8.545, "loss": "2.197", "ppl": "4.59", "wps": "366008", "ups": "3.09", "wpb": "118608", "bsz": "256", "num_updates": "439800", "lr": "0.000565859", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "143539"} +[2022-08-02 17:21:31,136][train_inner][INFO] - {"epoch": 9, "update": 8.549, "loss": "2.194", "ppl": "4.58", "wps": "366285", "ups": "3.08", "wpb": "118846", "bsz": "256", "num_updates": "440000", "lr": "0.000565657", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "143603"} +[2022-08-02 17:22:35,872][train_inner][INFO] - {"epoch": 9, "update": 8.553, "loss": "2.193", "ppl": "4.57", "wps": "367252", "ups": "3.09", "wpb": "118871", "bsz": "256", "num_updates": "440200", "lr": "0.000565455", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24", "wall": "143668"} +[2022-08-02 17:23:40,538][train_inner][INFO] - {"epoch": 9, "update": 8.557, "loss": "2.199", "ppl": "4.59", "wps": "366795", "ups": "3.09", "wpb": "118594", "bsz": "256", "num_updates": "440400", "lr": "0.000565253", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "143733"} +[2022-08-02 17:24:45,222][train_inner][INFO] - {"epoch": 9, "update": 8.561, "loss": "2.195", "ppl": "4.58", "wps": "366785", "ups": "3.09", "wpb": "118625", "bsz": "256", "num_updates": "440600", "lr": "0.000565051", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "143798"} +[2022-08-02 17:25:50,153][train_inner][INFO] - {"epoch": 9, "update": 8.565, "loss": "2.198", "ppl": "4.59", "wps": "364463", "ups": "3.08", "wpb": "118321", "bsz": "256", "num_updates": "440800", "lr": "0.000564848", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "143862"} +[2022-08-02 17:26:54,176][train_inner][INFO] - {"epoch": 9, "update": 8.568, "loss": "2.199", "ppl": "4.59", "wps": "368537", "ups": "3.12", "wpb": "117972", "bsz": "256", "num_updates": "441000", "lr": "0.000564646", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "143926"} +[2022-08-02 17:27:58,976][train_inner][INFO] - {"epoch": 9, "update": 8.572, "loss": "2.187", "ppl": "4.55", "wps": "366763", "ups": "3.09", "wpb": "118831", "bsz": "256", "num_updates": "441200", "lr": "0.000564444", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "143991"} +[2022-08-02 17:29:03,966][train_inner][INFO] - {"epoch": 9, "update": 8.576, "loss": "2.203", "ppl": "4.6", "wps": "363417", "ups": "3.08", "wpb": "118089", "bsz": "256", "num_updates": "441400", "lr": "0.000564242", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "144056"} +[2022-08-02 17:30:08,704][train_inner][INFO] - {"epoch": 9, "update": 8.58, "loss": "2.195", "ppl": "4.58", "wps": "364524", "ups": "3.09", "wpb": "117992", "bsz": "256", "num_updates": "441600", "lr": "0.00056404", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "144121"} +[2022-08-02 17:31:13,487][train_inner][INFO] - {"epoch": 9, "update": 8.584, "loss": "2.197", "ppl": "4.58", "wps": "365785", "ups": "3.09", "wpb": "118482", "bsz": "256", "num_updates": "441800", "lr": "0.000563838", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "144186"} +[2022-08-02 17:32:18,560][train_inner][INFO] - {"epoch": 9, "update": 8.588, "loss": "2.195", "ppl": "4.58", "wps": "361934", "ups": "3.07", "wpb": "117758", "bsz": "256", "num_updates": "442000", "lr": "0.000563636", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.8", "wall": "144251"} +[2022-08-02 17:33:22,915][train_inner][INFO] - {"epoch": 9, "update": 8.592, "loss": "2.2", "ppl": "4.59", "wps": "368250", "ups": "3.11", "wpb": "118492", "bsz": "256", "num_updates": "442200", "lr": "0.000563434", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.9", "wall": "144315"} +[2022-08-02 17:34:27,744][train_inner][INFO] - {"epoch": 9, "update": 8.596, "loss": "2.198", "ppl": "4.59", "wps": "367556", "ups": "3.09", "wpb": "119139", "bsz": "256", "num_updates": "442400", "lr": "0.000563232", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "144380"} +[2022-08-02 17:35:32,824][train_inner][INFO] - {"epoch": 9, "update": 8.6, "loss": "2.201", "ppl": "4.6", "wps": "362919", "ups": "3.07", "wpb": "118091", "bsz": "256", "num_updates": "442600", "lr": "0.00056303", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "144445"} +[2022-08-02 17:36:37,933][train_inner][INFO] - {"epoch": 9, "update": 8.603, "loss": "2.192", "ppl": "4.57", "wps": "363161", "ups": "3.07", "wpb": "118225", "bsz": "256", "num_updates": "442800", "lr": "0.000562828", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "144510"} +[2022-08-02 17:37:43,069][train_inner][INFO] - {"epoch": 9, "update": 8.607, "loss": "2.195", "ppl": "4.58", "wps": "364004", "ups": "3.07", "wpb": "118546", "bsz": "256", "num_updates": "443000", "lr": "0.000562626", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "144575"} +[2022-08-02 17:38:47,882][train_inner][INFO] - {"epoch": 9, "update": 8.611, "loss": "2.191", "ppl": "4.57", "wps": "366596", "ups": "3.09", "wpb": "118798", "bsz": "256", "num_updates": "443200", "lr": "0.000562424", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.5", "wall": "144640"} +[2022-08-02 17:39:52,881][train_inner][INFO] - {"epoch": 9, "update": 8.615, "loss": "2.192", "ppl": "4.57", "wps": "365104", "ups": "3.08", "wpb": "118655", "bsz": "256", "num_updates": "443400", "lr": "0.000562222", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "144705"} +[2022-08-02 17:40:58,095][train_inner][INFO] - {"epoch": 9, "update": 8.619, "loss": "2.189", "ppl": "4.56", "wps": "363833", "ups": "3.07", "wpb": "118633", "bsz": "256", "num_updates": "443600", "lr": "0.00056202", "gnorm": "0.666", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "144770"} +[2022-08-02 17:42:03,393][train_inner][INFO] - {"epoch": 9, "update": 8.623, "loss": "2.186", "ppl": "4.55", "wps": "362448", "ups": "3.06", "wpb": "118335", "bsz": "256", "num_updates": "443800", "lr": "0.000561818", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "144836"} +[2022-08-02 17:42:29,570][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 17:43:08,626][train_inner][INFO] - {"epoch": 9, "update": 8.627, "loss": "2.197", "ppl": "4.58", "wps": "361531", "ups": "3.07", "wpb": "117916", "bsz": "256", "num_updates": "444000", "lr": "0.000561616", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "144901"} +[2022-08-02 17:44:13,457][train_inner][INFO] - {"epoch": 9, "update": 8.631, "loss": "2.189", "ppl": "4.56", "wps": "366414", "ups": "3.08", "wpb": "118775", "bsz": "256", "num_updates": "444200", "lr": "0.000561414", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "144966"} +[2022-08-02 17:45:18,354][train_inner][INFO] - {"epoch": 9, "update": 8.634, "loss": "2.193", "ppl": "4.57", "wps": "363992", "ups": "3.08", "wpb": "118107", "bsz": "256", "num_updates": "444400", "lr": "0.000561212", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "145031"} +[2022-08-02 17:46:23,065][train_inner][INFO] - {"epoch": 9, "update": 8.638, "loss": "2.196", "ppl": "4.58", "wps": "365344", "ups": "3.09", "wpb": "118207", "bsz": "256", "num_updates": "444600", "lr": "0.00056101", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "145095"} +[2022-08-02 17:47:27,778][train_inner][INFO] - {"epoch": 9, "update": 8.642, "loss": "2.198", "ppl": "4.59", "wps": "364979", "ups": "3.09", "wpb": "118093", "bsz": "256", "num_updates": "444800", "lr": "0.000560808", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "145160"} +[2022-08-02 17:48:32,982][train_inner][INFO] - {"epoch": 9, "update": 8.646, "loss": "2.19", "ppl": "4.56", "wps": "363406", "ups": "3.07", "wpb": "118475", "bsz": "256", "num_updates": "445000", "lr": "0.000560606", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "145225"} +[2022-08-02 17:49:38,012][train_inner][INFO] - {"epoch": 9, "update": 8.65, "loss": "2.186", "ppl": "4.55", "wps": "365062", "ups": "3.08", "wpb": "118698", "bsz": "256", "num_updates": "445200", "lr": "0.000560404", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "145290"} +[2022-08-02 17:50:42,761][train_inner][INFO] - {"epoch": 9, "update": 8.654, "loss": "2.191", "ppl": "4.57", "wps": "366842", "ups": "3.09", "wpb": "118762", "bsz": "256", "num_updates": "445400", "lr": "0.000560202", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.6", "wall": "145355"} +[2022-08-02 17:51:47,657][train_inner][INFO] - {"epoch": 9, "update": 8.658, "loss": "2.193", "ppl": "4.57", "wps": "364344", "ups": "3.08", "wpb": "118220", "bsz": "256", "num_updates": "445600", "lr": "0.00056", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "145420"} +[2022-08-02 17:52:52,479][train_inner][INFO] - {"epoch": 9, "update": 8.662, "loss": "2.187", "ppl": "4.55", "wps": "365632", "ups": "3.09", "wpb": "118503", "bsz": "256", "num_updates": "445800", "lr": "0.000559798", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "145485"} +[2022-08-02 17:53:45,817][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 17:53:57,523][train_inner][INFO] - {"epoch": 9, "update": 8.666, "loss": "2.192", "ppl": "4.57", "wps": "364142", "ups": "3.07", "wpb": "118425", "bsz": "256", "num_updates": "446000", "lr": "0.000559596", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "145550"} +[2022-08-02 17:55:02,683][train_inner][INFO] - {"epoch": 9, "update": 8.669, "loss": "2.191", "ppl": "4.57", "wps": "362818", "ups": "3.07", "wpb": "118206", "bsz": "256", "num_updates": "446200", "lr": "0.000559394", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "145615"} +[2022-08-02 17:56:07,715][train_inner][INFO] - {"epoch": 9, "update": 8.673, "loss": "2.191", "ppl": "4.57", "wps": "364183", "ups": "3.08", "wpb": "118415", "bsz": "256", "num_updates": "446400", "lr": "0.000559192", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.7", "wall": "145680"} +[2022-08-02 17:57:12,413][train_inner][INFO] - {"epoch": 9, "update": 8.677, "loss": "2.192", "ppl": "4.57", "wps": "366216", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "446600", "lr": "0.00055899", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "145745"} +[2022-08-02 17:58:17,433][train_inner][INFO] - {"epoch": 9, "update": 8.681, "loss": "2.187", "ppl": "4.55", "wps": "363005", "ups": "3.08", "wpb": "118011", "bsz": "256", "num_updates": "446800", "lr": "0.000558788", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "145810"} +[2022-08-02 17:59:22,259][train_inner][INFO] - {"epoch": 9, "update": 8.685, "loss": "2.195", "ppl": "4.58", "wps": "365566", "ups": "3.09", "wpb": "118489", "bsz": "256", "num_updates": "447000", "lr": "0.000558586", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26", "wall": "145875"} +[2022-08-02 18:00:27,476][train_inner][INFO] - {"epoch": 9, "update": 8.689, "loss": "2.192", "ppl": "4.57", "wps": "363347", "ups": "3.07", "wpb": "118481", "bsz": "256", "num_updates": "447200", "lr": "0.000558384", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "145940"} +[2022-08-02 18:01:32,708][train_inner][INFO] - {"epoch": 9, "update": 8.693, "loss": "2.196", "ppl": "4.58", "wps": "361309", "ups": "3.07", "wpb": "117843", "bsz": "256", "num_updates": "447400", "lr": "0.000558182", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.2", "wall": "146005"} +[2022-08-02 18:02:37,745][train_inner][INFO] - {"epoch": 9, "update": 8.697, "loss": "2.193", "ppl": "4.57", "wps": "362755", "ups": "3.08", "wpb": "117961", "bsz": "256", "num_updates": "447600", "lr": "0.00055798", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "146070"} +[2022-08-02 18:03:42,693][train_inner][INFO] - {"epoch": 9, "update": 8.701, "loss": "2.192", "ppl": "4.57", "wps": "365739", "ups": "3.08", "wpb": "118768", "bsz": "256", "num_updates": "447800", "lr": "0.000557778", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "146135"} +[2022-08-02 18:04:47,603][train_inner][INFO] - {"epoch": 9, "update": 8.704, "loss": "2.188", "ppl": "4.56", "wps": "364510", "ups": "3.08", "wpb": "118299", "bsz": "256", "num_updates": "448000", "lr": "0.000557576", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "146200"} +[2022-08-02 18:05:52,394][train_inner][INFO] - {"epoch": 9, "update": 8.708, "loss": "2.189", "ppl": "4.56", "wps": "364077", "ups": "3.09", "wpb": "117943", "bsz": "256", "num_updates": "448200", "lr": "0.000557374", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.2", "wall": "146265"} +[2022-08-02 18:06:57,544][train_inner][INFO] - {"epoch": 9, "update": 8.712, "loss": "2.188", "ppl": "4.56", "wps": "363553", "ups": "3.07", "wpb": "118426", "bsz": "256", "num_updates": "448400", "lr": "0.000557172", "gnorm": "0.667", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.8", "wall": "146330"} +[2022-08-02 18:07:57,977][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 18:08:02,553][train_inner][INFO] - {"epoch": 9, "update": 8.716, "loss": "2.191", "ppl": "4.57", "wps": "362777", "ups": "3.08", "wpb": "117917", "bsz": "256", "num_updates": "448600", "lr": "0.00055697", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "146395"} +[2022-08-02 18:09:07,525][train_inner][INFO] - {"epoch": 9, "update": 8.72, "loss": "2.19", "ppl": "4.56", "wps": "364387", "ups": "3.08", "wpb": "118374", "bsz": "256", "num_updates": "448800", "lr": "0.000556768", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.7", "wall": "146460"} +[2022-08-02 18:10:12,343][train_inner][INFO] - {"epoch": 9, "update": 8.724, "loss": "2.183", "ppl": "4.54", "wps": "366297", "ups": "3.09", "wpb": "118711", "bsz": "256", "num_updates": "449000", "lr": "0.000556566", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "146525"} +[2022-08-02 18:11:17,310][train_inner][INFO] - {"epoch": 9, "update": 8.728, "loss": "2.188", "ppl": "4.56", "wps": "365667", "ups": "3.08", "wpb": "118779", "bsz": "256", "num_updates": "449200", "lr": "0.000556364", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "146590"} +[2022-08-02 18:12:22,378][train_inner][INFO] - {"epoch": 9, "update": 8.732, "loss": "2.192", "ppl": "4.57", "wps": "364680", "ups": "3.07", "wpb": "118642", "bsz": "256", "num_updates": "449400", "lr": "0.000556162", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "146655"} +[2022-08-02 18:13:27,045][train_inner][INFO] - {"epoch": 9, "update": 8.736, "loss": "2.188", "ppl": "4.56", "wps": "365980", "ups": "3.09", "wpb": "118333", "bsz": "256", "num_updates": "449600", "lr": "0.00055596", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "146719"} +[2022-08-02 18:14:31,886][train_inner][INFO] - {"epoch": 9, "update": 8.739, "loss": "2.189", "ppl": "4.56", "wps": "365841", "ups": "3.08", "wpb": "118606", "bsz": "256", "num_updates": "449800", "lr": "0.000555758", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "146784"} +[2022-08-02 18:15:36,652][train_inner][INFO] - {"epoch": 9, "update": 8.743, "loss": "2.189", "ppl": "4.56", "wps": "365496", "ups": "3.09", "wpb": "118356", "bsz": "256", "num_updates": "450000", "lr": "0.000555556", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "146849"} +[2022-08-02 18:15:36,653][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 18:15:59,444][valid][INFO] - {"epoch": 9, "valid_loss": "2.094", "valid_ppl": "4.27", "valid_wps": "1.57319e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "450000", "valid_best_loss": "2.094"} +[2022-08-02 18:15:59,448][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 9 @ 450000 updates +[2022-08-02 18:15:59,449][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_9_450000.pt +[2022-08-02 18:16:09,444][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_9_450000.pt +[2022-08-02 18:16:41,724][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_9_450000.pt (epoch 9 @ 450000 updates, score 2.094) (writing took 42.27624590694904 seconds) +[2022-08-02 18:17:46,958][train_inner][INFO] - {"epoch": 9, "update": 8.747, "loss": "2.187", "ppl": "4.55", "wps": "181827", "ups": "1.53", "wpb": "118465", "bsz": "256", "num_updates": "450200", "lr": "0.000555354", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "146979"} +[2022-08-02 18:18:52,216][train_inner][INFO] - {"epoch": 9, "update": 8.751, "loss": "2.194", "ppl": "4.58", "wps": "360156", "ups": "3.06", "wpb": "117513", "bsz": "256", "num_updates": "450400", "lr": "0.000555152", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "147045"} +[2022-08-02 18:19:56,863][train_inner][INFO] - {"epoch": 9, "update": 8.755, "loss": "2.194", "ppl": "4.58", "wps": "365883", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "450600", "lr": "0.000554949", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "147109"} +[2022-08-02 18:21:01,963][train_inner][INFO] - {"epoch": 9, "update": 8.759, "loss": "2.188", "ppl": "4.56", "wps": "363512", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "450800", "lr": "0.000554747", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.9", "wall": "147174"} +[2022-08-02 18:22:06,567][train_inner][INFO] - {"epoch": 9, "update": 8.763, "loss": "2.191", "ppl": "4.57", "wps": "363690", "ups": "3.1", "wpb": "117477", "bsz": "256", "num_updates": "451000", "lr": "0.000554545", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "147239"} +[2022-08-02 18:23:11,543][train_inner][INFO] - {"epoch": 9, "update": 8.767, "loss": "2.195", "ppl": "4.58", "wps": "363973", "ups": "3.08", "wpb": "118247", "bsz": "256", "num_updates": "451200", "lr": "0.000554343", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "147304"} +[2022-08-02 18:24:16,455][train_inner][INFO] - {"epoch": 9, "update": 8.77, "loss": "2.188", "ppl": "4.56", "wps": "363717", "ups": "3.08", "wpb": "118045", "bsz": "256", "num_updates": "451400", "lr": "0.000554141", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.8", "wall": "147369"} +[2022-08-02 18:25:14,947][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 18:25:21,883][train_inner][INFO] - {"epoch": 9, "update": 8.774, "loss": "2.187", "ppl": "4.55", "wps": "362122", "ups": "3.06", "wpb": "118464", "bsz": "256", "num_updates": "451600", "lr": "0.000553939", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "147434"} +[2022-08-02 18:26:27,021][train_inner][INFO] - {"epoch": 9, "update": 8.778, "loss": "2.193", "ppl": "4.57", "wps": "362194", "ups": "3.07", "wpb": "117961", "bsz": "256", "num_updates": "451800", "lr": "0.000553737", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "147499"} +[2022-08-02 18:27:32,279][train_inner][INFO] - {"epoch": 9, "update": 8.782, "loss": "2.192", "ppl": "4.57", "wps": "364140", "ups": "3.06", "wpb": "118814", "bsz": "256", "num_updates": "452000", "lr": "0.000553535", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "147565"} +[2022-08-02 18:28:37,436][train_inner][INFO] - {"epoch": 9, "update": 8.786, "loss": "2.187", "ppl": "4.55", "wps": "362593", "ups": "3.07", "wpb": "118125", "bsz": "256", "num_updates": "452200", "lr": "0.000553333", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "147630"} +[2022-08-02 18:29:42,501][train_inner][INFO] - {"epoch": 9, "update": 8.79, "loss": "2.191", "ppl": "4.57", "wps": "365563", "ups": "3.07", "wpb": "118926", "bsz": "256", "num_updates": "452400", "lr": "0.000553131", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "147695"} +[2022-08-02 18:30:47,406][train_inner][INFO] - {"epoch": 9, "update": 8.794, "loss": "2.192", "ppl": "4.57", "wps": "364174", "ups": "3.08", "wpb": "118180", "bsz": "256", "num_updates": "452600", "lr": "0.000552929", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "147760"} +[2022-08-02 18:31:52,110][train_inner][INFO] - {"epoch": 9, "update": 8.798, "loss": "2.194", "ppl": "4.57", "wps": "364980", "ups": "3.09", "wpb": "118077", "bsz": "256", "num_updates": "452800", "lr": "0.000552727", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "147824"} +[2022-08-02 18:32:57,293][train_inner][INFO] - {"epoch": 9, "update": 8.802, "loss": "2.19", "ppl": "4.56", "wps": "364109", "ups": "3.07", "wpb": "118667", "bsz": "256", "num_updates": "453000", "lr": "0.000552525", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "147890"} +[2022-08-02 18:34:02,253][train_inner][INFO] - {"epoch": 9, "update": 8.805, "loss": "2.191", "ppl": "4.57", "wps": "365222", "ups": "3.08", "wpb": "118622", "bsz": "256", "num_updates": "453200", "lr": "0.000552323", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "147955"} +[2022-08-02 18:35:07,248][train_inner][INFO] - {"epoch": 9, "update": 8.809, "loss": "2.186", "ppl": "4.55", "wps": "365246", "ups": "3.08", "wpb": "118695", "bsz": "256", "num_updates": "453400", "lr": "0.000552121", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "148020"} +[2022-08-02 18:36:12,276][train_inner][INFO] - {"epoch": 9, "update": 8.813, "loss": "2.186", "ppl": "4.55", "wps": "365020", "ups": "3.08", "wpb": "118681", "bsz": "256", "num_updates": "453600", "lr": "0.000551919", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "148085"} +[2022-08-02 18:37:17,274][train_inner][INFO] - {"epoch": 9, "update": 8.817, "loss": "2.188", "ppl": "4.56", "wps": "364598", "ups": "3.08", "wpb": "118487", "bsz": "256", "num_updates": "453800", "lr": "0.000551717", "gnorm": "0.666", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "148150"} +[2022-08-02 18:38:22,157][train_inner][INFO] - {"epoch": 9, "update": 8.821, "loss": "2.186", "ppl": "4.55", "wps": "365543", "ups": "3.08", "wpb": "118587", "bsz": "256", "num_updates": "454000", "lr": "0.000551515", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "148214"} +[2022-08-02 18:39:27,156][train_inner][INFO] - {"epoch": 9, "update": 8.825, "loss": "2.187", "ppl": "4.55", "wps": "365115", "ups": "3.08", "wpb": "118659", "bsz": "256", "num_updates": "454200", "lr": "0.000551313", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26", "wall": "148279"} +[2022-08-02 18:40:30,305][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 18:40:32,565][train_inner][INFO] - {"epoch": 9, "update": 8.829, "loss": "2.184", "ppl": "4.54", "wps": "362341", "ups": "3.06", "wpb": "118499", "bsz": "256", "num_updates": "454400", "lr": "0.000551111", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.2", "wall": "148345"} +[2022-08-02 18:41:38,378][train_inner][INFO] - {"epoch": 9, "update": 8.833, "loss": "2.189", "ppl": "4.56", "wps": "357292", "ups": "3.04", "wpb": "117571", "bsz": "256", "num_updates": "454600", "lr": "0.000550909", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "148411"} +[2022-08-02 18:42:43,399][train_inner][INFO] - {"epoch": 9, "update": 8.837, "loss": "2.186", "ppl": "4.55", "wps": "363739", "ups": "3.08", "wpb": "118252", "bsz": "256", "num_updates": "454800", "lr": "0.000550707", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "148476"} +[2022-08-02 18:43:48,383][train_inner][INFO] - {"epoch": 9, "update": 8.84, "loss": "2.183", "ppl": "4.54", "wps": "366151", "ups": "3.08", "wpb": "118968", "bsz": "256", "num_updates": "455000", "lr": "0.000550505", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "148541"} +[2022-08-02 18:44:53,361][train_inner][INFO] - {"epoch": 9, "update": 8.844, "loss": "2.186", "ppl": "4.55", "wps": "362759", "ups": "3.08", "wpb": "117855", "bsz": "256", "num_updates": "455200", "lr": "0.000550303", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "148606"} +[2022-08-02 18:45:18,414][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 18:45:20,692][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 18:45:21,288][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-02 18:45:21,584][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-02 18:45:21,882][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 18:45:22,496][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 +[2022-08-02 18:45:59,965][train_inner][INFO] - {"epoch": 9, "update": 8.848, "loss": "2.199", "ppl": "4.59", "wps": "353759", "ups": "3", "wpb": "117806", "bsz": "256", "num_updates": "455400", "lr": "0.000550101", "gnorm": "0.892", "clip": "1", "loss_scale": "0.25", "train_wall": "66", "gb_free": "19.7", "wall": "148672"} +[2022-08-02 18:47:04,963][train_inner][INFO] - {"epoch": 9, "update": 8.852, "loss": "2.188", "ppl": "4.56", "wps": "365390", "ups": "3.08", "wpb": "118748", "bsz": "256", "num_updates": "455600", "lr": "0.000549899", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.1", "wall": "148737"} +[2022-08-02 18:48:09,507][train_inner][INFO] - {"epoch": 9, "update": 8.856, "loss": "2.19", "ppl": "4.56", "wps": "366192", "ups": "3.1", "wpb": "118175", "bsz": "256", "num_updates": "455800", "lr": "0.000549697", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.8", "wall": "148802"} +[2022-08-02 18:49:14,696][train_inner][INFO] - {"epoch": 9, "update": 8.86, "loss": "2.183", "ppl": "4.54", "wps": "365650", "ups": "3.07", "wpb": "119181", "bsz": "256", "num_updates": "456000", "lr": "0.000549495", "gnorm": "0.666", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "148867"} +[2022-08-02 18:50:19,729][train_inner][INFO] - {"epoch": 9, "update": 8.864, "loss": "2.189", "ppl": "4.56", "wps": "362411", "ups": "3.08", "wpb": "117842", "bsz": "256", "num_updates": "456200", "lr": "0.000549293", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "148932"} +[2022-08-02 18:51:24,398][train_inner][INFO] - {"epoch": 9, "update": 8.868, "loss": "2.189", "ppl": "4.56", "wps": "364045", "ups": "3.09", "wpb": "117709", "bsz": "256", "num_updates": "456400", "lr": "0.000549091", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.8", "wall": "148997"} +[2022-08-02 18:52:29,083][train_inner][INFO] - {"epoch": 9, "update": 8.872, "loss": "2.187", "ppl": "4.55", "wps": "364687", "ups": "3.09", "wpb": "117947", "bsz": "256", "num_updates": "456600", "lr": "0.000548889", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21", "wall": "149061"} +[2022-08-02 18:53:33,940][train_inner][INFO] - {"epoch": 9, "update": 8.876, "loss": "2.193", "ppl": "4.57", "wps": "363080", "ups": "3.08", "wpb": "117741", "bsz": "256", "num_updates": "456800", "lr": "0.000548687", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.4", "wall": "149126"} +[2022-08-02 18:54:38,889][train_inner][INFO] - {"epoch": 9, "update": 8.879, "loss": "2.192", "ppl": "4.57", "wps": "362149", "ups": "3.08", "wpb": "117603", "bsz": "256", "num_updates": "457000", "lr": "0.000548485", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.9", "wall": "149191"} +[2022-08-02 18:55:43,711][train_inner][INFO] - {"epoch": 9, "update": 8.883, "loss": "2.191", "ppl": "4.57", "wps": "365443", "ups": "3.09", "wpb": "118442", "bsz": "256", "num_updates": "457200", "lr": "0.000548283", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.1", "wall": "149256"} +[2022-08-02 18:56:48,883][train_inner][INFO] - {"epoch": 9, "update": 8.887, "loss": "2.186", "ppl": "4.55", "wps": "365561", "ups": "3.07", "wpb": "119121", "bsz": "256", "num_updates": "457400", "lr": "0.000548081", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "149321"} +[2022-08-02 18:57:53,620][train_inner][INFO] - {"epoch": 9, "update": 8.891, "loss": "2.186", "ppl": "4.55", "wps": "366554", "ups": "3.09", "wpb": "118646", "bsz": "256", "num_updates": "457600", "lr": "0.000547879", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "28.2", "wall": "149386"} +[2022-08-02 18:58:58,705][train_inner][INFO] - {"epoch": 9, "update": 8.895, "loss": "2.186", "ppl": "4.55", "wps": "365355", "ups": "3.07", "wpb": "118893", "bsz": "256", "num_updates": "457800", "lr": "0.000547677", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.9", "wall": "149451"} +[2022-08-02 19:00:03,616][train_inner][INFO] - {"epoch": 9, "update": 8.899, "loss": "2.182", "ppl": "4.54", "wps": "366000", "ups": "3.08", "wpb": "118786", "bsz": "256", "num_updates": "458000", "lr": "0.000547475", "gnorm": "0.667", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "149516"} +[2022-08-02 19:01:08,222][train_inner][INFO] - {"epoch": 9, "update": 8.903, "loss": "2.183", "ppl": "4.54", "wps": "367086", "ups": "3.1", "wpb": "118578", "bsz": "256", "num_updates": "458200", "lr": "0.000547273", "gnorm": "0.686", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "149581"} +[2022-08-02 19:02:13,400][train_inner][INFO] - {"epoch": 9, "update": 8.907, "loss": "2.184", "ppl": "4.54", "wps": "364189", "ups": "3.07", "wpb": "118684", "bsz": "256", "num_updates": "458400", "lr": "0.000547071", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "149646"} +[2022-08-02 19:03:18,403][train_inner][INFO] - {"epoch": 9, "update": 8.91, "loss": "2.19", "ppl": "4.56", "wps": "363286", "ups": "3.08", "wpb": "118071", "bsz": "256", "num_updates": "458600", "lr": "0.000546869", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.6", "wall": "149711"} +[2022-08-02 19:04:23,271][train_inner][INFO] - {"epoch": 9, "update": 8.914, "loss": "2.183", "ppl": "4.54", "wps": "365341", "ups": "3.08", "wpb": "118493", "bsz": "256", "num_updates": "458800", "lr": "0.000546667", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "149776"} +[2022-08-02 19:05:28,213][train_inner][INFO] - {"epoch": 9, "update": 8.918, "loss": "2.19", "ppl": "4.56", "wps": "363567", "ups": "3.08", "wpb": "118052", "bsz": "256", "num_updates": "459000", "lr": "0.000546465", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "149841"} +[2022-08-02 19:06:33,235][train_inner][INFO] - {"epoch": 9, "update": 8.922, "loss": "2.185", "ppl": "4.55", "wps": "363890", "ups": "3.08", "wpb": "118303", "bsz": "256", "num_updates": "459200", "lr": "0.000546263", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "149906"} +[2022-08-02 19:07:38,032][train_inner][INFO] - {"epoch": 9, "update": 8.926, "loss": "2.186", "ppl": "4.55", "wps": "364844", "ups": "3.09", "wpb": "118202", "bsz": "256", "num_updates": "459400", "lr": "0.000546061", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "149970"} +[2022-08-02 19:08:42,559][train_inner][INFO] - {"epoch": 9, "update": 8.93, "loss": "2.189", "ppl": "4.56", "wps": "366175", "ups": "3.1", "wpb": "118140", "bsz": "256", "num_updates": "459600", "lr": "0.000545859", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "150035"} +[2022-08-02 19:09:47,367][train_inner][INFO] - {"epoch": 9, "update": 8.934, "loss": "2.188", "ppl": "4.56", "wps": "363728", "ups": "3.09", "wpb": "117860", "bsz": "256", "num_updates": "459800", "lr": "0.000545657", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "150100"} +[2022-08-02 19:10:52,413][train_inner][INFO] - {"epoch": 9, "update": 8.938, "loss": "2.185", "ppl": "4.55", "wps": "363900", "ups": "3.07", "wpb": "118350", "bsz": "256", "num_updates": "460000", "lr": "0.000545455", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "150165"} +[2022-08-02 19:11:57,098][train_inner][INFO] - {"epoch": 9, "update": 8.942, "loss": "2.186", "ppl": "4.55", "wps": "364085", "ups": "3.09", "wpb": "117751", "bsz": "256", "num_updates": "460200", "lr": "0.000545253", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "150229"} +[2022-08-02 19:13:01,352][train_inner][INFO] - {"epoch": 9, "update": 8.945, "loss": "2.185", "ppl": "4.55", "wps": "367536", "ups": "3.11", "wpb": "118076", "bsz": "256", "num_updates": "460400", "lr": "0.000545051", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.1", "wall": "150294"} +[2022-08-02 19:14:06,306][train_inner][INFO] - {"epoch": 9, "update": 8.949, "loss": "2.193", "ppl": "4.57", "wps": "364202", "ups": "3.08", "wpb": "118273", "bsz": "256", "num_updates": "460600", "lr": "0.000544848", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "150359"} +[2022-08-02 19:15:11,331][train_inner][INFO] - {"epoch": 9, "update": 8.953, "loss": "2.181", "ppl": "4.53", "wps": "364033", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "460800", "lr": "0.000544646", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "150424"} +[2022-08-02 19:15:26,497][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-02 19:16:16,408][train_inner][INFO] - {"epoch": 9, "update": 8.957, "loss": "2.184", "ppl": "4.55", "wps": "362157", "ups": "3.07", "wpb": "117839", "bsz": "256", "num_updates": "461000", "lr": "0.000544444", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22", "wall": "150489"} +[2022-08-02 19:17:21,128][train_inner][INFO] - {"epoch": 9, "update": 8.961, "loss": "2.191", "ppl": "4.56", "wps": "366157", "ups": "3.09", "wpb": "118488", "bsz": "256", "num_updates": "461200", "lr": "0.000544242", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "28.2", "wall": "150553"} +[2022-08-02 19:18:26,087][train_inner][INFO] - {"epoch": 9, "update": 8.965, "loss": "2.187", "ppl": "4.56", "wps": "363577", "ups": "3.08", "wpb": "118086", "bsz": "256", "num_updates": "461400", "lr": "0.00054404", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.4", "wall": "150618"} +[2022-08-02 19:19:31,012][train_inner][INFO] - {"epoch": 9, "update": 8.969, "loss": "2.186", "ppl": "4.55", "wps": "363182", "ups": "3.08", "wpb": "117895", "bsz": "256", "num_updates": "461600", "lr": "0.000543838", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.3", "wall": "150683"} +[2022-08-02 19:20:36,070][train_inner][INFO] - {"epoch": 9, "update": 8.973, "loss": "2.184", "ppl": "4.54", "wps": "365193", "ups": "3.08", "wpb": "118731", "bsz": "256", "num_updates": "461800", "lr": "0.000543636", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.4", "wall": "150748"} +[2022-08-02 19:21:40,583][train_inner][INFO] - {"epoch": 9, "update": 8.977, "loss": "2.187", "ppl": "4.55", "wps": "364733", "ups": "3.1", "wpb": "117647", "bsz": "256", "num_updates": "462000", "lr": "0.000543434", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.1", "wall": "150813"} +[2022-08-02 19:22:45,569][train_inner][INFO] - {"epoch": 9, "update": 8.98, "loss": "2.182", "ppl": "4.54", "wps": "364947", "ups": "3.08", "wpb": "118581", "bsz": "256", "num_updates": "462200", "lr": "0.000543232", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.3", "wall": "150878"} +[2022-08-02 19:23:50,274][train_inner][INFO] - {"epoch": 9, "update": 8.984, "loss": "2.175", "ppl": "4.52", "wps": "367006", "ups": "3.09", "wpb": "118735", "bsz": "256", "num_updates": "462400", "lr": "0.00054303", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.7", "wall": "150943"} +[2022-08-02 19:24:55,296][train_inner][INFO] - {"epoch": 9, "update": 8.988, "loss": "2.185", "ppl": "4.55", "wps": "363673", "ups": "3.08", "wpb": "118231", "bsz": "256", "num_updates": "462600", "lr": "0.000542828", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.2", "wall": "151008"} +[2022-08-02 19:26:00,168][train_inner][INFO] - {"epoch": 9, "update": 8.992, "loss": "2.191", "ppl": "4.57", "wps": "362679", "ups": "3.08", "wpb": "117637", "bsz": "256", "num_updates": "462800", "lr": "0.000542626", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "151072"} +[2022-08-02 19:27:05,231][train_inner][INFO] - {"epoch": 9, "update": 8.996, "loss": "2.184", "ppl": "4.54", "wps": "364438", "ups": "3.07", "wpb": "118555", "bsz": "256", "num_updates": "463000", "lr": "0.000542424", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "151138"} +[2022-08-02 19:28:10,311][train_inner][INFO] - {"epoch": 9, "update": 9.0, "loss": "2.187", "ppl": "4.55", "wps": "363316", "ups": "3.07", "wpb": "118222", "bsz": "256", "num_updates": "463200", "lr": "0.000542222", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "25.7", "wall": "151203"} +[2022-08-02 19:28:12,983][fairseq_cli.train][INFO] - end of epoch 9 (average epoch stats below) +[2022-08-02 19:28:12,983][train][INFO] - {"epoch": 9, "train_loss": "2.198", "train_ppl": "4.59", "train_wps": "362762", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "463208", "train_lr": "0.000542214", "train_gnorm": "0.672", "train_clip": "0", "train_loss_scale": "1", "train_train_wall": "16614", "train_gb_free": "22.9", "train_wall": "151205"} +[2022-08-02 19:28:13,092][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-02 19:28:13,096][fairseq.trainer][INFO] - begin training epoch 10 +[2022-08-02 19:28:13,096][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-02 19:29:28,946][train_inner][INFO] - {"epoch": 10, "update": 9.004, "loss": "2.179", "ppl": "4.53", "wps": "299898", "ups": "2.54", "wpb": "117910", "bsz": "255.4", "num_updates": "463400", "lr": "0.00054202", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "151281"} +[2022-08-02 19:30:34,137][train_inner][INFO] - {"epoch": 10, "update": 9.008, "loss": "2.176", "ppl": "4.52", "wps": "362740", "ups": "3.07", "wpb": "118236", "bsz": "256", "num_updates": "463600", "lr": "0.000541818", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.8", "wall": "151346"} +[2022-08-02 19:31:38,837][train_inner][INFO] - {"epoch": 10, "update": 9.011, "loss": "2.186", "ppl": "4.55", "wps": "364688", "ups": "3.09", "wpb": "117975", "bsz": "256", "num_updates": "463800", "lr": "0.000541616", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.7", "wall": "151411"} +[2022-08-02 19:32:43,589][train_inner][INFO] - {"epoch": 10, "update": 9.015, "loss": "2.184", "ppl": "4.55", "wps": "364183", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "464000", "lr": "0.000541414", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.1", "wall": "151476"} +[2022-08-02 19:33:48,436][train_inner][INFO] - {"epoch": 10, "update": 9.019, "loss": "2.179", "ppl": "4.53", "wps": "365063", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "464200", "lr": "0.000541212", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "151541"} +[2022-08-02 19:34:53,132][train_inner][INFO] - {"epoch": 10, "update": 9.023, "loss": "2.185", "ppl": "4.55", "wps": "364496", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "464400", "lr": "0.00054101", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.6", "wall": "151605"} +[2022-08-02 19:35:58,030][train_inner][INFO] - {"epoch": 10, "update": 9.027, "loss": "2.179", "ppl": "4.53", "wps": "362974", "ups": "3.08", "wpb": "117780", "bsz": "256", "num_updates": "464600", "lr": "0.000540808", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "151670"} +[2022-08-02 19:37:02,945][train_inner][INFO] - {"epoch": 10, "update": 9.031, "loss": "2.183", "ppl": "4.54", "wps": "365366", "ups": "3.08", "wpb": "118586", "bsz": "256", "num_updates": "464800", "lr": "0.000540606", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "151735"} +[2022-08-02 19:38:07,526][train_inner][INFO] - {"epoch": 10, "update": 9.035, "loss": "2.18", "ppl": "4.53", "wps": "366724", "ups": "3.1", "wpb": "118415", "bsz": "256", "num_updates": "465000", "lr": "0.000540404", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "151800"} +[2022-08-02 19:39:12,189][train_inner][INFO] - {"epoch": 10, "update": 9.039, "loss": "2.174", "ppl": "4.51", "wps": "365891", "ups": "3.09", "wpb": "118297", "bsz": "256", "num_updates": "465200", "lr": "0.000540202", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "151865"} +[2022-08-02 19:40:16,810][train_inner][INFO] - {"epoch": 10, "update": 9.043, "loss": "2.179", "ppl": "4.53", "wps": "366800", "ups": "3.1", "wpb": "118513", "bsz": "256", "num_updates": "465400", "lr": "0.00054", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "151929"} +[2022-08-02 19:41:21,698][train_inner][INFO] - {"epoch": 10, "update": 9.046, "loss": "2.184", "ppl": "4.54", "wps": "364009", "ups": "3.08", "wpb": "118098", "bsz": "256", "num_updates": "465600", "lr": "0.000539798", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "151994"} +[2022-08-02 19:42:26,557][train_inner][INFO] - {"epoch": 10, "update": 9.05, "loss": "2.175", "ppl": "4.52", "wps": "365812", "ups": "3.08", "wpb": "118629", "bsz": "256", "num_updates": "465800", "lr": "0.000539596", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "152059"} +[2022-08-02 19:43:31,158][train_inner][INFO] - {"epoch": 10, "update": 9.054, "loss": "2.176", "ppl": "4.52", "wps": "367106", "ups": "3.1", "wpb": "118574", "bsz": "256", "num_updates": "466000", "lr": "0.000539394", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "152123"} +[2022-08-02 19:44:36,221][train_inner][INFO] - {"epoch": 10, "update": 9.058, "loss": "2.178", "ppl": "4.53", "wps": "364194", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "466200", "lr": "0.000539192", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "152189"} +[2022-08-02 19:45:41,425][train_inner][INFO] - {"epoch": 10, "update": 9.062, "loss": "2.183", "ppl": "4.54", "wps": "363231", "ups": "3.07", "wpb": "118420", "bsz": "256", "num_updates": "466400", "lr": "0.00053899", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "152254"} +[2022-08-02 19:46:46,593][train_inner][INFO] - {"epoch": 10, "update": 9.066, "loss": "2.174", "ppl": "4.51", "wps": "364330", "ups": "3.07", "wpb": "118711", "bsz": "256", "num_updates": "466600", "lr": "0.000538788", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.7", "wall": "152319"} +[2022-08-02 19:47:51,211][train_inner][INFO] - {"epoch": 10, "update": 9.07, "loss": "2.18", "ppl": "4.53", "wps": "366549", "ups": "3.1", "wpb": "118427", "bsz": "256", "num_updates": "466800", "lr": "0.000538586", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "152384"} +[2022-08-02 19:48:55,756][train_inner][INFO] - {"epoch": 10, "update": 9.074, "loss": "2.179", "ppl": "4.53", "wps": "366112", "ups": "3.1", "wpb": "118152", "bsz": "256", "num_updates": "467000", "lr": "0.000538384", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "152448"} +[2022-08-02 19:50:00,791][train_inner][INFO] - {"epoch": 10, "update": 9.078, "loss": "2.177", "ppl": "4.52", "wps": "362793", "ups": "3.08", "wpb": "117970", "bsz": "256", "num_updates": "467200", "lr": "0.000538182", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "152513"} +[2022-08-02 19:51:05,618][train_inner][INFO] - {"epoch": 10, "update": 9.081, "loss": "2.178", "ppl": "4.53", "wps": "365818", "ups": "3.09", "wpb": "118571", "bsz": "256", "num_updates": "467400", "lr": "0.00053798", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "152578"} +[2022-08-02 19:52:10,600][train_inner][INFO] - {"epoch": 10, "update": 9.085, "loss": "2.177", "ppl": "4.52", "wps": "364839", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "467600", "lr": "0.000537778", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "152643"} +[2022-08-02 19:53:16,834][train_inner][INFO] - {"epoch": 10, "update": 9.089, "loss": "2.177", "ppl": "4.52", "wps": "356441", "ups": "3.02", "wpb": "118039", "bsz": "256", "num_updates": "467800", "lr": "0.000537576", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "24.5", "wall": "152709"} +[2022-08-02 19:54:21,926][train_inner][INFO] - {"epoch": 10, "update": 9.093, "loss": "2.183", "ppl": "4.54", "wps": "362982", "ups": "3.07", "wpb": "118136", "bsz": "256", "num_updates": "468000", "lr": "0.000537374", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "152774"} +[2022-08-02 19:55:26,806][train_inner][INFO] - {"epoch": 10, "update": 9.097, "loss": "2.184", "ppl": "4.54", "wps": "362291", "ups": "3.08", "wpb": "117524", "bsz": "256", "num_updates": "468200", "lr": "0.000537172", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "152839"} +[2022-08-02 19:56:31,438][train_inner][INFO] - {"epoch": 10, "update": 9.101, "loss": "2.184", "ppl": "4.54", "wps": "365272", "ups": "3.09", "wpb": "118040", "bsz": "256", "num_updates": "468400", "lr": "0.00053697", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "152904"} +[2022-08-02 19:57:37,307][train_inner][INFO] - {"epoch": 10, "update": 9.105, "loss": "2.179", "ppl": "4.53", "wps": "360618", "ups": "3.04", "wpb": "118766", "bsz": "256", "num_updates": "468600", "lr": "0.000536768", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "152970"} +[2022-08-02 19:58:42,357][train_inner][INFO] - {"epoch": 10, "update": 9.109, "loss": "2.172", "ppl": "4.51", "wps": "365559", "ups": "3.07", "wpb": "118897", "bsz": "256", "num_updates": "468800", "lr": "0.000536566", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "153035"} +[2022-08-02 19:59:47,412][train_inner][INFO] - {"epoch": 10, "update": 9.112, "loss": "2.176", "ppl": "4.52", "wps": "364101", "ups": "3.07", "wpb": "118430", "bsz": "256", "num_updates": "469000", "lr": "0.000536364", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.7", "wall": "153100"} +[2022-08-02 20:00:52,386][train_inner][INFO] - {"epoch": 10, "update": 9.116, "loss": "2.178", "ppl": "4.53", "wps": "366140", "ups": "3.08", "wpb": "118946", "bsz": "256", "num_updates": "469200", "lr": "0.000536162", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "153165"} +[2022-08-02 20:01:57,487][train_inner][INFO] - {"epoch": 10, "update": 9.12, "loss": "2.174", "ppl": "4.51", "wps": "364180", "ups": "3.07", "wpb": "118541", "bsz": "256", "num_updates": "469400", "lr": "0.00053596", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "153230"} +[2022-08-02 20:03:02,345][train_inner][INFO] - {"epoch": 10, "update": 9.124, "loss": "2.18", "ppl": "4.53", "wps": "365958", "ups": "3.08", "wpb": "118674", "bsz": "256", "num_updates": "469600", "lr": "0.000535758", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "153295"} +[2022-08-02 20:04:07,660][train_inner][INFO] - {"epoch": 10, "update": 9.128, "loss": "2.173", "ppl": "4.51", "wps": "365081", "ups": "3.06", "wpb": "119225", "bsz": "256", "num_updates": "469800", "lr": "0.000535556", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "153360"} +[2022-08-02 20:05:12,639][train_inner][INFO] - {"epoch": 10, "update": 9.132, "loss": "2.181", "ppl": "4.53", "wps": "364171", "ups": "3.08", "wpb": "118316", "bsz": "256", "num_updates": "470000", "lr": "0.000535354", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "153425"} +[2022-08-02 20:06:17,714][train_inner][INFO] - {"epoch": 10, "update": 9.136, "loss": "2.175", "ppl": "4.52", "wps": "363071", "ups": "3.07", "wpb": "118132", "bsz": "256", "num_updates": "470200", "lr": "0.000535152", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "153490"} +[2022-08-02 20:07:22,639][train_inner][INFO] - {"epoch": 10, "update": 9.14, "loss": "2.179", "ppl": "4.53", "wps": "365056", "ups": "3.08", "wpb": "118506", "bsz": "256", "num_updates": "470400", "lr": "0.000534949", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "153555"} +[2022-08-02 20:08:27,520][train_inner][INFO] - {"epoch": 10, "update": 9.144, "loss": "2.187", "ppl": "4.55", "wps": "361806", "ups": "3.08", "wpb": "117370", "bsz": "256", "num_updates": "470600", "lr": "0.000534747", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "153620"} +[2022-08-02 20:09:32,478][train_inner][INFO] - {"epoch": 10, "update": 9.147, "loss": "2.183", "ppl": "4.54", "wps": "363280", "ups": "3.08", "wpb": "117987", "bsz": "256", "num_updates": "470800", "lr": "0.000534545", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "153685"} +[2022-08-02 20:10:37,727][train_inner][INFO] - {"epoch": 10, "update": 9.151, "loss": "2.177", "ppl": "4.52", "wps": "363264", "ups": "3.07", "wpb": "118512", "bsz": "256", "num_updates": "471000", "lr": "0.000534343", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "153750"} +[2022-08-02 20:11:43,072][train_inner][INFO] - {"epoch": 10, "update": 9.155, "loss": "2.176", "ppl": "4.52", "wps": "362719", "ups": "3.06", "wpb": "118508", "bsz": "256", "num_updates": "471200", "lr": "0.000534141", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "153815"} +[2022-08-02 20:12:48,139][train_inner][INFO] - {"epoch": 10, "update": 9.159, "loss": "2.18", "ppl": "4.53", "wps": "362933", "ups": "3.07", "wpb": "118073", "bsz": "256", "num_updates": "471400", "lr": "0.000533939", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "153880"} +[2022-08-02 20:13:53,258][train_inner][INFO] - {"epoch": 10, "update": 9.163, "loss": "2.177", "ppl": "4.52", "wps": "364914", "ups": "3.07", "wpb": "118812", "bsz": "256", "num_updates": "471600", "lr": "0.000533737", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "153946"} +[2022-08-02 20:14:57,865][train_inner][INFO] - {"epoch": 10, "update": 9.167, "loss": "2.182", "ppl": "4.54", "wps": "365694", "ups": "3.1", "wpb": "118131", "bsz": "256", "num_updates": "471800", "lr": "0.000533535", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "154010"} +[2022-08-02 20:16:03,731][train_inner][INFO] - {"epoch": 10, "update": 9.171, "loss": "2.182", "ppl": "4.54", "wps": "359119", "ups": "3.04", "wpb": "118266", "bsz": "256", "num_updates": "472000", "lr": "0.000533333", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.7", "wall": "154076"} +[2022-08-02 20:17:08,532][train_inner][INFO] - {"epoch": 10, "update": 9.175, "loss": "2.177", "ppl": "4.52", "wps": "365283", "ups": "3.09", "wpb": "118352", "bsz": "256", "num_updates": "472200", "lr": "0.000533131", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "154141"} +[2022-08-02 20:18:13,427][train_inner][INFO] - {"epoch": 10, "update": 9.179, "loss": "2.173", "ppl": "4.51", "wps": "365496", "ups": "3.08", "wpb": "118594", "bsz": "256", "num_updates": "472400", "lr": "0.000532929", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "154206"} +[2022-08-02 20:19:18,477][train_inner][INFO] - {"epoch": 10, "update": 9.182, "loss": "2.172", "ppl": "4.51", "wps": "365338", "ups": "3.07", "wpb": "118825", "bsz": "256", "num_updates": "472600", "lr": "0.000532727", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "154271"} +[2022-08-02 20:20:23,289][train_inner][INFO] - {"epoch": 10, "update": 9.186, "loss": "2.177", "ppl": "4.52", "wps": "363871", "ups": "3.09", "wpb": "117913", "bsz": "256", "num_updates": "472800", "lr": "0.000532525", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.6", "wall": "154336"} +[2022-08-02 20:21:28,045][train_inner][INFO] - {"epoch": 10, "update": 9.19, "loss": "2.178", "ppl": "4.53", "wps": "365342", "ups": "3.09", "wpb": "118289", "bsz": "256", "num_updates": "473000", "lr": "0.000532323", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "154400"} +[2022-08-02 20:22:33,167][train_inner][INFO] - {"epoch": 10, "update": 9.194, "loss": "2.172", "ppl": "4.51", "wps": "363410", "ups": "3.07", "wpb": "118329", "bsz": "256", "num_updates": "473200", "lr": "0.000532121", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "154465"} +[2022-08-02 20:23:37,990][train_inner][INFO] - {"epoch": 10, "update": 9.198, "loss": "2.182", "ppl": "4.54", "wps": "365000", "ups": "3.09", "wpb": "118300", "bsz": "256", "num_updates": "473400", "lr": "0.000531919", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.5", "wall": "154530"} +[2022-08-02 20:24:42,993][train_inner][INFO] - {"epoch": 10, "update": 9.202, "loss": "2.173", "ppl": "4.51", "wps": "365214", "ups": "3.08", "wpb": "118697", "bsz": "256", "num_updates": "473600", "lr": "0.000531717", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26.1", "wall": "154595"} +[2022-08-02 20:25:49,076][train_inner][INFO] - {"epoch": 10, "update": 9.206, "loss": "2.178", "ppl": "4.53", "wps": "358274", "ups": "3.03", "wpb": "118378", "bsz": "256", "num_updates": "473800", "lr": "0.000531515", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20.2", "wall": "154661"} +[2022-08-02 20:26:53,465][train_inner][INFO] - {"epoch": 10, "update": 9.21, "loss": "2.172", "ppl": "4.51", "wps": "365798", "ups": "3.11", "wpb": "117766", "bsz": "256", "num_updates": "474000", "lr": "0.000531313", "gnorm": "0.676", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "27.6", "wall": "154726"} +[2022-08-02 20:27:10,819][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 20:27:59,047][train_inner][INFO] - {"epoch": 10, "update": 9.214, "loss": "2.175", "ppl": "4.51", "wps": "360239", "ups": "3.05", "wpb": "118123", "bsz": "256", "num_updates": "474200", "lr": "0.000531111", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "154791"} +[2022-08-02 20:29:03,993][train_inner][INFO] - {"epoch": 10, "update": 9.217, "loss": "2.175", "ppl": "4.52", "wps": "364254", "ups": "3.08", "wpb": "118284", "bsz": "256", "num_updates": "474400", "lr": "0.000530909", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "154856"} +[2022-08-02 20:30:08,941][train_inner][INFO] - {"epoch": 10, "update": 9.221, "loss": "2.177", "ppl": "4.52", "wps": "363602", "ups": "3.08", "wpb": "118075", "bsz": "256", "num_updates": "474600", "lr": "0.000530707", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "154921"} +[2022-08-02 20:31:13,741][train_inner][INFO] - {"epoch": 10, "update": 9.225, "loss": "2.177", "ppl": "4.52", "wps": "364915", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "474800", "lr": "0.000530505", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "154986"} +[2022-08-02 20:32:18,960][train_inner][INFO] - {"epoch": 10, "update": 9.229, "loss": "2.175", "ppl": "4.52", "wps": "364065", "ups": "3.07", "wpb": "118717", "bsz": "256", "num_updates": "475000", "lr": "0.000530303", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "155051"} +[2022-08-02 20:33:23,932][train_inner][INFO] - {"epoch": 10, "update": 9.233, "loss": "2.17", "ppl": "4.5", "wps": "366080", "ups": "3.08", "wpb": "118924", "bsz": "256", "num_updates": "475200", "lr": "0.000530101", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "155116"} +[2022-08-02 20:34:28,818][train_inner][INFO] - {"epoch": 10, "update": 9.237, "loss": "2.175", "ppl": "4.52", "wps": "363262", "ups": "3.08", "wpb": "117850", "bsz": "256", "num_updates": "475400", "lr": "0.000529899", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28", "wall": "155181"} +[2022-08-02 20:35:33,803][train_inner][INFO] - {"epoch": 10, "update": 9.241, "loss": "2.172", "ppl": "4.51", "wps": "365241", "ups": "3.08", "wpb": "118675", "bsz": "256", "num_updates": "475600", "lr": "0.000529697", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "155246"} +[2022-08-02 20:36:38,749][train_inner][INFO] - {"epoch": 10, "update": 9.245, "loss": "2.171", "ppl": "4.5", "wps": "364668", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "475800", "lr": "0.000529495", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "155311"} +[2022-08-02 20:37:43,258][train_inner][INFO] - {"epoch": 10, "update": 9.248, "loss": "2.171", "ppl": "4.5", "wps": "367541", "ups": "3.1", "wpb": "118548", "bsz": "256", "num_updates": "476000", "lr": "0.000529293", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "155376"} +[2022-08-02 20:38:47,956][train_inner][INFO] - {"epoch": 10, "update": 9.252, "loss": "2.175", "ppl": "4.51", "wps": "366471", "ups": "3.09", "wpb": "118546", "bsz": "256", "num_updates": "476200", "lr": "0.000529091", "gnorm": "0.672", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.2", "wall": "155440"} +[2022-08-02 20:39:52,726][train_inner][INFO] - {"epoch": 10, "update": 9.256, "loss": "2.168", "ppl": "4.49", "wps": "365052", "ups": "3.09", "wpb": "118222", "bsz": "256", "num_updates": "476400", "lr": "0.000528889", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.7", "wall": "155505"} +[2022-08-02 20:40:57,414][train_inner][INFO] - {"epoch": 10, "update": 9.26, "loss": "2.175", "ppl": "4.52", "wps": "365367", "ups": "3.09", "wpb": "118173", "bsz": "256", "num_updates": "476600", "lr": "0.000528687", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.7", "wall": "155570"} +[2022-08-02 20:41:30,636][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 20:42:02,588][train_inner][INFO] - {"epoch": 10, "update": 9.264, "loss": "2.171", "ppl": "4.5", "wps": "363548", "ups": "3.07", "wpb": "118468", "bsz": "256", "num_updates": "476800", "lr": "0.000528485", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "155635"} +[2022-08-02 20:43:08,709][train_inner][INFO] - {"epoch": 10, "update": 9.268, "loss": "2.173", "ppl": "4.51", "wps": "357640", "ups": "3.02", "wpb": "118236", "bsz": "256", "num_updates": "477000", "lr": "0.000528283", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.4", "wall": "155701"} +[2022-08-02 20:44:13,696][train_inner][INFO] - {"epoch": 10, "update": 9.272, "loss": "2.176", "ppl": "4.52", "wps": "363919", "ups": "3.08", "wpb": "118248", "bsz": "255.9", "num_updates": "477200", "lr": "0.000528081", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "155766"} +[2022-08-02 20:45:18,735][train_inner][INFO] - {"epoch": 10, "update": 9.276, "loss": "2.171", "ppl": "4.5", "wps": "363803", "ups": "3.08", "wpb": "118306", "bsz": "256", "num_updates": "477400", "lr": "0.000527879", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28", "wall": "155831"} +[2022-08-02 20:45:19,030][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 20:46:24,051][train_inner][INFO] - {"epoch": 10, "update": 9.28, "loss": "2.18", "ppl": "4.53", "wps": "362166", "ups": "3.06", "wpb": "118273", "bsz": "256", "num_updates": "477600", "lr": "0.000527677", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "155896"} +[2022-08-02 20:47:28,986][train_inner][INFO] - {"epoch": 10, "update": 9.283, "loss": "2.172", "ppl": "4.51", "wps": "363259", "ups": "3.08", "wpb": "117940", "bsz": "256", "num_updates": "477800", "lr": "0.000527475", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "155961"} +[2022-08-02 20:48:34,045][train_inner][INFO] - {"epoch": 10, "update": 9.287, "loss": "2.176", "ppl": "4.52", "wps": "362464", "ups": "3.07", "wpb": "117907", "bsz": "256", "num_updates": "478000", "lr": "0.000527273", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "156026"} +[2022-08-02 20:49:38,691][train_inner][INFO] - {"epoch": 10, "update": 9.291, "loss": "2.176", "ppl": "4.52", "wps": "365181", "ups": "3.09", "wpb": "118035", "bsz": "256", "num_updates": "478200", "lr": "0.000527071", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "156091"} +[2022-08-02 20:50:43,503][train_inner][INFO] - {"epoch": 10, "update": 9.295, "loss": "2.177", "ppl": "4.52", "wps": "365571", "ups": "3.09", "wpb": "118466", "bsz": "256", "num_updates": "478400", "lr": "0.000526869", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "156156"} +[2022-08-02 20:51:48,569][train_inner][INFO] - {"epoch": 10, "update": 9.299, "loss": "2.177", "ppl": "4.52", "wps": "363722", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "478600", "lr": "0.000526667", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "156221"} +[2022-08-02 20:52:54,223][train_inner][INFO] - {"epoch": 10, "update": 9.303, "loss": "2.178", "ppl": "4.53", "wps": "359292", "ups": "3.05", "wpb": "117944", "bsz": "256", "num_updates": "478800", "lr": "0.000526465", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "156287"} +[2022-08-02 20:53:59,119][train_inner][INFO] - {"epoch": 10, "update": 9.307, "loss": "2.172", "ppl": "4.51", "wps": "364155", "ups": "3.08", "wpb": "118158", "bsz": "256", "num_updates": "479000", "lr": "0.000526263", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "156351"} +[2022-08-02 20:55:04,009][train_inner][INFO] - {"epoch": 10, "update": 9.311, "loss": "2.175", "ppl": "4.52", "wps": "364231", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "479200", "lr": "0.000526061", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "156416"} +[2022-08-02 20:56:09,101][train_inner][INFO] - {"epoch": 10, "update": 9.315, "loss": "2.174", "ppl": "4.51", "wps": "363113", "ups": "3.07", "wpb": "118176", "bsz": "256", "num_updates": "479400", "lr": "0.000525859", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "156481"} +[2022-08-02 20:57:14,066][train_inner][INFO] - {"epoch": 10, "update": 9.318, "loss": "2.171", "ppl": "4.5", "wps": "365098", "ups": "3.08", "wpb": "118591", "bsz": "256", "num_updates": "479600", "lr": "0.000525657", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "156546"} +[2022-08-02 20:58:19,165][train_inner][INFO] - {"epoch": 10, "update": 9.322, "loss": "2.168", "ppl": "4.5", "wps": "362992", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "479800", "lr": "0.000525455", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "156611"} +[2022-08-02 20:59:24,051][train_inner][INFO] - {"epoch": 10, "update": 9.326, "loss": "2.18", "ppl": "4.53", "wps": "363281", "ups": "3.08", "wpb": "117848", "bsz": "256", "num_updates": "480000", "lr": "0.000525253", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "156676"} +[2022-08-02 21:00:28,824][train_inner][INFO] - {"epoch": 10, "update": 9.33, "loss": "2.173", "ppl": "4.51", "wps": "364942", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "480200", "lr": "0.000525051", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "156741"} +[2022-08-02 21:01:33,621][train_inner][INFO] - {"epoch": 10, "update": 9.334, "loss": "2.169", "ppl": "4.5", "wps": "364624", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "480400", "lr": "0.000524848", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "156806"} +[2022-08-02 21:02:38,805][train_inner][INFO] - {"epoch": 10, "update": 9.338, "loss": "2.173", "ppl": "4.51", "wps": "366792", "ups": "3.07", "wpb": "119544", "bsz": "256", "num_updates": "480600", "lr": "0.000524646", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "156871"} +[2022-08-02 21:03:44,791][train_inner][INFO] - {"epoch": 10, "update": 9.342, "loss": "2.174", "ppl": "4.51", "wps": "359414", "ups": "3.03", "wpb": "118580", "bsz": "256", "num_updates": "480800", "lr": "0.000524444", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.9", "wall": "156937"} +[2022-08-02 21:04:49,556][train_inner][INFO] - {"epoch": 10, "update": 9.346, "loss": "2.17", "ppl": "4.5", "wps": "365499", "ups": "3.09", "wpb": "118355", "bsz": "256", "num_updates": "481000", "lr": "0.000524242", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "157002"} +[2022-08-02 21:05:54,604][train_inner][INFO] - {"epoch": 10, "update": 9.349, "loss": "2.173", "ppl": "4.51", "wps": "363343", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "481200", "lr": "0.00052404", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "157067"} +[2022-08-02 21:06:59,813][train_inner][INFO] - {"epoch": 10, "update": 9.353, "loss": "2.171", "ppl": "4.5", "wps": "364714", "ups": "3.07", "wpb": "118911", "bsz": "256", "num_updates": "481400", "lr": "0.000523838", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "157132"} +[2022-08-02 21:07:48,596][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-02 21:08:04,789][train_inner][INFO] - {"epoch": 10, "update": 9.357, "loss": "2.178", "ppl": "4.53", "wps": "362281", "ups": "3.08", "wpb": "117696", "bsz": "256", "num_updates": "481600", "lr": "0.000523636", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "157197"} +[2022-08-02 21:08:25,028][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 21:09:09,998][train_inner][INFO] - {"epoch": 10, "update": 9.361, "loss": "2.174", "ppl": "4.51", "wps": "363129", "ups": "3.07", "wpb": "118395", "bsz": "256", "num_updates": "481800", "lr": "0.000523434", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "157262"} +[2022-08-02 21:10:14,831][train_inner][INFO] - {"epoch": 10, "update": 9.365, "loss": "2.17", "ppl": "4.5", "wps": "366184", "ups": "3.08", "wpb": "118703", "bsz": "256", "num_updates": "482000", "lr": "0.000523232", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "157327"} +[2022-08-02 21:11:19,504][train_inner][INFO] - {"epoch": 10, "update": 9.369, "loss": "2.177", "ppl": "4.52", "wps": "366436", "ups": "3.09", "wpb": "118492", "bsz": "256", "num_updates": "482200", "lr": "0.00052303", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "157392"} +[2022-08-02 21:12:24,600][train_inner][INFO] - {"epoch": 10, "update": 9.373, "loss": "2.177", "ppl": "4.52", "wps": "363432", "ups": "3.07", "wpb": "118288", "bsz": "256", "num_updates": "482400", "lr": "0.000522828", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "157457"} +[2022-08-02 21:13:29,239][train_inner][INFO] - {"epoch": 10, "update": 9.377, "loss": "2.174", "ppl": "4.51", "wps": "362802", "ups": "3.09", "wpb": "117254", "bsz": "256", "num_updates": "482600", "lr": "0.000522626", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "157522"} +[2022-08-02 21:14:34,083][train_inner][INFO] - {"epoch": 10, "update": 9.381, "loss": "2.169", "ppl": "4.5", "wps": "364387", "ups": "3.08", "wpb": "118140", "bsz": "256", "num_updates": "482800", "lr": "0.000522424", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "157586"} +[2022-08-02 21:15:38,892][train_inner][INFO] - {"epoch": 10, "update": 9.384, "loss": "2.173", "ppl": "4.51", "wps": "363217", "ups": "3.09", "wpb": "117697", "bsz": "256", "num_updates": "483000", "lr": "0.000522222", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "157651"} +[2022-08-02 21:16:43,547][train_inner][INFO] - {"epoch": 10, "update": 9.388, "loss": "2.174", "ppl": "4.51", "wps": "365435", "ups": "3.09", "wpb": "118134", "bsz": "256", "num_updates": "483200", "lr": "0.00052202", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "157716"} +[2022-08-02 21:17:48,833][train_inner][INFO] - {"epoch": 10, "update": 9.392, "loss": "2.173", "ppl": "4.51", "wps": "361812", "ups": "3.06", "wpb": "118105", "bsz": "256", "num_updates": "483400", "lr": "0.000521818", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "157781"} +[2022-08-02 21:18:53,459][train_inner][INFO] - {"epoch": 10, "update": 9.396, "loss": "2.175", "ppl": "4.52", "wps": "366465", "ups": "3.09", "wpb": "118414", "bsz": "256", "num_updates": "483600", "lr": "0.000521616", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "157846"} +[2022-08-02 21:19:58,694][train_inner][INFO] - {"epoch": 10, "update": 9.4, "loss": "2.169", "ppl": "4.5", "wps": "362221", "ups": "3.07", "wpb": "118146", "bsz": "256", "num_updates": "483800", "lr": "0.000521414", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "157911"} +[2022-08-02 21:21:03,780][train_inner][INFO] - {"epoch": 10, "update": 9.404, "loss": "2.173", "ppl": "4.51", "wps": "363614", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "484000", "lr": "0.000521212", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "157976"} +[2022-08-02 21:21:15,824][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 21:22:09,118][train_inner][INFO] - {"epoch": 10, "update": 9.408, "loss": "2.171", "ppl": "4.5", "wps": "361710", "ups": "3.06", "wpb": "118167", "bsz": "256", "num_updates": "484200", "lr": "0.00052101", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "158041"} +[2022-08-02 21:23:14,026][train_inner][INFO] - {"epoch": 10, "update": 9.412, "loss": "2.182", "ppl": "4.54", "wps": "363781", "ups": "3.08", "wpb": "118059", "bsz": "256", "num_updates": "484400", "lr": "0.000520808", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "158106"} +[2022-08-02 21:24:18,912][train_inner][INFO] - {"epoch": 10, "update": 9.416, "loss": "2.169", "ppl": "4.5", "wps": "364010", "ups": "3.08", "wpb": "118094", "bsz": "256", "num_updates": "484600", "lr": "0.000520606", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "158171"} +[2022-08-02 21:25:24,884][train_inner][INFO] - {"epoch": 10, "update": 9.419, "loss": "2.172", "ppl": "4.51", "wps": "358867", "ups": "3.03", "wpb": "118375", "bsz": "256", "num_updates": "484800", "lr": "0.000520404", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "158237"} +[2022-08-02 21:26:29,872][train_inner][INFO] - {"epoch": 10, "update": 9.423, "loss": "2.167", "ppl": "4.49", "wps": "365248", "ups": "3.08", "wpb": "118675", "bsz": "256", "num_updates": "485000", "lr": "0.000520202", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "158302"} +[2022-08-02 21:27:36,120][train_inner][INFO] - {"epoch": 10, "update": 9.427, "loss": "2.17", "ppl": "4.5", "wps": "357924", "ups": "3.02", "wpb": "118556", "bsz": "256", "num_updates": "485200", "lr": "0.00052", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "158368"} +[2022-08-02 21:28:40,792][train_inner][INFO] - {"epoch": 10, "update": 9.431, "loss": "2.166", "ppl": "4.49", "wps": "365196", "ups": "3.09", "wpb": "118088", "bsz": "256", "num_updates": "485400", "lr": "0.000519798", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "158433"} +[2022-08-02 21:29:45,496][train_inner][INFO] - {"epoch": 10, "update": 9.435, "loss": "2.171", "ppl": "4.5", "wps": "367006", "ups": "3.09", "wpb": "118732", "bsz": "256", "num_updates": "485600", "lr": "0.000519596", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "158498"} +[2022-08-02 21:30:50,247][train_inner][INFO] - {"epoch": 10, "update": 9.439, "loss": "2.167", "ppl": "4.49", "wps": "364720", "ups": "3.09", "wpb": "118077", "bsz": "256", "num_updates": "485800", "lr": "0.000519394", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "158563"} +[2022-08-02 21:31:55,082][train_inner][INFO] - {"epoch": 10, "update": 9.443, "loss": "2.167", "ppl": "4.49", "wps": "365288", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "486000", "lr": "0.000519192", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "158627"} +[2022-08-02 21:32:59,574][train_inner][INFO] - {"epoch": 10, "update": 9.447, "loss": "2.167", "ppl": "4.49", "wps": "367487", "ups": "3.1", "wpb": "118486", "bsz": "256", "num_updates": "486200", "lr": "0.00051899", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "158692"} +[2022-08-02 21:33:47,437][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 21:34:04,772][train_inner][INFO] - {"epoch": 10, "update": 9.451, "loss": "2.171", "ppl": "4.5", "wps": "362355", "ups": "3.07", "wpb": "118123", "bsz": "256", "num_updates": "486400", "lr": "0.000518788", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "158757"} +[2022-08-02 21:35:09,623][train_inner][INFO] - {"epoch": 10, "update": 9.454, "loss": "2.169", "ppl": "4.5", "wps": "365461", "ups": "3.08", "wpb": "118501", "bsz": "256", "num_updates": "486600", "lr": "0.000518586", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "158822"} +[2022-08-02 21:36:14,655][train_inner][INFO] - {"epoch": 10, "update": 9.458, "loss": "2.167", "ppl": "4.49", "wps": "363425", "ups": "3.08", "wpb": "118170", "bsz": "256", "num_updates": "486800", "lr": "0.000518384", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "158887"} +[2022-08-02 21:37:19,327][train_inner][INFO] - {"epoch": 10, "update": 9.462, "loss": "2.166", "ppl": "4.49", "wps": "366899", "ups": "3.09", "wpb": "118631", "bsz": "256", "num_updates": "487000", "lr": "0.000518182", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "158952"} +[2022-08-02 21:38:24,380][train_inner][INFO] - {"epoch": 10, "update": 9.466, "loss": "2.17", "ppl": "4.5", "wps": "364840", "ups": "3.07", "wpb": "118662", "bsz": "256", "num_updates": "487200", "lr": "0.00051798", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "159017"} +[2022-08-02 21:39:29,129][train_inner][INFO] - {"epoch": 10, "update": 9.47, "loss": "2.172", "ppl": "4.51", "wps": "364738", "ups": "3.09", "wpb": "118081", "bsz": "256", "num_updates": "487400", "lr": "0.000517778", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "159081"} +[2022-08-02 21:40:33,916][train_inner][INFO] - {"epoch": 10, "update": 9.474, "loss": "2.172", "ppl": "4.51", "wps": "365216", "ups": "3.09", "wpb": "118304", "bsz": "256", "num_updates": "487600", "lr": "0.000517576", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "159146"} +[2022-08-02 21:41:38,829][train_inner][INFO] - {"epoch": 10, "update": 9.478, "loss": "2.169", "ppl": "4.5", "wps": "364988", "ups": "3.08", "wpb": "118461", "bsz": "256", "num_updates": "487800", "lr": "0.000517374", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "159211"} +[2022-08-02 21:42:43,164][train_inner][INFO] - {"epoch": 10, "update": 9.482, "loss": "2.168", "ppl": "4.5", "wps": "368839", "ups": "3.11", "wpb": "118644", "bsz": "256", "num_updates": "488000", "lr": "0.000517172", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "159275"} +[2022-08-02 21:43:48,222][train_inner][INFO] - {"epoch": 10, "update": 9.486, "loss": "2.166", "ppl": "4.49", "wps": "363424", "ups": "3.07", "wpb": "118217", "bsz": "256", "num_updates": "488200", "lr": "0.00051697", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "159341"} +[2022-08-02 21:44:53,302][train_inner][INFO] - {"epoch": 10, "update": 9.489, "loss": "2.163", "ppl": "4.48", "wps": "364225", "ups": "3.07", "wpb": "118516", "bsz": "256", "num_updates": "488400", "lr": "0.000516768", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "159406"} +[2022-08-02 21:45:58,302][train_inner][INFO] - {"epoch": 10, "update": 9.493, "loss": "2.166", "ppl": "4.49", "wps": "366099", "ups": "3.08", "wpb": "118981", "bsz": "256", "num_updates": "488600", "lr": "0.000516566", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "159471"} +[2022-08-02 21:46:54,287][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 21:47:03,438][train_inner][INFO] - {"epoch": 10, "update": 9.497, "loss": "2.164", "ppl": "4.48", "wps": "362148", "ups": "3.07", "wpb": "117943", "bsz": "256", "num_updates": "488800", "lr": "0.000516364", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "159536"} +[2022-08-02 21:48:08,098][train_inner][INFO] - {"epoch": 10, "update": 9.501, "loss": "2.169", "ppl": "4.5", "wps": "367049", "ups": "3.09", "wpb": "118665", "bsz": "256", "num_updates": "489000", "lr": "0.000516162", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "159600"} +[2022-08-02 21:49:12,675][train_inner][INFO] - {"epoch": 10, "update": 9.505, "loss": "2.173", "ppl": "4.51", "wps": "366947", "ups": "3.1", "wpb": "118481", "bsz": "256", "num_updates": "489200", "lr": "0.00051596", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "159665"} +[2022-08-02 21:50:18,182][train_inner][INFO] - {"epoch": 10, "update": 9.509, "loss": "2.17", "ppl": "4.5", "wps": "360559", "ups": "3.05", "wpb": "118094", "bsz": "256", "num_updates": "489400", "lr": "0.000515758", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "159731"} +[2022-08-02 21:51:22,990][train_inner][INFO] - {"epoch": 10, "update": 9.513, "loss": "2.168", "ppl": "4.49", "wps": "364363", "ups": "3.09", "wpb": "118066", "bsz": "256", "num_updates": "489600", "lr": "0.000515556", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "159795"} +[2022-08-02 21:52:27,549][train_inner][INFO] - {"epoch": 10, "update": 9.517, "loss": "2.169", "ppl": "4.5", "wps": "366878", "ups": "3.1", "wpb": "118424", "bsz": "256", "num_updates": "489800", "lr": "0.000515354", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "159860"} +[2022-08-02 21:53:31,850][train_inner][INFO] - {"epoch": 10, "update": 9.521, "loss": "2.168", "ppl": "4.49", "wps": "367155", "ups": "3.11", "wpb": "118040", "bsz": "256", "num_updates": "490000", "lr": "0.000515152", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "159924"} +[2022-08-02 21:54:36,273][train_inner][INFO] - {"epoch": 10, "update": 9.524, "loss": "2.171", "ppl": "4.5", "wps": "366080", "ups": "3.1", "wpb": "117919", "bsz": "256", "num_updates": "490200", "lr": "0.000514949", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "159989"} +[2022-08-02 21:55:40,832][train_inner][INFO] - {"epoch": 10, "update": 9.528, "loss": "2.167", "ppl": "4.49", "wps": "367157", "ups": "3.1", "wpb": "118516", "bsz": "256", "num_updates": "490400", "lr": "0.000514747", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "160053"} +[2022-08-02 21:56:45,959][train_inner][INFO] - {"epoch": 10, "update": 9.532, "loss": "2.168", "ppl": "4.49", "wps": "362310", "ups": "3.07", "wpb": "117979", "bsz": "255.9", "num_updates": "490600", "lr": "0.000514545", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "160118"} +[2022-08-02 21:57:50,460][train_inner][INFO] - {"epoch": 10, "update": 9.536, "loss": "2.166", "ppl": "4.49", "wps": "366510", "ups": "3.1", "wpb": "118200", "bsz": "256", "num_updates": "490800", "lr": "0.000514343", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "160183"} +[2022-08-02 21:58:54,828][train_inner][INFO] - {"epoch": 10, "update": 9.54, "loss": "2.165", "ppl": "4.48", "wps": "366015", "ups": "3.11", "wpb": "117797", "bsz": "256", "num_updates": "491000", "lr": "0.000514141", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "160247"} +[2022-08-02 21:59:59,464][train_inner][INFO] - {"epoch": 10, "update": 9.544, "loss": "2.167", "ppl": "4.49", "wps": "365285", "ups": "3.09", "wpb": "118052", "bsz": "256", "num_updates": "491200", "lr": "0.000513939", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "160312"} +[2022-08-02 22:00:22,109][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 22:01:04,378][train_inner][INFO] - {"epoch": 10, "update": 9.548, "loss": "2.167", "ppl": "4.49", "wps": "364908", "ups": "3.08", "wpb": "118436", "bsz": "256", "num_updates": "491400", "lr": "0.000513737", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "160377"} +[2022-08-02 22:01:56,281][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 22:02:09,038][train_inner][INFO] - {"epoch": 10, "update": 9.552, "loss": "2.166", "ppl": "4.49", "wps": "363961", "ups": "3.09", "wpb": "117669", "bsz": "256", "num_updates": "491600", "lr": "0.000513535", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "160441"} +[2022-08-02 22:03:13,921][train_inner][INFO] - {"epoch": 10, "update": 9.556, "loss": "2.167", "ppl": "4.49", "wps": "365253", "ups": "3.08", "wpb": "118492", "bsz": "256", "num_updates": "491800", "lr": "0.000513333", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "160506"} +[2022-08-02 22:04:18,727][train_inner][INFO] - {"epoch": 10, "update": 9.559, "loss": "2.165", "ppl": "4.48", "wps": "366180", "ups": "3.09", "wpb": "118651", "bsz": "256", "num_updates": "492000", "lr": "0.000513131", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.2", "wall": "160571"} +[2022-08-02 22:05:23,849][train_inner][INFO] - {"epoch": 10, "update": 9.563, "loss": "2.17", "ppl": "4.5", "wps": "362494", "ups": "3.07", "wpb": "118030", "bsz": "256", "num_updates": "492200", "lr": "0.000512929", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "160636"} +[2022-08-02 22:06:28,675][train_inner][INFO] - {"epoch": 10, "update": 9.567, "loss": "2.167", "ppl": "4.49", "wps": "363935", "ups": "3.09", "wpb": "117961", "bsz": "256", "num_updates": "492400", "lr": "0.000512727", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "160701"} +[2022-08-02 22:07:32,974][train_inner][INFO] - {"epoch": 10, "update": 9.571, "loss": "2.17", "ppl": "4.5", "wps": "367052", "ups": "3.11", "wpb": "118004", "bsz": "256", "num_updates": "492600", "lr": "0.000512525", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "160765"} +[2022-08-02 22:08:37,588][train_inner][INFO] - {"epoch": 10, "update": 9.575, "loss": "2.172", "ppl": "4.51", "wps": "365803", "ups": "3.1", "wpb": "118178", "bsz": "256", "num_updates": "492800", "lr": "0.000512323", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "160830"} +[2022-08-02 22:09:42,690][train_inner][INFO] - {"epoch": 10, "update": 9.579, "loss": "2.167", "ppl": "4.49", "wps": "362414", "ups": "3.07", "wpb": "117967", "bsz": "256", "num_updates": "493000", "lr": "0.000512121", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "160895"} +[2022-08-02 22:10:47,352][train_inner][INFO] - {"epoch": 10, "update": 9.583, "loss": "2.172", "ppl": "4.51", "wps": "364848", "ups": "3.09", "wpb": "117958", "bsz": "256", "num_updates": "493200", "lr": "0.000511919", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "160960"} +[2022-08-02 22:11:52,137][train_inner][INFO] - {"epoch": 10, "update": 9.587, "loss": "2.165", "ppl": "4.48", "wps": "366624", "ups": "3.09", "wpb": "118755", "bsz": "256", "num_updates": "493400", "lr": "0.000511717", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "161024"} +[2022-08-02 22:12:56,845][train_inner][INFO] - {"epoch": 10, "update": 9.59, "loss": "2.175", "ppl": "4.52", "wps": "364297", "ups": "3.09", "wpb": "117864", "bsz": "256", "num_updates": "493600", "lr": "0.000511515", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "161089"} +[2022-08-02 22:14:01,767][train_inner][INFO] - {"epoch": 10, "update": 9.594, "loss": "2.169", "ppl": "4.5", "wps": "364553", "ups": "3.08", "wpb": "118335", "bsz": "256", "num_updates": "493800", "lr": "0.000511313", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "161154"} +[2022-08-02 22:15:06,326][train_inner][INFO] - {"epoch": 10, "update": 9.598, "loss": "2.168", "ppl": "4.49", "wps": "365658", "ups": "3.1", "wpb": "118021", "bsz": "256", "num_updates": "494000", "lr": "0.000511111", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "161219"} +[2022-08-02 22:16:11,297][train_inner][INFO] - {"epoch": 10, "update": 9.602, "loss": "2.167", "ppl": "4.49", "wps": "364419", "ups": "3.08", "wpb": "118381", "bsz": "256", "num_updates": "494200", "lr": "0.000510909", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "161284"} +[2022-08-02 22:17:16,425][train_inner][INFO] - {"epoch": 10, "update": 9.606, "loss": "2.168", "ppl": "4.49", "wps": "361912", "ups": "3.07", "wpb": "117852", "bsz": "256", "num_updates": "494400", "lr": "0.000510707", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "161349"} +[2022-08-02 22:18:21,421][train_inner][INFO] - {"epoch": 10, "update": 9.61, "loss": "2.164", "ppl": "4.48", "wps": "365577", "ups": "3.08", "wpb": "118802", "bsz": "256", "num_updates": "494600", "lr": "0.000510505", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "161414"} +[2022-08-02 22:19:26,360][train_inner][INFO] - {"epoch": 10, "update": 9.614, "loss": "2.164", "ppl": "4.48", "wps": "365824", "ups": "3.08", "wpb": "118781", "bsz": "256", "num_updates": "494800", "lr": "0.000510303", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "161479"} +[2022-08-02 22:20:30,866][train_inner][INFO] - {"epoch": 10, "update": 9.618, "loss": "2.163", "ppl": "4.48", "wps": "366646", "ups": "3.1", "wpb": "118252", "bsz": "256", "num_updates": "495000", "lr": "0.000510101", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "161543"} +[2022-08-02 22:21:36,776][train_inner][INFO] - {"epoch": 10, "update": 9.622, "loss": "2.166", "ppl": "4.49", "wps": "360343", "ups": "3.03", "wpb": "118749", "bsz": "256", "num_updates": "495200", "lr": "0.000509899", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.7", "wall": "161609"} +[2022-08-02 22:22:41,438][train_inner][INFO] - {"epoch": 10, "update": 9.625, "loss": "2.169", "ppl": "4.5", "wps": "364024", "ups": "3.09", "wpb": "117691", "bsz": "256", "num_updates": "495400", "lr": "0.000509697", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "161674"} +[2022-08-02 22:23:46,270][train_inner][INFO] - {"epoch": 10, "update": 9.629, "loss": "2.165", "ppl": "4.48", "wps": "364344", "ups": "3.08", "wpb": "118105", "bsz": "256", "num_updates": "495600", "lr": "0.000509495", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "161739"} +[2022-08-02 22:24:50,733][train_inner][INFO] - {"epoch": 10, "update": 9.633, "loss": "2.163", "ppl": "4.48", "wps": "366242", "ups": "3.1", "wpb": "118043", "bsz": "256", "num_updates": "495800", "lr": "0.000509293", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "161803"} +[2022-08-02 22:25:55,602][train_inner][INFO] - {"epoch": 10, "update": 9.637, "loss": "2.164", "ppl": "4.48", "wps": "364746", "ups": "3.08", "wpb": "118301", "bsz": "256", "num_updates": "496000", "lr": "0.000509091", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "161868"} +[2022-08-02 22:27:00,613][train_inner][INFO] - {"epoch": 10, "update": 9.641, "loss": "2.168", "ppl": "4.49", "wps": "361859", "ups": "3.08", "wpb": "117623", "bsz": "256", "num_updates": "496200", "lr": "0.000508889", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "161933"} +[2022-08-02 22:28:05,328][train_inner][INFO] - {"epoch": 10, "update": 9.645, "loss": "2.159", "ppl": "4.47", "wps": "368185", "ups": "3.09", "wpb": "119134", "bsz": "256", "num_updates": "496400", "lr": "0.000508687", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "161998"} +[2022-08-02 22:29:09,756][train_inner][INFO] - {"epoch": 10, "update": 9.649, "loss": "2.171", "ppl": "4.5", "wps": "366162", "ups": "3.1", "wpb": "117955", "bsz": "256", "num_updates": "496600", "lr": "0.000508485", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.3", "wall": "162062"} +[2022-08-02 22:30:14,542][train_inner][INFO] - {"epoch": 10, "update": 9.653, "loss": "2.165", "ppl": "4.49", "wps": "365555", "ups": "3.09", "wpb": "118412", "bsz": "256", "num_updates": "496800", "lr": "0.000508283", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "162127"} +[2022-08-02 22:31:19,700][train_inner][INFO] - {"epoch": 10, "update": 9.657, "loss": "2.164", "ppl": "4.48", "wps": "365808", "ups": "3.07", "wpb": "119175", "bsz": "256", "num_updates": "497000", "lr": "0.000508081", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "162192"} +[2022-08-02 22:32:24,253][train_inner][INFO] - {"epoch": 10, "update": 9.66, "loss": "2.168", "ppl": "4.49", "wps": "366094", "ups": "3.1", "wpb": "118161", "bsz": "256", "num_updates": "497200", "lr": "0.000507879", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "162257"} +[2022-08-02 22:33:28,810][train_inner][INFO] - {"epoch": 10, "update": 9.664, "loss": "2.165", "ppl": "4.48", "wps": "366381", "ups": "3.1", "wpb": "118261", "bsz": "256", "num_updates": "497400", "lr": "0.000507677", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "162321"} +[2022-08-02 22:34:23,964][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 22:34:25,874][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-02 22:34:33,965][train_inner][INFO] - {"epoch": 10, "update": 9.668, "loss": "2.167", "ppl": "4.49", "wps": "363755", "ups": "3.07", "wpb": "118499", "bsz": "256", "num_updates": "497600", "lr": "0.000507475", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "162386"} +[2022-08-02 22:35:38,897][train_inner][INFO] - {"epoch": 10, "update": 9.672, "loss": "2.164", "ppl": "4.48", "wps": "364479", "ups": "3.08", "wpb": "118331", "bsz": "256", "num_updates": "497800", "lr": "0.000507273", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "162451"} +[2022-08-02 22:36:44,754][train_inner][INFO] - {"epoch": 10, "update": 9.676, "loss": "2.169", "ppl": "4.5", "wps": "358822", "ups": "3.04", "wpb": "118154", "bsz": "256", "num_updates": "498000", "lr": "0.000507071", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "21.8", "wall": "162517"} +[2022-08-02 22:37:49,654][train_inner][INFO] - {"epoch": 10, "update": 9.68, "loss": "2.165", "ppl": "4.49", "wps": "365114", "ups": "3.08", "wpb": "118477", "bsz": "256", "num_updates": "498200", "lr": "0.000506869", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "162582"} +[2022-08-02 22:38:54,561][train_inner][INFO] - {"epoch": 10, "update": 9.684, "loss": "2.162", "ppl": "4.48", "wps": "365046", "ups": "3.08", "wpb": "118469", "bsz": "256", "num_updates": "498400", "lr": "0.000506667", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "162647"} +[2022-08-02 22:39:59,530][train_inner][INFO] - {"epoch": 10, "update": 9.688, "loss": "2.168", "ppl": "4.49", "wps": "362656", "ups": "3.08", "wpb": "117806", "bsz": "256", "num_updates": "498600", "lr": "0.000506465", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "162712"} +[2022-08-02 22:41:04,560][train_inner][INFO] - {"epoch": 10, "update": 9.692, "loss": "2.16", "ppl": "4.47", "wps": "364365", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "498800", "lr": "0.000506263", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.5", "wall": "162777"} +[2022-08-02 22:42:09,797][train_inner][INFO] - {"epoch": 10, "update": 9.695, "loss": "2.161", "ppl": "4.47", "wps": "362969", "ups": "3.07", "wpb": "118393", "bsz": "256", "num_updates": "499000", "lr": "0.000506061", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "162842"} +[2022-08-02 22:43:14,414][train_inner][INFO] - {"epoch": 10, "update": 9.699, "loss": "2.163", "ppl": "4.48", "wps": "364910", "ups": "3.1", "wpb": "117895", "bsz": "256", "num_updates": "499200", "lr": "0.000505859", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "162907"} +[2022-08-02 22:44:19,548][train_inner][INFO] - {"epoch": 10, "update": 9.703, "loss": "2.165", "ppl": "4.48", "wps": "363444", "ups": "3.07", "wpb": "118361", "bsz": "256", "num_updates": "499400", "lr": "0.000505657", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "162972"} +[2022-08-02 22:45:24,653][train_inner][INFO] - {"epoch": 10, "update": 9.707, "loss": "2.168", "ppl": "4.49", "wps": "361049", "ups": "3.07", "wpb": "117529", "bsz": "256", "num_updates": "499600", "lr": "0.000505455", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "163037"} +[2022-08-02 22:46:29,295][train_inner][INFO] - {"epoch": 10, "update": 9.711, "loss": "2.162", "ppl": "4.48", "wps": "368129", "ups": "3.09", "wpb": "118981", "bsz": "256", "num_updates": "499800", "lr": "0.000505253", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "163102"} +[2022-08-02 22:47:33,933][train_inner][INFO] - {"epoch": 10, "update": 9.715, "loss": "2.167", "ppl": "4.49", "wps": "364546", "ups": "3.09", "wpb": "117816", "bsz": "256", "num_updates": "500000", "lr": "0.000505051", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "163166"} +[2022-08-02 22:47:33,934][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-02 22:47:57,722][valid][INFO] - {"epoch": 10, "valid_loss": "2.069", "valid_ppl": "4.2", "valid_wps": "1.58577e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "500000", "valid_best_loss": "2.069"} +[2022-08-02 22:47:57,726][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 500000 updates +[2022-08-02 22:47:57,727][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_10_500000.pt +[2022-08-02 22:48:07,303][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_10_500000.pt +[2022-08-02 22:48:33,491][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_10_500000.pt (epoch 10 @ 500000 updates, score 2.069) (writing took 35.76536318846047 seconds) +[2022-08-02 22:49:38,497][train_inner][INFO] - {"epoch": 10, "update": 9.719, "loss": "2.159", "ppl": "4.47", "wps": "190379", "ups": "1.61", "wpb": "118571", "bsz": "256", "num_updates": "500200", "lr": "0.000504848", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "163291"} +[2022-08-02 22:50:43,639][train_inner][INFO] - {"epoch": 10, "update": 9.723, "loss": "2.163", "ppl": "4.48", "wps": "363480", "ups": "3.07", "wpb": "118387", "bsz": "256", "num_updates": "500400", "lr": "0.000504646", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "163356"} +[2022-08-02 22:51:48,648][train_inner][INFO] - {"epoch": 10, "update": 9.726, "loss": "2.161", "ppl": "4.47", "wps": "365128", "ups": "3.08", "wpb": "118682", "bsz": "256", "num_updates": "500600", "lr": "0.000504444", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "163421"} +[2022-08-02 22:52:53,781][train_inner][INFO] - {"epoch": 10, "update": 9.73, "loss": "2.158", "ppl": "4.46", "wps": "363902", "ups": "3.07", "wpb": "118508", "bsz": "256", "num_updates": "500800", "lr": "0.000504242", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "163486"} +[2022-08-02 22:53:58,271][train_inner][INFO] - {"epoch": 10, "update": 9.734, "loss": "2.16", "ppl": "4.47", "wps": "366474", "ups": "3.1", "wpb": "118170", "bsz": "256", "num_updates": "501000", "lr": "0.00050404", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "163551"} +[2022-08-02 22:55:03,332][train_inner][INFO] - {"epoch": 10, "update": 9.738, "loss": "2.155", "ppl": "4.45", "wps": "365554", "ups": "3.07", "wpb": "118914", "bsz": "256", "num_updates": "501200", "lr": "0.000503838", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28", "wall": "163616"} +[2022-08-02 22:56:07,859][train_inner][INFO] - {"epoch": 10, "update": 9.742, "loss": "2.17", "ppl": "4.5", "wps": "366728", "ups": "3.1", "wpb": "118318", "bsz": "256", "num_updates": "501400", "lr": "0.000503636", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "163680"} +[2022-08-02 22:57:12,843][train_inner][INFO] - {"epoch": 10, "update": 9.746, "loss": "2.162", "ppl": "4.48", "wps": "364429", "ups": "3.08", "wpb": "118408", "bsz": "256", "num_updates": "501600", "lr": "0.000503434", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "163745"} +[2022-08-02 22:58:17,760][train_inner][INFO] - {"epoch": 10, "update": 9.75, "loss": "2.168", "ppl": "4.49", "wps": "363918", "ups": "3.08", "wpb": "118121", "bsz": "256", "num_updates": "501800", "lr": "0.000503232", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "163810"} +[2022-08-02 22:59:23,071][train_inner][INFO] - {"epoch": 10, "update": 9.754, "loss": "2.161", "ppl": "4.47", "wps": "364437", "ups": "3.06", "wpb": "119008", "bsz": "256", "num_updates": "502000", "lr": "0.00050303", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "163875"} +[2022-08-02 23:00:28,223][train_inner][INFO] - {"epoch": 10, "update": 9.758, "loss": "2.166", "ppl": "4.49", "wps": "361083", "ups": "3.07", "wpb": "117623", "bsz": "256", "num_updates": "502200", "lr": "0.000502828", "gnorm": "0.683", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "163941"} +[2022-08-02 23:01:33,039][train_inner][INFO] - {"epoch": 10, "update": 9.761, "loss": "2.165", "ppl": "4.48", "wps": "366283", "ups": "3.09", "wpb": "118703", "bsz": "256", "num_updates": "502400", "lr": "0.000502626", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.8", "wall": "164005"} +[2022-08-02 23:02:37,577][train_inner][INFO] - {"epoch": 10, "update": 9.765, "loss": "2.163", "ppl": "4.48", "wps": "365452", "ups": "3.1", "wpb": "117927", "bsz": "256", "num_updates": "502600", "lr": "0.000502424", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "164070"} +[2022-08-02 23:02:40,987][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 23:03:43,035][train_inner][INFO] - {"epoch": 10, "update": 9.769, "loss": "2.153", "ppl": "4.45", "wps": "362658", "ups": "3.06", "wpb": "118692", "bsz": "256", "num_updates": "502800", "lr": "0.000502222", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.5", "wall": "164135"} +[2022-08-02 23:04:48,200][train_inner][INFO] - {"epoch": 10, "update": 9.773, "loss": "2.158", "ppl": "4.46", "wps": "362902", "ups": "3.07", "wpb": "118241", "bsz": "256", "num_updates": "503000", "lr": "0.00050202", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "164201"} +[2022-08-02 23:05:53,015][train_inner][INFO] - {"epoch": 10, "update": 9.777, "loss": "2.161", "ppl": "4.47", "wps": "363425", "ups": "3.09", "wpb": "117775", "bsz": "256", "num_updates": "503200", "lr": "0.000501818", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "164265"} +[2022-08-02 23:06:58,280][train_inner][INFO] - {"epoch": 10, "update": 9.781, "loss": "2.167", "ppl": "4.49", "wps": "362314", "ups": "3.06", "wpb": "118230", "bsz": "256", "num_updates": "503400", "lr": "0.000501616", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "164331"} +[2022-08-02 23:08:02,963][train_inner][INFO] - {"epoch": 10, "update": 9.785, "loss": "2.168", "ppl": "4.49", "wps": "367494", "ups": "3.09", "wpb": "118851", "bsz": "256", "num_updates": "503600", "lr": "0.000501414", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "164395"} +[2022-08-02 23:09:08,733][train_inner][INFO] - {"epoch": 10, "update": 9.789, "loss": "2.155", "ppl": "4.45", "wps": "359475", "ups": "3.04", "wpb": "118212", "bsz": "256", "num_updates": "503800", "lr": "0.000501212", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "164461"} +[2022-08-02 23:10:13,437][train_inner][INFO] - {"epoch": 10, "update": 9.793, "loss": "2.16", "ppl": "4.47", "wps": "364954", "ups": "3.09", "wpb": "118069", "bsz": "256", "num_updates": "504000", "lr": "0.00050101", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "164526"} +[2022-08-02 23:11:18,056][train_inner][INFO] - {"epoch": 10, "update": 9.796, "loss": "2.158", "ppl": "4.46", "wps": "364045", "ups": "3.1", "wpb": "117620", "bsz": "256", "num_updates": "504200", "lr": "0.000500808", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "164590"} +[2022-08-02 23:12:22,534][train_inner][INFO] - {"epoch": 10, "update": 9.8, "loss": "2.16", "ppl": "4.47", "wps": "369304", "ups": "3.1", "wpb": "119057", "bsz": "256", "num_updates": "504400", "lr": "0.000500606", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "164655"} +[2022-08-02 23:13:27,354][train_inner][INFO] - {"epoch": 10, "update": 9.804, "loss": "2.164", "ppl": "4.48", "wps": "364758", "ups": "3.09", "wpb": "118217", "bsz": "256", "num_updates": "504600", "lr": "0.000500404", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "164720"} +[2022-08-02 23:14:33,219][train_inner][INFO] - {"epoch": 10, "update": 9.808, "loss": "2.165", "ppl": "4.48", "wps": "356996", "ups": "3.04", "wpb": "117566", "bsz": "256", "num_updates": "504800", "lr": "0.000500202", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "164786"} +[2022-08-02 23:15:38,122][train_inner][INFO] - {"epoch": 10, "update": 9.812, "loss": "2.162", "ppl": "4.48", "wps": "364424", "ups": "3.08", "wpb": "118260", "bsz": "256", "num_updates": "505000", "lr": "0.0005", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "164850"} +[2022-08-02 23:16:43,169][train_inner][INFO] - {"epoch": 10, "update": 9.816, "loss": "2.158", "ppl": "4.46", "wps": "364400", "ups": "3.07", "wpb": "118514", "bsz": "256", "num_updates": "505200", "lr": "0.000499798", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "164915"} +[2022-08-02 23:17:47,954][train_inner][INFO] - {"epoch": 10, "update": 9.82, "loss": "2.16", "ppl": "4.47", "wps": "364817", "ups": "3.09", "wpb": "118170", "bsz": "256", "num_updates": "505400", "lr": "0.000499596", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "164980"} +[2022-08-02 23:18:45,255][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 23:18:53,052][train_inner][INFO] - {"epoch": 10, "update": 9.824, "loss": "2.158", "ppl": "4.46", "wps": "362493", "ups": "3.07", "wpb": "117988", "bsz": "256", "num_updates": "505600", "lr": "0.000499394", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "165045"} +[2022-08-02 23:19:57,790][train_inner][INFO] - {"epoch": 10, "update": 9.827, "loss": "2.164", "ppl": "4.48", "wps": "364364", "ups": "3.09", "wpb": "117939", "bsz": "256", "num_updates": "505800", "lr": "0.000499192", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "165110"} +[2022-08-02 23:21:02,367][train_inner][INFO] - {"epoch": 10, "update": 9.831, "loss": "2.162", "ppl": "4.47", "wps": "364320", "ups": "3.1", "wpb": "117633", "bsz": "256", "num_updates": "506000", "lr": "0.00049899", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "165175"} +[2022-08-02 23:22:07,329][train_inner][INFO] - {"epoch": 10, "update": 9.835, "loss": "2.161", "ppl": "4.47", "wps": "363497", "ups": "3.08", "wpb": "118065", "bsz": "256", "num_updates": "506200", "lr": "0.000498788", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "165240"} +[2022-08-02 23:23:11,775][train_inner][INFO] - {"epoch": 10, "update": 9.839, "loss": "2.162", "ppl": "4.48", "wps": "367480", "ups": "3.1", "wpb": "118412", "bsz": "256", "num_updates": "506400", "lr": "0.000498586", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "165304"} +[2022-08-02 23:24:16,632][train_inner][INFO] - {"epoch": 10, "update": 9.843, "loss": "2.16", "ppl": "4.47", "wps": "363886", "ups": "3.08", "wpb": "118000", "bsz": "256", "num_updates": "506600", "lr": "0.000498384", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "165369"} +[2022-08-02 23:25:21,474][train_inner][INFO] - {"epoch": 10, "update": 9.847, "loss": "2.156", "ppl": "4.46", "wps": "366791", "ups": "3.08", "wpb": "118916", "bsz": "256", "num_updates": "506800", "lr": "0.000498182", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "165434"} +[2022-08-02 23:26:26,221][train_inner][INFO] - {"epoch": 10, "update": 9.851, "loss": "2.161", "ppl": "4.47", "wps": "367382", "ups": "3.09", "wpb": "118932", "bsz": "256", "num_updates": "507000", "lr": "0.00049798", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "165499"} +[2022-08-02 23:27:31,017][train_inner][INFO] - {"epoch": 10, "update": 9.855, "loss": "2.154", "ppl": "4.45", "wps": "366796", "ups": "3.09", "wpb": "118832", "bsz": "256", "num_updates": "507200", "lr": "0.000497778", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "165563"} +[2022-08-02 23:28:36,450][train_inner][INFO] - {"epoch": 10, "update": 9.859, "loss": "2.157", "ppl": "4.46", "wps": "359971", "ups": "3.06", "wpb": "117769", "bsz": "256", "num_updates": "507400", "lr": "0.000497576", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "165629"} +[2022-08-02 23:29:41,406][train_inner][INFO] - {"epoch": 10, "update": 9.862, "loss": "2.159", "ppl": "4.47", "wps": "363973", "ups": "3.08", "wpb": "118209", "bsz": "256", "num_updates": "507600", "lr": "0.000497374", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "165694"} +[2022-08-02 23:30:46,544][train_inner][INFO] - {"epoch": 10, "update": 9.866, "loss": "2.158", "ppl": "4.46", "wps": "362230", "ups": "3.07", "wpb": "117972", "bsz": "256", "num_updates": "507800", "lr": "0.000497172", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "165759"} +[2022-08-02 23:31:51,418][train_inner][INFO] - {"epoch": 10, "update": 9.87, "loss": "2.157", "ppl": "4.46", "wps": "365634", "ups": "3.08", "wpb": "118600", "bsz": "256", "num_updates": "508000", "lr": "0.00049697", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "165824"} +[2022-08-02 23:32:56,793][train_inner][INFO] - {"epoch": 10, "update": 9.874, "loss": "2.16", "ppl": "4.47", "wps": "361583", "ups": "3.06", "wpb": "118191", "bsz": "256", "num_updates": "508200", "lr": "0.000496768", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "165889"} +[2022-08-02 23:34:01,773][train_inner][INFO] - {"epoch": 10, "update": 9.878, "loss": "2.16", "ppl": "4.47", "wps": "364505", "ups": "3.08", "wpb": "118425", "bsz": "256", "num_updates": "508400", "lr": "0.000496566", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.8", "wall": "165954"} +[2022-08-02 23:35:07,758][train_inner][INFO] - {"epoch": 10, "update": 9.882, "loss": "2.154", "ppl": "4.45", "wps": "358576", "ups": "3.03", "wpb": "118303", "bsz": "256", "num_updates": "508600", "lr": "0.000496364", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.6", "wall": "166020"} +[2022-08-02 23:36:12,840][train_inner][INFO] - {"epoch": 10, "update": 9.886, "loss": "2.157", "ppl": "4.46", "wps": "365225", "ups": "3.07", "wpb": "118846", "bsz": "256", "num_updates": "508800", "lr": "0.000496162", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "166085"} +[2022-08-02 23:37:17,549][train_inner][INFO] - {"epoch": 10, "update": 9.89, "loss": "2.155", "ppl": "4.45", "wps": "365570", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "509000", "lr": "0.00049596", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "166150"} +[2022-08-02 23:37:41,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-02 23:38:22,662][train_inner][INFO] - {"epoch": 10, "update": 9.894, "loss": "2.154", "ppl": "4.45", "wps": "362682", "ups": "3.07", "wpb": "118074", "bsz": "256", "num_updates": "509200", "lr": "0.000495758", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "166215"} +[2022-08-02 23:39:27,634][train_inner][INFO] - {"epoch": 10, "update": 9.897, "loss": "2.163", "ppl": "4.48", "wps": "362812", "ups": "3.08", "wpb": "117862", "bsz": "256", "num_updates": "509400", "lr": "0.000495556", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "166280"} +[2022-08-02 23:40:32,583][train_inner][INFO] - {"epoch": 10, "update": 9.901, "loss": "2.153", "ppl": "4.45", "wps": "365786", "ups": "3.08", "wpb": "118786", "bsz": "256", "num_updates": "509600", "lr": "0.000495354", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "166345"} +[2022-08-02 23:41:37,586][train_inner][INFO] - {"epoch": 10, "update": 9.905, "loss": "2.155", "ppl": "4.45", "wps": "364064", "ups": "3.08", "wpb": "118325", "bsz": "256", "num_updates": "509800", "lr": "0.000495152", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "166410"} +[2022-08-02 23:42:42,436][train_inner][INFO] - {"epoch": 10, "update": 9.909, "loss": "2.153", "ppl": "4.45", "wps": "364700", "ups": "3.08", "wpb": "118253", "bsz": "256", "num_updates": "510000", "lr": "0.000494949", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "166475"} +[2022-08-02 23:43:46,916][train_inner][INFO] - {"epoch": 10, "update": 9.913, "loss": "2.165", "ppl": "4.49", "wps": "366137", "ups": "3.1", "wpb": "118040", "bsz": "256", "num_updates": "510200", "lr": "0.000494747", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "166539"} +[2022-08-02 23:44:52,590][train_inner][INFO] - {"epoch": 10, "update": 9.917, "loss": "2.156", "ppl": "4.46", "wps": "359443", "ups": "3.05", "wpb": "118029", "bsz": "256", "num_updates": "510400", "lr": "0.000494545", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "166605"} +[2022-08-02 23:45:57,503][train_inner][INFO] - {"epoch": 10, "update": 9.921, "loss": "2.156", "ppl": "4.46", "wps": "365012", "ups": "3.08", "wpb": "118468", "bsz": "256", "num_updates": "510600", "lr": "0.000494343", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "166670"} +[2022-08-02 23:47:02,326][train_inner][INFO] - {"epoch": 10, "update": 9.925, "loss": "2.158", "ppl": "4.46", "wps": "365450", "ups": "3.09", "wpb": "118447", "bsz": "256", "num_updates": "510800", "lr": "0.000494141", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "166735"} +[2022-08-02 23:48:07,375][train_inner][INFO] - {"epoch": 10, "update": 9.929, "loss": "2.151", "ppl": "4.44", "wps": "365756", "ups": "3.07", "wpb": "118959", "bsz": "256", "num_updates": "511000", "lr": "0.000493939", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "166800"} +[2022-08-02 23:49:11,888][train_inner][INFO] - {"epoch": 10, "update": 9.932, "loss": "2.159", "ppl": "4.46", "wps": "362978", "ups": "3.1", "wpb": "117082", "bsz": "256", "num_updates": "511200", "lr": "0.000493737", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "166864"} +[2022-08-02 23:50:17,181][train_inner][INFO] - {"epoch": 10, "update": 9.936, "loss": "2.151", "ppl": "4.44", "wps": "362556", "ups": "3.06", "wpb": "118360", "bsz": "256", "num_updates": "511400", "lr": "0.000493535", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "166930"} +[2022-08-02 23:51:21,734][train_inner][INFO] - {"epoch": 10, "update": 9.94, "loss": "2.157", "ppl": "4.46", "wps": "365483", "ups": "3.1", "wpb": "117964", "bsz": "256", "num_updates": "511600", "lr": "0.000493333", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "166994"} +[2022-08-02 23:52:26,560][train_inner][INFO] - {"epoch": 10, "update": 9.944, "loss": "2.155", "ppl": "4.45", "wps": "364692", "ups": "3.09", "wpb": "118205", "bsz": "256", "num_updates": "511800", "lr": "0.000493131", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "167059"} +[2022-08-02 23:53:31,222][train_inner][INFO] - {"epoch": 10, "update": 9.948, "loss": "2.156", "ppl": "4.46", "wps": "365962", "ups": "3.09", "wpb": "118318", "bsz": "256", "num_updates": "512000", "lr": "0.000492929", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "167124"} +[2022-08-02 23:54:36,026][train_inner][INFO] - {"epoch": 10, "update": 9.952, "loss": "2.151", "ppl": "4.44", "wps": "366499", "ups": "3.09", "wpb": "118751", "bsz": "256", "num_updates": "512200", "lr": "0.000492727", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "167188"} +[2022-08-02 23:55:40,873][train_inner][INFO] - {"epoch": 10, "update": 9.956, "loss": "2.152", "ppl": "4.44", "wps": "367068", "ups": "3.08", "wpb": "119015", "bsz": "256", "num_updates": "512400", "lr": "0.000492525", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.6", "wall": "167253"} +[2022-08-02 23:56:45,461][train_inner][INFO] - {"epoch": 10, "update": 9.96, "loss": "2.153", "ppl": "4.45", "wps": "366393", "ups": "3.1", "wpb": "118321", "bsz": "256", "num_updates": "512600", "lr": "0.000492323", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "167318"} +[2022-08-02 23:57:50,638][train_inner][INFO] - {"epoch": 10, "update": 9.963, "loss": "2.156", "ppl": "4.46", "wps": "361636", "ups": "3.07", "wpb": "117849", "bsz": "256", "num_updates": "512800", "lr": "0.000492121", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "167383"} +[2022-08-02 23:58:55,585][train_inner][INFO] - {"epoch": 10, "update": 9.967, "loss": "2.152", "ppl": "4.45", "wps": "365720", "ups": "3.08", "wpb": "118761", "bsz": "256", "num_updates": "513000", "lr": "0.000491919", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "167448"} +[2022-08-03 00:00:01,278][train_inner][INFO] - {"epoch": 10, "update": 9.971, "loss": "2.158", "ppl": "4.46", "wps": "359843", "ups": "3.04", "wpb": "118194", "bsz": "256", "num_updates": "513200", "lr": "0.000491717", "gnorm": "0.68", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21", "wall": "167514"} +[2022-08-03 00:01:06,328][train_inner][INFO] - {"epoch": 10, "update": 9.975, "loss": "2.154", "ppl": "4.45", "wps": "363693", "ups": "3.07", "wpb": "118284", "bsz": "256", "num_updates": "513400", "lr": "0.000491515", "gnorm": "0.682", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.9", "wall": "167579"} +[2022-08-03 00:01:07,288][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 00:01:09,450][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 00:01:48,095][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 00:02:12,080][train_inner][INFO] - {"epoch": 10, "update": 9.979, "loss": "2.154", "ppl": "4.45", "wps": "360521", "ups": "3.04", "wpb": "118524", "bsz": "256", "num_updates": "513600", "lr": "0.000491313", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "167644"} +[2022-08-03 00:03:17,523][train_inner][INFO] - {"epoch": 10, "update": 9.983, "loss": "2.153", "ppl": "4.45", "wps": "364930", "ups": "3.06", "wpb": "119410", "bsz": "256", "num_updates": "513800", "lr": "0.000491111", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "167710"} +[2022-08-03 00:04:21,936][train_inner][INFO] - {"epoch": 10, "update": 9.987, "loss": "2.158", "ppl": "4.46", "wps": "366051", "ups": "3.1", "wpb": "117891", "bsz": "256", "num_updates": "514000", "lr": "0.000490909", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "167774"} +[2022-08-03 00:05:26,753][train_inner][INFO] - {"epoch": 10, "update": 9.991, "loss": "2.159", "ppl": "4.47", "wps": "366302", "ups": "3.09", "wpb": "118711", "bsz": "256", "num_updates": "514200", "lr": "0.000490707", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "167839"} +[2022-08-03 00:06:31,818][train_inner][INFO] - {"epoch": 10, "update": 9.995, "loss": "2.16", "ppl": "4.47", "wps": "362003", "ups": "3.07", "wpb": "117767", "bsz": "256", "num_updates": "514400", "lr": "0.000490505", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.1", "wall": "167904"} +[2022-08-03 00:07:37,798][train_inner][INFO] - {"epoch": 10, "update": 9.998, "loss": "2.152", "ppl": "4.44", "wps": "359162", "ups": "3.03", "wpb": "118487", "bsz": "256", "num_updates": "514600", "lr": "0.000490303", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "24.4", "wall": "167970"} +[2022-08-03 00:08:03,090][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 00:08:25,701][valid][INFO] - {"epoch": 10, "valid_loss": "2.062", "valid_ppl": "4.17", "valid_wps": "1.56508e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "514678", "valid_best_loss": "2.062"} +[2022-08-03 00:08:25,704][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 514678 updates +[2022-08-03 00:08:25,705][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt +[2022-08-03 00:08:35,643][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt +[2022-08-03 00:08:54,905][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_best.pt (epoch 10 @ 514678 updates, score 2.062) (writing took 29.200979253277183 seconds) +[2022-08-03 00:08:54,905][fairseq_cli.train][INFO] - end of epoch 10 (average epoch stats below) +[2022-08-03 00:08:54,906][train][INFO] - {"epoch": 10, "train_loss": "2.168", "train_ppl": "4.5", "train_wps": "361526", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "514678", "train_lr": "0.000490224", "train_gnorm": "0.677", "train_clip": "0", "train_loss_scale": "4", "train_train_wall": "16626", "train_gb_free": "21.1", "train_wall": "168047"} +[2022-08-03 00:08:55,017][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-03 00:08:55,020][fairseq.trainer][INFO] - begin training epoch 11 +[2022-08-03 00:08:55,020][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-03 00:09:44,475][train_inner][INFO] - {"epoch": 11, "update": 10.002, "loss": "2.162", "ppl": "4.48", "wps": "184754", "ups": "1.58", "wpb": "117019", "bsz": "255.4", "num_updates": "514800", "lr": "0.000490101", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "168097"} +[2022-08-03 00:10:49,248][train_inner][INFO] - {"epoch": 11, "update": 10.006, "loss": "2.149", "ppl": "4.43", "wps": "364492", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "515000", "lr": "0.000489899", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.6", "wall": "168162"} +[2022-08-03 00:11:54,855][train_inner][INFO] - {"epoch": 11, "update": 10.01, "loss": "2.152", "ppl": "4.45", "wps": "360129", "ups": "3.05", "wpb": "118132", "bsz": "256", "num_updates": "515200", "lr": "0.000489697", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "168227"} +[2022-08-03 00:12:59,437][train_inner][INFO] - {"epoch": 11, "update": 10.014, "loss": "2.152", "ppl": "4.45", "wps": "364796", "ups": "3.1", "wpb": "117794", "bsz": "256", "num_updates": "515400", "lr": "0.000489495", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.6", "wall": "168292"} +[2022-08-03 00:14:04,425][train_inner][INFO] - {"epoch": 11, "update": 10.018, "loss": "2.155", "ppl": "4.45", "wps": "363961", "ups": "3.08", "wpb": "118265", "bsz": "256", "num_updates": "515600", "lr": "0.000489293", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "168357"} +[2022-08-03 00:15:09,076][train_inner][INFO] - {"epoch": 11, "update": 10.022, "loss": "2.152", "ppl": "4.44", "wps": "365150", "ups": "3.09", "wpb": "118034", "bsz": "256", "num_updates": "515800", "lr": "0.000489091", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "168421"} +[2022-08-03 00:16:14,096][train_inner][INFO] - {"epoch": 11, "update": 10.026, "loss": "2.152", "ppl": "4.44", "wps": "361982", "ups": "3.08", "wpb": "117679", "bsz": "256", "num_updates": "516000", "lr": "0.000488889", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "168486"} +[2022-08-03 00:17:18,646][train_inner][INFO] - {"epoch": 11, "update": 10.03, "loss": "2.162", "ppl": "4.47", "wps": "365505", "ups": "3.1", "wpb": "117964", "bsz": "256", "num_updates": "516200", "lr": "0.000488687", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "168551"} +[2022-08-03 00:18:23,562][train_inner][INFO] - {"epoch": 11, "update": 10.033, "loss": "2.146", "ppl": "4.43", "wps": "364560", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "516400", "lr": "0.000488485", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "168616"} +[2022-08-03 00:19:28,377][train_inner][INFO] - {"epoch": 11, "update": 10.037, "loss": "2.15", "ppl": "4.44", "wps": "364878", "ups": "3.09", "wpb": "118246", "bsz": "256", "num_updates": "516600", "lr": "0.000488283", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "168681"} +[2022-08-03 00:20:32,973][train_inner][INFO] - {"epoch": 11, "update": 10.041, "loss": "2.15", "ppl": "4.44", "wps": "365301", "ups": "3.1", "wpb": "117984", "bsz": "256", "num_updates": "516800", "lr": "0.000488081", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "168745"} +[2022-08-03 00:21:37,493][train_inner][INFO] - {"epoch": 11, "update": 10.045, "loss": "2.15", "ppl": "4.44", "wps": "365781", "ups": "3.1", "wpb": "117998", "bsz": "256", "num_updates": "517000", "lr": "0.000487879", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "168810"} +[2022-08-03 00:22:42,506][train_inner][INFO] - {"epoch": 11, "update": 10.049, "loss": "2.15", "ppl": "4.44", "wps": "365526", "ups": "3.08", "wpb": "118818", "bsz": "256", "num_updates": "517200", "lr": "0.000487677", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "168875"} +[2022-08-03 00:23:22,770][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 00:23:47,714][train_inner][INFO] - {"epoch": 11, "update": 10.053, "loss": "2.157", "ppl": "4.46", "wps": "363452", "ups": "3.07", "wpb": "118497", "bsz": "256", "num_updates": "517400", "lr": "0.000487475", "gnorm": "0.711", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "168940"} +[2022-08-03 00:24:52,703][train_inner][INFO] - {"epoch": 11, "update": 10.057, "loss": "2.151", "ppl": "4.44", "wps": "365614", "ups": "3.08", "wpb": "118804", "bsz": "256", "num_updates": "517600", "lr": "0.000487273", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "169005"} +[2022-08-03 00:25:57,586][train_inner][INFO] - {"epoch": 11, "update": 10.061, "loss": "2.15", "ppl": "4.44", "wps": "365866", "ups": "3.08", "wpb": "118690", "bsz": "256", "num_updates": "517800", "lr": "0.000487071", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "169070"} +[2022-08-03 00:27:02,544][train_inner][INFO] - {"epoch": 11, "update": 10.065, "loss": "2.15", "ppl": "4.44", "wps": "364258", "ups": "3.08", "wpb": "118307", "bsz": "256", "num_updates": "518000", "lr": "0.000486869", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "169135"} +[2022-08-03 00:28:07,450][train_inner][INFO] - {"epoch": 11, "update": 10.068, "loss": "2.146", "ppl": "4.43", "wps": "364274", "ups": "3.08", "wpb": "118216", "bsz": "256", "num_updates": "518200", "lr": "0.000486667", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "169200"} +[2022-08-03 00:29:12,162][train_inner][INFO] - {"epoch": 11, "update": 10.072, "loss": "2.151", "ppl": "4.44", "wps": "365079", "ups": "3.09", "wpb": "118122", "bsz": "256", "num_updates": "518400", "lr": "0.000486465", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "169264"} +[2022-08-03 00:30:17,214][train_inner][INFO] - {"epoch": 11, "update": 10.076, "loss": "2.147", "ppl": "4.43", "wps": "364516", "ups": "3.07", "wpb": "118561", "bsz": "256", "num_updates": "518600", "lr": "0.000486263", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "169330"} +[2022-08-03 00:31:21,684][train_inner][INFO] - {"epoch": 11, "update": 10.08, "loss": "2.147", "ppl": "4.43", "wps": "366338", "ups": "3.1", "wpb": "118088", "bsz": "256", "num_updates": "518800", "lr": "0.000486061", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "169394"} +[2022-08-03 00:32:26,502][train_inner][INFO] - {"epoch": 11, "update": 10.084, "loss": "2.145", "ppl": "4.42", "wps": "365778", "ups": "3.09", "wpb": "118543", "bsz": "256", "num_updates": "519000", "lr": "0.000485859", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "169459"} +[2022-08-03 00:33:30,906][train_inner][INFO] - {"epoch": 11, "update": 10.088, "loss": "2.147", "ppl": "4.43", "wps": "367031", "ups": "3.11", "wpb": "118189", "bsz": "256", "num_updates": "519200", "lr": "0.000485657", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "169523"} +[2022-08-03 00:34:36,079][train_inner][INFO] - {"epoch": 11, "update": 10.092, "loss": "2.156", "ppl": "4.46", "wps": "361908", "ups": "3.07", "wpb": "117931", "bsz": "256", "num_updates": "519400", "lr": "0.000485455", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "169588"} +[2022-08-03 00:35:28,287][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 00:35:34,368][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 00:35:41,569][train_inner][INFO] - {"epoch": 11, "update": 10.096, "loss": "2.151", "ppl": "4.44", "wps": "362689", "ups": "3.05", "wpb": "118761", "bsz": "256", "num_updates": "519600", "lr": "0.000485253", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "169654"} +[2022-08-03 00:36:46,264][train_inner][INFO] - {"epoch": 11, "update": 10.1, "loss": "2.145", "ppl": "4.42", "wps": "364826", "ups": "3.09", "wpb": "118012", "bsz": "256", "num_updates": "519800", "lr": "0.000485051", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.7", "wall": "169719"} +[2022-08-03 00:37:51,264][train_inner][INFO] - {"epoch": 11, "update": 10.103, "loss": "2.147", "ppl": "4.43", "wps": "364445", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "520000", "lr": "0.000484848", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "169784"} +[2022-08-03 00:38:56,516][train_inner][INFO] - {"epoch": 11, "update": 10.107, "loss": "2.143", "ppl": "4.42", "wps": "364692", "ups": "3.07", "wpb": "118983", "bsz": "256", "num_updates": "520200", "lr": "0.000484646", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "169849"} +[2022-08-03 00:40:01,175][train_inner][INFO] - {"epoch": 11, "update": 10.111, "loss": "2.151", "ppl": "4.44", "wps": "366578", "ups": "3.09", "wpb": "118511", "bsz": "256", "num_updates": "520400", "lr": "0.000484444", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "169913"} +[2022-08-03 00:41:06,003][train_inner][INFO] - {"epoch": 11, "update": 10.115, "loss": "2.148", "ppl": "4.43", "wps": "363971", "ups": "3.09", "wpb": "117972", "bsz": "256", "num_updates": "520600", "lr": "0.000484242", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.7", "wall": "169978"} +[2022-08-03 00:42:10,529][train_inner][INFO] - {"epoch": 11, "update": 10.119, "loss": "2.148", "ppl": "4.43", "wps": "367364", "ups": "3.1", "wpb": "118521", "bsz": "256", "num_updates": "520800", "lr": "0.00048404", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "170043"} +[2022-08-03 00:43:15,062][train_inner][INFO] - {"epoch": 11, "update": 10.123, "loss": "2.146", "ppl": "4.43", "wps": "365650", "ups": "3.1", "wpb": "117980", "bsz": "256", "num_updates": "521000", "lr": "0.000483838", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.3", "wall": "170107"} +[2022-08-03 00:44:19,687][train_inner][INFO] - {"epoch": 11, "update": 10.127, "loss": "2.15", "ppl": "4.44", "wps": "365542", "ups": "3.09", "wpb": "118115", "bsz": "256", "num_updates": "521200", "lr": "0.000483636", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "170172"} +[2022-08-03 00:45:24,812][train_inner][INFO] - {"epoch": 11, "update": 10.131, "loss": "2.145", "ppl": "4.42", "wps": "364771", "ups": "3.07", "wpb": "118776", "bsz": "256", "num_updates": "521400", "lr": "0.000483434", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "170237"} +[2022-08-03 00:46:30,406][train_inner][INFO] - {"epoch": 11, "update": 10.134, "loss": "2.151", "ppl": "4.44", "wps": "361209", "ups": "3.05", "wpb": "118466", "bsz": "256", "num_updates": "521600", "lr": "0.000483232", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.6", "wall": "170303"} +[2022-08-03 00:47:34,874][train_inner][INFO] - {"epoch": 11, "update": 10.138, "loss": "2.15", "ppl": "4.44", "wps": "367743", "ups": "3.1", "wpb": "118536", "bsz": "256", "num_updates": "521800", "lr": "0.00048303", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.4", "wall": "170367"} +[2022-08-03 00:48:40,064][train_inner][INFO] - {"epoch": 11, "update": 10.142, "loss": "2.146", "ppl": "4.43", "wps": "363122", "ups": "3.07", "wpb": "118357", "bsz": "256", "num_updates": "522000", "lr": "0.000482828", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "170432"} +[2022-08-03 00:49:44,970][train_inner][INFO] - {"epoch": 11, "update": 10.146, "loss": "2.145", "ppl": "4.42", "wps": "364076", "ups": "3.08", "wpb": "118151", "bsz": "256", "num_updates": "522200", "lr": "0.000482626", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "170497"} +[2022-08-03 00:50:50,017][train_inner][INFO] - {"epoch": 11, "update": 10.15, "loss": "2.149", "ppl": "4.43", "wps": "363945", "ups": "3.07", "wpb": "118366", "bsz": "256", "num_updates": "522400", "lr": "0.000482424", "gnorm": "0.708", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "170562"} +[2022-08-03 00:51:54,624][train_inner][INFO] - {"epoch": 11, "update": 10.154, "loss": "2.142", "ppl": "4.42", "wps": "366753", "ups": "3.1", "wpb": "118473", "bsz": "256", "num_updates": "522600", "lr": "0.000482222", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "170627"} +[2022-08-03 00:52:59,591][train_inner][INFO] - {"epoch": 11, "update": 10.158, "loss": "2.147", "ppl": "4.43", "wps": "365866", "ups": "3.08", "wpb": "118845", "bsz": "256", "num_updates": "522800", "lr": "0.00048202", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "170692"} +[2022-08-03 00:54:04,600][train_inner][INFO] - {"epoch": 11, "update": 10.162, "loss": "2.145", "ppl": "4.42", "wps": "363913", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "523000", "lr": "0.000481818", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "170757"} +[2022-08-03 00:55:09,406][train_inner][INFO] - {"epoch": 11, "update": 10.166, "loss": "2.146", "ppl": "4.42", "wps": "367513", "ups": "3.09", "wpb": "119082", "bsz": "256", "num_updates": "523200", "lr": "0.000481616", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "170822"} +[2022-08-03 00:56:07,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 00:56:14,997][train_inner][INFO] - {"epoch": 11, "update": 10.169, "loss": "2.142", "ppl": "4.41", "wps": "362657", "ups": "3.05", "wpb": "118934", "bsz": "256", "num_updates": "523400", "lr": "0.000481414", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.3", "wall": "170887"} +[2022-08-03 00:57:19,696][train_inner][INFO] - {"epoch": 11, "update": 10.173, "loss": "2.145", "ppl": "4.42", "wps": "364402", "ups": "3.09", "wpb": "117881", "bsz": "256", "num_updates": "523600", "lr": "0.000481212", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "170952"} +[2022-08-03 00:58:24,773][train_inner][INFO] - {"epoch": 11, "update": 10.177, "loss": "2.138", "ppl": "4.4", "wps": "366944", "ups": "3.07", "wpb": "119396", "bsz": "256", "num_updates": "523800", "lr": "0.00048101", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "171017"} +[2022-08-03 00:59:29,665][train_inner][INFO] - {"epoch": 11, "update": 10.181, "loss": "2.147", "ppl": "4.43", "wps": "364262", "ups": "3.08", "wpb": "118187", "bsz": "256", "num_updates": "524000", "lr": "0.000480808", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.1", "wall": "171082"} +[2022-08-03 01:00:34,291][train_inner][INFO] - {"epoch": 11, "update": 10.185, "loss": "2.143", "ppl": "4.42", "wps": "367343", "ups": "3.09", "wpb": "118698", "bsz": "256", "num_updates": "524200", "lr": "0.000480606", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "171147"} +[2022-08-03 01:01:39,199][train_inner][INFO] - {"epoch": 11, "update": 10.189, "loss": "2.152", "ppl": "4.44", "wps": "363052", "ups": "3.08", "wpb": "117823", "bsz": "256", "num_updates": "524400", "lr": "0.000480404", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "171212"} +[2022-08-03 01:02:44,043][train_inner][INFO] - {"epoch": 11, "update": 10.193, "loss": "2.15", "ppl": "4.44", "wps": "364416", "ups": "3.08", "wpb": "118149", "bsz": "256", "num_updates": "524600", "lr": "0.000480202", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "171276"} +[2022-08-03 01:03:48,497][train_inner][INFO] - {"epoch": 11, "update": 10.197, "loss": "2.154", "ppl": "4.45", "wps": "365371", "ups": "3.1", "wpb": "117747", "bsz": "256", "num_updates": "524800", "lr": "0.00048", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.2", "wall": "171341"} +[2022-08-03 01:04:53,611][train_inner][INFO] - {"epoch": 11, "update": 10.201, "loss": "2.148", "ppl": "4.43", "wps": "363007", "ups": "3.07", "wpb": "118184", "bsz": "256", "num_updates": "525000", "lr": "0.000479798", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "171406"} +[2022-08-03 01:05:58,307][train_inner][INFO] - {"epoch": 11, "update": 10.204, "loss": "2.149", "ppl": "4.43", "wps": "367648", "ups": "3.09", "wpb": "118924", "bsz": "256", "num_updates": "525200", "lr": "0.000479596", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "171471"} +[2022-08-03 01:07:02,957][train_inner][INFO] - {"epoch": 11, "update": 10.208, "loss": "2.141", "ppl": "4.41", "wps": "366176", "ups": "3.09", "wpb": "118365", "bsz": "256", "num_updates": "525400", "lr": "0.000479394", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.1", "wall": "171535"} +[2022-08-03 01:08:07,407][train_inner][INFO] - {"epoch": 11, "update": 10.212, "loss": "2.142", "ppl": "4.41", "wps": "367042", "ups": "3.1", "wpb": "118277", "bsz": "256", "num_updates": "525600", "lr": "0.000479192", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "171600"} +[2022-08-03 01:09:12,245][train_inner][INFO] - {"epoch": 11, "update": 10.216, "loss": "2.138", "ppl": "4.4", "wps": "366118", "ups": "3.08", "wpb": "118690", "bsz": "256", "num_updates": "525800", "lr": "0.00047899", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "171665"} +[2022-08-03 01:10:17,198][train_inner][INFO] - {"epoch": 11, "update": 10.22, "loss": "2.145", "ppl": "4.42", "wps": "366844", "ups": "3.08", "wpb": "119135", "bsz": "256", "num_updates": "526000", "lr": "0.000478788", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.9", "wall": "171730"} +[2022-08-03 01:11:21,929][train_inner][INFO] - {"epoch": 11, "update": 10.224, "loss": "2.147", "ppl": "4.43", "wps": "366006", "ups": "3.09", "wpb": "118458", "bsz": "256", "num_updates": "526200", "lr": "0.000478586", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "171794"} +[2022-08-03 01:12:26,757][train_inner][INFO] - {"epoch": 11, "update": 10.228, "loss": "2.151", "ppl": "4.44", "wps": "365450", "ups": "3.09", "wpb": "118454", "bsz": "256", "num_updates": "526400", "lr": "0.000478384", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "171859"} +[2022-08-03 01:13:31,971][train_inner][INFO] - {"epoch": 11, "update": 10.232, "loss": "2.151", "ppl": "4.44", "wps": "362386", "ups": "3.07", "wpb": "118163", "bsz": "256", "num_updates": "526600", "lr": "0.000478182", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "171924"} +[2022-08-03 01:14:36,535][train_inner][INFO] - {"epoch": 11, "update": 10.236, "loss": "2.152", "ppl": "4.45", "wps": "363576", "ups": "3.1", "wpb": "117367", "bsz": "256", "num_updates": "526800", "lr": "0.00047798", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "171989"} +[2022-08-03 01:15:42,089][train_inner][INFO] - {"epoch": 11, "update": 10.239, "loss": "2.143", "ppl": "4.42", "wps": "360675", "ups": "3.05", "wpb": "118216", "bsz": "256", "num_updates": "527000", "lr": "0.000477778", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.2", "wall": "172054"} +[2022-08-03 01:16:46,973][train_inner][INFO] - {"epoch": 11, "update": 10.243, "loss": "2.147", "ppl": "4.43", "wps": "365363", "ups": "3.08", "wpb": "118530", "bsz": "256", "num_updates": "527200", "lr": "0.000477576", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "172119"} +[2022-08-03 01:17:51,598][train_inner][INFO] - {"epoch": 11, "update": 10.247, "loss": "2.148", "ppl": "4.43", "wps": "365171", "ups": "3.09", "wpb": "117993", "bsz": "256", "num_updates": "527400", "lr": "0.000477374", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "172184"} +[2022-08-03 01:18:56,426][train_inner][INFO] - {"epoch": 11, "update": 10.251, "loss": "2.138", "ppl": "4.4", "wps": "365819", "ups": "3.09", "wpb": "118575", "bsz": "256", "num_updates": "527600", "lr": "0.000477172", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "172249"} +[2022-08-03 01:19:54,326][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 01:19:56,260][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 01:20:02,012][train_inner][INFO] - {"epoch": 11, "update": 10.255, "loss": "2.142", "ppl": "4.41", "wps": "362311", "ups": "3.05", "wpb": "118810", "bsz": "256", "num_updates": "527800", "lr": "0.00047697", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "172314"} +[2022-08-03 01:21:06,887][train_inner][INFO] - {"epoch": 11, "update": 10.259, "loss": "2.141", "ppl": "4.41", "wps": "365881", "ups": "3.08", "wpb": "118683", "bsz": "256", "num_updates": "528000", "lr": "0.000476768", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "172379"} +[2022-08-03 01:22:11,519][train_inner][INFO] - {"epoch": 11, "update": 10.263, "loss": "2.145", "ppl": "4.42", "wps": "366023", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "528200", "lr": "0.000476566", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.2", "wall": "172444"} +[2022-08-03 01:23:16,111][train_inner][INFO] - {"epoch": 11, "update": 10.267, "loss": "2.14", "ppl": "4.41", "wps": "367503", "ups": "3.1", "wpb": "118686", "bsz": "256", "num_updates": "528400", "lr": "0.000476364", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "172508"} +[2022-08-03 01:24:20,962][train_inner][INFO] - {"epoch": 11, "update": 10.271, "loss": "2.152", "ppl": "4.44", "wps": "364164", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "528600", "lr": "0.000476162", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.8", "wall": "172573"} +[2022-08-03 01:25:25,815][train_inner][INFO] - {"epoch": 11, "update": 10.274, "loss": "2.146", "ppl": "4.43", "wps": "364359", "ups": "3.08", "wpb": "118145", "bsz": "256", "num_updates": "528800", "lr": "0.00047596", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "28.2", "wall": "172638"} +[2022-08-03 01:26:30,472][train_inner][INFO] - {"epoch": 11, "update": 10.278, "loss": "2.141", "ppl": "4.41", "wps": "365161", "ups": "3.09", "wpb": "118050", "bsz": "256", "num_updates": "529000", "lr": "0.000475758", "gnorm": "0.688", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "172703"} +[2022-08-03 01:27:35,310][train_inner][INFO] - {"epoch": 11, "update": 10.282, "loss": "2.14", "ppl": "4.41", "wps": "365134", "ups": "3.08", "wpb": "118371", "bsz": "256", "num_updates": "529200", "lr": "0.000475556", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "172768"} +[2022-08-03 01:28:12,183][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-03 01:28:40,454][train_inner][INFO] - {"epoch": 11, "update": 10.286, "loss": "2.143", "ppl": "4.42", "wps": "363070", "ups": "3.07", "wpb": "118257", "bsz": "256", "num_updates": "529400", "lr": "0.000475354", "gnorm": "0.688", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "172833"} +[2022-08-03 01:29:45,332][train_inner][INFO] - {"epoch": 11, "update": 10.29, "loss": "2.144", "ppl": "4.42", "wps": "364447", "ups": "3.08", "wpb": "118223", "bsz": "256", "num_updates": "529600", "lr": "0.000475152", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.3", "wall": "172898"} +[2022-08-03 01:30:27,178][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 +[2022-08-03 01:30:50,682][train_inner][INFO] - {"epoch": 11, "update": 10.294, "loss": "2.148", "ppl": "4.43", "wps": "361406", "ups": "3.06", "wpb": "118088", "bsz": "256", "num_updates": "529800", "lr": "0.000474949", "gnorm": "0.698", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.8", "wall": "172963"} +[2022-08-03 01:31:55,431][train_inner][INFO] - {"epoch": 11, "update": 10.298, "loss": "2.141", "ppl": "4.41", "wps": "365812", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "530000", "lr": "0.000474747", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "173028"} +[2022-08-03 01:33:00,194][train_inner][INFO] - {"epoch": 11, "update": 10.302, "loss": "2.138", "ppl": "4.4", "wps": "367096", "ups": "3.09", "wpb": "118869", "bsz": "256", "num_updates": "530200", "lr": "0.000474545", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.7", "wall": "173093"} +[2022-08-03 01:34:05,227][train_inner][INFO] - {"epoch": 11, "update": 10.306, "loss": "2.139", "ppl": "4.4", "wps": "363275", "ups": "3.08", "wpb": "118123", "bsz": "256", "num_updates": "530400", "lr": "0.000474343", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "173158"} +[2022-08-03 01:35:10,198][train_inner][INFO] - {"epoch": 11, "update": 10.309, "loss": "2.152", "ppl": "4.45", "wps": "361448", "ups": "3.08", "wpb": "117417", "bsz": "256", "num_updates": "530600", "lr": "0.000474141", "gnorm": "0.77", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.7", "wall": "173223"} +[2022-08-03 01:36:14,934][train_inner][INFO] - {"epoch": 11, "update": 10.313, "loss": "2.136", "ppl": "4.4", "wps": "367054", "ups": "3.09", "wpb": "118807", "bsz": "256", "num_updates": "530800", "lr": "0.000473939", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.5", "wall": "173287"} +[2022-08-03 01:37:19,866][train_inner][INFO] - {"epoch": 11, "update": 10.317, "loss": "2.146", "ppl": "4.42", "wps": "363155", "ups": "3.08", "wpb": "117900", "bsz": "256", "num_updates": "531000", "lr": "0.000473737", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.3", "wall": "173352"} +[2022-08-03 01:38:24,839][train_inner][INFO] - {"epoch": 11, "update": 10.321, "loss": "2.143", "ppl": "4.42", "wps": "364787", "ups": "3.08", "wpb": "118504", "bsz": "256", "num_updates": "531200", "lr": "0.000473535", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.3", "wall": "173417"} +[2022-08-03 01:39:29,835][train_inner][INFO] - {"epoch": 11, "update": 10.325, "loss": "2.142", "ppl": "4.41", "wps": "362911", "ups": "3.08", "wpb": "117937", "bsz": "256", "num_updates": "531400", "lr": "0.000473333", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "173482"} +[2022-08-03 01:40:34,716][train_inner][INFO] - {"epoch": 11, "update": 10.329, "loss": "2.146", "ppl": "4.42", "wps": "364124", "ups": "3.08", "wpb": "118121", "bsz": "256", "num_updates": "531600", "lr": "0.000473131", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.4", "wall": "173547"} +[2022-08-03 01:41:39,436][train_inner][INFO] - {"epoch": 11, "update": 10.333, "loss": "2.147", "ppl": "4.43", "wps": "364988", "ups": "3.09", "wpb": "118109", "bsz": "256", "num_updates": "531800", "lr": "0.000472929", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.4", "wall": "173612"} +[2022-08-03 01:42:44,349][train_inner][INFO] - {"epoch": 11, "update": 10.337, "loss": "2.14", "ppl": "4.41", "wps": "365977", "ups": "3.08", "wpb": "118781", "bsz": "256", "num_updates": "532000", "lr": "0.000472727", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "173677"} +[2022-08-03 01:43:49,255][train_inner][INFO] - {"epoch": 11, "update": 10.34, "loss": "2.139", "ppl": "4.4", "wps": "365127", "ups": "3.08", "wpb": "118492", "bsz": "256", "num_updates": "532200", "lr": "0.000472525", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "173742"} +[2022-08-03 01:44:53,844][train_inner][INFO] - {"epoch": 11, "update": 10.344, "loss": "2.142", "ppl": "4.41", "wps": "365844", "ups": "3.1", "wpb": "118147", "bsz": "256", "num_updates": "532400", "lr": "0.000472323", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "173806"} +[2022-08-03 01:45:59,400][train_inner][INFO] - {"epoch": 11, "update": 10.348, "loss": "2.146", "ppl": "4.42", "wps": "360825", "ups": "3.05", "wpb": "118269", "bsz": "256", "num_updates": "532600", "lr": "0.000472121", "gnorm": "0.683", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "173872"} +[2022-08-03 01:47:04,069][train_inner][INFO] - {"epoch": 11, "update": 10.352, "loss": "2.145", "ppl": "4.42", "wps": "365000", "ups": "3.09", "wpb": "118018", "bsz": "256", "num_updates": "532800", "lr": "0.000471919", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.1", "wall": "173936"} +[2022-08-03 01:48:09,055][train_inner][INFO] - {"epoch": 11, "update": 10.356, "loss": "2.143", "ppl": "4.42", "wps": "364500", "ups": "3.08", "wpb": "118436", "bsz": "256", "num_updates": "533000", "lr": "0.000471717", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "174001"} +[2022-08-03 01:49:13,791][train_inner][INFO] - {"epoch": 11, "update": 10.36, "loss": "2.139", "ppl": "4.4", "wps": "368031", "ups": "3.09", "wpb": "119121", "bsz": "256", "num_updates": "533200", "lr": "0.000471515", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.6", "wall": "174066"} +[2022-08-03 01:50:18,639][train_inner][INFO] - {"epoch": 11, "update": 10.364, "loss": "2.137", "ppl": "4.4", "wps": "367418", "ups": "3.08", "wpb": "119130", "bsz": "256", "num_updates": "533400", "lr": "0.000471313", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "174131"} +[2022-08-03 01:51:23,319][train_inner][INFO] - {"epoch": 11, "update": 10.368, "loss": "2.141", "ppl": "4.41", "wps": "365485", "ups": "3.09", "wpb": "118196", "bsz": "256", "num_updates": "533600", "lr": "0.000471111", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "174196"} +[2022-08-03 01:52:28,036][train_inner][INFO] - {"epoch": 11, "update": 10.372, "loss": "2.144", "ppl": "4.42", "wps": "366244", "ups": "3.09", "wpb": "118510", "bsz": "256", "num_updates": "533800", "lr": "0.000470909", "gnorm": "0.699", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.6", "wall": "174260"} +[2022-08-03 01:53:32,607][train_inner][INFO] - {"epoch": 11, "update": 10.375, "loss": "2.151", "ppl": "4.44", "wps": "365716", "ups": "3.1", "wpb": "118070", "bsz": "256", "num_updates": "534000", "lr": "0.000470707", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "174325"} +[2022-08-03 01:54:37,245][train_inner][INFO] - {"epoch": 11, "update": 10.379, "loss": "2.142", "ppl": "4.41", "wps": "364807", "ups": "3.09", "wpb": "117902", "bsz": "256", "num_updates": "534200", "lr": "0.000470505", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "174390"} +[2022-08-03 01:55:42,178][train_inner][INFO] - {"epoch": 11, "update": 10.383, "loss": "2.139", "ppl": "4.4", "wps": "364109", "ups": "3.08", "wpb": "118211", "bsz": "256", "num_updates": "534400", "lr": "0.000470303", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.9", "wall": "174455"} +[2022-08-03 01:56:47,118][train_inner][INFO] - {"epoch": 11, "update": 10.387, "loss": "2.145", "ppl": "4.42", "wps": "364719", "ups": "3.08", "wpb": "118423", "bsz": "256", "num_updates": "534600", "lr": "0.000470101", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "174519"} +[2022-08-03 01:57:51,825][train_inner][INFO] - {"epoch": 11, "update": 10.391, "loss": "2.138", "ppl": "4.4", "wps": "363939", "ups": "3.09", "wpb": "117745", "bsz": "256", "num_updates": "534800", "lr": "0.000469899", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "174584"} +[2022-08-03 01:58:56,873][train_inner][INFO] - {"epoch": 11, "update": 10.395, "loss": "2.146", "ppl": "4.43", "wps": "362537", "ups": "3.07", "wpb": "117909", "bsz": "256", "num_updates": "535000", "lr": "0.000469697", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.7", "wall": "174649"} +[2022-08-03 02:00:01,568][train_inner][INFO] - {"epoch": 11, "update": 10.399, "loss": "2.14", "ppl": "4.41", "wps": "366068", "ups": "3.09", "wpb": "118411", "bsz": "256", "num_updates": "535200", "lr": "0.000469495", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "174714"} +[2022-08-03 02:01:06,577][train_inner][INFO] - {"epoch": 11, "update": 10.403, "loss": "2.142", "ppl": "4.41", "wps": "363080", "ups": "3.08", "wpb": "118016", "bsz": "256", "num_updates": "535400", "lr": "0.000469293", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "174779"} +[2022-08-03 02:02:11,386][train_inner][INFO] - {"epoch": 11, "update": 10.407, "loss": "2.14", "ppl": "4.41", "wps": "364956", "ups": "3.09", "wpb": "118260", "bsz": "256", "num_updates": "535600", "lr": "0.000469091", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "174844"} +[2022-08-03 02:03:16,176][train_inner][INFO] - {"epoch": 11, "update": 10.41, "loss": "2.139", "ppl": "4.41", "wps": "365909", "ups": "3.09", "wpb": "118535", "bsz": "256", "num_updates": "535800", "lr": "0.000468889", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "174908"} +[2022-08-03 02:04:20,727][train_inner][INFO] - {"epoch": 11, "update": 10.414, "loss": "2.143", "ppl": "4.42", "wps": "366796", "ups": "3.1", "wpb": "118382", "bsz": "256", "num_updates": "536000", "lr": "0.000468687", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "174973"} +[2022-08-03 02:05:25,923][train_inner][INFO] - {"epoch": 11, "update": 10.418, "loss": "2.137", "ppl": "4.4", "wps": "363140", "ups": "3.07", "wpb": "118374", "bsz": "256", "num_updates": "536200", "lr": "0.000468485", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "175038"} +[2022-08-03 02:06:30,761][train_inner][INFO] - {"epoch": 11, "update": 10.422, "loss": "2.141", "ppl": "4.41", "wps": "365922", "ups": "3.08", "wpb": "118627", "bsz": "255.9", "num_updates": "536400", "lr": "0.000468283", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "175103"} +[2022-08-03 02:07:35,419][train_inner][INFO] - {"epoch": 11, "update": 10.426, "loss": "2.142", "ppl": "4.42", "wps": "365344", "ups": "3.09", "wpb": "118109", "bsz": "256", "num_updates": "536600", "lr": "0.000468081", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "175168"} +[2022-08-03 02:08:40,379][train_inner][INFO] - {"epoch": 11, "update": 10.43, "loss": "2.144", "ppl": "4.42", "wps": "363233", "ups": "3.08", "wpb": "117977", "bsz": "256", "num_updates": "536800", "lr": "0.000467879", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.2", "wall": "175233"} +[2022-08-03 02:09:45,075][train_inner][INFO] - {"epoch": 11, "update": 10.434, "loss": "2.147", "ppl": "4.43", "wps": "364991", "ups": "3.09", "wpb": "118065", "bsz": "256", "num_updates": "537000", "lr": "0.000467677", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.7", "wall": "175297"} +[2022-08-03 02:10:50,788][train_inner][INFO] - {"epoch": 11, "update": 10.438, "loss": "2.136", "ppl": "4.4", "wps": "358635", "ups": "3.04", "wpb": "117833", "bsz": "256", "num_updates": "537200", "lr": "0.000467475", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "175363"} +[2022-08-03 02:11:55,530][train_inner][INFO] - {"epoch": 11, "update": 10.441, "loss": "2.139", "ppl": "4.4", "wps": "364180", "ups": "3.09", "wpb": "117887", "bsz": "256", "num_updates": "537400", "lr": "0.000467273", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "175428"} +[2022-08-03 02:13:00,704][train_inner][INFO] - {"epoch": 11, "update": 10.445, "loss": "2.141", "ppl": "4.41", "wps": "364348", "ups": "3.07", "wpb": "118728", "bsz": "256", "num_updates": "537600", "lr": "0.000467071", "gnorm": "0.687", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "175493"} +[2022-08-03 02:14:05,963][train_inner][INFO] - {"epoch": 11, "update": 10.449, "loss": "2.139", "ppl": "4.41", "wps": "362862", "ups": "3.06", "wpb": "118399", "bsz": "256", "num_updates": "537800", "lr": "0.000466869", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "175558"} +[2022-08-03 02:15:10,951][train_inner][INFO] - {"epoch": 11, "update": 10.453, "loss": "2.136", "ppl": "4.4", "wps": "363873", "ups": "3.08", "wpb": "118234", "bsz": "256", "num_updates": "538000", "lr": "0.000466667", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "175623"} +[2022-08-03 02:16:15,617][train_inner][INFO] - {"epoch": 11, "update": 10.457, "loss": "2.149", "ppl": "4.44", "wps": "364432", "ups": "3.09", "wpb": "117830", "bsz": "256", "num_updates": "538200", "lr": "0.000466465", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "175688"} +[2022-08-03 02:17:20,467][train_inner][INFO] - {"epoch": 11, "update": 10.461, "loss": "2.147", "ppl": "4.43", "wps": "364312", "ups": "3.08", "wpb": "118127", "bsz": "256", "num_updates": "538400", "lr": "0.000466263", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "175753"} +[2022-08-03 02:18:25,432][train_inner][INFO] - {"epoch": 11, "update": 10.465, "loss": "2.143", "ppl": "4.42", "wps": "361619", "ups": "3.08", "wpb": "117460", "bsz": "256", "num_updates": "538600", "lr": "0.000466061", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "175818"} +[2022-08-03 02:19:30,566][train_inner][INFO] - {"epoch": 11, "update": 10.469, "loss": "2.135", "ppl": "4.39", "wps": "364342", "ups": "3.07", "wpb": "118653", "bsz": "256", "num_updates": "538800", "lr": "0.000465859", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26", "wall": "175883"} +[2022-08-03 02:20:36,020][train_inner][INFO] - {"epoch": 11, "update": 10.473, "loss": "2.136", "ppl": "4.4", "wps": "362831", "ups": "3.06", "wpb": "118742", "bsz": "256", "num_updates": "539000", "lr": "0.000465657", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "175948"} +[2022-08-03 02:21:40,860][train_inner][INFO] - {"epoch": 11, "update": 10.476, "loss": "2.139", "ppl": "4.4", "wps": "363962", "ups": "3.08", "wpb": "117996", "bsz": "256", "num_updates": "539200", "lr": "0.000465455", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "176013"} +[2022-08-03 02:22:45,786][train_inner][INFO] - {"epoch": 11, "update": 10.48, "loss": "2.14", "ppl": "4.41", "wps": "364467", "ups": "3.08", "wpb": "118314", "bsz": "256", "num_updates": "539400", "lr": "0.000465253", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "176078"} +[2022-08-03 02:23:50,573][train_inner][INFO] - {"epoch": 11, "update": 10.484, "loss": "2.138", "ppl": "4.4", "wps": "365719", "ups": "3.09", "wpb": "118468", "bsz": "256", "num_updates": "539600", "lr": "0.000465051", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "176143"} +[2022-08-03 02:24:55,327][train_inner][INFO] - {"epoch": 11, "update": 10.488, "loss": "2.146", "ppl": "4.43", "wps": "364744", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "539800", "lr": "0.000464848", "gnorm": "0.692", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "176208"} +[2022-08-03 02:25:59,989][train_inner][INFO] - {"epoch": 11, "update": 10.492, "loss": "2.135", "ppl": "4.39", "wps": "365234", "ups": "3.09", "wpb": "118082", "bsz": "256", "num_updates": "540000", "lr": "0.000464646", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.2", "wall": "176272"} +[2022-08-03 02:27:04,948][train_inner][INFO] - {"epoch": 11, "update": 10.496, "loss": "2.135", "ppl": "4.39", "wps": "365603", "ups": "3.08", "wpb": "118745", "bsz": "256", "num_updates": "540200", "lr": "0.000464444", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.3", "wall": "176337"} +[2022-08-03 02:27:09,432][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 02:28:09,926][train_inner][INFO] - {"epoch": 11, "update": 10.5, "loss": "2.136", "ppl": "4.39", "wps": "363042", "ups": "3.08", "wpb": "117948", "bsz": "256", "num_updates": "540400", "lr": "0.000464242", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.7", "wall": "176402"} +[2022-08-03 02:29:14,941][train_inner][INFO] - {"epoch": 11, "update": 10.504, "loss": "2.144", "ppl": "4.42", "wps": "364342", "ups": "3.08", "wpb": "118437", "bsz": "256", "num_updates": "540600", "lr": "0.00046404", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "176467"} +[2022-08-03 02:30:21,130][train_inner][INFO] - {"epoch": 11, "update": 10.508, "loss": "2.137", "ppl": "4.4", "wps": "355969", "ups": "3.02", "wpb": "117804", "bsz": "256", "num_updates": "540800", "lr": "0.000463838", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.2", "wall": "176533"} +[2022-08-03 02:31:25,926][train_inner][INFO] - {"epoch": 11, "update": 10.511, "loss": "2.138", "ppl": "4.4", "wps": "367325", "ups": "3.09", "wpb": "119004", "bsz": "256", "num_updates": "541000", "lr": "0.000463636", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "176598"} +[2022-08-03 02:32:30,745][train_inner][INFO] - {"epoch": 11, "update": 10.515, "loss": "2.132", "ppl": "4.38", "wps": "364006", "ups": "3.09", "wpb": "117970", "bsz": "256", "num_updates": "541200", "lr": "0.000463434", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.5", "wall": "176663"} +[2022-08-03 02:33:35,431][train_inner][INFO] - {"epoch": 11, "update": 10.519, "loss": "2.138", "ppl": "4.4", "wps": "364252", "ups": "3.09", "wpb": "117808", "bsz": "256", "num_updates": "541400", "lr": "0.000463232", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "176728"} +[2022-08-03 02:34:40,186][train_inner][INFO] - {"epoch": 11, "update": 10.523, "loss": "2.129", "ppl": "4.37", "wps": "367713", "ups": "3.09", "wpb": "119054", "bsz": "256", "num_updates": "541600", "lr": "0.00046303", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "176793"} +[2022-08-03 02:35:46,284][train_inner][INFO] - {"epoch": 11, "update": 10.527, "loss": "2.141", "ppl": "4.41", "wps": "356801", "ups": "3.03", "wpb": "117918", "bsz": "256", "num_updates": "541800", "lr": "0.000462828", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.2", "wall": "176859"} +[2022-08-03 02:36:48,474][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 02:36:51,415][train_inner][INFO] - {"epoch": 11, "update": 10.531, "loss": "2.144", "ppl": "4.42", "wps": "363282", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "542000", "lr": "0.000462626", "gnorm": "0.707", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "176924"} +[2022-08-03 02:37:12,118][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 02:37:12,738][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-03 02:37:57,166][train_inner][INFO] - {"epoch": 11, "update": 10.535, "loss": "2.141", "ppl": "4.41", "wps": "359558", "ups": "3.04", "wpb": "118206", "bsz": "256", "num_updates": "542200", "lr": "0.000462424", "gnorm": "0.703", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "176989"} +[2022-08-03 02:39:01,916][train_inner][INFO] - {"epoch": 11, "update": 10.539, "loss": "2.139", "ppl": "4.41", "wps": "366923", "ups": "3.09", "wpb": "118791", "bsz": "256", "num_updates": "542400", "lr": "0.000462222", "gnorm": "0.69", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "177054"} +[2022-08-03 02:40:06,743][train_inner][INFO] - {"epoch": 11, "update": 10.543, "loss": "2.136", "ppl": "4.4", "wps": "363939", "ups": "3.09", "wpb": "117963", "bsz": "256", "num_updates": "542600", "lr": "0.00046202", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "177119"} +[2022-08-03 02:41:11,822][train_inner][INFO] - {"epoch": 11, "update": 10.546, "loss": "2.134", "ppl": "4.39", "wps": "364406", "ups": "3.07", "wpb": "118575", "bsz": "256", "num_updates": "542800", "lr": "0.000461818", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "177184"} +[2022-08-03 02:42:16,246][train_inner][INFO] - {"epoch": 11, "update": 10.55, "loss": "2.14", "ppl": "4.41", "wps": "366179", "ups": "3.1", "wpb": "117951", "bsz": "256", "num_updates": "543000", "lr": "0.000461616", "gnorm": "0.683", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.8", "wall": "177249"} +[2022-08-03 02:43:21,401][train_inner][INFO] - {"epoch": 11, "update": 10.554, "loss": "2.138", "ppl": "4.4", "wps": "362587", "ups": "3.07", "wpb": "118121", "bsz": "256", "num_updates": "543200", "lr": "0.000461414", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.6", "wall": "177314"} +[2022-08-03 02:44:26,283][train_inner][INFO] - {"epoch": 11, "update": 10.558, "loss": "2.137", "ppl": "4.4", "wps": "363363", "ups": "3.08", "wpb": "117876", "bsz": "256", "num_updates": "543400", "lr": "0.000461212", "gnorm": "0.694", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.1", "wall": "177379"} +[2022-08-03 02:45:30,848][train_inner][INFO] - {"epoch": 11, "update": 10.562, "loss": "2.138", "ppl": "4.4", "wps": "365769", "ups": "3.1", "wpb": "118078", "bsz": "256", "num_updates": "543600", "lr": "0.00046101", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24", "wall": "177443"} +[2022-08-03 02:46:35,948][train_inner][INFO] - {"epoch": 11, "update": 10.566, "loss": "2.135", "ppl": "4.39", "wps": "363290", "ups": "3.07", "wpb": "118248", "bsz": "256", "num_updates": "543800", "lr": "0.000460808", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.6", "wall": "177508"} +[2022-08-03 02:47:40,561][train_inner][INFO] - {"epoch": 11, "update": 10.57, "loss": "2.133", "ppl": "4.39", "wps": "365930", "ups": "3.1", "wpb": "118217", "bsz": "256", "num_updates": "544000", "lr": "0.000460606", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.1", "wall": "177573"} +[2022-08-03 02:48:45,671][train_inner][INFO] - {"epoch": 11, "update": 10.574, "loss": "2.139", "ppl": "4.4", "wps": "364204", "ups": "3.07", "wpb": "118565", "bsz": "256", "num_updates": "544200", "lr": "0.000460404", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.8", "wall": "177638"} +[2022-08-03 02:49:50,666][train_inner][INFO] - {"epoch": 11, "update": 10.577, "loss": "2.132", "ppl": "4.38", "wps": "365174", "ups": "3.08", "wpb": "118671", "bsz": "256", "num_updates": "544400", "lr": "0.000460202", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.3", "wall": "177703"} +[2022-08-03 02:50:55,432][train_inner][INFO] - {"epoch": 11, "update": 10.581, "loss": "2.135", "ppl": "4.39", "wps": "366422", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "544600", "lr": "0.00046", "gnorm": "0.695", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "177768"} +[2022-08-03 02:52:01,515][train_inner][INFO] - {"epoch": 11, "update": 10.585, "loss": "2.132", "ppl": "4.38", "wps": "357371", "ups": "3.03", "wpb": "118080", "bsz": "256", "num_updates": "544800", "lr": "0.000459798", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "20", "wall": "177834"} +[2022-08-03 02:53:06,448][train_inner][INFO] - {"epoch": 11, "update": 10.589, "loss": "2.138", "ppl": "4.4", "wps": "365660", "ups": "3.08", "wpb": "118715", "bsz": "256", "num_updates": "545000", "lr": "0.000459596", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "177899"} +[2022-08-03 02:54:11,482][train_inner][INFO] - {"epoch": 11, "update": 10.593, "loss": "2.133", "ppl": "4.39", "wps": "365091", "ups": "3.08", "wpb": "118715", "bsz": "256", "num_updates": "545200", "lr": "0.000459394", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "177964"} +[2022-08-03 02:55:16,717][train_inner][INFO] - {"epoch": 11, "update": 10.597, "loss": "2.128", "ppl": "4.37", "wps": "364633", "ups": "3.07", "wpb": "118932", "bsz": "256", "num_updates": "545400", "lr": "0.000459192", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.8", "wall": "178029"} +[2022-08-03 02:56:21,157][train_inner][INFO] - {"epoch": 11, "update": 10.601, "loss": "2.136", "ppl": "4.39", "wps": "369204", "ups": "3.1", "wpb": "118955", "bsz": "256", "num_updates": "545600", "lr": "0.00045899", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.7", "wall": "178093"} +[2022-08-03 02:57:25,839][train_inner][INFO] - {"epoch": 11, "update": 10.605, "loss": "2.139", "ppl": "4.4", "wps": "366535", "ups": "3.09", "wpb": "118540", "bsz": "256", "num_updates": "545800", "lr": "0.000458788", "gnorm": "0.684", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.6", "wall": "178158"} +[2022-08-03 02:58:31,190][train_inner][INFO] - {"epoch": 11, "update": 10.609, "loss": "2.131", "ppl": "4.38", "wps": "364062", "ups": "3.06", "wpb": "118957", "bsz": "256", "num_updates": "546000", "lr": "0.000458586", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "178224"} +[2022-08-03 02:59:35,979][train_inner][INFO] - {"epoch": 11, "update": 10.612, "loss": "2.129", "ppl": "4.37", "wps": "366460", "ups": "3.09", "wpb": "118712", "bsz": "256", "num_updates": "546200", "lr": "0.000458384", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.4", "wall": "178288"} +[2022-08-03 03:00:41,004][train_inner][INFO] - {"epoch": 11, "update": 10.616, "loss": "2.137", "ppl": "4.4", "wps": "362458", "ups": "3.08", "wpb": "117842", "bsz": "256", "num_updates": "546400", "lr": "0.000458182", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "178353"} +[2022-08-03 03:01:45,828][train_inner][INFO] - {"epoch": 11, "update": 10.62, "loss": "2.143", "ppl": "4.42", "wps": "363045", "ups": "3.09", "wpb": "117669", "bsz": "256", "num_updates": "546600", "lr": "0.00045798", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.2", "wall": "178418"} +[2022-08-03 03:02:50,784][train_inner][INFO] - {"epoch": 11, "update": 10.624, "loss": "2.135", "ppl": "4.39", "wps": "363947", "ups": "3.08", "wpb": "118201", "bsz": "256", "num_updates": "546800", "lr": "0.000457778", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "178483"} +[2022-08-03 03:03:55,563][train_inner][INFO] - {"epoch": 11, "update": 10.628, "loss": "2.139", "ppl": "4.4", "wps": "365465", "ups": "3.09", "wpb": "118369", "bsz": "256", "num_updates": "547000", "lr": "0.000457576", "gnorm": "0.687", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "178548"} +[2022-08-03 03:05:00,332][train_inner][INFO] - {"epoch": 11, "update": 10.632, "loss": "2.131", "ppl": "4.38", "wps": "364777", "ups": "3.09", "wpb": "118130", "bsz": "256", "num_updates": "547200", "lr": "0.000457374", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "178613"} +[2022-08-03 03:06:05,306][train_inner][INFO] - {"epoch": 11, "update": 10.636, "loss": "2.136", "ppl": "4.39", "wps": "362276", "ups": "3.08", "wpb": "117690", "bsz": "256", "num_updates": "547400", "lr": "0.000457172", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "178678"} +[2022-08-03 03:07:09,690][train_inner][INFO] - {"epoch": 11, "update": 10.64, "loss": "2.133", "ppl": "4.38", "wps": "367504", "ups": "3.11", "wpb": "118306", "bsz": "256", "num_updates": "547600", "lr": "0.00045697", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "178742"} +[2022-08-03 03:08:14,217][train_inner][INFO] - {"epoch": 11, "update": 10.644, "loss": "2.135", "ppl": "4.39", "wps": "368183", "ups": "3.1", "wpb": "118785", "bsz": "256", "num_updates": "547800", "lr": "0.000456768", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "178807"} +[2022-08-03 03:09:19,309][train_inner][INFO] - {"epoch": 11, "update": 10.647, "loss": "2.132", "ppl": "4.38", "wps": "362579", "ups": "3.07", "wpb": "118003", "bsz": "256", "num_updates": "548000", "lr": "0.000456566", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "178872"} +[2022-08-03 03:10:23,992][train_inner][INFO] - {"epoch": 11, "update": 10.651, "loss": "2.136", "ppl": "4.4", "wps": "365346", "ups": "3.09", "wpb": "118157", "bsz": "256", "num_updates": "548200", "lr": "0.000456364", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26.5", "wall": "178936"} +[2022-08-03 03:11:29,033][train_inner][INFO] - {"epoch": 11, "update": 10.655, "loss": "2.142", "ppl": "4.41", "wps": "362020", "ups": "3.08", "wpb": "117729", "bsz": "256", "num_updates": "548400", "lr": "0.000456162", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "179001"} +[2022-08-03 03:12:33,915][train_inner][INFO] - {"epoch": 11, "update": 10.659, "loss": "2.131", "ppl": "4.38", "wps": "365746", "ups": "3.08", "wpb": "118650", "bsz": "256", "num_updates": "548600", "lr": "0.00045596", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "179066"} +[2022-08-03 03:13:38,624][train_inner][INFO] - {"epoch": 11, "update": 10.663, "loss": "2.13", "ppl": "4.38", "wps": "366184", "ups": "3.09", "wpb": "118475", "bsz": "256", "num_updates": "548800", "lr": "0.000455758", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "179131"} +[2022-08-03 03:14:43,337][train_inner][INFO] - {"epoch": 11, "update": 10.667, "loss": "2.138", "ppl": "4.4", "wps": "364450", "ups": "3.09", "wpb": "117921", "bsz": "256", "num_updates": "549000", "lr": "0.000455556", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "179196"} +[2022-08-03 03:15:48,518][train_inner][INFO] - {"epoch": 11, "update": 10.671, "loss": "2.139", "ppl": "4.41", "wps": "362550", "ups": "3.07", "wpb": "118156", "bsz": "256", "num_updates": "549200", "lr": "0.000455354", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "179261"} +[2022-08-03 03:16:53,327][train_inner][INFO] - {"epoch": 11, "update": 10.675, "loss": "2.131", "ppl": "4.38", "wps": "364528", "ups": "3.09", "wpb": "118121", "bsz": "256", "num_updates": "549400", "lr": "0.000455152", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "179326"} +[2022-08-03 03:17:58,536][train_inner][INFO] - {"epoch": 11, "update": 10.678, "loss": "2.134", "ppl": "4.39", "wps": "363812", "ups": "3.07", "wpb": "118618", "bsz": "256", "num_updates": "549600", "lr": "0.000454949", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "179391"} +[2022-08-03 03:19:03,404][train_inner][INFO] - {"epoch": 11, "update": 10.682, "loss": "2.126", "ppl": "4.37", "wps": "364262", "ups": "3.08", "wpb": "118142", "bsz": "256", "num_updates": "549800", "lr": "0.000454747", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "179456"} +[2022-08-03 03:20:08,030][train_inner][INFO] - {"epoch": 11, "update": 10.686, "loss": "2.13", "ppl": "4.38", "wps": "366355", "ups": "3.09", "wpb": "118378", "bsz": "256", "num_updates": "550000", "lr": "0.000454545", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "179520"} +[2022-08-03 03:20:08,031][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 03:20:31,019][valid][INFO] - {"epoch": 11, "valid_loss": "2.038", "valid_ppl": "4.11", "valid_wps": "1.60422e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "550000", "valid_best_loss": "2.038"} +[2022-08-03 03:20:31,022][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 11 @ 550000 updates +[2022-08-03 03:20:31,023][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_11_550000.pt +[2022-08-03 03:20:41,449][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_11_550000.pt +[2022-08-03 03:21:09,064][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_11_550000.pt (epoch 11 @ 550000 updates, score 2.038) (writing took 38.042021503672004 seconds) +[2022-08-03 03:22:13,960][train_inner][INFO] - {"epoch": 11, "update": 10.69, "loss": "2.135", "ppl": "4.39", "wps": "187689", "ups": "1.59", "wpb": "118178", "bsz": "256", "num_updates": "550200", "lr": "0.000454343", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "179646"} +[2022-08-03 03:23:18,856][train_inner][INFO] - {"epoch": 11, "update": 10.694, "loss": "2.135", "ppl": "4.39", "wps": "364203", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "550400", "lr": "0.000454141", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.1", "wall": "179711"} +[2022-08-03 03:24:23,911][train_inner][INFO] - {"epoch": 11, "update": 10.698, "loss": "2.136", "ppl": "4.39", "wps": "363633", "ups": "3.07", "wpb": "118278", "bsz": "256", "num_updates": "550600", "lr": "0.000453939", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "179776"} +[2022-08-03 03:25:28,404][train_inner][INFO] - {"epoch": 11, "update": 10.702, "loss": "2.14", "ppl": "4.41", "wps": "365965", "ups": "3.1", "wpb": "118011", "bsz": "255.9", "num_updates": "550800", "lr": "0.000453737", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "179841"} +[2022-08-03 03:26:33,422][train_inner][INFO] - {"epoch": 11, "update": 10.706, "loss": "2.132", "ppl": "4.38", "wps": "365357", "ups": "3.08", "wpb": "118772", "bsz": "256", "num_updates": "551000", "lr": "0.000453535", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "179906"} +[2022-08-03 03:27:38,414][train_inner][INFO] - {"epoch": 11, "update": 10.71, "loss": "2.136", "ppl": "4.4", "wps": "362092", "ups": "3.08", "wpb": "117664", "bsz": "256", "num_updates": "551200", "lr": "0.000453333", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "179971"} +[2022-08-03 03:28:43,260][train_inner][INFO] - {"epoch": 11, "update": 10.713, "loss": "2.133", "ppl": "4.39", "wps": "364733", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "551400", "lr": "0.000453131", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.7", "wall": "180036"} +[2022-08-03 03:29:48,190][train_inner][INFO] - {"epoch": 11, "update": 10.717, "loss": "2.133", "ppl": "4.39", "wps": "363619", "ups": "3.08", "wpb": "118046", "bsz": "256", "num_updates": "551600", "lr": "0.000452929", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.4", "wall": "180101"} +[2022-08-03 03:30:53,326][train_inner][INFO] - {"epoch": 11, "update": 10.721, "loss": "2.13", "ppl": "4.38", "wps": "362775", "ups": "3.07", "wpb": "118146", "bsz": "256", "num_updates": "551800", "lr": "0.000452727", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "180166"} +[2022-08-03 03:31:59,062][train_inner][INFO] - {"epoch": 11, "update": 10.725, "loss": "2.126", "ppl": "4.37", "wps": "361500", "ups": "3.04", "wpb": "118817", "bsz": "256", "num_updates": "552000", "lr": "0.000452525", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "180231"} +[2022-08-03 03:33:04,154][train_inner][INFO] - {"epoch": 11, "update": 10.729, "loss": "2.133", "ppl": "4.39", "wps": "363323", "ups": "3.07", "wpb": "118244", "bsz": "256", "num_updates": "552200", "lr": "0.000452323", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "180296"} +[2022-08-03 03:34:09,145][train_inner][INFO] - {"epoch": 11, "update": 10.733, "loss": "2.133", "ppl": "4.38", "wps": "365786", "ups": "3.08", "wpb": "118862", "bsz": "256", "num_updates": "552400", "lr": "0.000452121", "gnorm": "0.681", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.9", "wall": "180361"} +[2022-08-03 03:34:20,886][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 03:35:14,387][train_inner][INFO] - {"epoch": 11, "update": 10.737, "loss": "2.132", "ppl": "4.38", "wps": "363446", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "552600", "lr": "0.000451919", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "180427"} +[2022-08-03 03:36:19,584][train_inner][INFO] - {"epoch": 11, "update": 10.741, "loss": "2.131", "ppl": "4.38", "wps": "361829", "ups": "3.07", "wpb": "117951", "bsz": "256", "num_updates": "552800", "lr": "0.000451717", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "180492"} +[2022-08-03 03:37:24,661][train_inner][INFO] - {"epoch": 11, "update": 10.745, "loss": "2.14", "ppl": "4.41", "wps": "362496", "ups": "3.07", "wpb": "117948", "bsz": "256", "num_updates": "553000", "lr": "0.000451515", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "180557"} +[2022-08-03 03:38:29,311][train_inner][INFO] - {"epoch": 11, "update": 10.748, "loss": "2.13", "ppl": "4.38", "wps": "364474", "ups": "3.09", "wpb": "117815", "bsz": "256", "num_updates": "553200", "lr": "0.000451313", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "180622"} +[2022-08-03 03:39:33,801][train_inner][INFO] - {"epoch": 11, "update": 10.752, "loss": "2.132", "ppl": "4.38", "wps": "366114", "ups": "3.1", "wpb": "118051", "bsz": "256", "num_updates": "553400", "lr": "0.000451111", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "180686"} +[2022-08-03 03:40:39,728][train_inner][INFO] - {"epoch": 11, "update": 10.756, "loss": "2.13", "ppl": "4.38", "wps": "359416", "ups": "3.03", "wpb": "118474", "bsz": "256", "num_updates": "553600", "lr": "0.000450909", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.9", "wall": "180752"} +[2022-08-03 03:41:44,704][train_inner][INFO] - {"epoch": 11, "update": 10.76, "loss": "2.129", "ppl": "4.37", "wps": "365497", "ups": "3.08", "wpb": "118740", "bsz": "256", "num_updates": "553800", "lr": "0.000450707", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "180817"} +[2022-08-03 03:42:49,281][train_inner][INFO] - {"epoch": 11, "update": 10.764, "loss": "2.132", "ppl": "4.38", "wps": "364256", "ups": "3.1", "wpb": "117612", "bsz": "256", "num_updates": "554000", "lr": "0.000450505", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "180882"} +[2022-08-03 03:43:53,899][train_inner][INFO] - {"epoch": 11, "update": 10.768, "loss": "2.14", "ppl": "4.41", "wps": "365703", "ups": "3.1", "wpb": "118152", "bsz": "256", "num_updates": "554200", "lr": "0.000450303", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "180946"} +[2022-08-03 03:44:58,625][train_inner][INFO] - {"epoch": 11, "update": 10.772, "loss": "2.131", "ppl": "4.38", "wps": "364297", "ups": "3.09", "wpb": "117895", "bsz": "256", "num_updates": "554400", "lr": "0.000450101", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.3", "wall": "181011"} +[2022-08-03 03:46:03,197][train_inner][INFO] - {"epoch": 11, "update": 10.776, "loss": "2.132", "ppl": "4.38", "wps": "367664", "ups": "3.1", "wpb": "118702", "bsz": "256", "num_updates": "554600", "lr": "0.000449899", "gnorm": "0.687", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "181076"} +[2022-08-03 03:47:07,803][train_inner][INFO] - {"epoch": 11, "update": 10.78, "loss": "2.134", "ppl": "4.39", "wps": "364395", "ups": "3.1", "wpb": "117708", "bsz": "256", "num_updates": "554800", "lr": "0.000449697", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.4", "wall": "181140"} +[2022-08-03 03:48:12,963][train_inner][INFO] - {"epoch": 11, "update": 10.783, "loss": "2.132", "ppl": "4.38", "wps": "365192", "ups": "3.07", "wpb": "118978", "bsz": "256", "num_updates": "555000", "lr": "0.000449495", "gnorm": "0.685", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "181205"} +[2022-08-03 03:48:32,650][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 03:49:18,005][train_inner][INFO] - {"epoch": 11, "update": 10.787, "loss": "2.125", "ppl": "4.36", "wps": "365256", "ups": "3.07", "wpb": "118783", "bsz": "256", "num_updates": "555200", "lr": "0.000449293", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "181270"} +[2022-08-03 03:50:22,555][train_inner][INFO] - {"epoch": 11, "update": 10.791, "loss": "2.125", "ppl": "4.36", "wps": "366407", "ups": "3.1", "wpb": "118258", "bsz": "256", "num_updates": "555400", "lr": "0.000449091", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "181335"} +[2022-08-03 03:50:40,761][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 03:51:27,704][train_inner][INFO] - {"epoch": 11, "update": 10.795, "loss": "2.124", "ppl": "4.36", "wps": "364440", "ups": "3.07", "wpb": "118713", "bsz": "256", "num_updates": "555600", "lr": "0.000448889", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "181400"} +[2022-08-03 03:52:32,467][train_inner][INFO] - {"epoch": 11, "update": 10.799, "loss": "2.127", "ppl": "4.37", "wps": "365022", "ups": "3.09", "wpb": "118199", "bsz": "256", "num_updates": "555800", "lr": "0.000448687", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "181465"} +[2022-08-03 03:53:37,014][train_inner][INFO] - {"epoch": 11, "update": 10.803, "loss": "2.13", "ppl": "4.38", "wps": "364804", "ups": "3.1", "wpb": "117733", "bsz": "256", "num_updates": "556000", "lr": "0.000448485", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "181529"} +[2022-08-03 03:54:41,933][train_inner][INFO] - {"epoch": 11, "update": 10.807, "loss": "2.129", "ppl": "4.37", "wps": "364574", "ups": "3.08", "wpb": "118337", "bsz": "256", "num_updates": "556200", "lr": "0.000448283", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "181594"} +[2022-08-03 03:55:47,177][train_inner][INFO] - {"epoch": 11, "update": 10.811, "loss": "2.122", "ppl": "4.35", "wps": "366015", "ups": "3.07", "wpb": "119399", "bsz": "256", "num_updates": "556400", "lr": "0.000448081", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "181659"} +[2022-08-03 03:56:51,633][train_inner][INFO] - {"epoch": 11, "update": 10.815, "loss": "2.133", "ppl": "4.38", "wps": "366320", "ups": "3.1", "wpb": "118057", "bsz": "256", "num_updates": "556600", "lr": "0.000447879", "gnorm": "0.691", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.7", "wall": "181724"} +[2022-08-03 03:56:59,116][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 03:57:04,510][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 03:57:57,059][train_inner][INFO] - {"epoch": 11, "update": 10.818, "loss": "2.136", "ppl": "4.4", "wps": "359016", "ups": "3.06", "wpb": "117442", "bsz": "256", "num_updates": "556800", "lr": "0.000447677", "gnorm": "0.706", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.3", "wall": "181789"} +[2022-08-03 03:59:01,937][train_inner][INFO] - {"epoch": 11, "update": 10.822, "loss": "2.124", "ppl": "4.36", "wps": "365912", "ups": "3.08", "wpb": "118697", "bsz": "256", "num_updates": "557000", "lr": "0.000447475", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.7", "wall": "181854"} +[2022-08-03 04:00:06,568][train_inner][INFO] - {"epoch": 11, "update": 10.826, "loss": "2.139", "ppl": "4.4", "wps": "364635", "ups": "3.09", "wpb": "117833", "bsz": "256", "num_updates": "557200", "lr": "0.000447273", "gnorm": "0.705", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.1", "wall": "181919"} +[2022-08-03 04:01:11,182][train_inner][INFO] - {"epoch": 11, "update": 10.83, "loss": "2.133", "ppl": "4.39", "wps": "365551", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "557400", "lr": "0.000447071", "gnorm": "0.689", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "181984"} +[2022-08-03 04:02:16,326][train_inner][INFO] - {"epoch": 11, "update": 10.834, "loss": "2.122", "ppl": "4.35", "wps": "364767", "ups": "3.07", "wpb": "118810", "bsz": "256", "num_updates": "557600", "lr": "0.000446869", "gnorm": "0.684", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "182049"} +[2022-08-03 04:03:21,032][train_inner][INFO] - {"epoch": 11, "update": 10.838, "loss": "2.124", "ppl": "4.36", "wps": "366210", "ups": "3.09", "wpb": "118480", "bsz": "256", "num_updates": "557800", "lr": "0.000446667", "gnorm": "0.686", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.5", "wall": "182113"} +[2022-08-03 04:04:25,526][train_inner][INFO] - {"epoch": 11, "update": 10.842, "loss": "2.129", "ppl": "4.37", "wps": "366593", "ups": "3.1", "wpb": "118212", "bsz": "256", "num_updates": "558000", "lr": "0.000446465", "gnorm": "0.686", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.3", "wall": "182178"} +[2022-08-03 04:05:29,707][train_inner][INFO] - {"epoch": 11, "update": 10.846, "loss": "2.134", "ppl": "4.39", "wps": "366844", "ups": "3.12", "wpb": "117721", "bsz": "256", "num_updates": "558200", "lr": "0.000446263", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "182242"} +[2022-08-03 04:06:34,949][train_inner][INFO] - {"epoch": 11, "update": 10.849, "loss": "2.129", "ppl": "4.37", "wps": "362233", "ups": "3.07", "wpb": "118162", "bsz": "256", "num_updates": "558400", "lr": "0.000446061", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.8", "wall": "182307"} +[2022-08-03 04:07:39,723][train_inner][INFO] - {"epoch": 11, "update": 10.853, "loss": "2.125", "ppl": "4.36", "wps": "368354", "ups": "3.09", "wpb": "119298", "bsz": "256", "num_updates": "558600", "lr": "0.000445859", "gnorm": "0.687", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "182372"} +[2022-08-03 04:08:26,130][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 04:08:44,625][train_inner][INFO] - {"epoch": 11, "update": 10.857, "loss": "2.133", "ppl": "4.39", "wps": "363129", "ups": "3.08", "wpb": "117838", "bsz": "256", "num_updates": "558800", "lr": "0.000445657", "gnorm": "0.731", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "182437"} +[2022-08-03 04:09:49,640][train_inner][INFO] - {"epoch": 11, "update": 10.861, "loss": "2.125", "ppl": "4.36", "wps": "363555", "ups": "3.08", "wpb": "118181", "bsz": "256", "num_updates": "559000", "lr": "0.000445455", "gnorm": "0.688", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "182502"} +[2022-08-03 04:10:54,847][train_inner][INFO] - {"epoch": 11, "update": 10.865, "loss": "2.129", "ppl": "4.37", "wps": "362740", "ups": "3.07", "wpb": "118264", "bsz": "256", "num_updates": "559200", "lr": "0.000445253", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24", "wall": "182567"} +[2022-08-03 04:11:59,696][train_inner][INFO] - {"epoch": 11, "update": 10.869, "loss": "2.128", "ppl": "4.37", "wps": "363464", "ups": "3.08", "wpb": "117849", "bsz": "256", "num_updates": "559400", "lr": "0.000445051", "gnorm": "0.691", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.6", "wall": "182632"} +[2022-08-03 04:13:04,517][train_inner][INFO] - {"epoch": 11, "update": 10.873, "loss": "2.134", "ppl": "4.39", "wps": "363521", "ups": "3.09", "wpb": "117818", "bsz": "256", "num_updates": "559600", "lr": "0.000444848", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "182697"} +[2022-08-03 04:14:09,446][train_inner][INFO] - {"epoch": 11, "update": 10.877, "loss": "2.124", "ppl": "4.36", "wps": "366868", "ups": "3.08", "wpb": "119099", "bsz": "256", "num_updates": "559800", "lr": "0.000444646", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.3", "wall": "182762"} +[2022-08-03 04:15:14,675][train_inner][INFO] - {"epoch": 11, "update": 10.881, "loss": "2.132", "ppl": "4.38", "wps": "364204", "ups": "3.07", "wpb": "118782", "bsz": "256", "num_updates": "560000", "lr": "0.000444444", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "182827"} +[2022-08-03 04:16:19,685][train_inner][INFO] - {"epoch": 11, "update": 10.884, "loss": "2.124", "ppl": "4.36", "wps": "365236", "ups": "3.08", "wpb": "118718", "bsz": "256", "num_updates": "560200", "lr": "0.000444242", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "182892"} +[2022-08-03 04:17:24,354][train_inner][INFO] - {"epoch": 11, "update": 10.888, "loss": "2.122", "ppl": "4.35", "wps": "366942", "ups": "3.09", "wpb": "118647", "bsz": "256", "num_updates": "560400", "lr": "0.00044404", "gnorm": "0.686", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "182957"} +[2022-08-03 04:18:29,301][train_inner][INFO] - {"epoch": 11, "update": 10.892, "loss": "2.129", "ppl": "4.37", "wps": "363130", "ups": "3.08", "wpb": "117920", "bsz": "256", "num_updates": "560600", "lr": "0.000443838", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "183022"} +[2022-08-03 04:19:34,185][train_inner][INFO] - {"epoch": 11, "update": 10.896, "loss": "2.121", "ppl": "4.35", "wps": "366124", "ups": "3.08", "wpb": "118776", "bsz": "256", "num_updates": "560800", "lr": "0.000443636", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "183087"} +[2022-08-03 04:20:38,998][train_inner][INFO] - {"epoch": 11, "update": 10.9, "loss": "2.124", "ppl": "4.36", "wps": "365611", "ups": "3.09", "wpb": "118480", "bsz": "256", "num_updates": "561000", "lr": "0.000443434", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.2", "wall": "183151"} +[2022-08-03 04:21:43,648][train_inner][INFO] - {"epoch": 11, "update": 10.904, "loss": "2.131", "ppl": "4.38", "wps": "366272", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "561200", "lr": "0.000443232", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "183216"} +[2022-08-03 04:22:48,499][train_inner][INFO] - {"epoch": 11, "update": 10.908, "loss": "2.126", "ppl": "4.36", "wps": "364283", "ups": "3.08", "wpb": "118119", "bsz": "256", "num_updates": "561400", "lr": "0.00044303", "gnorm": "0.689", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "183281"} +[2022-08-03 04:23:53,339][train_inner][INFO] - {"epoch": 11, "update": 10.912, "loss": "2.128", "ppl": "4.37", "wps": "364680", "ups": "3.08", "wpb": "118227", "bsz": "256", "num_updates": "561600", "lr": "0.000442828", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "183346"} +[2022-08-03 04:24:57,982][train_inner][INFO] - {"epoch": 11, "update": 10.916, "loss": "2.126", "ppl": "4.37", "wps": "367978", "ups": "3.09", "wpb": "118935", "bsz": "256", "num_updates": "561800", "lr": "0.000442626", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "27.2", "wall": "183410"} +[2022-08-03 04:26:02,727][train_inner][INFO] - {"epoch": 11, "update": 10.919, "loss": "2.129", "ppl": "4.37", "wps": "363109", "ups": "3.09", "wpb": "117544", "bsz": "256", "num_updates": "562000", "lr": "0.000442424", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "183475"} +[2022-08-03 04:27:07,768][train_inner][INFO] - {"epoch": 11, "update": 10.923, "loss": "2.126", "ppl": "4.36", "wps": "364285", "ups": "3.08", "wpb": "118466", "bsz": "256", "num_updates": "562200", "lr": "0.000442222", "gnorm": "0.698", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "183540"} +[2022-08-03 04:28:12,970][train_inner][INFO] - {"epoch": 11, "update": 10.927, "loss": "2.125", "ppl": "4.36", "wps": "362734", "ups": "3.07", "wpb": "118254", "bsz": "256", "num_updates": "562400", "lr": "0.00044202", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "183605"} +[2022-08-03 04:29:17,969][train_inner][INFO] - {"epoch": 11, "update": 10.931, "loss": "2.13", "ppl": "4.38", "wps": "363972", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "562600", "lr": "0.000441818", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.6", "wall": "183670"} +[2022-08-03 04:30:22,591][train_inner][INFO] - {"epoch": 11, "update": 10.935, "loss": "2.132", "ppl": "4.38", "wps": "366142", "ups": "3.09", "wpb": "118302", "bsz": "256", "num_updates": "562800", "lr": "0.000441616", "gnorm": "0.689", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "183735"} +[2022-08-03 04:31:26,948][train_inner][INFO] - {"epoch": 11, "update": 10.939, "loss": "2.127", "ppl": "4.37", "wps": "366248", "ups": "3.11", "wpb": "117853", "bsz": "256", "num_updates": "563000", "lr": "0.000441414", "gnorm": "0.691", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "183799"} +[2022-08-03 04:32:32,191][train_inner][INFO] - {"epoch": 11, "update": 10.943, "loss": "2.13", "ppl": "4.38", "wps": "361313", "ups": "3.07", "wpb": "117862", "bsz": "256", "num_updates": "563200", "lr": "0.000441212", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "183865"} +[2022-08-03 04:33:37,194][train_inner][INFO] - {"epoch": 11, "update": 10.947, "loss": "2.127", "ppl": "4.37", "wps": "363434", "ups": "3.08", "wpb": "118120", "bsz": "256", "num_updates": "563400", "lr": "0.00044101", "gnorm": "0.692", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "183930"} +[2022-08-03 04:34:42,145][train_inner][INFO] - {"epoch": 11, "update": 10.951, "loss": "2.129", "ppl": "4.37", "wps": "363632", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "563600", "lr": "0.000440808", "gnorm": "0.691", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "183994"} +[2022-08-03 04:35:46,995][train_inner][INFO] - {"epoch": 11, "update": 10.954, "loss": "2.133", "ppl": "4.39", "wps": "362773", "ups": "3.08", "wpb": "117627", "bsz": "256", "num_updates": "563800", "lr": "0.000440606", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "184059"} +[2022-08-03 04:36:52,320][train_inner][INFO] - {"epoch": 11, "update": 10.958, "loss": "2.129", "ppl": "4.37", "wps": "362706", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "564000", "lr": "0.000440404", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "184125"} +[2022-08-03 04:37:57,073][train_inner][INFO] - {"epoch": 11, "update": 10.962, "loss": "2.127", "ppl": "4.37", "wps": "364452", "ups": "3.09", "wpb": "117995", "bsz": "256", "num_updates": "564200", "lr": "0.000440202", "gnorm": "0.693", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "184189"} +[2022-08-03 04:39:02,167][train_inner][INFO] - {"epoch": 11, "update": 10.966, "loss": "2.119", "ppl": "4.34", "wps": "364081", "ups": "3.07", "wpb": "118496", "bsz": "256", "num_updates": "564400", "lr": "0.00044", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "184254"} +[2022-08-03 04:40:07,204][train_inner][INFO] - {"epoch": 11, "update": 10.97, "loss": "2.129", "ppl": "4.38", "wps": "363384", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "564600", "lr": "0.000439798", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "184320"} +[2022-08-03 04:41:12,129][train_inner][INFO] - {"epoch": 11, "update": 10.974, "loss": "2.122", "ppl": "4.35", "wps": "365572", "ups": "3.08", "wpb": "118672", "bsz": "256", "num_updates": "564800", "lr": "0.000439596", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "184384"} +[2022-08-03 04:42:16,750][train_inner][INFO] - {"epoch": 11, "update": 10.978, "loss": "2.135", "ppl": "4.39", "wps": "366096", "ups": "3.1", "wpb": "118285", "bsz": "256", "num_updates": "565000", "lr": "0.000439394", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "184449"} +[2022-08-03 04:43:21,778][train_inner][INFO] - {"epoch": 11, "update": 10.982, "loss": "2.128", "ppl": "4.37", "wps": "365461", "ups": "3.08", "wpb": "118825", "bsz": "256", "num_updates": "565200", "lr": "0.000439192", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "184514"} +[2022-08-03 04:44:26,114][train_inner][INFO] - {"epoch": 11, "update": 10.985, "loss": "2.131", "ppl": "4.38", "wps": "368505", "ups": "3.11", "wpb": "118539", "bsz": "256", "num_updates": "565400", "lr": "0.00043899", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "184578"} +[2022-08-03 04:45:30,942][train_inner][INFO] - {"epoch": 11, "update": 10.989, "loss": "2.125", "ppl": "4.36", "wps": "366150", "ups": "3.09", "wpb": "118682", "bsz": "256", "num_updates": "565600", "lr": "0.000438788", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "184643"} +[2022-08-03 04:46:34,975][train_inner][INFO] - {"epoch": 11, "update": 10.993, "loss": "2.133", "ppl": "4.39", "wps": "367087", "ups": "3.12", "wpb": "117528", "bsz": "256", "num_updates": "565800", "lr": "0.000438586", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "184707"} +[2022-08-03 04:47:39,541][train_inner][INFO] - {"epoch": 11, "update": 10.997, "loss": "2.13", "ppl": "4.38", "wps": "366251", "ups": "3.1", "wpb": "118235", "bsz": "256", "num_updates": "566000", "lr": "0.000438384", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "184772"} +[2022-08-03 04:48:27,274][fairseq_cli.train][INFO] - end of epoch 11 (average epoch stats below) +[2022-08-03 04:48:27,275][train][INFO] - {"epoch": 11, "train_loss": "2.138", "train_ppl": "4.4", "train_wps": "363026", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "566148", "train_lr": "0.000438234", "train_gnorm": "0.685", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16606", "train_gb_free": "24.9", "train_wall": "184820"} +[2022-08-03 04:48:27,385][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-03 04:48:27,388][fairseq.trainer][INFO] - begin training epoch 12 +[2022-08-03 04:48:27,388][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-03 04:48:57,497][train_inner][INFO] - {"epoch": 12, "update": 11.001, "loss": "2.122", "ppl": "4.35", "wps": "302727", "ups": "2.57", "wpb": "117995", "bsz": "255.4", "num_updates": "566200", "lr": "0.000438182", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "184850"} +[2022-08-03 04:50:02,266][train_inner][INFO] - {"epoch": 12, "update": 11.005, "loss": "2.121", "ppl": "4.35", "wps": "365302", "ups": "3.09", "wpb": "118300", "bsz": "256", "num_updates": "566400", "lr": "0.00043798", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "184915"} +[2022-08-03 04:51:07,076][train_inner][INFO] - {"epoch": 12, "update": 11.009, "loss": "2.123", "ppl": "4.36", "wps": "364022", "ups": "3.09", "wpb": "117960", "bsz": "256", "num_updates": "566600", "lr": "0.000437778", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "184979"} +[2022-08-03 04:52:11,837][train_inner][INFO] - {"epoch": 12, "update": 11.013, "loss": "2.123", "ppl": "4.36", "wps": "365374", "ups": "3.09", "wpb": "118308", "bsz": "256", "num_updates": "566800", "lr": "0.000437576", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "185044"} +[2022-08-03 04:53:16,485][train_inner][INFO] - {"epoch": 12, "update": 11.017, "loss": "2.117", "ppl": "4.34", "wps": "366503", "ups": "3.09", "wpb": "118466", "bsz": "256", "num_updates": "567000", "lr": "0.000437374", "gnorm": "0.688", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.2", "wall": "185109"} +[2022-08-03 04:54:21,470][train_inner][INFO] - {"epoch": 12, "update": 11.02, "loss": "2.124", "ppl": "4.36", "wps": "363847", "ups": "3.08", "wpb": "118221", "bsz": "256", "num_updates": "567200", "lr": "0.000437172", "gnorm": "0.692", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "185174"} +[2022-08-03 04:54:59,633][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 04:55:26,531][train_inner][INFO] - {"epoch": 12, "update": 11.024, "loss": "2.121", "ppl": "4.35", "wps": "362485", "ups": "3.07", "wpb": "117916", "bsz": "256", "num_updates": "567400", "lr": "0.00043697", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "185239"} +[2022-08-03 04:56:31,252][train_inner][INFO] - {"epoch": 12, "update": 11.028, "loss": "2.122", "ppl": "4.35", "wps": "364839", "ups": "3.09", "wpb": "118063", "bsz": "256", "num_updates": "567600", "lr": "0.000436768", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "185304"} +[2022-08-03 04:57:35,799][train_inner][INFO] - {"epoch": 12, "update": 11.032, "loss": "2.124", "ppl": "4.36", "wps": "364526", "ups": "3.1", "wpb": "117645", "bsz": "256", "num_updates": "567800", "lr": "0.000436566", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "185368"} +[2022-08-03 04:58:40,698][train_inner][INFO] - {"epoch": 12, "update": 11.036, "loss": "2.125", "ppl": "4.36", "wps": "364198", "ups": "3.08", "wpb": "118179", "bsz": "256", "num_updates": "568000", "lr": "0.000436364", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "185433"} +[2022-08-03 04:59:45,572][train_inner][INFO] - {"epoch": 12, "update": 11.04, "loss": "2.124", "ppl": "4.36", "wps": "364173", "ups": "3.08", "wpb": "118124", "bsz": "256", "num_updates": "568200", "lr": "0.000436162", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "185498"} +[2022-08-03 05:00:50,444][train_inner][INFO] - {"epoch": 12, "update": 11.044, "loss": "2.126", "ppl": "4.37", "wps": "364403", "ups": "3.08", "wpb": "118196", "bsz": "256", "num_updates": "568400", "lr": "0.00043596", "gnorm": "0.695", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "185563"} +[2022-08-03 05:01:55,268][train_inner][INFO] - {"epoch": 12, "update": 11.048, "loss": "2.123", "ppl": "4.35", "wps": "366074", "ups": "3.09", "wpb": "118651", "bsz": "256", "num_updates": "568600", "lr": "0.000435758", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "185628"} +[2022-08-03 05:03:00,427][train_inner][INFO] - {"epoch": 12, "update": 11.052, "loss": "2.12", "ppl": "4.35", "wps": "361981", "ups": "3.07", "wpb": "117929", "bsz": "256", "num_updates": "568800", "lr": "0.000435556", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "185693"} +[2022-08-03 05:04:05,635][train_inner][INFO] - {"epoch": 12, "update": 11.055, "loss": "2.113", "ppl": "4.33", "wps": "362087", "ups": "3.07", "wpb": "118054", "bsz": "256", "num_updates": "569000", "lr": "0.000435354", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "185758"} +[2022-08-03 05:05:11,712][train_inner][INFO] - {"epoch": 12, "update": 11.059, "loss": "2.121", "ppl": "4.35", "wps": "358335", "ups": "3.03", "wpb": "118386", "bsz": "256", "num_updates": "569200", "lr": "0.000435152", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "28.2", "wall": "185824"} +[2022-08-03 05:06:16,243][train_inner][INFO] - {"epoch": 12, "update": 11.063, "loss": "2.125", "ppl": "4.36", "wps": "367455", "ups": "3.1", "wpb": "118559", "bsz": "256", "num_updates": "569400", "lr": "0.000434949", "gnorm": "0.689", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "185889"} +[2022-08-03 05:07:21,086][train_inner][INFO] - {"epoch": 12, "update": 11.067, "loss": "2.118", "ppl": "4.34", "wps": "365976", "ups": "3.08", "wpb": "118653", "bsz": "256", "num_updates": "569600", "lr": "0.000434747", "gnorm": "0.688", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "185953"} +[2022-08-03 05:08:25,833][train_inner][INFO] - {"epoch": 12, "update": 11.071, "loss": "2.123", "ppl": "4.35", "wps": "366418", "ups": "3.09", "wpb": "118620", "bsz": "256", "num_updates": "569800", "lr": "0.000434545", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "186018"} +[2022-08-03 05:09:31,073][train_inner][INFO] - {"epoch": 12, "update": 11.075, "loss": "2.121", "ppl": "4.35", "wps": "364234", "ups": "3.07", "wpb": "118810", "bsz": "256", "num_updates": "570000", "lr": "0.000434343", "gnorm": "0.689", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "186083"} +[2022-08-03 05:10:35,997][train_inner][INFO] - {"epoch": 12, "update": 11.079, "loss": "2.122", "ppl": "4.35", "wps": "364692", "ups": "3.08", "wpb": "118385", "bsz": "256", "num_updates": "570200", "lr": "0.000434141", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.4", "wall": "186148"} +[2022-08-03 05:11:41,355][train_inner][INFO] - {"epoch": 12, "update": 11.083, "loss": "2.117", "ppl": "4.34", "wps": "362993", "ups": "3.06", "wpb": "118622", "bsz": "256", "num_updates": "570400", "lr": "0.000433939", "gnorm": "0.689", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "186214"} +[2022-08-03 05:12:46,068][train_inner][INFO] - {"epoch": 12, "update": 11.086, "loss": "2.124", "ppl": "4.36", "wps": "364966", "ups": "3.09", "wpb": "118088", "bsz": "256", "num_updates": "570600", "lr": "0.000433737", "gnorm": "0.693", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.7", "wall": "186278"} +[2022-08-03 05:13:51,278][train_inner][INFO] - {"epoch": 12, "update": 11.09, "loss": "2.119", "ppl": "4.35", "wps": "363290", "ups": "3.07", "wpb": "118450", "bsz": "256", "num_updates": "570800", "lr": "0.000433535", "gnorm": "0.692", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "28.2", "wall": "186344"} +[2022-08-03 05:14:56,104][train_inner][INFO] - {"epoch": 12, "update": 11.094, "loss": "2.122", "ppl": "4.35", "wps": "361918", "ups": "3.09", "wpb": "117306", "bsz": "256", "num_updates": "571000", "lr": "0.000433333", "gnorm": "0.693", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "186408"} +[2022-08-03 05:16:01,138][train_inner][INFO] - {"epoch": 12, "update": 11.098, "loss": "2.122", "ppl": "4.35", "wps": "365023", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "571200", "lr": "0.000433131", "gnorm": "0.694", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.6", "wall": "186473"} +[2022-08-03 05:17:05,705][train_inner][INFO] - {"epoch": 12, "update": 11.102, "loss": "2.124", "ppl": "4.36", "wps": "367003", "ups": "3.1", "wpb": "118480", "bsz": "256", "num_updates": "571400", "lr": "0.000432929", "gnorm": "0.691", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.6", "wall": "186538"} +[2022-08-03 05:17:11,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 +[2022-08-03 05:18:11,068][train_inner][INFO] - {"epoch": 12, "update": 11.106, "loss": "2.117", "ppl": "4.34", "wps": "362666", "ups": "3.06", "wpb": "118522", "bsz": "256", "num_updates": "571600", "lr": "0.000432727", "gnorm": "0.691", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.2", "wall": "186603"} +[2022-08-03 05:18:41,876][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 05:19:16,341][train_inner][INFO] - {"epoch": 12, "update": 11.11, "loss": "2.119", "ppl": "4.35", "wps": "363387", "ups": "3.06", "wpb": "118596", "bsz": "256", "num_updates": "571800", "lr": "0.000432525", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "186669"} +[2022-08-03 05:20:21,643][train_inner][INFO] - {"epoch": 12, "update": 11.114, "loss": "2.118", "ppl": "4.34", "wps": "363815", "ups": "3.06", "wpb": "118790", "bsz": "256", "num_updates": "572000", "lr": "0.000432323", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "186734"} +[2022-08-03 05:21:26,767][train_inner][INFO] - {"epoch": 12, "update": 11.118, "loss": "2.117", "ppl": "4.34", "wps": "362103", "ups": "3.07", "wpb": "117906", "bsz": "256", "num_updates": "572200", "lr": "0.000432121", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "186799"} +[2022-08-03 05:22:06,342][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 05:22:32,040][train_inner][INFO] - {"epoch": 12, "update": 11.122, "loss": "2.115", "ppl": "4.33", "wps": "363693", "ups": "3.06", "wpb": "118694", "bsz": "256", "num_updates": "572400", "lr": "0.000431919", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "186864"} +[2022-08-03 05:23:36,852][train_inner][INFO] - {"epoch": 12, "update": 11.125, "loss": "2.121", "ppl": "4.35", "wps": "364844", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "572600", "lr": "0.000431717", "gnorm": "0.692", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "186929"} +[2022-08-03 05:24:41,937][train_inner][INFO] - {"epoch": 12, "update": 11.129, "loss": "2.131", "ppl": "4.38", "wps": "364950", "ups": "3.07", "wpb": "118762", "bsz": "256", "num_updates": "572800", "lr": "0.000431515", "gnorm": "0.693", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "186994"} +[2022-08-03 05:25:46,864][train_inner][INFO] - {"epoch": 12, "update": 11.133, "loss": "2.121", "ppl": "4.35", "wps": "365578", "ups": "3.08", "wpb": "118677", "bsz": "256", "num_updates": "573000", "lr": "0.000431313", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "187059"} +[2022-08-03 05:26:51,816][train_inner][INFO] - {"epoch": 12, "update": 11.137, "loss": "2.117", "ppl": "4.34", "wps": "365679", "ups": "3.08", "wpb": "118756", "bsz": "256", "num_updates": "573200", "lr": "0.000431111", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "187124"} +[2022-08-03 05:27:56,911][train_inner][INFO] - {"epoch": 12, "update": 11.141, "loss": "2.121", "ppl": "4.35", "wps": "364087", "ups": "3.07", "wpb": "118499", "bsz": "256", "num_updates": "573400", "lr": "0.000430909", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "187189"} +[2022-08-03 05:29:01,581][train_inner][INFO] - {"epoch": 12, "update": 11.145, "loss": "2.114", "ppl": "4.33", "wps": "364774", "ups": "3.09", "wpb": "117949", "bsz": "256", "num_updates": "573600", "lr": "0.000430707", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24", "wall": "187254"} +[2022-08-03 05:30:06,563][train_inner][INFO] - {"epoch": 12, "update": 11.149, "loss": "2.122", "ppl": "4.35", "wps": "365126", "ups": "3.08", "wpb": "118630", "bsz": "256", "num_updates": "573800", "lr": "0.000430505", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "187319"} +[2022-08-03 05:31:11,527][train_inner][INFO] - {"epoch": 12, "update": 11.153, "loss": "2.117", "ppl": "4.34", "wps": "364299", "ups": "3.08", "wpb": "118330", "bsz": "256", "num_updates": "574000", "lr": "0.000430303", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "187384"} +[2022-08-03 05:32:16,720][train_inner][INFO] - {"epoch": 12, "update": 11.156, "loss": "2.114", "ppl": "4.33", "wps": "362662", "ups": "3.07", "wpb": "118213", "bsz": "256", "num_updates": "574200", "lr": "0.000430101", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "187449"} +[2022-08-03 05:33:21,534][train_inner][INFO] - {"epoch": 12, "update": 11.16, "loss": "2.115", "ppl": "4.33", "wps": "365086", "ups": "3.09", "wpb": "118311", "bsz": "256", "num_updates": "574400", "lr": "0.000429899", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "187514"} +[2022-08-03 05:34:26,703][train_inner][INFO] - {"epoch": 12, "update": 11.164, "loss": "2.118", "ppl": "4.34", "wps": "363755", "ups": "3.07", "wpb": "118526", "bsz": "256", "num_updates": "574600", "lr": "0.000429697", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "187579"} +[2022-08-03 05:35:07,718][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 05:35:18,690][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 05:35:32,346][train_inner][INFO] - {"epoch": 12, "update": 11.168, "loss": "2.12", "ppl": "4.35", "wps": "359688", "ups": "3.05", "wpb": "118053", "bsz": "256", "num_updates": "574800", "lr": "0.000429495", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "187645"} +[2022-08-03 05:36:38,042][train_inner][INFO] - {"epoch": 12, "update": 11.172, "loss": "2.125", "ppl": "4.36", "wps": "358623", "ups": "3.04", "wpb": "117799", "bsz": "256", "num_updates": "575000", "lr": "0.000429293", "gnorm": "0.697", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "187710"} +[2022-08-03 05:37:43,027][train_inner][INFO] - {"epoch": 12, "update": 11.176, "loss": "2.113", "ppl": "4.33", "wps": "366063", "ups": "3.08", "wpb": "118941", "bsz": "256", "num_updates": "575200", "lr": "0.000429091", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "187775"} +[2022-08-03 05:38:47,758][train_inner][INFO] - {"epoch": 12, "update": 11.18, "loss": "2.114", "ppl": "4.33", "wps": "366298", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "575400", "lr": "0.000428889", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "187840"} +[2022-08-03 05:39:52,526][train_inner][INFO] - {"epoch": 12, "update": 11.184, "loss": "2.113", "ppl": "4.33", "wps": "366255", "ups": "3.09", "wpb": "118607", "bsz": "256", "num_updates": "575600", "lr": "0.000428687", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "187905"} +[2022-08-03 05:40:57,556][train_inner][INFO] - {"epoch": 12, "update": 11.188, "loss": "2.116", "ppl": "4.34", "wps": "363211", "ups": "3.08", "wpb": "118097", "bsz": "256", "num_updates": "575800", "lr": "0.000428485", "gnorm": "0.693", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "187970"} +[2022-08-03 05:42:02,706][train_inner][INFO] - {"epoch": 12, "update": 11.191, "loss": "2.115", "ppl": "4.33", "wps": "364116", "ups": "3.07", "wpb": "118608", "bsz": "256", "num_updates": "576000", "lr": "0.000428283", "gnorm": "0.693", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "188035"} +[2022-08-03 05:43:07,305][train_inner][INFO] - {"epoch": 12, "update": 11.195, "loss": "2.118", "ppl": "4.34", "wps": "367372", "ups": "3.1", "wpb": "118659", "bsz": "256", "num_updates": "576200", "lr": "0.000428081", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22", "wall": "188100"} +[2022-08-03 05:44:12,196][train_inner][INFO] - {"epoch": 12, "update": 11.199, "loss": "2.112", "ppl": "4.32", "wps": "365251", "ups": "3.08", "wpb": "118506", "bsz": "256", "num_updates": "576400", "lr": "0.000427879", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.4", "wall": "188165"} +[2022-08-03 05:45:17,482][train_inner][INFO] - {"epoch": 12, "update": 11.203, "loss": "2.122", "ppl": "4.35", "wps": "362925", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "576600", "lr": "0.000427677", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.8", "wall": "188230"} +[2022-08-03 05:46:21,919][train_inner][INFO] - {"epoch": 12, "update": 11.207, "loss": "2.119", "ppl": "4.34", "wps": "368321", "ups": "3.1", "wpb": "118666", "bsz": "256", "num_updates": "576800", "lr": "0.000427475", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "188294"} +[2022-08-03 05:47:27,062][train_inner][INFO] - {"epoch": 12, "update": 11.211, "loss": "2.119", "ppl": "4.34", "wps": "362372", "ups": "3.07", "wpb": "118029", "bsz": "256", "num_updates": "577000", "lr": "0.000427273", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "188359"} +[2022-08-03 05:48:05,347][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 05:48:32,318][train_inner][INFO] - {"epoch": 12, "update": 11.215, "loss": "2.119", "ppl": "4.34", "wps": "364553", "ups": "3.06", "wpb": "118944", "bsz": "256", "num_updates": "577200", "lr": "0.000427071", "gnorm": "0.697", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "188425"} +[2022-08-03 05:49:37,149][train_inner][INFO] - {"epoch": 12, "update": 11.219, "loss": "2.126", "ppl": "4.36", "wps": "362841", "ups": "3.08", "wpb": "117615", "bsz": "256", "num_updates": "577400", "lr": "0.000426869", "gnorm": "0.702", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "188489"} +[2022-08-03 05:50:41,677][train_inner][INFO] - {"epoch": 12, "update": 11.223, "loss": "2.115", "ppl": "4.33", "wps": "367387", "ups": "3.1", "wpb": "118532", "bsz": "256", "num_updates": "577600", "lr": "0.000426667", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.2", "wall": "188554"} +[2022-08-03 05:51:46,427][train_inner][INFO] - {"epoch": 12, "update": 11.226, "loss": "2.119", "ppl": "4.34", "wps": "365516", "ups": "3.09", "wpb": "118335", "bsz": "256", "num_updates": "577800", "lr": "0.000426465", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "188619"} +[2022-08-03 05:52:51,301][train_inner][INFO] - {"epoch": 12, "update": 11.23, "loss": "2.124", "ppl": "4.36", "wps": "364106", "ups": "3.08", "wpb": "118103", "bsz": "256", "num_updates": "578000", "lr": "0.000426263", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "188684"} +[2022-08-03 05:53:56,239][train_inner][INFO] - {"epoch": 12, "update": 11.234, "loss": "2.121", "ppl": "4.35", "wps": "362058", "ups": "3.08", "wpb": "117554", "bsz": "256", "num_updates": "578200", "lr": "0.000426061", "gnorm": "0.701", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "188749"} +[2022-08-03 05:55:00,831][train_inner][INFO] - {"epoch": 12, "update": 11.238, "loss": "2.115", "ppl": "4.33", "wps": "368550", "ups": "3.1", "wpb": "119026", "bsz": "256", "num_updates": "578400", "lr": "0.000425859", "gnorm": "0.693", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "188813"} +[2022-08-03 05:56:05,775][train_inner][INFO] - {"epoch": 12, "update": 11.242, "loss": "2.123", "ppl": "4.36", "wps": "363244", "ups": "3.08", "wpb": "117951", "bsz": "256", "num_updates": "578600", "lr": "0.000425657", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "188878"} +[2022-08-03 05:57:10,578][train_inner][INFO] - {"epoch": 12, "update": 11.246, "loss": "2.116", "ppl": "4.33", "wps": "365869", "ups": "3.09", "wpb": "118545", "bsz": "256", "num_updates": "578800", "lr": "0.000425455", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "188943"} +[2022-08-03 05:58:15,356][train_inner][INFO] - {"epoch": 12, "update": 11.25, "loss": "2.115", "ppl": "4.33", "wps": "365556", "ups": "3.09", "wpb": "118398", "bsz": "256", "num_updates": "579000", "lr": "0.000425253", "gnorm": "0.696", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22", "wall": "189008"} +[2022-08-03 05:59:20,333][train_inner][INFO] - {"epoch": 12, "update": 11.254, "loss": "2.119", "ppl": "4.34", "wps": "362785", "ups": "3.08", "wpb": "117862", "bsz": "256", "num_updates": "579200", "lr": "0.000425051", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "189073"} +[2022-08-03 06:00:25,400][train_inner][INFO] - {"epoch": 12, "update": 11.258, "loss": "2.114", "ppl": "4.33", "wps": "365133", "ups": "3.07", "wpb": "118789", "bsz": "256", "num_updates": "579400", "lr": "0.000424848", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "189138"} +[2022-08-03 06:01:30,086][train_inner][INFO] - {"epoch": 12, "update": 11.261, "loss": "2.12", "ppl": "4.35", "wps": "364848", "ups": "3.09", "wpb": "118001", "bsz": "256", "num_updates": "579600", "lr": "0.000424646", "gnorm": "0.699", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "189202"} +[2022-08-03 06:02:34,847][train_inner][INFO] - {"epoch": 12, "update": 11.265, "loss": "2.118", "ppl": "4.34", "wps": "366129", "ups": "3.09", "wpb": "118552", "bsz": "256", "num_updates": "579800", "lr": "0.000424444", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27", "wall": "189267"} +[2022-08-03 06:03:39,599][train_inner][INFO] - {"epoch": 12, "update": 11.269, "loss": "2.114", "ppl": "4.33", "wps": "364616", "ups": "3.09", "wpb": "118047", "bsz": "255.9", "num_updates": "580000", "lr": "0.000424242", "gnorm": "0.695", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "189332"} +[2022-08-03 06:04:44,252][train_inner][INFO] - {"epoch": 12, "update": 11.273, "loss": "2.119", "ppl": "4.34", "wps": "365978", "ups": "3.09", "wpb": "118306", "bsz": "256", "num_updates": "580200", "lr": "0.00042404", "gnorm": "0.701", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "189397"} +[2022-08-03 06:05:49,459][train_inner][INFO] - {"epoch": 12, "update": 11.277, "loss": "2.119", "ppl": "4.34", "wps": "360703", "ups": "3.07", "wpb": "117601", "bsz": "256", "num_updates": "580400", "lr": "0.000423838", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "189462"} +[2022-08-03 06:06:54,140][train_inner][INFO] - {"epoch": 12, "update": 11.281, "loss": "2.118", "ppl": "4.34", "wps": "364271", "ups": "3.09", "wpb": "117805", "bsz": "256", "num_updates": "580600", "lr": "0.000423636", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "189526"} +[2022-08-03 06:07:59,268][train_inner][INFO] - {"epoch": 12, "update": 11.285, "loss": "2.116", "ppl": "4.33", "wps": "363094", "ups": "3.07", "wpb": "118236", "bsz": "256", "num_updates": "580800", "lr": "0.000423434", "gnorm": "0.695", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "189592"} +[2022-08-03 06:09:04,395][train_inner][INFO] - {"epoch": 12, "update": 11.289, "loss": "2.112", "ppl": "4.32", "wps": "364243", "ups": "3.07", "wpb": "118609", "bsz": "256", "num_updates": "581000", "lr": "0.000423232", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "189657"} +[2022-08-03 06:10:09,126][train_inner][INFO] - {"epoch": 12, "update": 11.292, "loss": "2.109", "ppl": "4.31", "wps": "368112", "ups": "3.09", "wpb": "119139", "bsz": "256", "num_updates": "581200", "lr": "0.00042303", "gnorm": "0.693", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "189721"} +[2022-08-03 06:11:14,257][train_inner][INFO] - {"epoch": 12, "update": 11.296, "loss": "2.114", "ppl": "4.33", "wps": "360916", "ups": "3.07", "wpb": "117533", "bsz": "256", "num_updates": "581400", "lr": "0.000422828", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "189787"} +[2022-08-03 06:12:19,043][train_inner][INFO] - {"epoch": 12, "update": 11.3, "loss": "2.113", "ppl": "4.33", "wps": "365508", "ups": "3.09", "wpb": "118397", "bsz": "256", "num_updates": "581600", "lr": "0.000422626", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "189851"} +[2022-08-03 06:13:24,315][train_inner][INFO] - {"epoch": 12, "update": 11.304, "loss": "2.119", "ppl": "4.34", "wps": "361781", "ups": "3.06", "wpb": "118069", "bsz": "256", "num_updates": "581800", "lr": "0.000422424", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "189917"} +[2022-08-03 06:14:28,952][train_inner][INFO] - {"epoch": 12, "update": 11.308, "loss": "2.114", "ppl": "4.33", "wps": "366169", "ups": "3.09", "wpb": "118338", "bsz": "256", "num_updates": "582000", "lr": "0.000422222", "gnorm": "0.697", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "189981"} +[2022-08-03 06:15:34,016][train_inner][INFO] - {"epoch": 12, "update": 11.312, "loss": "2.109", "ppl": "4.31", "wps": "363878", "ups": "3.07", "wpb": "118376", "bsz": "256", "num_updates": "582200", "lr": "0.00042202", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "190046"} +[2022-08-03 06:16:38,911][train_inner][INFO] - {"epoch": 12, "update": 11.316, "loss": "2.116", "ppl": "4.33", "wps": "364079", "ups": "3.08", "wpb": "118132", "bsz": "256", "num_updates": "582400", "lr": "0.000421818", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "190111"} +[2022-08-03 06:17:43,489][train_inner][INFO] - {"epoch": 12, "update": 11.32, "loss": "2.11", "ppl": "4.32", "wps": "366178", "ups": "3.1", "wpb": "118233", "bsz": "256", "num_updates": "582600", "lr": "0.000421616", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "190176"} +[2022-08-03 06:18:48,064][train_inner][INFO] - {"epoch": 12, "update": 11.324, "loss": "2.117", "ppl": "4.34", "wps": "365868", "ups": "3.1", "wpb": "118128", "bsz": "256", "num_updates": "582800", "lr": "0.000421414", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "190240"} +[2022-08-03 06:19:53,037][train_inner][INFO] - {"epoch": 12, "update": 11.327, "loss": "2.112", "ppl": "4.32", "wps": "364275", "ups": "3.08", "wpb": "118338", "bsz": "256", "num_updates": "583000", "lr": "0.000421212", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "190305"} +[2022-08-03 06:20:57,819][train_inner][INFO] - {"epoch": 12, "update": 11.331, "loss": "2.118", "ppl": "4.34", "wps": "364968", "ups": "3.09", "wpb": "118215", "bsz": "256", "num_updates": "583200", "lr": "0.00042101", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.4", "wall": "190370"} +[2022-08-03 06:22:03,189][train_inner][INFO] - {"epoch": 12, "update": 11.335, "loss": "2.112", "ppl": "4.32", "wps": "362745", "ups": "3.06", "wpb": "118561", "bsz": "256", "num_updates": "583400", "lr": "0.000420808", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.5", "wall": "190436"} +[2022-08-03 06:23:08,225][train_inner][INFO] - {"epoch": 12, "update": 11.339, "loss": "2.117", "ppl": "4.34", "wps": "363823", "ups": "3.08", "wpb": "118306", "bsz": "256", "num_updates": "583600", "lr": "0.000420606", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.6", "wall": "190501"} +[2022-08-03 06:24:12,829][train_inner][INFO] - {"epoch": 12, "update": 11.343, "loss": "2.114", "ppl": "4.33", "wps": "366719", "ups": "3.1", "wpb": "118456", "bsz": "256", "num_updates": "583800", "lr": "0.000420404", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.8", "wall": "190565"} +[2022-08-03 06:25:17,601][train_inner][INFO] - {"epoch": 12, "update": 11.347, "loss": "2.115", "ppl": "4.33", "wps": "363947", "ups": "3.09", "wpb": "117865", "bsz": "256", "num_updates": "584000", "lr": "0.000420202", "gnorm": "0.7", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.7", "wall": "190630"} +[2022-08-03 06:26:22,444][train_inner][INFO] - {"epoch": 12, "update": 11.351, "loss": "2.115", "ppl": "4.33", "wps": "363970", "ups": "3.08", "wpb": "118004", "bsz": "256", "num_updates": "584200", "lr": "0.00042", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.5", "wall": "190695"} +[2022-08-03 06:26:43,178][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 06:27:27,638][train_inner][INFO] - {"epoch": 12, "update": 11.355, "loss": "2.116", "ppl": "4.33", "wps": "362795", "ups": "3.07", "wpb": "118258", "bsz": "256", "num_updates": "584400", "lr": "0.000419798", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "190760"} +[2022-08-03 06:28:32,477][train_inner][INFO] - {"epoch": 12, "update": 11.359, "loss": "2.113", "ppl": "4.33", "wps": "365970", "ups": "3.08", "wpb": "118644", "bsz": "256", "num_updates": "584600", "lr": "0.000419596", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "190825"} +[2022-08-03 06:29:37,443][train_inner][INFO] - {"epoch": 12, "update": 11.362, "loss": "2.112", "ppl": "4.32", "wps": "362450", "ups": "3.08", "wpb": "117733", "bsz": "256", "num_updates": "584800", "lr": "0.000419394", "gnorm": "0.697", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "190890"} +[2022-08-03 06:30:42,722][train_inner][INFO] - {"epoch": 12, "update": 11.366, "loss": "2.112", "ppl": "4.32", "wps": "362929", "ups": "3.06", "wpb": "118457", "bsz": "256", "num_updates": "585000", "lr": "0.000419192", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "190955"} +[2022-08-03 06:31:47,303][train_inner][INFO] - {"epoch": 12, "update": 11.37, "loss": "2.116", "ppl": "4.33", "wps": "366281", "ups": "3.1", "wpb": "118272", "bsz": "256", "num_updates": "585200", "lr": "0.00041899", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "191020"} +[2022-08-03 06:32:52,100][train_inner][INFO] - {"epoch": 12, "update": 11.374, "loss": "2.113", "ppl": "4.33", "wps": "364777", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "585400", "lr": "0.000418788", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.8", "wall": "191084"} +[2022-08-03 06:33:56,734][train_inner][INFO] - {"epoch": 12, "update": 11.378, "loss": "2.114", "ppl": "4.33", "wps": "365288", "ups": "3.09", "wpb": "118048", "bsz": "256", "num_updates": "585600", "lr": "0.000418586", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "191149"} +[2022-08-03 06:35:01,285][train_inner][INFO] - {"epoch": 12, "update": 11.382, "loss": "2.108", "ppl": "4.31", "wps": "368241", "ups": "3.1", "wpb": "118851", "bsz": "256", "num_updates": "585800", "lr": "0.000418384", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "191214"} +[2022-08-03 06:36:05,831][train_inner][INFO] - {"epoch": 12, "update": 11.386, "loss": "2.116", "ppl": "4.34", "wps": "364629", "ups": "3.1", "wpb": "117674", "bsz": "256", "num_updates": "586000", "lr": "0.000418182", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "191278"} +[2022-08-03 06:37:10,929][train_inner][INFO] - {"epoch": 12, "update": 11.39, "loss": "2.107", "ppl": "4.31", "wps": "363735", "ups": "3.07", "wpb": "118389", "bsz": "256", "num_updates": "586200", "lr": "0.00041798", "gnorm": "0.695", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "191343"} +[2022-08-03 06:38:16,932][train_inner][INFO] - {"epoch": 12, "update": 11.393, "loss": "2.108", "ppl": "4.31", "wps": "358691", "ups": "3.03", "wpb": "118372", "bsz": "256", "num_updates": "586400", "lr": "0.000417778", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "22.5", "wall": "191409"} +[2022-08-03 06:39:21,622][train_inner][INFO] - {"epoch": 12, "update": 11.397, "loss": "2.114", "ppl": "4.33", "wps": "364190", "ups": "3.09", "wpb": "117795", "bsz": "256", "num_updates": "586600", "lr": "0.000417576", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "191474"} +[2022-08-03 06:40:26,388][train_inner][INFO] - {"epoch": 12, "update": 11.401, "loss": "2.108", "ppl": "4.31", "wps": "365081", "ups": "3.09", "wpb": "118223", "bsz": "256", "num_updates": "586800", "lr": "0.000417374", "gnorm": "0.698", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "191539"} +[2022-08-03 06:41:31,233][train_inner][INFO] - {"epoch": 12, "update": 11.405, "loss": "2.111", "ppl": "4.32", "wps": "364892", "ups": "3.08", "wpb": "118305", "bsz": "256", "num_updates": "587000", "lr": "0.000417172", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.8", "wall": "191604"} +[2022-08-03 06:42:35,920][train_inner][INFO] - {"epoch": 12, "update": 11.409, "loss": "2.117", "ppl": "4.34", "wps": "366084", "ups": "3.09", "wpb": "118403", "bsz": "256", "num_updates": "587200", "lr": "0.00041697", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "191668"} +[2022-08-03 06:43:40,468][train_inner][INFO] - {"epoch": 12, "update": 11.413, "loss": "2.112", "ppl": "4.32", "wps": "364782", "ups": "3.1", "wpb": "117729", "bsz": "256", "num_updates": "587400", "lr": "0.000416768", "gnorm": "0.699", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.1", "wall": "191733"} +[2022-08-03 06:44:45,341][train_inner][INFO] - {"epoch": 12, "update": 11.417, "loss": "2.113", "ppl": "4.33", "wps": "366743", "ups": "3.08", "wpb": "118957", "bsz": "256", "num_updates": "587600", "lr": "0.000416566", "gnorm": "0.694", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "191798"} +[2022-08-03 06:45:43,228][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 06:45:51,030][train_inner][INFO] - {"epoch": 12, "update": 11.421, "loss": "2.112", "ppl": "4.32", "wps": "361053", "ups": "3.04", "wpb": "118584", "bsz": "256", "num_updates": "587800", "lr": "0.000416364", "gnorm": "0.7", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "191863"} +[2022-08-03 06:45:54,825][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 06:46:55,988][train_inner][INFO] - {"epoch": 12, "update": 11.425, "loss": "2.119", "ppl": "4.34", "wps": "363257", "ups": "3.08", "wpb": "117982", "bsz": "256", "num_updates": "588000", "lr": "0.000416162", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "191928"} +[2022-08-03 06:48:00,653][train_inner][INFO] - {"epoch": 12, "update": 11.428, "loss": "2.116", "ppl": "4.34", "wps": "365578", "ups": "3.09", "wpb": "118198", "bsz": "256", "num_updates": "588200", "lr": "0.00041596", "gnorm": "0.714", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "191993"} +[2022-08-03 06:49:06,693][train_inner][INFO] - {"epoch": 12, "update": 11.432, "loss": "2.108", "ppl": "4.31", "wps": "358253", "ups": "3.03", "wpb": "118294", "bsz": "256", "num_updates": "588400", "lr": "0.000415758", "gnorm": "0.695", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "25.6", "wall": "192059"} +[2022-08-03 06:50:11,151][train_inner][INFO] - {"epoch": 12, "update": 11.436, "loss": "2.112", "ppl": "4.32", "wps": "367061", "ups": "3.1", "wpb": "118298", "bsz": "256", "num_updates": "588600", "lr": "0.000415556", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "192123"} +[2022-08-03 06:51:15,963][train_inner][INFO] - {"epoch": 12, "update": 11.44, "loss": "2.11", "ppl": "4.32", "wps": "364005", "ups": "3.09", "wpb": "117957", "bsz": "256", "num_updates": "588800", "lr": "0.000415354", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "192188"} +[2022-08-03 06:52:20,879][train_inner][INFO] - {"epoch": 12, "update": 11.444, "loss": "2.11", "ppl": "4.32", "wps": "364674", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "589000", "lr": "0.000415152", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "192253"} +[2022-08-03 06:53:25,701][train_inner][INFO] - {"epoch": 12, "update": 11.448, "loss": "2.115", "ppl": "4.33", "wps": "365049", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "589200", "lr": "0.000414949", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "192318"} +[2022-08-03 06:54:30,700][train_inner][INFO] - {"epoch": 12, "update": 11.452, "loss": "2.11", "ppl": "4.32", "wps": "364890", "ups": "3.08", "wpb": "118587", "bsz": "256", "num_updates": "589400", "lr": "0.000414747", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.9", "wall": "192383"} +[2022-08-03 06:55:35,856][train_inner][INFO] - {"epoch": 12, "update": 11.456, "loss": "2.114", "ppl": "4.33", "wps": "362815", "ups": "3.07", "wpb": "118196", "bsz": "256", "num_updates": "589600", "lr": "0.000414545", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "192448"} +[2022-08-03 06:56:40,509][train_inner][INFO] - {"epoch": 12, "update": 11.46, "loss": "2.106", "ppl": "4.3", "wps": "366599", "ups": "3.09", "wpb": "118508", "bsz": "256", "num_updates": "589800", "lr": "0.000414343", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "192513"} +[2022-08-03 06:57:45,162][train_inner][INFO] - {"epoch": 12, "update": 11.463, "loss": "2.113", "ppl": "4.33", "wps": "364741", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "590000", "lr": "0.000414141", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "192577"} +[2022-08-03 06:58:50,221][train_inner][INFO] - {"epoch": 12, "update": 11.467, "loss": "2.108", "ppl": "4.31", "wps": "365536", "ups": "3.07", "wpb": "118904", "bsz": "256", "num_updates": "590200", "lr": "0.000413939", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "192643"} +[2022-08-03 06:59:55,086][train_inner][INFO] - {"epoch": 12, "update": 11.471, "loss": "2.107", "ppl": "4.31", "wps": "363860", "ups": "3.08", "wpb": "118008", "bsz": "256", "num_updates": "590400", "lr": "0.000413737", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "192707"} +[2022-08-03 07:00:59,729][train_inner][INFO] - {"epoch": 12, "update": 11.475, "loss": "2.109", "ppl": "4.31", "wps": "367273", "ups": "3.09", "wpb": "118707", "bsz": "256", "num_updates": "590600", "lr": "0.000413535", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "192772"} +[2022-08-03 07:02:04,570][train_inner][INFO] - {"epoch": 12, "update": 11.479, "loss": "2.107", "ppl": "4.31", "wps": "365471", "ups": "3.08", "wpb": "118486", "bsz": "256", "num_updates": "590800", "lr": "0.000413333", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "192837"} +[2022-08-03 07:03:09,573][train_inner][INFO] - {"epoch": 12, "update": 11.483, "loss": "2.109", "ppl": "4.31", "wps": "363775", "ups": "3.08", "wpb": "118230", "bsz": "256", "num_updates": "591000", "lr": "0.000413131", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "192902"} +[2022-08-03 07:03:53,723][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 07:04:14,529][train_inner][INFO] - {"epoch": 12, "update": 11.487, "loss": "2.112", "ppl": "4.32", "wps": "363415", "ups": "3.08", "wpb": "118029", "bsz": "256", "num_updates": "591200", "lr": "0.000412929", "gnorm": "0.699", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "192967"} +[2022-08-03 07:05:19,213][train_inner][INFO] - {"epoch": 12, "update": 11.491, "loss": "2.113", "ppl": "4.32", "wps": "364367", "ups": "3.09", "wpb": "117842", "bsz": "256", "num_updates": "591400", "lr": "0.000412727", "gnorm": "0.701", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.6", "wall": "193032"} +[2022-08-03 07:06:24,068][train_inner][INFO] - {"epoch": 12, "update": 11.495, "loss": "2.113", "ppl": "4.33", "wps": "363587", "ups": "3.08", "wpb": "117900", "bsz": "256", "num_updates": "591600", "lr": "0.000412525", "gnorm": "0.701", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "193096"} +[2022-08-03 07:07:28,902][train_inner][INFO] - {"epoch": 12, "update": 11.498, "loss": "2.112", "ppl": "4.32", "wps": "365892", "ups": "3.08", "wpb": "118611", "bsz": "256", "num_updates": "591800", "lr": "0.000412323", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "193161"} +[2022-08-03 07:08:33,683][train_inner][INFO] - {"epoch": 12, "update": 11.502, "loss": "2.105", "ppl": "4.3", "wps": "366998", "ups": "3.09", "wpb": "118871", "bsz": "256", "num_updates": "592000", "lr": "0.000412121", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "193226"} +[2022-08-03 07:09:39,503][train_inner][INFO] - {"epoch": 12, "update": 11.506, "loss": "2.11", "ppl": "4.32", "wps": "358607", "ups": "3.04", "wpb": "118015", "bsz": "256", "num_updates": "592200", "lr": "0.000411919", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "193292"} +[2022-08-03 07:10:44,329][train_inner][INFO] - {"epoch": 12, "update": 11.51, "loss": "2.112", "ppl": "4.32", "wps": "364826", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "592400", "lr": "0.000411717", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "193357"} +[2022-08-03 07:11:49,310][train_inner][INFO] - {"epoch": 12, "update": 11.514, "loss": "2.101", "ppl": "4.29", "wps": "364728", "ups": "3.08", "wpb": "118499", "bsz": "256", "num_updates": "592600", "lr": "0.000411515", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "193422"} +[2022-08-03 07:12:54,128][train_inner][INFO] - {"epoch": 12, "update": 11.518, "loss": "2.106", "ppl": "4.3", "wps": "362816", "ups": "3.09", "wpb": "117584", "bsz": "256", "num_updates": "592800", "lr": "0.000411313", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "193486"} +[2022-08-03 07:13:58,820][train_inner][INFO] - {"epoch": 12, "update": 11.522, "loss": "2.102", "ppl": "4.29", "wps": "363375", "ups": "3.09", "wpb": "117535", "bsz": "256", "num_updates": "593000", "lr": "0.000411111", "gnorm": "0.699", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.6", "wall": "193551"} +[2022-08-03 07:15:03,977][train_inner][INFO] - {"epoch": 12, "update": 11.526, "loss": "2.112", "ppl": "4.32", "wps": "364267", "ups": "3.07", "wpb": "118671", "bsz": "256", "num_updates": "593200", "lr": "0.000410909", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "193616"} +[2022-08-03 07:16:08,604][train_inner][INFO] - {"epoch": 12, "update": 11.53, "loss": "2.112", "ppl": "4.32", "wps": "366449", "ups": "3.09", "wpb": "118410", "bsz": "256", "num_updates": "593400", "lr": "0.000410707", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "193681"} +[2022-08-03 07:17:13,715][train_inner][INFO] - {"epoch": 12, "update": 11.533, "loss": "2.107", "ppl": "4.31", "wps": "364705", "ups": "3.07", "wpb": "118730", "bsz": "256", "num_updates": "593600", "lr": "0.000410505", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "193746"} +[2022-08-03 07:18:18,545][train_inner][INFO] - {"epoch": 12, "update": 11.537, "loss": "2.106", "ppl": "4.3", "wps": "364976", "ups": "3.09", "wpb": "118306", "bsz": "256", "num_updates": "593800", "lr": "0.000410303", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "193811"} +[2022-08-03 07:19:23,022][train_inner][INFO] - {"epoch": 12, "update": 11.541, "loss": "2.107", "ppl": "4.31", "wps": "367425", "ups": "3.1", "wpb": "118450", "bsz": "256", "num_updates": "594000", "lr": "0.000410101", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "193875"} +[2022-08-03 07:20:28,038][train_inner][INFO] - {"epoch": 12, "update": 11.545, "loss": "2.107", "ppl": "4.31", "wps": "366616", "ups": "3.08", "wpb": "119178", "bsz": "256", "num_updates": "594200", "lr": "0.000409899", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "193940"} +[2022-08-03 07:21:32,902][train_inner][INFO] - {"epoch": 12, "update": 11.549, "loss": "2.103", "ppl": "4.3", "wps": "365855", "ups": "3.08", "wpb": "118652", "bsz": "256", "num_updates": "594400", "lr": "0.000409697", "gnorm": "0.702", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "194005"} +[2022-08-03 07:22:38,407][train_inner][INFO] - {"epoch": 12, "update": 11.553, "loss": "2.107", "ppl": "4.31", "wps": "363335", "ups": "3.05", "wpb": "119000", "bsz": "256", "num_updates": "594600", "lr": "0.000409495", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "194071"} +[2022-08-03 07:23:43,444][train_inner][INFO] - {"epoch": 12, "update": 11.557, "loss": "2.106", "ppl": "4.31", "wps": "363227", "ups": "3.08", "wpb": "118114", "bsz": "256", "num_updates": "594800", "lr": "0.000409293", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "194136"} +[2022-08-03 07:24:48,558][train_inner][INFO] - {"epoch": 12, "update": 11.561, "loss": "2.107", "ppl": "4.31", "wps": "364421", "ups": "3.07", "wpb": "118643", "bsz": "256", "num_updates": "595000", "lr": "0.000409091", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "194201"} +[2022-08-03 07:25:53,461][train_inner][INFO] - {"epoch": 12, "update": 11.564, "loss": "2.105", "ppl": "4.3", "wps": "364136", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "595200", "lr": "0.000408889", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "194266"} +[2022-08-03 07:26:59,463][train_inner][INFO] - {"epoch": 12, "update": 11.568, "loss": "2.106", "ppl": "4.31", "wps": "356299", "ups": "3.03", "wpb": "117580", "bsz": "256", "num_updates": "595400", "lr": "0.000408687", "gnorm": "0.701", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "194332"} +[2022-08-03 07:28:04,619][train_inner][INFO] - {"epoch": 12, "update": 11.572, "loss": "2.113", "ppl": "4.33", "wps": "363722", "ups": "3.07", "wpb": "118491", "bsz": "256", "num_updates": "595600", "lr": "0.000408485", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "194397"} +[2022-08-03 07:29:08,926][train_inner][INFO] - {"epoch": 12, "update": 11.576, "loss": "2.111", "ppl": "4.32", "wps": "365864", "ups": "3.11", "wpb": "117636", "bsz": "256", "num_updates": "595800", "lr": "0.000408283", "gnorm": "0.705", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.2", "wall": "194461"} +[2022-08-03 07:30:14,131][train_inner][INFO] - {"epoch": 12, "update": 11.58, "loss": "2.108", "ppl": "4.31", "wps": "363205", "ups": "3.07", "wpb": "118413", "bsz": "256", "num_updates": "596000", "lr": "0.000408081", "gnorm": "0.698", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.7", "wall": "194526"} +[2022-08-03 07:31:19,250][train_inner][INFO] - {"epoch": 12, "update": 11.584, "loss": "2.103", "ppl": "4.3", "wps": "365333", "ups": "3.07", "wpb": "118949", "bsz": "256", "num_updates": "596200", "lr": "0.000407879", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.6", "wall": "194592"} +[2022-08-03 07:32:24,204][train_inner][INFO] - {"epoch": 12, "update": 11.588, "loss": "2.105", "ppl": "4.3", "wps": "364417", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "596400", "lr": "0.000407677", "gnorm": "0.699", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.2", "wall": "194657"} +[2022-08-03 07:33:28,880][train_inner][INFO] - {"epoch": 12, "update": 11.592, "loss": "2.108", "ppl": "4.31", "wps": "365603", "ups": "3.09", "wpb": "118226", "bsz": "256", "num_updates": "596600", "lr": "0.000407475", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.1", "wall": "194721"} +[2022-08-03 07:34:27,559][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 07:34:34,056][train_inner][INFO] - {"epoch": 12, "update": 11.596, "loss": "2.109", "ppl": "4.31", "wps": "362992", "ups": "3.07", "wpb": "118291", "bsz": "256", "num_updates": "596800", "lr": "0.000407273", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "194786"} +[2022-08-03 07:34:55,182][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 07:35:02,452][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 07:35:23,676][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 07:35:39,288][train_inner][INFO] - {"epoch": 12, "update": 11.599, "loss": "2.111", "ppl": "4.32", "wps": "363176", "ups": "3.07", "wpb": "118452", "bsz": "256", "num_updates": "597000", "lr": "0.000407071", "gnorm": "0.726", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "194852"} +[2022-08-03 07:36:44,094][train_inner][INFO] - {"epoch": 12, "update": 11.603, "loss": "2.11", "ppl": "4.32", "wps": "364752", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "597200", "lr": "0.000406869", "gnorm": "0.738", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "194916"} +[2022-08-03 07:37:49,065][train_inner][INFO] - {"epoch": 12, "update": 11.607, "loss": "2.103", "ppl": "4.3", "wps": "364566", "ups": "3.08", "wpb": "118428", "bsz": "256", "num_updates": "597400", "lr": "0.000406667", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.8", "wall": "194981"} +[2022-08-03 07:38:54,157][train_inner][INFO] - {"epoch": 12, "update": 11.611, "loss": "2.107", "ppl": "4.31", "wps": "363067", "ups": "3.07", "wpb": "118164", "bsz": "256", "num_updates": "597600", "lr": "0.000406465", "gnorm": "0.782", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "21.6", "wall": "195046"} +[2022-08-03 07:39:21,618][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +[2022-08-03 07:39:58,786][train_inner][INFO] - {"epoch": 12, "update": 11.615, "loss": "2.111", "ppl": "4.32", "wps": "365477", "ups": "3.09", "wpb": "118099", "bsz": "256", "num_updates": "597800", "lr": "0.000406263", "gnorm": "0.768", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.5", "wall": "195111"} +[2022-08-03 07:41:03,387][train_inner][INFO] - {"epoch": 12, "update": 11.619, "loss": "2.113", "ppl": "4.33", "wps": "366388", "ups": "3.1", "wpb": "118344", "bsz": "256", "num_updates": "598000", "lr": "0.000406061", "gnorm": "0.764", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "195176"} +[2022-08-03 07:42:08,478][train_inner][INFO] - {"epoch": 12, "update": 11.623, "loss": "2.109", "ppl": "4.31", "wps": "362714", "ups": "3.07", "wpb": "118046", "bsz": "256", "num_updates": "598200", "lr": "0.000405859", "gnorm": "0.709", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.5", "wall": "195241"} +[2022-08-03 07:43:13,243][train_inner][INFO] - {"epoch": 12, "update": 11.627, "loss": "2.105", "ppl": "4.3", "wps": "365332", "ups": "3.09", "wpb": "118302", "bsz": "256", "num_updates": "598400", "lr": "0.000405657", "gnorm": "0.706", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21", "wall": "195306"} +[2022-08-03 07:44:18,280][train_inner][INFO] - {"epoch": 12, "update": 11.631, "loss": "2.102", "ppl": "4.29", "wps": "363847", "ups": "3.08", "wpb": "118317", "bsz": "256", "num_updates": "598600", "lr": "0.000405455", "gnorm": "0.7", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.4", "wall": "195371"} +[2022-08-03 07:45:22,951][train_inner][INFO] - {"epoch": 12, "update": 11.634, "loss": "2.103", "ppl": "4.3", "wps": "365960", "ups": "3.09", "wpb": "118332", "bsz": "256", "num_updates": "598800", "lr": "0.000405253", "gnorm": "0.697", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "195435"} +[2022-08-03 07:46:28,127][train_inner][INFO] - {"epoch": 12, "update": 11.638, "loss": "2.108", "ppl": "4.31", "wps": "363120", "ups": "3.07", "wpb": "118333", "bsz": "256", "num_updates": "599000", "lr": "0.000405051", "gnorm": "0.702", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "195500"} +[2022-08-03 07:47:32,872][train_inner][INFO] - {"epoch": 12, "update": 11.642, "loss": "2.108", "ppl": "4.31", "wps": "365081", "ups": "3.09", "wpb": "118185", "bsz": "256", "num_updates": "599200", "lr": "0.000404848", "gnorm": "0.749", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "22.8", "wall": "195565"} +[2022-08-03 07:48:38,076][train_inner][INFO] - {"epoch": 12, "update": 11.646, "loss": "2.104", "ppl": "4.3", "wps": "361028", "ups": "3.07", "wpb": "117699", "bsz": "256", "num_updates": "599400", "lr": "0.000404646", "gnorm": "0.704", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "28.2", "wall": "195630"} +[2022-08-03 07:49:42,788][train_inner][INFO] - {"epoch": 12, "update": 11.65, "loss": "2.1", "ppl": "4.29", "wps": "364375", "ups": "3.09", "wpb": "117896", "bsz": "256", "num_updates": "599600", "lr": "0.000404444", "gnorm": "0.704", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24", "wall": "195695"} +[2022-08-03 07:50:47,454][train_inner][INFO] - {"epoch": 12, "update": 11.654, "loss": "2.109", "ppl": "4.32", "wps": "363617", "ups": "3.09", "wpb": "117566", "bsz": "256", "num_updates": "599800", "lr": "0.000404242", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "195760"} +[2022-08-03 07:51:51,991][train_inner][INFO] - {"epoch": 12, "update": 11.658, "loss": "2.108", "ppl": "4.31", "wps": "364789", "ups": "3.1", "wpb": "117710", "bsz": "256", "num_updates": "600000", "lr": "0.00040404", "gnorm": "0.705", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.8", "wall": "195824"} +[2022-08-03 07:51:51,992][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 07:52:14,880][valid][INFO] - {"epoch": 12, "valid_loss": "2.011", "valid_ppl": "4.03", "valid_wps": "1.60884e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "600000", "valid_best_loss": "2.011"} +[2022-08-03 07:52:14,885][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 12 @ 600000 updates +[2022-08-03 07:52:14,886][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_12_600000.pt +[2022-08-03 07:52:24,542][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_12_600000.pt +[2022-08-03 07:52:54,886][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_12_600000.pt (epoch 12 @ 600000 updates, score 2.011) (writing took 40.00082699768245 seconds) +[2022-08-03 07:53:59,519][train_inner][INFO] - {"epoch": 12, "update": 11.662, "loss": "2.106", "ppl": "4.31", "wps": "186006", "ups": "1.57", "wpb": "118605", "bsz": "256", "num_updates": "600200", "lr": "0.000403838", "gnorm": "0.698", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.5", "wall": "195952"} +[2022-08-03 07:55:04,871][train_inner][INFO] - {"epoch": 12, "update": 11.666, "loss": "2.105", "ppl": "4.3", "wps": "362757", "ups": "3.06", "wpb": "118533", "bsz": "256", "num_updates": "600400", "lr": "0.000403636", "gnorm": "0.702", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "196017"} +[2022-08-03 07:56:10,122][train_inner][INFO] - {"epoch": 12, "update": 11.669, "loss": "2.103", "ppl": "4.3", "wps": "363375", "ups": "3.07", "wpb": "118551", "bsz": "256", "num_updates": "600600", "lr": "0.000403434", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "196082"} +[2022-08-03 07:57:15,186][train_inner][INFO] - {"epoch": 12, "update": 11.673, "loss": "2.101", "ppl": "4.29", "wps": "363594", "ups": "3.07", "wpb": "118283", "bsz": "256", "num_updates": "600800", "lr": "0.000403232", "gnorm": "0.712", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.1", "wall": "196148"} +[2022-08-03 07:58:19,540][train_inner][INFO] - {"epoch": 12, "update": 11.677, "loss": "2.113", "ppl": "4.32", "wps": "364386", "ups": "3.11", "wpb": "117246", "bsz": "256", "num_updates": "601000", "lr": "0.00040303", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "196212"} +[2022-08-03 07:59:24,492][train_inner][INFO] - {"epoch": 12, "update": 11.681, "loss": "2.1", "ppl": "4.29", "wps": "363586", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "601200", "lr": "0.000402828", "gnorm": "0.702", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.8", "wall": "196277"} +[2022-08-03 08:00:29,591][train_inner][INFO] - {"epoch": 12, "update": 11.685, "loss": "2.109", "ppl": "4.31", "wps": "362580", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "601400", "lr": "0.000402626", "gnorm": "0.709", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "196342"} +[2022-08-03 08:01:34,677][train_inner][INFO] - {"epoch": 12, "update": 11.689, "loss": "2.105", "ppl": "4.3", "wps": "363649", "ups": "3.07", "wpb": "118340", "bsz": "256", "num_updates": "601600", "lr": "0.000402424", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "196407"} +[2022-08-03 08:02:40,396][train_inner][INFO] - {"epoch": 12, "update": 11.693, "loss": "2.108", "ppl": "4.31", "wps": "359059", "ups": "3.04", "wpb": "117984", "bsz": "256", "num_updates": "601800", "lr": "0.000402222", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "196473"} +[2022-08-03 08:03:45,674][train_inner][INFO] - {"epoch": 12, "update": 11.697, "loss": "2.103", "ppl": "4.3", "wps": "363145", "ups": "3.06", "wpb": "118525", "bsz": "256", "num_updates": "602000", "lr": "0.00040202", "gnorm": "0.7", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "196538"} +[2022-08-03 08:04:50,490][train_inner][INFO] - {"epoch": 12, "update": 11.701, "loss": "2.109", "ppl": "4.31", "wps": "364067", "ups": "3.09", "wpb": "117984", "bsz": "256", "num_updates": "602200", "lr": "0.000401818", "gnorm": "0.705", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "196603"} +[2022-08-03 08:05:55,648][train_inner][INFO] - {"epoch": 12, "update": 11.704, "loss": "2.103", "ppl": "4.3", "wps": "363155", "ups": "3.07", "wpb": "118311", "bsz": "256", "num_updates": "602400", "lr": "0.000401616", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "196668"} +[2022-08-03 08:06:46,020][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 08:07:01,422][train_inner][INFO] - {"epoch": 12, "update": 11.708, "loss": "2.098", "ppl": "4.28", "wps": "361199", "ups": "3.04", "wpb": "118786", "bsz": "256", "num_updates": "602600", "lr": "0.000401414", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "196734"} +[2022-08-03 08:08:05,817][train_inner][INFO] - {"epoch": 12, "update": 11.712, "loss": "2.107", "ppl": "4.31", "wps": "365537", "ups": "3.11", "wpb": "117693", "bsz": "256", "num_updates": "602800", "lr": "0.000401212", "gnorm": "0.706", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "196798"} +[2022-08-03 08:09:10,667][train_inner][INFO] - {"epoch": 12, "update": 11.716, "loss": "2.103", "ppl": "4.3", "wps": "365250", "ups": "3.08", "wpb": "118430", "bsz": "256", "num_updates": "603000", "lr": "0.00040101", "gnorm": "0.702", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.8", "wall": "196863"} +[2022-08-03 08:10:15,693][train_inner][INFO] - {"epoch": 12, "update": 11.72, "loss": "2.099", "ppl": "4.28", "wps": "364758", "ups": "3.08", "wpb": "118593", "bsz": "256", "num_updates": "603200", "lr": "0.000400808", "gnorm": "0.698", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.9", "wall": "196928"} +[2022-08-03 08:11:20,585][train_inner][INFO] - {"epoch": 12, "update": 11.724, "loss": "2.099", "ppl": "4.28", "wps": "364294", "ups": "3.08", "wpb": "118196", "bsz": "256", "num_updates": "603400", "lr": "0.000400606", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "196993"} +[2022-08-03 08:12:25,359][train_inner][INFO] - {"epoch": 12, "update": 11.728, "loss": "2.101", "ppl": "4.29", "wps": "364875", "ups": "3.09", "wpb": "118171", "bsz": "256", "num_updates": "603600", "lr": "0.000400404", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.2", "wall": "197058"} +[2022-08-03 08:13:30,198][train_inner][INFO] - {"epoch": 12, "update": 11.732, "loss": "2.1", "ppl": "4.29", "wps": "365659", "ups": "3.08", "wpb": "118543", "bsz": "256", "num_updates": "603800", "lr": "0.000400202", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "197123"} +[2022-08-03 08:14:34,759][train_inner][INFO] - {"epoch": 12, "update": 11.735, "loss": "2.102", "ppl": "4.29", "wps": "367598", "ups": "3.1", "wpb": "118661", "bsz": "256", "num_updates": "604000", "lr": "0.0004", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "197187"} +[2022-08-03 08:15:39,795][train_inner][INFO] - {"epoch": 12, "update": 11.739, "loss": "2.102", "ppl": "4.29", "wps": "364439", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "604200", "lr": "0.000399798", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.6", "wall": "197252"} +[2022-08-03 08:16:44,393][train_inner][INFO] - {"epoch": 12, "update": 11.743, "loss": "2.101", "ppl": "4.29", "wps": "366537", "ups": "3.1", "wpb": "118386", "bsz": "256", "num_updates": "604400", "lr": "0.000399596", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.4", "wall": "197317"} +[2022-08-03 08:17:49,063][train_inner][INFO] - {"epoch": 12, "update": 11.747, "loss": "2.102", "ppl": "4.29", "wps": "367349", "ups": "3.09", "wpb": "118780", "bsz": "256", "num_updates": "604600", "lr": "0.000399394", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "26", "wall": "197381"} +[2022-08-03 08:18:53,549][train_inner][INFO] - {"epoch": 12, "update": 11.751, "loss": "2.104", "ppl": "4.3", "wps": "364281", "ups": "3.1", "wpb": "117454", "bsz": "256", "num_updates": "604800", "lr": "0.000399192", "gnorm": "0.707", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "197446"} +[2022-08-03 08:19:58,277][train_inner][INFO] - {"epoch": 12, "update": 11.755, "loss": "2.101", "ppl": "4.29", "wps": "364370", "ups": "3.09", "wpb": "117922", "bsz": "256", "num_updates": "605000", "lr": "0.00039899", "gnorm": "0.705", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "197511"} +[2022-08-03 08:21:03,131][train_inner][INFO] - {"epoch": 12, "update": 11.759, "loss": "2.099", "ppl": "4.28", "wps": "366015", "ups": "3.08", "wpb": "118686", "bsz": "256", "num_updates": "605200", "lr": "0.000398788", "gnorm": "0.702", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.4", "wall": "197575"} +[2022-08-03 08:22:08,157][train_inner][INFO] - {"epoch": 12, "update": 11.763, "loss": "2.096", "ppl": "4.28", "wps": "366046", "ups": "3.08", "wpb": "119009", "bsz": "256", "num_updates": "605400", "lr": "0.000398586", "gnorm": "0.698", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "197640"} +[2022-08-03 08:23:12,989][train_inner][INFO] - {"epoch": 12, "update": 11.767, "loss": "2.105", "ppl": "4.3", "wps": "365608", "ups": "3.08", "wpb": "118515", "bsz": "256", "num_updates": "605600", "lr": "0.000398384", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "197705"} +[2022-08-03 08:24:17,852][train_inner][INFO] - {"epoch": 12, "update": 11.77, "loss": "2.105", "ppl": "4.3", "wps": "364631", "ups": "3.08", "wpb": "118253", "bsz": "256", "num_updates": "605800", "lr": "0.000398182", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "197770"} +[2022-08-03 08:25:22,076][train_inner][INFO] - {"epoch": 12, "update": 11.774, "loss": "2.098", "ppl": "4.28", "wps": "369134", "ups": "3.11", "wpb": "118533", "bsz": "256", "num_updates": "606000", "lr": "0.00039798", "gnorm": "0.701", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "197834"} +[2022-08-03 08:26:26,659][train_inner][INFO] - {"epoch": 12, "update": 11.778, "loss": "2.099", "ppl": "4.29", "wps": "369667", "ups": "3.1", "wpb": "119370", "bsz": "256", "num_updates": "606200", "lr": "0.000397778", "gnorm": "0.701", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "197899"} +[2022-08-03 08:27:31,780][train_inner][INFO] - {"epoch": 12, "update": 11.782, "loss": "2.104", "ppl": "4.3", "wps": "361571", "ups": "3.07", "wpb": "117729", "bsz": "256", "num_updates": "606400", "lr": "0.000397576", "gnorm": "0.707", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26", "wall": "197964"} +[2022-08-03 08:28:36,549][train_inner][INFO] - {"epoch": 12, "update": 11.786, "loss": "2.102", "ppl": "4.29", "wps": "365200", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "606600", "lr": "0.000397374", "gnorm": "0.706", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "198029"} +[2022-08-03 08:29:41,501][train_inner][INFO] - {"epoch": 12, "update": 11.79, "loss": "2.099", "ppl": "4.28", "wps": "363955", "ups": "3.08", "wpb": "118197", "bsz": "256", "num_updates": "606800", "lr": "0.000397172", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "198094"} +[2022-08-03 08:30:46,487][train_inner][INFO] - {"epoch": 12, "update": 11.794, "loss": "2.104", "ppl": "4.3", "wps": "363848", "ups": "3.08", "wpb": "118224", "bsz": "256", "num_updates": "607000", "lr": "0.00039697", "gnorm": "0.704", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "198159"} +[2022-08-03 08:31:51,189][train_inner][INFO] - {"epoch": 12, "update": 11.798, "loss": "2.102", "ppl": "4.29", "wps": "367138", "ups": "3.09", "wpb": "118770", "bsz": "256", "num_updates": "607200", "lr": "0.000396768", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "198224"} +[2022-08-03 08:32:56,062][train_inner][INFO] - {"epoch": 12, "update": 11.802, "loss": "2.098", "ppl": "4.28", "wps": "365879", "ups": "3.08", "wpb": "118677", "bsz": "256", "num_updates": "607400", "lr": "0.000396566", "gnorm": "0.704", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "198288"} +[2022-08-03 08:34:01,137][train_inner][INFO] - {"epoch": 12, "update": 11.805, "loss": "2.1", "ppl": "4.29", "wps": "363116", "ups": "3.07", "wpb": "118147", "bsz": "256", "num_updates": "607600", "lr": "0.000396364", "gnorm": "0.705", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "198353"} +[2022-08-03 08:35:06,765][train_inner][INFO] - {"epoch": 12, "update": 11.809, "loss": "2.097", "ppl": "4.28", "wps": "359663", "ups": "3.05", "wpb": "118012", "bsz": "256", "num_updates": "607800", "lr": "0.000396162", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "198419"} +[2022-08-03 08:36:11,365][train_inner][INFO] - {"epoch": 12, "update": 11.813, "loss": "2.104", "ppl": "4.3", "wps": "368225", "ups": "3.1", "wpb": "118933", "bsz": "256", "num_updates": "608000", "lr": "0.00039596", "gnorm": "0.702", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "198484"} +[2022-08-03 08:37:15,716][train_inner][INFO] - {"epoch": 12, "update": 11.817, "loss": "2.104", "ppl": "4.3", "wps": "366250", "ups": "3.11", "wpb": "117841", "bsz": "256", "num_updates": "608200", "lr": "0.000395758", "gnorm": "0.709", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "198548"} +[2022-08-03 08:38:21,432][train_inner][INFO] - {"epoch": 12, "update": 11.821, "loss": "2.104", "ppl": "4.3", "wps": "359074", "ups": "3.04", "wpb": "117983", "bsz": "256", "num_updates": "608400", "lr": "0.000395556", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "198614"} +[2022-08-03 08:39:26,123][train_inner][INFO] - {"epoch": 12, "update": 11.825, "loss": "2.102", "ppl": "4.29", "wps": "366104", "ups": "3.09", "wpb": "118416", "bsz": "256", "num_updates": "608600", "lr": "0.000395354", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.5", "wall": "198678"} +[2022-08-03 08:40:31,287][train_inner][INFO] - {"epoch": 12, "update": 11.829, "loss": "2.097", "ppl": "4.28", "wps": "362166", "ups": "3.07", "wpb": "118001", "bsz": "256", "num_updates": "608800", "lr": "0.000395152", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "198744"} +[2022-08-03 08:41:36,110][train_inner][INFO] - {"epoch": 12, "update": 11.833, "loss": "2.102", "ppl": "4.29", "wps": "363936", "ups": "3.09", "wpb": "117954", "bsz": "256", "num_updates": "609000", "lr": "0.000394949", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "198808"} +[2022-08-03 08:42:40,878][train_inner][INFO] - {"epoch": 12, "update": 11.836, "loss": "2.108", "ppl": "4.31", "wps": "364485", "ups": "3.09", "wpb": "118033", "bsz": "256", "num_updates": "609200", "lr": "0.000394747", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "198873"} +[2022-08-03 08:43:45,426][train_inner][INFO] - {"epoch": 12, "update": 11.84, "loss": "2.101", "ppl": "4.29", "wps": "365911", "ups": "3.1", "wpb": "118093", "bsz": "256", "num_updates": "609400", "lr": "0.000394545", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.3", "wall": "198938"} +[2022-08-03 08:44:50,264][train_inner][INFO] - {"epoch": 12, "update": 11.844, "loss": "2.103", "ppl": "4.3", "wps": "363701", "ups": "3.08", "wpb": "117906", "bsz": "256", "num_updates": "609600", "lr": "0.000394343", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "199003"} +[2022-08-03 08:45:55,115][train_inner][INFO] - {"epoch": 12, "update": 11.848, "loss": "2.105", "ppl": "4.3", "wps": "365082", "ups": "3.08", "wpb": "118378", "bsz": "256", "num_updates": "609800", "lr": "0.000394141", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "199067"} +[2022-08-03 08:47:01,515][train_inner][INFO] - {"epoch": 12, "update": 11.852, "loss": "2.096", "ppl": "4.28", "wps": "357762", "ups": "3.01", "wpb": "118776", "bsz": "256", "num_updates": "610000", "lr": "0.000393939", "gnorm": "0.703", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.6", "wall": "199134"} +[2022-08-03 08:48:07,265][train_inner][INFO] - {"epoch": 12, "update": 11.856, "loss": "2.108", "ppl": "4.31", "wps": "359388", "ups": "3.04", "wpb": "118147", "bsz": "256", "num_updates": "610200", "lr": "0.000393737", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "199200"} +[2022-08-03 08:49:12,371][train_inner][INFO] - {"epoch": 12, "update": 11.86, "loss": "2.098", "ppl": "4.28", "wps": "363771", "ups": "3.07", "wpb": "118416", "bsz": "256", "num_updates": "610400", "lr": "0.000393535", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "199265"} +[2022-08-03 08:50:17,041][train_inner][INFO] - {"epoch": 12, "update": 11.864, "loss": "2.104", "ppl": "4.3", "wps": "363727", "ups": "3.09", "wpb": "117611", "bsz": "256", "num_updates": "610600", "lr": "0.000393333", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "199329"} +[2022-08-03 08:51:21,836][train_inner][INFO] - {"epoch": 12, "update": 11.868, "loss": "2.102", "ppl": "4.29", "wps": "364295", "ups": "3.09", "wpb": "118020", "bsz": "256", "num_updates": "610800", "lr": "0.000393131", "gnorm": "0.705", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "199394"} +[2022-08-03 08:51:25,907][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 08:52:26,686][train_inner][INFO] - {"epoch": 12, "update": 11.871, "loss": "2.102", "ppl": "4.29", "wps": "363714", "ups": "3.08", "wpb": "117932", "bsz": "256", "num_updates": "611000", "lr": "0.000392929", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "199459"} +[2022-08-03 08:53:31,795][train_inner][INFO] - {"epoch": 12, "update": 11.875, "loss": "2.096", "ppl": "4.28", "wps": "364580", "ups": "3.07", "wpb": "118688", "bsz": "256", "num_updates": "611200", "lr": "0.000392727", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "199524"} +[2022-08-03 08:54:36,678][train_inner][INFO] - {"epoch": 12, "update": 11.879, "loss": "2.094", "ppl": "4.27", "wps": "364862", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "611400", "lr": "0.000392525", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "199589"} +[2022-08-03 08:55:41,710][train_inner][INFO] - {"epoch": 12, "update": 11.883, "loss": "2.095", "ppl": "4.27", "wps": "364430", "ups": "3.08", "wpb": "118498", "bsz": "256", "num_updates": "611600", "lr": "0.000392323", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "199654"} +[2022-08-03 08:56:46,579][train_inner][INFO] - {"epoch": 12, "update": 11.887, "loss": "2.104", "ppl": "4.3", "wps": "365727", "ups": "3.08", "wpb": "118619", "bsz": "256", "num_updates": "611800", "lr": "0.000392121", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "199719"} +[2022-08-03 08:57:51,395][train_inner][INFO] - {"epoch": 12, "update": 11.891, "loss": "2.096", "ppl": "4.27", "wps": "364644", "ups": "3.09", "wpb": "118172", "bsz": "256", "num_updates": "612000", "lr": "0.000391919", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "199784"} +[2022-08-03 08:58:07,933][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 08:58:56,041][train_inner][INFO] - {"epoch": 12, "update": 11.895, "loss": "2.096", "ppl": "4.28", "wps": "365998", "ups": "3.09", "wpb": "118300", "bsz": "256", "num_updates": "612200", "lr": "0.000391717", "gnorm": "0.704", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "199848"} +[2022-08-03 09:00:01,133][train_inner][INFO] - {"epoch": 12, "update": 11.899, "loss": "2.096", "ppl": "4.27", "wps": "365465", "ups": "3.07", "wpb": "118944", "bsz": "256", "num_updates": "612400", "lr": "0.000391515", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "199913"} +[2022-08-03 09:01:06,127][train_inner][INFO] - {"epoch": 12, "update": 11.903, "loss": "2.102", "ppl": "4.29", "wps": "364974", "ups": "3.08", "wpb": "118601", "bsz": "256", "num_updates": "612600", "lr": "0.000391313", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "199978"} +[2022-08-03 09:02:11,195][train_inner][INFO] - {"epoch": 12, "update": 11.906, "loss": "2.103", "ppl": "4.3", "wps": "362016", "ups": "3.07", "wpb": "117777", "bsz": "255.9", "num_updates": "612800", "lr": "0.000391111", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "200044"} +[2022-08-03 09:03:16,131][train_inner][INFO] - {"epoch": 12, "update": 11.91, "loss": "2.099", "ppl": "4.28", "wps": "362135", "ups": "3.08", "wpb": "117577", "bsz": "256", "num_updates": "613000", "lr": "0.000390909", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "200108"} +[2022-08-03 09:04:20,863][train_inner][INFO] - {"epoch": 12, "update": 11.914, "loss": "2.096", "ppl": "4.27", "wps": "365070", "ups": "3.09", "wpb": "118157", "bsz": "256", "num_updates": "613200", "lr": "0.000390707", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.4", "wall": "200173"} +[2022-08-03 09:05:26,099][train_inner][INFO] - {"epoch": 12, "update": 11.918, "loss": "2.098", "ppl": "4.28", "wps": "363954", "ups": "3.07", "wpb": "118713", "bsz": "256", "num_updates": "613400", "lr": "0.000390505", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "200238"} +[2022-08-03 09:06:31,812][train_inner][INFO] - {"epoch": 12, "update": 11.922, "loss": "2.095", "ppl": "4.27", "wps": "361267", "ups": "3.04", "wpb": "118698", "bsz": "256", "num_updates": "613600", "lr": "0.000390303", "gnorm": "0.707", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "200304"} +[2022-08-03 09:07:36,722][train_inner][INFO] - {"epoch": 12, "update": 11.926, "loss": "2.096", "ppl": "4.28", "wps": "365864", "ups": "3.08", "wpb": "118738", "bsz": "256", "num_updates": "613800", "lr": "0.000390101", "gnorm": "0.705", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "200369"} +[2022-08-03 09:08:41,521][train_inner][INFO] - {"epoch": 12, "update": 11.93, "loss": "2.1", "ppl": "4.29", "wps": "365018", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "614000", "lr": "0.000389899", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "200434"} +[2022-08-03 09:09:47,372][train_inner][INFO] - {"epoch": 12, "update": 11.934, "loss": "2.104", "ppl": "4.3", "wps": "357542", "ups": "3.04", "wpb": "117722", "bsz": "256", "num_updates": "614200", "lr": "0.000389697", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "28.2", "wall": "200500"} +[2022-08-03 09:10:52,382][train_inner][INFO] - {"epoch": 12, "update": 11.938, "loss": "2.097", "ppl": "4.28", "wps": "365720", "ups": "3.08", "wpb": "118876", "bsz": "256", "num_updates": "614400", "lr": "0.000389495", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "200565"} +[2022-08-03 09:11:57,234][train_inner][INFO] - {"epoch": 12, "update": 11.941, "loss": "2.094", "ppl": "4.27", "wps": "364967", "ups": "3.08", "wpb": "118342", "bsz": "256", "num_updates": "614600", "lr": "0.000389293", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "200630"} +[2022-08-03 09:13:02,155][train_inner][INFO] - {"epoch": 12, "update": 11.945, "loss": "2.096", "ppl": "4.28", "wps": "363746", "ups": "3.08", "wpb": "118072", "bsz": "256", "num_updates": "614800", "lr": "0.000389091", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "200694"} +[2022-08-03 09:14:06,851][train_inner][INFO] - {"epoch": 12, "update": 11.949, "loss": "2.099", "ppl": "4.28", "wps": "363786", "ups": "3.09", "wpb": "117677", "bsz": "256", "num_updates": "615000", "lr": "0.000388889", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "200759"} +[2022-08-03 09:15:11,583][train_inner][INFO] - {"epoch": 12, "update": 11.953, "loss": "2.097", "ppl": "4.28", "wps": "367473", "ups": "3.09", "wpb": "118934", "bsz": "256", "num_updates": "615200", "lr": "0.000388687", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "200824"} +[2022-08-03 09:16:16,753][train_inner][INFO] - {"epoch": 12, "update": 11.957, "loss": "2.094", "ppl": "4.27", "wps": "364461", "ups": "3.07", "wpb": "118759", "bsz": "256", "num_updates": "615400", "lr": "0.000388485", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "200889"} +[2022-08-03 09:17:21,378][train_inner][INFO] - {"epoch": 12, "update": 11.961, "loss": "2.1", "ppl": "4.29", "wps": "366056", "ups": "3.09", "wpb": "118279", "bsz": "256", "num_updates": "615600", "lr": "0.000388283", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "200954"} +[2022-08-03 09:18:26,057][train_inner][INFO] - {"epoch": 12, "update": 11.965, "loss": "2.097", "ppl": "4.28", "wps": "366737", "ups": "3.09", "wpb": "118599", "bsz": "256", "num_updates": "615800", "lr": "0.000388081", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "201018"} +[2022-08-03 09:19:30,636][train_inner][INFO] - {"epoch": 12, "update": 11.969, "loss": "2.096", "ppl": "4.28", "wps": "364236", "ups": "3.1", "wpb": "117608", "bsz": "256", "num_updates": "616000", "lr": "0.000387879", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "201083"} +[2022-08-03 09:20:35,732][train_inner][INFO] - {"epoch": 12, "update": 11.972, "loss": "2.09", "ppl": "4.26", "wps": "364482", "ups": "3.07", "wpb": "118630", "bsz": "256", "num_updates": "616200", "lr": "0.000387677", "gnorm": "0.705", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21", "wall": "201148"} +[2022-08-03 09:21:21,381][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 09:21:40,585][train_inner][INFO] - {"epoch": 12, "update": 11.976, "loss": "2.1", "ppl": "4.29", "wps": "367046", "ups": "3.08", "wpb": "119018", "bsz": "256", "num_updates": "616400", "lr": "0.000387475", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "201213"} +[2022-08-03 09:22:45,493][train_inner][INFO] - {"epoch": 12, "update": 11.98, "loss": "2.09", "ppl": "4.26", "wps": "364906", "ups": "3.08", "wpb": "118427", "bsz": "256", "num_updates": "616600", "lr": "0.000387273", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "201278"} +[2022-08-03 09:23:50,442][train_inner][INFO] - {"epoch": 12, "update": 11.984, "loss": "2.1", "ppl": "4.29", "wps": "363395", "ups": "3.08", "wpb": "118008", "bsz": "256", "num_updates": "616800", "lr": "0.000387071", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "201343"} +[2022-08-03 09:24:55,633][train_inner][INFO] - {"epoch": 12, "update": 11.988, "loss": "2.094", "ppl": "4.27", "wps": "364328", "ups": "3.07", "wpb": "118753", "bsz": "256", "num_updates": "617000", "lr": "0.000386869", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "201408"} +[2022-08-03 09:26:00,200][train_inner][INFO] - {"epoch": 12, "update": 11.992, "loss": "2.096", "ppl": "4.28", "wps": "365401", "ups": "3.1", "wpb": "117963", "bsz": "256", "num_updates": "617200", "lr": "0.000386667", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "201473"} +[2022-08-03 09:27:05,161][train_inner][INFO] - {"epoch": 12, "update": 11.996, "loss": "2.098", "ppl": "4.28", "wps": "363846", "ups": "3.08", "wpb": "118176", "bsz": "256", "num_updates": "617400", "lr": "0.000386465", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "201537"} +[2022-08-03 09:28:11,227][train_inner][INFO] - {"epoch": 12, "update": 12.0, "loss": "2.095", "ppl": "4.27", "wps": "356118", "ups": "3.03", "wpb": "117634", "bsz": "256", "num_updates": "617600", "lr": "0.000386263", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.2", "wall": "201604"} +[2022-08-03 09:28:13,814][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 09:28:16,407][fairseq_cli.train][INFO] - end of epoch 12 (average epoch stats below) +[2022-08-03 09:28:16,408][train][INFO] - {"epoch": 12, "train_loss": "2.109", "train_ppl": "4.32", "train_wps": "362641", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "617615", "train_lr": "0.000386247", "train_gnorm": "0.701", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16623", "train_gb_free": "21.5", "train_wall": "201609"} +[2022-08-03 09:28:16,492][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-03 09:28:16,495][fairseq.trainer][INFO] - begin training epoch 13 +[2022-08-03 09:28:16,496][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-03 09:29:30,283][train_inner][INFO] - {"epoch": 13, "update": 12.004, "loss": "2.092", "ppl": "4.26", "wps": "299694", "ups": "2.53", "wpb": "118463", "bsz": "255.4", "num_updates": "617800", "lr": "0.000386061", "gnorm": "0.707", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "201683"} +[2022-08-03 09:30:35,322][train_inner][INFO] - {"epoch": 13, "update": 12.007, "loss": "2.09", "ppl": "4.26", "wps": "365054", "ups": "3.08", "wpb": "118711", "bsz": "255.9", "num_updates": "618000", "lr": "0.000385859", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "201748"} +[2022-08-03 09:31:40,096][train_inner][INFO] - {"epoch": 13, "update": 12.011, "loss": "2.093", "ppl": "4.27", "wps": "363585", "ups": "3.09", "wpb": "117751", "bsz": "256", "num_updates": "618200", "lr": "0.000385657", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "201812"} +[2022-08-03 09:32:45,079][train_inner][INFO] - {"epoch": 13, "update": 12.015, "loss": "2.097", "ppl": "4.28", "wps": "362484", "ups": "3.08", "wpb": "117775", "bsz": "256", "num_updates": "618400", "lr": "0.000385455", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "201877"} +[2022-08-03 09:33:49,439][train_inner][INFO] - {"epoch": 13, "update": 12.019, "loss": "2.095", "ppl": "4.27", "wps": "366837", "ups": "3.11", "wpb": "118036", "bsz": "256", "num_updates": "618600", "lr": "0.000385253", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "201942"} +[2022-08-03 09:34:55,226][train_inner][INFO] - {"epoch": 13, "update": 12.023, "loss": "2.092", "ppl": "4.26", "wps": "359071", "ups": "3.04", "wpb": "118105", "bsz": "256", "num_updates": "618800", "lr": "0.000385051", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "202008"} +[2022-08-03 09:36:00,068][train_inner][INFO] - {"epoch": 13, "update": 12.027, "loss": "2.09", "ppl": "4.26", "wps": "364874", "ups": "3.08", "wpb": "118295", "bsz": "256", "num_updates": "619000", "lr": "0.000384848", "gnorm": "0.71", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "202072"} +[2022-08-03 09:37:05,182][train_inner][INFO] - {"epoch": 13, "update": 12.031, "loss": "2.093", "ppl": "4.27", "wps": "362746", "ups": "3.07", "wpb": "118098", "bsz": "256", "num_updates": "619200", "lr": "0.000384646", "gnorm": "0.71", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "202138"} +[2022-08-03 09:38:10,209][train_inner][INFO] - {"epoch": 13, "update": 12.035, "loss": "2.092", "ppl": "4.26", "wps": "363889", "ups": "3.08", "wpb": "118311", "bsz": "256", "num_updates": "619400", "lr": "0.000384444", "gnorm": "0.705", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "202203"} +[2022-08-03 09:39:14,985][train_inner][INFO] - {"epoch": 13, "update": 12.039, "loss": "2.094", "ppl": "4.27", "wps": "365142", "ups": "3.09", "wpb": "118260", "bsz": "256", "num_updates": "619600", "lr": "0.000384242", "gnorm": "0.709", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "202267"} +[2022-08-03 09:40:19,754][train_inner][INFO] - {"epoch": 13, "update": 12.042, "loss": "2.092", "ppl": "4.26", "wps": "363381", "ups": "3.09", "wpb": "117678", "bsz": "256", "num_updates": "619800", "lr": "0.00038404", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "202332"} +[2022-08-03 09:41:25,441][train_inner][INFO] - {"epoch": 13, "update": 12.046, "loss": "2.093", "ppl": "4.27", "wps": "361702", "ups": "3.04", "wpb": "118795", "bsz": "256", "num_updates": "620000", "lr": "0.000383838", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "202398"} +[2022-08-03 09:42:30,203][train_inner][INFO] - {"epoch": 13, "update": 12.05, "loss": "2.09", "ppl": "4.26", "wps": "366484", "ups": "3.09", "wpb": "118670", "bsz": "256", "num_updates": "620200", "lr": "0.000383636", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "202463"} +[2022-08-03 09:43:34,922][train_inner][INFO] - {"epoch": 13, "update": 12.054, "loss": "2.086", "ppl": "4.25", "wps": "367970", "ups": "3.09", "wpb": "119071", "bsz": "256", "num_updates": "620400", "lr": "0.000383434", "gnorm": "0.703", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "202527"} +[2022-08-03 09:44:39,560][train_inner][INFO] - {"epoch": 13, "update": 12.058, "loss": "2.093", "ppl": "4.27", "wps": "366328", "ups": "3.09", "wpb": "118392", "bsz": "256", "num_updates": "620600", "lr": "0.000383232", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "202592"} +[2022-08-03 09:45:44,612][train_inner][INFO] - {"epoch": 13, "update": 12.062, "loss": "2.095", "ppl": "4.27", "wps": "364468", "ups": "3.07", "wpb": "118545", "bsz": "256", "num_updates": "620800", "lr": "0.00038303", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "202657"} +[2022-08-03 09:46:49,524][train_inner][INFO] - {"epoch": 13, "update": 12.066, "loss": "2.094", "ppl": "4.27", "wps": "363160", "ups": "3.08", "wpb": "117865", "bsz": "256", "num_updates": "621000", "lr": "0.000382828", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "202722"} +[2022-08-03 09:47:54,584][train_inner][INFO] - {"epoch": 13, "update": 12.07, "loss": "2.082", "ppl": "4.23", "wps": "366239", "ups": "3.07", "wpb": "119136", "bsz": "256", "num_updates": "621200", "lr": "0.000382626", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "202787"} +[2022-08-03 09:48:59,608][train_inner][INFO] - {"epoch": 13, "update": 12.074, "loss": "2.088", "ppl": "4.25", "wps": "363886", "ups": "3.08", "wpb": "118304", "bsz": "256", "num_updates": "621400", "lr": "0.000382424", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "202852"} +[2022-08-03 09:50:04,386][train_inner][INFO] - {"epoch": 13, "update": 12.077, "loss": "2.09", "ppl": "4.26", "wps": "367525", "ups": "3.09", "wpb": "119037", "bsz": "256", "num_updates": "621600", "lr": "0.000382222", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "202917"} +[2022-08-03 09:51:09,432][train_inner][INFO] - {"epoch": 13, "update": 12.081, "loss": "2.088", "ppl": "4.25", "wps": "364056", "ups": "3.07", "wpb": "118399", "bsz": "256", "num_updates": "621800", "lr": "0.00038202", "gnorm": "0.707", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.9", "wall": "202982"} +[2022-08-03 09:51:29,902][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 09:52:14,859][train_inner][INFO] - {"epoch": 13, "update": 12.085, "loss": "2.093", "ppl": "4.27", "wps": "362982", "ups": "3.06", "wpb": "118743", "bsz": "256", "num_updates": "622000", "lr": "0.000381818", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "203047"} +[2022-08-03 09:53:19,592][train_inner][INFO] - {"epoch": 13, "update": 12.089, "loss": "2.093", "ppl": "4.27", "wps": "364441", "ups": "3.09", "wpb": "117955", "bsz": "256", "num_updates": "622200", "lr": "0.000381616", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "203112"} +[2022-08-03 09:54:24,295][train_inner][INFO] - {"epoch": 13, "update": 12.093, "loss": "2.096", "ppl": "4.27", "wps": "365918", "ups": "3.09", "wpb": "118378", "bsz": "256", "num_updates": "622400", "lr": "0.000381414", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "203177"} +[2022-08-03 09:55:29,256][train_inner][INFO] - {"epoch": 13, "update": 12.097, "loss": "2.09", "ppl": "4.26", "wps": "364998", "ups": "3.08", "wpb": "118551", "bsz": "256", "num_updates": "622600", "lr": "0.000381212", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "203242"} +[2022-08-03 09:56:34,266][train_inner][INFO] - {"epoch": 13, "update": 12.101, "loss": "2.09", "ppl": "4.26", "wps": "363231", "ups": "3.08", "wpb": "118067", "bsz": "256", "num_updates": "622800", "lr": "0.00038101", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "203307"} +[2022-08-03 09:57:39,190][train_inner][INFO] - {"epoch": 13, "update": 12.105, "loss": "2.097", "ppl": "4.28", "wps": "365578", "ups": "3.08", "wpb": "118672", "bsz": "256", "num_updates": "623000", "lr": "0.000380808", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "203372"} +[2022-08-03 09:58:44,437][train_inner][INFO] - {"epoch": 13, "update": 12.108, "loss": "2.098", "ppl": "4.28", "wps": "361713", "ups": "3.07", "wpb": "118002", "bsz": "256", "num_updates": "623200", "lr": "0.000380606", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "203437"} +[2022-08-03 09:59:48,835][train_inner][INFO] - {"epoch": 13, "update": 12.112, "loss": "2.093", "ppl": "4.27", "wps": "364369", "ups": "3.11", "wpb": "117322", "bsz": "256", "num_updates": "623400", "lr": "0.000380404", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "203501"} +[2022-08-03 10:00:53,638][train_inner][INFO] - {"epoch": 13, "update": 12.116, "loss": "2.093", "ppl": "4.27", "wps": "363404", "ups": "3.09", "wpb": "117746", "bsz": "256", "num_updates": "623600", "lr": "0.000380202", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "203566"} +[2022-08-03 10:01:58,440][train_inner][INFO] - {"epoch": 13, "update": 12.12, "loss": "2.088", "ppl": "4.25", "wps": "366559", "ups": "3.09", "wpb": "118767", "bsz": "256", "num_updates": "623800", "lr": "0.00038", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "203631"} +[2022-08-03 10:03:03,608][train_inner][INFO] - {"epoch": 13, "update": 12.124, "loss": "2.088", "ppl": "4.25", "wps": "365206", "ups": "3.07", "wpb": "118998", "bsz": "256", "num_updates": "624000", "lr": "0.000379798", "gnorm": "0.708", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.2", "wall": "203696"} +[2022-08-03 10:03:48,389][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 10:04:08,906][train_inner][INFO] - {"epoch": 13, "update": 12.128, "loss": "2.086", "ppl": "4.25", "wps": "363279", "ups": "3.06", "wpb": "118604", "bsz": "256", "num_updates": "624200", "lr": "0.000379596", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "203761"} +[2022-08-03 10:05:13,499][train_inner][INFO] - {"epoch": 13, "update": 12.132, "loss": "2.092", "ppl": "4.26", "wps": "365163", "ups": "3.1", "wpb": "117935", "bsz": "256", "num_updates": "624400", "lr": "0.000379394", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "203826"} +[2022-08-03 10:06:18,624][train_inner][INFO] - {"epoch": 13, "update": 12.136, "loss": "2.095", "ppl": "4.27", "wps": "362003", "ups": "3.07", "wpb": "117875", "bsz": "256", "num_updates": "624600", "lr": "0.000379192", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "203891"} +[2022-08-03 10:06:48,476][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 10:07:23,448][train_inner][INFO] - {"epoch": 13, "update": 12.14, "loss": "2.096", "ppl": "4.28", "wps": "363184", "ups": "3.09", "wpb": "117714", "bsz": "256", "num_updates": "624800", "lr": "0.00037899", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "203956"} +[2022-08-03 10:08:28,348][train_inner][INFO] - {"epoch": 13, "update": 12.143, "loss": "2.095", "ppl": "4.27", "wps": "363746", "ups": "3.08", "wpb": "118035", "bsz": "256", "num_updates": "625000", "lr": "0.000378788", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "204021"} +[2022-08-03 10:09:33,573][train_inner][INFO] - {"epoch": 13, "update": 12.147, "loss": "2.088", "ppl": "4.25", "wps": "363962", "ups": "3.07", "wpb": "118694", "bsz": "256", "num_updates": "625200", "lr": "0.000378586", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "204086"} +[2022-08-03 10:10:38,418][train_inner][INFO] - {"epoch": 13, "update": 12.151, "loss": "2.089", "ppl": "4.25", "wps": "366469", "ups": "3.08", "wpb": "118817", "bsz": "256", "num_updates": "625400", "lr": "0.000378384", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "204151"} +[2022-08-03 10:11:43,407][train_inner][INFO] - {"epoch": 13, "update": 12.155, "loss": "2.088", "ppl": "4.25", "wps": "364801", "ups": "3.08", "wpb": "118538", "bsz": "256", "num_updates": "625600", "lr": "0.000378182", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "204216"} +[2022-08-03 10:12:48,240][train_inner][INFO] - {"epoch": 13, "update": 12.159, "loss": "2.092", "ppl": "4.26", "wps": "365574", "ups": "3.08", "wpb": "118505", "bsz": "256", "num_updates": "625800", "lr": "0.00037798", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "204281"} +[2022-08-03 10:13:53,092][train_inner][INFO] - {"epoch": 13, "update": 12.163, "loss": "2.088", "ppl": "4.25", "wps": "365273", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "626000", "lr": "0.000377778", "gnorm": "0.71", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "204345"} +[2022-08-03 10:14:58,002][train_inner][INFO] - {"epoch": 13, "update": 12.167, "loss": "2.088", "ppl": "4.25", "wps": "364504", "ups": "3.08", "wpb": "118298", "bsz": "256", "num_updates": "626200", "lr": "0.000377576", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "204410"} +[2022-08-03 10:16:02,334][train_inner][INFO] - {"epoch": 13, "update": 12.171, "loss": "2.096", "ppl": "4.27", "wps": "365665", "ups": "3.11", "wpb": "117617", "bsz": "256", "num_updates": "626400", "lr": "0.000377374", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "204475"} +[2022-08-03 10:17:07,187][train_inner][INFO] - {"epoch": 13, "update": 12.175, "loss": "2.092", "ppl": "4.26", "wps": "363541", "ups": "3.08", "wpb": "117883", "bsz": "256", "num_updates": "626600", "lr": "0.000377172", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "204540"} +[2022-08-03 10:18:12,273][train_inner][INFO] - {"epoch": 13, "update": 12.178, "loss": "2.094", "ppl": "4.27", "wps": "362559", "ups": "3.07", "wpb": "117985", "bsz": "256", "num_updates": "626800", "lr": "0.00037697", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "204605"} +[2022-08-03 10:19:17,538][train_inner][INFO] - {"epoch": 13, "update": 12.182, "loss": "2.086", "ppl": "4.25", "wps": "362576", "ups": "3.06", "wpb": "118316", "bsz": "256", "num_updates": "627000", "lr": "0.000376768", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "204670"} +[2022-08-03 10:20:22,651][train_inner][INFO] - {"epoch": 13, "update": 12.186, "loss": "2.086", "ppl": "4.25", "wps": "363057", "ups": "3.07", "wpb": "118196", "bsz": "256", "num_updates": "627200", "lr": "0.000376566", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "204735"} +[2022-08-03 10:21:27,529][train_inner][INFO] - {"epoch": 13, "update": 12.19, "loss": "2.086", "ppl": "4.25", "wps": "364355", "ups": "3.08", "wpb": "118191", "bsz": "256", "num_updates": "627400", "lr": "0.000376364", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "204800"} +[2022-08-03 10:22:31,965][train_inner][INFO] - {"epoch": 13, "update": 12.194, "loss": "2.09", "ppl": "4.26", "wps": "364896", "ups": "3.1", "wpb": "117561", "bsz": "256", "num_updates": "627600", "lr": "0.000376162", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "204864"} +[2022-08-03 10:23:36,884][train_inner][INFO] - {"epoch": 13, "update": 12.198, "loss": "2.091", "ppl": "4.26", "wps": "365437", "ups": "3.08", "wpb": "118617", "bsz": "256", "num_updates": "627800", "lr": "0.00037596", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "204929"} +[2022-08-03 10:24:41,885][train_inner][INFO] - {"epoch": 13, "update": 12.202, "loss": "2.089", "ppl": "4.26", "wps": "362104", "ups": "3.08", "wpb": "117683", "bsz": "256", "num_updates": "628000", "lr": "0.000375758", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "204994"} +[2022-08-03 10:25:06,187][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 10:25:47,126][train_inner][INFO] - {"epoch": 13, "update": 12.206, "loss": "2.09", "ppl": "4.26", "wps": "363467", "ups": "3.07", "wpb": "118563", "bsz": "256", "num_updates": "628200", "lr": "0.000375556", "gnorm": "0.729", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "205059"} +[2022-08-03 10:26:52,176][train_inner][INFO] - {"epoch": 13, "update": 12.21, "loss": "2.091", "ppl": "4.26", "wps": "362082", "ups": "3.07", "wpb": "117766", "bsz": "256", "num_updates": "628400", "lr": "0.000375354", "gnorm": "0.715", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "205124"} +[2022-08-03 10:27:57,141][train_inner][INFO] - {"epoch": 13, "update": 12.213, "loss": "2.086", "ppl": "4.25", "wps": "364525", "ups": "3.08", "wpb": "118404", "bsz": "256", "num_updates": "628600", "lr": "0.000375152", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.3", "wall": "205189"} +[2022-08-03 10:29:01,846][train_inner][INFO] - {"epoch": 13, "update": 12.217, "loss": "2.083", "ppl": "4.24", "wps": "366521", "ups": "3.09", "wpb": "118577", "bsz": "256", "num_updates": "628800", "lr": "0.000374949", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "205254"} +[2022-08-03 10:30:06,752][train_inner][INFO] - {"epoch": 13, "update": 12.221, "loss": "2.09", "ppl": "4.26", "wps": "364280", "ups": "3.08", "wpb": "118218", "bsz": "256", "num_updates": "629000", "lr": "0.000374747", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "205319"} +[2022-08-03 10:31:11,605][train_inner][INFO] - {"epoch": 13, "update": 12.225, "loss": "2.092", "ppl": "4.26", "wps": "364404", "ups": "3.08", "wpb": "118162", "bsz": "256", "num_updates": "629200", "lr": "0.000374545", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "205384"} +[2022-08-03 10:32:16,158][train_inner][INFO] - {"epoch": 13, "update": 12.229, "loss": "2.085", "ppl": "4.24", "wps": "366174", "ups": "3.1", "wpb": "118186", "bsz": "256", "num_updates": "629400", "lr": "0.000374343", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "205448"} +[2022-08-03 10:33:20,826][train_inner][INFO] - {"epoch": 13, "update": 12.233, "loss": "2.089", "ppl": "4.25", "wps": "365661", "ups": "3.09", "wpb": "118232", "bsz": "256", "num_updates": "629600", "lr": "0.000374141", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "205513"} +[2022-08-03 10:34:25,295][train_inner][INFO] - {"epoch": 13, "update": 12.237, "loss": "2.089", "ppl": "4.25", "wps": "365332", "ups": "3.1", "wpb": "117762", "bsz": "256", "num_updates": "629800", "lr": "0.000373939", "gnorm": "0.715", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "205578"} +[2022-08-03 10:35:30,487][train_inner][INFO] - {"epoch": 13, "update": 12.241, "loss": "2.088", "ppl": "4.25", "wps": "362544", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "630000", "lr": "0.000373737", "gnorm": "0.714", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.1", "wall": "205643"} +[2022-08-03 10:36:35,327][train_inner][INFO] - {"epoch": 13, "update": 12.245, "loss": "2.09", "ppl": "4.26", "wps": "364775", "ups": "3.08", "wpb": "118259", "bsz": "256", "num_updates": "630200", "lr": "0.000373535", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "205708"} +[2022-08-03 10:37:39,931][train_inner][INFO] - {"epoch": 13, "update": 12.248, "loss": "2.086", "ppl": "4.25", "wps": "367468", "ups": "3.1", "wpb": "118697", "bsz": "256", "num_updates": "630400", "lr": "0.000373333", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "205772"} +[2022-08-03 10:38:45,730][train_inner][INFO] - {"epoch": 13, "update": 12.252, "loss": "2.086", "ppl": "4.25", "wps": "360409", "ups": "3.04", "wpb": "118571", "bsz": "256", "num_updates": "630600", "lr": "0.000373131", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "205838"} +[2022-08-03 10:39:50,720][train_inner][INFO] - {"epoch": 13, "update": 12.256, "loss": "2.085", "ppl": "4.24", "wps": "365050", "ups": "3.08", "wpb": "118621", "bsz": "256", "num_updates": "630800", "lr": "0.000372929", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "205903"} +[2022-08-03 10:40:55,624][train_inner][INFO] - {"epoch": 13, "update": 12.26, "loss": "2.088", "ppl": "4.25", "wps": "364948", "ups": "3.08", "wpb": "118432", "bsz": "256", "num_updates": "631000", "lr": "0.000372727", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "205968"} +[2022-08-03 10:42:00,425][train_inner][INFO] - {"epoch": 13, "update": 12.264, "loss": "2.09", "ppl": "4.26", "wps": "364630", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "631200", "lr": "0.000372525", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "206033"} +[2022-08-03 10:43:05,263][train_inner][INFO] - {"epoch": 13, "update": 12.268, "loss": "2.082", "ppl": "4.23", "wps": "366024", "ups": "3.08", "wpb": "118659", "bsz": "256", "num_updates": "631400", "lr": "0.000372323", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "206098"} +[2022-08-03 10:44:11,186][train_inner][INFO] - {"epoch": 13, "update": 12.272, "loss": "2.094", "ppl": "4.27", "wps": "357360", "ups": "3.03", "wpb": "117790", "bsz": "256", "num_updates": "631600", "lr": "0.000372121", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "24.6", "wall": "206164"} +[2022-08-03 10:45:15,683][train_inner][INFO] - {"epoch": 13, "update": 12.276, "loss": "2.087", "ppl": "4.25", "wps": "367680", "ups": "3.1", "wpb": "118569", "bsz": "256", "num_updates": "631800", "lr": "0.000371919", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "206228"} +[2022-08-03 10:46:20,600][train_inner][INFO] - {"epoch": 13, "update": 12.279, "loss": "2.085", "ppl": "4.24", "wps": "364806", "ups": "3.08", "wpb": "118409", "bsz": "256", "num_updates": "632000", "lr": "0.000371717", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "206293"} +[2022-08-03 10:47:25,339][train_inner][INFO] - {"epoch": 13, "update": 12.283, "loss": "2.091", "ppl": "4.26", "wps": "366259", "ups": "3.09", "wpb": "118554", "bsz": "256", "num_updates": "632200", "lr": "0.000371515", "gnorm": "0.715", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "206358"} +[2022-08-03 10:48:30,206][train_inner][INFO] - {"epoch": 13, "update": 12.287, "loss": "2.089", "ppl": "4.26", "wps": "364424", "ups": "3.08", "wpb": "118192", "bsz": "256", "num_updates": "632400", "lr": "0.000371313", "gnorm": "0.713", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.3", "wall": "206423"} +[2022-08-03 10:48:50,066][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 10:49:35,496][train_inner][INFO] - {"epoch": 13, "update": 12.291, "loss": "2.09", "ppl": "4.26", "wps": "362543", "ups": "3.06", "wpb": "118350", "bsz": "256", "num_updates": "632600", "lr": "0.000371111", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "206488"} +[2022-08-03 10:50:40,069][train_inner][INFO] - {"epoch": 13, "update": 12.295, "loss": "2.094", "ppl": "4.27", "wps": "365000", "ups": "3.1", "wpb": "117845", "bsz": "256", "num_updates": "632800", "lr": "0.000370909", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "206552"} +[2022-08-03 10:51:44,983][train_inner][INFO] - {"epoch": 13, "update": 12.299, "loss": "2.084", "ppl": "4.24", "wps": "366424", "ups": "3.08", "wpb": "118929", "bsz": "256", "num_updates": "633000", "lr": "0.000370707", "gnorm": "0.716", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "206617"} +[2022-08-03 10:52:49,868][train_inner][INFO] - {"epoch": 13, "update": 12.303, "loss": "2.088", "ppl": "4.25", "wps": "364874", "ups": "3.08", "wpb": "118372", "bsz": "256", "num_updates": "633200", "lr": "0.000370505", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "206682"} +[2022-08-03 10:53:54,774][train_inner][INFO] - {"epoch": 13, "update": 12.307, "loss": "2.087", "ppl": "4.25", "wps": "366270", "ups": "3.08", "wpb": "118864", "bsz": "256", "num_updates": "633400", "lr": "0.000370303", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.5", "wall": "206747"} +[2022-08-03 10:54:59,625][train_inner][INFO] - {"epoch": 13, "update": 12.311, "loss": "2.083", "ppl": "4.24", "wps": "364209", "ups": "3.08", "wpb": "118095", "bsz": "256", "num_updates": "633600", "lr": "0.000370101", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "206812"} +[2022-08-03 10:55:16,125][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 10:56:04,823][train_inner][INFO] - {"epoch": 13, "update": 12.314, "loss": "2.092", "ppl": "4.26", "wps": "362096", "ups": "3.07", "wpb": "118038", "bsz": "256", "num_updates": "633800", "lr": "0.000369899", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "206877"} +[2022-08-03 10:57:09,768][train_inner][INFO] - {"epoch": 13, "update": 12.318, "loss": "2.083", "ppl": "4.24", "wps": "364225", "ups": "3.08", "wpb": "118272", "bsz": "256", "num_updates": "634000", "lr": "0.000369697", "gnorm": "0.715", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "206942"} +[2022-08-03 10:58:14,557][train_inner][INFO] - {"epoch": 13, "update": 12.322, "loss": "2.085", "ppl": "4.24", "wps": "364303", "ups": "3.09", "wpb": "118012", "bsz": "256", "num_updates": "634200", "lr": "0.000369495", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "207007"} +[2022-08-03 10:59:19,938][train_inner][INFO] - {"epoch": 13, "update": 12.326, "loss": "2.086", "ppl": "4.25", "wps": "362323", "ups": "3.06", "wpb": "118444", "bsz": "256", "num_updates": "634400", "lr": "0.000369293", "gnorm": "0.714", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "207072"} +[2022-08-03 11:00:24,498][train_inner][INFO] - {"epoch": 13, "update": 12.33, "loss": "2.088", "ppl": "4.25", "wps": "364955", "ups": "3.1", "wpb": "117805", "bsz": "256", "num_updates": "634600", "lr": "0.000369091", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "207137"} +[2022-08-03 11:01:29,284][train_inner][INFO] - {"epoch": 13, "update": 12.334, "loss": "2.088", "ppl": "4.25", "wps": "364996", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "634800", "lr": "0.000368889", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "207202"} +[2022-08-03 11:02:24,784][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 11:02:30,546][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-03 11:02:34,715][train_inner][INFO] - {"epoch": 13, "update": 12.338, "loss": "2.083", "ppl": "4.24", "wps": "361886", "ups": "3.06", "wpb": "118390", "bsz": "256", "num_updates": "635000", "lr": "0.000368687", "gnorm": "0.722", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.4", "wall": "207267"} +[2022-08-03 11:03:39,827][train_inner][INFO] - {"epoch": 13, "update": 12.342, "loss": "2.092", "ppl": "4.26", "wps": "363276", "ups": "3.07", "wpb": "118267", "bsz": "256", "num_updates": "635200", "lr": "0.000368485", "gnorm": "0.716", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "207332"} +[2022-08-03 11:04:44,811][train_inner][INFO] - {"epoch": 13, "update": 12.346, "loss": "2.078", "ppl": "4.22", "wps": "365300", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "635400", "lr": "0.000368283", "gnorm": "0.715", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "207397"} +[2022-08-03 11:05:50,070][train_inner][INFO] - {"epoch": 13, "update": 12.349, "loss": "2.083", "ppl": "4.24", "wps": "363660", "ups": "3.06", "wpb": "118660", "bsz": "256", "num_updates": "635600", "lr": "0.000368081", "gnorm": "0.712", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "207462"} +[2022-08-03 11:06:55,079][train_inner][INFO] - {"epoch": 13, "update": 12.353, "loss": "2.088", "ppl": "4.25", "wps": "364926", "ups": "3.08", "wpb": "118615", "bsz": "256", "num_updates": "635800", "lr": "0.000367879", "gnorm": "0.714", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "207527"} +[2022-08-03 11:07:59,747][train_inner][INFO] - {"epoch": 13, "update": 12.357, "loss": "2.088", "ppl": "4.25", "wps": "363864", "ups": "3.09", "wpb": "117651", "bsz": "256", "num_updates": "636000", "lr": "0.000367677", "gnorm": "0.717", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.5", "wall": "207592"} +[2022-08-03 11:09:04,364][train_inner][INFO] - {"epoch": 13, "update": 12.361, "loss": "2.085", "ppl": "4.24", "wps": "366241", "ups": "3.1", "wpb": "118324", "bsz": "256", "num_updates": "636200", "lr": "0.000367475", "gnorm": "0.715", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.9", "wall": "207657"} +[2022-08-03 11:10:09,080][train_inner][INFO] - {"epoch": 13, "update": 12.365, "loss": "2.078", "ppl": "4.22", "wps": "366153", "ups": "3.09", "wpb": "118478", "bsz": "256", "num_updates": "636400", "lr": "0.000367273", "gnorm": "0.713", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "207721"} +[2022-08-03 11:11:13,938][train_inner][INFO] - {"epoch": 13, "update": 12.369, "loss": "2.084", "ppl": "4.24", "wps": "364709", "ups": "3.08", "wpb": "118270", "bsz": "256", "num_updates": "636600", "lr": "0.000367071", "gnorm": "0.716", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.6", "wall": "207786"} +[2022-08-03 11:12:18,651][train_inner][INFO] - {"epoch": 13, "update": 12.373, "loss": "2.085", "ppl": "4.24", "wps": "366229", "ups": "3.09", "wpb": "118498", "bsz": "256", "num_updates": "636800", "lr": "0.000366869", "gnorm": "0.712", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "207851"} +[2022-08-03 11:13:23,496][train_inner][INFO] - {"epoch": 13, "update": 12.377, "loss": "2.084", "ppl": "4.24", "wps": "362947", "ups": "3.08", "wpb": "117674", "bsz": "256", "num_updates": "637000", "lr": "0.000366667", "gnorm": "0.718", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "26.2", "wall": "207916"} +[2022-08-03 11:14:28,261][train_inner][INFO] - {"epoch": 13, "update": 12.381, "loss": "2.087", "ppl": "4.25", "wps": "364311", "ups": "3.09", "wpb": "117971", "bsz": "256", "num_updates": "637200", "lr": "0.000366465", "gnorm": "0.718", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.7", "wall": "207981"} +[2022-08-03 11:15:32,901][train_inner][INFO] - {"epoch": 13, "update": 12.384, "loss": "2.08", "ppl": "4.23", "wps": "365980", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "637400", "lr": "0.000366263", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "208045"} +[2022-08-03 11:16:37,878][train_inner][INFO] - {"epoch": 13, "update": 12.388, "loss": "2.085", "ppl": "4.24", "wps": "363932", "ups": "3.08", "wpb": "118235", "bsz": "256", "num_updates": "637600", "lr": "0.000366061", "gnorm": "0.715", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "208110"} +[2022-08-03 11:17:42,743][train_inner][INFO] - {"epoch": 13, "update": 12.392, "loss": "2.079", "ppl": "4.22", "wps": "365150", "ups": "3.08", "wpb": "118426", "bsz": "256", "num_updates": "637800", "lr": "0.000365859", "gnorm": "0.712", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.9", "wall": "208175"} +[2022-08-03 11:18:47,878][train_inner][INFO] - {"epoch": 13, "update": 12.396, "loss": "2.076", "ppl": "4.22", "wps": "365320", "ups": "3.07", "wpb": "118974", "bsz": "256", "num_updates": "638000", "lr": "0.000365657", "gnorm": "0.712", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "208240"} +[2022-08-03 11:19:52,831][train_inner][INFO] - {"epoch": 13, "update": 12.4, "loss": "2.083", "ppl": "4.24", "wps": "366061", "ups": "3.08", "wpb": "118881", "bsz": "256", "num_updates": "638200", "lr": "0.000365455", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.4", "wall": "208305"} +[2022-08-03 11:20:58,635][train_inner][INFO] - {"epoch": 13, "update": 12.404, "loss": "2.088", "ppl": "4.25", "wps": "358710", "ups": "3.04", "wpb": "118021", "bsz": "256", "num_updates": "638400", "lr": "0.000365253", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.3", "wall": "208371"} +[2022-08-03 11:22:03,587][train_inner][INFO] - {"epoch": 13, "update": 12.408, "loss": "2.085", "ppl": "4.24", "wps": "365259", "ups": "3.08", "wpb": "118620", "bsz": "256", "num_updates": "638600", "lr": "0.000365051", "gnorm": "0.717", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "208436"} +[2022-08-03 11:23:08,536][train_inner][INFO] - {"epoch": 13, "update": 12.412, "loss": "2.084", "ppl": "4.24", "wps": "364149", "ups": "3.08", "wpb": "118253", "bsz": "256", "num_updates": "638800", "lr": "0.000364848", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "208501"} +[2022-08-03 11:24:13,333][train_inner][INFO] - {"epoch": 13, "update": 12.415, "loss": "2.083", "ppl": "4.24", "wps": "364303", "ups": "3.09", "wpb": "118028", "bsz": "256", "num_updates": "639000", "lr": "0.000364646", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "208566"} +[2022-08-03 11:25:18,533][train_inner][INFO] - {"epoch": 13, "update": 12.419, "loss": "2.081", "ppl": "4.23", "wps": "364823", "ups": "3.07", "wpb": "118931", "bsz": "256", "num_updates": "639200", "lr": "0.000364444", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "208631"} +[2022-08-03 11:26:23,214][train_inner][INFO] - {"epoch": 13, "update": 12.423, "loss": "2.084", "ppl": "4.24", "wps": "364291", "ups": "3.09", "wpb": "117811", "bsz": "256", "num_updates": "639400", "lr": "0.000364242", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "208696"} +[2022-08-03 11:26:39,799][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 11:27:28,411][train_inner][INFO] - {"epoch": 13, "update": 12.427, "loss": "2.085", "ppl": "4.24", "wps": "364450", "ups": "3.07", "wpb": "118804", "bsz": "256", "num_updates": "639600", "lr": "0.00036404", "gnorm": "0.718", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "208761"} +[2022-08-03 11:28:32,970][train_inner][INFO] - {"epoch": 13, "update": 12.431, "loss": "2.082", "ppl": "4.24", "wps": "365724", "ups": "3.1", "wpb": "118054", "bsz": "256", "num_updates": "639800", "lr": "0.000363838", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "208825"} +[2022-08-03 11:29:37,635][train_inner][INFO] - {"epoch": 13, "update": 12.435, "loss": "2.084", "ppl": "4.24", "wps": "366958", "ups": "3.09", "wpb": "118643", "bsz": "256", "num_updates": "640000", "lr": "0.000363636", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.2", "wall": "208890"} +[2022-08-03 11:30:42,678][train_inner][INFO] - {"epoch": 13, "update": 12.439, "loss": "2.082", "ppl": "4.23", "wps": "361278", "ups": "3.07", "wpb": "117491", "bsz": "256", "num_updates": "640200", "lr": "0.000363434", "gnorm": "0.727", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "208955"} +[2022-08-03 11:31:47,360][train_inner][INFO] - {"epoch": 13, "update": 12.443, "loss": "2.087", "ppl": "4.25", "wps": "365097", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "640400", "lr": "0.000363232", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26.6", "wall": "209020"} +[2022-08-03 11:32:52,286][train_inner][INFO] - {"epoch": 13, "update": 12.447, "loss": "2.088", "ppl": "4.25", "wps": "362506", "ups": "3.08", "wpb": "117679", "bsz": "256", "num_updates": "640600", "lr": "0.00036303", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "209085"} +[2022-08-03 11:33:58,196][train_inner][INFO] - {"epoch": 13, "update": 12.45, "loss": "2.084", "ppl": "4.24", "wps": "359139", "ups": "3.03", "wpb": "118352", "bsz": "256", "num_updates": "640800", "lr": "0.000362828", "gnorm": "0.717", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "20.2", "wall": "209151"} +[2022-08-03 11:35:03,005][train_inner][INFO] - {"epoch": 13, "update": 12.454, "loss": "2.079", "ppl": "4.22", "wps": "366461", "ups": "3.09", "wpb": "118749", "bsz": "256", "num_updates": "641000", "lr": "0.000362626", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "209215"} +[2022-08-03 11:36:08,005][train_inner][INFO] - {"epoch": 13, "update": 12.458, "loss": "2.088", "ppl": "4.25", "wps": "363188", "ups": "3.08", "wpb": "118035", "bsz": "256", "num_updates": "641200", "lr": "0.000362424", "gnorm": "0.719", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "209280"} +[2022-08-03 11:37:14,251][train_inner][INFO] - {"epoch": 13, "update": 12.462, "loss": "2.083", "ppl": "4.24", "wps": "356935", "ups": "3.02", "wpb": "118225", "bsz": "256", "num_updates": "641400", "lr": "0.000362222", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "20.1", "wall": "209347"} +[2022-08-03 11:38:19,130][train_inner][INFO] - {"epoch": 13, "update": 12.466, "loss": "2.084", "ppl": "4.24", "wps": "363794", "ups": "3.08", "wpb": "118012", "bsz": "256", "num_updates": "641600", "lr": "0.00036202", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "209411"} +[2022-08-03 11:39:23,739][train_inner][INFO] - {"epoch": 13, "update": 12.47, "loss": "2.083", "ppl": "4.24", "wps": "364909", "ups": "3.1", "wpb": "117879", "bsz": "256", "num_updates": "641800", "lr": "0.000361818", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "209476"} +[2022-08-03 11:40:28,788][train_inner][INFO] - {"epoch": 13, "update": 12.474, "loss": "2.086", "ppl": "4.25", "wps": "362638", "ups": "3.07", "wpb": "117945", "bsz": "256", "num_updates": "642000", "lr": "0.000361616", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "209541"} +[2022-08-03 11:41:33,823][train_inner][INFO] - {"epoch": 13, "update": 12.478, "loss": "2.078", "ppl": "4.22", "wps": "362921", "ups": "3.08", "wpb": "118010", "bsz": "256", "num_updates": "642200", "lr": "0.000361414", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "209606"} +[2022-08-03 11:42:38,891][train_inner][INFO] - {"epoch": 13, "update": 12.482, "loss": "2.08", "ppl": "4.23", "wps": "362871", "ups": "3.07", "wpb": "118054", "bsz": "256", "num_updates": "642400", "lr": "0.000361212", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "209671"} +[2022-08-03 11:43:43,518][train_inner][INFO] - {"epoch": 13, "update": 12.485, "loss": "2.079", "ppl": "4.23", "wps": "366183", "ups": "3.09", "wpb": "118316", "bsz": "256", "num_updates": "642600", "lr": "0.00036101", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "209736"} +[2022-08-03 11:44:48,427][train_inner][INFO] - {"epoch": 13, "update": 12.489, "loss": "2.077", "ppl": "4.22", "wps": "365779", "ups": "3.08", "wpb": "118706", "bsz": "256", "num_updates": "642800", "lr": "0.000360808", "gnorm": "0.716", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "209801"} +[2022-08-03 11:45:53,440][train_inner][INFO] - {"epoch": 13, "update": 12.493, "loss": "2.077", "ppl": "4.22", "wps": "363416", "ups": "3.08", "wpb": "118132", "bsz": "256", "num_updates": "643000", "lr": "0.000360606", "gnorm": "0.717", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "209866"} +[2022-08-03 11:46:58,341][train_inner][INFO] - {"epoch": 13, "update": 12.497, "loss": "2.08", "ppl": "4.23", "wps": "366599", "ups": "3.08", "wpb": "118962", "bsz": "256", "num_updates": "643200", "lr": "0.000360404", "gnorm": "0.716", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "209931"} +[2022-08-03 11:48:03,290][train_inner][INFO] - {"epoch": 13, "update": 12.501, "loss": "2.083", "ppl": "4.24", "wps": "363586", "ups": "3.08", "wpb": "118070", "bsz": "256", "num_updates": "643400", "lr": "0.000360202", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "209996"} +[2022-08-03 11:49:08,264][train_inner][INFO] - {"epoch": 13, "update": 12.505, "loss": "2.08", "ppl": "4.23", "wps": "363992", "ups": "3.08", "wpb": "118249", "bsz": "256", "num_updates": "643600", "lr": "0.00036", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "210061"} +[2022-08-03 11:50:13,197][train_inner][INFO] - {"epoch": 13, "update": 12.509, "loss": "2.077", "ppl": "4.22", "wps": "363669", "ups": "3.08", "wpb": "118068", "bsz": "256", "num_updates": "643800", "lr": "0.000359798", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "210126"} +[2022-08-03 11:51:18,432][train_inner][INFO] - {"epoch": 13, "update": 12.513, "loss": "2.078", "ppl": "4.22", "wps": "363826", "ups": "3.07", "wpb": "118669", "bsz": "256", "num_updates": "644000", "lr": "0.000359596", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "210191"} +[2022-08-03 11:52:22,967][train_inner][INFO] - {"epoch": 13, "update": 12.517, "loss": "2.082", "ppl": "4.23", "wps": "368378", "ups": "3.1", "wpb": "118865", "bsz": "256", "num_updates": "644200", "lr": "0.000359394", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "210255"} +[2022-08-03 11:53:27,960][train_inner][INFO] - {"epoch": 13, "update": 12.52, "loss": "2.083", "ppl": "4.24", "wps": "364246", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "644400", "lr": "0.000359192", "gnorm": "0.716", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "210320"} +[2022-08-03 11:54:32,881][train_inner][INFO] - {"epoch": 13, "update": 12.524, "loss": "2.081", "ppl": "4.23", "wps": "363846", "ups": "3.08", "wpb": "118105", "bsz": "256", "num_updates": "644600", "lr": "0.00035899", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "210385"} +[2022-08-03 11:55:37,565][train_inner][INFO] - {"epoch": 13, "update": 12.528, "loss": "2.079", "ppl": "4.22", "wps": "365990", "ups": "3.09", "wpb": "118367", "bsz": "256", "num_updates": "644800", "lr": "0.000358788", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "210450"} +[2022-08-03 11:56:43,368][train_inner][INFO] - {"epoch": 13, "update": 12.532, "loss": "2.08", "ppl": "4.23", "wps": "359234", "ups": "3.04", "wpb": "118191", "bsz": "256", "num_updates": "645000", "lr": "0.000358586", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "210516"} +[2022-08-03 11:57:48,157][train_inner][INFO] - {"epoch": 13, "update": 12.536, "loss": "2.08", "ppl": "4.23", "wps": "364923", "ups": "3.09", "wpb": "118213", "bsz": "256", "num_updates": "645200", "lr": "0.000358384", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "210580"} +[2022-08-03 11:58:53,298][train_inner][INFO] - {"epoch": 13, "update": 12.54, "loss": "2.078", "ppl": "4.22", "wps": "362277", "ups": "3.07", "wpb": "117995", "bsz": "256", "num_updates": "645400", "lr": "0.000358182", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "210646"} +[2022-08-03 11:59:57,927][train_inner][INFO] - {"epoch": 13, "update": 12.544, "loss": "2.088", "ppl": "4.25", "wps": "364425", "ups": "3.09", "wpb": "117759", "bsz": "256", "num_updates": "645600", "lr": "0.00035798", "gnorm": "0.723", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "210710"} +[2022-08-03 12:01:02,730][train_inner][INFO] - {"epoch": 13, "update": 12.548, "loss": "2.078", "ppl": "4.22", "wps": "364581", "ups": "3.09", "wpb": "118128", "bsz": "256", "num_updates": "645800", "lr": "0.000357778", "gnorm": "0.724", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "210775"} +[2022-08-03 12:01:16,701][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 12:02:08,320][train_inner][INFO] - {"epoch": 13, "update": 12.551, "loss": "2.072", "ppl": "4.21", "wps": "361554", "ups": "3.05", "wpb": "118571", "bsz": "256", "num_updates": "646000", "lr": "0.000357576", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "210841"} +[2022-08-03 12:03:12,628][train_inner][INFO] - {"epoch": 13, "update": 12.555, "loss": "2.08", "ppl": "4.23", "wps": "365165", "ups": "3.11", "wpb": "117413", "bsz": "256", "num_updates": "646200", "lr": "0.000357374", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "210905"} +[2022-08-03 12:04:17,560][train_inner][INFO] - {"epoch": 13, "update": 12.559, "loss": "2.074", "ppl": "4.21", "wps": "366516", "ups": "3.08", "wpb": "118992", "bsz": "256", "num_updates": "646400", "lr": "0.000357172", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "210970"} +[2022-08-03 12:05:22,391][train_inner][INFO] - {"epoch": 13, "update": 12.563, "loss": "2.077", "ppl": "4.22", "wps": "365951", "ups": "3.08", "wpb": "118623", "bsz": "256", "num_updates": "646600", "lr": "0.00035697", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "211035"} +[2022-08-03 12:06:28,277][train_inner][INFO] - {"epoch": 13, "update": 12.567, "loss": "2.076", "ppl": "4.22", "wps": "361051", "ups": "3.04", "wpb": "118939", "bsz": "256", "num_updates": "646800", "lr": "0.000356768", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.9", "wall": "211101"} +[2022-08-03 12:07:32,917][train_inner][INFO] - {"epoch": 13, "update": 12.571, "loss": "2.078", "ppl": "4.22", "wps": "364995", "ups": "3.09", "wpb": "117965", "bsz": "256", "num_updates": "647000", "lr": "0.000356566", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "211165"} +[2022-08-03 12:08:37,679][train_inner][INFO] - {"epoch": 13, "update": 12.575, "loss": "2.079", "ppl": "4.23", "wps": "365743", "ups": "3.09", "wpb": "118429", "bsz": "256", "num_updates": "647200", "lr": "0.000356364", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "211230"} +[2022-08-03 12:09:42,817][train_inner][INFO] - {"epoch": 13, "update": 12.579, "loss": "2.077", "ppl": "4.22", "wps": "362886", "ups": "3.07", "wpb": "118186", "bsz": "256", "num_updates": "647400", "lr": "0.000356162", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "211295"} +[2022-08-03 12:10:47,592][train_inner][INFO] - {"epoch": 13, "update": 12.583, "loss": "2.078", "ppl": "4.22", "wps": "365307", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "647600", "lr": "0.00035596", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "211360"} +[2022-08-03 12:11:52,323][train_inner][INFO] - {"epoch": 13, "update": 12.586, "loss": "2.075", "ppl": "4.21", "wps": "366672", "ups": "3.09", "wpb": "118673", "bsz": "256", "num_updates": "647800", "lr": "0.000355758", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "211425"} +[2022-08-03 12:12:48,313][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 12:12:57,423][train_inner][INFO] - {"epoch": 13, "update": 12.59, "loss": "2.077", "ppl": "4.22", "wps": "364297", "ups": "3.07", "wpb": "118576", "bsz": "256", "num_updates": "648000", "lr": "0.000355556", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "211490"} +[2022-08-03 12:14:02,109][train_inner][INFO] - {"epoch": 13, "update": 12.594, "loss": "2.083", "ppl": "4.24", "wps": "366801", "ups": "3.09", "wpb": "118634", "bsz": "256", "num_updates": "648200", "lr": "0.000355354", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "211554"} +[2022-08-03 12:15:06,957][train_inner][INFO] - {"epoch": 13, "update": 12.598, "loss": "2.079", "ppl": "4.23", "wps": "364998", "ups": "3.08", "wpb": "118344", "bsz": "256", "num_updates": "648400", "lr": "0.000355152", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "211619"} +[2022-08-03 12:16:11,966][train_inner][INFO] - {"epoch": 13, "update": 12.602, "loss": "2.076", "ppl": "4.22", "wps": "365052", "ups": "3.08", "wpb": "118657", "bsz": "256", "num_updates": "648600", "lr": "0.000354949", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "211684"} +[2022-08-03 12:17:16,787][train_inner][INFO] - {"epoch": 13, "update": 12.606, "loss": "2.078", "ppl": "4.22", "wps": "365975", "ups": "3.09", "wpb": "118612", "bsz": "256", "num_updates": "648800", "lr": "0.000354747", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "211749"} +[2022-08-03 12:18:21,500][train_inner][INFO] - {"epoch": 13, "update": 12.61, "loss": "2.077", "ppl": "4.22", "wps": "365810", "ups": "3.09", "wpb": "118362", "bsz": "256", "num_updates": "649000", "lr": "0.000354545", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "211814"} +[2022-08-03 12:19:26,350][train_inner][INFO] - {"epoch": 13, "update": 12.614, "loss": "2.075", "ppl": "4.21", "wps": "365619", "ups": "3.08", "wpb": "118550", "bsz": "256", "num_updates": "649200", "lr": "0.000354343", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "211879"} +[2022-08-03 12:19:54,009][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 12:20:31,361][train_inner][INFO] - {"epoch": 13, "update": 12.618, "loss": "2.073", "ppl": "4.21", "wps": "362429", "ups": "3.08", "wpb": "117807", "bsz": "256", "num_updates": "649400", "lr": "0.000354141", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "211944"} +[2022-08-03 12:21:36,312][train_inner][INFO] - {"epoch": 13, "update": 12.621, "loss": "2.077", "ppl": "4.22", "wps": "363714", "ups": "3.08", "wpb": "118118", "bsz": "256", "num_updates": "649600", "lr": "0.000353939", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "212009"} +[2022-08-03 12:22:41,555][train_inner][INFO] - {"epoch": 13, "update": 12.625, "loss": "2.078", "ppl": "4.22", "wps": "363703", "ups": "3.07", "wpb": "118641", "bsz": "256", "num_updates": "649800", "lr": "0.000353737", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "212074"} +[2022-08-03 12:23:46,466][train_inner][INFO] - {"epoch": 13, "update": 12.629, "loss": "2.08", "ppl": "4.23", "wps": "364621", "ups": "3.08", "wpb": "118339", "bsz": "256", "num_updates": "650000", "lr": "0.000353535", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "212139"} +[2022-08-03 12:23:46,468][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 12:24:09,314][valid][INFO] - {"epoch": 13, "valid_loss": "1.988", "valid_ppl": "3.97", "valid_wps": "1.6055e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "650000", "valid_best_loss": "1.988"} +[2022-08-03 12:24:09,317][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 13 @ 650000 updates +[2022-08-03 12:24:09,317][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_13_650000.pt +[2022-08-03 12:24:19,540][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_13_650000.pt +[2022-08-03 12:24:48,816][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_13_650000.pt (epoch 13 @ 650000 updates, score 1.988) (writing took 39.49901276547462 seconds) +[2022-08-03 12:25:53,963][train_inner][INFO] - {"epoch": 13, "update": 12.633, "loss": "2.082", "ppl": "4.24", "wps": "185264", "ups": "1.57", "wpb": "118102", "bsz": "256", "num_updates": "650200", "lr": "0.000353333", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "212266"} +[2022-08-03 12:26:58,713][train_inner][INFO] - {"epoch": 13, "update": 12.637, "loss": "2.072", "ppl": "4.21", "wps": "367694", "ups": "3.09", "wpb": "119039", "bsz": "256", "num_updates": "650400", "lr": "0.000353131", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "212331"} +[2022-08-03 12:28:03,660][train_inner][INFO] - {"epoch": 13, "update": 12.641, "loss": "2.075", "ppl": "4.21", "wps": "365588", "ups": "3.08", "wpb": "118717", "bsz": "256", "num_updates": "650600", "lr": "0.000352929", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "212396"} +[2022-08-03 12:29:08,532][train_inner][INFO] - {"epoch": 13, "update": 12.645, "loss": "2.078", "ppl": "4.22", "wps": "363475", "ups": "3.08", "wpb": "117895", "bsz": "256", "num_updates": "650800", "lr": "0.000352727", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "212461"} +[2022-08-03 12:30:13,480][train_inner][INFO] - {"epoch": 13, "update": 12.649, "loss": "2.075", "ppl": "4.21", "wps": "364948", "ups": "3.08", "wpb": "118512", "bsz": "256", "num_updates": "651000", "lr": "0.000352525", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "212526"} +[2022-08-03 12:31:18,884][train_inner][INFO] - {"epoch": 13, "update": 12.653, "loss": "2.079", "ppl": "4.23", "wps": "361710", "ups": "3.06", "wpb": "118284", "bsz": "256", "num_updates": "651200", "lr": "0.000352323", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "212591"} +[2022-08-03 12:32:23,959][train_inner][INFO] - {"epoch": 13, "update": 12.656, "loss": "2.075", "ppl": "4.21", "wps": "362887", "ups": "3.07", "wpb": "118072", "bsz": "256", "num_updates": "651400", "lr": "0.000352121", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "212656"} +[2022-08-03 12:33:28,761][train_inner][INFO] - {"epoch": 13, "update": 12.66, "loss": "2.08", "ppl": "4.23", "wps": "364229", "ups": "3.09", "wpb": "118013", "bsz": "256", "num_updates": "651600", "lr": "0.000351919", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "212721"} +[2022-08-03 12:34:33,446][train_inner][INFO] - {"epoch": 13, "update": 12.664, "loss": "2.074", "ppl": "4.21", "wps": "365622", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "651800", "lr": "0.000351717", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "212786"} +[2022-08-03 12:35:38,166][train_inner][INFO] - {"epoch": 13, "update": 12.668, "loss": "2.075", "ppl": "4.21", "wps": "366478", "ups": "3.09", "wpb": "118590", "bsz": "256", "num_updates": "652000", "lr": "0.000351515", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "212850"} +[2022-08-03 12:36:43,074][train_inner][INFO] - {"epoch": 13, "update": 12.672, "loss": "2.077", "ppl": "4.22", "wps": "362253", "ups": "3.08", "wpb": "117565", "bsz": "256", "num_updates": "652200", "lr": "0.000351313", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "212915"} +[2022-08-03 12:37:47,629][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 12:37:48,576][train_inner][INFO] - {"epoch": 13, "update": 12.676, "loss": "2.073", "ppl": "4.21", "wps": "361675", "ups": "3.05", "wpb": "118449", "bsz": "256", "num_updates": "652400", "lr": "0.000351111", "gnorm": "0.723", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "212981"} +[2022-08-03 12:38:53,461][train_inner][INFO] - {"epoch": 13, "update": 12.68, "loss": "2.081", "ppl": "4.23", "wps": "364840", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "652600", "lr": "0.000350909", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "213046"} +[2022-08-03 12:39:58,186][train_inner][INFO] - {"epoch": 13, "update": 12.684, "loss": "2.074", "ppl": "4.21", "wps": "365206", "ups": "3.09", "wpb": "118188", "bsz": "256", "num_updates": "652800", "lr": "0.000350707", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "213111"} +[2022-08-03 12:41:03,005][train_inner][INFO] - {"epoch": 13, "update": 12.688, "loss": "2.081", "ppl": "4.23", "wps": "365387", "ups": "3.09", "wpb": "118419", "bsz": "256", "num_updates": "653000", "lr": "0.000350505", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "213175"} +[2022-08-03 12:42:07,670][train_inner][INFO] - {"epoch": 13, "update": 12.691, "loss": "2.072", "ppl": "4.2", "wps": "365444", "ups": "3.09", "wpb": "118155", "bsz": "255.9", "num_updates": "653200", "lr": "0.000350303", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.5", "wall": "213240"} +[2022-08-03 12:43:12,771][train_inner][INFO] - {"epoch": 13, "update": 12.695, "loss": "2.076", "ppl": "4.22", "wps": "363635", "ups": "3.07", "wpb": "118363", "bsz": "256", "num_updates": "653400", "lr": "0.000350101", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "213305"} +[2022-08-03 12:44:17,825][train_inner][INFO] - {"epoch": 13, "update": 12.699, "loss": "2.075", "ppl": "4.21", "wps": "363212", "ups": "3.07", "wpb": "118140", "bsz": "256", "num_updates": "653600", "lr": "0.000349899", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "213370"} +[2022-08-03 12:45:23,060][train_inner][INFO] - {"epoch": 13, "update": 12.703, "loss": "2.081", "ppl": "4.23", "wps": "361324", "ups": "3.07", "wpb": "117854", "bsz": "256", "num_updates": "653800", "lr": "0.000349697", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "213435"} +[2022-08-03 12:46:28,247][train_inner][INFO] - {"epoch": 13, "update": 12.707, "loss": "2.076", "ppl": "4.22", "wps": "363217", "ups": "3.07", "wpb": "118383", "bsz": "256", "num_updates": "654000", "lr": "0.000349495", "gnorm": "0.723", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "213501"} +[2022-08-03 12:47:33,029][train_inner][INFO] - {"epoch": 13, "update": 12.711, "loss": "2.081", "ppl": "4.23", "wps": "362748", "ups": "3.09", "wpb": "117496", "bsz": "256", "num_updates": "654200", "lr": "0.000349293", "gnorm": "0.727", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "213565"} +[2022-08-03 12:48:37,774][train_inner][INFO] - {"epoch": 13, "update": 12.715, "loss": "2.078", "ppl": "4.22", "wps": "364092", "ups": "3.09", "wpb": "117863", "bsz": "256", "num_updates": "654400", "lr": "0.000349091", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "213630"} +[2022-08-03 12:49:42,835][train_inner][INFO] - {"epoch": 13, "update": 12.719, "loss": "2.073", "ppl": "4.21", "wps": "364237", "ups": "3.07", "wpb": "118488", "bsz": "256", "num_updates": "654600", "lr": "0.000348889", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "213695"} +[2022-08-03 12:50:47,982][train_inner][INFO] - {"epoch": 13, "update": 12.722, "loss": "2.069", "ppl": "4.2", "wps": "364174", "ups": "3.07", "wpb": "118621", "bsz": "256", "num_updates": "654800", "lr": "0.000348687", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "213760"} +[2022-08-03 12:51:52,867][train_inner][INFO] - {"epoch": 13, "update": 12.726, "loss": "2.076", "ppl": "4.22", "wps": "363890", "ups": "3.08", "wpb": "118054", "bsz": "256", "num_updates": "655000", "lr": "0.000348485", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "213825"} +[2022-08-03 12:52:57,792][train_inner][INFO] - {"epoch": 13, "update": 12.73, "loss": "2.072", "ppl": "4.2", "wps": "365338", "ups": "3.08", "wpb": "118595", "bsz": "256", "num_updates": "655200", "lr": "0.000348283", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "213890"} +[2022-08-03 12:54:02,971][train_inner][INFO] - {"epoch": 13, "update": 12.734, "loss": "2.067", "ppl": "4.19", "wps": "364365", "ups": "3.07", "wpb": "118743", "bsz": "256", "num_updates": "655400", "lr": "0.000348081", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "213955"} +[2022-08-03 12:55:07,765][train_inner][INFO] - {"epoch": 13, "update": 12.738, "loss": "2.072", "ppl": "4.2", "wps": "365756", "ups": "3.09", "wpb": "118493", "bsz": "256", "num_updates": "655600", "lr": "0.000347879", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "214020"} +[2022-08-03 12:56:12,586][train_inner][INFO] - {"epoch": 13, "update": 12.742, "loss": "2.079", "ppl": "4.22", "wps": "365218", "ups": "3.09", "wpb": "118367", "bsz": "256", "num_updates": "655800", "lr": "0.000347677", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "214085"} +[2022-08-03 12:57:17,545][train_inner][INFO] - {"epoch": 13, "update": 12.746, "loss": "2.07", "ppl": "4.2", "wps": "365993", "ups": "3.08", "wpb": "118871", "bsz": "256", "num_updates": "656000", "lr": "0.000347475", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "214150"} +[2022-08-03 12:58:22,172][train_inner][INFO] - {"epoch": 13, "update": 12.75, "loss": "2.078", "ppl": "4.22", "wps": "366893", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "656200", "lr": "0.000347273", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "214214"} +[2022-08-03 12:59:27,065][train_inner][INFO] - {"epoch": 13, "update": 12.754, "loss": "2.076", "ppl": "4.22", "wps": "365309", "ups": "3.08", "wpb": "118530", "bsz": "256", "num_updates": "656400", "lr": "0.000347071", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "214279"} +[2022-08-03 13:00:16,152][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 13:00:32,562][train_inner][INFO] - {"epoch": 13, "update": 12.757, "loss": "2.074", "ppl": "4.21", "wps": "361632", "ups": "3.05", "wpb": "118426", "bsz": "256", "num_updates": "656600", "lr": "0.000346869", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "214345"} +[2022-08-03 13:01:37,386][train_inner][INFO] - {"epoch": 13, "update": 12.761, "loss": "2.065", "ppl": "4.18", "wps": "368951", "ups": "3.09", "wpb": "119585", "bsz": "256", "num_updates": "656800", "lr": "0.000346667", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "214410"} +[2022-08-03 13:02:42,689][train_inner][INFO] - {"epoch": 13, "update": 12.765, "loss": "2.064", "ppl": "4.18", "wps": "363015", "ups": "3.06", "wpb": "118527", "bsz": "256", "num_updates": "657000", "lr": "0.000346465", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "214475"} +[2022-08-03 13:03:47,388][train_inner][INFO] - {"epoch": 13, "update": 12.769, "loss": "2.068", "ppl": "4.19", "wps": "367470", "ups": "3.09", "wpb": "118873", "bsz": "256", "num_updates": "657200", "lr": "0.000346263", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "214540"} +[2022-08-03 13:04:52,430][train_inner][INFO] - {"epoch": 13, "update": 12.773, "loss": "2.072", "ppl": "4.2", "wps": "365175", "ups": "3.07", "wpb": "118757", "bsz": "256", "num_updates": "657400", "lr": "0.000346061", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "214605"} +[2022-08-03 13:05:57,179][train_inner][INFO] - {"epoch": 13, "update": 12.777, "loss": "2.074", "ppl": "4.21", "wps": "365097", "ups": "3.09", "wpb": "118197", "bsz": "256", "num_updates": "657600", "lr": "0.000345859", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "214670"} +[2022-08-03 13:07:01,891][train_inner][INFO] - {"epoch": 13, "update": 12.781, "loss": "2.074", "ppl": "4.21", "wps": "364881", "ups": "3.09", "wpb": "118060", "bsz": "256", "num_updates": "657800", "lr": "0.000345657", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.3", "wall": "214734"} +[2022-08-03 13:08:06,764][train_inner][INFO] - {"epoch": 13, "update": 12.785, "loss": "2.074", "ppl": "4.21", "wps": "365716", "ups": "3.08", "wpb": "118624", "bsz": "256", "num_updates": "658000", "lr": "0.000345455", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "214799"} +[2022-08-03 13:09:11,827][train_inner][INFO] - {"epoch": 13, "update": 12.789, "loss": "2.073", "ppl": "4.21", "wps": "363370", "ups": "3.07", "wpb": "118207", "bsz": "256", "num_updates": "658200", "lr": "0.000345253", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "214864"} +[2022-08-03 13:10:16,676][train_inner][INFO] - {"epoch": 13, "update": 12.792, "loss": "2.075", "ppl": "4.21", "wps": "363421", "ups": "3.08", "wpb": "117835", "bsz": "256", "num_updates": "658400", "lr": "0.000345051", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "214929"} +[2022-08-03 13:11:21,809][train_inner][INFO] - {"epoch": 13, "update": 12.796, "loss": "2.073", "ppl": "4.21", "wps": "363527", "ups": "3.07", "wpb": "118387", "bsz": "256", "num_updates": "658600", "lr": "0.000344848", "gnorm": "0.723", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "214994"} +[2022-08-03 13:11:38,594][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 13:12:27,037][train_inner][INFO] - {"epoch": 13, "update": 12.8, "loss": "2.075", "ppl": "4.21", "wps": "361605", "ups": "3.07", "wpb": "117931", "bsz": "256", "num_updates": "658800", "lr": "0.000344646", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "215059"} +[2022-08-03 13:13:31,919][train_inner][INFO] - {"epoch": 13, "update": 12.804, "loss": "2.071", "ppl": "4.2", "wps": "363734", "ups": "3.08", "wpb": "117998", "bsz": "256", "num_updates": "659000", "lr": "0.000344444", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "215124"} +[2022-08-03 13:14:36,767][train_inner][INFO] - {"epoch": 13, "update": 12.808, "loss": "2.075", "ppl": "4.21", "wps": "363983", "ups": "3.08", "wpb": "118016", "bsz": "256", "num_updates": "659200", "lr": "0.000344242", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "215189"} +[2022-08-03 13:15:41,735][train_inner][INFO] - {"epoch": 13, "update": 12.812, "loss": "2.068", "ppl": "4.19", "wps": "363625", "ups": "3.08", "wpb": "118118", "bsz": "256", "num_updates": "659400", "lr": "0.00034404", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "215254"} +[2022-08-03 13:16:46,804][train_inner][INFO] - {"epoch": 13, "update": 12.816, "loss": "2.073", "ppl": "4.21", "wps": "363176", "ups": "3.07", "wpb": "118156", "bsz": "256", "num_updates": "659600", "lr": "0.000343838", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "215319"} +[2022-08-03 13:17:51,892][train_inner][INFO] - {"epoch": 13, "update": 12.82, "loss": "2.071", "ppl": "4.2", "wps": "362936", "ups": "3.07", "wpb": "118111", "bsz": "256", "num_updates": "659800", "lr": "0.000343636", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "215384"} +[2022-08-03 13:18:56,773][train_inner][INFO] - {"epoch": 13, "update": 12.823, "loss": "2.07", "ppl": "4.2", "wps": "364899", "ups": "3.08", "wpb": "118373", "bsz": "256", "num_updates": "660000", "lr": "0.000343434", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "215449"} +[2022-08-03 13:20:01,591][train_inner][INFO] - {"epoch": 13, "update": 12.827, "loss": "2.073", "ppl": "4.21", "wps": "367119", "ups": "3.09", "wpb": "118979", "bsz": "256", "num_updates": "660200", "lr": "0.000343232", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "215514"} +[2022-08-03 13:21:06,847][train_inner][INFO] - {"epoch": 13, "update": 12.831, "loss": "2.071", "ppl": "4.2", "wps": "362184", "ups": "3.06", "wpb": "118171", "bsz": "256", "num_updates": "660400", "lr": "0.00034303", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "215579"} +[2022-08-03 13:22:11,868][train_inner][INFO] - {"epoch": 13, "update": 12.835, "loss": "2.071", "ppl": "4.2", "wps": "364216", "ups": "3.08", "wpb": "118407", "bsz": "256", "num_updates": "660600", "lr": "0.000342828", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "215644"} +[2022-08-03 13:23:17,626][train_inner][INFO] - {"epoch": 13, "update": 12.839, "loss": "2.079", "ppl": "4.22", "wps": "358524", "ups": "3.04", "wpb": "117877", "bsz": "256", "num_updates": "660800", "lr": "0.000342626", "gnorm": "0.727", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "215710"} +[2022-08-03 13:23:24,104][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 13:24:23,022][train_inner][INFO] - {"epoch": 13, "update": 12.843, "loss": "2.072", "ppl": "4.21", "wps": "360728", "ups": "3.06", "wpb": "117949", "bsz": "256", "num_updates": "661000", "lr": "0.000342424", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "215775"} +[2022-08-03 13:25:28,085][train_inner][INFO] - {"epoch": 13, "update": 12.847, "loss": "2.066", "ppl": "4.19", "wps": "364993", "ups": "3.07", "wpb": "118736", "bsz": "256", "num_updates": "661200", "lr": "0.000342222", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "215840"} +[2022-08-03 13:26:32,679][train_inner][INFO] - {"epoch": 13, "update": 12.851, "loss": "2.075", "ppl": "4.21", "wps": "364353", "ups": "3.1", "wpb": "117674", "bsz": "256", "num_updates": "661400", "lr": "0.00034202", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "215905"} +[2022-08-03 13:27:37,574][train_inner][INFO] - {"epoch": 13, "update": 12.855, "loss": "2.074", "ppl": "4.21", "wps": "365146", "ups": "3.08", "wpb": "118479", "bsz": "256", "num_updates": "661600", "lr": "0.000341818", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "215970"} +[2022-08-03 13:28:42,376][train_inner][INFO] - {"epoch": 13, "update": 12.858, "loss": "2.075", "ppl": "4.21", "wps": "363036", "ups": "3.09", "wpb": "117626", "bsz": "256", "num_updates": "661800", "lr": "0.000341616", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "216035"} +[2022-08-03 13:29:47,618][train_inner][INFO] - {"epoch": 13, "update": 12.862, "loss": "2.073", "ppl": "4.21", "wps": "363242", "ups": "3.07", "wpb": "118491", "bsz": "256", "num_updates": "662000", "lr": "0.000341414", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "216100"} +[2022-08-03 13:30:52,344][train_inner][INFO] - {"epoch": 13, "update": 12.866, "loss": "2.07", "ppl": "4.2", "wps": "366107", "ups": "3.09", "wpb": "118482", "bsz": "256", "num_updates": "662200", "lr": "0.000341212", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "216165"} +[2022-08-03 13:31:57,138][train_inner][INFO] - {"epoch": 13, "update": 12.87, "loss": "2.079", "ppl": "4.22", "wps": "364518", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "662400", "lr": "0.00034101", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "216229"} +[2022-08-03 13:32:50,368][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 13:33:02,410][train_inner][INFO] - {"epoch": 13, "update": 12.874, "loss": "2.076", "ppl": "4.22", "wps": "363602", "ups": "3.06", "wpb": "118664", "bsz": "256", "num_updates": "662600", "lr": "0.000340808", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "216295"} +[2022-08-03 13:34:07,506][train_inner][INFO] - {"epoch": 13, "update": 12.878, "loss": "2.064", "ppl": "4.18", "wps": "366050", "ups": "3.07", "wpb": "119142", "bsz": "256", "num_updates": "662800", "lr": "0.000340606", "gnorm": "0.723", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "216360"} +[2022-08-03 13:35:12,437][train_inner][INFO] - {"epoch": 13, "update": 12.882, "loss": "2.076", "ppl": "4.22", "wps": "363863", "ups": "3.08", "wpb": "118127", "bsz": "256", "num_updates": "663000", "lr": "0.000340404", "gnorm": "0.727", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.4", "wall": "216425"} +[2022-08-03 13:36:03,656][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 13:36:17,971][train_inner][INFO] - {"epoch": 13, "update": 12.886, "loss": "2.066", "ppl": "4.19", "wps": "362282", "ups": "3.05", "wpb": "118707", "bsz": "256", "num_updates": "663200", "lr": "0.000340202", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "216490"} +[2022-08-03 13:37:22,757][train_inner][INFO] - {"epoch": 13, "update": 12.89, "loss": "2.07", "ppl": "4.2", "wps": "365529", "ups": "3.09", "wpb": "118407", "bsz": "256", "num_updates": "663400", "lr": "0.00034", "gnorm": "0.723", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "216555"} +[2022-08-03 13:38:27,566][train_inner][INFO] - {"epoch": 13, "update": 12.893, "loss": "2.072", "ppl": "4.21", "wps": "364585", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "663600", "lr": "0.000339798", "gnorm": "0.727", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.3", "wall": "216620"} +[2022-08-03 13:39:32,504][train_inner][INFO] - {"epoch": 13, "update": 12.897, "loss": "2.065", "ppl": "4.18", "wps": "365727", "ups": "3.08", "wpb": "118746", "bsz": "256", "num_updates": "663800", "lr": "0.000339596", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "216685"} +[2022-08-03 13:40:37,378][train_inner][INFO] - {"epoch": 13, "update": 12.901, "loss": "2.07", "ppl": "4.2", "wps": "361406", "ups": "3.08", "wpb": "117227", "bsz": "256", "num_updates": "664000", "lr": "0.000339394", "gnorm": "0.731", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "216750"} +[2022-08-03 13:41:42,127][train_inner][INFO] - {"epoch": 13, "update": 12.905, "loss": "2.068", "ppl": "4.19", "wps": "364500", "ups": "3.09", "wpb": "118003", "bsz": "256", "num_updates": "664200", "lr": "0.000339192", "gnorm": "0.727", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "216814"} +[2022-08-03 13:42:46,781][train_inner][INFO] - {"epoch": 13, "update": 12.909, "loss": "2.074", "ppl": "4.21", "wps": "365577", "ups": "3.09", "wpb": "118178", "bsz": "256", "num_updates": "664400", "lr": "0.00033899", "gnorm": "0.725", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "216879"} +[2022-08-03 13:43:51,510][train_inner][INFO] - {"epoch": 13, "update": 12.913, "loss": "2.07", "ppl": "4.2", "wps": "364001", "ups": "3.09", "wpb": "117805", "bsz": "256", "num_updates": "664600", "lr": "0.000338788", "gnorm": "0.726", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "216944"} +[2022-08-03 13:44:56,561][train_inner][INFO] - {"epoch": 13, "update": 12.917, "loss": "2.074", "ppl": "4.21", "wps": "362154", "ups": "3.07", "wpb": "117790", "bsz": "256", "num_updates": "664800", "lr": "0.000338586", "gnorm": "0.729", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "217009"} +[2022-08-03 13:46:01,475][train_inner][INFO] - {"epoch": 13, "update": 12.921, "loss": "2.061", "ppl": "4.17", "wps": "365404", "ups": "3.08", "wpb": "118597", "bsz": "256", "num_updates": "665000", "lr": "0.000338384", "gnorm": "0.726", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "217074"} +[2022-08-03 13:47:06,188][train_inner][INFO] - {"epoch": 13, "update": 12.925, "loss": "2.069", "ppl": "4.2", "wps": "365254", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "665200", "lr": "0.000338182", "gnorm": "0.725", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.8", "wall": "217139"} +[2022-08-03 13:48:11,271][train_inner][INFO] - {"epoch": 13, "update": 12.928, "loss": "2.063", "ppl": "4.18", "wps": "363337", "ups": "3.07", "wpb": "118234", "bsz": "256", "num_updates": "665400", "lr": "0.00033798", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "217204"} +[2022-08-03 13:49:16,357][train_inner][INFO] - {"epoch": 13, "update": 12.932, "loss": "2.072", "ppl": "4.21", "wps": "364604", "ups": "3.07", "wpb": "118652", "bsz": "256", "num_updates": "665600", "lr": "0.000337778", "gnorm": "0.727", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "217269"} +[2022-08-03 13:50:21,567][train_inner][INFO] - {"epoch": 13, "update": 12.936, "loss": "2.066", "ppl": "4.19", "wps": "365038", "ups": "3.07", "wpb": "119018", "bsz": "256", "num_updates": "665800", "lr": "0.000337576", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "217334"} +[2022-08-03 13:51:26,681][train_inner][INFO] - {"epoch": 13, "update": 12.94, "loss": "2.067", "ppl": "4.19", "wps": "363909", "ups": "3.07", "wpb": "118477", "bsz": "256", "num_updates": "666000", "lr": "0.000337374", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.1", "wall": "217399"} +[2022-08-03 13:52:31,236][train_inner][INFO] - {"epoch": 13, "update": 12.944, "loss": "2.069", "ppl": "4.2", "wps": "366097", "ups": "3.1", "wpb": "118164", "bsz": "256", "num_updates": "666200", "lr": "0.000337172", "gnorm": "0.728", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "217464"} +[2022-08-03 13:53:36,117][train_inner][INFO] - {"epoch": 13, "update": 12.948, "loss": "2.068", "ppl": "4.19", "wps": "364649", "ups": "3.08", "wpb": "118293", "bsz": "256", "num_updates": "666400", "lr": "0.00033697", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "217528"} +[2022-08-03 13:54:41,026][train_inner][INFO] - {"epoch": 13, "update": 12.952, "loss": "2.074", "ppl": "4.21", "wps": "364008", "ups": "3.08", "wpb": "118134", "bsz": "256", "num_updates": "666600", "lr": "0.000336768", "gnorm": "0.729", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "217593"} +[2022-08-03 13:55:45,819][train_inner][INFO] - {"epoch": 13, "update": 12.956, "loss": "2.065", "ppl": "4.18", "wps": "367155", "ups": "3.09", "wpb": "118944", "bsz": "256", "num_updates": "666800", "lr": "0.000336566", "gnorm": "0.728", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "217658"} +[2022-08-03 13:56:50,706][train_inner][INFO] - {"epoch": 13, "update": 12.96, "loss": "2.073", "ppl": "4.21", "wps": "363241", "ups": "3.08", "wpb": "117846", "bsz": "256", "num_updates": "667000", "lr": "0.000336364", "gnorm": "0.73", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "217723"} +[2022-08-03 13:57:55,854][train_inner][INFO] - {"epoch": 13, "update": 12.963, "loss": "2.064", "ppl": "4.18", "wps": "364109", "ups": "3.07", "wpb": "118604", "bsz": "256", "num_updates": "667200", "lr": "0.000336162", "gnorm": "0.726", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "217788"} +[2022-08-03 13:59:00,338][train_inner][INFO] - {"epoch": 13, "update": 12.967, "loss": "2.064", "ppl": "4.18", "wps": "366941", "ups": "3.1", "wpb": "118305", "bsz": "256", "num_updates": "667400", "lr": "0.00033596", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "217853"} +[2022-08-03 14:00:04,771][train_inner][INFO] - {"epoch": 13, "update": 12.971, "loss": "2.071", "ppl": "4.2", "wps": "366986", "ups": "3.1", "wpb": "118228", "bsz": "256", "num_updates": "667600", "lr": "0.000335758", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "217917"} +[2022-08-03 14:01:09,453][train_inner][INFO] - {"epoch": 13, "update": 12.975, "loss": "2.07", "ppl": "4.2", "wps": "365516", "ups": "3.09", "wpb": "118211", "bsz": "256", "num_updates": "667800", "lr": "0.000335556", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "217982"} +[2022-08-03 14:02:14,266][train_inner][INFO] - {"epoch": 13, "update": 12.979, "loss": "2.066", "ppl": "4.19", "wps": "362640", "ups": "3.09", "wpb": "117517", "bsz": "256", "num_updates": "668000", "lr": "0.000335354", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "218047"} +[2022-08-03 14:03:18,879][train_inner][INFO] - {"epoch": 13, "update": 12.983, "loss": "2.068", "ppl": "4.19", "wps": "366945", "ups": "3.1", "wpb": "118544", "bsz": "256", "num_updates": "668200", "lr": "0.000335152", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.4", "wall": "218111"} +[2022-08-03 14:04:23,606][train_inner][INFO] - {"epoch": 13, "update": 12.987, "loss": "2.067", "ppl": "4.19", "wps": "364768", "ups": "3.09", "wpb": "118049", "bsz": "256", "num_updates": "668400", "lr": "0.000334949", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "218176"} +[2022-08-03 14:05:28,124][train_inner][INFO] - {"epoch": 13, "update": 12.991, "loss": "2.064", "ppl": "4.18", "wps": "366065", "ups": "3.1", "wpb": "118088", "bsz": "256", "num_updates": "668600", "lr": "0.000334747", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "218240"} +[2022-08-03 14:06:33,166][train_inner][INFO] - {"epoch": 13, "update": 12.994, "loss": "2.067", "ppl": "4.19", "wps": "362114", "ups": "3.07", "wpb": "117761", "bsz": "256", "num_updates": "668800", "lr": "0.000334545", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "218305"} +[2022-08-03 14:07:38,126][train_inner][INFO] - {"epoch": 13, "update": 12.998, "loss": "2.074", "ppl": "4.21", "wps": "364253", "ups": "3.08", "wpb": "118308", "bsz": "256", "num_updates": "669000", "lr": "0.000334343", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "218370"} +[2022-08-03 14:08:05,744][fairseq_cli.train][INFO] - end of epoch 13 (average epoch stats below) +[2022-08-03 14:08:05,744][train][INFO] - {"epoch": 13, "train_loss": "2.081", "train_ppl": "4.23", "train_wps": "362660", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "669085", "train_lr": "0.000334258", "train_gnorm": "0.719", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16618", "train_gb_free": "22.3", "train_wall": "218398"} +[2022-08-03 14:08:05,855][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-03 14:08:05,858][fairseq.trainer][INFO] - begin training epoch 14 +[2022-08-03 14:08:05,858][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-03 14:08:56,438][train_inner][INFO] - {"epoch": 14, "update": 13.002, "loss": "2.064", "ppl": "4.18", "wps": "301922", "ups": "2.55", "wpb": "118219", "bsz": "255.4", "num_updates": "669200", "lr": "0.000334141", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "218449"} +[2022-08-03 14:09:38,379][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 14:10:01,025][train_inner][INFO] - {"epoch": 14, "update": 13.006, "loss": "2.063", "ppl": "4.18", "wps": "364966", "ups": "3.1", "wpb": "117859", "bsz": "256", "num_updates": "669400", "lr": "0.000333939", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "218513"} +[2022-08-03 14:11:05,909][train_inner][INFO] - {"epoch": 14, "update": 13.01, "loss": "2.056", "ppl": "4.16", "wps": "365424", "ups": "3.08", "wpb": "118549", "bsz": "256", "num_updates": "669600", "lr": "0.000333737", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "218578"} +[2022-08-03 14:12:10,335][train_inner][INFO] - {"epoch": 14, "update": 13.014, "loss": "2.057", "ppl": "4.16", "wps": "368979", "ups": "3.1", "wpb": "118858", "bsz": "256", "num_updates": "669800", "lr": "0.000333535", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "218643"} +[2022-08-03 14:13:15,030][train_inner][INFO] - {"epoch": 14, "update": 13.018, "loss": "2.06", "ppl": "4.17", "wps": "365555", "ups": "3.09", "wpb": "118246", "bsz": "256", "num_updates": "670000", "lr": "0.000333333", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "218707"} +[2022-08-03 14:14:19,521][train_inner][INFO] - {"epoch": 14, "update": 13.022, "loss": "2.057", "ppl": "4.16", "wps": "367650", "ups": "3.1", "wpb": "118548", "bsz": "256", "num_updates": "670200", "lr": "0.000333131", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "218772"} +[2022-08-03 14:15:24,368][train_inner][INFO] - {"epoch": 14, "update": 13.026, "loss": "2.063", "ppl": "4.18", "wps": "363998", "ups": "3.08", "wpb": "118019", "bsz": "256", "num_updates": "670400", "lr": "0.000332929", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "218837"} +[2022-08-03 14:16:28,926][train_inner][INFO] - {"epoch": 14, "update": 13.029, "loss": "2.061", "ppl": "4.17", "wps": "365625", "ups": "3.1", "wpb": "118018", "bsz": "256", "num_updates": "670600", "lr": "0.000332727", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "218901"} +[2022-08-03 14:17:33,480][train_inner][INFO] - {"epoch": 14, "update": 13.033, "loss": "2.063", "ppl": "4.18", "wps": "366807", "ups": "3.1", "wpb": "118394", "bsz": "256", "num_updates": "670800", "lr": "0.000332525", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.6", "wall": "218966"} +[2022-08-03 14:18:38,655][train_inner][INFO] - {"epoch": 14, "update": 13.037, "loss": "2.065", "ppl": "4.18", "wps": "365440", "ups": "3.07", "wpb": "119086", "bsz": "256", "num_updates": "671000", "lr": "0.000332323", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "219031"} +[2022-08-03 14:19:43,396][train_inner][INFO] - {"epoch": 14, "update": 13.041, "loss": "2.064", "ppl": "4.18", "wps": "366150", "ups": "3.09", "wpb": "118523", "bsz": "256", "num_updates": "671200", "lr": "0.000332121", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "219096"} +[2022-08-03 14:20:47,834][train_inner][INFO] - {"epoch": 14, "update": 13.045, "loss": "2.069", "ppl": "4.2", "wps": "366376", "ups": "3.1", "wpb": "118040", "bsz": "256", "num_updates": "671400", "lr": "0.000331919", "gnorm": "0.727", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "219160"} +[2022-08-03 14:20:59,885][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 14:21:53,087][train_inner][INFO] - {"epoch": 14, "update": 13.049, "loss": "2.062", "ppl": "4.18", "wps": "362894", "ups": "3.07", "wpb": "118398", "bsz": "256", "num_updates": "671600", "lr": "0.000331717", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "219225"} +[2022-08-03 14:22:58,349][train_inner][INFO] - {"epoch": 14, "update": 13.053, "loss": "2.06", "ppl": "4.17", "wps": "363308", "ups": "3.06", "wpb": "118550", "bsz": "256", "num_updates": "671800", "lr": "0.000331515", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "219291"} +[2022-08-03 14:24:03,290][train_inner][INFO] - {"epoch": 14, "update": 13.057, "loss": "2.069", "ppl": "4.19", "wps": "364502", "ups": "3.08", "wpb": "118355", "bsz": "256", "num_updates": "672000", "lr": "0.000331313", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "219356"} +[2022-08-03 14:25:08,041][train_inner][INFO] - {"epoch": 14, "update": 13.061, "loss": "2.065", "ppl": "4.18", "wps": "366450", "ups": "3.09", "wpb": "118637", "bsz": "256", "num_updates": "672200", "lr": "0.000331111", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "219420"} +[2022-08-03 14:26:12,947][train_inner][INFO] - {"epoch": 14, "update": 13.064, "loss": "2.06", "ppl": "4.17", "wps": "364349", "ups": "3.08", "wpb": "118241", "bsz": "256", "num_updates": "672400", "lr": "0.000330909", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "219485"} +[2022-08-03 14:27:18,031][train_inner][INFO] - {"epoch": 14, "update": 13.068, "loss": "2.061", "ppl": "4.17", "wps": "363648", "ups": "3.07", "wpb": "118337", "bsz": "256", "num_updates": "672600", "lr": "0.000330707", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "219550"} +[2022-08-03 14:28:22,632][train_inner][INFO] - {"epoch": 14, "update": 13.072, "loss": "2.057", "ppl": "4.16", "wps": "367318", "ups": "3.1", "wpb": "118643", "bsz": "256", "num_updates": "672800", "lr": "0.000330505", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "219615"} +[2022-08-03 14:29:27,739][train_inner][INFO] - {"epoch": 14, "update": 13.076, "loss": "2.059", "ppl": "4.17", "wps": "362926", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "673000", "lr": "0.000330303", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.7", "wall": "219680"} +[2022-08-03 14:30:32,675][train_inner][INFO] - {"epoch": 14, "update": 13.08, "loss": "2.062", "ppl": "4.18", "wps": "364375", "ups": "3.08", "wpb": "118304", "bsz": "256", "num_updates": "673200", "lr": "0.000330101", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "219745"} +[2022-08-03 14:31:37,749][train_inner][INFO] - {"epoch": 14, "update": 13.084, "loss": "2.059", "ppl": "4.17", "wps": "363615", "ups": "3.07", "wpb": "118308", "bsz": "256", "num_updates": "673400", "lr": "0.000329899", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "219810"} +[2022-08-03 14:32:28,413][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 14:32:42,519][train_inner][INFO] - {"epoch": 14, "update": 13.088, "loss": "2.062", "ppl": "4.18", "wps": "364391", "ups": "3.09", "wpb": "118006", "bsz": "256", "num_updates": "673600", "lr": "0.000329697", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "219875"} +[2022-08-03 14:33:47,379][train_inner][INFO] - {"epoch": 14, "update": 13.092, "loss": "2.062", "ppl": "4.18", "wps": "365028", "ups": "3.08", "wpb": "118377", "bsz": "256", "num_updates": "673800", "lr": "0.000329495", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "219940"} +[2022-08-03 14:34:52,321][train_inner][INFO] - {"epoch": 14, "update": 13.096, "loss": "2.061", "ppl": "4.17", "wps": "365350", "ups": "3.08", "wpb": "118631", "bsz": "256", "num_updates": "674000", "lr": "0.000329293", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "220005"} +[2022-08-03 14:35:57,347][train_inner][INFO] - {"epoch": 14, "update": 13.099, "loss": "2.062", "ppl": "4.18", "wps": "365151", "ups": "3.08", "wpb": "118720", "bsz": "256", "num_updates": "674200", "lr": "0.000329091", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "220070"} +[2022-08-03 14:37:02,851][train_inner][INFO] - {"epoch": 14, "update": 13.103, "loss": "2.07", "ppl": "4.2", "wps": "361856", "ups": "3.05", "wpb": "118514", "bsz": "256", "num_updates": "674400", "lr": "0.000328889", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "220135"} +[2022-08-03 14:38:07,608][train_inner][INFO] - {"epoch": 14, "update": 13.107, "loss": "2.061", "ppl": "4.17", "wps": "363648", "ups": "3.09", "wpb": "117741", "bsz": "256", "num_updates": "674600", "lr": "0.000328687", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "220200"} +[2022-08-03 14:39:12,784][train_inner][INFO] - {"epoch": 14, "update": 13.111, "loss": "2.063", "ppl": "4.18", "wps": "363232", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "674800", "lr": "0.000328485", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "220265"} +[2022-08-03 14:40:17,961][train_inner][INFO] - {"epoch": 14, "update": 13.115, "loss": "2.062", "ppl": "4.18", "wps": "362070", "ups": "3.07", "wpb": "117991", "bsz": "256", "num_updates": "675000", "lr": "0.000328283", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "220330"} +[2022-08-03 14:41:22,835][train_inner][INFO] - {"epoch": 14, "update": 13.119, "loss": "2.069", "ppl": "4.2", "wps": "360922", "ups": "3.08", "wpb": "117071", "bsz": "256", "num_updates": "675200", "lr": "0.000328081", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "220395"} +[2022-08-03 14:42:27,563][train_inner][INFO] - {"epoch": 14, "update": 13.123, "loss": "2.065", "ppl": "4.18", "wps": "363787", "ups": "3.09", "wpb": "117734", "bsz": "256", "num_updates": "675400", "lr": "0.000327879", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "220460"} +[2022-08-03 14:43:32,537][train_inner][INFO] - {"epoch": 14, "update": 13.127, "loss": "2.06", "ppl": "4.17", "wps": "363754", "ups": "3.08", "wpb": "118171", "bsz": "256", "num_updates": "675600", "lr": "0.000327677", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "220525"} +[2022-08-03 14:43:40,185][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 14:44:37,718][train_inner][INFO] - {"epoch": 14, "update": 13.13, "loss": "2.062", "ppl": "4.17", "wps": "363537", "ups": "3.07", "wpb": "118475", "bsz": "256", "num_updates": "675800", "lr": "0.000327475", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "220590"} +[2022-08-03 14:45:43,596][train_inner][INFO] - {"epoch": 14, "update": 13.134, "loss": "2.062", "ppl": "4.18", "wps": "358268", "ups": "3.04", "wpb": "118010", "bsz": "256", "num_updates": "676000", "lr": "0.000327273", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.4", "wall": "220656"} +[2022-08-03 14:46:48,437][train_inner][INFO] - {"epoch": 14, "update": 13.138, "loss": "2.062", "ppl": "4.18", "wps": "364921", "ups": "3.08", "wpb": "118307", "bsz": "256", "num_updates": "676200", "lr": "0.000327071", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "220721"} +[2022-08-03 14:47:53,431][train_inner][INFO] - {"epoch": 14, "update": 13.142, "loss": "2.056", "ppl": "4.16", "wps": "364677", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "676400", "lr": "0.000326869", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "220786"} +[2022-08-03 14:48:59,492][train_inner][INFO] - {"epoch": 14, "update": 13.146, "loss": "2.06", "ppl": "4.17", "wps": "358118", "ups": "3.03", "wpb": "118286", "bsz": "256", "num_updates": "676600", "lr": "0.000326667", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.5", "wall": "220852"} +[2022-08-03 14:50:04,659][train_inner][INFO] - {"epoch": 14, "update": 13.15, "loss": "2.062", "ppl": "4.18", "wps": "364248", "ups": "3.07", "wpb": "118682", "bsz": "256", "num_updates": "676800", "lr": "0.000326465", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "220917"} +[2022-08-03 14:51:09,509][train_inner][INFO] - {"epoch": 14, "update": 13.154, "loss": "2.055", "ppl": "4.16", "wps": "366406", "ups": "3.08", "wpb": "118806", "bsz": "256", "num_updates": "677000", "lr": "0.000326263", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "220982"} +[2022-08-03 14:52:15,446][train_inner][INFO] - {"epoch": 14, "update": 13.158, "loss": "2.064", "ppl": "4.18", "wps": "357161", "ups": "3.03", "wpb": "117748", "bsz": "256", "num_updates": "677200", "lr": "0.000326061", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "27.9", "wall": "221048"} +[2022-08-03 14:53:20,256][train_inner][INFO] - {"epoch": 14, "update": 13.162, "loss": "2.06", "ppl": "4.17", "wps": "364618", "ups": "3.09", "wpb": "118154", "bsz": "256", "num_updates": "677400", "lr": "0.000325859", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "221113"} +[2022-08-03 14:54:24,991][train_inner][INFO] - {"epoch": 14, "update": 13.165, "loss": "2.066", "ppl": "4.19", "wps": "362840", "ups": "3.09", "wpb": "117440", "bsz": "256", "num_updates": "677600", "lr": "0.000325657", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "221177"} +[2022-08-03 14:54:55,621][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 14:55:30,322][train_inner][INFO] - {"epoch": 14, "update": 13.169, "loss": "2.062", "ppl": "4.18", "wps": "362502", "ups": "3.06", "wpb": "118411", "bsz": "256", "num_updates": "677800", "lr": "0.000325455", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "221243"} +[2022-08-03 14:56:35,323][train_inner][INFO] - {"epoch": 14, "update": 13.173, "loss": "2.055", "ppl": "4.16", "wps": "364763", "ups": "3.08", "wpb": "118548", "bsz": "256", "num_updates": "678000", "lr": "0.000325253", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "221308"} +[2022-08-03 14:57:40,303][train_inner][INFO] - {"epoch": 14, "update": 13.177, "loss": "2.057", "ppl": "4.16", "wps": "364859", "ups": "3.08", "wpb": "118542", "bsz": "256", "num_updates": "678200", "lr": "0.000325051", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "221373"} +[2022-08-03 14:58:45,290][train_inner][INFO] - {"epoch": 14, "update": 13.181, "loss": "2.057", "ppl": "4.16", "wps": "363297", "ups": "3.08", "wpb": "118046", "bsz": "256", "num_updates": "678400", "lr": "0.000324848", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "221438"} +[2022-08-03 14:59:50,021][train_inner][INFO] - {"epoch": 14, "update": 13.185, "loss": "2.065", "ppl": "4.18", "wps": "364296", "ups": "3.09", "wpb": "117903", "bsz": "256", "num_updates": "678600", "lr": "0.000324646", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "221502"} +[2022-08-03 15:00:54,983][train_inner][INFO] - {"epoch": 14, "update": 13.189, "loss": "2.064", "ppl": "4.18", "wps": "362148", "ups": "3.08", "wpb": "117628", "bsz": "256", "num_updates": "678800", "lr": "0.000324444", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "221567"} +[2022-08-03 15:02:00,830][train_inner][INFO] - {"epoch": 14, "update": 13.193, "loss": "2.059", "ppl": "4.17", "wps": "359535", "ups": "3.04", "wpb": "118369", "bsz": "256", "num_updates": "679000", "lr": "0.000324242", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "221633"} +[2022-08-03 15:03:05,847][train_inner][INFO] - {"epoch": 14, "update": 13.197, "loss": "2.057", "ppl": "4.16", "wps": "363052", "ups": "3.08", "wpb": "118022", "bsz": "256", "num_updates": "679200", "lr": "0.00032404", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "221698"} +[2022-08-03 15:03:23,404][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 15:04:10,785][train_inner][INFO] - {"epoch": 14, "update": 13.2, "loss": "2.061", "ppl": "4.17", "wps": "365820", "ups": "3.08", "wpb": "118776", "bsz": "256", "num_updates": "679400", "lr": "0.000323838", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "221763"} +[2022-08-03 15:05:15,931][train_inner][INFO] - {"epoch": 14, "update": 13.204, "loss": "2.06", "ppl": "4.17", "wps": "365745", "ups": "3.07", "wpb": "119132", "bsz": "256", "num_updates": "679600", "lr": "0.000323636", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "221828"} +[2022-08-03 15:06:21,168][train_inner][INFO] - {"epoch": 14, "update": 13.208, "loss": "2.059", "ppl": "4.17", "wps": "363348", "ups": "3.07", "wpb": "118518", "bsz": "256", "num_updates": "679800", "lr": "0.000323434", "gnorm": "0.729", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "221893"} +[2022-08-03 15:07:25,955][train_inner][INFO] - {"epoch": 14, "update": 13.212, "loss": "2.056", "ppl": "4.16", "wps": "363978", "ups": "3.09", "wpb": "117904", "bsz": "256", "num_updates": "680000", "lr": "0.000323232", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.6", "wall": "221958"} +[2022-08-03 15:08:30,851][train_inner][INFO] - {"epoch": 14, "update": 13.216, "loss": "2.064", "ppl": "4.18", "wps": "362510", "ups": "3.08", "wpb": "117625", "bsz": "256", "num_updates": "680200", "lr": "0.00032303", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "222023"} +[2022-08-03 15:09:35,909][train_inner][INFO] - {"epoch": 14, "update": 13.22, "loss": "2.068", "ppl": "4.19", "wps": "364030", "ups": "3.07", "wpb": "118414", "bsz": "256", "num_updates": "680400", "lr": "0.000322828", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "222088"} +[2022-08-03 15:10:40,733][train_inner][INFO] - {"epoch": 14, "update": 13.224, "loss": "2.057", "ppl": "4.16", "wps": "363970", "ups": "3.09", "wpb": "117968", "bsz": "256", "num_updates": "680600", "lr": "0.000322626", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "222153"} +[2022-08-03 15:11:45,163][train_inner][INFO] - {"epoch": 14, "update": 13.228, "loss": "2.063", "ppl": "4.18", "wps": "366576", "ups": "3.1", "wpb": "118091", "bsz": "256", "num_updates": "680800", "lr": "0.000322424", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.4", "wall": "222217"} +[2022-08-03 15:12:50,316][train_inner][INFO] - {"epoch": 14, "update": 13.232, "loss": "2.058", "ppl": "4.16", "wps": "363233", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "681000", "lr": "0.000322222", "gnorm": "0.734", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "222283"} +[2022-08-03 15:13:56,095][train_inner][INFO] - {"epoch": 14, "update": 13.235, "loss": "2.057", "ppl": "4.16", "wps": "360758", "ups": "3.04", "wpb": "118649", "bsz": "256", "num_updates": "681200", "lr": "0.00032202", "gnorm": "0.73", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "222348"} +[2022-08-03 15:15:01,131][train_inner][INFO] - {"epoch": 14, "update": 13.239, "loss": "2.059", "ppl": "4.17", "wps": "364274", "ups": "3.08", "wpb": "118453", "bsz": "256", "num_updates": "681400", "lr": "0.000321818", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "222413"} +[2022-08-03 15:16:06,042][train_inner][INFO] - {"epoch": 14, "update": 13.243, "loss": "2.058", "ppl": "4.16", "wps": "364949", "ups": "3.08", "wpb": "118444", "bsz": "256", "num_updates": "681600", "lr": "0.000321616", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.5", "wall": "222478"} +[2022-08-03 15:17:10,553][train_inner][INFO] - {"epoch": 14, "update": 13.247, "loss": "2.057", "ppl": "4.16", "wps": "369586", "ups": "3.1", "wpb": "119210", "bsz": "256", "num_updates": "681800", "lr": "0.000321414", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "222543"} +[2022-08-03 15:18:15,330][train_inner][INFO] - {"epoch": 14, "update": 13.251, "loss": "2.061", "ppl": "4.17", "wps": "363906", "ups": "3.09", "wpb": "117862", "bsz": "256", "num_updates": "682000", "lr": "0.000321212", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "222608"} +[2022-08-03 15:19:19,932][train_inner][INFO] - {"epoch": 14, "update": 13.255, "loss": "2.057", "ppl": "4.16", "wps": "367105", "ups": "3.1", "wpb": "118577", "bsz": "256", "num_updates": "682200", "lr": "0.00032101", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "222672"} +[2022-08-03 15:20:24,611][train_inner][INFO] - {"epoch": 14, "update": 13.259, "loss": "2.055", "ppl": "4.16", "wps": "367293", "ups": "3.09", "wpb": "118778", "bsz": "256", "num_updates": "682400", "lr": "0.000320808", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "222737"} +[2022-08-03 15:21:29,504][train_inner][INFO] - {"epoch": 14, "update": 13.263, "loss": "2.061", "ppl": "4.17", "wps": "363431", "ups": "3.08", "wpb": "117920", "bsz": "256", "num_updates": "682600", "lr": "0.000320606", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "222802"} +[2022-08-03 15:22:34,312][train_inner][INFO] - {"epoch": 14, "update": 13.266, "loss": "2.062", "ppl": "4.18", "wps": "365696", "ups": "3.09", "wpb": "118497", "bsz": "256", "num_updates": "682800", "lr": "0.000320404", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "222867"} +[2022-08-03 15:23:39,267][train_inner][INFO] - {"epoch": 14, "update": 13.27, "loss": "2.067", "ppl": "4.19", "wps": "362369", "ups": "3.08", "wpb": "117687", "bsz": "256", "num_updates": "683000", "lr": "0.000320202", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "222932"} +[2022-08-03 15:24:44,035][train_inner][INFO] - {"epoch": 14, "update": 13.274, "loss": "2.064", "ppl": "4.18", "wps": "364318", "ups": "3.09", "wpb": "117979", "bsz": "256", "num_updates": "683200", "lr": "0.00032", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "222996"} +[2022-08-03 15:25:49,286][train_inner][INFO] - {"epoch": 14, "update": 13.278, "loss": "2.059", "ppl": "4.17", "wps": "362179", "ups": "3.07", "wpb": "118160", "bsz": "256", "num_updates": "683400", "lr": "0.000319798", "gnorm": "0.732", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.3", "wall": "223062"} +[2022-08-03 15:26:01,943][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 15:26:54,800][train_inner][INFO] - {"epoch": 14, "update": 13.282, "loss": "2.057", "ppl": "4.16", "wps": "360899", "ups": "3.05", "wpb": "118218", "bsz": "256", "num_updates": "683600", "lr": "0.000319596", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "223127"} +[2022-08-03 15:27:59,317][train_inner][INFO] - {"epoch": 14, "update": 13.286, "loss": "2.059", "ppl": "4.17", "wps": "366798", "ups": "3.1", "wpb": "118323", "bsz": "256", "num_updates": "683800", "lr": "0.000319394", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "223192"} +[2022-08-03 15:29:04,568][train_inner][INFO] - {"epoch": 14, "update": 13.29, "loss": "2.061", "ppl": "4.17", "wps": "362639", "ups": "3.07", "wpb": "118312", "bsz": "256", "num_updates": "684000", "lr": "0.000319192", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "223257"} +[2022-08-03 15:30:09,572][train_inner][INFO] - {"epoch": 14, "update": 13.294, "loss": "2.057", "ppl": "4.16", "wps": "363272", "ups": "3.08", "wpb": "118070", "bsz": "256", "num_updates": "684200", "lr": "0.00031899", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "223322"} +[2022-08-03 15:31:14,126][train_inner][INFO] - {"epoch": 14, "update": 13.298, "loss": "2.059", "ppl": "4.17", "wps": "366062", "ups": "3.1", "wpb": "118151", "bsz": "256", "num_updates": "684400", "lr": "0.000318788", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.3", "wall": "223386"} +[2022-08-03 15:32:19,368][train_inner][INFO] - {"epoch": 14, "update": 13.301, "loss": "2.055", "ppl": "4.16", "wps": "364916", "ups": "3.07", "wpb": "119038", "bsz": "256", "num_updates": "684600", "lr": "0.000318586", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "223452"} +[2022-08-03 15:33:24,534][train_inner][INFO] - {"epoch": 14, "update": 13.305, "loss": "2.055", "ppl": "4.16", "wps": "363174", "ups": "3.07", "wpb": "118330", "bsz": "255.9", "num_updates": "684800", "lr": "0.000318384", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "223517"} +[2022-08-03 15:34:29,809][train_inner][INFO] - {"epoch": 14, "update": 13.309, "loss": "2.059", "ppl": "4.17", "wps": "362375", "ups": "3.06", "wpb": "118269", "bsz": "256", "num_updates": "685000", "lr": "0.000318182", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "223582"} +[2022-08-03 15:35:34,207][train_inner][INFO] - {"epoch": 14, "update": 13.313, "loss": "2.062", "ppl": "4.18", "wps": "366107", "ups": "3.11", "wpb": "117882", "bsz": "256", "num_updates": "685200", "lr": "0.00031798", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "223647"} +[2022-08-03 15:36:39,050][train_inner][INFO] - {"epoch": 14, "update": 13.317, "loss": "2.056", "ppl": "4.16", "wps": "365584", "ups": "3.08", "wpb": "118525", "bsz": "256", "num_updates": "685400", "lr": "0.000317778", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "223711"} +[2022-08-03 15:37:41,582][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 15:37:44,256][train_inner][INFO] - {"epoch": 14, "update": 13.321, "loss": "2.059", "ppl": "4.17", "wps": "360752", "ups": "3.07", "wpb": "117614", "bsz": "256", "num_updates": "685600", "lr": "0.000317576", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "223777"} +[2022-08-03 15:38:49,059][train_inner][INFO] - {"epoch": 14, "update": 13.325, "loss": "2.054", "ppl": "4.15", "wps": "364730", "ups": "3.09", "wpb": "118178", "bsz": "256", "num_updates": "685800", "lr": "0.000317374", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "223841"} +[2022-08-03 15:39:54,951][train_inner][INFO] - {"epoch": 14, "update": 13.329, "loss": "2.052", "ppl": "4.15", "wps": "358572", "ups": "3.04", "wpb": "118132", "bsz": "256", "num_updates": "686000", "lr": "0.000317172", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "26.4", "wall": "223907"} +[2022-08-03 15:40:59,796][train_inner][INFO] - {"epoch": 14, "update": 13.333, "loss": "2.054", "ppl": "4.15", "wps": "366639", "ups": "3.08", "wpb": "118872", "bsz": "256", "num_updates": "686200", "lr": "0.00031697", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "223972"} +[2022-08-03 15:42:04,654][train_inner][INFO] - {"epoch": 14, "update": 13.336, "loss": "2.06", "ppl": "4.17", "wps": "365386", "ups": "3.08", "wpb": "118489", "bsz": "256", "num_updates": "686400", "lr": "0.000316768", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "224037"} +[2022-08-03 15:43:09,378][train_inner][INFO] - {"epoch": 14, "update": 13.34, "loss": "2.055", "ppl": "4.15", "wps": "365732", "ups": "3.09", "wpb": "118356", "bsz": "256", "num_updates": "686600", "lr": "0.000316566", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "224102"} +[2022-08-03 15:44:14,325][train_inner][INFO] - {"epoch": 14, "update": 13.344, "loss": "2.054", "ppl": "4.15", "wps": "364851", "ups": "3.08", "wpb": "118478", "bsz": "256", "num_updates": "686800", "lr": "0.000316364", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "224167"} +[2022-08-03 15:44:42,829][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 15:45:19,623][train_inner][INFO] - {"epoch": 14, "update": 13.348, "loss": "2.062", "ppl": "4.18", "wps": "361554", "ups": "3.06", "wpb": "118042", "bsz": "256", "num_updates": "687000", "lr": "0.000316162", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "224232"} +[2022-08-03 15:46:24,624][train_inner][INFO] - {"epoch": 14, "update": 13.352, "loss": "2.05", "ppl": "4.14", "wps": "366167", "ups": "3.08", "wpb": "119005", "bsz": "256", "num_updates": "687200", "lr": "0.00031596", "gnorm": "0.731", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "224297"} +[2022-08-03 15:47:29,277][train_inner][INFO] - {"epoch": 14, "update": 13.356, "loss": "2.056", "ppl": "4.16", "wps": "363271", "ups": "3.09", "wpb": "117431", "bsz": "256", "num_updates": "687400", "lr": "0.000315758", "gnorm": "0.75", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "224362"} +[2022-08-03 15:48:34,465][train_inner][INFO] - {"epoch": 14, "update": 13.36, "loss": "2.054", "ppl": "4.15", "wps": "364244", "ups": "3.07", "wpb": "118719", "bsz": "256", "num_updates": "687600", "lr": "0.000315556", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "224427"} +[2022-08-03 15:49:38,976][train_inner][INFO] - {"epoch": 14, "update": 13.364, "loss": "2.059", "ppl": "4.17", "wps": "366694", "ups": "3.1", "wpb": "118278", "bsz": "256", "num_updates": "687800", "lr": "0.000315354", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "224491"} +[2022-08-03 15:50:43,852][train_inner][INFO] - {"epoch": 14, "update": 13.368, "loss": "2.059", "ppl": "4.17", "wps": "364826", "ups": "3.08", "wpb": "118339", "bsz": "256", "num_updates": "688000", "lr": "0.000315152", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "224556"} +[2022-08-03 15:51:48,685][train_inner][INFO] - {"epoch": 14, "update": 13.371, "loss": "2.058", "ppl": "4.16", "wps": "364855", "ups": "3.08", "wpb": "118271", "bsz": "256", "num_updates": "688200", "lr": "0.000314949", "gnorm": "0.735", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "224621"} +[2022-08-03 15:52:53,868][train_inner][INFO] - {"epoch": 14, "update": 13.375, "loss": "2.059", "ppl": "4.17", "wps": "360792", "ups": "3.07", "wpb": "117587", "bsz": "256", "num_updates": "688400", "lr": "0.000314747", "gnorm": "0.735", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "224686"} +[2022-08-03 15:53:58,650][train_inner][INFO] - {"epoch": 14, "update": 13.379, "loss": "2.053", "ppl": "4.15", "wps": "365839", "ups": "3.09", "wpb": "118498", "bsz": "256", "num_updates": "688600", "lr": "0.000314545", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "224751"} +[2022-08-03 15:55:03,767][train_inner][INFO] - {"epoch": 14, "update": 13.383, "loss": "2.053", "ppl": "4.15", "wps": "364145", "ups": "3.07", "wpb": "118558", "bsz": "256", "num_updates": "688800", "lr": "0.000314343", "gnorm": "0.734", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "224816"} +[2022-08-03 15:56:08,682][train_inner][INFO] - {"epoch": 14, "update": 13.387, "loss": "2.061", "ppl": "4.17", "wps": "362033", "ups": "3.08", "wpb": "117506", "bsz": "256", "num_updates": "689000", "lr": "0.000314141", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "224881"} +[2022-08-03 15:57:13,483][train_inner][INFO] - {"epoch": 14, "update": 13.391, "loss": "2.056", "ppl": "4.16", "wps": "364472", "ups": "3.09", "wpb": "118089", "bsz": "256", "num_updates": "689200", "lr": "0.000313939", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "224946"} +[2022-08-03 15:58:18,477][train_inner][INFO] - {"epoch": 14, "update": 13.395, "loss": "2.052", "ppl": "4.15", "wps": "363479", "ups": "3.08", "wpb": "118118", "bsz": "256", "num_updates": "689400", "lr": "0.000313737", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "225011"} +[2022-08-03 15:59:23,202][train_inner][INFO] - {"epoch": 14, "update": 13.399, "loss": "2.057", "ppl": "4.16", "wps": "364476", "ups": "3.09", "wpb": "117952", "bsz": "256", "num_updates": "689600", "lr": "0.000313535", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "225076"} +[2022-08-03 16:00:28,269][train_inner][INFO] - {"epoch": 14, "update": 13.403, "loss": "2.061", "ppl": "4.17", "wps": "363338", "ups": "3.07", "wpb": "118205", "bsz": "256", "num_updates": "689800", "lr": "0.000313333", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "225141"} +[2022-08-03 16:01:33,004][train_inner][INFO] - {"epoch": 14, "update": 13.406, "loss": "2.051", "ppl": "4.14", "wps": "364729", "ups": "3.09", "wpb": "118050", "bsz": "256", "num_updates": "690000", "lr": "0.000313131", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.7", "wall": "225205"} +[2022-08-03 16:02:38,125][train_inner][INFO] - {"epoch": 14, "update": 13.41, "loss": "2.057", "ppl": "4.16", "wps": "364152", "ups": "3.07", "wpb": "118569", "bsz": "256", "num_updates": "690200", "lr": "0.000312929", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "225270"} +[2022-08-03 16:03:42,883][train_inner][INFO] - {"epoch": 14, "update": 13.414, "loss": "2.055", "ppl": "4.16", "wps": "364111", "ups": "3.09", "wpb": "117893", "bsz": "256", "num_updates": "690400", "lr": "0.000312727", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "225335"} +[2022-08-03 16:04:47,915][train_inner][INFO] - {"epoch": 14, "update": 13.418, "loss": "2.051", "ppl": "4.14", "wps": "363969", "ups": "3.08", "wpb": "118346", "bsz": "256", "num_updates": "690600", "lr": "0.000312525", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.2", "wall": "225400"} +[2022-08-03 16:05:53,934][train_inner][INFO] - {"epoch": 14, "update": 13.422, "loss": "2.054", "ppl": "4.15", "wps": "358754", "ups": "3.03", "wpb": "118421", "bsz": "256", "num_updates": "690800", "lr": "0.000312323", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.9", "wall": "225466"} +[2022-08-03 16:06:55,739][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 16:06:59,174][train_inner][INFO] - {"epoch": 14, "update": 13.426, "loss": "2.048", "ppl": "4.14", "wps": "363329", "ups": "3.07", "wpb": "118516", "bsz": "256", "num_updates": "691000", "lr": "0.000312121", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "225531"} +[2022-08-03 16:08:04,057][train_inner][INFO] - {"epoch": 14, "update": 13.43, "loss": "2.048", "ppl": "4.14", "wps": "363615", "ups": "3.08", "wpb": "117962", "bsz": "256", "num_updates": "691200", "lr": "0.000311919", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "225596"} +[2022-08-03 16:09:08,774][train_inner][INFO] - {"epoch": 14, "update": 13.434, "loss": "2.055", "ppl": "4.16", "wps": "365733", "ups": "3.09", "wpb": "118344", "bsz": "256", "num_updates": "691400", "lr": "0.000311717", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "225661"} +[2022-08-03 16:09:41,757][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 16:10:13,822][train_inner][INFO] - {"epoch": 14, "update": 13.438, "loss": "2.058", "ppl": "4.16", "wps": "363022", "ups": "3.07", "wpb": "118066", "bsz": "256", "num_updates": "691600", "lr": "0.000311515", "gnorm": "0.738", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "225726"} +[2022-08-03 16:11:19,077][train_inner][INFO] - {"epoch": 14, "update": 13.441, "loss": "2.056", "ppl": "4.16", "wps": "362073", "ups": "3.06", "wpb": "118135", "bsz": "256", "num_updates": "691800", "lr": "0.000311313", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "225791"} +[2022-08-03 16:12:23,790][train_inner][INFO] - {"epoch": 14, "update": 13.445, "loss": "2.055", "ppl": "4.16", "wps": "366544", "ups": "3.09", "wpb": "118599", "bsz": "256", "num_updates": "692000", "lr": "0.000311111", "gnorm": "0.736", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "225856"} +[2022-08-03 16:13:28,831][train_inner][INFO] - {"epoch": 14, "update": 13.449, "loss": "2.054", "ppl": "4.15", "wps": "364087", "ups": "3.08", "wpb": "118400", "bsz": "256", "num_updates": "692200", "lr": "0.000310909", "gnorm": "0.735", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "225921"} +[2022-08-03 16:14:34,068][train_inner][INFO] - {"epoch": 14, "update": 13.453, "loss": "2.059", "ppl": "4.17", "wps": "363089", "ups": "3.07", "wpb": "118434", "bsz": "256", "num_updates": "692400", "lr": "0.000310707", "gnorm": "0.738", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "225986"} +[2022-08-03 16:15:39,044][train_inner][INFO] - {"epoch": 14, "update": 13.457, "loss": "2.055", "ppl": "4.16", "wps": "362350", "ups": "3.08", "wpb": "117717", "bsz": "256", "num_updates": "692600", "lr": "0.000310505", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "226051"} +[2022-08-03 16:16:44,151][train_inner][INFO] - {"epoch": 14, "update": 13.461, "loss": "2.049", "ppl": "4.14", "wps": "363686", "ups": "3.07", "wpb": "118391", "bsz": "256", "num_updates": "692800", "lr": "0.000310303", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "226116"} +[2022-08-03 16:17:49,098][train_inner][INFO] - {"epoch": 14, "update": 13.465, "loss": "2.055", "ppl": "4.16", "wps": "363901", "ups": "3.08", "wpb": "118170", "bsz": "256", "num_updates": "693000", "lr": "0.000310101", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "226181"} +[2022-08-03 16:18:54,324][train_inner][INFO] - {"epoch": 14, "update": 13.469, "loss": "2.054", "ppl": "4.15", "wps": "364850", "ups": "3.07", "wpb": "118987", "bsz": "256", "num_updates": "693200", "lr": "0.000309899", "gnorm": "0.736", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "226247"} +[2022-08-03 16:19:59,251][train_inner][INFO] - {"epoch": 14, "update": 13.472, "loss": "2.055", "ppl": "4.16", "wps": "364852", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "693400", "lr": "0.000309697", "gnorm": "0.738", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "226312"} +[2022-08-03 16:21:04,111][train_inner][INFO] - {"epoch": 14, "update": 13.476, "loss": "2.053", "ppl": "4.15", "wps": "364820", "ups": "3.08", "wpb": "118308", "bsz": "256", "num_updates": "693600", "lr": "0.000309495", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "226376"} +[2022-08-03 16:22:08,894][train_inner][INFO] - {"epoch": 14, "update": 13.48, "loss": "2.052", "ppl": "4.15", "wps": "365366", "ups": "3.09", "wpb": "118346", "bsz": "256", "num_updates": "693800", "lr": "0.000309293", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "226441"} +[2022-08-03 16:23:13,975][train_inner][INFO] - {"epoch": 14, "update": 13.484, "loss": "2.052", "ppl": "4.15", "wps": "363494", "ups": "3.07", "wpb": "118282", "bsz": "256", "num_updates": "694000", "lr": "0.000309091", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "226506"} +[2022-08-03 16:24:19,131][train_inner][INFO] - {"epoch": 14, "update": 13.488, "loss": "2.052", "ppl": "4.15", "wps": "363016", "ups": "3.07", "wpb": "118260", "bsz": "256", "num_updates": "694200", "lr": "0.000308889", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "226571"} +[2022-08-03 16:25:25,014][train_inner][INFO] - {"epoch": 14, "update": 13.492, "loss": "2.05", "ppl": "4.14", "wps": "359106", "ups": "3.04", "wpb": "118294", "bsz": "256", "num_updates": "694400", "lr": "0.000308687", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.2", "wall": "226637"} +[2022-08-03 16:26:29,637][train_inner][INFO] - {"epoch": 14, "update": 13.496, "loss": "2.053", "ppl": "4.15", "wps": "365388", "ups": "3.09", "wpb": "118061", "bsz": "256", "num_updates": "694600", "lr": "0.000308485", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "226702"} +[2022-08-03 16:27:34,504][train_inner][INFO] - {"epoch": 14, "update": 13.5, "loss": "2.05", "ppl": "4.14", "wps": "365240", "ups": "3.08", "wpb": "118457", "bsz": "256", "num_updates": "694800", "lr": "0.000308283", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "226767"} +[2022-08-03 16:28:39,630][train_inner][INFO] - {"epoch": 14, "update": 13.504, "loss": "2.047", "ppl": "4.13", "wps": "364493", "ups": "3.07", "wpb": "118690", "bsz": "256", "num_updates": "695000", "lr": "0.000308081", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "226832"} +[2022-08-03 16:29:44,608][train_inner][INFO] - {"epoch": 14, "update": 13.507, "loss": "2.05", "ppl": "4.14", "wps": "365170", "ups": "3.08", "wpb": "118638", "bsz": "256", "num_updates": "695200", "lr": "0.000307879", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "226897"} +[2022-08-03 16:30:49,371][train_inner][INFO] - {"epoch": 14, "update": 13.511, "loss": "2.044", "ppl": "4.12", "wps": "364789", "ups": "3.09", "wpb": "118121", "bsz": "256", "num_updates": "695400", "lr": "0.000307677", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.2", "wall": "226962"} +[2022-08-03 16:31:54,531][train_inner][INFO] - {"epoch": 14, "update": 13.515, "loss": "2.053", "ppl": "4.15", "wps": "363378", "ups": "3.07", "wpb": "118387", "bsz": "256", "num_updates": "695600", "lr": "0.000307475", "gnorm": "0.74", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.5", "wall": "227027"} +[2022-08-03 16:32:02,719][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 16:32:59,785][train_inner][INFO] - {"epoch": 14, "update": 13.519, "loss": "2.048", "ppl": "4.13", "wps": "363795", "ups": "3.06", "wpb": "118694", "bsz": "256", "num_updates": "695800", "lr": "0.000307273", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "227092"} +[2022-08-03 16:34:04,331][train_inner][INFO] - {"epoch": 14, "update": 13.523, "loss": "2.05", "ppl": "4.14", "wps": "366389", "ups": "3.1", "wpb": "118243", "bsz": "256", "num_updates": "696000", "lr": "0.000307071", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "227157"} +[2022-08-03 16:35:10,199][train_inner][INFO] - {"epoch": 14, "update": 13.527, "loss": "2.05", "ppl": "4.14", "wps": "360142", "ups": "3.04", "wpb": "118607", "bsz": "256", "num_updates": "696200", "lr": "0.000306869", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "227223"} +[2022-08-03 16:36:15,194][train_inner][INFO] - {"epoch": 14, "update": 13.531, "loss": "2.06", "ppl": "4.17", "wps": "365708", "ups": "3.08", "wpb": "118845", "bsz": "256", "num_updates": "696400", "lr": "0.000306667", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "227288"} +[2022-08-03 16:37:20,142][train_inner][INFO] - {"epoch": 14, "update": 13.535, "loss": "2.049", "ppl": "4.14", "wps": "363907", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "696600", "lr": "0.000306465", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "227352"} +[2022-08-03 16:38:26,203][train_inner][INFO] - {"epoch": 14, "update": 13.539, "loss": "2.05", "ppl": "4.14", "wps": "358358", "ups": "3.03", "wpb": "118365", "bsz": "256", "num_updates": "696800", "lr": "0.000306263", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20", "wall": "227419"} +[2022-08-03 16:39:31,085][train_inner][INFO] - {"epoch": 14, "update": 13.542, "loss": "2.05", "ppl": "4.14", "wps": "366338", "ups": "3.08", "wpb": "118842", "bsz": "256", "num_updates": "697000", "lr": "0.000306061", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "227483"} +[2022-08-03 16:40:36,862][train_inner][INFO] - {"epoch": 14, "update": 13.546, "loss": "2.049", "ppl": "4.14", "wps": "358359", "ups": "3.04", "wpb": "117858", "bsz": "256", "num_updates": "697200", "lr": "0.000305859", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "227549"} +[2022-08-03 16:41:42,013][train_inner][INFO] - {"epoch": 14, "update": 13.55, "loss": "2.053", "ppl": "4.15", "wps": "362521", "ups": "3.07", "wpb": "118091", "bsz": "256", "num_updates": "697400", "lr": "0.000305657", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25", "wall": "227614"} +[2022-08-03 16:42:46,690][train_inner][INFO] - {"epoch": 14, "update": 13.554, "loss": "2.047", "ppl": "4.13", "wps": "366138", "ups": "3.09", "wpb": "118401", "bsz": "256", "num_updates": "697600", "lr": "0.000305455", "gnorm": "0.741", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "227679"} +[2022-08-03 16:43:13,282][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 16:43:52,199][train_inner][INFO] - {"epoch": 14, "update": 13.558, "loss": "2.041", "ppl": "4.11", "wps": "361612", "ups": "3.05", "wpb": "118442", "bsz": "256", "num_updates": "697800", "lr": "0.000305253", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "227745"} +[2022-08-03 16:44:56,965][train_inner][INFO] - {"epoch": 14, "update": 13.562, "loss": "2.048", "ppl": "4.14", "wps": "364910", "ups": "3.09", "wpb": "118168", "bsz": "256", "num_updates": "698000", "lr": "0.000305051", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.6", "wall": "227809"} +[2022-08-03 16:46:01,539][train_inner][INFO] - {"epoch": 14, "update": 13.566, "loss": "2.048", "ppl": "4.14", "wps": "366225", "ups": "3.1", "wpb": "118241", "bsz": "256", "num_updates": "698200", "lr": "0.000304848", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "227874"} +[2022-08-03 16:47:06,334][train_inner][INFO] - {"epoch": 14, "update": 13.57, "loss": "2.051", "ppl": "4.15", "wps": "365374", "ups": "3.09", "wpb": "118371", "bsz": "256", "num_updates": "698400", "lr": "0.000304646", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "227939"} +[2022-08-03 16:48:11,356][train_inner][INFO] - {"epoch": 14, "update": 13.573, "loss": "2.051", "ppl": "4.14", "wps": "364632", "ups": "3.08", "wpb": "118543", "bsz": "256", "num_updates": "698600", "lr": "0.000304444", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "228004"} +[2022-08-03 16:49:15,799][train_inner][INFO] - {"epoch": 14, "update": 13.577, "loss": "2.044", "ppl": "4.12", "wps": "366205", "ups": "3.1", "wpb": "117995", "bsz": "256", "num_updates": "698800", "lr": "0.000304242", "gnorm": "0.741", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "228068"} +[2022-08-03 16:50:21,019][train_inner][INFO] - {"epoch": 14, "update": 13.581, "loss": "2.046", "ppl": "4.13", "wps": "362803", "ups": "3.07", "wpb": "118310", "bsz": "256", "num_updates": "699000", "lr": "0.00030404", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "228133"} +[2022-08-03 16:51:27,306][train_inner][INFO] - {"epoch": 14, "update": 13.585, "loss": "2.051", "ppl": "4.15", "wps": "358163", "ups": "3.02", "wpb": "118705", "bsz": "256", "num_updates": "699200", "lr": "0.000303838", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.7", "wall": "228200"} +[2022-08-03 16:52:31,760][train_inner][INFO] - {"epoch": 14, "update": 13.589, "loss": "2.054", "ppl": "4.15", "wps": "364812", "ups": "3.1", "wpb": "117567", "bsz": "256", "num_updates": "699400", "lr": "0.000303636", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "228264"} +[2022-08-03 16:53:37,985][train_inner][INFO] - {"epoch": 14, "update": 13.593, "loss": "2.05", "ppl": "4.14", "wps": "358029", "ups": "3.02", "wpb": "118550", "bsz": "256", "num_updates": "699600", "lr": "0.000303434", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21", "wall": "228330"} +[2022-08-03 16:54:38,880][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 16:54:43,444][train_inner][INFO] - {"epoch": 14, "update": 13.597, "loss": "2.052", "ppl": "4.15", "wps": "361968", "ups": "3.06", "wpb": "118469", "bsz": "256", "num_updates": "699800", "lr": "0.000303232", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "228396"} +[2022-08-03 16:55:48,040][train_inner][INFO] - {"epoch": 14, "update": 13.601, "loss": "2.053", "ppl": "4.15", "wps": "364054", "ups": "3.1", "wpb": "117580", "bsz": "256", "num_updates": "700000", "lr": "0.00030303", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "228460"} +[2022-08-03 16:55:48,041][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 16:56:10,827][valid][INFO] - {"epoch": 14, "valid_loss": "1.958", "valid_ppl": "3.88", "valid_wps": "1.57266e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "700000", "valid_best_loss": "1.958"} +[2022-08-03 16:56:10,831][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 14 @ 700000 updates +[2022-08-03 16:56:10,831][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_14_700000.pt +[2022-08-03 16:56:23,650][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_14_700000.pt +[2022-08-03 16:56:53,272][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_14_700000.pt (epoch 14 @ 700000 updates, score 1.958) (writing took 42.4408537838608 seconds) +[2022-08-03 16:57:58,336][train_inner][INFO] - {"epoch": 14, "update": 13.605, "loss": "2.055", "ppl": "4.16", "wps": "180829", "ups": "1.53", "wpb": "117805", "bsz": "256", "num_updates": "700200", "lr": "0.000302828", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "228591"} +[2022-08-03 16:58:48,529][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 16:59:04,451][train_inner][INFO] - {"epoch": 14, "update": 13.608, "loss": "2.051", "ppl": "4.14", "wps": "356976", "ups": "3.03", "wpb": "118006", "bsz": "256", "num_updates": "700400", "lr": "0.000302626", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "228657"} +[2022-08-03 17:00:10,512][train_inner][INFO] - {"epoch": 14, "update": 13.612, "loss": "2.052", "ppl": "4.15", "wps": "359539", "ups": "3.03", "wpb": "118757", "bsz": "256", "num_updates": "700600", "lr": "0.000302424", "gnorm": "0.747", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.9", "wall": "228723"} +[2022-08-03 17:01:16,728][train_inner][INFO] - {"epoch": 14, "update": 13.616, "loss": "2.043", "ppl": "4.12", "wps": "356934", "ups": "3.02", "wpb": "118171", "bsz": "256", "num_updates": "700800", "lr": "0.000302222", "gnorm": "0.75", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "228789"} +[2022-08-03 17:02:22,632][train_inner][INFO] - {"epoch": 14, "update": 13.62, "loss": "2.06", "ppl": "4.17", "wps": "356767", "ups": "3.03", "wpb": "117560", "bsz": "256", "num_updates": "701000", "lr": "0.00030202", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "228855"} +[2022-08-03 17:03:28,717][train_inner][INFO] - {"epoch": 14, "update": 13.624, "loss": "2.045", "ppl": "4.13", "wps": "359954", "ups": "3.03", "wpb": "118937", "bsz": "256", "num_updates": "701200", "lr": "0.000301818", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.1", "wall": "228921"} +[2022-08-03 17:04:33,317][train_inner][INFO] - {"epoch": 14, "update": 13.628, "loss": "2.048", "ppl": "4.14", "wps": "365777", "ups": "3.1", "wpb": "118144", "bsz": "256", "num_updates": "701400", "lr": "0.000301616", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "228986"} +[2022-08-03 17:05:38,225][train_inner][INFO] - {"epoch": 14, "update": 13.632, "loss": "2.054", "ppl": "4.15", "wps": "365108", "ups": "3.08", "wpb": "118490", "bsz": "256", "num_updates": "701600", "lr": "0.000301414", "gnorm": "0.74", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "229051"} +[2022-08-03 17:06:43,298][train_inner][INFO] - {"epoch": 14, "update": 13.636, "loss": "2.044", "ppl": "4.12", "wps": "363069", "ups": "3.07", "wpb": "118129", "bsz": "256", "num_updates": "701800", "lr": "0.000301212", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "229116"} +[2022-08-03 17:07:48,094][train_inner][INFO] - {"epoch": 14, "update": 13.64, "loss": "2.052", "ppl": "4.15", "wps": "366622", "ups": "3.09", "wpb": "118776", "bsz": "256", "num_updates": "702000", "lr": "0.00030101", "gnorm": "0.741", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "229180"} +[2022-08-03 17:08:52,895][train_inner][INFO] - {"epoch": 14, "update": 13.643, "loss": "2.043", "ppl": "4.12", "wps": "365110", "ups": "3.09", "wpb": "118296", "bsz": "256", "num_updates": "702200", "lr": "0.000300808", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "229245"} +[2022-08-03 17:09:58,006][train_inner][INFO] - {"epoch": 14, "update": 13.647, "loss": "2.049", "ppl": "4.14", "wps": "363423", "ups": "3.07", "wpb": "118313", "bsz": "256", "num_updates": "702400", "lr": "0.000300606", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "229310"} +[2022-08-03 17:11:03,121][train_inner][INFO] - {"epoch": 14, "update": 13.651, "loss": "2.046", "ppl": "4.13", "wps": "365096", "ups": "3.07", "wpb": "118864", "bsz": "256", "num_updates": "702600", "lr": "0.000300404", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "229375"} +[2022-08-03 17:12:08,156][train_inner][INFO] - {"epoch": 14, "update": 13.655, "loss": "2.047", "ppl": "4.13", "wps": "364247", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "702800", "lr": "0.000300202", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "229440"} +[2022-08-03 17:13:12,953][train_inner][INFO] - {"epoch": 14, "update": 13.659, "loss": "2.048", "ppl": "4.14", "wps": "366438", "ups": "3.09", "wpb": "118720", "bsz": "256", "num_updates": "703000", "lr": "0.0003", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "229505"} +[2022-08-03 17:14:17,689][train_inner][INFO] - {"epoch": 14, "update": 13.663, "loss": "2.045", "ppl": "4.13", "wps": "367058", "ups": "3.09", "wpb": "118808", "bsz": "256", "num_updates": "703200", "lr": "0.000299798", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "229570"} +[2022-08-03 17:15:23,779][train_inner][INFO] - {"epoch": 14, "update": 13.667, "loss": "2.046", "ppl": "4.13", "wps": "355390", "ups": "3.03", "wpb": "117436", "bsz": "256", "num_updates": "703400", "lr": "0.000299596", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "24.3", "wall": "229636"} +[2022-08-03 17:16:28,836][train_inner][INFO] - {"epoch": 14, "update": 13.671, "loss": "2.05", "ppl": "4.14", "wps": "363062", "ups": "3.07", "wpb": "118096", "bsz": "255.9", "num_updates": "703600", "lr": "0.000299394", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "229701"} +[2022-08-03 17:17:33,938][train_inner][INFO] - {"epoch": 14, "update": 13.675, "loss": "2.046", "ppl": "4.13", "wps": "364100", "ups": "3.07", "wpb": "118517", "bsz": "256", "num_updates": "703800", "lr": "0.000299192", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "229766"} +[2022-08-03 17:18:40,072][train_inner][INFO] - {"epoch": 14, "update": 13.678, "loss": "2.053", "ppl": "4.15", "wps": "357706", "ups": "3.02", "wpb": "118281", "bsz": "256", "num_updates": "704000", "lr": "0.00029899", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "229832"} +[2022-08-03 17:19:45,187][train_inner][INFO] - {"epoch": 14, "update": 13.682, "loss": "2.044", "ppl": "4.12", "wps": "363765", "ups": "3.07", "wpb": "118431", "bsz": "256", "num_updates": "704200", "lr": "0.000298788", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "229898"} +[2022-08-03 17:20:50,142][train_inner][INFO] - {"epoch": 14, "update": 13.686, "loss": "2.046", "ppl": "4.13", "wps": "364880", "ups": "3.08", "wpb": "118502", "bsz": "256", "num_updates": "704400", "lr": "0.000298586", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "229962"} +[2022-08-03 17:21:08,137][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 17:21:55,625][train_inner][INFO] - {"epoch": 14, "update": 13.69, "loss": "2.044", "ppl": "4.12", "wps": "359759", "ups": "3.05", "wpb": "117788", "bsz": "256", "num_updates": "704600", "lr": "0.000298384", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "230028"} +[2022-08-03 17:22:13,120][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 17:23:01,938][train_inner][INFO] - {"epoch": 14, "update": 13.694, "loss": "2.048", "ppl": "4.14", "wps": "358397", "ups": "3.02", "wpb": "118831", "bsz": "256", "num_updates": "704800", "lr": "0.000298182", "gnorm": "0.745", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.4", "wall": "230094"} +[2022-08-03 17:24:06,679][train_inner][INFO] - {"epoch": 14, "update": 13.698, "loss": "2.052", "ppl": "4.15", "wps": "362968", "ups": "3.09", "wpb": "117494", "bsz": "256", "num_updates": "705000", "lr": "0.00029798", "gnorm": "0.755", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "230159"} +[2022-08-03 17:25:11,593][train_inner][INFO] - {"epoch": 14, "update": 13.702, "loss": "2.048", "ppl": "4.14", "wps": "364575", "ups": "3.08", "wpb": "118328", "bsz": "256", "num_updates": "705200", "lr": "0.000297778", "gnorm": "0.741", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "230224"} +[2022-08-03 17:26:16,792][train_inner][INFO] - {"epoch": 14, "update": 13.706, "loss": "2.045", "ppl": "4.13", "wps": "363247", "ups": "3.07", "wpb": "118415", "bsz": "256", "num_updates": "705400", "lr": "0.000297576", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "230289"} +[2022-08-03 17:27:21,778][train_inner][INFO] - {"epoch": 14, "update": 13.71, "loss": "2.048", "ppl": "4.13", "wps": "364195", "ups": "3.08", "wpb": "118337", "bsz": "256", "num_updates": "705600", "lr": "0.000297374", "gnorm": "0.746", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "230354"} +[2022-08-03 17:28:26,812][train_inner][INFO] - {"epoch": 14, "update": 13.713, "loss": "2.047", "ppl": "4.13", "wps": "364195", "ups": "3.08", "wpb": "118423", "bsz": "256", "num_updates": "705800", "lr": "0.000297172", "gnorm": "0.741", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "230419"} +[2022-08-03 17:29:31,812][train_inner][INFO] - {"epoch": 14, "update": 13.717, "loss": "2.047", "ppl": "4.13", "wps": "365342", "ups": "3.08", "wpb": "118735", "bsz": "256", "num_updates": "706000", "lr": "0.00029697", "gnorm": "0.74", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "230484"} +[2022-08-03 17:30:36,641][train_inner][INFO] - {"epoch": 14, "update": 13.721, "loss": "2.046", "ppl": "4.13", "wps": "366971", "ups": "3.09", "wpb": "118951", "bsz": "256", "num_updates": "706200", "lr": "0.000296768", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "230549"} +[2022-08-03 17:31:41,738][train_inner][INFO] - {"epoch": 14, "update": 13.725, "loss": "2.045", "ppl": "4.13", "wps": "363683", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "706400", "lr": "0.000296566", "gnorm": "0.753", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "230614"} +[2022-08-03 17:32:46,415][train_inner][INFO] - {"epoch": 14, "update": 13.729, "loss": "2.046", "ppl": "4.13", "wps": "364708", "ups": "3.09", "wpb": "117940", "bsz": "256", "num_updates": "706600", "lr": "0.000296364", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "230679"} +[2022-08-03 17:33:51,275][train_inner][INFO] - {"epoch": 14, "update": 13.733, "loss": "2.041", "ppl": "4.12", "wps": "364813", "ups": "3.08", "wpb": "118306", "bsz": "256", "num_updates": "706800", "lr": "0.000296162", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "230744"} +[2022-08-03 17:34:56,273][train_inner][INFO] - {"epoch": 14, "update": 13.737, "loss": "2.047", "ppl": "4.13", "wps": "363672", "ups": "3.08", "wpb": "118189", "bsz": "256", "num_updates": "707000", "lr": "0.00029596", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.1", "wall": "230809"} +[2022-08-03 17:36:01,202][train_inner][INFO] - {"epoch": 14, "update": 13.741, "loss": "2.054", "ppl": "4.15", "wps": "367049", "ups": "3.08", "wpb": "119160", "bsz": "256", "num_updates": "707200", "lr": "0.000295758", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "230874"} +[2022-08-03 17:37:05,689][train_inner][INFO] - {"epoch": 14, "update": 13.744, "loss": "2.049", "ppl": "4.14", "wps": "365486", "ups": "3.1", "wpb": "117844", "bsz": "256", "num_updates": "707400", "lr": "0.000295556", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.1", "wall": "230938"} +[2022-08-03 17:38:10,411][train_inner][INFO] - {"epoch": 14, "update": 13.748, "loss": "2.041", "ppl": "4.11", "wps": "365769", "ups": "3.09", "wpb": "118364", "bsz": "256", "num_updates": "707600", "lr": "0.000295354", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "231003"} +[2022-08-03 17:39:15,183][train_inner][INFO] - {"epoch": 14, "update": 13.752, "loss": "2.036", "ppl": "4.1", "wps": "367349", "ups": "3.09", "wpb": "118967", "bsz": "256", "num_updates": "707800", "lr": "0.000295152", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "231068"} +[2022-08-03 17:40:19,980][train_inner][INFO] - {"epoch": 14, "update": 13.756, "loss": "2.042", "ppl": "4.12", "wps": "363235", "ups": "3.09", "wpb": "117682", "bsz": "256", "num_updates": "708000", "lr": "0.000294949", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "231132"} +[2022-08-03 17:41:24,844][train_inner][INFO] - {"epoch": 14, "update": 13.76, "loss": "2.053", "ppl": "4.15", "wps": "361128", "ups": "3.08", "wpb": "117120", "bsz": "256", "num_updates": "708200", "lr": "0.000294747", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "231197"} +[2022-08-03 17:42:29,881][train_inner][INFO] - {"epoch": 14, "update": 13.764, "loss": "2.045", "ppl": "4.13", "wps": "365173", "ups": "3.08", "wpb": "118746", "bsz": "256", "num_updates": "708400", "lr": "0.000294545", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "231262"} +[2022-08-03 17:43:34,740][train_inner][INFO] - {"epoch": 14, "update": 13.768, "loss": "2.046", "ppl": "4.13", "wps": "366846", "ups": "3.08", "wpb": "118966", "bsz": "256", "num_updates": "708600", "lr": "0.000294343", "gnorm": "0.741", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "231327"} +[2022-08-03 17:44:26,201][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 17:44:39,576][train_inner][INFO] - {"epoch": 14, "update": 13.772, "loss": "2.041", "ppl": "4.12", "wps": "366362", "ups": "3.08", "wpb": "118765", "bsz": "256", "num_updates": "708800", "lr": "0.000294141", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "231392"} +[2022-08-03 17:45:44,480][train_inner][INFO] - {"epoch": 14, "update": 13.776, "loss": "2.046", "ppl": "4.13", "wps": "363254", "ups": "3.08", "wpb": "117882", "bsz": "256", "num_updates": "709000", "lr": "0.000293939", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "231457"} +[2022-08-03 17:46:49,401][train_inner][INFO] - {"epoch": 14, "update": 13.779, "loss": "2.048", "ppl": "4.13", "wps": "362736", "ups": "3.08", "wpb": "117745", "bsz": "256", "num_updates": "709200", "lr": "0.000293737", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "231522"} +[2022-08-03 17:47:54,410][train_inner][INFO] - {"epoch": 14, "update": 13.783, "loss": "2.038", "ppl": "4.11", "wps": "364836", "ups": "3.08", "wpb": "118586", "bsz": "256", "num_updates": "709400", "lr": "0.000293535", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "231587"} +[2022-08-03 17:48:59,447][train_inner][INFO] - {"epoch": 14, "update": 13.787, "loss": "2.044", "ppl": "4.12", "wps": "364184", "ups": "3.08", "wpb": "118426", "bsz": "256", "num_updates": "709600", "lr": "0.000293333", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.9", "wall": "231652"} +[2022-08-03 17:50:04,161][train_inner][INFO] - {"epoch": 14, "update": 13.791, "loss": "2.041", "ppl": "4.11", "wps": "366921", "ups": "3.09", "wpb": "118722", "bsz": "256", "num_updates": "709800", "lr": "0.000293131", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "231716"} +[2022-08-03 17:51:09,091][train_inner][INFO] - {"epoch": 14, "update": 13.795, "loss": "2.049", "ppl": "4.14", "wps": "363441", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "710000", "lr": "0.000292929", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "231781"} +[2022-08-03 17:52:14,027][train_inner][INFO] - {"epoch": 14, "update": 13.799, "loss": "2.04", "ppl": "4.11", "wps": "365513", "ups": "3.08", "wpb": "118673", "bsz": "256", "num_updates": "710200", "lr": "0.000292727", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "231846"} +[2022-08-03 17:53:19,803][train_inner][INFO] - {"epoch": 14, "update": 13.803, "loss": "2.045", "ppl": "4.13", "wps": "358457", "ups": "3.04", "wpb": "117887", "bsz": "256", "num_updates": "710400", "lr": "0.000292525", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "231912"} +[2022-08-03 17:54:24,771][train_inner][INFO] - {"epoch": 14, "update": 13.807, "loss": "2.043", "ppl": "4.12", "wps": "364424", "ups": "3.08", "wpb": "118378", "bsz": "256", "num_updates": "710600", "lr": "0.000292323", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "231977"} +[2022-08-03 17:55:29,661][train_inner][INFO] - {"epoch": 14, "update": 13.811, "loss": "2.048", "ppl": "4.14", "wps": "364592", "ups": "3.08", "wpb": "118292", "bsz": "256", "num_updates": "710800", "lr": "0.000292121", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "232042"} +[2022-08-03 17:55:52,673][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 17:56:34,605][train_inner][INFO] - {"epoch": 14, "update": 13.814, "loss": "2.043", "ppl": "4.12", "wps": "364444", "ups": "3.08", "wpb": "118340", "bsz": "256", "num_updates": "711000", "lr": "0.000291919", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "232107"} +[2022-08-03 17:57:39,393][train_inner][INFO] - {"epoch": 14, "update": 13.818, "loss": "2.046", "ppl": "4.13", "wps": "366180", "ups": "3.09", "wpb": "118620", "bsz": "256", "num_updates": "711200", "lr": "0.000291717", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "232172"} +[2022-08-03 17:58:44,134][train_inner][INFO] - {"epoch": 14, "update": 13.822, "loss": "2.043", "ppl": "4.12", "wps": "364354", "ups": "3.09", "wpb": "117941", "bsz": "256", "num_updates": "711400", "lr": "0.000291515", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "232236"} +[2022-08-03 17:59:48,900][train_inner][INFO] - {"epoch": 14, "update": 13.826, "loss": "2.042", "ppl": "4.12", "wps": "366129", "ups": "3.09", "wpb": "118562", "bsz": "256", "num_updates": "711600", "lr": "0.000291313", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.7", "wall": "232301"} +[2022-08-03 18:00:53,686][train_inner][INFO] - {"epoch": 14, "update": 13.83, "loss": "2.04", "ppl": "4.11", "wps": "366635", "ups": "3.09", "wpb": "118761", "bsz": "256", "num_updates": "711800", "lr": "0.000291111", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "232366"} +[2022-08-03 18:01:58,724][train_inner][INFO] - {"epoch": 14, "update": 13.834, "loss": "2.04", "ppl": "4.11", "wps": "363564", "ups": "3.08", "wpb": "118226", "bsz": "256", "num_updates": "712000", "lr": "0.000290909", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "232431"} +[2022-08-03 18:03:03,024][train_inner][INFO] - {"epoch": 14, "update": 13.838, "loss": "2.044", "ppl": "4.12", "wps": "366446", "ups": "3.11", "wpb": "117809", "bsz": "256", "num_updates": "712200", "lr": "0.000290707", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "232495"} +[2022-08-03 18:04:08,025][train_inner][INFO] - {"epoch": 14, "update": 13.842, "loss": "2.04", "ppl": "4.11", "wps": "363688", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "712400", "lr": "0.000290505", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "232560"} +[2022-08-03 18:05:13,096][train_inner][INFO] - {"epoch": 14, "update": 13.846, "loss": "2.039", "ppl": "4.11", "wps": "362892", "ups": "3.07", "wpb": "118068", "bsz": "256", "num_updates": "712600", "lr": "0.000290303", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "232625"} +[2022-08-03 18:06:17,733][train_inner][INFO] - {"epoch": 14, "update": 13.849, "loss": "2.044", "ppl": "4.12", "wps": "366137", "ups": "3.09", "wpb": "118327", "bsz": "256", "num_updates": "712800", "lr": "0.000290101", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "232690"} +[2022-08-03 18:06:59,922][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 18:07:22,704][train_inner][INFO] - {"epoch": 14, "update": 13.853, "loss": "2.048", "ppl": "4.13", "wps": "364413", "ups": "3.08", "wpb": "118380", "bsz": "256", "num_updates": "713000", "lr": "0.000289899", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "232755"} +[2022-08-03 18:07:36,198][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 18:08:27,999][train_inner][INFO] - {"epoch": 14, "update": 13.857, "loss": "2.039", "ppl": "4.11", "wps": "361549", "ups": "3.06", "wpb": "118036", "bsz": "256", "num_updates": "713200", "lr": "0.000289697", "gnorm": "0.757", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "232820"} +[2022-08-03 18:09:33,199][train_inner][INFO] - {"epoch": 14, "update": 13.861, "loss": "2.041", "ppl": "4.11", "wps": "363893", "ups": "3.07", "wpb": "118629", "bsz": "256", "num_updates": "713400", "lr": "0.000289495", "gnorm": "0.747", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "232886"} +[2022-08-03 18:10:39,065][train_inner][INFO] - {"epoch": 14, "update": 13.865, "loss": "2.038", "ppl": "4.11", "wps": "360118", "ups": "3.04", "wpb": "118595", "bsz": "256", "num_updates": "713600", "lr": "0.000289293", "gnorm": "0.749", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "232951"} +[2022-08-03 18:11:44,267][train_inner][INFO] - {"epoch": 14, "update": 13.869, "loss": "2.04", "ppl": "4.11", "wps": "362524", "ups": "3.07", "wpb": "118185", "bsz": "256", "num_updates": "713800", "lr": "0.000289091", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "233017"} +[2022-08-03 18:12:48,849][train_inner][INFO] - {"epoch": 14, "update": 13.873, "loss": "2.042", "ppl": "4.12", "wps": "365042", "ups": "3.1", "wpb": "117875", "bsz": "256", "num_updates": "714000", "lr": "0.000288889", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "233081"} +[2022-08-03 18:13:53,893][train_inner][INFO] - {"epoch": 14, "update": 13.877, "loss": "2.037", "ppl": "4.1", "wps": "364482", "ups": "3.07", "wpb": "118534", "bsz": "256", "num_updates": "714200", "lr": "0.000288687", "gnorm": "0.745", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "233146"} +[2022-08-03 18:14:58,801][train_inner][INFO] - {"epoch": 14, "update": 13.881, "loss": "2.042", "ppl": "4.12", "wps": "365065", "ups": "3.08", "wpb": "118476", "bsz": "256", "num_updates": "714400", "lr": "0.000288485", "gnorm": "0.746", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "233211"} +[2022-08-03 18:16:03,664][train_inner][INFO] - {"epoch": 14, "update": 13.884, "loss": "2.045", "ppl": "4.13", "wps": "364426", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "714600", "lr": "0.000288283", "gnorm": "0.752", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "233276"} +[2022-08-03 18:17:08,712][train_inner][INFO] - {"epoch": 14, "update": 13.888, "loss": "2.042", "ppl": "4.12", "wps": "363563", "ups": "3.07", "wpb": "118243", "bsz": "256", "num_updates": "714800", "lr": "0.000288081", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.1", "wall": "233341"} +[2022-08-03 18:18:13,866][train_inner][INFO] - {"epoch": 14, "update": 13.892, "loss": "2.04", "ppl": "4.11", "wps": "362671", "ups": "3.07", "wpb": "118146", "bsz": "256", "num_updates": "715000", "lr": "0.000287879", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "233406"} +[2022-08-03 18:19:18,300][train_inner][INFO] - {"epoch": 14, "update": 13.896, "loss": "2.041", "ppl": "4.11", "wps": "365498", "ups": "3.1", "wpb": "117750", "bsz": "256", "num_updates": "715200", "lr": "0.000287677", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "233471"} +[2022-08-03 18:20:23,308][train_inner][INFO] - {"epoch": 14, "update": 13.9, "loss": "2.049", "ppl": "4.14", "wps": "364453", "ups": "3.08", "wpb": "118460", "bsz": "256", "num_updates": "715400", "lr": "0.000287475", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "233536"} +[2022-08-03 18:21:28,042][train_inner][INFO] - {"epoch": 14, "update": 13.904, "loss": "2.033", "ppl": "4.09", "wps": "365460", "ups": "3.09", "wpb": "118287", "bsz": "256", "num_updates": "715600", "lr": "0.000287273", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "233600"} +[2022-08-03 18:22:33,160][train_inner][INFO] - {"epoch": 14, "update": 13.908, "loss": "2.043", "ppl": "4.12", "wps": "363976", "ups": "3.07", "wpb": "118506", "bsz": "256", "num_updates": "715800", "lr": "0.000287071", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "233665"} +[2022-08-03 18:23:37,858][train_inner][INFO] - {"epoch": 14, "update": 13.912, "loss": "2.038", "ppl": "4.11", "wps": "366807", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "716000", "lr": "0.000286869", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "233730"} +[2022-08-03 18:24:42,352][train_inner][INFO] - {"epoch": 14, "update": 13.915, "loss": "2.042", "ppl": "4.12", "wps": "364902", "ups": "3.1", "wpb": "117668", "bsz": "256", "num_updates": "716200", "lr": "0.000286667", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "233795"} +[2022-08-03 18:25:47,134][train_inner][INFO] - {"epoch": 14, "update": 13.919, "loss": "2.044", "ppl": "4.12", "wps": "365896", "ups": "3.09", "wpb": "118516", "bsz": "256", "num_updates": "716400", "lr": "0.000286465", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "233859"} +[2022-08-03 18:26:52,017][train_inner][INFO] - {"epoch": 14, "update": 13.923, "loss": "2.041", "ppl": "4.12", "wps": "363711", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "716600", "lr": "0.000286263", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "233924"} +[2022-08-03 18:27:56,904][train_inner][INFO] - {"epoch": 14, "update": 13.927, "loss": "2.041", "ppl": "4.11", "wps": "364473", "ups": "3.08", "wpb": "118247", "bsz": "256", "num_updates": "716800", "lr": "0.000286061", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "233989"} +[2022-08-03 18:29:01,800][train_inner][INFO] - {"epoch": 14, "update": 13.931, "loss": "2.037", "ppl": "4.11", "wps": "364436", "ups": "3.08", "wpb": "118250", "bsz": "256", "num_updates": "717000", "lr": "0.000285859", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "234054"} +[2022-08-03 18:29:47,875][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 18:30:07,103][train_inner][INFO] - {"epoch": 14, "update": 13.935, "loss": "2.039", "ppl": "4.11", "wps": "362312", "ups": "3.06", "wpb": "118299", "bsz": "256", "num_updates": "717200", "lr": "0.000285657", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "234119"} +[2022-08-03 18:31:12,120][train_inner][INFO] - {"epoch": 14, "update": 13.939, "loss": "2.04", "ppl": "4.11", "wps": "362986", "ups": "3.08", "wpb": "118000", "bsz": "256", "num_updates": "717400", "lr": "0.000285455", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "234184"} +[2022-08-03 18:32:17,200][train_inner][INFO] - {"epoch": 14, "update": 13.943, "loss": "2.038", "ppl": "4.11", "wps": "363691", "ups": "3.07", "wpb": "118344", "bsz": "256", "num_updates": "717600", "lr": "0.000285253", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "234250"} +[2022-08-03 18:33:22,196][train_inner][INFO] - {"epoch": 14, "update": 13.947, "loss": "2.04", "ppl": "4.11", "wps": "366519", "ups": "3.08", "wpb": "119108", "bsz": "256", "num_updates": "717800", "lr": "0.000285051", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "234315"} +[2022-08-03 18:34:27,291][train_inner][INFO] - {"epoch": 14, "update": 13.95, "loss": "2.037", "ppl": "4.1", "wps": "364153", "ups": "3.07", "wpb": "118521", "bsz": "256", "num_updates": "718000", "lr": "0.000284848", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "234380"} +[2022-08-03 18:35:32,298][train_inner][INFO] - {"epoch": 14, "update": 13.954, "loss": "2.037", "ppl": "4.1", "wps": "365149", "ups": "3.08", "wpb": "118684", "bsz": "256", "num_updates": "718200", "lr": "0.000284646", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "234445"} +[2022-08-03 18:36:36,943][train_inner][INFO] - {"epoch": 14, "update": 13.958, "loss": "2.039", "ppl": "4.11", "wps": "365785", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "718400", "lr": "0.000284444", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "234509"} +[2022-08-03 18:37:41,737][train_inner][INFO] - {"epoch": 14, "update": 13.962, "loss": "2.034", "ppl": "4.1", "wps": "362798", "ups": "3.09", "wpb": "117533", "bsz": "256", "num_updates": "718600", "lr": "0.000284242", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "234574"} +[2022-08-03 18:38:46,249][train_inner][INFO] - {"epoch": 14, "update": 13.966, "loss": "2.031", "ppl": "4.09", "wps": "368112", "ups": "3.1", "wpb": "118736", "bsz": "256", "num_updates": "718800", "lr": "0.00028404", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "234639"} +[2022-08-03 18:39:50,854][train_inner][INFO] - {"epoch": 14, "update": 13.97, "loss": "2.042", "ppl": "4.12", "wps": "364383", "ups": "3.1", "wpb": "117704", "bsz": "256", "num_updates": "719000", "lr": "0.000283838", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "234703"} +[2022-08-03 18:40:55,779][train_inner][INFO] - {"epoch": 14, "update": 13.974, "loss": "2.039", "ppl": "4.11", "wps": "361781", "ups": "3.08", "wpb": "117440", "bsz": "256", "num_updates": "719200", "lr": "0.000283636", "gnorm": "0.752", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "234768"} +[2022-08-03 18:40:59,315][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 18:42:00,801][train_inner][INFO] - {"epoch": 14, "update": 13.978, "loss": "2.04", "ppl": "4.11", "wps": "362482", "ups": "3.08", "wpb": "117846", "bsz": "256", "num_updates": "719400", "lr": "0.000283434", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "234833"} +[2022-08-03 18:43:05,637][train_inner][INFO] - {"epoch": 14, "update": 13.982, "loss": "2.038", "ppl": "4.11", "wps": "365223", "ups": "3.08", "wpb": "118395", "bsz": "256", "num_updates": "719600", "lr": "0.000283232", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "234898"} +[2022-08-03 18:44:10,590][train_inner][INFO] - {"epoch": 14, "update": 13.985, "loss": "2.04", "ppl": "4.11", "wps": "364793", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "719800", "lr": "0.00028303", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "234963"} +[2022-08-03 18:45:15,635][train_inner][INFO] - {"epoch": 14, "update": 13.989, "loss": "2.04", "ppl": "4.11", "wps": "364653", "ups": "3.07", "wpb": "118593", "bsz": "256", "num_updates": "720000", "lr": "0.000282828", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "235028"} +[2022-08-03 18:46:20,378][train_inner][INFO] - {"epoch": 14, "update": 13.993, "loss": "2.04", "ppl": "4.11", "wps": "366741", "ups": "3.09", "wpb": "118718", "bsz": "256", "num_updates": "720200", "lr": "0.000282626", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "235093"} +[2022-08-03 18:47:25,396][train_inner][INFO] - {"epoch": 14, "update": 13.997, "loss": "2.04", "ppl": "4.11", "wps": "365793", "ups": "3.08", "wpb": "118913", "bsz": "256", "num_updates": "720400", "lr": "0.000282424", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "235158"} +[2022-08-03 18:48:13,939][fairseq_cli.train][INFO] - end of epoch 14 (average epoch stats below) +[2022-08-03 18:48:13,940][train][INFO] - {"epoch": 14, "train_loss": "2.052", "train_ppl": "4.15", "train_wps": "362218", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "720550", "train_lr": "0.000282273", "train_gnorm": "0.739", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16635", "train_gb_free": "21.3", "train_wall": "235206"} +[2022-08-03 18:48:14,024][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-03 18:48:14,027][fairseq.trainer][INFO] - begin training epoch 15 +[2022-08-03 18:48:14,027][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-03 18:48:45,602][train_inner][INFO] - {"epoch": 15, "update": 14.001, "loss": "2.035", "ppl": "4.1", "wps": "295738", "ups": "2.49", "wpb": "118598", "bsz": "255.4", "num_updates": "720600", "lr": "0.000282222", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "235238"} +[2022-08-03 18:49:50,467][train_inner][INFO] - {"epoch": 15, "update": 14.005, "loss": "2.037", "ppl": "4.11", "wps": "363811", "ups": "3.08", "wpb": "117992", "bsz": "256", "num_updates": "720800", "lr": "0.00028202", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "235303"} +[2022-08-03 18:50:55,213][train_inner][INFO] - {"epoch": 15, "update": 14.009, "loss": "2.037", "ppl": "4.1", "wps": "366214", "ups": "3.09", "wpb": "118554", "bsz": "256", "num_updates": "721000", "lr": "0.000281818", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "235368"} +[2022-08-03 18:52:00,088][train_inner][INFO] - {"epoch": 15, "update": 14.013, "loss": "2.034", "ppl": "4.1", "wps": "364574", "ups": "3.08", "wpb": "118255", "bsz": "256", "num_updates": "721200", "lr": "0.000281616", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "235432"} +[2022-08-03 18:52:23,090][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 18:53:04,869][train_inner][INFO] - {"epoch": 15, "update": 14.017, "loss": "2.034", "ppl": "4.09", "wps": "365333", "ups": "3.09", "wpb": "118333", "bsz": "256", "num_updates": "721400", "lr": "0.000281414", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "235497"} +[2022-08-03 18:54:09,725][train_inner][INFO] - {"epoch": 15, "update": 14.02, "loss": "2.035", "ppl": "4.1", "wps": "364455", "ups": "3.08", "wpb": "118184", "bsz": "256", "num_updates": "721600", "lr": "0.000281212", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "235562"} +[2022-08-03 18:55:14,628][train_inner][INFO] - {"epoch": 15, "update": 14.024, "loss": "2.034", "ppl": "4.1", "wps": "365438", "ups": "3.08", "wpb": "118588", "bsz": "256", "num_updates": "721800", "lr": "0.00028101", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "235627"} +[2022-08-03 18:56:19,085][train_inner][INFO] - {"epoch": 15, "update": 14.028, "loss": "2.035", "ppl": "4.1", "wps": "367032", "ups": "3.1", "wpb": "118289", "bsz": "256", "num_updates": "722000", "lr": "0.000280808", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.6", "wall": "235691"} +[2022-08-03 18:57:25,257][train_inner][INFO] - {"epoch": 15, "update": 14.032, "loss": "2.032", "ppl": "4.09", "wps": "355210", "ups": "3.02", "wpb": "117523", "bsz": "256", "num_updates": "722200", "lr": "0.000280606", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.3", "wall": "235758"} +[2022-08-03 18:58:30,349][train_inner][INFO] - {"epoch": 15, "update": 14.036, "loss": "2.034", "ppl": "4.09", "wps": "363030", "ups": "3.07", "wpb": "118149", "bsz": "256", "num_updates": "722400", "lr": "0.000280404", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "235823"} +[2022-08-03 18:59:35,411][train_inner][INFO] - {"epoch": 15, "update": 14.04, "loss": "2.039", "ppl": "4.11", "wps": "362546", "ups": "3.07", "wpb": "117938", "bsz": "256", "num_updates": "722600", "lr": "0.000280202", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "235888"} +[2022-08-03 19:00:41,530][train_inner][INFO] - {"epoch": 15, "update": 14.044, "loss": "2.039", "ppl": "4.11", "wps": "355642", "ups": "3.02", "wpb": "117572", "bsz": "256", "num_updates": "722800", "lr": "0.00028", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "25.9", "wall": "235954"} +[2022-08-03 19:01:46,322][train_inner][INFO] - {"epoch": 15, "update": 14.048, "loss": "2.034", "ppl": "4.09", "wps": "365777", "ups": "3.09", "wpb": "118497", "bsz": "256", "num_updates": "723000", "lr": "0.000279798", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "236019"} +[2022-08-03 19:02:51,531][train_inner][INFO] - {"epoch": 15, "update": 14.051, "loss": "2.029", "ppl": "4.08", "wps": "363939", "ups": "3.07", "wpb": "118659", "bsz": "256", "num_updates": "723200", "lr": "0.000279596", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "236084"} +[2022-08-03 19:03:30,856][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 19:03:56,500][train_inner][INFO] - {"epoch": 15, "update": 14.055, "loss": "2.035", "ppl": "4.1", "wps": "361525", "ups": "3.08", "wpb": "117437", "bsz": "256", "num_updates": "723400", "lr": "0.000279394", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "236149"} +[2022-08-03 19:05:01,423][train_inner][INFO] - {"epoch": 15, "update": 14.059, "loss": "2.031", "ppl": "4.09", "wps": "365714", "ups": "3.08", "wpb": "118715", "bsz": "256", "num_updates": "723600", "lr": "0.000279192", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "236214"} +[2022-08-03 19:06:06,490][train_inner][INFO] - {"epoch": 15, "update": 14.063, "loss": "2.025", "ppl": "4.07", "wps": "364808", "ups": "3.07", "wpb": "118684", "bsz": "256", "num_updates": "723800", "lr": "0.00027899", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "236279"} +[2022-08-03 19:07:11,239][train_inner][INFO] - {"epoch": 15, "update": 14.067, "loss": "2.041", "ppl": "4.11", "wps": "364103", "ups": "3.09", "wpb": "117875", "bsz": "256", "num_updates": "724000", "lr": "0.000278788", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "236344"} +[2022-08-03 19:08:16,179][train_inner][INFO] - {"epoch": 15, "update": 14.071, "loss": "2.034", "ppl": "4.1", "wps": "363640", "ups": "3.08", "wpb": "118072", "bsz": "256", "num_updates": "724200", "lr": "0.000278586", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "236409"} +[2022-08-03 19:09:04,475][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 19:09:21,645][train_inner][INFO] - {"epoch": 15, "update": 14.075, "loss": "2.031", "ppl": "4.09", "wps": "361076", "ups": "3.06", "wpb": "118189", "bsz": "256", "num_updates": "724400", "lr": "0.000278384", "gnorm": "0.75", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "236474"} +[2022-08-03 19:10:26,744][train_inner][INFO] - {"epoch": 15, "update": 14.079, "loss": "2.038", "ppl": "4.11", "wps": "363662", "ups": "3.07", "wpb": "118368", "bsz": "256", "num_updates": "724600", "lr": "0.000278182", "gnorm": "0.749", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "236539"} +[2022-08-03 19:11:31,689][train_inner][INFO] - {"epoch": 15, "update": 14.083, "loss": "2.029", "ppl": "4.08", "wps": "364607", "ups": "3.08", "wpb": "118397", "bsz": "256", "num_updates": "724800", "lr": "0.00027798", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "236604"} +[2022-08-03 19:12:37,523][train_inner][INFO] - {"epoch": 15, "update": 14.086, "loss": "2.036", "ppl": "4.1", "wps": "358113", "ups": "3.04", "wpb": "117878", "bsz": "256", "num_updates": "725000", "lr": "0.000277778", "gnorm": "0.751", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.4", "wall": "236670"} +[2022-08-03 19:13:42,221][train_inner][INFO] - {"epoch": 15, "update": 14.09, "loss": "2.03", "ppl": "4.09", "wps": "365775", "ups": "3.09", "wpb": "118322", "bsz": "256", "num_updates": "725200", "lr": "0.000277576", "gnorm": "0.752", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "236735"} +[2022-08-03 19:14:47,008][train_inner][INFO] - {"epoch": 15, "update": 14.094, "loss": "2.028", "ppl": "4.08", "wps": "364592", "ups": "3.09", "wpb": "118103", "bsz": "256", "num_updates": "725400", "lr": "0.000277374", "gnorm": "0.757", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "236799"} +[2022-08-03 19:15:52,233][train_inner][INFO] - {"epoch": 15, "update": 14.098, "loss": "2.037", "ppl": "4.1", "wps": "361186", "ups": "3.07", "wpb": "117790", "bsz": "256", "num_updates": "725600", "lr": "0.000277172", "gnorm": "0.751", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "236865"} +[2022-08-03 19:16:57,320][train_inner][INFO] - {"epoch": 15, "update": 14.102, "loss": "2.028", "ppl": "4.08", "wps": "363951", "ups": "3.07", "wpb": "118441", "bsz": "256", "num_updates": "725800", "lr": "0.00027697", "gnorm": "0.755", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "236930"} +[2022-08-03 19:18:01,923][train_inner][INFO] - {"epoch": 15, "update": 14.106, "loss": "2.039", "ppl": "4.11", "wps": "367120", "ups": "3.1", "wpb": "118584", "bsz": "256", "num_updates": "726000", "lr": "0.000276768", "gnorm": "0.751", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "236994"} +[2022-08-03 19:19:07,087][train_inner][INFO] - {"epoch": 15, "update": 14.11, "loss": "2.032", "ppl": "4.09", "wps": "362778", "ups": "3.07", "wpb": "118198", "bsz": "256", "num_updates": "726200", "lr": "0.000276566", "gnorm": "0.753", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "237059"} +[2022-08-03 19:20:11,998][train_inner][INFO] - {"epoch": 15, "update": 14.114, "loss": "2.032", "ppl": "4.09", "wps": "365247", "ups": "3.08", "wpb": "118541", "bsz": "256", "num_updates": "726400", "lr": "0.000276364", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "237124"} +[2022-08-03 19:21:17,082][train_inner][INFO] - {"epoch": 15, "update": 14.118, "loss": "2.032", "ppl": "4.09", "wps": "362224", "ups": "3.07", "wpb": "117873", "bsz": "256", "num_updates": "726600", "lr": "0.000276162", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "237189"} +[2022-08-03 19:22:22,060][train_inner][INFO] - {"epoch": 15, "update": 14.121, "loss": "2.034", "ppl": "4.1", "wps": "363696", "ups": "3.08", "wpb": "118161", "bsz": "256", "num_updates": "726800", "lr": "0.00027596", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "237254"} +[2022-08-03 19:23:26,677][train_inner][INFO] - {"epoch": 15, "update": 14.125, "loss": "2.035", "ppl": "4.1", "wps": "365648", "ups": "3.1", "wpb": "118132", "bsz": "256", "num_updates": "727000", "lr": "0.000275758", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "237319"} +[2022-08-03 19:24:32,347][train_inner][INFO] - {"epoch": 15, "update": 14.129, "loss": "2.033", "ppl": "4.09", "wps": "360067", "ups": "3.05", "wpb": "118226", "bsz": "256", "num_updates": "727200", "lr": "0.000275556", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.5", "wall": "237385"} +[2022-08-03 19:25:37,187][train_inner][INFO] - {"epoch": 15, "update": 14.133, "loss": "2.031", "ppl": "4.09", "wps": "365046", "ups": "3.08", "wpb": "118347", "bsz": "256", "num_updates": "727400", "lr": "0.000275354", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "237450"} +[2022-08-03 19:26:42,129][train_inner][INFO] - {"epoch": 15, "update": 14.137, "loss": "2.026", "ppl": "4.07", "wps": "363832", "ups": "3.08", "wpb": "118138", "bsz": "256", "num_updates": "727600", "lr": "0.000275152", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "237514"} +[2022-08-03 19:27:46,821][train_inner][INFO] - {"epoch": 15, "update": 14.141, "loss": "2.032", "ppl": "4.09", "wps": "365789", "ups": "3.09", "wpb": "118315", "bsz": "256", "num_updates": "727800", "lr": "0.000274949", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "237579"} +[2022-08-03 19:28:51,643][train_inner][INFO] - {"epoch": 15, "update": 14.145, "loss": "2.025", "ppl": "4.07", "wps": "366460", "ups": "3.09", "wpb": "118772", "bsz": "256", "num_updates": "728000", "lr": "0.000274747", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "237644"} +[2022-08-03 19:29:56,657][train_inner][INFO] - {"epoch": 15, "update": 14.149, "loss": "2.035", "ppl": "4.1", "wps": "364572", "ups": "3.08", "wpb": "118510", "bsz": "256", "num_updates": "728200", "lr": "0.000274545", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "237709"} +[2022-08-03 19:31:01,335][train_inner][INFO] - {"epoch": 15, "update": 14.153, "loss": "2.033", "ppl": "4.09", "wps": "365781", "ups": "3.09", "wpb": "118288", "bsz": "256", "num_updates": "728400", "lr": "0.000274343", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "237774"} +[2022-08-03 19:31:26,772][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 19:32:06,432][train_inner][INFO] - {"epoch": 15, "update": 14.156, "loss": "2.033", "ppl": "4.09", "wps": "364532", "ups": "3.07", "wpb": "118649", "bsz": "256", "num_updates": "728600", "lr": "0.000274141", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "237839"} +[2022-08-03 19:33:11,540][train_inner][INFO] - {"epoch": 15, "update": 14.16, "loss": "2.028", "ppl": "4.08", "wps": "362798", "ups": "3.07", "wpb": "118104", "bsz": "256", "num_updates": "728800", "lr": "0.000273939", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "237904"} +[2022-08-03 19:34:16,246][train_inner][INFO] - {"epoch": 15, "update": 14.164, "loss": "2.033", "ppl": "4.09", "wps": "364194", "ups": "3.09", "wpb": "117826", "bsz": "256", "num_updates": "729000", "lr": "0.000273737", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27", "wall": "237969"} +[2022-08-03 19:35:21,063][train_inner][INFO] - {"epoch": 15, "update": 14.168, "loss": "2.033", "ppl": "4.09", "wps": "365770", "ups": "3.09", "wpb": "118539", "bsz": "256", "num_updates": "729200", "lr": "0.000273535", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "238033"} +[2022-08-03 19:36:25,825][train_inner][INFO] - {"epoch": 15, "update": 14.172, "loss": "2.03", "ppl": "4.08", "wps": "365244", "ups": "3.09", "wpb": "118268", "bsz": "256", "num_updates": "729400", "lr": "0.000273333", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "238098"} +[2022-08-03 19:37:30,622][train_inner][INFO] - {"epoch": 15, "update": 14.176, "loss": "2.035", "ppl": "4.1", "wps": "367200", "ups": "3.09", "wpb": "118965", "bsz": "256", "num_updates": "729600", "lr": "0.000273131", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "238163"} +[2022-08-03 19:38:35,448][train_inner][INFO] - {"epoch": 15, "update": 14.18, "loss": "2.028", "ppl": "4.08", "wps": "365363", "ups": "3.09", "wpb": "118423", "bsz": "256", "num_updates": "729800", "lr": "0.000272929", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "238228"} +[2022-08-03 19:39:40,551][train_inner][INFO] - {"epoch": 15, "update": 14.184, "loss": "2.028", "ppl": "4.08", "wps": "362542", "ups": "3.07", "wpb": "118013", "bsz": "256", "num_updates": "730000", "lr": "0.000272727", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "238293"} +[2022-08-03 19:40:45,773][train_inner][INFO] - {"epoch": 15, "update": 14.188, "loss": "2.035", "ppl": "4.1", "wps": "364132", "ups": "3.07", "wpb": "118745", "bsz": "256", "num_updates": "730200", "lr": "0.000272525", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "238358"} +[2022-08-03 19:41:50,791][train_inner][INFO] - {"epoch": 15, "update": 14.191, "loss": "2.027", "ppl": "4.08", "wps": "364230", "ups": "3.08", "wpb": "118405", "bsz": "256", "num_updates": "730400", "lr": "0.000272323", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "238423"} +[2022-08-03 19:42:36,215][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 19:42:55,953][train_inner][INFO] - {"epoch": 15, "update": 14.195, "loss": "2.027", "ppl": "4.07", "wps": "363098", "ups": "3.07", "wpb": "118298", "bsz": "256", "num_updates": "730600", "lr": "0.000272121", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "238488"} +[2022-08-03 19:44:00,719][train_inner][INFO] - {"epoch": 15, "update": 14.199, "loss": "2.033", "ppl": "4.09", "wps": "366242", "ups": "3.09", "wpb": "118600", "bsz": "256", "num_updates": "730800", "lr": "0.000271919", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "238553"} +[2022-08-03 19:45:05,686][train_inner][INFO] - {"epoch": 15, "update": 14.203, "loss": "2.035", "ppl": "4.1", "wps": "362329", "ups": "3.08", "wpb": "117695", "bsz": "256", "num_updates": "731000", "lr": "0.000271717", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "238618"} +[2022-08-03 19:46:10,458][train_inner][INFO] - {"epoch": 15, "update": 14.207, "loss": "2.034", "ppl": "4.1", "wps": "366026", "ups": "3.09", "wpb": "118540", "bsz": "256", "num_updates": "731200", "lr": "0.000271515", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "238683"} +[2022-08-03 19:47:15,370][train_inner][INFO] - {"epoch": 15, "update": 14.211, "loss": "2.022", "ppl": "4.06", "wps": "366239", "ups": "3.08", "wpb": "118865", "bsz": "256", "num_updates": "731400", "lr": "0.000271313", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "238748"} +[2022-08-03 19:48:20,075][train_inner][INFO] - {"epoch": 15, "update": 14.215, "loss": "2.032", "ppl": "4.09", "wps": "366626", "ups": "3.09", "wpb": "118610", "bsz": "256", "num_updates": "731600", "lr": "0.000271111", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "238812"} +[2022-08-03 19:49:25,114][train_inner][INFO] - {"epoch": 15, "update": 14.219, "loss": "2.033", "ppl": "4.09", "wps": "361932", "ups": "3.08", "wpb": "117697", "bsz": "256", "num_updates": "731800", "lr": "0.000270909", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "238877"} +[2022-08-03 19:50:30,044][train_inner][INFO] - {"epoch": 15, "update": 14.222, "loss": "2.03", "ppl": "4.08", "wps": "365343", "ups": "3.08", "wpb": "118606", "bsz": "256", "num_updates": "732000", "lr": "0.000270707", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "238942"} +[2022-08-03 19:51:34,850][train_inner][INFO] - {"epoch": 15, "update": 14.226, "loss": "2.028", "ppl": "4.08", "wps": "363179", "ups": "3.09", "wpb": "117680", "bsz": "256", "num_updates": "732200", "lr": "0.000270505", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "239007"} +[2022-08-03 19:52:39,572][train_inner][INFO] - {"epoch": 15, "update": 14.23, "loss": "2.033", "ppl": "4.09", "wps": "367420", "ups": "3.09", "wpb": "118899", "bsz": "256", "num_updates": "732400", "lr": "0.000270303", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "239072"} +[2022-08-03 19:53:43,512][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 19:53:45,102][train_inner][INFO] - {"epoch": 15, "update": 14.234, "loss": "2.025", "ppl": "4.07", "wps": "360944", "ups": "3.05", "wpb": "118261", "bsz": "256", "num_updates": "732600", "lr": "0.000270101", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "239137"} +[2022-08-03 19:54:49,952][train_inner][INFO] - {"epoch": 15, "update": 14.238, "loss": "2.034", "ppl": "4.1", "wps": "365426", "ups": "3.08", "wpb": "118488", "bsz": "256", "num_updates": "732800", "lr": "0.000269899", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "239202"} +[2022-08-03 19:55:54,956][train_inner][INFO] - {"epoch": 15, "update": 14.242, "loss": "2.03", "ppl": "4.09", "wps": "365015", "ups": "3.08", "wpb": "118636", "bsz": "256", "num_updates": "733000", "lr": "0.000269697", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "239267"} +[2022-08-03 19:56:59,190][train_inner][INFO] - {"epoch": 15, "update": 14.246, "loss": "2.029", "ppl": "4.08", "wps": "369026", "ups": "3.11", "wpb": "118518", "bsz": "256", "num_updates": "733200", "lr": "0.000269495", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "239332"} +[2022-08-03 19:58:03,658][train_inner][INFO] - {"epoch": 15, "update": 14.25, "loss": "2.027", "ppl": "4.07", "wps": "366856", "ups": "3.1", "wpb": "118251", "bsz": "256", "num_updates": "733400", "lr": "0.000269293", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "239396"} +[2022-08-03 19:59:08,678][train_inner][INFO] - {"epoch": 15, "update": 14.254, "loss": "2.026", "ppl": "4.07", "wps": "364041", "ups": "3.08", "wpb": "118349", "bsz": "256", "num_updates": "733600", "lr": "0.000269091", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "239461"} +[2022-08-03 20:00:14,146][train_inner][INFO] - {"epoch": 15, "update": 14.257, "loss": "2.028", "ppl": "4.08", "wps": "362082", "ups": "3.05", "wpb": "118521", "bsz": "256", "num_updates": "733800", "lr": "0.000268889", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "239526"} +[2022-08-03 20:01:18,522][train_inner][INFO] - {"epoch": 15, "update": 14.261, "loss": "2.027", "ppl": "4.07", "wps": "366924", "ups": "3.11", "wpb": "118103", "bsz": "256", "num_updates": "734000", "lr": "0.000268687", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "239591"} +[2022-08-03 20:02:23,171][train_inner][INFO] - {"epoch": 15, "update": 14.265, "loss": "2.029", "ppl": "4.08", "wps": "365946", "ups": "3.09", "wpb": "118289", "bsz": "256", "num_updates": "734200", "lr": "0.000268485", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "239655"} +[2022-08-03 20:03:27,827][train_inner][INFO] - {"epoch": 15, "update": 14.269, "loss": "2.024", "ppl": "4.07", "wps": "365715", "ups": "3.09", "wpb": "118226", "bsz": "256", "num_updates": "734400", "lr": "0.000268283", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "239720"} +[2022-08-03 20:04:32,637][train_inner][INFO] - {"epoch": 15, "update": 14.273, "loss": "2.027", "ppl": "4.08", "wps": "364717", "ups": "3.09", "wpb": "118184", "bsz": "256", "num_updates": "734600", "lr": "0.000268081", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "239785"} +[2022-08-03 20:04:47,289][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 20:05:37,839][train_inner][INFO] - {"epoch": 15, "update": 14.277, "loss": "2.027", "ppl": "4.08", "wps": "363188", "ups": "3.07", "wpb": "118402", "bsz": "256", "num_updates": "734800", "lr": "0.000267879", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "239850"} +[2022-08-03 20:06:42,615][train_inner][INFO] - {"epoch": 15, "update": 14.281, "loss": "2.028", "ppl": "4.08", "wps": "365994", "ups": "3.09", "wpb": "118537", "bsz": "256", "num_updates": "735000", "lr": "0.000267677", "gnorm": "0.761", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "239915"} +[2022-08-03 20:07:47,679][train_inner][INFO] - {"epoch": 15, "update": 14.285, "loss": "2.034", "ppl": "4.09", "wps": "364478", "ups": "3.07", "wpb": "118570", "bsz": "256", "num_updates": "735200", "lr": "0.000267475", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "239980"} +[2022-08-03 20:08:53,205][train_inner][INFO] - {"epoch": 15, "update": 14.289, "loss": "2.026", "ppl": "4.07", "wps": "361530", "ups": "3.05", "wpb": "118446", "bsz": "256", "num_updates": "735400", "lr": "0.000267273", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "240046"} +[2022-08-03 20:09:58,407][train_inner][INFO] - {"epoch": 15, "update": 14.292, "loss": "2.024", "ppl": "4.07", "wps": "363183", "ups": "3.07", "wpb": "118401", "bsz": "256", "num_updates": "735600", "lr": "0.000267071", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "240111"} +[2022-08-03 20:11:03,045][train_inner][INFO] - {"epoch": 15, "update": 14.296, "loss": "2.019", "ppl": "4.05", "wps": "368518", "ups": "3.09", "wpb": "119098", "bsz": "256", "num_updates": "735800", "lr": "0.000266869", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "240175"} +[2022-08-03 20:12:08,148][train_inner][INFO] - {"epoch": 15, "update": 14.3, "loss": "2.03", "ppl": "4.08", "wps": "362639", "ups": "3.07", "wpb": "118043", "bsz": "256", "num_updates": "736000", "lr": "0.000266667", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "240240"} +[2022-08-03 20:13:14,047][train_inner][INFO] - {"epoch": 15, "update": 14.304, "loss": "2.027", "ppl": "4.08", "wps": "359055", "ups": "3.03", "wpb": "118306", "bsz": "256", "num_updates": "736200", "lr": "0.000266465", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.3", "wall": "240306"} +[2022-08-03 20:14:18,749][train_inner][INFO] - {"epoch": 15, "update": 14.308, "loss": "2.036", "ppl": "4.1", "wps": "364613", "ups": "3.09", "wpb": "117953", "bsz": "256", "num_updates": "736400", "lr": "0.000266263", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "240371"} +[2022-08-03 20:15:23,683][train_inner][INFO] - {"epoch": 15, "update": 14.312, "loss": "2.023", "ppl": "4.06", "wps": "365407", "ups": "3.08", "wpb": "118635", "bsz": "256", "num_updates": "736600", "lr": "0.000266061", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "240436"} +[2022-08-03 20:15:58,181][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 20:16:29,067][train_inner][INFO] - {"epoch": 15, "update": 14.316, "loss": "2.022", "ppl": "4.06", "wps": "363117", "ups": "3.06", "wpb": "118708", "bsz": "256", "num_updates": "736800", "lr": "0.000265859", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "240501"} +[2022-08-03 20:17:33,954][train_inner][INFO] - {"epoch": 15, "update": 14.32, "loss": "2.031", "ppl": "4.09", "wps": "366346", "ups": "3.08", "wpb": "118854", "bsz": "256", "num_updates": "737000", "lr": "0.000265657", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "240566"} +[2022-08-03 20:17:47,971][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 20:18:38,878][train_inner][INFO] - {"epoch": 15, "update": 14.324, "loss": "2.024", "ppl": "4.07", "wps": "364621", "ups": "3.08", "wpb": "118362", "bsz": "256", "num_updates": "737200", "lr": "0.000265455", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "240631"} +[2022-08-03 20:19:43,648][train_inner][INFO] - {"epoch": 15, "update": 14.327, "loss": "2.021", "ppl": "4.06", "wps": "366621", "ups": "3.09", "wpb": "118730", "bsz": "256", "num_updates": "737400", "lr": "0.000265253", "gnorm": "0.758", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "240696"} +[2022-08-03 20:20:48,294][train_inner][INFO] - {"epoch": 15, "update": 14.331, "loss": "2.029", "ppl": "4.08", "wps": "363866", "ups": "3.09", "wpb": "117610", "bsz": "256", "num_updates": "737600", "lr": "0.000265051", "gnorm": "0.762", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "240761"} +[2022-08-03 20:21:53,407][train_inner][INFO] - {"epoch": 15, "update": 14.335, "loss": "2.019", "ppl": "4.05", "wps": "366652", "ups": "3.07", "wpb": "119368", "bsz": "256", "num_updates": "737800", "lr": "0.000264848", "gnorm": "0.753", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "240826"} +[2022-08-03 20:22:59,455][train_inner][INFO] - {"epoch": 15, "update": 14.339, "loss": "2.027", "ppl": "4.08", "wps": "356484", "ups": "3.03", "wpb": "117724", "bsz": "256", "num_updates": "738000", "lr": "0.000264646", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.6", "wall": "240892"} +[2022-08-03 20:24:03,833][train_inner][INFO] - {"epoch": 15, "update": 14.343, "loss": "2.03", "ppl": "4.08", "wps": "366391", "ups": "3.11", "wpb": "117935", "bsz": "256", "num_updates": "738200", "lr": "0.000264444", "gnorm": "0.762", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "240956"} +[2022-08-03 20:25:09,132][train_inner][INFO] - {"epoch": 15, "update": 14.347, "loss": "2.028", "ppl": "4.08", "wps": "363154", "ups": "3.06", "wpb": "118566", "bsz": "256", "num_updates": "738400", "lr": "0.000264242", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "241021"} +[2022-08-03 20:26:14,000][train_inner][INFO] - {"epoch": 15, "update": 14.351, "loss": "2.028", "ppl": "4.08", "wps": "364578", "ups": "3.08", "wpb": "118245", "bsz": "256", "num_updates": "738600", "lr": "0.00026404", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "241086"} +[2022-08-03 20:27:18,931][train_inner][INFO] - {"epoch": 15, "update": 14.355, "loss": "2.028", "ppl": "4.08", "wps": "363692", "ups": "3.08", "wpb": "118074", "bsz": "256", "num_updates": "738800", "lr": "0.000263838", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "241151"} +[2022-08-03 20:28:23,888][train_inner][INFO] - {"epoch": 15, "update": 14.359, "loss": "2.027", "ppl": "4.08", "wps": "365925", "ups": "3.08", "wpb": "118844", "bsz": "256", "num_updates": "739000", "lr": "0.000263636", "gnorm": "0.758", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "241216"} +[2022-08-03 20:29:28,424][train_inner][INFO] - {"epoch": 15, "update": 14.362, "loss": "2.031", "ppl": "4.09", "wps": "363917", "ups": "3.1", "wpb": "117427", "bsz": "255.9", "num_updates": "739200", "lr": "0.000263434", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "241281"} +[2022-08-03 20:30:33,199][train_inner][INFO] - {"epoch": 15, "update": 14.366, "loss": "2.022", "ppl": "4.06", "wps": "363332", "ups": "3.09", "wpb": "117673", "bsz": "256", "num_updates": "739400", "lr": "0.000263232", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "241346"} +[2022-08-03 20:31:38,073][train_inner][INFO] - {"epoch": 15, "update": 14.37, "loss": "2.025", "ppl": "4.07", "wps": "364326", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "739600", "lr": "0.00026303", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "241410"} +[2022-08-03 20:32:42,609][train_inner][INFO] - {"epoch": 15, "update": 14.374, "loss": "2.029", "ppl": "4.08", "wps": "365463", "ups": "3.1", "wpb": "117927", "bsz": "256", "num_updates": "739800", "lr": "0.000262828", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "241475"} +[2022-08-03 20:33:47,723][train_inner][INFO] - {"epoch": 15, "update": 14.378, "loss": "2.028", "ppl": "4.08", "wps": "362480", "ups": "3.07", "wpb": "118010", "bsz": "256", "num_updates": "740000", "lr": "0.000262626", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "241540"} +[2022-08-03 20:34:52,242][train_inner][INFO] - {"epoch": 15, "update": 14.382, "loss": "2.021", "ppl": "4.06", "wps": "366059", "ups": "3.1", "wpb": "118086", "bsz": "256", "num_updates": "740200", "lr": "0.000262424", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "241605"} +[2022-08-03 20:35:57,227][train_inner][INFO] - {"epoch": 15, "update": 14.386, "loss": "2.022", "ppl": "4.06", "wps": "363844", "ups": "3.08", "wpb": "118221", "bsz": "256", "num_updates": "740400", "lr": "0.000262222", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "241670"} +[2022-08-03 20:37:02,308][train_inner][INFO] - {"epoch": 15, "update": 14.39, "loss": "2.027", "ppl": "4.08", "wps": "362732", "ups": "3.07", "wpb": "118033", "bsz": "256", "num_updates": "740600", "lr": "0.00026202", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "241735"} +[2022-08-03 20:38:07,207][train_inner][INFO] - {"epoch": 15, "update": 14.393, "loss": "2.025", "ppl": "4.07", "wps": "365025", "ups": "3.08", "wpb": "118446", "bsz": "256", "num_updates": "740800", "lr": "0.000261818", "gnorm": "0.761", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "241800"} +[2022-08-03 20:39:12,279][train_inner][INFO] - {"epoch": 15, "update": 14.397, "loss": "2.027", "ppl": "4.07", "wps": "363859", "ups": "3.07", "wpb": "118384", "bsz": "256", "num_updates": "741000", "lr": "0.000261616", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "241865"} +[2022-08-03 20:40:02,010][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 20:40:18,589][train_inner][INFO] - {"epoch": 15, "update": 14.401, "loss": "2.019", "ppl": "4.05", "wps": "357447", "ups": "3.02", "wpb": "118510", "bsz": "256", "num_updates": "741200", "lr": "0.000261414", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.2", "wall": "241931"} +[2022-08-03 20:41:23,442][train_inner][INFO] - {"epoch": 15, "update": 14.405, "loss": "2.026", "ppl": "4.07", "wps": "364549", "ups": "3.08", "wpb": "118209", "bsz": "256", "num_updates": "741400", "lr": "0.000261212", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "241996"} +[2022-08-03 20:42:28,468][train_inner][INFO] - {"epoch": 15, "update": 14.409, "loss": "2.021", "ppl": "4.06", "wps": "364204", "ups": "3.08", "wpb": "118413", "bsz": "256", "num_updates": "741600", "lr": "0.00026101", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "242061"} +[2022-08-03 20:43:33,366][train_inner][INFO] - {"epoch": 15, "update": 14.413, "loss": "2.021", "ppl": "4.06", "wps": "365488", "ups": "3.08", "wpb": "118596", "bsz": "256", "num_updates": "741800", "lr": "0.000260808", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "242126"} +[2022-08-03 20:44:37,969][train_inner][INFO] - {"epoch": 15, "update": 14.417, "loss": "2.023", "ppl": "4.06", "wps": "365073", "ups": "3.1", "wpb": "117923", "bsz": "256", "num_updates": "742000", "lr": "0.000260606", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.7", "wall": "242190"} +[2022-08-03 20:45:43,111][train_inner][INFO] - {"epoch": 15, "update": 14.421, "loss": "2.027", "ppl": "4.08", "wps": "362354", "ups": "3.07", "wpb": "118019", "bsz": "256", "num_updates": "742200", "lr": "0.000260404", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "242255"} +[2022-08-03 20:46:47,865][train_inner][INFO] - {"epoch": 15, "update": 14.425, "loss": "2.02", "ppl": "4.06", "wps": "364603", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "742400", "lr": "0.000260202", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "242320"} +[2022-08-03 20:47:52,752][train_inner][INFO] - {"epoch": 15, "update": 14.428, "loss": "2.023", "ppl": "4.06", "wps": "363876", "ups": "3.08", "wpb": "118052", "bsz": "256", "num_updates": "742600", "lr": "0.00026", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "242385"} +[2022-08-03 20:48:57,738][train_inner][INFO] - {"epoch": 15, "update": 14.432, "loss": "2.02", "ppl": "4.06", "wps": "364178", "ups": "3.08", "wpb": "118332", "bsz": "256", "num_updates": "742800", "lr": "0.000259798", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "242450"} +[2022-08-03 20:50:02,635][train_inner][INFO] - {"epoch": 15, "update": 14.436, "loss": "2.025", "ppl": "4.07", "wps": "365010", "ups": "3.08", "wpb": "118437", "bsz": "256", "num_updates": "743000", "lr": "0.000259596", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "242515"} +[2022-08-03 20:51:07,432][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 20:51:08,117][train_inner][INFO] - {"epoch": 15, "update": 14.44, "loss": "2.024", "ppl": "4.07", "wps": "362132", "ups": "3.05", "wpb": "118566", "bsz": "256", "num_updates": "743200", "lr": "0.000259394", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "242580"} +[2022-08-03 20:52:13,145][train_inner][INFO] - {"epoch": 15, "update": 14.444, "loss": "2.021", "ppl": "4.06", "wps": "363013", "ups": "3.08", "wpb": "118029", "bsz": "256", "num_updates": "743400", "lr": "0.000259192", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "242645"} +[2022-08-03 20:53:18,167][train_inner][INFO] - {"epoch": 15, "update": 14.448, "loss": "2.026", "ppl": "4.07", "wps": "364536", "ups": "3.08", "wpb": "118512", "bsz": "256", "num_updates": "743600", "lr": "0.00025899", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "242710"} +[2022-08-03 20:54:23,197][train_inner][INFO] - {"epoch": 15, "update": 14.452, "loss": "2.023", "ppl": "4.06", "wps": "362445", "ups": "3.08", "wpb": "117848", "bsz": "256", "num_updates": "743800", "lr": "0.000258788", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "242776"} +[2022-08-03 20:55:28,290][train_inner][INFO] - {"epoch": 15, "update": 14.456, "loss": "2.019", "ppl": "4.05", "wps": "362895", "ups": "3.07", "wpb": "118108", "bsz": "256", "num_updates": "744000", "lr": "0.000258586", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.8", "wall": "242841"} +[2022-08-03 20:56:28,797][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 20:56:33,053][train_inner][INFO] - {"epoch": 15, "update": 14.46, "loss": "2.026", "ppl": "4.07", "wps": "364401", "ups": "3.09", "wpb": "117997", "bsz": "256", "num_updates": "744200", "lr": "0.000258384", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "242905"} +[2022-08-03 20:57:39,987][train_inner][INFO] - {"epoch": 15, "update": 14.463, "loss": "2.025", "ppl": "4.07", "wps": "354812", "ups": "2.99", "wpb": "118744", "bsz": "256", "num_updates": "744400", "lr": "0.000258182", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "67", "gb_free": "21.1", "wall": "242972"} +[2022-08-03 20:58:45,313][train_inner][INFO] - {"epoch": 15, "update": 14.467, "loss": "2.024", "ppl": "4.07", "wps": "362485", "ups": "3.06", "wpb": "118398", "bsz": "256", "num_updates": "744600", "lr": "0.00025798", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "243038"} +[2022-08-03 20:59:50,150][train_inner][INFO] - {"epoch": 15, "update": 14.471, "loss": "2.026", "ppl": "4.07", "wps": "364672", "ups": "3.08", "wpb": "118220", "bsz": "256", "num_updates": "744800", "lr": "0.000257778", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "243102"} +[2022-08-03 21:00:54,847][train_inner][INFO] - {"epoch": 15, "update": 14.475, "loss": "2.026", "ppl": "4.07", "wps": "367024", "ups": "3.09", "wpb": "118724", "bsz": "256", "num_updates": "745000", "lr": "0.000257576", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "243167"} +[2022-08-03 21:01:59,834][train_inner][INFO] - {"epoch": 15, "update": 14.479, "loss": "2.025", "ppl": "4.07", "wps": "365269", "ups": "3.08", "wpb": "118688", "bsz": "256", "num_updates": "745200", "lr": "0.000257374", "gnorm": "0.764", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "243232"} +[2022-08-03 21:03:04,675][train_inner][INFO] - {"epoch": 15, "update": 14.483, "loss": "2.02", "ppl": "4.06", "wps": "365637", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "745400", "lr": "0.000257172", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28", "wall": "243297"} +[2022-08-03 21:04:09,455][train_inner][INFO] - {"epoch": 15, "update": 14.487, "loss": "2.028", "ppl": "4.08", "wps": "366851", "ups": "3.09", "wpb": "118821", "bsz": "256", "num_updates": "745600", "lr": "0.00025697", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "243362"} +[2022-08-03 21:05:14,468][train_inner][INFO] - {"epoch": 15, "update": 14.491, "loss": "2.02", "ppl": "4.06", "wps": "362172", "ups": "3.08", "wpb": "117727", "bsz": "256", "num_updates": "745800", "lr": "0.000256768", "gnorm": "0.765", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "243427"} +[2022-08-03 21:06:19,148][train_inner][INFO] - {"epoch": 15, "update": 14.495, "loss": "2.02", "ppl": "4.06", "wps": "366253", "ups": "3.09", "wpb": "118445", "bsz": "256", "num_updates": "746000", "lr": "0.000256566", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "243491"} +[2022-08-03 21:07:24,186][train_inner][INFO] - {"epoch": 15, "update": 14.498, "loss": "2.017", "ppl": "4.05", "wps": "363920", "ups": "3.08", "wpb": "118341", "bsz": "256", "num_updates": "746200", "lr": "0.000256364", "gnorm": "0.764", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "243557"} +[2022-08-03 21:08:28,855][train_inner][INFO] - {"epoch": 15, "update": 14.502, "loss": "2.02", "ppl": "4.05", "wps": "368995", "ups": "3.09", "wpb": "119312", "bsz": "256", "num_updates": "746400", "lr": "0.000256162", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "243621"} +[2022-08-03 21:09:33,184][train_inner][INFO] - {"epoch": 15, "update": 14.506, "loss": "2.019", "ppl": "4.05", "wps": "370628", "ups": "3.11", "wpb": "119209", "bsz": "256", "num_updates": "746600", "lr": "0.00025596", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "243686"} +[2022-08-03 21:10:38,011][train_inner][INFO] - {"epoch": 15, "update": 14.51, "loss": "2.023", "ppl": "4.06", "wps": "363787", "ups": "3.09", "wpb": "117914", "bsz": "255.9", "num_updates": "746800", "lr": "0.000255758", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "243750"} +[2022-08-03 21:11:43,213][train_inner][INFO] - {"epoch": 15, "update": 14.514, "loss": "2.024", "ppl": "4.07", "wps": "362710", "ups": "3.07", "wpb": "118245", "bsz": "256", "num_updates": "747000", "lr": "0.000255556", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "243816"} +[2022-08-03 21:12:48,195][train_inner][INFO] - {"epoch": 15, "update": 14.518, "loss": "2.023", "ppl": "4.06", "wps": "363119", "ups": "3.08", "wpb": "117980", "bsz": "256", "num_updates": "747200", "lr": "0.000255354", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "243881"} +[2022-08-03 21:13:52,536][train_inner][INFO] - {"epoch": 15, "update": 14.522, "loss": "2.024", "ppl": "4.07", "wps": "366872", "ups": "3.11", "wpb": "118022", "bsz": "256", "num_updates": "747400", "lr": "0.000255152", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "243945"} +[2022-08-03 21:14:57,318][train_inner][INFO] - {"epoch": 15, "update": 14.526, "loss": "2.022", "ppl": "4.06", "wps": "365896", "ups": "3.09", "wpb": "118515", "bsz": "256", "num_updates": "747600", "lr": "0.000254949", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "244010"} +[2022-08-03 21:16:01,774][train_inner][INFO] - {"epoch": 15, "update": 14.529, "loss": "2.016", "ppl": "4.04", "wps": "368738", "ups": "3.1", "wpb": "118837", "bsz": "256", "num_updates": "747800", "lr": "0.000254747", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "244074"} +[2022-08-03 21:17:06,671][train_inner][INFO] - {"epoch": 15, "update": 14.533, "loss": "2.02", "ppl": "4.06", "wps": "364780", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "748000", "lr": "0.000254545", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "244139"} +[2022-08-03 21:18:11,589][train_inner][INFO] - {"epoch": 15, "update": 14.537, "loss": "2.02", "ppl": "4.06", "wps": "365138", "ups": "3.08", "wpb": "118518", "bsz": "256", "num_updates": "748200", "lr": "0.000254343", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "244204"} +[2022-08-03 21:18:39,976][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 21:19:16,271][train_inner][INFO] - {"epoch": 15, "update": 14.541, "loss": "2.021", "ppl": "4.06", "wps": "364666", "ups": "3.09", "wpb": "117936", "bsz": "256", "num_updates": "748400", "lr": "0.000254141", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "244269"} +[2022-08-03 21:20:01,989][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 21:20:21,487][train_inner][INFO] - {"epoch": 15, "update": 14.545, "loss": "2.024", "ppl": "4.07", "wps": "363257", "ups": "3.07", "wpb": "118449", "bsz": "256", "num_updates": "748600", "lr": "0.000253939", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "244334"} +[2022-08-03 21:21:27,455][train_inner][INFO] - {"epoch": 15, "update": 14.549, "loss": "2.02", "ppl": "4.06", "wps": "358075", "ups": "3.03", "wpb": "118107", "bsz": "256", "num_updates": "748800", "lr": "0.000253737", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.2", "wall": "244400"} +[2022-08-03 21:22:32,111][train_inner][INFO] - {"epoch": 15, "update": 14.553, "loss": "2.022", "ppl": "4.06", "wps": "365796", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "749000", "lr": "0.000253535", "gnorm": "0.767", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "244464"} +[2022-08-03 21:23:36,973][train_inner][INFO] - {"epoch": 15, "update": 14.557, "loss": "2.018", "ppl": "4.05", "wps": "366842", "ups": "3.08", "wpb": "118970", "bsz": "256", "num_updates": "749200", "lr": "0.000253333", "gnorm": "0.761", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.4", "wall": "244529"} +[2022-08-03 21:24:42,712][train_inner][INFO] - {"epoch": 15, "update": 14.561, "loss": "2.021", "ppl": "4.06", "wps": "356212", "ups": "3.04", "wpb": "117082", "bsz": "256", "num_updates": "749400", "lr": "0.000253131", "gnorm": "0.77", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "244595"} +[2022-08-03 21:25:44,822][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-03 21:25:47,778][train_inner][INFO] - {"epoch": 15, "update": 14.565, "loss": "2.023", "ppl": "4.06", "wps": "362094", "ups": "3.07", "wpb": "117798", "bsz": "256", "num_updates": "749600", "lr": "0.000252929", "gnorm": "0.767", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "244660"} +[2022-08-03 21:26:52,715][train_inner][INFO] - {"epoch": 15, "update": 14.568, "loss": "2.021", "ppl": "4.06", "wps": "365046", "ups": "3.08", "wpb": "118524", "bsz": "256", "num_updates": "749800", "lr": "0.000252727", "gnorm": "0.763", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "244725"} +[2022-08-03 21:27:57,457][train_inner][INFO] - {"epoch": 15, "update": 14.572, "loss": "2.022", "ppl": "4.06", "wps": "363720", "ups": "3.09", "wpb": "117738", "bsz": "256", "num_updates": "750000", "lr": "0.000252525", "gnorm": "0.776", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "244790"} +[2022-08-03 21:27:57,458][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 21:28:20,509][valid][INFO] - {"epoch": 15, "valid_loss": "1.931", "valid_ppl": "3.81", "valid_wps": "1.57705e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "750000", "valid_best_loss": "1.931"} +[2022-08-03 21:28:20,512][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 750000 updates +[2022-08-03 21:28:20,513][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_15_750000.pt +[2022-08-03 21:28:30,017][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_15_750000.pt +[2022-08-03 21:29:00,142][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_15_750000.pt (epoch 15 @ 750000 updates, score 1.931) (writing took 39.630318000912666 seconds) +[2022-08-03 21:30:05,088][train_inner][INFO] - {"epoch": 15, "update": 14.576, "loss": "2.012", "ppl": "4.03", "wps": "186172", "ups": "1.57", "wpb": "118806", "bsz": "256", "num_updates": "750200", "lr": "0.000252323", "gnorm": "0.761", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "244917"} +[2022-08-03 21:31:10,146][train_inner][INFO] - {"epoch": 15, "update": 14.58, "loss": "2.022", "ppl": "4.06", "wps": "364152", "ups": "3.07", "wpb": "118452", "bsz": "256", "num_updates": "750400", "lr": "0.000252121", "gnorm": "0.765", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "244982"} +[2022-08-03 21:32:14,776][train_inner][INFO] - {"epoch": 15, "update": 14.584, "loss": "2.018", "ppl": "4.05", "wps": "366447", "ups": "3.09", "wpb": "118416", "bsz": "256", "num_updates": "750600", "lr": "0.000251919", "gnorm": "0.765", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "245047"} +[2022-08-03 21:33:19,535][train_inner][INFO] - {"epoch": 15, "update": 14.588, "loss": "2.025", "ppl": "4.07", "wps": "363122", "ups": "3.09", "wpb": "117577", "bsz": "256", "num_updates": "750800", "lr": "0.000251717", "gnorm": "0.771", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "245112"} +[2022-08-03 21:34:24,634][train_inner][INFO] - {"epoch": 15, "update": 14.592, "loss": "2.017", "ppl": "4.05", "wps": "363405", "ups": "3.07", "wpb": "118283", "bsz": "256", "num_updates": "751000", "lr": "0.000251515", "gnorm": "0.766", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "245177"} +[2022-08-03 21:35:29,749][train_inner][INFO] - {"epoch": 15, "update": 14.596, "loss": "2.027", "ppl": "4.08", "wps": "361707", "ups": "3.07", "wpb": "117762", "bsz": "256", "num_updates": "751200", "lr": "0.000251313", "gnorm": "0.773", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "245242"} +[2022-08-03 21:36:34,612][train_inner][INFO] - {"epoch": 15, "update": 14.599, "loss": "2.015", "ppl": "4.04", "wps": "365988", "ups": "3.08", "wpb": "118694", "bsz": "256", "num_updates": "751400", "lr": "0.000251111", "gnorm": "0.762", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "245307"} +[2022-08-03 21:37:39,563][train_inner][INFO] - {"epoch": 15, "update": 14.603, "loss": "2.015", "ppl": "4.04", "wps": "364336", "ups": "3.08", "wpb": "118316", "bsz": "256", "num_updates": "751600", "lr": "0.000250909", "gnorm": "0.768", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "245372"} +[2022-08-03 21:38:44,171][train_inner][INFO] - {"epoch": 15, "update": 14.607, "loss": "2.02", "ppl": "4.06", "wps": "365394", "ups": "3.1", "wpb": "118035", "bsz": "256", "num_updates": "751800", "lr": "0.000250707", "gnorm": "0.767", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "245436"} +[2022-08-03 21:39:49,225][train_inner][INFO] - {"epoch": 15, "update": 14.611, "loss": "2.022", "ppl": "4.06", "wps": "364404", "ups": "3.07", "wpb": "118527", "bsz": "256", "num_updates": "752000", "lr": "0.000250505", "gnorm": "0.764", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "245502"} +[2022-08-03 21:40:55,510][train_inner][INFO] - {"epoch": 15, "update": 14.615, "loss": "2.022", "ppl": "4.06", "wps": "355907", "ups": "3.02", "wpb": "117955", "bsz": "256", "num_updates": "752200", "lr": "0.000250303", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.4", "wall": "245568"} +[2022-08-03 21:41:59,915][train_inner][INFO] - {"epoch": 15, "update": 14.619, "loss": "2.021", "ppl": "4.06", "wps": "365378", "ups": "3.11", "wpb": "117660", "bsz": "256", "num_updates": "752400", "lr": "0.000250101", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "245632"} +[2022-08-03 21:43:04,782][train_inner][INFO] - {"epoch": 15, "update": 14.623, "loss": "2.023", "ppl": "4.06", "wps": "364548", "ups": "3.08", "wpb": "118233", "bsz": "256", "num_updates": "752600", "lr": "0.000249899", "gnorm": "0.768", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "245697"} +[2022-08-03 21:44:09,509][train_inner][INFO] - {"epoch": 15, "update": 14.627, "loss": "2.02", "ppl": "4.05", "wps": "363936", "ups": "3.09", "wpb": "117780", "bsz": "256", "num_updates": "752800", "lr": "0.000249697", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "245762"} +[2022-08-03 21:45:13,793][train_inner][INFO] - {"epoch": 15, "update": 14.631, "loss": "2.021", "ppl": "4.06", "wps": "367190", "ups": "3.11", "wpb": "118021", "bsz": "256", "num_updates": "753000", "lr": "0.000249495", "gnorm": "0.77", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "245826"} +[2022-08-03 21:46:18,373][train_inner][INFO] - {"epoch": 15, "update": 14.634, "loss": "2.014", "ppl": "4.04", "wps": "366450", "ups": "3.1", "wpb": "118326", "bsz": "256", "num_updates": "753200", "lr": "0.000249293", "gnorm": "0.77", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "245891"} +[2022-08-03 21:47:23,224][train_inner][INFO] - {"epoch": 15, "update": 14.638, "loss": "2.017", "ppl": "4.05", "wps": "364436", "ups": "3.08", "wpb": "118167", "bsz": "256", "num_updates": "753400", "lr": "0.000249091", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "245956"} +[2022-08-03 21:48:28,302][train_inner][INFO] - {"epoch": 15, "update": 14.642, "loss": "2.015", "ppl": "4.04", "wps": "365933", "ups": "3.07", "wpb": "119069", "bsz": "256", "num_updates": "753600", "lr": "0.000248889", "gnorm": "0.763", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "246021"} +[2022-08-03 21:49:33,073][train_inner][INFO] - {"epoch": 15, "update": 14.646, "loss": "2.02", "ppl": "4.06", "wps": "365108", "ups": "3.09", "wpb": "118240", "bsz": "256", "num_updates": "753800", "lr": "0.000248687", "gnorm": "0.766", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.4", "wall": "246085"} +[2022-08-03 21:50:37,481][train_inner][INFO] - {"epoch": 15, "update": 14.65, "loss": "2.019", "ppl": "4.05", "wps": "368606", "ups": "3.11", "wpb": "118704", "bsz": "256", "num_updates": "754000", "lr": "0.000248485", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "246150"} +[2022-08-03 21:51:41,912][train_inner][INFO] - {"epoch": 15, "update": 14.654, "loss": "2.023", "ppl": "4.06", "wps": "366492", "ups": "3.1", "wpb": "118064", "bsz": "256", "num_updates": "754200", "lr": "0.000248283", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "246214"} +[2022-08-03 21:52:46,909][train_inner][INFO] - {"epoch": 15, "update": 14.658, "loss": "2.025", "ppl": "4.07", "wps": "364586", "ups": "3.08", "wpb": "118484", "bsz": "256", "num_updates": "754400", "lr": "0.000248081", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "246279"} +[2022-08-03 21:53:52,142][train_inner][INFO] - {"epoch": 15, "update": 14.662, "loss": "2.013", "ppl": "4.04", "wps": "363461", "ups": "3.07", "wpb": "118546", "bsz": "256", "num_updates": "754600", "lr": "0.000247879", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.5", "wall": "246344"} +[2022-08-03 21:54:57,293][train_inner][INFO] - {"epoch": 15, "update": 14.665, "loss": "2.015", "ppl": "4.04", "wps": "363564", "ups": "3.07", "wpb": "118431", "bsz": "256", "num_updates": "754800", "lr": "0.000247677", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "246410"} +[2022-08-03 21:56:02,127][train_inner][INFO] - {"epoch": 15, "update": 14.669, "loss": "2.017", "ppl": "4.05", "wps": "365478", "ups": "3.08", "wpb": "118475", "bsz": "256", "num_updates": "755000", "lr": "0.000247475", "gnorm": "0.766", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "246474"} +[2022-08-03 21:57:06,754][train_inner][INFO] - {"epoch": 15, "update": 14.673, "loss": "2.021", "ppl": "4.06", "wps": "366079", "ups": "3.09", "wpb": "118291", "bsz": "256", "num_updates": "755200", "lr": "0.000247273", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "246539"} +[2022-08-03 21:58:11,792][train_inner][INFO] - {"epoch": 15, "update": 14.677, "loss": "2.013", "ppl": "4.04", "wps": "365393", "ups": "3.08", "wpb": "118822", "bsz": "256", "num_updates": "755400", "lr": "0.000247071", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "246604"} +[2022-08-03 21:59:16,392][train_inner][INFO] - {"epoch": 15, "update": 14.681, "loss": "2.02", "ppl": "4.06", "wps": "365588", "ups": "3.1", "wpb": "118082", "bsz": "256", "num_updates": "755600", "lr": "0.000246869", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "246669"} +[2022-08-03 22:00:02,029][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 22:00:21,496][train_inner][INFO] - {"epoch": 15, "update": 14.685, "loss": "2.021", "ppl": "4.06", "wps": "362941", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "755800", "lr": "0.000246667", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "246734"} +[2022-08-03 22:01:26,751][train_inner][INFO] - {"epoch": 15, "update": 14.689, "loss": "2.013", "ppl": "4.04", "wps": "363176", "ups": "3.06", "wpb": "118494", "bsz": "256", "num_updates": "756000", "lr": "0.000246465", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "246799"} +[2022-08-03 22:02:31,928][train_inner][INFO] - {"epoch": 15, "update": 14.693, "loss": "2.019", "ppl": "4.05", "wps": "363794", "ups": "3.07", "wpb": "118553", "bsz": "256", "num_updates": "756200", "lr": "0.000246263", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "246864"} +[2022-08-03 22:03:36,681][train_inner][INFO] - {"epoch": 15, "update": 14.697, "loss": "2.011", "ppl": "4.03", "wps": "367942", "ups": "3.09", "wpb": "119126", "bsz": "256", "num_updates": "756400", "lr": "0.000246061", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "246929"} +[2022-08-03 22:04:41,823][train_inner][INFO] - {"epoch": 15, "update": 14.7, "loss": "2.019", "ppl": "4.05", "wps": "364081", "ups": "3.07", "wpb": "118583", "bsz": "256", "num_updates": "756600", "lr": "0.000245859", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "246994"} +[2022-08-03 22:05:46,812][train_inner][INFO] - {"epoch": 15, "update": 14.704, "loss": "2.016", "ppl": "4.04", "wps": "365762", "ups": "3.08", "wpb": "118850", "bsz": "256", "num_updates": "756800", "lr": "0.000245657", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "247059"} +[2022-08-03 22:06:51,913][train_inner][INFO] - {"epoch": 15, "update": 14.708, "loss": "2.014", "ppl": "4.04", "wps": "364413", "ups": "3.07", "wpb": "118617", "bsz": "256", "num_updates": "757000", "lr": "0.000245455", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "247124"} +[2022-08-03 22:07:56,613][train_inner][INFO] - {"epoch": 15, "update": 14.712, "loss": "2.017", "ppl": "4.05", "wps": "364970", "ups": "3.09", "wpb": "118066", "bsz": "256", "num_updates": "757200", "lr": "0.000245253", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "247189"} +[2022-08-03 22:09:01,271][train_inner][INFO] - {"epoch": 15, "update": 14.716, "loss": "2.019", "ppl": "4.05", "wps": "365785", "ups": "3.09", "wpb": "118252", "bsz": "256", "num_updates": "757400", "lr": "0.000245051", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "247254"} +[2022-08-03 22:10:06,142][train_inner][INFO] - {"epoch": 15, "update": 14.72, "loss": "2.007", "ppl": "4.02", "wps": "365210", "ups": "3.08", "wpb": "118455", "bsz": "256", "num_updates": "757600", "lr": "0.000244848", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "247318"} +[2022-08-03 22:10:24,599][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 22:11:11,332][train_inner][INFO] - {"epoch": 15, "update": 14.724, "loss": "2.017", "ppl": "4.05", "wps": "363358", "ups": "3.07", "wpb": "118436", "bsz": "256", "num_updates": "757800", "lr": "0.000244646", "gnorm": "0.768", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "247384"} +[2022-08-03 22:12:16,229][train_inner][INFO] - {"epoch": 15, "update": 14.728, "loss": "2.017", "ppl": "4.05", "wps": "363539", "ups": "3.08", "wpb": "117961", "bsz": "256", "num_updates": "758000", "lr": "0.000244444", "gnorm": "0.771", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "247449"} +[2022-08-03 22:13:21,125][train_inner][INFO] - {"epoch": 15, "update": 14.732, "loss": "2.017", "ppl": "4.05", "wps": "366290", "ups": "3.08", "wpb": "118853", "bsz": "256", "num_updates": "758200", "lr": "0.000244242", "gnorm": "0.771", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "247513"} +[2022-08-03 22:14:25,899][train_inner][INFO] - {"epoch": 15, "update": 14.735, "loss": "2.024", "ppl": "4.07", "wps": "365312", "ups": "3.09", "wpb": "118312", "bsz": "256", "num_updates": "758400", "lr": "0.00024404", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "247578"} +[2022-08-03 22:15:30,881][train_inner][INFO] - {"epoch": 15, "update": 14.739, "loss": "2.013", "ppl": "4.04", "wps": "363805", "ups": "3.08", "wpb": "118201", "bsz": "256", "num_updates": "758600", "lr": "0.000243838", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "247643"} +[2022-08-03 22:16:35,501][train_inner][INFO] - {"epoch": 15, "update": 14.743, "loss": "2.019", "ppl": "4.05", "wps": "366801", "ups": "3.1", "wpb": "118512", "bsz": "256", "num_updates": "758800", "lr": "0.000243636", "gnorm": "0.769", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "247708"} +[2022-08-03 22:17:40,382][train_inner][INFO] - {"epoch": 15, "update": 14.747, "loss": "2.016", "ppl": "4.05", "wps": "366106", "ups": "3.08", "wpb": "118765", "bsz": "256", "num_updates": "759000", "lr": "0.000243434", "gnorm": "0.767", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "247773"} +[2022-08-03 22:18:44,892][train_inner][INFO] - {"epoch": 15, "update": 14.751, "loss": "2.018", "ppl": "4.05", "wps": "364156", "ups": "3.1", "wpb": "117456", "bsz": "256", "num_updates": "759200", "lr": "0.000243232", "gnorm": "0.771", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "247837"} +[2022-08-03 22:19:49,778][train_inner][INFO] - {"epoch": 15, "update": 14.755, "loss": "2.015", "ppl": "4.04", "wps": "363510", "ups": "3.08", "wpb": "117933", "bsz": "256", "num_updates": "759400", "lr": "0.00024303", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.2", "wall": "247902"} +[2022-08-03 22:20:54,838][train_inner][INFO] - {"epoch": 15, "update": 14.759, "loss": "2.013", "ppl": "4.04", "wps": "363849", "ups": "3.07", "wpb": "118358", "bsz": "256", "num_updates": "759600", "lr": "0.000242828", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.5", "wall": "247967"} +[2022-08-03 22:21:59,664][train_inner][INFO] - {"epoch": 15, "update": 14.763, "loss": "2.012", "ppl": "4.03", "wps": "366000", "ups": "3.09", "wpb": "118630", "bsz": "256", "num_updates": "759800", "lr": "0.000242626", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.9", "wall": "248032"} +[2022-08-03 22:23:04,503][train_inner][INFO] - {"epoch": 15, "update": 14.767, "loss": "2.02", "ppl": "4.06", "wps": "363188", "ups": "3.08", "wpb": "117742", "bsz": "256", "num_updates": "760000", "lr": "0.000242424", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "248097"} +[2022-08-03 22:24:09,202][train_inner][INFO] - {"epoch": 15, "update": 14.77, "loss": "2.014", "ppl": "4.04", "wps": "367571", "ups": "3.09", "wpb": "118904", "bsz": "256", "num_updates": "760200", "lr": "0.000242222", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "248162"} +[2022-08-03 22:25:14,180][train_inner][INFO] - {"epoch": 15, "update": 14.774, "loss": "2.014", "ppl": "4.04", "wps": "365880", "ups": "3.08", "wpb": "118869", "bsz": "256", "num_updates": "760400", "lr": "0.00024202", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "248227"} +[2022-08-03 22:26:19,077][train_inner][INFO] - {"epoch": 15, "update": 14.778, "loss": "2.013", "ppl": "4.04", "wps": "363588", "ups": "3.08", "wpb": "117978", "bsz": "256", "num_updates": "760600", "lr": "0.000241818", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "248291"} +[2022-08-03 22:27:23,984][train_inner][INFO] - {"epoch": 15, "update": 14.782, "loss": "2.019", "ppl": "4.05", "wps": "363695", "ups": "3.08", "wpb": "118030", "bsz": "256", "num_updates": "760800", "lr": "0.000241616", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "248356"} +[2022-08-03 22:28:29,347][train_inner][INFO] - {"epoch": 15, "update": 14.786, "loss": "2.02", "ppl": "4.06", "wps": "361715", "ups": "3.06", "wpb": "118212", "bsz": "256", "num_updates": "761000", "lr": "0.000241414", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "248422"} +[2022-08-03 22:29:34,554][train_inner][INFO] - {"epoch": 15, "update": 14.79, "loss": "2.011", "ppl": "4.03", "wps": "364392", "ups": "3.07", "wpb": "118803", "bsz": "256", "num_updates": "761200", "lr": "0.000241212", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "248487"} +[2022-08-03 22:30:39,295][train_inner][INFO] - {"epoch": 15, "update": 14.794, "loss": "2.014", "ppl": "4.04", "wps": "366897", "ups": "3.09", "wpb": "118766", "bsz": "256", "num_updates": "761400", "lr": "0.00024101", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "248552"} +[2022-08-03 22:31:44,075][train_inner][INFO] - {"epoch": 15, "update": 14.798, "loss": "2.01", "ppl": "4.03", "wps": "365296", "ups": "3.09", "wpb": "118317", "bsz": "256", "num_updates": "761600", "lr": "0.000240808", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "248616"} +[2022-08-03 22:32:36,836][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 22:32:49,682][train_inner][INFO] - {"epoch": 15, "update": 14.802, "loss": "2.007", "ppl": "4.02", "wps": "361693", "ups": "3.05", "wpb": "118646", "bsz": "256", "num_updates": "761800", "lr": "0.000240606", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "248682"} +[2022-08-03 22:33:54,314][train_inner][INFO] - {"epoch": 15, "update": 14.805, "loss": "2.007", "ppl": "4.02", "wps": "364935", "ups": "3.09", "wpb": "117931", "bsz": "256", "num_updates": "762000", "lr": "0.000240404", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "248747"} +[2022-08-03 22:34:58,703][train_inner][INFO] - {"epoch": 15, "update": 14.809, "loss": "2.022", "ppl": "4.06", "wps": "366890", "ups": "3.11", "wpb": "118117", "bsz": "256", "num_updates": "762200", "lr": "0.000240202", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28", "wall": "248811"} +[2022-08-03 22:36:03,483][train_inner][INFO] - {"epoch": 15, "update": 14.813, "loss": "2.014", "ppl": "4.04", "wps": "366610", "ups": "3.09", "wpb": "118744", "bsz": "256", "num_updates": "762400", "lr": "0.00024", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "248876"} +[2022-08-03 22:37:08,673][train_inner][INFO] - {"epoch": 15, "update": 14.817, "loss": "2.01", "ppl": "4.03", "wps": "362478", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "762600", "lr": "0.000239798", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "248941"} +[2022-08-03 22:38:13,647][train_inner][INFO] - {"epoch": 15, "update": 14.821, "loss": "2.011", "ppl": "4.03", "wps": "365640", "ups": "3.08", "wpb": "118784", "bsz": "256", "num_updates": "762800", "lr": "0.000239596", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "249006"} +[2022-08-03 22:39:18,939][train_inner][INFO] - {"epoch": 15, "update": 14.825, "loss": "2.008", "ppl": "4.02", "wps": "361239", "ups": "3.06", "wpb": "117928", "bsz": "256", "num_updates": "763000", "lr": "0.000239394", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "249071"} +[2022-08-03 22:40:23,624][train_inner][INFO] - {"epoch": 15, "update": 14.829, "loss": "2.01", "ppl": "4.03", "wps": "366894", "ups": "3.09", "wpb": "118661", "bsz": "256", "num_updates": "763200", "lr": "0.000239192", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "249136"} +[2022-08-03 22:41:15,600][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 22:41:28,568][train_inner][INFO] - {"epoch": 15, "update": 14.833, "loss": "2.014", "ppl": "4.04", "wps": "364207", "ups": "3.08", "wpb": "118264", "bsz": "256", "num_updates": "763400", "lr": "0.00023899", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "249201"} +[2022-08-03 22:42:33,370][train_inner][INFO] - {"epoch": 15, "update": 14.836, "loss": "2.012", "ppl": "4.03", "wps": "363074", "ups": "3.09", "wpb": "117638", "bsz": "256", "num_updates": "763600", "lr": "0.000238788", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.6", "wall": "249266"} +[2022-08-03 22:43:38,921][train_inner][INFO] - {"epoch": 15, "update": 14.84, "loss": "2.011", "ppl": "4.03", "wps": "360176", "ups": "3.05", "wpb": "118048", "bsz": "256", "num_updates": "763800", "lr": "0.000238586", "gnorm": "0.773", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "249331"} +[2022-08-03 22:44:43,696][train_inner][INFO] - {"epoch": 15, "update": 14.844, "loss": "2.01", "ppl": "4.03", "wps": "364197", "ups": "3.09", "wpb": "117952", "bsz": "256", "num_updates": "764000", "lr": "0.000238384", "gnorm": "0.774", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "249396"} +[2022-08-03 22:45:48,436][train_inner][INFO] - {"epoch": 15, "update": 14.848, "loss": "2.022", "ppl": "4.06", "wps": "363457", "ups": "3.09", "wpb": "117650", "bsz": "256", "num_updates": "764200", "lr": "0.000238182", "gnorm": "0.776", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "249461"} +[2022-08-03 22:46:53,302][train_inner][INFO] - {"epoch": 15, "update": 14.852, "loss": "2.012", "ppl": "4.03", "wps": "364162", "ups": "3.08", "wpb": "118106", "bsz": "256", "num_updates": "764400", "lr": "0.00023798", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "249526"} +[2022-08-03 22:47:57,880][train_inner][INFO] - {"epoch": 15, "update": 14.856, "loss": "2.01", "ppl": "4.03", "wps": "366524", "ups": "3.1", "wpb": "118346", "bsz": "256", "num_updates": "764600", "lr": "0.000237778", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "249590"} +[2022-08-03 22:49:02,548][train_inner][INFO] - {"epoch": 15, "update": 14.86, "loss": "2.012", "ppl": "4.03", "wps": "364780", "ups": "3.09", "wpb": "117945", "bsz": "256", "num_updates": "764800", "lr": "0.000237576", "gnorm": "0.773", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "249655"} +[2022-08-03 22:50:07,235][train_inner][INFO] - {"epoch": 15, "update": 14.864, "loss": "2.01", "ppl": "4.03", "wps": "365011", "ups": "3.09", "wpb": "118055", "bsz": "256", "num_updates": "765000", "lr": "0.000237374", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "249720"} +[2022-08-03 22:51:12,507][train_inner][INFO] - {"epoch": 15, "update": 14.868, "loss": "2.012", "ppl": "4.03", "wps": "361354", "ups": "3.06", "wpb": "117931", "bsz": "256", "num_updates": "765200", "lr": "0.000237172", "gnorm": "0.774", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "249785"} +[2022-08-03 22:52:17,405][train_inner][INFO] - {"epoch": 15, "update": 14.871, "loss": "2.015", "ppl": "4.04", "wps": "363822", "ups": "3.08", "wpb": "118055", "bsz": "256", "num_updates": "765400", "lr": "0.00023697", "gnorm": "0.777", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "249850"} +[2022-08-03 22:53:22,708][train_inner][INFO] - {"epoch": 15, "update": 14.875, "loss": "2.008", "ppl": "4.02", "wps": "362599", "ups": "3.06", "wpb": "118392", "bsz": "256", "num_updates": "765600", "lr": "0.000236768", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "249915"} +[2022-08-03 22:54:27,624][train_inner][INFO] - {"epoch": 15, "update": 14.879, "loss": "2.014", "ppl": "4.04", "wps": "364034", "ups": "3.08", "wpb": "118156", "bsz": "256", "num_updates": "765800", "lr": "0.000236566", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "249980"} +[2022-08-03 22:55:32,723][train_inner][INFO] - {"epoch": 15, "update": 14.883, "loss": "2.011", "ppl": "4.03", "wps": "363517", "ups": "3.07", "wpb": "118321", "bsz": "256", "num_updates": "766000", "lr": "0.000236364", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "250045"} +[2022-08-03 22:56:37,763][train_inner][INFO] - {"epoch": 15, "update": 14.887, "loss": "2.007", "ppl": "4.02", "wps": "364219", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "766200", "lr": "0.000236162", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "250110"} +[2022-08-03 22:57:42,816][train_inner][INFO] - {"epoch": 15, "update": 14.891, "loss": "2.007", "ppl": "4.02", "wps": "365598", "ups": "3.07", "wpb": "118916", "bsz": "256", "num_updates": "766400", "lr": "0.00023596", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "250175"} +[2022-08-03 22:58:47,382][train_inner][INFO] - {"epoch": 15, "update": 14.895, "loss": "2.011", "ppl": "4.03", "wps": "365186", "ups": "3.1", "wpb": "117891", "bsz": "256", "num_updates": "766600", "lr": "0.000235758", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "250240"} +[2022-08-03 22:59:52,297][train_inner][INFO] - {"epoch": 15, "update": 14.899, "loss": "2.01", "ppl": "4.03", "wps": "363957", "ups": "3.08", "wpb": "118130", "bsz": "256", "num_updates": "766800", "lr": "0.000235556", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "250305"} +[2022-08-03 23:00:57,476][train_inner][INFO] - {"epoch": 15, "update": 14.903, "loss": "2.013", "ppl": "4.04", "wps": "363595", "ups": "3.07", "wpb": "118492", "bsz": "256", "num_updates": "767000", "lr": "0.000235354", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "250370"} +[2022-08-03 23:02:02,099][train_inner][INFO] - {"epoch": 15, "update": 14.906, "loss": "2.014", "ppl": "4.04", "wps": "367754", "ups": "3.09", "wpb": "118824", "bsz": "256", "num_updates": "767200", "lr": "0.000235152", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.5", "wall": "250434"} +[2022-08-03 23:03:07,099][train_inner][INFO] - {"epoch": 15, "update": 14.91, "loss": "2.008", "ppl": "4.02", "wps": "361779", "ups": "3.08", "wpb": "117578", "bsz": "256", "num_updates": "767400", "lr": "0.000234949", "gnorm": "0.78", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "250499"} +[2022-08-03 23:03:25,966][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 23:04:12,329][train_inner][INFO] - {"epoch": 15, "update": 14.914, "loss": "2.008", "ppl": "4.02", "wps": "362983", "ups": "3.07", "wpb": "118385", "bsz": "256", "num_updates": "767600", "lr": "0.000234747", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "250565"} +[2022-08-03 23:05:17,194][train_inner][INFO] - {"epoch": 15, "update": 14.918, "loss": "2.008", "ppl": "4.02", "wps": "364108", "ups": "3.08", "wpb": "118088", "bsz": "256", "num_updates": "767800", "lr": "0.000234545", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "250630"} +[2022-08-03 23:06:23,148][train_inner][INFO] - {"epoch": 15, "update": 14.922, "loss": "2.012", "ppl": "4.03", "wps": "359223", "ups": "3.03", "wpb": "118461", "bsz": "256", "num_updates": "768000", "lr": "0.000234343", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20", "wall": "250695"} +[2022-08-03 23:07:27,763][train_inner][INFO] - {"epoch": 15, "update": 14.926, "loss": "2.009", "ppl": "4.03", "wps": "364474", "ups": "3.1", "wpb": "117750", "bsz": "256", "num_updates": "768200", "lr": "0.000234141", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "250760"} +[2022-08-03 23:08:32,768][train_inner][INFO] - {"epoch": 15, "update": 14.93, "loss": "2.008", "ppl": "4.02", "wps": "363936", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "768400", "lr": "0.000233939", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "250825"} +[2022-08-03 23:09:39,226][train_inner][INFO] - {"epoch": 15, "update": 14.934, "loss": "2.006", "ppl": "4.02", "wps": "356199", "ups": "3.01", "wpb": "118361", "bsz": "256", "num_updates": "768600", "lr": "0.000233737", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.8", "wall": "250892"} +[2022-08-03 23:10:44,213][train_inner][INFO] - {"epoch": 15, "update": 14.938, "loss": "2.007", "ppl": "4.02", "wps": "363858", "ups": "3.08", "wpb": "118228", "bsz": "256", "num_updates": "768800", "lr": "0.000233535", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "250957"} +[2022-08-03 23:11:48,964][train_inner][INFO] - {"epoch": 15, "update": 14.941, "loss": "2.013", "ppl": "4.03", "wps": "364358", "ups": "3.09", "wpb": "117960", "bsz": "256", "num_updates": "769000", "lr": "0.000233333", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "251021"} +[2022-08-03 23:12:53,687][train_inner][INFO] - {"epoch": 15, "update": 14.945, "loss": "2.009", "ppl": "4.03", "wps": "364038", "ups": "3.09", "wpb": "117808", "bsz": "256", "num_updates": "769200", "lr": "0.000233131", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "251086"} +[2022-08-03 23:13:58,693][train_inner][INFO] - {"epoch": 15, "update": 14.949, "loss": "2.008", "ppl": "4.02", "wps": "365124", "ups": "3.08", "wpb": "118673", "bsz": "256", "num_updates": "769400", "lr": "0.000232929", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "251151"} +[2022-08-03 23:14:33,350][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 23:15:03,906][train_inner][INFO] - {"epoch": 15, "update": 14.953, "loss": "2.013", "ppl": "4.04", "wps": "361566", "ups": "3.07", "wpb": "117892", "bsz": "256", "num_updates": "769600", "lr": "0.000232727", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "251216"} +[2022-08-03 23:16:08,560][train_inner][INFO] - {"epoch": 15, "update": 14.957, "loss": "2.011", "ppl": "4.03", "wps": "364766", "ups": "3.09", "wpb": "117918", "bsz": "256", "num_updates": "769800", "lr": "0.000232525", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "251281"} +[2022-08-03 23:17:13,453][train_inner][INFO] - {"epoch": 15, "update": 14.961, "loss": "2.016", "ppl": "4.04", "wps": "363388", "ups": "3.08", "wpb": "117905", "bsz": "256", "num_updates": "770000", "lr": "0.000232323", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "251346"} +[2022-08-03 23:18:18,426][train_inner][INFO] - {"epoch": 15, "update": 14.965, "loss": "2.011", "ppl": "4.03", "wps": "365661", "ups": "3.08", "wpb": "118788", "bsz": "256", "num_updates": "770200", "lr": "0.000232121", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "251411"} +[2022-08-03 23:19:23,521][train_inner][INFO] - {"epoch": 15, "update": 14.969, "loss": "2.005", "ppl": "4.01", "wps": "362729", "ups": "3.07", "wpb": "118056", "bsz": "256", "num_updates": "770400", "lr": "0.000231919", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "251476"} +[2022-08-03 23:20:28,877][train_inner][INFO] - {"epoch": 15, "update": 14.972, "loss": "2.008", "ppl": "4.02", "wps": "362447", "ups": "3.06", "wpb": "118439", "bsz": "256", "num_updates": "770600", "lr": "0.000231717", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "251541"} +[2022-08-03 23:21:33,325][train_inner][INFO] - {"epoch": 15, "update": 14.976, "loss": "2.01", "ppl": "4.03", "wps": "366064", "ups": "3.1", "wpb": "117959", "bsz": "256", "num_updates": "770800", "lr": "0.000231515", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "251606"} +[2022-08-03 23:22:38,123][train_inner][INFO] - {"epoch": 15, "update": 14.98, "loss": "2.009", "ppl": "4.02", "wps": "364852", "ups": "3.09", "wpb": "118208", "bsz": "256", "num_updates": "771000", "lr": "0.000231313", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "251670"} +[2022-08-03 23:23:43,097][train_inner][INFO] - {"epoch": 15, "update": 14.984, "loss": "2.011", "ppl": "4.03", "wps": "364895", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "771200", "lr": "0.000231111", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "251735"} +[2022-08-03 23:24:47,787][train_inner][INFO] - {"epoch": 15, "update": 14.988, "loss": "2.006", "ppl": "4.02", "wps": "366771", "ups": "3.09", "wpb": "118631", "bsz": "256", "num_updates": "771400", "lr": "0.000230909", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "251800"} +[2022-08-03 23:25:39,756][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 23:25:53,005][train_inner][INFO] - {"epoch": 15, "update": 14.992, "loss": "2.005", "ppl": "4.01", "wps": "363598", "ups": "3.07", "wpb": "118564", "bsz": "256", "num_updates": "771600", "lr": "0.000230707", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "251865"} +[2022-08-03 23:26:58,170][train_inner][INFO] - {"epoch": 15, "update": 14.996, "loss": "2.004", "ppl": "4.01", "wps": "365250", "ups": "3.07", "wpb": "119005", "bsz": "256", "num_updates": "771800", "lr": "0.000230505", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "251930"} +[2022-08-03 23:28:03,144][train_inner][INFO] - {"epoch": 15, "update": 15.0, "loss": "2.014", "ppl": "4.04", "wps": "363639", "ups": "3.08", "wpb": "118134", "bsz": "256", "num_updates": "772000", "lr": "0.000230303", "gnorm": "0.78", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "251995"} +[2022-08-03 23:28:08,370][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-03 23:28:31,198][valid][INFO] - {"epoch": 15, "valid_loss": "1.918", "valid_ppl": "3.78", "valid_wps": "1.59017e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "772016", "valid_best_loss": "1.918"} +[2022-08-03 23:28:31,201][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 772016 updates +[2022-08-03 23:28:31,202][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt +[2022-08-03 23:28:42,148][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt +[2022-08-03 23:29:03,902][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_best.pt (epoch 15 @ 772016 updates, score 1.918) (writing took 32.700798738747835 seconds) +[2022-08-03 23:29:03,902][fairseq_cli.train][INFO] - end of epoch 15 (average epoch stats below) +[2022-08-03 23:29:03,903][train][INFO] - {"epoch": 15, "train_loss": "2.022", "train_ppl": "4.06", "train_wps": "361324", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "772016", "train_lr": "0.000230287", "train_gnorm": "0.764", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16621", "train_gb_free": "21.3", "train_wall": "252056"} +[2022-08-03 23:29:04,013][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 +[2022-08-03 23:29:04,016][fairseq.trainer][INFO] - begin training epoch 16 +[2022-08-03 23:29:04,017][fairseq_cli.train][INFO] - Start iterating over samples +[2022-08-03 23:30:14,218][train_inner][INFO] - {"epoch": 16, "update": 15.004, "loss": "2.006", "ppl": "4.02", "wps": "180758", "ups": "1.53", "wpb": "118463", "bsz": "255.4", "num_updates": "772200", "lr": "0.000230101", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "252127"} +[2022-08-03 23:31:18,849][train_inner][INFO] - {"epoch": 16, "update": 15.007, "loss": "2.01", "ppl": "4.03", "wps": "365355", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "772400", "lr": "0.000229899", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "252191"} +[2022-08-03 23:32:23,532][train_inner][INFO] - {"epoch": 16, "update": 15.011, "loss": "2.004", "ppl": "4.01", "wps": "365173", "ups": "3.09", "wpb": "118100", "bsz": "256", "num_updates": "772600", "lr": "0.000229697", "gnorm": "0.783", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "252256"} +[2022-08-03 23:33:28,666][train_inner][INFO] - {"epoch": 16, "update": 15.015, "loss": "2.002", "ppl": "4", "wps": "360684", "ups": "3.07", "wpb": "117463", "bsz": "256", "num_updates": "772800", "lr": "0.000229495", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "252321"} +[2022-08-03 23:34:33,300][train_inner][INFO] - {"epoch": 16, "update": 15.019, "loss": "2.004", "ppl": "4.01", "wps": "365663", "ups": "3.09", "wpb": "118168", "bsz": "256", "num_updates": "773000", "lr": "0.000229293", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "252386"} +[2022-08-03 23:35:38,245][train_inner][INFO] - {"epoch": 16, "update": 15.023, "loss": "2.011", "ppl": "4.03", "wps": "364042", "ups": "3.08", "wpb": "118212", "bsz": "256", "num_updates": "773200", "lr": "0.000229091", "gnorm": "0.78", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "252451"} +[2022-08-03 23:36:43,089][train_inner][INFO] - {"epoch": 16, "update": 15.027, "loss": "2.004", "ppl": "4.01", "wps": "364004", "ups": "3.08", "wpb": "118015", "bsz": "256", "num_updates": "773400", "lr": "0.000228889", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "252515"} +[2022-08-03 23:37:48,051][train_inner][INFO] - {"epoch": 16, "update": 15.031, "loss": "2.004", "ppl": "4.01", "wps": "363165", "ups": "3.08", "wpb": "117957", "bsz": "256", "num_updates": "773600", "lr": "0.000228687", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25", "wall": "252580"} +[2022-08-03 23:37:55,209][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-03 23:38:54,347][train_inner][INFO] - {"epoch": 16, "update": 15.035, "loss": "2.002", "ppl": "4.01", "wps": "355664", "ups": "3.02", "wpb": "117895", "bsz": "256", "num_updates": "773800", "lr": "0.000228485", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.5", "wall": "252647"} +[2022-08-03 23:39:59,488][train_inner][INFO] - {"epoch": 16, "update": 15.039, "loss": "2.008", "ppl": "4.02", "wps": "362897", "ups": "3.07", "wpb": "118197", "bsz": "256", "num_updates": "774000", "lr": "0.000228283", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "252712"} +[2022-08-03 23:41:04,087][train_inner][INFO] - {"epoch": 16, "update": 15.042, "loss": "2.003", "ppl": "4.01", "wps": "363039", "ups": "3.1", "wpb": "117258", "bsz": "256", "num_updates": "774200", "lr": "0.000228081", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "252776"} +[2022-08-03 23:42:09,161][train_inner][INFO] - {"epoch": 16, "update": 15.046, "loss": "2.002", "ppl": "4", "wps": "365230", "ups": "3.07", "wpb": "118833", "bsz": "256", "num_updates": "774400", "lr": "0.000227879", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "252841"} +[2022-08-03 23:43:14,111][train_inner][INFO] - {"epoch": 16, "update": 15.05, "loss": "2.002", "ppl": "4.01", "wps": "363415", "ups": "3.08", "wpb": "118018", "bsz": "256", "num_updates": "774600", "lr": "0.000227677", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26", "wall": "252906"} +[2022-08-03 23:44:18,896][train_inner][INFO] - {"epoch": 16, "update": 15.054, "loss": "2.004", "ppl": "4.01", "wps": "365631", "ups": "3.09", "wpb": "118435", "bsz": "256", "num_updates": "774800", "lr": "0.000227475", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "252971"} +[2022-08-03 23:45:23,583][train_inner][INFO] - {"epoch": 16, "update": 15.058, "loss": "2.008", "ppl": "4.02", "wps": "365349", "ups": "3.09", "wpb": "118164", "bsz": "256", "num_updates": "775000", "lr": "0.000227273", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "253036"} +[2022-08-03 23:46:22,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-03 23:46:28,535][train_inner][INFO] - {"epoch": 16, "update": 15.062, "loss": "2.012", "ppl": "4.03", "wps": "363186", "ups": "3.08", "wpb": "117946", "bsz": "256", "num_updates": "775200", "lr": "0.000227071", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "253101"} +[2022-08-03 23:47:33,623][train_inner][INFO] - {"epoch": 16, "update": 15.066, "loss": "2.003", "ppl": "4.01", "wps": "362102", "ups": "3.07", "wpb": "117842", "bsz": "256", "num_updates": "775400", "lr": "0.000226869", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "253166"} +[2022-08-03 23:48:38,345][train_inner][INFO] - {"epoch": 16, "update": 15.07, "loss": "2.005", "ppl": "4.01", "wps": "365975", "ups": "3.09", "wpb": "118431", "bsz": "256", "num_updates": "775600", "lr": "0.000226667", "gnorm": "0.779", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "253231"} +[2022-08-03 23:49:43,192][train_inner][INFO] - {"epoch": 16, "update": 15.074, "loss": "2.002", "ppl": "4.01", "wps": "366176", "ups": "3.08", "wpb": "118726", "bsz": "256", "num_updates": "775800", "lr": "0.000226465", "gnorm": "0.778", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "253296"} +[2022-08-03 23:50:48,053][train_inner][INFO] - {"epoch": 16, "update": 15.077, "loss": "2.007", "ppl": "4.02", "wps": "362026", "ups": "3.08", "wpb": "117406", "bsz": "256", "num_updates": "776000", "lr": "0.000226263", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "253360"} +[2022-08-03 23:51:52,754][train_inner][INFO] - {"epoch": 16, "update": 15.081, "loss": "2.004", "ppl": "4.01", "wps": "366527", "ups": "3.09", "wpb": "118570", "bsz": "256", "num_updates": "776200", "lr": "0.000226061", "gnorm": "0.779", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "253425"} +[2022-08-03 23:52:57,973][train_inner][INFO] - {"epoch": 16, "update": 15.085, "loss": "2", "ppl": "4", "wps": "364780", "ups": "3.07", "wpb": "118951", "bsz": "256", "num_updates": "776400", "lr": "0.000225859", "gnorm": "0.776", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "253490"} +[2022-08-03 23:54:02,894][train_inner][INFO] - {"epoch": 16, "update": 15.089, "loss": "1.994", "ppl": "3.98", "wps": "364642", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "776600", "lr": "0.000225657", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "253555"} +[2022-08-03 23:55:08,021][train_inner][INFO] - {"epoch": 16, "update": 15.093, "loss": "2.005", "ppl": "4.01", "wps": "364087", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "776800", "lr": "0.000225455", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "253620"} +[2022-08-03 23:56:12,869][train_inner][INFO] - {"epoch": 16, "update": 15.097, "loss": "2.003", "ppl": "4.01", "wps": "364049", "ups": "3.08", "wpb": "118039", "bsz": "256", "num_updates": "777000", "lr": "0.000225253", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "253685"} +[2022-08-03 23:57:17,719][train_inner][INFO] - {"epoch": 16, "update": 15.101, "loss": "2.008", "ppl": "4.02", "wps": "366323", "ups": "3.08", "wpb": "118777", "bsz": "256", "num_updates": "777200", "lr": "0.000225051", "gnorm": "0.809", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "253750"} +[2022-08-03 23:58:22,421][train_inner][INFO] - {"epoch": 16, "update": 15.105, "loss": "2.009", "ppl": "4.02", "wps": "364279", "ups": "3.09", "wpb": "117847", "bsz": "256", "num_updates": "777400", "lr": "0.000224848", "gnorm": "0.787", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "253815"} +[2022-08-03 23:59:27,602][train_inner][INFO] - {"epoch": 16, "update": 15.108, "loss": "1.994", "ppl": "3.98", "wps": "362134", "ups": "3.07", "wpb": "118018", "bsz": "256", "num_updates": "777600", "lr": "0.000224646", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "253880"} +[2022-08-04 00:00:32,417][train_inner][INFO] - {"epoch": 16, "update": 15.112, "loss": "2.003", "ppl": "4.01", "wps": "365891", "ups": "3.09", "wpb": "118574", "bsz": "256", "num_updates": "777800", "lr": "0.000224444", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "253945"} +[2022-08-04 00:01:37,376][train_inner][INFO] - {"epoch": 16, "update": 15.116, "loss": "1.999", "ppl": "4", "wps": "365070", "ups": "3.08", "wpb": "118572", "bsz": "256", "num_updates": "778000", "lr": "0.000224242", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "254010"} +[2022-08-04 00:02:42,571][train_inner][INFO] - {"epoch": 16, "update": 15.12, "loss": "2.008", "ppl": "4.02", "wps": "362471", "ups": "3.07", "wpb": "118155", "bsz": "256", "num_updates": "778200", "lr": "0.00022404", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "254075"} +[2022-08-04 00:03:47,150][train_inner][INFO] - {"epoch": 16, "update": 15.124, "loss": "2.002", "ppl": "4.01", "wps": "365852", "ups": "3.1", "wpb": "118130", "bsz": "256", "num_updates": "778400", "lr": "0.000223838", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "254139"} +[2022-08-04 00:04:51,494][train_inner][INFO] - {"epoch": 16, "update": 15.128, "loss": "1.998", "ppl": "3.99", "wps": "367679", "ups": "3.11", "wpb": "118286", "bsz": "256", "num_updates": "778600", "lr": "0.000223636", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "254204"} +[2022-08-04 00:05:56,512][train_inner][INFO] - {"epoch": 16, "update": 15.132, "loss": "2.002", "ppl": "4", "wps": "363713", "ups": "3.08", "wpb": "118238", "bsz": "256", "num_updates": "778800", "lr": "0.000223434", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "254269"} +[2022-08-04 00:07:01,106][train_inner][INFO] - {"epoch": 16, "update": 15.136, "loss": "2.01", "ppl": "4.03", "wps": "365680", "ups": "3.1", "wpb": "118102", "bsz": "256", "num_updates": "779000", "lr": "0.000223232", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "254333"} +[2022-08-04 00:08:06,342][train_inner][INFO] - {"epoch": 16, "update": 15.14, "loss": "2.005", "ppl": "4.01", "wps": "362639", "ups": "3.07", "wpb": "118284", "bsz": "256", "num_updates": "779200", "lr": "0.00022303", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "254399"} +[2022-08-04 00:08:31,239][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 00:09:11,434][train_inner][INFO] - {"epoch": 16, "update": 15.143, "loss": "2.005", "ppl": "4.01", "wps": "362027", "ups": "3.07", "wpb": "117822", "bsz": "256", "num_updates": "779400", "lr": "0.000222828", "gnorm": "0.783", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.9", "wall": "254464"} +[2022-08-04 00:09:34,748][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 00:10:16,312][train_inner][INFO] - {"epoch": 16, "update": 15.147, "loss": "2.005", "ppl": "4.02", "wps": "365279", "ups": "3.08", "wpb": "118492", "bsz": "256", "num_updates": "779600", "lr": "0.000222626", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "254529"} +[2022-08-04 00:11:21,291][train_inner][INFO] - {"epoch": 16, "update": 15.151, "loss": "1.996", "ppl": "3.99", "wps": "365452", "ups": "3.08", "wpb": "118733", "bsz": "256", "num_updates": "779800", "lr": "0.000222424", "gnorm": "0.781", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "254594"} +[2022-08-04 00:12:25,897][train_inner][INFO] - {"epoch": 16, "update": 15.155, "loss": "2.005", "ppl": "4.01", "wps": "365310", "ups": "3.1", "wpb": "118004", "bsz": "256", "num_updates": "780000", "lr": "0.000222222", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "254658"} +[2022-08-04 00:13:30,837][train_inner][INFO] - {"epoch": 16, "update": 15.159, "loss": "1.997", "ppl": "3.99", "wps": "365251", "ups": "3.08", "wpb": "118595", "bsz": "256", "num_updates": "780200", "lr": "0.00022202", "gnorm": "0.781", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.7", "wall": "254723"} +[2022-08-04 00:14:35,740][train_inner][INFO] - {"epoch": 16, "update": 15.163, "loss": "2.009", "ppl": "4.03", "wps": "363101", "ups": "3.08", "wpb": "117830", "bsz": "256", "num_updates": "780400", "lr": "0.000221818", "gnorm": "0.781", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "254788"} +[2022-08-04 00:15:40,432][train_inner][INFO] - {"epoch": 16, "update": 15.167, "loss": "2.009", "ppl": "4.02", "wps": "365612", "ups": "3.09", "wpb": "118259", "bsz": "256", "num_updates": "780600", "lr": "0.000221616", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "254853"} +[2022-08-04 00:16:45,782][train_inner][INFO] - {"epoch": 16, "update": 15.171, "loss": "2", "ppl": "4", "wps": "361832", "ups": "3.06", "wpb": "118228", "bsz": "256", "num_updates": "780800", "lr": "0.000221414", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "254918"} +[2022-08-04 00:17:50,532][train_inner][INFO] - {"epoch": 16, "update": 15.175, "loss": "1.999", "ppl": "4", "wps": "366181", "ups": "3.09", "wpb": "118549", "bsz": "256", "num_updates": "781000", "lr": "0.000221212", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "254983"} +[2022-08-04 00:18:55,187][train_inner][INFO] - {"epoch": 16, "update": 15.178, "loss": "2.003", "ppl": "4.01", "wps": "365219", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "781200", "lr": "0.00022101", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.9", "wall": "255048"} +[2022-08-04 00:20:00,622][train_inner][INFO] - {"epoch": 16, "update": 15.182, "loss": "1.996", "ppl": "3.99", "wps": "362212", "ups": "3.06", "wpb": "118505", "bsz": "256", "num_updates": "781400", "lr": "0.000220808", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.4", "wall": "255113"} +[2022-08-04 00:21:05,585][train_inner][INFO] - {"epoch": 16, "update": 15.186, "loss": "1.997", "ppl": "3.99", "wps": "364551", "ups": "3.08", "wpb": "118409", "bsz": "256", "num_updates": "781600", "lr": "0.000220606", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "255178"} +[2022-08-04 00:22:10,772][train_inner][INFO] - {"epoch": 16, "update": 15.19, "loss": "1.999", "ppl": "4", "wps": "363767", "ups": "3.07", "wpb": "118564", "bsz": "256", "num_updates": "781800", "lr": "0.000220404", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.8", "wall": "255243"} +[2022-08-04 00:23:15,322][train_inner][INFO] - {"epoch": 16, "update": 15.194, "loss": "1.994", "ppl": "3.98", "wps": "368132", "ups": "3.1", "wpb": "118811", "bsz": "256", "num_updates": "782000", "lr": "0.000220202", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "255308"} +[2022-08-04 00:24:19,727][train_inner][INFO] - {"epoch": 16, "update": 15.198, "loss": "1.995", "ppl": "3.99", "wps": "366420", "ups": "3.11", "wpb": "117996", "bsz": "256", "num_updates": "782200", "lr": "0.00022", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "255372"} +[2022-08-04 00:25:24,359][train_inner][INFO] - {"epoch": 16, "update": 15.202, "loss": "2.001", "ppl": "4", "wps": "365165", "ups": "3.09", "wpb": "118005", "bsz": "256", "num_updates": "782400", "lr": "0.000219798", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "255437"} +[2022-08-04 00:26:11,398][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 00:26:29,612][train_inner][INFO] - {"epoch": 16, "update": 15.206, "loss": "2.006", "ppl": "4.02", "wps": "362826", "ups": "3.07", "wpb": "118375", "bsz": "256", "num_updates": "782600", "lr": "0.000219596", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "255502"} +[2022-08-04 00:27:33,957][train_inner][INFO] - {"epoch": 16, "update": 15.21, "loss": "2.001", "ppl": "4", "wps": "365640", "ups": "3.11", "wpb": "117636", "bsz": "256", "num_updates": "782800", "lr": "0.000219394", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "255566"} +[2022-08-04 00:28:38,702][train_inner][INFO] - {"epoch": 16, "update": 15.213, "loss": "2.002", "ppl": "4.01", "wps": "362812", "ups": "3.09", "wpb": "117448", "bsz": "256", "num_updates": "783000", "lr": "0.000219192", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "255631"} +[2022-08-04 00:29:43,763][train_inner][INFO] - {"epoch": 16, "update": 15.217, "loss": "2.003", "ppl": "4.01", "wps": "363251", "ups": "3.07", "wpb": "118166", "bsz": "256", "num_updates": "783200", "lr": "0.00021899", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "255696"} +[2022-08-04 00:30:48,389][train_inner][INFO] - {"epoch": 16, "update": 15.221, "loss": "1.998", "ppl": "4", "wps": "365836", "ups": "3.09", "wpb": "118210", "bsz": "256", "num_updates": "783400", "lr": "0.000218788", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.5", "wall": "255761"} +[2022-08-04 00:31:53,385][train_inner][INFO] - {"epoch": 16, "update": 15.225, "loss": "1.998", "ppl": "4", "wps": "363172", "ups": "3.08", "wpb": "118022", "bsz": "256", "num_updates": "783600", "lr": "0.000218586", "gnorm": "0.792", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "255826"} +[2022-08-04 00:32:58,459][train_inner][INFO] - {"epoch": 16, "update": 15.229, "loss": "1.999", "ppl": "4", "wps": "365225", "ups": "3.07", "wpb": "118832", "bsz": "256", "num_updates": "783800", "lr": "0.000218384", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "255891"} +[2022-08-04 00:34:02,795][train_inner][INFO] - {"epoch": 16, "update": 15.233, "loss": "1.999", "ppl": "4", "wps": "366704", "ups": "3.11", "wpb": "117959", "bsz": "256", "num_updates": "784000", "lr": "0.000218182", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "255955"} +[2022-08-04 00:35:07,680][train_inner][INFO] - {"epoch": 16, "update": 15.237, "loss": "1.993", "ppl": "3.98", "wps": "364482", "ups": "3.08", "wpb": "118246", "bsz": "256", "num_updates": "784200", "lr": "0.00021798", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.1", "wall": "256020"} +[2022-08-04 00:36:12,630][train_inner][INFO] - {"epoch": 16, "update": 15.241, "loss": "1.998", "ppl": "3.99", "wps": "363987", "ups": "3.08", "wpb": "118203", "bsz": "256", "num_updates": "784400", "lr": "0.000217778", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "256085"} +[2022-08-04 00:37:17,604][train_inner][INFO] - {"epoch": 16, "update": 15.245, "loss": "2", "ppl": "4", "wps": "364344", "ups": "3.08", "wpb": "118362", "bsz": "256", "num_updates": "784600", "lr": "0.000217576", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "256150"} +[2022-08-04 00:38:22,583][train_inner][INFO] - {"epoch": 16, "update": 15.248, "loss": "2.001", "ppl": "4", "wps": "363822", "ups": "3.08", "wpb": "118202", "bsz": "256", "num_updates": "784800", "lr": "0.000217374", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "256215"} +[2022-08-04 00:39:27,378][train_inner][INFO] - {"epoch": 16, "update": 15.252, "loss": "2.002", "ppl": "4", "wps": "365453", "ups": "3.09", "wpb": "118396", "bsz": "256", "num_updates": "785000", "lr": "0.000217172", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "256280"} +[2022-08-04 00:40:32,703][train_inner][INFO] - {"epoch": 16, "update": 15.256, "loss": "1.998", "ppl": "3.99", "wps": "362543", "ups": "3.06", "wpb": "118415", "bsz": "256", "num_updates": "785200", "lr": "0.00021697", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "256345"} +[2022-08-04 00:41:37,434][train_inner][INFO] - {"epoch": 16, "update": 15.26, "loss": "1.995", "ppl": "3.99", "wps": "367067", "ups": "3.09", "wpb": "118801", "bsz": "256", "num_updates": "785400", "lr": "0.000216768", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "256410"} +[2022-08-04 00:42:42,354][train_inner][INFO] - {"epoch": 16, "update": 15.264, "loss": "2.006", "ppl": "4.02", "wps": "362626", "ups": "3.08", "wpb": "117706", "bsz": "256", "num_updates": "785600", "lr": "0.000216566", "gnorm": "0.793", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "256475"} +[2022-08-04 00:43:46,998][train_inner][INFO] - {"epoch": 16, "update": 15.268, "loss": "1.998", "ppl": "4", "wps": "367128", "ups": "3.09", "wpb": "118661", "bsz": "256", "num_updates": "785800", "lr": "0.000216364", "gnorm": "0.791", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "256539"} +[2022-08-04 00:44:51,718][train_inner][INFO] - {"epoch": 16, "update": 15.272, "loss": "1.997", "ppl": "3.99", "wps": "366085", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "786000", "lr": "0.000216162", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.4", "wall": "256604"} +[2022-08-04 00:45:56,496][train_inner][INFO] - {"epoch": 16, "update": 15.276, "loss": "1.996", "ppl": "3.99", "wps": "366419", "ups": "3.09", "wpb": "118677", "bsz": "256", "num_updates": "786200", "lr": "0.00021596", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "256669"} +[2022-08-04 00:47:01,612][train_inner][INFO] - {"epoch": 16, "update": 15.279, "loss": "1.99", "ppl": "3.97", "wps": "363123", "ups": "3.07", "wpb": "118224", "bsz": "256", "num_updates": "786400", "lr": "0.000215758", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "256734"} +[2022-08-04 00:48:06,697][train_inner][INFO] - {"epoch": 16, "update": 15.283, "loss": "1.997", "ppl": "3.99", "wps": "363721", "ups": "3.07", "wpb": "118363", "bsz": "256", "num_updates": "786600", "lr": "0.000215556", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "256799"} +[2022-08-04 00:48:20,067][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 00:49:11,695][train_inner][INFO] - {"epoch": 16, "update": 15.287, "loss": "1.995", "ppl": "3.99", "wps": "363613", "ups": "3.08", "wpb": "118168", "bsz": "256", "num_updates": "786800", "lr": "0.000215354", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "256864"} +[2022-08-04 00:50:16,662][train_inner][INFO] - {"epoch": 16, "update": 15.291, "loss": "2.003", "ppl": "4.01", "wps": "362202", "ups": "3.08", "wpb": "117655", "bsz": "256", "num_updates": "787000", "lr": "0.000215152", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "256929"} +[2022-08-04 00:51:21,891][train_inner][INFO] - {"epoch": 16, "update": 15.295, "loss": "1.996", "ppl": "3.99", "wps": "365320", "ups": "3.07", "wpb": "119145", "bsz": "256", "num_updates": "787200", "lr": "0.000214949", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "256994"} +[2022-08-04 00:52:26,862][train_inner][INFO] - {"epoch": 16, "update": 15.299, "loss": "2.001", "ppl": "4", "wps": "363925", "ups": "3.08", "wpb": "118222", "bsz": "256", "num_updates": "787400", "lr": "0.000214747", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "257059"} +[2022-08-04 00:53:32,495][train_inner][INFO] - {"epoch": 16, "update": 15.303, "loss": "1.992", "ppl": "3.98", "wps": "360030", "ups": "3.05", "wpb": "118147", "bsz": "256", "num_updates": "787600", "lr": "0.000214545", "gnorm": "0.792", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "257125"} +[2022-08-04 00:54:37,487][train_inner][INFO] - {"epoch": 16, "update": 15.307, "loss": "1.993", "ppl": "3.98", "wps": "366072", "ups": "3.08", "wpb": "118957", "bsz": "256", "num_updates": "787800", "lr": "0.000214343", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "257190"} +[2022-08-04 00:55:42,278][train_inner][INFO] - {"epoch": 16, "update": 15.311, "loss": "1.995", "ppl": "3.99", "wps": "364645", "ups": "3.09", "wpb": "118127", "bsz": "256", "num_updates": "788000", "lr": "0.000214141", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "257255"} +[2022-08-04 00:56:48,327][train_inner][INFO] - {"epoch": 16, "update": 15.314, "loss": "1.994", "ppl": "3.98", "wps": "360045", "ups": "3.03", "wpb": "118901", "bsz": "256", "num_updates": "788200", "lr": "0.000213939", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.5", "wall": "257321"} +[2022-08-04 00:57:53,234][train_inner][INFO] - {"epoch": 16, "update": 15.318, "loss": "2", "ppl": "4", "wps": "364694", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "788400", "lr": "0.000213737", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "257386"} +[2022-08-04 00:58:59,231][train_inner][INFO] - {"epoch": 16, "update": 15.322, "loss": "1.998", "ppl": "3.99", "wps": "358107", "ups": "3.03", "wpb": "118168", "bsz": "256", "num_updates": "788600", "lr": "0.000213535", "gnorm": "0.791", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.4", "wall": "257452"} +[2022-08-04 00:59:32,302][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 01:00:04,521][train_inner][INFO] - {"epoch": 16, "update": 15.326, "loss": "1.996", "ppl": "3.99", "wps": "362614", "ups": "3.06", "wpb": "118372", "bsz": "256", "num_updates": "788800", "lr": "0.000213333", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "257517"} +[2022-08-04 01:01:09,309][train_inner][INFO] - {"epoch": 16, "update": 15.33, "loss": "1.999", "ppl": "4", "wps": "364823", "ups": "3.09", "wpb": "118180", "bsz": "256", "num_updates": "789000", "lr": "0.000213131", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "257582"} +[2022-08-04 01:02:13,902][train_inner][INFO] - {"epoch": 16, "update": 15.334, "loss": "2.001", "ppl": "4", "wps": "365127", "ups": "3.1", "wpb": "117921", "bsz": "256", "num_updates": "789200", "lr": "0.000212929", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "257646"} +[2022-08-04 01:03:18,868][train_inner][INFO] - {"epoch": 16, "update": 15.338, "loss": "1.994", "ppl": "3.98", "wps": "364635", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "789400", "lr": "0.000212727", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "257711"} +[2022-08-04 01:04:23,480][train_inner][INFO] - {"epoch": 16, "update": 15.342, "loss": "1.991", "ppl": "3.97", "wps": "367427", "ups": "3.1", "wpb": "118699", "bsz": "256", "num_updates": "789600", "lr": "0.000212525", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "257776"} +[2022-08-04 01:05:28,526][train_inner][INFO] - {"epoch": 16, "update": 15.346, "loss": "1.997", "ppl": "3.99", "wps": "364352", "ups": "3.07", "wpb": "118496", "bsz": "256", "num_updates": "789800", "lr": "0.000212323", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "257841"} +[2022-08-04 01:06:33,139][train_inner][INFO] - {"epoch": 16, "update": 15.349, "loss": "2.001", "ppl": "4", "wps": "365375", "ups": "3.1", "wpb": "118039", "bsz": "256", "num_updates": "790000", "lr": "0.000212121", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "257905"} +[2022-08-04 01:07:38,118][train_inner][INFO] - {"epoch": 16, "update": 15.353, "loss": "1.991", "ppl": "3.98", "wps": "365466", "ups": "3.08", "wpb": "118736", "bsz": "256", "num_updates": "790200", "lr": "0.000211919", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "257970"} +[2022-08-04 01:08:43,152][train_inner][INFO] - {"epoch": 16, "update": 15.357, "loss": "1.994", "ppl": "3.98", "wps": "365269", "ups": "3.08", "wpb": "118772", "bsz": "256", "num_updates": "790400", "lr": "0.000211717", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "258035"} +[2022-08-04 01:09:48,091][train_inner][INFO] - {"epoch": 16, "update": 15.361, "loss": "1.992", "ppl": "3.98", "wps": "366092", "ups": "3.08", "wpb": "118866", "bsz": "256", "num_updates": "790600", "lr": "0.000211515", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "258100"} +[2022-08-04 01:10:39,053][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 +[2022-08-04 01:10:53,370][train_inner][INFO] - {"epoch": 16, "update": 15.365, "loss": "1.992", "ppl": "3.98", "wps": "362256", "ups": "3.06", "wpb": "118238", "bsz": "256", "num_updates": "790800", "lr": "0.000211313", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "258166"} +[2022-08-04 01:11:58,170][train_inner][INFO] - {"epoch": 16, "update": 15.369, "loss": "1.995", "ppl": "3.99", "wps": "364103", "ups": "3.09", "wpb": "117967", "bsz": "256", "num_updates": "791000", "lr": "0.000211111", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "258230"} +[2022-08-04 01:13:03,245][train_inner][INFO] - {"epoch": 16, "update": 15.373, "loss": "1.991", "ppl": "3.97", "wps": "365622", "ups": "3.07", "wpb": "118964", "bsz": "256", "num_updates": "791200", "lr": "0.000210909", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "258296"} +[2022-08-04 01:14:08,003][train_inner][INFO] - {"epoch": 16, "update": 15.377, "loss": "1.996", "ppl": "3.99", "wps": "365717", "ups": "3.09", "wpb": "118413", "bsz": "256", "num_updates": "791400", "lr": "0.000210707", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "258360"} +[2022-08-04 01:15:13,477][train_inner][INFO] - {"epoch": 16, "update": 15.381, "loss": "1.992", "ppl": "3.98", "wps": "362162", "ups": "3.05", "wpb": "118559", "bsz": "256", "num_updates": "791600", "lr": "0.000210505", "gnorm": "0.787", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "258426"} +[2022-08-04 01:16:19,346][train_inner][INFO] - {"epoch": 16, "update": 15.384, "loss": "1.995", "ppl": "3.98", "wps": "359541", "ups": "3.04", "wpb": "118412", "bsz": "256", "num_updates": "791800", "lr": "0.000210303", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.8", "wall": "258492"} +[2022-08-04 01:17:24,349][train_inner][INFO] - {"epoch": 16, "update": 15.388, "loss": "1.992", "ppl": "3.98", "wps": "364416", "ups": "3.08", "wpb": "118438", "bsz": "256", "num_updates": "792000", "lr": "0.000210101", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "258557"} +[2022-08-04 01:18:30,121][train_inner][INFO] - {"epoch": 16, "update": 15.392, "loss": "1.998", "ppl": "3.99", "wps": "358604", "ups": "3.04", "wpb": "117927", "bsz": "256", "num_updates": "792200", "lr": "0.000209899", "gnorm": "0.791", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "258622"} +[2022-08-04 01:19:35,080][train_inner][INFO] - {"epoch": 16, "update": 15.396, "loss": "1.99", "ppl": "3.97", "wps": "364552", "ups": "3.08", "wpb": "118404", "bsz": "256", "num_updates": "792400", "lr": "0.000209697", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "258687"} +[2022-08-04 01:20:15,866][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 01:20:40,602][train_inner][INFO] - {"epoch": 16, "update": 15.4, "loss": "1.997", "ppl": "3.99", "wps": "361086", "ups": "3.05", "wpb": "118294", "bsz": "256", "num_updates": "792600", "lr": "0.000209495", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "258753"} +[2022-08-04 01:21:45,509][train_inner][INFO] - {"epoch": 16, "update": 15.404, "loss": "1.993", "ppl": "3.98", "wps": "365890", "ups": "3.08", "wpb": "118743", "bsz": "256", "num_updates": "792800", "lr": "0.000209293", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.8", "wall": "258818"} +[2022-08-04 01:22:50,700][train_inner][INFO] - {"epoch": 16, "update": 15.408, "loss": "1.989", "ppl": "3.97", "wps": "362158", "ups": "3.07", "wpb": "118045", "bsz": "256", "num_updates": "793000", "lr": "0.000209091", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "258883"} +[2022-08-04 01:23:55,554][train_inner][INFO] - {"epoch": 16, "update": 15.412, "loss": "1.993", "ppl": "3.98", "wps": "364008", "ups": "3.08", "wpb": "118036", "bsz": "256", "num_updates": "793200", "lr": "0.000208889", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "258948"} +[2022-08-04 01:25:00,379][train_inner][INFO] - {"epoch": 16, "update": 15.415, "loss": "1.991", "ppl": "3.98", "wps": "367052", "ups": "3.09", "wpb": "118969", "bsz": "256", "num_updates": "793400", "lr": "0.000208687", "gnorm": "0.79", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "259013"} +[2022-08-04 01:26:05,075][train_inner][INFO] - {"epoch": 16, "update": 15.419, "loss": "1.992", "ppl": "3.98", "wps": "365369", "ups": "3.09", "wpb": "118188", "bsz": "256", "num_updates": "793600", "lr": "0.000208485", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "259077"} +[2022-08-04 01:27:09,815][train_inner][INFO] - {"epoch": 16, "update": 15.423, "loss": "1.992", "ppl": "3.98", "wps": "365533", "ups": "3.09", "wpb": "118320", "bsz": "256", "num_updates": "793800", "lr": "0.000208283", "gnorm": "0.794", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "259142"} +[2022-08-04 01:28:14,974][train_inner][INFO] - {"epoch": 16, "update": 15.427, "loss": "1.993", "ppl": "3.98", "wps": "363914", "ups": "3.07", "wpb": "118560", "bsz": "256", "num_updates": "794000", "lr": "0.000208081", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "259207"} +[2022-08-04 01:29:19,712][train_inner][INFO] - {"epoch": 16, "update": 15.431, "loss": "2.004", "ppl": "4.01", "wps": "364767", "ups": "3.09", "wpb": "118071", "bsz": "256", "num_updates": "794200", "lr": "0.000207879", "gnorm": "0.79", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "259272"} +[2022-08-04 01:30:24,778][train_inner][INFO] - {"epoch": 16, "update": 15.435, "loss": "1.994", "ppl": "3.98", "wps": "364013", "ups": "3.07", "wpb": "118423", "bsz": "256", "num_updates": "794400", "lr": "0.000207677", "gnorm": "0.794", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "259337"} +[2022-08-04 01:31:30,150][train_inner][INFO] - {"epoch": 16, "update": 15.439, "loss": "1.991", "ppl": "3.98", "wps": "362512", "ups": "3.06", "wpb": "118489", "bsz": "256", "num_updates": "794600", "lr": "0.000207475", "gnorm": "0.794", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "259402"} +[2022-08-04 01:32:35,089][train_inner][INFO] - {"epoch": 16, "update": 15.443, "loss": "2.001", "ppl": "4", "wps": "363641", "ups": "3.08", "wpb": "118069", "bsz": "256", "num_updates": "794800", "lr": "0.000207273", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "259467"} +[2022-08-04 01:33:40,107][train_inner][INFO] - {"epoch": 16, "update": 15.447, "loss": "1.989", "ppl": "3.97", "wps": "365951", "ups": "3.08", "wpb": "118967", "bsz": "256", "num_updates": "795000", "lr": "0.000207071", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "259532"} +[2022-08-04 01:34:15,649][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 01:34:45,299][train_inner][INFO] - {"epoch": 16, "update": 15.45, "loss": "1.988", "ppl": "3.97", "wps": "363843", "ups": "3.07", "wpb": "118596", "bsz": "256", "num_updates": "795200", "lr": "0.000206869", "gnorm": "0.788", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "259598"} +[2022-08-04 01:35:49,913][train_inner][INFO] - {"epoch": 16, "update": 15.454, "loss": "1.993", "ppl": "3.98", "wps": "366730", "ups": "3.1", "wpb": "118478", "bsz": "256", "num_updates": "795400", "lr": "0.000206667", "gnorm": "0.791", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "259662"} +[2022-08-04 01:36:54,682][train_inner][INFO] - {"epoch": 16, "update": 15.458, "loss": "1.997", "ppl": "3.99", "wps": "366214", "ups": "3.09", "wpb": "118595", "bsz": "256", "num_updates": "795600", "lr": "0.000206465", "gnorm": "0.79", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "259727"} +[2022-08-04 01:37:59,769][train_inner][INFO] - {"epoch": 16, "update": 15.462, "loss": "1.99", "ppl": "3.97", "wps": "365210", "ups": "3.07", "wpb": "118848", "bsz": "256", "num_updates": "795800", "lr": "0.000206263", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "259792"} +[2022-08-04 01:39:04,644][train_inner][INFO] - {"epoch": 16, "update": 15.466, "loss": "1.992", "ppl": "3.98", "wps": "363885", "ups": "3.08", "wpb": "118033", "bsz": "256", "num_updates": "796000", "lr": "0.000206061", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "259857"} +[2022-08-04 01:40:00,005][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 +[2022-08-04 01:40:00,295][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-04 01:40:10,118][train_inner][INFO] - {"epoch": 16, "update": 15.47, "loss": "1.995", "ppl": "3.99", "wps": "363016", "ups": "3.05", "wpb": "118839", "bsz": "256", "num_updates": "796200", "lr": "0.000205859", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "259922"} +[2022-08-04 01:41:14,719][train_inner][INFO] - {"epoch": 16, "update": 15.474, "loss": "1.995", "ppl": "3.99", "wps": "365617", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "796400", "lr": "0.000205657", "gnorm": "0.793", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.5", "wall": "259987"} +[2022-08-04 01:42:19,678][train_inner][INFO] - {"epoch": 16, "update": 15.478, "loss": "1.989", "ppl": "3.97", "wps": "364556", "ups": "3.08", "wpb": "118403", "bsz": "256", "num_updates": "796600", "lr": "0.000205455", "gnorm": "0.795", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23", "wall": "260052"} +[2022-08-04 01:43:24,831][train_inner][INFO] - {"epoch": 16, "update": 15.482, "loss": "1.988", "ppl": "3.97", "wps": "365817", "ups": "3.07", "wpb": "119169", "bsz": "256", "num_updates": "796800", "lr": "0.000205253", "gnorm": "0.782", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "260117"} +[2022-08-04 01:44:30,225][train_inner][INFO] - {"epoch": 16, "update": 15.485, "loss": "1.989", "ppl": "3.97", "wps": "361456", "ups": "3.06", "wpb": "118184", "bsz": "256", "num_updates": "797000", "lr": "0.000205051", "gnorm": "0.79", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.2", "wall": "260183"} +[2022-08-04 01:45:35,521][train_inner][INFO] - {"epoch": 16, "update": 15.489, "loss": "1.992", "ppl": "3.98", "wps": "362507", "ups": "3.06", "wpb": "118350", "bsz": "256", "num_updates": "797200", "lr": "0.000204848", "gnorm": "0.804", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.4", "wall": "260248"} +[2022-08-04 01:46:40,745][train_inner][INFO] - {"epoch": 16, "update": 15.493, "loss": "1.989", "ppl": "3.97", "wps": "363266", "ups": "3.07", "wpb": "118465", "bsz": "256", "num_updates": "797400", "lr": "0.000204646", "gnorm": "0.79", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.7", "wall": "260313"} +[2022-08-04 01:47:45,805][train_inner][INFO] - {"epoch": 16, "update": 15.497, "loss": "1.983", "ppl": "3.95", "wps": "364686", "ups": "3.07", "wpb": "118620", "bsz": "256", "num_updates": "797600", "lr": "0.000204444", "gnorm": "0.79", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "260378"} +[2022-08-04 01:48:50,978][train_inner][INFO] - {"epoch": 16, "update": 15.501, "loss": "1.996", "ppl": "3.99", "wps": "364056", "ups": "3.07", "wpb": "118632", "bsz": "256", "num_updates": "797800", "lr": "0.000204242", "gnorm": "0.8", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "260443"} +[2022-08-04 01:49:56,029][train_inner][INFO] - {"epoch": 16, "update": 15.505, "loss": "1.99", "ppl": "3.97", "wps": "362383", "ups": "3.07", "wpb": "117864", "bsz": "256", "num_updates": "798000", "lr": "0.00020404", "gnorm": "0.795", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "260508"} +[2022-08-04 01:51:01,109][train_inner][INFO] - {"epoch": 16, "update": 15.509, "loss": "1.992", "ppl": "3.98", "wps": "364792", "ups": "3.07", "wpb": "118701", "bsz": "256", "num_updates": "798200", "lr": "0.000203838", "gnorm": "0.793", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "260573"} +[2022-08-04 01:52:06,263][train_inner][INFO] - {"epoch": 16, "update": 15.513, "loss": "1.997", "ppl": "3.99", "wps": "364554", "ups": "3.07", "wpb": "118760", "bsz": "256", "num_updates": "798400", "lr": "0.000203636", "gnorm": "0.794", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "260639"} +[2022-08-04 01:53:10,975][train_inner][INFO] - {"epoch": 16, "update": 15.517, "loss": "1.99", "ppl": "3.97", "wps": "364406", "ups": "3.09", "wpb": "117905", "bsz": "256", "num_updates": "798600", "lr": "0.000203434", "gnorm": "0.8", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.6", "wall": "260703"} +[2022-08-04 01:54:15,709][train_inner][INFO] - {"epoch": 16, "update": 15.52, "loss": "1.991", "ppl": "3.98", "wps": "363842", "ups": "3.09", "wpb": "117762", "bsz": "256", "num_updates": "798800", "lr": "0.000203232", "gnorm": "0.794", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.8", "wall": "260768"} +[2022-08-04 01:55:21,258][train_inner][INFO] - {"epoch": 16, "update": 15.524, "loss": "1.989", "ppl": "3.97", "wps": "359673", "ups": "3.05", "wpb": "117880", "bsz": "256", "num_updates": "799000", "lr": "0.00020303", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "260834"} +[2022-08-04 01:56:26,243][train_inner][INFO] - {"epoch": 16, "update": 15.528, "loss": "1.991", "ppl": "3.98", "wps": "365720", "ups": "3.08", "wpb": "118829", "bsz": "256", "num_updates": "799200", "lr": "0.000202828", "gnorm": "0.792", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.6", "wall": "260899"} +[2022-08-04 01:56:53,421][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +[2022-08-04 01:57:31,587][train_inner][INFO] - {"epoch": 16, "update": 15.532, "loss": "1.988", "ppl": "3.97", "wps": "361679", "ups": "3.06", "wpb": "118165", "bsz": "256", "num_updates": "799400", "lr": "0.000202626", "gnorm": "0.795", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "260964"} +[2022-08-04 01:58:36,220][train_inner][INFO] - {"epoch": 16, "update": 15.536, "loss": "1.994", "ppl": "3.98", "wps": "364436", "ups": "3.09", "wpb": "117773", "bsz": "256", "num_updates": "799600", "lr": "0.000202424", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.8", "wall": "261029"} +[2022-08-04 01:59:41,321][train_inner][INFO] - {"epoch": 16, "update": 15.54, "loss": "1.99", "ppl": "3.97", "wps": "361654", "ups": "3.07", "wpb": "117717", "bsz": "256", "num_updates": "799800", "lr": "0.000202222", "gnorm": "0.797", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.5", "wall": "261094"} +[2022-08-04 02:00:46,530][train_inner][INFO] - {"epoch": 16, "update": 15.544, "loss": "1.983", "ppl": "3.95", "wps": "362606", "ups": "3.07", "wpb": "118224", "bsz": "256", "num_updates": "800000", "lr": "0.00020202", "gnorm": "0.792", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.9", "wall": "261159"} +[2022-08-04 02:00:46,532][fairseq_cli.train][INFO] - begin validation on "valid" subset +[2022-08-04 02:01:09,447][valid][INFO] - {"epoch": 16, "valid_loss": "1.903", "valid_ppl": "3.74", "valid_wps": "1.61134e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "800000", "valid_best_loss": "1.903"} +[2022-08-04 02:01:09,450][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 16 @ 800000 updates +[2022-08-04 02:01:09,450][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_16_800000.pt +[2022-08-04 02:01:19,982][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_16_800000.pt +[2022-08-04 02:01:46,409][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_16_800000.pt (epoch 16 @ 800000 updates, score 1.903) (writing took 36.959332070313394 seconds) +[2022-08-04 02:02:51,400][train_inner][INFO] - {"epoch": 16, "update": 15.548, "loss": "1.998", "ppl": "4", "wps": "189015", "ups": "1.6", "wpb": "118010", "bsz": "256", "num_updates": "800200", "lr": "0.000201818", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "261284"} +[2022-08-04 02:03:56,529][train_inner][INFO] - {"epoch": 16, "update": 15.552, "loss": "1.993", "ppl": "3.98", "wps": "363662", "ups": "3.07", "wpb": "118423", "bsz": "256", "num_updates": "800400", "lr": "0.000201616", "gnorm": "0.794", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "261349"} +[2022-08-04 02:05:01,407][train_inner][INFO] - {"epoch": 16, "update": 15.555, "loss": "1.991", "ppl": "3.97", "wps": "365895", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "800600", "lr": "0.000201414", "gnorm": "0.81", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "261414"} +[2022-08-04 02:06:06,090][train_inner][INFO] - {"epoch": 16, "update": 15.559, "loss": "1.996", "ppl": "3.99", "wps": "365049", "ups": "3.09", "wpb": "118061", "bsz": "256", "num_updates": "800800", "lr": "0.000201212", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.3", "wall": "261478"} +[2022-08-04 02:07:11,096][train_inner][INFO] - {"epoch": 16, "update": 15.563, "loss": "1.996", "ppl": "3.99", "wps": "363513", "ups": "3.08", "wpb": "118150", "bsz": "256", "num_updates": "801000", "lr": "0.00020101", "gnorm": "0.802", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "261543"} +[2022-08-04 02:08:15,723][train_inner][INFO] - {"epoch": 16, "update": 15.567, "loss": "1.989", "ppl": "3.97", "wps": "364406", "ups": "3.09", "wpb": "117750", "bsz": "256", "num_updates": "801200", "lr": "0.000200808", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.7", "wall": "261608"} +[2022-08-04 02:09:20,735][train_inner][INFO] - {"epoch": 16, "update": 15.571, "loss": "1.992", "ppl": "3.98", "wps": "363858", "ups": "3.08", "wpb": "118272", "bsz": "256", "num_updates": "801400", "lr": "0.000200606", "gnorm": "0.798", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "261673"} +[2022-08-04 02:10:25,619][train_inner][INFO] - {"epoch": 16, "update": 15.575, "loss": "1.992", "ppl": "3.98", "wps": "365766", "ups": "3.08", "wpb": "118660", "bsz": "256", "num_updates": "801600", "lr": "0.000200404", "gnorm": "0.791", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25", "wall": "261738"} +[2022-08-04 02:11:30,554][train_inner][INFO] - {"epoch": 16, "update": 15.579, "loss": "1.992", "ppl": "3.98", "wps": "364894", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "801800", "lr": "0.000200202", "gnorm": "0.793", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26", "wall": "261803"} +[2022-08-04 02:12:35,315][train_inner][INFO] - {"epoch": 16, "update": 15.583, "loss": "1.993", "ppl": "3.98", "wps": "366675", "ups": "3.09", "wpb": "118730", "bsz": "256", "num_updates": "802000", "lr": "0.0002", "gnorm": "0.797", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "261868"} +[2022-08-04 02:13:40,140][train_inner][INFO] - {"epoch": 16, "update": 15.586, "loss": "1.995", "ppl": "3.98", "wps": "364370", "ups": "3.09", "wpb": "118099", "bsz": "256", "num_updates": "802200", "lr": "0.000199798", "gnorm": "0.802", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "261932"} +[2022-08-04 02:14:45,004][train_inner][INFO] - {"epoch": 16, "update": 15.59, "loss": "1.988", "ppl": "3.97", "wps": "364174", "ups": "3.08", "wpb": "118108", "bsz": "256", "num_updates": "802400", "lr": "0.000199596", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "261997"} +[2022-08-04 02:15:50,023][train_inner][INFO] - {"epoch": 16, "update": 15.594, "loss": "1.983", "ppl": "3.95", "wps": "363235", "ups": "3.08", "wpb": "118083", "bsz": "256", "num_updates": "802600", "lr": "0.000199394", "gnorm": "0.798", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "262062"} +[2022-08-04 02:16:54,812][train_inner][INFO] - {"epoch": 16, "update": 15.598, "loss": "1.983", "ppl": "3.95", "wps": "366229", "ups": "3.09", "wpb": "118638", "bsz": "256", "num_updates": "802800", "lr": "0.000199192", "gnorm": "0.795", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.5", "wall": "262127"} +[2022-08-04 02:17:59,993][train_inner][INFO] - {"epoch": 16, "update": 15.602, "loss": "1.994", "ppl": "3.98", "wps": "362967", "ups": "3.07", "wpb": "118290", "bsz": "256", "num_updates": "803000", "lr": "0.00019899", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "262192"} +[2022-08-04 02:19:05,110][train_inner][INFO] - {"epoch": 16, "update": 15.606, "loss": "1.988", "ppl": "3.97", "wps": "363442", "ups": "3.07", "wpb": "118330", "bsz": "256", "num_updates": "803200", "lr": "0.000198788", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "262257"} +[2022-08-04 02:20:09,959][train_inner][INFO] - {"epoch": 16, "update": 15.61, "loss": "1.99", "ppl": "3.97", "wps": "365126", "ups": "3.08", "wpb": "118390", "bsz": "256", "num_updates": "803400", "lr": "0.000198586", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.2", "wall": "262322"} +[2022-08-04 02:21:14,654][train_inner][INFO] - {"epoch": 16, "update": 15.614, "loss": "1.982", "ppl": "3.95", "wps": "364938", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "803600", "lr": "0.000198384", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "262387"} +[2022-08-04 02:22:19,297][train_inner][INFO] - {"epoch": 16, "update": 15.618, "loss": "1.986", "ppl": "3.96", "wps": "367672", "ups": "3.09", "wpb": "118835", "bsz": "256", "num_updates": "803800", "lr": "0.000198182", "gnorm": "0.791", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "262452"} +[2022-08-04 02:23:23,889][train_inner][INFO] - {"epoch": 16, "update": 15.621, "loss": "1.986", "ppl": "3.96", "wps": "364464", "ups": "3.1", "wpb": "117704", "bsz": "256", "num_updates": "804000", "lr": "0.00019798", "gnorm": "0.803", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "262516"} +[2022-08-04 02:24:28,894][train_inner][INFO] - {"epoch": 16, "update": 15.625, "loss": "1.986", "ppl": "3.96", "wps": "364697", "ups": "3.08", "wpb": "118534", "bsz": "256", "num_updates": "804200", "lr": "0.000197778", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "262581"} +[2022-08-04 02:25:34,062][train_inner][INFO] - {"epoch": 16, "update": 15.629, "loss": "1.986", "ppl": "3.96", "wps": "363480", "ups": "3.07", "wpb": "118436", "bsz": "256", "num_updates": "804400", "lr": "0.000197576", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "262646"} +[2022-08-04 02:26:38,723][train_inner][INFO] - {"epoch": 16, "update": 15.633, "loss": "1.979", "ppl": "3.94", "wps": "367340", "ups": "3.09", "wpb": "118760", "bsz": "256", "num_updates": "804600", "lr": "0.000197374", "gnorm": "0.792", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "262711"} +[2022-08-04 02:27:43,929][train_inner][INFO] - {"epoch": 16, "update": 15.637, "loss": "1.991", "ppl": "3.97", "wps": "363572", "ups": "3.07", "wpb": "118534", "bsz": "255.9", "num_updates": "804800", "lr": "0.000197172", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "262776"} +[2022-08-04 02:28:48,841][train_inner][INFO] - {"epoch": 16, "update": 15.641, "loss": "1.982", "ppl": "3.95", "wps": "364812", "ups": "3.08", "wpb": "118400", "bsz": "256", "num_updates": "805000", "lr": "0.00019697", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "262841"} +[2022-08-04 02:29:53,531][train_inner][INFO] - {"epoch": 16, "update": 15.645, "loss": "1.982", "ppl": "3.95", "wps": "366009", "ups": "3.09", "wpb": "118385", "bsz": "256", "num_updates": "805200", "lr": "0.000196768", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "262906"} +[2022-08-04 02:30:57,736][train_inner][INFO] - {"epoch": 16, "update": 15.649, "loss": "1.986", "ppl": "3.96", "wps": "368329", "ups": "3.12", "wpb": "118241", "bsz": "256", "num_updates": "805400", "lr": "0.000196566", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.9", "wall": "262970"} +[2022-08-04 02:32:02,407][train_inner][INFO] - {"epoch": 16, "update": 15.653, "loss": "1.993", "ppl": "3.98", "wps": "366642", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "805600", "lr": "0.000196364", "gnorm": "0.797", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "263035"} +[2022-08-04 02:33:07,113][train_inner][INFO] - {"epoch": 16, "update": 15.656, "loss": "1.988", "ppl": "3.97", "wps": "364001", "ups": "3.09", "wpb": "117764", "bsz": "256", "num_updates": "805800", "lr": "0.000196162", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "263099"} +[2022-08-04 02:34:11,647][train_inner][INFO] - {"epoch": 16, "update": 15.66, "loss": "1.991", "ppl": "3.97", "wps": "365784", "ups": "3.1", "wpb": "118025", "bsz": "256", "num_updates": "806000", "lr": "0.00019596", "gnorm": "0.797", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "263164"} +[2022-08-04 02:35:16,846][train_inner][INFO] - {"epoch": 16, "update": 15.664, "loss": "1.986", "ppl": "3.96", "wps": "362789", "ups": "3.07", "wpb": "118267", "bsz": "256", "num_updates": "806200", "lr": "0.000195758", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "263229"} +[2022-08-04 02:36:21,746][train_inner][INFO] - {"epoch": 16, "update": 15.668, "loss": "1.988", "ppl": "3.97", "wps": "364062", "ups": "3.08", "wpb": "118136", "bsz": "256", "num_updates": "806400", "lr": "0.000195556", "gnorm": "0.798", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "263294"} +[2022-08-04 02:37:26,859][train_inner][INFO] - {"epoch": 16, "update": 15.672, "loss": "1.983", "ppl": "3.95", "wps": "364622", "ups": "3.07", "wpb": "118707", "bsz": "255.9", "num_updates": "806600", "lr": "0.000195354", "gnorm": "0.796", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "263359"} +[2022-08-04 02:38:31,517][train_inner][INFO] - {"epoch": 16, "update": 15.676, "loss": "1.99", "ppl": "3.97", "wps": "366808", "ups": "3.09", "wpb": "118583", "bsz": "256", "num_updates": "806800", "lr": "0.000195152", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.3", "wall": "263424"} +[2022-08-04 02:39:37,093][train_inner][INFO] - {"epoch": 16, "update": 15.68, "loss": "1.986", "ppl": "3.96", "wps": "360002", "ups": "3.05", "wpb": "118037", "bsz": "256", "num_updates": "807000", "lr": "0.000194949", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "263489"} +[2022-08-04 02:40:37,549][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 +[2022-08-04 02:40:42,096][train_inner][INFO] - {"epoch": 16, "update": 15.684, "loss": "1.988", "ppl": "3.97", "wps": "361459", "ups": "3.08", "wpb": "117476", "bsz": "256", "num_updates": "807200", "lr": "0.000194747", "gnorm": "0.803", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "263554"} +[2022-08-04 02:41:47,846][train_inner][INFO] - {"epoch": 16, "update": 15.688, "loss": "1.986", "ppl": "3.96", "wps": "360883", "ups": "3.04", "wpb": "118641", "bsz": "256", "num_updates": "807400", "lr": "0.000194545", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "263620"}