# params.yaml

# model
# NOTE(review): the value has the shape of a hub-style "<org>/<name>" model id —
# confirm how the consumer resolves it.
model_name_or_path: 'unicamp-dl/ptt5-base-portuguese-vocab'
do_lower_case: false
deepspeed: false

# neptune: on/off flag, followed by project / experiment identifiers and tags
neptune: false
neptune_project: 'ramon.pires/information-extraction-t5'
experiment_name: 'experiment01'
tags:
  - ptt5
  - compound

# optimizer
optimizer: AdamW
# Exponent-notation floats are written with an explicit decimal point.
# YAML 1.1 loaders (e.g. PyYAML) resolve a bare `1e-4` as the *string* "1e-4",
# while `1.0e-4` is resolved as a float by both YAML 1.1 and YAML 1.2 loaders.
lr: 1.0e-4
weight_decay: 1.0e-5

# preprocess dataset
# Each list-valued key below currently holds exactly one entry.
# NOTE(review): presumably the lists are matched position-by-position with
# `project` — confirm against the consumer.
project:
  - form
raw_data_file:
  - data/raw/sample_train.json
raw_valid_data_file:
  - null  # one-element list holding null (not an empty list)
raw_test_data_file:
  - data/raw/sample_test.json

# processed dataset files
train_file: data/processed/train-v0.1.json
valid_file: data/processed/dev-v0.1.json
test_file: data/processed/test-v0.1.json
# Field identifiers; every entry carries the `form.` prefix.
type_names:
  - form.etiqueta
  - form.agencia
  - form.conta_corrente
  - form.cpf
  - form.nome_completo
  - form.n_doc_serie
  - form.orgao_emissor
  - form.data_emissao
  - form.data_nascimento
  - form.nome_mae
  - form.nome_pai
  - form.endereco
# Subset of the identifiers above (only `form.endereco` is listed).
use_compound_question:
  - form.endereco
# One-element list holding null (not an empty list).
return_raw_text:
  - null

# question selection and validation split
# NOTE(review): `valid_percent` is presumably the fraction of training data held
# out for validation — confirm.
train_force_qa: true
train_choose_question: 'first'
valid_percent: 0.2

# context windowing and length limits
# NOTE(review): units (tokens vs. characters) are not visible here — confirm.
context_content: 'windows_token'
window_overlap: 0.2
max_windows: 3
max_size: 2048
max_seq_length: 512

# dataset: batching
train_batch_size: 8
val_batch_size: 8
num_workers: 6

# dataset: sampling
shuffle_train: true
use_sentence_id: false
# NOTE(review): -1 looks like a sentinel value — confirm its meaning.
negative_ratio: -1

# reproducibility
seed: 20210519

# inference: generation limits
num_beams: 5
max_length: 200

# post-processing switches (all currently enabled)
get_highestprob_answer: true
split_compound_answers: true
group_qas: true
normalize_outputs: true
only_misprediction_outputs: true
use_cached_predictions: true

# Trainer
# NOTE(review): these key names match PyTorch Lightning `Trainer` arguments;
# presumably they are forwarded to it — confirm against the consumer.

# hardware and numeric precision
accelerator: 'auto'
devices: 'auto'
precision: 32
amp_backend: 'native'

# optimization loop
max_epochs: 26
accumulate_grad_batches: 2
gradient_clip_val: 1.0
deterministic: true

# validation schedule
val_check_interval: 1.0
check_val_every_n_epoch: 2
limit_val_batches: 0.5