-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathconfig.py
More file actions
58 lines (37 loc) · 1.4 KB
/
config.py
File metadata and controls
58 lines (37 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Central configuration for the LAMOGEL pipeline.
class Config:
    """LAMOGEL (Language Model Generation Lifecycle) configuration.

    Customize the lifecycle by editing the class attributes below,
    then re-run ``app.py`` to apply the new values.
    """

    DEBUG_MODE: bool = True

    # --- Corpus locations ---
    FINE_TUNING_CORPUS_PATH: str = "data/corpus/fine_tuning/"
    PRE_TRAINING_CORPUS_PATH: str = "data/corpus/pre_training/"
    FINE_TUNING_FILENAME: str = "fine_tuning_corpus.txt"
    PRE_TRAINING_FILENAME: str = "pre_training_corpus.txt"

    # --- Train/pre-train split ---
    # Fraction of the data routed to fine-tuning: 1.0 sends everything to
    # fine-tuning, 0.0 sends everything to pre-training (the pre-training
    # portion feeds the MLM model).
    SPLIT_RANGE: float = 0.995
    # Shuffle the returned fine-tuning and pre-training datasets.
    IS_SHUFFLE: bool = True

    # --- Pre-processing ---
    REMOVE_NUMBER: bool = False

    # --- Model and tokenizer ---
    PRE_TRAINED_MODEL: str = "bert-base-uncased"
    TOKENIZER_MODEL: str = "bert-base-uncased"
    TOKENIZER_MAX_LENGTH: int = 100
    EPOCHS: int = 1
    BATCH_SIZE: int = 8
    # Probability of masking a token for MLM training.
    MASK_CONFIDENCE: float = 0.15
    LR: float = 5e-5
    GENERATED_MODEL_PATH: str = "data/models/"
    GENERATED_MODEL_NAME: str = "LAMOGEL"
    GENERATED_MODEL_FORMAT: str = ".pt"

    # --- Database ---
    # NOTE(review): placeholder credentials — replace with real values (or
    # load from environment variables) before deploying.
    DB: dict = {
        'USER': 'USER',
        'PASSWORD': 'PASSWORD',
        'ADDRESS': 'ADDRESS',
        'PORT': 'PORT',
        'DB_NAME': 'DB_NAME'
    }