---
# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
# Question: human-readable title of the generated project.
project_name:
  help: 'Short name for your project'
  type: str
  default: 'ZenML LLM Finetuning project'
# Question: version string of the generated project. Kept as a quoted string
# so it is never read as a number.
version:
  help: "Version of your project\n"
  type: str
  default: '0.1.0'
# Question: license of the generated project. Each entry under `choices` maps
# the label shown to the user (left) to the stored answer (right).
open_source_license:
  help: 'The license under which your project will be released'
  type: str
  default: apache
  choices:
    'Apache Software License 2.0': apache
    'MIT license': mit
    'BSD license': bsd
    'ISC license': isc
    'GNU General Public License v3': gpl3
    # Quoted so the answer is unambiguously the string "none".
    'Not open source': 'none'
# Question: copyright holder. The `when` expression ties this prompt to the
# `open_source_license` answer.
full_name:
  help: 'The name of the person/entity holding the copyright'
  type: str
  default: 'ZenML GmbH'
  when: '{{ open_source_license }}'
# Question: contact e-mail of the copyright holder. The `when` expression
# ties this prompt to the `open_source_license` answer.
email:
  type: str
  help: >-
    The email of the person/entity holding the copyright
  # The previous default, `[email protected]`, was an e-mail-obfuscation
  # placeholder that YAML parses as a one-element list rather than a string.
  # NOTE(review): the address below is restored by assumption; confirm it.
  default: 'info@zenml.io'
  when: "{{ open_source_license }}"
# Question: machine-friendly product identifier.
product_name:
  type: str
  help: The technical name of the data product you are building
  default: llm-peft
  # The validator renders an error message only when the answer does not
  # match the regex: a lowercase letter first, then lowercase letters,
  # digits or dashes. The message states exactly that rule.
  validator: >-
    {% if not (product_name | regex_search('^[a-z][-a-z0-9]*$')) %}
    The product name can only contain lowercase letters, digits and dashes, and must start with a lowercase letter.
    {% endif %}
# Question: Hugging Face repository id of the base model.
model_repository:
  help: "Huggingface repository of the model to finetune.\n"
  type: str
  default: 'microsoft/phi-2'
# Question: number of steps for the finetuning job (integer answer).
steps_of_finetuning:
  help: 'The number of steps of finetuning job.'
  type: int
  default: 300
# Question: toggle for fast tokenization; disabled by default.
use_fast_tokenizer:
  type: bool
  # Folded scalar: the two source lines below render as one line of help text.
  help: >-
    Whether to use fast tokenization or not. Make sure your base model
    supports it.
  default: false
# Question: CUDA version. Each entry under `choices` maps the label shown to
# the user (left) to the stored answer (right).
cuda_version:
  help: 'The available cuda version. (Only relevant when using a remote orchestrator)'
  type: str
  default: 'cuda11.8'
  choices:
    'CUDA 11.8': 'cuda11.8'
    'CUDA 12.1': 'cuda12.1'
# Question: system prompt for the finetuning job. The default is a literal
# block, so its three lines and their line breaks are kept exactly as written.
system_prompt:
  help: "The system prompt to be used for the finetuning job.\n"
  type: str
  default: |
    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
# Question: identifier of the dataset used for finetuning.
dataset_name:
  help: "The dataset to be used for the finetuning job.\n"
  type: str
  default: 'gem/viggo'
# Question: name of the operator used for the finetuning and evaluation jobs.
step_operator:
  help: 'The operator to be used for the finetuning and evaluation jobs.'
  type: str
  default: 'gcp_a100'
# Question: toggle for bf16 during finetuning; enabled by default.
bf16:
  help: "Whether to use bf16 for the finetuning job.\n"
  type: bool
  default: true
# Question: optional ZenML server URL; the default is an empty string.
zenml_server_url:
  help: 'The URL of the ZenML server [Optional]'
  type: str
  default: ''
# CONFIGURATION -------------------------
# Underscore-prefixed keys are Copier settings rather than questions.

# Empty suffix setting for template files.
_templates_suffix: ''

# The template sources live in this subdirectory of the repository.
_subdirectory: './template'

# Paths left out of the generated project.
_exclude:
  - 'license'
  - 'license_header'
# Shell commands for the generated project. Each of the three ruff tasks
# prints a notice instead of running when `_copier_conf.os` is 'windows'.
# Otherwise ruff's output is sent to /dev/null and `|| true` makes the
# command exit successfully even if ruff fails.
#
# The ruff tasks use folded scalars (`>-`). The backslash-continued lines are
# indented deeper than the rest so that folding keeps their line breaks, which
# the trailing backslashes rely on.
_tasks:
  # Remove unused imports and variables
  - >-
    {% if _copier_conf.os == 'windows' %}
    echo "Auto-formatting not supported on Windows"
    {% else %}
    {{ _copier_python }} -m ruff check --select F401,F841 --fix \
        --exclude "__init__.py" --isolated \
        steps pipelines materializers run.py > /dev/null 2>&1 || true
    {% endif %}
  # Sort imports
  - >-
    {% if _copier_conf.os == 'windows' %}
    echo "Auto-formatting not supported on Windows"
    {% else %}
    {{ _copier_python }} -m ruff check --select I \
        --fix --ignore D \
        steps pipelines materializers run.py > /dev/null 2>&1 || true
    {% endif %}
  # Auto-format code
  - >-
    {% if _copier_conf.os == 'windows' %}
    echo "Auto-formatting not supported on Windows"
    {% else %}
    {{ _copier_python }} -m ruff format \
        --exclude '' --line-length 79 \
        steps pipelines materializers run.py > /dev/null 2>&1 || true
    {% endif %}
  # Print getting-started instructions. This is a literal scalar (`|`), so
  # every echo stays on its own line. The `zenml login` hint is rendered only
  # when `zenml_server_url` is non-empty.
  - |
    echo "Congratulations, your project has been generated in the '{{ _copier_conf.dst_path }}' directory."
    echo "You can now run the following commands to get started:"
    echo " cd {{ _copier_conf.dst_path }}"
    echo " pip install -r requirements.txt"
    {%- if zenml_server_url %}
    echo " zenml login {{ zenml_server_url }}"
    {%- endif %}
    echo " python run.py --config orchestrator_finetune.yaml"
    echo "Next, you should take a look at the '{{ _copier_conf.dst_path }}/README.md' file in the generated project."
    echo "Happy coding!"
# Additional Jinja2 extensions to load, given as dotted import paths.
_jinja_extensions:
  - 'jinja2_time.TimeExtension'