Merge pull request #978 from bmaltais/dev2
v21.7.7
bmaltais authored Jun 12, 2023
2 parents fa8fbe1 + c292364 commit c0eb172
Showing 19 changed files with 344 additions and 212 deletions.
Empty file added .cache/config/.gitkeep
Empty file added .cache/keras/.gitkeep
Empty file added .cache/nv/.gitkeep
Empty file added .cache/triton/.gitkeep
Empty file added .cache/user/.gitkeep
2 changes: 1 addition & 1 deletion Dockerfile
@@ -46,4 +46,4 @@ COPY --chown=appuser . .
STOPSIGNAL SIGINT
ENV LD_PRELOAD=libtcmalloc.so
ENV PATH="$PATH:/home/appuser/.local/bin"
CMD python3 "./kohya_gui.py" ${CLI_ARGS} --listen 0.0.0.0 --server_port 7860
CMD python3 "./kohya_gui.py" ${CLI_ARGS} --listen 0.0.0.0 --server_port 7860 --headless
6 changes: 6 additions & 0 deletions README.md
@@ -345,6 +345,12 @@ This will store a backup file with your current locally installed pip packages a

## Change History

* 2023/06/12 (v21.7.7)
- Add `Print only` button to all training tabs
- Sort json file vars for easier visual search
- Fixed a bug where clip skip did not work when training with weighted captions (`--weighted_captions` specified) and when generating sample images during training.
- Add verification and reporting of bad dataset folder name structure for DB, LoRA and TI training.
  - Various docker build fixes.
* 2023/06/06 (v21.7.6)
- Small UI improvements
- Fix `train_network.py` to probably work with older versions of LyCORIS.
Empty file added dataset/.gitkeep
59 changes: 59 additions & 0 deletions docs/image_folder_structure.md
@@ -0,0 +1,59 @@
# Dreambooth, LoRA and TI image folder structure

To ensure successful training with Kohya, it is crucial to follow a specific folder structure. Each concept subfolder is named `<repeats>_<concept>` (for example `30_cat`), where the leading number specifies how many times the images in that folder are repeated during training. Please adhere to the following structure precisely:

Folder Structure Example:

```txt
c:
|
├──images
| |
| ├── 30_cat
| | |
| | ├── image1.jpg
| | ├── image1.txt
| | ├── image2.png
| | └── image2.txt
| |
| ├── 30_dog
| | |
| | ├── image1.jpg
| | ├── image1.txt
| | ├── image2.png
| | └── image2.txt
| |
| └── 40_black mamba
| |
| ├── image1.jpg
| ├── image1.txt
| ├── image2.png
| └── image2.txt
|
├──regularization
| |
| ├── 1_cat
| | |
| | ├── reg1.jpg
| | ├── reg2.jpg
| |
| ├── 1_dog
| | |
| | ├── reg1.jpg
| | ├── reg2.jpg
| |
| └── 1_black mamba
| |
| ├── reg1.jpg
| ├── reg2.jpg
```
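
This commit also adds a `verify_image_folder_pattern` check to the training tabs (see `dreambooth_gui.py` below) that reports folders which do not follow this convention. Purely as an illustration of the rule, and not the actual implementation, a minimal check of the `<repeats>_<concept>` naming could look like this:

```python
import os
import re

# Concept folders must be named "<repeats>_<concept>", e.g. "30_cat" or "40_black mamba".
FOLDER_PATTERN = re.compile(r'^\d+_.+$')

def check_image_folder_structure(parent_dir: str) -> bool:
    """Return True if every visible subfolder of parent_dir follows the naming rule."""
    ok = True
    for name in sorted(os.listdir(parent_dir)):
        path = os.path.join(parent_dir, name)
        if not os.path.isdir(path) or name.startswith('.'):
            continue  # skip plain files and hidden folders
        if not FOLDER_PATTERN.match(name):
            print(f'Bad folder name: {name!r} (expected e.g. "30_cat")')
            ok = False
    return ok

# Example: check_image_folder_structure('c:/images') returns True for the layout above.
```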

Please note the following important information regarding caption files and their impact on concept names during model training:

If a file with a `.txt` or `.caption` extension has the same name as an image in the same subfolder, its content takes precedence over the concept name during training. For example, if the `30_cat` subfolder contains `image1.jpg` together with `image1.txt` or `image1.caption`, the caption used for that image is taken from the text file rather than from the subfolder name.

Ensure that the content of such text files accurately reflects the desired concept name or any relevant caption information associated with the corresponding image.

By considering this information and maintaining the proper folder structure, including any necessary text or caption files, you can ensure a smooth and effective training process with Kohya.
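
To make the caption precedence rule concrete, here is a small illustrative sketch, again not the code kohya_ss itself uses, of how the caption for a single image can be resolved:

```python
import os

CAPTION_EXTS = ('.txt', '.caption')

def resolve_caption(image_path: str) -> str:
    """Return the caption for an image: a sidecar .txt/.caption file wins,
    otherwise fall back to the concept taken from the "<repeats>_<concept>" folder name."""
    stem, _ = os.path.splitext(image_path)
    for ext in CAPTION_EXTS:
        sidecar = stem + ext
        if os.path.isfile(sidecar):
            with open(sidecar, encoding='utf-8') as f:
                return f.read().strip()
    folder = os.path.basename(os.path.dirname(image_path))
    repeats, _, concept = folder.partition('_')
    return concept if repeats.isdigit() else folder

# resolve_caption('c:/images/30_cat/image1.jpg')
# -> contents of image1.txt if present, otherwise "cat"
```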
71 changes: 42 additions & 29 deletions dreambooth_gui.py
@@ -28,6 +28,7 @@
update_my_data,
check_if_model_exist,
output_message,
verify_image_folder_pattern,
)
from library.tensorboard_gui import (
gradio_tensorboard,
@@ -152,12 +153,8 @@ def save_configuration(
# Return the values of the variables as a dictionary
variables = {
name: value
for name, value in parameters # locals().items()
if name
not in [
'file_path',
'save_as',
]
for name, value in sorted(parameters, key=lambda x: x[0])
if name not in ['file_path', 'save_as']
}

# Extract the destination directory from the file path
@@ -281,6 +278,7 @@ def open_configuration(

def train_model(
headless,
print_only,
pretrained_model_name_or_path,
v2,
v_parameterization,
@@ -354,6 +352,9 @@
wandb_api_key,
scale_v_pred_loss_like_noise_pred,
):
print_only_bool = True if print_only.get('label') == 'True' else False
log.info(f'Start training Dreambooth...')

headless_bool = True if headless.get('label') == 'True' else False

if pretrained_model_name_or_path == '':
@@ -373,6 +374,9 @@
msg='Image folder does not exist', headless=headless_bool
)
return

if not verify_image_folder_pattern(train_data_dir):
return

if reg_data_dir != '':
if not os.path.exists(reg_data_dir):
@@ -381,6 +385,9 @@
headless=headless_bool,
)
return

if not verify_image_folder_pattern(reg_data_dir):
return

if output_dir == '':
output_message(
@@ -401,16 +408,6 @@
)
lr_warmup = '0'

# if float(noise_offset) > 0 and (
# multires_noise_iterations > 0 or multires_noise_discount > 0
# ):
# output_message(
# msg="noise offset and multires_noise can't be set at the same time. Only use one or the other.",
# title='Error',
# headless=headless_bool,
# )
# return

# Get a list of all subfolders in train_data_dir, excluding hidden folders
subfolders = [
f
@@ -523,7 +520,7 @@
run_cmd += f' --train_data_dir="{train_data_dir}"'
if len(reg_data_dir):
run_cmd += f' --reg_data_dir="{reg_data_dir}"'
run_cmd += f' --resolution={max_resolution}'
run_cmd += f' --resolution="{max_resolution}"'
run_cmd += f' --output_dir="{output_dir}"'
if not logging_dir == '':
run_cmd += f' --logging_dir="{logging_dir}"'
@@ -615,20 +612,28 @@
output_dir,
)

log.info(run_cmd)

# Run the command
if os.name == 'posix':
os.system(run_cmd)
if print_only_bool:
log.warning(
'Here is the trainer command as a reference. It will not be executed:\n'
)
log.info(run_cmd)
else:
subprocess.run(run_cmd)
log.info(run_cmd)

# Run the command
if os.name == 'posix':
os.system(run_cmd)
else:
subprocess.run(run_cmd)

# check if output_dir/last is a folder... therefore it is a diffuser model
last_dir = pathlib.Path(f'{output_dir}/{output_name}')
# check if output_dir/last is a folder... therefore it is a diffuser model
last_dir = pathlib.Path(f'{output_dir}/{output_name}')

if not last_dir.is_dir():
# Copy inference model for v2 if required
save_inference_file(output_dir, v2, v_parameterization, output_name)
if not last_dir.is_dir():
# Copy inference model for v2 if required
save_inference_file(
output_dir, v2, v_parameterization, output_name
)


def dreambooth_tab(
@@ -859,6 +864,8 @@ def dreambooth_tab(

button_run = gr.Button('Train model', variant='primary')

button_print = gr.Button('Print training command')

# Setup gradio tensorboard buttons
button_start_tensorboard, button_stop_tensorboard = gradio_tensorboard()

@@ -978,7 +985,13 @@

button_run.click(
train_model,
inputs=[dummy_headless] + settings_list,
inputs=[dummy_headless] + [dummy_db_false] + settings_list,
show_progress=False,
)

button_print.click(
train_model,
inputs=[dummy_headless] + [dummy_db_true] + settings_list,
show_progress=False,
)

80 changes: 51 additions & 29 deletions finetune_gui.py
@@ -151,12 +151,8 @@ def save_configuration(
# Return the values of the variables as a dictionary
variables = {
name: value
for name, value in parameters # locals().items()
if name
not in [
'file_path',
'save_as',
]
for name, value in sorted(parameters, key=lambda x: x[0])
if name not in ['file_path', 'save_as']
}

# Extract the destination directory from the file path
@@ -286,6 +282,7 @@

def train_model(
headless,
print_only,
pretrained_model_name_or_path,
v2,
v_parameterization,
@@ -365,6 +362,9 @@
wandb_api_key,
scale_v_pred_loss_like_noise_pred,
):
print_only_bool = True if print_only.get('label') == 'True' else False
log.info(f'Start Finetuning...')

headless_bool = True if headless.get('label') == 'True' else False

if check_if_model_exist(
@@ -407,11 +407,12 @@

log.info(run_cmd)

# Run the command
if os.name == 'posix':
os.system(run_cmd)
else:
subprocess.run(run_cmd)
if not print_only_bool:
# Run the command
if os.name == 'posix':
os.system(run_cmd)
else:
subprocess.run(run_cmd)

# create images buckets
if generate_image_buckets:
@@ -432,11 +433,12 @@

log.info(run_cmd)

# Run the command
if os.name == 'posix':
os.system(run_cmd)
else:
subprocess.run(run_cmd)
if not print_only_bool:
# Run the command
if os.name == 'posix':
os.system(run_cmd)
else:
subprocess.run(run_cmd)

image_num = len(
[
@@ -495,7 +497,7 @@
run_cmd += f' --learning_rate={learning_rate}'

run_cmd += ' --enable_bucket'
run_cmd += f' --resolution={max_resolution}'
run_cmd += f' --resolution="{max_resolution}"'
run_cmd += f' --min_bucket_reso={min_bucket_reso}'
run_cmd += f' --max_bucket_reso={max_bucket_reso}'

@@ -575,20 +577,28 @@
output_dir,
)

log.info(run_cmd)

# Run the command
if os.name == 'posix':
os.system(run_cmd)
if print_only_bool:
log.warning(
'Here is the trainer command as a reference. It will not be executed:\n'
)
log.info(run_cmd)
else:
subprocess.run(run_cmd)
log.info(run_cmd)

# Run the command
if os.name == 'posix':
os.system(run_cmd)
else:
subprocess.run(run_cmd)

# check if output_dir/last is a folder... therefore it is a diffuser model
last_dir = pathlib.Path(f'{output_dir}/{output_name}')
# check if output_dir/last is a folder... therefore it is a diffuser model
last_dir = pathlib.Path(f'{output_dir}/{output_name}')

if not last_dir.is_dir():
# Copy inference model for v2 if required
save_inference_file(output_dir, v2, v_parameterization, output_name)
if not last_dir.is_dir():
# Copy inference model for v2 if required
save_inference_file(
output_dir, v2, v_parameterization, output_name
)


def remove_doublequote(file_path):
@@ -823,6 +833,8 @@ def finetune_tab(headless=False):

button_run = gr.Button('Train model', variant='primary')

button_print = gr.Button('Print training command')

# Setup gradio tensorboard buttons
button_start_tensorboard, button_stop_tensorboard = gradio_tensorboard()

@@ -917,7 +929,17 @@
scale_v_pred_loss_like_noise_pred,
]

button_run.click(train_model, inputs=[dummy_headless] + settings_list)
button_run.click(
train_model,
inputs=[dummy_headless] + [dummy_db_false] + settings_list,
show_progress=False,
)

button_print.click(
train_model,
inputs=[dummy_headless] + [dummy_db_true] + settings_list,
show_progress=False,
)

button_open_config.click(
open_configuration,
