-
Notifications
You must be signed in to change notification settings - Fork 22
Questions about FLUX.2-klein-base-4B+FlowGRPO #108
Copy link
Copy link
Open
Description
作者您好,我在复现FLUX.2-klein-base-4B+FlowGRPO时发现训练的曲线有些不稳定,想知道这是否正常。我同时用hpsv2和pickscore作为reward,训练时采样分辨率设置为512,eval为1024。
详细配置如下(主要的改变为kl_beta: 1.0e-3,原始的配置文件中设置kl_beta: 0):

---
# NOTE(review): indentation was lost in the paste; structure reconstructed from the
# section comments and the upstream config layout. `log` and `scheduler` are assumed
# to be top-level sections — confirm against the repository's default config file.

# Environment Configuration
launcher: "accelerate"  # Options: accelerate
config_file: config/accelerate_configs/fsdp_full_shard.yaml  # Switch to config/accelerate_configs/fsdp2.yaml if you have limited GPU memory.
num_processes: 4  # Number of processes to launch (overrides config file)
main_process_port: 29500
mixed_precision: "bf16"  # Options: no, fp16, bf16
run_name: null  # Run name (auto: {model_type}_{finetune_type}_{trainer_type}_{timestamp})
project: "Flow-Factory"  # Project name for logging
logging_backend: "wandb"  # Options: wandb, swanlab, tensorboard, none

# Data Configuration
data:
  dataset_dir: "dataset/pickscore"  # Path to dataset folder
  preprocessing_batch_size: 8  # Batch size for preprocessing
  dataloader_num_workers: 16  # Number of workers for DataLoader
  force_reprocess: true  # Force reprocessing of the dataset
  cache_dir: "~/jcy/.cache/flow_factory/datasets"  # Cache directory for preprocessed datasets
  max_dataset_size: null  # Limit the maximum number of samples in the dataset

# Model Configuration
model:
  finetune_type: 'lora'  # Options: full, lora
  lora_rank: 64
  lora_alpha: 128
  target_modules: "default"  # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
  model_name_or_path: "/primus_xpfs_workspace_T04/fangtai/MODELS/black-forest-labs/FLUX.2-klein-base-4B"  # Options: black-forest-labs/FLUX.2-klein-base-4B, black-forest-labs/FLUX.2-klein-base-9B
  model_type: "flux2-klein"
  resume_path: null  # Path to load previous checkpoint/lora adapter
  resume_type: null  # Options: lora, full, state. Null to auto-detect based on `finetune_type`

log:
  save_dir: "~/jcy/Flow-Factory"  # Directory to save model checkpoints and logs
  save_freq: 20  # Save frequency in epochs (0 to disable)
  save_model_only: true  # Save only the model weights (not optimizer, scheduler, etc.)

# Training Configuration
train:
  # Trainer settings
  trainer_type: 'grpo'  # Options: 'grpo', 'nft', 'awm'
  advantage_aggregation: 'gdpo'  # Options: 'sum', 'gdpo'
  # Clipping
  clip_range: 1.0e-4  # PPO/GRPO clipping range
  adv_clip_range: 5.0  # Advantage clipping range
  # KL div
  kl_type: 'v-based'  # Options: 'x-based', 'v-based'
  kl_beta: 1.0e-3  # KL divergence coefficient. Set ~1e-2 for 'x-based' and ~1e-3 for 'v-based'.
  ref_param_device: 'cuda'  # Options: cpu, cuda
  # Sampling settings
  resolution: 512  # Can be int or [height, width]
  condition_image_size: 512
  num_inference_steps: 10  # Number of timesteps
  guidance_scale: 4.0  # Guidance scale for sampling
  # Batch and sampling
  per_device_batch_size: 1  # Batch size per device. For image-to-image task, this will always fallback to 1.
  group_size: 16  # Group size for GRPO sampling
  global_std: false  # Use global std for advantage normalization
  unique_sample_num_per_epoch: 48  # Unique samples per group
  gradient_step_per_epoch: 2  # Gradient steps per epoch
  # Optimization
  learning_rate: 3.0e-4  # Initial learning rate
  adam_weight_decay: 1.0e-4  # AdamW weight decay
  adam_betas: [0.9, 0.999]  # AdamW betas
  adam_epsilon: 1.0e-8  # AdamW epsilon
  max_grad_norm: 1.0  # Max gradient norm for clipping
  # EMA
  ema_decay: 0.9  # EMA decay rate (0 to disable)
  ema_update_interval: 4  # EMA update interval (in epochs)
  ema_device: "cuda"  # Device to store EMA model (options: cpu, cuda)
  # Gradient checkpointing
  enable_gradient_checkpointing: false  # Enable gradient checkpointing to save memory with extra compute
  # Seed
  seed: 42  # Random seed

# Scheduler Configuration
scheduler:
  dynamics_type: "Flow-SDE"  # Options: Flow-SDE, Dance-SDE, CPS, ODE
  noise_level: 0.7  # Noise level for sampling
  num_sde_steps: 1  # Number of noise steps
  sde_steps: [1, 2, 3]  # Custom noise window, noise steps are randomly selected from this list during training
  seed: 42  # Scheduler seed (for noise step selection)

# Evaluation settings
eval:
  resolution: 1024  # Evaluation resolution
  condition_image_size: 1024
  per_device_batch_size: 1  # Eval batch size
  guidance_scale: 4.0  # Guidance scale for sampling
  num_inference_steps: 28  # Number of eval timesteps
  eval_freq: 20  # Eval frequency in epochs (0 to disable)
  seed: 42  # Eval seed (defaults to training seed)

# Reward Model Configuration
rewards:
  - name: "hpsv2"
    reward_model: "hpsv2"
    batch_size: 16
    device: "cuda"
    dtype: bfloat16
  - name: "pickscore"
    reward_model: "PickScore"
    batch_size: 16
    device: "cuda"
    dtype: bfloat16

# Optional Evaluation Reward Models
eval_rewards:
  - name: "hpsv2"
    reward_model: "hpsv2"
    batch_size: 16
    device: "cuda"
    dtype: bfloat16
  - name: "pickscore"
    reward_model: "PickScore"
    batch_size: 16
    device: "cuda"
    dtype: bfloat16
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels