# .project-metadata.yaml
name: Fine Tuning Studio
description: |
This AMP demonstrates how PEFT and other fine-tuning optimization techniques can be used for efficient and effective customization of an existing LLM to perform new tasks.
author: Cloudera Inc.
specification_version: 1.0
prototype_version: 1.0
date: "2023-07-22"
environment_variables:
FINE_TUNING_STUDIO_SQLITE_DB:
default: ".app/state.db"
description: >-
This is the location of the FTS app's SQLite database.
FINE_TUNING_STUDIO_PROJECT_DEFAULTS:
default: "data/project_defaults.json"
description: >-
Project defaults for the app that are populated into the Studio when the AMP is initially deployed.
CUSTOM_LORA_ADAPTERS_DIR:
default: "data/adapters/"
description: >-
      The directory containing the LoRA adapters produced by the fine-tuning jobs in this project. Also the location to look for any custom LoRA adapters.
HUGGINGFACE_ACCESS_TOKEN:
default: ""
description: >-
      To access gated Hugging Face models, create a Hugging Face access token (log in to Huggingface -> Settings -> Access Tokens) and provide it here.
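# Note: the environment variables above are injected into each task and
# application in this project. An illustrative (not authoritative) Python
# sketch of how a task script might read them, using only os.environ from
# the standard library:
#
#   import os
#   db_path = os.environ.get("FINE_TUNING_STUDIO_SQLITE_DB", ".app/state.db")
#   hf_token = os.environ.get("HUGGINGFACE_ACCESS_TOKEN", "")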
runtimes:
- editor: JupyterLab
kernel: Python 3.10
edition: Nvidia GPU
tasks:
# - type: run_session
  # name: Validate GPU Availability in this workspace
  # script: bin/check_gpu_resources.py
  # short_summary: Check for GPU availability.
  # long_summary: Check that GPUs are enabled on this workspace and are currently schedulable.
# kernel: python3
# cpu: 2
# memory: 4
- type: create_job
name: Install Dependencies
script: bin/install-dependencies-uv.py
entity_label: install_deps
arguments: None
short_summary: Install Dependencies
kernel: python3
cpu: 2
memory: 8
environment:
TASK_TYPE: CREATE/RUN_JOB
- type: run_job
entity_label: install_deps
arguments: None
short_summary: Running Install Dependencies
long_summary: >-
Running the job to install dependencies.
# - type: run_session
# name: Validate GPU CUDA Capability
# script: bin/check_gpu_capability.py
# short_summary: Check for GPU capability.
  # long_summary: Check that the GPU device supports the required CUDA capabilities.
# kernel: python3
# cpu: 2
# memory: 4
# gpu: 1
- type: create_job
name: Accel_Finetuning_Base_Job
    short_summary: Create the template job for fine-tuning tasks
entity_label: accel_fine_tune_job_template
script: ft/scripts/accel_fine_tune_base_script.py
arguments: None
    long_summary: Create the template job for accelerator-based fine-tuning tasks. This job serves as the template for creating and launching fine-tuning jobs from the application.
cpu: 2
memory: 8
gpu: 1
environment:
TASK_TYPE: CREATE/RUN_JOB
MLFLOW_FLATTEN_PARAMS: true
PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
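      # PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True lets PyTorch's CUDA
      # caching allocator grow existing memory segments instead of reserving new
      # fixed-size blocks, which can reduce memory fragmentation over long
      # fine-tuning runs.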
- type: create_job
name: Mlflow_Evaluation_Base_Job
    short_summary: Create the template job for MLflow evaluation tasks
entity_label: mlflow_evaluation_job_template
script: ft/scripts/mlflow_evaluation_base_script.py
arguments: None
    long_summary: Create the template job for MLflow evaluation tasks. This job serves as the template for creating and launching MLflow evaluation jobs from the application.
cpu: 2
memory: 8
gpu: 1
environment:
TASK_TYPE: CREATE/RUN_JOB
- type: run_session
name: Initialize Project Defaults
script: bin/initialize-project-defaults-uv.py
short_summary: Initialize Project Defaults
long_summary: Initialize default datasets, prompts, models, adapters, etc., shipped with this version of the Studio.
kernel: python3
cpu: 2
memory: 8
- type: start_application
name: Fine Tuning Studio
short_summary: Start Fine Tuning Studio
subdomain: fine-tuning-studio
script: bin/run-app.py
    long_summary: This application requires an available GPU to run the LLM and LoRA adapters.
cpu: 2
memory: 8
is_embedded: true
environment_variables:
TASK_TYPE: START_APPLICATION
- type: start_application
name: Sample-Ticketing-Agent-Application
short_summary: Start Sample Ticketing Agent Application
subdomain: sample-ticketing-agent
script: examples/ticketing-agent-app/ticketing-agent-launch.py
    long_summary: Launches the sample ticketing agent Streamlit application.
cpu: 2
memory: 8
environment_variables:
TASK_TYPE: START_APPLICATION