modelscope · zyzhang1130 · Apr 19, 2024 · Apr 23, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/examples/conversation_with_agent_with_finetuned_model/FinetuneDialogAgent.py b/examples/conversation_with_agent_with_finetuned_model/FinetuneDialogAgent.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+"""
+This module provides the FinetuneDialogAgent class,
+which extends DialogAgent to enhance fine-tuning
+capabilities with custom hyperparameters.
+"""
+from typing import Any, Optional, Dict
+from loguru import logger
+from agentscope.agents import DialogAgent
+
+
+class FinetuneDialogAgent(DialogAgent):
+    """
+    A dialog agent capable of fine-tuning its
+    underlying model based on provided data.
+
+    Inherits from DialogAgent and adds functionality for
+    fine-tuning with custom hyperparameters.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        sys_prompt: str,
+        model_config_name: str,
+        use_memory: bool = True,
+        memory_config: Optional[dict] = None,
+    ):
+        """
+        Initializes a new FinetuneDialogAgent with specified configuration.
+
+        Arguments:
+            name (str): Name of the agent.
+            sys_prompt (str): System prompt or description of the agent's role.
+            model_config_name (str): The configuration name for
+                                     the underlying model.
+            use_memory (bool, optional): Indicates whether to utilize
+                                         memory features. Defaults to True.
+            memory_config (dict, optional): Configuration for memory
+                                            functionalities if
+                                            `use_memory` is True.
+
+        Note:
+            Refer to `class DialogAgent(AgentBase)` for more information.
+        """
+        super().__init__(
+            name,
+            sys_prompt,
+            model_config_name,
+            use_memory,
+            memory_config,
+        )
+        self.finetune = True
+
+    def load_model(
+        self,
+        pretrained_model_name_or_path: Optional[str] = None,
+        local_model_path: Optional[str] = None,
+        fine_tune_config: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """
+        Load a new model into the agent.
+
+        Arguments:
+            pretrained_model_name_or_path (str): The Hugging Face
+                             model ID or a custom identifier.
+                             Needed if loading model from Hugging Face.
+            local_model_path (str, optional): Path to a locally saved model.
+
+        Raises:
+            Exception: If the model loading process fails or if the
+                       model wrapper does not support dynamic loading.
+        """
+        if hasattr(self.model, "load_model"):
+            self.model.load_model(
+                pretrained_model_name_or_path,
+                local_model_path,
+                fine_tune_config,
+            )
+        else:
+            logger.error(
+                "The model wrapper does not support dynamic model loading.",
+            )
+
+    def load_tokenizer(
+        self,
+        pretrained_model_name_or_path: Optional[str] = None,
+        local_model_path: Optional[str] = None,
+    ) -> None:
+        """
+        Load a new tokenizer for the agent.
+
+        Arguments:
+            pretrained_model_name_or_path (str): The Hugging Face model
+                            ID or a custom identifier.
+                            Needed if loading tokenizer from Hugging Face.
+            local_tokenizer_path (str, optional): Path to a locally saved
+                                                  tokenizer.
+
+        Raises:
+            Exception: If the model tokenizer process fails or if the
+                       model wrapper does not support dynamic loading.
+        """
+        if hasattr(self.model, "load_tokenizer"):
+            self.model.load_tokenizer(
+                pretrained_model_name_or_path,
+                local_model_path,
+            )
+        else:
+            logger.error("The model wrapper does not support dynamic loading.")
+
+    def fine_tune(
+        self,
+        data_path: Optional[str] = None,
+        output_dir: Optional[str] = None,
+        fine_tune_config: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """
+        Fine-tune the agent's underlying model.
+
+        Arguments:
+            data_path (str): The path to the training data.
+            output_dir (str, optional): User specified path
+                                       to save the fine-tuned model
+                                       and its tokenizer. By default
+                                       save to this example's
+                                       directory if not specified.
+
+        Raises:
+            Exception: If fine-tuning fails or if the
+                       model wrapper does not support fine-tuning.
+        """
+        if hasattr(self.model, "fine_tune"):
+            self.model.fine_tune(data_path, output_dir, fine_tune_config)
+        else:
+            logger.error("The model wrapper does not support fine-tuning.")
diff --git a/examples/conversation_with_agent_with_finetuned_model/README.md b/examples/conversation_with_agent_with_finetuned_model/README.md
@@ -0,0 +1,74 @@
+# User-Agent Conversation with Custom Model Loading and Fine-Tuning in AgentScope
+
+This example demonstrates how to load and optionally fine-tune a Hugging Face model within a user-agent conversation setup using AgentScope. The complete code is provided in `agentscope/examples/conversation_with_agent_with_finetuned_model`.
+
+## Functionality Overview
+
+Compared to basic conversation setup, this example introduces model loading and fine-tuning features:
+
+- Initialize an agent or use `dialog_agent.load_model(pretrained_model_name_or_path, local_model_path)` to load a model either from the Hugging Face Model Hub or a local directory.
+- Initialize an agent or apply `dialog_agent.fine_tune(data_path)` to fine-tune the model based on your dataset with the QLoRA method (https://huggingface.co/blog/4bit-transformers-bitsandbytes).
+
+The default hyperparameters for (SFT) fine-tuning are specified in `agentscope/examples/conversation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py` and `agentscope/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json`. For customized hyperparameters, specify them in `model_configs` if the model needs to be fine-tuned at initialization, or specify through `fine_tune_config` in `FinetuneDialogAgent`'s `fine_tune` method after initialization, as shown in the example script `conversation_with_agent_with_finetuned_model.py`.
+
+## Agent Initialization
+
+When initializing an agent, the following parameters need specification:
+
+- `pretrained_model_name_or_path` (str): Identifier for the model on Hugging Face.
+- `local_model_path` (str): Local path to the model (defaults to loading from Hugging Face if not provided).
+- `data_path` (str): Path to training data (fine-tuning is skipped if not provided).
+- `device` (str): The device (e.g., 'cuda', 'cpu') for model operation, defaulting to 'cuda' if available.
+- `fine_tune_config` (dict, Optional): A configuration dictionary for fine-tuning the model. It allows specifying hyperparameters and other training options that will be passed to the fine-tuning method. If not provided, default settings will be used. This allows for customization of the fine-tuning process to optimize model performance based on specific requirements. Note that if `continue_lora_finetuning` is set to `True`, `lora_config` should not be specified since the previously saved peft model's config will be used instead. If `continue_lora_finetuning` is set to `False`, `lora_config` should be specified.
+- `huggingface_token` (from .env file): Token required for models needing authentication from Hugging Face.
+
+## Example Outputs
+### Before Fine-tuning:
+```
+System:
+
+You're a helpful assistant.
+
+User:
+
+Who are you?
+
+Assistant:
+
+I am a woman who is passionate about life, my family, my friends, my work, and my community. I am a woman who is a wife, a mother, a daughter, a sister, a friend, a teacher, a coach, a mentor, a leader, a volunteer, a writer, a reader, a traveler, a gardener, a cook, a baker, a cra
+```
+### After Fine-tuning (with the default configuration in `model_configs`):
+```
+System:
+
+You're a helpful assistant.
+
+User:
+
+Who are you?
+
+Assistant:
+
+I am a language model trained by Google to answer questions.
+```
+(This example is trained with the default setting, with a training time of 872 seconds and a GPU memory cost of 9.914 GB. Reducing the training batch size reduces the memory required. Note that the model is loaded in 4 bits (i.e., QLoRA).)
+
+## Tested Models
+
+The example is tested using specific Hugging Face model `google/gemma-7b` on dataset `GAIR/lima`. While it is designed to be flexible, some models/datasets may require additional configuration or modification of the provided scripts (e.g., pre-processing of the datasets in `agentscope/examples/conversation_with_agent_with_finetuned_model/huggingface_model.py`).
+
+## Prerequisites
+
+Before running this example, ensure you have installed the following packages:
+
+- `transformers`
+- `python-dotenv`
+- `datasets`
+- `trl`
+- `bitsandbytes`
+- `sentencepiece`
+
+Additionally, set `HUGGINGFACE_TOKEN` in the `agentscope/examples/conversation_with_agent_with_finetuned_model/.env`.
+
+Then run the example from this directory:
+
+```bash
+python conversation_with_agent_with_finetuned_model.py
+```
diff --git a/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json b/examples/conversation_with_agent_with_finetuned_model/configs/model_configs.json
@@ -0,0 +1,22 @@
+[
+    {
+        "model_type": "huggingface",
+        "config_name": "my_custom_model",
+
+        "pretrained_model_name_or_path": "google/gemma-7b",
+
+        "max_length": 128,
+        "device": "cuda",
+
+        "data_path": "GAIR/lima",
+
+        "fine_tune_config": {
+            "lora_config": {"r": 16, "lora_alpha": 32},
+            "training_args": {"max_steps": 200, "logging_steps": 1},
+            "bnb_config": {"load_in_4bit": true,
+                           "bnb_4bit_use_double_quant": true,
+                           "bnb_4bit_quant_type": "nf4",
+                           "bnb_4bit_compute_dtype": "bfloat16"}
+        }
+    }
+]
diff --git a/...versation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py b/...versation_with_agent_with_finetuned_model/conversation_with_agent_with_finetuned_model.py
@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+"""
+This script sets up a conversational agent using
+AgentScope with a Hugging Face model.
+It includes initializing a FinetuneDialogAgent,
+loading and fine-tuning a pre-trained model,
+and conducting a dialogue via a sequential pipeline.
+The conversation continues until the user exits.
+Features include model and tokenizer loading,
+and fine-tuning on the lima dataset with adjustable parameters.
+"""
+# This import is necessary for AgentScope to properly use
+# HuggingFaceWrapper even though it's not explicitly used in this file.
+# To remove the pylint disable without causing issues
+# HuggingFaceWrapper needs to be put under src/agentscope/agents.
+# pylint: disable=unused-import
+from huggingface_model import HuggingFaceWrapper
+from FinetuneDialogAgent import FinetuneDialogAgent
+import agentscope
+from agentscope.agents.user_agent import UserAgent
+from agentscope.pipelines.functional import sequentialpipeline
+
+
+def main() -> None:
+    """A basic conversation demo with a custom model"""
+
+    # Initialize AgentScope with your custom model configuration
+
+    agentscope.init(
+        model_configs=[
+            {
+                "model_type": "huggingface",
+                "config_name": "my_custom_model",
+                # Or another generative model of your choice.
+                # Needed from loading from Hugging Face.
+                "pretrained_model_name_or_path": "google/gemma-7b",
+                # "local_model_path": "", # Specify your local model path
+                "max_length": 256,
+                # Device for inference. Fine-tuning occurs on gpus.
+                "device": "cuda",
+                # Specify a Hugging Face data path if you
+                # wish to finetune the model from the start
+                "data_path": "GAIR/lima",
+                # "output_dir":
+                # fine_tune_config (Optional): Configuration for
+                # fine-tuning the model.
+                # This dictionary can include hyperparameters and other
+                # training options that will be passed to the
+                # fine-tuning method. Defaults to None.
+                # `lora_config` and `training_args` follow
+                # the standard lora and sfttrainer fields.
+                # "lora_config" shouldn't be specified if
+                # loading a model saved as lora model
+                # '"continue_lora_finetuning": True' if
+                # loading a model saved as lora model
+                "fine_tune_config": {
+                    "continue_lora_finetuning": False,
+                    "max_seq_length": 4096,
+                    "lora_config": {
+                        "r": 16,
+                        "lora_alpha": 32,
+                        "lora_dropout": 0.05,
+                        "bias": "none",
+                        "task_type": "CAUSAL_LM",
+                    },
+                    "training_args": {
+                        "num_train_epochs": 5,
+                        # "max_steps": 100,
+                        "logging_steps": 1,
+                        # "learning_rate": 5e-07
+                    },
+                    # "bnb_config": {
+                    #     "load_in_8bit": True,
+                    # "bnb_4bit_use_double_quant": True,
+                    # "bnb_4bit_quant_type": "nf4",
+                    # "bnb_4bit_compute_dtype": "bfloat16",
+                    # },
+                },
+            },
+        ],
+    )
+
+    # # alternatively can load `model_configs` from json file
+    # agentscope.init(
+    #     model_configs="./configs/model_configs.json",
+    # )
+
+    # Init agents with the custom model
+    dialog_agent = FinetuneDialogAgent(
+        name="Assistant",
+        sys_prompt=("You're a helpful assistant."),
+        # Use your custom model config name here
+        model_config_name="my_custom_model",
+    )
+
+    # (Optional) can load another model after
+    # the agent has been instantiated if needed
+    # (for `fine_tune_config` specify only
+    # `lora_config` and `bnb_config` if used)
+    dialog_agent.load_model(
+        pretrained_model_name_or_path="google/gemma-7b",
+        # local_model_path="",
+        fine_tune_config={
+            # "bnb_config": {
+            #     "load_in_4bit": True,
+            #     "bnb_4bit_use_double_quant": True,
+            #     "bnb_4bit_quant_type": "nf4",
+            #     "bnb_4bit_compute_dtype": "bfloat16",
+            # },
+        },
+    )  # load model from Hugging Face
+
+    dialog_agent.load_tokenizer(
+        pretrained_model_name_or_path="google/gemma-7b",
+        # local_model_path="",
+    )  # load tokenizer
+
+    # fine-tune loaded model with lima dataset
+    # with customized hyperparameters
+    # `fine_tune_config` argument is optional
+    # specify only `lora_config` and
+    # `training_args` if used). Defaults to None.
+    # "lora_config" shouldn't be specified if
+    # loading a model saved as lora model
+    # '"continue_lora_finetuning": True' if
+    # loading a model saved as lora model
+    dialog_agent.fine_tune(
+        "GAIR/lima",
+        fine_tune_config={
+            "continue_lora_finetuning": True,
+            # "lora_config": {"r": 24, "lora_alpha": 48},
+            "training_args": {"max_steps": 300, "logging_steps": 3},
+        },
+    )
+
+    user_agent = UserAgent()
+
+    # Start the conversation between user and assistant
+    x = None
+    while x is None or x.content != "exit":
+        x = sequentialpipeline([user_agent, dialog_agent], x)
+
+
+# Start the demo only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()