From 8f286d5992bd349edc93cd2fd4474c3cddf0bc21 Mon Sep 17 00:00:00 2001 From: Jongsu Liam Kim Date: Tue, 23 Jan 2024 00:21:01 +0900 Subject: [PATCH] Update library of Fine-tune Llama 2 * To enable kbit quantization, gradient_checkpointing must be passed into TrainingArguments --- Fine_tune_Llama_2_in_Google_Colab.ipynb | 4613 ++++++++++++----------- 1 file changed, 2307 insertions(+), 2306 deletions(-) diff --git a/Fine_tune_Llama_2_in_Google_Colab.ipynb b/Fine_tune_Llama_2_in_Google_Colab.ipynb index c75609c..56cb68d 100644 --- a/Fine_tune_Llama_2_in_Google_Colab.ipynb +++ b/Fine_tune_Llama_2_in_Google_Colab.ipynb @@ -1,2053 +1,233 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "authorship_tag": "ABX9TyN5b5wF0ITT+T1IRzUm6Zjj", - "include_colab_link": true + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + { + "cell_type": "markdown", + "metadata": { + "id": "OSHlAbqzDFDq" + }, + "source": [ + "# Fine-tune Llama 2 in Google Colab\n", + "> 🗣️ Large Language Model Course\n", + "\n", + "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.\n", + "\n", + "This notebook runs on a T4 GPU. 
(Last update: 01 Aug 2023)\n" + ] }, - "language_info": { - "name": "python" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GLXwJqbjtPho" + }, + "outputs": [], + "source": [ + "!pip install -q accelerate peft bitsandbytes transformers trl[quantization]" + ] }, - "accelerator": "GPU", - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "52c4bf7418f74bc79a8c12fe35901974": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c5e609d111b34d408a53a4cd71bb43d5", - "IPY_MODEL_0e0a20b5ed7a44e9834022e7eba2194d", - "IPY_MODEL_b5627331e78e4eb28765ed20f32cf403" - ], - "layout": "IPY_MODEL_8084d4cb267f4a52b3d80ec34d291190" - } + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nAMzy_0FtaUZ" + }, + "outputs": [], + "source": [ + "import os\n", + "import torch\n", + "from datasets import load_dataset\n", + "from transformers import (\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " BitsAndBytesConfig,\n", + " HfArgumentParser,\n", + " TrainingArguments,\n", + " pipeline,\n", + " logging,\n", + ")\n", + "from peft import LoraConfig, PeftModel\n", + "from trl import SFTTrainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ib_We3NLtj2E" + }, + "outputs": [], + "source": [ + "# The model that you want to train from the Hugging Face hub\n", + "model_name = \"NousResearch/Llama-2-7b-chat-hf\"\n", + "\n", + "# The instruction dataset to use\n", + "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n", + "\n", + "# Fine-tuned model name\n", + "new_model = 
\"llama-2-7b-miniguanaco\"\n", + "\n", + "################################################################################\n", + "# QLoRA parameters\n", + "################################################################################\n", + "\n", + "# LoRA attention dimension\n", + "lora_r = 64\n", + "\n", + "# Alpha parameter for LoRA scaling\n", + "lora_alpha = 16\n", + "\n", + "# Dropout probability for LoRA layers\n", + "lora_dropout = 0.1\n", + "\n", + "################################################################################\n", + "# bitsandbytes parameters\n", + "################################################################################\n", + "\n", + "# Activate 4-bit precision base model loading\n", + "use_4bit = True\n", + "\n", + "# Compute dtype for 4-bit base models\n", + "bnb_4bit_compute_dtype = \"float16\"\n", + "\n", + "# Quantization type (fp4 or nf4)\n", + "bnb_4bit_quant_type = \"nf4\"\n", + "\n", + "# Activate nested quantization for 4-bit base models (double quantization)\n", + "use_nested_quant = False\n", + "\n", + "################################################################################\n", + "# TrainingArguments parameters\n", + "################################################################################\n", + "\n", + "# Output directory where the model predictions and checkpoints will be stored\n", + "output_dir = \"./results\"\n", + "\n", + "# Number of training epochs\n", + "num_train_epochs = 1\n", + "\n", + "# Enable fp16/bf16 training (set bf16 to True with an A100)\n", + "fp16 = False\n", + "bf16 = False\n", + "\n", + "# Batch size per GPU for training\n", + "per_device_train_batch_size = 4\n", + "\n", + "# Batch size per GPU for evaluation\n", + "per_device_eval_batch_size = 4\n", + "\n", + "# Number of update steps to accumulate the gradients for\n", + "gradient_accumulation_steps = 1\n", + "\n", + "# Enable gradient checkpointing\n", + "gradient_checkpointing = True\n", + "\n", + "# Maximum gradient 
norm (gradient clipping)\n", + "max_grad_norm = 0.3\n", + "\n", + "# Initial learning rate (AdamW optimizer)\n", + "learning_rate = 2e-4\n", + "\n", + "# Weight decay to apply to all layers except bias/LayerNorm weights\n", + "weight_decay = 0.001\n", + "\n", + "# Optimizer to use\n", + "optim = \"paged_adamw_32bit\"\n", + "\n", + "# Learning rate schedule\n", + "lr_scheduler_type = \"cosine\"\n", + "\n", + "# Number of training steps (overrides num_train_epochs)\n", + "max_steps = -1\n", + "\n", + "# Ratio of steps for a linear warmup (from 0 to learning rate)\n", + "warmup_ratio = 0.03\n", + "\n", + "# Group sequences into batches with same length\n", + "# Saves memory and speeds up training considerably\n", + "group_by_length = True\n", + "\n", + "# Save checkpoint every X update steps\n", + "save_steps = 0\n", + "\n", + "# Log every X update steps\n", + "logging_steps = 25\n", + "\n", + "################################################################################\n", + "# SFT parameters\n", + "################################################################################\n", + "\n", + "# Maximum sequence length to use\n", + "max_seq_length = None\n", + "\n", + "# Pack multiple short examples in the same input sequence to increase efficiency\n", + "packing = False\n", + "\n", + "# Load the entire model on the GPU 0\n", + "device_map = {\"\": 0}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "52c4bf7418f74bc79a8c12fe35901974", + "c5e609d111b34d408a53a4cd71bb43d5", + "0e0a20b5ed7a44e9834022e7eba2194d", + "b5627331e78e4eb28765ed20f32cf403", + "8084d4cb267f4a52b3d80ec34d291190", + "a8dcdf1f7ab64242acb057e8b54ebf79", + "1ca492fddbaa4ea7a3226649154e01fd", + "a8eda8bfe08e4152a80c63830138c96d", + "1f258eacd6d0472385d41523b65dea8b", + "228b1bcf604f454f8060a250b58008a1", + "90b281e9c5ed4e77ab93e5879d0b15a3" + ] }, - 
"c5e609d111b34d408a53a4cd71bb43d5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a8dcdf1f7ab64242acb057e8b54ebf79", - "placeholder": "​", - "style": "IPY_MODEL_1ca492fddbaa4ea7a3226649154e01fd", - "value": "Loading checkpoint shards: 100%" - } + "id": "OJXpOgBFuSrc", + "outputId": "8d06ed40-ea32-4d85-8665-413bde069607" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "52c4bf7418f74bc79a8c12fe35901974", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Fine-tune Llama 2 in Google Colab\n", - "> 🗣️ Large Language Model Course\n", - "\n", - "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.\n", - "\n", - "This notebook runs on a T4 GPU. 
(Last update: 01 Aug 2023)\n" - ], - "metadata": { - "id": "OSHlAbqzDFDq" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GLXwJqbjtPho" - }, - "outputs": [], - "source": [ - "!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7" - ] - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "import torch\n", - "from datasets import load_dataset\n", - "from transformers import (\n", - " AutoModelForCausalLM,\n", - " AutoTokenizer,\n", - " BitsAndBytesConfig,\n", - " HfArgumentParser,\n", - " TrainingArguments,\n", - " pipeline,\n", - " logging,\n", - ")\n", - "from peft import LoraConfig, PeftModel\n", - "from trl import SFTTrainer" - ], - "metadata": { - "id": "nAMzy_0FtaUZ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# The model that you want to train from the Hugging Face hub\n", - "model_name = \"NousResearch/Llama-2-7b-chat-hf\"\n", - "\n", - "# The instruction dataset to use\n", - "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n", - "\n", - "# Fine-tuned model name\n", - "new_model = \"llama-2-7b-miniguanaco\"\n", - "\n", - "################################################################################\n", - "# QLoRA parameters\n", - "################################################################################\n", - "\n", - "# LoRA attention dimension\n", - "lora_r = 64\n", - "\n", - "# Alpha parameter for LoRA scaling\n", - "lora_alpha = 16\n", - "\n", - "# Dropout probability for LoRA layers\n", - "lora_dropout = 0.1\n", - "\n", - "################################################################################\n", - "# bitsandbytes parameters\n", - "################################################################################\n", - "\n", - "# Activate 4-bit precision base model loading\n", - "use_4bit = True\n", - "\n", - "# Compute dtype for 4-bit base models\n", - "bnb_4bit_compute_dtype = 
\"float16\"\n", - "\n", - "# Quantization type (fp4 or nf4)\n", - "bnb_4bit_quant_type = \"nf4\"\n", - "\n", - "# Activate nested quantization for 4-bit base models (double quantization)\n", - "use_nested_quant = False\n", - "\n", - "################################################################################\n", - "# TrainingArguments parameters\n", - "################################################################################\n", - "\n", - "# Output directory where the model predictions and checkpoints will be stored\n", - "output_dir = \"./results\"\n", - "\n", - "# Number of training epochs\n", - "num_train_epochs = 1\n", - "\n", - "# Enable fp16/bf16 training (set bf16 to True with an A100)\n", - "fp16 = False\n", - "bf16 = False\n", - "\n", - "# Batch size per GPU for training\n", - "per_device_train_batch_size = 4\n", - "\n", - "# Batch size per GPU for evaluation\n", - "per_device_eval_batch_size = 4\n", - "\n", - "# Number of update steps to accumulate the gradients for\n", - "gradient_accumulation_steps = 1\n", - "\n", - "# Enable gradient checkpointing\n", - "gradient_checkpointing = True\n", - "\n", - "# Maximum gradient normal (gradient clipping)\n", - "max_grad_norm = 0.3\n", - "\n", - "# Initial learning rate (AdamW optimizer)\n", - "learning_rate = 2e-4\n", - "\n", - "# Weight decay to apply to all layers except bias/LayerNorm weights\n", - "weight_decay = 0.001\n", - "\n", - "# Optimizer to use\n", - "optim = \"paged_adamw_32bit\"\n", - "\n", - "# Learning rate schedule\n", - "lr_scheduler_type = \"cosine\"\n", - "\n", - "# Number of training steps (overrides num_train_epochs)\n", - "max_steps = -1\n", - "\n", - "# Ratio of steps for a linear warmup (from 0 to learning rate)\n", - "warmup_ratio = 0.03\n", - "\n", - "# Group sequences into batches with same length\n", - "# Saves memory and speeds up training considerably\n", - "group_by_length = True\n", - "\n", - "# Save checkpoint every X updates steps\n", - "save_steps = 0\n", - "\n", - 
"# Log every X updates steps\n", - "logging_steps = 25\n", - "\n", - "################################################################################\n", - "# SFT parameters\n", - "################################################################################\n", - "\n", - "# Maximum sequence length to use\n", - "max_seq_length = None\n", - "\n", - "# Pack multiple short examples in the same input sequence to increase efficiency\n", - "packing = False\n", - "\n", - "# Load the entire model on the GPU 0\n", - "device_map = {\"\": 0}" - ], - "metadata": { - "id": "ib_We3NLtj2E" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Load dataset (you can process it here)\n", - "dataset = load_dataset(dataset_name, split=\"train\")\n", - "\n", - "# Load tokenizer and model with QLoRA configuration\n", - "compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n", - "\n", - "bnb_config = BitsAndBytesConfig(\n", - " load_in_4bit=use_4bit,\n", - " bnb_4bit_quant_type=bnb_4bit_quant_type,\n", - " bnb_4bit_compute_dtype=compute_dtype,\n", - " bnb_4bit_use_double_quant=use_nested_quant,\n", - ")\n", - "\n", - "# Check GPU compatibility with bfloat16\n", - "if compute_dtype == torch.float16 and use_4bit:\n", - " major, _ = torch.cuda.get_device_capability()\n", - " if major >= 8:\n", - " print(\"=\" * 80)\n", - " print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n", - " print(\"=\" * 80)\n", - "\n", - "# Load base model\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " model_name,\n", - " quantization_config=bnb_config,\n", - " device_map=device_map\n", - ")\n", - "model.config.use_cache = False\n", - "model.config.pretraining_tp = 1\n", - "\n", - "# Load LLaMA tokenizer\n", - "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", - "tokenizer.pad_token = tokenizer.eos_token\n", - "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n", - 
"\n", - "# Load LoRA configuration\n", - "peft_config = LoraConfig(\n", - " lora_alpha=lora_alpha,\n", - " lora_dropout=lora_dropout,\n", - " r=lora_r,\n", - " bias=\"none\",\n", - " task_type=\"CAUSAL_LM\",\n", - ")\n", - "\n", - "# Set training parameters\n", - "training_arguments = TrainingArguments(\n", - " output_dir=output_dir,\n", - " num_train_epochs=num_train_epochs,\n", - " per_device_train_batch_size=per_device_train_batch_size,\n", - " gradient_accumulation_steps=gradient_accumulation_steps,\n", - " optim=optim,\n", - " save_steps=save_steps,\n", - " logging_steps=logging_steps,\n", - " learning_rate=learning_rate,\n", - " weight_decay=weight_decay,\n", - " fp16=fp16,\n", - " bf16=bf16,\n", - " max_grad_norm=max_grad_norm,\n", - " max_steps=max_steps,\n", - " warmup_ratio=warmup_ratio,\n", - " group_by_length=group_by_length,\n", - " lr_scheduler_type=lr_scheduler_type,\n", - " report_to=\"tensorboard\"\n", - ")\n", - "\n", - "# Set supervised fine-tuning parameters\n", - "trainer = SFTTrainer(\n", - " model=model,\n", - " train_dataset=dataset,\n", - " peft_config=peft_config,\n", - " dataset_text_field=\"text\",\n", - " max_seq_length=max_seq_length,\n", - " tokenizer=tokenizer,\n", - " args=training_arguments,\n", - " packing=packing,\n", - ")\n", - "\n", - "# Train model\n", - "trainer.train()\n", - "\n", - "# Save trained model\n", - "trainer.model.save_pretrained(new_model)" - ], - "metadata": { - "id": "OJXpOgBFuSrc", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000, - "referenced_widgets": [ - "52c4bf7418f74bc79a8c12fe35901974", - "c5e609d111b34d408a53a4cd71bb43d5", - "0e0a20b5ed7a44e9834022e7eba2194d", - "b5627331e78e4eb28765ed20f32cf403", - "8084d4cb267f4a52b3d80ec34d291190", - "a8dcdf1f7ab64242acb057e8b54ebf79", - "1ca492fddbaa4ea7a3226649154e01fd", - "a8eda8bfe08e4152a80c63830138c96d", - "1f258eacd6d0472385d41523b65dea8b", - "228b1bcf604f454f8060a250b58008a1", - "90b281e9c5ed4e77ab93e5879d0b15a3" - ] - }, - "outputId": 
"8d06ed40-ea32-4d85-8665-413bde069607" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/2 [00:00" - ], "text/html": [ "\n", "
\n", @@ -3065,285 +1245,2106 @@ " \n", " \n", "

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Load dataset (you can process it here)\n", + "dataset = load_dataset(dataset_name, split=\"train\")\n", + "\n", + "# Load tokenizer and model with QLoRA configuration\n", + "compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n", + "\n", + "bnb_config = BitsAndBytesConfig(\n", + " load_in_4bit=use_4bit,\n", + " bnb_4bit_quant_type=bnb_4bit_quant_type,\n", + " bnb_4bit_compute_dtype=compute_dtype,\n", + " bnb_4bit_use_double_quant=use_nested_quant,\n", + ")\n", + "\n", + "# Check GPU compatibility with bfloat16\n", + "if compute_dtype == torch.float16 and use_4bit:\n", + " major, _ = torch.cuda.get_device_capability()\n", + " if major >= 8:\n", + " print(\"=\" * 80)\n", + " print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n", + " print(\"=\" * 80)\n", + "\n", + "# Load base model\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " model_name,\n", + " quantization_config=bnb_config,\n", + " device_map=device_map\n", + ")\n", + "model.config.use_cache = False\n", + "model.config.pretraining_tp = 1\n", + "\n", + "# Load LLaMA tokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n", + "\n", + "# Load LoRA configuration\n", + "peft_config = LoraConfig(\n", + " lora_alpha=lora_alpha,\n", + " lora_dropout=lora_dropout,\n", + " r=lora_r,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\",\n", + ")\n", + "\n", + "# Set training parameters\n", + "training_arguments = TrainingArguments(\n", + " output_dir=output_dir,\n", + " num_train_epochs=num_train_epochs,\n", + " per_device_train_batch_size=per_device_train_batch_size,\n", + " gradient_accumulation_steps=gradient_accumulation_steps,\n", + " gradient_checkpointing=gradient_checkpointing,\n", + 
" optim=optim,\n", + " save_steps=save_steps,\n", + " logging_steps=logging_steps,\n", + " learning_rate=learning_rate,\n", + " weight_decay=weight_decay,\n", + " fp16=fp16,\n", + " bf16=bf16,\n", + " max_grad_norm=max_grad_norm,\n", + " max_steps=max_steps,\n", + " warmup_ratio=warmup_ratio,\n", + " group_by_length=group_by_length,\n", + " lr_scheduler_type=lr_scheduler_type,\n", + " report_to=\"tensorboard\"\n", + ")\n", + "\n", + "# Set supervised fine-tuning parameters\n", + "trainer = SFTTrainer(\n", + " model=model,\n", + " train_dataset=dataset,\n", + " peft_config=peft_config,\n", + " dataset_text_field=\"text\",\n", + " max_seq_length=max_seq_length,\n", + " tokenizer=tokenizer,\n", + " args=training_arguments,\n", + " packing=packing,\n", + ")\n", + "\n", + "# Train model\n", + "trainer.train()\n", + "\n", + "# Save trained model\n", + "trainer.model.save_pretrained(new_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "crj9svNe4hU5" + }, + "outputs": [], + "source": [ + "# %load_ext tensorboard\n", + "# %tensorboard --logdir results/runs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "frlSLPin4IJ4", + "outputId": "e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. 
Gradients will be None\n", + " warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n", + "\n", + "Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n", + "\n", + "Some examples of large language models include:\n", + "\n", + "* BERT (Bidirectional Encoder Representations from Transformers\n" + ] + } + ], + "source": [ + "# Ignore warnings\n", + "logging.set_verbosity(logging.CRITICAL)\n", + "\n", + "# Run text generation pipeline with our new model\n", + "prompt = \"What is a large language model?\"\n", + "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n", + "result = pipe(f\"[INST] {prompt} [/INST]\")\n", + "print(result[0]['generated_text'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mkQCviG0Zta-", + "outputId": "e7c4ab10-4039-4490-b7f0-6ea118bdd709" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "19965" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Empty VRAM\n", + "del model\n", + "del pipe\n", + "del trainer\n", 
+ "import gc\n", + "gc.collect()\n", + "gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "051d193cd87f47c1971fb87544e1e615", + "9d7247c119e642c5894f15ca6974ef3e", + "a79c22bb34ec4f698a00752b47a6f631", + "d95f3a3f26c6470d984542cdfd68bec1", + "343e11c62a59448eb43bbc0c31bf5f11", + "a153c96bd1fe4c48a41e9b9c7c00dd6e", + "84da055d24694320843e13ad37438792", + "e375632975904402baea46163e2eeca1", + "95501d0b5a22407288f008bf8cc69726", + "6aef866a6c474dfabb2140ded933c5aa", + "d66fa096d442423c9447cbfbdc1aad8d" + ] + }, + "id": "QQn30cRtAZ-P", + "outputId": "1c5ef3c4-d107-4c43-9bd6-0ca72903db0e" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "051d193cd87f47c1971fb87544e1e615", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00[INST] {prompt} [/INST]\")\n", - "print(result[0]['generated_text'])" - ], - "metadata": { - "id": "frlSLPin4IJ4", - "colab": { - "base_uri": "https://localhost:8080/" + "model.push_to_hub(new_model, use_temp_dir=False)\n", + "tokenizer.push_to_hub(new_model, use_temp_dir=False)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyN5b5wF0ITT+T1IRzUm6Zjj", + "gpuType": "T4", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "051d193cd87f47c1971fb87544e1e615": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9d7247c119e642c5894f15ca6974ef3e", + "IPY_MODEL_a79c22bb34ec4f698a00752b47a6f631", + "IPY_MODEL_d95f3a3f26c6470d984542cdfd68bec1" + ], + "layout": "IPY_MODEL_343e11c62a59448eb43bbc0c31bf5f11" + } + }, + "0e0a20b5ed7a44e9834022e7eba2194d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a8eda8bfe08e4152a80c63830138c96d", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1f258eacd6d0472385d41523b65dea8b", + "value": 2 + } + }, + "130120644beb48acbc038651459af43c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1af01f1f1aac42b8bff46fe4df8a59ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_eee8731f316244eda5ff0765fd12bf85", + "IPY_MODEL_f135278e410f4b708435bb80fb630bcf", + "IPY_MODEL_2e6fc79bf5c149d6b0bc5c52e18debc7" + ], + "layout": "IPY_MODEL_a4b0debc025444a59abd6953b3512c0d" + } + }, + "1b6c59a51359453c926bfcddb3d0f0ea": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1ca492fddbaa4ea7a3226649154e01fd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1f258eacd6d0472385d41523b65dea8b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1f73f8b4d4da4e74adc135f2a2f6ee65": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b6c59a51359453c926bfcddb3d0f0ea", + "placeholder": "​", + "style": "IPY_MODEL_dac3669f18284161a58d52f26dffb761", + "value": "pytorch_model-00002-of-00002.bin: 100%" + } + }, + "20670478612f4b1a8a5f23d71a2609a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "228b1bcf604f454f8060a250b58008a1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "27bb18a199ca47108c7a61e9c443de36": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2bb3d36d248a48fba364f14d9e840306": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2e6fc79bf5c149d6b0bc5c52e18debc7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b29647e268414329be56047e522e28b9", + "placeholder": "​", + "style": "IPY_MODEL_27bb18a199ca47108c7a61e9c443de36", + "value": " 9.98G/9.98G [06:35<00:00, 25.8MB/s]" + } + }, + "33ebb868f3e846f6af1a1a2a8ad6a3cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1f73f8b4d4da4e74adc135f2a2f6ee65", + "IPY_MODEL_68da6e6e69c8419895bea2068760534e", + "IPY_MODEL_6dc1a868e08c4c3b8315116d2c46573b" + ], + "layout": "IPY_MODEL_7a5d714c17374104bb6f5caaa5541c10" + } + }, + "343e11c62a59448eb43bbc0c31bf5f11": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "52c4bf7418f74bc79a8c12fe35901974": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c5e609d111b34d408a53a4cd71bb43d5", + "IPY_MODEL_0e0a20b5ed7a44e9834022e7eba2194d", + "IPY_MODEL_b5627331e78e4eb28765ed20f32cf403" + ], + "layout": "IPY_MODEL_8084d4cb267f4a52b3d80ec34d291190" + } + }, + "68da6e6e69c8419895bea2068760534e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a3511f489f6d47cc8d404ab6f367b29f", + "max": 3500316627, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_20670478612f4b1a8a5f23d71a2609a7", + "value": 3500316627 + } + }, + "6aef866a6c474dfabb2140ded933c5aa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6dc1a868e08c4c3b8315116d2c46573b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b463153ec04749e38540389efa2981f7", + "placeholder": "​", + "style": "IPY_MODEL_2bb3d36d248a48fba364f14d9e840306", + "value": " 3.50G/3.50G [02:27<00:00, 26.4MB/s]" + } + }, + "7a1f3340688d408092adade75f4baac4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7a5d714c17374104bb6f5caaa5541c10": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + 
"max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8084d4cb267f4a52b3d80ec34d291190": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "84da055d24694320843e13ad37438792": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "8c887ca9b0eb44fdb8608bf36b5db5c5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "90b281e9c5ed4e77ab93e5879d0b15a3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9434350b1b9c4060812feb9ecbf63278": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "95501d0b5a22407288f008bf8cc69726": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9d7247c119e642c5894f15ca6974ef3e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a153c96bd1fe4c48a41e9b9c7c00dd6e", + "placeholder": "​", + "style": "IPY_MODEL_84da055d24694320843e13ad37438792", + "value": "Loading checkpoint shards: 100%" + } + }, + "a153c96bd1fe4c48a41e9b9c7c00dd6e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a3511f489f6d47cc8d404ab6f367b29f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a4b0debc025444a59abd6953b3512c0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a79c22bb34ec4f698a00752b47a6f631": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e375632975904402baea46163e2eeca1", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_95501d0b5a22407288f008bf8cc69726", + "value": 2 + } + }, + "a8dcdf1f7ab64242acb057e8b54ebf79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + 
"state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8eda8bfe08e4152a80c63830138c96d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b29647e268414329be56047e522e28b9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b463153ec04749e38540389efa2981f7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - "outputId": "e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n", - " warnings.warn(\n", - "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n", - " warnings.warn(\"None of the inputs have requires_grad=True. 
Gradients will be None\")\n" - ] + "b5627331e78e4eb28765ed20f32cf403": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_228b1bcf604f454f8060a250b58008a1", + "placeholder": "​", + "style": "IPY_MODEL_90b281e9c5ed4e77ab93e5879d0b15a3", + "value": " 2/2 [01:13<00:00, 33.04s/it]" + } + }, + "b6e821c974674f2290c354238d6c919c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bf722f71c61b4285bcbbf32fd619b3a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8c887ca9b0eb44fdb8608bf36b5db5c5", + "placeholder": "​", + "style": "IPY_MODEL_e4698337e6b843afac706ab657ca6af9", + "value": " 2/2 [06:36<00:00, 396.47s/it]" + } + }, + "bf77e97593a349718bdb5fd9bfd28fe3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c3a4fedc73b3480089ef9d13381471ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eeba50e8242c4753bfc0ea48e03f9078", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7a1f3340688d408092adade75f4baac4", + "value": 2 + } + }, + "c5e609d111b34d408a53a4cd71bb43d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a8dcdf1f7ab64242acb057e8b54ebf79", + "placeholder": "​", + "style": "IPY_MODEL_1ca492fddbaa4ea7a3226649154e01fd", + "value": "Loading checkpoint shards: 100%" + } + }, + "c99aff4cfd664ae8a165a27bea0566c8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", 
+ "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e4b64cab6b7b418c8a2575ee26839039", + "IPY_MODEL_c3a4fedc73b3480089ef9d13381471ed", + "IPY_MODEL_bf722f71c61b4285bcbbf32fd619b3a6" + ], + "layout": "IPY_MODEL_fd11a6148b704c5b9142c5e8de2d3b25" + } + }, + "d66fa096d442423c9447cbfbdc1aad8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d95f3a3f26c6470d984542cdfd68bec1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6aef866a6c474dfabb2140ded933c5aa", + "placeholder": "​", + "style": "IPY_MODEL_d66fa096d442423c9447cbfbdc1aad8d", + "value": " 2/2 [00:59<00:00, 27.43s/it]" + } + }, + "dac3669f18284161a58d52f26dffb761": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } + }, + "e375632975904402baea46163e2eeca1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n", - "\n", - "Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. 
These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n", - "\n", - "Some examples of large language models include:\n", - "\n", - "* BERT (Bidirectional Encoder Representations from Transformers\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Empty VRAM\n", - "del model\n", - "del pipe\n", - "del trainer\n", - "import gc\n", - "gc.collect()\n", - "gc.collect()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "e4698337e6b843afac706ab657ca6af9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - "id": "mkQCviG0Zta-", - "outputId": "e7c4ab10-4039-4490-b7f0-6ea118bdd709" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "19965" - ] - }, - "metadata": {}, - "execution_count": 7 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Reload model in FP16 and merge it with LoRA weights\n", - "base_model = AutoModelForCausalLM.from_pretrained(\n", - " model_name,\n", - " low_cpu_mem_usage=True,\n", - " return_dict=True,\n", - " torch_dtype=torch.float16,\n", - " device_map=device_map,\n", - ")\n", - "model = PeftModel.from_pretrained(base_model, new_model)\n", - "model = model.merge_and_unload()\n", - "\n", - "# Reload tokenizer to save it\n", - "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", - "tokenizer.pad_token = tokenizer.eos_token\n", - "tokenizer.padding_side = \"right\"" - ], - "metadata": { - "id": "QQn30cRtAZ-P", - "colab": { - "base_uri": 
"https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "051d193cd87f47c1971fb87544e1e615", - "9d7247c119e642c5894f15ca6974ef3e", - "a79c22bb34ec4f698a00752b47a6f631", - "d95f3a3f26c6470d984542cdfd68bec1", - "343e11c62a59448eb43bbc0c31bf5f11", - "a153c96bd1fe4c48a41e9b9c7c00dd6e", - "84da055d24694320843e13ad37438792", - "e375632975904402baea46163e2eeca1", - "95501d0b5a22407288f008bf8cc69726", - "6aef866a6c474dfabb2140ded933c5aa", - "d66fa096d442423c9447cbfbdc1aad8d" - ] + "e4b64cab6b7b418c8a2575ee26839039": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f0bcdaf940d14ad796fc7ac46c8e1e64", + "placeholder": "​", + "style": "IPY_MODEL_b6e821c974674f2290c354238d6c919c", + "value": "Upload 2 LFS files: 100%" + } }, - "outputId": "1c5ef3c4-d107-4c43-9bd6-0ca72903db0e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/2 [00:00