Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

setfit example does not work #532

Open
geraldstanje opened this issue Jun 2, 2024 · 1 comment
Open

setfit example does not work #532

geraldstanje opened this issue Jun 2, 2024 · 1 comment

Comments

@geraldstanje
Copy link

geraldstanje commented Jun 2, 2024

Hi,

I took the following example and enabled the training part in the code: https://github.com/huggingface/setfit/blob/main/notebooks/setfit-onnx-optimum.ipynb

But the example gives an error:

Traceback (most recent call last):
  File "/teamspace/studios/this_studio/setfit_test/setfit-onnx-optimum-example.py", line 205, in <module>
    main()
  File "/teamspace/studios/this_studio/setfit_test/setfit-onnx-optimum-example.py", line 190, in main
    onnx_setfit_model(test_dataset["text"][:2])
  File "/teamspace/studios/this_studio/setfit_test/setfit-onnx-optimum-example.py", line 126, in __call__
    return self.predict(inputs)
  File "/teamspace/studios/this_studio/setfit_test/setfit-onnx-optimum-example.py", line 123, in predict
    return self.model_head.predict(embeddings)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/sklearn/linear_model/_base.py", line 451, in predict
    scores = self.decision_function(X)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/sklearn/linear_model/_base.py", line 432, in decision_function
    X = self._validate_data(X, accept_sparse="csr", reset=False)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/sklearn/base.py", line 605, in _validate_data
    out = check_array(X, input_name="X", **check_params)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/sklearn/utils/validation.py", line 915, in check_array
    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/sklearn/utils/_array_api.py", line 380, in _asarray_with_order
    array = numpy.asarray(array, order=order, dtype=dtype)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/_tensor.py", line 1062, in __array__
    return self.numpy()
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

It looks like the `predict` method in `OnnxSetFitModel` needs to change as follows. Does this mean the model runs on the CPU and not the GPU? Does `mean_pooling` also run on the CPU?

    def predict(self, inputs):
        """Classify ``inputs`` with the ONNX encoder and the model head.

        The texts are tokenized, moved to the ONNX model's device, encoded,
        mean-pooled with the attention mask, and handed to
        ``self.model_head.predict`` as a NumPy array.
        """
        batch = self.tokenizer(
            inputs, padding=True, truncation=True, return_tensors="pt"
        ).to(self.ort_model.device)

        hidden_states = self.ort_model(**batch)["last_hidden_state"]
        pooled = mean_pooling(hidden_states, batch["attention_mask"])

        # A CUDA tensor cannot be converted to NumPy directly; copy it to
        # host memory first. Non-CUDA tensors are passed through untouched.
        host_embeddings = pooled.cpu() if pooled.is_cuda else pooled
        return self.model_head.predict(host_embeddings.numpy())

Code:

from pathlib import Path
from time import perf_counter

import evaluate
import numpy as np
import torch
from tqdm.auto import tqdm
import os

import matplotlib.pyplot as plt
import pandas as pd

from setfit import SetFitModel
from setfit import SetFitModel, Trainer, TrainingArguments

from datasets import load_dataset
from setfit.exporters.utils import mean_pooling
from optimum.onnxruntime import ORTModelForFeatureExtraction, AutoOptimizationConfig, ORTOptimizer
from transformers import AutoTokenizer

metric = evaluate.load("accuracy")

class PerformanceBenchmark:
    """Collect size, accuracy and latency figures for a model.

    Args:
        model: Object that is callable on a list of texts, exposes
            ``predict(texts)`` and has a ``model_body`` with ``state_dict()``.
        dataset: Dataset indexable by the ``"text"`` and ``"label"`` columns.
        optim_type: Label under which ``run_benchmark`` reports the results.
    """

    def __init__(self, model, dataset, optim_type):
        self.model = model
        self.dataset = dataset
        self.optim_type = optim_type

    def compute_accuracy(self):
        """Score the model on the dataset with the module-level ``metric``.

        Returns:
            The dict produced by ``metric.compute``; its ``"accuracy"`` entry
            is also printed.
        """
        preds = self.model.predict(self.dataset["text"])
        labels = self.dataset["label"]
        accuracy = metric.compute(predictions=preds, references=labels)
        print(f"Accuracy on test set - {accuracy['accuracy']:.3f}")
        return accuracy

    def compute_size(self):
        """Measure the serialized size of the model body's state dict.

        The state dict is written to a throwaway directory rather than the
        current working directory, so an existing ``model.pt`` is never
        overwritten or deleted, and the temporary file is removed even if
        saving fails.

        Returns:
            ``{"size_mb": <size in MiB>}``.
        """
        import tempfile  # function-scope import keeps this block self-contained

        state_dict = self.model.model_body.state_dict()
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Keep the original file name so the serialized archive is unchanged.
            tmp_path = Path(tmp_dir) / "model.pt"
            torch.save(state_dict, tmp_path)
            # Calculate size in megabytes
            size_mb = tmp_path.stat().st_size / (1024 * 1024)
        print(f"Model size (MB) - {size_mb:.2f}")
        return {"size_mb": size_mb}

    def time_model(self, query="that loves its characters and communicates something rather beautiful about human nature"):
        """Time single-query inference.

        Runs 10 untimed warm-up calls followed by 100 timed calls of
        ``self.model([query])``.

        Returns:
            ``{"time_avg_ms": ..., "time_std_ms": ...}`` in milliseconds.
        """
        latencies = []
        # Warmup
        for _ in range(10):
            _ = self.model([query])
        # Timed run
        for _ in range(100):
            start_time = perf_counter()
            _ = self.model([query])
            latency = perf_counter() - start_time
            latencies.append(latency)
        # Compute run statistics
        time_avg_ms = 1000 * np.mean(latencies)
        time_std_ms = 1000 * np.std(latencies)
        print(rf"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}")
        return {"time_avg_ms": time_avg_ms, "time_std_ms": time_std_ms}

    def run_benchmark(self):
        """Run all three measurements.

        Returns:
            ``{optim_type: {...}}`` merging the size, accuracy and latency
            results into one inner dict.
        """
        metrics = {}
        metrics[self.optim_type] = self.compute_size()
        metrics[self.optim_type].update(self.compute_accuracy())
        metrics[self.optim_type].update(self.time_model())
        return metrics

def plot_metrics(perf_metrics):
    """Plot accuracy against average latency for each benchmarked entry.

    Args:
        perf_metrics: Mapping from a label to a dict holding the
            ``"time_avg_ms"``, ``"time_std_ms"``, ``"accuracy"`` and
            ``"size_mb"`` values read below.
    """
    frame = pd.DataFrame.from_dict(perf_metrics, orient="index")

    for label in frame.index:
        row = frame.loc[label]
        plt.errorbar(
            row["time_avg_ms"],
            row["accuracy"] * 100,
            xerr=row["time_std_ms"],
            fmt="o",
            alpha=0.5,
            # Marker size scales with the model size.
            ms=row["size_mb"] / 15,
            label=label,
            capsize=5,
            capthick=1,
        )

    plt.legend(loc="lower right")
    plt.ylim(63, 95)

    # The slowest entry defines the x-axis range, with 20% headroom.
    slowest_ms = max(entry["time_avg_ms"] for entry in perf_metrics.values())
    plt.xlim(0, slowest_ms * 1.2)

    plt.ylabel("Accuracy (%)")
    plt.xlabel("Average latency with batch_size=1 (ms)")
    plt.show()

class OnnxPerformanceBenchmark(PerformanceBenchmark):
    """Benchmark variant whose size is read from a model file on disk.

    Args:
        *args: Forwarded to ``PerformanceBenchmark``.
        model_path: Keyword-only path of the file measured by
            ``compute_size``.
        **kwargs: Forwarded to ``PerformanceBenchmark``.
    """

    def __init__(self, *args, model_path, **kwargs):
        super().__init__(*args, **kwargs)
        self.model_path = model_path

    def compute_size(self):
        """Return ``{"size_mb": ...}`` for the file at ``self.model_path``."""
        n_bytes = Path(self.model_path).stat().st_size
        size_mb = n_bytes / (1024 * 1024)
        print(f"Model size (MB) - {size_mb:.2f}")
        return {"size_mb": size_mb}

class OnnxSetFitModel:
    """Combine an ONNX Runtime encoder, its tokenizer and a classification head.

    Args:
        ort_model: Callable encoder exposing ``device`` and returning a mapping
            with a ``"last_hidden_state"`` entry.
        tokenizer: Tokenizer called with ``padding``, ``truncation`` and
            ``return_tensors="pt"``.
        model_head: Classifier exposing ``predict(embeddings)``.
    """

    def __init__(self, ort_model, tokenizer, model_head):
        self.ort_model = ort_model
        self.tokenizer = tokenizer
        self.model_head = model_head

    def predict(self, inputs):
        """Return the head's predictions for a list of texts.

        The texts are tokenized, moved to the encoder's device, encoded and
        mean-pooled with the attention mask before being classified.
        """
        encoded_inputs = self.tokenizer(
            inputs, padding=True, truncation=True, return_tensors="pt"
        ).to(self.ort_model.device)

        outputs = self.ort_model(**encoded_inputs)
        embeddings = mean_pooling(
            outputs["last_hidden_state"], encoded_inputs["attention_mask"]
        )
        # With a CUDA execution provider the embeddings live on the GPU, and a
        # scikit-learn head converts its input to NumPy, which fails for CUDA
        # tensors. Copy to host memory first; this is a no-op on CPU.
        return self.model_head.predict(embeddings.cpu())

    def __call__(self, inputs):
        """Alias for ``predict`` so the wrapper can be called like a model."""
        return self.predict(inputs)

def main():
    """Train a SetFit model, export it to ONNX with Optimum and benchmark both.

    Steps: fine-tune the base model on SST-2, benchmark it, export and
    optimize it as an ONNX model on the CUDA execution provider, benchmark
    the ONNX pipeline, then plot the results and print the latency speedup.
    """
    # Labels under which each configuration is stored in ``perf_metrics``.
    # The speedup line at the end looks the results up by these same labels,
    # so they are defined once to keep the two in sync.
    baseline_label = "bge-small (base)"
    onnx_label = "bge-small (optimum ONNX)"

    # Set the TOKENIZERS_PARALLELISM environment variable
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'

    dataset = load_dataset("SetFit/sst2")
    #dataset
    train_dataset = dataset["train"]
    test_dataset = dataset["validation"]

    # Evaluate the uploaded model!
    #model = SetFitModel.from_pretrained("dkorat/bge-small-en-v1.5_setfit-sst2-english")
    #pb = PerformanceBenchmark(model=model, dataset=test_dataset, optim_type="bge-small (PyTorch)")
    #perf_metrics = pb.run_benchmark()

    # Fine-tune the base model and Evaluate!
    # Load pretrained model from the Hub
    model = SetFitModel.from_pretrained(
        "sentence-transformers/all-MiniLM-L6-v2" #"BAAI/bge-small-en-v1.5"
    )
    args = TrainingArguments(num_iterations=20)

    # Create trainer
    small_trainer = Trainer(
        model=model, args=args, train_dataset=train_dataset
    )
    # Train!
    small_trainer.train()

    # Save the model locally (change the model name accordingly)
    model.save_pretrained("setfit-test-model-example")

    # Evaluate!
    pb = PerformanceBenchmark(
        model=small_trainer.model, dataset=test_dataset, optim_type=baseline_label
    )
    perf_metrics = pb.run_benchmark()

    plot_metrics(perf_metrics)

    #!pip install optimum[onnxruntime-gpu] -qqq

    # Load a PyTorch model and export it to the ONNX format
    ort_model = ORTModelForFeatureExtraction.from_pretrained(
        "setfit-test-model-example", #"dkorat/bge-small-en-v1.5_setfit-sst2-english",
        export=True,
        provider="CUDAExecutionProvider",
    )

    # Create the optimizer
    optimizer = ORTOptimizer.from_pretrained(ort_model)

    # Optimize using the appropriate optimization strategy
    opt_model_path = optimizer.optimize(save_dir="bge_auto_opt_O2", optimization_config=AutoOptimizationConfig.O2())

    # Load the optimized ONNX model
    ort_model = ORTModelForFeatureExtraction.from_pretrained(opt_model_path, provider="CUDAExecutionProvider")

    # Load the tokenizer saved next to the optimized model
    tokenizer = AutoTokenizer.from_pretrained(opt_model_path, model_max_length=512)
    onnx_setfit_model = OnnxSetFitModel(ort_model, tokenizer, model.model_head)

    # Perform inference
    onnx_setfit_model(test_dataset["text"][:2])

    pb = OnnxPerformanceBenchmark(
        onnx_setfit_model,
        test_dataset,
        onnx_label,
        model_path="bge_auto_opt_O2/model_optimized.onnx",
    )
    perf_metrics.update(pb.run_benchmark())

    plot_metrics(perf_metrics)

    # Compare against the baseline that was actually benchmarked above; the
    # "bge-small (PyTorch)" run is commented out, so that key does not exist.
    speedup = perf_metrics[baseline_label]['time_avg_ms'] / perf_metrics[onnx_label]['time_avg_ms']
    print(f"Speedup: {speedup:.2f}x")

if __name__ == "__main__":
    main()

Logs:
setfit_test.txt

@geraldstanje
Copy link
Author

geraldstanje commented Jun 8, 2024

@MosheWasserb

according to https://github.com/huggingface/setfit/pull/435/files and https://github.com/huggingface/setfit/blob/main/docs/source/en/tutorials/onnx.mdx there should be the following in the notebook:

self.model_head.predict(embeddings.cpu())

but https://raw.githubusercontent.com/huggingface/setfit/main/notebooks/setfit-onnx-optimum.ipynb uses:

self.model_head.predict(embeddings)

is that a bug?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant