@@ -14,6 +14,9 @@
 
 import datasets
 import numpy as np
+import os
+
+PATH_GTE = 'models/gte'
 
 def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
     last_hidden = mx.multiply(last_hidden_state, attention_mask[..., None])
@@ -123,15 +126,17 @@ def __call__(
         y = self.encoder(x, attention_mask)
         return y, mx.tanh(self.pooler(y[:, 0]))
 
-
 class GteModel:
     def __init__(self) -> None:
-        model_path = snapshot_download(repo_id="vegaluisjose/mlx-rag")
+        model_path = PATH_GTE
+        if not os.path.exists(model_path):
+            snapshot_download(repo_id="vegaluisjose/mlx-rag", local_dir=model_path)
+            snapshot_download(repo_id="thenlper/gte-large", allow_patterns=["vocab.txt", "*.json"], local_dir=model_path)
         with open(f"{model_path}/config.json") as f:
             model_config = ModelConfig(**json.load(f))
         self.model = Bert(model_config)
         self.model.load_weights(f"{model_path}/model.npz")
-        self.tokenizer = BertTokenizer.from_pretrained("thenlper/gte-large")
+        self.tokenizer = BertTokenizer.from_pretrained(model_path)
 
     def __call__(self, input_text: List[str]) -> mx.array:
         tokens = self.tokenizer(input_text, return_tensors="np", padding=True)
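In effect, the first construction of GteModel downloads the model weights and the tokenizer files into a local models/gte folder, and every later construction loads from disk without touching the network. A minimal sketch of that behavior (assuming this file is importable as a module named model; the import path is an assumption):

```python
from model import GteModel  # hypothetical module name for the file being patched

gte = GteModel()  # first run: populates models/gte via snapshot_download;
                  # later runs: os.path.exists(PATH_GTE) is True, loads locally
embeddings = gte(["what is retrieval-augmented generation?"])  # returns an mx.array
```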
@@ -201,4 +206,4 @@ def __call__(self, text, n_topk=1):
         query_embed = self.embed(text)
         scores = mx.matmul(query_embed, self.list_embed.T)
         list_idx = mx.argsort(scores)[:, :-1 - n_topk:-1].tolist()
-        return [[self.list_api[j] for j in i] for i in list_idx]
+        return [[self.list_api[j] for j in i] for i in list_idx]
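As an aside on the last hunk: mx.argsort sorts ascending, so the slice [:, :-1 - n_topk:-1] walks the tail of that ordering backwards, yielding the indices of the n_topk highest scores in descending order. A tiny worked example with made-up values:

```python
import mlx.core as mx

scores = mx.array([[0.1, 0.9, 0.4, 0.7]])
order = mx.argsort(scores)   # ascending by score: [[0, 2, 3, 1]]
top2 = order[:, :-1 - 2:-1]  # last two entries, reversed: [[1, 3]]
# index 1 (score 0.9) and index 3 (score 0.7) are the two best matches
```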