1 | 1 | {
2 | 2 | "cells": [
| 3 | + { |
| 4 | + "metadata": {}, |
| 5 | + "cell_type": "markdown", |
| 6 | + "source": [ |
| 7 | + "# Using Evidently Prompt Registry with Prompt Templates\n", |
| 8 | + "\n", |
| 9 | + "In this tutorial, we’ll walk through how to use the **Evidently Prompt Registry** to store, version, and reuse prompts.\n", |
| 10 | + "We’ll also see how to connect it with **LLMJudge** and prompt templates for evaluation.\n", |
| 11 | + "\n", |
| 12 | + "## Connect to Evidently Cloud\n", |
| 13 | + "First, import the CloudWorkspace and authenticate with your token.\n", |
| 14 | + "\n" |
| 15 | + ], |
| 16 | + "id": "794ee107e0bbccd8" |
| 17 | + }, |
3 | 18 | {
4 | 19 | "cell_type": "code",
5 | 20 | "id": "initial_id",
6 | 21 | "metadata": {
7 | | - "collapsed": true |
| 22 | + "collapsed": true, |
| 23 | + "jupyter": { |
| 24 | + "outputs_hidden": true |
| 25 | + } |
8 | 26 | },
9 | | - "source": "from evidently.ui.workspace import CloudWorkspace", |
| 27 | + "source": [ |
| 28 | + "from evidently.ui.workspace import CloudWorkspace" |
| 29 | + ], |
10 | 30 | "outputs": [],
|
11 | 31 | "execution_count": null
|
12 | 32 | },
|
13 | 33 | {
|
14 |
| - "metadata": {}, |
15 | 34 | "cell_type": "code",
|
16 |
| - "source": "ws = CloudWorkspace(\"\", url=\"https://pr-1885.evidently.dev/\")", |
17 | 35 | "id": "f475f54d0e3cb0c0",
|
| 36 | + "metadata": {}, |
| 37 | + "source": [ |
| 38 | + "ws = CloudWorkspace(\"your token\")" |
| 39 | + ], |
18 | 40 | "outputs": [],
|
19 | 41 | "execution_count": null
|
20 | 42 | },
|
21 | 43 | {
|
22 | 44 | "metadata": {},
|
23 |
| - "cell_type": "code", |
| 45 | + "cell_type": "markdown", |
24 | 46 | "source": [
|
25 |
| - "org_id = \"019396b3-b862-7bdb-95ea-1786c2262724\"\n", |
26 |
| - "project = ws.search_project(\"Prompts Example\", org_id=org_id)[0]" |
| 47 | + "### Select Project\n", |
| 48 | + "You need to provide your `org_id` and the project name where you want to store prompts." |
27 | 49 | ],
| 50 | + "id": "99644be6bf6b6302" |
| 51 | + }, |
| 52 | + { |
| 53 | + "cell_type": "code", |
28 | 54 | "id": "a96e7e78ef17d78c",
|
| 55 | + "metadata": {}, |
| 56 | + "source": [ |
| 57 | + "org_id = \"your org id\"\n", |
| 58 | + "project = ws.search_project(\"your project name\", org_id=org_id)[0]" |
| 59 | + ], |
29 | 60 | "outputs": [],
|
30 | 61 | "execution_count": null
|
31 | 62 | },
|
32 | 63 | {
|
33 | 64 | "metadata": {},
|
| 65 | + "cell_type": "markdown", |
| 66 | + "source": [ |
| 67 | + "## Create or Load a Prompt\n", |
| 68 | + "You can create a new prompt or load an existing one by name.\n" |
| 69 | + ], |
| 70 | + "id": "5754e6dabe37fd1c" |
| 71 | + }, |
| 72 | + { |
34 | 73 | "cell_type": "code",
|
| 74 | + "id": "774f71e32051de58", |
| 75 | + "metadata": {}, |
35 | 76 | "source": [
36 | 77 | "prompt = ws.prompts.get_or_create_prompt(project.id, \"my criteria\")\n",
37 | 78 | "prompt.list_versions()"
38 | 79 | ],
39 | | - "id": "774f71e32051de58", |
40 | 80 | "outputs": [],
41 | 81 | "execution_count": null
42 | 82 | },
43 | 83 | {
44 | 84 | "metadata": {},
| 85 | + "cell_type": "markdown", |
| 86 | + "source": [ |
| 87 | + "## Add Prompt Versions\n", |
| 88 | + "Let’s add new versions of the prompt content.\n", |
| 89 | + "This helps you track changes over time.\n" |
| 90 | + ], |
| 91 | + "id": "8499d58283f2f2b2" |
| 92 | + }, |
| 93 | + { |
45 | 94 | "cell_type": "code",
|
| 95 | + "id": "f23d3f7ebd0d33b7", |
| 96 | + "metadata": {}, |
46 | 97 | "source": [
|
47 | 98 | "criteria = \"aaaa\"\n",
|
48 | 99 | "prompt.bump_version(criteria)"
|
49 | 100 | ],
|
50 |
| - "id": "f23d3f7ebd0d33b7", |
51 | 101 | "outputs": [],
|
52 | 102 | "execution_count": null
|
53 | 103 | },
|
54 | 104 | {
|
55 |
| - "metadata": {}, |
56 | 105 | "cell_type": "code",
|
57 |
| - "source": "prompt.list_versions()", |
58 | 106 | "id": "90fd01c8828bfedb",
|
| 107 | + "metadata": {}, |
| 108 | + "source": [ |
| 109 | + "prompt.list_versions()" |
| 110 | + ], |
59 | 111 | "outputs": [],
|
60 | 112 | "execution_count": null
|
61 | 113 | },
|
62 | 114 | {
|
63 |
| - "metadata": {}, |
64 | 115 | "cell_type": "code",
|
65 |
| - "source": "prompt.get_version().content", |
66 | 116 | "id": "372186afd8fbdba1",
|
| 117 | + "metadata": {}, |
| 118 | + "source": [ |
| 119 | + "prompt.get_version().content" |
| 120 | + ], |
67 | 121 | "outputs": [],
|
68 | 122 | "execution_count": null
|
69 | 123 | },
|
70 | 124 | {
|
71 |
| - "metadata": {}, |
72 | 125 | "cell_type": "code",
|
73 |
| - "source": "prompt.bump_version(\"bbbb\")", |
74 | 126 | "id": "184ecb4d1f318477",
|
| 127 | + "metadata": {}, |
| 128 | + "source": [ |
| 129 | + "prompt.bump_version(\"bbbb\")" |
| 130 | + ], |
75 | 131 | "outputs": [],
|
76 | 132 | "execution_count": null
|
77 | 133 | },
|
78 | 134 | {
|
79 |
| - "metadata": {}, |
80 | 135 | "cell_type": "code",
|
81 |
| - "source": "prompt.get_version(\"latest\").content.as_text()", |
82 | 136 | "id": "bff5bedad1668104",
|
| 137 | + "metadata": {}, |
| 138 | + "source": [ |
| 139 | + "prompt.get_version(\"latest\").content.as_text()" |
| 140 | + ], |
83 | 141 | "outputs": [],
|
84 | 142 | "execution_count": null
|
85 | 143 | },
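| | + { |
| | + "metadata": {}, |
| | + "cell_type": "markdown", |
| | + "source": [ |
| | + "## Delete a Prompt Version\n", |
| | + "You can remove a specific version by its id. Here we delete the version returned by `get_version()` and then check the remaining content.\n" |
| | + ] |
| | + }, |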
86 | 144 | {
87 | | - "metadata": {}, |
88 | 145 | "cell_type": "code",
89 | | - "source": "prompt.delete_version(prompt.get_version().id)", |
90 | 146 | "id": "10d977c3d9fe6549",
| 147 | + "metadata": {}, |
| 148 | + "source": [ |
| 149 | + "prompt.delete_version(prompt.get_version().id)" |
| 150 | + ], |
91 | 151 | "outputs": [],
|
92 | 152 | "execution_count": null
|
93 | 153 | },
|
94 | 154 | {
|
95 |
| - "metadata": {}, |
96 | 155 | "cell_type": "code",
|
97 |
| - "source": "prompt.get_version(\"latest\").content.as_text()", |
98 | 156 | "id": "1e749db861675458",
|
| 157 | + "metadata": {}, |
| 158 | + "source": [ |
| 159 | + "prompt.get_version(\"latest\").content.as_text()" |
| 160 | + ], |
99 | 161 | "outputs": [],
|
100 | 162 | "execution_count": null
|
101 | 163 | },
|
102 | 164 | {
|
103 | 165 | "metadata": {},
|
| 166 | + "cell_type": "markdown", |
| 167 | + "source": [ |
| 168 | + "## Delete a Prompt Version\n", |
| 169 | + "You can also remove versions if needed.\n" |
| 170 | + ], |
| 171 | + "id": "86bf2b572a1e993a" |
| 172 | + }, |
| 173 | + { |
104 | 174 | "cell_type": "code",
|
105 |
| - "source": "prompt.delete()", |
106 | 175 | "id": "aebc870b0fae5768",
|
| 176 | + "metadata": {}, |
| 177 | + "source": [ |
| 178 | + "prompt.delete()" |
| 179 | + ], |
107 | 180 | "outputs": [],
|
108 | 181 | "execution_count": null
|
109 | 182 | },
|
110 | 183 | {
|
111 |
| - "metadata": {}, |
112 | 184 | "cell_type": "code",
|
113 |
| - "source": "ws.prompts.list_prompts(project.id)", |
114 | 185 | "id": "d50289a21b01362b",
|
| 186 | + "metadata": {}, |
| 187 | + "source": [ |
| 188 | + "ws.prompts.list_prompts(project.id)" |
| 189 | + ], |
| 190 | + "outputs": [], |
| 191 | + "execution_count": null |
| 192 | + }, |
| 193 | + { |
| 194 | + "metadata": {}, |
| 195 | + "cell_type": "markdown", |
| 196 | + "source": [ |
| 197 | + "## Define a Judge with Criteria\n", |
| 198 | + "Now, let’s define a **judge** that evaluates model responses using a template.\n", |
| 199 | + "We’ll use a binary classification (GOOD / BAD) with simple criteria.\n" |
| 200 | + ], |
| 201 | + "id": "1637286eb1dbf4db" |
| 202 | + }, |
| 203 | + { |
| 204 | + "cell_type": "code", |
| 205 | + "id": "41317777700f94a4", |
| 206 | + "metadata": {}, |
| 207 | + "source": [ |
| 208 | + "from evidently.llm.templates import BinaryClassificationPromptTemplate\n", |
| 209 | + "from evidently.descriptors import LLMJudge\n", |
| 210 | + "\n", |
| 211 | + "judge = LLMJudge(provider=\"openai\", model=\"gpt-4o-mini\", template=BinaryClassificationPromptTemplate(\n", |
| 212 | + " target_category=\"GOOD\",\n", |
| 213 | + " non_target_category=\"BAD\",\n", |
| 214 | + " criteria=\"\"\"Classify the model’s response with the following criteria:\n", |
| 215 | + "Correctness: Is the response factually accurate?\n", |
| 216 | + "Clarity: Is the response easy to understand?\n", |
| 217 | + "Relevance: Does it fully address the question?\n", |
| 218 | + "Output only one rating: good or bad.\"\"\"\n", |
| 219 | + "))" |
| 220 | + ], |
| 221 | + "outputs": [], |
| 222 | + "execution_count": null |
| 223 | + }, |
| 224 | + { |
| 225 | + "metadata": {}, |
| 226 | + "cell_type": "markdown", |
| 227 | + "source": [ |
| 228 | + "## Store the Judge Template in the Prompt Registry\n", |
| 229 | + "Instead of keeping the template inline, let’s store it in the registry.\n" |
| 230 | + ], |
| 231 | + "id": "8cc9a4cafc8ff314" |
| 232 | + }, |
| 233 | + { |
| 234 | + "cell_type": "code", |
| 235 | + "id": "86e411c8f0cb6731", |
| 236 | + "metadata": {}, |
| 237 | + "source": [ |
| 238 | + "template_prompt = ws.prompts.get_or_create_prompt(project.id, \"my template\")\n", |
| 239 | + "template_prompt.bump_version(judge.feature.template)" |
| 240 | + ], |
| 241 | + "outputs": [], |
| 242 | + "execution_count": null |
| 243 | + }, |
| 244 | + { |
| 245 | + "cell_type": "code", |
| 246 | + "id": "3119dcad06f5b57d", |
| 247 | + "metadata": {}, |
| 248 | + "source": [ |
| 249 | + "template_prompt.list_versions()" |
| 250 | + ], |
| 251 | + "outputs": [], |
| 252 | + "execution_count": null |
| 253 | + }, |
| 254 | + { |
| 255 | + "metadata": {}, |
| 256 | + "cell_type": "markdown", |
| 257 | + "source": [ |
| 258 | + "## Reuse the Template\n", |
| 259 | + "You can now load the template from the registry and create a new judge.\n" |
| 260 | + ], |
| 261 | + "id": "2ffd8ab794c92ba3" |
| 262 | + }, |
| 263 | + { |
| 264 | + "cell_type": "code", |
| 265 | + "id": "b75a1a697cbca9fd", |
| 266 | + "metadata": {}, |
| 267 | + "source": [ |
| 268 | + "new_judge = LLMJudge(provider=\"openai\",\n", |
| 269 | + " model=\"gpt-4o-mini\",\n", |
| 270 | + " template=template_prompt.get_version().content.template)\n", |
| 271 | + "new_judge" |
| 272 | + ], |
| 273 | + "outputs": [], |
| 274 | + "execution_count": null |
| 275 | + }, |
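| | + { |
| | + "metadata": {}, |
| | + "cell_type": "markdown", |
| | + "source": [ |
| | + "As a rough sketch, you can also keep only the judging criteria text in the registry and rebuild the template around it, using just the registry and template calls shown above.\n", |
| | + "This assumes a prompt named `my criteria` with at least one version still exists in the project; the variable names below are only for illustration.\n" |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "# Hypothetical sketch: rebuild a judge from criteria text stored in the registry.\n", |
| | + "# Assumes a prompt named \"my criteria\" with at least one version exists.\n", |
| | + "criteria_prompt = ws.prompts.get_or_create_prompt(project.id, \"my criteria\")\n", |
| | + "stored_criteria = criteria_prompt.get_version(\"latest\").content.as_text()\n", |
| | + "criteria_judge = LLMJudge(\n", |
| | + "    provider=\"openai\",\n", |
| | + "    model=\"gpt-4o-mini\",\n", |
| | + "    template=BinaryClassificationPromptTemplate(\n", |
| | + "        target_category=\"GOOD\",\n", |
| | + "        non_target_category=\"BAD\",\n", |
| | + "        criteria=stored_criteria,\n", |
| | + "    ),\n", |
| | + ")\n", |
| | + "criteria_judge" |
| | + ], |
| | + "outputs": [], |
| | + "execution_count": null |
| | + }, |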
| 276 | + { |
| 277 | + "metadata": {}, |
| 278 | + "cell_type": "markdown", |
| 279 | + "source": [ |
| 280 | + "## Clean Up\n", |
| 281 | + "Finally, remove the template prompt if you no longer need it.\n" |
| 282 | + ], |
| 283 | + "id": "f0f8532b561b783" |
| 284 | + }, |
| 285 | + { |
| 286 | + "cell_type": "code", |
| 287 | + "id": "4eae17691463d29d", |
| 288 | + "metadata": {}, |
| 289 | + "source": [ |
| 290 | + "template_prompt.delete()" |
| 291 | + ], |
115 | 292 | "outputs": [],
|
116 | 293 | "execution_count": null
|
117 | 294 | }
|
118 | 295 | ],
|
119 | 296 | "metadata": {
|
120 | 297 | "kernelspec": {
|
121 |
| - "display_name": "Python 3", |
| 298 | + "display_name": "Python 3 (ipykernel)", |
122 | 299 | "language": "python",
|
123 | 300 | "name": "python3"
|
124 | 301 | },
|
125 | 302 | "language_info": {
|
126 | 303 | "codemirror_mode": {
|
127 | 304 | "name": "ipython",
|
128 |
| - "version": 2 |
| 305 | + "version": 3 |
129 | 306 | },
130 | 307 | "file_extension": ".py",
131 | 308 | "mimetype": "text/x-python",
132 | 309 | "name": "python",
133 | 310 | "nbconvert_exporter": "python",
134 | | - "pygments_lexer": "ipython2", |
135 | | - "version": "2.7.6" |
| 311 | + "pygments_lexer": "ipython3", |
| 312 | + "version": "3.11.11" |
136 | 313 | }
137 | 314 | },
138 | 315 | "nbformat": 4,