diff --git a/evaluation/results/solution_optimization_mm/README.md b/evaluation/results/solution_optimization_mm/README.md new file mode 100644 index 0000000..94a54b3 --- /dev/null +++ b/evaluation/results/solution_optimization_mm/README.md @@ -0,0 +1,52 @@ +# Results for Solution Optimization on Multimodal Tasks + +We are pleased to release the results of our solution optimization experiments for multimodal tasks, including [MathVista](https://mathvista.github.io/) and [ScienceQA](https://scienceqa.github.io/) tasks. This release aims to ensure transparency and provide detailed insights into our optimization processes. + +The files in this repository contain the solution trajectory, final solution, and the answers for each question in the datasets. Each `.json` file (can be downloaded from google drive) is structured as a dictionary where the key is the question index, and the value is another dictionary with the following possible keys: + +- **`question`**: The question text. +- **`answer`**: The ground truth answer. +- **`predictions`**: The trajectory of solutions throughout the optimization process. +- **`loss_history`**: The history of loss function values over different iterations of optimization. +- **`performance_history`**: The prediction score, where 1 indicates a correct answer and 0 indicates an incorrect answer. +- **`result_data`**: The intermediate results of predictions. +- **`ques_data`**: The metadata of the question, which is useful for further fine-grained analysis. + +## Notes +Solution optimization in general depends heavily on the test-time objective. Depending on the test-time objective, e.g. the model can be driven to explore more, which is the approach we took. For this reason, we used majority voting to get the final prediction. There are lots of interesting questions around identifying good test-time training strategies! + +## Experiment on MathVista + +[MathVista](https://mathvista.github.io/) is a comprehensive benchmark for mathematical reasoning within visual contexts. It emphasizes the diversity and complexity of visual perception and mathematical reasoning challenges. + +To conduct an experiment on MathVista, use the following example command: + +```sh +cd evaluation +python solution_optimization_mm.py --task mathvista \ +--engine=gpt-4o \ +--eval_engine=gpt-4o \ +--max_iterations 4 \ +--num_threads 10 \ +--majority_voting +``` + +The resulting `mathvista_predictions.json` file can be downloaded from [here](https://drive.google.com/drive/u/0/folders/1hJ6kQozkTNiUPxtEhZCgCqnK2EFW5MHY). + +## Experiment on ScienceQA + +[ScienceQA](https://scienceqa.github.io/) (Science Question Answering) is a multimodal benchmark consisting of multiple-choice questions covering a diverse set of science topics. It challenges participants to understand scientific images, retrieve relevant knowledge, and provide accurate reasoning for high-school-level scientific questions. + +Running an experiment on ScienceQA, use the following example command: + +```sh +cd evaluation + +python solution_optimization_mm.py --task scienceqa \ +--engine=gpt-4o \ +--eval_engine=gpt-4o \ +--max_iterations 8 \ +--num_threads 20 +``` + +The resulting `scienceqa_predictions.json` file can be downloaded from [here](https://drive.google.com/file/d/1BkMD3CcaxAUpB-9L0aI8bLEqoo8jHqlZ/view?usp=sharing). diff --git a/evaluation/solution_optimization_mm.py b/evaluation/solution_optimization_mm.py new file mode 100644 index 0000000..f8bedd2 --- /dev/null +++ b/evaluation/solution_optimization_mm.py @@ -0,0 +1,285 @@ +import re +import os +import json +import argparse +import concurrent +from tqdm import tqdm +import numpy as np +from dotenv import load_dotenv +load_dotenv(override=True) +from collections import Counter + +import textgrad as tg +from textgrad.tasks.multimodal import load_multimodal_instance_task + +def load_checkpoint_results(checkpoint_file): + all_solutions = {} + if checkpoint_file.endswith(".jsonl"): + with open(checkpoint_file, "r") as f: + for line in f: + data = json.loads(line) + all_solutions[data["ques_data"]["pid"]] = data + elif checkpoint_file.endswith(".json"): + with open(checkpoint_file, "r") as f: + all_solutions = json.load(f) + print("Loaded existing results from", checkpoint_file) + print(f"There are {len(all_solutions)} samples in the file!") + return all_solutions + +class MajorityVoting: + def __init__(self, solutions, max_iterations): + self.solutions = solutions + self.max_iterations = max_iterations + + def majority_element(self, predictions): + count = Counter(predictions) + return max(count.keys(), key=count.get) + + def compute_accuracy(self): + final_score = 0 + for n in range(1, self.max_iterations + 1): + correct = 0 + for _, solution in self.solutions.items(): + answer = solution["answer"] + predictions = [res["normalized_answer"] for res in solution["result_data"]] + predictions = predictions[1:n+1] + majority = self.majority_element(predictions) + if majority == answer: + correct += 1 + acc = round(correct / len(self.solutions), 3) + if acc > final_score: + final_score = acc + return final_score + +def config(): + parser = argparse.ArgumentParser(description="Optimize a prompt for a task.") + parser.add_argument("--task", type=str, default="mathvista", choices = ["mathvista", "scienceqa"], help="The task to evaluate the model on.") + parser.add_argument("--multimodal", action='store_true', help="Determine if we want to load multimodal dataset.") + parser.add_argument("--task_instruction", type=str, default=None, help="The instruction for the task.") + parser.add_argument("--evaluation_instruction", type=str, default=None, help="The instruction for the evaluation.") + parser.add_argument("--instance_role", type=str, default=None, help="The role description of the instance.") + parser.add_argument("--image_role", type=str, default=None, help="The role description of the image.") + parser.add_argument("--question_role", type=str, default=None, help="The role description of the question.") + parser.add_argument("--cache_root", type=str, default=None, help="The cache directory to save data.") + parser.add_argument("--results_dir", type=str, default="./results/solution_optimization_mm", help="The directory to save the results.") + parser.add_argument("--output_file", type=str, default="", help="The output file to save or load the results.") + parser.add_argument("--save_every", type=int, default=50, help="The frequency to save the results.") + parser.add_argument("--engine", type=str, default="gpt-4o", help="The API to use for inference.") + parser.add_argument("--eval_engine", type=str, default="gpt-4o", help="The API to use for evaluation.") + parser.add_argument("--max_iterations", type=int, default=3, help="The maximum number of iterations of test-time updates.") + parser.add_argument("--num_threads", type=int, default=16, help="The number of threads to use for evaluation.") + parser.add_argument("--test_num", type=int, default=-1, help="The number of test samples to evaluate., -1 means all.") + parser.add_argument("--majority_voting", action='store_true', default=False, help="Determine if we want to use majority voting.") + return parser.parse_args() + +def get_zeroshot_answer(question, mm_data=None): + """Getting the zero-shot answer from an LLM without optimizing the response at test time.""" + # The system prompt is from: https://github.com/openai/simple-evals/blob/main/sampler/chat_completion_sampler.py + STARTING_SYSTEM_PROMPT = ( + "You are ChatGPT, a large language model trained by OpenAI, based on the GPT-4 architecture." + + "\nKnowledge cutoff: 2023-12\nCurrent date: 2024-07-01" + ) + if args.question_role is not None: + question_role = args.question_role + else: + question_role = "question to the language model" + if args.image_role is not None: + image_role = args.image_role + else: + image_role = "image associate with question to the language model" + system_prompt = tg.Variable(STARTING_SYSTEM_PROMPT, requires_grad=False, role_description="system prompt to the language model") + if mm_data is None: # vanilla text. + model = tg.BlackboxLLM(llm_engine, system_prompt) + response = model(tg.Variable(question, requires_grad=False, role_description=question_role)) + else: # multi-modal (such as image). + from textgrad.autograd import MultimodalLLMCall + variable_mm_data = tg.Variable(mm_data, requires_grad=False, role_description=image_role) + variable_question = tg.Variable(question, requires_grad=False, role_description=question_role) + response = MultimodalLLMCall(args.engine)([variable_mm_data, variable_question]) + return response + +def run_test_time_training(sample): + performance_history = [] + results_data = [] + mm_data, question, answer, ques_data, test_time_objective, instance_eval_fn = sample # NOTE: check the sample format + zero_shot_response = get_zeroshot_answer(question, mm_data) + + # NOTE: Set the instance role description + if args.instance_role is not None: + instance_role_description = args.instance_role + else: + if args.task == "mathvista": + instance_role_description = "detailed reasoning and prediction for the image-related math question." + elif args.task == "scienceqa": + instance_role_description = "detailed reasoning and prediction for the image-related science question." + else: + instance_role_description = "precise and short answer to the image-related question." + + instance_var = tg.Variable(zero_shot_response.value, requires_grad=True, role_description=instance_role_description) + + # Evaluate the zero-shot response + # NOTE: The instance_eval_fn should return a tuple of (score, result_data) if result_data is needed. + eval_score = instance_eval_fn(instance_var) + if isinstance(eval_score, tuple): + score, result_data = eval_score + else: + score = eval_score + result_data = None + performance_history.append(int(score)) + results_data.append(result_data) + + optimizer = tg.TextualGradientDescent(engine=llm_engine, + parameters=[instance_var], + constraints=["Your response should answer the question given an image."]) + + predictions = [] + predictions.append(tg.Variable( + instance_var.value, + role_description=instance_var.role_description + )) + + loss_history = [] + + # Start test time optimization + for _ in range(args.max_iterations): + optimizer.zero_grad() + # Compute the test time loss + test_time_loss = test_time_objective(instance_var) + loss_history.append(str(test_time_loss)) + test_time_loss.backward() + optimizer.step() + + eval_score = instance_eval_fn(instance_var) + # NOTE: The instance_eval_fn should return a tuple of (score, result_data) if result_data is needed. + if isinstance(eval_score, tuple): + score, result_data = eval_score + else: + score = eval_score + result_data = None + + predictions.append(tg.Variable( + instance_var.value, + role_description=instance_var.role_description + )) + performance_history.append(int(score)) + results_data.append(result_data) + + return question, answer, predictions, performance_history, loss_history, results_data, ques_data # NOTE: check the return format + +args = config() +llm_engine = tg.get_engine(engine_name=args.engine) +tg.set_backward_engine(llm_engine, override=True) + +eval_engine = tg.get_engine(engine_name=args.eval_engine) +test_set = load_multimodal_instance_task(args.task, + evaluation_api=eval_engine, + task_instruction=args.task_instruction, # NOTE: check the instruction + evaluation_instruction=args.evaluation_instruction, # NOTE: check the instruction + root=args.cache_root) + +# Create the results directory if it does not exist +os.makedirs(args.results_dir, exist_ok=True) +output_file = f"{args.task}_predictions.json" if args.output_file == "" else args.output_file +output_file = os.path.join(args.results_dir, output_file) +output_jsonl_file = output_file.replace(".json", ".jsonl") + +# Read the checkpoint result file if it exists +if os.path.exists(output_jsonl_file): + all_solutions = load_checkpoint_results(output_jsonl_file) +elif os.path.exists(output_file): + all_solutions = load_checkpoint_results(output_file) +else: + all_solutions = {} +all_history = [solution["performance_history"] for solution in all_solutions.values()] + +if args.num_threads > 0: + # Use ThreadPoolExecutor + with concurrent.futures.ThreadPoolExecutor(max_workers=args.num_threads) as executor: + futures = [] + print("\nPreparing to run test-time training...") + for i, sample in enumerate(test_set): + image_bytes, query, answer, ques_data, test_time_objective, instance_eval_fn = sample # NOTE: check the sample format + pid = ques_data["pid"] # NOTE: check the question id + if pid in all_solutions: + # print(f"Skipping sample {pid}") + continue + if args.test_num > 0 and i >= args.test_num: + break + future = executor.submit(run_test_time_training, sample) + futures.append(future) + + print("\nRunning test-time training...") + for _, future in enumerate(tqdm(concurrent.futures.as_completed(futures), total=len(futures), position=0)): + question, answer, predictions, performance_history, loss_history, result_data, ques_data = future.result() # NOTE: check the return format + pid = ques_data["pid"] # NOTE: check the question id + all_solutions[pid] = { + "question": question, + "answer": answer, + "predictions": [p.value for p in predictions], + "loss_history": loss_history, + "performance_history": performance_history, + "result_data": result_data, + "ques_data": ques_data + } + all_history.append(performance_history) + # save result to output file in append mode + with open(output_jsonl_file, "a") as f: + json_line = json.dumps(all_solutions[pid]) + f.write(json_line + "\n") +else: + # Use regular for loop + if args.test_num > 0: + total = min(args.test_num, len(test_set)) + else: + total = len(test_set) + for i, sample in enumerate(tqdm(test_set, total=total, position=0)): + image_bytes, query, answer, ques_data, test_time_objective, instance_eval_fn = sample # NOTE: check the sample format + pid = ques_data["pid"] # NOTE: check the question id + if pid in all_solutions: + continue + if args.test_num > 0 and i >= args.test_num: + break + future = run_test_time_training(sample) + + question, answer, predictions, performance_history, loss_history, result_data, ques_data = future # NOTE: check the return format + result = { + "question": question, + "answer": answer, + "predictions": [p.value for p in predictions], + "performance_history": performance_history, + "loss_history": loss_history, + "result_data": result_data, + "ques_data": ques_data + } + all_solutions[pid] = result + all_history.append(performance_history) + + # save result to output file in append mode + with open(output_jsonl_file, "a") as f: + json_line = json.dumps(result) + f.write(json_line + "\n") + if i % args.save_every == 0: + with open(output_file, "w") as f: + print("Saving results to", output_file) + json.dump(all_solutions, f, indent=4) + +# Quick summary of the results +all_solutions = {k: all_solutions[k] for k in sorted(all_solutions, key=lambda x: int(x))} # sorted by int(pid) +with open(output_file, "w") as f: + json.dump(all_solutions, f, indent=4) + +print("\nSummary of the results:") +score_history = np.array(all_history).mean(axis=0) +score_history = [round(float(s), 3) for s in score_history] +print(score_history) + +if args.majority_voting: + print("\nMajority Voting:") + majority_voting = MajorityVoting(all_solutions, args.max_iterations) + final_score = majority_voting.compute_accuracy() +else: + final_score = round(max(np.array(all_history).mean(axis=0)), 3) + +print(f"Final Accuracy: {final_score}") +print(f"Total samples: {len(all_solutions)}") +print("Done!") diff --git a/examples/notebooks/TextGrad-Vision-MathVista.ipynb b/examples/notebooks/TextGrad-Vision-MathVista.ipynb new file mode 100644 index 0000000..cba0942 --- /dev/null +++ b/examples/notebooks/TextGrad-Vision-MathVista.ipynb @@ -0,0 +1,486 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 21, + "id": "8dd3140c-45d0-478e-b184-ec5faed66964", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# Some utils to read images\n", + "\n", + "import io\n", + "from PIL import Image" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "3ca4c62b-2d83-412d-b410-0ed5272a6f06", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "import textgrad as tg\n", + "from textgrad import get_engine\n", + "from textgrad.autograd import MultimodalLLMCall\n", + "from textgrad.loss import ImageQALoss" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5fae4779", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Assuming the OpenAI API key is stored in an environment variable named 'OPENAI_API_KEY'\n", + "openai_api_key = os.getenv('OPENAI_API_KEY')\n", + "assert openai_api_key is not None and len(openai_api_key) > 0, \"Please set the OPENAI_API_KEY environment variable\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d1986a01-3afd-46a8-a99e-e977b1141768", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "tg.set_backward_engine(\"gpt-4o\", override=True)" + ] + }, + { + "cell_type": "markdown", + "id": "48623c79-b296-44b6-b688-70b5cf5c2f95", + "metadata": {}, + "source": [ + "# Simply answering questions about images" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "e0629de4-9fcf-4df4-9316-cc86455929e6", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00'\n" + ] + } + ], + "source": [ + "# import httpx\n", + "\n", + "# image_url = \"https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg\"\n", + "# image_data = httpx.get(image_url).content\n", + "\n", + "# Import necessary library\n", + "import httpx\n", + "\n", + "# Path to the local image\n", + "image_path = \".assets/176.png\"\n", + "\n", + "# Read the local image file in binary mode\n", + "with open(image_path, 'rb') as file:\n", + " image_data = file.read()\n", + "\n", + "# Print the first few bytes of the image data to verify (optional)\n", + "print(image_data[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a4a2fadb", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end. \\nQuestion: Find $RS$ if $\\triangle QRS$ is an equilateral triangle. \\nChoices: (A) 0.5 (B) 1 (C) 1.5 (D) 2'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "image_variable = tg.Variable(image_data, role_description=\"image to answer a question about\", requires_grad=False)\n", + "\n", + "# MathVista-176, ground truth = (D) 2\n", + "question_text = \"\"\"\n", + "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end. \n", + "Question: Find $RS$ if $\\triangle QRS$ is an equilateral triangle. \n", + "Choices: (A) 0.5 (B) 1 (C) 1.5 (D) 2\n", + "\"\"\"\n", + "question_text = question_text.strip()\n", + "question_text" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "22829367", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Variable(value=Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end. \n", + "Question: Find $RS$ if $\triangle QRS$ is an equilateral triangle. \n", + "Choices: (A) 0.5 (B) 1 (C) 1.5 (D) 2, role=question, grads=set())" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question_variable = tg.Variable(question_text, role_description=\"question\", requires_grad=False)\n", + "question_variable" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "4858dce2", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Variable(value=Since \\( \\triangle QRS \\) is an equilateral triangle, all sides are equal. Therefore, we can set up the following equations based on the given side lengths:\n", + "\n", + "\\[ 4x = 6x - 1 \\]\n", + "\\[ 4x = 2x + 1 \\]\n", + "\n", + "First, solve the equation \\( 4x = 6x - 1 \\):\n", + "\n", + "\\[ 4x = 6x - 1 \\]\n", + "\\[ 4x - 6x = -1 \\]\n", + "\\[ -2x = -1 \\]\n", + "\\[ x = \\frac{1}{2} \\]\n", + "\n", + "Next, solve the equation \\( 4x = 2x + 1 \\):\n", + "\n", + "\\[ 4x = 2x + 1 \\]\n", + "\\[ 4x - 2x = 1 \\]\n", + "\\[ 2x = 1 \\]\n", + "\\[ x = \\frac{1}{2} \\]\n", + "\n", + "Now that we have \\( x = \\frac{1}{2} \\), we can find the length of \\( RS \\):\n", + "\n", + "\\[ RS = 2x + 1 \\]\n", + "\\[ RS = 2\\left(\\frac{1}{2}\\right) + 1 \\]\n", + "\\[ RS = 1 + 1 \\]\n", + "\\[ RS = 2 \\]\n", + "\n", + "Therefore, the length of \\( RS \\) is 2. The correct option is (D)., role=response from the language model, grads=set())" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response = MultimodalLLMCall(\"gpt-4o\")([image_variable, question_variable])\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "7d541981-ed39-4ae1-9ca0-7ce751709972", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAEkCAYAAABHfgZAAAABRmlDQ1BJQ0MgUHJvZmlsZQAAeJxjYGASSSwoyGFhYGDIzSspCnJ3UoiIjFJgf8LAzCDAIM6gzSCRmFxc4BgQ4ANUwgCjUcG3awyMIPqyLsis20t6bu0rWcNrOvtoo53qmxxM9SiAKyW1OBlI/wHi9OSCohIGBsYUIFu5vKQAxO4AskWKgI4CsueA2OkQ9gYQOwnCPgJWExLkDGTfALIFkjMSgWYwvgCydZKQxNOR2FB7QYDXxdXHRyHY1cjc0NKDgHtJBiWpFSUg2jm/oLIoMz2jRMERGEqpCp55yXo6CkYGRgYMDKAwh6j+fAMcloxiHAixlHYGBmMtoCAXQizrCQPDnukMDIKnEGLq2UBv2TEwHIgsSCxKhDuA8RtLcZqxEYTNvZ2BgXXa//+fwxkY2DUZGP5e////9/b///8uY2BgvgXU+w0Aq89gTzqng/UAADVvSURBVHgB7Z0HmBTF1oaP5Jwl55xzWkD5QSWIgIGkgDknBBUEBCWIgAjiNes1EgQMgCgoVzGQMyw5ZyTnHPavr2Z66F1md2d2e6a7Z77zPLvb011dVf1W75nuqhNuiFMiFBIgARJwKYE0Lu03u00CJEACmgCVGG8EEiABVxOgEnP18LHzJEACVGK8B0iABFxNgErM1cPHzpMACVCJ8R4gARJwNQEqMVcPHztPAiRAJcZ7gARIwNUEqMRcPXzsPAmQAJUY7wESIAFXE6ASc/XwsfMkQAJUYrwHSIAEXE2ASszVw8fOkwAJUInxHiABEnA1ASoxVw8fO08CJEAlxnuABEjA1QSoxFw9fOw8CZAAlRjvARIgAVcToBJz9fCx8yRAAlRivAdIgARcTYBKzNXDx86TAAmkIwIS2L5tm8z+3/8CApE+fXrJmzevFClcWGrVri1p0vB7MCBwLBQyAlRiIUPrnoqn//STjBg+POgO58+fXx599FF59rnngj6XJ5CAVQT4NWoVSRfXs2LFihT1/uDBgzJs2DD54P33U3Q+TyIBKwjcwOS5VmB0dx01a9QQKCRI8+bNJSYmxu8FXb16VU6cOCF///23rFmzxlfmhhtukNmzZ0vlKlV8+7hBAuEiwNfJcJF2aDt79+71KTB08YEHH5Tbbrstyd4iafyLL74o306cqMvh83L1NEclliQ2HgwRAb5OhgisW6pN+CpZq1atZLuOJ69u3brFK4cJfwoJ2EGASswO6g5qc6VpPqxYsWKSL1++gHqXKWPGeOUCPS/eSfxAAhYQoBKzAKKbq1i+fLmv+4E8hRmFZ86caWwKnsJqK3MLCgnYQYBKzA7qDmnzypUrsnr1al9vYPcViJw/f17GjR/vK3rzzTdL7ty5fZ+5QQLhJEAlFk7aDmtr08aNcvbsWV+vAnkSO378uHTp3FkO/PuvPg9PYX379vXVwQ0SCDcBrk6Gm7iD2ks4qb9WmU1s27r1uh7iiQ3Ka+26dfLbr7/6FF/atGllxIgRXJW8jhh3hJMAlVg4aTusrYRKrH///gH3EBP57ysj15vUqySFBOwkwNdJO+nb3DZsu1IqmBfLmClTSk/neSRgGQFa7FuG0l0VYS6sQvnygldFSOvWraVsuXJ+L+L06dOyccMGWbVqlZw5c8ZXpk6dOvLTjBm+z9wgATsI8HXSDuoOaDM2NtanwNCdl3v3looVKybZs82bN0uHe+6RQ4cO6XJb/cyfJVkBD5JACAjwdTIEUN1Qpdk+LGvWrFJePZUlJ+XUk1qbNm18xXLkyOHb5gYJ2EWASswu8ja3a7bUr6EcwAONC5bdpLhy5sxp81WweRIQoRKL0rvA/CRWMwB/SQMTzDAMwZMZhQTsJkAlZvcI2NA+5rQQvcKQYFyGMJdmSCCvoEZZ/iWBUBGgEgsVWQfXa36VRDcDsdRHOcQSMyb18blChQr4QyEBWwlQidmK357Gza+SBQoUkEKFCgXUkV27dsUrV6Zs2Xif+YEE7CBAJWYHdZvbXLFypa8HgT6F4YTDhw/7zsPG1i1b4n3mBxKwgwCVmB3UbW5zo3L8NiQYJXbu3DnjNP33vffei/eZH0jADgK02LeDOtskARKwjACfxCxDyYpIgATsIEAlZgd1tkkCJGAZASoxy1CyIhIgATsIUInZQd0Fbe49dkZ+X3vNINYFXWYXo5QAlViUDnxyl/3KpEXy7Ndz5dKVq8kV5XESsJUAlZit+J3Z+MItB2T8/M2yYd9xeX/2Wmd2kr0iAS8BKjHeCvEIqGTe0mPcfMFfSO9vF8qhU+c9H/ibBBxIgErMgYNiZ5e+mbdJFm896OsCXicHfLfE95kbJOA0AlRiThsRG/tz+vwl6Tt5se5BriwZpGwBT7ywz/5cL6t2HbGxZ2yaBBInQCWWOJuoOzJs+grZp1YlIa/fXVfGPdVcbrhB5MrVOHlBvWJSSMCJBKjEnDgqNvRp28GTMmbWat1ypcK55elbq0iDMvmle2NP2Oo/1++T75dss6FnbJIEkiZAJZY0n6g5+vLEhXL+kifz0ZhuMZI+refWeLNTfcmWKb3mYC4TNWB4oY4nQCXm+CEKfQf/WLdXfli6XTd0R60S0rJaMV+jhXNnlb5ta+nP2w+dkrd/WeU7xg0ScAIBKjEnjIKNfcB8V89xC3QPMqRLI2/fF3Ndb3q1ri6lbsyu9w+fsVJgzU8hAacQoBJzykjY1I9P5qyX1bs9K4/Ptagq5Qt6ViTN3cmUPq28dW9DvUuvYE7yrGCay3CbBOwiQCVmF3kHtHvszAUZ+L3HBix/jswyoH2dRHt1T73S0qxyYX183PxNsshkS5boSTxAAmEgQCUWBshObWLQj8vksNca/42O9SWnsg1LSt7p1kjSprlBW/P3+Gaez6o/qXN4jARCTYBKLNSEHVr/+n3H5IP/efwia5XIJw83TT5zUfVieeWx/6ukrwhPYrDup5CA3QSoxOweAZva7zV+gY5QAWNWPGGlwUYAMqRDPcmdNaMuCet+zJFRSMBOAlRidtK3qe0ZK3bKrNW7desd65eRmysGlrINJ+TLnkkG3umZO4N1/5s/rbDpKtgsCXgIUIlF2Z1w8fJVeXGCx6Qic4Z0MrJLg6AJPHNbFYFVP2T0zNUC+zEKCdhFgErMLvI2tfuf39bIpn9P6NZfblNDSuTz2H8F0x1Y84/u6rEng5U/LPkpJGAXASoxu8jb0O7Bk+dkyLRluuWiebJK7zY1U9yLVtWLSZuaxfX58Kmcs25fiuviiSSQGgJUYqmh57JzX1VxwU6cvah7PaJzQ8maMV2qrmB010YCK38IolzA+p9CAuEmQCUWbuI2tbdi52H5758bdOuNyhWQe2PKpronsO5/9raquh5Y/X+q4o5RSCDcBKjEwk3chvYQahpPSlfVBkwpxnZvrOOEWdEVrFTC2h+CCLDHvU96VtTNOkggEAJUYoFQcnmZKYu3yt8b9uuruL9Jealb6kbLrghW/kOV7RgE1v+DflxqWd2siAQCIUAlFgglF5c5d/GySvaxSF9BdhUXbJiKD2a1PNy0otQskVdXi+xIyJJEIYFwEaASCxdpm9oZ9ctq2XnYY8fVv31tKZQri+U9gT/l2G6Ndb1ILNJz/HzL22CFJJAYASqxxMhEwP49R8/IiBkei/oy+XPIC62qheyqYPXfsX5pXT+8AX5euStkbbFiEjAToBIz04iw7T6TFsqZC5f1VY1SwQ4zpksb0itEzDF4AUDgFQDvAAoJhJoAlVioCdtU//zNB2Tigi269VurFJE765QMeU9g/f/S7dV1Oxv3H5f3Zq8JeZtsgASoxCLwHoApxQvjPPG+0ikXoTEqSkW4pM8dtQTeAJDBU5cJvAQoJBBKAlRioaRrU91fz90kS7Yd0q0/0aySVC2aJ2w9gRfA8M4ep3J4B8BLgEICoSRAJRZKujbUfUrF9+rnzeKdJ1tGGXRP3bD34r6YchJTtoBu9/O/NsjKnZ4Y/mHvCBuMCgJUYhE2zG9MWy77j5/VV/X6XXUlb7ZMYb9CxFeEVwC8AzzZw+eFvQ9sMHoIUIlF0FhvVVm835kVq6+oSpHc8tQtlW27unqlbxR4B0D+Ut4CUxYze7htgxHhDVOJRdAAv6TMGi5cvqKvCBEmMKlvp8A7AF4CkN4q5hi8BygkYDUBe+9yq68miuv7fe1embpshybQrnZJaVGtqO004B3Qr11t3Y8dymvgbRUFlkICVhOgErOaqA31XVauPohSAUF8r1HeRLc2dOW6Jnu2riallbcAZLiKxw8vAgoJWEmASsxKmjbV9bHK4r1mz1Hdeo+W1aScnyzeNnVNewkYShXeA69M8jij29Uftht5BKjEXD6mR09fkNe+X6qvokDOzPKqcvJ2mtxVt5TcorwGIBMWbJYFWw44rYvsj4sJUIm5ePDQ9ddV/K4jp8/rq0AW7xyZMzjyisZ4FxoQoBHZw+FVQCEBKwhQiVlB0aY61u09Jh/+vk63XkcFOnzo5go29ST5ZqsVyyOPK+8BCLwJvpm7OfmTWIIEAiBAJRYAJKcWQdwuTOrDuDSYLN52Xc9g5T1wLXv4IoF3AYUEUkuASiy1BG06/yeVxfu32D269c4NykqT8gVt6kngzcJ74PW7PW5Q8CoYNn154CezJAkkQoBKLBEwTt5tzuKdRcXvGpGCLN52Xd/TyougsvImgIyZGSvblJcBhQRSQ4BKLDX0bDp37K+xstmUxbt43mw29ST4ZnVoIDXJD4F3wUvMHh48RJ4RjwCVWDwczv9w4MQ5GaqcvCHFlPLqfUdNve2mX/AmaFurhO7yj0u3yx/r9rqp++yrwwhQiTlsQJLrTv8pi+XkuYu62Ej1GonXSTfK2ypcNrOHu3HknNdnKjHnjUmiPVq2/ZB88fdGfRwT+ZjQd6vAq+D5Fp7EJbG7j8rHf3hMRdx6Pey3fQSoxOxjH1TLsA01Z/GGSQVMK9wsA+6sLfAygAxUXgfHzlxw8+Ww7zYRoBKzCXywzU5atEXmbvpXn/agMmqFcavbBd4FQzvU15cBr4PXf/C4T7n9utj/8BKgEgsv7xS1dlbF4erjzeKNf/w3OtZLUT1OPOnhphWkdsl8umsfKO8DeCFQSCAYAlRiwdCyqexbP6+SXUdO69bh4F0wZxabemJ9swhhbbwaw/ug1/gF1jfCGiOaAJWYw4d3t1JeI2es1L0sW0BNhres6vAeB9+9myoUkk4NyugTf43dLfBGoJBAoASoxAIlZVO53uo1Eq+TkLfvaxjyLN42XaaM7MLs4Xaxd3u7VGIOHsF5aiIfE/qQ26oWFYSdjlSB18HLbWroy4M3wru/eRKeROr18rqsI0AlZh1LS2tCvK0eKuQ0TCvCncXb0gsJorI+yvsAXggQeCXAO4FCAskRoBJLjpBNx79URq0wboUg9RpSsEW6aGf2eNnDF0f6JfP6LCBAJWYBRKurgFtR/ylLdLU6fI1Kghst0qVhWWnsDSv0+V8bZfmOw9Fy6bzOFBKgEkshuFCe9sa0FfLvibO6iUEqkGCebBlD2Zyj6jYHeMQrNbwU8EpNIYHECFCJJUbGpv1bDpwQhNqBVC2aR57whnS2qTu2NFtXeSM8cFN53fY/G/fL5EVbbekHG3UHASoxh43TixMW+rJ4j1H+kXZn8bYLD7KHG0lPen/L7OF2jYMb2qUSc9AozV6zR6Yv36F71L5OSbnVm+bMQV0MW1fgldC/fS3dHrwV4LVAIQF/BKjE/FGxYR9cbnp6s3hnTJdWGbbG2NALZzWJRMDwUoCMUF4L8F6gkEBCAlRiCYnY9PkjFU9rrdf5+YVW1aRM/hw29cQ5zUKZG9nDtRM8s4c7Z3Ac1BMqMQcMBsLQvOYNQ+N5jartgF45owvm1+pvF24ReDFQSMBMgErMTMOm7ddUQMCjpz0BATGhnT1Tept64sxm3+neWC9wwNTCCAzpzJ6yV3YQoBKzg7qpzTV7VGjmOev1HrNpgalI1G/CW+HJ5pU1h6XKi+GrfzZFPRMCuEYgqpTYuXPn5M727eWW5s1l6NCh1yjYuIXJfCOL91j1xIH4WpTrCQxSSXcNo99+kxdbnj385MmT8sH770v3bt2kZYsW+j4ZNmyYLF8eGQl+f/v1V33fP/rII9fDdfmedC7vf1Ddx026eLHHH698hQpBnRuKwtOW7ZD/rfWkK7s3pqw0KlcgFM1ERJ1QYFBkz309T3szDJ263LKkwbGxsYJ/7t27d8djhXsF90yfPn3kueefj3fMbR9mz54t69d7nvjd1vfk+hs1T2L79u2T99UN6RRB4tgXJyzQ3YHj8/BODZzSNcf2A6+UhiM8vBrg3ZBauXLlijz7zDM+BZY/f35pp57Wm9x0k6RJk0auXr0qb775psyZMye1Tdl2/tatW+WnGTNsaz/UDUeNEhsyZIicP38+1DwDrv+dWbGy9eBJXd4cgibgCqKwoDkkkVXZw6dNmyabN2/WNBs3bizL1OvjRx99JJMnT5Yvv/pKZZTyvN6PHj3aNcTj1ArIsWPHZOPGjfpaOnXsKCdPpF7hOxVAVLxOLlmyRKZNnSqZM2cWzIvZLXDufsObxbtEvuy+YIB298sN7SM4JMwu8CpuvI6nxrNh0cKFvst+c/hwSZs2re/zrbfeKo2bNJG5//wjq1auFDy1mY/7CjpsY9euXRLTsKHDehW67kT8kxheBwa8+qom+LR6bciZ02MBnhTSr9Q38Ah1Q08YP95vsePHj8vIESN0mZkzZ/otk9RO88T0CBU/K7NLs3gndY2hPAYDWBjCQoyFkZS2t33HDn1q3rx5pWzZstdVU6WyZ1X08uXLsnevZ/7SXCjU94q5LW77JxDxT2JT1GvB6tWrpVChQvL000/LZ59+6p+Eae8SNaH7ww8/SPr06aVFy5aSL18+31HczI8/9pjMnTtXciiFOCtIJWY2ETAnyPA1wI1kCcAVCS5JI39eKYaJyjO3Vkn2PH8FTp86pXdjLsyfpFP3gCFnz3rCIxmf8TcU98rBgwfl4sWLUqBAAX0PmtsLZLtYsWKyLsEkfpfOnfX/QSDnu61MRD+JnT59Wk/KYlBeVU9jeJ0MRLp06aKLXbp0Sb6dODHeKXiqgwLDpO+HH34oJUuVinc8qQ8w1uzxzTxBnCxzqrKkzuEx/wTgHG6krjMbC/svnfjehx5+WPr37y9PPPmk30I7tm/X+zE3Vrhw4evKhOJeefihh6R+vXrytXojSIng3syVK1e8n0yZMqWkKlecE9FKbOzYsYJvtTp16sidd90V8IBgHqRo0aK6/Lhx4/QKFT58+eWXgtcHSN++faVZs2Z6O9BfExdskfmbD+ji5qSxgZ7PctcImJMIm922rpUIbKujmvR+5tlnpVOnTtedsGfPHt+qZCn1ZZUjx/X+rKG6V9CZy2oOjpI8gYhVYjvUXMenn3yiV5cGDR7sW2VKHonosp3V4zcEk6R//vmnfvoaOGCA3ocleNz4wQgcmF+ZvEifkjMLsnjXD+Z0lvVD4MGbK0gdFUARYnag91M06F1YAHrowQfFeIV8QG37EzyhWX2v+GuH+xInELFKbPCgQXpe4e6775batWsnTiCRIx3VN7OxvP7GG2/oeTDMh1WuUkXGjBmTyFmJ7zaHkkEW7/w5Anu1TbxGHsEr+VgVOBJWEOZQRqklg2mEZ9T86dq1a3VVMTEx8sADDyRardX3SqIN8YBfAhGpxLAkPmvWLD0H1t+7Mun36pPYWbx4cWnUqJEusX7dOsGKZO7cueWLL74IeG7NqN4c1K98QZXFu0U14xD/ppIAkooguQjEHFQypdViQv0xtXCD+wdSSa1OfqGmETJkyJBolVbeK4k2wgOJEoi41UnY8rz22mv6gvHKV7BgwUQvPrkDndQr5bx583QxTJZ+/PHHgpWfYMUcXhnBDjOki8jvjmCxWFYeZiqwGcMrO8J7t6xezGeCEUwjMIbGpDqmDyBYtJkwYYLfuTBdwPQrpffKgQMHBHNvZjlz5oz+CGPVZcuWmQ9J1apVJWPGjPH2RfuHiFNisNsyfMQuqW9VTO6b5cKFC/rjJnWD4NhtyqARr4j+ZL5XgeEYJnUbqteKYMWc6KJltWJyR60SwVbB8skQQMLd3irx7usqJpuRaKV3m5rJnBX/MBTHA/ffL/Pnz9cHoCzGKwV2442eObf4pa//lNJ7BaY8Q9ScrT/BynjC1fG56p4sXbq0v+JRuy/ilJjZvSKhAjOPMhQdfuCi4U+JffH55zJp0iTfKXidnPPHH3KbinAQqJhTjqVPm0bGdAteCQbaVrSXe7lNDfn8rw2CV3ekvLu/SXmfCUZybDCJ361rV1m0aJEuCvejz9W0Qfbs2ZM7VR+34l4JqCEW8ksg4t5rsmTJog1bYdzq78eYrAcNHM+eLdt1YPBtbLyStmnTxvf4PmXKlOvKJrXDnPwVWbwrFc6dVHEeSwUBONGP7NJQ12BOPhxIlS+99JJPgWG88QQWqAJL7b3ypLJP261eJ80/tWrV0t0eMHBgvP0ow6cwPyOqnkSiSipWqBBXqGDBOHXz+L1uFY4lrmqVKrpM27Zt49REb9wTjz+uP5coXjxOPZH5PS/hzuNnLsQVeOarOOn2UVy+p76MO3r6fMIi/Gwxgavq0bfJ4KmaeZruH8ct3XYw2RZUGB49trgnOnboEKdWJpM9xyhg1b1i1Gf8bXP77bpPyhHd2JXqv+3btdN1Nm/WLNV1Oa2CiHsS86OnA95lTOweOXJELwh89tln2u0Dk7YQrFxNnz49oPqGKgfvAyfO6bKD76knubNyMjYgcKkoBFMLI7AkXuV7qICT8JJIShBnC4KFm/c/+EDSpQtshsXKeyWp/vFY8gQCG7Hk64mIEr169ZI1a9bo10fMiRj+dE2bNtV+bFhJwitl9+7dk7zezf+ekHd/i9VlqhXLI49HYRbvJAGF8GDtkvnkIWUE+181P4akIkgugoCTiYkxIY/oFMaqZGJl6ylXoJIlS+rDVt0ribXF/YEToBLzsvpAfQtP/fFH/emtUaOkZs2aPoq4we+55x5BmaUqrA/86ZLymeylgh1evHxVn/+OMsZMm0Y9IlDCRgDeEFMWbxPMjfVRad4QugdzZv5k3/79ejcMXHskE711zDvvaCVm5b3ir0/cFxyBqH2dzGCKTrBixQp5U8VThzzxxBPSoUOH6yjCKtuQn3/+2di87u+vsbtlxoqdev9ddUtJ88pFrivDHaElUCBnZoFXBAQJd0eqxLuJySkVWz9QwT1j5b2SWLtJGdYmdk5y+404aIjMEmlyAybpIu2i7LqeSyqLd83+38k6lQQ3U/q0snZ4JynNJLi2DAeehKv2nSx4tcdT2PqRnaW4siejRB6BqH0SC8VQfvj7Oq3AUHfPVtWpwEIBOcA64RUB7wiIzh7+rccGLMDTWcxFBKjELBqsw6fOa4txVFcoVxbp285j62NR9awmBQTaKu+IFtU8IZUmLdoic5k9PAUUnX8KlZhFYzTw+yVy7MwFXdubKnMRs3hbBDaV1Yzp2siXPdwISJnKKnm6wwhQiVkwILG7j8on3ize9cvkl+5NyllQK6uwgkBllT0c3hKQ5TsOyxd/b7SiWtbhIAJUYhYMxgvKqPLK1Tgd1womFczibQFUC6t4/a66kjebJzxz/ymLtemFhdWzKpsJUImlcgB+XLpd/li3V9fStVE5iSlbIJU18nSrCSB7+OB76upq4UUBbwpK5BCgEkvFWJoTuGbNqLJ4q7hWFGcSgNcEvCcgyB4O0wtKZBCgEkvFOI6ZGSvbvFm8X2lbS4rkzpqK2nhqKAno7OFqkh8CG7IXlVcFJTIIUImlcBz3Hz8rw6Z7XktKqizeL7aunsKaeFq4CNxSpYjcqVyQID8pr4rfYvfobf5yNwEqsRSOX1+VuejU+Uv67JEqIzWzeKcQZJhPG6UMYH3Zw8fP1wlGwtwFNmcxASqxFABdvPWgfDN3sz6zacVC0rF+6RTUwlPsIFBGuYH1bF1NNw33MHhZUNxNgEosyPGDpylMKhCvCtEp3unWOMgaWNxuAv3a1dZeFejH6z8uFSTfpbiXAJVYkGM3YcFmWbDlgD7rkaYVpWaJvEHWwOJ2E4A3xbBO9XU3jp6+IAO/X2p3l9h+KghQiQUB78wFlcVbxaeC5FJZvId0qBfE2SzqJAJIJFKv9I26S/C2WLPnqJO6x74EQYBKLAhYw39aIXuOntFnDLizDrN4B8HOaUXhVYGpACN7OKYIKO4kQCUW4LjtOHxK3p65WpeuUCiXPNeiaoBnsphTCTQqV0Duiymnu/f72r0yVSXgpbiPAJVYgGPWe+JCOacyTENGd40R5JGkuJ8AvCzgbQF5SRnAwguD4i4C/E8MYLz+2rBfx2xH0dY1isntNYoHcBaLuIFA0TxZpc8dnthvW5X3BbwwKO4iQCWWzHghOsUL4+bpUnj6Gn2fx3UlmdN42EUEXrq9upRQXhcQeGHAG4PiHgJUYsmMFVJ/rdx5RJd65rYqUrFwrmTO4GG3EYC3xcguHud9eGH0m7zYbZcQ1f2lEkti+E+cvSgDvluiS9yYPZO8puJSUSKTQKcGZeRm5X0B+XruJlmy7VBkXmgEXhWVWBKDOnjqMjl48pwuMVjZhME2jBK5BIwcofDGwBQC84C5Y6ypxBIZp437j8t7s9foozWK55XH/q9SIiW5O1II1CqRTx5WXhiQ+ZsPyMQFWyLl0iL6OqjEEhlexJtiFu9E4ETw7qHqiTun94m7z6SFAi8NirMJUIn5GZ+Zq3bLzyt36SP31Cst/1epsJ9S3BWJBPLnyCwD2tfRlwbvjBEzVkTiZUbUNVGJJRhOZPHuNcHjgoIs3m+pWGGU6CIAb4zyBXPqix71y2rZqbw1KM4lQCWWYGzen71WNuw7rvf2UtFaS93osR9KUIwfI5gAsoeP9oayhpdGb2YPd/RoU4mZhueQyuI9SMWXghRW8fL7qrj5lOgk0KZmcWlVvZi++CmLt8rfymuD4kwCVGKmcRmobMKOK9swyPDO9SWbijtFiV4Cho+sEQgT3hsU5xGgEvOOyapdR+TTP9frTw1UFu9ujco7b7TYo7ASqFQ4tzx9axXd5oqdh+Vz5b1BcR4BKjHvmPRUSSOMLN5ju3viTDlvuNijcBN47a46kk95a0BeVU/q8OKgOIsAlZgaj++XbJM56/bpkcETGJ7EKCQAArmzZvRF8IX3xpBpywjGYQSiXomdv3RFXlaxwiCYA8NcGIUEzATgrVG9mCeXwn9+WyObmD3cjMf27ahXYqNVtNbthzx2QFiNxKokhQTMBDxZrRrpXcwebibjjO2oVmL7jp2RN1XcfAjswWAXRiEBfwSaVS4sd9ctpQ/NUNnDZ63e7a8Y99lAIKqV2CuTFstpbxZvWObDQp9CAokRQPZw4x7pNX6BwLuDYj+BqFVii1QW73HzN+kRwLcsfCQpJJAUATyt92zleVpfv++YfPC/tUkV57EwEYhKJQbjxR7feOJFYb5jjNfFJEzM2YyLCfRrd23edNCPy+Sw8vKg2EsgKpUYnsDwJAbByhPihVFIIBACWMF+05s9/NgZZA/3RP4N5FyWCQ2BqFNimAPDXBgEkVoRsZVCAsEQ6N64vNT32hIie/jq3Z4cDMHUwbLWEYg6JYbVSKxKQhAzH7HzKSQQDAFkDR/brZHOHu7JhjU/mNNZ1mICUaXEYA8GuzAIshYhexGFBFJCoGHZAtK1UTl9Krw9fli6PSXV8BwLCESVEoNlPiz0IcgfySzeFtxBUVwFsocbkU7M91YUI7Hl0qNGif25fp/2kQRlxIpCJm8KCaSGQBHl3fHKHTV1FduQPXyW5yk/NXXy3OAJRIUSM89bIGrn28pokUICVhB48fYaUtKXPfzafKsVdbOOwAhEhRL7TMUJQ7wwyLO3VZUKhXLpbf4igdQSMOdhwMp3X2YPTy3SoM+PeCWGSK1GFm9kshl4Z52gIfEEEkiKQIf61zJifTNvkyz22iAmdQ6PWUcg4pUYYuYjdj5kiCmnoHUIWRMJiBjZw7U3yLj5zB4expsiopUYshYhexGkZom88og3u3MY+bKpKCEAr49HvVniF245IOPnb46SK7f/MiNaiSF/pBFp4J1ujQV+khQSCBUBPOnDCwTyyqRFzB4eKtAJ6o1YJfbLql2CTN6QjmrOomnFQgkunR9JwFoC8P4YqGLyQ/aaYtVZ2wprS0ggIpWYzuKt4j1BMmdIJyOZxTvhuPNziAhg9RveIJC3f1klO5g9XLMI5a+IVGKIg75x/3HN7UUVrdWw4wklSNZNAiAALxDDDtGcv4F0Qkcg4pQYViKHTF2miRXNoyyqmcU7dHcPa/ZL4PYaxQU/kO8WbxN4i1BCRyDilNirUxabsng3kKwZ04WOHmsmgUQIGNnDcfgFZXLB7OGJgLJgd0QpsZU7j8h/vVmaY1SUgftiPFEGLODEKkggKALwCsH8GATeIvAaoYSGQEQpsRfGzdPfeGlUwCcYHyLuE4UE7CKAlUojXh28RuA9QrGeQMQosSlq7uGvDfs1oe5Nyvkib1qPjDWSQGAEYDMG2zEI5moHq5j8FOsJRIQSO3fxsvT2ZvHOrmOgN7CeFGskgRQQgBW/kcPhvdnXVs1TUBVPSYRARCixt1W0VsMep6/KRlMoV5ZELpe7SSC8BMzZw832i+HtRWS35nolBsvo4d4s3qXz52AW78i+X115df9XqbAg0gXE7EniyotxYKddr8TMPmqjlGV+xnRpHYiZXYp2AuYM82af3mjnYsX1u1qJLTBFC2heuYjcVbeUFUxYBwlYTgBeI4gCC0F0FcyPUawh4FoldlUFboIRIeI3mecdrMHCWkjAegJ9lfcI4vJDsFJpxLmzvqXoqtG1SuybuZt9ETQfb1ZJqhXLE10jx6t1HQF4jyBDEsQccdh1F+KwDrtSiZ3SscwXaZS5s2aUwfd4bHEcxpbdIYHrCCBXJXJWQsy5H64ryB0BE3ClEntz+grZf/ysvsjX764r+ZjFO+ABZ0F7CTB7uPX8XafEzPn9KhfJLU/dUtl6KqyRBEJIoH6Z/NK9cXndAiJcINIFJeUEXKfEXjJn8e4awyzeKR97nmkjgTc71Wf2cIv4u0qJ/bFur/y4dLu+9DtqlZCW1ZjF26L7gNWEmUBhtUrZT3mXQOBtgiiwlJQRcI0SYxbvlA0wz3IugZ6tqgu8TCDDZ6zUcfmd21vn9sw1SuyTOesldvdRTfL5FtWkfMGczqXKnpFAAASuyx4+aXEAZ7FIQgKuUGLHzlyQgd8v0X0vkDOzvNq+dsLr4GcScCWBu5WXSbPKhXXfx83fJMhZSQmOgCuU2Os/LJXD3izeQzvUl5ze3H7BXSpLk4AzCZizhxteKM7sqTN75Xgltn7fMfnw93WaXu2S+eThphWcSZK9IoEUEqheLK/A6wSyaOtB+WbephTWFJ2nOV6J9Ry3QGfxhpEgvrEQeppCApFGAF4n8D6B9J28WE4rrxRKYAQcrcRmrNgpv8Z6snh3alBGbqrALN6BDStLuY0AvE5e82YP38fs4UENn2OV2MXLV+XFCQv0xSCL9wiv42xQV8fCJOAiAk/fWkUqFc6tezxaRSvefuiUi3pvX1cdq8Te/S1WNv17QpN5uU0NKaHiMVFIIJIJIHs48lVCkD38Je+XeCRfsxXX5kgldvDkORk6bbm+vmJ5s0mfO2paca2sgwQcT6BV9WLSpmZx3c8flHfKnHXMHp7coDlSifVXWbxPeHP0De/UQLKo10kKCUQLgdFdG0mGdJ5/TWYPT37UHafEVuw8LJ//tVH3vFG5AnJvTNnkr4IlSCCCCMAb5bkWnuzhq3cfkU+ZPTzJ0XWUEkOo6R7fzBeEnoYpxdjujZnFO8nh48FIJTCgfR3JnyOzvjxkD4fXCsU/gRvilPg/FP69kxdtlc7v/c/XsDGIvh3cIIEoIoC5YUN6tKym7SSNz/x7jYBjlBiyeFfqM1l2qrAkFBIggfgEsHK5algHnwlG/KPR/ckxM+YLtxyUxuUL6J/oHhJePQn4J/DPxn+pxPygccyTmJ++cRcJkAAJJEvAURP7yfaWBUiABEggAQEqsQRA+JEESMBdBKjE3DVe7C0JkEACAo6Z2E/QL35MhsDly5dl4sSJsnDhQtm2daukz5BBYmJipEOHDlKuXLlkznbH4d9+/VVGjBghpUqVks/++193dJq9DDsBRyqxK1euSJo0aZShK2OH+bsjDh8+LJ06dpQNGzbEO7x0yRL57NNP5b3335fWrVvHO+bGD7Nnz5b169e7sesh6TP+L9KmTRuSut1cqe1K7MKFC/LHH3/IzzNmyKZNm+Tff/+VI0eOaCWWK1cuqVWrljRu0kQ6deokuXN7wpS4GXhq+3716lV55OGHfQqsbNmyUqFCBTl48KAsUUrs3Llz0qtnT2nUqJHkzOneZCpb1dPlT+qeiGZZsWKF/r/YvHmz/t/YvXu3pEuXTvLkySNFixWTZs2ayd133y0lSpSIZkxim4nFpUuX5NNPPpGxY8fKqVPJG7jmUP+Qr7zyijz44INRPWC/zpolDz30kGbQqlUr+VgxTJ8+vf786quvyufe1643hg3zlXMDMDiOHD9+XCvjOXPm6Htj//79uuuVKlWS39UXXbQIvsiHvfGGfP/995KcQ00GNY3QX437Y489Fi14rrtOW57ENm7cKI8+8ojg2zZQOXnihPTr21ff6C+88EKgp0VcuQlqHgyCV+1+/fv7FBj23XXnnT4ltjHBqyaOO1l27dolMQ0bOrmLYekb/jfat2snJ0+ejNcevqiyZs0qJ9T/gVmxXbx4UV4bOFCKFi0aEVMI8S46wA9hV2KLFy+WB+6/Xw+G0ce8efPqCenq1atL9Ro19GvQypUrZfmyZTJp0iQxvpFRfqSa6MUEdoMGDYzTo+pv7OrV+npvvfVWwaukWWrVri2jR4/WN3mFihXNh/Q2vjS+mzJFb997331SvLgnbpW54M8//yxrYmP1rqefeUayZ2cwSjOfUG4fPXpUHnzgAZ8Cy5Qpkzz99NPSpk0bqVS5sm4abzB///23vKveYDB9YAj+TyJhHtS4nqD+Kq0eNlHv9HHlypaNK1SwoO/niccfj1MT1Yn2Ye/evXE3NWniK49z1TdVouUj+QA4Gez+8+67QV+qWsX0nT9gwIDrzv/zzz/jihYposv069fvuuOh3KEmreOOHTsW76dlixa6L82bNQtl046pG/8LxvjWrVMnbt3atYn2Tc19xrVr29ZXvmyZMnHqqSzR8pF8IGx2Ygqi9FSvgadPn9ZKFqssH330kXz08ceCJ7HEpHDhwjJ12jTBX0OWL18u58+fNz5Gzd+1a9b4rrWs14zizJkzgjmk1eoJDd/SSUmp0qWlfv36usjkyZPjMdy2bZs88cQTghUwPOkOGjQoqaosP4bVaCzkmH/wJBItghXnmTNn+i63b79+vqcv307TBtiY54dxH+DVMholbErsi88/l3nz5vkYv/zyy9KufXvf56Q2sBrT3lQWNlLGK09S50XaMfNrdaFCheTJJ5+UymrSu6t6NWzVsqWUV4rt5ZdeSlKZ3XvvvRoL5hinTZ3q2VbzL/d37y7YV6RIEflEmWlgFYwSPgI//PCDb9wwBu3UvFhyUl6tShuC8cKcWTRKWJTYju3bZejQoT6+TZs2leeef973OZCN29W8gFkwZxZtcs709DlMrT5OV0+o5qcvmKuMHz9eOitzFCh6f3JH27aSJUsWfejrr7/WT17qNUbwJIZv98+/+CLJJ2N/dXJf6glsUWYUhuTPnz+gLxEYAb+j5sbwgzeaaJWwKDH8sxivf1hlefc//wnakLW2mrQ2TAkwWLAlizaBkjLkHzW5W69ePZn47bcyf8ECfRMXKFBAH4YV/1TvU5ZR3viLb+u2SpFBYIfUpXNn+euvv/TnUW+/LdWqVdPb/BVeAjAvMQTj8sEHH/j+Z4z9Cf/iywj2k/i5/fbbEx6Oms8hf2eAcab5HwqrajfeeGPQgPFkYX7qyJcvX9B1uP2EC6YnsYIFC8r4CRMkW7Zs+rJKliwpFdWKpJoEFzD/RH0zwwXJn3Tu0kWv+uKY8YqP+TAYTiYnWP2cb5oWSK68cRxzPHXq1DE+8m8CAuXLl4+3Z+iQIfLRhx9K8+bNpaEyPcFPSfXkRbmeQMiV2KJFi7QVvtH0PYn8YxnHE/sL+xizRKMSg3+kIZgzMRSYsQ//CJiUh2KCvVFibiowT4GV986dO/WpTW66SV4dMMCoJsm/8KqYP39+kmX8HYQBJyVxAk8+9ZQ2bjXGBCUx2Y8FGPxA8KSNsWuoxviOO+6QaPwf0CAS/Aq5Evvll198TcINBk9iKZFDyq3GLEmtaJrLRdK2eeK2TAIbMeM6YR8GJYanViwEwAgyoUARHTp0yLf7lltuCdgnDwsssKAPVviamjQxfCF9/c03emEGtpT+5MCBAzJ9+nT9AwNXjAWmAGC1H80SciW2b+9eH1/cyCkFjsEzBNbq5pUZY3+k/zUm5HGdyAblT9KbVhX9Te7jiRZuS2fPnvWd/oNyb8HrZCACg8qoNaoMBFAqyiD6CMyJlH2Y9ifG3CYMWv255eFL6rvvvpNLapoF82fRHCwh5BP7eCQ2JKUOyZjjmeK1NEddVatWFazgRJtg6d0Q80SwsQ9/YS8EwZJ7wqcwcHxKmWVgtRjf/Le1aKHLxioLfbx+UpxBoHKVKvLsc8/JOLXSvE5F8ZilQhLhdR+vkgmjWMBMZqKaG41mCb0SM60i5lTGjCmReXPnyr59+3yn3hXABLSvcARtIKKHsUK7ZcsWv1e2TLlqQaDwEtp6wSxDWeXrb+33VbgeOIwbYv6SMPbxr/0EoLTgjgf3ox+Vwlqo5phbKsd/s+CJLZol5ErsrPfJAJBPJXBqDRS8OSAefPm6du0a6KkRVQ52XDVr1tTXhNfrhE9j8GQw4m81atw43rXjG/sDpbggL/furZ/C8PoCxQjBKyWe1CjhIwCTmfeUuRFWfMeMGRNQw9oYWUUuMT9lwxsmmiXkSszsiIy4YUm5RmA1rb+KzGDYLWFgMMizf/vNN0aPqOgX0eyU3E1Z1kMQN6yjWuldtWqVnjNBAMHHveFY8AT2vMmYeK2aY+nVq5c+D6taPXr00Nv41VHZGEGwejj3n3/0Nn+FhwBe4fF0POqtt7RJTKCt4mnc7LyfEpOlQNtyQ7mQKzHjyQEwMF/zl3qd8SeYqMR8DdyT4AKDaAoITzx8+HBfcURt6BHFYXgAoqOK6Hqf90kUyqm1erWooEwrEBnEeOV+9tlnfYHyEBnhIRWDDUoPkRBg3W2eBL5The8xFluMpXwfcG6ElMDxY8d89eN/w7zY4jvgZwPjbEwb4DBsyaJZQq7EanlffwzIcCzes2eP8dH3F24vv//+u/4MhQZXmIdVBFPjFQffPmPeeUcyZszoOydaN95QAfPuvOsuHf3WzABshqtQRb379PHthtM9eCMq7hfKpci8wolCcLhu4Z3g/1V9aeBp2CliTGIb84BO6ZdV/Thhml7BSjK8MAKRUaNGieG9gfky2PlFs4Q8siueABqrUMlmY0c4L2PlJaG9EWLEw8o84SsnbuJPlVNyC+XkTLlGAN/ImANTIWz0k1cNFYvNeKq6VopbTiWAxZmbTQoI0Yvh+5qYZwNs+/orz4cZprDdiOxruJE59TpD3a+QKzFcACaVn1IWyQmlmIoTXlu5opRU1uMwwIRTtzlSg1EepgBfffWV8ZF/SSBiCMDvMeFcJEwpED8fFvp4E/lXGbkuUP6xi9XKpPkL/gEVQBFhyBHGKJolLEoMgD9R3xivv/Zailkjhvjr6lXUPJ+T4sp4Igk4hABe9duqxRZY4wcqeDMZqqYUunsXeQI9L1LLhU2FP67muH766SepU7dukizxrQI3GES6MELy4gS8Tg4ZPDjJc3mQBNxGAKYSM9QiFpK+JPdEhVXIR9WXOYxfqcCujXTYnsSuNelZzod/H5K+nlSZjjDZjMGEDUwVtYJWQEVogCBZQvdu3bTrBSagkSTDvNpprpPbJOB2AlhJxuT+PuXzim2Er7pRRWvB/wMc9hF6yVjscPu1Wtl/W5RYMBeAhQE4uz6s7MMQaoZCAiRAAmYCjldi5s5ymwRIgAQSEgjbnFjChvmZBEiABKwgQCVmBUXWQQIkYBsBKjHb0LNhEiABKwhQiVlBkXWQAAnYRoBKzDb0bJgESMAKAlRiVlBkHSRAArYRoBKzDT0bJgESsIIAlZgVFFkHCZCAbQSoxGxDz4ZJgASsIEAlZgVF1kECJGAbASox29CzYRIgASsIUIlZQZF1kAAJ2EaASsw29GyYBEjACgJUYlZQZB0kQAK2EaASsw09GyYBErCCAJWYFRRZBwmQgG0EqMRsQ8+GSYAErCBAJWYFRdZBAiRgGwEqMdvQs2ESIAErCFCJWUGRdZAACdhGgErMNvRsmARIwAoCVGJWUGQdJEACthGgErMNPRsmARKwggCVmBUUWQcJkIBtBKjEbEPPhkmABKwgQCVmBUXWQQIkYBsBKjHb0LNhEiABKwhQiVlBkXWQAAnYRoBKzDb0bJgESMAKAlRiVlBkHSRAArYRoBKzDT0bJgESsIIAlZgVFFkHCZCAbQSoxGxDz4ZJgASsIEAlZgVF1kECJGAbASox29CzYRIgASsIUIlZQZF1kAAJ2Ebg/wFOj0zXP1RjsgAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image.open(io.BytesIO(image_data))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "29affc0a-cedc-40fd-bec4-6bf5178409cf", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loss_fn = ImageQALoss(\n", + " evaluation_instruction=\"Does this seem like a complete and good answer for the image? Criticize heavily.\",\n", + " # engine=\"claude-3-5-sonnet-20240620\"\n", + " engine=\"gpt-4o\",\n", + ")\n", + "loss_fn" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ca9b6c47", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Variable(value=The provided answer is mostly correct but can be improved for clarity and completeness. Here is a detailed critique:\n", + "\n", + "1. **Introduction and Setup**:\n", + " - The answer correctly identifies that since \\( \\triangle QRS \\) is an equilateral triangle, all sides are equal.\n", + " - It sets up the correct equations based on the given side lengths: \\( 4x = 6x - 1 \\) and \\( 4x = 2x + 1 \\).\n", + "\n", + "2. **Solving the Equations**:\n", + " - The solution to the first equation \\( 4x = 6x - 1 \\) is correctly shown step-by-step:\n", + " \\[\n", + " 4x = 6x - 1 \\\\\n", + " 4x - 6x = -1 \\\\\n", + " -2x = -1 \\\\\n", + " x = \\frac{1}{2}\n", + " \\]\n", + " - The solution to the second equation \\( 4x = 2x + 1 \\) is also correctly shown:\n", + " \\[\n", + " 4x = 2x + 1 \\\\\n", + " 4x - 2x = 1 \\\\\n", + " 2x = 1 \\\\\n", + " x = \\frac{1}{2}\n", + " \\]\n", + "\n", + "3. **Finding \\( RS \\)**:\n", + " - The calculation for \\( RS \\) is correctly performed:\n", + " \\[\n", + " RS = 2x + 1 \\\\\n", + " RS = 2\\left(\\frac{1}{2}\\right) + 1 \\\\\n", + " RS = 1 + 1 \\\\\n", + " RS = 2\n", + " \\]\n", + "\n", + "4. **Conclusion**:\n", + " - The answer correctly concludes that the length of \\( RS \\) is 2 and identifies the correct option as (D).\n", + "\n", + "**Critique and Improvements**:\n", + "- The answer is clear and logically structured, but it can be improved by explicitly stating that the two equations \\( 4x = 6x - 1 \\) and \\( 4x = 2x + 1 \\) are derived from the property of the equilateral triangle where all sides are equal.\n", + "- It would be beneficial to mention that solving both equations independently confirms the consistency of the value of \\( x \\).\n", + "\n", + "**Revised Answer**:\n", + "Since \\( \\triangle QRS \\) is an equilateral triangle, all sides are equal. Therefore, we can set up the following equations based on the given side lengths:\n", + "\n", + "\\[ 4x = 6x - 1 \\]\n", + "\\[ 4x = 2x + 1 \\]\n", + "\n", + "First, solve the equation \\( 4x = 6x - 1 \\):\n", + "\n", + "\\[ 4x = 6x - 1 \\]\n", + "\\[ 4x - 6x = -1 \\]\n", + "\\[ -2x = -1 \\]\n", + "\\[ x = \\frac{1}{2} \\]\n", + "\n", + "Next, solve the equation \\( 4x = 2x + 1 \\):\n", + "\n", + "\\[ 4x = 2x + 1 \\]\n", + "\\[ 4x - 2x = 1 \\]\n", + "\\[ 2x = 1 \\]\n", + "\\[ x = \\frac{1}{2} \\]\n", + "\n", + "Both equations give the same value for \\( x \\), confirming the consistency. Now that we have \\( x = \\frac{1}{2} \\), we can find the length of \\( RS \\):\n", + "\n", + "\\[ RS = 2x + 1 \\]\n", + "\\[ RS = 2\\left(\\frac{1}{2}\\right) + 1 \\]\n", + "\\[ RS = 1 + 1 \\]\n", + "\\[ RS = 2 \\]\n", + "\n", + "Therefore, the length of \\( RS \\) is 2. The correct option is (D)., role=evaluation of the response from the language model, grads=set())" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loss = loss_fn(question=question_variable, image=image_variable, response=response)\n", + "loss" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "38c2d4ff-1458-459d-8915-3d1a254564fb", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimizer = tg.TGD(parameters=[response])\n", + "optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "66d2c361", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Since \\( \\triangle QRS \\) is an equilateral triangle, all sides are equal. Therefore, we can set up the following equations based on the equality of the sides \\( QR \\), \\( QS \\), and \\( RS \\):\n", + "\n", + "\\[ 4x = 6x - 1 \\]\n", + "\\[ 4x = 2x + 1 \\]\n", + "\n", + "First, solve the equation \\( 4x = 6x - 1 \\):\n", + "\n", + "\\[ 4x = 6x - 1 \\]\n", + "\\[ 4x - 6x = -1 \\]\n", + "\\[ -2x = -1 \\]\n", + "\\[ x = \\frac{1}{2} \\]\n", + "\n", + "Next, solve the equation \\( 4x = 2x + 1 \\):\n", + "\n", + "\\[ 4x = 2x + 1 \\]\n", + "\\[ 4x - 2x = 1 \\]\n", + "\\[ 2x = 1 \\]\n", + "\\[ x = \\frac{1}{2} \\]\n", + "\n", + "Both equations give the same value for \\( x \\), confirming the consistency. Now that we have \\( x = \\frac{1}{2} \\), we can find the length of \\( RS \\) by substituting \\( x \\) into the expression for \\( RS \\):\n", + "\n", + "\\[ RS = 2x + 1 \\]\n", + "\\[ RS = 2\\left(\\frac{1}{2}\\right) + 1 \\]\n", + "\\[ RS = 1 + 1 \\]\n", + "\\[ RS = 2 \\]\n", + "\n", + "Therefore, the length of \\( RS \\) is 2. The correct option is (D).\n" + ] + } + ], + "source": [ + "loss.backward()\n", + "optimizer.step()\n", + "print(response.value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a46a87c8-31a2-41f6-8ba8-b9c99ae60f27", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "textgrad", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/notebooks/assets/176.png b/examples/notebooks/assets/176.png new file mode 100644 index 0000000..662b24a Binary files /dev/null and b/examples/notebooks/assets/176.png differ