diff --git a/notebooks/Spanish/chat_pdf_images.ipynb b/notebooks/Spanish/chat_pdf_images.ipynb new file mode 100644 index 0000000..3d2159a --- /dev/null +++ b/notebooks/Spanish/chat_pdf_images.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fd77e3b8", + "metadata": {}, + "source": [ + "# Chatea con imágenes de páginas PDF\n", + "\n", + "**Si estás buscando la aplicación web, revisa la carpeta src/.** \n", + "\n", + "Este notebook demuestra cómo convertir páginas de PDF a imágenes y enviarlas a un modelo de visión para inferencia" + ] + }, + { + "cell_type": "markdown", + "id": "e5eb545b", + "metadata": {}, + "source": [ + "## Autentícate en OpenAI\n", + "\n", + "El siguiente código se conecta a OpenAI, ya sea usando una cuenta de Azure OpenAI, modelos de GitHub, o modelo local de Ollama. Mira el README para instrucciones sobre cómo configurar el archivo `.env`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ae3a4d3", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import azure.identity\n", + "import openai\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv(\".env\", override=True)\n", + "\n", + "openai_host = os.getenv(\"OPENAI_HOST\", \"github\")\n", + "\n", + "if openai_host == \"github\":\n", + " print(\"Usando GitHub Models con GITHUB_TOKEN como clave\")\n", + " openai_client = openai.OpenAI(\n", + " api_key=os.environ[\"GITHUB_TOKEN\"],\n", + " base_url=\"https://models.github.ai/inference\",\n", + " )\n", + " model_name = os.getenv(\"OPENAI_MODEL\", \"openai/gpt-4o\")\n", + "elif openai_host == \"local\":\n", + " print(\"Usando API local compatible con OpenAI sin clave\")\n", + " openai_client = openai.OpenAI(api_key=\"no-key-required\", base_url=os.environ[\"LOCAL_OPENAI_ENDPOINT\"])\n", + " model_name = os.getenv(\"OPENAI_MODEL\", \"gpt-4o\")\n", + "elif openai_host == \"azure\" and os.getenv(\"AZURE_OPENAI_KEY_FOR_CHATVISION\"):\n", + " # Autenticación 
usando una clave de API de Azure OpenAI\n", + "    # Esto generalmente no se recomienda, pero se proporciona por conveniencia\n", + "    print(\"Usando Azure OpenAI con clave\")\n", + "    openai_client = openai.OpenAI(\n", + "        base_url=os.environ[\"AZURE_OPENAI_ENDPOINT\"] + \"/openai/v1/\",\n", + "        api_key=os.environ[\"AZURE_OPENAI_KEY_FOR_CHATVISION\"],\n", + "    )\n", + "    # Esto es en realidad el nombre del deployment, no el nombre del modelo\n", + "    model_name = os.getenv(\"OPENAI_MODEL\", \"gpt-4o\")\n", + "elif openai_host == \"azure\" and os.getenv(\"AZURE_OPENAI_ENDPOINT\"):\n", + "    tenant_id = os.environ[\"AZURE_TENANT_ID\"]\n", + "    print(\"Usando Azure OpenAI con credencial de Azure Developer CLI para tenant id\", tenant_id)\n", + "    default_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=tenant_id)\n", + "    token_provider = azure.identity.get_bearer_token_provider(\n", + "        default_credential, \"https://cognitiveservices.azure.com/.default\"\n", + "    )\n", + "    openai_client = openai.OpenAI(\n", + "        base_url=os.environ[\"AZURE_OPENAI_ENDPOINT\"] + \"/openai/v1/\",\n", + "        api_key=token_provider,\n", + "    )\n", + "    # Esto es en realidad el nombre del deployment, no el nombre del modelo\n", + "    model_name = os.getenv(\"OPENAI_MODEL\", \"gpt-4o\")\n", + "\n", + "print(f\"Usando modelo {model_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "74df1ca5", + "metadata": {}, + "source": [ + "## Convierte PDFs a imágenes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3664e1d", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install Pillow PyMuPDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8c56b19", + "metadata": {}, + "outputs": [], + "source": [ + "import pymupdf\n", + "from PIL import Image\n", + "\n", + "filename = \"../plants.pdf\"\n", + "doc = pymupdf.open(filename)\n", + "page_count = doc.page_count\n", + "for i in range(page_count):\n", + "    page = doc.load_page(i)\n", + "    
pix = page.get_pixmap()\n", + "    original_img = Image.frombytes(\"RGB\", [pix.width, pix.height], pix.samples)\n", + "    original_img.save(f\"../page_{i}.png\")" + ] + }, + { + "cell_type": "markdown", + "id": "f822fb8f", + "metadata": {}, + "source": [ + "## Envía imágenes al modelo de visión" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bdaa995", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "\n", + "def open_image_as_base64(filename):\n", + "    with open(filename, \"rb\") as image_file:\n", + "        image_data = image_file.read()\n", + "        image_base64 = base64.b64encode(image_data).decode(\"utf-8\")\n", + "    return f\"data:image/png;base64,{image_base64}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4de5dca7", + "metadata": {}, + "outputs": [], + "source": [ + "user_content = [{\"text\": \"¿Qué plantas están listadas en estas páginas?\", \"type\": \"text\"}]\n", + "# Procesa solo las primeras páginas, ya que procesar todas las páginas (doc.page_count) es lento\n", + "for i in range(3):\n", + "    user_content.append({\"image_url\": {\"url\": open_image_as_base64(f\"../page_{i}.png\")}, \"type\": \"image_url\"})\n", + "\n", + "response = openai_client.chat.completions.create(model=model_name, messages=[{\"role\": \"user\", \"content\": user_content}])\n", + "\n", + "print(response.choices[0].message.content)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Spanish/chat_vision.ipynb b/notebooks/Spanish/chat_vision.ipynb new file mode 100644 index 0000000..2cd0573 --- /dev/null +++ 
b/notebooks/Spanish/chat_vision.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a6fb0c66", + "metadata": {}, + "source": [ + "# Chatea con modelos de visión\n", + "\n", + "**Si estás buscando la aplicación web, revisa la carpeta src/.**\n", + "\n", + "Este notebook solo se proporciona para experimentar manualmente con el modelo de visión." + ] + }, + { + "cell_type": "markdown", + "id": "e8390ffb", + "metadata": {}, + "source": [ + "## Autentícate en OpenAI\n", + "\n", + "El siguiente código se conecta a OpenAI, ya sea usando una cuenta de Azure OpenAI, modelos de GitHub, o modelo local de Ollama. Mira el README para instrucciones sobre cómo configurar el archivo `.env`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c2e70bb", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import azure.identity\n", + "import openai\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv(\".env\", override=True)\n", + "\n", + "openai_host = os.getenv(\"OPENAI_HOST\", \"github\")\n", + "\n", + "if openai_host == \"github\":\n", + " print(\"Usando GitHub Models con GITHUB_TOKEN como clave\")\n", + " openai_client = openai.OpenAI(\n", + " api_key=os.environ[\"GITHUB_TOKEN\"],\n", + " base_url=\"https://models.github.ai/inference\",\n", + " )\n", + " model_name = os.getenv(\"OPENAI_MODEL\", \"openai/gpt-4o\")\n", + "elif openai_host == \"local\":\n", + " print(\"Usando API local compatible con OpenAI sin clave\")\n", + " openai_client = openai.OpenAI(api_key=\"no-key-required\", base_url=os.environ[\"LOCAL_OPENAI_ENDPOINT\"])\n", + " model_name = os.getenv(\"OPENAI_MODEL\", \"gpt-4o\")\n", + "elif openai_host == \"azure\" and os.getenv(\"AZURE_OPENAI_KEY_FOR_CHATVISION\"):\n", + " # Autenticación usando una clave de API de Azure OpenAI\n", + " # Esto generalmente no se recomienda, pero se proporciona por conveniencia\n", + " print(\"Usando Azure OpenAI con clave\")\n", + " openai_client = 
openai.OpenAI(\n", + " base_url=os.environ[\"AZURE_OPENAI_ENDPOINT\"] + \"/openai/v1/\",\n", + " api_key=os.environ[\"AZURE_OPENAI_KEY_FOR_CHATVISION\"],\n", + " )\n", + " # Esto es en realidad el nombre del deployment, no el nombre del modelo\n", + " model_name = os.getenv(\"OPENAI_MODEL\", \"gpt-4o\")\n", + "elif openai_host == \"azure\" and os.getenv(\"AZURE_OPENAI_ENDPOINT\"):\n", + " tenant_id = os.environ[\"AZURE_TENANT_ID\"]\n", + " print(\"Usando Azure OpenAI con credencial de Azure Developer CLI\")\n", + " default_credential = azure.identity.AzureDeveloperCliCredential(tenant_id=tenant_id)\n", + " token_provider = azure.identity.get_bearer_token_provider(\n", + " default_credential, \"https://cognitiveservices.azure.com/.default\"\n", + " )\n", + " openai_client = openai.OpenAI(\n", + " base_url=os.environ[\"AZURE_OPENAI_ENDPOINT\"] + \"/openai/v1/\",\n", + " api_key=token_provider,\n", + " )\n", + " # Esto es en realidad el nombre del deployment, no el nombre del modelo\n", + " model_name = os.getenv(\"OPENAI_MODEL\", \"gpt-4o\")\n", + "\n", + "print(f\"Usando modelo {model_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "2b094d4b", + "metadata": {}, + "source": [ + "## Envía una imagen por URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28773b5a", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"¿es un unicornio?\", \"type\": \"text\"},\n", + " {\n", + " \"image_url\": {\"url\": \"https://upload.wikimedia.org/wikipedia/commons/6/6e/Ur-painting.jpg\"},\n", + " \"type\": \"image_url\",\n", + " },\n", + " ],\n", + " }\n", + "]\n", + "response = openai_client.chat.completions.create(model=model_name, messages=messages)\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "ad23a842", + "metadata": {}, + "source": [ + "## Envía una imagen por Data URI\n", + "\n" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "d7d970ab", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "\n", + "def open_image_as_base64(filename):\n", + " with open(filename, \"rb\") as image_file:\n", + " image_data = image_file.read()\n", + " image_base64 = base64.b64encode(image_data).decode(\"utf-8\")\n", + " return f\"data:image/png;base64,{image_base64}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1579e9e3", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"¿estos son cocodrilos o caimanes?\", \"type\": \"text\"},\n", + " {\"image_url\": {\"url\": open_image_as_base64(\"../mystery_reptile.png\")}, \"type\": \"image_url\"},\n", + " ],\n", + " }\n", + " ],\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "ba4ade9a", + "metadata": {}, + "source": [ + "## Casos de uso para análisis de imágenes" + ] + }, + { + "cell_type": "markdown", + "id": "37a54c5e", + "metadata": {}, + "source": [ + "### Accesibilidad" + ] + }, + { + "cell_type": "markdown", + "id": "b67d1ea4", + "metadata": {}, + "source": [ + "#### Asistencia para personas con discapacidad visual" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e60a3bc", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"¿hay algo bueno para veganos en este menú?\", \"type\": \"text\"},\n", + " {\"image_url\": {\"url\": open_image_as_base64(\"../menu.png\")}, \"type\": \"image_url\"},\n", + " ],\n", + " }\n", + " ],\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + 
"cell_type": "markdown", + "id": "f4adb316", + "metadata": {}, + "source": [ + "#### Subtítulos automáticos de imágenes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92bb77dc", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"Sugiere un texto alternativo para esta imagen\", \"type\": \"text\"},\n", + " {\"image_url\": {\"url\": open_image_as_base64(\"../azure_arch.png\")}, \"type\": \"image_url\"},\n", + " ],\n", + " }\n", + " ],\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "69873fb0", + "metadata": {}, + "source": [ + "### Automatización de procesos de negocio" + ] + }, + { + "cell_type": "markdown", + "id": "1bacd251", + "metadata": {}, + "source": [ + "#### Procesamiento de reclamaciones de seguros" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5998961", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": (\n", + " \"Eres un asistente de IA que ayuda a las compañías de seguros de autos a procesar reclamaciones. \"\n", + " \"Aceptas imágenes de autos dañados que se envían con las reclamaciones, y puedes hacer juicios \"\n", + " \"sobre las causas del daño automovilístico y la validez de las reclamaciones sobre ese daño.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"La reclamación dice que este daño es por granizo. 
¿Es válida?\", \"type\": \"text\"},\n", + " {\"image_url\": {\"url\": open_image_as_base64(\"../dented_car.jpg\")}, \"type\": \"image_url\"},\n", + " ],\n", + " },\n", + " ],\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "0cd4304a", + "metadata": {}, + "source": [ + "#### Análisis de gráficos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e17e551", + "metadata": {}, + "outputs": [], + "source": [ + "messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"¿En qué zona estamos perdiendo más árboles?\", \"type\": \"text\"},\n", + " {\n", + " \"image_url\": {\n", + " \"url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/1/1f/20210331_Global_tree_cover_loss_-_World_Resources_Institute.svg/1280px-20210331_Global_tree_cover_loss_-_World_Resources_Institute.svg.png\"\n", + " },\n", + " \"type\": \"image_url\",\n", + " },\n", + " ],\n", + " }\n", + "]\n", + "response = openai_client.chat.completions.create(model=model_name, messages=messages)\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "61036a69", + "metadata": {}, + "source": [ + "#### Análisis de tablas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea9aa772", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"¿Cuál es la planta más barata?\", \"type\": \"text\"},\n", + " {\"image_url\": {\"url\": open_image_as_base64(\"../page_0.png\")}, \"type\": \"image_url\"},\n", + " ],\n", + " }\n", + " ],\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "7036400e", + "metadata": {}, + "source": [ + "#### Soporte de electrodomésticos" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "eb382457", + "metadata": {}, + "outputs": [], + "source": [ + "response = openai_client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"¿Cómo configuro esto para lavar los platos rápidamente?\", \"type\": \"text\"},\n", + " {\"image_url\": {\"url\": open_image_as_base64(\"../dishwasher.png\")}, \"type\": \"image_url\"},\n", + " ],\n", + " }\n", + " ],\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}