Skip to content

Commit

Permalink
minor change
Browse files Browse the repository at this point in the history
  • Loading branch information
lspataroG committed Feb 5, 2025
1 parent 4a00d72 commit 124e5df
Showing 1 changed file with 34 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"id": "ur8xi4C7S06n"
},
Expand Down Expand Up @@ -104,7 +104,7 @@
"source": [
"# Overview\n",
"\n",
"This notebook demonstrates how to use BigFrames and LangChain to build a RAG (Retrieval Augmented Generation) pipeline using Vertex AI. \n",
"This notebook demonstrates how to use BigFrames and LangChain to build a RAG (Retrieval Augmented Generation) pipeline using Vertex AI.\n",
"\n",
"Specifically, we are going to build a data pipeline capable of being deployed in a production environment with scheduled execution.\n",
"\n",
Expand Down Expand Up @@ -151,9 +151,35 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tFy3H3aPgx12"
},
"outputs": [],
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tFy3H3aPgx12",
"outputId": "77abe225-b1ae-4e07-f251-ccb136d198ef"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",

Check warning on line 166 in gemini/use-cases/retrieval-augmented-generation/rag_pipeline_terabyte_scale_with_bigframes.ipynb

View workflow job for this annotation

GitHub Actions / Check Spelling

`hdone` is not a recognized word. (unrecognized-spelling)
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.3/93.3 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m26.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.4/84.4 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.2/243.2 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m27.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.9/50.9 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for swifter (setup.py) ... \u001b[?25l\u001b[?25hdone\n",

Check warning on line 174 in gemini/use-cases/retrieval-augmented-generation/rag_pipeline_terabyte_scale_with_bigframes.ipynb

View workflow job for this annotation

GitHub Actions / Check Spelling

`hdone` is not a recognized word. (unrecognized-spelling)
"\u001b[33m WARNING: The script dotenv is installed in '/root/.local/bin' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
"\u001b[0m\u001b[33m WARNING: The script markdownify is installed in '/root/.local/bin' which is not on PATH.\n",
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n",
"\u001b[0m"
]
}
],
"source": [
"%pip install --upgrade --user --quiet google-cloud-aiplatform \"bigframes\" langchain markdownify swifter \"langchain-google-community[featurestore]\" langchain-google-vertexai\n"
]
Expand Down Expand Up @@ -253,8 +279,8 @@
"from google.cloud import bigquery\n",
"import vertexai\n",
"\n",
"PROJECT_ID = \"\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n",
"if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n",
"PROJECT_ID = \"\" # @param {type: \"string\", placeholder: \"your-project-id\", isTemplate: true}\n",
"if not PROJECT_ID or PROJECT_ID == \"your-project-id\":\n",
" PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n",
"\n",
"# GOOGLE_CLOUD_REGION must be in a US region because the source dataset is in US\n",
Expand Down Expand Up @@ -951,8 +977,7 @@
],
"metadata": {
"colab": {
"name": "rag_pipeline_terabyte_scale_with_bigframes.ipynb",
"toc_visible": true
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
Expand Down

0 comments on commit 124e5df

Please sign in to comment.