Merge pull request #1 from neo4j-field/prompts-tools

migrate Honda poc code into repo
neo4j-field · Sep 16, 2024 · f5c62ab · f5c62ab
2 parents 79f6d0f + 2faf40a
commit f5c62ab
Show file tree

Hide file tree

Showing 38 changed files with 2,431 additions and 198 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+private/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/.ipynb b/.ipynb
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from src.ps_genai_agents.prompts.cypher_prompts import create_cypher_prompt\n",
+    "from neo4j_genai.schema import get_schema\n",
+    "from neo4j import GraphDatabase"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "driver = GraphDatabase.driver(\n",
+    "    uri=os.environ.get(\"NEO4J_URI\", \"\"),\n",
+    "    auth=(os.environ.get(\"NEO4J_USERNAME\"), os.environ.get(\"NEO4J_PASSWORD\")),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "You are an expert Neo4j Cypher translator who understands the question in english and convert to Cypher strictly based on the Neo4j Schema provided and following the instructions below:\n",
+      "    <instructions>\n",
+      "    * Use aliases to refer the node or relationship in the generated Cypher query\n",
+      "    * Generate Cypher query compatible ONLY for Neo4j Version 5\n",
+      "    * Do not use EXISTS in the cypher. Use alias when using the WITH keyword\n",
+      "    * Only use SIZE when checking the size of a list\n",
+      "    * Use only Nodes and relationships mentioned in the schema\n",
+      "    * Always enclose the Cypher output inside 3 backticks (```)\n",
+      "    * Always do a case-insensitive and fuzzy search for any properties related search. Eg: to search for a Company name use `toLower(c.name) contains 'neo4j'`\n",
+      "    * Cypher is NOT SQL. So, do not mix and match the syntaxes\n",
+      "    * Ensure that null results are filtered out before running aggregations!\n",
+      "    </instructions>\n",
+      "\n",
+      "    Strictly use this Schema for Cypher generation:\n",
+      "    <schema>\n",
+      "    Node properties:\n",
+      "Customer {id: STRING, ageBucket: STRING, gender: STRING}\n",
+      "Category {id: STRING}\n",
+      "Problem {id: STRING, problem: STRING}\n",
+      "Question {id: INTEGER, question: STRING}\n",
+      "Vehicle {id: STRING, totalProblems: INTEGER}\n",
+      "Verbatim {id: STRING, verbatim: STRING, verbatimText: STRING, severity: FLOAT, gender: STRING, make: STRING, model: STRING, adaEmbedding: LIST, titanEmbedding: LIST, ageBucket: STRING, minAge: INTEGER, maxAge: INTEGER}\n",
+      "Relationship properties:\n",
+      "\n",
+      "The relationships:\n",
+      "(:Customer)-[:SUBMITTED]->(:Verbatim)\n",
+      "(:Problem)-[:HAS_CATEGORY]->(:Category)\n",
+      "(:Question)-[:HAS_PROBLEM]->(:Problem)\n",
+      "(:Vehicle)-[:HAS_CATEGORY]->(:Category)\n",
+      "(:Vehicle)-[:HAS_VERBATIM]->(:Verbatim)\n",
+      "(:Verbatim)-[:HAS_CATEGORY]->(:Category)\n",
+      "(:Verbatim)-[:HAS_PROBLEM]->(:Problem)\n",
+      "(:Verbatim)-[:HAS_QUESTION]->(:Question)\n",
+      "    </schema>\n",
+      "\n",
+      "    The samples below follow the instructions and the schema mentioned above. So, please follow the same when you generate the cypher:\n",
+      "    <samples>\n",
+      "    \n",
+      "    \n",
+      "Human: What is the proportion of male to female complainants about cup holders in the Honda Odyssey?\n",
+      "Assistant: MATCH (v:Verbatim {{make: \"Honda\", model: \"Odyssey\"}})\n",
+      "  WHERE v.verbatimText CONTAINS \"cup holder\"\n",
+      "WITH SUM(COUNT {{MATCH (v:Verbatim) WHERE v.gender = \"Male\" RETURN v}}) AS males, \n",
+      "     SUM(COUNT {{MATCH (v:Verbatim) WHERE v.gender = \"Female\" RETURN v}}) AS females\n",
+      "RETURN males, females, toFloat(males) / (CASE WHEN females = 0 THEN 1 ELSE females END)  AS maleToFemale\n",
+      "\n",
+      "Human: What is the proportion of male to female complainants per categories in the Acura RDX?\n",
+      "Assistant: MATCH (c:Category)<-[:HAS_CATEGORY]-(v:Verbatim {{make: \"Acura\", model: \"RDX\"}})\n",
+      "WITH c.id AS category,\n",
+      "     SUM(COUNT {{MATCH (v:Verbatim) WHERE v.gender = \"Male\" RETURN v}}) AS males, \n",
+      "     SUM(COUNT {{MATCH (v:Verbatim) WHERE v.gender = \"Female\" RETURN v}}) AS females\n",
+      "RETURN category, males, females, toFloat(males) / (CASE WHEN females = 0 THEN 1 ELSE females END) AS maleToFemale \n",
+      "ORDER BY category\n",
+      "\n",
+      "Human: Summarize the top 3 most common complaints about Apple CarPlay on the Honda Pilot.\n",
+      "Assistant: MATCH (v:Verbatim {{make: \"Honda\", model: \"Pilot\"}})\n",
+      "  WHERE v.verbatimText CONTAINS \"carplay\"\n",
+      "RETURN v.verbatim as content\n",
+      "\n",
+      "Human: What are the top 10 problems with males between the ages of 40 and 70 years old and what vehicles were involved?\n",
+      "Assistant: MATCH (p:Problem)<-[:HAS_PROBLEM]-(v:Verbatim)\n",
+      "  WHERE v.minAge >= 40 AND v.maxAge <= 70 AND v.gender = \"Male\"\n",
+      "WITH p, COUNT(v) AS total, COLLECT(DISTINCT v.make + \" \" + v.model) AS vehicles\n",
+      "ORDER BY total DESC\n",
+      "LIMIT 5\n",
+      "RETURN p.problem AS problem, total, vehicles\n",
+      "\n",
+      "Human: What are the top 10 problems that share the most responses between a Honda Odyssey and Honda Civic?\n",
+      "Assistant: MATCH (p:Problem)<-[:HAS_PROBLEM]-(v:Verbatim {{make: \"Honda\", model: \"Odyssey\"}})\n",
+      "WITH p, count(v) AS total1\n",
+      "MATCH (p)<-[:HAS_PROBLEM]-(v:Verbatim {{make: \"Honda\", model: \"Civic\"}})\n",
+      "WITH p.problem AS problem, total1, count(v) AS total2\n",
+      "RETURN problem, total1 + total2 AS totalResponses\n",
+      "ORDER BY totalResponses DESC \n",
+      "LIMIT 10\n",
+      "\n",
+      "Human: What are the total responses under seat23 for honda civic, what is the male to female proportion for these responses and what is the problem for seat23?\n",
+      "Assistant: MATCH (p:Problem {{id: \"SEAT23\"}})<-[:HAS_PROBLEM]-(v:Verbatim {{make: \"Honda\", model: \"Civic\"}})\n",
+      "WITH p.problem AS problem, COUNT(v) AS totalResponses, \n",
+      "SUM(CASE WHEN v.gender = \"Male\" THEN 1 ELSE 0 END) AS males,\n",
+      "SUM(CASE WHEN v.gender = \"Female\" THEN 1 ELSE 0 END) AS females\n",
+      "RETURN totalResponses, males, females, toFloat(males) /  (CASE WHEN females = 0 THEN 1 ELSE females END) AS maleToFemaleRatio, problem\n",
+      "\n",
+      "Human: Compare the sentiment for verbatims related to parking cameras in Acura MDX and Acura RDX.\n",
+      "Assistant: MATCH (v:Verbatim) \n",
+      "  WHERE v.verbatimText CONTAINS 'acura mdx' AND v.verbatimText CONTAINS 'parking cameras'\n",
+      "RETURN v.model AS model, v.verbatim as content LIMIT 50\n",
+      "UNION\n",
+      "MATCH (v:Verbatim) \n",
+      "  WHERE v.verbatimText CONTAINS 'acura rdx' AND v.verbatimText CONTAINS 'parking cameras'\n",
+      "RETURN v.model AS model, v.verbatim as content LIMIT 50\n",
+      "\n",
+      "Human: What are the sentiments towards Honda doors?\n",
+      "Assistant: MATCH (v:Verbatim) \n",
+      "  WHERE v.verbatimText CONTAINS 'honda' AND v.verbatimText CONTAINS 'door'\n",
+      "RETURN v.model AS model, v.verbatim as content\n",
+      "\n",
+      "Human: For customers who complained about problem DRE10, what other problems are indicated from the same customers? What are top 2 common problems between customers?\n",
+      "Assistant: MATCH (p:Problem {{id: 'DRE10'}})<-[:HAS_PROBLEM]-(v:Verbatim)<-[:SUBMITTED]-(c:Customer)\n",
+      "  WHERE v.verbatim <> ''\n",
+      "WITH c, p\n",
+      "MATCH (c)-[:SUBMITTED]->(v2:Verbatim)-[:HAS_PROBLEM]->(p2:Problem)\n",
+      "  WHERE p <> p2 AND v2.verbatim <> ''\n",
+      "WITH p.problem AS problem, p2.problem AS similarProblem, COLLECT(v2.verbatim) AS content\n",
+      "RETURN problem, similarProblem, content, SIZE(content) AS sharedTotal\n",
+      "ORDER BY sharedTotal DESC LIMIT 2;\n",
+      "\n",
+      "Human: For the infotainment category, what are top 3 models with the highest severity score?\n",
+      "Assistant: MATCH (v:Verbatim)\n",
+      "  WHERE v.verbatimText CONTAINS 'infotainment' AND v.severity IS NOT NULL\n",
+      "RETURN DISTINCT v.model AS model, count(v) AS totalResponses, avg(v.severity) AS score \n",
+      "ORDER BY score DESC \n",
+      "LIMIT 10;\n",
+      "\n",
+      "Human: What are the top 3 infotainment problems for each age buckets?\n",
+      "Assistant: MATCH (v:Verbatim)-[:HAS_PROBLEM]->(p:Problem)\n",
+      "  WHERE v.verbatimText CONTAINS 'infotainment' AND v.ageBucket IS NOT NULL\n",
+      "WITH v.ageBucket AS ageBucket, p.problem AS problem, collect(v.verbatim) AS responses\n",
+      "WITH ageBucket, problem, size(responses) AS total, responses\n",
+      "WITH * ORDER BY ageBucket, total DESC\n",
+      "WITH ageBucket, COLLECT(problem) AS problems, COLLECT(total) AS totals, COLLECT(responses) AS responsesList\n",
+      "RETURN ageBucket, problems[..3] AS problem, totals[..3] AS total, responsesList[..3] AS reponses\n",
+      "LIMIT 3\n",
+      "\n",
+      "Human: What are the 3 most reported problems for Honda vehicles by model year and what are the 3 most reported problems for Acura vehicles by model year?\n",
+      "Assistant: MATCH (v:Verbatim{{make: 'Honda'}})-[:HAS_PROBLEM]->(p:Problem)\n",
+      "WITH v.make AS make, p.problem AS problem, COUNT(v) AS total ORDER BY total DESC LIMIT 3\n",
+      "RETURN make, problem, total\n",
+      "UNION\n",
+      "MATCH (v:Verbatim{{make: 'Acura'}})-[:HAS_PROBLEM]->(p:Problem)\n",
+      "WITH v.make AS make, p.problem AS problem, COUNT(v) AS total ORDER BY total DESC LIMIT 3\n",
+      "RETURN make, problem, total;\n",
+      "\n",
+      "Human: What are the top 5 most severe questions for females aged 30-34 for all Acura models?\n",
+      "Assistant: MATCH (q:Question)<-[:HAS_QUESTION]-(v:Verbatim) \n",
+      "WHERE v.gender = \"Female\" AND v.make = \"Acura\" AND v.minAge >= 30 AND v.maxAge <= 34 AND v.severity IS NOT NULL\n",
+      "WITH q, avg(v.severity) as avgSeverity\n",
+      "RETURN q.question AS question, avgSeverity\n",
+      "ORDER BY avgSeverity DESC\n",
+      "LIMIT 5\n",
+      "\n",
+      "Human: What are the top 5 most severe problems for females aged 30-34 for all Acura models?\n",
+      "Assistant: MATCH (p:Problem)<-[:HAS_PROBLEM]-(v:Verbatim) \n",
+      "WHERE v.gender = \"Female\" AND v.make = \"Acura\" AND v.minAge >= 30 AND v.maxAge <= 34 AND v.severity IS NOT NULL\n",
+      "WITH p, avg(v.severity) as avgSeverity\n",
+      "RETURN p.problem AS problem, avgSeverity\n",
+      "ORDER BY avgSeverity DESC\n",
+      "LIMIT 5\n",
+      "\n",
+      "Human: How similar or different are the verbatims for males and females aged 30-34 for all Acura models?\n",
+      "Assistant: MATCH (v:Verbatim) \n",
+      "WHERE v.gender = \"Male\" AND v.make = \"Acura\" AND v.minAge >= 30 AND v.maxAge <= 34 AND isEmpty(v.verbatim) = false\n",
+      "WITH collect(v.verbatim) as content\n",
+      "RETURN \"Male\" as gender, content LIMIT 50\n",
+      "UNION\n",
+      "MATCH (v:Verbatim) \n",
+      "WHERE v.gender = \"Female\" AND v.make = \"Acura\" AND v.minAge >= 30 AND v.maxAge <= 34 AND isEmpty(v.verbatim) = false\n",
+      "WITH collect(v.verbatim) as content\n",
+      "RETURN \"Female\" as gender, content LIMIT 50\n",
+      "\n",
+      "Human: Please summarize the verbatims for 2023 RDX for question 010 Trunk/TG Touch-Free Sensor DTU and create 5 categories for the problems. As an output, I want a list of verbatims and the corresponding categories\n",
+      "Assistant: MATCH (q:Question{{id: 10}})<-[:HAS_QUESTION]-(v:Verbatim)\n",
+      "  WHERE v.model='RDX'\n",
+      "RETURN v.verbatim\n",
+      "\n",
+      "\n",
+      "</samples>\n",
+      "\n",
+      "    Human: {question}\n",
+      "    Assistant:\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "p = create_cypher_prompt(\n",
+    "    graph_schema=get_schema(driver=driver), \n",
+    "    examples_yaml_path=\"data/iqs/queries/queries.yml\"\n",
+    ")\n",
+    "print(p)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ps-genai-agents-lVTtXIGJ-py3.12",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Makefile b/Makefile
@@ -4,17 +4,23 @@
 all: help
 
 test:
-	poetry run pytest tests
+	poetry run python3 -m pytest tests
 
 test_integration:
-	poetry run pytest tests/integration
+	poetry run pytest python3 -m tests/integration
 
 test_unit:
-	poetry run pytest tests/unit
+	poetry run pytest python3 -m tests/unit
 
 init:
 	poetry install --with dev, ui
 	pre-commit install
+	poetry run python3 -m pip install -U --no-cache-dir  \
+            --config-settings="--global-option=build_ext" \
+            --config-settings="--global-option=-I$(brew --prefix graphviz)/include/" \
+            --config-settings="--global-option=-L$(brew --prefix graphviz)/lib/" \
+            pygraphviz
+
 
 ######################
 # LINTING AND FORMATTING

diff --git a/images/In Search for Linear Relations in Sentence Embedding Spaces.pdf b/images/In Search for Linear Relations in Sentence Embedding Spaces.pdf
diff --git a/images/genai-basic-design.png b/images/genai-basic-design.png
diff --git a/images/graphrag-architecture.png b/images/graphrag-architecture.png
diff --git a/images/music-rag-architecture.png b/images/music-rag-architecture.png
diff --git a/images/music_composer_agent_langgraph (1).svg b/images/music_composer_agent_langgraph (1).svg
diff --git a/images/music_composer_agent_langgraph.svg b/images/music_composer_agent_langgraph.svg
diff --git a/images/vector-space-example.webp b/images/vector-space-example.webp
diff --git a/notebooks/text2cypher_graph_agent.ipynb b/notebooks/text2cypher_graph_agent.ipynb
diff --git a/notebooks/vector_search_graph_agent.ipynb b/notebooks/vector_search_graph_agent.ipynb