Skip to content

Commit

Permalink
Demo dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
maxluk committed Nov 7, 2024
1 parent d49e5aa commit 9551555
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 7 deletions.
93 changes: 89 additions & 4 deletions playground/case_graph.sql
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,32 @@ SELECT * FROM cypher('case_playground_graph', $$
RETURN e.id
$$) AS result(id int);

CREATE OR REPLACE FUNCTION create_case_in_case_graph(case_id text)
RETURNS void
LANGUAGE plpgsql
VOLATILE
AS $BODY$
BEGIN
load 'age';
SET search_path TO ag_catalog;
EXECUTE format('SELECT * FROM cypher(''case_graph'', $$CREATE (:case {case_id: %s})$$) AS (a agtype);', quote_ident(case_id));
END
$BODY$;

CREATE OR REPLACE FUNCTION create_case_link_in_case_graph(id_from text, id_to text)
RETURNS void
LANGUAGE plpgsql
VOLATILE
AS $BODY$
BEGIN
load 'age';
SET search_path TO ag_catalog;
EXECUTE format('SELECT * FROM cypher(''case_graph'', $$MATCH (a:case), (b:case) WHERE a.case_id = %s AND b.case_id = %s CREATE (a)-[e:REF]->(b) RETURN e$$) AS (a agtype);', quote_ident(id_from), quote_ident(id_to));
END
$BODY$;



CREATE OR REPLACE FUNCTION create_case(case_id text)
RETURNS void
LANGUAGE plpgsql
Expand All @@ -147,6 +173,8 @@ BEGIN
END
$BODY$;


-- REAL CREATION
-- Delete all edges
DELETE FROM case_graph_full._ag_label_edge;
-- Delete all nodes
Expand Down Expand Up @@ -200,19 +228,76 @@ INSERT INTO case_graph_full."REF" (start_id, end_id)
SELECT gid_from AS start_id, gid_to AS end_id
FROM gedges;

-- CREATION of case_graph
SELECT * FROM ag_catalog.drop_graph('case_graph', true);
SELECT create_graph('case_graph');

-- Create nodes (doesn't work in dbeaver, but works in pgadmin)
SELECT create_case_in_case_graph(cases.id)
FROM public.cases;

SELECT * from cypher('case_graph', $$
MATCH ()-[r]->(n)
MATCH (n)
RETURN COUNT(n.case_id)
$$) as (case_id TEXT);

WITH edges AS (
SELECT c1.id AS id_from, c2.id AS id_to
FROM public.cases c1
LEFT JOIN
LATERAL jsonb_array_elements(c1.data -> 'cites_to') AS cites_to_element ON true
LEFT JOIN
LATERAL jsonb_array_elements(cites_to_element -> 'case_ids') AS case_ids ON true
JOIN public.cases c2
ON case_ids::text = c2.id
LIMIT 10
)
SELECT create_case_link_in_case_graph(edges.id_from, edges.id_to)
FROM edges
limit 1;

WITH edges AS (
SELECT DISTINCT c1.id AS id_from, c2.id AS id_to
FROM public.cases c1
LEFT JOIN
LATERAL jsonb_array_elements(c1.data -> 'cites_to') AS cites_to_element ON true
LEFT JOIN
LATERAL jsonb_array_elements(cites_to_element -> 'case_ids') AS case_ids ON true
JOIN public.cases c2
ON case_ids::text = c2.id
), gedges AS (
SELECT edges.id_from, node1.id AS gid_from, edges.id_to, node2.id AS gid_to
FROM edges
LEFT JOIN case_graph."case" node1 ON node1.properties::json ->> 'case_id' = edges.id_from
LEFT JOIN case_graph."case" node2 ON node2.properties::json ->> 'case_id' = edges.id_to
)
INSERT INTO case_graph."REF" (start_id, end_id)
SELECT gid_from AS start_id, gid_to AS end_id
FROM gedges;

drop index case_graph.case_graph_idx_on_case_id;
-- doesn't work for WHERE IN clauses
CREATE INDEX case_graph_idx_on_case_id ON case_graph."case" USING gin (properties);
-- works for WHERE IN clauses
CREATE INDEX CONCURRENTLY case_graph_ex_idx ON case_graph."case"
(ag_catalog.agtype_access_operator(properties, '"case_id"'::agtype));

CREATE INDEX case_graph_idx_on_end_id ON case_graph."REF" (end_id);
CREATE INDEX case_graph_idx_on_start_id ON case_graph."REF" (start_id);

SELECT * from cypher('case_graph', $$ EXPLAIN ANALYZE
MATCH ()-[r:REF]->(n)
WHERE n.case_id IN ['782330', '615468']
RETURN r
$$) as (r agtype);

SELECT * from cypher('case_graph_full', $$
MATCH ()-[r]->(n)
SELECT * from cypher('case_graph', $$
MATCH ()-[r:REF]->(n)
WHERE n.case_id IN ['782330', '615468']
RETURN r
$$) as (r agtype);

SELECT * from cypher('case_graph', $$
MATCH ()-[r]->()
RETURN r
RETURN count(r)
$$) as (r agtype);
35 changes: 32 additions & 3 deletions playground/graph_query_v2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ graph AS (
from (
SELECT semantic_ranked.id, graph_query.ref_id, c2.description_vector <=> embedding AS ref_cosine
FROM semantic_ranked
LEFT JOIN cypher('case_graph_full', $$
MATCH (s)-[r]->(n)
LEFT JOIN cypher('case_graph', $$
MATCH (s)-[r:REF]->(n)
RETURN n.case_id AS case_id, s.case_id AS ref_id
$$) as graph_query(case_id TEXT, ref_id TEXT)
ON semantic_ranked.id = graph_query.case_id
Expand Down Expand Up @@ -126,6 +126,35 @@ select label,score,graph_rank,semantic_rank,vector_rank,id,case_name,"date","dat
FROM rrf
order by score DESC;


-- Demo dataset
WITH
user_query as (
select 'Water leaking into the apartment from the floor above.' as query_text
),
embedding_query AS (
SELECT query_text, azure_openai.create_embeddings('text-embedding-3-small', query_text)::vector AS embedding
from user_query
),
vector AS (
SELECT cases.id, cases.data#>>'{name_abbreviation}' AS case_name, cases.data#>>'{decision_date}' AS date, cases.data AS data,
RANK() OVER (ORDER BY description_vector <=> embedding) AS vector_rank, query_text, embedding
FROM cases, embedding_query
WHERE (cases.data#>>'{court, id}')::integer IN (9029)--, 8985) -- Washington Supreme Court (9029) or Washington Court of Appeals (8985)
ORDER BY description_vector <=> embedding
LIMIT 60
),
graph AS (
SELECT vector.id, graph_query.ref_id
FROM vector
LEFT JOIN cypher('case_graph', $$
MATCH (s)-[r:REF]->(n)
RETURN n.case_id AS case_id, s.case_id AS ref_id
$$) as graph_query(case_id TEXT, ref_id TEXT)
ON vector.id = graph_query.case_id
)
select * from graph;

-- Unoptimized final query
-- Recall: 40% -> 60% -> 70%
WITH
Expand Down Expand Up @@ -288,7 +317,7 @@ order by score DESC;

-- Experiments

select vector('[3,1,2]');


select *
from cypher('case_graph_full', $$
Expand Down

0 comments on commit 9551555

Please sign in to comment.