-
Notifications
You must be signed in to change notification settings - Fork 217
Refactoring utils.py and creating a document management UI #31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
AstraBert
merged 10 commits into
main
from
clelia/utils-refactoring-and-document-management
Jul 17, 2025
Merged
Changes from 4 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
c9d8db8
refactor: refactoring utils; feat: adding document management class
AstraBert f25b3c2
feat: add UI
AstraBert 666a89f
chore: delete try.html and vbump
AstraBert d1f3944
ci: typecheck
AstraBert a40a08d
chore: implementing suggestions
AstraBert e99ea0c
feat: first implementation of parametrized SQL (untested)
AstraBert ecd9973
chore: resolve suggestions + tests
AstraBert 2c889ac
Fix boolean evaluation error
nick-galluzzo ca8799b
Merge branch 'main' into clelia/utils-refactoring-and-document-manage…
AstraBert ec7479c
ci: linting
AstraBert File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| from pydantic import BaseModel, model_validator, Field | ||
| from sqlalchemy import Engine, create_engine, Connection, Result, text | ||
| from typing_extensions import Self | ||
| from typing import Optional, Any, List, cast | ||
|
|
||
|
|
||
class ManagedDocument(BaseModel):
    # One document record together with its generated artefacts (summary,
    # Q&A, mind map, bullet points).
    #
    # `is_exported` marks instances that were built from rows read back from
    # storage; for those the SQL quote-escaping below is skipped.
    #
    # NOTE: documentation is kept in `#` comments on purpose -- pydantic would
    # copy a class docstring into the model's JSON schema description.
    document_name: str
    content: str
    summary: str
    q_and_a: str
    mindmap: str
    bullet_points: str
    is_exported: bool = Field(default=False)

    @model_validator(mode="after")
    def validate_input_for_sql(self) -> Self:
        """Double every single quote in the text fields of non-exported documents.

        Runs after field validation. Instances created with
        ``is_exported=True`` are returned untouched.
        """
        if self.is_exported:
            return self
        for field_name in (
            "document_name",
            "content",
            "summary",
            "q_and_a",
            "mindmap",
            "bullet_points",
        ):
            escaped = getattr(self, field_name).replace("'", "''")
            setattr(self, field_name, escaped)
        return self
|
|
||
|
|
||
class DocumentManager:
    """Store and retrieve ``ManagedDocument`` records in a SQL table.

    A SQLAlchemy connection is opened lazily on first use and reused until
    ``disconnect()`` is called.

    NOTE: ``table_name`` is interpolated directly into the SQL statements
    (identifiers cannot be sent as bound parameters), so it must come from
    trusted configuration and never from user input.
    """

    # Text columns shared by the table and the ManagedDocument model.
    _TEXT_COLUMNS = (
        "document_name",
        "content",
        "summary",
        "q_and_a",
        "mindmap",
        "bullet_points",
    )

    def __init__(
        self,
        engine: Optional[Engine] = None,
        engine_url: Optional[str] = None,
        table_name: Optional[str] = None,
    ):
        """Create a manager bound to an existing engine or to an engine URL.

        Args:
            engine: Ready-to-use SQLAlchemy engine; takes precedence over
                ``engine_url`` when both are given.
            engine_url: Database URL handed to ``create_engine``.
            table_name: Name of the table to use; defaults to ``"documents"``.

        Raises:
            ValueError: If neither ``engine`` nor ``engine_url`` is provided.
        """
        self.table_name: str = table_name or "documents"
        self.table_exists: bool = False
        self._connection: Optional[Connection] = None
        # Remembers whether a connection was ever opened so that disconnect()
        # keeps raising only when the manager was never connected.
        self._has_connected: bool = False
        if engine is not None:
            self._engine: Engine = engine
        elif engine_url:
            self._engine = create_engine(url=engine_url)
        else:
            # The message used to mention a non-existent `engine_setup_kwargs`.
            raise ValueError("One of engine or engine_url must be set")

    def _connect(self) -> None:
        """Open a connection from the engine and keep it on the instance."""
        self._connection = self._engine.connect()
        self._has_connected = True

    def _get_connection(self) -> Connection:
        """Return the current connection, opening one first if needed."""
        if self._connection is None:
            self._connect()
        return cast(Connection, self._connection)

    def _create_table(self) -> None:
        """Create the documents table if it is missing and commit."""
        connection = self._get_connection()
        # `SERIAL` is PostgreSQL syntax for an auto-incrementing integer.
        connection.execute(
            text(f"""
                CREATE TABLE IF NOT EXISTS {self.table_name} (
                    id SERIAL PRIMARY KEY,
                    document_name TEXT NOT NULL,
                    content TEXT,
                    summary TEXT,
                    q_and_a TEXT,
                    mindmap TEXT,
                    bullet_points TEXT
                );
                """)
        )
        connection.commit()
        self.table_exists = True

    @staticmethod
    def _to_row(document: "ManagedDocument") -> Any:
        """Build the bound-parameter mapping for one document.

        Non-exported ``ManagedDocument`` instances have every single quote
        doubled by their validator so they could be pasted into an SQL
        literal. Values are now sent as bound parameters, so that doubling is
        undone here to keep the stored text identical to the user's text.
        """
        row = {}
        for column in DocumentManager._TEXT_COLUMNS:
            value = getattr(document, column)
            if not document.is_exported:
                value = value.replace("''", "'")
            row[column] = value
        return row

    def import_documents(self, documents: List[ManagedDocument]) -> None:
        """Insert ``documents`` into the table, creating the table if needed.

        Values are passed as bound parameters, so quotes, ``:name``-like
        tokens or SQL fragments inside the text cannot alter the statement.

        Args:
            documents: Documents to insert; an empty list inserts nothing.
        """
        connection = self._get_connection()
        if not self.table_exists:
            self._create_table()
        rows = [self._to_row(document) for document in documents]
        if rows:
            # A list of mappings makes SQLAlchemy run the INSERT once per row.
            connection.execute(
                text(f"""
                    INSERT INTO {self.table_name}
                        (document_name, content, summary, q_and_a, mindmap, bullet_points)
                    VALUES
                        (:document_name, :content, :summary, :q_and_a, :mindmap, :bullet_points);
                    """),
                rows,
            )
        connection.commit()

    def export_documents(self, limit: Optional[int] = None) -> List[ManagedDocument]:
        """Read documents back from the table, ordered by ``id``.

        Args:
            limit: Maximum number of rows to return. ``None`` and ``0`` both
                fall back to 15.

        Returns:
            The rows as ``ManagedDocument`` instances with ``is_exported=True``.
        """
        if not limit:
            limit = 15
        result = self._execute(
            text(f"""
                SELECT * FROM {self.table_name} ORDER BY id LIMIT :limit;
                """),
            {"limit": int(limit)},
        )
        documents = []
        for row in result.fetchall():
            document = ManagedDocument(
                document_name=row.document_name,
                content=row.content,
                summary=row.summary,
                q_and_a=row.q_and_a,
                mindmap=row.mindmap,
                bullet_points=row.bullet_points,
                is_exported=True,
            )
            # Collapse doubled quotes, exactly as before.
            # NOTE(review): this also rewrites doubled quotes that were part
            # of the original text -- confirm whether it is still needed for
            # rows written by older versions.
            for column in self._TEXT_COLUMNS:
                value = getattr(document, column)
                value = value.replace('""', '"').replace("''", "'")
                if column == "mindmap":
                    value = value.replace("''mynetwork''", "'mynetwork'")
                setattr(document, column, value)
            documents.append(document)
        return documents

    def _execute(
        self,
        statement: Any,
        parameters: Optional[Any] = None,
        execution_options: Optional[Any] = None,
    ) -> Result:
        """Run ``statement`` on the (lazily opened) connection.

        Args:
            statement: Executable SQLAlchemy statement.
            parameters: Optional bound parameters for the statement.
            execution_options: Optional per-execution options.

        Returns:
            The SQLAlchemy ``Result`` of the execution.
        """
        connection = self._get_connection()
        return connection.execute(
            statement=statement,
            parameters=parameters,
            execution_options=execution_options,
        )

    def disconnect(self) -> None:
        """Close the open connection and dispose of the engine's pool.

        Raises:
            ValueError: If no connection was ever opened by this manager.
        """
        if self._connection is None and not getattr(self, "_has_connected", False):
            raise ValueError("Engine was never connected!")
        if self._connection is not None:
            # dispose() does not close checked-out connections, so close ours
            # explicitly and drop the reference; the next operation reconnects.
            self._connection.close()
            self._connection = None
        self._engine.dispose(close=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| import uuid | ||
| import os | ||
| import warnings | ||
| import json | ||
| from pydantic import BaseModel, Field, model_validator | ||
| from typing_extensions import Self | ||
| from typing import List, Union | ||
|
|
||
| from pyvis.network import Network | ||
| from llama_index.core.llms import ChatMessage | ||
| from llama_index.llms.openai import OpenAIResponses | ||
|
|
||
|
|
||
class Node(BaseModel):
    # A single mind-map node.
    # (Documented with comments rather than a docstring so the generated
    # JSON schema stays unchanged.)
    id: str  # identifier that edges use to refer to this node
    content: str  # text attached to the node
|
|
||
|
|
||
class Edge(BaseModel):
    # A directed connection between two mind-map nodes.
    # (Documented with comments rather than a docstring so the generated
    # JSON schema stays unchanged.)
    from_id: str  # id of the source node
    to_id: str  # id of the target node
|
|
||
|
|
||
class MindMap(BaseModel):
    # Structured mind map: a list of nodes plus the directed edges between
    # them. Every edge endpoint must be the id of a declared node.
    # (Documented with comments rather than a docstring so the generated
    # JSON schema stays unchanged.)
    nodes: List[Node] = Field(
        description="List of nodes in the mind map, each represented as a Node object with an 'id' and concise 'content' (no more than 5 words).",
        examples=[
            [
                Node(id="A", content="Fall of the Roman Empire"),
                Node(id="B", content="476 AD"),
                Node(id="C", content="Barbarian invasions"),
            ],
            [
                Node(id="A", content="Auxin is released"),
                Node(id="B", content="Travels to the roots"),
                Node(id="C", content="Root cells grow"),
            ],
        ],
    )
    edges: List[Edge] = Field(
        description="The edges connecting the nodes of the mind map, as a list of Edge objects with from_id and to_id fields representing the source and target node IDs.",
        examples=[
            [
                Edge(from_id="A", to_id="B"),
                Edge(from_id="A", to_id="C"),
                Edge(from_id="B", to_id="C"),
            ],
            [
                Edge(from_id="C", to_id="A"),
                Edge(from_id="B", to_id="C"),
                Edge(from_id="A", to_id="B"),
            ],
        ],
    )

    @model_validator(mode="after")
    def validate_mind_map(self) -> Self:
        """Reject mind maps whose edges reference undeclared nodes.

        Raises:
            ValueError: If any ``from_id`` or ``to_id`` is not the id of a
                node in ``nodes``.
        """
        node_ids = {node.id for node in self.nodes}
        endpoint_ids = {edge.from_id for edge in self.edges} | {
            edge.to_id for edge in self.edges
        }
        # The previous check fired only when the node ids were a proper
        # subset of the endpoints, so an unknown endpoint slipped through as
        # soon as one node was not used by any edge.
        if not endpoint_ids.issubset(node_ids):
            raise ValueError(
                "There are non-existing nodes listed as source or target in the edges"
            )
        return self
|
|
||
|
|
||
class MindMapCreationFailedWarning(Warning):
    """Warning category emitted when a mind map could not be created."""
|
|
||
|
|
||
# The LLM clients are only created when an OpenAI API key is configured;
# without one, LLM and LLM_STRUCT are left undefined.
if os.getenv("OPENAI_API_KEY"):
    LLM = OpenAIResponses(
        model="gpt-4.1",
        api_key=os.environ["OPENAI_API_KEY"],
    )
    LLM_STRUCT = LLM.as_structured_llm(MindMap)
|
|
||
|
|
||
async def get_mind_map(summary: str, highlights: List[str]) -> Union[str, None]:
    """Generate a mind map with the structured LLM and save it as an HTML file.

    Args:
        summary: Summary of the document.
        highlights: Key points of the document, sent to the model as a
            bulleted list.

    Returns:
        The name of the saved ``<uuid4>.html`` file, or ``None`` when any step
        fails (a ``MindMapCreationFailedWarning`` is emitted in that case).
    """
    try:
        bullet_list = "\n- ".join(highlights)
        prompt = ChatMessage(
            role="user",
            content=f"This is the summary for my document: {summary}\n\nAnd these are the key points:\n- {bullet_list}",
        )
        reply = await LLM_STRUCT.achat(messages=[prompt])
        payload = json.loads(reply.message.content)

        graph = Network(directed=True, height="750px", width="100%")
        # Physics is switched off in the rendered graph.
        graph.set_options("""
            var options = {
            "physics": {
                "enabled": false
            }
            }
            """)
        for node_data in payload["nodes"]:
            graph.add_node(n_id=node_data["id"], label=node_data["content"])
        for edge_data in payload["edges"]:
            graph.add_edge(source=edge_data["from_id"], to=edge_data["to_id"])

        filename = str(uuid.uuid4()) + ".html"
        graph.save_graph(filename)
        return filename
    except Exception as e:
        # Best effort: report the failure as a warning instead of raising.
        warnings.warn(
            message=f"An error occurred during the creation of the mind map: {e}",
            category=MindMapCreationFailedWarning,
        )
        return None
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.