22 commits
06e5ec0
Add S3 Bedrock BDA ingestion support with user confirmation and pymup…
prins-agivant Nov 17, 2025
788fe2a
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
58a86d1
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
c20aff8
Fix prompt_path to use ./common/prompts/ for OpenAI and Bedrock
prins-agivant Nov 18, 2025
5a0f87c
bug fixes
prins-agivant Nov 21, 2025
3bfe5c1
Fix PDF extractions
prins-agivant Nov 24, 2025
a660bb7
Fix PDF extraction threading issue: add lock for pymupdf4llm (not thr…
prins-agivant Nov 24, 2025
2743859
Add S3 Bedrock BDA ingestion support with user confirmation and pymup…
prins-agivant Nov 17, 2025
ddae372
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
feb7345
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
7a67896
Fix prompt_path to use ./common/prompts/ for OpenAI and Bedrock
prins-agivant Nov 18, 2025
7f51fea
bug fixes
prins-agivant Nov 21, 2025
a2b8d90
Add local temp file storage for ingestion review
prins-agivant Nov 24, 2025
38619e0
Add direct ingestion option with checkbox to skip file review
prins-agivant Nov 24, 2025
3bbe8bc
Auto-process files on upload/download, delete temp files with origina…
prins-agivant Nov 26, 2025
214b2e1
Merge latest main and consolidate markdown_parsing.py into text_extra…
prins-agivant Dec 1, 2025
dd57724
Merge latest main and consolidate markdown_parsing.py into text_extra…
prins-agivant Dec 1, 2025
07191a2
Merge pymupdf4llm_integration: consolidate markdown processing and ad…
prins-agivant Dec 1, 2025
5d47468
Fix merge conflict resolution: add missing try block and remove incor…
prins-agivant Dec 1, 2025
7fd1ab2
Supportai merge issue fix for temp file ingestion
prins-agivant Dec 2, 2025
aa1ce34
Redesign temp file storage: save immediately during file processing i…
prins-agivant Dec 3, 2025
845fd91
Add Server Configuration UI for real-time LLM and GraphRAG config upd…
prins-agivant Dec 3, 2025
56 changes: 56 additions & 0 deletions common/config.py
@@ -51,6 +51,62 @@
 
 # Configs
 SERVER_CONFIG = os.getenv("SERVER_CONFIG", "configs/server_config.json")
+
+
+def get_config_file_path():
+    """Get the path to the server config file."""
+    return SERVER_CONFIG
+
+
+def get_current_config():
+    """Get the current in-memory configuration (llm_config and graphrag_config)."""
+    return {
+        "llm_config": llm_config,
+        "graphrag_config": graphrag_config,
+    }
+
+
+def update_config(new_llm_config: dict = None, new_graphrag_config: dict = None, persist: bool = True):
+    """
+    Update the in-memory configuration and optionally persist to file.
+    This allows config changes to take effect immediately without container restart.
+
+    Args:
+        new_llm_config: New LLM configuration to apply
+        new_graphrag_config: New GraphRAG configuration to apply
+        persist: If True, also save changes to server_config.json file
+    """
+    global llm_config, graphrag_config
+
+    # Update llm_config in memory
+    if new_llm_config is not None:
+        llm_config.clear()
+        llm_config.update(new_llm_config)
+
+    # Update graphrag_config in memory
+    if new_graphrag_config is not None:
+        graphrag_config.clear()
+        graphrag_config.update(new_graphrag_config)
+
+    # Persist to file if requested
+    if persist:
+        config_path = get_config_file_path()
+        if config_path.endswith(".json"):
+            # Read current file config
+            with open(config_path, "r") as f:
+                file_config = json.load(f)
+
+            # Update with new values
+            if new_llm_config is not None:
+                file_config["llm_config"] = new_llm_config
+            if new_graphrag_config is not None:
+                file_config["graphrag_config"] = new_graphrag_config
+
+            # Write back to file
+            with open(config_path, "w") as f:
+                json.dump(file_config, f, indent=2)
+
+    return True
 PATH_PREFIX = os.getenv("PATH_PREFIX", "")
 PRODUCTION = os.getenv("PRODUCTION", "false").lower() == "true"
 
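Usage note: update_config applies changes in-process, which is what lets the Server Configuration UI from commit 845fd91 take effect without a container restart. The clear()/update() pattern matters here: it mutates the existing dict objects in place, so any module that already imported llm_config or graphrag_config by reference sees the new values; rebinding the globals would not propagate. A minimal sketch of a caller, assuming a FastAPI-style route (the router, path, and request model below are hypothetical, not part of this PR):

from typing import Optional

from fastapi import APIRouter
from pydantic import BaseModel

from common.config import get_current_config, update_config

router = APIRouter()  # hypothetical router; this PR's actual endpoint is not shown here


class ConfigUpdate(BaseModel):
    llm_config: Optional[dict] = None
    graphrag_config: Optional[dict] = None


@router.post("/config")
def apply_config(body: ConfigUpdate):
    # Takes effect immediately in this process; persist=True also merges
    # the changes back into server_config.json on disk.
    update_config(
        new_llm_config=body.llm_config,
        new_graphrag_config=body.graphrag_config,
        persist=True,
    )
    return get_current_config()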
5 changes: 3 additions & 2 deletions common/requirements.txt
@@ -108,9 +108,10 @@ ordered-set==4.1.0
 orjson==3.10.18
 packaging==24.2
 pandas==2.2.3
-#pathtools==0.1.2
+pathtools==0.1.2
 pillow==11.2.1
-PyMuPDF==1.26.4
+#PyMuPDF==1.26.4
+pymupdf4llm==0.2.0
 platformdirs==4.3.8
 pluggy==1.6.0
 prometheus_client==0.22.1
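This swaps the direct PyMuPDF pin for pymupdf4llm, and commit a660bb7 notes the library is not thread-safe and adds a lock around it. That lock is not visible in this file's diff, so the following is only a sketch of the pattern the commit message describes (the wrapper function name is hypothetical):

import threading

import pymupdf4llm

# PyMuPDF (which pymupdf4llm wraps) is not safe to call from multiple
# threads concurrently, so serialize every conversion behind one lock.
_PYMUPDF_LOCK = threading.Lock()


def pdf_to_markdown(file_path: str) -> str:
    """Convert a PDF to markdown text, one document at a time."""
    with _PYMUPDF_LOCK:
        return pymupdf4llm.to_markdown(file_path)

Serializing conversions behind a single lock trades throughput for safety; per-process worker pools are the usual alternative if PDF conversion becomes a bottleneck.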
163 changes: 31 additions & 132 deletions common/utils/image_data_extractor.py
@@ -11,155 +11,54 @@
 
 logger = logging.getLogger(__name__)
 
 
-def describe_image_with_llm(image_input):
+def describe_image_with_llm(file_path):
     """
-    Send image (pixmap or PIL image) to LLM vision model and return description.
-    Uses multimodal_service from config if available, otherwise falls back to completion_service.
-    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
+    Read image file and convert to base64 to send to LLM.
     """
     try:
+        from PIL import Image as PILImage
+
         client = get_multimodal_service()
         if not client:
             return "[Image: Failed to create multimodal LLM client]"
 
-
+        # Read image and convert to base64
+        pil_image = PILImage.open(file_path)
         buffer = io.BytesIO()
         # Convert to RGB if needed for better compatibility
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
+        if pil_image.mode != 'RGB':
+            pil_image = pil_image.convert('RGB')
+        pil_image.save(buffer, format="JPEG", quality=95)
+        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
 
         # Build messages (system + human)
         messages = [
-            SystemMessage(
-                content="You are a helpful assistant that describes images concisely for document analysis."
-            ),
-            HumanMessage(
-                content=[
-                    {
-                        "type": "text",
-                        "text": (
-                            "Please describe what you see in this image and "
-                            "if the image has scanned text then extract all the text. "
-                            "if the image has any logo, icon, or branding element, try to describe it with text. "
-                            "Focus on any text, diagrams, charts, or other visual elements."
-                            "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
-                        ),
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
-                    },
-                ]
-            ),
+            SystemMessage(
+                content="You are a helpful assistant that describes images concisely for document analysis."
+            ),
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": (
+                            "Please describe what you see in this image and "
+                            "if the image has scanned text then extract all the text. "
+                            "If the image has any graph, chart, table, or other diagram, describe it. "
+                        ),
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
+                    },
+                ],
+            ),
         ]
 
         # Get response from LangChain LLM client
         # Access the underlying LangChain client
         langchain_client = client.llm
         response = langchain_client.invoke(messages)
 
-        return response.content if hasattr(response, 'content') else str(response)
+        return response.content if hasattr(response, "content") else str(response)
 
     except Exception as e:
         logger.error(f"Failed to describe image with LLM: {str(e)}")
         return "[Image: Error processing image description]"
 
 
-def save_image_and_get_markdown(image_input, context_info="", graphname=None):
-    """
-    Save image locally to static/images/ folder and return markdown reference with description.
-
-    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
-    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.
-
-    For NEW direct loading approach, images are stored in Image vertex as base64
-    and served via /ui/image_vertex/ endpoint with image:// protocol.
-
-    Args:
-        image_input: PIL Image object
-        context_info: Optional context (e.g., "page 3 of invoice.pdf")
-        graphname: Graph name to organize images by graph (optional)
-
-    Returns:
-        dict with:
-        - 'markdown': Markdown string with img:// reference
-        - 'image_id': Unique identifier for the saved image
-        - 'image_path': Path where image was saved to static/images/
-    """
-    try:
-        # FIRST: Get description from LLM to check if it's a logo
-        description = describe_image_with_llm(image_input)
-
-        # Check if the image is a logo, icon, or decorative element BEFORE saving
-        # These should be filtered out as they're not content-relevant
-        description_lower = description.lower()
-        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']
-
-        if any(indicator in description_lower for indicator in logo_indicators):
-            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
-            return None
-
-        # If not a logo, proceed with saving the image
-        # Generate unique image ID using hash of image content
-        buffer = io.BytesIO()
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        image_bytes = buffer.getvalue()
-
-        # Create hash-based ID (deterministic for same image)
-        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
-        image_id = f"{image_hash}.jpg"
-
-        # Save image to local storage directory organized by graphname
-        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-        # If graphname is provided, organize images by graph
-        if graphname:
-            images_dir = os.path.join(project_root, "static", "images", graphname)
-            # Include graphname in the image reference for URL construction
-            image_reference = f"{graphname}/{image_id}"
-        else:
-            images_dir = os.path.join(project_root, "static", "images")
-            image_reference = image_id
-
-        os.makedirs(images_dir, exist_ok=True)
-
-        image_path = os.path.join(images_dir, image_id)
-
-        # Save image file (skip if already exists with same hash)
-        if not os.path.exists(image_path):
-            with open(image_path, 'wb') as f:
-                f.write(image_bytes)
-            logger.info(f"Saved content image to: {image_path}")
-        else:
-            logger.debug(f"Image already exists: {image_path}")
-
-        # Generate markdown with custom img:// protocol (will be replaced later)
-        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
-        markdown = f"![{description}](img://{image_reference})"
-
-        logger.info(f"Created image reference: {image_reference} with description")
-
-        return {
-            'markdown': markdown,
-            'image_id': image_reference,
-            'image_path': image_path,
-            'description': description
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to save image and generate markdown: {str(e)}")
-        # Fallback to text description only
-        fallback_desc = f"[Image: {context_info} - processing failed]"
-        return {
-            'markdown': fallback_desc,
-            'image_id': None,
-            'image_path': None,
-            'description': fallback_desc
-        }
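This refactor changes describe_image_with_llm from accepting an in-memory pixmap/PIL image to accepting a file path, which fits the pymupdf4llm integration: to_markdown can write embedded images to disk during extraction, and each saved file is then described separately. A minimal usage sketch (the input filename, output directory, and the write_images/image_path choices are assumptions about the caller, not code from this PR):

import glob

import pymupdf4llm

from common.utils.image_data_extractor import describe_image_with_llm

# Extract PDF text as markdown and dump embedded images to disk.
md_text = pymupdf4llm.to_markdown(
    "sample.pdf",            # hypothetical input document
    write_images=True,       # save embedded images as files
    image_path="tmp_images", # directory for the extracted image files
)

# Describe each extracted image file with the multimodal LLM.
for image_file in sorted(glob.glob("tmp_images/*")):
    print(image_file, "->", describe_image_with_llm(image_file))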

