22 commits
06e5ec0
Add S3 Bedrock BDA ingestion support with user confirmation and pymup…
prins-agivant Nov 17, 2025
788fe2a
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
58a86d1
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
c20aff8
Fix prompt_path to use ./common/prompts/ for OpenAI and Bedrock
prins-agivant Nov 18, 2025
5a0f87c
bug fixes
prins-agivant Nov 21, 2025
3bfe5c1
Fix PDF extractions
prins-agivant Nov 24, 2025
a660bb7
Fix PDF extraction threading issue: add lock for pymupdf4llm (not thr…
prins-agivant Nov 24, 2025
2743859
Add S3 Bedrock BDA ingestion support with user confirmation and pymup…
prins-agivant Nov 17, 2025
ddae372
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
feb7345
Update README for OpenAI and Bedrock config, add pymupdf4llm license
prins-agivant Nov 18, 2025
7a67896
Fix prompt_path to use ./common/prompts/ for OpenAI and Bedrock
prins-agivant Nov 18, 2025
7f51fea
bug fixes
prins-agivant Nov 21, 2025
a2b8d90
Add local temp file storage for ingestion review
prins-agivant Nov 24, 2025
38619e0
Add direct ingestion option with checkbox to skip file review
prins-agivant Nov 24, 2025
3bbe8bc
Auto-process files on upload/download, delete temp files with origina…
prins-agivant Nov 26, 2025
214b2e1
Merge latest main and consolidate markdown_parsing.py into text_extra…
prins-agivant Dec 1, 2025
dd57724
Merge latest main and consolidate markdown_parsing.py into text_extra…
prins-agivant Dec 1, 2025
07191a2
Merge pymupdf4llm_integration: consolidate markdown processing and ad…
prins-agivant Dec 1, 2025
5d47468
Fix merge conflict resolution: add missing try block and remove incor…
prins-agivant Dec 1, 2025
7fd1ab2
Supportai merge issue fix for temp file ingestion
prins-agivant Dec 2, 2025
aa1ce34
Redesign temp file storage: save immediately during file processing i…
prins-agivant Dec 3, 2025
845fd91
Add Server Configuration UI for real-time LLM and GraphRAG config upd…
prins-agivant Dec 3, 2025
56 changes: 56 additions & 0 deletions common/config.py
@@ -51,6 +51,62 @@
 
 # Configs
 SERVER_CONFIG = os.getenv("SERVER_CONFIG", "configs/server_config.json")
+
+
+def get_config_file_path():
+    """Get the path to the server config file."""
+    return SERVER_CONFIG
+
+
+def get_current_config():
+    """Get the current in-memory configuration (llm_config and graphrag_config)."""
+    return {
+        "llm_config": llm_config,
+        "graphrag_config": graphrag_config,
+    }
+
+
+def update_config(new_llm_config: dict = None, new_graphrag_config: dict = None, persist: bool = True):
+    """
+    Update the in-memory configuration and optionally persist to file.
+    This allows config changes to take effect immediately without container restart.
+
+    Args:
+        new_llm_config: New LLM configuration to apply
+        new_graphrag_config: New GraphRAG configuration to apply
+        persist: If True, also save changes to server_config.json file
+    """
+    global llm_config, graphrag_config
+
+    # Update llm_config in memory
+    if new_llm_config is not None:
+        llm_config.clear()
+        llm_config.update(new_llm_config)
+
+    # Update graphrag_config in memory
+    if new_graphrag_config is not None:
+        graphrag_config.clear()
+        graphrag_config.update(new_graphrag_config)
+
+    # Persist to file if requested
+    if persist:
+        config_path = get_config_file_path()
+        if config_path.endswith(".json"):
+            # Read current file config
+            with open(config_path, "r") as f:
+                file_config = json.load(f)
+
+            # Update with new values
+            if new_llm_config is not None:
+                file_config["llm_config"] = new_llm_config
+            if new_graphrag_config is not None:
+                file_config["graphrag_config"] = new_graphrag_config
+
+            # Write back to file
+            with open(config_path, "w") as f:
+                json.dump(file_config, f, indent=2)
+
+    return True
 PATH_PREFIX = os.getenv("PATH_PREFIX", "")
 PRODUCTION = os.getenv("PRODUCTION", "false").lower() == "true"
 
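Usage note: update_config applies changes in-process, which is what lets the Server Configuration UI from commit 845fd91 take effect without a container restart. The clear()/update() pattern matters here: it mutates the existing dict objects in place, so any module that already imported llm_config or graphrag_config by reference sees the new values; rebinding the globals would not propagate. A minimal sketch of a caller, assuming a FastAPI-style route (the router, path, and request model below are hypothetical, not part of this PR):

from typing import Optional

from fastapi import APIRouter
from pydantic import BaseModel

from common.config import get_current_config, update_config

router = APIRouter()  # hypothetical router; this PR's actual endpoint is not shown here


class ConfigUpdate(BaseModel):
    llm_config: Optional[dict] = None
    graphrag_config: Optional[dict] = None


@router.post("/config")
def apply_config(body: ConfigUpdate):
    # Takes effect immediately in this process; persist=True also merges
    # the changes back into server_config.json on disk.
    update_config(
        new_llm_config=body.llm_config,
        new_graphrag_config=body.graphrag_config,
        persist=True,
    )
    return get_current_config()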
5 changes: 3 additions & 2 deletions common/requirements.txt
@@ -108,9 +108,10 @@ ordered-set==4.1.0
 orjson==3.10.18
 packaging==24.2
 pandas==2.2.3
-#pathtools==0.1.2
+pathtools==0.1.2
 pillow==11.2.1
-PyMuPDF==1.26.4
+#PyMuPDF==1.26.4
+pymupdf4llm==0.2.0
 platformdirs==4.3.8
 pluggy==1.6.0
 prometheus_client==0.22.1
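This swaps the direct PyMuPDF pin for pymupdf4llm, and commit a660bb7 notes the library is not thread-safe and adds a lock around it. That lock is not visible in this file's diff, so the following is only a sketch of the pattern the commit message describes (the wrapper function name is hypothetical):

import threading

import pymupdf4llm

# PyMuPDF (which pymupdf4llm wraps) is not safe to call from multiple
# threads concurrently, so serialize every conversion behind one lock.
_PYMUPDF_LOCK = threading.Lock()


def pdf_to_markdown(file_path: str) -> str:
    """Convert a PDF to markdown text, one document at a time."""
    with _PYMUPDF_LOCK:
        return pymupdf4llm.to_markdown(file_path)

Serializing conversions behind a single lock trades throughput for safety; per-process worker pools are the usual alternative if PDF conversion becomes a bottleneck.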
163 changes: 31 additions & 132 deletions common/utils/image_data_extractor.py
@@ -11,155 +11,54 @@
 
 logger = logging.getLogger(__name__)
 
 
-def describe_image_with_llm(image_input):
+def describe_image_with_llm(file_path):
     """
-    Send image (pixmap or PIL image) to LLM vision model and return description.
-    Uses multimodal_service from config if available, otherwise falls back to completion_service.
-    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
+    Read image file and convert to base64 to send to LLM.
     """
     try:
+        from PIL import Image as PILImage
+
         client = get_multimodal_service()
         if not client:
             return "[Image: Failed to create multimodal LLM client]"
 
-
+        # Read image and convert to base64
+        pil_image = PILImage.open(file_path)
         buffer = io.BytesIO()
         # Convert to RGB if needed for better compatibility
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
+        if pil_image.mode != 'RGB':
+            pil_image = pil_image.convert('RGB')
+        pil_image.save(buffer, format="JPEG", quality=95)
+        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
 
         # Build messages (system + human)
         messages = [
-            SystemMessage(
-                content="You are a helpful assistant that describes images concisely for document analysis."
-            ),
-            HumanMessage(
-                content=[
-                    {
-                        "type": "text",
-                        "text": (
-                            "Please describe what you see in this image and "
-                            "if the image has scanned text then extract all the text. "
-                            "if the image has any logo, icon, or branding element, try to describe it with text. "
-                            "Focus on any text, diagrams, charts, or other visual elements."
-                            "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
-                        ),
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
-                    },
-                ]
-            ),
+            SystemMessage(
+                content="You are a helpful assistant that describes images concisely for document analysis."
+            ),
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": (
+                            "Please describe what you see in this image and "
+                            "if the image has scanned text then extract all the text. "
+                            "If the image has any graph, chart, table, or other diagram, describe it. "
+                        ),
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
+                    },
+                ],
+            ),
         ]
 
         # Get response from LangChain LLM client
         # Access the underlying LangChain client
         langchain_client = client.llm
         response = langchain_client.invoke(messages)
 
-        return response.content if hasattr(response, 'content') else str(response)
+        return response.content if hasattr(response, "content") else str(response)
 
     except Exception as e:
         logger.error(f"Failed to describe image with LLM: {str(e)}")
         return "[Image: Error processing image description]"
 
 
-def save_image_and_get_markdown(image_input, context_info="", graphname=None):
-    """
-    Save image locally to static/images/ folder and return markdown reference with description.
-
-    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
-    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.
-
-    For NEW direct loading approach, images are stored in Image vertex as base64
-    and served via /ui/image_vertex/ endpoint with image:// protocol.
-
-    Args:
-        image_input: PIL Image object
-        context_info: Optional context (e.g., "page 3 of invoice.pdf")
-        graphname: Graph name to organize images by graph (optional)
-
-    Returns:
-        dict with:
-        - 'markdown': Markdown string with img:// reference
-        - 'image_id': Unique identifier for the saved image
-        - 'image_path': Path where image was saved to static/images/
-    """
-    try:
-        # FIRST: Get description from LLM to check if it's a logo
-        description = describe_image_with_llm(image_input)
-
-        # Check if the image is a logo, icon, or decorative element BEFORE saving
-        # These should be filtered out as they're not content-relevant
-        description_lower = description.lower()
-        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']
-
-        if any(indicator in description_lower for indicator in logo_indicators):
-            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
-            return None
-
-        # If not a logo, proceed with saving the image
-        # Generate unique image ID using hash of image content
-        buffer = io.BytesIO()
-        if image_input.mode != 'RGB':
-            image_input = image_input.convert('RGB')
-        image_input.save(buffer, format="JPEG", quality=95)
-        image_bytes = buffer.getvalue()
-
-        # Create hash-based ID (deterministic for same image)
-        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
-        image_id = f"{image_hash}.jpg"
-
-        # Save image to local storage directory organized by graphname
-        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-        # If graphname is provided, organize images by graph
-        if graphname:
-            images_dir = os.path.join(project_root, "static", "images", graphname)
-            # Include graphname in the image reference for URL construction
-            image_reference = f"{graphname}/{image_id}"
-        else:
-            images_dir = os.path.join(project_root, "static", "images")
-            image_reference = image_id
-
-        os.makedirs(images_dir, exist_ok=True)
-
-        image_path = os.path.join(images_dir, image_id)
-
-        # Save image file (skip if already exists with same hash)
-        if not os.path.exists(image_path):
-            with open(image_path, 'wb') as f:
-                f.write(image_bytes)
-            logger.info(f"Saved content image to: {image_path}")
-        else:
-            logger.debug(f"Image already exists: {image_path}")
-
-        # Generate markdown with custom img:// protocol (will be replaced later)
-        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
-        markdown = f"![{description}](img://{image_reference})"
-
-        logger.info(f"Created image reference: {image_reference} with description")
-
-        return {
-            'markdown': markdown,
-            'image_id': image_reference,
-            'image_path': image_path,
-            'description': description
-        }
-
-    except Exception as e:
-        logger.error(f"Failed to save image and generate markdown: {str(e)}")
-        # Fallback to text description only
-        fallback_desc = f"[Image: {context_info} - processing failed]"
-        return {
-            'markdown': fallback_desc,
-            'image_id': None,
-            'image_path': None,
-            'description': fallback_desc
-        }
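This refactor changes describe_image_with_llm from accepting an in-memory pixmap/PIL image to accepting a file path, which fits the pymupdf4llm integration: to_markdown can write embedded images to disk during extraction, and each saved file is then described separately. A minimal usage sketch (the input filename, output directory, and the write_images/image_path choices are assumptions about the caller, not code from this PR):

import glob

import pymupdf4llm

from common.utils.image_data_extractor import describe_image_with_llm

# Extract PDF text as markdown and dump embedded images to disk.
md_text = pymupdf4llm.to_markdown(
    "sample.pdf",            # hypothetical input document
    write_images=True,       # save embedded images as files
    image_path="tmp_images", # directory for the extracted image files
)

# Describe each extracted image file with the multimodal LLM.
for image_file in sorted(glob.glob("tmp_images/*")):
    print(image_file, "->", describe_image_with_llm(image_file))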

