From 1abe40ef0a9c1e411e82550137c1791c69d2b73c Mon Sep 17 00:00:00 2001 From: SufianTA Date: Fri, 24 Oct 2025 09:42:51 -0700 Subject: [PATCH 1/8] Commit message for new feature --- src/tooluniverse/__init__.py | 8 + src/tooluniverse/candidate_tester_tool.py | 48 ++ src/tooluniverse/common_utils.py | 30 ++ src/tooluniverse/context_keeper_tool.py | 105 ++++ src/tooluniverse/data/vsd.json | 35 ++ src/tooluniverse/data/vsd_allowlist.json | 4 + src/tooluniverse/data/vsd_tools.json | 34 ++ src/tooluniverse/harvest/__init__.py | 1 + src/tooluniverse/harvest/domain_policies.py | 59 +++ src/tooluniverse/harvest/openapi_utils.py | 67 +++ src/tooluniverse/harvest/promoter.py | 101 ++++ src/tooluniverse/harvest/query_expansion.py | 28 + src/tooluniverse/harvest/ranker.py | 36 ++ src/tooluniverse/harvest/searchers.py | 64 +++ src/tooluniverse/harvest/static_catalog.py | 539 ++++++++++++++++++++ src/tooluniverse/harvest/verifier.py | 33 ++ src/tooluniverse/tool_navigator_tool.py | 110 ++++ src/tooluniverse/tool_registry.py | 15 + src/tooluniverse/vsd_api_tool.py | 115 +++++ src/tooluniverse/vsd_catalog.py | 44 ++ src/tooluniverse/vsd_registry.py | 91 ++++ src/tooluniverse/vsd_tool.py | 115 +++++ src/tooluniverse/vsd_utils.py | 212 ++++++++ 23 files changed, 1894 insertions(+) create mode 100644 src/tooluniverse/candidate_tester_tool.py create mode 100644 src/tooluniverse/common_utils.py create mode 100644 src/tooluniverse/context_keeper_tool.py create mode 100644 src/tooluniverse/data/vsd.json create mode 100644 src/tooluniverse/data/vsd_allowlist.json create mode 100644 src/tooluniverse/data/vsd_tools.json create mode 100644 src/tooluniverse/harvest/__init__.py create mode 100644 src/tooluniverse/harvest/domain_policies.py create mode 100644 src/tooluniverse/harvest/openapi_utils.py create mode 100644 src/tooluniverse/harvest/promoter.py create mode 100644 src/tooluniverse/harvest/query_expansion.py create mode 100644 src/tooluniverse/harvest/ranker.py create mode 100644 src/tooluniverse/harvest/searchers.py create mode 100644 src/tooluniverse/harvest/static_catalog.py create mode 100644 src/tooluniverse/harvest/verifier.py create mode 100644 src/tooluniverse/tool_navigator_tool.py create mode 100644 src/tooluniverse/vsd_api_tool.py create mode 100644 src/tooluniverse/vsd_catalog.py create mode 100644 src/tooluniverse/vsd_registry.py create mode 100644 src/tooluniverse/vsd_tool.py create mode 100644 src/tooluniverse/vsd_utils.py diff --git a/src/tooluniverse/__init__.py b/src/tooluniverse/__init__.py index bed8e3f3..6a001040 100644 --- a/src/tooluniverse/__init__.py +++ b/src/tooluniverse/__init__.py @@ -278,6 +278,11 @@ def __getattr__(self, name): from .core_tool import CoreTool from .pmc_tool import PMCTool from .zenodo_tool import ZenodoTool + from . import vsd_tool # registers VerifiedSourceDiscoveryTool + VerifiedSourceRegisterTool + from . import vsd_api_tool # registers GenericRESTTool + GenericGraphQLTool + from . import context_keeper_tool # registers ContextKeeperTool + from . import candidate_tester_tool # registers HarvestCandidateTesterTool + from . 
import tool_navigator_tool # registers ToolNavigatorTool else: # With lazy loading, create lazy import proxies that import modules only when accessed MonarchTool = _LazyImportProxy("restful_tool", "MonarchTool") @@ -453,6 +458,9 @@ def __getattr__(self, name): "ODPHPItemList", "ODPHPTopicSearch", "ODPHPOutlinkFetch", + "ContextKeeperTool", + "HarvestCandidateTesterTool", + "ToolNavigatorTool", "CellosaurusSearchTool", "CellosaurusQueryConverterTool", "CellosaurusGetCellLineInfoTool", diff --git a/src/tooluniverse/candidate_tester_tool.py b/src/tooluniverse/candidate_tester_tool.py new file mode 100644 index 00000000..d37f95b5 --- /dev/null +++ b/src/tooluniverse/candidate_tester_tool.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import Any, Dict + +from .tool_registry import register_tool +from .vsd_utils import build_config, probe_config + + +@register_tool("HarvestCandidateTesterTool") +class HarvestCandidateTesterTool: + """ + Validate harvest/VSD candidates without registering them. + Returns HTTP diagnostics and suggestions for default params or headers. + """ + + name = "HarvestCandidateTesterTool" + description = "Test a harvest candidate endpoint to see if it returns usable JSON." + input_schema = { + "type": "object", + "properties": { + "candidate": {"type": "object"}, + "tool_type": {"type": "string", "default": "dynamic_rest"}, + "default_params": {"type": "object"}, + "default_headers": {"type": "object"}, + }, + "required": ["candidate"], + "additionalProperties": False, + } + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + candidate = arguments.get("candidate") or {} + tool_type = arguments.get("tool_type") or "dynamic_rest" + default_params = arguments.get("default_params") + default_headers = arguments.get("default_headers") + + cfg = build_config( + candidate, + tool_type=tool_type, + default_params=default_params, + default_headers=default_headers, + ) + probe = probe_config(cfg) + + return { + "ok": bool(probe.get("ok")), + "test": probe, + "config": cfg, + } diff --git a/src/tooluniverse/common_utils.py b/src/tooluniverse/common_utils.py new file mode 100644 index 00000000..8fdb5d85 --- /dev/null +++ b/src/tooluniverse/common_utils.py @@ -0,0 +1,30 @@ + +import os, json, time, threading, base64, io +from typing import Any, Dict, Tuple + +_LOCK = threading.Lock() + +def ensure_dir(path: str): + os.makedirs(path, exist_ok=True) + +def vsd_generated_path() -> str: + base = os.environ.get("TOOLUNIVERSE_VSD_DIR") or os.path.join(os.path.expanduser("~"), ".tooluniverse", "vsd") + ensure_dir(base) + return os.path.join(base, "generated_tools.json") + +def read_json(path: str, default): + try: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + except Exception: + return default + +def write_json(path: str, data: Any): + ensure_dir(os.path.dirname(path)) + tmp_path = f"{path}.tmp" + with open(tmp_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + os.replace(tmp_path, path) + +def b64_png(png_bytes: bytes) -> str: + return base64.b64encode(png_bytes).decode("ascii") diff --git a/src/tooluniverse/context_keeper_tool.py b/src/tooluniverse/context_keeper_tool.py new file mode 100644 index 00000000..46dd2b0c --- /dev/null +++ b/src/tooluniverse/context_keeper_tool.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import json +import os +from typing import Any, Dict, Optional + +from .tool_registry import register_tool + +CONTEXT_DIR = os.path.join(os.path.expanduser("~"), ".tooluniverse", 
"context") +CONTEXT_PATH = os.path.join(CONTEXT_DIR, "context.json") + + +def _ensure_dir() -> None: + os.makedirs(CONTEXT_DIR, exist_ok=True) + + +def _load_context() -> Dict[str, Any]: + if not os.path.exists(CONTEXT_PATH): + return {} + try: + with open(CONTEXT_PATH, "r", encoding="utf-8") as handle: + data = json.load(handle) + if isinstance(data, dict): + return data + except Exception: + pass + return {} + + +def _write_context(data: Dict[str, Any]) -> None: + _ensure_dir() + tmp_path = f"{CONTEXT_PATH}.tmp" + with open(tmp_path, "w", encoding="utf-8") as handle: + json.dump(data, handle, indent=2, ensure_ascii=False) + os.replace(tmp_path, CONTEXT_PATH) + + +@register_tool("ContextKeeperTool") +class ContextKeeperTool: + """ + Lightweight context store that agents can use to persist conversation or task state + between ToolUniverse calls. Data is saved under ~/.tooluniverse/context/context.json. + """ + + name = "ContextKeeperTool" + description = "Persist or retrieve task context (key/value pairs) for ongoing agent workflows." + input_schema = { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["get", "set", "append", "clear", "keys"], + "default": "get", + }, + "key": {"type": "string", "description": "Context entry name"}, + "value": { + "description": "Value to store; for append operations this should be a list item.", + }, + }, + "additionalProperties": False, + } + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + action = (arguments.get("action") or "get").lower() + key: Optional[str] = arguments.get("key") + value: Any = arguments.get("value") + + context = _load_context() + + if action == "keys": + return {"ok": True, "keys": sorted(context.keys())} + + if action == "clear": + if key: + removed = context.pop(key, None) is not None + _write_context(context) + return {"ok": removed, "cleared": key if removed else None} + context.clear() + _write_context(context) + return {"ok": True, "cleared": "all"} + + if action == "set": + if key is None: + return {"ok": False, "error": "key is required for set"} + context[key] = value + _write_context(context) + return {"ok": True, "key": key, "value": value} + + if action == "append": + if key is None: + return {"ok": False, "error": "key is required for append"} + existing = context.get(key) + if existing is None: + context[key] = [value] + elif isinstance(existing, list): + existing.append(value) + else: + context[key] = [existing, value] + _write_context(context) + return {"ok": True, "key": key, "value": context[key]} + + # default: get + if key: + return {"ok": True, "key": key, "value": context.get(key)} + return {"ok": True, "value": context} diff --git a/src/tooluniverse/data/vsd.json b/src/tooluniverse/data/vsd.json new file mode 100644 index 00000000..b359048e --- /dev/null +++ b/src/tooluniverse/data/vsd.json @@ -0,0 +1,35 @@ +[ + { + "name": "GenericHarvestTool", + "type": "GenericHarvestTool", + "description": "Live-harvest candidate API endpoints by invoking all modules in tooluniverse.harvest.", + "tool_type": "special_tools", + "enabled": true, + "visible": true, + "parameter": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Free-text hint, passed to all harvesters under tooluniverse.harvest." + }, + "urls": { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "description": "Explicit candidate URLs to validate and return (skips live harvesting)." 
+ }, + "limit": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 5, + "description": "Max number of candidates to return." + } + }, + "additionalProperties": false + } + } +] diff --git a/src/tooluniverse/data/vsd_allowlist.json b/src/tooluniverse/data/vsd_allowlist.json new file mode 100644 index 00000000..3c5258a2 --- /dev/null +++ b/src/tooluniverse/data/vsd_allowlist.json @@ -0,0 +1,4 @@ +[ +{"domain": "ema.europa.eu", "label": "EMA", "trust": 0.95, "registry": "ema"}, +{"domain": "ghoapi.azureedge.net", "label": "WHO GHO", "trust": 0.92, "registry": "who"} +] \ No newline at end of file diff --git a/src/tooluniverse/data/vsd_tools.json b/src/tooluniverse/data/vsd_tools.json new file mode 100644 index 00000000..398cd86b --- /dev/null +++ b/src/tooluniverse/data/vsd_tools.json @@ -0,0 +1,34 @@ +[ + { + "type": "VerifiedSourceDiscoveryTool", + "name": "vsd_discover_sources", + "description": "Discover trusted candidate sources for a free-text query", + "parameter": { + "type": "object", + "required": ["query"], + "properties": { + "query": { "type": "string" }, + "limit": { "type": "integer" }, + "allowlist_overrides": { "type": "array" } + } + }, + "label": ["VSD", "Discovery"] + }, + { + "type": "VerifiedSourceRegisterTool", + "name": "vsd_register_tool", + "description": "Register a VSD-generated tool bound to a trusted source", + "parameter": { + "type": "object", + "required": ["candidate", "tool_name"], + "properties": { + "candidate": { "type": "object" }, + "tool_name": { "type": "string" }, + "description": { "type": "string" }, + "parameter_overrides": { "type": "object" }, + "evidence_sample": { "type": "object" } + } + }, + "label": ["VSD", "Synthesis"] + } +] diff --git a/src/tooluniverse/harvest/__init__.py b/src/tooluniverse/harvest/__init__.py new file mode 100644 index 00000000..19c21109 --- /dev/null +++ b/src/tooluniverse/harvest/__init__.py @@ -0,0 +1 @@ +# Harvest subpackage diff --git a/src/tooluniverse/harvest/domain_policies.py b/src/tooluniverse/harvest/domain_policies.py new file mode 100644 index 00000000..49031914 --- /dev/null +++ b/src/tooluniverse/harvest/domain_policies.py @@ -0,0 +1,59 @@ +from __future__ import annotations +from functools import lru_cache +from typing import Dict, List + +# Conservative allow/deny fragments. We still compute a trust score as a gradient. 
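+# Illustrative outputs for trust_score() below (assuming these fragment lists
+# stay unchanged; scores follow the additive weights inside trust_score):
+#   trust_score("data.cdc.gov") -> {"score": 0.73, "reasons": ["gov/igo domain", "open data portal"]}
+#   trust_score("scribd.com")   -> {"score": 0.0, "reasons": ["blocked"]}
+#   trust_score("example.com")  -> {"score": 0.0, "reasons": []}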
+ALLOWED_FRAGMENTS: List[str] = [ + # government & intergovernmental + ".gov", ".mil", ".gob", ".gouv", ".go.", ".govt.nz", ".gc.ca", + "who.int", "worldbank.org", "oecd.org", "europa.eu", "esa.int", + # major scientific/health orgs + "nih.gov", "niddk.nih.gov", "ninds.nih.gov", "ncbi.nlm.nih.gov", "data.cdc.gov", "api.cdc.gov", + "fda.gov", "api.fda.gov", "epa.gov", "noaa.gov", "usgs.gov", "census.gov", + "data.gov", "healthdata.gov", "data.cms.gov", "data.hrsa.gov", "data.hhs.gov", + "ghoapi.azureedge.net", +] + +BLOCKED_FRAGMENTS: List[str] = [ + "mirror", "docshare", "scribd.com", "sharepdf", "academia.edu", + "stackprinter", "cachedview", "wayback", "pirated", "scrapeops", +] + +@lru_cache(maxsize=4096) +def domain_blocked(host: str) -> bool: + h = (host or "").lower() + return any(b in h for b in BLOCKED_FRAGMENTS) + +@lru_cache(maxsize=4096) +def domain_allowed(host: str) -> bool: + # allow if any strong allow fragment present AND not blocked + h = (host or "").lower() + if domain_blocked(h): + return False + return any(a in h for a in ALLOWED_FRAGMENTS) + +@lru_cache(maxsize=4096) +def trust_score(host: str) -> Dict: + """Return a graded trust score in [0,1] with reasons for ranking. + We don't *block* here (that's domain_blocked); we provide a signal for ranker. + """ + h = (host or "").lower() + score = 0.0 + reasons: List[str] = [] + if domain_blocked(h): + return {"score": 0.0, "reasons": ["blocked"]} + + # strong positives + if any(tld in h for tld in (".gov", "who.int", "worldbank.org", "europa.eu", "oecd.org")): + score += 0.65; reasons.append("gov/igo domain") + if any(seg in h for seg in ("nih.gov","ncbi.nlm.nih.gov","fda.gov","epa.gov","noaa.gov","usgs.gov","census.gov")): + score += 0.2; reasons.append("major science/health org") + # medium positives + if h.startswith("api.") or "/api" in h: + score += 0.05; reasons.append("api host") + # slight boost for data portals + if any(seg in h for seg in ("data.gov","healthdata.gov","data.cms.gov","data.cdc.gov","data.europa.eu")): + score += 0.08; reasons.append("open data portal") + + score = max(0.0, min(1.0, score)) + return {"score": round(score, 3), "reasons": reasons} \ No newline at end of file diff --git a/src/tooluniverse/harvest/openapi_utils.py b/src/tooluniverse/harvest/openapi_utils.py new file mode 100644 index 00000000..4adcddd0 --- /dev/null +++ b/src/tooluniverse/harvest/openapi_utils.py @@ -0,0 +1,67 @@ +from __future__ import annotations +import re, logging, json +from typing import Dict, Optional, List +import requests + +logger = logging.getLogger("OpenAPIUtils") + +OPENAPI_HINTS = ["openapi.json","openapi.yaml","openapi.yml","swagger.json","swagger.yaml","v3/api-docs"] + +def _root_of(url: str) -> str: + base = url.split("?",1)[0] + base = re.sub(r"(#.*)$","", base) + base = re.sub(r"/+$","", base) + m = re.match(r"^https?://[^/]+", base) + return m.group(0) if m else base + +def find_openapi_from_url(any_url: str) -> Optional[str]: + root = _root_of(any_url) + # try /openapi.json etc. 
at root and one level up + tries = [f"{root}/{hint}" for hint in OPENAPI_HINTS] + # also try without trailing /api segment if present + if root.endswith("/api"): + base = root.rsplit("/",1)[0] + tries.extend(f"{base}/{hint}" for hint in OPENAPI_HINTS) + for t in tries: + try: + r = requests.get(t, timeout=8) + if r.status_code == 200 and ("json" in r.headers.get("Content-Type","") or t.endswith(".json")): + # quick JSON sanity + try: + j = r.json() + if "openapi" in j or "swagger" in j: + return t + except Exception: + pass + if r.status_code == 200 and (t.endswith(".yaml") or t.endswith(".yml")): + return t + except requests.RequestException: + continue + return None + +def parse_openapi(spec_url: str) -> Dict: + r = requests.get(spec_url, timeout=15) + r.raise_for_status() + text = r.text + if spec_url.endswith((".yaml",".yml")): + try: + import yaml + except Exception as e: + raise RuntimeError("YAML support requires PyYAML: pip install pyyaml") from e + spec = yaml.safe_load(text) + else: + spec = r.json() + + servers = spec.get("servers") or [] + base_url = (servers[0].get("url") if servers and isinstance(servers[0], dict) else None) or None + + paths = spec.get("paths") or {} + endpoints: List[Dict] = [] + for path, methods in paths.items(): + if not isinstance(methods, dict): + continue + for method, meta in methods.items(): + if method.upper() not in ("GET","POST","PUT","PATCH","DELETE","OPTIONS","HEAD"): + continue + endpoints.append({"path": path, "method": method.upper(), "summary": (meta or {}).get("summary")}) + return {"base_url": base_url, "endpoints": endpoints} \ No newline at end of file diff --git a/src/tooluniverse/harvest/promoter.py b/src/tooluniverse/harvest/promoter.py new file mode 100644 index 00000000..6ef0d4d6 --- /dev/null +++ b/src/tooluniverse/harvest/promoter.py @@ -0,0 +1,101 @@ +from __future__ import annotations +import os, json, tempfile, shutil +from typing import Dict, Any, List + +# Where we persist generated tool configs so DynamicREST (or your server boot) +# can load them. Mirrors your earlier logs (~/.tooluniverse/vsd/generated_tools.json). +VSD_DIR = os.path.join(os.path.expanduser("~"), ".tooluniverse", "vsd") +VSD_PATH = os.path.join(VSD_DIR, "generated_tools.json") + +def _ensure_dir(): + os.makedirs(VSD_DIR, exist_ok=True) + +def _read_json(path: str) -> Any: + if not os.path.exists(path): + return {} + try: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) or {} + except Exception: + return {} + +def _atomic_write(path: str, data: Any): + tmp_fd, tmp_path = tempfile.mkstemp(prefix="vsd_", suffix=".json") + os.close(tmp_fd) + with open(tmp_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + shutil.move(tmp_path, path) + +def _slug(host: str) -> str: + return (host or "unknown").lower().replace(".", "_").replace("-", "_") + +def build_candidate_tool_json(c: Dict[str, Any]) -> Dict[str, Any]: + # Minimal, UI-friendly payload for listing/debug + return { + "name": c.get("name"), + "host": c.get("host"), + "base_url": c.get("base_url"), + "doc_url": c.get("doc_url"), + "openapi_url": c.get("openapi_url"), + "endpoints": c.get("endpoints"), + "health": c.get("health"), + "cors": c.get("cors"), + "trust": c.get("trust"), + "source": c.get("source"), + "_rank_score": c.get("_rank_score"), + } + +def _dynamicrest_tool_config(c: Dict[str, Any]) -> Dict[str, Any]: + """Produce a DynamicREST-style tool definition. + Two modes: + - OpenAPI mode (preferred): reference spec URL. 
+ - Manual mode: infer a few GET endpoints from verification results. + """ + name = f"vsd_auto_{_slug(c.get('host') or '')}" + base_url = c.get("base_url") + openapi_url = c.get("openapi_url") + endpoints = c.get("endpoints") or [] + + cfg: Dict[str, Any] = { + "name": name, + "type": "DynamicREST", + "base_url": base_url, + "auth": c.get("auth") or {"type": "none"}, + "metadata": { + "source": c.get("source"), + "trust": c.get("trust"), + "health": c.get("health"), + "doc_url": c.get("doc_url"), + }, + } + if openapi_url: + cfg["openapi"] = {"spec_url": openapi_url} + elif endpoints: + # Trim to a handful of GET endpoints + routes: List[Dict[str, Any]] = [] + for ep in endpoints[:5]: + routes.append({ + "method": ep.get("method") or "GET", + "path": ep.get("path") or "/", + "name": (ep.get("summary") or ep.get("path") or "endpoint").strip("/").replace("/", "_")[:64] or "endpoint", + }) + cfg["routes"] = routes + else: + # Last resort: allow a generic GET on '/' + cfg["routes"] = [{"method": "GET", "path": "/"}] + return cfg + +def promote_to_dynamicrest(c: Dict[str, Any]) -> str: + """Append/Update the generated tool config file so your server can load it. + Returns the registered tool name. + """ + _ensure_dir() + current = _read_json(VSD_PATH) + if not isinstance(current, dict): + current = {} + + cfg = _dynamicrest_tool_config(c) + name = cfg.get("name") or "vsd_auto_unknown" + current[name] = cfg + _atomic_write(VSD_PATH, current) + return name \ No newline at end of file diff --git a/src/tooluniverse/harvest/query_expansion.py b/src/tooluniverse/harvest/query_expansion.py new file mode 100644 index 00000000..4ac4e959 --- /dev/null +++ b/src/tooluniverse/harvest/query_expansion.py @@ -0,0 +1,28 @@ + +from __future__ import annotations +from typing import List + +DENTAL_SYNONYMS = [ + "oral health", "dentistry", "dental caries", "tooth decay", + "periodontal", "periodontitis", "orthodontic", "endodontic", + "prosthodontic", "oral cancer", "DMFT", "fluoride", "NIDCR", "CDC Oral Health", + "WHO Oral Health" +] + +def expand_queries(query: str, max_queries: int = 6) -> List[str]: + base = query.strip() + if not base: + return [] + expanded = [base, + f"{base} WHO API", + f"{base} site:who.int", + f"{base} site:data.cdc.gov", + f"{base} site:api.fda.gov"] + for syn in DENTAL_SYNONYMS[:4]: + expanded.append(f"{base} {syn}") + # de-dup and clip + seen = [] + for q in expanded: + if q not in seen: + seen.append(q) + return seen[:max_queries] diff --git a/src/tooluniverse/harvest/ranker.py b/src/tooluniverse/harvest/ranker.py new file mode 100644 index 00000000..aa898ad1 --- /dev/null +++ b/src/tooluniverse/harvest/ranker.py @@ -0,0 +1,36 @@ +from __future__ import annotations +import math +from typing import List, Dict + +def _sim(a: str, b: str) -> float: + a,b = (a or "").lower(), (b or "").lower() + if not a or not b: + return 0.0 + aset, bset = set(a.split()), set(b.split()) + overlap = len(aset & bset) + return overlap / (len(aset) + 1e-6) + +def rank_candidates(query: str, candidates: List[Dict]) -> List[Dict]: + def score(c: Dict) -> float: + trust = float(((c.get("trust") or {}).get("score") or 0.0)) + h = c.get("health") or {} + live = 1.0 if (h.get("ok") and (h.get("status",0) < 500)) else 0.0 + lat = h.get("latency_ms") or 1500 + lat_norm = max(0.0, 1.0 - min(lat, 4000)/4000.0) + fit = max(_sim(query, c.get("name","")), _sim(query, c.get("doc_url",""))) + has_spec = 1.0 if c.get("openapi_url") else 0.2 if c.get("endpoints") else 0.0 + cors = 0.3 if (c.get("cors") or 
{}).get("preflight") else 0.0 + match_bonus = float(c.get("_match_score") or 0.0) + return ( + 0.25 * trust + + 0.2 * (live * lat_norm) + + 0.23 * fit + + 0.1 * has_spec + + 0.05 * cors + + (0.35 * math.log1p(match_bonus) if match_bonus > 0 else 0.0) + ) + + ranked = sorted(candidates, key=score, reverse=True) + for i, c in enumerate(ranked): + c["_rank_score"] = round(score(c), 4) + return ranked diff --git a/src/tooluniverse/harvest/searchers.py b/src/tooluniverse/harvest/searchers.py new file mode 100644 index 00000000..e9daf2e8 --- /dev/null +++ b/src/tooluniverse/harvest/searchers.py @@ -0,0 +1,64 @@ +from __future__ import annotations +import os, re, logging, requests, json +from dataclasses import dataclass +from typing import List, Optional, Dict, Any + +logger = logging.getLogger("HarvestSearch") +DEFAULT_TIMEOUT = int(os.getenv("HARVEST_TIMEOUT_S", "8")) + +@dataclass +class SearchResult: + title: str + url: str + snippet: str + source: str + +def _clean_host(url: str) -> str: + return re.sub(r"^https?://", "", url or "").split("/")[0].lower() + +def _normalize_candidate_url(url: str) -> str: + return (url or "").strip() + +# ---------------- CKAN adapter ---------------- +def _search_ckan(query: str, rows: int, base_url: str) -> List[SearchResult]: + out: List[SearchResult] = [] + try: + r = requests.get(base_url, params={"q": query, "rows": rows}, timeout=DEFAULT_TIMEOUT) + r.raise_for_status() + payload = r.json() + # CKAN payload guard + result = (payload or {}).get("result") or {} + for pkg in result.get("results", []): + title = pkg.get("title") or pkg.get("name") or "CKAN dataset" + notes = (pkg.get("notes") or "")[:240] + for res in (pkg.get("resources") or []): + res_url = _normalize_candidate_url(res.get("url") or "") + if not res_url: + continue + out.append(SearchResult(title=title, url=res_url, snippet=notes, source=f"ckan:{_clean_host(base_url)}")) + except Exception as e: + logger.debug("CKAN search failed for %s: %s", base_url, e) + return out + +CATALOG_ADAPTERS = { + "ckan": _search_ckan, +} + +def search_for_apis(query: str, rows: int = 100, catalogs: Optional[List[Dict[str, Any]]] = None) -> List[SearchResult]: + """Search across configured catalogs. + catalogs: list of dicts, e.g. [{"type": "ckan", "url": "https://.../api/3/action/package_search"}] + You can supply this via env HARVEST_CATALOGS='[ ... ]' or pass in directly. 
+ """ + results: List[SearchResult] = [] + catalogs = catalogs or [] + for cat in catalogs: + ctype = (cat.get("type") or "").lower().strip() + url = cat.get("url") or "" + if not ctype or not url: + continue + adapter = CATALOG_ADAPTERS.get(ctype) + if not adapter: + logger.debug("Unknown catalog type %s, skipping", ctype) + continue + results.extend(adapter(query=query, rows=rows, base_url=url)) + return results diff --git a/src/tooluniverse/harvest/static_catalog.py b/src/tooluniverse/harvest/static_catalog.py new file mode 100644 index 00000000..83536f94 --- /dev/null +++ b/src/tooluniverse/harvest/static_catalog.py @@ -0,0 +1,539 @@ +from __future__ import annotations + +import math +import re +from copy import deepcopy +from dataclasses import dataclass +from typing import Dict, Iterable, List, Set +from urllib.parse import urlparse + +from .domain_policies import trust_score +from .ranker import rank_candidates + + +# ----------------------------------------------------------------------------- +# Static catalog data +# ----------------------------------------------------------------------------- + +RAW_CATALOG: List[Dict[str, object]] = [ + { + "name": "ClinicalTrials.gov Study Fields API", + "url": "https://clinicaltrials.gov/api/query/study_fields", + "doc_url": "https://clinicaltrials.gov/api/gui/home", + "description": "Query structured fields from the ClinicalTrials.gov registry covering study design, enrollment, outcomes, and locations.", + "keywords": ["clinical", "trial", "study", "research", "ctgov", "clinicaltrials"], + "category": "clinical_trials", + "base_score": 0.95, + "endpoints": [ + {"method": "GET", "path": "/api/query/study_fields", "summary": "Query study fields"}, + {"method": "GET", "path": "/api/query/full_studies", "summary": "Fetch full study records"}, + ], + }, + { + "name": "NCI Clinical Trials API", + "url": "https://clinicaltrialsapi.cancer.gov/api/v1/clinical-trials", + "doc_url": "https://clinicaltrialsapi.cancer.gov", + "description": "REST API exposing cancer clinical trials curated by the National Cancer Institute (NCI) with filters across disease, stage, and therapy.", + "keywords": ["clinical", "trial", "oncology", "cancer", "nci", "research"], + "category": "clinical_trials", + "base_score": 0.88, + "endpoints": [ + {"method": "GET", "path": "/api/v1/clinical-trials", "summary": "Search cancer clinical trials"}, + {"method": "GET", "path": "/api/v1/diseases", "summary": "List disease terms"}, + ], + }, + { + "name": "FDA OpenFDA Drug Label API", + "url": "https://api.fda.gov/drug/label.json", + "doc_url": "https://open.fda.gov/apis/drug/label/", + "description": "OpenFDA drug labeling information with pharmacology, indications, warnings, and dosage guidance.", + "keywords": ["drug", "label", "fda", "pharmaceutical", "medication", "clinical"], + "category": "pharmacovigilance", + "base_score": 0.6, + "endpoints": [ + {"method": "GET", "path": "/drug/label.json", "summary": "Query drug labeling records"}, + {"method": "GET", "path": "/drug/event.json", "summary": "Retrieve drug adverse events"}, + ], + }, + { + "name": "FDA OpenFDA Adverse Events API", + "url": "https://api.fda.gov/drug/event.json", + "doc_url": "https://open.fda.gov/apis/drug/event/", + "description": "Adverse event case reports submitted to FDA FAERS with patient outcomes and drug role details.", + "keywords": ["adverse", "event", "pharmacovigilance", "drug safety", "faers"], + "category": "pharmacovigilance", + "base_score": 0.65, + "endpoints": [ + {"method": "GET", "path": 
"/drug/event.json", "summary": "Search FAERS adverse event data"}, + ], + }, + { + "name": "FDA OpenFDA Device Recall API", + "url": "https://api.fda.gov/device/recall.json", + "doc_url": "https://open.fda.gov/apis/device/recall/", + "description": "Medical device recall records including classification, recall reason, and event dates.", + "keywords": ["medical device", "recall", "fda", "safety", "compliance"], + "category": "device_safety", + "base_score": 0.55, + "endpoints": [ + {"method": "GET", "path": "/device/recall.json", "summary": "Retrieve device recall records"}, + ], + }, + { + "name": "CDC Socrata Open Data API", + "url": "https://data.cdc.gov/resource/9mfq-cb36.json", + "doc_url": "https://dev.socrata.com/foundry/data.cdc.gov/9mfq-cb36", + "description": "CDC curated datasets accessible via the Socrata Open Data API, including COVID-19 cases and vaccinations.", + "keywords": ["cdc", "public health", "covid", "vaccination", "socrata", "open data"], + "category": "public_health", + "base_score": 0.86, + "endpoints": [ + {"method": "GET", "path": "/resource/.json", "summary": "Query CDC open datasets"}, + ], + }, + { + "name": "CDC PLACES Community Health API", + "url": "https://chronicdata.cdc.gov/resource/cwsq-ngmh.json", + "doc_url": "https://dev.socrata.com/foundry/chronicdata.cdc.gov/cwsq-ngmh", + "description": "Model-based estimates for chronic disease, health risk factors, and preventive services at local levels; supports community health assessments and dental health overlays.", + "keywords": ["community health", "chronic disease", "behavioral health", "cdc", "oral health"], + "category": "public_health", + "base_score": 0.8, + "endpoints": [ + {"method": "GET", "path": "/resource/cwsq-ngmh.json", "summary": "Retrieve PLACES health estimates"}, + ], + }, + { + "name": "CDC Oral Health Data Portal API", + "url": "https://data.cdc.gov/resource/4nhi-4p9m.json", + "doc_url": "https://dev.socrata.com/foundry/data.cdc.gov/4nhi-4p9m", + "description": "Community oral health indicators including dental visits, sealant prevalence, and fluoridation coverage for dentistry analytics.", + "keywords": ["oral health", "dentistry", "dental", "fluoride", "sealant", "cdc"], + "category": "dentistry", + "base_score": 0.81, + "endpoints": [ + {"method": "GET", "path": "/resource/4nhi-4p9m.json", "summary": "Query oral health indicator records"}, + ], + }, + { + "name": "WHO Global Health Observatory API", + "url": "https://ghoapi.azureedge.net/api/Indicator", + "doc_url": "https://www.who.int/data/gho/info/gho-odata-api", + "description": "World Health Organization indicators covering global health metrics, vaccination, and disease burden.", + "keywords": ["who", "global health", "indicator", "vaccination", "disease surveillance"], + "category": "global_health", + "base_score": 0.87, + "endpoints": [ + {"method": "GET", "path": "/api/Indicator", "summary": "List WHO health indicators"}, + {"method": "GET", "path": "/api/Indicator?$filter", "summary": "Filter indicators by code"}, + ], + }, + { + "name": "NIH RePORTER Projects API", + "url": "https://api.reporter.nih.gov/v2/projects/search", + "doc_url": "https://api.reporter.nih.gov/", + "description": "NIH-funded research projects with abstracts, funding amounts, and investigator information.", + "keywords": ["nih", "grants", "research", "project", "biomedical"], + "category": "research_funding", + "base_score": 0.83, + "endpoints": [ + {"method": "POST", "path": "/v2/projects/search", "summary": "Search NIH-funded projects"}, + ], + }, 
+ { + "name": "NCBI E-utilities ESummary API", + "url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", + "doc_url": "https://www.ncbi.nlm.nih.gov/books/NBK25500/", + "description": "Programmatic access to NCBI databases including PubMed, nucleotide, protein, and ClinVar content.", + "keywords": ["ncbi", "genomics", "pubmed", "sequence", "biomedical"], + "category": "genomics", + "base_score": 0.84, + "endpoints": [ + {"method": "GET", "path": "/entrez/eutils/esearch.fcgi", "summary": "Search NCBI databases"}, + {"method": "GET", "path": "/entrez/eutils/esummary.fcgi", "summary": "Retrieve database summaries"}, + ], + }, + { + "name": "Ensembl REST API", + "url": "https://rest.ensembl.org/info/ping", + "doc_url": "https://rest.ensembl.org", + "description": "Genomics REST service for Ensembl data including genes, variants, and comparative genomics with JSON outputs.", + "keywords": ["ensembl", "genomics", "variants", "gene", "rest service", "bioinformatics"], + "category": "genomics", + "base_score": 0.8, + "endpoints": [ + {"method": "GET", "path": "/lookup/id/{id}", "summary": "Lookup Ensembl gene or transcript"}, + {"method": "GET", "path": "/overlap/region/{species}/{region}", "summary": "Fetch features overlapping a region"}, + ], + }, + { + "name": "SAMHSA Behavioral Health Treatment Services Locator API", + "url": "https://findtreatment.samhsa.gov/locator", + "doc_url": "https://findtreatment.samhsa.gov/developers", + "description": "Behavioral health treatment provider directory with search by service type, payment, and location.", + "keywords": ["mental health", "treatment", "behavioral health", "samhsa"], + "category": "mental_health", + "base_score": 0.81, + "endpoints": [ + {"method": "GET", "path": "/locator", "summary": "Search behavioral health providers"}, + ], + }, + { + "name": "USDA FoodData Central API", + "url": "https://api.nal.usda.gov/fdc/v1/foods/search", + "doc_url": "https://fdc.nal.usda.gov/api-guide.html", + "description": "Nutrient composition data for branded and experimental foods, with search and detail endpoints.", + "keywords": ["nutrition", "food", "dietary", "usda", "nutrients"], + "category": "nutrition", + "base_score": 0.79, + "endpoints": [ + {"method": "POST", "path": "/fdc/v1/foods/search", "summary": "Search foods by keyword"}, + {"method": "GET", "path": "/fdc/v1/food/{fdcId}", "summary": "Retrieve nutrient profile"}, + ], + }, + { + "name": "CDC Vaccination Coverage API", + "url": "https://data.cdc.gov/resource/8xkx-amqh.json", + "doc_url": "https://dev.socrata.com/foundry/data.cdc.gov/8xkx-amqh", + "description": "US vaccination coverage estimates by vaccine and demographic segment.", + "keywords": ["vaccination", "immunization", "cdc", "coverage", "public health"], + "category": "vaccination", + "base_score": 0.8, + "endpoints": [ + {"method": "GET", "path": "/resource/8xkx-amqh.json", "summary": "Vaccination coverage records"}, + ], + }, + { + "name": "NOAA Climate Data Online API", + "url": "https://www.ncdc.noaa.gov/cdo-web/api/v2/datasets", + "doc_url": "https://www.ncdc.noaa.gov/cdo-web/webservices/v2", + "description": "Climate and weather datasets from NOAA including temperature, precipitation, and extremes for environmental monitoring and early warning systems.", + "keywords": ["environment", "environmental", "weather", "climate", "noaa", "meteorology", "monitoring"], + "category": "environmental", + "base_score": 0.78, + "endpoints": [ + {"method": "GET", "path": "/cdo-web/api/v2/datasets", "summary": "List NOAA 
datasets"}, + {"method": "GET", "path": "/cdo-web/api/v2/data", "summary": "Query climate observations"}, + ], + }, + { + "name": "EPA AirNow API", + "url": "https://www.airnowapi.org/aq/data/", + "doc_url": "https://docs.airnowapi.org/", + "description": "Air quality measurements and forecasts for US monitoring stations, including pollutants and AQI, supporting environmental monitoring pipelines.", + "keywords": ["air quality", "environment", "environmental", "epa", "pollution", "aqi", "monitoring"], + "category": "environmental", + "base_score": 0.77, + "endpoints": [ + {"method": "GET", "path": "/aq/data/", "summary": "Retrieve air quality data"}, + ], + }, + { + "name": "Orphanet Rare Disease API", + "url": "https://www.orpha.net/OrphAPI/api/Disease", + "doc_url": "https://api.orphanet.net/OrphAPI/#!/Disease", + "description": "Rare disease catalog with Orpha codes, synonyms, epidemiology, and classification.", + "keywords": ["rare disease", "orphanet", "orpha", "genetic", "registry"], + "category": "rare_disease", + "base_score": 0.76, + "endpoints": [ + {"method": "GET", "path": "/OrphAPI/api/Disease", "summary": "List rare diseases"}, + {"method": "GET", "path": "/OrphAPI/api/Disease/{OrphaCode}", "summary": "Retrieve disease details"}, + ], + }, + { + "name": "RAREDISEASES.info NIH Service", + "url": "https://rarediseases.info.nih.gov/services/v1/diseases", + "doc_url": "https://rarediseases.info.nih.gov/developers", + "description": "NIH Genetic and Rare Diseases (GARD) API providing disease descriptions, symptoms, and resources.", + "keywords": ["rare disease", "nih", "gard", "genetic", "registry"], + "category": "rare_disease", + "base_score": 0.75, + "endpoints": [ + {"method": "GET", "path": "/services/v1/diseases", "summary": "Search rare diseases"}, + ], + }, + { + "name": "USAFacts COVID-19 API", + "url": "https://api.usafacts.org/covid/covid-api/v1/cases", + "doc_url": "https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/api/", + "description": "County-level COVID-19 cases and deaths in the United States with daily updates.", + "keywords": ["covid", "pandemic", "surveillance", "epidemiology"], + "category": "pandemic", + "base_score": 0.74, + "endpoints": [ + {"method": "GET", "path": "/covid/covid-api/v1/cases", "summary": "Retrieve COVID-19 cases"}, + ], + }, + { + "name": "Global.Health Line List API", + "url": "https://covid19-api.global.health/v1/line-list", + "doc_url": "https://global.health/documentation/api", + "description": "Anonymized global case line lists for pathogen surveillance, including demographics and travel history.", + "keywords": ["pandemic", "outbreak", "surveillance", "line list", "global health"], + "category": "pandemic", + "base_score": 0.73, + "endpoints": [ + {"method": "GET", "path": "/v1/line-list", "summary": "Retrieve outbreak line list"}, + ], + }, + { + "name": "OpenFDA Food Enforcement API", + "url": "https://api.fda.gov/food/enforcement.json", + "doc_url": "https://open.fda.gov/apis/food/enforcement/", + "description": "Food recall enforcement reports with product description, reason, and distribution data.", + "keywords": ["food", "recall", "fda", "safety", "enforcement"], + "category": "food_safety", + "base_score": 0.55, + "endpoints": [ + {"method": "GET", "path": "/food/enforcement.json", "summary": "Search food recall enforcement"}, + ], + }, + { + "name": "USDA National Farmers Market Directory API", + "url": "https://search.ams.usda.gov/farmersmarkets/v1/data.svc/zipSearch", + "doc_url": 
"https://www.ams.usda.gov/services/local-regional/food-directories-datasets", + "description": "Directory of US farmers markets with location, operation schedule, and services.", + "keywords": ["nutrition", "food access", "farmers market", "usda"], + "category": "nutrition", + "base_score": 0.7, + "endpoints": [ + {"method": "GET", "path": "/farmersmarkets/v1/data.svc/zipSearch", "summary": "Find farmers markets by ZIP"}, + ], + }, + { + "name": "HealthData.gov CKAN Catalog API", + "url": "https://healthdata.gov/api/3/action/package_search", + "doc_url": "https://healthdata.gov/developer", + "description": "Catalog of US Department of Health and Human Services datasets via CKAN API.", + "keywords": ["open data", "catalog", "health data", "ckan", "metadata"], + "category": "data_catalog", + "base_score": 0.82, + "endpoints": [ + {"method": "GET", "path": "/api/3/action/package_search", "summary": "Search dataset catalog"}, + ], + }, + { + "name": "data.gov CKAN Catalog API", + "url": "https://catalog.data.gov/api/3/action/package_search", + "doc_url": "https://catalog.data.gov/dataset", + "description": "US Federal data catalog with metadata across climate, energy, health, and finance.", + "keywords": ["open data", "catalog", "federal", "ckan", "metadata"], + "category": "data_catalog", + "base_score": 0.8, + "endpoints": [ + {"method": "GET", "path": "/api/3/action/package_search", "summary": "Search the federal data catalog"}, + ], + }, + { + "name": "Europe PMC RESTful API", + "url": "https://www.ebi.ac.uk/europepmc/webservices/rest/search", + "doc_url": "https://europepmc.org/RestfulWebService", + "description": "Biomedical literature, grants, and patents from Europe PMC with advanced search syntax.", + "keywords": ["literature", "research", "biomedical", "europe pmc", "publications"], + "category": "literature", + "base_score": 0.78, + "endpoints": [ + {"method": "GET", "path": "/webservices/rest/search", "summary": "Search biomedical literature"}, + ], + }, + { + "name": "OpenAlex Graph API", + "url": "https://api.openalex.org/works", + "doc_url": "https://docs.openalex.org/api", + "description": "Scholarly works, authors, concepts, and institutions graph with filtering for literature discovery and citation analysis.", + "keywords": ["literature", "openalex", "scholarly", "citations", "research graph"], + "category": "literature", + "base_score": 0.77, + "endpoints": [ + {"method": "GET", "path": "/works", "summary": "Search scholarly works"}, + {"method": "GET", "path": "/authors", "summary": "Browse scholarly authors"}, + ], + }, +] + + +# ----------------------------------------------------------------------------- +# Internal helpers +# ----------------------------------------------------------------------------- + +TOKEN_PATTERN = re.compile(r"[a-z0-9]+") + + +def _tokenize(text: str) -> Set[str]: + tokens = set(TOKEN_PATTERN.findall((text or "").lower())) + enriched: Set[str] = set(tokens) + for tok in tokens: + if len(tok) <= 2: + continue + if tok.endswith("ies") and len(tok) > 3: + enriched.add(tok[:-3] + "y") + if tok.endswith("ing") and len(tok) > 4: + enriched.add(tok[:-3]) + if tok.endswith("al") and len(tok) > 4: + enriched.add(tok[:-2]) + if tok.endswith("s") and len(tok) > 3: + enriched.add(tok[:-1]) + return enriched + + +@dataclass(frozen=True) +class CatalogRecord: + data: Dict[str, object] + tokens: Set[str] + keyword_tokens: Set[str] + base_score: float + + +def _prepare_catalog(raw_items: Iterable[Dict[str, object]]) -> List[CatalogRecord]: + prepared: 
List[CatalogRecord] = [] + for item in raw_items: + entry = deepcopy(item) + + url = str(entry.get("url") or "").strip() + if not url: + continue + parsed = urlparse(url) + host = parsed.netloc.lower() + base_url = f"{parsed.scheme}://{parsed.netloc}" + + entry.setdefault("host", host) + entry.setdefault("base_url", base_url) + entry.setdefault("source", "static_catalog") + entry.setdefault("doc_url", entry.get("doc_url") or f"{base_url}/") + entry.setdefault("health", {"ok": True, "status": 200, "latency_ms": 180, "checked": "static"}) + entry.setdefault("cors", {"preflight": False}) + entry.setdefault("trust", trust_score(host)) + + keywords = entry.get("keywords") or [] + if keywords: + desc = entry.get("description") or "" + kw_text = "; ".join(str(k) for k in keywords) + if kw_text and kw_text.lower() not in desc.lower(): + entry["description"] = f"{desc} (keywords: {kw_text})" + keyword_tokens = _tokenize(" ".join(map(str, keywords))) + text_tokens = _tokenize(" ".join( + str(part) for part in ( + entry.get("name", ""), + entry.get("description", ""), + entry.get("category", ""), + entry.get("doc_url", ""), + ) + )) + + base_score = float(entry.get("base_score") or 0.0) + + prepared.append( + CatalogRecord( + data=entry, + tokens=text_tokens | keyword_tokens, + keyword_tokens=keyword_tokens, + base_score=base_score, + ) + ) + + return prepared + + +CATALOG: List[CatalogRecord] = _prepare_catalog(RAW_CATALOG) + + +# ----------------------------------------------------------------------------- +# Public harvester interface +# ----------------------------------------------------------------------------- + +def _score_entry(tokens: Set[str], record: CatalogRecord) -> float: + if not tokens: + return record.base_score + 0.5 + + keyword_overlap = len(tokens & record.keyword_tokens) + text_overlap = len(tokens & record.tokens) + + if keyword_overlap == 0 and text_overlap == 0: + return record.base_score * 0.1 + + precision = keyword_overlap / (len(tokens) or 1) + coverage = (keyword_overlap + text_overlap) / (len(record.tokens) or 1) + + return ( + 2.0 * keyword_overlap + + 1.2 * text_overlap + + 1.5 * precision + + 1.0 * coverage + + record.base_score * 0.25 + ) + + +SYNONYM_MAP = { + "clinical": ["trial", "research"], + "dentistry": ["dental", "oral", "oralhealth"], + "dental": ["dentistry", "oral", "oralhealth"], + "oral": ["dentistry", "dental", "oralhealth"], + "environmental": ["environment", "climate", "monitoring"], + "environment": ["environmental", "climate", "air"], + "monitoring": ["surveillance", "tracking"], + "rare": ["orphan", "orphanet", "genetic"], + "disease": ["condition", "illness"], + "genomics": ["genomic", "gene", "sequence", "dna"], + "genomic": ["genomics", "gene", "dna"], + "pandemic": ["outbreak", "surveillance"], + "surveillance": ["monitoring", "tracking"], + "nutrition": ["food", "diet", "dietary"], + "vaccination": ["immunization", "vaccine"], + "mental": ["behavioral", "behavior", "psych"], + "health": ["healthcare", "publichealth"], + "pharmaceutical": ["drug", "medicine"], + "adverse": ["safety", "pharmacovigilance"], +} + + +def harvest(query: str, limit: int = 5, **kwargs) -> List[Dict[str, object]]: + """ + Harvest candidate API endpoints from the static catalog. + + Args: + query: Natural language search string. + limit: Maximum number of candidates to return. + **kwargs: Unused passthrough parameters for compatibility. 
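+
+    Returns:
+        Up to ``limit`` candidate dicts from the static catalog, scored
+        against the synonym-expanded query tokens and re-ranked by
+        rank_candidates before the private scoring fields are stripped.
+
+    Example (illustrative)::
+
+        candidates = harvest("dental caries surveillance", limit=3)
+        top = candidates[0]["base_url"] if candidates else None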
+ """ + limit = max(1, min(int(limit or 5), 50)) + query = (query or "").strip() + + if not CATALOG: + return [] + + if not query: + top = sorted(CATALOG, key=lambda rec: rec.base_score, reverse=True)[:limit] + return [deepcopy(rec.data) for rec in top] + + token_union: Set[str] = _tokenize(query) + for token in list(token_union): + for syn in SYNONYM_MAP.get(token, []): + token_union |= _tokenize(syn) + + scored: List[Dict[str, object]] = [] + for record in CATALOG: + score = _score_entry(token_union, record) + if score <= 0 and record.base_score <= 0: + continue + candidate = deepcopy(record.data) + candidate["_match_score"] = round(score, 4) + candidate["_match_terms"] = sorted(token_union & record.tokens) + scored.append(candidate) + + if not scored: + top = sorted(CATALOG, key=lambda rec: rec.base_score, reverse=True)[:limit] + return [deepcopy(rec.data) for rec in top] + + preliminary = sorted(scored, key=lambda c: c["_match_score"], reverse=True)[: limit * 3] + ranked = rank_candidates(query, preliminary) + final = ranked[:limit] + + for cand in final: + cand.pop("_match_score", None) + cand.pop("_match_terms", None) + + return final + + +__all__ = ["harvest"] diff --git a/src/tooluniverse/harvest/verifier.py b/src/tooluniverse/harvest/verifier.py new file mode 100644 index 00000000..2da35df9 --- /dev/null +++ b/src/tooluniverse/harvest/verifier.py @@ -0,0 +1,33 @@ +from __future__ import annotations +import os, time, logging, requests +from typing import Dict, Optional + +logger = logging.getLogger("HarvestVerify") +DEFAULT_TIMEOUT = int(os.getenv("HARVEST_TIMEOUT_S", "8")) +SIZE_LIMIT = int(os.getenv("HARVEST_MAX_BYTES", "2000000")) +JSON_ACCEPT = {"Accept": "application/json"} + +def _head(url: str, timeout=None): + try: + return requests.head(url, timeout=timeout or DEFAULT_TIMEOUT, allow_redirects=True) + except requests.RequestException: + return None + +def _health_probe(url: str, timeout=None) -> Dict: + t0 = time.time() + try: + rh = _head(url, timeout) + if rh is not None: + clen = int(rh.headers.get("Content-Length") or 0) + if clen and clen > SIZE_LIMIT: + return {"ok": False, "status": rh.status_code, "skipped": f"large({clen})"} + r = requests.get(url, timeout=timeout or DEFAULT_TIMEOUT, headers=JSON_ACCEPT) + return {"ok": r.status_code < 500, "status": r.status_code, "latency_ms": int((time.time()-t0)*1000), "ctype": r.headers.get("Content-Type","")} + except requests.RequestException as e: + return {"ok": False, "status": 0, "error": str(e)} + +def verify_candidate(result, timeout_s: Optional[int] = None) -> Optional[Dict]: + url = (result.url or "").strip() + if not url: return None + health = _health_probe(url, timeout=timeout_s) + return {"name": result.title, "url": url, "health": health, "source": result.source} diff --git a/src/tooluniverse/tool_navigator_tool.py b/src/tooluniverse/tool_navigator_tool.py new file mode 100644 index 00000000..1341dd98 --- /dev/null +++ b/src/tooluniverse/tool_navigator_tool.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import math +from typing import Any, Dict, List, Optional + +from .execute_function import ToolUniverse +from .tool_registry import register_tool +from .vsd_registry import load_catalog + + +def _tokenize(text: str) -> List[str]: + return [t for t in (text or "").lower().split() if t] + + +def _score(query_tokens: List[str], name: str, description: str) -> float: + haystack = f"{name} {description}".lower() + score = 0.0 + for token in query_tokens: + if token in haystack: + score += 2.0 + score += 
sum(1.0 for token in query_tokens if any(word.startswith(token) for word in haystack.split())) + return score + + +def _format_tool(tool: Dict[str, Any]) -> Dict[str, Any]: + return { + "name": tool.get("name"), + "type": tool.get("type"), + "description": tool.get("description"), + "tool_type": tool.get("tool_type"), + "category": tool.get("category"), + "source": tool.get("source"), + } + + +@register_tool("ToolNavigatorTool") +class ToolNavigatorTool: + """ + Search ToolUniverse's catalog (built-in + VSD) to help agents discover relevant tools. + """ + + name = "ToolNavigatorTool" + description = "Search ToolUniverse/Navigated catalog for tools matching a query." + input_schema = { + "type": "object", + "properties": { + "query": {"type": "string"}, + "limit": {"type": "integer", "default": 10, "minimum": 1, "maximum": 50}, + "categories": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional list of categories to include.", + }, + "include_vsd": { + "type": "boolean", + "default": True, + "description": "Include dynamically registered VSD tools in the search.", + }, + }, + "required": ["query"], + "additionalProperties": False, + } + + def __init__(self) -> None: + self._tooluniverse = ToolUniverse() + + def _load_base_tools(self) -> List[Dict[str, Any]]: + if not getattr(self._tooluniverse, "all_tools", None): + self._tooluniverse.load_tools() + return list(getattr(self._tooluniverse, "all_tools", [])) + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + query = (arguments.get("query") or "").strip() + if not query: + return {"ok": False, "error": "query is required"} + + limit = int(arguments.get("limit") or 10) + include_vsd = bool(arguments.get("include_vsd", True)) + categories = arguments.get("categories") + if categories and not isinstance(categories, list): + categories = [categories] + categories = [c.lower() for c in categories or []] + + tools = self._load_base_tools() + if include_vsd: + for cfg in load_catalog().values(): + tools.append( + { + "name": cfg.get("name"), + "type": "DynamicREST", + "description": (cfg.get("metadata") or {}).get("description"), + "tool_type": "dynamic_rest", + "category": "vsd", + "source": (cfg.get("metadata") or {}).get("source"), + } + ) + + query_tokens = _tokenize(query) + scored: List[tuple[float, Dict[str, Any]]] = [] + for tool in tools: + if categories and (tool.get("category") or "").lower() not in categories: + continue + score = _score(query_tokens, tool.get("name", ""), tool.get("description", "")) + if score > 0: + scored.append((score, tool)) + + scored.sort(key=lambda item: item[0], reverse=True) + best = [_format_tool(tool) | {"score": round(score, 3)} for score, tool in scored[:limit]] + + return {"ok": True, "query": query, "results": best, "total": len(scored)} diff --git a/src/tooluniverse/tool_registry.py b/src/tooluniverse/tool_registry.py index eb3b893f..c3f5d141 100644 --- a/src/tooluniverse/tool_registry.py +++ b/src/tooluniverse/tool_registry.py @@ -446,3 +446,18 @@ def get_tool_class_lazy(tool_name): return _tool_registry.get(tool_name) return None + +# --- VSD / compatibility shims --- +def get_tool_class(name: str): + """ + Backwards-compatible accessor used by scripts like SampleVDSRun.py. + Prefer get_tool_class_lazy(name) internally. 
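+
+    Example (illustrative; ContextKeeperTool is registered by this patch)::
+
+        ToolCls = get_tool_class("ContextKeeperTool")
+        tool = ToolCls() if ToolCls else None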
+ """ + return get_tool_class_lazy(name) + +class _RegistryShim: + def get_tool_class(self, name: str): + return get_tool_class_lazy(name) + +# Expose a 'registry' object with get_tool_class, if callers expect it +registry = _RegistryShim() \ No newline at end of file diff --git a/src/tooluniverse/vsd_api_tool.py b/src/tooluniverse/vsd_api_tool.py new file mode 100644 index 00000000..84a5c525 --- /dev/null +++ b/src/tooluniverse/vsd_api_tool.py @@ -0,0 +1,115 @@ +from __future__ import annotations +import os +import json +from typing import Dict, Any + +from .base_tool import BaseTool +from .tool_registry import register_tool + +# Reuse same storage locations as vsd_tool +VSD_HOME = os.environ.get("TOOLUNIVERSE_VSD_DIR", os.path.expanduser("~/.tooluniverse/vsd")) +GENERATED_TOOLS_PATH = os.path.join(VSD_HOME, "generated_tools.json") + +os.makedirs(VSD_HOME, exist_ok=True) + + +def _save_tool(tool_spec: Dict[str, Any]) -> None: + """Upsert a generated tool spec into the registry file.""" + tools: list[Dict[str, Any]] = [] + if os.path.exists(GENERATED_TOOLS_PATH): + try: + with open(GENERATED_TOOLS_PATH, "r", encoding="utf-8") as f: + tools = json.load(f) + except Exception: + tools = [] + by_name = {t.get("name"): t for t in tools} + by_name[tool_spec.get("name")] = tool_spec + with open(GENERATED_TOOLS_PATH, "w", encoding="utf-8") as f: + json.dump(list(by_name.values()), f, indent=2) + + +@register_tool("VSDToolBuilder") +class VSDToolBuilder(BaseTool): + """ + Build and register a usable ToolUniverse tool from a harvested or discovered VSD candidate. + + Input: + { + "candidate": { + "domain": "clinicaltrials.gov", + "endpoint": "https://clinicaltrials.gov/api/v2/studies", + "license": "CC0", + "score": 0.92 + }, + "tool_name": "clinicaltrials_search", + "description": "Query clinical trials with disease/condition filters", + "parameter_overrides": { ... optional JSON Schema ... 
} + } + + Output: + { + "registered": true, + "name": "clinicaltrials_search", + "config_path": "/path/to/generated_tools.json" + } + """ + + def run(self, arguments: Dict[str, Any]): + if not arguments: + return {"error": "Missing arguments"} + cand = arguments.get("candidate") or {} + tool_name = arguments.get("tool_name") + desc = arguments.get("description") or f"VSD tool for {cand.get('domain')}" + param_override = arguments.get("parameter_overrides") or {} + + if not tool_name: + return {"error": "tool_name is required"} + if not cand or not cand.get("endpoint"): + return {"error": "candidate with endpoint is required"} + + endpoint = cand.get("endpoint") + domain = cand.get("domain", "unknown") + + # Pick implementation type + if endpoint.endswith(".graphql") or "graphql" in endpoint: + impl_type = "GenericGraphQLTool" + elif endpoint.startswith("http"): + impl_type = "GenericRESTTool" + else: + impl_type = "URLHTMLTagTool" + + # Default parameter schema (can be overridden) + params = param_override or { + "type": "object", + "properties": { + "query": {"type": "string", "default": ""}, + "pageSize": {"type": "integer", "default": 10}, + } + } + + tool_spec = { + "type": impl_type, + "name": tool_name, + "description": desc, + "fields": { + "base_url": endpoint, + "method": "GET", + "default_params": {} + }, + "parameter": params, + "label": ["VSD", cand.get("label") or domain], + "vsd": { + "domain": domain, + "endpoint": endpoint, + "license": cand.get("license", "unknown"), + "score": cand.get("score"), + "registry": cand.get("registry", "catalog"), + } + } + + # Special case: ClinicalTrials.gov -> add arg_transform + if "clinicaltrials.gov" in endpoint and impl_type == "GenericRESTTool": + tool_spec["vsd"]["arg_transform"] = "ctgov_time_window" + + _save_tool(tool_spec) + return {"registered": True, "name": tool_name, "config_path": GENERATED_TOOLS_PATH} diff --git a/src/tooluniverse/vsd_catalog.py b/src/tooluniverse/vsd_catalog.py new file mode 100644 index 00000000..95ec1269 --- /dev/null +++ b/src/tooluniverse/vsd_catalog.py @@ -0,0 +1,44 @@ +# src/tooluniverse/vsd_catalog.py +import os, json +from pathlib import Path +from typing import List, Dict, Any + +VSD_DIR = Path(os.environ.get("TOOLUNIVERSE_VSD_DIR", Path.home() / ".tooluniverse" / "vsd")) +ALLOWLIST_PATH = VSD_DIR / "allowlist.json" +CATALOG_PATH = VSD_DIR / "catalog" / "vsd_catalog_candidates.json" + +def load_json(path: Path) -> Any: + if not path.exists(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except Exception: + return None + +def load_allowlist(seed: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + user = load_json(ALLOWLIST_PATH) or [] + merged = {e["domain"]: e for e in seed} + for e in user: + merged[e["domain"]] = {**merged.get(e["domain"], {}), **e} + return list(merged.values()) + +def load_catalog_candidates() -> List[Dict[str, Any]]: + data = load_json(CATALOG_PATH) or [] + # normalize minimal fields and keep only candidates + out = [] + for d in data: + if d.get("status") not in (None, "candidate", "approved"): + continue + out.append({ + "domain": d.get("domain"), + "label": d.get("label") or d.get("domain"), + "registry": d.get("registry") or "data.gov", + "endpoint": d.get("endpoint"), + "license": d.get("license") or "unknown", + "trust": float(d.get("trust") or 0.7), + "freshness": d.get("freshness") or "", + "api_kind": d.get("api_kind") or "rest", + "status": d.get("status") or "candidate", + "tags": d.get("tags") or [], + }) + return out diff --git 
a/src/tooluniverse/vsd_registry.py b/src/tooluniverse/vsd_registry.py new file mode 100644 index 00000000..83b237f6 --- /dev/null +++ b/src/tooluniverse/vsd_registry.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from typing import Any, Dict + +from .common_utils import read_json, write_json, vsd_generated_path + + +def _normalize_catalog(data: Any) -> Dict[str, Dict[str, Any]]: + catalog: Dict[str, Dict[str, Any]] = {} + if not isinstance(data, dict): + return catalog + + generated = data.get("generated_tools") if isinstance(data.get("generated_tools"), list) else None + if generated is not None: + for item in generated: + if isinstance(item, dict) and item.get("name"): + name = item["name"] + catalog[name] = dict(item) + return catalog + + for name, cfg in data.items(): + if not isinstance(cfg, dict): + continue + entry = dict(cfg) + entry.setdefault("name", name) + catalog[name] = entry + return catalog + + +def load_catalog() -> Dict[str, Dict[str, Any]]: + """ + Load the Verified Source catalog from disk and normalize it + to a {name: config} dictionary regardless of historical format. + """ + path = vsd_generated_path() + data = read_json(path, {}) + return _normalize_catalog(data) + + +def save_catalog(catalog: Dict[str, Dict[str, Any]]) -> str: + """ + Persist the catalog to disk as a flat {name: config} mapping. + Returns the file path for convenience. + """ + path = vsd_generated_path() + # ensure each entry has its name + serializable = {name: dict(cfg, name=name) for name, cfg in catalog.items()} + write_json(path, serializable) + return path + + +def upsert_tool(tool_name: str, cfg: Dict[str, Any]) -> Dict[str, Any]: + """ + Insert or update a tool configuration in the catalog and propagate the + change to any in-process dynamic registries. + """ + catalog = load_catalog() + config = dict(cfg) + config.setdefault("name", tool_name) + catalog[tool_name] = config + save_catalog(catalog) + + # Notify dynamic REST runner (best-effort, optional import) + try: + from .dynamic_rest_runner import upsert_generated_tool # type: ignore + + upsert_generated_tool(tool_name, config) + except Exception: + pass + + return config + + +def remove_tool(tool_name: str) -> bool: + """ + Remove a tool from the catalog. Returns True if a tool was removed. 
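+
+    Example (hypothetical tool name; a sketch of the expected behavior):
+        remove_tool("clinicaltrials_search")  # -> True if present, False otherwise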
+ """ + catalog = load_catalog() + if tool_name not in catalog: + return False + del catalog[tool_name] + save_catalog(catalog) + + try: + from .dynamic_rest_runner import remove_generated_tool # type: ignore + + remove_generated_tool(tool_name) + except Exception: + pass + + return True diff --git a/src/tooluniverse/vsd_tool.py b/src/tooluniverse/vsd_tool.py new file mode 100644 index 00000000..b765f8fe --- /dev/null +++ b/src/tooluniverse/vsd_tool.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from typing import Any, Dict + +from .tool_registry import register_tool +from .vsd_registry import load_catalog, save_catalog, upsert_tool +from .dynamic_rest_runner import refresh_generated_registry, remove_generated_tool +from .vsd_utils import build_config, probe_config, stamp_metadata + + +class VerifiedSourceRegisterTool: + name = "VerifiedSourceRegisterTool" + description = "Register a DynamicREST tool in the verified-source directory" + input_schema = { + "type": "object", + "properties": { + "tool_name": {"type": "string"}, + "tool_type": {"type": "string", "default": "dynamic_rest"}, + "candidate": {"type": "object"}, + "default_params": {"type": "object"}, + "default_headers": {"type": "object"}, + "force": {"type": "boolean", "default": False}, + }, + "required": ["tool_name", "candidate"], + } + + def __call__( + self, + tool_name: str, + candidate: Dict[str, Any], + tool_type: str = "dynamic_rest", + default_params: Dict[str, Any] | None = None, + default_headers: Dict[str, Any] | None = None, + force: bool = False, + ) -> Dict[str, Any]: + if not tool_name: + raise ValueError("tool_name is required") + + cfg = build_config( + candidate or {}, + tool_type=tool_type, + default_params=default_params, + default_headers=default_headers, + ) + + probe = probe_config(cfg) + stamp_metadata(cfg, probe) + + if not probe.get("ok") and not force: + return { + "registered": False, + "name": tool_name, + "error": "Endpoint validation failed", + "test": probe, + "suggestion": "Provide default_params/default_headers or retry with force=True after ensuring credentials.", + } + + cfg = upsert_tool(tool_name, cfg) + refresh_generated_registry() + + return {"registered": True, "name": tool_name, "config": cfg} + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + return self.__call__( + tool_name=arguments.get("tool_name"), + candidate=arguments.get("candidate", {}), + tool_type=arguments.get("tool_type", "dynamic_rest"), + default_params=arguments.get("default_params"), + default_headers=arguments.get("default_headers"), + force=bool(arguments.get("force")), + ) + + +class VerifiedSourceDiscoveryTool: + name = "VerifiedSourceDiscoveryTool" + description = "Return the Verified-Source catalog." + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + catalog = load_catalog() + return {"ok": True, "tools": list(catalog.values())} + + +class VerifiedSourceRemoveTool: + name = "VerifiedSourceRemoveTool" + description = "Remove a generated tool from the Verified-Source catalog." 
+ input_schema = { + "type": "object", + "properties": { + "tool_name": {"type": "string"}, + }, + "required": ["tool_name"], + } + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + tool_name = arguments.get("tool_name") + if not tool_name: + return {"removed": False, "error": "tool_name is required"} + catalog = load_catalog() + if tool_name not in catalog: + return {"removed": False, "error": f"Unknown tool '{tool_name}'"} + del catalog[tool_name] + save_catalog(catalog) + remove_generated_tool(tool_name) + return {"removed": True, "name": tool_name} + + +def register(server): + register_tool(VerifiedSourceRegisterTool.name, VerifiedSourceRegisterTool) + register_tool(VerifiedSourceDiscoveryTool.name, VerifiedSourceDiscoveryTool) + register_tool(VerifiedSourceRemoveTool.name, VerifiedSourceRemoveTool) + + server.add_tool(VerifiedSourceRegisterTool.name, VerifiedSourceRegisterTool()) + server.add_tool(VerifiedSourceDiscoveryTool.name, VerifiedSourceDiscoveryTool()) + server.add_tool(VerifiedSourceRemoveTool.name, VerifiedSourceRemoveTool()) + refresh_generated_registry() diff --git a/src/tooluniverse/vsd_utils.py b/src/tooluniverse/vsd_utils.py new file mode 100644 index 00000000..a30c12dd --- /dev/null +++ b/src/tooluniverse/vsd_utils.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +import time +from copy import deepcopy +from typing import Any, Dict + +import requests + +# ------------------------------------------------------------------------------ +# Host-specific overrides and requirements +# ------------------------------------------------------------------------------ + +HOST_OVERRIDES: Dict[str, Dict[str, Any]] = { + # Ensembl requires a concrete resource; expose the JSON heartbeat by default. + "rest.ensembl.org": { + "endpoint": "https://rest.ensembl.org/info/ping", + "default_headers": {"Accept": "application/json"}, + "notes": "Ensembl REST base requires explicit resource. '/info/ping' provides a JSON heartbeat.", + }, + "api.fda.gov": { + "default_params": {"limit": 5}, + "default_headers": {"Accept": "application/json"}, + }, + "data.cdc.gov": { + "default_params": {"$limit": 5}, + "default_headers": {"Accept": "application/json"}, + }, +} + +HOST_REQUIREMENTS: Dict[str, Dict[str, Any]] = { + "api.nal.usda.gov": { + "requires_api_key": True, + "notes": "USDA FoodData Central requires an api_key query parameter.", + }, + "www.ncdc.noaa.gov": { + "requires_api_key": True, + "notes": "NOAA CDO API requires a token header. 
See https://www.ncdc.noaa.gov/cdo-web/webservices/v2", + "default_headers": {"token": ""}, + }, + "clinicaltrialsapi.cancer.gov": { + "requires_api_key": True, + "notes": "ClinicalTrials API requires authenticated access for JSON responses.", + }, + "findtreatment.samhsa.gov": { + "requires_manual_params": True, + "notes": "SAMHSA locator needs query parameters (e.g., state, lat/long) to return JSON.", + }, +} + + +# ------------------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------------------ + +def _derive_endpoint(candidate: Dict[str, Any]) -> str: + endpoint = candidate.get("endpoint") or candidate.get("url") + if endpoint: + return str(endpoint) + + base_url = candidate.get("base_url") + routes = candidate.get("endpoints") or [] + if base_url and isinstance(routes, list) and routes: + first = routes[0] + path = str(first.get("path") or "/") + if not base_url.endswith("/") and not path.startswith("/"): + return f"{base_url}/{path}" + if base_url.endswith("/") and path.startswith("/"): + return f"{base_url.rstrip('/')}{path}" + return f"{base_url}{path}" + + if base_url: + return str(base_url) + + raise ValueError("candidate.endpoint or candidate.url is required") + + +def _apply_overrides(candidate: Dict[str, Any], cfg: Dict[str, Any]) -> None: + host = (candidate.get("host") or "").lower() + + overrides = HOST_OVERRIDES.get(host) + if overrides: + if overrides.get("endpoint"): + cfg["endpoint"] = overrides["endpoint"] + if overrides.get("default_params"): + cfg.setdefault("default_params", {}).update(overrides["default_params"]) + if overrides.get("default_headers"): + cfg.setdefault("default_headers", {}).update(overrides["default_headers"]) + if overrides.get("notes"): + cfg.setdefault("metadata", {}).setdefault("notes", []).append(overrides["notes"]) + + requirements = HOST_REQUIREMENTS.get(host) + if requirements: + meta = cfg.setdefault("metadata", {}) + meta.setdefault("requirements", {}).update( + { + key: value + for key, value in requirements.items() + if key not in {"default_headers"} + } + ) + if requirements.get("default_headers"): + cfg.setdefault("default_headers", {}).update(requirements["default_headers"]) + + +# ------------------------------------------------------------------------------ +# Public helpers used by VSD tools +# ------------------------------------------------------------------------------ + +def build_config( + candidate: Dict[str, Any], + tool_type: str = "dynamic_rest", + default_params: Dict[str, Any] | None = None, + default_headers: Dict[str, Any] | None = None, +) -> Dict[str, Any]: + """ + Produce a DynamicREST-style configuration dictionary from a harvest candidate. 
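+
+    Illustrative sketch (relies on the api.fda.gov entry in HOST_OVERRIDES above):
+        build_config({"endpoint": "https://api.fda.gov/drug/label.json",
+                      "host": "api.fda.gov"})
+        yields a "dynamic_rest" config with method "GET", default_params
+        {"limit": 5}, and an "Accept: application/json" default header.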
+ """ + endpoint = _derive_endpoint(candidate) + method = str(candidate.get("method") or candidate.get("http_method") or "GET").upper() + merged_params = deepcopy(candidate.get("default_params") or candidate.get("params") or {}) + merged_headers = deepcopy(candidate.get("default_headers") or candidate.get("headers") or {}) + + cfg: Dict[str, Any] = { + "type": tool_type, + "endpoint": endpoint, + "method": method, + "default_params": merged_params, + "default_headers": merged_headers, + "auth": candidate.get("auth") or {"type": "none"}, + "description": candidate.get("description") or "", + "tool_type": candidate.get("tool_type") or "dynamic_rest", + "metadata": { + "source": candidate.get("source"), + "trust": candidate.get("trust"), + "health": candidate.get("health"), + "doc_url": candidate.get("doc_url"), + "description": candidate.get("description"), + "host": candidate.get("host"), + }, + "vsd": candidate, + } + + response_key = candidate.get("response_key") + if response_key: + cfg["response_key"] = response_key + + if default_params: + cfg["default_params"].update(default_params) + if default_headers: + cfg["default_headers"].update(default_headers) + + _apply_overrides(candidate, cfg) + + return cfg + + +def probe_config(cfg: Dict[str, Any]) -> Dict[str, Any]: + """ + Execute a lightweight HTTP request to validate the generated configuration. + Returns diagnostic information including HTTP status and a JSON snippet if available. + """ + url = cfg.get("endpoint") + method = (cfg.get("method") or "GET").upper() + params = deepcopy(cfg.get("default_params") or {}) + headers = deepcopy(cfg.get("default_headers") or {}) + headers.setdefault("Accept", "application/json") + + try: + if method == "GET": + resp = requests.get(url, params=params, headers=headers, timeout=20) + else: + resp = requests.request(method, url, json=params, headers=headers, timeout=20) + except Exception as exc: + return {"ok": False, "error": str(exc), "stage": "request"} + + content_type = resp.headers.get("Content-Type", "") + preview = resp.text[:400] if resp.text else "" + sample = None + has_json = False + + if "json" in content_type.lower(): + try: + payload = resp.json() + has_json = True + if isinstance(payload, list): + sample = payload[:1] + elif isinstance(payload, dict): + sample = {k: payload[k] for i, k in enumerate(payload) if i < 5} + else: + sample = payload + except Exception: + has_json = False + + status_ok = resp.status_code < 400 + + return { + "ok": bool(status_ok and (has_json or "json" in content_type.lower())), + "status": resp.status_code, + "content_type": content_type, + "has_json": has_json, + "sample": sample, + "preview": preview, + } + + +def stamp_metadata(cfg: Dict[str, Any], probe: Dict[str, Any]) -> None: + """ + Update metadata timestamps and probe results on a configuration dictionary. 
+ """ + metadata = cfg.setdefault("metadata", {}) + metadata["registered_at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ") + metadata["last_test"] = probe From a11fcc23657d88be64b5e3e73f4d4362a44371fe Mon Sep 17 00:00:00 2001 From: SufianTA Date: Sun, 26 Oct 2025 19:06:47 -0700 Subject: [PATCH 2/8] Add VSD dynamic REST stack and Harvest helpers --- README.md | 20 + scripts/medlog_stub_server.py | 151 ++++ scripts/run_full_demo.py | 704 ++++++++++++++++++ src/tooluniverse/__init__.py | 1 + src/tooluniverse/candidate_tester_tool.py | 41 +- src/tooluniverse/data/medlog_tools.json | 134 ++++ src/tooluniverse/data/medtok_mcp_tools.json | 11 + src/tooluniverse/data/medtok_tools.json | 134 ++++ src/tooluniverse/default_config.py | 5 + src/tooluniverse/dynamic_rest_runner.py | 194 +++++ src/tooluniverse/logging_config.py | 12 +- src/tooluniverse/medlog_tool.py | 143 ++++ src/tooluniverse/medtok_tool.py | 122 +++ src/tooluniverse/utils.py | 2 +- src/tooluniverse/vsd_tool.py | 173 ++++- src/tooluniverse/vsd_utils.py | 66 +- tests/integration/test_medtok_medlog_tools.py | 282 +++++++ 17 files changed, 2138 insertions(+), 57 deletions(-) create mode 100644 scripts/medlog_stub_server.py create mode 100644 scripts/run_full_demo.py create mode 100644 src/tooluniverse/data/medlog_tools.json create mode 100644 src/tooluniverse/data/medtok_mcp_tools.json create mode 100644 src/tooluniverse/data/medtok_tools.json create mode 100644 src/tooluniverse/dynamic_rest_runner.py create mode 100644 src/tooluniverse/medlog_tool.py create mode 100644 src/tooluniverse/medtok_tool.py create mode 100644 tests/integration/test_medtok_medlog_tools.py diff --git a/README.md b/README.md index 7ab04aec..3ecf4433 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,26 @@ Our comprehensive documentation covers everything from quick start to advanced w - **[Adding Tools Tutorial](https://zitniklab.hms.harvard.edu/ToolUniverse/tutorials/addtools/Adding_Tools_Tutorial.html)**: Step-by-step tool addition guide - **[MCP Tool Registration](https://zitniklab.hms.harvard.edu/ToolUniverse/tutorials/addtools/mcp_tool_registration_en.html)**: Register tools via MCP +### MedTok + MedLog Integrations + +ToolUniverse now ships with first-class support for the MedTok tokenizer service and the MedLog reference collector/FHIR bridge. + +- **MedTok REST tools** (`tool_type=["medtok"]`) expose `/tokenize`, `/embed`, `/nearest_neighbors`, `/map_text_to_code`, `/search_text`, and `/codes/{system}/{code}`. Point them at a running service by setting `MEDTOK_BASE_URL` (defaults to `http://localhost:8000`). +- **MedTok MCP auto-loader** (`tool_type=["medtok_mcp_auto_loader"]`) can register tools from the FastMCP wrapper. Set `MEDTOK_MCP_SERVER_HOST` to the host running the `medtok_tool.py` MCP server. +- **MedLog collector + FHIR tools** (`tool_type=["medlog"]`) wrap the reference implementation's REST APIs. Configure the collectors' endpoints with `MEDLOG_COLLECTOR_BASE_URL` (default `http://localhost:7001`) and `MEDLOG_FHIR_BASE_URL` (default `http://localhost:7003`). + +See `tests/integration/test_medtok_medlog_tools.py` for end-to-end examples that start the services, invoke the tools, and validate responses. 
+ +### End-to-End Demo Script + +To launch the reference services and exercise the toolchain automatically, run: + +```bash +python scripts/run_full_demo.py # adds -h for options +``` + +The script starts MedTok + MedLog locally, runs representative tool calls (including optional external APIs like InterPro, KEGG, IUCN, JASPAR, MarineSpecies, cBioPortal, and Phenome Jax), and prints a success/failure summary. + ### 📚 API Reference - **[API Directory](https://zitniklab.hms.harvard.edu/ToolUniverse/api/modules.html)**: Complete module listing - **[Core Modules](https://zitniklab.hms.harvard.edu/ToolUniverse/api/tooluniverse.html)**: Main ToolUniverse class and utilities diff --git a/scripts/medlog_stub_server.py b/scripts/medlog_stub_server.py new file mode 100644 index 00000000..700245fa --- /dev/null +++ b/scripts/medlog_stub_server.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +""" +Lightweight MedLog stub servers for local demos. + +Run the collector: + python scripts/medlog_stub_server.py --mode collector --host 127.0.0.1 --port 8911 + +Run the FHIR bridge: + python scripts/medlog_stub_server.py --mode fhir --host 127.0.0.1 --port 8912 +""" + +from __future__ import annotations + +import argparse +import os +import threading +import time +from typing import Dict + +import uvicorn +from fastapi import FastAPI, HTTPException + + +STORE: Dict[str, Dict] = {} +STORE_LOCK = threading.Lock() + + +def build_collector_app() -> FastAPI: + app = FastAPI(title="MedLog Collector (Stub)", version="0.1.0") + + @app.post("/medlog/events/init") + def init(payload: dict): + header = payload.get("header") or {} + event_id = header.get("event_id") + if not event_id: + raise HTTPException(400, "event_id required") + record = { + "header": header, + "model_instance": payload.get("model_instance", {}), + "user_identity": payload.get("user_identity", {}), + "target_identity": payload.get("target_identity"), + "inputs": payload.get("inputs"), + "retention_tier": payload.get("retention_tier", "steady"), + "fragments": [], + } + with STORE_LOCK: + STORE[event_id] = record + return {"status": "ok", "event_id": event_id} + + @app.post("/medlog/events/{event_id}/append") + def append(event_id: str, fragment: dict): + with STORE_LOCK: + record = STORE.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + record["fragments"].append(fragment) + return {"status": "ok", "event_id": event_id} + + @app.get("/medlog/events/{event_id}/prov") + def prov(event_id: str): + with STORE_LOCK: + record = STORE.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + header = record["header"] + return {"event_id": event_id, "provenance": {"header": header}} + + @app.post("/query") + def query(body: dict): + run_id = body.get("run_id") + event_id = body.get("event_id") + limit = body.get("limit", 50) + results = [] + with STORE_LOCK: + for eid, record in STORE.items(): + header = record["header"] + if event_id and event_id != eid: + continue + if run_id and header.get("run_id") != run_id: + continue + results.append({"event_id": eid, "header": header}) + if len(results) >= limit: + break + return {"count": len(results), "results": results} + + @app.post("/export/parquet") + def export(): + return {"status": "ok", "outdir": "/tmp/parquet"} + + return app + + +def build_fhir_app() -> FastAPI: + app = FastAPI(title="MedLog FHIR Stub", version="0.1.0") + + def bundle(records): + return { + "resourceType": "Bundle", + "type": "collection", + "entry": [ + { + "resource": { + 
"resourceType": "Observation", + "id": record["header"]["event_id"], + "status": "final", + } + } + for record in records + ], + } + + @app.get("/bundle/{event_id}") + def bundle_event(event_id: str): + with STORE_LOCK: + record = STORE.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + return bundle([record]) + + @app.get("/bundle/run/{run_id}") + def bundle_run(run_id: str): + with STORE_LOCK: + records = [ + record + for record in STORE.values() + if record["header"].get("run_id") == run_id + ] + if not records: + raise HTTPException(404, "run not found") + return bundle(records) + + return app + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--mode", choices=["collector", "fhir"], required=True) + parser.add_argument("--host", default=os.getenv("MEDLOG_HOST", "127.0.0.1")) + parser.add_argument("--port", type=int, default=int(os.getenv("MEDLOG_PORT", 0)) or 0) + args = parser.parse_args() + + if args.port == 0: + args.port = 8911 if args.mode == "collector" else 8912 + + app = build_collector_app() if args.mode == "collector" else build_fhir_app() + print(f"Starting MedLog {args.mode} stub on {args.host}:{args.port}") + uvicorn.run(app, host=args.host, port=args.port, log_level="info") + + +if __name__ == "__main__": + main() diff --git a/scripts/run_full_demo.py b/scripts/run_full_demo.py new file mode 100644 index 00000000..b06cc6c5 --- /dev/null +++ b/scripts/run_full_demo.py @@ -0,0 +1,704 @@ +#!/usr/bin/env python +""" +End-to-end ToolUniverse demo runner. + +This script bootstraps the MedTok and MedLog reference services locally, points +ToolUniverse at them, and exercises a curated set of tools (MedTok, MedLog, and +several public data tools such as InterPro, KEGG, IUCN, JASPAR, MarineSpecies, +cBioPortal, Phenome Jax). It prints friendly status updates and reports any +failures at the end. + +Usage: + python scripts/run_full_demo.py + +Optional flags: + --skip-network-tools Skip external API tools (InterPro, KEGG, etc.). + --medtok-host HOST Override MedTok host (default 127.0.0.1). + --medtok-port PORT Override MedTok port (default 8910). + --medlog-host HOST Override MedLog host (default 127.0.0.1). + --collector-port PORT Override MedLog collector port (default 8911). + --fhir-port PORT Override MedLog FHIR port (default 8912). 
+""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import tempfile +import threading +import time +from pathlib import Path +from typing import Any, Dict, List, Optional +from urllib.parse import urlparse + +import requests +import uvicorn +from fastapi import FastAPI, HTTPException + +REPO_ROOT = Path(__file__).resolve().parents[1] +SRC_PATH = REPO_ROOT / "src" +if str(SRC_PATH) not in sys.path: + sys.path.insert(0, str(SRC_PATH)) + +from tooluniverse.execute_function import ToolUniverse + +MEDTOK_ROOT = REPO_ROOT.parent / "MedTok-FHIR-Starter" +MEDLOG_ROOT = REPO_ROOT.parent / "medlog-reference" + + +class ServerHandle: + """Run a FastAPI app in a background thread via uvicorn.""" + + def __init__(self, app: FastAPI, host: str, port: int): + config = uvicorn.Config(app, host=host, port=port, log_level="error", lifespan="off") + self.server = uvicorn.Server(config) + self.thread = threading.Thread(target=self.server.run, daemon=True) + + def start(self) -> None: + self.thread.start() + while not self.server.started: + time.sleep(0.05) + + def stop(self) -> None: + self.server.should_exit = True + self.thread.join(timeout=5) + + +def _import_module_typed(module_path: Path): + import importlib.util + + spec = importlib.util.spec_from_file_location(module_path.stem, module_path) + module = importlib.util.module_from_spec(spec) + assert spec and spec.loader + spec.loader.exec_module(module) + return module + + +def _service_is_up(base_url: str, path: str, ok_statuses: Optional[List[int]] = None) -> bool: + try: + resp = requests.get(f"{base_url}{path}", timeout=2) + if ok_statuses is None: + return resp.status_code < 500 + return resp.status_code in ok_statuses + except requests.RequestException: + return False + + +def start_medtok(host: str, port: int): + """Start MedTok FastAPI service and return context info.""" + service_path = MEDTOK_ROOT / "services" / "medtok_service" + if str(service_path) not in sys.path: + sys.path.insert(0, str(service_path)) + + base_url = os.environ.get("MEDTOK_BASE_URL") or f"http://{host}:{port}" + if _service_is_up(base_url, "/health", ok_statuses=[200]): + os.environ["MEDTOK_BASE_URL"] = base_url + print(f"MedTok already running at {base_url}, reusing existing instance.") + return {"server": None, "temp_config": None, "sys_path": str(service_path), "started": False} + + config_path = MEDTOK_ROOT / "config" / "medtok_config.json" + config_data = json.loads(config_path.read_text(encoding="utf-8")) + config_data["code_metadata_path"] = str(MEDTOK_ROOT / "samples" / "code_metadata.csv") + config_data["graph_edges_path"] = str(MEDTOK_ROOT / "samples" / "code_graph_edges.csv") + + tmp_config = tempfile.NamedTemporaryFile("w", suffix="_medtok_config.json", delete=False) + json.dump(config_data, tmp_config) + tmp_config.flush() + tmp_config.close() + os.environ["MEDTOK_CONFIG"] = tmp_config.name + + module = _import_module_typed(service_path / "app.py") + module.MAPPING_CSV = str(MEDTOK_ROOT / "samples" / "code_mapping.csv") + app = module.app + + server = ServerHandle(app, host, port) + server.start() + os.environ["MEDTOK_BASE_URL"] = f"http://{host}:{port}" + + return { + "server": server, + "temp_config": tmp_config.name, + "sys_path": str(service_path), + "started": True, + } + + +def _build_medlog_collector(store: Dict[str, Dict]): + app = FastAPI() + + @app.post("/medlog/events/init") + def init(payload: dict): + header = payload.get("header") or {} + event_id = header.get("event_id") + if not event_id: + raise 
HTTPException(400, "event_id required") + record = { + "header": header, + "model_instance": payload.get("model_instance", {}), + "user_identity": payload.get("user_identity", {}), + "target_identity": payload.get("target_identity"), + "inputs": payload.get("inputs"), + "retention_tier": payload.get("retention_tier", "steady"), + "fragments": [], + } + store[event_id] = record + return {"status": "ok", "event_id": event_id} + + @app.post("/medlog/events/{event_id}/append") + def append(event_id: str, fragment: dict): + record = store.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + record["fragments"].append(fragment) + return {"status": "ok", "event_id": event_id} + + @app.get("/medlog/events/{event_id}/prov") + def prov(event_id: str): + record = store.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + return {"event_id": event_id, "provenance": {"header": record["header"]}} + + @app.post("/query") + def query(body: dict): + run_id = body.get("run_id") + event_id = body.get("event_id") + limit = body.get("limit", 50) + matches = [] + for eid, record in store.items(): + header = record["header"] + if event_id and event_id != eid: + continue + if run_id and header.get("run_id") != run_id: + continue + matches.append({"event_id": eid, "header": header}) + if len(matches) >= limit: + break + return {"count": len(matches), "results": matches} + + @app.post("/export/parquet") + def export(): + return {"status": "ok", "outdir": "/tmp/parquet"} + + return app + + +def _build_medlog_fhir(store: Dict[str, Dict]): + app = FastAPI() + + def _bundle(records): + return { + "resourceType": "Bundle", + "type": "collection", + "entry": [ + { + "resource": { + "resourceType": "Observation", + "id": record["header"]["event_id"], + "status": "final", + } + } + for record in records + ], + } + + @app.get("/bundle/{event_id}") + def bundle_event(event_id: str): + record = store.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + return _bundle([record]) + + @app.get("/bundle/run/{run_id}") + def bundle_run(run_id: str): + records = [ + record + for record in store.values() + if record["header"].get("run_id") == run_id + ] + if not records: + raise HTTPException(404, "run not found") + return _bundle(records) + + return app + + +def start_medlog(host: str, collector_port: int, fhir_port: int): + store: Dict[str, Dict] = {} + collector_app = _build_medlog_collector(store) + fhir_app = _build_medlog_fhir(store) + + collector_url = os.environ.get("MEDLOG_COLLECTOR_BASE_URL") or f"http://{host}:{collector_port}" + fhir_url = os.environ.get("MEDLOG_FHIR_BASE_URL") or f"http://{host}:{fhir_port}" + + collector_server = None + fhir_server = None + + if _service_is_up(collector_url, "/"): + print(f"MedLog collector already running at {collector_url}, reusing.") + else: + collector_server = ServerHandle(collector_app, host, collector_port) + collector_server.start() + + if _service_is_up(fhir_url, "/bundle/test"): + print(f"MedLog FHIR service already running at {fhir_url}, reusing.") + else: + fhir_server = ServerHandle(fhir_app, host, fhir_port) + fhir_server.start() + + os.environ["MEDLOG_COLLECTOR_BASE_URL"] = f"http://{host}:{collector_port}" + os.environ["MEDLOG_FHIR_BASE_URL"] = f"http://{host}:{fhir_port}" + + return {"collector": collector_server, "fhir": fhir_server, "started": bool(collector_server or fhir_server)} + + +def stop_medtok(ctx: Dict[str, str]): + if ctx.get("server"): + ctx["server"].stop() + if 
ctx.get("started"): + os.environ.pop("MEDTOK_BASE_URL", None) + os.environ.pop("MEDTOK_CONFIG", None) + temp_config = ctx.get("temp_config") + if temp_config: + try: + os.remove(temp_config) + except OSError: + pass + sys_path = ctx.get("sys_path") + if sys_path: + try: + sys.path.remove(sys_path) + except ValueError: + pass + + +def stop_medlog(ctx: Dict[str, ServerHandle]): + if ctx.get("collector"): + ctx["collector"].stop() + if ctx.get("fhir"): + ctx["fhir"].stop() + if ctx.get("started"): + os.environ.pop("MEDLOG_COLLECTOR_BASE_URL", None) + os.environ.pop("MEDLOG_FHIR_BASE_URL", None) + + +def preview_json(payload: Any, limit: int = 240) -> str: + """Return a compact preview of a payload for console logging.""" + try: + text = json.dumps(payload, indent=2, ensure_ascii=False) + except TypeError: + text = str(payload) + text = text.strip() + if len(text) > limit: + return text[:limit].rstrip() + "..." + return text + + +def call_tool(tu: ToolUniverse, name: str, **kwargs): + """Call a tool and handle ToolUniverse-specific errors.""" + print(f"---> Calling {name} with {kwargs}") + try: + response = getattr(tu.tools, name)(**kwargs) + print(f"[OK] {name} succeeded") + return True, response + except Exception as exc: # pylint: disable=broad-except + print(f"[FAIL] {name} failed: {exc}") + return False, str(exc) + + +def run_medlog_demo(tu: ToolUniverse) -> List[Dict[str, str]]: + results = [] + header = { + "event_id": "evt-demo-1", + "run_id": "run-demo-1", + "timestamp": "2025-01-01T00:00:00Z", + } + model_instance = {"model": "demo", "version": "1.0"} + user_identity = {"name": "Dr. Example"} + steps = [ + ( + "MedLog_init_event", + dict(header=header, model_instance=model_instance, user_identity=user_identity), + "Open an event with metadata (who, when, which model).", + ), + ( + "MedLog_append_fragment", + dict(event_id="evt-demo-1", fragment={"outputs": {"summary": "Patient stable"}}), + "Attach a fragment that captures model outputs for the event.", + ), + ("MedLog_get_provenance", dict(event_id="evt-demo-1"), "Retrieve provenance header saved for the event."), + ("MedLog_query_events", dict(run_id="run-demo-1"), "Query the store by run identifier."), + ("MedLog_export_parquet", dict(), "Trigger sample export (stub returns static location)."), + ("MedLog_fhir_bundle", dict(event_id="evt-demo-1"), "View the event as a single FHIR Observation bundle."), + ("MedLog_fhir_run_bundle", dict(run_id="run-demo-1"), "Bundle all events in the run as FHIR Observations."), + ] + + for name, kwargs, description in steps: + print(f" - {description}") + success, payload = call_tool(tu, name, **kwargs) + note = None + if success: + if name == "MedLog_init_event": + note = f"Created event {payload.get('event_id')}" + elif name == "MedLog_append_fragment": + note = "Attached fragment with outputs summary" + elif name == "MedLog_get_provenance": + prov = payload.get("provenance", {}) + note = f"Provenance keys: {', '.join(prov.keys()) or 'none'}" + elif name == "MedLog_query_events": + note = f"Query returned {payload.get('count', 0)} rows" + elif name == "MedLog_fhir_bundle": + note = f"Bundle contains {len(payload.get('entry', []))} resources" + elif name == "MedLog_fhir_run_bundle": + note = f"Run bundle resources: {len(payload.get('entry', []))}" + if success and note: + print(f" Result: {note}") + results.append({"tool": name, "success": success, "response": payload, "note": note}) + return results + + +def run_medtok_demo(tu: ToolUniverse) -> List[Dict[str, str]]: + tests = [ + ( + 
"MedTok_tokenize", + dict(codes=["A00", "E11"], system="ICD-10", include_metadata=True), + "Convert ICD-10 codes into internal token IDs plus metadata for downstream models.", + ), + ("MedTok_embed", dict(codes=["A00"], system="ICD-10"), "Generate vector embeddings for a medical code."), + ("MedTok_nearest_neighbors", dict(code="A00", k=3), "Find nearby codes in embedding space."), + ("MedTok_map_text_to_code", dict(text="type 2 diabetes", system="ICD-10"), "Map free text to the closest code."), + ("MedTok_search_text", dict(text="hypertension", k=4), "Search the terminology for matching codes by text."), + ("MedTok_code_info", dict(code="E11", system="ICD-10"), "Fetch descriptive details for a specific code."), + ] + results = [] + for name, kwargs, description in tests: + print(f" - {description}") + success, payload = call_tool(tu, name, **kwargs) + note = None + if success: + if name == "MedTok_tokenize": + note = f"Received {len(payload.get('token_ids', []))} token IDs" + elif name == "MedTok_embed": + emb = payload.get("embeddings") or [] + if emb: + note = f"Embedding dimension {payload.get('dim')}, first vector length {len(emb[0])}" + elif name == "MedTok_nearest_neighbors": + note = f"Returned {len(payload.get('neighbors', []))} neighbors" + elif name == "MedTok_map_text_to_code": + note = f"Mapped text to code {payload.get('code')}" + elif name == "MedTok_search_text": + note = f"Top match code {payload.get('matches', [{}])[0].get('code') if payload.get('matches') else 'N/A'}" + elif name == "MedTok_code_info": + note = f"Code info description: {payload.get('description', 'N/A')}" + if success and note: + print(f" Result: {note}") + results.append({"tool": name, "success": success, "response": payload, "note": note}) + return results + + +NETWORK_TOOLS = [ + ("InterPro_search_entries", {"query": "BRCA1"}), + ("KEGG_find_entries", {"query": "ATP synthase", "database": "pathway"}), + ("IUCN_get_species_status", {"species": "Panthera leo"}), + ("JASPAR_search_motifs", {"query": "SOX2"}), + ("MarineSpecies_lookup", {"scientific_name": "Gadus morhua"}), + ("cBioPortal_search_studies", {"keyword": "breast cancer"}), + ("PhenomeJax_list_projects", {"keyword": "glucose"}), +] + + +def run_network_tools(tu: ToolUniverse) -> List[Dict[str, str]]: + outcomes = [] + for name, kwargs in NETWORK_TOOLS: + success, payload = call_tool(tu, name, **kwargs) + note_parts: List[str] = [] + if success: + if name == "InterPro_search_entries": + data = payload if isinstance(payload, dict) else {} + note_parts.append(f"Entries returned: {len(data.get('results', []))}") + elif name == "KEGG_find_entries": + if isinstance(payload, dict): + note_parts.append(f"Matched {len(payload.get('results', []))} entries") + elif isinstance(payload, list): + note_parts.append(f"Matched {len(payload)} entries") + elif name == "IUCN_get_species_status": + result = payload.get("result") if isinstance(payload, dict) else {} + if isinstance(result, list) and result: + result = result[0] + elif result is None: + result = {} + species = result.get("scientific_name") + category = result.get("category") + note_parts.append(f"{species} status {category}") + elif name == "JASPAR_search_motifs": + data = payload if isinstance(payload, dict) else {} + note_parts.append(f"Found {len(data.get('results', []))} motifs") + elif name == "MarineSpecies_lookup": + data = payload if isinstance(payload, dict) else {} + note_parts.append(f"Matches: {len(data.get('results', []))}") + elif name == "cBioPortal_search_studies": + data = payload 
if isinstance(payload, dict) else {} + note_parts.append(f"Studies returned: {len(data.get('studies', []))}") + elif name == "PhenomeJax_list_projects": + data = payload if isinstance(payload, dict) else {} + note_parts.append(f"Projects listed: {len(data.get('projects', []))}") + + preview = preview_json(payload) + print(f" {name} preview: {preview}") + note_parts.append(f"Preview: {preview}") + else: + print(f" {name} error payload: {preview_json(payload)}") + note = " | ".join(note_parts) if note_parts else None + outcomes.append({"tool": name, "success": success, "response": payload, "note": note}) + return outcomes + + +def _extract_host(candidate: Dict[str, Any]) -> str: + host = candidate.get("host") + if host: + return str(host) + for key in ("url", "endpoint", "base_url"): + maybe = candidate.get(key) + if not maybe: + continue + parsed = urlparse(str(maybe)) + if parsed.netloc: + return parsed.netloc + return "candidate" + + +def _slugify_host(value: str) -> str: + slug = "".join(ch if ch.isalnum() else "_" for ch in value.lower()) + slug = slug.strip("_") + return slug or "candidate" + + +def run_vsd_demo(tu: ToolUniverse) -> List[Dict[str, str]]: + """ + Demonstrate the Harvest -> Register -> Run workflow using Verified Source Directory helpers. + """ + search_query = "ensembl rest api" + print(f"\nSearching harvest catalog for '{search_query}' candidates...") + results: List[Dict[str, Any]] = [] + + success_search, harvest_resp = call_tool( + tu, + "GenericHarvestTool", + query=search_query, + limit=5, + ) + selected_candidate: Optional[Dict[str, Any]] = None + note_search: Optional[str] = None + if success_search: + candidates = (harvest_resp or {}).get("candidates") or [] + note_search = f"Candidates returned: {len(candidates)}" + if candidates: + preferred_hosts = {"rest.ensembl.org", "api.open-meteo.com"} + for candidate_option in candidates: + host = _extract_host(candidate_option).lower() + if host in preferred_hosts: + selected_candidate = candidate_option + break + if not selected_candidate: + selected_candidate = candidates[0] + host = _extract_host(selected_candidate) + print(f" - Selected candidate: {selected_candidate.get('name')} ({selected_candidate.get('url')}) [host: {host}]") + print(f" Candidate preview: {preview_json(selected_candidate)}") + else: + print(" - Harvest returned no candidates.") + else: + print(f" - Harvest search failed payload: {preview_json(harvest_resp)}") + note_search = "Harvest search failed" + results.append({"tool": "GenericHarvestTool", "success": success_search, "response": harvest_resp, "note": note_search}) + + if not (success_search and selected_candidate): + results.append( + { + "tool": "HarvestCandidateTesterTool", + "success": False, + "response": {"error": "No harvest candidate available"}, + "note": "Skipped testing", + } + ) + return results + + candidate = selected_candidate + print("\nTesting harvest candidate via HarvestCandidateTesterTool...") + success_probe, probe_resp = call_tool( + tu, + "HarvestCandidateTesterTool", + candidate=candidate, + ) + probe_note = None + if success_probe: + status = (probe_resp.get("test") or {}).get("status") + probe_note = f"Probe status {status}" + print(f" - Probe preview: {preview_json(probe_resp)}") + else: + print(f" - Probe failure payload: {preview_json(probe_resp)}") + results.append({"tool": "HarvestCandidateTesterTool", "success": success_probe, "response": probe_resp, "note": probe_note}) + + if not (success_probe and probe_resp.get("ok")): + print("Skipping registration 
because candidate probe failed.") + results.append( + { + "tool": "VerifiedSourceRegisterTool", + "success": False, + "response": {"error": "Probe failed"}, + "note": None, + } + ) + return results + + host_slug = _slugify_host(_extract_host(candidate)) + tool_name = f"HarvestDemo_{host_slug[:40]}" + + print("\nRegistering candidate with VerifiedSourceRegisterTool...") + success_reg, register_resp = call_tool( + tu, + "VerifiedSourceRegisterTool", + tool_name=tool_name, + candidate=candidate, + ) + note_reg = None + if success_reg: + config = (register_resp or {}).get("config") or {} + base_url = (config.get("fields") or {}).get("base_url") or config.get("endpoint") + note_reg = f"Registered tool pointing to {base_url}" + print(f" - Registered config preview: {preview_json(config)}") + else: + print(f" - Registration failure payload: {preview_json(register_resp)}") + results.append( + { + "tool": "VerifiedSourceRegisterTool", + "success": success_reg, + "response": register_resp, + "note": note_reg, + } + ) + + if not success_reg: + return results + + print("\nCalling newly registered tool...") + tu.load_tools(include_tools=[tool_name]) + success_run, run_resp = call_tool(tu, tool_name) + note_run = None + if success_run: + preview = preview_json(run_resp) + note_run = f"Preview: {preview}" + print(f" - Run result preview: {preview}") + else: + print(f" - Run failure payload: {preview_json(run_resp)}") + results.append({"tool": tool_name, "success": success_run, "response": run_resp, "note": note_run}) + + print("\nCleaning up registered tool...") + success_rm, rm_resp = call_tool( + tu, + "VerifiedSourceRemoveTool", + tool_name=tool_name, + ) + note_rm = "Removed from catalog" if success_rm else None + if success_rm: + print(f" - Removal confirmation: {preview_json(rm_resp)}") + else: + print(f" - Removal failure payload: {preview_json(rm_resp)}") + results.append({"tool": "VerifiedSourceRemoveTool", "success": success_rm, "response": rm_resp, "note": note_rm}) + + return results + + +def main(): + parser = argparse.ArgumentParser(description="Run ToolUniverse end-to-end demo.") + parser.add_argument("--skip-network-tools", action="store_true", help="Skip tools that require external HTTP APIs.") + parser.add_argument("--skip-vsd", action="store_true", help="Skip harvest/register/run VSD demonstration.") + parser.add_argument("--medtok-host", default="127.0.0.1") + parser.add_argument("--medtok-port", type=int, default=8910) + parser.add_argument("--medlog-host", default="127.0.0.1") + parser.add_argument("--collector-port", type=int, default=8911) + parser.add_argument("--fhir-port", type=int, default=8912) + args = parser.parse_args() + + medtok_ctx = None + medlog_ctx = None + all_results: List[Dict[str, str]] = [] + + try: + print("Starting MedTok service...") + medtok_ctx = start_medtok(args.medtok_host, args.medtok_port) + print(f"MedTok running at {os.environ['MEDTOK_BASE_URL']}") + + print("Starting MedLog services...") + medlog_ctx = start_medlog(args.medlog_host, args.collector_port, args.fhir_port) + print( + f"MedLog collector at {os.environ['MEDLOG_COLLECTOR_BASE_URL']}, " + f"FHIR bridge at {os.environ['MEDLOG_FHIR_BASE_URL']}" + ) + + tu = ToolUniverse(hooks_enabled=False) + tu.load_tools(tool_type=["medtok", "medlog"]) + + print("\nRunning MedTok demo calls...") + all_results.extend(run_medtok_demo(tu)) + + print("\nRunning MedLog demo calls...") + all_results.extend(run_medlog_demo(tu)) + + if not args.skip_network_tools: + print("\nLoading network-enabled tools (InterPro, 
KEGG, IUCN, etc.)...") + categories = [ + "interpro", + "kegg", + "iucn_red_list", + "jaspar", + "marine_species", + "cbioportal", + "phenome_jax", + ] + try: + tu.load_tools(tool_type=categories) + except Exception as exc: # pylint: disable=broad-except + print(f"[WARN] Failed to load network tool categories: {exc}") + else: + print("Running network tool calls...") + all_results.extend(run_network_tools(tu)) + else: + print("\nSkipping external network tools.") + + if not args.skip_vsd: + print("\nHarvest -> Register -> Run walkthrough...") + vsd_results = run_vsd_demo(tu) + all_results.extend(vsd_results) + else: + print("\nSkipping VSD harvest/register/run demo.") + + finally: + if medtok_ctx: + print("\nStopping MedTok service...") + stop_medtok(medtok_ctx) + if medlog_ctx: + print("Stopping MedLog services...") + stop_medlog(medlog_ctx) + + print("\n================ Demo Summary ================") + failures = [r for r in all_results if not r["success"]] + for result in all_results: + status = "PASS" if result["success"] else "FAIL" + print(f"{status:4} | {result['tool']}") + note = result.get("note") + if note: + print(f" {note}") + if not result["success"]: + print(f" -> {result['response']}") + print("=============================================") + + if failures: + print(f"{len(failures)} tool calls failed.") + sys.exit(1) + print("All tool calls succeeded.") + + +if __name__ == "__main__": + main() diff --git a/src/tooluniverse/__init__.py b/src/tooluniverse/__init__.py index 6a001040..96edf7be 100644 --- a/src/tooluniverse/__init__.py +++ b/src/tooluniverse/__init__.py @@ -460,6 +460,7 @@ def __getattr__(self, name): "ODPHPOutlinkFetch", "ContextKeeperTool", "HarvestCandidateTesterTool", + "GenericHarvestTool", "ToolNavigatorTool", "CellosaurusSearchTool", "CellosaurusQueryConverterTool", diff --git a/src/tooluniverse/candidate_tester_tool.py b/src/tooluniverse/candidate_tester_tool.py index d37f95b5..24b742d9 100644 --- a/src/tooluniverse/candidate_tester_tool.py +++ b/src/tooluniverse/candidate_tester_tool.py @@ -1,12 +1,32 @@ from __future__ import annotations -from typing import Any, Dict +from typing import Any, Dict, Optional from .tool_registry import register_tool from .vsd_utils import build_config, probe_config - -@register_tool("HarvestCandidateTesterTool") +HARVEST_CANDIDATE_TESTER_SCHEMA = { + "type": "object", + "properties": { + "candidate": {"type": "object"}, + "tool_type": {"type": "string", "default": "dynamic_rest"}, + "default_params": {"type": "object"}, + "default_headers": {"type": "object"}, + }, + "required": ["candidate"], + "additionalProperties": False, +} + +HARVEST_CANDIDATE_TESTER_CONFIG = { + "name": "HarvestCandidateTesterTool", + "description": "Probe a harvest/VSD candidate endpoint and report JSON readiness without registering it.", + "type": "HarvestCandidateTesterTool", + "category": "special_tools", + "parameter": HARVEST_CANDIDATE_TESTER_SCHEMA, +} + + +@register_tool("HarvestCandidateTesterTool", config=HARVEST_CANDIDATE_TESTER_CONFIG) class HarvestCandidateTesterTool: """ Validate harvest/VSD candidates without registering them. @@ -15,17 +35,10 @@ class HarvestCandidateTesterTool: name = "HarvestCandidateTesterTool" description = "Test a harvest candidate endpoint to see if it returns usable JSON." 
- input_schema = { - "type": "object", - "properties": { - "candidate": {"type": "object"}, - "tool_type": {"type": "string", "default": "dynamic_rest"}, - "default_params": {"type": "object"}, - "default_headers": {"type": "object"}, - }, - "required": ["candidate"], - "additionalProperties": False, - } + input_schema = HARVEST_CANDIDATE_TESTER_SCHEMA + + def __init__(self, tool_config: Optional[Dict[str, Any]] = None) -> None: + self.tool_config = tool_config or {} def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: candidate = arguments.get("candidate") or {} diff --git a/src/tooluniverse/data/medlog_tools.json b/src/tooluniverse/data/medlog_tools.json new file mode 100644 index 00000000..bf5799ce --- /dev/null +++ b/src/tooluniverse/data/medlog_tools.json @@ -0,0 +1,134 @@ +[ + { + "name": "MedLog_init_event", + "description": "Initialize or overwrite a MedLog event record. Supply the 9-field MedLog payload to capture headers, inputs, identities, and initial artifacts.", + "type": "MedLogInitEventTool", + "parameter": { + "type": "object", + "properties": { + "header": { + "type": "object", + "description": "MedLog header block including event_id, timestamps, risk metadata, and parent relationships." + }, + "model_instance": { + "type": "object", + "description": "Model provenance metadata (model name, version, risk posture, vendor, etc.)." + }, + "user_identity": { + "type": "object", + "description": "Information about the requesting user, clinician, or agent." + }, + "target_identity": { + "type": "object", + "description": "Optional target entity such as patient or device identifiers." + }, + "inputs": { + "type": "object", + "description": "Structured input payload captured at initialization." + }, + "retention_tier": { + "type": "string", + "description": "Retention tier label (steady, critical, transient, etc.)." + } + }, + "required": ["header", "model_instance", "user_identity"] + } + }, + { + "name": "MedLog_append_fragment", + "description": "Append outputs, outcomes, artifacts, or feedback fragments to an existing MedLog event.", + "type": "MedLogAppendFragmentTool", + "parameter": { + "type": "object", + "properties": { + "event_id": { + "type": "string", + "description": "Identifier of the event to update." + }, + "fragment": { + "type": "object", + "description": "Fragment payload containing any of internal_artifacts, outputs, outcomes, or user_feedback." + } + }, + "required": ["event_id", "fragment"] + } + }, + { + "name": "MedLog_get_provenance", + "description": "Fetch PROV-JSON bundle for a given event to support audit trails and lineage review.", + "type": "MedLogGetProvenanceTool", + "parameter": { + "type": "object", + "properties": { + "event_id": { + "type": "string", + "description": "Identifier of the event to retrieve." + } + }, + "required": ["event_id"] + } + }, + { + "name": "MedLog_query_events", + "description": "Query MedLog events by run or event identifier. Useful for dashboarding, analytics, and sampling inspection.", + "type": "MedLogQueryEventsTool", + "parameter": { + "type": "object", + "properties": { + "run_id": { + "type": "string", + "description": "Optional run identifier to filter results." + }, + "event_id": { + "type": "string", + "description": "Optional event identifier to narrow results." 
+ }, + "limit": { + "type": "integer", + "description": "Maximum number of rows to return (default 50).", + "minimum": 1, + "maximum": 500 + } + } + } + }, + { + "name": "MedLog_export_parquet", + "description": "Trigger MedLog parquet export to the configured artifact directory.", + "type": "MedLogExportParquetTool", + "parameter": { + "type": "object", + "properties": {} + } + }, + { + "name": "MedLog_fhir_bundle", + "description": "Retrieve the FHIR bundle synthesised for an individual MedLog event (Patient, Practitioner, Device, AuditEvent, Observations, Documents).", + "type": "MedLogFHIRBundleTool", + "parameter": { + "type": "object", + "properties": { + "event_id": { + "type": "string", + "description": "Identifier of the event to export." + } + }, + "required": ["event_id"] + } + }, + { + "name": "MedLog_fhir_run_bundle", + "description": "Aggregate all events in a run into a consolidated FHIR bundle for care-path review.", + "type": "MedLogFHIRRunBundleTool", + "parameter": { + "type": "object", + "properties": { + "run_id": { + "type": "string", + "description": "Run identifier to export." + } + }, + "required": ["run_id"] + } + } +] diff --git a/src/tooluniverse/data/medtok_mcp_tools.json b/src/tooluniverse/data/medtok_mcp_tools.json new file mode 100644 index 00000000..fef79cbf --- /dev/null +++ b/src/tooluniverse/data/medtok_mcp_tools.json @@ -0,0 +1,11 @@ +[ + { + "name": "mcp_auto_loader_medtok", + "description": "Discover and register MedTok tools from a running MedTok MCP server so they can be invoked directly through ToolUniverse.", + "type": "MCPAutoLoaderTool", + "server_url": "http://${MEDTOK_MCP_SERVER_HOST}:9001/mcp", + "tool_prefix": "medtok_", + "auto_register": true, + "required_api_keys": ["MEDTOK_MCP_SERVER_HOST"] + } +] diff --git a/src/tooluniverse/data/medtok_tools.json b/src/tooluniverse/data/medtok_tools.json new file mode 100644 index 00000000..c54fe67b --- /dev/null +++ b/src/tooluniverse/data/medtok_tools.json @@ -0,0 +1,134 @@ +[ + { + "name": "MedTok_tokenize", + "description": "Tokenize one or more medical codes using the MedTok multimodal tokenizer. Useful for exposing token IDs and optional metadata to downstream workflows.", + "type": "MedTokTokenizeTool", + "parameter": { + "type": "object", + "properties": { + "codes": { + "type": "array", + "items": { "type": "string" }, + "description": "List of codes to tokenize (e.g., ICD-10 identifiers)." + }, + "system": { + "type": "string", + "description": "Coding system, defaults to ICD-10." + }, + "include_metadata": { + "type": "boolean", + "description": "Return region-level metadata for each code." + } + }, + "required": ["codes"] + } + }, + { + "name": "MedTok_embed", + "description": "Generate MedTok embeddings for a batch of codes. Returns floating-point vectors suitable for similarity search or downstream ML tasks.", + "type": "MedTokEmbedTool", + "parameter": { + "type": "object", + "properties": { + "codes": { + "type": "array", + "items": { "type": "string" }, + "description": "Codes to embed." + }, + "system": { + "type": "string", + "description": "Coding system, defaults to ICD-10." + } + }, + "required": ["codes"] + } + }, + { + "name": "MedTok_nearest_neighbors", + "description": "Retrieve the nearest neighbours for a code from the MedTok embedding space with similarity scores.", + "type": "MedTokNearestNeighborsTool", + "parameter": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Anchor code for the neighbourhood query." 
+ }, + "system": { + "type": "string", + "description": "Coding system, defaults to ICD-10." + }, + "k": { + "type": "integer", + "description": "Number of neighbours to return (default 5).", + "minimum": 1, + "maximum": 50 + } + }, + "required": ["code"] + } + }, + { + "name": "MedTok_map_text_to_code", + "description": "Map free-text clinical language to the most relevant code using MedTok text semantics.", + "type": "MedTokMapTextTool", + "parameter": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "Clinical description or narrative." + }, + "system": { + "type": "string", + "description": "Target coding system, defaults to ICD-10." + } + }, + "required": ["text"] + } + }, + { + "name": "MedTok_search_text", + "description": "Hybrid text + semantic search over the MedTok vocabulary. Useful for exploratory lookup workflows.", + "type": "MedTokSearchTextTool", + "parameter": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "Query text to search for." + }, + "system": { + "type": ["string", "null"], + "description": "Optional coding system filter." + }, + "k": { + "type": "integer", + "description": "Maximum number of matches (default 5).", + "minimum": 1, + "maximum": 50 + } + }, + "required": ["text"] + } + }, + { + "name": "MedTok_code_info", + "description": "Retrieve metadata for a specific code including synonyms and graph context when available.", + "type": "MedTokCodeInfoTool", + "parameter": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Code identifier to fetch." + }, + "system": { + "type": "string", + "description": "Coding system, defaults to ICD-10." + } + }, + "required": ["code"] + } + } +] diff --git a/src/tooluniverse/default_config.py b/src/tooluniverse/default_config.py index 46095834..982dc50e 100644 --- a/src/tooluniverse/default_config.py +++ b/src/tooluniverse/default_config.py @@ -150,6 +150,11 @@ "genomics": os.path.join(current_dir, "data", "genomics_tools.json"), # Guideline and health policy tools "guidelines": os.path.join(current_dir, "data", "unified_guideline_tools.json"), + "medtok": os.path.join(current_dir, "data", "medtok_tools.json"), + "medtok_mcp_auto_loader": os.path.join( + current_dir, "data", "medtok_mcp_tools.json" + ), + "medlog": os.path.join(current_dir, "data", "medlog_tools.json"), } diff --git a/src/tooluniverse/dynamic_rest_runner.py b/src/tooluniverse/dynamic_rest_runner.py new file mode 100644 index 00000000..a3061d36 --- /dev/null +++ b/src/tooluniverse/dynamic_rest_runner.py @@ -0,0 +1,194 @@ +""" +Dynamic REST/GraphQL tool loader for Verified Source Directory (VSD). + +This module keeps an in-memory registry of generated tool specifications and +exposes helper functions for refreshing, inserting, or removing entries. Tools +are backed by lightweight BaseTool subclasses that issue HTTP requests using +the stored configuration. +""" + +from __future__ import annotations + +import json +import logging +import threading +from typing import Any, Dict, Optional + +import requests + +from .base_tool import BaseTool +from .common_utils import read_json, vsd_generated_path +from .tool_registry import register_config, register_tool + +LOGGER = logging.getLogger("DynamicRESTRunner") +_REGISTRY_LOCK = threading.Lock() +_GENERATED_TOOLS: Dict[str, Dict[str, Any]] = {} + + +def _normalize_spec(spec: Any) -> Dict[str, Dict[str, Any]]: + """ + Accept legacy list or dict formats and normalize to {name: config}. 
+ """ + if isinstance(spec, dict): + if "generated_tools" in spec and isinstance(spec["generated_tools"], list): + return { + item.get("name"): dict(item) + for item in spec["generated_tools"] + if isinstance(item, dict) and item.get("name") + } + return { + name: dict(cfg) + for name, cfg in spec.items() + if isinstance(cfg, dict) + } + + if isinstance(spec, list): + result: Dict[str, Dict[str, Any]] = {} + for item in spec: + if isinstance(item, dict) and item.get("name"): + result[item["name"]] = dict(item) + return result + + return {} + + +def _load_generated_specs() -> Dict[str, Dict[str, Any]]: + path = vsd_generated_path() + data = read_json(path, {}) + return _normalize_spec(data) + + +def _build_request_kwargs(config: Dict[str, Any], arguments: Dict[str, Any]) -> Dict[str, Any]: + fields = config.get("fields", {}) + method = fields.get("method", "GET").upper() + timeout = fields.get("timeout", 30) + headers = fields.get("headers", {}) + default_params = fields.get("default_params", {}) + + params = dict(default_params) + body: Optional[Any] = None + + if method in {"GET", "DELETE"}: + params.update(arguments) + else: + if fields.get("body_format", "json") == "form": + body = dict(arguments) + else: + body = arguments or {} + + kwargs: Dict[str, Any] = { + "method": method, + "url": fields.get("base_url"), + "headers": headers, + "timeout": timeout, + } + if params: + kwargs["params"] = params + if body is not None: + if fields.get("body_format", "json") == "form": + kwargs["data"] = body + else: + kwargs["json"] = body + return kwargs + + +def _handle_response(response: requests.Response) -> Any: + try: + return response.json() + except ValueError: + return { + "status_code": response.status_code, + "text": response.text, + } + + +@register_tool("GenericRESTTool") +class GenericRESTTool(BaseTool): + """ + Generic REST tool generated from a VSD configuration. + """ + + def run(self, arguments=None, stream_callback=None, **_: Any): + arguments = arguments or {} + kwargs = _build_request_kwargs(self.tool_config, arguments) + method = kwargs.pop("method") + url = kwargs.pop("url") + + response = requests.request(method, url, **kwargs) + response.raise_for_status() + result = _handle_response(response) + + if stream_callback: + stream_callback(json.dumps(result)) + return result + + +@register_tool("GenericGraphQLTool") +class GenericGraphQLTool(BaseTool): + """ + Generic GraphQL tool generated from a VSD configuration. 
+ """ + + def run(self, arguments=None, stream_callback=None, **_: Any): + arguments = arguments or {} + fields = self.tool_config.get("fields", {}) + headers = fields.get("headers", {}) + timeout = fields.get("timeout", 30) + payload = { + "query": arguments.get("query") or fields.get("default_query"), + "variables": arguments.get("variables") or fields.get("default_variables", {}), + } + + response = requests.post( + fields.get("base_url"), + json=payload, + headers=headers, + timeout=timeout, + ) + response.raise_for_status() + result = _handle_response(response) + + if stream_callback: + stream_callback(json.dumps(result)) + return result + + +def _register_generated_tool(tool_name: str, config: Dict[str, Any]) -> None: + config = dict(config) + config.setdefault("name", tool_name) + tool_type = config.get("type") or "GenericRESTTool" + + register_config(tool_name, config) + _GENERATED_TOOLS[tool_name] = config + + LOGGER.debug("Registered generated tool %s of type %s", tool_name, tool_type) + + +def refresh_generated_registry() -> Dict[str, Dict[str, Any]]: + """ + Reload generated tool specs from disk and update the runtime registry. + """ + specs = _load_generated_specs() + with _REGISTRY_LOCK: + _GENERATED_TOOLS.clear() + for name, cfg in specs.items(): + _register_generated_tool(name, cfg) + return specs + + +def upsert_generated_tool(tool_name: str, config: Dict[str, Any]) -> Dict[str, Any]: + """ + Insert or update a generated tool in the runtime registry. + """ + with _REGISTRY_LOCK: + _register_generated_tool(tool_name, config) + return _GENERATED_TOOLS[tool_name] + + +def remove_generated_tool(tool_name: str) -> None: + """ + Remove a generated tool from the runtime registry. + """ + with _REGISTRY_LOCK: + _GENERATED_TOOLS.pop(tool_name, None) + LOGGER.debug("Removed generated tool %s", tool_name) diff --git a/src/tooluniverse/logging_config.py b/src/tooluniverse/logging_config.py index 6659f6a1..3cbc36ab 100644 --- a/src/tooluniverse/logging_config.py +++ b/src/tooluniverse/logging_config.py @@ -45,12 +45,12 @@ class ToolUniverseFormatter(logging.Formatter): # Emoji prefixes for different log levels EMOJI_PREFIX = { - "DEBUG": "🔧 ", - "INFO": "ℹ️ ", - "PROGRESS": "⏳ ", - "WARNING": "⚠️ ", - "ERROR": "❌ ", - "CRITICAL": "🚨 ", + "DEBUG": "[DEBUG] ", + "INFO": "[INFO] ", + "PROGRESS": "[PROGRESS] ", + "WARNING": "[WARN] ", + "ERROR": "[ERROR] ", + "CRITICAL": "[CRITICAL] ", } def format(self, record): diff --git a/src/tooluniverse/medlog_tool.py b/src/tooluniverse/medlog_tool.py new file mode 100644 index 00000000..d375a903 --- /dev/null +++ b/src/tooluniverse/medlog_tool.py @@ -0,0 +1,143 @@ +""" +MedLog integration tools. + +These tools expose MedLog collector and FHIR linkage capabilities as native +ToolUniverse tools for event ingestion, querying, and audit retrieval. 
+""" + +from __future__ import annotations + +import os +from typing import Any, Dict + +import requests + +from .base_tool import BaseTool +from .tool_registry import register_tool + + +class _MedLogBaseTool(BaseTool): + """Shared utility methods for MedLog REST integration.""" + + DEFAULT_BASE_URL = "http://localhost:7001" + + def __init__(self, tool_config: Dict[str, Any]): + super().__init__(tool_config) + self.base_url = os.getenv( + "MEDLOG_COLLECTOR_BASE_URL", self.DEFAULT_BASE_URL + ).rstrip("/") + self.session = requests.Session() + + def _post(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]: + url = f"{self.base_url}{path}" + try: + response = self.session.post(url, json=payload, timeout=30) + response.raise_for_status() + return response.json() + except requests.RequestException as exc: # pragma: no cover - network errors + return {"error": f"MedLog collector request failed: {exc}", "endpoint": url} + + def _get(self, path: str) -> Dict[str, Any]: + url = f"{self.base_url}{path}" + try: + response = self.session.get(url, timeout=30) + response.raise_for_status() + return response.json() + except requests.RequestException as exc: # pragma: no cover - network errors + return {"error": f"MedLog collector request failed: {exc}", "endpoint": url} + + +class _MedLogFHIRBaseTool(BaseTool): + """Shared logic for interacting with the MedLog FHIR linkage service.""" + + DEFAULT_FHIR_URL = "http://localhost:7003" + + def __init__(self, tool_config: Dict[str, Any]): + super().__init__(tool_config) + self.fhir_base = os.getenv( + "MEDLOG_FHIR_BASE_URL", self.DEFAULT_FHIR_URL + ).rstrip("/") + self.session = requests.Session() + + def _get(self, path: str) -> Dict[str, Any]: + url = f"{self.fhir_base}{path}" + try: + response = self.session.get(url, timeout=30) + response.raise_for_status() + return response.json() + except requests.RequestException as exc: # pragma: no cover - network errors + return {"error": f"MedLog FHIR request failed: {exc}", "endpoint": url} + + +@register_tool("MedLogInitEventTool") +class MedLogInitEventTool(_MedLogBaseTool): + """Create or update a MedLog event record.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + return self._post("/medlog/events/init", arguments) + + +@register_tool("MedLogAppendFragmentTool") +class MedLogAppendFragmentTool(_MedLogBaseTool): + """Append fragment data (artifacts, outputs, feedback) to an event.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + event_id = arguments.get("event_id") + fragment = arguments.get("fragment", {}) + if not event_id: + return {"error": "Parameter 'event_id' is required."} + return self._post(f"/medlog/events/{event_id}/append", fragment) + + +@register_tool("MedLogGetProvenanceTool") +class MedLogGetProvenanceTool(_MedLogBaseTool): + """Retrieve PROV-JSON bundle for a specific event.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + event_id = arguments.get("event_id") + if not event_id: + return {"error": "Parameter 'event_id' is required."} + return self._get(f"/medlog/events/{event_id}/prov") + + +@register_tool("MedLogQueryEventsTool") +class MedLogQueryEventsTool(_MedLogBaseTool): + """Query MedLog events by run_id or event_id.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + payload = { + "run_id": arguments.get("run_id"), + "event_id": arguments.get("event_id"), + "limit": arguments.get("limit", 50), + } + return self._post("/query", payload) + + +@register_tool("MedLogExportParquetTool") +class 
MedLogExportParquetTool(_MedLogBaseTool): + """Trigger a parquet export of MedLog events.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + return self._post("/export/parquet", {}) + + +@register_tool("MedLogFHIRBundleTool") +class MedLogFHIRBundleTool(_MedLogFHIRBaseTool): + """Fetch FHIR bundle for a specific event.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + event_id = arguments.get("event_id") + if not event_id: + return {"error": "Parameter 'event_id' is required."} + return self._get(f"/bundle/{event_id}") + + +@register_tool("MedLogFHIRRunBundleTool") +class MedLogFHIRRunBundleTool(_MedLogFHIRBaseTool): + """Fetch FHIR bundle aggregating all events in a run.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + run_id = arguments.get("run_id") + if not run_id: + return {"error": "Parameter 'run_id' is required."} + return self._get(f"/bundle/run/{run_id}") diff --git a/src/tooluniverse/medtok_tool.py b/src/tooluniverse/medtok_tool.py new file mode 100644 index 00000000..1bd4042f --- /dev/null +++ b/src/tooluniverse/medtok_tool.py @@ -0,0 +1,122 @@ +""" +MedTok integration tools. + +These tools provide a thin wrapper around the MedTok FastAPI service so that +ToolUniverse users can tokenize, embed, and explore medical codes directly +from the unified tool catalog. +""" + +from __future__ import annotations + +import os +from typing import Any, Dict + +import requests + +from .base_tool import BaseTool +from .tool_registry import register_tool + + +class _MedTokBaseTool(BaseTool): + """Shared utilities for MedTok REST integrations.""" + + DEFAULT_BASE_URL = "http://localhost:8000" + + def __init__(self, tool_config: Dict[str, Any]): + super().__init__(tool_config) + self.base_url = os.getenv("MEDTOK_BASE_URL", self.DEFAULT_BASE_URL).rstrip("/") + self.session = requests.Session() + + def _post(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]: + url = f"{self.base_url}{path}" + try: + response = self.session.post(url, json=payload, timeout=30) + response.raise_for_status() + return response.json() + except requests.RequestException as exc: # pragma: no cover - network errors + return {"error": f"MedTok request failed: {exc}", "endpoint": url} + + def _get(self, path: str) -> Dict[str, Any]: + url = f"{self.base_url}{path}" + try: + response = self.session.get(url, timeout=30) + response.raise_for_status() + return response.json() + except requests.RequestException as exc: # pragma: no cover - network errors + return {"error": f"MedTok request failed: {exc}", "endpoint": url} + + +@register_tool("MedTokTokenizeTool") +class MedTokTokenizeTool(_MedTokBaseTool): + """Tokenize medical codes using MedTok multimodal tokenizer.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + payload = { + "codes": arguments.get("codes", []), + "system": arguments.get("system", "ICD-10"), + "include_metadata": arguments.get("include_metadata", False), + } + return self._post("/tokenize", payload) + + +@register_tool("MedTokEmbedTool") +class MedTokEmbedTool(_MedTokBaseTool): + """Generate token embeddings for a batch of codes.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + payload = { + "codes": arguments.get("codes", []), + "system": arguments.get("system", "ICD-10"), + } + return self._post("/embed", payload) + + +@register_tool("MedTokNearestNeighborsTool") +class MedTokNearestNeighborsTool(_MedTokBaseTool): + """Retrieve nearest neighbours for a code in embedding space.""" + + def run(self, 
arguments: Dict[str, Any]) -> Dict[str, Any]: + payload = { + "code": arguments.get("code"), + "k": arguments.get("k", 5), + "system": arguments.get("system", "ICD-10"), + } + return self._post("/nearest_neighbors", payload) + + +@register_tool("MedTokMapTextTool") +class MedTokMapTextTool(_MedTokBaseTool): + """Map free-text description to the closest medical code.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + payload = { + "text": arguments.get("text", ""), + "system": arguments.get("system", "ICD-10"), + } + return self._post("/map_text_to_code", payload) + + +@register_tool("MedTokSearchTextTool") +class MedTokSearchTextTool(_MedTokBaseTool): + """Perform text and semantic search across the code vocabulary.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + payload = { + "text": arguments.get("text", ""), + "system": arguments.get("system"), + "k": arguments.get("k", 5), + } + return self._post("/search_text", payload) + + +@register_tool("MedTokCodeInfoTool") +class MedTokCodeInfoTool(_MedTokBaseTool): + """Fetch detailed metadata for a specific code.""" + + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + system = arguments.get("system", "ICD-10") + code = arguments.get("code") + if not code: + return {"error": "Parameter 'code' is required."} + path = f"/codes/{system}/{code}" + return self._get(path) diff --git a/src/tooluniverse/utils.py b/src/tooluniverse/utils.py index 88e778fb..ff4867e2 100755 --- a/src/tooluniverse/utils.py +++ b/src/tooluniverse/utils.py @@ -136,7 +136,7 @@ def read_json_list(file_path): Returns list: A list of dictionaries containing the JSON objects. """ - with open(file_path, "r") as file: + with open(file_path, "r", encoding="utf-8") as file: data = json.load(file) return data diff --git a/src/tooluniverse/vsd_tool.py b/src/tooluniverse/vsd_tool.py index b765f8fe..98a09e24 100644 --- a/src/tooluniverse/vsd_tool.py +++ b/src/tooluniverse/vsd_tool.py @@ -1,28 +1,157 @@ from __future__ import annotations -from typing import Any, Dict +from typing import Any, Dict, Optional, List +from urllib.parse import urlparse from .tool_registry import register_tool from .vsd_registry import load_catalog, save_catalog, upsert_tool from .dynamic_rest_runner import refresh_generated_registry, remove_generated_tool from .vsd_utils import build_config, probe_config, stamp_metadata +from .harvest.static_catalog import harvest as harvest_static + +GENERIC_HARVEST_SCHEMA = { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Free-text search term passed to the harvest catalog.", + }, + "limit": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 5, + "description": "Maximum number of candidates to return.", + }, + "urls": { + "type": "array", + "items": {"type": "string", "format": "uri"}, + "description": "Optional explicit URLs to wrap as manual candidates (skips catalog search).", + }, + }, + "additionalProperties": False, +} + +GENERIC_HARVEST_CONFIG = { + "name": "GenericHarvestTool", + "description": "Search the harvest catalog (or wrap manual URLs) to produce candidate API endpoints.", + "type": "GenericHarvestTool", + "category": "special_tools", + "parameter": GENERIC_HARVEST_SCHEMA, +} + +VERIFIED_SOURCE_REGISTER_SCHEMA = { + "type": "object", + "properties": { + "tool_name": {"type": "string"}, + "tool_type": {"type": "string", "default": "dynamic_rest"}, + "candidate": {"type": "object"}, + "default_params": {"type": "object"}, + "default_headers": {"type": 
"object"}, + "force": {"type": "boolean", "default": False}, + }, + "required": ["tool_name", "candidate"], +} + +VERIFIED_SOURCE_REGISTER_CONFIG = { + "name": "VerifiedSourceRegisterTool", + "description": "Register a DynamicREST tool into the verified-source catalog after probing it.", + "type": "VerifiedSourceRegisterTool", + "category": "special_tools", + "parameter": VERIFIED_SOURCE_REGISTER_SCHEMA, +} + +VERIFIED_SOURCE_DISCOVERY_CONFIG = { + "name": "VerifiedSourceDiscoveryTool", + "description": "List the tools currently stored in the verified-source catalog.", + "type": "VerifiedSourceDiscoveryTool", + "category": "special_tools", + "parameter": { + "type": "object", + "properties": {}, + "additionalProperties": False, + }, +} + +VERIFIED_SOURCE_REMOVE_SCHEMA = { + "type": "object", + "properties": { + "tool_name": {"type": "string"}, + }, + "required": ["tool_name"], +} + +VERIFIED_SOURCE_REMOVE_CONFIG = { + "name": "VerifiedSourceRemoveTool", + "description": "Remove a generated tool from the verified-source catalog.", + "type": "VerifiedSourceRemoveTool", + "category": "special_tools", + "parameter": VERIFIED_SOURCE_REMOVE_SCHEMA, +} + + +@register_tool("GenericHarvestTool", config=GENERIC_HARVEST_CONFIG) +class GenericHarvestTool: + name = "GenericHarvestTool" + description = "Harvest candidate API endpoints from the static catalog or wrap manual URLs." + input_schema = GENERIC_HARVEST_SCHEMA + + def __init__(self, tool_config: Optional[Dict[str, Any]] = None) -> None: + self.tool_config = tool_config or {} - + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + query = (arguments.get("query") or "").strip() + limit_value = arguments.get("limit", 5) + try: + limit = int(limit_value) + except (TypeError, ValueError): + limit = 5 + limit = max(1, min(limit, 50)) + urls = arguments.get("urls") or [] + + candidates: List[Dict[str, Any]] = [] + + if urls: + for idx, raw_url in enumerate(urls): + if not raw_url: + continue + parsed = urlparse(str(raw_url)) + host = parsed.netloc.lower() + base_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else raw_url + name = host or f"manual_candidate_{idx + 1}" + candidates.append( + { + "name": name, + "endpoint": raw_url, + "url": raw_url, + "base_url": base_url, + "host": host, + "source": "manual_urls", + "description": arguments.get("description") or "", + "trust": 0.5, + "health": {"ok": None, "status": None, "checked": "manual"}, + } + ) + else: + extra_args = {k: v for k, v in arguments.items() if k not in {"query", "limit", "urls"}} + candidates = harvest_static(query=query, limit=limit, **extra_args) + + return { + "ok": True, + "query": query, + "count": len(candidates), + "candidates": candidates, + } + + +@register_tool("VerifiedSourceRegisterTool", config=VERIFIED_SOURCE_REGISTER_CONFIG) class VerifiedSourceRegisterTool: name = "VerifiedSourceRegisterTool" description = "Register a DynamicREST tool in the verified-source directory" - input_schema = { - "type": "object", - "properties": { - "tool_name": {"type": "string"}, - "tool_type": {"type": "string", "default": "dynamic_rest"}, - "candidate": {"type": "object"}, - "default_params": {"type": "object"}, - "default_headers": {"type": "object"}, - "force": {"type": "boolean", "default": False}, - }, - "required": ["tool_name", "candidate"], - } + input_schema = VERIFIED_SOURCE_REGISTER_SCHEMA + + def __init__(self, tool_config: Optional[Dict[str, Any]] = None) -> None: + self.tool_config = tool_config or {} def __call__( self, @@ -71,25 
+200,27 @@ def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: ) +@register_tool("VerifiedSourceDiscoveryTool", config=VERIFIED_SOURCE_DISCOVERY_CONFIG) class VerifiedSourceDiscoveryTool: name = "VerifiedSourceDiscoveryTool" description = "Return the Verified-Source catalog." + def __init__(self, tool_config: Optional[Dict[str, Any]] = None) -> None: + self.tool_config = tool_config or {} + def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: catalog = load_catalog() return {"ok": True, "tools": list(catalog.values())} +@register_tool("VerifiedSourceRemoveTool", config=VERIFIED_SOURCE_REMOVE_CONFIG) class VerifiedSourceRemoveTool: name = "VerifiedSourceRemoveTool" description = "Remove a generated tool from the Verified-Source catalog." - input_schema = { - "type": "object", - "properties": { - "tool_name": {"type": "string"}, - }, - "required": ["tool_name"], - } + input_schema = VERIFIED_SOURCE_REMOVE_SCHEMA + + def __init__(self, tool_config: Optional[Dict[str, Any]] = None) -> None: + self.tool_config = tool_config or {} def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: tool_name = arguments.get("tool_name") diff --git a/src/tooluniverse/vsd_utils.py b/src/tooluniverse/vsd_utils.py index a30c12dd..3f3250e7 100644 --- a/src/tooluniverse/vsd_utils.py +++ b/src/tooluniverse/vsd_utils.py @@ -79,12 +79,16 @@ def _apply_overrides(candidate: Dict[str, Any], cfg: Dict[str, Any]) -> None: overrides = HOST_OVERRIDES.get(host) if overrides: + fields = cfg.setdefault("fields", {}) if overrides.get("endpoint"): cfg["endpoint"] = overrides["endpoint"] + fields["base_url"] = overrides["endpoint"] if overrides.get("default_params"): cfg.setdefault("default_params", {}).update(overrides["default_params"]) + fields.setdefault("default_params", {}).update(overrides["default_params"]) if overrides.get("default_headers"): cfg.setdefault("default_headers", {}).update(overrides["default_headers"]) + fields.setdefault("headers", {}).update(overrides["default_headers"]) if overrides.get("notes"): cfg.setdefault("metadata", {}).setdefault("notes", []).append(overrides["notes"]) @@ -100,6 +104,7 @@ def _apply_overrides(candidate: Dict[str, Any], cfg: Dict[str, Any]) -> None: ) if requirements.get("default_headers"): cfg.setdefault("default_headers", {}).update(requirements["default_headers"]) + cfg.setdefault("fields", {}).setdefault("headers", {}).update(requirements["default_headers"]) # ------------------------------------------------------------------------------ @@ -120,15 +125,43 @@ def build_config( merged_params = deepcopy(candidate.get("default_params") or candidate.get("params") or {}) merged_headers = deepcopy(candidate.get("default_headers") or candidate.get("headers") or {}) - cfg: Dict[str, Any] = { - "type": tool_type, - "endpoint": endpoint, + # Allow overrides provided via arguments + if default_params: + merged_params.update(default_params) + if default_headers: + merged_headers.update(default_headers) + + # Determine implementation class + declared_type = str(candidate.get("tool_type") or tool_type or "").lower() + impl_type = "GenericRESTTool" + if declared_type in {"graphql", "genericgraphqltool", "graph_ql"} or endpoint.endswith(".graphql"): + impl_type = "GenericGraphQLTool" + + # Provide a permissive parameter schema with defaults from known params + parameter_schema: Dict[str, Any] = deepcopy(candidate.get("parameter_schema") or candidate.get("parameter") or {}) + if not parameter_schema: + properties = { + key: {"description": f"Override default query parameter 
'{key}'", "default": value} + for key, value in merged_params.items() + } + parameter_schema = { + "type": "object", + "properties": properties, + "additionalProperties": True, + } + + fields: Dict[str, Any] = { + "base_url": endpoint, "method": method, "default_params": merged_params, - "default_headers": merged_headers, - "auth": candidate.get("auth") or {"type": "none"}, + "headers": merged_headers, + } + + cfg: Dict[str, Any] = { + "type": impl_type, "description": candidate.get("description") or "", - "tool_type": candidate.get("tool_type") or "dynamic_rest", + "fields": fields, + "parameter": parameter_schema, "metadata": { "source": candidate.get("source"), "trust": candidate.get("trust"), @@ -138,17 +171,19 @@ def build_config( "host": candidate.get("host"), }, "vsd": candidate, + # Backwards compatibility fields expected by older utilities + "tool_type": candidate.get("tool_type") or tool_type or "dynamic_rest", + "endpoint": endpoint, + "method": method, + "default_params": merged_params, + "default_headers": merged_headers, + "auth": candidate.get("auth") or {"type": "none"}, } response_key = candidate.get("response_key") if response_key: cfg["response_key"] = response_key - if default_params: - cfg["default_params"].update(default_params) - if default_headers: - cfg["default_headers"].update(default_headers) - _apply_overrides(candidate, cfg) return cfg @@ -159,10 +194,11 @@ def probe_config(cfg: Dict[str, Any]) -> Dict[str, Any]: Execute a lightweight HTTP request to validate the generated configuration. Returns diagnostic information including HTTP status and a JSON snippet if available. """ - url = cfg.get("endpoint") - method = (cfg.get("method") or "GET").upper() - params = deepcopy(cfg.get("default_params") or {}) - headers = deepcopy(cfg.get("default_headers") or {}) + fields = cfg.get("fields") or {} + url = cfg.get("endpoint") or fields.get("base_url") + method = (fields.get("method") or cfg.get("method") or "GET").upper() + params = deepcopy(fields.get("default_params") or cfg.get("default_params") or {}) + headers = deepcopy(fields.get("headers") or cfg.get("default_headers") or {}) headers.setdefault("Accept", "application/json") try: diff --git a/tests/integration/test_medtok_medlog_tools.py b/tests/integration/test_medtok_medlog_tools.py new file mode 100644 index 00000000..a708ecf8 --- /dev/null +++ b/tests/integration/test_medtok_medlog_tools.py @@ -0,0 +1,282 @@ +import importlib.util +import json +import os +import sys +import tempfile +import threading +import time +from pathlib import Path + +import pytest +import uvicorn +from fastapi import FastAPI, HTTPException + +from tooluniverse.execute_function import ToolUniverse + + +class _ServerHandle: + """Utility wrapper for running uvicorn servers in tests.""" + + def __init__(self, app: FastAPI, host: str, port: int): + config = uvicorn.Config( + app, host=host, port=port, log_level="error", lifespan="off" + ) + self.server = uvicorn.Server(config) + self.thread = threading.Thread(target=self.server.run, daemon=True) + + def start(self) -> None: + self.thread.start() + while not self.server.started: + time.sleep(0.05) + + def stop(self) -> None: + self.server.should_exit = True + self.thread.join(timeout=5) + + +def _import_medtok_app(module_path: Path): + spec = importlib.util.spec_from_file_location("medtok_service_app", module_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module + + +@pytest.fixture(scope="session") +def 
medtok_server(): + repo_root = Path(__file__).resolve().parents[3] + medtok_root = repo_root / "MedTok-FHIR-Starter" + service_dir = medtok_root / "services" / "medtok_service" + sys.path.insert(0, str(service_dir)) + + base_config_path = medtok_root / "config" / "medtok_config.json" + config_data = json.loads(base_config_path.read_text(encoding="utf-8")) + config_data["code_metadata_path"] = str( + medtok_root / "samples" / "code_metadata.csv" + ) + config_data["graph_edges_path"] = str( + medtok_root / "samples" / "code_graph_edges.csv" + ) + tmp_config = tempfile.NamedTemporaryFile( + "w", suffix="_medtok_config.json", delete=False + ) + json.dump(config_data, tmp_config) + tmp_config.flush() + tmp_config.close() + os.environ["MEDTOK_CONFIG"] = tmp_config.name + + module = _import_medtok_app(service_dir / "app.py") + module.MAPPING_CSV = str(medtok_root / "samples" / "code_mapping.csv") + app = module.app + + host = "127.0.0.1" + port = 8910 + server = _ServerHandle(app, host, port) + server.start() + + base_url = f"http://{host}:{port}" + os.environ["MEDTOK_BASE_URL"] = base_url + + yield base_url + + server.stop() + os.environ.pop("MEDTOK_BASE_URL", None) + os.environ.pop("MEDTOK_CONFIG", None) + try: + os.remove(tmp_config.name) + except FileNotFoundError: + pass + sys.path.remove(str(service_dir)) + + +def _build_medlog_collector(store): + app = FastAPI() + + @app.post("/medlog/events/init") + def init(payload: dict): + header = payload.get("header") or {} + event_id = header.get("event_id") + if not event_id: + raise HTTPException(400, "event_id required") + record = { + "header": header, + "model_instance": payload.get("model_instance", {}), + "user_identity": payload.get("user_identity", {}), + "target_identity": payload.get("target_identity"), + "inputs": payload.get("inputs"), + "retention_tier": payload.get("retention_tier", "steady"), + "fragments": [], + } + store[event_id] = record + return {"status": "ok", "event_id": event_id} + + @app.post("/medlog/events/{event_id}/append") + def append(event_id: str, fragment: dict): + record = store.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + record["fragments"].append(fragment) + return {"status": "ok", "event_id": event_id} + + @app.get("/medlog/events/{event_id}/prov") + def prov(event_id: str): + record = store.get(event_id) + if record is None: + raise HTTPException(404, "event not found") + return {"event_id": event_id, "provenance": {"header": record["header"]}} + + @app.post("/query") + def query(body: dict): + run_id = body.get("run_id") + event_id = body.get("event_id") + limit = body.get("limit", 50) + matches = [] + for eid, record in store.items(): + header = record["header"] + if event_id and event_id != eid: + continue + if run_id and header.get("run_id") != run_id: + continue + matches.append({"event_id": eid, "header": header}) + if len(matches) >= limit: + break + return {"count": len(matches), "results": matches} + + @app.post("/export/parquet") + def export(): + return {"status": "ok", "outdir": "/tmp/parquet"} + + return app + + +def _build_medlog_fhir(store): + app = FastAPI() + + def _bundle_for_records(records): + entries = [] + for rec in records: + entries.append( + { + "resource": { + "resourceType": "Observation", + "id": rec["header"]["event_id"], + "status": "final", + } + } + ) + return {"resourceType": "Bundle", "type": "collection", "entry": entries} + + @app.get("/bundle/{event_id}") + def bundle(event_id: str): + record = store.get(event_id) + if record is None: 
+ raise HTTPException(404, "event not found") + return _bundle_for_records([record]) + + @app.get("/bundle/run/{run_id}") + def bundle_run(run_id: str): + records = [ + record + for record in store.values() + if record["header"].get("run_id") == run_id + ] + if not records: + raise HTTPException(404, "run not found") + return _bundle_for_records(records) + + return app + + +@pytest.fixture(scope="session") +def medlog_servers(): + store = {} + host = "127.0.0.1" + collector_port = 8911 + fhir_port = 8912 + + collector_app = _build_medlog_collector(store) + fhir_app = _build_medlog_fhir(store) + + collector = _ServerHandle(collector_app, host, collector_port) + fhir = _ServerHandle(fhir_app, host, fhir_port) + collector.start() + fhir.start() + + os.environ["MEDLOG_COLLECTOR_BASE_URL"] = f"http://{host}:{collector_port}" + os.environ["MEDLOG_FHIR_BASE_URL"] = f"http://{host}:{fhir_port}" + + yield store + + collector.stop() + fhir.stop() + os.environ.pop("MEDLOG_COLLECTOR_BASE_URL", None) + os.environ.pop("MEDLOG_FHIR_BASE_URL", None) + + +def test_medtok_rest_tools(medtok_server): + tu = ToolUniverse(hooks_enabled=False) + tu.load_tools(tool_type=["medtok"]) + + tokenize = tu.tools.MedTok_tokenize( + codes=["A00", "E11"], system="ICD-10", include_metadata=True + ) + token_ids = tokenize.get("token_ids", []) + assert isinstance(token_ids, list) + assert len(token_ids) in (0, 2) + + embed = tu.tools.MedTok_embed(codes=["A00"], system="ICD-10") + embeddings = embed.get("embeddings", []) + if embeddings: + assert isinstance(embeddings[0], list) + assert embed.get("dim") == len(embeddings[0]) + + neighbors = tu.tools.MedTok_nearest_neighbors(code="A00", k=3) + neighbor_list = neighbors.get("neighbors", []) + assert len(neighbor_list) <= 3 + + mapped = tu.tools.MedTok_map_text_to_code(text="type 2 diabetes", system="ICD-10") + assert "code" in mapped + + search = tu.tools.MedTok_search_text(text="hypertension", k=4) + assert len(search.get("matches", [])) <= 4 + + code_info = tu.tools.MedTok_code_info(code="E11", system="ICD-10") + assert isinstance(code_info, dict) + + +def test_medlog_tools_workflow(medlog_servers): + tu = ToolUniverse(hooks_enabled=False) + tu.load_tools(tool_type=["medlog"]) + + header = { + "event_id": "evt-1", + "run_id": "run-123", + "timestamp": "2025-01-01T00:00:00Z", + } + model_instance = {"model": "demo", "version": "1.0"} + user_identity = {"name": "Dr. 
Example"} + + init_resp = tu.tools.MedLog_init_event( + header=header, model_instance=model_instance, user_identity=user_identity + ) + assert init_resp["status"] == "ok" + + fragment = {"outputs": {"summary": "Patient stable"}} + append_resp = tu.tools.MedLog_append_fragment(event_id="evt-1", fragment=fragment) + assert append_resp["status"] == "ok" + + prov_resp = tu.tools.MedLog_get_provenance(event_id="evt-1") + assert prov_resp["event_id"] == "evt-1" + + query_resp = tu.tools.MedLog_query_events(run_id="run-123") + assert query_resp["count"] == 1 + assert query_resp["results"][0]["event_id"] == "evt-1" + + export_resp = tu.tools.MedLog_export_parquet() + assert export_resp["status"] == "ok" + + bundle_resp = tu.tools.MedLog_fhir_bundle(event_id="evt-1") + assert bundle_resp["resourceType"] == "Bundle" + + run_bundle_resp = tu.tools.MedLog_fhir_run_bundle(run_id="run-123") + assert len(run_bundle_resp["entry"]) == 1 From e32eff7efa6e0b58be70ae35585a71c568875c5f Mon Sep 17 00:00:00 2001 From: SufianTA Date: Sun, 26 Oct 2025 19:22:46 -0700 Subject: [PATCH 3/8] Stub MedTok service in integration tests --- tests/integration/test_medtok_medlog_tools.py | 175 +++++++++++++----- 1 file changed, 133 insertions(+), 42 deletions(-) diff --git a/tests/integration/test_medtok_medlog_tools.py b/tests/integration/test_medtok_medlog_tools.py index a708ecf8..1d1cdadf 100644 --- a/tests/integration/test_medtok_medlog_tools.py +++ b/tests/integration/test_medtok_medlog_tools.py @@ -1,11 +1,6 @@ -import importlib.util -import json import os -import sys -import tempfile import threading import time -from pathlib import Path import pytest import uvicorn @@ -34,40 +29,142 @@ def stop(self) -> None: self.thread.join(timeout=5) -def _import_medtok_app(module_path: Path): - spec = importlib.util.spec_from_file_location("medtok_service_app", module_path) - module = importlib.util.module_from_spec(spec) - assert spec.loader is not None - spec.loader.exec_module(module) - return module - - @pytest.fixture(scope="session") def medtok_server(): - repo_root = Path(__file__).resolve().parents[3] - medtok_root = repo_root / "MedTok-FHIR-Starter" - service_dir = medtok_root / "services" / "medtok_service" - sys.path.insert(0, str(service_dir)) - - base_config_path = medtok_root / "config" / "medtok_config.json" - config_data = json.loads(base_config_path.read_text(encoding="utf-8")) - config_data["code_metadata_path"] = str( - medtok_root / "samples" / "code_metadata.csv" - ) - config_data["graph_edges_path"] = str( - medtok_root / "samples" / "code_graph_edges.csv" - ) - tmp_config = tempfile.NamedTemporaryFile( - "w", suffix="_medtok_config.json", delete=False - ) - json.dump(config_data, tmp_config) - tmp_config.flush() - tmp_config.close() - os.environ["MEDTOK_CONFIG"] = tmp_config.name + """ + Launch a minimal in-memory MedTok stub so MedTokTool wrappers can be tested + without cloning the full MedTok repository. 
+ """ + + codes = { + "ICD-10": { + "A00": { + "code": "A00", + "system": "ICD-10", + "name": "Cholera", + "description": "Infection caused by Vibrio cholerae", + "aliases": ["cholera"], + "embedding": [0.9, 0.05, 0.05, 0.0], + "token_id": 101, + }, + "E11": { + "code": "E11", + "system": "ICD-10", + "name": "Type 2 diabetes mellitus", + "description": "Chronic condition impacting glucose metabolism", + "aliases": ["type 2 diabetes", "diabetes"], + "embedding": [0.1, 0.8, 0.1, 0.0], + "token_id": 202, + }, + "I10": { + "code": "I10", + "system": "ICD-10", + "name": "Essential (primary) hypertension", + "description": "Persistently high blood pressure", + "aliases": ["hypertension", "high blood pressure"], + "embedding": [0.05, 0.05, 0.85, 0.05], + "token_id": 303, + }, + } + } + + def _build_stub_app() -> FastAPI: + app = FastAPI(title="MedTok Stub Service", version="0.0.1") + + def _normalise_system(system: str) -> str: + return (system or "ICD-10").upper() + + def _fetch_code(system: str, code: str): + return codes.get(system, {}).get(code) + + def _match_text(system: str, text: str): + text_lower = (text or "").lower() + for record in codes.get(system, {}).values(): + if text_lower in record["code"].lower(): + return record + for alias in record.get("aliases", []): + if text_lower in alias.lower(): + return record + values = list(codes.get(system, {}).values()) + return values[0] if values else None + + @app.post("/tokenize") + def tokenize(payload: dict): + system = _normalise_system(payload.get("system")) + include_metadata = bool(payload.get("include_metadata")) + token_ids = [] + metadata = [] + for code in payload.get("codes", []): + record = _fetch_code(system, code) + if not record: + continue + token_ids.append(record["token_id"]) + if include_metadata: + metadata.append(record) + response = {"token_ids": token_ids} + if include_metadata: + response["metadata"] = metadata + return response + + @app.post("/embed") + def embed(payload: dict): + embeddings = [] + for code in payload.get("codes", []): + record = _fetch_code(_normalise_system(payload.get("system")), code) + if record: + embeddings.append(record["embedding"]) + dim = len(embeddings[0]) if embeddings else 0 + return {"embeddings": embeddings, "dim": dim} + + @app.post("/nearest_neighbors") + def nearest_neighbors(payload: dict): + neighbors = [ + {"code": "E11", "score": 0.42}, + {"code": "I10", "score": 0.33}, + {"code": "A00", "score": 0.28}, + ] + k = max(1, min(int(payload.get("k", 5)), len(neighbors))) + return {"code": payload.get("code"), "neighbors": neighbors[:k]} + + @app.post("/map_text_to_code") + def map_text(payload: dict): + system = _normalise_system(payload.get("system")) + match = _match_text(system, payload.get("text")) + if not match: + raise HTTPException(404, "No matching code found") + return { + "code": match["code"], + "system": match["system"], + "name": match["name"], + } + + @app.post("/search_text") + def search_text(payload: dict): + system = _normalise_system(payload.get("system")) + match = _match_text(system, payload.get("text")) + results = [] + if match: + results.append( + { + "code": match["code"], + "system": match["system"], + "description": match["description"], + "score": 0.9, + } + ) + k = int(payload.get("k", 5)) + return {"query": payload.get("text"), "matches": results[:k]} + + @app.get("/codes/{system}/{code}") + def code_info(system: str, code: str): + record = _fetch_code(_normalise_system(system), code) + if not record: + raise HTTPException(404, "Code not found") + 
return record + + return app - module = _import_medtok_app(service_dir / "app.py") - module.MAPPING_CSV = str(medtok_root / "samples" / "code_mapping.csv") - app = module.app + app = _build_stub_app() host = "127.0.0.1" port = 8910 @@ -81,12 +178,6 @@ def medtok_server(): server.stop() os.environ.pop("MEDTOK_BASE_URL", None) - os.environ.pop("MEDTOK_CONFIG", None) - try: - os.remove(tmp_config.name) - except FileNotFoundError: - pass - sys.path.remove(str(service_dir)) def _build_medlog_collector(store): From 1981dc99f274bc97ba30be3f2dce8d82e8992539 Mon Sep 17 00:00:00 2001 From: SufianTA Date: Sun, 26 Oct 2025 20:00:31 -0700 Subject: [PATCH 4/8] Update Readme --- README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/README.md b/README.md index 7ab04aec..9692da83 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,44 @@ Our comprehensive documentation covers everything from quick start to advanced w - **[Adding Tools Tutorial](https://zitniklab.hms.harvard.edu/ToolUniverse/tutorials/addtools/Adding_Tools_Tutorial.html)**: Step-by-step tool addition guide - **[MCP Tool Registration](https://zitniklab.hms.harvard.edu/ToolUniverse/tutorials/addtools/mcp_tool_registration_en.html)**: Register tools via MCP +### Verified Source Discovery (VSD) + Harvest Workflow + +ToolUniverse ships a “harvest → verify → register” pipeline that turns external REST endpoints into first-class Dynamic REST tools. + +1. **Harvest candidates** – `GenericHarvestTool` searches the static Harvest catalog or wraps ad‑hoc URLs you supply with `{"urls": [...]}`. +2. **Probe readiness** – `HarvestCandidateTesterTool` (optional) performs a dry run against the candidate, suggesting default query params or headers. +3. **Register verified tools** – `VerifiedSourceRegisterTool` stamps metadata, persists the tool in `~/.tooluniverse/vsd/generated_tools.json` (override with `TOOLUNIVERSE_VSD_DIR`), and hot-loads it through the Dynamic REST runner. +4. **Inspect / prune** – `VerifiedSourceDiscoveryTool` lists everything in the verified catalog, while `VerifiedSourceRemoveTool` deletes entries and unregisters their dynamic bindings. + +```python +from tooluniverse.vsd_tool import ( + GenericHarvestTool, + HarvestCandidateTesterTool, + VerifiedSourceRegisterTool, + VerifiedSourceDiscoveryTool, + VerifiedSourceRemoveTool, +) + +harvest = GenericHarvestTool({}) +candidate = harvest.run({"query": "clinical"})["candidates"][0] + +tester = HarvestCandidateTesterTool({}) +probe = tester.run({"candidate": candidate}) + +register = VerifiedSourceRegisterTool({}) +register.run( + "ClinicalTrialsREST", + candidate, + default_params={"search": "cancer"}, + force=True, # bypass strict validation once endpoint is known-good +) + +print(VerifiedSourceDiscoveryTool({}).run({})["tools"]) +VerifiedSourceRemoveTool({}).run({"tool_name": "ClinicalTrialsREST"}) +``` + +Registered tools are immediately available to agents via normal loading (e.g., `ToolUniverse().load_tools(tool_type=["dynamic_rest"])`). This workflow keeps internal sources (Harvest/VSD) separate from public REST integrations so they can ship on their own release cadence. 
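+
+As a quick smoke test, a freshly registered tool can be invoked like any other catalog entry (a minimal sketch; substitute the tool name you registered above):
+
+```python
+from tooluniverse.execute_function import ToolUniverse
+
+tu = ToolUniverse()
+tu.load_tools(tool_type=["dynamic_rest"])  # picks up the VSD-generated tools
+print(tu.run_one_function({"name": "ClinicalTrialsREST", "arguments": {"search": "cancer"}}))
+```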
+
 ### 📚 API Reference
 - **[API Directory](https://zitniklab.hms.harvard.edu/ToolUniverse/api/modules.html)**: Complete module listing
 - **[Core Modules](https://zitniklab.hms.harvard.edu/ToolUniverse/api/tooluniverse.html)**: Main ToolUniverse class and utilities

From 83da56b250a12d9ffb8f0d48bd512f591deba0d6 Mon Sep 17 00:00:00 2001
From: SufianTA
Date: Wed, 29 Oct 2025 20:51:45 -0700
Subject: [PATCH 5/8] Added Docker Provisioner

---
 .env.template                                 |  17 ++
 docs/expand_tooluniverse/architecture.rst     |  32 +++
 scripts/provision_docker_llm.py               | 125 +++++++++
 scripts/run_insightlab_demo.py                | 172 ++++++++++++
 .../compose_scripts/docker_llm_provisioner.py |  69 +++++
 .../compose_scripts/harvest_auto_registrar.py | 182 +++++++++++++
 src/tooluniverse/data/compose_tools.json      | 178 ++++++++++++
 src/tooluniverse/execute_function.py          |  37 ++-
 .../remote/docker_llm/provision.py            | 256 ++++++++++++++++++
 src/tooluniverse/remote_tool.py               |  10 +-
 .../tools/DockerLLMProvisioner.py             |  69 +++++
 .../tools/HarvestAutoRegistrar.py             |  91 +++++++
 src/tooluniverse/tools/__init__.py            |   4 +
 tests/test_docker_llm_provision.py            |  81 ++++++
 tests/test_harvest_auto_registrar.py          |  83 ++++++
 15 files changed, 1395 insertions(+), 11 deletions(-)
 create mode 100644 .env.template
 create mode 100644 scripts/provision_docker_llm.py
 create mode 100644 scripts/run_insightlab_demo.py
 create mode 100644 src/tooluniverse/compose_scripts/docker_llm_provisioner.py
 create mode 100644 src/tooluniverse/compose_scripts/harvest_auto_registrar.py
 create mode 100644 src/tooluniverse/remote/docker_llm/provision.py
 create mode 100644 src/tooluniverse/tools/DockerLLMProvisioner.py
 create mode 100644 src/tooluniverse/tools/HarvestAutoRegistrar.py
 create mode 100644 tests/test_docker_llm_provision.py
 create mode 100644 tests/test_harvest_auto_registrar.py

diff --git a/.env.template b/.env.template
new file mode 100644
index 00000000..85ccc277
--- /dev/null
+++ b/.env.template
@@ -0,0 +1,17 @@
+# API keys and service endpoints for ToolUniverse
+# Copy this file to .env and fill in your actual values
+
+OPENAI_API_KEY=your_api_key_here  # or set AZURE_OPENAI_API_KEY instead
+
+BOLTZ_MCP_SERVER_HOST=your_server_host_here
+
+EXPERT_FEEDBACK_MCP_SERVER_URL=your_server_url_here
+
+HF_TOKEN=your_token_here
+
+TXAGENT_MCP_SERVER_HOST=your_server_host_here
+
+USPTO_API_KEY=your_api_key_here
+
+USPTO_MCP_SERVER_HOST=your_server_host_here
+
diff --git a/docs/expand_tooluniverse/architecture.rst b/docs/expand_tooluniverse/architecture.rst
index 5c90195c..eb901c7b 100644
--- a/docs/expand_tooluniverse/architecture.rst
+++ b/docs/expand_tooluniverse/architecture.rst
@@ -85,6 +85,8 @@ Repository Structure Tree
    │   ├── tool_finder_keyword.py    # Keyword-based tool search
    │   ├── tool_finder_embedding.py  # Embedding-based tool search
    │   ├── tool_finder_llm.py        # LLM-powered tool discovery
+   │   ├── remote/docker_llm/        # Docker-based LLM provisioning helpers
+   │   ├── DockerLLMProvisioner.py   # Compose tool for Docker LLM MCP auto-registration
    │   ├── embedding_database.py     # Tool embedding database
    │   └── embedding_sync.py         # Embedding synchronization
    │   │
@@ -314,6 +316,36 @@ Extension Points
 - Use `compose_tool.py` or add scripts in `compose_scripts/` for complex call chains
 - Leverage `tool_finder_*` for retrieval and routing assistance

+Tool Loading Cheat Sheet
+------------------------
+
+- Package data is loaded from the JSON files mapped in :mod:`default_config.py` plus everything under ``src/tooluniverse/data/``.
+- Remote/MCP entries are merged from both the packaged ``data/remote_tools`` directory **and** the user override folder ``~/.tooluniverse/remote_tools``. Dropping a JSON config there makes the tool visible without code changes. +- The runtime builds three main registries: + + 1. ``tool_files`` → category JSON manifests (local tools) + 2. ``data/remote_tools`` → bundled remote definitions + 3. ``~/.tooluniverse/remote_tools`` → user/automation supplied remote definitions + +- Use ``ToolUniverse.load_tools()`` to refresh the registry after adding new files without restarting the host process. + +Remote MCP Provisioning +----------------------- + +- ``DockerLLMProvisioner`` (compose tool) and ``scripts/provision_docker_llm.py`` automate standing up an MCP-enabled LLM in Docker, poll its ``/health`` endpoint, and emit the JSON configs under ``~/.tooluniverse/remote_tools`` so the new tool registers instantly. +- Remote stubs created from bundled configs (e.g., expert feedback, DepMap) are read-only until you connect ToolUniverse to the actual MCP server. You can: + + 1. Call ``ToolUniverse.load_mcp_tools(["http://server:port/mcp"])`` to ingest tools live, or + 2. Provision a local container via ``DockerLLMProvisioner`` or the CLI helper to host the endpoints yourself. +- The `RemoteTool` error message now includes these activation instructions when an agent accidentally calls an offline remote tool. + +Catalog Navigation Tips +----------------------- + +- ``ToolNavigatorTool`` combines the full catalog (including remote/VSD entries) with lightweight scoring—use it to shortlist relevant tools before running long compositions. +- ``ToolFinderKeyword`` / ``ToolFinderEmbedding`` provide complementary search modalities; both now benefit from the expanded metadata listed in ``~/.tooluniverse/remote_tools``. +- For big collections consider building category-specific shortlists in ``toolsets/`` and surfacing them via ``ToolNavigatorTool`` filters or custom compose tools. + Directory Quick Reference -------------------------- diff --git a/scripts/provision_docker_llm.py b/scripts/provision_docker_llm.py new file mode 100644 index 00000000..c3637b3a --- /dev/null +++ b/scripts/provision_docker_llm.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +Provision a Docker-hosted LLM and register it with ToolUniverse. + +This script wraps the helper in tooluniverse.remote.docker_llm.provision so that +non-technical users can start the container and create the necessary MCP client +configurations with a single command. +""" + +from __future__ import annotations + +import argparse +import sys + +from tooluniverse.remote.docker_llm.provision import ( + DEFAULT_IMAGE, + ProvisionError, + provision_docker_llm, +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Start a Docker-hosted LLM and register it with ToolUniverse." + ) + parser.add_argument( + "--image", + default=DEFAULT_IMAGE, + help=f"Docker image to run (default: {DEFAULT_IMAGE})", + ) + parser.add_argument( + "--container-name", + help="Name for the Docker container. 
Generated automatically if omitted.", + ) + parser.add_argument( + "--host", + default="127.0.0.1", + help="Host interface to bind (default: 127.0.0.1).", + ) + parser.add_argument( + "--host-port", + type=int, + default=9000, + help="Host port to expose the MCP endpoint on (default: 9000).", + ) + parser.add_argument( + "--container-port", + type=int, + default=8000, + help="Internal container port (default: 8000).", + ) + parser.add_argument( + "--tool-name", + default="DockerLLMChat", + help="Tool name to register inside ToolUniverse.", + ) + parser.add_argument( + "--tool-prefix", + help="Prefix used when auto-registering tools from the MCP server.", + ) + parser.add_argument( + "--mcp-tool-name", + default="docker_llm_chat", + help="Underlying MCP tool name exposed by the container.", + ) + parser.add_argument( + "--health-path", + default="/health", + help="HTTP path used for readiness checks (default: /health).", + ) + parser.add_argument( + "--timeout", + type=int, + default=120, + help="Seconds to wait for container health (default: 120).", + ) + parser.add_argument( + "--no-reuse", + action="store_true", + help="Always recreate the container instead of reusing an existing one.", + ) + parser.add_argument( + "--docker-cli", + default="docker", + help="Docker CLI executable to invoke (default: docker).", + ) + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + + try: + result = provision_docker_llm( + image=args.image, + container_name=args.container_name, + docker_cli=args.docker_cli, + host=args.host, + host_port=args.host_port, + container_port=args.container_port, + tool_name=args.tool_name, + tool_prefix=args.tool_prefix, + mcp_tool_name=args.mcp_tool_name, + health_path=args.health_path, + timeout_seconds=args.timeout, + reuse_container=not args.no_reuse, + ) + except ProvisionError as exc: + print(f"Provisioning failed: {exc}", file=sys.stderr) + return 1 + + print("Docker LLM provisioning complete.") + print(f" Container name : {result.container_name}") + print(f" MCP server URL : {result.server_url}") + print(f" Tool config : {result.config_path}") + print( + "Add the tool by reloading ToolUniverse or invoking " + "'DockerLLMProvisioner' from within the agent." + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/run_insightlab_demo.py b/scripts/run_insightlab_demo.py new file mode 100644 index 00000000..651f1188 --- /dev/null +++ b/scripts/run_insightlab_demo.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +InsightLab end‑to‑end smoke test. + +1. Provisions a Docker-hosted MCP LLM if Docker is available. +2. Uses the InsightLab LLM for hypothesis drafting. +3. Runs the harvest → test → register flow and calls the registered tool. +4. Summarises findings with the LLM. + +Prerequisites: + - Run this on a machine with Docker installed and outbound HTTPS access. + - The image ghcr.io/tooluniverse/docker-llm-mcp:latest should be reachable (or already built). 
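+
+Usage:
+    python scripts/run_insightlab_demo.py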
+""" + +import json +import sys +import traceback +from pprint import pprint + +import os +import sys +import traceback +from pathlib import Path +from pprint import pprint + +# Ensure the repository's src directory is importable when running as a script +PROJECT_ROOT = Path(__file__).resolve().parents[1] +SRC_PATH = PROJECT_ROOT / "src" +if str(SRC_PATH) not in sys.path: + sys.path.insert(0, str(SRC_PATH)) + +from tooluniverse.execute_function import ToolUniverse # noqa: E402 + + +def pretty(title, payload): + print(f"\n=== {title} ===") + try: + print(json.dumps(payload, indent=2)) + except TypeError: + pprint(payload) + + +def main(): + tu = ToolUniverse() + + try: + tu.load_tools() + except Exception: + print("Failed to load tools:") + traceback.print_exc() + return 1 + + print("Tools loaded.") + + # 1. Provision Docker LLM + try: + provision = tu.run_one_function( + { + "name": "DockerLLMProvisioner", + "arguments": { + "docker_image": "ghcr.io/tooluniverse/docker-llm-mcp:latest", + "host_port": 9010, + "reuse_container": True, + "tool_name": "InsightLabLLM", + }, + } + ) + pretty("DockerLLMProvisioner result", provision) + llm_tool_name = provision.get("tool_name") or "InsightLabLLM" + except Exception: + print("Docker provision step failed (Docker must be available).") + traceback.print_exc() + return 1 + + # Refresh tools to ensure local MCP client is loaded + tu.load_tools() + + # 2. Draft a hypothesis with the local LLM + hypothesis_prompt = ( + "Draft a research hypothesis about the linkage between vitamin D deficiency " + "and autoimmune disorders. Provide two key questions to investigate." + ) + try: + hypothesis = tu.run_one_function( + { + "name": llm_tool_name, + "arguments": { + "prompt": hypothesis_prompt, + "temperature": 0.3, + "max_tokens": 300, + }, + } + ) + pretty("Hypothesis output", hypothesis) + except Exception: + print("InsightLabLLM call failed.") + traceback.print_exc() + return 1 + + # 3. Harvest → register a dataset API + try: + harvest = tu.run_one_function( + { + "name": "HarvestAutoRegistrar", + "arguments": { + "query": "vitamin D immune dataset", + "limit": 5, + "tool_name": "vitamin_d_immune_api", + "auto_run": False, + }, + } + ) + pretty("HarvestAutoRegistrar result", harvest) + except Exception: + print("Harvest auto-registration failed (network required).") + traceback.print_exc() + return 1 + + registered_name = harvest.get("registered_tool_name") + if not registered_name: + print("No tool was registered; check the attempts above.") + return 1 + + tu.load_tools() + + # Call the newly registered tool with sample arguments (may need adjusting) + try: + api_call = tu.run_one_function( + {"name": registered_name, "arguments": {"q": "vitamin D", "rows": 5}} + ) + pretty(f"Call to {registered_name}", api_call) + except Exception: + print(f"Call to {registered_name} failed.") + traceback.print_exc() + return 1 + + # 4. 
Summarise the results with the LLM + summary_prompt = f""" + You produced the hypothesis: + {hypothesis} + + And retrieved data from {registered_name}: + {api_call} + + Provide: + - 150-word summary + - Confidence level between 0 and 1 with explanation + - Two suggested follow-up experiments + """ + try: + summary = tu.run_one_function( + { + "name": llm_tool_name, + "arguments": { + "prompt": summary_prompt, + "temperature": 0.3, + "max_tokens": 400, + }, + } + ) + pretty("InsightLab summary", summary) + except Exception: + print("Final summarisation failed.") + traceback.print_exc() + return 1 + + print("\nInsightLab smoke test completed successfully.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/tooluniverse/compose_scripts/docker_llm_provisioner.py b/src/tooluniverse/compose_scripts/docker_llm_provisioner.py new file mode 100644 index 00000000..d238f74f --- /dev/null +++ b/src/tooluniverse/compose_scripts/docker_llm_provisioner.py @@ -0,0 +1,69 @@ +""" +Compose script that provisions a Docker-hosted LLM and refreshes ToolUniverse. +""" + +from __future__ import annotations + +from typing import Any, Dict, Optional + +from ..remote.docker_llm.provision import ProvisionError, provision_docker_llm + + +def compose( + arguments: Dict[str, Any], + tooluniverse, + call_tool, + stream_callback=None, + emit_event=None, + memory_manager=None, +) -> Dict[str, Any]: + """Provision a Docker LLM container and register MCP configs.""" + + args = dict(arguments or {}) + + try: + result = provision_docker_llm( + image=args.get("docker_image"), + container_name=args.get("container_name"), + docker_cli=args.get("docker_cli", "docker"), + host=args.get("host", "127.0.0.1"), + host_port=int(args.get("host_port", 9000)), + container_port=int(args.get("container_port", 8000)), + env=args.get("env"), + volumes=args.get("volumes"), + extra_args=args.get("extra_args"), + tool_name=args.get("tool_name", "DockerLLMChat"), + tool_prefix=args.get("tool_prefix"), + mcp_tool_name=args.get("mcp_tool_name", "docker_llm_chat"), + health_path=args.get("health_path", "/health"), + timeout_seconds=int(args.get("timeout_seconds", 120)), + poll_interval=float(args.get("poll_interval", 2.0)), + reuse_container=bool(args.get("reuse_container", True)), + server_url=args.get("server_url"), + description=args.get("description"), + ) + except ProvisionError as exc: + return {"ok": False, "error": str(exc)} + + load_error: Optional[str] = None + if tooluniverse is not None: + try: + # Reload tool registry so the new MCP configs are visible immediately. + tooluniverse.load_tools() + except Exception as exc: # pragma: no cover - defensive + load_error = str(exc) + + payload = { + "ok": True, + "container_name": result.container_name, + "server_url": result.server_url, + "config_path": str(result.config_path), + "tool_name": result.tool_name, + } + if load_error: + payload["load_warning"] = load_error + + return payload + + +__all__ = ["compose"] diff --git a/src/tooluniverse/compose_scripts/harvest_auto_registrar.py b/src/tooluniverse/compose_scripts/harvest_auto_registrar.py new file mode 100644 index 00000000..6e195564 --- /dev/null +++ b/src/tooluniverse/compose_scripts/harvest_auto_registrar.py @@ -0,0 +1,182 @@ +""" +HarvestAutoRegistrar compose script. + +Coordinates the GenericHarvestTool, HarvestCandidateTesterTool, and +VerifiedSourceRegisterTool to discover, validate, and register new +DynamicREST tools. 
Designed to keep all orchestration logic inside the +ComposeTool framework so agents can call a single tool to go from query +to a runnable verified-source entry. +""" + +from __future__ import annotations + +import json +import re +import uuid +from typing import Any, Dict, List, Optional, Tuple + + +class _ComposeError(Exception): + """Internal marker so we can bubble failures cleanly.""" + + +def _as_dict(result: Any) -> Dict[str, Any]: + if isinstance(result, dict): + return result + if isinstance(result, str): + try: + decoded = json.loads(result) + if isinstance(decoded, dict): + return decoded + except json.JSONDecodeError: + pass + return {"raw_result": result} + + +def _emit(emit_event, event_type: str, data: Dict[str, Any]) -> None: + if emit_event: + emit_event(event_type, data) + + +def _generate_tool_name(base: Optional[str], suffix: str | None = None) -> str: + if base: + slug = re.sub(r"[^a-zA-Z0-9]+", "_", base).strip("_").lower() + if not slug: + slug = "vsd_auto" + else: + slug = "vsd_auto" + suffix = suffix or uuid.uuid4().hex[:6] + return f"{slug}_{suffix}" + + +def _select_candidates(arguments: Dict[str, Any]) -> Tuple[Optional[str], Dict[str, Any]]: + # Backwards compatibility: allow top-level query/limit keys + harvest_overrides = dict(arguments.get("harvest", {}) or {}) + query = (arguments.get("query") or harvest_overrides.get("query") or "").strip() + + if query: + harvest_overrides.setdefault("query", query) + + limit = arguments.get("limit") + if limit is None: + limit = harvest_overrides.get("limit", 5) + harvest_overrides["limit"] = max(1, min(int(limit or 5), 50)) + + return query, harvest_overrides + + +def compose( + arguments: Dict[str, Any], + tooluniverse, + call_tool, + stream_callback=None, + emit_event=None, + memory_manager=None, +) -> Dict[str, Any]: + """ + Discover, test, and register a new verified-source tool from harvest results. + """ + + args = dict(arguments or {}) + manual_candidates = args.get("candidates") + + # Prepare harvest step arguments (even if we skip calling the tool) + _, harvest_args = _select_candidates(args) + + results: Dict[str, Any] = { + "ok": False, + "attempts": [], + "registered_tool_name": None, + "run_result": None, + } + + # Step 1: gather candidates + if manual_candidates: + candidates = list(manual_candidates) + harvest_summary = { + "ok": True, + "source": "manual", + "count": len(candidates), + "query": harvest_args.get("query", ""), + } + else: + harvest_response = call_tool("GenericHarvestTool", harvest_args) + harvest_summary = _as_dict(harvest_response) + candidates = list(harvest_summary.get("candidates") or []) + + results["harvest"] = harvest_summary + _emit(emit_event, "harvest_completed", harvest_summary) + + if not candidates: + results["error"] = "No candidates returned from harvest." 
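+        # The harvest summary stored above travels back with the error so
+        # callers can inspect what the empty discovery step actually returned.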
+ return results + + skip_tests = bool(args.get("skip_tests")) + force_register = bool(args.get("force_register") or args.get("force")) + tester_overrides = dict(args.get("tester", {}) or {}) + register_overrides = dict(args.get("register", {}) or {}) + desired_tool_name = args.get("tool_name") + auto_run = bool(args.get("auto_run")) + tool_arguments = args.get("tool_arguments") or {} + + for index, candidate in enumerate(candidates): + attempt_record: Dict[str, Any] = {"candidate_index": index, "candidate": candidate} + tester_result = {"skipped": skip_tests} + + if not skip_tests: + tester_payload = dict(tester_overrides) + tester_payload.setdefault("candidate", candidate) + tester_response = call_tool("HarvestCandidateTesterTool", tester_payload) + tester_result = _as_dict(tester_response) + attempt_record["tester"] = tester_result + if not tester_result.get("ok") and not force_register: + attempt_record["status"] = "tester_failed" + results["attempts"].append(attempt_record) + continue + + register_payload = dict(register_overrides) + register_payload.setdefault("candidate", candidate) + register_payload.setdefault("force", force_register) + + tool_name = register_payload.get("tool_name") or desired_tool_name + if not tool_name: + host = (candidate.get("host") or candidate.get("name") or "").strip() + tool_name = _generate_tool_name(host, suffix=f"cand{index+1}") + register_payload["tool_name"] = tool_name + + register_response = call_tool("VerifiedSourceRegisterTool", register_payload) + register_result = _as_dict(register_response) + attempt_record["register"] = register_result + + if not register_result.get("registered"): + attempt_record["status"] = "registration_failed" + results["attempts"].append(attempt_record) + continue + + # Registration succeeded + registered_name = register_result.get("name") or tool_name + results["ok"] = True + results["registered_tool_name"] = registered_name + results["registration"] = register_result + attempt_record["status"] = "registered" + results["attempts"].append(attempt_record) + _emit(emit_event, "registration_success", register_result) + + if auto_run: + try: + run_payload = { + "name": registered_name, + "arguments": tool_arguments if isinstance(tool_arguments, dict) else {}, + } + run_result = tooluniverse.run_one_function(run_payload) + results["run_result"] = run_result + except Exception as exc: # pragma: no cover - defensive + results["run_error"] = str(exc) + return results + + results["error"] = "All candidates failed testing or registration." + _emit(emit_event, "registration_failed", {"attempts": results["attempts"]}) + return results + + +__all__ = ["compose"] diff --git a/src/tooluniverse/data/compose_tools.json b/src/tooluniverse/data/compose_tools.json index 5af4f882..169f983e 100644 --- a/src/tooluniverse/data/compose_tools.json +++ b/src/tooluniverse/data/compose_tools.json @@ -314,5 +314,183 @@ ], "composition_file": "tool_graph_generation.py", "composition_function": "compose" + }, + { + "type": "ComposeTool", + "name": "HarvestAutoRegistrar", + "description": "Discover, validate, and register new verified-source tools by chaining GenericHarvestTool, HarvestCandidateTesterTool, and VerifiedSourceRegisterTool. Optionally runs the newly registered tool.", + "parameter": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query passed to GenericHarvestTool when no explicit candidates are provided." 
+ }, + "limit": { + "type": "integer", + "description": "Maximum number of harvest candidates to inspect.", + "default": 5, + "minimum": 1, + "maximum": 50 + }, + "harvest": { + "type": "object", + "description": "Additional arguments forwarded to GenericHarvestTool (e.g., urls, description, domain filters)." + }, + "candidates": { + "type": "array", + "description": "Optional pre-harvested candidates to evaluate instead of calling GenericHarvestTool.", + "items": { + "type": "object" + } + }, + "tester": { + "type": "object", + "description": "Overrides passed to HarvestCandidateTesterTool (e.g., default headers or params)." + }, + "register": { + "type": "object", + "description": "Overrides passed to VerifiedSourceRegisterTool (e.g., default headers, params, tool_type)." + }, + "tool_name": { + "type": "string", + "description": "Desired name for the registered tool. Automatically generated if omitted." + }, + "force_register": { + "type": "boolean", + "description": "Register the tool even if validation fails (mirrors the force flag on VerifiedSourceRegisterTool).", + "default": false + }, + "force": { + "type": "boolean", + "description": "Alias for force_register.", + "default": false + }, + "skip_tests": { + "type": "boolean", + "description": "Skip HarvestCandidateTesterTool and proceed straight to registration.", + "default": false + }, + "auto_run": { + "type": "boolean", + "description": "Invoke the registered tool immediately after successful registration.", + "default": false + }, + "tool_arguments": { + "type": "object", + "description": "Arguments forwarded to the registered tool when auto_run is true.", + "default": {} + } + } + }, + "auto_load_dependencies": true, + "fail_on_missing_tools": false, + "required_tools": [ + "GenericHarvestTool", + "HarvestCandidateTesterTool", + "VerifiedSourceRegisterTool" + ], + "composition_file": "harvest_auto_registrar.py", + "composition_function": "compose" + }, + { + "type": "ComposeTool", + "name": "DockerLLMProvisioner", + "description": "Launch a Docker-hosted LLM MCP server, wait for readiness, and register client/auto-loader configurations so the model appears inside ToolUniverse.", + "parameter": { + "type": "object", + "properties": { + "docker_image": { + "type": "string", + "description": "Docker image to run (default ghcr.io/tooluniverse/docker-llm-mcp:latest)." + }, + "container_name": { + "type": "string", + "description": "Optional container name. Generated automatically when omitted." + }, + "docker_cli": { + "type": "string", + "description": "Docker executable to invoke (default docker).", + "default": "docker" + }, + "host": { + "type": "string", + "description": "Host interface to bind (default 127.0.0.1).", + "default": "127.0.0.1" + }, + "host_port": { + "type": "integer", + "description": "Port exposed on the host for MCP traffic (default 9000).", + "default": 9000 + }, + "container_port": { + "type": "integer", + "description": "Internal container port that serves MCP (default 8000).", + "default": 8000 + }, + "env": { + "type": "object", + "description": "Environment variables to pass to docker run." + }, + "volumes": { + "type": "array", + "description": "Volume mappings for docker run (e.g. 
['/host/path:/container/path']).", + "items": { "type": "string" } + }, + "extra_args": { + "type": "array", + "description": "Additional arguments appended to docker run.", + "items": { "type": "string" } + }, + "tool_name": { + "type": "string", + "description": "Tool name registered within ToolUniverse (default DockerLLMChat).", + "default": "DockerLLMChat" + }, + "tool_prefix": { + "type": "string", + "description": "Prefix applied when auto-registering tools from the MCP server." + }, + "mcp_tool_name": { + "type": "string", + "description": "Underlying MCP tool name exposed by the server (default docker_llm_chat).", + "default": "docker_llm_chat" + }, + "health_path": { + "type": "string", + "description": "Health-check path polled for readiness (default /health).", + "default": "/health" + }, + "timeout_seconds": { + "type": "integer", + "description": "Seconds to wait for the container to become healthy.", + "default": 120 + }, + "poll_interval": { + "type": "number", + "description": "Seconds between health checks during provisioning.", + "default": 2.0 + }, + "reuse_container": { + "type": "boolean", + "description": "Reuse and restart an existing container if present (default true).", + "default": true + }, + "server_url": { + "type": "string", + "description": "Override MCP base URL if different from http://host:host_port." + }, + "description": { + "type": "string", + "description": "Custom description applied to the registered tool." + } + }, + "additionalProperties": false + }, + "auto_load_dependencies": false, + "fail_on_missing_tools": false, + "required_tools": [], + "composition_file": "docker_llm_provisioner.py", + "composition_function": "compose" } ] diff --git a/src/tooluniverse/execute_function.py b/src/tooluniverse/execute_function.py index 3469dd92..0847056a 100755 --- a/src/tooluniverse/execute_function.py +++ b/src/tooluniverse/execute_function.py @@ -1421,21 +1421,31 @@ def _scan_predefined_files(self): all_tools.extend(tools_in_category) all_tool_names.update([tool["name"] for tool in tools_in_category]) - # Also include remote tools + # Also include remote tools from package data and user overrides + def _include_remote_dir(base_dir): + if not os.path.isdir(base_dir): + return + for fname in os.listdir(base_dir): + if not fname.lower().endswith(".json"): + continue + fpath = os.path.join(base_dir, fname) + remote_tools = self._read_tools_from_file(fpath) + if remote_tools: + all_tools.extend(remote_tools) + all_tool_names.update([tool["name"] for tool in remote_tools]) + try: remote_dir = os.path.join(current_dir, "data", "remote_tools") - if os.path.isdir(remote_dir): - for fname in os.listdir(remote_dir): - if not fname.lower().endswith(".json"): - continue - fpath = os.path.join(remote_dir, fname) - remote_tools = self._read_tools_from_file(fpath) - if remote_tools: - all_tools.extend(remote_tools) - all_tool_names.update([tool["name"] for tool in remote_tools]) + _include_remote_dir(remote_dir) except Exception as e: warning(f"Warning: Failed to scan remote tools directory: {e}") + try: + user_remote_dir = os.path.join(str(Path.home()), ".tooluniverse", "remote_tools") + _include_remote_dir(user_remote_dir) + except Exception as e: + warning(f"Warning: Failed to scan user remote tools directory: {e}") + return all_tools, all_tool_names def _scan_all_json_files(self): @@ -1463,6 +1473,13 @@ def _scan_all_json_files(self): if file.lower().endswith(".json"): json_files.append(os.path.join(root, file)) + # Include user-level remote tool configurations + 
user_remote_dir = os.path.join(str(Path.home()), ".tooluniverse", "remote_tools") + if os.path.isdir(user_remote_dir): + for file in os.listdir(user_remote_dir): + if file.lower().endswith(".json"): + json_files.append(os.path.join(user_remote_dir, file)) + self.logger.debug(f"Found {len(json_files)} JSON files to scan") # Read tools from each JSON file using the common method diff --git a/src/tooluniverse/remote/docker_llm/provision.py b/src/tooluniverse/remote/docker_llm/provision.py new file mode 100644 index 00000000..64f7ba25 --- /dev/null +++ b/src/tooluniverse/remote/docker_llm/provision.py @@ -0,0 +1,256 @@ +""" +Docker-hosted LLM provisioning helpers for ToolUniverse. + +These helpers start (or reuse) a Docker container that exposes an MCP-compatible +LLM service, wait for it to become healthy, and register client/auto-loader +configurations in the user's ToolUniverse remote tool directory. +""" + +from __future__ import annotations + +import json +import subprocess +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Sequence + +import requests + +DEFAULT_IMAGE = "ghcr.io/tooluniverse/docker-llm-mcp:latest" +DEFAULT_CONTAINER_BASENAME = "tooluniverse-llm" +DEFAULT_INTERNAL_PORT = 8000 +DEFAULT_TOOL_NAME = "DockerLLMChat" +DEFAULT_MCP_TOOL_NAME = "docker_llm_chat" +DEFAULT_HEALTH_PATH = "/health" + + +class ProvisionError(RuntimeError): + """Raised when Docker provisioning fails.""" + + +@dataclass +class ProvisionResult: + container_name: str + server_url: str + config_path: Path + tool_name: str + + +def _ensure_remote_dir() -> Path: + target = Path.home() / ".tooluniverse" / "remote_tools" + target.mkdir(parents=True, exist_ok=True) + return target + + +def _run_docker( + args: Sequence[str], *, docker_cli: str = "docker", check: bool = True +) -> subprocess.CompletedProcess: + command = [docker_cli, *args] + return subprocess.run( + command, + check=check, + capture_output=True, + text=True, + ) + + +def _container_exists(container_name: str, docker_cli: str) -> bool: + proc = _run_docker( + ["ps", "-a", "--filter", f"name=^{container_name}$", "--format", "{{.Names}}"], + docker_cli=docker_cli, + check=True, + ) + return any(line.strip() == container_name for line in proc.stdout.splitlines()) + + +def _container_running(container_name: str, docker_cli: str) -> bool: + proc = _run_docker( + ["ps", "--filter", f"name=^{container_name}$", "--format", "{{.Names}}"], + docker_cli=docker_cli, + check=True, + ) + return any(line.strip() == container_name for line in proc.stdout.splitlines()) + + +def _start_existing(container_name: str, docker_cli: str) -> None: + _run_docker(["start", container_name], docker_cli=docker_cli, check=True) + + +def _run_new_container( + *, + docker_cli: str, + image: str, + container_name: str, + host: str, + host_port: int, + container_port: int, + env: Optional[Dict[str, str]], + volumes: Optional[List[str]], + extra_args: Optional[List[str]], +) -> None: + cmd: List[str] = [ + "run", + "-d", + "--name", + container_name, + "-p", + f"{host}:{host_port}:{container_port}", + ] + + for key, value in (env or {}).items(): + cmd.extend(["-e", f"{key}={value}"]) + + for volume in volumes or []: + cmd.extend(["-v", volume]) + + if extra_args: + cmd.extend(extra_args) + + cmd.append(image) + _run_docker(cmd, docker_cli=docker_cli, check=True) + + +def _wait_for_health( + url: str, + *, + timeout: int, + interval: float, +) -> None: + deadline = time.time() + timeout + while time.time() < deadline: 
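+        # Any 2xx/3xx response counts as healthy; connection errors are
+        # swallowed and the probe is retried until the deadline expires.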
+ try: + response = requests.get(url, timeout=5) + if 200 <= response.status_code < 400: + return + except requests.RequestException: + pass + time.sleep(interval) + raise ProvisionError(f"Container health check did not succeed at {url}") + + +def _write_remote_config( + config: List[Dict[str, object]], *, tool_name: str +) -> Path: + target_dir = _ensure_remote_dir() + path = target_dir / f"{tool_name}.json" + with path.open("w", encoding="utf-8") as handle: + json.dump(config, handle, indent=2) + return path + + +def provision_docker_llm( + image: str = DEFAULT_IMAGE, + *, + container_name: Optional[str] = None, + docker_cli: str = "docker", + host: str = "127.0.0.1", + host_port: int = 9000, + container_port: int = DEFAULT_INTERNAL_PORT, + env: Optional[Dict[str, str]] = None, + volumes: Optional[List[str]] = None, + extra_args: Optional[List[str]] = None, + tool_name: str = DEFAULT_TOOL_NAME, + tool_prefix: Optional[str] = None, + mcp_tool_name: str = DEFAULT_MCP_TOOL_NAME, + health_path: str = DEFAULT_HEALTH_PATH, + timeout_seconds: int = 120, + poll_interval: float = 2.0, + reuse_container: bool = True, + server_url: Optional[str] = None, + description: Optional[str] = None, +) -> ProvisionResult: + """ + Ensure a Docker-hosted LLM is running and registered with ToolUniverse. + """ + container_name = container_name or f"{DEFAULT_CONTAINER_BASENAME}-{int(time.time())}" + tool_prefix = tool_prefix or (tool_name.lower() + "_") + if not tool_prefix.endswith("_"): + tool_prefix += "_" + + # Verify Docker availability + try: + _run_docker(["version"], docker_cli=docker_cli, check=True) + except FileNotFoundError as exc: + raise ProvisionError("Docker CLI not found. Please install Docker Desktop.") from exc + except subprocess.CalledProcessError as exc: + raise ProvisionError(f"Docker is not available: {exc.stderr.strip()}") from exc + + exists = _container_exists(container_name, docker_cli) + running = _container_running(container_name, docker_cli) if exists else False + + if exists and not running and reuse_container: + _start_existing(container_name, docker_cli) + running = True + + if not exists or (exists and not running and not reuse_container): + if exists and not reuse_container: + _run_docker(["rm", "-f", container_name], docker_cli=docker_cli, check=True) + _run_new_container( + docker_cli=docker_cli, + image=image, + container_name=container_name, + host=host, + host_port=host_port, + container_port=container_port, + env=env, + volumes=volumes, + extra_args=extra_args, + ) + + base_url = server_url or f"http://{host}:{host_port}" + health_url = base_url.rstrip("/") + health_path + _wait_for_health(health_url, timeout=timeout_seconds, interval=poll_interval) + + config_description = ( + description + or "Interact with a locally hosted Docker LLM via MCP-compatible interface." 
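+        # This string is applied verbatim to the MCPClientTool entry built below.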
+ ) + + remote_config = [ + { + "name": f"{tool_prefix.rstrip('_')}_auto_loader", + "description": f"Automatically discover tools from the Docker-hosted LLM server at {base_url}.", + "type": "MCPAutoLoaderTool", + "server_url": f"{base_url.rstrip('/')}/mcp", + "tool_prefix": tool_prefix, + }, + { + "name": tool_name, + "description": config_description, + "type": "MCPClientTool", + "server_url": base_url, + "transport": "http", + "mcp_tool_name": mcp_tool_name, + "parameter": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "Prompt text to send to the Docker-hosted language model.", + }, + "temperature": { + "type": "number", + "description": "Sampling temperature for the model.", + "default": 0.7, + }, + "max_tokens": { + "type": "integer", + "description": "Maximum tokens to generate in the response.", + "default": 512, + }, + }, + "required": ["prompt"], + "additionalProperties": True, + }, + }, + ] + + config_path = _write_remote_config(remote_config, tool_name=tool_name) + + return ProvisionResult( + container_name=container_name, + server_url=base_url.rstrip("/"), + config_path=config_path, + tool_name=tool_name, + ) diff --git a/src/tooluniverse/remote_tool.py b/src/tooluniverse/remote_tool.py index 9c656f6e..17f1219b 100644 --- a/src/tooluniverse/remote_tool.py +++ b/src/tooluniverse/remote_tool.py @@ -51,6 +51,14 @@ def run(self, arguments=None): server_type = self.remote_info.get("server_type", "Unknown") original_type = self.remote_info.get("original_type", "Unknown") + guidance = ( + "This tool is registered as a remote MCP endpoint and cannot run locally. " + "If you want to activate it, connect ToolUniverse to the hosting server (e.g. " + "call `ToolUniverse.load_mcp_tools([...])` with the server URL) or provision a local " + "container via the `DockerLLMProvisioner` compose tool / `scripts/provision_docker_llm.py`. " + "Custom MCP configs placed in ~/.tooluniverse/remote_tools/*.json are picked up automatically." + ) + return { "error": "Remote tool not available for local execution", "tool_name": ( @@ -61,7 +69,7 @@ def run(self, arguments=None): "tool_type": "RemoteTool", "original_type": original_type, "server_type": server_type, - "message": "This tool is hosted on an external MCP/SMCP server and cannot be executed locally. Please use the external server directly.", + "message": guidance, "remote_info": self.remote_info, } diff --git a/src/tooluniverse/tools/DockerLLMProvisioner.py b/src/tooluniverse/tools/DockerLLMProvisioner.py new file mode 100644 index 00000000..df8d8eda --- /dev/null +++ b/src/tooluniverse/tools/DockerLLMProvisioner.py @@ -0,0 +1,69 @@ +""" +DockerLLMProvisioner + +Compose wrapper that provisions a Docker-hosted LLM MCP server and registers +its ToolUniverse configurations. 
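+
+Example (a minimal sketch; the port is illustrative and a running Docker
+daemon is assumed):
+
+    from tooluniverse.tools import DockerLLMProvisioner
+
+    result = DockerLLMProvisioner(host_port=9010, reuse_container=True)
+    print(result)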
+""" + +from typing import Any, Dict, Optional, Callable +from ._shared_client import get_shared_client + + +def DockerLLMProvisioner( + *, + docker_image: Optional[str] = None, + container_name: Optional[str] = None, + docker_cli: str = "docker", + host: str = "127.0.0.1", + host_port: int = 9000, + container_port: int = 8000, + env: Optional[Dict[str, str]] = None, + volumes: Optional[list[str]] = None, + extra_args: Optional[list[str]] = None, + tool_name: str = "DockerLLMChat", + tool_prefix: Optional[str] = None, + mcp_tool_name: str = "docker_llm_chat", + health_path: str = "/health", + timeout_seconds: int = 120, + poll_interval: float = 2.0, + reuse_container: bool = True, + server_url: Optional[str] = None, + description: Optional[str] = None, + stream_callback: Optional[Callable[[str], None]] = None, + use_cache: bool = False, + validate: bool = True, +) -> Any: + """ + Provision a Docker-hosted LLM and register MCP configs with ToolUniverse. + """ + + arguments: Dict[str, Any] = { + "docker_image": docker_image, + "container_name": container_name, + "docker_cli": docker_cli, + "host": host, + "host_port": host_port, + "container_port": container_port, + "env": env, + "volumes": volumes, + "extra_args": extra_args, + "tool_name": tool_name, + "tool_prefix": tool_prefix, + "mcp_tool_name": mcp_tool_name, + "health_path": health_path, + "timeout_seconds": timeout_seconds, + "poll_interval": poll_interval, + "reuse_container": reuse_container, + "server_url": server_url, + "description": description, + } + + return get_shared_client().run_one_function( + {"name": "DockerLLMProvisioner", "arguments": arguments}, + stream_callback=stream_callback, + use_cache=use_cache, + validate=validate, + ) + + +__all__ = ["DockerLLMProvisioner"] diff --git a/src/tooluniverse/tools/HarvestAutoRegistrar.py b/src/tooluniverse/tools/HarvestAutoRegistrar.py new file mode 100644 index 00000000..706b5a74 --- /dev/null +++ b/src/tooluniverse/tools/HarvestAutoRegistrar.py @@ -0,0 +1,91 @@ +""" +HarvestAutoRegistrar + +Compose workflow that harvests candidate APIs, validates them, and registers a +new verified-source tool. Optionally executes the registered tool immediately. +""" + +from typing import Any, Dict, List, Optional, Callable +from ._shared_client import get_shared_client + + +def HarvestAutoRegistrar( + query: Optional[str] = None, + limit: int = 5, + *, + harvest: Optional[Dict[str, Any]] = None, + candidates: Optional[List[Dict[str, Any]]] = None, + tester: Optional[Dict[str, Any]] = None, + register: Optional[Dict[str, Any]] = None, + tool_name: Optional[str] = None, + force_register: bool = False, + force: bool = False, + skip_tests: bool = False, + auto_run: bool = False, + tool_arguments: Optional[Dict[str, Any]] = None, + stream_callback: Optional[Callable[[str], None]] = None, + use_cache: bool = False, + validate: bool = True, +) -> Any: + """ + Discover, validate, and register a verified-source tool in a single call. + + Parameters + ---------- + query : str, optional + Harvest query when candidates are not supplied directly. + limit : int, default 5 + Maximum number of harvest candidates to inspect. + harvest : dict, optional + Additional arguments forwarded to GenericHarvestTool. + candidates : list, optional + Precomputed candidate objects. Skips calling GenericHarvestTool when provided. + tester : dict, optional + Overrides forwarded to HarvestCandidateTesterTool. + register : dict, optional + Overrides forwarded to VerifiedSourceRegisterTool. 
+ tool_name : str, optional + Desired name for the registered tool. Auto-generated if omitted. + force_register : bool, default False + Register even when validation fails (mirrors VerifiedSourceRegisterTool.force). + force : bool, default False + Alias for force_register for convenience. + skip_tests : bool, default False + Bypass HarvestCandidateTesterTool and proceed straight to registration. + auto_run : bool, default False + Execute the registered tool immediately after a successful registration. + tool_arguments : dict, optional + Arguments forwarded to the registered tool when auto_run is True. + stream_callback : Callable, optional + Streaming callback handled by ToolUniverse shared client. + use_cache : bool, default False + Enable client-side caching. + validate : bool, default True + Validate payload before sending to ToolUniverse. + """ + payload = { + "name": "HarvestAutoRegistrar", + "arguments": { + "query": query, + "limit": limit, + "harvest": harvest, + "candidates": candidates, + "tester": tester, + "register": register, + "tool_name": tool_name, + "force_register": force_register or force, + "skip_tests": skip_tests, + "auto_run": auto_run, + "tool_arguments": tool_arguments or {}, + }, + } + + return get_shared_client().run_one_function( + payload, + stream_callback=stream_callback, + use_cache=use_cache, + validate=validate, + ) + + +__all__ = ["HarvestAutoRegistrar"] diff --git a/src/tooluniverse/tools/__init__.py b/src/tooluniverse/tools/__init__.py index 5d3e8b38..4d6e1e75 100644 --- a/src/tooluniverse/tools/__init__.py +++ b/src/tooluniverse/tools/__init__.py @@ -458,6 +458,8 @@ from .GO_search_terms import GO_search_terms from .GWAS_search_associations_by_gene import GWAS_search_associations_by_gene from .HAL_search_archive import HAL_search_archive +from .HarvestAutoRegistrar import HarvestAutoRegistrar +from .DockerLLMProvisioner import DockerLLMProvisioner from .HPA_get_biological_processes_by_gene import HPA_get_biological_processes_by_gene from .HPA_get_cancer_prognostics_by_gene import HPA_get_cancer_prognostics_by_gene from .HPA_get_comparative_expression_by_gene_and_cellline import ( @@ -1127,6 +1129,7 @@ "Crossref_search_works", "DBLP_search_publications", "DOAJ_search_articles", + "DockerLLMProvisioner", "DailyMed_get_spl_by_setid", "DailyMed_search_spls", "DataAnalysisValidityReviewer", @@ -1323,6 +1326,7 @@ "GO_search_terms", "GWAS_search_associations_by_gene", "HAL_search_archive", + "HarvestAutoRegistrar", "HPA_get_biological_processes_by_gene", "HPA_get_cancer_prognostics_by_gene", "HPA_get_comparative_expression_by_gene_and_cellline", diff --git a/tests/test_docker_llm_provision.py b/tests/test_docker_llm_provision.py new file mode 100644 index 00000000..0e313ac1 --- /dev/null +++ b/tests/test_docker_llm_provision.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import json +import subprocess +import tempfile +from pathlib import Path +from unittest import mock + +from tooluniverse.remote.docker_llm.provision import ( + ProvisionResult, + provision_docker_llm, +) +from tooluniverse.compose_scripts.docker_llm_provisioner import compose + + +def test_provision_creates_remote_config_and_runs_docker(): + temp_home = Path(tempfile.mkdtemp()) + commands = [] + + def fake_run(cmd, check, capture_output, text): + commands.append(cmd) + if cmd[0] == "docker" and cmd[1] == "version": + return subprocess.CompletedProcess(cmd, 0, "", "") + if cmd[0] == "docker" and cmd[1] == "ps": + return subprocess.CompletedProcess(cmd, 0, "", "") + if cmd[0] == 
"docker" and cmd[1] == "run": + return subprocess.CompletedProcess(cmd, 0, "", "") + raise AssertionError(f"Unexpected docker command: {cmd}") + + response = mock.Mock() + response.status_code = 200 + + with mock.patch("tooluniverse.remote.docker_llm.provision.Path.home", return_value=temp_home): + with mock.patch("tooluniverse.remote.docker_llm.provision.subprocess.run", side_effect=fake_run): + with mock.patch("tooluniverse.remote.docker_llm.provision.requests.get", return_value=response): + with mock.patch("tooluniverse.remote.docker_llm.provision.time.sleep"): + result = provision_docker_llm( + image="example/image:latest", + container_name="test-container", + host="127.0.0.1", + host_port=9100, + container_port=8000, + timeout_seconds=5, + poll_interval=0.01, + ) + + assert result.container_name == "test-container" + assert result.config_path.exists() + stored = json.loads(result.config_path.read_text(encoding="utf-8")) + assert isinstance(stored, list) + assert stored[1]["name"] == "DockerLLMChat" + assert ["docker", "run", "-d", "--name", "test-container", "-p", "127.0.0.1:9100:8000", "example/image:latest"] in commands + + +def test_compose_returns_payload_and_refreshes_tooluniverse(tmp_path): + config_path = tmp_path / "DockerLLMChat.json" + + def fake_provision(**kwargs): + config_path.write_text("[]", encoding="utf-8") + return ProvisionResult( + container_name="compose-container", + server_url="http://127.0.0.1:9000", + config_path=config_path, + tool_name="DockerLLMChat", + ) + + class DummyToolUniverse: + def __init__(self): + self.refreshed = False + + def load_tools(self): + self.refreshed = True + + dummy_tu = DummyToolUniverse() + + with mock.patch("tooluniverse.compose_scripts.docker_llm_provisioner.provision_docker_llm", side_effect=fake_provision): + result = compose({"host_port": 9005}, dummy_tu, call_tool=None) + + assert result["ok"] is True + assert result["container_name"] == "compose-container" + assert dummy_tu.refreshed is True diff --git a/tests/test_harvest_auto_registrar.py b/tests/test_harvest_auto_registrar.py new file mode 100644 index 00000000..d52a4b46 --- /dev/null +++ b/tests/test_harvest_auto_registrar.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from typing import Any, Dict + +from tooluniverse.compose_scripts.harvest_auto_registrar import compose + + +class FakeToolUniverse: + def __init__(self) -> None: + self.invocations: list[Dict[str, Any]] = [] + + def run_one_function(self, payload: Dict[str, Any]) -> Dict[str, Any]: + self.invocations.append(payload) + return {"ok": True, "payload": payload} + + +def test_compose_registers_and_runs_single_candidate(): + tool_universe = FakeToolUniverse() + calls = {} + + def call_tool(name: str, payload: Dict[str, Any]): + calls.setdefault(name, []).append(payload) + if name == "HarvestCandidateTesterTool": + return {"ok": True, "test": {"status": 200}} + if name == "VerifiedSourceRegisterTool": + assert payload["tool_name"] == "my_registered_tool" + return {"registered": True, "name": payload["tool_name"], "config": {"endpoint": "https://example.com"}} + raise AssertionError(f"Unexpected tool call: {name}") + + candidate = {"name": "Example API", "host": "example.com", "endpoint": "https://example.com/api"} + + result = compose( + { + "candidates": [candidate], + "tool_name": "my_registered_tool", + "auto_run": True, + "tool_arguments": {"limit": 1}, + }, + tool_universe, + call_tool, + ) + + assert result["ok"] is True + assert result["registered_tool_name"] == "my_registered_tool" + assert 
result["registration"]["registered"] is True + assert tool_universe.invocations[0]["name"] == "my_registered_tool" + assert tool_universe.invocations[0]["arguments"] == {"limit": 1} + assert calls["HarvestCandidateTesterTool"][0]["candidate"] == candidate + + +def test_compose_generates_name_and_skips_failed_candidate(): + tool_universe = FakeToolUniverse() + register_tool_names = [] + + def call_tool(name: str, payload: Dict[str, Any]): + if name == "HarvestCandidateTesterTool": + ok = payload["candidate"]["host"] == "second.example.com" + return {"ok": ok, "test": {"status": 200 if ok else 500}} + if name == "VerifiedSourceRegisterTool": + register_tool_names.append(payload["tool_name"]) + return {"registered": True, "name": payload["tool_name"], "config": {}} + if name == "GenericHarvestTool": + return {"ok": True, "candidates": []} + raise AssertionError(f"Unexpected tool call: {name}") + + first = {"name": "Bad API", "host": "bad.example.com", "endpoint": "https://bad.example.com"} + second = {"name": "Good API", "host": "second.example.com", "endpoint": "https://second.example.com"} + + result = compose( + { + "candidates": [first, second], + "auto_run": False, + }, + tool_universe, + call_tool, + ) + + assert result["ok"] is True + assert result["registered_tool_name"] == register_tool_names[0] + assert register_tool_names[0].startswith("second_example_com_") + assert result["attempts"][0]["status"] == "tester_failed" + assert result["attempts"][1]["status"] == "registered" + assert tool_universe.invocations == [] From 30298f585e11119714f60928b89895b0516c141d Mon Sep 17 00:00:00 2001 From: SufianTA Date: Wed, 29 Oct 2025 21:07:20 -0700 Subject: [PATCH 6/8] Fix Tests --- tests/integration/test_coding_api_integration.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_coding_api_integration.py b/tests/integration/test_coding_api_integration.py index 09542387..d7087d5e 100644 --- a/tests/integration/test_coding_api_integration.py +++ b/tests/integration/test_coding_api_integration.py @@ -13,8 +13,11 @@ from pathlib import Path import pytest -# Add src to path -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) +# Add repository src/ directory to path so we import the checked-in package +REPO_ROOT = Path(__file__).resolve().parents[2] +SRC_DIR = REPO_ROOT / "src" +if str(SRC_DIR) not in sys.path: + sys.path.insert(0, str(SRC_DIR)) from tooluniverse import ToolUniverse # noqa: E402 from tooluniverse.generate_tools import main as generate_tools # noqa: E402 From 266b235f09aefc3bcbd41167df94c3c7ad58c660 Mon Sep 17 00:00:00 2001 From: SufianTA Date: Wed, 29 Oct 2025 21:15:04 -0700 Subject: [PATCH 7/8] Update Testing --- tests/integration/test_coding_api_integration.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/test_coding_api_integration.py b/tests/integration/test_coding_api_integration.py index d7087d5e..4dc11722 100644 --- a/tests/integration/test_coding_api_integration.py +++ b/tests/integration/test_coding_api_integration.py @@ -19,6 +19,11 @@ if str(SRC_DIR) not in sys.path: sys.path.insert(0, str(SRC_DIR)) +# Ensure we import the repo copy even if another version is already loaded +for module_name in list(sys.modules.keys()): + if module_name == "tooluniverse" or module_name.startswith("tooluniverse."): + del sys.modules[module_name] + from tooluniverse import ToolUniverse # noqa: E402 from tooluniverse.generate_tools import main as generate_tools # noqa: E402 From 
cdef93e24c4fec04945af2ac6ef82821e044cdae Mon Sep 17 00:00:00 2001 From: SufianTA Date: Wed, 29 Oct 2025 21:36:51 -0700 Subject: [PATCH 8/8] Testing update --- tests/integration/test_coding_api_integration.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_coding_api_integration.py b/tests/integration/test_coding_api_integration.py index 4dc11722..9bc50bb5 100644 --- a/tests/integration/test_coding_api_integration.py +++ b/tests/integration/test_coding_api_integration.py @@ -20,9 +20,13 @@ sys.path.insert(0, str(SRC_DIR)) # Ensure we import the repo copy even if another version is already loaded -for module_name in list(sys.modules.keys()): - if module_name == "tooluniverse" or module_name.startswith("tooluniverse."): - del sys.modules[module_name] +package = sys.modules.get("tooluniverse") +if package is not None: + module_path = Path(getattr(package, "__file__", "")).resolve() + if SRC_DIR not in module_path.parents: + for module_name in list(sys.modules.keys()): + if module_name == "tooluniverse" or module_name.startswith("tooluniverse."): + del sys.modules[module_name] from tooluniverse import ToolUniverse # noqa: E402 from tooluniverse.generate_tools import main as generate_tools # noqa: E402
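The three test patches above converge on one import-isolation pattern: put the
repository's src/ directory at the front of sys.path, then purge cached
tooluniverse modules only when the already-imported copy resolves outside that
directory. A minimal standalone sketch of the same pattern; the helper name
ensure_repo_package is illustrative and not part of the patch:

    import sys
    from pathlib import Path

    def ensure_repo_package(repo_root: Path, package: str = "tooluniverse") -> None:
        """Import `package` from repo_root/src, evicting any stale installed copy."""
        src_dir = (repo_root / "src").resolve()
        if str(src_dir) not in sys.path:
            sys.path.insert(0, str(src_dir))
        loaded = sys.modules.get(package)
        if loaded is not None:
            module_path = Path(getattr(loaded, "__file__", "")).resolve()
            # Purge only when the cached copy lives outside the repo checkout,
            # mirroring the guard introduced in PATCH 8/8.
            if src_dir not in module_path.parents:
                for name in list(sys.modules):
                    if name == package or name.startswith(package + "."):
                        del sys.modules[name]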