139 changes: 134 additions & 5 deletions graphiti_core/llm_client/openai_generic_client.py
@@ -92,6 +92,91 @@ def __init__(
else:
self.client = client

# Instance-level fallback state for providers that don't support json_schema
# (e.g., LiteLLM with Gemini). Once set to True, remains True for client lifetime.
self._use_json_object_mode: bool = False

def _is_schema_returned_as_data(self, response: dict[str, Any]) -> bool:
"""Detect if the model returned the schema definition instead of data.

When some providers (e.g., LiteLLM with Gemini) receive json_schema format,
they return the schema definition itself instead of data conforming to the schema.

Args:
response: The parsed JSON response from the LLM

Returns:
True if the response appears to be a JSON Schema definition
"""
# Immediate detection: JSON Schema keywords that are NEVER present in real data
schema_keywords = {'$defs', '$schema', 'definitions', 'properties'}
if any(key in response for key in schema_keywords):
return True

# Also detect "type": "object" at top level (another JSON Schema pattern)
return response.get('type') == 'object'

def _extract_json(self, text: str) -> dict[str, Any]:
"""Extract the first valid JSON object from text that may contain trailing content.

Some LLM providers return JSON followed by explanatory text, which breaks
standard JSON parsing. This method finds and extracts the first complete
JSON object.

Args:
text: Raw response text that may contain JSON with trailing content

Returns:
Parsed JSON as a dictionary

Raises:
json.JSONDecodeError: If no valid JSON object can be extracted
"""
text = text.strip()

# Try standard parsing first (fast path)
try:
return json.loads(text)
except json.JSONDecodeError as e:
# Only handle "Extra data" errors - other errors should propagate
if 'Extra data' not in str(e):
raise

# Find the first complete JSON object by matching braces
if not text.startswith('{'):
raise json.JSONDecodeError('No JSON object found', text, 0)

depth = 0
in_string = False
escape_next = False

for i, char in enumerate(text):
if escape_next:
escape_next = False
continue

if char == '\\' and in_string:
escape_next = True
continue

if char == '"' and not escape_next:
in_string = not in_string
continue

if in_string:
continue

if char == '{':
depth += 1
elif char == '}':
depth -= 1
if depth == 0:
# Found complete JSON object
json_str = text[: i + 1]
return json.loads(json_str)

raise json.JSONDecodeError('Incomplete JSON object', text, len(text))

async def _generate_response(
self,
messages: list[Message],
@@ -107,9 +192,10 @@ async def _generate_response(
elif m.role == 'system':
openai_messages.append({'role': 'system', 'content': m.content})
try:
# Prepare response format
response_format: dict[str, Any] = {'type': 'json_object'}
if response_model is not None:
# Prepare response format based on mode
response_format: dict[str, Any]
if response_model is not None and not self._use_json_object_mode:
# Preferred mode: use json_schema format (works with OpenAI, vLLM, etc.)
schema_name = getattr(response_model, '__name__', 'structured_response')
json_schema = response_model.model_json_schema()
response_format = {
@@ -119,6 +205,21 @@ async def _generate_response(
'schema': json_schema,
},
}
else:
# Fallback mode: use json_object format with schema embedded in prompt
# (for providers that don't support json_schema, e.g., LiteLLM with Gemini)
response_format = {'type': 'json_object'}
if response_model is not None:
# Append schema to last user message (like base class does)
serialized_model = json.dumps(response_model.model_json_schema())
for i in range(len(openai_messages) - 1, -1, -1):
if openai_messages[i]['role'] == 'user':
content = openai_messages[i].get('content', '')
openai_messages[i]['content'] = (
f'{content}\n\nRespond with a JSON object in the following '
f'format:\n\n{serialized_model}'
)
break

response = await self.client.chat.completions.create(
model=self.model or DEFAULT_MODEL,
@@ -128,7 +229,7 @@
response_format=response_format, # type: ignore[arg-type]
)
result = response.choices[0].message.content or ''
return json.loads(result)
return self._extract_json(result)
except openai.RateLimitError as e:
raise RateLimitError from e
except Exception as e:
@@ -153,22 +254,50 @@ async def generate_response(
# Wrap entire operation in tracing span
with self.tracer.start_span('llm.generate') as span:
attributes = {
'llm.provider': 'openai',
'llm.provider': 'openai_generic',
'model.size': model_size.value,
'max_tokens': max_tokens,
'structured_output.mode': 'json_object'
if self._use_json_object_mode
else 'json_schema',
}
if prompt_name:
attributes['prompt.name'] = prompt_name
span.add_attributes(attributes)

retry_count = 0
last_error = None
# Track if we've already attempted fallback in this call
fallback_attempted_this_call = False

while retry_count <= self.MAX_RETRIES:
try:
response = await self._generate_response(
messages, response_model, max_tokens=max_tokens, model_size=model_size
)

# Check for schema-as-data pattern (only if using json_schema mode)
if (
response_model is not None
and not self._use_json_object_mode
and self._is_schema_returned_as_data(response)
):
if not fallback_attempted_this_call:
logger.warning(
'Provider returned schema definition instead of data. '
'Switching to json_object mode with embedded schema.'
)
self._use_json_object_mode = True
fallback_attempted_this_call = True
span.add_attributes({'structured_output.fallback_triggered': True})
# Retry immediately with fallback mode (does NOT count against MAX_RETRIES)
continue
else:
# Fallback already attempted but still got schema - treat as error
raise ValueError(
'Provider returned schema definition even in fallback mode'
)

return response
except (RateLimitError, RefusalError):
# These errors should not trigger retries
5 changes: 5 additions & 0 deletions mcp_server/.env.example
@@ -14,6 +14,11 @@ MODEL_NAME=gpt-4.1-mini
# Optional: Only needed for non-standard OpenAI endpoints
# OPENAI_BASE_URL=https://api.openai.com/v1

# OpenAI Generic Provider Configuration (LiteLLM, Ollama, vLLM)
# Use provider: "openai_generic" in config.yaml for OpenAI-compatible APIs
# OPENAI_BASE_URL=http://localhost:4000/v1 # LiteLLM proxy
# OPENAI_BASE_URL=http://localhost:11434/v1 # Ollama

# Optional: Group ID for namespacing graph data
# GROUP_ID=my_project

32 changes: 24 additions & 8 deletions mcp_server/README.md
@@ -21,7 +21,7 @@ The Graphiti MCP server provides comprehensive knowledge graph capabilities:
- **Group Management**: Organize and manage groups of related data with group_id filtering
- **Graph Maintenance**: Clear the graph and rebuild indices
- **Graph Database Support**: Multiple backend options including FalkorDB (default) and Neo4j
- **Multiple LLM Providers**: Support for OpenAI, Anthropic, Gemini, Groq, and Azure OpenAI
- **Multiple LLM Providers**: Support for OpenAI, Anthropic, Gemini, Groq, Azure OpenAI, and OpenAI-compatible APIs (LiteLLM, Ollama, vLLM)
- **Multiple Embedding Providers**: Support for OpenAI, Voyage, Sentence Transformers, and Gemini embeddings
- **Rich Entity Types**: Built-in entity types including Preferences, Requirements, Procedures, Locations, Events, Organizations, Documents, and more for structured knowledge extraction
- **HTTP Transport**: Default HTTP transport with MCP endpoint at `/mcp/` for broad client compatibility
@@ -164,23 +164,37 @@ server:
transport: "http" # Default. Options: stdio, http

llm:
provider: "openai" # or "anthropic", "gemini", "groq", "azure_openai"
provider: "openai" # or "anthropic", "gemini", "groq", "azure_openai", "openai_generic"
model: "gpt-4.1" # Default model

database:
provider: "falkordb" # Default. Options: "falkordb", "neo4j"
```

### Using Ollama for Local LLM
### Using OpenAI-Compatible APIs (LiteLLM, Ollama, vLLM)

To use Ollama with the MCP server, configure it as an OpenAI-compatible endpoint:
The `openai_generic` provider works with any OpenAI-compatible API, including LiteLLM proxy, Ollama, and vLLM. It uses the standard `/chat/completions` endpoint and falls back automatically for providers that don't support the `json_schema` response format.

**LiteLLM Proxy Example:**
```yaml
llm:
provider: "openai"
model: "gpt-oss:120b" # or your preferred Ollama model
api_base: "http://localhost:11434/v1"
api_key: "ollama" # dummy key required
provider: "openai_generic"
model: "gemini/gemini-2.0-flash" # or any LiteLLM model
providers:
openai:
api_key: "your-litellm-key"
base_url: "http://localhost:4000/v1"
```

**Ollama Example:**
```yaml
llm:
provider: "openai_generic"
model: "llama3.2" # or your preferred Ollama model
providers:
openai:
api_key: "ollama" # dummy key required
base_url: "http://localhost:11434/v1"

embedder:
provider: "sentence_transformers" # recommended for local setup
@@ -189,6 +203,8 @@

Make sure Ollama is running locally with: `ollama serve`

> **Note:** The `openai_generic` provider automatically handles JSON response parsing for providers that return extra text after JSON output.
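
For illustration, this is the kind of raw output the parser tolerates. The response text below is hypothetical, and the snippet is a conceptual sketch rather than the client's actual brace-matching implementation:

```python
import json

# Hypothetical provider output: a JSON object followed by trailing commentary.
raw = '{"entities": ["Acme Corp"], "confidence": 0.9}\nNote: I extracted only one entity.'

# Conceptually similar to OpenAIGenericClient._extract_json: parse the first
# complete JSON value and ignore whatever follows it.
obj, end = json.JSONDecoder().raw_decode(raw.strip())
print(obj)  # {'entities': ['Acme Corp'], 'confidence': 0.9}
```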

### Entity Types

Graphiti MCP Server includes built-in entity types for structured knowledge extraction. These entity types are always enabled and configured via the `entity_types` section in your `config.yaml`:
34 changes: 34 additions & 0 deletions mcp_server/src/services/factories.py
@@ -70,6 +70,14 @@
HAS_GROQ = True
except ImportError:
HAS_GROQ = False

try:
from graphiti_core.llm_client.openai_generic_client import OpenAIGenericClient

HAS_OPENAI_GENERIC = True
except ImportError:
HAS_OPENAI_GENERIC = False

from utils.utils import create_azure_credential_token_provider


@@ -246,6 +254,32 @@ def create(config: LLMConfig) -> LLMClient:
)
return GroqClient(config=llm_config)

case 'openai_generic':
# OpenAI Generic client - uses /chat/completions with response_format
# instead of /responses endpoint. Compatible with LiteLLM, Ollama, vLLM, etc.
if not HAS_OPENAI_GENERIC:
raise ValueError(
'OpenAI Generic client not available in current graphiti-core version'
)
if not config.providers.openai:
raise ValueError('OpenAI provider configuration not found')

api_key = config.providers.openai.api_key
base_url = config.providers.openai.api_url
_validate_api_key('OpenAI Generic', api_key, logger)

from graphiti_core.llm_client.config import LLMConfig as CoreLLMConfig

llm_config = CoreLLMConfig(
api_key=api_key,
base_url=base_url,
model=config.model,
small_model='gpt-4.1-mini',
temperature=config.temperature,
max_tokens=config.max_tokens,
)
return OpenAIGenericClient(config=llm_config)

case _:
raise ValueError(f'Unsupported LLM provider: {provider}')
