Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Example Environment Variables for Lightspeed Stack
# Copy this file to .env and set appropriate values

# Required: User anonymization pepper (set to a secure random value)
# This is used for HMAC-based user ID hashing to protect user privacy
USER_ANON_PEPPER=your-secure-random-string-here

# Optional: OpenAI API Key (if using OpenAI models)
OPENAI_API_KEY=your-openai-api-key

# Optional: Other environment variables as needed for your configuration
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). The service i
- please note that currently Python 3.14 is not officially supported
- all sources are made (backward) compatible with Python 3.12; it is checked on CI

## Environment Variables

The following environment variable is required for the service to start:

* `USER_ANON_PEPPER` - A secure random string used for user anonymization. Set this to a cryptographically secure random value:
```bash
export USER_ANON_PEPPER="your-secure-random-string-here"
```

**Security Note**: This value should be treated as a secret and kept secure. It's used for HMAC-based user ID hashing to protect user privacy while enabling usage analytics.

# Installation

Installation steps depend on the operating system. Please look at the instructions for your system:
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ services:
- ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- USER_ANON_PEPPER=${USER_ANON_PEPPER:-default-pepper-for-development-only}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Remove insecure default pepper fallback.

Providing a default pepper invites accidental production deployments with a shared key, breaking privacy guarantees and cross-environment isolation.

Use a required variable and fail fast if it's missing (compose will error):

-      - USER_ANON_PEPPER=${USER_ANON_PEPPER:-default-pepper-for-development-only}
+      - USER_ANON_PEPPER=${USER_ANON_PEPPER}

Optional (outside this line): keep a dev-only override in docker-compose.override.yaml or an .env file that is not committed.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
- USER_ANON_PEPPER=${USER_ANON_PEPPER:-default-pepper-for-development-only}
- USER_ANON_PEPPER=${USER_ANON_PEPPER}
🤖 Prompt for AI Agents
In docker-compose.yaml around line 33, the service currently falls back to a
hardcoded default pepper which is insecure; remove the default fallback so the
env interpolation uses USER_ANON_PEPPER without a default (so docker-compose
will error if it's missing) and ensure any developer-only value is provided via
a non-committed .env or docker-compose.override.yaml instead.

depends_on:
llama-stack:
condition: service_healthy
Expand Down
15 changes: 12 additions & 3 deletions src/app/endpoints/conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from app.database import get_session
from utils.endpoints import check_configuration_loaded, validate_conversation_ownership
from utils.suid import check_suid
from utils.user_anonymization import get_anonymous_user_id

logger = logging.getLogger("app.endpoints.handlers")
router = APIRouter(tags=["conversations"])
Expand Down Expand Up @@ -154,13 +155,21 @@ def get_conversations_list_endpoint_handler(

user_id, _, _ = auth

logger.info("Retrieving conversations for user %s", user_id)
# Get anonymous user ID for database lookup
anonymous_user_id = get_anonymous_user_id(user_id)

logger.info(
"Retrieving conversations for anonymous user %s",
anonymous_user_id,
)

with get_session() as session:
try:
# Get all conversations for this user
# Get all conversations for this user using anonymous ID
user_conversations = (
session.query(UserConversation).filter_by(user_id=user_id).all()
session.query(UserConversation)
.filter_by(anonymous_user_id=anonymous_user_id)
.all()
)

# Return conversation summaries with metadata
Expand Down
16 changes: 11 additions & 5 deletions src/app/endpoints/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,17 @@
from configuration import configuration
from app.database import get_session
import metrics
import constants
from models.database.conversations import UserConversation
from models.responses import QueryResponse, UnauthorizedResponse, ForbiddenResponse
from models.requests import QueryRequest, Attachment
import constants
from utils.endpoints import (
check_configuration_loaded,
get_agent,
get_system_prompt,
validate_conversation_ownership,
)
from utils.user_anonymization import get_anonymous_user_id
from utils.mcp_headers import mcp_headers_dependency, handle_mcp_headers_with_toolgroups
from utils.transcripts import store_transcript
from utils.types import TurnSummary
Expand Down Expand Up @@ -76,25 +77,30 @@ def is_transcripts_enabled() -> bool:
def persist_user_conversation_details(
user_id: str, conversation_id: str, model: str, provider_id: str
) -> None:
"""Associate conversation to user in the database."""
"""Associate conversation to user in the database using anonymous user ID."""
# Get anonymous user ID for database storage
anonymous_user_id = get_anonymous_user_id(user_id)

with get_session() as session:
existing_conversation = (
session.query(UserConversation)
.filter_by(id=conversation_id, user_id=user_id)
.filter_by(id=conversation_id, anonymous_user_id=anonymous_user_id)
.first()
)

if not existing_conversation:
conversation = UserConversation(
id=conversation_id,
user_id=user_id,
anonymous_user_id=anonymous_user_id,
last_used_model=model,
last_used_provider=provider_id,
message_count=1,
)
session.add(conversation)
logger.debug(
"Associated conversation %s to user %s", conversation_id, user_id
"Associated conversation %s to anonymous user %s",
conversation_id,
anonymous_user_id,
)
else:
existing_conversation.last_used_model = model
Expand Down
8 changes: 5 additions & 3 deletions src/models/database/conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime

from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import DateTime, func
from sqlalchemy import DateTime, func, ForeignKey

from models.database.base import Base

Expand All @@ -16,8 +16,10 @@ class UserConversation(Base): # pylint: disable=too-few-public-methods
# The conversation ID
id: Mapped[str] = mapped_column(primary_key=True)

# The user ID associated with the conversation
user_id: Mapped[str] = mapped_column(index=True)
# The anonymous user ID associated with the conversation
anonymous_user_id: Mapped[str] = mapped_column(
ForeignKey("user_mapping.anonymous_id"), index=True, nullable=False
)

# The last provider/model used in the conversation
last_used_model: Mapped[str] = mapped_column()
Expand Down
32 changes: 32 additions & 0 deletions src/models/database/user_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""User ID anonymization mapping model."""

from datetime import datetime

from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import DateTime, func, Index, String

from models.database.base import Base


class UserMapping(Base): # pylint: disable=too-few-public-methods
"""Model for mapping real user IDs to anonymous UUIDs."""

__tablename__ = "user_mapping"

# Anonymous UUID used for all storage/analytics (primary key)
anonymous_id: Mapped[str] = mapped_column(
String(36), primary_key=True, nullable=False
)

# Original user ID from authentication (hashed for security)
user_id_hash: Mapped[str] = mapped_column(
String(64), index=True, unique=True, nullable=False
)

created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
server_default=func.now(), # pylint: disable=not-callable
)

# Index for efficient lookups
__table_args__ = (Index("ix_user_mapping_hash_lookup", "user_id_hash"),)
22 changes: 19 additions & 3 deletions src/utils/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,37 @@
from configuration import AppConfig
from utils.suid import get_suid
from utils.types import GraniteToolParser
from utils.user_anonymization import get_anonymous_user_id


logger = logging.getLogger("utils.endpoints")


def validate_conversation_ownership(
user_id: str, conversation_id: str
) -> UserConversation | None:
"""Validate that the conversation belongs to the user."""
) -> UserConversation:
"""
Validate that the conversation belongs to the user using anonymous ID lookup.

Raises HTTPException(403) if conversation is not found or doesn't belong to user.
Returns the conversation object if valid.
"""
# Get anonymous user ID for database lookup
anonymous_user_id = get_anonymous_user_id(user_id)

with get_session() as session:
conversation = (
session.query(UserConversation)
.filter_by(id=conversation_id, user_id=user_id)
.filter_by(id=conversation_id, anonymous_user_id=anonymous_user_id)
.first()
)

if not conversation:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Forbidden: conversation does not belong to user",
)

return conversation


Expand Down
20 changes: 14 additions & 6 deletions src/utils/transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@
from models.requests import Attachment, QueryRequest
from utils.suid import get_suid
from utils.types import TurnSummary
from utils.user_anonymization import get_anonymous_user_id

logger = logging.getLogger("utils.transcripts")


def construct_transcripts_path(user_id: str, conversation_id: str) -> Path:
"""Construct path to transcripts."""
def construct_transcripts_path(anonymous_user_id: str, conversation_id: str) -> Path:
"""Construct path to transcripts using anonymous user ID."""
# these two normalizations are required by Snyk as it detects
# this Path sanitization pattern
uid = os.path.normpath("/" + user_id).lstrip("/")
uid = os.path.normpath("/" + anonymous_user_id).lstrip("/")
cid = os.path.normpath("/" + conversation_id).lstrip("/")
file_path = (
configuration.user_data_collection_configuration.transcripts_storage or ""
Expand All @@ -46,7 +47,7 @@ def store_transcript( # pylint: disable=too-many-arguments,too-many-positional-
"""Store transcript in the local filesystem.

Args:
user_id: The user ID (UUID).
user_id: The original user ID from authentication (will be anonymized).
conversation_id: The conversation ID (UUID).
query_is_valid: The result of the query validation.
query: The query (without attachments).
Expand All @@ -56,7 +57,14 @@ def store_transcript( # pylint: disable=too-many-arguments,too-many-positional-
truncated: The flag indicating if the history was truncated.
attachments: The list of `Attachment` objects.
"""
transcripts_path = construct_transcripts_path(user_id, conversation_id)
# Get anonymous user ID for storage
anonymous_user_id = get_anonymous_user_id(user_id)
logger.debug(
"Storing transcript for anonymous user %s",
anonymous_user_id,
)

transcripts_path = construct_transcripts_path(anonymous_user_id, conversation_id)
transcripts_path.mkdir(parents=True, exist_ok=True)

data_to_store = {
Expand All @@ -65,7 +73,7 @@ def store_transcript( # pylint: disable=too-many-arguments,too-many-positional-
"model": model_id,
"query_provider": query_request.provider,
"query_model": query_request.model,
"user_id": user_id,
"anonymous_user_id": anonymous_user_id, # Store anonymous ID only
"conversation_id": conversation_id,
"timestamp": datetime.now(UTC).isoformat(),
},
Expand Down
Loading
Loading