Skip to content
15 changes: 15 additions & 0 deletions .github/codeql/codeql-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CodeQL configuration for code scanning.
# See: https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning
name: "CodeQL config"

paths-ignore:
- "package/@stackframe/**"
- "node_modules/**"
- "**/node_modules/**"

# Exclude py/path-injection: paths handled in backend/app/utils/file_utils.py
# are validated by safe_resolve_path (resolved and constrained under a base
# directory) before use, but the query does not recognize this validation and
# reports false positives.
# NOTE: query-filters apply repository-wide — this suppresses py/path-injection
# findings in ALL files, not only file_utils.py. New path-handling code will
# not be flagged by this query; review such code manually.
query-filters:
- exclude:
id: py/path-injection
7 changes: 1 addition & 6 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,7 @@ jobs:
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
config: |
paths-ignore:
# Third-party packages (vendored from external sources)
- 'package/@stackframe/**'
- 'node_modules/**'
- '**/node_modules/**'
config-file: ./.github/codeql/codeql-config.yml
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
Expand Down
118 changes: 40 additions & 78 deletions backend/app/service/chat_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import asyncio
import datetime
import logging
import os
import platform
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -57,7 +56,7 @@
set_current_task_id,
)
from app.utils.event_loop_utils import set_main_event_loop
from app.utils.file_utils import get_working_directory
from app.utils.file_utils import get_working_directory, safe_list_directory
from app.utils.server.sync_step import sync_step
from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback
from app.utils.workforce import Workforce
Expand Down Expand Up @@ -91,41 +90,24 @@ def format_task_context(
# Skip file listing if requested
if not skip_files:
working_directory = task_data.get("working_directory")
skip_ext = (".pyc", ".tmp")
if working_directory:
try:
if os.path.exists(working_directory):
generated_files = []
for root, dirs, files in os.walk(working_directory):
dirs[:] = [
d
for d in dirs
if not d.startswith(".")
and d
not in ["node_modules", "__pycache__", "venv"]
]
for file in files:
if not file.startswith(".") and not file.endswith(
skip_ext
):
file_path = os.path.join(root, file)
absolute_path = os.path.abspath(file_path)

# Only add if not seen before
if (
seen_files is None
or absolute_path not in seen_files
):
generated_files.append(absolute_path)
if seen_files is not None:
seen_files.add(absolute_path)

if generated_files:
context_parts.append(
"Generated Files from Previous Task:"
)
for file_path in sorted(generated_files):
context_parts.append(f" - {file_path}")
generated_files = safe_list_directory(
working_directory,
base=working_directory,
skip_dirs={"node_modules", "__pycache__", "venv"},
skip_extensions=(".pyc", ".tmp"),
skip_prefix=".",
)
if seen_files is not None:
generated_files = [
p for p in generated_files if p not in seen_files
]
seen_files.update(generated_files)
if generated_files:
context_parts.append("Generated Files from Previous Task:")
for file_path in sorted(generated_files):
context_parts.append(f" - {file_path}")
except Exception as e:
logger.warning(f"Failed to collect generated files: {e}")

Expand Down Expand Up @@ -171,31 +153,20 @@ def collect_previous_task_context(
f"Previous Task Result:\n{previous_task_result}\n"
)

# Collect generated files from working directory
# Collect generated files from working directory (safe listing)
try:
if os.path.exists(working_directory):
generated_files = []
for root, dirs, files in os.walk(working_directory):
dirs[:] = [
d
for d in dirs
if not d.startswith(".")
and d not in ["node_modules", "__pycache__", "venv"]
]
skip_ext = (".pyc", ".tmp")
for file in files:
if not file.startswith(".") and not file.endswith(
skip_ext
):
file_path = os.path.join(root, file)
absolute_path = os.path.abspath(file_path)
generated_files.append(absolute_path)

if generated_files:
context_parts.append("Generated Files from Previous Task:")
for file_path in sorted(generated_files):
context_parts.append(f" - {file_path}")
context_parts.append("")
generated_files = safe_list_directory(
working_directory,
base=working_directory,
skip_dirs={"node_modules", "__pycache__", "venv"},
skip_extensions=(".pyc", ".tmp"),
skip_prefix=".",
)
if generated_files:
context_parts.append("Generated Files from Previous Task:")
for file_path in sorted(generated_files):
context_parts.append(f" - {file_path}")
context_parts.append("")
except Exception as e:
logger.warning(f"Failed to collect generated files: {e}")

Expand Down Expand Up @@ -271,30 +242,21 @@ def build_conversation_context(
context += f"Assistant: {entry['content']}\n\n"

if working_directories:
all_generated_files = set() # Use set to avoid duplicates
all_generated_files: set[str] = set()
for working_directory in working_directories:
try:
if os.path.exists(working_directory):
for root, dirs, files in os.walk(working_directory):
dirs[:] = [
d
for d in dirs
if not d.startswith(".")
and d
not in ["node_modules", "__pycache__", "venv"]
]
for file in files:
if not file.startswith(
"."
) and not file.endswith((".pyc", ".tmp")):
file_path = os.path.join(root, file)
absolute_path = os.path.abspath(file_path)
all_generated_files.add(absolute_path)
files_list = safe_list_directory(
working_directory,
base=working_directory,
skip_dirs={"node_modules", "__pycache__", "venv"},
skip_extensions=(".pyc", ".tmp"),
skip_prefix=".",
)
all_generated_files.update(files_list)
except Exception as e:
logger.warning(
"Failed to collect generated "
f"files from {working_directory}"
f": {e}"
f"files from {working_directory}: {e}"
)

if all_generated_files:
Expand Down
Loading