Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions backend/apps/ai/common/base/chunk_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ def process_chunks_batch(self, entities: list[Model]) -> int:
continue

chunk_texts = Chunk.split_text(full_content)
if not chunk_texts:
unique_chunk_texts = list(set(chunk_texts))

if not unique_chunk_texts:
self.stdout.write(f"No chunks created for {self.entity_name} {entity_key}")
continue

if chunks := create_chunks_and_embeddings(
chunk_texts=chunk_texts,
chunk_texts=unique_chunk_texts,
context=context,
openai_client=self.openai_client,
save=False,
Expand Down
6 changes: 4 additions & 2 deletions backend/apps/ai/common/base/context_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ def process_context_batch(self, entities: list[Model]) -> int:
):
processed += 1
entity_key = self.get_entity_key(entity)
self.stdout.write(f"Created context for {entity_key}")
self.stdout.write(f"Created/updated context for {entity_key}")
else:
entity_key = self.get_entity_key(entity)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is repetitive.

Copy link
Collaborator Author

@Dishant1804 Dishant1804 Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

self.stdout.write(self.style.ERROR(f"Failed to create context for {entity_key}"))
self.stdout.write(
self.style.ERROR(f"Failed to create/update context for {entity_key}")
)

return processed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,6 @@ class Command(BaseContextCommand):
key_field_name = "slack_message_id"
model_class = Message

def add_arguments(self, parser):
"""Override to use different default batch size for messages."""
super().add_arguments(parser)
parser.add_argument(
"--message-key",
type=str,
help="Process only the message with this key",
)
parser.add_argument(
"--all",
action="store_true",
help="Process all the messages",
)
parser.add_argument(
"--batch-size",
type=int,
default=100,
help="Number of messages to process in each batch",
)

def extract_content(self, entity: Message) -> tuple[str, str]:
"""Extract content from the message."""
return entity.cleaned_text or "", ""
Expand Down
82 changes: 71 additions & 11 deletions backend/tests/apps/ai/common/base/chunk_command_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,16 +211,21 @@ def test_process_chunks_batch_success(
mock_create_chunks.return_value = mock_chunks
command.openai_client = Mock()

with patch.object(command.stdout, "write") as mock_write:
with (
patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter,
patch.object(command.stdout, "write") as mock_write,
):
mock_qs = Mock()
mock_qs.values_list.return_value = []
mock_chunk_filter.return_value = mock_qs
result = command.process_chunks_batch([mock_entity])

assert result == 1
mock_create_chunks.assert_called_once_with(
chunk_texts=["chunk1", "chunk2", "chunk3"],
context=mock_context,
openai_client=command.openai_client,
save=False,
)
_, kwargs = mock_create_chunks.call_args
assert set(kwargs["chunk_texts"]) == {"chunk1", "chunk2", "chunk3"}
assert kwargs["context"] == mock_context
assert kwargs["openai_client"] == command.openai_client
assert kwargs["save"] is False
mock_bulk_save.assert_called_once_with(mock_chunks)
mock_write.assert_has_calls(
[
Expand Down Expand Up @@ -261,7 +266,13 @@ def test_process_chunks_batch_multiple_entities(
mock_create_chunks.return_value = mock_chunks[:2]
command.openai_client = Mock()

with patch.object(command.stdout, "write"):
with (
patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter,
patch.object(command.stdout, "write"),
):
mock_qs = Mock()
mock_qs.values_list.return_value = []
mock_chunk_filter.return_value = mock_qs
result = command.process_chunks_batch(entities)

assert result == 3
Expand Down Expand Up @@ -325,14 +336,22 @@ def test_process_chunks_batch_content_combination(
"extract_content",
return_value=("prose", "metadata"),
):
command.process_chunks_batch([mock_entity])
with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter:
mock_qs = Mock()
mock_qs.values_list.return_value = []
mock_chunk_filter.return_value = mock_qs
command.process_chunks_batch([mock_entity])

expected_content = "metadata\n\nprose"
mock_split_text.assert_called_once_with(expected_content)

mock_split_text.reset_mock()
with patch.object(command, "extract_content", return_value=("prose", "")):
command.process_chunks_batch([mock_entity])
with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter:
mock_qs = Mock()
mock_qs.values_list.return_value = []
mock_chunk_filter.return_value = mock_qs
command.process_chunks_batch([mock_entity])

mock_split_text.assert_called_with("prose")

Expand Down Expand Up @@ -402,11 +421,52 @@ def test_process_chunks_batch_metadata_only_content(
"extract_content",
return_value=("", "metadata"),
):
command.process_chunks_batch([mock_entity])
with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter:
mock_qs = Mock()
mock_qs.values_list.return_value = []
mock_chunk_filter.return_value = mock_qs
command.process_chunks_batch([mock_entity])

mock_split_text.assert_called_once_with("metadata\n\n")
mock_bulk_save.assert_called_once()

@patch("apps.ai.common.base.chunk_command.ContentType.objects.get_for_model")
@patch("apps.ai.common.base.chunk_command.Context.objects.filter")
@patch("apps.ai.models.chunk.Chunk.split_text")
@patch("apps.ai.common.base.chunk_command.create_chunks_and_embeddings")
@patch("apps.ai.models.chunk.Chunk.bulk_save")
def test_process_chunks_batch_with_duplicates(
self,
mock_bulk_save,
mock_create_chunks,
mock_split_text,
mock_context_filter,
mock_get_content_type,
command,
mock_entity,
mock_context,
mock_content_type,
mock_chunks,
):
"""Test that duplicate chunk texts are filtered out before processing."""
mock_get_content_type.return_value = mock_content_type
mock_context_filter.return_value.first.return_value = mock_context
mock_split_text.return_value = ["chunk1", "chunk2", "chunk1", "chunk3", "chunk2"]
mock_create_chunks.return_value = mock_chunks
command.openai_client = Mock()

with patch.object(command.stdout, "write"):
result = command.process_chunks_batch([mock_entity])

assert result == 1
mock_split_text.assert_called_once()
_, kwargs = mock_create_chunks.call_args
assert set(kwargs["chunk_texts"]) == {"chunk1", "chunk2", "chunk3"}
assert kwargs["context"] == mock_context
assert kwargs["openai_client"] == command.openai_client
assert kwargs["save"] is False
mock_bulk_save.assert_called_once_with(mock_chunks)

def test_process_chunks_batch_whitespace_only_content(
self, command, mock_entity, mock_context, mock_content_type
):
Expand Down
12 changes: 6 additions & 6 deletions backend/tests/apps/ai/common/base/context_command_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_process_context_batch_success(
entity=mock_entity,
source="owasp_test_entity",
)
mock_write.assert_called_once_with("Created context for test-key-123")
mock_write.assert_called_once_with("Created/updated context for test-key-123")

@patch("apps.ai.common.base.context_command.Context")
def test_process_context_batch_creation_fails(self, mock_context_class, command, mock_entity):
Expand All @@ -130,7 +130,7 @@ def test_process_context_batch_creation_fails(self, mock_context_class, command,
mock_context_class.update_data.assert_called_once()
mock_write.assert_called_once()
call_args = mock_write.call_args[0][0]
assert "Failed to create context for test-key-123" in str(call_args)
assert "Failed to create/update context for test-key-123" in str(call_args)

@patch("apps.ai.common.base.context_command.Context")
def test_process_context_batch_multiple_entities(
Expand Down Expand Up @@ -184,9 +184,9 @@ def test_process_context_batch_mixed_success_failure(
assert mock_write.call_count == 3

write_calls = mock_write.call_args_list
assert "Created context for test-key-1" in str(write_calls[0])
assert "Failed to create context for test-key-2" in str(write_calls[1])
assert "Created context for test-key-3" in str(write_calls[2])
assert "Created/updated context for test-key-1" in str(write_calls[0])
assert "Failed to create/update context for test-key-2" in str(write_calls[1])
assert "Created/updated context for test-key-3" in str(write_calls[2])

def test_process_context_batch_content_combination(self, command, mock_entity, mock_context):
"""Test that metadata and prose content are properly combined."""
Expand Down Expand Up @@ -261,7 +261,7 @@ def test_get_entity_key_usage(self, command, mock_context):
with patch.object(command.stdout, "write") as mock_write:
command.process_context_batch([entity])

mock_write.assert_called_once_with("Created context for custom-entity-key")
mock_write.assert_called_once_with("Created/updated context for custom-entity-key")

def test_process_context_batch_empty_list(self, command):
"""Test process_context_batch with empty entity list."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,9 @@ def test_process_context_batch_success(self, command, mock_committee):
entity=mock_committee,
source="owasp_committee",
)
mock_write.assert_called_once_with("Created context for test-committee")
mock_write.assert_called_once_with(
"Created/updated context for test-committee"
)

def test_process_context_batch_empty_content(self, command, mock_committee):
"""Test context batch processing with empty content."""
Expand Down Expand Up @@ -206,7 +208,7 @@ def test_process_context_batch_create_failure(self, command, mock_committee):

assert result == 0
mock_error.assert_called_once_with(
"Failed to create context for test-committee"
"Failed to create/update context for test-committee"
)
mock_write.assert_called_once_with("ERROR: Failed")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,9 @@ def test_add_arguments(self, command):
parser = Mock()
command.add_arguments(parser)

assert parser.add_argument.call_count == 6
assert parser.add_argument.call_count == 3
calls = parser.add_argument.call_args_list

# First 3 calls are from parent class (BaseAICommand)
assert calls[0][0] == ("--message-key",)
assert calls[0][1]["type"] is str
assert "Process only the message with this key" in calls[0][1]["help"]
Expand All @@ -86,19 +85,5 @@ def test_add_arguments(self, command):

assert calls[2][0] == ("--batch-size",)
assert calls[2][1]["type"] is int
assert calls[2][1]["default"] == 50 # Default from parent class
assert calls[2][1]["default"] == 50
assert "Number of messages to process in each batch" in calls[2][1]["help"]

# Next 3 calls are from the command itself (duplicates with different defaults)
assert calls[3][0] == ("--message-key",)
assert calls[3][1]["type"] is str
assert "Process only the message with this key" in calls[3][1]["help"]

assert calls[4][0] == ("--all",)
assert calls[4][1]["action"] == "store_true"
assert "Process all the messages" in calls[4][1]["help"]

assert calls[5][0] == ("--batch-size",)
assert calls[5][1]["type"] is int
assert calls[5][1]["default"] == 100 # Overridden default from command
assert "Number of messages to process in each batch" in calls[5][1]["help"]