Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1540,6 +1540,24 @@ impl Session {
}
}

/// Returns the input if there was no task running to inject into
pub async fn inject_response_items(
&self,
input: Vec<ResponseInputItem>,
) -> Result<(), Vec<ResponseInputItem>> {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Some(at) => {
let mut ts = at.turn_state.lock().await;
for item in input {
ts.push_pending_input(item);
}
Ok(())
}
None => Err(input),
}
}

pub async fn get_pending_input(&self) -> Vec<ResponseInputItem> {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Expand Down
88 changes: 87 additions & 1 deletion codex-rs/core/src/event_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::WebSearchAction;
use codex_protocol::models::is_image_close_tag_text;
use codex_protocol::models::is_image_open_tag_text;
use codex_protocol::models::is_local_image_close_tag_text;
use codex_protocol::models::is_local_image_open_tag_text;
use codex_protocol::user_input::UserInput;
use tracing::warn;
use uuid::Uuid;
Expand All @@ -32,9 +36,17 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {

let mut content: Vec<UserInput> = Vec::new();

for content_item in message.iter() {
for (idx, content_item) in message.iter().enumerate() {
match content_item {
ContentItem::InputText { text } => {
if (is_local_image_open_tag_text(text) || is_image_open_tag_text(text))
&& (matches!(message.get(idx + 1), Some(ContentItem::InputImage { .. })))
|| (idx > 0
&& (is_local_image_close_tag_text(text) || is_image_close_tag_text(text))
&& matches!(message.get(idx - 1), Some(ContentItem::InputImage { .. })))
{
continue;
}
if is_session_prefix(text) || is_user_shell_command_text(text) {
return None;
}
Expand Down Expand Up @@ -177,6 +189,80 @@ mod tests {
}
}

#[test]
fn skips_local_image_label_text() {
let image_url = "".to_string();
let label = codex_protocol::models::local_image_open_tag_text(1);
let user_text = "Please review this image.".to_string();

let item = ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
},
ContentItem::InputText {
text: "</image>".to_string(),
},
ContentItem::InputText {
text: user_text.clone(),
},
],
};

let turn_item = parse_turn_item(&item).expect("expected user message turn item");

match turn_item {
TurnItem::UserMessage(user) => {
let expected_content = vec![
UserInput::Image { image_url },
UserInput::Text { text: user_text },
];
assert_eq!(user.content, expected_content);
}
other => panic!("expected TurnItem::UserMessage, got {other:?}"),
}
}

#[test]
fn skips_unnamed_image_label_text() {
let image_url = "".to_string();
let label = codex_protocol::models::image_open_tag_text();
let user_text = "Please review this image.".to_string();

let item = ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),
},
ContentItem::InputText {
text: user_text.clone(),
},
],
};

let turn_item = parse_turn_item(&item).expect("expected user message turn item");

match turn_item {
TurnItem::UserMessage(user) => {
let expected_content = vec![
UserInput::Image { image_url },
UserInput::Text { text: user_text },
];
assert_eq!(user.content, expected_content);
}
other => panic!("expected TurnItem::UserMessage, got {other:?}"),
}
}

#[test]
fn skips_user_instructions_and_env() {
let items = vec![
Expand Down
13 changes: 11 additions & 2 deletions codex-rs/core/src/tools/handlers/view_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use crate::tools::context::ToolPayload;
use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::user_input::UserInput;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::local_image_content_items_with_label_number;

pub struct ViewImageHandler;

Expand Down Expand Up @@ -63,8 +65,15 @@ impl ToolHandler for ViewImageHandler {
}
let event_path = abs_path.clone();

let content: Vec<ContentItem> =
local_image_content_items_with_label_number(&abs_path, None);
let input = ResponseInputItem::Message {
role: "user".to_string(),
content,
};

session
.inject_input(vec![UserInput::LocalImage { path: abs_path }])
.inject_response_items(vec![input])
.await
.map_err(|_| {
FunctionCallError::RespondToModel(
Expand Down
8 changes: 4 additions & 4 deletions codex-rs/core/src/tools/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::tools::handlers::PLAN_TOOL;
use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
use crate::tools::registry::ToolRegistryBuilder;
use codex_protocol::models::VIEW_IMAGE_TOOL_NAME;
use codex_protocol::openai_models::ApplyPatchToolType;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
Expand Down Expand Up @@ -403,10 +404,9 @@ fn create_view_image_tool() -> ToolSpec {
);

ToolSpec::Function(ResponsesApiTool {
name: "view_image".to_string(),
description:
"Attach a local image (by filesystem path) to the thread context for this turn."
.to_string(),
name: VIEW_IMAGE_TOOL_NAME.to_string(),
description: "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags)."
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
Expand Down
14 changes: 14 additions & 0 deletions codex-rs/core/tests/suite/view_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,20 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {

let image_message =
find_image_message(&body).expect("pending input image message not included in request");
let content_items = image_message
.get("content")
.and_then(Value::as_array)
.expect("image message has content array");
assert_eq!(
content_items.len(),
1,
"view_image should inject only the image content item (no tag/label text)"
);
assert_eq!(
content_items[0].get("type").and_then(Value::as_str),
Some("input_image"),
"view_image should inject only an input_image content item"
);
let image_url = image_message
.get("content")
.and_then(Value::as_array)
Expand Down
Loading
Loading