feat(codex): support gpt-5.3-codex commentary rendering (#201)

This commit is contained in:
banteg
2026-02-11 01:33:08 +04:00
committed by GitHub
parent 01df3f5ea0
commit 8a75dc8ca5
6 changed files with 326 additions and 7 deletions
@@ -0,0 +1,8 @@
{"type":"thread.started","thread_id":"0199a213-81c0-7800-8aa1-bbab2a035a53"}
{"type":"turn.started"}
{"type":"item.completed","item":{"id":"item_1","type":"agent_message","phase":"commentary","text":"Inspecting repository state."}}
{"type":"item.completed","item":{"id":"item_2","type":"agent_message","phase":"final_answer","text":"Implemented the requested changes."}}
{"type":"item.started","item":{"id":"item_3","type":"collab_tool_call","tool":"spawn_agent","sender_thread_id":"main-thread","receiver_thread_ids":["worker-thread"],"prompt":"Find failing tests","agents_states":{},"status":"in_progress"}}
{"type":"item.completed","item":{"id":"item_3","type":"collab_tool_call","tool":"spawn_agent","sender_thread_id":"main-thread","receiver_thread_ids":["worker-thread"],"prompt":"Find failing tests","agents_states":{"worker-thread":{"status":"completed","message":"done"}},"status":"completed"}}
{"type":"item.completed","item":{"id":"item_4","type":"future_item","foo":"bar","count":2}}
{"type":"turn.completed","usage":{"input_tokens":10,"cached_input_tokens":0,"output_tokens":5}}
+49
View File
@@ -9,10 +9,12 @@ from takopi.config import ConfigError
from takopi.events import EventFactory
from takopi.model import ActionEvent, CompletedEvent, StartedEvent
from takopi.runners.codex import (
_AgentMessageSummary,
CodexRunner,
_format_change_summary,
_normalize_change_list,
_parse_reconnect_message,
_select_final_answer,
_short_tool_name,
_summarize_todo_list,
_summarize_tool_result,
@@ -73,6 +75,39 @@ def test_summarize_todo_list_and_title() -> None:
assert _todo_title(_summarize_todo_list("nope")) == "todo"
def test_select_final_answer() -> None:
    """Pin the answer `_select_final_answer` returns for each message mix.

    Cases asserted below:
    * commentary followed by a ``final_answer`` message -> the final text;
    * only phase-less messages -> the text of the last one;
    * a lone ``commentary`` message -> ``None``;
    * a lone message with an unrecognised phase -> ``None``.
    """
    with_final = [
        _AgentMessageSummary(text="working", phase="commentary"),
        _AgentMessageSummary(text="done", phase="final_answer"),
    ]
    assert _select_final_answer(with_final) == "done"

    phaseless = [
        _AgentMessageSummary(text="first", phase=None),
        _AgentMessageSummary(text="second", phase=None),
    ]
    assert _select_final_answer(phaseless) == "second"

    commentary_only = [_AgentMessageSummary(text="working", phase="commentary")]
    assert _select_final_answer(commentary_only) is None

    unknown_phase = [_AgentMessageSummary(text="intermediate", phase="foobar")]
    assert _select_final_answer(unknown_phase) is None
def test_translate_codex_events_for_items() -> None:
factory = EventFactory("codex")
event = codex_schema.ItemStarted(
@@ -100,6 +135,20 @@ def test_translate_codex_events_for_items() -> None:
assert out[0].action.kind == "note"
assert out[0].action.title == "thinking"
event = codex_schema.ItemCompleted(
item=codex_schema.AgentMessageItem(
id="m1",
text="working",
phase="commentary",
)
)
out = translate_codex_event(event, title="Codex", factory=factory)
assert isinstance(out[0], ActionEvent)
assert out[0].action.kind == "note"
assert out[0].action.title == "working"
assert out[0].phase == "completed"
assert out[0].ok is True
event = codex_schema.ItemUpdated(
item=codex_schema.TodoListItem(
id="t1",
+12
View File
@@ -36,9 +36,21 @@ def _decode_fixture(name: str) -> list[str]:
"fixture",
[
"codex_exec_json_all_formats.jsonl",
"codex_exec_json_phase_and_unknown.jsonl",
],
)
def test_codex_schema_parses_fixture(fixture: str) -> None:
    """Fail if `_decode_fixture` reports any errors for the given fixture.

    The failure message names the fixture, the error count, and at most the
    first five error strings.
    """
    problems = _decode_fixture(fixture)
    preview = "; ".join(problems[:5])
    assert not problems, f"{fixture} had {len(problems)} errors: " + preview
def test_codex_schema_decodes_unknown_item_type() -> None:
    """An ``item.completed`` event with an unrecognised item type still decodes.

    The decoded item is expected to be an ``UnknownItem`` exposing the
    original type string as ``item_type`` and the remaining fields
    (``foo`` and ``count`` here) as ``payload``.
    """
    raw_line = (
        '{"type":"item.completed","item":{"id":"item_99","type":"future_item",'
        '"foo":"bar","count":2}}'
    )
    decoded = codex_schema.decode_event(raw_line)
    assert isinstance(decoded, codex_schema.ItemCompleted)

    item = decoded.item
    assert isinstance(item, codex_schema.UnknownItem)
    assert item.item_type == "future_item"
    assert item.payload == {"foo": "bar", "count": 2}
+119
View File
@@ -347,6 +347,125 @@ async def test_codex_runner_reconnect_notice_updates_phase(tmp_path) -> None:
assert isinstance(seen[3], CompletedEvent)
@pytest.mark.anyio
async def test_codex_runner_prefers_final_answer_phase(tmp_path) -> None:
    """A commentary message becomes a note; the final_answer text is the answer.

    A stand-in ``codex`` executable is written into ``tmp_path``. It prints a
    thread start, a turn start, one ``commentary`` agent message, one
    ``final_answer`` agent message and a turn completion as JSON lines.
    """
    thread_id = "019b73c4-0c3f-7701-a0bb-aac6b4d8a3bc"
    script = (
        "#!/usr/bin/env python3\n"
        "import json\n"
        "import sys\n"
        "\n"
        "sys.stdin.read()\n"
        f"print(json.dumps({{'type': 'thread.started', 'thread_id': '{thread_id}'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.started'}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_0', 'type': 'agent_message', 'phase': 'commentary', 'text': 'Working through the task.'}}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_1', 'type': 'agent_message', 'phase': 'final_answer', 'text': 'Done.'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.completed', 'usage': {'input_tokens': 1, 'cached_input_tokens': 0, 'output_tokens': 1}}), flush=True)\n"
    )
    fake_codex = tmp_path / "codex"
    fake_codex.write_text(script, encoding="utf-8")
    fake_codex.chmod(0o755)  # must be executable for the runner to spawn it

    runner = CodexRunner(codex_cmd=str(fake_codex), extra_args=[])
    events = []
    async for evt in runner.run("hi", None):
        events.append(evt)

    assert len(events) == 4
    started, turn, note, completed = events

    assert isinstance(started, StartedEvent)

    assert isinstance(turn, ActionEvent)
    assert turn.action.kind == "turn"

    # The commentary message is surfaced as a completed, successful note.
    assert isinstance(note, ActionEvent)
    assert note.action.kind == "note"
    assert note.action.title == "Working through the task."
    assert note.phase == "completed"
    assert note.ok is True

    # Only the final_answer message text ends up as the run's answer.
    assert isinstance(completed, CompletedEvent)
    assert completed.answer == "Done."
@pytest.mark.anyio
async def test_codex_runner_legacy_agent_message_no_phase(tmp_path) -> None:
    """With no ``phase`` on any agent message, the last message is the answer.

    The stand-in ``codex`` script emits two phase-less agent messages
    (``first`` then ``second``) between the turn start and completion.
    """
    thread_id = "019b73c4-0c3f-7701-a0bb-aac6b4d8a3bc"
    script = (
        "#!/usr/bin/env python3\n"
        "import json\n"
        "import sys\n"
        "\n"
        "sys.stdin.read()\n"
        f"print(json.dumps({{'type': 'thread.started', 'thread_id': '{thread_id}'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.started'}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_0', 'type': 'agent_message', 'text': 'first'}}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_1', 'type': 'agent_message', 'text': 'second'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.completed', 'usage': {'input_tokens': 1, 'cached_input_tokens': 0, 'output_tokens': 1}}), flush=True)\n"
    )
    fake_codex = tmp_path / "codex"
    fake_codex.write_text(script, encoding="utf-8")
    fake_codex.chmod(0o755)  # must be executable for the runner to spawn it

    runner = CodexRunner(codex_cmd=str(fake_codex), extra_args=[])
    events = []
    async for evt in runner.run("hi", None):
        events.append(evt)

    # First CompletedEvent in the stream carries the selected answer.
    final_event = next(e for e in events if isinstance(e, CompletedEvent))
    assert final_event.answer == "second"
@pytest.mark.anyio
async def test_codex_runner_collab_tool_call_does_not_break_stream(tmp_path) -> None:
    """``collab_tool_call`` items in the stream do not prevent completion.

    The stand-in ``codex`` script emits a started/completed pair for a
    ``spawn_agent`` collab tool call, then a plain agent message ``ok``; the
    run must still finish with that message as its answer.
    """
    thread_id = "019b73c4-0c3f-7701-a0bb-aac6b4d8a3bc"
    script = (
        "#!/usr/bin/env python3\n"
        "import json\n"
        "import sys\n"
        "\n"
        "sys.stdin.read()\n"
        f"print(json.dumps({{'type': 'thread.started', 'thread_id': '{thread_id}'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.started'}), flush=True)\n"
        "print(json.dumps({'type': 'item.started', 'item': {'id': 'item_0', 'type': 'collab_tool_call', 'tool': 'spawn_agent', 'sender_thread_id': 'main', 'receiver_thread_ids': ['worker'], 'prompt': 'check tests', 'agents_states': {}, 'status': 'in_progress'}}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_0', 'type': 'collab_tool_call', 'tool': 'spawn_agent', 'sender_thread_id': 'main', 'receiver_thread_ids': ['worker'], 'prompt': 'check tests', 'agents_states': {'worker': {'status': 'completed', 'message': 'ok'}}, 'status': 'completed'}}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_1', 'type': 'agent_message', 'text': 'ok'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.completed', 'usage': {'input_tokens': 1, 'cached_input_tokens': 0, 'output_tokens': 1}}), flush=True)\n"
    )
    fake_codex = tmp_path / "codex"
    fake_codex.write_text(script, encoding="utf-8")
    fake_codex.chmod(0o755)  # must be executable for the runner to spawn it

    runner = CodexRunner(codex_cmd=str(fake_codex), extra_args=[])
    events = []
    async for evt in runner.run("hi", None):
        events.append(evt)

    # First CompletedEvent in the stream carries the selected answer.
    final_event = next(e for e in events if isinstance(e, CompletedEvent))
    assert final_event.answer == "ok"
@pytest.mark.anyio
async def test_codex_runner_unknown_item_type_does_not_break_stream(tmp_path) -> None:
    """An item of an unrecognised type does not prevent a successful run.

    The stand-in ``codex`` script emits a completed ``future_item`` before a
    plain agent message ``ok``; the run must still complete with ``ok`` set
    and that message as its answer.
    """
    thread_id = "019b73c4-0c3f-7701-a0bb-aac6b4d8a3bc"
    script = (
        "#!/usr/bin/env python3\n"
        "import json\n"
        "import sys\n"
        "\n"
        "sys.stdin.read()\n"
        f"print(json.dumps({{'type': 'thread.started', 'thread_id': '{thread_id}'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.started'}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_0', 'type': 'future_item', 'foo': 'bar'}}), flush=True)\n"
        "print(json.dumps({'type': 'item.completed', 'item': {'id': 'item_1', 'type': 'agent_message', 'text': 'ok'}}), flush=True)\n"
        "print(json.dumps({'type': 'turn.completed', 'usage': {'input_tokens': 1, 'cached_input_tokens': 0, 'output_tokens': 1}}), flush=True)\n"
    )
    fake_codex = tmp_path / "codex"
    fake_codex.write_text(script, encoding="utf-8")
    fake_codex.chmod(0o755)  # must be executable for the runner to spawn it

    runner = CodexRunner(codex_cmd=str(fake_codex), extra_args=[])
    events = []
    async for evt in runner.run("hi", None):
        events.append(evt)

    # First CompletedEvent in the stream carries the outcome and answer.
    final_event = next(e for e in events if isinstance(e, CompletedEvent))
    assert final_event.ok is True
    assert final_event.answer == "ok"
@pytest.mark.anyio
async def test_codex_runner_includes_stderr_reason(tmp_path) -> None:
codex_path = tmp_path / "codex"