From 6c5763b014f496b7173da9427e24a2bae98d1517 Mon Sep 17 00:00:00 2001 From: banteg <4562643+banteg@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:37:37 +0400 Subject: [PATCH] fix(telegram): preserve directives for voice transcripts (#141) --- src/takopi/telegram/loop.py | 11 +++++- tests/test_telegram_bridge.py | 70 +++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/src/takopi/telegram/loop.py b/src/takopi/telegram/loop.py index e23b6a5..ceb0509 100644 --- a/src/takopi/telegram/loop.py +++ b/src/takopi/telegram/loop.py @@ -808,6 +808,7 @@ async def run_main_loop( ) reply = make_reply(cfg, msg) text = msg.text + is_voice_transcribed = False if msg.voice is not None: text = await transcribe_voice( bot=cfg.bot, @@ -819,7 +820,7 @@ async def run_main_loop( ) if text is None: continue - text = f"(voice transcribed) {text}" + is_voice_transcribed = True topic_key = ( _topic_key(msg, cfg, scope_chat_ids=topics_chat_ids) if topic_store is not None @@ -981,6 +982,14 @@ async def run_main_loop( except DirectiveError as exc: await reply(text=f"error:\n{exc}") continue + if is_voice_transcribed: + resolved = ResolvedMessage( + prompt=f"(voice transcribed) {resolved.prompt}", + resume_token=resolved.resume_token, + engine_override=resolved.engine_override, + context=resolved.context, + context_source=resolved.context_source, + ) text = resolved.prompt resume_token = resolved.resume_token diff --git a/tests/test_telegram_bridge.py b/tests/test_telegram_bridge.py index f3f5760..02e6699 100644 --- a/tests/test_telegram_bridge.py +++ b/tests/test_telegram_bridge.py @@ -51,6 +51,7 @@ from takopi.telegram.types import ( TelegramCallbackQuery, TelegramDocument, TelegramIncomingMessage, + TelegramVoice, ) from takopi.transport import MessageRef, RenderedMessage, SendOptions from tests.plugin_fixtures import FakeEntryPoint, install_entrypoints @@ -1908,6 +1909,75 @@ async def test_run_main_loop_prompt_upload_uses_caption_directives( assert "[uploaded file: incoming/hello.txt]" in prompt_text +@pytest.mark.anyio +async def test_run_main_loop_voice_transcript_preserves_directive( + monkeypatch: pytest.MonkeyPatch, +) -> None: + codex_runner = ScriptRunner([Return(answer="codex")], engine=CODEX_ENGINE) + claude_runner = ScriptRunner([Return(answer="claude")], engine="claude") + router = AutoRouter( + entries=[ + RunnerEntry(engine=claude_runner.engine, runner=claude_runner), + RunnerEntry(engine=codex_runner.engine, runner=codex_runner), + ], + default_engine=claude_runner.engine, + ) + runtime = TransportRuntime(router=router, projects=_empty_projects()) + transport = _FakeTransport() + exec_cfg = ExecBridgeConfig( + transport=transport, + presenter=MarkdownPresenter(), + final_notify=True, + ) + cfg = TelegramBridgeConfig( + bot=_FakeBot(), + runtime=runtime, + chat_id=123, + startup_msg="", + exec_cfg=exec_cfg, + voice_transcription=True, + ) + + async def _fake_transcribe( + *, + bot: BotClient, + msg: TelegramIncomingMessage, + enabled: bool, + model: str, + max_bytes: int | None = None, + reply, + ) -> str: + _ = bot, msg, enabled, model, max_bytes, reply + return "/codex do thing" + + monkeypatch.setattr(telegram_loop, "transcribe_voice", _fake_transcribe) + monkeypatch.setattr(telegram_loop, "list_command_ids", lambda **_: []) + + async def poller(_cfg: TelegramBridgeConfig): + yield TelegramIncomingMessage( + transport="telegram", + chat_id=123, + message_id=1, + text="", + reply_to_message_id=None, + reply_to_text=None, + sender_id=123, + voice=TelegramVoice( + file_id="voice-1", + mime_type=None, + file_size=None, + duration=None, + raw={"file_id": "voice-1"}, + ), + ) + + await run_main_loop(cfg, poller) + + assert not claude_runner.calls + assert len(codex_runner.calls) == 1 + assert codex_runner.calls[0][0].startswith("(voice transcribed) do thing") + + @pytest.mark.anyio async def test_run_main_loop_prompt_upload_auto_resumes_chat_sessions( tmp_path: Path,