fix(telegram): preserve directives for voice transcripts (#141)

This commit is contained in:
banteg
2026-01-15 21:37:37 +04:00
committed by GitHub
parent 5a2d780b3e
commit 6c5763b014
2 changed files with 80 additions and 1 deletions
+10 -1
View File
@@ -808,6 +808,7 @@ async def run_main_loop(
) )
reply = make_reply(cfg, msg) reply = make_reply(cfg, msg)
text = msg.text text = msg.text
is_voice_transcribed = False
if msg.voice is not None: if msg.voice is not None:
text = await transcribe_voice( text = await transcribe_voice(
bot=cfg.bot, bot=cfg.bot,
@@ -819,7 +820,7 @@ async def run_main_loop(
) )
if text is None: if text is None:
continue continue
text = f"(voice transcribed) {text}" is_voice_transcribed = True
topic_key = ( topic_key = (
_topic_key(msg, cfg, scope_chat_ids=topics_chat_ids) _topic_key(msg, cfg, scope_chat_ids=topics_chat_ids)
if topic_store is not None if topic_store is not None
@@ -981,6 +982,14 @@ async def run_main_loop(
except DirectiveError as exc: except DirectiveError as exc:
await reply(text=f"error:\n{exc}") await reply(text=f"error:\n{exc}")
continue continue
if is_voice_transcribed:
resolved = ResolvedMessage(
prompt=f"(voice transcribed) {resolved.prompt}",
resume_token=resolved.resume_token,
engine_override=resolved.engine_override,
context=resolved.context,
context_source=resolved.context_source,
)
text = resolved.prompt text = resolved.prompt
resume_token = resolved.resume_token resume_token = resolved.resume_token
+70
View File
@@ -51,6 +51,7 @@ from takopi.telegram.types import (
TelegramCallbackQuery, TelegramCallbackQuery,
TelegramDocument, TelegramDocument,
TelegramIncomingMessage, TelegramIncomingMessage,
TelegramVoice,
) )
from takopi.transport import MessageRef, RenderedMessage, SendOptions from takopi.transport import MessageRef, RenderedMessage, SendOptions
from tests.plugin_fixtures import FakeEntryPoint, install_entrypoints from tests.plugin_fixtures import FakeEntryPoint, install_entrypoints
@@ -1908,6 +1909,75 @@ async def test_run_main_loop_prompt_upload_uses_caption_directives(
assert "[uploaded file: incoming/hello.txt]" in prompt_text assert "[uploaded file: incoming/hello.txt]" in prompt_text
@pytest.mark.anyio
async def test_run_main_loop_voice_transcript_preserves_directive(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a directive inside a voice transcript still picks the engine.

    The router defaults to the "claude" runner, and the patched transcriber
    returns ``"/codex do thing"`` for the incoming voice message. The test
    passes when only the codex runner is invoked and the prompt it receives
    starts with ``"(voice transcribed) do thing"``.
    """
    codex = ScriptRunner([Return(answer="codex")], engine=CODEX_ENGINE)
    claude = ScriptRunner([Return(answer="claude")], engine="claude")
    # Same entry order as before: claude first, codex second; claude is default.
    runner_entries = [
        RunnerEntry(engine=claude.engine, runner=claude),
        RunnerEntry(engine=codex.engine, runner=codex),
    ]
    router = AutoRouter(entries=runner_entries, default_engine=claude.engine)
    runtime = TransportRuntime(router=router, projects=_empty_projects())
    exec_cfg = ExecBridgeConfig(
        transport=_FakeTransport(),
        presenter=MarkdownPresenter(),
        final_notify=True,
    )
    cfg = TelegramBridgeConfig(
        bot=_FakeBot(),
        runtime=runtime,
        chat_id=123,
        startup_msg="",
        exec_cfg=exec_cfg,
        voice_transcription=True,
    )

    async def _fake_transcribe(
        *,
        bot: BotClient,
        msg: TelegramIncomingMessage,
        enabled: bool,
        model: str,
        max_bytes: int | None = None,
        reply,
    ) -> str:
        # The arguments are accepted only to match the keyword signature the
        # loop calls with; none of them influence the canned transcript.
        del bot, msg, enabled, model, max_bytes, reply
        return "/codex do thing"

    monkeypatch.setattr(telegram_loop, "transcribe_voice", _fake_transcribe)
    monkeypatch.setattr(telegram_loop, "list_command_ids", lambda **_: [])

    async def poller(_cfg: TelegramBridgeConfig):
        # A single voice-only update: empty text, voice payload attached.
        voice_note = TelegramVoice(
            file_id="voice-1",
            mime_type=None,
            file_size=None,
            duration=None,
            raw={"file_id": "voice-1"},
        )
        yield TelegramIncomingMessage(
            transport="telegram",
            chat_id=123,
            message_id=1,
            text="",
            reply_to_message_id=None,
            reply_to_text=None,
            sender_id=123,
            voice=voice_note,
        )

    await run_main_loop(cfg, poller)

    assert not claude.calls
    assert len(codex.calls) == 1
    prompt_sent = codex.calls[0][0]
    assert prompt_sent.startswith("(voice transcribed) do thing")
@pytest.mark.anyio @pytest.mark.anyio
async def test_run_main_loop_prompt_upload_auto_resumes_chat_sessions( async def test_run_main_loop_prompt_upload_auto_resumes_chat_sessions(
tmp_path: Path, tmp_path: Path,