feat: telegram voice transcription (#74)
This commit is contained in:
@@ -16,6 +16,24 @@ This document captures current behavior so transport changes stay intentional.
|
|||||||
4. High-value messages enqueue a send.
|
4. High-value messages enqueue a send.
|
||||||
5. All writes go through the outbox.
|
5. All writes go through the outbox.
|
||||||
|
|
||||||
|
## Incoming messages
|
||||||
|
|
||||||
|
`parse_incoming_update` accepts text messages and voice notes.
|
||||||
|
|
||||||
|
If voice transcription is enabled, takopi downloads the voice payload from Telegram,
|
||||||
|
transcribes it with OpenAI, and routes the transcript through the same command and
|
||||||
|
directive pipeline as typed text.
|
||||||
|
|
||||||
|
Configuration (under `[transports.telegram]`):
|
||||||
|
|
||||||
|
```toml
|
||||||
|
voice_transcription = true
|
||||||
|
```
|
||||||
|
|
||||||
|
Set `OPENAI_API_KEY` in the environment. If transcription is enabled but the API key
|
||||||
|
is missing or the audio download fails, takopi replies with a short error and skips
|
||||||
|
the run.
|
||||||
|
|
||||||
## Outbox model
|
## Outbox model
|
||||||
|
|
||||||
- Single worker processes one op at a time.
|
- Single worker processes one op at a time.
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ parallel runs across threads, per thread queue support.
|
|||||||
|
|
||||||
`/cancel` a running task.
|
`/cancel` a running task.
|
||||||
|
|
||||||
|
optional voice note transcription for Telegram (routes transcript like typed text).
|
||||||
|
|
||||||
## requirements
|
## requirements
|
||||||
|
|
||||||
- `uv` for installation (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
|
- `uv` for installation (`curl -LsSf https://astral.sh/uv/install.sh | sh`)
|
||||||
@@ -59,6 +61,9 @@ transport = "telegram"
|
|||||||
[transports.telegram]
|
[transports.telegram]
|
||||||
bot_token = "123456789:ABCdefGHIjklMNOpqrsTUVwxyz"
|
bot_token = "123456789:ABCdefGHIjklMNOpqrsTUVwxyz"
|
||||||
chat_id = 123456789
|
chat_id = 123456789
|
||||||
|
voice_transcription = true
|
||||||
|
|
||||||
|
# set OPENAI_API_KEY in your environment for voice transcription
|
||||||
|
|
||||||
[codex]
|
[codex]
|
||||||
# optional: profile from ~/.codex/config.toml
|
# optional: profile from ~/.codex/config.toml
|
||||||
|
|||||||
@@ -25,6 +25,18 @@ _RECONNECTING_RE = re.compile(
|
|||||||
r"^Reconnecting\.{3}\s*(?P<attempt>\d+)/(?P<max>\d+)\s*$",
|
r"^Reconnecting\.{3}\s*(?P<attempt>\d+)/(?P<max>\d+)\s*$",
|
||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
)
|
)
|
||||||
|
_EXEC_ONLY_FLAGS = {"--skip-git-repo-check"}
|
||||||
|
|
||||||
|
|
||||||
|
def _split_exec_flags(extra_args: list[str]) -> tuple[list[str], list[str]]:
|
||||||
|
base_args: list[str] = []
|
||||||
|
exec_args: list[str] = []
|
||||||
|
for arg in extra_args:
|
||||||
|
if arg in _EXEC_ONLY_FLAGS:
|
||||||
|
exec_args.append(arg)
|
||||||
|
else:
|
||||||
|
base_args.append(arg)
|
||||||
|
return base_args, exec_args
|
||||||
|
|
||||||
|
|
||||||
def _parse_reconnect_message(message: str) -> tuple[int, int] | None:
|
def _parse_reconnect_message(message: str) -> tuple[int, int] | None:
|
||||||
@@ -397,7 +409,8 @@ class CodexRunner(ResumeTokenMixin, JsonlSubprocessRunner):
|
|||||||
state: Any,
|
state: Any,
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
_ = prompt, state
|
_ = prompt, state
|
||||||
args = [*self.extra_args, "exec", "--json"]
|
base_args, exec_args = _split_exec_flags(self.extra_args)
|
||||||
|
args = [*base_args, "exec", *exec_args, "--json"]
|
||||||
if resume:
|
if resume:
|
||||||
args.extend(["resume", resume.value, "-"])
|
args.extend(["resume", resume.value, "-"])
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ class TelegramTransportSettings(BaseModel):
|
|||||||
|
|
||||||
bot_token: SecretStr | None = None
|
bot_token: SecretStr | None = None
|
||||||
chat_id: int | None = None
|
chat_id: int | None = None
|
||||||
|
voice_transcription: bool = False
|
||||||
|
|
||||||
@field_validator("bot_token", mode="before")
|
@field_validator("bot_token", mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
"""Telegram-specific clients and adapters."""
|
"""Telegram-specific clients and adapters."""
|
||||||
|
|
||||||
from .client import parse_incoming_update, poll_incoming
|
from .client import parse_incoming_update, poll_incoming
|
||||||
from .types import TelegramIncomingMessage
|
from .types import TelegramIncomingMessage, TelegramVoice
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"TelegramIncomingMessage",
|
"TelegramIncomingMessage",
|
||||||
|
"TelegramVoice",
|
||||||
"parse_incoming_update",
|
"parse_incoming_update",
|
||||||
"poll_incoming",
|
"poll_incoming",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from .bridge import (
|
|||||||
TelegramBridgeConfig,
|
TelegramBridgeConfig,
|
||||||
TelegramPresenter,
|
TelegramPresenter,
|
||||||
TelegramTransport,
|
TelegramTransport,
|
||||||
|
TelegramVoiceTranscriptionConfig,
|
||||||
run_main_loop,
|
run_main_loop,
|
||||||
)
|
)
|
||||||
from .client import TelegramClient
|
from .client import TelegramClient
|
||||||
@@ -43,6 +44,14 @@ def _build_startup_message(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_voice_transcription_config(
|
||||||
|
transport_config: dict[str, object],
|
||||||
|
) -> TelegramVoiceTranscriptionConfig:
|
||||||
|
return TelegramVoiceTranscriptionConfig(
|
||||||
|
enabled=bool(transport_config.get("voice_transcription", False)),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelegramBackend(TransportBackend):
|
class TelegramBackend(TransportBackend):
|
||||||
id = "telegram"
|
id = "telegram"
|
||||||
description = "Telegram bot"
|
description = "Telegram bot"
|
||||||
@@ -87,12 +96,14 @@ class TelegramBackend(TransportBackend):
|
|||||||
presenter=presenter,
|
presenter=presenter,
|
||||||
final_notify=final_notify,
|
final_notify=final_notify,
|
||||||
)
|
)
|
||||||
|
voice_transcription = _build_voice_transcription_config(transport_config)
|
||||||
cfg = TelegramBridgeConfig(
|
cfg = TelegramBridgeConfig(
|
||||||
bot=bot,
|
bot=bot,
|
||||||
runtime=runtime,
|
runtime=runtime,
|
||||||
chat_id=chat_id,
|
chat_id=chat_id,
|
||||||
startup_msg=startup_msg,
|
startup_msg=startup_msg,
|
||||||
exec_cfg=exec_cfg,
|
exec_cfg=exec_cfg,
|
||||||
|
voice_transcription=voice_transcription,
|
||||||
)
|
)
|
||||||
anyio.run(run_main_loop, cfg)
|
anyio.run(run_main_loop, cfg)
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
import shlex
|
import shlex
|
||||||
from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
|
from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import anyio
|
import anyio
|
||||||
|
|
||||||
@@ -40,10 +42,14 @@ from ..transport_runtime import TransportRuntime
|
|||||||
from .client import BotClient, poll_incoming
|
from .client import BotClient, poll_incoming
|
||||||
from .types import TelegramIncomingMessage
|
from .types import TelegramIncomingMessage
|
||||||
from .render import prepare_telegram
|
from .render import prepare_telegram
|
||||||
|
from .transcribe import transcribe_audio
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
_MAX_BOT_COMMANDS = 100
|
_MAX_BOT_COMMANDS = 100
|
||||||
|
_OPENAI_AUDIO_MAX_BYTES = 25 * 1024 * 1024
|
||||||
|
_OPENAI_TRANSCRIPTION_MODEL = "gpt-4o-mini-transcribe"
|
||||||
|
_OPENAI_TRANSCRIPTION_CHUNKING = "auto"
|
||||||
|
|
||||||
|
|
||||||
def _is_cancel_command(text: str) -> bool:
|
def _is_cancel_command(text: str) -> bool:
|
||||||
@@ -191,6 +197,11 @@ class TelegramPresenter:
|
|||||||
return RenderedMessage(text=text, extra={"entities": entities})
|
return RenderedMessage(text=text, extra={"entities": entities})
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class TelegramVoiceTranscriptionConfig:
|
||||||
|
enabled: bool = False
|
||||||
|
|
||||||
|
|
||||||
def _as_int(value: int | str, *, label: str) -> int:
|
def _as_int(value: int | str, *, label: str) -> int:
|
||||||
if isinstance(value, bool) or not isinstance(value, int):
|
if isinstance(value, bool) or not isinstance(value, int):
|
||||||
raise TypeError(f"Telegram {label} must be int")
|
raise TypeError(f"Telegram {label} must be int")
|
||||||
@@ -285,6 +296,7 @@ class TelegramBridgeConfig:
|
|||||||
chat_id: int
|
chat_id: int
|
||||||
startup_msg: str
|
startup_msg: str
|
||||||
exec_cfg: ExecBridgeConfig
|
exec_cfg: ExecBridgeConfig
|
||||||
|
voice_transcription: TelegramVoiceTranscriptionConfig | None = None
|
||||||
|
|
||||||
|
|
||||||
async def _send_plain(
|
async def _send_plain(
|
||||||
@@ -345,6 +357,125 @@ async def poll_updates(
|
|||||||
yield msg
|
yield msg
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_openai_api_key(
|
||||||
|
cfg: TelegramVoiceTranscriptionConfig,
|
||||||
|
) -> str | None:
|
||||||
|
env_key = os.environ.get("OPENAI_API_KEY")
|
||||||
|
if isinstance(env_key, str):
|
||||||
|
env_key = env_key.strip()
|
||||||
|
if env_key:
|
||||||
|
return env_key
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_voice_filename(file_path: str | None, mime_type: str | None) -> str:
|
||||||
|
name = Path(file_path).name if file_path else ""
|
||||||
|
if not name:
|
||||||
|
if mime_type == "audio/ogg":
|
||||||
|
return "voice.ogg"
|
||||||
|
return "voice.dat"
|
||||||
|
if name.endswith(".oga"):
|
||||||
|
return f"{name[:-4]}.ogg"
|
||||||
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
async def _transcribe_voice(
|
||||||
|
cfg: TelegramBridgeConfig,
|
||||||
|
msg: TelegramIncomingMessage,
|
||||||
|
) -> str | None:
|
||||||
|
voice = msg.voice
|
||||||
|
if voice is None:
|
||||||
|
return msg.text
|
||||||
|
settings = cfg.voice_transcription
|
||||||
|
if settings is None or not settings.enabled:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="voice transcription is disabled.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
api_key = _resolve_openai_api_key(settings)
|
||||||
|
if not api_key:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="voice transcription requires OPENAI_API_KEY.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
if voice.file_size is not None and voice.file_size > _OPENAI_AUDIO_MAX_BYTES:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="voice message is too large to transcribe.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
file_info = await cfg.bot.get_file(voice.file_id)
|
||||||
|
if not isinstance(file_info, dict):
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="failed to fetch voice file.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
file_path = file_info.get("file_path")
|
||||||
|
if not isinstance(file_path, str) or not file_path:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="failed to fetch voice file.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
audio_bytes = await cfg.bot.download_file(file_path)
|
||||||
|
if not audio_bytes:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="failed to download voice message.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
if len(audio_bytes) > _OPENAI_AUDIO_MAX_BYTES:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="voice message is too large to transcribe.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
filename = _normalize_voice_filename(file_path, voice.mime_type)
|
||||||
|
transcript = await transcribe_audio(
|
||||||
|
audio_bytes,
|
||||||
|
filename=filename,
|
||||||
|
api_key=api_key,
|
||||||
|
model=_OPENAI_TRANSCRIPTION_MODEL,
|
||||||
|
chunking_strategy=_OPENAI_TRANSCRIPTION_CHUNKING,
|
||||||
|
mime_type=voice.mime_type,
|
||||||
|
)
|
||||||
|
if transcript is None:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="voice transcription failed.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
transcript = transcript.strip()
|
||||||
|
if not transcript:
|
||||||
|
await _send_plain(
|
||||||
|
cfg.exec_cfg.transport,
|
||||||
|
chat_id=msg.chat_id,
|
||||||
|
user_msg_id=msg.message_id,
|
||||||
|
text="voice transcription returned empty text.",
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
return transcript
|
||||||
|
|
||||||
|
|
||||||
async def _handle_cancel(
|
async def _handle_cancel(
|
||||||
cfg: TelegramBridgeConfig,
|
cfg: TelegramBridgeConfig,
|
||||||
msg: TelegramIncomingMessage,
|
msg: TelegramIncomingMessage,
|
||||||
@@ -702,6 +833,7 @@ class _TelegramCommandExecutor(CommandExecutor):
|
|||||||
async def _dispatch_command(
|
async def _dispatch_command(
|
||||||
cfg: TelegramBridgeConfig,
|
cfg: TelegramBridgeConfig,
|
||||||
msg: TelegramIncomingMessage,
|
msg: TelegramIncomingMessage,
|
||||||
|
text: str,
|
||||||
command_id: str,
|
command_id: str,
|
||||||
args_text: str,
|
args_text: str,
|
||||||
running_tasks: RunningTasks,
|
running_tasks: RunningTasks,
|
||||||
@@ -738,7 +870,7 @@ async def _dispatch_command(
|
|||||||
return
|
return
|
||||||
ctx = CommandContext(
|
ctx = CommandContext(
|
||||||
command=command_id,
|
command=command_id,
|
||||||
text=msg.text,
|
text=text,
|
||||||
args_text=args_text,
|
args_text=args_text,
|
||||||
args=_split_command_args(args_text),
|
args=_split_command_args(args_text),
|
||||||
message=message_ref,
|
message=message_ref,
|
||||||
@@ -826,6 +958,10 @@ async def run_main_loop(
|
|||||||
|
|
||||||
async for msg in poller(cfg):
|
async for msg in poller(cfg):
|
||||||
text = msg.text
|
text = msg.text
|
||||||
|
if msg.voice is not None:
|
||||||
|
text = await _transcribe_voice(cfg, msg)
|
||||||
|
if text is None:
|
||||||
|
continue
|
||||||
user_msg_id = msg.message_id
|
user_msg_id = msg.message_id
|
||||||
chat_id = msg.chat_id
|
chat_id = msg.chat_id
|
||||||
reply_id = msg.reply_to_message_id
|
reply_id = msg.reply_to_message_id
|
||||||
@@ -850,6 +986,7 @@ async def run_main_loop(
|
|||||||
_dispatch_command,
|
_dispatch_command,
|
||||||
cfg,
|
cfg,
|
||||||
msg,
|
msg,
|
||||||
|
text,
|
||||||
command_id,
|
command_id,
|
||||||
args_text,
|
args_text,
|
||||||
running_tasks,
|
running_tasks,
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import httpx
|
|||||||
import anyio
|
import anyio
|
||||||
|
|
||||||
from ..logging import get_logger
|
from ..logging import get_logger
|
||||||
from .types import TelegramIncomingMessage
|
from .types import TelegramIncomingMessage, TelegramVoice
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -50,8 +50,30 @@ def parse_incoming_update(
|
|||||||
if not isinstance(msg, dict):
|
if not isinstance(msg, dict):
|
||||||
return None
|
return None
|
||||||
text = msg.get("text")
|
text = msg.get("text")
|
||||||
|
voice_payload: TelegramVoice | None = None
|
||||||
if not isinstance(text, str):
|
if not isinstance(text, str):
|
||||||
return None
|
voice = msg.get("voice")
|
||||||
|
if not isinstance(voice, dict):
|
||||||
|
return None
|
||||||
|
file_id = voice.get("file_id")
|
||||||
|
if not isinstance(file_id, str) or not file_id:
|
||||||
|
return None
|
||||||
|
voice_payload = TelegramVoice(
|
||||||
|
file_id=file_id,
|
||||||
|
mime_type=voice.get("mime_type")
|
||||||
|
if isinstance(voice.get("mime_type"), str)
|
||||||
|
else None,
|
||||||
|
file_size=voice.get("file_size")
|
||||||
|
if isinstance(voice.get("file_size"), int)
|
||||||
|
and not isinstance(voice.get("file_size"), bool)
|
||||||
|
else None,
|
||||||
|
duration=voice.get("duration")
|
||||||
|
if isinstance(voice.get("duration"), int)
|
||||||
|
and not isinstance(voice.get("duration"), bool)
|
||||||
|
else None,
|
||||||
|
raw=voice,
|
||||||
|
)
|
||||||
|
text = ""
|
||||||
chat = msg.get("chat")
|
chat = msg.get("chat")
|
||||||
if not isinstance(chat, dict):
|
if not isinstance(chat, dict):
|
||||||
return None
|
return None
|
||||||
@@ -87,6 +109,7 @@ def parse_incoming_update(
|
|||||||
reply_to_message_id=reply_to_message_id,
|
reply_to_message_id=reply_to_message_id,
|
||||||
reply_to_text=reply_to_text,
|
reply_to_text=reply_to_text,
|
||||||
sender_id=sender_id,
|
sender_id=sender_id,
|
||||||
|
voice=voice_payload,
|
||||||
raw=msg,
|
raw=msg,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -123,6 +146,10 @@ class BotClient(Protocol):
|
|||||||
allowed_updates: list[str] | None = None,
|
allowed_updates: list[str] | None = None,
|
||||||
) -> list[dict] | None: ...
|
) -> list[dict] | None: ...
|
||||||
|
|
||||||
|
async def get_file(self, file_id: str) -> dict | None: ...
|
||||||
|
|
||||||
|
async def download_file(self, file_path: str) -> bytes | None: ...
|
||||||
|
|
||||||
async def send_message(
|
async def send_message(
|
||||||
self,
|
self,
|
||||||
chat_id: int,
|
chat_id: int,
|
||||||
@@ -356,6 +383,7 @@ class TelegramClient:
|
|||||||
raise ValueError("Provide either token or client, not both.")
|
raise ValueError("Provide either token or client, not both.")
|
||||||
self._client_override = client
|
self._client_override = client
|
||||||
self._base = None
|
self._base = None
|
||||||
|
self._file_base = None
|
||||||
self._http_client = None
|
self._http_client = None
|
||||||
self._owns_http_client = False
|
self._owns_http_client = False
|
||||||
else:
|
else:
|
||||||
@@ -363,6 +391,7 @@ class TelegramClient:
|
|||||||
raise ValueError("Telegram token is empty")
|
raise ValueError("Telegram token is empty")
|
||||||
self._client_override = None
|
self._client_override = None
|
||||||
self._base = f"https://api.telegram.org/bot{token}"
|
self._base = f"https://api.telegram.org/bot{token}"
|
||||||
|
self._file_base = f"https://api.telegram.org/file/bot{token}"
|
||||||
self._http_client = http_client or httpx.AsyncClient(timeout=timeout_s)
|
self._http_client = http_client or httpx.AsyncClient(timeout=timeout_s)
|
||||||
self._owns_http_client = http_client is None
|
self._owns_http_client = http_client is None
|
||||||
self._clock = clock
|
self._clock = clock
|
||||||
@@ -556,6 +585,46 @@ class TelegramClient:
|
|||||||
except TelegramRetryAfter as exc:
|
except TelegramRetryAfter as exc:
|
||||||
await self._sleep(exc.retry_after)
|
await self._sleep(exc.retry_after)
|
||||||
|
|
||||||
|
async def get_file(self, file_id: str) -> dict | None:
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
if self._client_override is not None:
|
||||||
|
return await self._client_override.get_file(file_id)
|
||||||
|
result = await self._post("getFile", {"file_id": file_id})
|
||||||
|
return result if isinstance(result, dict) else None
|
||||||
|
except TelegramRetryAfter as exc:
|
||||||
|
await self._sleep(exc.retry_after)
|
||||||
|
|
||||||
|
async def download_file(self, file_path: str) -> bytes | None:
|
||||||
|
if self._client_override is not None:
|
||||||
|
return await self._client_override.download_file(file_path)
|
||||||
|
if self._http_client is None or self._file_base is None:
|
||||||
|
raise RuntimeError("TelegramClient is configured without an HTTP client.")
|
||||||
|
url = f"{self._file_base}/{file_path}"
|
||||||
|
try:
|
||||||
|
resp = await self._http_client.get(url)
|
||||||
|
except httpx.HTTPError as exc:
|
||||||
|
request_url = getattr(exc.request, "url", None)
|
||||||
|
logger.error(
|
||||||
|
"telegram.file_network_error",
|
||||||
|
url=str(request_url) if request_url is not None else None,
|
||||||
|
error=str(exc),
|
||||||
|
error_type=exc.__class__.__name__,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as exc:
|
||||||
|
logger.error(
|
||||||
|
"telegram.file_http_error",
|
||||||
|
status=resp.status_code,
|
||||||
|
url=str(resp.request.url),
|
||||||
|
error=str(exc),
|
||||||
|
body=resp.text,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
return resp.content
|
||||||
|
|
||||||
async def send_message(
|
async def send_message(
|
||||||
self,
|
self,
|
||||||
chat_id: int,
|
chat_id: int,
|
||||||
|
|||||||
@@ -0,0 +1,100 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from ..logging import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
OPENAI_TRANSCRIBE_URL = "https://api.openai.com/v1/audio/transcriptions"
|
||||||
|
|
||||||
|
|
||||||
|
async def transcribe_audio(
|
||||||
|
audio_bytes: bytes,
|
||||||
|
*,
|
||||||
|
filename: str,
|
||||||
|
api_key: str,
|
||||||
|
model: str,
|
||||||
|
language: str | None = None,
|
||||||
|
prompt: str | None = None,
|
||||||
|
chunking_strategy: str | None = "auto",
|
||||||
|
mime_type: str | None = None,
|
||||||
|
timeout_s: float = 120,
|
||||||
|
http_client: httpx.AsyncClient | None = None,
|
||||||
|
) -> str | None:
|
||||||
|
data: dict[str, Any] = {"model": model}
|
||||||
|
if language:
|
||||||
|
data["language"] = language
|
||||||
|
if prompt:
|
||||||
|
data["prompt"] = prompt
|
||||||
|
if chunking_strategy:
|
||||||
|
data["chunking_strategy"] = chunking_strategy
|
||||||
|
|
||||||
|
files = {
|
||||||
|
"file": (
|
||||||
|
filename,
|
||||||
|
audio_bytes,
|
||||||
|
mime_type or "application/octet-stream",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
headers = {"Authorization": f"Bearer {api_key}"}
|
||||||
|
close_client = False
|
||||||
|
client = http_client
|
||||||
|
if client is None:
|
||||||
|
client = httpx.AsyncClient(timeout=timeout_s)
|
||||||
|
close_client = True
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
resp = await client.post(
|
||||||
|
OPENAI_TRANSCRIBE_URL,
|
||||||
|
data=data,
|
||||||
|
files=files,
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
except httpx.HTTPError as exc:
|
||||||
|
request_url = getattr(exc.request, "url", None)
|
||||||
|
logger.error(
|
||||||
|
"openai.transcribe.network_error",
|
||||||
|
url=str(request_url) if request_url is not None else None,
|
||||||
|
error=str(exc),
|
||||||
|
error_type=exc.__class__.__name__,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as exc:
|
||||||
|
logger.error(
|
||||||
|
"openai.transcribe.http_error",
|
||||||
|
status=resp.status_code,
|
||||||
|
url=str(resp.request.url),
|
||||||
|
error=str(exc),
|
||||||
|
body=resp.text,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
payload = resp.json()
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error(
|
||||||
|
"openai.transcribe.bad_response",
|
||||||
|
status=resp.status_code,
|
||||||
|
url=str(resp.request.url),
|
||||||
|
error=str(exc),
|
||||||
|
error_type=exc.__class__.__name__,
|
||||||
|
body=resp.text,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
finally:
|
||||||
|
if close_client:
|
||||||
|
await client.aclose()
|
||||||
|
|
||||||
|
text = payload.get("text")
|
||||||
|
if not isinstance(text, str):
|
||||||
|
logger.error(
|
||||||
|
"openai.transcribe.invalid_payload",
|
||||||
|
payload=payload,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
return text
|
||||||
@@ -4,6 +4,15 @@ from dataclasses import dataclass
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class TelegramVoice:
|
||||||
|
file_id: str
|
||||||
|
mime_type: str | None
|
||||||
|
file_size: int | None
|
||||||
|
duration: int | None
|
||||||
|
raw: dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, slots=True)
|
@dataclass(frozen=True, slots=True)
|
||||||
class TelegramIncomingMessage:
|
class TelegramIncomingMessage:
|
||||||
transport: str
|
transport: str
|
||||||
@@ -13,4 +22,5 @@ class TelegramIncomingMessage:
|
|||||||
reply_to_message_id: int | None
|
reply_to_message_id: int | None
|
||||||
reply_to_text: str | None
|
reply_to_text: str | None
|
||||||
sender_id: int | None
|
sender_id: int | None
|
||||||
|
voice: TelegramVoice | None = None
|
||||||
raw: dict[str, Any] | None = None
|
raw: dict[str, Any] | None = None
|
||||||
|
|||||||
@@ -128,6 +128,23 @@ async def test_run_allows_parallel_different_sessions() -> None:
|
|||||||
assert max_in_flight == 2
|
assert max_in_flight == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_codex_exec_flags_after_exec() -> None:
|
||||||
|
runner = CodexRunner(
|
||||||
|
codex_cmd="codex",
|
||||||
|
extra_args=["-c", "notify=[]", "--skip-git-repo-check"],
|
||||||
|
)
|
||||||
|
state = runner.new_state("hi", None)
|
||||||
|
args = runner.build_args("hi", None, state=state)
|
||||||
|
assert args == [
|
||||||
|
"-c",
|
||||||
|
"notify=[]",
|
||||||
|
"exec",
|
||||||
|
"--skip-git-repo-check",
|
||||||
|
"--json",
|
||||||
|
"-",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.anyio
|
@pytest.mark.anyio
|
||||||
async def test_run_serializes_new_session_after_session_is_known(
|
async def test_run_serializes_new_session_after_session_is_known(
|
||||||
tmp_path, monkeypatch
|
tmp_path, monkeypatch
|
||||||
|
|||||||
@@ -106,6 +106,14 @@ class _FakeBot:
|
|||||||
_ = allowed_updates
|
_ = allowed_updates
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
async def get_file(self, file_id: str) -> dict | None:
|
||||||
|
_ = file_id
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def download_file(self, file_path: str) -> bytes | None:
|
||||||
|
_ = file_path
|
||||||
|
return None
|
||||||
|
|
||||||
async def send_message(
|
async def send_message(
|
||||||
self,
|
self,
|
||||||
chat_id: int,
|
chat_id: int,
|
||||||
@@ -386,6 +394,14 @@ async def test_telegram_transport_edit_wait_false_returns_ref() -> None:
|
|||||||
) -> list[dict] | None:
|
) -> list[dict] | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
async def get_file(self, file_id: str) -> dict | None:
|
||||||
|
_ = file_id
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def download_file(self, file_path: str) -> bytes | None:
|
||||||
|
_ = file_path
|
||||||
|
return None
|
||||||
|
|
||||||
async def send_message(
|
async def send_message(
|
||||||
self,
|
self,
|
||||||
chat_id: int,
|
chat_id: int,
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ def test_parse_incoming_update_maps_fields() -> None:
|
|||||||
assert msg.reply_to_message_id == 5
|
assert msg.reply_to_message_id == 5
|
||||||
assert msg.reply_to_text == "prev"
|
assert msg.reply_to_text == "prev"
|
||||||
assert msg.sender_id == 99
|
assert msg.sender_id == 99
|
||||||
|
assert msg.voice is None
|
||||||
assert msg.raw == update["message"]
|
assert msg.raw == update["message"]
|
||||||
|
|
||||||
|
|
||||||
@@ -38,10 +39,36 @@ def test_parse_incoming_update_filters_non_matching_chat() -> None:
|
|||||||
assert parse_incoming_update(update, chat_id=999) is None
|
assert parse_incoming_update(update, chat_id=999) is None
|
||||||
|
|
||||||
|
|
||||||
def test_parse_incoming_update_filters_non_text() -> None:
|
def test_parse_incoming_update_filters_non_text_and_non_voice() -> None:
|
||||||
update = {
|
update = {
|
||||||
"update_id": 1,
|
"update_id": 1,
|
||||||
"message": {"message_id": 10, "chat": {"id": 123}},
|
"message": {"message_id": 10, "chat": {"id": 123}, "photo": []},
|
||||||
}
|
}
|
||||||
|
|
||||||
assert parse_incoming_update(update, chat_id=123) is None
|
assert parse_incoming_update(update, chat_id=123) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_incoming_update_voice_message() -> None:
|
||||||
|
update = {
|
||||||
|
"update_id": 1,
|
||||||
|
"message": {
|
||||||
|
"message_id": 10,
|
||||||
|
"chat": {"id": 123},
|
||||||
|
"voice": {
|
||||||
|
"file_id": "voice-id",
|
||||||
|
"file_unique_id": "uniq",
|
||||||
|
"duration": 3,
|
||||||
|
"mime_type": "audio/ogg",
|
||||||
|
"file_size": 1234,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
msg = parse_incoming_update(update, chat_id=123)
|
||||||
|
assert msg is not None
|
||||||
|
assert msg.text == ""
|
||||||
|
assert msg.voice is not None
|
||||||
|
assert msg.voice.file_id == "voice-id"
|
||||||
|
assert msg.voice.mime_type == "audio/ogg"
|
||||||
|
assert msg.voice.file_size == 1234
|
||||||
|
assert msg.voice.duration == 3
|
||||||
|
|||||||
@@ -92,6 +92,14 @@ class _FakeBot:
|
|||||||
self._updates_attempts += 1
|
self._updates_attempts += 1
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
async def get_file(self, file_id: str) -> dict | None:
|
||||||
|
_ = file_id
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def download_file(self, file_path: str) -> bytes | None:
|
||||||
|
_ = file_path
|
||||||
|
return None
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user