// TelegramConfig holds the Telegram Bot API settings assembled by
// loadTelegramConfig. Values are stored as read; the token requirement is
// enforced by validate() only when telegram is the selected backend.
type TelegramConfig struct {
	// OPENCROW_TELEGRAM_TOKEN_FILE contents (whitespace-trimmed) or, when no
	// file is configured, OPENCROW_TELEGRAM_TOKEN.
	Token string
	// OPENCROW_TELEGRAM_API_BASE; empty leaves the choice to the backend.
	APIBase string
	// OPENCROW_TELEGRAM_ALLOWED_USERS when it lists at least one entry,
	// otherwise the shared allowlist handed to loadTelegramConfig.
	AllowedUsers map[string]struct{}
	// OPENCROW_TELEGRAM_POLL_TIMEOUT, default 0 (the backend substitutes its
	// own default for non-positive values).
	PollTimeout time.Duration
}
backendSocket, backendTelegram: // valid default: - return nil, fmt.Errorf("OPENCROW_BACKEND=%q is not supported (valid: matrix, nostr, signal, socket)", backendType) + return nil, fmt.Errorf("OPENCROW_BACKEND=%q is not supported (valid: matrix, nostr, signal, socket, telegram)", backendType) } idleTimeout, err := env.duration("OPENCROW_PI_IDLE_TIMEOUT", 30*time.Minute) @@ -122,6 +131,11 @@ func loadConfig(getenv func(string) string) (*Config, error) { return nil, err } + telegramCfg, err := loadTelegramConfig(env, allowedUsers) + if err != nil { + return nil, err + } + cfg := &Config{ BackendType: backendType, Matrix: MatrixConfig{ @@ -138,6 +152,7 @@ func loadConfig(getenv func(string) string) (*Config, error) { SocketPath: env.or("OPENCROW_SOCKET_PATH", filepath.Join(workingDir, "sessions", "chat.sock")), Name: env.or("OPENCROW_SOCKET_NAME", "OpenCrow"), }, + Telegram: telegramCfg, Pi: PiConfig{ BinaryPath: env.or("OPENCROW_PI_BINARY", "pi"), SessionDir: env.or("OPENCROW_PI_SESSION_DIR", "/var/lib/opencrow/sessions"), @@ -178,6 +193,8 @@ func (cfg *Config) validateBackend(env envReader) error { return cfg.Signal.validate() case backendSocket: // socket has sensible defaults, no validation needed + case backendTelegram: + return cfg.Telegram.validate() } return nil @@ -191,6 +208,50 @@ func (m MatrixConfig) validate() error { ) } +func loadTelegramConfig(env envReader, allowedUsers map[string]struct{}) (TelegramConfig, error) { + token, err := loadTelegramToken(env) + if err != nil { + return TelegramConfig{}, err + } + + pollTimeout, err := env.duration("OPENCROW_TELEGRAM_POLL_TIMEOUT", 0) + if err != nil { + return TelegramConfig{}, err + } + + telegramAllowed := allowedUsers + if raw := env.list("OPENCROW_TELEGRAM_ALLOWED_USERS"); len(raw) > 0 { + telegramAllowed = parseAllowedUsers(raw) + } + + return TelegramConfig{ + Token: token, + APIBase: env.str("OPENCROW_TELEGRAM_API_BASE"), + AllowedUsers: telegramAllowed, + PollTimeout: pollTimeout, + }, nil +} + +// 
loadTelegramToken reads the bot token from OPENCROW_TELEGRAM_TOKEN_FILE +// (preferred) or OPENCROW_TELEGRAM_TOKEN. Empty is allowed at load time; +// validate() enforces the requirement when telegram is the active backend. +func loadTelegramToken(env envReader) (string, error) { + if path := env.str("OPENCROW_TELEGRAM_TOKEN_FILE"); path != "" { + data, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("reading OPENCROW_TELEGRAM_TOKEN_FILE: %w", err) + } + + return strings.TrimSpace(string(data)), nil + } + + return env.str("OPENCROW_TELEGRAM_TOKEN"), nil +} + +func (t TelegramConfig) validate() error { + return requireField(t.Token, "OPENCROW_TELEGRAM_TOKEN") +} + func loadSignalConfig(env envReader, workingDir string, allowedUsers map[string]struct{}) SignalConfig { return SignalConfig{ Account: env.str("OPENCROW_SIGNAL_ACCOUNT"), diff --git a/config_test.go b/config_test.go index 79e822f..c63d79d 100644 --- a/config_test.go +++ b/config_test.go @@ -80,11 +80,17 @@ func TestLoadConfig_Errors(t *testing.T) { name: "unknown backend", env: func() map[string]string { m := baseMatrixEnv() - m["OPENCROW_BACKEND"] = "telegram" + m["OPENCROW_BACKEND"] = "discord" return m }(), }, + { + name: "telegram missing token", + env: map[string]string{ + "OPENCROW_BACKEND": "telegram", + }, + }, { name: "nostr missing private key", env: map[string]string{ @@ -243,6 +249,79 @@ func TestSocketConfig_CustomValues(t *testing.T) { } } +func TestTelegramConfig_Defaults(t *testing.T) { + t.Parallel() + + cfg, err := loadConfig(testEnv(map[string]string{ + "OPENCROW_BACKEND": "telegram", + "OPENCROW_TELEGRAM_TOKEN": "123:ABC", + })) + if err != nil { + t.Fatalf("loadConfig: %v", err) + } + + if cfg.BackendType != backendTelegram { + t.Errorf("BackendType = %q, want %q", cfg.BackendType, backendTelegram) + } + + if cfg.Telegram.Token != "123:ABC" { + t.Errorf("Token = %q, want %q", cfg.Telegram.Token, "123:ABC") + } + + if cfg.Telegram.APIBase != "" { + t.Errorf("APIBase = 
%q, want empty (backend applies its own default)", cfg.Telegram.APIBase) + } +} + +func TestTelegramConfig_TokenFile(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + tokenPath := filepath.Join(dir, "token") + + if err := os.WriteFile(tokenPath, []byte(" 789:XYZ \n"), 0o600); err != nil { + t.Fatal(err) + } + + cfg, err := loadConfig(testEnv(map[string]string{ + "OPENCROW_BACKEND": "telegram", + "OPENCROW_TELEGRAM_TOKEN_FILE": tokenPath, + })) + if err != nil { + t.Fatalf("loadConfig: %v", err) + } + + if cfg.Telegram.Token != "789:XYZ" { + t.Errorf("Token = %q, want %q (whitespace trimmed)", cfg.Telegram.Token, "789:XYZ") + } +} + +func TestTelegramConfig_AllowedUsersOverride(t *testing.T) { + t.Parallel() + + cfg, err := loadConfig(testEnv(map[string]string{ + "OPENCROW_BACKEND": "telegram", + "OPENCROW_TELEGRAM_TOKEN": "1:abc", + "OPENCROW_ALLOWED_USERS": "shared", + "OPENCROW_TELEGRAM_ALLOWED_USERS": "12345, @alice", + })) + if err != nil { + t.Fatalf("loadConfig: %v", err) + } + + if _, ok := cfg.Telegram.AllowedUsers["12345"]; !ok { + t.Errorf("expected 12345 in allowlist, got %v", cfg.Telegram.AllowedUsers) + } + + if _, ok := cfg.Telegram.AllowedUsers["@alice"]; !ok { + t.Errorf("expected @alice in allowlist, got %v", cfg.Telegram.AllowedUsers) + } + + if _, ok := cfg.Telegram.AllowedUsers["shared"]; ok { + t.Errorf("telegram-specific allowlist should override generic one, got %v", cfg.Telegram.AllowedUsers) + } +} + func TestDiscoverSkills_Symlinks(t *testing.T) { t.Parallel() diff --git a/docs/configuration.md b/docs/configuration.md index 3d5a9c1..04dc010 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -9,6 +9,7 @@ Set `OPENCROW_BACKEND` to choose the messaging backend. Defaults to `matrix`. 
| `matrix` | Matrix rooms via mautrix (default, backwards compatible) | | `nostr` | Nostr NIP-17 encrypted DMs | | `signal` | Signal chats via signal-cli | +| `telegram` | Telegram Bot API (long polling) | ## Bot commands @@ -27,7 +28,7 @@ Send these as plain text messages in any conversation with the bot: | Variable | Default | Description | |---|---|---| -| `OPENCROW_BACKEND` | `matrix` | Messaging backend (`matrix`, `nostr`, or `signal`) | +| `OPENCROW_BACKEND` | `matrix` | Messaging backend (`matrix`, `nostr`, `signal`, `socket`, or `telegram`) | | `OPENCROW_PI_BINARY` | `pi` | Path to the pi binary | | `OPENCROW_PI_SESSION_DIR` | `/var/lib/opencrow/sessions` | Session data directory | | `OPENCROW_PI_PROVIDER` | `anthropic` | LLM provider | @@ -116,6 +117,29 @@ sudo opencrow-signal-cli -a +12025550123 finishLink Once linked, set `OPENCROW_SIGNAL_ACCOUNT = "+12025550123"` and start the service. The account data persists in `OPENCROW_SIGNAL_CONFIG_DIR`. +## Telegram configuration + +OpenCrow uses the Telegram Bot API in long-polling mode, so no public HTTPS +endpoint is needed. Create a bot with [@BotFather](https://t.me/BotFather) to +obtain a token. + +| Variable | Required | Description | +|---|---|---| +| `OPENCROW_TELEGRAM_TOKEN` | Yes* | Bot token from @BotFather (e.g. `123456:ABC-DEF...`) | +| `OPENCROW_TELEGRAM_TOKEN_FILE` | Yes* | Path to a file containing the bot token (preferred for secrets) | +| `OPENCROW_TELEGRAM_API_BASE` | No | Override the API base URL (default `https://api.telegram.org`) | +| `OPENCROW_TELEGRAM_POLL_TIMEOUT` | No | Long-poll timeout duration (default `25s`) | +| `OPENCROW_TELEGRAM_ALLOWED_USERS` | No | Comma-separated user IDs (numeric) and/or `@usernames` permitted to interact. Falls back to `OPENCROW_ALLOWED_USERS` when unset. | + +*Either `OPENCROW_TELEGRAM_TOKEN` or `OPENCROW_TELEGRAM_TOKEN_FILE` is required. + +By default the bot replies to anyone who messages it. 
Restrict access with +`OPENCROW_TELEGRAM_ALLOWED_USERS` — find your numeric ID by messaging +[@userinfobot](https://t.me/userinfobot) (or any "what's my id" bot). + +The bot privacy mode applied by @BotFather affects which messages the bot +sees in groups. For 1:1 chats no extra setup is needed. + ## Secrets and authentication ### Nostr private key diff --git a/main.go b/main.go index 1bc021d..8c1f21b 100644 --- a/main.go +++ b/main.go @@ -18,6 +18,7 @@ import ( nostrbackend "github.com/pinpox/opencrow/nostr" signalbackend "github.com/pinpox/opencrow/signal" socketbackend "github.com/pinpox/opencrow/socket" + telegrambackend "github.com/pinpox/opencrow/telegram" // Register the pure-Go SQLite driver. _ "modernc.org/sqlite" ) @@ -231,6 +232,8 @@ func createBackend(ctx context.Context, cfg *Config, handler backend.MessageHand return createSignalBackend(cfg, handler) case backendSocket: return createSocketBackend(cfg, handler) + case backendTelegram: + return createTelegramBackend(cfg, handler) default: return nil, fmt.Errorf("unsupported backend type: %q", cfg.BackendType) } @@ -323,6 +326,23 @@ func createSocketBackend(cfg *Config, handler backend.MessageHandler) (*socketba return b, nil } +func createTelegramBackend(cfg *Config, handler backend.MessageHandler) (*telegrambackend.Backend, error) { + tgCfg := telegrambackend.Config{ + Token: cfg.Telegram.Token, + APIBase: cfg.Telegram.APIBase, + AllowedUsers: cfg.Telegram.AllowedUsers, + SessionBaseDir: cfg.Pi.SessionDir, + PollTimeout: cfg.Telegram.PollTimeout, + } + + b, err := telegrambackend.New(tgCfg, handler) + if err != nil { + return nil, fmt.Errorf("creating telegram backend: %w", err) + } + + return b, nil +} + func createSignalBackend(cfg *Config, handler backend.MessageHandler) (*signalbackend.Backend, error) { signalCfg := signalbackend.Config{ BinaryPath: cfg.Signal.BinaryPath, diff --git a/nix/module.nix b/nix/module.nix index e51ac5d..5a03bf1 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -197,6 
+197,7 @@ let "nostr" "signal" "socket" + "telegram" ]; default = "matrix"; description = "Messaging backend to use."; @@ -302,6 +303,30 @@ let description = "Bot profile picture URL (NIP-01 kind 0 'picture' field)."; }; + OPENCROW_TELEGRAM_TOKEN_FILE = lib.mkOption { + type = lib.types.str; + default = ""; + description = "Path to file containing the Telegram bot token. Required when backend is telegram (unless token is in environment file)."; + }; + + OPENCROW_TELEGRAM_API_BASE = lib.mkOption { + type = lib.types.str; + default = ""; + description = "Override the Telegram Bot API base URL. Empty uses https://api.telegram.org."; + }; + + OPENCROW_TELEGRAM_POLL_TIMEOUT = lib.mkOption { + type = lib.types.str; + default = ""; + description = "Long-poll timeout for getUpdates (Go duration, e.g. 25s). Empty uses backend default."; + }; + + OPENCROW_TELEGRAM_ALLOWED_USERS = lib.mkOption { + type = lib.types.str; + default = ""; + description = "Comma-separated Telegram user IDs (numeric) or @usernames allowed to interact. 
// Config holds Telegram-specific configuration.
type Config struct {
	// Token is the bot token issued by @BotFather (required).
	Token string
	// APIBase overrides the Telegram API base URL (default https://api.telegram.org).
	APIBase string
	// AllowedUsers is the set of senders permitted to talk to the bot. An
	// entry is either a numeric Telegram user ID (as a string) or an
	// "@username"; a sender is accepted when either form matches.
	// Empty means everyone is allowed.
	AllowedUsers map[string]struct{}
	// SessionBaseDir is the directory under which incoming attachments are
	// saved (in <SessionBaseDir>/attachments/). When empty, a relative
	// "attachments" directory is used instead.
	SessionBaseDir string
	// PollTimeout is the long-poll timeout passed to getUpdates. Values
	// <= 0 are replaced with the 25s default by New.
	PollTimeout time.Duration
}
Telegram has none to clean up beyond cancelling. +func (b *Backend) Close() error { + b.cancel.Cancel() + + return nil +} + +// SendMessage sends a text message to the given conversation (chat ID as string). +// The text is rendered as Telegram HTML (parse_mode=HTML) so pi's markdown +// (**bold**, `code`, [links](url), etc.) appears formatted. If Telegram +// rejects the rendered HTML (unbalanced markdown slipping past the +// converter), the message is resent as plain text without parse_mode. +// Returns the new message ID, or "" on failure. +func (b *Backend) SendMessage(ctx context.Context, conversationID string, text string, replyToID string) string { + if strings.TrimSpace(text) == "" { + return "" + } + + chatID, err := strconv.ParseInt(conversationID, 10, 64) + if err != nil { + slog.Error("telegram: invalid conversation id", "id", conversationID, "error", err) + + return "" + } + + rendered := markdownToHTML(text) + + id, err := b.sendText(ctx, chatID, rendered, replyToID, "HTML") + if err == nil { + return id + } + + slog.Warn("telegram: HTML send failed, retrying as plain text", + "conversation", conversationID, "error", err) + + id, err = b.sendText(ctx, chatID, text, replyToID, "") + if err != nil { + slog.Error("telegram: sendMessage failed", "conversation", conversationID, "error", err) + + return "" + } + + return id +} + +func (b *Backend) sendText(ctx context.Context, chatID int64, text, replyToID, parseMode string) (string, error) { + payload := map[string]any{ + "chat_id": chatID, + "text": text, + } + + if parseMode != "" { + payload["parse_mode"] = parseMode + } + + if replyToID != "" { + if rid, err := strconv.ParseInt(replyToID, 10, 64); err == nil { + payload["reply_parameters"] = map[string]any{ + "message_id": rid, + "allow_sending_without_reply": true, + } + } + } + + var result struct { + MessageID int64 `json:"message_id"` + } + + if err := b.callJSON(ctx, "sendMessage", payload, &result); err != nil { + return "", err + } + + return 
strconv.FormatInt(result.MessageID, 10), nil +} + +// SendFile uploads and sends a local file as a Telegram document. +func (b *Backend) SendFile(ctx context.Context, conversationID string, filePath string) error { + chatID, err := strconv.ParseInt(conversationID, 10, 64) + if err != nil { + return fmt.Errorf("invalid conversation id %q: %w", conversationID, err) + } + + body, contentType, err := buildSendDocumentBody(chatID, filePath) + if err != nil { + return err + } + + endpoint := b.endpoint("sendDocument") + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, body) + if err != nil { + return fmt.Errorf("building sendDocument request: %w", err) + } + + req.Header.Set("Content-Type", contentType) + + resp, err := b.http.Do(req) + if err != nil { + return fmt.Errorf("sendDocument: %w", err) + } + + defer resp.Body.Close() + + return decodeAPIResponse(resp.Body, nil) +} + +// SetTyping shows the "typing..." indicator while typing is true. Telegram +// indicators auto-expire after ~5s, so callers that want a sustained typing +// state should call again periodically. +func (b *Backend) SetTyping(ctx context.Context, conversationID string, typing bool) { + if !typing { + return + } + + chatID, err := strconv.ParseInt(conversationID, 10, 64) + if err != nil { + return + } + + _ = b.callJSON(ctx, "sendChatAction", map[string]any{ + "chat_id": chatID, + "action": "typing", + }, nil) +} + +// ResetConversation clears the active-conversation lock if it matches. +func (b *Backend) ResetConversation(_ context.Context, conversationID string) { + b.active.Reset(conversationID) +} + +// SystemPromptExtra returns Telegram-specific system prompt context. +func (b *Backend) SystemPromptExtra() string { + return `You are communicating via Telegram (Bot API backend). + +## Sending files to the user + +You can send files back to the user. 
Include a tag in your +response with the absolute path to the file: + +/path/to/file.png + +The bot strips the tag and uploads the file as a Telegram document. You can +include multiple tags in a single response. + +## File attachments from the user + +When users send files, you'll receive a message like: +"[User sent a file (...): /path/to/file]" +Use the read tool to inspect the file. + +## Formatting + +Markdown is rendered. You can use **bold**, *italic*, ` + "`inline code`" + `, +fenced ` + "```code blocks```" + `, [links](https://example.com), ~~strike~~, and +"-" / "*" bullet lists. Headings (#, ##) are rendered as bold lines. +Telegram has no real list / table layout — keep them simple. + +Avoid raw HTML in your replies; the bridge converts your markdown to the +HTML subset Telegram understands.` +} + +// MarkdownFlavor reports MarkdownFull because the backend converts +// markdown replies to Telegram HTML before sending (parse_mode=HTML), +// so callers may safely emit fenced code blocks with language hints, +// inline backticks, and other rich formatting. 
+func (b *Backend) MarkdownFlavor() backend.MarkdownFlavor { + return backend.MarkdownFull +} + +// --- internal --- + +func (b *Backend) endpoint(method string) string { + return fmt.Sprintf("%s/bot%s/%s", strings.TrimRight(b.cfg.APIBase, "/"), b.cfg.Token, method) +} + +func (b *Backend) fileURL(filePath string) string { + return fmt.Sprintf("%s/file/bot%s/%s", strings.TrimRight(b.cfg.APIBase, "/"), b.cfg.Token, filePath) +} + +func (b *Backend) callJSON(ctx context.Context, method string, payload any, out any) error { + body, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal %s payload: %w", method, err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, b.endpoint(method), bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("building %s request: %w", method, err) + } + + req.Header.Set("Content-Type", "application/json") + + resp, err := b.http.Do(req) + if err != nil { + return fmt.Errorf("%s: %w", method, err) + } + + defer resp.Body.Close() + + return decodeAPIResponse(resp.Body, out) +} + +func (b *Backend) fetchUpdates(ctx context.Context) ([]update, error) { + b.mu.Lock() + offset := b.offset + b.mu.Unlock() + + timeoutSec := int64(b.cfg.PollTimeout / time.Second) + if timeoutSec <= 0 { + timeoutSec = int64(defaultPollTimeout / time.Second) + } + + payload := map[string]any{ + "offset": offset, + "timeout": timeoutSec, + "allowed_updates": []string{ + "message", + }, + } + + var updates []update + if err := b.callJSON(ctx, "getUpdates", payload, &updates); err != nil { + return nil, err + } + + if len(updates) > 0 { + b.mu.Lock() + // Acknowledge by advancing offset past the highest update_id. 
+ b.offset = updates[len(updates)-1].UpdateID + 1 + b.mu.Unlock() + } + + return updates, nil +} + +func (b *Backend) handleUpdate(ctx context.Context, upd update) { + if upd.Message == nil { + return + } + + msg := upd.Message + if msg.From == nil { + return + } + + if msg.From.IsBot { + return + } + + senderID := strconv.FormatInt(msg.From.ID, 10) + if !backend.IsAllowed(b.cfg.AllowedUsers, senderID) { + // Also allow matching by @username for convenience. + if msg.From.Username == "" || !backend.IsAllowed(b.cfg.AllowedUsers, "@"+msg.From.Username) { + slog.Debug("telegram: dropping message from non-allowed sender", "sender", senderID, "username", msg.From.Username) + + return + } + } + + conversationID := strconv.FormatInt(msg.Chat.ID, 10) + if !b.active.Claim(conversationID) { + slog.Info("telegram: dropping message from different active conversation", "conversation", conversationID) + + return + } + + text := strings.TrimSpace(msg.Text) + + if attachmentText := b.downloadAttachments(ctx, msg); attachmentText != "" { + caption := strings.TrimSpace(msg.Caption) + if caption != "" { + if text != "" { + text += "\n" + } + + text += caption + } + + if text != "" { + text += "\n" + } + + text += attachmentText + } + + if text == "" { + return + } + + messageID := strconv.FormatInt(msg.MessageID, 10) + + var replyToID string + if msg.ReplyToMessage != nil { + replyToID = strconv.FormatInt(msg.ReplyToMessage.MessageID, 10) + } + + slog.Info("telegram: received message", + "conversation", conversationID, + "sender", senderID, + "len", len(text), + ) + + b.handler(ctx, backend.Message{ + ConversationID: conversationID, + SenderID: senderID, + Text: text, + MessageID: messageID, + ReplyToID: replyToID, + }) +} + +// downloadAttachments fetches every file referenced by msg and returns the +// canonical "[User sent a file ...]" lines for them. 
+func (b *Backend) downloadAttachments(ctx context.Context, msg *message) string { + type attachment struct { + fileID string + filename string + } + + var atts []attachment + + if len(msg.Photo) > 0 { + // Photo array is sorted from smallest to largest; pick the largest. + largest := msg.Photo[len(msg.Photo)-1] + atts = append(atts, attachment{fileID: largest.FileID}) + } + + if msg.Document != nil { + atts = append(atts, attachment{fileID: msg.Document.FileID, filename: msg.Document.FileName}) + } + + if msg.Audio != nil { + atts = append(atts, attachment{fileID: msg.Audio.FileID, filename: msg.Audio.FileName}) + } + + if msg.Video != nil { + atts = append(atts, attachment{fileID: msg.Video.FileID, filename: msg.Video.FileName}) + } + + if msg.Voice != nil { + atts = append(atts, attachment{fileID: msg.Voice.FileID}) + } + + if msg.VideoNote != nil { + atts = append(atts, attachment{fileID: msg.VideoNote.FileID}) + } + + if len(atts) == 0 { + return "" + } + + lines := make([]string, 0, len(atts)) + + for _, a := range atts { + path, err := b.downloadFile(ctx, a.fileID, a.filename) + if err != nil { + slog.Warn("telegram: failed to download attachment", "file_id", a.fileID, "error", err) + lines = append(lines, backend.AttachmentText(a.filename, "")) + + continue + } + + lines = append(lines, backend.AttachmentText(filepath.Base(path), path)) + } + + return strings.Join(lines, "\n") +} + +func (b *Backend) downloadFile(ctx context.Context, fileID, preferredName string) (string, error) { + var fileInfo struct { + FileID string `json:"file_id"` + FilePath string `json:"file_path"` + } + + if err := b.callJSON(ctx, "getFile", map[string]any{"file_id": fileID}, &fileInfo); err != nil { + return "", fmt.Errorf("getFile: %w", err) + } + + if fileInfo.FilePath == "" { + return "", errors.New("getFile returned empty file_path") + } + + // Build the destination path: prefer the user-provided filename, then + // the basename Telegram exposes, falling back to the file_id. 
+ name := strings.TrimSpace(preferredName) + if name == "" { + name = filepath.Base(fileInfo.FilePath) + } + + if name == "" || name == "." || name == "/" { + name = fileID + } + + dir := b.attachmentsDir() + if err := os.MkdirAll(dir, 0o755); err != nil { + return "", fmt.Errorf("creating attachments dir: %w", err) + } + + dest := filepath.Join(dir, fmt.Sprintf("%d-%s", time.Now().UnixNano(), name)) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, b.fileURL(fileInfo.FilePath), nil) + if err != nil { + return "", fmt.Errorf("building file download request: %w", err) + } + + resp, err := b.http.Do(req) + if err != nil { + return "", fmt.Errorf("downloading file: %w", err) + } + + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("file download returned %s", resp.Status) + } + + f, err := os.Create(dest) + if err != nil { + return "", fmt.Errorf("creating attachment file: %w", err) + } + + if _, err := io.Copy(f, resp.Body); err != nil { + f.Close() + _ = os.Remove(dest) + + return "", fmt.Errorf("writing attachment file: %w", err) + } + + if err := f.Close(); err != nil { + return "", fmt.Errorf("closing attachment file: %w", err) + } + + return dest, nil +} + +func (b *Backend) attachmentsDir() string { + if b.cfg.SessionBaseDir == "" { + return attachmentsDirName + } + + return filepath.Join(b.cfg.SessionBaseDir, attachmentsDirName) +} + +func (b *Backend) ensureAttachmentsDir() error { + if b.cfg.SessionBaseDir == "" { + return nil + } + + if err := os.MkdirAll(b.attachmentsDir(), 0o755); err != nil { + return fmt.Errorf("creating telegram attachments dir: %w", err) + } + + return nil +} + +// --- HTTP helpers --- + +// decodeAPIResponse reads a Bot API envelope and unmarshals result into out +// (or discards it if out is nil). It returns an error when ok=false. 
// buildSendDocumentBody builds a multipart body for sendDocument. Returns
// the body and the Content-Type header (which carries the random boundary).
//
// The form has a "chat_id" field holding chatID in decimal and a "document"
// file part named after the basename of filePath. The whole file is buffered
// in memory. An error is returned when the file cannot be opened or read, or
// when the multipart form cannot be written.
func buildSendDocumentBody(chatID int64, filePath string) (io.Reader, string, error) {
	f, err := os.Open(filePath)
	if err != nil {
		return nil, "", fmt.Errorf("opening file %s: %w", filePath, err)
	}
	// The file is only read, so a Close error carries nothing actionable;
	// defer releases the handle on every return path.
	defer f.Close()

	var buf bytes.Buffer

	w := multipart.NewWriter(&buf)

	if err := w.WriteField("chat_id", strconv.FormatInt(chatID, 10)); err != nil {
		return nil, "", fmt.Errorf("writing chat_id field: %w", err)
	}

	part, err := w.CreateFormFile("document", filepath.Base(filePath))
	if err != nil {
		return nil, "", fmt.Errorf("creating form file: %w", err)
	}

	if _, err := io.Copy(part, f); err != nil {
		return nil, "", fmt.Errorf("copying file body: %w", err)
	}

	// Close writes the trailing boundary; the body is incomplete without it.
	if err := w.Close(); err != nil {
		return nil, "", fmt.Errorf("closing multipart writer: %w", err)
	}

	return &buf, w.FormDataContentType(), nil
}
func truncate(s string, n int) string {
	// Shortens s to at most n runes and appends "..." when something was cut.
	// Strings that already fit are returned unchanged.

	// A negative limit behaves like zero instead of panicking on the slice.
	if n < 0 {
		n = 0
	}

	// Fast path: a string of at most n bytes cannot hold more than n runes.
	if len(s) <= n {
		return s
	}

	// Ranging over a string yields the byte offset at which each rune
	// starts, so cutting at that offset never splits a multi-byte UTF-8
	// sequence.
	seen := 0

	for i := range s {
		if seen == n {
			return s[:i] + "..."
		}

		seen++
	}

	// More than n bytes but no more than n runes: nothing to cut.
	return s
}
+type fakeAPI struct { + srv *httptest.Server + + mu sync.Mutex + updates []update + sendLog []map[string]any + files map[string][]byte // path -> body for /file/bot/ + tokenSeg string + + // failHTML, when true, makes sendMessage fail the first call with + // parse_mode=HTML and succeed when parse_mode is absent — exercising + // the plain-text fallback path. + failHTML bool + + called atomic.Int32 +} + +func newFakeAPI(t *testing.T, token string) *fakeAPI { + t.Helper() + + f := &fakeAPI{ + tokenSeg: "/bot" + token + "/", + files: map[string][]byte{}, + } + + mux := http.NewServeMux() + mux.HandleFunc("/", f.handle) + f.srv = httptest.NewServer(mux) + + t.Cleanup(f.srv.Close) + + return f +} + +func (f *fakeAPI) handle(w http.ResponseWriter, r *http.Request) { + f.called.Add(1) + + // File downloads. + if strings.HasPrefix(r.URL.Path, "/file/bot") { + f.mu.Lock() + body, ok := f.files[r.URL.Path] + f.mu.Unlock() + + if !ok { + w.WriteHeader(http.StatusNotFound) + + return + } + + _, _ = w.Write(body) + + return + } + + method := strings.TrimPrefix(r.URL.Path, f.tokenSeg) + + body, _ := io.ReadAll(r.Body) + + switch method { + case "getUpdates": + f.mu.Lock() + updates := f.updates + f.updates = nil + f.mu.Unlock() + + writeOK(w, updates) + + case "sendMessage": + var payload map[string]any + _ = json.Unmarshal(body, &payload) + + f.mu.Lock() + f.sendLog = append(f.sendLog, payload) + failHTML := f.failHTML && payload["parse_mode"] == "HTML" + f.mu.Unlock() + + if failHTML { + writeErr(w, "Bad Request: can't parse entities") + + return + } + + writeOK(w, map[string]any{"message_id": 42}) + + case "sendChatAction": + writeOK(w, true) + + case "getFile": + var payload map[string]string + _ = json.Unmarshal(body, &payload) + writeOK(w, map[string]any{ + "file_id": payload["file_id"], + "file_path": "documents/" + payload["file_id"] + ".bin", + }) + + default: + writeOK(w, nil) + } +} + +func (f *fakeAPI) addUpdate(u update) { + f.mu.Lock() + f.updates = 
append(f.updates, u) + f.mu.Unlock() +} + +func (f *fakeAPI) addFile(path string, body []byte) { + f.mu.Lock() + f.files[path] = body + f.mu.Unlock() +} + +func writeOK(w http.ResponseWriter, result any) { + resp := map[string]any{"ok": true, "result": result} + _ = json.NewEncoder(w).Encode(resp) +} + +func writeErr(w http.ResponseWriter, description string) { + w.WriteHeader(http.StatusBadRequest) + _ = json.NewEncoder(w).Encode(map[string]any{"ok": false, "description": description}) +} + +func TestBackend_New_RejectsEmptyToken(t *testing.T) { + t.Parallel() + + if _, err := New(Config{}, nil); err == nil { + t.Fatal("expected error for missing token") + } +} + +func TestBackend_SendMessage(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + + b, err := New(Config{Token: "tok", APIBase: api.srv.URL}, func(context.Context, backend.Message) {}) + if err != nil { + t.Fatalf("New: %v", err) + } + + id := b.SendMessage(context.Background(), "100", "hello", "55") + if id != "42" { + t.Errorf("message id = %q, want 42", id) + } + + api.mu.Lock() + defer api.mu.Unlock() + + if len(api.sendLog) != 1 { + t.Fatalf("expected 1 send call, got %d", len(api.sendLog)) + } + + got := api.sendLog[0] + if got["text"] != "hello" { + t.Errorf("text = %v, want hello", got["text"]) + } + + // JSON unmarshals numbers as float64. 
+ if got["chat_id"].(float64) != 100 { + t.Errorf("chat_id = %v, want 100", got["chat_id"]) + } + + rp, ok := got["reply_parameters"].(map[string]any) + if !ok { + t.Fatalf("reply_parameters missing or wrong type: %v", got["reply_parameters"]) + } + + if rp["message_id"].(float64) != 55 { + t.Errorf("reply_parameters.message_id = %v, want 55", rp["message_id"]) + } +} + +func TestBackend_SendMessage_RendersMarkdownAsHTML(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + + b, err := New(Config{Token: "tok", APIBase: api.srv.URL}, func(context.Context, backend.Message) {}) + if err != nil { + t.Fatalf("New: %v", err) + } + + if id := b.SendMessage(context.Background(), "100", "this is **bold** and `code`", ""); id == "" { + t.Fatal("expected non-empty id") + } + + api.mu.Lock() + defer api.mu.Unlock() + + if len(api.sendLog) != 1 { + t.Fatalf("expected 1 send call, got %d", len(api.sendLog)) + } + + got := api.sendLog[0] + + if got["parse_mode"] != "HTML" { + t.Errorf("parse_mode = %v, want HTML", got["parse_mode"]) + } + + want := "this is bold and code" + if got["text"] != want { + t.Errorf("text = %q, want %q", got["text"], want) + } +} + +func TestBackend_SendMessage_FallsBackToPlainOnHTMLError(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + api.failHTML = true + + b, err := New(Config{Token: "tok", APIBase: api.srv.URL}, func(context.Context, backend.Message) {}) + if err != nil { + t.Fatalf("New: %v", err) + } + + if id := b.SendMessage(context.Background(), "100", "weird **broken markdown", ""); id == "" { + t.Fatal("expected fallback to succeed") + } + + api.mu.Lock() + defer api.mu.Unlock() + + if len(api.sendLog) != 2 { + t.Fatalf("expected 2 send calls (HTML attempt + plain fallback), got %d", len(api.sendLog)) + } + + if api.sendLog[1]["parse_mode"] != nil { + t.Errorf("fallback should not set parse_mode, got %v", api.sendLog[1]["parse_mode"]) + } + + if api.sendLog[1]["text"] != "weird **broken markdown" { + 
t.Errorf("fallback text = %q, want raw original", api.sendLog[1]["text"]) + } +} + +func TestBackend_SendMessage_EmptyTextSkipsCall(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + + b, err := New(Config{Token: "tok", APIBase: api.srv.URL}, func(context.Context, backend.Message) {}) + if err != nil { + t.Fatalf("New: %v", err) + } + + if id := b.SendMessage(context.Background(), "100", " \n", ""); id != "" { + t.Errorf("expected empty id for whitespace-only text, got %q", id) + } + + if api.called.Load() != 0 { + t.Errorf("expected no API calls for whitespace text, got %d", api.called.Load()) + } +} + +func TestBackend_RunDeliversMessage(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + api.addUpdate(update{ + UpdateID: 7, + Message: &message{ + MessageID: 11, + From: &user{ID: 123, Username: "alice"}, + Chat: chat{ID: 456, Type: "private"}, + Text: "hi", + }, + }) + + received := make(chan backend.Message, 1) + + b, err := New(Config{ + Token: "tok", + APIBase: api.srv.URL, + PollTimeout: time.Second, + }, func(_ context.Context, m backend.Message) { + received <- m + }) + if err != nil { + t.Fatalf("New: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { _ = b.Run(ctx) }() + + select { + case msg := <-received: + if msg.ConversationID != "456" { + t.Errorf("ConversationID = %q, want 456", msg.ConversationID) + } + + if msg.SenderID != "123" { + t.Errorf("SenderID = %q, want 123", msg.SenderID) + } + + if msg.Text != "hi" { + t.Errorf("Text = %q, want hi", msg.Text) + } + + if msg.MessageID != "11" { + t.Errorf("MessageID = %q, want 11", msg.MessageID) + } + case <-time.After(3 * time.Second): + t.Fatal("did not receive message within 3s") + } + + b.Stop() +} + +func TestBackend_AllowlistDropsForeignSender(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + api.addUpdate(update{ + UpdateID: 7, + Message: &message{ + MessageID: 11, + From: &user{ID: 999, Username: 
"stranger"}, + Chat: chat{ID: 456}, + Text: "hi", + }, + }) + + received := make(chan backend.Message, 1) + + b, err := New(Config{ + Token: "tok", + APIBase: api.srv.URL, + PollTimeout: time.Second, + AllowedUsers: map[string]struct{}{"123": {}}, + }, func(_ context.Context, m backend.Message) { + received <- m + }) + if err != nil { + t.Fatalf("New: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { _ = b.Run(ctx) }() + + select { + case msg := <-received: + t.Fatalf("received message from non-allowed sender: %+v", msg) + case <-time.After(500 * time.Millisecond): + // expected: no delivery + } + + b.Stop() +} + +func TestBackend_DownloadAttachment(t *testing.T) { + t.Parallel() + + api := newFakeAPI(t, "tok") + api.addFile("/file/bottok/documents/file-xyz.bin", []byte("BINARY")) + api.addUpdate(update{ + UpdateID: 8, + Message: &message{ + MessageID: 12, + From: &user{ID: 1}, + Chat: chat{ID: 2}, + Document: &document{FileID: "file-xyz", FileName: "report.pdf"}, + }, + }) + + received := make(chan backend.Message, 1) + + dir := t.TempDir() + + b, err := New(Config{ + Token: "tok", + APIBase: api.srv.URL, + SessionBaseDir: dir, + PollTimeout: time.Second, + }, func(_ context.Context, m backend.Message) { + received <- m + }) + if err != nil { + t.Fatalf("New: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { _ = b.Run(ctx) }() + + select { + case msg := <-received: + if !strings.Contains(msg.Text, "[User sent a file") { + t.Errorf("expected attachment marker in text, got %q", msg.Text) + } + + if !strings.Contains(msg.Text, filepath.Join(dir, "attachments")) { + t.Errorf("expected path under attachments dir, got %q", msg.Text) + } + case <-time.After(3 * time.Second): + t.Fatal("did not receive attachment message within 3s") + } + + b.Stop() +} + +func TestBackend_MarkdownFlavor(t *testing.T) { + t.Parallel() + + b, err := New(Config{Token: "x"}, nil) 
+ if err != nil { + t.Fatalf("New: %v", err) + } + + if got := b.MarkdownFlavor(); got != backend.MarkdownFull { + t.Errorf("flavor = %d, want %d", got, backend.MarkdownFull) + } +} diff --git a/telegram/markdown.go b/telegram/markdown.go new file mode 100644 index 0000000..1ee0477 --- /dev/null +++ b/telegram/markdown.go @@ -0,0 +1,109 @@ +package telegram + +import ( + "fmt" + "html" + "regexp" + "strings" +) + +// markdownToHTML converts a subset of CommonMark markdown to the HTML +// dialect Telegram's parse_mode=HTML accepts. +// +// Telegram supports only: b, i, u, s, span class="tg-spoiler", tg-spoiler, +// a href, code, pre, blockquote. There are no headings, lists, or tables — +// we approximate (`#` headings → bold lines, `-`/`*` bullets → "• ", numbered +// lists pass through). HTML special characters in user content are escaped. +// +// The output is best-effort: callers should fall back to sending the raw +// text without parse_mode if Telegram rejects the rendered HTML (e.g. an +// unbalanced asterisk slipping past the regex into a stray ``). +func markdownToHTML(s string) string { + if s == "" { + return "" + } + + // Stash code blocks and links first so their contents don't get + // chewed up by the bold/italic/escape passes. Each placeholder is a + // short token containing only NUL bytes and digits — html.EscapeString + // leaves it alone, and no regex below matches it. + var stash []string + + store := func(rendered string) string { + idx := len(stash) + stash = append(stash, rendered) + + return fmt.Sprintf("\x00P%d\x00", idx) + } + + // 1. Fenced code blocks. + s = codeBlockRe.ReplaceAllStringFunc(s, func(m string) string { + sub := codeBlockRe.FindStringSubmatch(m) + lang := strings.TrimSpace(sub[1]) + body := html.EscapeString(strings.TrimRight(sub[2], "\n")) + + if lang != "" { + return store(`
` + body + `
`) + } + + return store("
" + body + "
") + }) + + // 2. Inline code (single backticks). + s = inlineCodeRe.ReplaceAllStringFunc(s, func(m string) string { + sub := inlineCodeRe.FindStringSubmatch(m) + + return store("" + html.EscapeString(sub[1]) + "") + }) + + // 3. Links [text](url). + s = linkRe.ReplaceAllStringFunc(s, func(m string) string { + sub := linkRe.FindStringSubmatch(m) + + return store(`` + html.EscapeString(sub[1]) + "") + }) + + // 4. Escape HTML special chars in everything that's left. + s = html.EscapeString(s) + + // 5. Block-level: ATX headings → bold line. Skip the heading marker + // and bold the text. Drop trailing "###" closers if present. + s = headingRe.ReplaceAllString(s, "$2") + + // 6. Bullets — "- ", "* ", "+ " at line start → "• ". + s = bulletRe.ReplaceAllString(s, "$1• ") + + // 7. Inline emphasis. **bold** before *italic* so the second pass + // only sees single asterisks. + s = boldDoubleRe.ReplaceAllString(s, "$1") + s = boldUnderRe.ReplaceAllString(s, "$1") + s = strikeRe.ReplaceAllString(s, "$1") + s = italicAstRe.ReplaceAllString(s, "$1$2$3") + s = italicUnderRe.ReplaceAllString(s, "$1$2$3") + + // 8. Restore placeholders. + for i, val := range stash { + s = strings.Replace(s, fmt.Sprintf("\x00P%d\x00", i), val, 1) + } + + return s +} + +var ( + codeBlockRe = regexp.MustCompile("(?s)```(\\w*)\\n?(.*?)```") + inlineCodeRe = regexp.MustCompile("`([^`\n]+)`") + linkRe = regexp.MustCompile(`\[([^\]\n]+)\]\(([^)\s]+)\)`) + headingRe = regexp.MustCompile(`(?m)^(#{1,6})\s+(.+?)\s*#*\s*$`) + bulletRe = regexp.MustCompile(`(?m)^([ \t]*)[-*+]\s+`) + boldDoubleRe = regexp.MustCompile(`\*\*([^\*\n]+?)\*\*`) + boldUnderRe = regexp.MustCompile(`__([^_\n]+?)__`) + strikeRe = regexp.MustCompile(`~~([^~\n]+?)~~`) + // Italic with `*foo*` — require the asterisks to NOT be adjacent to + // another asterisk (so `**bold**` left alone after the bold pass + // has no leftovers anyway, but be defensive). $1/$3 are the + // surrounding chars (or empty at line bounds). 
+ italicAstRe = regexp.MustCompile(`(^|[^\*\w])\*([^\*\n]+?)\*([^\*\w]|$)`) + // Italic with `_foo_` — require word-boundary on both sides so we + // don't munge identifiers like `snake_case_var`. + italicUnderRe = regexp.MustCompile(`(^|\W)_([^_\n]+?)_(\W|$)`) +) diff --git a/telegram/markdown_test.go b/telegram/markdown_test.go new file mode 100644 index 0000000..1409c36 --- /dev/null +++ b/telegram/markdown_test.go @@ -0,0 +1,93 @@ +package telegram + +import "testing" + +func TestMarkdownToHTML(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + in string + want string + }{ + {"empty", "", ""}, + {"plain", "hello world", "hello world"}, + {"bold double-star", "this is **bold** ok", "this is bold ok"}, + {"bold double-underscore", "this is __bold__ ok", "this is bold ok"}, + {"italic single-star", "really *cool* word", "really cool word"}, + {"italic underscore", "use _emphasis_ here", "use emphasis here"}, + { + "identifier with underscores left alone", + "variable snake_case_name in code", + "variable snake_case_name in code", + }, + {"strike", "this is ~~gone~~ now", "this is gone now"}, + {"inline code", "run `ls -la` first", "run ls -la first"}, + { + "inline code escapes html", + "run `cat ` now", + "run cat <file> now", + }, + { + "fenced code block with language", + "before\n```go\nfmt.Println(\"hi\")\n```\nafter", + "before\n
fmt.Println("hi")
\nafter", + }, + { + "fenced code block without language", + "```\nplain code\n```", + "
plain code
", + }, + { + "link", + "see [the docs](https://example.com)", + "see the docs", + }, + { + "heading h2 to bold", + "## Heading text\nbody", + "Heading text\nbody", + }, + { + "bullet list", + "items:\n- one\n- two\n- three", + "items:\n• one\n• two\n• three", + }, + { + "escapes raw html in plain text", + "compare a < b and c > d & done", + "compare a < b and c > d & done", + }, + { + "link url is escaped", + "see [docs](https://x.com?a=1&b=2)", + "see docs", + }, + { + "bold inside paragraph with neighbours", + "start **mid** end", + "start mid end", + }, + { + "asterisks not greedy across newlines", + "a *b\nc* d", + "a *b\nc* d", + }, + { + "realistic weather reply", + "**Wrocław dziś:**\n\nteraz: **15°C**, słonecznie", + "Wrocław dziś:\n\nteraz: 15°C, słonecznie", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + got := markdownToHTML(tc.in) + if got != tc.want { + t.Errorf("markdownToHTML(%q)\n got: %q\nwant: %q", tc.in, got, tc.want) + } + }) + } +}