Harden long-lived SSE and WebSocket connections

Address intermittent stalls seen after connections have been open for many hours by improving liveness detection and failure handling for both streaming channels.

WebSocket changes:
- Added periodic server ping frames and a read deadline refreshed by pong replies.
- On sender write/ping failure, explicitly close the underlying connection so clients promptly observe the disconnect and reconnect instead of remaining half-open.

SSE changes:
- Excluded /events from gzip middleware and added X-Accel-Buffering: no to reduce proxy buffering risk.
- Stop the SSE loop on write errors for activity/keepalive frames so dead subscribers are cleaned up immediately.

Tests:
- Added regression coverage for gzip bypass on /events.
- Added regression coverage ensuring SSE handler exits and unsubscribes on write failure.
- Verified with make format && make check.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
GitHub Copilot
2026-02-15 14:28:09 +00:00
parent 5fd2f92ef1
commit d43270d5dc
2 changed files with 116 additions and 8 deletions
+78
View File
@@ -2,6 +2,7 @@ package webterm
import (
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
@@ -13,6 +14,26 @@ import (
"github.com/gorilla/websocket"
)
// failingSSEWriter is a test double for an http.ResponseWriter whose Write
// never succeeds. It also provides a no-op Flush, so it satisfies
// http.Flusher as well.
type failingSSEWriter struct {
	header   http.Header // lazily allocated by Header
	writeErr error       // error returned by Write; io.ErrShortWrite is used when nil
}

// Compile-time checks that the double keeps satisfying the interfaces it
// stands in for.
var (
	_ http.ResponseWriter = (*failingSSEWriter)(nil)
	_ http.Flusher        = (*failingSSEWriter)(nil)
)

// Header returns the response header map, allocating it on first use so the
// zero value of failingSSEWriter is usable.
func (w *failingSSEWriter) Header() http.Header {
	if w.header == nil {
		w.header = make(http.Header)
	}
	return w.header
}

// WriteHeader discards the status code.
func (w *failingSSEWriter) WriteHeader(int) {}

// Write reports zero bytes written together with a non-nil error. It returns
// the configured writeErr, or io.ErrShortWrite when none was set, so the
// io.Writer contract (a short write must carry an error) always holds.
func (w *failingSSEWriter) Write([]byte) (int, error) {
	if w.writeErr == nil {
		return 0, io.ErrShortWrite
	}
	return 0, w.writeErr
}

// Flush is a no-op.
func (w *failingSSEWriter) Flush() {}
func newServerForTests(t *testing.T, withLanding bool) (*LocalServer, *httptest.Server, *syncSessionMap) {
t.Helper()
config := Config{
@@ -267,3 +288,60 @@ func TestMarkRouteActivityBroadcastsWithoutBlockingGlobalLock(t *testing.T) {
t.Fatalf("expected route activity broadcast")
}
}
// TestGzipMiddlewareSkipsEventsPath verifies that a GET /events request that
// advertises gzip support passes through gzipMiddleware with no
// Content-Encoding header and with the body left as written by the handler.
func TestGzipMiddlewareSkipsEventsPath(t *testing.T) {
	srv := NewLocalServer(Config{}, ServerOptions{})

	plain := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		_, _ = io.WriteString(w, "ok")
	})
	wrapped := srv.gzipMiddleware(plain)

	request := httptest.NewRequest(http.MethodGet, "/events", nil)
	// The client accepts gzip, so an uncompressed reply cannot be explained
	// by missing client support.
	request.Header.Set("Accept-Encoding", "gzip")

	recorder := httptest.NewRecorder()
	wrapped.ServeHTTP(recorder, request)

	if encoding := recorder.Header().Get("Content-Encoding"); encoding != "" {
		t.Fatalf("expected no gzip encoding for SSE path, got %q", encoding)
	}
	if recorder.Body.String() != "ok" {
		t.Fatalf("unexpected body: %q", recorder.Body.String())
	}
}
// TestHandleEventsReturnsOnWriteError checks that handleEvents returns, and
// that the subscriber set is empty again, after activity is signalled while
// the response writer fails every write.
func TestHandleEventsReturnsOnWriteError(t *testing.T) {
	srv := NewLocalServer(Config{}, ServerOptions{})
	request := httptest.NewRequest(http.MethodGet, "/events", nil)
	brokenWriter := &failingSSEWriter{writeErr: errors.New("broken pipe")}

	// subscriberCount reads the size of the subscriber set under the
	// server's read lock.
	subscriberCount := func() int {
		srv.mu.RLock()
		total := len(srv.sseSubscribers)
		srv.mu.RUnlock()
		return total
	}

	finished := make(chan struct{})
	go func() {
		srv.handleEvents(brokenWriter, request)
		close(finished)
	}()

	// Poll until the handler has registered itself as the only subscriber.
	registerBy := time.Now().Add(250 * time.Millisecond)
	for subscriberCount() != 1 {
		if time.Now().After(registerBy) {
			t.Fatalf("expected SSE subscriber to be registered")
		}
		time.Sleep(5 * time.Millisecond)
	}

	// Presumably this reaches the registered subscriber and forces a write
	// on the failing writer; confirm against markRouteActivity.
	srv.markRouteActivity("route-a")

	select {
	case <-time.After(2 * time.Second):
		t.Fatalf("handleEvents did not exit after write error")
	case <-finished:
	}

	if remaining := subscriberCount(); remaining != 0 {
		t.Fatalf("expected SSE subscriber cleanup after write error, got %d", remaining)
	}
}