From 37af84ceeff2747001a0d2a658b0862ee570f185 Mon Sep 17 00:00:00 2001 From: Michael Toop Date: Tue, 30 Jun 2026 09:59:54 +0200 Subject: [PATCH] Close response body in SendEvent/HealthCheck to fix FD + memory leak SendEvent discarded the http.Post response (`_, err := ...`) and never closed resp.Body. An unclosed/undrained body keeps the underlying TCP connection out of the keep-alive pool, leaking a connection + file descriptor on every call. SendEvent runs on every stats tick, so the agent's open-FD count and memory grow unbounded: long-lived agents hit "socket: too many open files" (event POSTs, then docker.sock stats fetches) and busy agents are OOM-killed (exit 137). Drain + close the body in SendEvent (and the matching http.Get in HealthCheck). Fixes #15. Co-Authored-By: Claude Opus 4.8 --- swarmpit/client.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/swarmpit/client.go b/swarmpit/client.go index d46d3f9..1b9af97 100644 --- a/swarmpit/client.go +++ b/swarmpit/client.go @@ -1,9 +1,11 @@ package swarmpit import ( + "io" "log" "time" "bytes" + "io/ioutil" "net/http" "encoding/json" "github.com/swarmpit/agent/setup" @@ -40,18 +42,27 @@ func SendEvent(eventType EventType, message interface{}) { log.Printf("DEBUG: Docker event: %s", buffer) } - _, err := http.Post(arg.EventEndpoint, "application/json; charset=utf-8", buffer) + resp, err := http.Post(arg.EventEndpoint, "application/json; charset=utf-8", buffer) if err != nil { log.Printf("ERROR: Event sending failed: %s", err) + return } + // Drain and close the body so the underlying keep-alive connection is + // reused/released. Without this every POST leaks a connection + file + // descriptor (SendEvent runs on every stats tick), eventually exhausting + // the agent's FD limit ("too many open files") and growing memory until OOM. + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() } func HealthCheck() { for { <-time.After(5 * time.Second) - _, err := http.Get(arg.HealthCheckEndpoint) + resp, err := http.Get(arg.HealthCheckEndpoint) if err == nil { + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() log.Printf("INFO: Swarmpit OK") break; }