From 0ac8d9facdbaa9591c5bffd5cd83caf9144ee70e Mon Sep 17 00:00:00 2001 From: Jay Date: Thu, 23 Apr 2026 20:58:57 -0400 Subject: [PATCH] Added logging to pools and workers. --- inbound/pool.go | 33 +++++++++-- inbound/worker.go | 51 +++++++++++++--- inbound/worker_reader_test.go | 12 ++-- inbound/worker_watchdog_test.go | 10 ++-- outbound/pool.go | 24 ++++++++ outbound/worker.go | 84 +++++++++++++++++++++++++-- outbound/worker_dialer_test.go | 10 ++-- outbound/worker_keepalive_test.go | 8 +-- outbound/worker_session_inner_test.go | 12 ++-- transport/connection.go | 4 +- transport/logging_test.go | 8 +-- 11 files changed, 209 insertions(+), 47 deletions(-) diff --git a/inbound/pool.go b/inbound/pool.go index ee07ddd..c9dc2f3 100644 --- a/inbound/pool.go +++ b/inbound/pool.go @@ -13,12 +13,12 @@ import ( // Types -type PoolEventKind int +type PoolEventKind string const ( - EventDisconnected PoolEventKind = iota - EventDropped - EventEvicted + EventDisconnected PoolEventKind = "disconnected" + EventDropped PoolEventKind = "dropped" + EventEvicted PoolEventKind = "evicted" ) var workerToPoolEvent = map[WorkerExitKind]PoolEventKind{ @@ -152,6 +152,10 @@ func (p *Pool) Errors() <-chan error { } func (p *Pool) Close() { + if p.logger != nil { + p.logger.Debug("closing") + } + p.mu.Lock() if p.closed { p.mu.Unlock() @@ -177,6 +181,10 @@ func (p *Pool) Close() { close(p.inbox) close(p.events) close(p.errors) + + if p.logger != nil { + p.logger.Info("closed") + } }() } @@ -205,6 +213,11 @@ func (p *Pool) Replace(id string, socket types.Socket) error { if peer, exists := p.peers[id]; exists { p.removeLocked(peer) + + if p.logger != nil { + p.logger.Info("removed peer", "peer", id) + } + } else { return ErrPeerNotFound } @@ -213,6 +226,10 @@ func (p *Pool) Replace(id string, socket types.Socket) error { } func (p *Pool) Remove(id string) error { + if p.logger != nil { + p.logger.Debug("removing peer", "peer", id) + } + p.mu.Lock() defer p.mu.Unlock() @@ -227,6 +244,10 @@ func (p *Pool) Remove(id string) error { p.removeLocked(peer) + if p.logger != nil { + p.logger.Info("removed peer", "peer", id) + } + return nil } @@ -316,6 +337,10 @@ func (p *Pool) addLocked(id string, socket types.Socket) error { p.peers[id] = peer + if p.logger != nil { + p.logger.Info("added peer", "peer", id) + } + return nil } diff --git a/inbound/worker.go b/inbound/worker.go index 2e749f2..00f6666 100644 --- a/inbound/worker.go +++ b/inbound/worker.go @@ -17,12 +17,12 @@ type Worker interface { Send(data []byte) error } -type WorkerExitKind int +type WorkerExitKind string const ( - ExitDisconnected WorkerExitKind = iota - ExitError - ExitPolicy + ExitDisconnected WorkerExitKind = "disconnected" + ExitError WorkerExitKind = "error" + ExitPolicy WorkerExitKind = "policy" ) type DefaultWorker struct { @@ -62,6 +62,10 @@ func NewWorker( } func (w *DefaultWorker) Start(pool PoolPlugin) { + if w.logger != nil { + w.logger.Debug("starting") + } + toQueue := make(chan types.ReceivedMessage, 256) toForwarder := make(chan types.ReceivedMessage, 256) @@ -70,7 +74,7 @@ func (w *DefaultWorker) Start(pool PoolPlugin) { go func() { defer wg.Done() - RunReader(w.ctx, pool.OnExit, w.conn, toQueue, w.heartbeat) + RunReader(w.ctx, pool.OnExit, w.conn, toQueue, w.heartbeat, w.logger) }() go func() { @@ -85,13 +89,24 @@ func (w *DefaultWorker) Start(pool PoolPlugin) { go func() { defer wg.Done() - RunWatchdog(w.ctx, pool.OnExit, w.heartbeat, w.config.InactivityTimeout) + RunWatchdog(w.ctx, pool.OnExit, w.heartbeat, w.config.InactivityTimeout, w.logger) }() + if w.logger != nil { + w.logger.Info("started") + } + wg.Wait() + + if w.logger != nil { + w.logger.Info("stopped") + } } func (w *DefaultWorker) Stop() { + if w.logger != nil { + w.logger.Debug("shutting down") + } w.cancel() } @@ -115,6 +130,8 @@ func RunReader( conn *transport.Connection, messages chan<- types.ReceivedMessage, heartbeat chan<- struct{}, + + logger *slog.Logger, ) { for { select { @@ -122,6 +139,7 @@ func RunReader( return case data, ok := <-conn.Incoming(): if !ok { + var err error // determine exit kind // by default, the peer dropped unexpectedly kind := ExitError @@ -129,13 +147,21 @@ func RunReader( // the peer-side error is sent before the connection is closed, // so a non-blocking call here is correct // if an error is not sent, then assume the default event kind - case err := <-conn.Errors(): + case err = <-conn.Errors(): if errors.Is(err, transport.ErrPeerClosedClean) { kind = ExitDisconnected } default: } + if logger != nil { + if kind == ExitError { + logger.Error("reader: peer dropped", "event", kind, "error", err) + } else { + logger.Info("reader: peer disconnected", "event", kind) + } + } + onPeerClose(kind) return } @@ -184,9 +210,13 @@ func RunWatchdog( onInactive OnExitFunction, heartbeat <-chan struct{}, timeout time.Duration, + logger *slog.Logger, ) { // disable watchdog timeout if not configured if timeout <= 0 { + if logger != nil { + logger.Debug("watchdog: disabled") + } // drain heartbeats // wait for cancel and exit for { @@ -198,6 +228,10 @@ func RunWatchdog( } } + if logger != nil { + logger.Debug("watchdog: enabled", "timeout", timeout) + } + timer := time.NewTimer(timeout) defer timer.Stop() @@ -217,6 +251,9 @@ func RunWatchdog( // timer completed case <-timer.C: // signal peer is inactive + if logger != nil { + logger.Info("watchdog: peer is inactive") + } onInactive(ExitPolicy) return } diff --git a/inbound/worker_reader_test.go b/inbound/worker_reader_test.go index fb55e83..ab13f84 100644 --- a/inbound/worker_reader_test.go +++ b/inbound/worker_reader_test.go @@ -23,7 +23,7 @@ func TestRunReader(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go RunReader(ctx, func(WorkerExitKind) {}, conn, messages, heartbeat) + go RunReader(ctx, func(WorkerExitKind) {}, conn, messages, heartbeat, nil) before := time.Now() incoming <- honeybeetest.MockIncomingData{MsgType: websocket.TextMessage, Data: []byte("hello")} @@ -57,7 +57,7 @@ func TestRunReader(t *testing.T) { for range messages { } }() - go RunReader(ctx, func(WorkerExitKind) {}, conn, messages, heartbeat) + go RunReader(ctx, func(WorkerExitKind) {}, conn, messages, heartbeat, nil) const n = 3 for i := 0; i < n; i++ { @@ -92,7 +92,7 @@ func TestRunReader(t *testing.T) { go RunReader(ctx, func(kind WorkerExitKind) { gotKind = kind close(done) - }, conn, messages, heartbeat) + }, conn, messages, heartbeat, nil) honeybeetest.Eventually(t, func() bool { select { @@ -129,7 +129,7 @@ func TestRunReader(t *testing.T) { go RunReader(ctx, func(kind WorkerExitKind) { gotKind = kind close(done) - }, conn, messages, heartbeat) + }, conn, messages, heartbeat, nil) honeybeetest.Eventually(t, func() bool { select { @@ -166,7 +166,7 @@ func TestRunReader(t *testing.T) { go RunReader(ctx, func(kind WorkerExitKind) { gotKind = kind close(done) - }, conn, messages, heartbeat) + }, conn, messages, heartbeat, nil) honeybeetest.Eventually(t, func() bool { select { @@ -193,7 +193,7 @@ func TestRunReader(t *testing.T) { go func() { RunReader(ctx, func(WorkerExitKind) { called.Store(true) - }, conn, messages, heartbeat) + }, conn, messages, heartbeat, nil) close(done) }() diff --git a/inbound/worker_watchdog_test.go b/inbound/worker_watchdog_test.go index 6d0d0fe..1726a93 100644 --- a/inbound/worker_watchdog_test.go +++ b/inbound/worker_watchdog_test.go @@ -16,7 +16,7 @@ func TestRunWatchdog(t *testing.T) { defer cancel() called := atomic.Bool{} - go RunWatchdog(ctx, func(WorkerExitKind) { called.Store(true) }, heartbeat, 200*time.Millisecond) + go RunWatchdog(ctx, func(WorkerExitKind) { called.Store(true) }, heartbeat, 200*time.Millisecond, nil) for i := 0; i < 5; i++ { time.Sleep(20 * time.Millisecond) @@ -40,7 +40,7 @@ func TestRunWatchdog(t *testing.T) { count.Add(1) gotKind = kind close(done) - }, heartbeat, 20*time.Millisecond) + }, heartbeat, 20*time.Millisecond, nil) honeybeetest.Eventually(t, func() bool { select { @@ -62,7 +62,7 @@ func TestRunWatchdog(t *testing.T) { called := atomic.Bool{} done := make(chan struct{}) go func() { - RunWatchdog(ctx, func(WorkerExitKind) { called.Store(true) }, heartbeat, 20*time.Second) + RunWatchdog(ctx, func(WorkerExitKind) { called.Store(true) }, heartbeat, 20*time.Second, nil) close(done) }() @@ -87,7 +87,7 @@ func TestRunWatchdog(t *testing.T) { called := atomic.Bool{} done := make(chan struct{}) go func() { - RunWatchdog(ctx, func(WorkerExitKind) { called.Store(true) }, heartbeat, 0) + RunWatchdog(ctx, func(WorkerExitKind) { called.Store(true) }, heartbeat, 0, nil) close(done) }() @@ -112,7 +112,7 @@ func TestRunWatchdog(t *testing.T) { done := make(chan struct{}) go func() { - RunWatchdog(ctx, func(WorkerExitKind) {}, heartbeat, 0) + RunWatchdog(ctx, func(WorkerExitKind) {}, heartbeat, 0, nil) close(done) }() diff --git a/outbound/pool.go b/outbound/pool.go index fd14d42..b9bd579 100644 --- a/outbound/pool.go +++ b/outbound/pool.go @@ -146,6 +146,10 @@ func (p *Pool) SetDialer(d types.Dialer) { } func (p *Pool) Close() { + if p.logger != nil { + p.logger.Debug("closing") + } + p.mu.Lock() if p.closed { p.mu.Unlock() @@ -165,10 +169,18 @@ func (p *Pool) Close() { close(p.inbox) close(p.events) close(p.errors) + + if p.logger != nil { + p.logger.Info("closed") + } }() } func (p *Pool) Connect(id string) error { + if p.logger != nil { + p.logger.Debug("connecting to peer", "peer", id) + } + id, err := transport.NormalizeURL(id) if err != nil { return err @@ -215,10 +227,18 @@ func (p *Pool) Connect(id string) error { p.peers[id] = &Peer{id: id, worker: worker} + if p.logger != nil { + p.logger.Info("connected to peer", "peer", id) + } + return nil } func (p *Pool) Remove(id string) error { + if p.logger != nil { + p.logger.Debug("disconnecting from peer", "peer", id) + } + id, err := transport.NormalizeURL(id) if err != nil { return err @@ -239,6 +259,10 @@ func (p *Pool) Remove(id string) error { peer.worker.Stop() + if p.logger != nil { + p.logger.Info("disconnected from peer", "peer", id) + } + return nil } diff --git a/outbound/worker.go b/outbound/worker.go index cb0c02a..66e81bc 100644 --- a/outbound/worker.go +++ b/outbound/worker.go @@ -57,6 +57,10 @@ func NewWorker( } func (w *DefaultWorker) Start(pool PoolPlugin) { + if w.logger != nil { + w.logger.Debug("starting") + } + dial := make(chan struct{}, 1) newConn := make(chan *transport.Connection, 1) toQueue := make(chan types.ReceivedMessage, 256) @@ -68,12 +72,12 @@ func (w *DefaultWorker) Start(pool PoolPlugin) { go func() { defer wg.Done() - RunDialer(w.id, w.ctx, pool, dial, newConn) + RunDialer(w.id, w.ctx, pool, dial, newConn, w.logger) }() go func() { defer wg.Done() - RunKeepalive(w.ctx, w.heartbeat, keepalive, w.config.KeepaliveTimeout) + RunKeepalive(w.ctx, w.heartbeat, keepalive, w.config.KeepaliveTimeout, w.logger) }() go func() { @@ -96,14 +100,26 @@ func (w *DefaultWorker) Start(pool PoolPlugin) { dial: dial, keepalive: keepalive, newConn: newConn, + logger: w.logger, } session.Start(w.ctx, pool) }() + if w.logger != nil { + w.logger.Info("started") + } + wg.Wait() + + if w.logger != nil { + w.logger.Info("stopped") + } } func (w *DefaultWorker) Stop() { + if w.logger != nil { + w.logger.Debug("shutting down") + } w.cancel() } @@ -136,6 +152,8 @@ type Session struct { keepalive <-chan struct{} newConn <-chan *transport.Connection + + logger *slog.Logger } func (s *Session) Start( @@ -143,6 +161,10 @@ func (s *Session) Start( pool PoolPlugin, ) { for { + if s.logger != nil { + s.logger.Debug("session: requesting connection") + } + // request new connection select { case s.dial <- struct{}{}: @@ -159,9 +181,16 @@ func (s *Session) Start( case <-s.keepalive: select { case s.dial <- struct{}{}: + if s.logger != nil { + s.logger.Debug("session: requesting connection") + } + default: } case conn = <-s.newConn: + if s.logger != nil { + s.logger.Debug("session: connected") + } break preConn } } @@ -179,16 +208,24 @@ func (s *Session) Start( wg.Add(2) go func() { defer wg.Done() - RunReader(sctx, onStop, conn, s.messages, s.heartbeat) + RunReader(sctx, onStop, conn, s.messages, s.heartbeat, s.logger) }() go func() { defer wg.Done() - RunStopMonitor(sctx, onStop, conn, s.keepalive) + RunStopMonitor(sctx, onStop, conn, s.keepalive, s.logger) }() + if s.logger != nil { + s.logger.Info("session: started") + } + // complete session wg.Wait() + if s.logger != nil { + s.logger.Info("session: ended") + } + // tear down connection s.connPtr.Store(nil) pool.Events <- PoolEvent{ID: s.id, Kind: EventDisconnected} @@ -211,8 +248,13 @@ func RunReader( conn *transport.Connection, messages chan<- types.ReceivedMessage, heartbeat chan<- struct{}, + logger *slog.Logger, ) { defer func() { + if logger != nil { + logger.Debug("reader: stopping") + } + conn.Close() onStop() }() @@ -224,6 +266,9 @@ func RunReader( case data, ok := <-conn.Incoming(): if !ok { // connection has closed + if logger != nil { + logger.Debug("reader: disconnected") + } return } @@ -245,8 +290,13 @@ func RunStopMonitor( onStop func(), conn *transport.Connection, keepalive <-chan struct{}, + logger *slog.Logger, ) { defer func() { + if logger != nil { + logger.Debug("stop monitor: stopping") + } + conn.Close() onStop() }() @@ -254,6 +304,9 @@ func RunStopMonitor( select { case <-ctx.Done(): case <-keepalive: + if logger != nil { + logger.Debug("stop monitor: stopping: keepalive") + } } } @@ -290,9 +343,13 @@ func RunKeepalive( heartbeat <-chan struct{}, keepalive chan<- struct{}, timeout time.Duration, + logger *slog.Logger, ) { // disable keepalive timeout if not configured if timeout <= 0 { + if logger != nil { + logger.Debug("keepalive: disabled") + } // drain heartbeats // wait for cancel and exit for { @@ -304,6 +361,10 @@ func RunKeepalive( } } + if logger != nil { + logger.Debug("keepalive: enabled", "timeout", timeout) + } + timer := time.NewTimer(timeout) defer timer.Stop() @@ -323,6 +384,9 @@ func RunKeepalive( // timer completed case <-timer.C: // send keepalive signal, then reset the timer + if logger != nil { + logger.Info("keepalive: peer is inactive") + } select { case keepalive <- struct{}{}: default: @@ -359,6 +423,8 @@ func RunDialer( dial <-chan struct{}, newConn chan<- *transport.Connection, + + logger *slog.Logger, ) { for { select { @@ -377,12 +443,18 @@ func RunDialer( } }() + if logger != nil { + logger.Debug("dialer: dialing") + } // dial a new connection conn, err := connect(id, ctx, pool) close(done) // send error if dial failed and continue if err != nil { + if logger != nil { + logger.Warn("dialer: dial failed") + } select { case pool.Errors <- err: case <-ctx.Done(): @@ -390,6 +462,10 @@ func RunDialer( continue } + if logger != nil { + logger.Debug("dialer: connected") + } + // send the new connection or close and exit select { case newConn <- conn: diff --git a/outbound/worker_dialer_test.go b/outbound/worker_dialer_test.go index b41351a..ad51bc1 100644 --- a/outbound/worker_dialer_test.go +++ b/outbound/worker_dialer_test.go @@ -32,7 +32,7 @@ func TestRunDialer(t *testing.T) { }, } - go RunDialer(url, ctx, pool, dial, newConn) + go RunDialer(url, ctx, pool, dial, newConn, nil) dial <- struct{}{} honeybeetest.Eventually(t, func() bool { @@ -73,7 +73,7 @@ func TestRunDialer(t *testing.T) { ConnectionConfig: connConfig, } - go RunDialer(url, ctx, pool, dial, newConn) + go RunDialer(url, ctx, pool, dial, newConn, nil) dial <- struct{}{} // wait for dial to start blocking on gate @@ -141,7 +141,7 @@ func TestRunDialer(t *testing.T) { ConnectionConfig: connConfig, } - go RunDialer(url, ctx, pool, dial, newConn) + go RunDialer(url, ctx, pool, dial, newConn, nil) dial <- struct{}{} honeybeetest.Eventually(t, func() bool { @@ -175,7 +175,7 @@ func TestRunDialer(t *testing.T) { done := make(chan struct{}) go func() { - RunDialer(url, ctx, pool, dial, newConn) + RunDialer(url, ctx, pool, dial, newConn, nil) close(done) }() @@ -213,7 +213,7 @@ func TestRunDialer(t *testing.T) { done := make(chan struct{}) go func() { - RunDialer(url, ctx, pool, dial, newConn) + RunDialer(url, ctx, pool, dial, newConn, nil) close(done) }() diff --git a/outbound/worker_keepalive_test.go b/outbound/worker_keepalive_test.go index c94cabb..f311558 100644 --- a/outbound/worker_keepalive_test.go +++ b/outbound/worker_keepalive_test.go @@ -15,7 +15,7 @@ func TestRunKeepalive(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go RunKeepalive(ctx, heartbeat, keepalive, timeout) + go RunKeepalive(ctx, heartbeat, keepalive, timeout, nil) // send heartbeats faster than the timeout for i := 0; i < 5; i++ { @@ -41,7 +41,7 @@ func TestRunKeepalive(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go RunKeepalive(ctx, heartbeat, keepalive, timeout) + go RunKeepalive(ctx, heartbeat, keepalive, timeout, nil) // send no heartbeats, wait for timeout and keepalive signal honeybeetest.Eventually(t, func() bool { @@ -62,7 +62,7 @@ func TestRunKeepalive(t *testing.T) { done := make(chan struct{}) go func() { - RunKeepalive(ctx, heartbeat, keepalive, timeout) + RunKeepalive(ctx, heartbeat, keepalive, timeout, nil) close(done) }() @@ -83,7 +83,7 @@ func TestRunKeepalive(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - go RunKeepalive(ctx, heartbeat, keepalive, 0) + go RunKeepalive(ctx, heartbeat, keepalive, 0, nil) // these must not block for i := 0; i < 5; i++ { diff --git a/outbound/worker_session_inner_test.go b/outbound/worker_session_inner_test.go index c9da1f6..cfa1111 100644 --- a/outbound/worker_session_inner_test.go +++ b/outbound/worker_session_inner_test.go @@ -28,7 +28,7 @@ func TestRunReader(t *testing.T) { for range heartbeat { } }() - go RunReader(ctx, cancel, conn, messages, heartbeat) + go RunReader(ctx, cancel, conn, messages, heartbeat, nil) before := time.Now() incomingData <- honeybeetest.MockIncomingData{ @@ -65,7 +65,7 @@ func TestRunReader(t *testing.T) { for range messages { } }() - go RunReader(ctx, cancel, conn, messages, heartbeat) + go RunReader(ctx, cancel, conn, messages, heartbeat, nil) const count = 3 for i := 0; i < count; i++ { @@ -96,7 +96,7 @@ func TestRunReader(t *testing.T) { for range messages { } }() - go RunReader(ctx, cancel, conn, messages, heartbeat) + go RunReader(ctx, cancel, conn, messages, heartbeat, nil) // induce connection closure via reader incomingData <- honeybeetest.MockIncomingData{Err: io.EOF} @@ -125,7 +125,7 @@ func TestRunReader(t *testing.T) { heartbeat := make(chan struct{}) ctx, cancel := context.WithCancel(context.Background()) - go RunReader(ctx, cancel, conn, messages, heartbeat) + go RunReader(ctx, cancel, conn, messages, heartbeat, nil) cancel() @@ -152,7 +152,7 @@ func TestRunStopMonitor(t *testing.T) { keepalive := make(chan struct{}, 1) - go RunStopMonitor(ctx, cancel, conn, keepalive) + go RunStopMonitor(ctx, cancel, conn, keepalive, nil) keepalive <- struct{}{} @@ -177,7 +177,7 @@ func TestRunStopMonitor(t *testing.T) { keepalive := make(chan struct{}) - go RunStopMonitor(ctx, cancel, conn, keepalive) + go RunStopMonitor(ctx, cancel, conn, keepalive, nil) cancel() diff --git a/transport/connection.go b/transport/connection.go index 7354fae..f364081 100644 --- a/transport/connection.go +++ b/transport/connection.go @@ -233,7 +233,7 @@ func (c *Connection) shutdownSignalDone() { func (c *Connection) shutdownLogStart() { if c.logger != nil { - c.logger.Info("shutting down") + c.logger.Info("closing") } } @@ -260,7 +260,7 @@ func (c *Connection) shutdownCloseChannels() { func (c *Connection) shutdownLogComplete() { if c.logger != nil { - c.logger.Info("connection closed") + c.logger.Info("closed") } } diff --git a/transport/logging_test.go b/transport/logging_test.go index fdbf57c..486f925 100644 --- a/transport/logging_test.go +++ b/transport/logging_test.go @@ -161,14 +161,14 @@ func TestCloseLogging(t *testing.T) { honeybeetest.Eventually(t, func() bool { return honeybeetest.FindLogRecord( - mockHandler.GetRecords(), slog.LevelInfo, "connection closed") != nil + mockHandler.GetRecords(), slog.LevelInfo, "closed") != nil }, "expected log") records := mockHandler.GetRecords() expected := []honeybeetest.ExpectedLog{ - log(slog.LevelInfo, "shutting down", map[string]any{}), - log(slog.LevelInfo, "connection closed", map[string]any{}), + log(slog.LevelInfo, "closing", map[string]any{}), + log(slog.LevelInfo, "closed", map[string]any{}), } honeybeetest.AssertLogSequence(t, records, expected) @@ -197,7 +197,7 @@ func TestCloseLogging(t *testing.T) { records := mockHandler.GetRecords() expected := []honeybeetest.ExpectedLog{ - log(slog.LevelInfo, "shutting down", map[string]any{}), + log(slog.LevelInfo, "closing", map[string]any{}), log(slog.LevelError, "socket close failed", map[string]any{"error": closeErr}), }