Skip to content

Commit 5e77629

Browse files
Copilot and drewnoakes authored
Fix macOS/Windows CI hang: correct second Select timeout, fix Cleanup lock scope, harden test thread
Agent-Logs-Url: https://github.com/zeromq/netmq/sessions/259a6416-41cd-429e-8110-4e1559b0130e
Co-authored-by: drewnoakes <[email protected]>
1 parent 81d313b commit 5e77629

3 files changed

Lines changed: 17 additions & 7 deletions

File tree

src/NetMQ.Tests/CleanupTests.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,10 @@ public void NoBlockCompletesInBoundedTime()
7474
// and an error list, causing Cleanup to hang forever without this guard.
7575
_ = new DealerSocket(">tcp://localhost:5557"); // intentionally not disposed
7676

77-
var thread = new Thread(() => NetMQConfig.Cleanup(block: false));
77+
// Run Cleanup on a background thread (IsBackground = true). If a regression
78+
// causes Cleanup to hang, the stuck thread then cannot block process
79+
// shutdown, so the test run can still exit.
80+
var thread = new Thread(() => NetMQConfig.Cleanup(block: false)) { IsBackground = true };
7881
thread.Start();
7982
Assert.True(thread.Join(TimeSpan.FromSeconds(10)),
8083
"Cleanup(block: false) did not complete within 10 seconds");

src/NetMQ/Core/Utils/Poller.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,11 @@ private void Loop()
290290
// together. To avoid this problem, we call the Select function separately for errorList.
291291
// Please refer to this issue: https://github.com/dotnet/corefx/issues/39617
292292
SocketUtility.Select(readList, null, null, timeout);
293-
SocketUtility.Select(null, null, errorList, timeout);
293+
// If the first Select found readable sockets, use a non-blocking (0) timeout for the
294+
// error check instead of waiting a second time. InEvent runs only after both Selects
295+
// return, so events signalled by the first Select (e.g. a ForceStop command) are still
296+
// unprocessed at this point, and a second infinite wait here would deadlock.
297+
SocketUtility.Select(null, null, errorList, readList.Count > 0 ? 0 : timeout);
294298
}
295299
else
296300
{

src/NetMQ/NetMQConfig.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,17 @@ internal static Ctx Context
4848
/// <param name="block">Set to true when you want to make sure sockets send all pending messages</param>
4949
public static void Cleanup(bool block = true)
5050
{
51+
Ctx? ctx;
5152
lock (s_sync)
5253
{
53-
if (s_ctx != null)
54-
{
55-
s_ctx.Terminate(block);
56-
s_ctx = null;
57-
}
54+
// Capture and clear the context reference while holding the lock, then
55+
// call Terminate outside the lock so a long-running or stuck Terminate
56+
// (e.g. on macOS with block:false) never prevents other threads from
57+
// acquiring s_sync and observing that cleanup has already been initiated.
58+
ctx = s_ctx;
59+
s_ctx = null;
5860
}
61+
ctx?.Terminate(block);
5962
}
6063

6164
/// <summary>

0 commit comments

Comments
 (0)