Skip to content

Commit 67b070c

Browse files
authored
Add tests proving queue cleanup on timeout and hard-kill (#1019)
* Add tests proving lock queue cleanup on timeout and hard-kill Adds LockStepTimeoutQueueTest with 5 tests that verify the lock queue is properly cleaned up when builds are timed out, aborted, or hard-killed while waiting for a lockable resource: - timeoutWhileWaitingForLockClearsQueue: timeout step wrapping lock - abortWhileWaitingForLockByLabelClearsQueue: executor.interrupt() - timeoutMiddleBuildInQueuePreservesOrder: FIFO order preserved - hardKillWhileWaitingForLockClearsQueueViaIsValid: doKill() with multiple waiters — isValid() fallback removes stale queue entry - hardKillOnlyWaiterDoesNotBlockFutureBuilds: doKill() sole waiter All tests pass, confirming the lockable-resources plugin correctly handles both the clean stop() path and the isValid() fallback path. Fixes #773 * Add tests proving lock queue cleanup on timeout and hard-kill Adds LockStepTimeoutQueueTest with 5 tests that verify the lock queue is properly cleaned up when builds are timed out, aborted, or hard-killed while waiting for a lockable resource: - timeoutWhileWaitingForLockClearsQueue: timeout step wrapping lock - abortWhileWaitingForLockByLabelClearsQueue: executor.interrupt() - timeoutMiddleBuildInQueuePreservesOrder: FIFO order preserved - hardKillWhileWaitingForLockClearsQueueViaIsValid: doKill() with multiple waiters — isValid() fallback removes stale queue entry - hardKillOnlyWaiterDoesNotBlockFutureBuilds: doKill() sole waiter All tests pass, confirming the lockable-resources plugin correctly handles both the clean stop() path and the isValid() fallback path. Fixes #773 * refactor: Simplify lock definition syntax in LockStepTimeoutQueueTest * Apply spotless formatting to LockStepTimeoutQueueTest
1 parent e4f9096 commit 67b070c

1 file changed

Lines changed: 339 additions & 0 deletions

File tree

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
package org.jenkins.plugins.lockableresources;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertTrue;
5+
6+
import hudson.model.Result;
7+
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
8+
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
9+
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
10+
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
11+
import org.junit.jupiter.api.Test;
12+
import org.jvnet.hudson.test.Issue;
13+
import org.jvnet.hudson.test.JenkinsRule;
14+
import org.jvnet.hudson.test.junit.jupiter.WithJenkins;
15+
16+
/**
17+
* Tests for issue #773: Resources are not removed from queue when timeout is reached.
18+
*
19+
* <p>When a pipeline times out while its lock step is waiting in the queue, the queued context must
20+
* be removed so the resource can be acquired by other waiting builds.
21+
*/
22+
@WithJenkins
23+
class LockStepTimeoutQueueTest extends LockStepTestBase {
24+
25+
/**
26+
* A build waiting for a lock is aborted by a timeout() wrapper. The queued context must be
27+
* removed so the next waiting build gets the lock.
28+
*/
29+
@Issue("773")
30+
@Test
31+
void timeoutWhileWaitingForLockClearsQueue(JenkinsRule j) throws Exception {
32+
LockableResourcesManager.get().createResource("resource1");
33+
34+
// b1 holds the lock
35+
WorkflowJob p1 = j.jenkins.createProject(WorkflowJob.class, "holder");
36+
p1.setDefinition(new CpsFlowDefinition(
37+
"lock('resource1') {\n" + " semaphore 'hold'\n" + "}\n" + "echo 'holder done'", true));
38+
WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart();
39+
SemaphoreStep.waitForStart("hold/1", b1);
40+
41+
// b2 tries to lock with a short timeout — will time out while waiting
42+
WorkflowJob p2 = j.jenkins.createProject(WorkflowJob.class, "timeouter");
43+
p2.setDefinition(new CpsFlowDefinition(
44+
"timeout(time: 5, unit: 'SECONDS') {\n"
45+
+ " lock('resource1') {\n"
46+
+ " echo 'timeouter inside lock'\n"
47+
+ " }\n"
48+
+ "}",
49+
true));
50+
WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart();
51+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
52+
53+
// b3 also waits for the lock — queued after b2
54+
WorkflowJob p3 = j.jenkins.createProject(WorkflowJob.class, "waiter");
55+
p3.setDefinition(new CpsFlowDefinition(
56+
"lock('resource1') {\n" + " semaphore 'waiter'\n" + "}\n" + "echo 'waiter done'", true));
57+
WorkflowRun b3 = p3.scheduleBuild2(0).waitForStart();
58+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3);
59+
60+
// Verify b2 and b3 are both queued
61+
assertEquals(
62+
2,
63+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
64+
"Both b2 and b3 should be in the queue");
65+
66+
// Wait for b2 to time out
67+
j.waitForCompletion(b2);
68+
j.assertBuildStatus(Result.ABORTED, b2);
69+
j.assertLogContains("Timeout has been exceeded", b2);
70+
71+
// After b2 times out, it must be removed from the queue — only b3 remains
72+
assertEquals(
73+
1,
74+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
75+
"b2 must be removed from the queue after timeout");
76+
77+
// Release the lock from b1 → b3 should get it (not stuck behind dead b2)
78+
SemaphoreStep.success("hold/1", null);
79+
j.waitForCompletion(b1);
80+
81+
// b3 should acquire the lock
82+
SemaphoreStep.waitForStart("waiter/1", b3);
83+
j.assertLogContains("Lock acquired on [Resource: resource1]", b3);
84+
85+
SemaphoreStep.success("waiter/1", null);
86+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
87+
j.assertLogContains("waiter done", b3);
88+
89+
// Queue must be fully empty
90+
assertTrue(
91+
LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(),
92+
"Queue must be empty after all builds complete");
93+
}
94+
95+
/**
96+
* A build waiting for a lock by label is aborted. The queued context must be removed and the
97+
* next waiter proceeds.
98+
*/
99+
@Issue("773")
100+
@Test
101+
void abortWhileWaitingForLockByLabelClearsQueue(JenkinsRule j) throws Exception {
102+
LockableResourcesManager.get().createResourceWithLabel("resource1", "label1");
103+
104+
// b1 holds the lock
105+
WorkflowJob p1 = j.jenkins.createProject(WorkflowJob.class, "holder");
106+
p1.setDefinition(
107+
new CpsFlowDefinition("lock(label: 'label1', quantity: 1) {\n" + " semaphore 'hold'\n" + "}", true));
108+
WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart();
109+
SemaphoreStep.waitForStart("hold/1", b1);
110+
111+
// b2 waits for the lock
112+
WorkflowJob p2 = j.jenkins.createProject(WorkflowJob.class, "aborter");
113+
p2.setDefinition(new CpsFlowDefinition(
114+
"lock(label: 'label1', quantity: 1) {\n" + " semaphore 'aborter'\n" + "}", true));
115+
WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart();
116+
j.waitForMessage(", waiting for execution ...", b2);
117+
118+
// b3 also waits
119+
WorkflowJob p3 = j.jenkins.createProject(WorkflowJob.class, "waiter");
120+
p3.setDefinition(new CpsFlowDefinition(
121+
"lock(label: 'label1', quantity: 1) {\n" + " semaphore 'waiter'\n" + "}\n" + "echo 'waiter done'",
122+
true));
123+
WorkflowRun b3 = p3.scheduleBuild2(0).waitForStart();
124+
j.waitForMessage(", waiting for execution ...", b3);
125+
126+
assertEquals(
127+
2,
128+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
129+
"Both b2 and b3 should be in the queue");
130+
131+
// Abort b2 (simulates user abort or parent timeout propagation)
132+
b2.getExecutor().interrupt();
133+
j.waitForCompletion(b2);
134+
j.assertBuildStatus(Result.ABORTED, b2);
135+
136+
// After abort, b2 must be removed from the queue
137+
assertEquals(
138+
1,
139+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
140+
"b2 must be removed from the queue after abort");
141+
142+
// Release the lock → b3 gets it
143+
SemaphoreStep.success("hold/1", null);
144+
j.waitForCompletion(b1);
145+
146+
SemaphoreStep.waitForStart("waiter/1", b3);
147+
j.assertLogContains("Lock acquired on [Label: label1", b3);
148+
149+
SemaphoreStep.success("waiter/1", null);
150+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
151+
j.assertLogContains("waiter done", b3);
152+
153+
assertTrue(
154+
LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(),
155+
"Queue must be empty after all builds complete");
156+
}
157+
158+
/**
159+
* Multiple builds are queued for a lock. The middle one times out. The remaining builds must
160+
* still proceed in order.
161+
*/
162+
@Issue("773")
163+
@Test
164+
void timeoutMiddleBuildInQueuePreservesOrder(JenkinsRule j) throws Exception {
165+
LockableResourcesManager.get().createResource("resource1");
166+
167+
// b1 holds the lock
168+
WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder");
169+
holder.setDefinition(new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true));
170+
WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart();
171+
SemaphoreStep.waitForStart("hold/1", b1);
172+
173+
// b2 waits (no timeout)
174+
WorkflowJob first = j.jenkins.createProject(WorkflowJob.class, "first");
175+
first.setDefinition(new CpsFlowDefinition("lock('resource1') { semaphore 'first' }\necho 'first done'", true));
176+
WorkflowRun b2 = first.scheduleBuild2(0).waitForStart();
177+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
178+
179+
// b3 waits with timeout — will be aborted
180+
WorkflowJob middle = j.jenkins.createProject(WorkflowJob.class, "middle");
181+
middle.setDefinition(new CpsFlowDefinition(
182+
"timeout(time: 5, unit: 'SECONDS') {\n"
183+
+ " lock('resource1') {\n"
184+
+ " echo 'middle should never get here'\n"
185+
+ " }\n"
186+
+ "}",
187+
true));
188+
WorkflowRun b3 = middle.scheduleBuild2(0).waitForStart();
189+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3);
190+
191+
// b4 waits (no timeout)
192+
WorkflowJob last = j.jenkins.createProject(WorkflowJob.class, "last");
193+
last.setDefinition(new CpsFlowDefinition("lock('resource1') { semaphore 'last' }\necho 'last done'", true));
194+
WorkflowRun b4 = last.scheduleBuild2(0).waitForStart();
195+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b4);
196+
197+
assertEquals(3, LockableResourcesManager.get().getCurrentQueuedContext().size());
198+
199+
// b3 times out
200+
j.waitForCompletion(b3);
201+
j.assertBuildStatus(Result.ABORTED, b3);
202+
j.assertLogNotContains("middle should never get here", b3);
203+
204+
// Queue should now have b2 and b4
205+
assertEquals(
206+
2,
207+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
208+
"Only b2 and b4 should remain in the queue");
209+
210+
// Release lock → b2 gets it first (FIFO order preserved)
211+
SemaphoreStep.success("hold/1", null);
212+
j.waitForCompletion(b1);
213+
214+
SemaphoreStep.waitForStart("first/1", b2);
215+
j.assertLogContains("Lock acquired on [Resource: resource1]", b2);
216+
217+
// b4 is still waiting
218+
j.assertLogNotContains("Lock acquired on", b4);
219+
220+
// Release b2 → b4 gets the lock
221+
SemaphoreStep.success("first/1", null);
222+
j.assertBuildStatusSuccess(j.waitForCompletion(b2));
223+
224+
SemaphoreStep.waitForStart("last/1", b4);
225+
j.assertLogContains("Lock acquired on [Resource: resource1]", b4);
226+
227+
SemaphoreStep.success("last/1", null);
228+
j.assertBuildStatusSuccess(j.waitForCompletion(b4));
229+
j.assertLogContains("last done", b4);
230+
231+
assertTrue(LockableResourcesManager.get().getCurrentQueuedContext().isEmpty());
232+
}
233+
234+
/**
235+
* Simulate the "extreme prejudice" scenario: a build waiting for a lock is hard-killed via
236+
* doKill(). This bypasses the normal step shutdown — the queue entry becomes stale and must be
237+
* cleaned up by the isValid() fallback in getNextQueuedContext() when the resource is freed.
238+
*/
239+
@Issue("773")
240+
@Test
241+
void hardKillWhileWaitingForLockClearsQueueViaIsValid(JenkinsRule j) throws Exception {
242+
LockableResourcesManager.get().createResource("resource1");
243+
244+
// b1 holds the lock
245+
WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder");
246+
holder.setDefinition(new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true));
247+
WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart();
248+
SemaphoreStep.waitForStart("hold/1", b1);
249+
250+
// b2 waits for the lock — will be hard-killed
251+
WorkflowJob victim = j.jenkins.createProject(WorkflowJob.class, "victim");
252+
victim.setDefinition(
253+
new CpsFlowDefinition("lock('resource1') {\n" + " echo 'victim inside lock'\n" + "}", true));
254+
WorkflowRun b2 = victim.scheduleBuild2(0).waitForStart();
255+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
256+
isPaused(b2, 1, 1);
257+
258+
// b3 also waits
259+
WorkflowJob waiter = j.jenkins.createProject(WorkflowJob.class, "waiter");
260+
waiter.setDefinition(
261+
new CpsFlowDefinition("lock('resource1') { semaphore 'waiter' }\necho 'waiter done'", true));
262+
WorkflowRun b3 = waiter.scheduleBuild2(0).waitForStart();
263+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3);
264+
isPaused(b3, 1, 1);
265+
266+
assertEquals(2, LockableResourcesManager.get().getCurrentQueuedContext().size());
267+
268+
// Hard-kill b2 — this is the "extreme prejudice" path
269+
b2.doKill();
270+
j.waitForMessage("Hard kill!", b2);
271+
j.waitForCompletion(b2);
272+
j.assertBuildStatus(Result.ABORTED, b2);
273+
274+
// Release the lock → proceedNextContext() runs → isValid() must detect b2
275+
// is dead and skip it, then give the lock to b3
276+
SemaphoreStep.success("hold/1", null);
277+
j.waitForCompletion(b1);
278+
279+
// b3 must get the lock — if b2's stale entry blocks the queue, this will hang
280+
SemaphoreStep.waitForStart("waiter/1", b3);
281+
j.assertLogContains("Lock acquired on [Resource: resource1]", b3);
282+
j.assertLogNotContains("victim inside lock", b2);
283+
284+
SemaphoreStep.success("waiter/1", null);
285+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
286+
j.assertLogContains("waiter done", b3);
287+
288+
assertTrue(
289+
LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(),
290+
"Queue must be empty after all builds complete");
291+
}
292+
293+
/**
294+
* A build that was waiting for a lock is hard-killed while it is the ONLY waiter. When the
295+
* resource is later freed, the stale entry must be cleaned up and the queue must be empty. A
296+
* new build must then be able to acquire the lock immediately.
297+
*/
298+
@Issue("773")
299+
@Test
300+
void hardKillOnlyWaiterDoesNotBlockFutureBuilds(JenkinsRule j) throws Exception {
301+
LockableResourcesManager.get().createResource("resource1");
302+
303+
// b1 holds the lock
304+
WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder");
305+
holder.setDefinition(new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true));
306+
WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart();
307+
SemaphoreStep.waitForStart("hold/1", b1);
308+
309+
// b2 waits — will be hard-killed
310+
WorkflowJob victim = j.jenkins.createProject(WorkflowJob.class, "victim");
311+
victim.setDefinition(new CpsFlowDefinition("lock('resource1') { echo 'victim inside' }", true));
312+
WorkflowRun b2 = victim.scheduleBuild2(0).waitForStart();
313+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
314+
315+
assertEquals(1, LockableResourcesManager.get().getCurrentQueuedContext().size());
316+
317+
// Hard-kill b2
318+
b2.doKill();
319+
j.waitForCompletion(b2);
320+
j.assertBuildStatus(Result.ABORTED, b2);
321+
322+
// Release the lock from b1
323+
SemaphoreStep.success("hold/1", null);
324+
j.assertBuildStatusSuccess(j.waitForCompletion(b1));
325+
326+
// A new build must be able to acquire the lock immediately
327+
WorkflowJob fresh = j.jenkins.createProject(WorkflowJob.class, "fresh");
328+
fresh.setDefinition(new CpsFlowDefinition("lock('resource1') { semaphore 'fresh' }\necho 'fresh done'", true));
329+
WorkflowRun b3 = fresh.scheduleBuild2(0).waitForStart();
330+
SemaphoreStep.waitForStart("fresh/1", b3);
331+
j.assertLogContains("Lock acquired on [Resource: resource1]", b3);
332+
333+
SemaphoreStep.success("fresh/1", null);
334+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
335+
j.assertLogContains("fresh done", b3);
336+
337+
assertTrue(LockableResourcesManager.get().getCurrentQueuedContext().isEmpty());
338+
}
339+
}

0 commit comments

Comments
 (0)