Skip to content

Commit 37cfc7a

Browse files
committed
Add tests proving lock queue cleanup on timeout and hard-kill
Adds LockStepTimeoutQueueTest with 5 tests that verify the lock queue is properly cleaned up when builds are timed out, aborted, or hard-killed while waiting for a lockable resource: - timeoutWhileWaitingForLockClearsQueue: timeout step wrapping lock - abortWhileWaitingForLockByLabelClearsQueue: executor.interrupt() - timeoutMiddleBuildInQueuePreservesOrder: FIFO order preserved - hardKillWhileWaitingForLockClearsQueueViaIsValid: doKill() with multiple waiters — isValid() fallback removes stale queue entry - hardKillOnlyWaiterDoesNotBlockFutureBuilds: doKill() sole waiter All tests pass, confirming the lockable-resources plugin correctly handles both the clean stop() path and the isValid() fallback path. Fixes #773
1 parent a6b5b5c commit 37cfc7a

1 file changed

Lines changed: 366 additions & 0 deletions

File tree

Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
package org.jenkins.plugins.lockableresources;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertTrue;
5+
6+
import hudson.model.Result;
7+
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
8+
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
9+
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
10+
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
11+
import org.junit.jupiter.api.Test;
12+
import org.jvnet.hudson.test.Issue;
13+
import org.jvnet.hudson.test.JenkinsRule;
14+
import org.jvnet.hudson.test.junit.jupiter.WithJenkins;
15+
16+
/**
17+
* Tests for issue #773: Resources are not removed from queue when timeout is reached.
18+
*
19+
* <p>When a pipeline times out while its lock step is waiting in the queue, the queued context must
20+
* be removed so the resource can be acquired by other waiting builds.
21+
*/
22+
@WithJenkins
23+
class LockStepTimeoutQueueTest extends LockStepTestBase {
24+
25+
/**
26+
* A build waiting for a lock is aborted by a timeout() wrapper. The queued context must be
27+
* removed so the next waiting build gets the lock.
28+
*/
29+
@Issue("773")
30+
@Test
31+
void timeoutWhileWaitingForLockClearsQueue(JenkinsRule j) throws Exception {
32+
LockableResourcesManager.get().createResource("resource1");
33+
34+
// b1 holds the lock
35+
WorkflowJob p1 = j.jenkins.createProject(WorkflowJob.class, "holder");
36+
p1.setDefinition(new CpsFlowDefinition(
37+
"lock('resource1') {\n"
38+
+ " semaphore 'hold'\n"
39+
+ "}\n"
40+
+ "echo 'holder done'",
41+
true));
42+
WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart();
43+
SemaphoreStep.waitForStart("hold/1", b1);
44+
45+
// b2 tries to lock with a short timeout — will time out while waiting
46+
WorkflowJob p2 = j.jenkins.createProject(WorkflowJob.class, "timeouter");
47+
p2.setDefinition(new CpsFlowDefinition(
48+
"timeout(time: 5, unit: 'SECONDS') {\n"
49+
+ " lock('resource1') {\n"
50+
+ " echo 'timeouter inside lock'\n"
51+
+ " }\n"
52+
+ "}",
53+
true));
54+
WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart();
55+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
56+
57+
// b3 also waits for the lock — queued after b2
58+
WorkflowJob p3 = j.jenkins.createProject(WorkflowJob.class, "waiter");
59+
p3.setDefinition(new CpsFlowDefinition(
60+
"lock('resource1') {\n"
61+
+ " semaphore 'waiter'\n"
62+
+ "}\n"
63+
+ "echo 'waiter done'",
64+
true));
65+
WorkflowRun b3 = p3.scheduleBuild2(0).waitForStart();
66+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3);
67+
68+
// Verify b2 and b3 are both queued
69+
assertEquals(
70+
2,
71+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
72+
"Both b2 and b3 should be in the queue");
73+
74+
// Wait for b2 to time out
75+
j.waitForCompletion(b2);
76+
j.assertBuildStatus(Result.ABORTED, b2);
77+
j.assertLogContains("Timeout has been exceeded", b2);
78+
79+
// After b2 times out, it must be removed from the queue — only b3 remains
80+
assertEquals(
81+
1,
82+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
83+
"b2 must be removed from the queue after timeout");
84+
85+
// Release the lock from b1 → b3 should get it (not stuck behind dead b2)
86+
SemaphoreStep.success("hold/1", null);
87+
j.waitForCompletion(b1);
88+
89+
// b3 should acquire the lock
90+
SemaphoreStep.waitForStart("waiter/1", b3);
91+
j.assertLogContains("Lock acquired on [Resource: resource1]", b3);
92+
93+
SemaphoreStep.success("waiter/1", null);
94+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
95+
j.assertLogContains("waiter done", b3);
96+
97+
// Queue must be fully empty
98+
assertTrue(
99+
LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(),
100+
"Queue must be empty after all builds complete");
101+
}
102+
103+
/**
104+
* A build waiting for a lock by label is aborted. The queued context must be removed and the
105+
* next waiter proceeds.
106+
*/
107+
@Issue("773")
108+
@Test
109+
void abortWhileWaitingForLockByLabelClearsQueue(JenkinsRule j) throws Exception {
110+
LockableResourcesManager.get().createResourceWithLabel("resource1", "label1");
111+
112+
// b1 holds the lock
113+
WorkflowJob p1 = j.jenkins.createProject(WorkflowJob.class, "holder");
114+
p1.setDefinition(new CpsFlowDefinition(
115+
"lock(label: 'label1', quantity: 1) {\n"
116+
+ " semaphore 'hold'\n"
117+
+ "}",
118+
true));
119+
WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart();
120+
SemaphoreStep.waitForStart("hold/1", b1);
121+
122+
// b2 waits for the lock
123+
WorkflowJob p2 = j.jenkins.createProject(WorkflowJob.class, "aborter");
124+
p2.setDefinition(new CpsFlowDefinition(
125+
"lock(label: 'label1', quantity: 1) {\n"
126+
+ " semaphore 'aborter'\n"
127+
+ "}",
128+
true));
129+
WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart();
130+
j.waitForMessage(", waiting for execution ...", b2);
131+
132+
// b3 also waits
133+
WorkflowJob p3 = j.jenkins.createProject(WorkflowJob.class, "waiter");
134+
p3.setDefinition(new CpsFlowDefinition(
135+
"lock(label: 'label1', quantity: 1) {\n"
136+
+ " semaphore 'waiter'\n"
137+
+ "}\n"
138+
+ "echo 'waiter done'",
139+
true));
140+
WorkflowRun b3 = p3.scheduleBuild2(0).waitForStart();
141+
j.waitForMessage(", waiting for execution ...", b3);
142+
143+
assertEquals(
144+
2,
145+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
146+
"Both b2 and b3 should be in the queue");
147+
148+
// Abort b2 (simulates user abort or parent timeout propagation)
149+
b2.getExecutor().interrupt();
150+
j.waitForCompletion(b2);
151+
j.assertBuildStatus(Result.ABORTED, b2);
152+
153+
// After abort, b2 must be removed from the queue
154+
assertEquals(
155+
1,
156+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
157+
"b2 must be removed from the queue after abort");
158+
159+
// Release the lock → b3 gets it
160+
SemaphoreStep.success("hold/1", null);
161+
j.waitForCompletion(b1);
162+
163+
SemaphoreStep.waitForStart("waiter/1", b3);
164+
j.assertLogContains("Lock acquired on [Label: label1", b3);
165+
166+
SemaphoreStep.success("waiter/1", null);
167+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
168+
j.assertLogContains("waiter done", b3);
169+
170+
assertTrue(
171+
LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(),
172+
"Queue must be empty after all builds complete");
173+
}
174+
175+
/**
176+
* Multiple builds are queued for a lock. The middle one times out. The remaining builds must
177+
* still proceed in order.
178+
*/
179+
@Issue("773")
180+
@Test
181+
void timeoutMiddleBuildInQueuePreservesOrder(JenkinsRule j) throws Exception {
182+
LockableResourcesManager.get().createResource("resource1");
183+
184+
// b1 holds the lock
185+
WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder");
186+
holder.setDefinition(
187+
new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true));
188+
WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart();
189+
SemaphoreStep.waitForStart("hold/1", b1);
190+
191+
// b2 waits (no timeout)
192+
WorkflowJob first = j.jenkins.createProject(WorkflowJob.class, "first");
193+
first.setDefinition(new CpsFlowDefinition(
194+
"lock('resource1') { semaphore 'first' }\necho 'first done'", true));
195+
WorkflowRun b2 = first.scheduleBuild2(0).waitForStart();
196+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
197+
198+
// b3 waits with timeout — will be aborted
199+
WorkflowJob middle = j.jenkins.createProject(WorkflowJob.class, "middle");
200+
middle.setDefinition(new CpsFlowDefinition(
201+
"timeout(time: 5, unit: 'SECONDS') {\n"
202+
+ " lock('resource1') {\n"
203+
+ " echo 'middle should never get here'\n"
204+
+ " }\n"
205+
+ "}",
206+
true));
207+
WorkflowRun b3 = middle.scheduleBuild2(0).waitForStart();
208+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3);
209+
210+
// b4 waits (no timeout)
211+
WorkflowJob last = j.jenkins.createProject(WorkflowJob.class, "last");
212+
last.setDefinition(
213+
new CpsFlowDefinition("lock('resource1') { semaphore 'last' }\necho 'last done'", true));
214+
WorkflowRun b4 = last.scheduleBuild2(0).waitForStart();
215+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b4);
216+
217+
assertEquals(3, LockableResourcesManager.get().getCurrentQueuedContext().size());
218+
219+
// b3 times out
220+
j.waitForCompletion(b3);
221+
j.assertBuildStatus(Result.ABORTED, b3);
222+
j.assertLogNotContains("middle should never get here", b3);
223+
224+
// Queue should now have b2 and b4
225+
assertEquals(
226+
2,
227+
LockableResourcesManager.get().getCurrentQueuedContext().size(),
228+
"Only b2 and b4 should remain in the queue");
229+
230+
// Release lock → b2 gets it first (FIFO order preserved)
231+
SemaphoreStep.success("hold/1", null);
232+
j.waitForCompletion(b1);
233+
234+
SemaphoreStep.waitForStart("first/1", b2);
235+
j.assertLogContains("Lock acquired on [Resource: resource1]", b2);
236+
237+
// b4 is still waiting
238+
j.assertLogNotContains("Lock acquired on", b4);
239+
240+
// Release b2 → b4 gets the lock
241+
SemaphoreStep.success("first/1", null);
242+
j.assertBuildStatusSuccess(j.waitForCompletion(b2));
243+
244+
SemaphoreStep.waitForStart("last/1", b4);
245+
j.assertLogContains("Lock acquired on [Resource: resource1]", b4);
246+
247+
SemaphoreStep.success("last/1", null);
248+
j.assertBuildStatusSuccess(j.waitForCompletion(b4));
249+
j.assertLogContains("last done", b4);
250+
251+
assertTrue(LockableResourcesManager.get().getCurrentQueuedContext().isEmpty());
252+
}
253+
254+
/**
255+
* Simulate the "extreme prejudice" scenario: a build waiting for a lock is hard-killed via
256+
* doKill(). This bypasses the normal step shutdown — the queue entry becomes stale and must be
257+
* cleaned up by the isValid() fallback in getNextQueuedContext() when the resource is freed.
258+
*/
259+
@Issue("773")
260+
@Test
261+
void hardKillWhileWaitingForLockClearsQueueViaIsValid(JenkinsRule j) throws Exception {
262+
LockableResourcesManager.get().createResource("resource1");
263+
264+
// b1 holds the lock
265+
WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder");
266+
holder.setDefinition(
267+
new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true));
268+
WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart();
269+
SemaphoreStep.waitForStart("hold/1", b1);
270+
271+
// b2 waits for the lock — will be hard-killed
272+
WorkflowJob victim = j.jenkins.createProject(WorkflowJob.class, "victim");
273+
victim.setDefinition(new CpsFlowDefinition(
274+
"lock('resource1') {\n"
275+
+ " echo 'victim inside lock'\n"
276+
+ "}",
277+
true));
278+
WorkflowRun b2 = victim.scheduleBuild2(0).waitForStart();
279+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
280+
isPaused(b2, 1, 1);
281+
282+
// b3 also waits
283+
WorkflowJob waiter = j.jenkins.createProject(WorkflowJob.class, "waiter");
284+
waiter.setDefinition(new CpsFlowDefinition(
285+
"lock('resource1') { semaphore 'waiter' }\necho 'waiter done'", true));
286+
WorkflowRun b3 = waiter.scheduleBuild2(0).waitForStart();
287+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3);
288+
isPaused(b3, 1, 1);
289+
290+
assertEquals(2, LockableResourcesManager.get().getCurrentQueuedContext().size());
291+
292+
// Hard-kill b2 — this is the "extreme prejudice" path
293+
b2.doKill();
294+
j.waitForMessage("Hard kill!", b2);
295+
j.waitForCompletion(b2);
296+
j.assertBuildStatus(Result.ABORTED, b2);
297+
298+
// Release the lock → proceedNextContext() runs → isValid() must detect b2
299+
// is dead and skip it, then give the lock to b3
300+
SemaphoreStep.success("hold/1", null);
301+
j.waitForCompletion(b1);
302+
303+
// b3 must get the lock — if b2's stale entry blocks the queue, this will hang
304+
SemaphoreStep.waitForStart("waiter/1", b3);
305+
j.assertLogContains("Lock acquired on [Resource: resource1]", b3);
306+
j.assertLogNotContains("victim inside lock", b2);
307+
308+
SemaphoreStep.success("waiter/1", null);
309+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
310+
j.assertLogContains("waiter done", b3);
311+
312+
assertTrue(
313+
LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(),
314+
"Queue must be empty after all builds complete");
315+
}
316+
317+
/**
318+
* A build that was waiting for a lock is hard-killed while it is the ONLY waiter. When the
319+
* resource is later freed, the stale entry must be cleaned up and the queue must be empty. A
320+
* new build must then be able to acquire the lock immediately.
321+
*/
322+
@Issue("773")
323+
@Test
324+
void hardKillOnlyWaiterDoesNotBlockFutureBuilds(JenkinsRule j) throws Exception {
325+
LockableResourcesManager.get().createResource("resource1");
326+
327+
// b1 holds the lock
328+
WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder");
329+
holder.setDefinition(
330+
new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true));
331+
WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart();
332+
SemaphoreStep.waitForStart("hold/1", b1);
333+
334+
// b2 waits — will be hard-killed
335+
WorkflowJob victim = j.jenkins.createProject(WorkflowJob.class, "victim");
336+
victim.setDefinition(new CpsFlowDefinition(
337+
"lock('resource1') { echo 'victim inside' }", true));
338+
WorkflowRun b2 = victim.scheduleBuild2(0).waitForStart();
339+
j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2);
340+
341+
assertEquals(1, LockableResourcesManager.get().getCurrentQueuedContext().size());
342+
343+
// Hard-kill b2
344+
b2.doKill();
345+
j.waitForCompletion(b2);
346+
j.assertBuildStatus(Result.ABORTED, b2);
347+
348+
// Release the lock from b1
349+
SemaphoreStep.success("hold/1", null);
350+
j.assertBuildStatusSuccess(j.waitForCompletion(b1));
351+
352+
// A new build must be able to acquire the lock immediately
353+
WorkflowJob fresh = j.jenkins.createProject(WorkflowJob.class, "fresh");
354+
fresh.setDefinition(new CpsFlowDefinition(
355+
"lock('resource1') { semaphore 'fresh' }\necho 'fresh done'", true));
356+
WorkflowRun b3 = fresh.scheduleBuild2(0).waitForStart();
357+
SemaphoreStep.waitForStart("fresh/1", b3);
358+
j.assertLogContains("Lock acquired on [Resource: resource1]", b3);
359+
360+
SemaphoreStep.success("fresh/1", null);
361+
j.assertBuildStatusSuccess(j.waitForCompletion(b3));
362+
j.assertLogContains("fresh done", b3);
363+
364+
assertTrue(LockableResourcesManager.get().getCurrentQueuedContext().isEmpty());
365+
}
366+
}

0 commit comments

Comments
 (0)