|
| 1 | +package org.jenkins.plugins.lockableresources; |
| 2 | + |
| 3 | +import static org.junit.jupiter.api.Assertions.assertEquals; |
| 4 | +import static org.junit.jupiter.api.Assertions.assertTrue; |
| 5 | + |
| 6 | +import hudson.model.Result; |
| 7 | +import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; |
| 8 | +import org.jenkinsci.plugins.workflow.job.WorkflowJob; |
| 9 | +import org.jenkinsci.plugins.workflow.job.WorkflowRun; |
| 10 | +import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; |
| 11 | +import org.junit.jupiter.api.Test; |
| 12 | +import org.jvnet.hudson.test.Issue; |
| 13 | +import org.jvnet.hudson.test.JenkinsRule; |
| 14 | +import org.jvnet.hudson.test.junit.jupiter.WithJenkins; |
| 15 | + |
| 16 | +/** |
| 17 | + * Tests for issue #773: Resources are not removed from queue when timeout is reached. |
| 18 | + * |
| 19 | + * <p>When a pipeline times out while its lock step is waiting in the queue, the queued context must |
| 20 | + * be removed so the resource can be acquired by other waiting builds. |
| 21 | + */ |
| 22 | +@WithJenkins |
| 23 | +class LockStepTimeoutQueueTest extends LockStepTestBase { |
| 24 | + |
| 25 | + /** |
| 26 | + * A build waiting for a lock is aborted by a timeout() wrapper. The queued context must be |
| 27 | + * removed so the next waiting build gets the lock. |
| 28 | + */ |
| 29 | + @Issue("773") |
| 30 | + @Test |
| 31 | + void timeoutWhileWaitingForLockClearsQueue(JenkinsRule j) throws Exception { |
| 32 | + LockableResourcesManager.get().createResource("resource1"); |
| 33 | + |
| 34 | + // b1 holds the lock |
| 35 | + WorkflowJob p1 = j.jenkins.createProject(WorkflowJob.class, "holder"); |
| 36 | + p1.setDefinition(new CpsFlowDefinition( |
| 37 | + "lock('resource1') {\n" |
| 38 | + + " semaphore 'hold'\n" |
| 39 | + + "}\n" |
| 40 | + + "echo 'holder done'", |
| 41 | + true)); |
| 42 | + WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart(); |
| 43 | + SemaphoreStep.waitForStart("hold/1", b1); |
| 44 | + |
| 45 | + // b2 tries to lock with a short timeout — will time out while waiting |
| 46 | + WorkflowJob p2 = j.jenkins.createProject(WorkflowJob.class, "timeouter"); |
| 47 | + p2.setDefinition(new CpsFlowDefinition( |
| 48 | + "timeout(time: 5, unit: 'SECONDS') {\n" |
| 49 | + + " lock('resource1') {\n" |
| 50 | + + " echo 'timeouter inside lock'\n" |
| 51 | + + " }\n" |
| 52 | + + "}", |
| 53 | + true)); |
| 54 | + WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart(); |
| 55 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2); |
| 56 | + |
| 57 | + // b3 also waits for the lock — queued after b2 |
| 58 | + WorkflowJob p3 = j.jenkins.createProject(WorkflowJob.class, "waiter"); |
| 59 | + p3.setDefinition(new CpsFlowDefinition( |
| 60 | + "lock('resource1') {\n" |
| 61 | + + " semaphore 'waiter'\n" |
| 62 | + + "}\n" |
| 63 | + + "echo 'waiter done'", |
| 64 | + true)); |
| 65 | + WorkflowRun b3 = p3.scheduleBuild2(0).waitForStart(); |
| 66 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3); |
| 67 | + |
| 68 | + // Verify b2 and b3 are both queued |
| 69 | + assertEquals( |
| 70 | + 2, |
| 71 | + LockableResourcesManager.get().getCurrentQueuedContext().size(), |
| 72 | + "Both b2 and b3 should be in the queue"); |
| 73 | + |
| 74 | + // Wait for b2 to time out |
| 75 | + j.waitForCompletion(b2); |
| 76 | + j.assertBuildStatus(Result.ABORTED, b2); |
| 77 | + j.assertLogContains("Timeout has been exceeded", b2); |
| 78 | + |
| 79 | + // After b2 times out, it must be removed from the queue — only b3 remains |
| 80 | + assertEquals( |
| 81 | + 1, |
| 82 | + LockableResourcesManager.get().getCurrentQueuedContext().size(), |
| 83 | + "b2 must be removed from the queue after timeout"); |
| 84 | + |
| 85 | + // Release the lock from b1 → b3 should get it (not stuck behind dead b2) |
| 86 | + SemaphoreStep.success("hold/1", null); |
| 87 | + j.waitForCompletion(b1); |
| 88 | + |
| 89 | + // b3 should acquire the lock |
| 90 | + SemaphoreStep.waitForStart("waiter/1", b3); |
| 91 | + j.assertLogContains("Lock acquired on [Resource: resource1]", b3); |
| 92 | + |
| 93 | + SemaphoreStep.success("waiter/1", null); |
| 94 | + j.assertBuildStatusSuccess(j.waitForCompletion(b3)); |
| 95 | + j.assertLogContains("waiter done", b3); |
| 96 | + |
| 97 | + // Queue must be fully empty |
| 98 | + assertTrue( |
| 99 | + LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(), |
| 100 | + "Queue must be empty after all builds complete"); |
| 101 | + } |
| 102 | + |
| 103 | + /** |
| 104 | + * A build waiting for a lock by label is aborted. The queued context must be removed and the |
| 105 | + * next waiter proceeds. |
| 106 | + */ |
| 107 | + @Issue("773") |
| 108 | + @Test |
| 109 | + void abortWhileWaitingForLockByLabelClearsQueue(JenkinsRule j) throws Exception { |
| 110 | + LockableResourcesManager.get().createResourceWithLabel("resource1", "label1"); |
| 111 | + |
| 112 | + // b1 holds the lock |
| 113 | + WorkflowJob p1 = j.jenkins.createProject(WorkflowJob.class, "holder"); |
| 114 | + p1.setDefinition(new CpsFlowDefinition( |
| 115 | + "lock(label: 'label1', quantity: 1) {\n" |
| 116 | + + " semaphore 'hold'\n" |
| 117 | + + "}", |
| 118 | + true)); |
| 119 | + WorkflowRun b1 = p1.scheduleBuild2(0).waitForStart(); |
| 120 | + SemaphoreStep.waitForStart("hold/1", b1); |
| 121 | + |
| 122 | + // b2 waits for the lock |
| 123 | + WorkflowJob p2 = j.jenkins.createProject(WorkflowJob.class, "aborter"); |
| 124 | + p2.setDefinition(new CpsFlowDefinition( |
| 125 | + "lock(label: 'label1', quantity: 1) {\n" |
| 126 | + + " semaphore 'aborter'\n" |
| 127 | + + "}", |
| 128 | + true)); |
| 129 | + WorkflowRun b2 = p2.scheduleBuild2(0).waitForStart(); |
| 130 | + j.waitForMessage(", waiting for execution ...", b2); |
| 131 | + |
| 132 | + // b3 also waits |
| 133 | + WorkflowJob p3 = j.jenkins.createProject(WorkflowJob.class, "waiter"); |
| 134 | + p3.setDefinition(new CpsFlowDefinition( |
| 135 | + "lock(label: 'label1', quantity: 1) {\n" |
| 136 | + + " semaphore 'waiter'\n" |
| 137 | + + "}\n" |
| 138 | + + "echo 'waiter done'", |
| 139 | + true)); |
| 140 | + WorkflowRun b3 = p3.scheduleBuild2(0).waitForStart(); |
| 141 | + j.waitForMessage(", waiting for execution ...", b3); |
| 142 | + |
| 143 | + assertEquals( |
| 144 | + 2, |
| 145 | + LockableResourcesManager.get().getCurrentQueuedContext().size(), |
| 146 | + "Both b2 and b3 should be in the queue"); |
| 147 | + |
| 148 | + // Abort b2 (simulates user abort or parent timeout propagation) |
| 149 | + b2.getExecutor().interrupt(); |
| 150 | + j.waitForCompletion(b2); |
| 151 | + j.assertBuildStatus(Result.ABORTED, b2); |
| 152 | + |
| 153 | + // After abort, b2 must be removed from the queue |
| 154 | + assertEquals( |
| 155 | + 1, |
| 156 | + LockableResourcesManager.get().getCurrentQueuedContext().size(), |
| 157 | + "b2 must be removed from the queue after abort"); |
| 158 | + |
| 159 | + // Release the lock → b3 gets it |
| 160 | + SemaphoreStep.success("hold/1", null); |
| 161 | + j.waitForCompletion(b1); |
| 162 | + |
| 163 | + SemaphoreStep.waitForStart("waiter/1", b3); |
| 164 | + j.assertLogContains("Lock acquired on [Label: label1", b3); |
| 165 | + |
| 166 | + SemaphoreStep.success("waiter/1", null); |
| 167 | + j.assertBuildStatusSuccess(j.waitForCompletion(b3)); |
| 168 | + j.assertLogContains("waiter done", b3); |
| 169 | + |
| 170 | + assertTrue( |
| 171 | + LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(), |
| 172 | + "Queue must be empty after all builds complete"); |
| 173 | + } |
| 174 | + |
| 175 | + /** |
| 176 | + * Multiple builds are queued for a lock. The middle one times out. The remaining builds must |
| 177 | + * still proceed in order. |
| 178 | + */ |
| 179 | + @Issue("773") |
| 180 | + @Test |
| 181 | + void timeoutMiddleBuildInQueuePreservesOrder(JenkinsRule j) throws Exception { |
| 182 | + LockableResourcesManager.get().createResource("resource1"); |
| 183 | + |
| 184 | + // b1 holds the lock |
| 185 | + WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder"); |
| 186 | + holder.setDefinition( |
| 187 | + new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true)); |
| 188 | + WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart(); |
| 189 | + SemaphoreStep.waitForStart("hold/1", b1); |
| 190 | + |
| 191 | + // b2 waits (no timeout) |
| 192 | + WorkflowJob first = j.jenkins.createProject(WorkflowJob.class, "first"); |
| 193 | + first.setDefinition(new CpsFlowDefinition( |
| 194 | + "lock('resource1') { semaphore 'first' }\necho 'first done'", true)); |
| 195 | + WorkflowRun b2 = first.scheduleBuild2(0).waitForStart(); |
| 196 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2); |
| 197 | + |
| 198 | + // b3 waits with timeout — will be aborted |
| 199 | + WorkflowJob middle = j.jenkins.createProject(WorkflowJob.class, "middle"); |
| 200 | + middle.setDefinition(new CpsFlowDefinition( |
| 201 | + "timeout(time: 5, unit: 'SECONDS') {\n" |
| 202 | + + " lock('resource1') {\n" |
| 203 | + + " echo 'middle should never get here'\n" |
| 204 | + + " }\n" |
| 205 | + + "}", |
| 206 | + true)); |
| 207 | + WorkflowRun b3 = middle.scheduleBuild2(0).waitForStart(); |
| 208 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3); |
| 209 | + |
| 210 | + // b4 waits (no timeout) |
| 211 | + WorkflowJob last = j.jenkins.createProject(WorkflowJob.class, "last"); |
| 212 | + last.setDefinition( |
| 213 | + new CpsFlowDefinition("lock('resource1') { semaphore 'last' }\necho 'last done'", true)); |
| 214 | + WorkflowRun b4 = last.scheduleBuild2(0).waitForStart(); |
| 215 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b4); |
| 216 | + |
| 217 | + assertEquals(3, LockableResourcesManager.get().getCurrentQueuedContext().size()); |
| 218 | + |
| 219 | + // b3 times out |
| 220 | + j.waitForCompletion(b3); |
| 221 | + j.assertBuildStatus(Result.ABORTED, b3); |
| 222 | + j.assertLogNotContains("middle should never get here", b3); |
| 223 | + |
| 224 | + // Queue should now have b2 and b4 |
| 225 | + assertEquals( |
| 226 | + 2, |
| 227 | + LockableResourcesManager.get().getCurrentQueuedContext().size(), |
| 228 | + "Only b2 and b4 should remain in the queue"); |
| 229 | + |
| 230 | + // Release lock → b2 gets it first (FIFO order preserved) |
| 231 | + SemaphoreStep.success("hold/1", null); |
| 232 | + j.waitForCompletion(b1); |
| 233 | + |
| 234 | + SemaphoreStep.waitForStart("first/1", b2); |
| 235 | + j.assertLogContains("Lock acquired on [Resource: resource1]", b2); |
| 236 | + |
| 237 | + // b4 is still waiting |
| 238 | + j.assertLogNotContains("Lock acquired on", b4); |
| 239 | + |
| 240 | + // Release b2 → b4 gets the lock |
| 241 | + SemaphoreStep.success("first/1", null); |
| 242 | + j.assertBuildStatusSuccess(j.waitForCompletion(b2)); |
| 243 | + |
| 244 | + SemaphoreStep.waitForStart("last/1", b4); |
| 245 | + j.assertLogContains("Lock acquired on [Resource: resource1]", b4); |
| 246 | + |
| 247 | + SemaphoreStep.success("last/1", null); |
| 248 | + j.assertBuildStatusSuccess(j.waitForCompletion(b4)); |
| 249 | + j.assertLogContains("last done", b4); |
| 250 | + |
| 251 | + assertTrue(LockableResourcesManager.get().getCurrentQueuedContext().isEmpty()); |
| 252 | + } |
| 253 | + |
| 254 | + /** |
| 255 | + * Simulate the "extreme prejudice" scenario: a build waiting for a lock is hard-killed via |
| 256 | + * doKill(). This bypasses the normal step shutdown — the queue entry becomes stale and must be |
| 257 | + * cleaned up by the isValid() fallback in getNextQueuedContext() when the resource is freed. |
| 258 | + */ |
| 259 | + @Issue("773") |
| 260 | + @Test |
| 261 | + void hardKillWhileWaitingForLockClearsQueueViaIsValid(JenkinsRule j) throws Exception { |
| 262 | + LockableResourcesManager.get().createResource("resource1"); |
| 263 | + |
| 264 | + // b1 holds the lock |
| 265 | + WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder"); |
| 266 | + holder.setDefinition( |
| 267 | + new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true)); |
| 268 | + WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart(); |
| 269 | + SemaphoreStep.waitForStart("hold/1", b1); |
| 270 | + |
| 271 | + // b2 waits for the lock — will be hard-killed |
| 272 | + WorkflowJob victim = j.jenkins.createProject(WorkflowJob.class, "victim"); |
| 273 | + victim.setDefinition(new CpsFlowDefinition( |
| 274 | + "lock('resource1') {\n" |
| 275 | + + " echo 'victim inside lock'\n" |
| 276 | + + "}", |
| 277 | + true)); |
| 278 | + WorkflowRun b2 = victim.scheduleBuild2(0).waitForStart(); |
| 279 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2); |
| 280 | + isPaused(b2, 1, 1); |
| 281 | + |
| 282 | + // b3 also waits |
| 283 | + WorkflowJob waiter = j.jenkins.createProject(WorkflowJob.class, "waiter"); |
| 284 | + waiter.setDefinition(new CpsFlowDefinition( |
| 285 | + "lock('resource1') { semaphore 'waiter' }\necho 'waiter done'", true)); |
| 286 | + WorkflowRun b3 = waiter.scheduleBuild2(0).waitForStart(); |
| 287 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b3); |
| 288 | + isPaused(b3, 1, 1); |
| 289 | + |
| 290 | + assertEquals(2, LockableResourcesManager.get().getCurrentQueuedContext().size()); |
| 291 | + |
| 292 | + // Hard-kill b2 — this is the "extreme prejudice" path |
| 293 | + b2.doKill(); |
| 294 | + j.waitForMessage("Hard kill!", b2); |
| 295 | + j.waitForCompletion(b2); |
| 296 | + j.assertBuildStatus(Result.ABORTED, b2); |
| 297 | + |
| 298 | + // Release the lock → proceedNextContext() runs → isValid() must detect b2 |
| 299 | + // is dead and skip it, then give the lock to b3 |
| 300 | + SemaphoreStep.success("hold/1", null); |
| 301 | + j.waitForCompletion(b1); |
| 302 | + |
| 303 | + // b3 must get the lock — if b2's stale entry blocks the queue, this will hang |
| 304 | + SemaphoreStep.waitForStart("waiter/1", b3); |
| 305 | + j.assertLogContains("Lock acquired on [Resource: resource1]", b3); |
| 306 | + j.assertLogNotContains("victim inside lock", b2); |
| 307 | + |
| 308 | + SemaphoreStep.success("waiter/1", null); |
| 309 | + j.assertBuildStatusSuccess(j.waitForCompletion(b3)); |
| 310 | + j.assertLogContains("waiter done", b3); |
| 311 | + |
| 312 | + assertTrue( |
| 313 | + LockableResourcesManager.get().getCurrentQueuedContext().isEmpty(), |
| 314 | + "Queue must be empty after all builds complete"); |
| 315 | + } |
| 316 | + |
| 317 | + /** |
| 318 | + * A build that was waiting for a lock is hard-killed while it is the ONLY waiter. When the |
| 319 | + * resource is later freed, the stale entry must be cleaned up and the queue must be empty. A |
| 320 | + * new build must then be able to acquire the lock immediately. |
| 321 | + */ |
| 322 | + @Issue("773") |
| 323 | + @Test |
| 324 | + void hardKillOnlyWaiterDoesNotBlockFutureBuilds(JenkinsRule j) throws Exception { |
| 325 | + LockableResourcesManager.get().createResource("resource1"); |
| 326 | + |
| 327 | + // b1 holds the lock |
| 328 | + WorkflowJob holder = j.jenkins.createProject(WorkflowJob.class, "holder"); |
| 329 | + holder.setDefinition( |
| 330 | + new CpsFlowDefinition("lock('resource1') { semaphore 'hold' }\necho 'holder done'", true)); |
| 331 | + WorkflowRun b1 = holder.scheduleBuild2(0).waitForStart(); |
| 332 | + SemaphoreStep.waitForStart("hold/1", b1); |
| 333 | + |
| 334 | + // b2 waits — will be hard-killed |
| 335 | + WorkflowJob victim = j.jenkins.createProject(WorkflowJob.class, "victim"); |
| 336 | + victim.setDefinition(new CpsFlowDefinition( |
| 337 | + "lock('resource1') { echo 'victim inside' }", true)); |
| 338 | + WorkflowRun b2 = victim.scheduleBuild2(0).waitForStart(); |
| 339 | + j.waitForMessage("[resource1] is locked by build " + b1.getFullDisplayName(), b2); |
| 340 | + |
| 341 | + assertEquals(1, LockableResourcesManager.get().getCurrentQueuedContext().size()); |
| 342 | + |
| 343 | + // Hard-kill b2 |
| 344 | + b2.doKill(); |
| 345 | + j.waitForCompletion(b2); |
| 346 | + j.assertBuildStatus(Result.ABORTED, b2); |
| 347 | + |
| 348 | + // Release the lock from b1 |
| 349 | + SemaphoreStep.success("hold/1", null); |
| 350 | + j.assertBuildStatusSuccess(j.waitForCompletion(b1)); |
| 351 | + |
| 352 | + // A new build must be able to acquire the lock immediately |
| 353 | + WorkflowJob fresh = j.jenkins.createProject(WorkflowJob.class, "fresh"); |
| 354 | + fresh.setDefinition(new CpsFlowDefinition( |
| 355 | + "lock('resource1') { semaphore 'fresh' }\necho 'fresh done'", true)); |
| 356 | + WorkflowRun b3 = fresh.scheduleBuild2(0).waitForStart(); |
| 357 | + SemaphoreStep.waitForStart("fresh/1", b3); |
| 358 | + j.assertLogContains("Lock acquired on [Resource: resource1]", b3); |
| 359 | + |
| 360 | + SemaphoreStep.success("fresh/1", null); |
| 361 | + j.assertBuildStatusSuccess(j.waitForCompletion(b3)); |
| 362 | + j.assertLogContains("fresh done", b3); |
| 363 | + |
| 364 | + assertTrue(LockableResourcesManager.get().getCurrentQueuedContext().isEmpty()); |
| 365 | + } |
| 366 | +} |
0 commit comments