Skip to content

Commit dea0e0a

Browse files
committed
fix: call proceedNextContext() in reset() and replace test sleeps with semaphores
Fixes #988 LockableResourcesManager.reset() cleared resource state but never called proceedNextContext(), so waiting pipeline jobs were never notified when resources became free via reset(). Every other method that frees resources (unlockResources, unreserve, addResource) already does this. Added the same while(proceedNextContext()){} loop inside reset(). Replaced all sleep() calls in the reserve/setReservedBy-inside-lock tests with SemaphoreStep-based synchronisation, making the tests deterministic and roughly 4x faster (~20s vs ~80s each). Removed the try-catch Bug#2a and Bug#2b tolerance blocks since the reset() fix makes them unnecessary. Affected tests: - LockStepReserveInsideLockHonouredTest - LockStepSetReservedByInsideLockHonouredTest - LockStepTest#reserveInsideLockHonoured - LockStepTest#setReservedByInsideLockHonoured
1 parent a6b5b5c commit dea0e0a

5 files changed

Lines changed: 188 additions & 351 deletions

File tree

src/main/java/org/jenkins/plugins/lockableresources/LockableResourcesManager.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,11 @@ public void reset(List<LockableResource> resources) {
11741174
uncacheIfFreeing(r, true, true);
11751175
r.reset();
11761176
}
1177+
1178+
while (proceedNextContext()) {
1179+
// process as many contexts as possible
1180+
}
1181+
11771182
save();
11781183
}
11791184
scheduleQueueMaintenance();

src/test/java/org/jenkins/plugins/lockableresources/LockStepReserveInsideLockHonouredTest.java

Lines changed: 71 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
55
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
66
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
7+
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
78
import org.junit.jupiter.api.Test;
89
import org.jvnet.hudson.test.JenkinsRule;
910
import org.jvnet.hudson.test.junit.jupiter.WithJenkins;
@@ -39,7 +40,6 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
3940
+ " def res = "
4041
+ lmget
4142
+ ".reserve([lr], 'test2a')\n"
42-
// + " semaphore 'wait-inside'\n"
4343
+ " echo \"Locked resource cause 1-2a: ${lr.getLockCause()}\"\n"
4444
+ " echo \"Locked resource reservedBy 1-2a: ${lr.getReservedBy()}\"\n"
4545
+ " if (!res) {\n"
@@ -49,34 +49,27 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
4949
+ " echo \"Locked resource reservedBy 1-2b: ${lr.getReservedBy()}\"\n"
5050
+ " }\n"
5151
+ " echo \"Unlocking parallel closure 1\"\n"
52+
+ " semaphore 'p1-inside'\n" // Java controls when p1 exits lock
5253
+ " }\n"
5354
+ " echo \"Locked resource cause 1-3 (after unlock): ${lr.getLockCause()}\"\n"
5455
+ " echo \"Locked resource reservedBy 1-3: ${lr.getReservedBy()}\"\n"
55-
+ " echo \"Ended locked parallel closure 1 with resource reserved, sleeping...\"\n"
56-
+ " sleep (5)\n"
56+
+ " echo \"Ended locked parallel closure 1 with resource reserved\"\n"
5757
+ " echo \"Locked resource cause 1-4: ${lr.getLockCause()}\"\n"
5858
+ " echo \"Locked resource reservedBy 1-4: ${lr.getReservedBy()}\"\n"
59-
+ " echo \"Resetting Locked resource via LRM and sleeping ...\"\n"
59+
+ " semaphore 'p1-before-reset'\n" // Java asserts p2 hasn't started before reset
60+
+ " echo \"Resetting Locked resource via LRM\"\n"
6061
+ " "
6162
+ lmget
6263
+ ".reset([lr])\n"
63-
+ " sleep (5)\n"
64-
+ " echo \"Un-reserving Locked resource via LRM and sleeping...\"\n"
64+
+ " echo \"Un-reserving Locked resource via LRM\"\n"
6565
+ " "
6666
+ lmget
6767
+ ".unreserve([lr])\n"
68-
+ " sleep (5)\n"
69-
// Note: the unlock attempt here might steal this resource
70-
// from another parallel stage, so we don't do it:
71-
// + " echo \"Un-locking Locked resource via LRM and sleeping...\"\n"
72-
// + " " + lmget + ".unlock([lr], null)\n"
73-
// + " sleep (5)\n"
68+
+ " semaphore 'p1-after-reset'\n" // Java ensures p2 started before p1 logs 1-5
7469
+ " echo \"Locked resource cause 1-5: ${lr.getLockCause()}\"\n"
7570
+ " echo \"Locked resource reservedBy 1-5: ${lr.getReservedBy()}\"\n"
76-
+ " sleep (5)\n"
7771
+ " if (lr.getLockCause() == null) {\n"
7872
+ " echo \"LRM seems stuck; trying to reserve/unreserve this resource by LRM methods\"\n"
79-
// + " lock(label: 'label1') { echo \"Secondary lock trick\" }\n"
8073
+ " if ("
8174
+ lmget
8275
+ ".reserve([lr], 'unstucker')) {\n"
@@ -86,76 +79,71 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
8679
+ ".unreserve([lr])\n"
8780
+ " } else { echo \"Could not reserve by LRM methods as 'unstucker'\" }\n"
8881
+ " }\n"
89-
+ " sleep (5)\n"
9082
+ " echo \"Locked resource cause 1-6: ${lr.getLockCause()}\"\n"
9183
+ " echo \"Locked resource reservedBy 1-6: ${lr.getReservedBy()}\"\n"
9284
+ "},\n"
9385
+ "p2: {\n"
94-
// + " semaphore 'wait-outside'\n"
9586
+ " org.jenkins.plugins.lockableresources.LockableResource lr = null\n"
9687
+ " echo \"Locked resource cause 2-1: not locked yet\"\n"
9788
+ " lock(label: 'label1', variable: 'someVar2') {\n"
9889
+ " echo \"VAR2 IS $env.someVar2\"\n"
9990
+ " lr = "
10091
+ lmget
10192
+ ".fromName(env.someVar2)\n"
102-
+ " sleep (1)\n"
10393
+ " echo \"Locked resource cause 2-2: ${lr.getLockCause()}\"\n"
10494
+ " echo \"Locked resource reservedBy 2-2: ${lr.getReservedBy()}\"\n"
10595
+ " echo \"Setting (directly) and dropping (via LRM) a reservation on locked resource\"\n"
10696
+ " lr.reserve('test2-1')\n"
107-
+ " sleep (3)\n"
10897
+ " "
10998
+ lmget
11099
+ ".unreserve([lr])\n"
111-
+ " echo \"Just sleeping...\"\n"
112-
+ " sleep (20)\n"
100+
+ " semaphore 'p2-holding'\n" // Java controls when p2 sets test2-2 and exits
113101
+ " echo \"Setting (directly) a reservation on locked resource\"\n"
114102
+ " lr.reserve('test2-2')\n"
115103
+ " echo \"Unlocking parallel closure 2\"\n"
116104
+ " }\n"
117105
+ " echo \"Locked resource cause 2-3: ${lr.getLockCause()}\"\n"
118106
+ " echo \"Locked resource reservedBy 2-3: ${lr.getReservedBy()}\"\n"
119-
+ " sleep (5)\n"
120107
+ " echo \"Recycling (via LRM) the reserved not-locked resource\"\n"
108+
+ " semaphore 'p2-before-recycle'\n" // Java asserts p3 hasn't got the lock before recycle
121109
+ " "
122110
+ lmget
123111
+ ".recycle([lr])\n"
124-
+ " sleep (5)\n"
112+
+ " semaphore 'p2-after-recycle'\n" // Java ensures p3 started before p2 logs 2-4
125113
+ " echo \"Locked resource cause 2-4: ${lr.getLockCause()}\"\n"
126114
+ " echo \"Locked resource reservedBy 2-4: ${lr.getReservedBy()}\"\n"
127115
+ "},\n"
128116
// Test that reserve/unreserve in p2 did not "allow" p3 to kidnap the lock:
129117
+ "p3: {\n"
130118
+ " org.jenkins.plugins.lockableresources.LockableResource lr = null\n"
131119
+ " echo \"Locked resource cause 3-1: not locked yet\"\n"
132-
+ " sleep 1\n"
120+
+ " semaphore 'p3-wait'\n" // Java controls when p3 enters the queue
133121
+ " lock(label: 'label1', variable: 'someVar3') {\n"
134122
+ " echo \"VAR3 IS $env.someVar3\"\n"
135123
+ " lr = "
136124
+ lmget
137125
+ ".fromName(env.someVar3)\n"
138126
+ " echo \"Locked resource cause 3-2: ${lr.getLockCause()}\"\n"
139127
+ " echo \"Locked resource reservedBy 3-2: ${lr.getReservedBy()}\"\n"
140-
+ " echo \"Just sleeping...\"\n"
141-
+ " sleep (10)\n"
142128
+ " echo \"Unlocking parallel closure 3\"\n"
129+
+ " semaphore 'p3-in-lock'\n" // Java controls when p3 exits lock
143130
+ " }\n"
144131
+ " echo \"Locked resource cause 3-3: ${lr.getLockCause()}\"\n"
145132
+ " echo \"Locked resource reservedBy 3-3: ${lr.getReservedBy()}\"\n"
146133
+ "},\n"
147134
// Add some pressure to try for race conditions:
148-
+ "p4: { sleep 2; lock(label: 'label1') { sleep 1 } },\n"
149-
+ "p5: { sleep 2; lock(label: 'label1') { sleep 3 } },\n"
150-
+ "p6: { sleep 2; lock(label: 'label1') { sleep 2 } },\n"
151-
+ "p7: { sleep 2; lock(label: 'label1') { sleep 1 } },\n"
152-
+ "p8: { sleep 2; lock(label: 'label1') { sleep 2 } },\n"
153-
+ "p9: { sleep 2; lock(label: 'label1') { sleep 1 } }\n"
135+
+ "p4: { semaphore 'p4'; lock(label: 'label1') { } },\n"
136+
+ "p5: { semaphore 'p5'; lock(label: 'label1') { } },\n"
137+
+ "p6: { semaphore 'p6'; lock(label: 'label1') { } },\n"
138+
+ "p7: { semaphore 'p7'; lock(label: 'label1') { } },\n"
139+
+ "p8: { semaphore 'p8'; lock(label: 'label1') { } },\n"
140+
+ "p9: { semaphore 'p9'; lock(label: 'label1') { } }\n"
154141
+ "\necho \"Survived the test\"\n"
155142
+ "}", // timeout wrapper
156143
false));
157144
WorkflowRun b1 = p.scheduleBuild2(0).waitForStart();
158145

146+
// --- Phase 1: p1 holds lock exclusively (p2,p3 blocked) ---
159147
j.waitForMessage("Locked resource cause 1-1", b1);
160148
j.assertLogNotContains("Locked resource cause 2-2", b1);
161149
j.assertLogNotContains("Locked resource cause 2-3", b1);
@@ -164,99 +152,86 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
164152
j.assertLogNotContains("Locked resource cause 2-2", b1);
165153
j.assertLogNotContains("Locked resource cause 2-3", b1);
166154

155+
// Let p1 exit lock closure
156+
SemaphoreStep.success("p1-inside/1", null);
157+
158+
// --- Phase 2: p1 released lock but resource is still reserved ---
167159
j.waitForMessage("Locked resource cause 1-3", b1);
168160
j.assertLogNotContains("Locked resource cause 2-2", b1);
169161
j.assertLogNotContains("Locked resource cause 2-3", b1);
170162

171-
// Bug #1 happens here (without further patch):
172-
// although resource is seen as reserved, it is
173-
// grabbed anyway by the other parallel thread
174-
// which is already waiting. Notably, log is like:
175-
// 62.908 [setReservedByInsideLockHonoured #1] Lock acquired on [Label: label1]
176-
// 62.909 [setReservedByInsideLockHonoured #1] Lock released on resource [Label: label1]
177-
// and the consistent ordering of acquired first,
178-
// released later is unsettling.
163+
// Bug #1: although resource is reserved, p2 must NOT grab it.
179164
j.waitForMessage("Locked resource cause 1-4", b1);
180-
// Note: stage in test has a sleep(1) to reduce chances that
181-
// this line is noticed in log although it is there AFTER 1-4:
182165
j.assertLogNotContains("Locked resource cause 2-2", b1);
183166
j.assertLogNotContains("Locked resource cause 2-3", b1);
184167
LOGGER.info("GOOD: Did not encounter Bug #1 " + "(parallel p2 gets the lock on a still-reserved resource)!");
185168

169+
// --- Phase 3: p1 calls reset() → p2 gets lock via proceedNextContext ---
170+
// Ensure p2 is in the waiting queue before p1 calls reset()
171+
j.waitForMessage("[Label: label1] is not free, waiting for execution ...", b1);
172+
173+
// p1 calls reset+unreserve, then blocks at semaphore p1-after-reset.
174+
// p2 gets the lock (dispatched by proceedNextContext), enters closure,
175+
// does reserve/unreserve cycle, then blocks at semaphore p2-holding.
176+
SemaphoreStep.success("p1-before-reset/1", null);
177+
SemaphoreStep.waitForStart("p2-holding/1", b1);
178+
179+
// Release p3 and pressure stages into the lock queue while p2 holds the lock
180+
SemaphoreStep.success("p3-wait/1", null);
181+
for (String s : new String[] {"p4", "p5", "p6", "p7", "p8", "p9"}) {
182+
SemaphoreStep.success(s + "/1", null);
183+
}
184+
185+
// Now let p1 continue to log 1-5 (p2 already holds the lock)
186+
SemaphoreStep.success("p1-after-reset/1", null);
187+
186188
j.waitForMessage("Locked resource cause 1-5", b1);
187-
// This line might not strictly be required,
188-
// but we are processing a parallel pipeline
189-
// and many seconds were spent sleeping, so:
190189
j.assertLogContains("Locked resource cause 2-1", b1);
191-
// Here the other parallel stage may have already started
192-
// (we try to recycle the resource between 1-4 and 1-5):
193-
// j.assertLogNotContains("Locked resource cause 2-2", b1);
194-
// j.assertLogNotContains("Locked resource cause 2-3", b1);
190+
j.assertLogContains("Locked resource cause 2-2", b1);
191+
LOGGER.info("GOOD: Parallel 2 started after Parallel 1 reset the resource!");
195192

196-
// Bug #2 happens here: even after the resource is known
197-
// to be un-reserved, resources already looping waiting
198-
// for it (after the fix for Bug #1) are not "notified".
199-
// Adding and removing the resource helps unblock this.
200-
boolean sawBug2a = false;
201-
try {
202-
j.waitForMessage("Locked resource cause 1-6", b1);
203-
j.assertLogContains("Locked resource cause 2-2", b1);
204-
} catch (java.lang.AssertionError t1) {
205-
sawBug2a = true;
206-
LOGGER.info("Bug #2a (Parallel 2 did not start after Parallel 1 finished "
207-
+ "and resource later released) currently tolerated");
208-
// LOGGER.info(t1.toString());
209-
// throw t1;
210-
}
211-
if (!sawBug2a) {
212-
LOGGER.info("GOOD: Did not encounter Bug #2a "
213-
+ "(Parallel 2 did not start after Parallel 1 finished "
214-
+ "and resource later released)!");
215-
}
193+
// p2 holds the lock, reservation cycle (test2-1 → unreserve) already done
194+
j.assertLogContains("Locked resource reservedBy 1-5: null", b1);
216195

217-
// If the bug is resolved, then by the time we get to 1-5
218-
// the resource should be taken by the other parallel stage
219-
// and so not locked by not-"null"; reservation should be away though
220-
// Note: Due to parallel execution timing, p2 may have already called
221-
// reserve('test2-1') before p1 logs point 1-5, so we tolerate both states
222-
boolean sawBug2b = false;
223-
try {
224-
j.assertLogContains("Locked resource reservedBy 1-5: null", b1);
225-
} catch (java.lang.AssertionError t) {
226-
// p2 set a reservation before p1 logged 1-5 - this is expected timing variation
227-
j.assertLogContains("Locked resource reservedBy 1-5: test2-1", b1);
228-
LOGGER.info("Timing variation: p2 reserved resource before p1 logged 1-5 state");
229-
}
196+
// The resource is locked by p2 at point 1-5, so "cause 1-5: null" and the
197+
// un-stucking workaround should never appear.
230198
for (String line : new String[] {
231199
"Locked resource cause 1-5: null", "LRM seems stuck; trying to reserve/unreserve", "Secondary lock trick"
232200
}) {
233-
try {
234-
j.assertLogNotContains(line, b1);
235-
} catch (java.lang.AssertionError t2) {
236-
sawBug2b = true;
237-
LOGGER.info("Bug #2b (LRM required un-stucking) currently tolerated: " + line);
238-
// LOGGER.info(t2.toString());
239-
// throw t2;
240-
}
241-
}
242-
if (!sawBug2b) {
243-
LOGGER.info("GOOD: Did not encounter Bug #2b " + "(LRM required un-stucking)!");
201+
j.assertLogNotContains(line, b1);
244202
}
203+
LOGGER.info("GOOD: Resource was properly locked at point 1-5 (no un-stucking needed)!");
204+
205+
j.waitForMessage("Locked resource cause 1-6", b1);
206+
j.assertLogContains("Locked resource cause 2-2", b1);
245207

246208
j.waitForMessage("Locked resource cause 2-2", b1);
247209
j.assertLogContains("Locked resource cause 1-5", b1);
248-
LOGGER.info("GOOD: lock#2 was taken after we un-reserved lock#1");
210+
LOGGER.info("GOOD: lock#2 was taken after p1 reset/released the resource");
249211

250-
j.waitForMessage("Unlocking parallel closure 2", b1);
212+
// --- Phase 4: p2 exits lock (still reserved as test2-2) → p3 must NOT get in ---
213+
SemaphoreStep.success("p2-holding/1", null);
214+
215+
// Wait for p2 to reach the semaphore before recycle — lock is released but reserved
216+
SemaphoreStep.waitForStart("p2-before-recycle/1", b1);
217+
j.assertLogContains("Unlocking parallel closure 2", b1);
251218
j.assertLogNotContains("Locked resource cause 3-2", b1);
252219
LOGGER.info("GOOD: lock#3 was NOT taken just after we un-locked closure 2 (keeping lock#2 reserved)");
253220

254-
// After 2-3 we lrm.recycle() the lock so it should
255-
// go to the next bidder
221+
// --- Phase 5: p2 recycles → p3 gets lock ---
222+
// p2 calls recycle(), p3 gets the lock, then p2 blocks at p2-after-recycle.
223+
// Wait for p3 to actually log 3-2 before letting p2 log 2-4.
224+
SemaphoreStep.success("p2-before-recycle/1", null);
225+
j.waitForMessage("Locked resource cause 3-2", b1);
226+
SemaphoreStep.success("p2-after-recycle/1", null);
227+
256228
j.waitForMessage("Locked resource cause 2-4", b1);
257229
j.assertLogContains("Locked resource cause 3-2", b1);
258230
LOGGER.info("GOOD: lock#3 was taken just after we recycled lock#2");
259231

232+
// Let p3 finish
233+
SemaphoreStep.success("p3-in-lock/1", null);
234+
260235
j.assertLogContains(", waiting for execution ...", b1);
261236

262237
j.assertBuildStatusSuccess(j.waitForCompletion(b1));

0 commit comments

Comments
 (0)