44import org .jenkinsci .plugins .workflow .cps .CpsFlowDefinition ;
55import org .jenkinsci .plugins .workflow .job .WorkflowJob ;
66import org .jenkinsci .plugins .workflow .job .WorkflowRun ;
7+ import org .jenkinsci .plugins .workflow .test .steps .SemaphoreStep ;
78import org .junit .jupiter .api .Test ;
89import org .jvnet .hudson .test .JenkinsRule ;
910import org .jvnet .hudson .test .junit .jupiter .WithJenkins ;
@@ -39,7 +40,6 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
3940 + " def res = "
4041 + lmget
4142 + ".reserve([lr], 'test2a')\n "
42- // + " semaphore 'wait-inside'\n"
4343 + " echo \" Locked resource cause 1-2a: ${lr.getLockCause()}\" \n "
4444 + " echo \" Locked resource reservedBy 1-2a: ${lr.getReservedBy()}\" \n "
4545 + " if (!res) {\n "
@@ -49,34 +49,27 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
4949 + " echo \" Locked resource reservedBy 1-2b: ${lr.getReservedBy()}\" \n "
5050 + " }\n "
5151 + " echo \" Unlocking parallel closure 1\" \n "
52+ + " semaphore 'p1-inside'\n " // Java controls when p1 exits lock
5253 + " }\n "
5354 + " echo \" Locked resource cause 1-3 (after unlock): ${lr.getLockCause()}\" \n "
5455 + " echo \" Locked resource reservedBy 1-3: ${lr.getReservedBy()}\" \n "
55- + " echo \" Ended locked parallel closure 1 with resource reserved, sleeping...\" \n "
56- + " sleep (5)\n "
56+ + " echo \" Ended locked parallel closure 1 with resource reserved\" \n "
5757 + " echo \" Locked resource cause 1-4: ${lr.getLockCause()}\" \n "
5858 + " echo \" Locked resource reservedBy 1-4: ${lr.getReservedBy()}\" \n "
59- + " echo \" Resetting Locked resource via LRM and sleeping ...\" \n "
59+ + " semaphore 'p1-before-reset'\n " // Java asserts p2 hasn't started before reset
60+ + " echo \" Resetting Locked resource via LRM\" \n "
6061 + " "
6162 + lmget
6263 + ".reset([lr])\n "
63- + " sleep (5)\n "
64- + " echo \" Un-reserving Locked resource via LRM and sleeping...\" \n "
64+ + " echo \" Un-reserving Locked resource via LRM\" \n "
6565 + " "
6666 + lmget
6767 + ".unreserve([lr])\n "
68- + " sleep (5)\n "
69- // Note: the unlock attempt here might steal this resource
70- // from another parallel stage, so we don't do it:
71- // + " echo \"Un-locking Locked resource via LRM and sleeping...\"\n"
72- // + " " + lmget + ".unlock([lr], null)\n"
73- // + " sleep (5)\n"
68+ + " semaphore 'p1-after-reset'\n " // Java ensures p2 started before p1 logs 1-5
7469 + " echo \" Locked resource cause 1-5: ${lr.getLockCause()}\" \n "
7570 + " echo \" Locked resource reservedBy 1-5: ${lr.getReservedBy()}\" \n "
76- + " sleep (5)\n "
7771 + " if (lr.getLockCause() == null) {\n "
7872 + " echo \" LRM seems stuck; trying to reserve/unreserve this resource by LRM methods\" \n "
79- // + " lock(label: 'label1') { echo \"Secondary lock trick\" }\n"
8073 + " if ("
8174 + lmget
8275 + ".reserve([lr], 'unstucker')) {\n "
@@ -86,76 +79,71 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
8679 + ".unreserve([lr])\n "
8780 + " } else { echo \" Could not reserve by LRM methods as 'unstucker'\" }\n "
8881 + " }\n "
89- + " sleep (5)\n "
9082 + " echo \" Locked resource cause 1-6: ${lr.getLockCause()}\" \n "
9183 + " echo \" Locked resource reservedBy 1-6: ${lr.getReservedBy()}\" \n "
9284 + "},\n "
9385 + "p2: {\n "
94- // + " semaphore 'wait-outside'\n"
9586 + " org.jenkins.plugins.lockableresources.LockableResource lr = null\n "
9687 + " echo \" Locked resource cause 2-1: not locked yet\" \n "
9788 + " lock(label: 'label1', variable: 'someVar2') {\n "
9889 + " echo \" VAR2 IS $env.someVar2\" \n "
9990 + " lr = "
10091 + lmget
10192 + ".fromName(env.someVar2)\n "
102- + " sleep (1)\n "
10393 + " echo \" Locked resource cause 2-2: ${lr.getLockCause()}\" \n "
10494 + " echo \" Locked resource reservedBy 2-2: ${lr.getReservedBy()}\" \n "
10595 + " echo \" Setting (directly) and dropping (via LRM) a reservation on locked resource\" \n "
10696 + " lr.reserve('test2-1')\n "
107- + " sleep (3)\n "
10897 + " "
10998 + lmget
11099 + ".unreserve([lr])\n "
111- + " echo \" Just sleeping...\" \n "
112- + " sleep (20)\n "
100+ + " semaphore 'p2-holding'\n " // Java controls when p2 sets test2-2 and exits
113101 + " echo \" Setting (directly) a reservation on locked resource\" \n "
114102 + " lr.reserve('test2-2')\n "
115103 + " echo \" Unlocking parallel closure 2\" \n "
116104 + " }\n "
117105 + " echo \" Locked resource cause 2-3: ${lr.getLockCause()}\" \n "
118106 + " echo \" Locked resource reservedBy 2-3: ${lr.getReservedBy()}\" \n "
119- + " sleep (5)\n "
120107 + " echo \" Recycling (via LRM) the reserved not-locked resource\" \n "
108+ + " semaphore 'p2-before-recycle'\n " // Java asserts p3 hasn't got the lock before recycle
121109 + " "
122110 + lmget
123111 + ".recycle([lr])\n "
124- + " sleep (5) \n "
112+ + " semaphore 'p2-after-recycle' \n " // Java ensures p3 started before p2 logs 2-4
125113 + " echo \" Locked resource cause 2-4: ${lr.getLockCause()}\" \n "
126114 + " echo \" Locked resource reservedBy 2-4: ${lr.getReservedBy()}\" \n "
127115 + "},\n "
128116 // Test that reserve/unreserve in p2 did not "allow" p3 to kidnap the lock:
129117 + "p3: {\n "
130118 + " org.jenkins.plugins.lockableresources.LockableResource lr = null\n "
131119 + " echo \" Locked resource cause 3-1: not locked yet\" \n "
132- + " sleep 1 \n "
120+ + " semaphore 'p3-wait' \n " // Java controls when p3 enters the queue
133121 + " lock(label: 'label1', variable: 'someVar3') {\n "
134122 + " echo \" VAR3 IS $env.someVar3\" \n "
135123 + " lr = "
136124 + lmget
137125 + ".fromName(env.someVar3)\n "
138126 + " echo \" Locked resource cause 3-2: ${lr.getLockCause()}\" \n "
139127 + " echo \" Locked resource reservedBy 3-2: ${lr.getReservedBy()}\" \n "
140- + " echo \" Just sleeping...\" \n "
141- + " sleep (10)\n "
142128 + " echo \" Unlocking parallel closure 3\" \n "
129+ + " semaphore 'p3-in-lock'\n " // Java controls when p3 exits lock
143130 + " }\n "
144131 + " echo \" Locked resource cause 3-3: ${lr.getLockCause()}\" \n "
145132 + " echo \" Locked resource reservedBy 3-3: ${lr.getReservedBy()}\" \n "
146133 + "},\n "
147134 // Add some pressure to try for race conditions:
148- + "p4: { sleep 2 ; lock(label: 'label1') { sleep 1 } },\n "
149- + "p5: { sleep 2 ; lock(label: 'label1') { sleep 3 } },\n "
150- + "p6: { sleep 2 ; lock(label: 'label1') { sleep 2 } },\n "
151- + "p7: { sleep 2 ; lock(label: 'label1') { sleep 1 } },\n "
152- + "p8: { sleep 2 ; lock(label: 'label1') { sleep 2 } },\n "
153- + "p9: { sleep 2 ; lock(label: 'label1') { sleep 1 } }\n "
135+ + "p4: { semaphore 'p4' ; lock(label: 'label1') { } },\n "
136+ + "p5: { semaphore 'p5' ; lock(label: 'label1') { } },\n "
137+ + "p6: { semaphore 'p6' ; lock(label: 'label1') { } },\n "
138+ + "p7: { semaphore 'p7' ; lock(label: 'label1') { } },\n "
139+ + "p8: { semaphore 'p8' ; lock(label: 'label1') { } },\n "
140+ + "p9: { semaphore 'p9' ; lock(label: 'label1') { } }\n "
154141 + "\n echo \" Survived the test\" \n "
155142 + "}" , // timeout wrapper
156143 false ));
157144 WorkflowRun b1 = p .scheduleBuild2 (0 ).waitForStart ();
158145
146+ // --- Phase 1: p1 holds lock exclusively (p2,p3 blocked) ---
159147 j .waitForMessage ("Locked resource cause 1-1" , b1 );
160148 j .assertLogNotContains ("Locked resource cause 2-2" , b1 );
161149 j .assertLogNotContains ("Locked resource cause 2-3" , b1 );
@@ -164,99 +152,86 @@ void reserveInsideLockHonoured(JenkinsRule j) throws Exception {
164152 j .assertLogNotContains ("Locked resource cause 2-2" , b1 );
165153 j .assertLogNotContains ("Locked resource cause 2-3" , b1 );
166154
155+ // Let p1 exit lock closure
156+ SemaphoreStep .success ("p1-inside/1" , null );
157+
158+ // --- Phase 2: p1 released lock but resource is still reserved ---
167159 j .waitForMessage ("Locked resource cause 1-3" , b1 );
168160 j .assertLogNotContains ("Locked resource cause 2-2" , b1 );
169161 j .assertLogNotContains ("Locked resource cause 2-3" , b1 );
170162
171- // Bug #1 happens here (without further patch):
172- // although resource is seen as reserved, it is
173- // grabbed anyway by the other parallel thread
174- // which is already waiting. Notably, log is like:
175- // 62.908 [setReservedByInsideLockHonoured #1] Lock acquired on [Label: label1]
176- // 62.909 [setReservedByInsideLockHonoured #1] Lock released on resource [Label: label1]
177- // and the consistent ordering of acquired first,
178- // released later is unsettling.
163+ // Bug #1: although resource is reserved, p2 must NOT grab it.
179164 j .waitForMessage ("Locked resource cause 1-4" , b1 );
180- // Note: stage in test has a sleep(1) to reduce chances that
181- // this line is noticed in log although it is there AFTER 1-4:
182165 j .assertLogNotContains ("Locked resource cause 2-2" , b1 );
183166 j .assertLogNotContains ("Locked resource cause 2-3" , b1 );
184167 LOGGER .info ("GOOD: Did not encounter Bug #1 " + "(parallel p2 gets the lock on a still-reserved resource)!" );
185168
169+ // --- Phase 3: p1 calls reset() → p2 gets lock via proceedNextContext ---
170+ // Ensure p2 is in the waiting queue before p1 calls reset()
171+ j .waitForMessage ("[Label: label1] is not free, waiting for execution ..." , b1 );
172+
173+ // p1 calls reset+unreserve, then blocks at semaphore p1-after-reset.
174+ // p2 gets the lock (dispatched by proceedNextContext), enters closure,
175+ // does reserve/unreserve cycle, then blocks at semaphore p2-holding.
176+ SemaphoreStep .success ("p1-before-reset/1" , null );
177+ SemaphoreStep .waitForStart ("p2-holding/1" , b1 );
178+
179+ // Release p3 and pressure stages into the lock queue while p2 holds the lock
180+ SemaphoreStep .success ("p3-wait/1" , null );
181+ for (String s : new String [] {"p4" , "p5" , "p6" , "p7" , "p8" , "p9" }) {
182+ SemaphoreStep .success (s + "/1" , null );
183+ }
184+
185+ // Now let p1 continue to log 1-5 (p2 already holds the lock)
186+ SemaphoreStep .success ("p1-after-reset/1" , null );
187+
186188 j .waitForMessage ("Locked resource cause 1-5" , b1 );
187- // This line might not strictly be required,
188- // but we are processing a parallel pipeline
189- // and many seconds were spent sleeping, so:
190189 j .assertLogContains ("Locked resource cause 2-1" , b1 );
191- // Here the other parallel stage may have already started
192- // (we try to recycle the resource between 1-4 and 1-5):
193- // j.assertLogNotContains("Locked resource cause 2-2", b1);
194- // j.assertLogNotContains("Locked resource cause 2-3", b1);
190+ j .assertLogContains ("Locked resource cause 2-2" , b1 );
191+ LOGGER .info ("GOOD: Parallel 2 started after Parallel 1 reset the resource!" );
195192
196- // Bug #2 happens here: even after the resource is known
197- // to be un-reserved, resources already looping waiting
198- // for it (after the fix for Bug #1) are not "notified".
199- // Adding and removing the resource helps unblock this.
200- boolean sawBug2a = false ;
201- try {
202- j .waitForMessage ("Locked resource cause 1-6" , b1 );
203- j .assertLogContains ("Locked resource cause 2-2" , b1 );
204- } catch (java .lang .AssertionError t1 ) {
205- sawBug2a = true ;
206- LOGGER .info ("Bug #2a (Parallel 2 did not start after Parallel 1 finished "
207- + "and resource later released) currently tolerated" );
208- // LOGGER.info(t1.toString());
209- // throw t1;
210- }
211- if (!sawBug2a ) {
212- LOGGER .info ("GOOD: Did not encounter Bug #2a "
213- + "(Parallel 2 did not start after Parallel 1 finished "
214- + "and resource later released)!" );
215- }
193+ // p2 holds the lock, reservation cycle (test2-1 → unreserve) already done
194+ j .assertLogContains ("Locked resource reservedBy 1-5: null" , b1 );
216195
217- // If the bug is resolved, then by the time we get to 1-5
218- // the resource should be taken by the other parallel stage
219- // and so not locked by not-"null"; reservation should be away though
220- // Note: Due to parallel execution timing, p2 may have already called
221- // reserve('test2-1') before p1 logs point 1-5, so we tolerate both states
222- boolean sawBug2b = false ;
223- try {
224- j .assertLogContains ("Locked resource reservedBy 1-5: null" , b1 );
225- } catch (java .lang .AssertionError t ) {
226- // p2 set a reservation before p1 logged 1-5 - this is expected timing variation
227- j .assertLogContains ("Locked resource reservedBy 1-5: test2-1" , b1 );
228- LOGGER .info ("Timing variation: p2 reserved resource before p1 logged 1-5 state" );
229- }
196+ // The resource is locked by p2 at point 1-5, so "cause 1-5: null" and the
197+ // un-stucking workaround should never appear.
230198 for (String line : new String [] {
231199 "Locked resource cause 1-5: null" , "LRM seems stuck; trying to reserve/unreserve" , "Secondary lock trick"
232200 }) {
233- try {
234- j .assertLogNotContains (line , b1 );
235- } catch (java .lang .AssertionError t2 ) {
236- sawBug2b = true ;
237- LOGGER .info ("Bug #2b (LRM required un-stucking) currently tolerated: " + line );
238- // LOGGER.info(t2.toString());
239- // throw t2;
240- }
241- }
242- if (!sawBug2b ) {
243- LOGGER .info ("GOOD: Did not encounter Bug #2b " + "(LRM required un-stucking)!" );
201+ j .assertLogNotContains (line , b1 );
244202 }
203+ LOGGER .info ("GOOD: Resource was properly locked at point 1-5 (no un-stucking needed)!" );
204+
205+ j .waitForMessage ("Locked resource cause 1-6" , b1 );
206+ j .assertLogContains ("Locked resource cause 2-2" , b1 );
245207
246208 j .waitForMessage ("Locked resource cause 2-2" , b1 );
247209 j .assertLogContains ("Locked resource cause 1-5" , b1 );
248- LOGGER .info ("GOOD: lock#2 was taken after we un-reserved lock#1 " );
210+ LOGGER .info ("GOOD: lock#2 was taken after p1 reset/released the resource " );
249211
250- j .waitForMessage ("Unlocking parallel closure 2" , b1 );
212+ // --- Phase 4: p2 exits lock (still reserved as test2-2) → p3 must NOT get in ---
213+ SemaphoreStep .success ("p2-holding/1" , null );
214+
215+ // Wait for p2 to reach the semaphore before recycle — lock is released but reserved
216+ SemaphoreStep .waitForStart ("p2-before-recycle/1" , b1 );
217+ j .assertLogContains ("Unlocking parallel closure 2" , b1 );
251218 j .assertLogNotContains ("Locked resource cause 3-2" , b1 );
252219 LOGGER .info ("GOOD: lock#3 was NOT taken just after we un-locked closure 2 (keeping lock#2 reserved)" );
253220
254- // After 2-3 we lrm.recycle() the lock so it should
255- // go to the next bidder
221+ // --- Phase 5: p2 recycles → p3 gets lock ---
222+ // p2 calls recycle(), p3 gets the lock, then p2 blocks at p2-after-recycle.
223+ // Wait for p3 to actually log 3-2 before letting p2 log 2-4.
224+ SemaphoreStep .success ("p2-before-recycle/1" , null );
225+ j .waitForMessage ("Locked resource cause 3-2" , b1 );
226+ SemaphoreStep .success ("p2-after-recycle/1" , null );
227+
256228 j .waitForMessage ("Locked resource cause 2-4" , b1 );
257229 j .assertLogContains ("Locked resource cause 3-2" , b1 );
258230 LOGGER .info ("GOOD: lock#3 was taken just after we recycled lock#2" );
259231
232+ // Let p3 finish
233+ SemaphoreStep .success ("p3-in-lock/1" , null );
234+
260235 j .assertLogContains (", waiting for execution ..." , b1 );
261236
262237 j .assertBuildStatusSuccess (j .waitForCompletion (b1 ));
0 commit comments