Skip to content

Commit b77026f

Browse files
MarijnS95 and claude committed
Add Queue::submit() for command buffer submission
Move command buffer submission logic from each backend's Device into Queue::submit(), which takes ownership of the command buffers. For now it blocks internally until completion; a TODO marks that it will return a Fence once the Fence abstraction from PR #1007 is available.

- Metal: commit() + waitUntilCompleted()
- Vulkan: vkEndCommandBuffer() + vkQueueSubmit() with temporary fence + vkWaitForFences()
- DX12: CmdList::Close() + ExecuteCommandLists() + Queue::Signal()/Fence::SetEventOnCompletion() wait

VulkanQueue now stores a VkDevice handle (with a TODO for lifetime management) so it can create/destroy fences independently.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent 5a2952f commit b77026f

4 files changed

Lines changed: 142 additions & 57 deletions

File tree

include/API/Device.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,19 @@ class Queue {
6262
public:
6363
virtual ~Queue() = 0;
6464

65+
/// Submit command buffers for execution and block until completion.
66+
// TODO: Return a Fence instead of blocking, once the Fence abstraction
67+
// from PR #1007 is available.
68+
virtual llvm::Error
69+
submit(llvm::SmallVector<std::unique_ptr<CommandBuffer>> CBs) = 0;
70+
71+
/// Convenience overload for submitting a single command buffer.
72+
llvm::Error submit(std::unique_ptr<CommandBuffer> CB) {
73+
llvm::SmallVector<std::unique_ptr<CommandBuffer>> CBs;
74+
CBs.push_back(std::move(CB));
75+
return submit(std::move(CBs));
76+
}
77+
6578
protected:
6679
Queue() = default;
6780
};

lib/API/DX/Device.cpp

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,22 +291,28 @@ class DXBuffer : public offloadtest::Buffer {
291291

292292
class DXQueue : public offloadtest::Queue {
293293
public:
294+
using Queue::submit;
295+
294296
ComPtr<ID3D12CommandQueue> Queue;
295297

296298
DXQueue(ComPtr<ID3D12CommandQueue> Queue) : Queue(Queue) {}
297-
virtual ~DXQueue() {}
299+
~DXQueue() override {}
298300

299301
static llvm::Expected<DXQueue>
300302
createGraphicsQueue(ComPtr<ID3D12Device> Device) {
301303
const D3D12_COMMAND_QUEUE_DESC Desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, 0,
302304
D3D12_COMMAND_QUEUE_FLAG_NONE, 0};
303-
ComPtr<ID3D12CommandQueue> Queue;
304-
if (auto Err =
305-
HR::toError(Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&Queue)),
306-
"Failed to create command queue."))
305+
ComPtr<ID3D12CommandQueue> CmdQueue;
306+
if (auto Err = HR::toError(
307+
Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&CmdQueue)),
308+
"Failed to create command queue."))
307309
return Err;
308-
return DXQueue(Queue);
310+
return DXQueue(CmdQueue);
309311
}
312+
313+
llvm::Error
314+
submit(llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs)
315+
override;
310316
};
311317

312318
class DXCommandBuffer : public offloadtest::CommandBuffer {
@@ -369,6 +375,52 @@ class DXCommandBuffer : public offloadtest::CommandBuffer {
369375
DXCommandBuffer() : CommandBuffer(GPUAPI::DirectX) {}
370376
};
371377

378+
/// Closes, executes and waits for every command buffer in \p CBs, in order.
///
/// Each command list is handed to the queue on its own, followed by a signal
/// on that command buffer's fence; the call then blocks until the fence has
/// reached the signalled value. The first failure is returned immediately and
/// any remaining command buffers are not submitted.
llvm::Error DXQueue::submit(
    llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs) {
  // This is a hack but it works since this is all single threaded code.
  static uint64_t FenceCounter = 0;

  for (auto &Entry : CBs) {
    auto &Cmd = Entry->as<DXCommandBuffer>();

    // Finish recording before handing the list to the queue.
    if (auto Err =
            HR::toError(Cmd.CmdList->Close(), "Failed to close command list."))
      return Err;

    ID3D12CommandList *const Lists[] = {Cmd.CmdList.Get()};
    Queue->ExecuteCommandLists(1, Lists);

    // Queue a signal behind the work just submitted. FenceCounter is only
    // advanced once this command buffer has been waited on successfully.
    const uint64_t Target = FenceCounter + 1;
    if (auto Err = HR::toError(Queue->Signal(Cmd.Fence.Get(), Target),
                               "Failed to add signal."))
      return Err;

    // Only block when the fence has not reached the target value yet.
    const bool StillPending = Cmd.Fence->GetCompletedValue() < Target;
    if (StillPending) {
#ifdef _WIN32
      HANDLE Event = Cmd.Event;
#else // WSL
      HANDLE Event = reinterpret_cast<HANDLE>(Cmd.Event);
#endif
      if (auto Err =
              HR::toError(Cmd.Fence->SetEventOnCompletion(Target, Event),
                          "Failed to register end event."))
        return Err;

#ifdef _WIN32
      WaitForSingleObject(Cmd.Event, INFINITE);
#else // WSL
      // The event is waited on as a file descriptor, with no timeout.
      pollfd Waiter = {};
      Waiter.fd = Cmd.Event;
      Waiter.events = POLLIN;
      Waiter.revents = 0;
      if (poll(&Waiter, 1, -1) == -1)
        return llvm::createStringError(
            std::error_code(errno, std::system_category()), strerror(errno));
#endif
    }

    FenceCounter = Target;
  }

  return llvm::Error::success();
}
372424
class DXDevice : public offloadtest::Device {
373425
private:
374426
ComPtr<IDXCoreAdapter> Adapter;
@@ -1175,8 +1227,10 @@ class DXDevice : public offloadtest::Device {
11751227
IS.CB->CmdList->ResourceBarrier(1, &Barrier);
11761228
}
11771229

1230+
// waitForSignal is used for tile mapping synchronization, not command buffer
1231+
// submission. TODO: Replace with a proper fence abstraction.
11781232
llvm::Error waitForSignal(InvocationState &IS) {
1179-
// This is a hack but it works since this is all single threaded code.
1233+
// Reuse the command buffer's fence for a quick queue-level signal/wait.
11801234
static uint64_t FenceCounter = 0;
11811235
const uint64_t CurrentCounter = FenceCounter + 1;
11821236

@@ -1213,14 +1267,7 @@ class DXDevice : public offloadtest::Device {
12131267
}
12141268

12151269
llvm::Error executeCommandList(InvocationState &IS) {
1216-
if (auto Err = HR::toError(IS.CB->CmdList->Close(),
1217-
"Failed to close command list."))
1218-
return Err;
1219-
1220-
ID3D12CommandList *const CmdLists[] = {IS.CB->CmdList.Get()};
1221-
GraphicsQueue.Queue->ExecuteCommandLists(1, CmdLists);
1222-
1223-
return waitForSignal(IS);
1270+
return GraphicsQueue.submit(std::move(IS.CB));
12241271
}
12251272

12261273
llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) {

lib/API/MTL/MTLDevice.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,18 @@ static MTL::VertexFormat getMTLVertexFormat(DataFormat Format, int Channels) {
7575
namespace {
7676
class MTLQueue : public offloadtest::Queue {
7777
public:
78+
using Queue::submit;
79+
7880
MTL::CommandQueue *Queue;
7981
MTLQueue(MTL::CommandQueue *Queue) : Queue(Queue) {}
80-
~MTLQueue() {
82+
~MTLQueue() override {
8183
if (Queue)
8284
Queue->release();
8385
}
86+
87+
llvm::Error
88+
submit(llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs)
89+
override;
8490
};
8591

8692
class MTLBuffer : public offloadtest::Buffer {
@@ -122,6 +128,19 @@ class MTLCommandBuffer : public offloadtest::CommandBuffer {
122128
MTLCommandBuffer() : CommandBuffer(GPUAPI::Metal) {}
123129
};
124130

131+
/// Commits every command buffer in \p CBs, one after another, and blocks until
/// each has completed.
///
/// After a command buffer completes its error state is inspected; the first
/// error found is converted with toError() and returned, and any remaining
/// command buffers are not committed.
llvm::Error MTLQueue::submit(
    llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs) {
  for (auto &Entry : CBs) {
    auto &Native = Entry->as<MTLCommandBuffer>().CmdBuffer;

    Native->commit();
    Native->waitUntilCompleted();

    // Surface any error that occurred during execution.
    if (NS::Error *Failure = Native->error())
      return toError(Failure);
  }

  return llvm::Error::success();
}
125144
class MTLDevice : public offloadtest::Device {
126145
Capabilities Caps;
127146
MTL::Device *Device;
@@ -505,15 +524,7 @@ class MTLDevice : public offloadtest::Device {
505524
}
506525

507526
llvm::Error executeCommands(InvocationState &IS) {
508-
IS.CB->CmdBuffer->commit();
509-
IS.CB->CmdBuffer->waitUntilCompleted();
510-
511-
// Check and surface any errors that occurred during execution.
512-
NS::Error *CBErr = IS.CB->CmdBuffer->error();
513-
if (CBErr)
514-
return toError(CBErr);
515-
516-
return llvm::Error::success();
527+
return GraphicsQueue.submit(std::move(IS.CB));
517528
}
518529

519530
llvm::Error copyBack(Pipeline &P, InvocationState &IS) {

lib/API/VK/Device.cpp

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -358,10 +358,18 @@ class VulkanBuffer : public offloadtest::Buffer {
358358

359359
class VulkanQueue : public offloadtest::Queue {
360360
public:
361+
using Queue::submit;
362+
361363
VkQueue Queue = VK_NULL_HANDLE;
362364
uint32_t QueueFamilyIdx = 0;
363-
VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx)
364-
: Queue(Q), QueueFamilyIdx(QueueFamilyIdx) {}
365+
// TODO: Ensure device lifetime is managed (e.g. via shared_ptr).
366+
VkDevice Device = VK_NULL_HANDLE;
367+
VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx, VkDevice Device)
368+
: Queue(Q), QueueFamilyIdx(QueueFamilyIdx), Device(Device) {}
369+
370+
llvm::Error
371+
submit(llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs)
372+
override;
365373
};
366374

367375
class VulkanCommandBuffer : public offloadtest::CommandBuffer {
@@ -414,6 +422,37 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer {
414422
VulkanCommandBuffer() : CommandBuffer(GPUAPI::Vulkan) {}
415423
};
416424

425+
/// Ends, submits and waits for every command buffer in \p CBs, in order.
///
/// Each command buffer is submitted on its own with a temporary fence and the
/// call blocks until that fence is signalled. The first failure is returned
/// immediately and any remaining command buffers are not submitted.
llvm::Error VulkanQueue::submit(
    llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs) {
  for (auto &CB : CBs) {
    auto &VCB = CB->as<VulkanCommandBuffer>();
    if (vkEndCommandBuffer(VCB.CmdBuffer))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Could not end command buffer.");

    VkSubmitInfo SubmitInfo = {};
    SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    SubmitInfo.commandBufferCount = 1;
    SubmitInfo.pCommandBuffers = &VCB.CmdBuffer;

    VkFenceCreateInfo FenceInfo = {};
    FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    VkFence Fence = VK_NULL_HANDLE;
    if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Could not create fence.");

    if (vkQueueSubmit(Queue, 1, &SubmitInfo, Fence)) {
      // The submission was rejected, so release the temporary fence here
      // instead of leaking it on the early return.
      vkDestroyFence(Device, Fence, nullptr);
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Failed to submit to queue.");
    }

    // NOTE(review): the fence is deliberately not destroyed when the wait
    // fails, since it may still be attached to the pending submission —
    // confirm the intended cleanup once a Fence abstraction exists.
    if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Failed waiting for fence.");

    vkDestroyFence(Device, Fence, nullptr);
  }
  return llvm::Error::success();
}
417456
class VulkanDevice : public offloadtest::Device {
418457
private:
419458
VkPhysicalDevice PhysicalDevice;
@@ -608,7 +647,8 @@ class VulkanDevice : public offloadtest::Device {
608647
VkQueue DeviceQueue = VK_NULL_HANDLE;
609648
vkGetDeviceQueue(Device, QueueFamilyIdx, 0, &DeviceQueue);
610649

611-
const VulkanQueue GraphicsQueue = VulkanQueue(DeviceQueue, QueueFamilyIdx);
650+
const VulkanQueue GraphicsQueue =
651+
VulkanQueue(DeviceQueue, QueueFamilyIdx, Device);
612652

613653
return std::make_shared<VulkanDevice>(PhysicalDevice, Props, Device,
614654
std::move(GraphicsQueue),
@@ -1131,34 +1171,8 @@ class VulkanDevice : public offloadtest::Device {
11311171
return llvm::Error::success();
11321172
}
11331173

1134-
llvm::Error executeCommandBuffer(InvocationState &IS,
1135-
VkPipelineStageFlags WaitMask = 0) {
1136-
if (vkEndCommandBuffer(IS.CB->CmdBuffer))
1137-
return llvm::createStringError(std::errc::device_or_resource_busy,
1138-
"Could not end command buffer.");
1139-
1140-
VkSubmitInfo SubmitInfo = {};
1141-
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1142-
SubmitInfo.commandBufferCount = 1;
1143-
SubmitInfo.pCommandBuffers = &IS.CB->CmdBuffer;
1144-
SubmitInfo.pWaitDstStageMask = &WaitMask;
1145-
VkFenceCreateInfo FenceInfo = {};
1146-
FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
1147-
VkFence Fence;
1148-
if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
1149-
return llvm::createStringError(std::errc::device_or_resource_busy,
1150-
"Could not create fence.");
1151-
1152-
// Submit to the queue
1153-
if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, Fence))
1154-
return llvm::createStringError(std::errc::device_or_resource_busy,
1155-
"Failed to submit to queue.");
1156-
if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX))
1157-
return llvm::createStringError(std::errc::device_or_resource_busy,
1158-
"Failed waiting for fence.");
1159-
1160-
vkDestroyFence(Device, Fence, nullptr);
1161-
return llvm::Error::success();
1174+
llvm::Error executeCommandBuffer(InvocationState &IS) {
1175+
return GraphicsQueue.submit(std::move(IS.CB));
11621176
}
11631177

11641178
llvm::Error createDescriptorPool(Pipeline &P, InvocationState &IS) {
@@ -2333,7 +2347,7 @@ class VulkanDevice : public offloadtest::Device {
23332347
if (auto Err = createCommands(P, State))
23342348
return Err;
23352349
llvm::outs() << "Commands created.\n";
2336-
if (auto Err = executeCommandBuffer(State, VK_PIPELINE_STAGE_TRANSFER_BIT))
2350+
if (auto Err = executeCommandBuffer(State))
23372351
return Err;
23382352
llvm::outs() << "Executed compute command buffer.\n";
23392353
if (auto Err = readBackData(P, State))

0 commit comments

Comments
 (0)