Skip to content

Commit 014151a

Browse files
MarijnS95 and claude committed
Add debug labels to command encoders for GPU debugger visibility
Add pushDebugGroup/popDebugGroup/insertDebugSignpost to CommandEncoder with no-op defaults. Each backend overrides them:
- Vulkan: vkCmd{Begin,End,Insert}DebugUtilsLabelEXT, loaded via vkGetDeviceProcAddr (naturally gated by VK_EXT_debug_utils availability)
- DX12: ID3D12GraphicsCommandList BeginEvent/EndEvent/SetMarker with ANSI string encoding
- Metal: pushDebugGroup/popDebugGroup/insertDebugSignpost on the active native encoder, with correct pop/push across compute/blit switches

Every encoder command automatically emits a signpost with its parameters (e.g. "Dispatch [8,1,1]", "CopyBuffer 4096B", "FillBuffer 256B value=0x00"). Encoder creation pushes a "ComputeEncoder (Serial/Parallel)" debug group, balanced by a pop in endEncoding.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent a1d851f commit 014151a

4 files changed

Lines changed: 134 additions & 9 deletions

File tree

include/API/Encoder.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "API/API.h"
1313

14+
#include "llvm/ADT/StringRef.h"
1415
#include "llvm/Support/Error.h"
1516

1617
#include <cstddef>
@@ -56,6 +57,16 @@ class CommandEncoder {
5657
virtual llvm::Error fillBuffer(Buffer &Dst, size_t Offset, size_t Size,
5758
uint8_t Value) = 0;
5859

60+
/// Begin a named debug group. Visible in GPU debuggers (PIX, RenderDoc,
61+
/// Xcode). Must be balanced by a corresponding popDebugGroup() call.
62+
virtual void pushDebugGroup(llvm::StringRef Label) {}
63+
64+
/// End the most recently pushed debug group.
65+
virtual void popDebugGroup() {}
66+
67+
/// Insert a point-in-time debug marker.
68+
virtual void insertDebugSignpost(llvm::StringRef Label) {}
69+
5970
/// Finish recording. No further commands may be recorded after this call.
6071
virtual void endEncoding() = 0;
6172
};

lib/API/DX/Device.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/ADT/SmallVector.h"
4444
#include "llvm/Object/DXContainer.h"
4545
#include "llvm/Support/Error.h"
46+
#include "llvm/Support/FormatVariadic.h"
4647
#include "llvm/Support/Signals.h"
4748

4849
#include <codecvt>
@@ -552,13 +553,26 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
552553

553554
~DXComputeEncoder() override = default;
554555

556+
/// Open a named event region on the D3D12 command list.
///
/// \param Label Region name. It does not have to be null-terminated; an owned,
///              terminated copy is handed to the API.
void pushDebugGroup(llvm::StringRef Label) override {
  // Legacy PIX event encoding carried in the Metadata argument:
  // 0 tags the payload as UTF-16, 1 tags it as an ANSI (char) string.
  constexpr UINT PixEventAnsiVersion = 1;
  // StringRef gives no null-termination guarantee, so reading
  // Label.size() + 1 bytes from Label.data() could run past the buffer.
  const std::string Text = Label.str();
  // The size is in bytes and includes the terminating '\0'.
  CB.CmdList->BeginEvent(PixEventAnsiVersion, Text.c_str(),
                         static_cast<UINT>(Text.size() + 1));
}
559+
560+
void popDebugGroup() override { CB.CmdList->EndEvent(); }
561+
562+
/// Record a point-in-time marker on the D3D12 command list.
///
/// \param Label Marker text. It does not have to be null-terminated; an owned,
///              terminated copy is handed to the API.
void insertDebugSignpost(llvm::StringRef Label) override {
  // Legacy PIX event encoding carried in the Metadata argument:
  // 0 tags the payload as UTF-16, 1 tags it as an ANSI (char) string.
  constexpr UINT PixEventAnsiVersion = 1;
  // StringRef gives no null-termination guarantee, so reading
  // Label.size() + 1 bytes from Label.data() could run past the buffer.
  const std::string Text = Label.str();
  // The size is in bytes and includes the terminating '\0'.
  CB.CmdList->SetMarker(PixEventAnsiVersion, Text.c_str(),
                        static_cast<UINT>(Text.size() + 1));
}
565+
555566
/// Record a compute dispatch of the given number of thread groups.
///
/// The threads-per-group arguments are ignored: DX12 bakes the threadgroup
/// size into the pipeline, so only the group counts reach the command list.
/// A "Dispatch [x,y,z]" signpost is emitted just before the dispatch itself.
///
/// \returns llvm::Error::success() unconditionally.
llvm::Error dispatch(uint32_t GroupCountX, uint32_t GroupCountY,
                     uint32_t GroupCountZ, uint32_t /*ThreadsPerGroupX*/,
                     uint32_t /*ThreadsPerGroupY*/,
                     uint32_t /*ThreadsPerGroupZ*/) override {
  addDstBarrier();

  // Build the signpost text first so the recording calls read top to bottom.
  const auto SignpostText =
      llvm::formatv("Dispatch [{0},{1},{2}]", GroupCountX, GroupCountY,
                    GroupCountZ)
          .str();
  insertDebugSignpost(SignpostText);

  CB.CmdList->Dispatch(GroupCountX, GroupCountY, GroupCountZ);
  return llvm::Error::success();
}
@@ -568,6 +582,7 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
568582
size_t Size) override {
569583
auto &DXSrc = static_cast<DXBuffer &>(Src);
570584
auto &DXDst = static_cast<DXBuffer &>(Dst);
585+
insertDebugSignpost(llvm::formatv("CopyBuffer {0}B", Size).str());
571586
CB.CmdList->CopyBufferRegion(DXDst.Buffer.Get(), DstOffset,
572587
DXSrc.Buffer.Get(), SrcOffset, Size);
573588
return llvm::Error::success();
@@ -590,23 +605,30 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
590605
return Err;
591606
addDstBarrier();
592607
auto &DXBuf = static_cast<DXBuffer &>(ArgBuffer);
608+
insertDebugSignpost(
609+
llvm::formatv("DispatchIndirect offset={0}", Offset).str());
593610
CB.CmdList->ExecuteIndirect(CB.DispatchIndirectSig.Get(), 1,
594611
DXBuf.Buffer.Get(), Offset, nullptr, 0);
595612
return llvm::Error::success();
596613
}
597614

598-
void barrier() override { CB.flushBarrier(); }
599-
600-
void endEncoding() override {
601-
// State remains on the command buffer for the next encoder.
615+
void barrier() override {
616+
insertDebugSignpost("Barrier");
617+
CB.flushBarrier();
602618
}
619+
620+
void endEncoding() override { popDebugGroup(); }
603621
};
604622

605623
llvm::Expected<std::unique_ptr<offloadtest::ComputeEncoder>>
606624
DXCommandBuffer::createComputeEncoder(offloadtest::EncoderMode Mode) {
607625
if (Mode == offloadtest::EncoderMode::Parallel)
608626
flushBarrier();
609-
return std::make_unique<DXComputeEncoder>(*this, Mode);
627+
auto Enc = std::make_unique<DXComputeEncoder>(*this, Mode);
628+
Enc->pushDebugGroup(Mode == offloadtest::EncoderMode::Serial
629+
? "ComputeEncoder (Serial)"
630+
: "ComputeEncoder (Parallel)");
631+
return Enc;
610632
}
611633
class DXDevice : public offloadtest::Device {
612634
private:

lib/API/MTL/MTLDevice.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/ADT/ScopeExit.h"
1818
#include "llvm/ADT/SmallString.h"
1919
#include "llvm/Support/Error.h"
20+
#include "llvm/Support/FormatVariadic.h"
2021
#include "llvm/Support/JSON.h"
2122
#include "llvm/Support/raw_ostream.h"
2223
#include <algorithm>
@@ -230,6 +231,9 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
230231
if (!ComputeEnc)
231232
return llvm::createStringError(std::errc::device_or_resource_busy,
232233
"Failed to create Metal compute encoder.");
234+
ComputeEnc->pushDebugGroup(NS::String::string(
235+
isSerial() ? "ComputeEncoder (Serial)" : "ComputeEncoder (Parallel)",
236+
NS::UTF8StringEncoding));
233237
return llvm::Error::success();
234238
}
235239

@@ -239,6 +243,7 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
239243
return llvm::Error::success();
240244
if (ComputeEnc) {
241245
barrier();
246+
ComputeEnc->popDebugGroup();
242247
ComputeEnc->endEncoding();
243248
ComputeEnc = nullptr;
244249
}
@@ -263,6 +268,29 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
263268

264269
MTL::ComputeCommandEncoder *getNative() const { return ComputeEnc; }
265270

271+
/// Return the native encoder that debug commands should target: the compute
/// encoder when one is set, otherwise the blit encoder (which may be null).
MTL::CommandEncoder *getActiveEncoder() const {
  if (!ComputeEnc)
    return BlitEnc;
  return ComputeEnc;
}
276+
277+
/// Push a named debug group onto the currently active native encoder.
/// Does nothing when no native encoder is active.
///
/// \param Label Group name. It does not have to be null-terminated; an owned,
///              terminated copy is handed to NS::String.
void pushDebugGroup(llvm::StringRef Label) override {
  MTL::CommandEncoder *Enc = getActiveEncoder();
  if (!Enc)
    return;
  // NS::String::string receives only a char pointer (no length), so the text
  // must be terminated; StringRef::data() does not guarantee that.
  const std::string Text = Label.str();
  Enc->pushDebugGroup(
      NS::String::string(Text.c_str(), NS::UTF8StringEncoding));
}
282+
283+
void popDebugGroup() override {
284+
if (auto *Enc = getActiveEncoder())
285+
Enc->popDebugGroup();
286+
}
287+
288+
/// Insert a point-in-time debug signpost on the currently active native
/// encoder. Does nothing when no native encoder is active.
///
/// \param Label Signpost text. It does not have to be null-terminated; an
///              owned, terminated copy is handed to NS::String.
void insertDebugSignpost(llvm::StringRef Label) override {
  MTL::CommandEncoder *Enc = getActiveEncoder();
  if (!Enc)
    return;
  // NS::String::string receives only a char pointer (no length), so the text
  // must be terminated; StringRef::data() does not guarantee that.
  const std::string Text = Label.str();
  Enc->insertDebugSignpost(
      NS::String::string(Text.c_str(), NS::UTF8StringEncoding));
}
293+
266294
llvm::Error dispatch(uint32_t GroupCountX, uint32_t GroupCountY,
267295
uint32_t GroupCountZ, uint32_t ThreadsPerGroupX,
268296
uint32_t ThreadsPerGroupY,
@@ -278,6 +306,9 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
278306
static_cast<NS::UInteger>(ThreadsPerGroupZ) * GroupCountZ);
279307
const MTL::Size GroupSize(ThreadsPerGroupX, ThreadsPerGroupY,
280308
ThreadsPerGroupZ);
309+
insertDebugSignpost(llvm::formatv("Dispatch [{0},{1},{2}]", GroupCountX,
310+
GroupCountY, GroupCountZ)
311+
.str());
281312
ComputeEnc->dispatchThreads(GridSize, GroupSize);
282313
return llvm::Error::success();
283314
}
@@ -293,6 +324,8 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
293324
auto &MTLBuf = static_cast<MTLBuffer &>(ArgBuffer);
294325
const MTL::Size GroupSize(ThreadsPerGroupX, ThreadsPerGroupY,
295326
ThreadsPerGroupZ);
327+
insertDebugSignpost(
328+
llvm::formatv("DispatchIndirect offset={0}", Offset).str());
296329
ComputeEnc->dispatchThreadgroups(MTLBuf.Buf, Offset, GroupSize);
297330
return llvm::Error::success();
298331
}
@@ -304,6 +337,7 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
304337
return Err;
305338
auto &MTLSrc = static_cast<MTLBuffer &>(Src);
306339
auto &MTLDst = static_cast<MTLBuffer &>(Dst);
340+
insertDebugSignpost(llvm::formatv("CopyBuffer {0}B", Size).str());
307341
BlitEnc->copyFromBuffer(MTLSrc.Buf, SrcOffset, MTLDst.Buf, DstOffset, Size);
308342
return llvm::Error::success();
309343
}
@@ -313,12 +347,15 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
313347
if (auto Err = ensureBlitEncoder())
314348
return Err;
315349
auto &MTLDst = static_cast<MTLBuffer &>(Dst);
350+
insertDebugSignpost(
351+
llvm::formatv("FillBuffer {0}B value=0x{1:x2}", Size, Value).str());
316352
BlitEnc->fillBuffer(MTLDst.Buf, NS::Range(Offset, Size), Value);
317353
return llvm::Error::success();
318354
}
319355

320356
void barrier() override {
321357
if (ComputeEnc && PendingScope != MTL::BarrierScope(0)) {
358+
insertDebugSignpost("Barrier");
322359
ComputeEnc->memoryBarrier(PendingScope);
323360
PendingScope = MTL::BarrierScope(0);
324361
}
@@ -327,6 +364,7 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
327364
void endEncoding() override {
328365
if (ComputeEnc) {
329366
barrier();
367+
ComputeEnc->popDebugGroup();
330368
ComputeEnc->endEncoding();
331369
ComputeEnc = nullptr;
332370
}
@@ -345,6 +383,10 @@ MTLCommandBuffer::createComputeEncoder(EncoderMode Mode) {
345383
return llvm::createStringError(
346384
std::errc::device_or_resource_busy,
347385
"Failed to create Metal compute command encoder.");
386+
NativeEncoder->pushDebugGroup(NS::String::string(
387+
Mode == EncoderMode::Serial ? "ComputeEncoder (Serial)"
388+
: "ComputeEncoder (Parallel)",
389+
NS::UTF8StringEncoding));
348390
return std::make_unique<MTLComputeEncoder>(CmdBuffer, NativeEncoder, Mode);
349391
}
350392
class MTLDevice : public offloadtest::Device {

lib/API/VK/Device.cpp

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/DenseSet.h"
1717
#include "llvm/ADT/ScopeExit.h"
1818
#include "llvm/Support/Error.h"
19+
#include "llvm/Support/FormatVariadic.h"
1920

2021
#include <algorithm>
2122
#include <cmath>
@@ -505,6 +506,10 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer {
505506
VkCommandPool CmdPool = VK_NULL_HANDLE;
506507
VkCommandBuffer CmdBuffer = VK_NULL_HANDLE;
507508

509+
PFN_vkCmdBeginDebugUtilsLabelEXT CmdBeginDebugUtilsLabel = nullptr;
510+
PFN_vkCmdEndDebugUtilsLabelEXT CmdEndDebugUtilsLabel = nullptr;
511+
PFN_vkCmdInsertDebugUtilsLabelEXT CmdInsertDebugUtilsLabel = nullptr;
512+
508513
static llvm::Expected<std::unique_ptr<VulkanCommandBuffer>>
509514
create(VkDevice Device, uint32_t QueueFamilyIdx) {
510515
auto CB = std::unique_ptr<VulkanCommandBuffer>(new VulkanCommandBuffer());
@@ -532,6 +537,17 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer {
532537
if (vkBeginCommandBuffer(CB->CmdBuffer, &BufferInfo))
533538
return llvm::createStringError(std::errc::device_or_resource_busy,
534539
"Could not begin command buffer.");
540+
541+
CB->CmdBeginDebugUtilsLabel =
542+
(PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetDeviceProcAddr(
543+
Device, "vkCmdBeginDebugUtilsLabelEXT");
544+
CB->CmdEndDebugUtilsLabel =
545+
(PFN_vkCmdEndDebugUtilsLabelEXT)vkGetDeviceProcAddr(
546+
Device, "vkCmdEndDebugUtilsLabelEXT");
547+
CB->CmdInsertDebugUtilsLabel =
548+
(PFN_vkCmdInsertDebugUtilsLabelEXT)vkGetDeviceProcAddr(
549+
Device, "vkCmdInsertDebugUtilsLabelEXT");
550+
535551
return CB;
536552
}
537553

@@ -639,6 +655,29 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
639655

640656
~VKComputeEncoder() override = default;
641657

658+
/// Begin a labelled region in the command buffer through the
/// CmdBeginDebugUtilsLabel function pointer. Does nothing when that pointer
/// is null.
///
/// \param Label Region name. It does not have to be null-terminated; an owned,
///              terminated copy is handed to Vulkan.
void pushDebugGroup(llvm::StringRef Label) override {
  if (!CB.CmdBeginDebugUtilsLabel)
    return;
  // pLabelName must point to a null-terminated UTF-8 string, which
  // StringRef::data() does not guarantee.
  const std::string Text = Label.str();
  VkDebugUtilsLabelEXT LabelInfo = {};
  LabelInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
  LabelInfo.pLabelName = Text.c_str();
  CB.CmdBeginDebugUtilsLabel(CB.CmdBuffer, &LabelInfo);
}
666+
667+
void popDebugGroup() override {
668+
if (CB.CmdEndDebugUtilsLabel)
669+
CB.CmdEndDebugUtilsLabel(CB.CmdBuffer);
670+
}
671+
672+
/// Insert a single point-in-time label into the command buffer through the
/// CmdInsertDebugUtilsLabel function pointer. Does nothing when that pointer
/// is null.
///
/// \param Label Marker text. It does not have to be null-terminated; an owned,
///              terminated copy is handed to Vulkan.
void insertDebugSignpost(llvm::StringRef Label) override {
  if (!CB.CmdInsertDebugUtilsLabel)
    return;
  // pLabelName must point to a null-terminated UTF-8 string, which
  // StringRef::data() does not guarantee.
  const std::string Text = Label.str();
  VkDebugUtilsLabelEXT LabelInfo = {};
  LabelInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
  LabelInfo.pLabelName = Text.c_str();
  CB.CmdInsertDebugUtilsLabel(CB.CmdBuffer, &LabelInfo);
}
680+
642681
llvm::Error dispatch(uint32_t GroupCountX, uint32_t GroupCountY,
643682
uint32_t GroupCountZ, uint32_t /*ThreadsPerGroupX*/,
644683
uint32_t /*ThreadsPerGroupY*/,
@@ -648,6 +687,9 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
648687
addDstBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
649688
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
650689

690+
insertDebugSignpost(llvm::formatv("Dispatch [{0},{1},{2}]", GroupCountX,
691+
GroupCountY, GroupCountZ)
692+
.str());
651693
vkCmdDispatch(CB.CmdBuffer, GroupCountX, GroupCountY, GroupCountZ);
652694
return llvm::Error::success();
653695
}
@@ -663,6 +705,8 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
663705
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT |
664706
VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
665707
auto &VKBuf = static_cast<VulkanBuffer &>(ArgBuffer);
708+
insertDebugSignpost(
709+
llvm::formatv("DispatchIndirect offset={0}", Offset).str());
666710
vkCmdDispatchIndirect(CB.CmdBuffer, VKBuf.Buffer,
667711
static_cast<VkDeviceSize>(Offset));
668712
return llvm::Error::success();
@@ -677,6 +721,7 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
677721
Region.srcOffset = SrcOffset;
678722
Region.dstOffset = DstOffset;
679723
Region.size = Size;
724+
insertDebugSignpost(llvm::formatv("CopyBuffer {0}B", Size).str());
680725
vkCmdCopyBuffer(CB.CmdBuffer, VKSrc.Buffer, VKDst.Buffer, 1, &Region);
681726
return llvm::Error::success();
682727
}
@@ -688,18 +733,19 @@ class VKComputeEncoder : public offloadtest::ComputeEncoder {
688733
// vkCmdFillBuffer writes repeatedly.
689734
uint32_t Data = uint32_t(Value) * 0x01010101u;
690735
addDstBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
736+
insertDebugSignpost(
737+
llvm::formatv("FillBuffer {0}B value=0x{1:x2}", Size, Value).str());
691738
vkCmdFillBuffer(CB.CmdBuffer, VKDst.Buffer, Offset, Size, Data);
692739
return llvm::Error::success();
693740
}
694741

695742
void barrier() override {
743+
insertDebugSignpost("Barrier");
696744
CB.flushBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
697745
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
698746
}
699747

700-
void endEncoding() override {
701-
// State remains on the command buffer for the next encoder.
702-
}
748+
void endEncoding() override { popDebugGroup(); }
703749
};
704750

705751
llvm::Expected<std::unique_ptr<offloadtest::ComputeEncoder>>
@@ -709,7 +755,11 @@ VulkanCommandBuffer::createComputeEncoder(offloadtest::EncoderMode Mode) {
709755
flushBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
710756
VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT);
711757
}
712-
return std::make_unique<VKComputeEncoder>(*this, Mode);
758+
auto Enc = std::make_unique<VKComputeEncoder>(*this, Mode);
759+
Enc->pushDebugGroup(Mode == offloadtest::EncoderMode::Serial
760+
? "ComputeEncoder (Serial)"
761+
: "ComputeEncoder (Parallel)");
762+
return Enc;
713763
}
714764
class VulkanDevice : public offloadtest::Device {
715765
private:

0 commit comments

Comments
 (0)