Skip to content

Commit 127a2c2

Browse files
MarijnS95 and claude committed
Add debug labels to command encoders for GPU debugger visibility
Add pushDebugGroup/popDebugGroup/insertDebugSignpost to CommandEncoder with no-op defaults. Each backend overrides them:

- Vulkan: vkCmd{Begin,End,Insert}DebugUtilsLabelEXT, loaded via vkGetDeviceProcAddr (naturally gated by VK_EXT_debug_utils availability)
- DX12: ID3D12GraphicsCommandList BeginEvent/EndEvent/SetMarker with ANSI string encoding
- Metal: pushDebugGroup/popDebugGroup/insertDebugSignpost on the active native encoder, with correct pop/push across compute/blit switches

Every encoder command automatically emits a signpost with its parameters (e.g. "Dispatch [8,1,1]", "CopyBuffer 4096B", "FillBuffer 256B value=0x00"). Encoder creation pushes a "ComputeEncoder (Serial/Parallel)" debug group, balanced by a pop in endEncoding.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent 4db235b commit 127a2c2

4 files changed

Lines changed: 152 additions & 15 deletions

File tree

include/API/Encoder.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "API/API.h"
1313

14+
#include "llvm/ADT/StringRef.h"
1415
#include "llvm/Support/Error.h"
1516

1617
#include <cstddef>
@@ -57,6 +58,16 @@ class CommandEncoder {
5758
virtual llvm::Error fillBuffer(Buffer &Dst, size_t Offset, size_t Size,
5859
uint8_t Value) = 0;
5960

61+
/// Begin a named debug group. Visible in GPU debuggers (PIX, RenderDoc,
62+
/// Xcode). Must be balanced by a corresponding popDebugGroup() call.
63+
/// The default implementation does nothing.
virtual void pushDebugGroup(llvm::StringRef Label) {}
64+
65+
/// End the most recently pushed debug group.
66+
/// The default implementation does nothing.
virtual void popDebugGroup() {}
67+
68+
/// Insert a point-in-time debug marker.
69+
/// The default implementation does nothing.
virtual void insertDebugSignpost(llvm::StringRef Label) {}
70+
6071
/// Finish recording. No further commands may be recorded after this call.
6172
virtual void endEncoding() = 0;
6273
};

lib/API/DX/Device.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/ADT/SmallVector.h"
4444
#include "llvm/Object/DXContainer.h"
4545
#include "llvm/Support/Error.h"
46+
#include "llvm/Support/FormatVariadic.h"
4647
#include "llvm/Support/Signals.h"
4748

4849
#include <codecvt>
@@ -551,13 +552,23 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
551552

552553
~DXComputeEncoder() override = default;
553554

555+
// D3D12 debug labels require WinPixEventRuntime for the proper event
556+
// encoding. Without it, BeginEvent/EndEvent/SetMarker with metadata type 0
557+
// crash the D3D12 debug layer, so leave these as no-ops for now.
558+
void pushDebugGroup(llvm::StringRef Label) override {}
559+
void popDebugGroup() override {}
560+
void insertDebugSignpost(llvm::StringRef Label) override {}
561+
554562
/// Records a direct compute dispatch of
/// GroupCountX x GroupCountY x GroupCountZ thread groups.
///
/// The three threads-per-group arguments are ignored by this backend.
/// Always returns llvm::Error::success().
llvm::Error dispatch(uint32_t GroupCountX, uint32_t GroupCountY,
                     uint32_t GroupCountZ, uint32_t /*ThreadsPerGroupX*/,
                     uint32_t /*ThreadsPerGroupY*/,
                     uint32_t /*ThreadsPerGroupZ*/) override {
  // DX12 bakes the threadgroup size into the pipeline, so only the group
  // counts take part in the dispatch.
  addDstBarrier();

  // Emit a signpost carrying the group counts just before the dispatch
  // itself is recorded.
  const auto SignpostText = llvm::formatv("Dispatch [{0},{1},{2}]",
                                          GroupCountX, GroupCountY,
                                          GroupCountZ)
                                .str();
  insertDebugSignpost(SignpostText);

  CB.CmdList->Dispatch(GroupCountX, GroupCountY, GroupCountZ);
  return llvm::Error::success();
}
@@ -567,6 +578,7 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
567578
size_t Size) override {
568579
auto &DXSrc = static_cast<DXBuffer &>(Src);
569580
auto &DXDst = static_cast<DXBuffer &>(Dst);
581+
insertDebugSignpost(llvm::formatv("CopyBuffer {0}B", Size).str());
570582
CB.CmdList->CopyBufferRegion(DXDst.Buffer.Get(), DstOffset,
571583
DXSrc.Buffer.Get(), SrcOffset, Size);
572584
return llvm::Error::success();
@@ -589,23 +601,30 @@ class DXComputeEncoder : public offloadtest::ComputeEncoder {
589601
return Err;
590602
addDstBarrier();
591603
auto &DXBuf = static_cast<DXBuffer &>(ArgBuffer);
604+
insertDebugSignpost(
605+
llvm::formatv("DispatchIndirect offset={0}", Offset).str());
592606
CB.CmdList->ExecuteIndirect(CB.DispatchIndirectSig.Get(), 1,
593607
DXBuf.Buffer.Get(), Offset, nullptr, 0);
594608
return llvm::Error::success();
595609
}
596610

597-
void barrier() override { CB.flushBarrier(); }
598-
599-
void endEncoding() override {
600-
// State remains on the command buffer for the next encoder.
611+
void barrier() override {
612+
insertDebugSignpost("Barrier");
613+
CB.flushBarrier();
601614
}
615+
616+
// Pops the debug group pushed when the encoder was created in
// DXCommandBuffer::createComputeEncoder, keeping push/pop balanced.
void endEncoding() override { popDebugGroup(); }
602617
};
603618

604619
/// Creates a compute encoder that records into this command buffer.
///
/// For EncoderMode::Parallel, flushBarrier() is called before the encoder is
/// constructed. The new encoder then has a debug group pushed on it, named
/// "ComputeEncoder (Serial)" for serial mode and "ComputeEncoder (Parallel)"
/// otherwise, before it is returned.
llvm::Expected<std::unique_ptr<offloadtest::ComputeEncoder>>
DXCommandBuffer::createComputeEncoder(offloadtest::EncoderMode Mode) {
  if (Mode == offloadtest::EncoderMode::Parallel)
    flushBarrier();

  // Pick the debug group name up front; anything that is not Serial is
  // labelled Parallel.
  const char *GroupName = "ComputeEncoder (Parallel)";
  if (Mode == offloadtest::EncoderMode::Serial)
    GroupName = "ComputeEncoder (Serial)";

  auto Enc = std::make_unique<DXComputeEncoder>(*this, Mode);
  Enc->pushDebugGroup(GroupName);
  return Enc;
}
610629
class DXDevice : public offloadtest::Device {
611630
private:

lib/API/MTL/MTLDevice.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/ADT/ScopeExit.h"
1818
#include "llvm/ADT/SmallString.h"
1919
#include "llvm/Support/Error.h"
20+
#include "llvm/Support/FormatVariadic.h"
2021
#include "llvm/Support/JSON.h"
2122
#include "llvm/Support/raw_ostream.h"
2223
#include <algorithm>
@@ -230,6 +231,9 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
230231
if (!ComputeEnc)
231232
return llvm::createStringError(std::errc::device_or_resource_busy,
232233
"Failed to create Metal compute encoder.");
234+
ComputeEnc->pushDebugGroup(NS::String::string(
235+
isSerial() ? "ComputeEncoder (Serial)" : "ComputeEncoder (Parallel)",
236+
NS::UTF8StringEncoding));
233237
return llvm::Error::success();
234238
}
235239

@@ -239,6 +243,7 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
239243
return llvm::Error::success();
240244
if (ComputeEnc) {
241245
barrier();
246+
ComputeEnc->popDebugGroup();
242247
ComputeEnc->endEncoding();
243248
ComputeEnc = nullptr;
244249
}
@@ -263,6 +268,29 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
263268

264269
/// Returns the currently stored native compute encoder pointer. This can be
/// null, since ComputeEnc is reset to nullptr after its encoding is ended.
MTL::ComputeCommandEncoder *getNative() const { return ComputeEnc; }
265270

271+
/// Returns the native encoder that debug labels should be applied to:
/// ComputeEnc when it is set, otherwise BlitEnc. Callers null-check the
/// result before using it.
MTL::CommandEncoder *getActiveEncoder() const {
  MTL::CommandEncoder *Active = BlitEnc;
  if (ComputeEnc)
    Active = ComputeEnc;
  return Active;
}
276+
277+
/// Opens a debug group named \p Label on the active native encoder (see
/// getActiveEncoder()). Does nothing when no encoder is active.
void pushDebugGroup(llvm::StringRef Label) override {
  auto *Enc = getActiveEncoder();
  if (!Enc)
    return;
  // NS::String::string() takes a NUL-terminated C string, but a StringRef
  // is not guaranteed to be terminated. Label.str() makes a terminated
  // copy that lives until the end of this full expression.
  Enc->pushDebugGroup(
      NS::String::string(Label.str().c_str(), NS::UTF8StringEncoding));
}
282+
283+
void popDebugGroup() override {
284+
if (auto *Enc = getActiveEncoder())
285+
Enc->popDebugGroup();
286+
}
287+
288+
/// Inserts a point-in-time debug marker named \p Label on the active native
/// encoder (see getActiveEncoder()). Does nothing when no encoder is active.
void insertDebugSignpost(llvm::StringRef Label) override {
  auto *Enc = getActiveEncoder();
  if (!Enc)
    return;
  // NS::String::string() takes a NUL-terminated C string, but a StringRef
  // is not guaranteed to be terminated. Label.str() makes a terminated
  // copy that lives until the end of this full expression.
  Enc->insertDebugSignpost(
      NS::String::string(Label.str().c_str(), NS::UTF8StringEncoding));
}
293+
266294
llvm::Error dispatch(uint32_t GroupCountX, uint32_t GroupCountY,
267295
uint32_t GroupCountZ, uint32_t ThreadsPerGroupX,
268296
uint32_t ThreadsPerGroupY,
@@ -278,6 +306,9 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
278306
static_cast<NS::UInteger>(ThreadsPerGroupZ) * GroupCountZ);
279307
const MTL::Size GroupSize(ThreadsPerGroupX, ThreadsPerGroupY,
280308
ThreadsPerGroupZ);
309+
insertDebugSignpost(llvm::formatv("Dispatch [{0},{1},{2}]", GroupCountX,
310+
GroupCountY, GroupCountZ)
311+
.str());
281312
ComputeEnc->dispatchThreads(GridSize, GroupSize);
282313
return llvm::Error::success();
283314
}
@@ -293,6 +324,8 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
293324
auto &MTLBuf = static_cast<MTLBuffer &>(ArgBuffer);
294325
const MTL::Size GroupSize(ThreadsPerGroupX, ThreadsPerGroupY,
295326
ThreadsPerGroupZ);
327+
insertDebugSignpost(
328+
llvm::formatv("DispatchIndirect offset={0}", Offset).str());
296329
ComputeEnc->dispatchThreadgroups(MTLBuf.Buf, Offset, GroupSize);
297330
return llvm::Error::success();
298331
}
@@ -304,6 +337,7 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
304337
return Err;
305338
auto &MTLSrc = static_cast<MTLBuffer &>(Src);
306339
auto &MTLDst = static_cast<MTLBuffer &>(Dst);
340+
insertDebugSignpost(llvm::formatv("CopyBuffer {0}B", Size).str());
307341
BlitEnc->copyFromBuffer(MTLSrc.Buf, SrcOffset, MTLDst.Buf, DstOffset, Size);
308342
return llvm::Error::success();
309343
}
@@ -313,12 +347,15 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
313347
if (auto Err = ensureBlitEncoder())
314348
return Err;
315349
auto &MTLDst = static_cast<MTLBuffer &>(Dst);
350+
insertDebugSignpost(
351+
llvm::formatv("FillBuffer {0}B value=0x{1:x2}", Size, Value).str());
316352
BlitEnc->fillBuffer(MTLDst.Buf, NS::Range(Offset, Size), Value);
317353
return llvm::Error::success();
318354
}
319355

320356
void barrier() override {
321357
if (ComputeEnc && PendingScope != MTL::BarrierScope(0)) {
358+
insertDebugSignpost("Barrier");
322359
ComputeEnc->memoryBarrier(PendingScope);
323360
PendingScope = MTL::BarrierScope(0);
324361
}
@@ -327,6 +364,7 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
327364
void endEncoding() override {
328365
if (ComputeEnc) {
329366
barrier();
367+
ComputeEnc->popDebugGroup();
330368
ComputeEnc->endEncoding();
331369
ComputeEnc = nullptr;
332370
}
@@ -345,6 +383,10 @@ MTLCommandBuffer::createComputeEncoder(EncoderMode Mode) {
345383
return llvm::createStringError(
346384
std::errc::device_or_resource_busy,
347385
"Failed to create Metal compute command encoder.");
386+
NativeEncoder->pushDebugGroup(NS::String::string(
387+
Mode == EncoderMode::Serial ? "ComputeEncoder (Serial)"
388+
: "ComputeEncoder (Parallel)",
389+
NS::UTF8StringEncoding));
348390
return std::make_unique<MTLComputeEncoder>(CmdBuffer, NativeEncoder, Mode);
349391
}
350392
class MTLDevice : public offloadtest::Device {

0 commit comments

Comments
 (0)