Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions include/API/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,29 @@
#ifndef OFFLOADTEST_API_BUFFER_H
#define OFFLOADTEST_API_BUFFER_H

#include "API/API.h"
#include "API/Resources.h"

#include "llvm/Support/Casting.h"

namespace offloadtest {

struct BufferCreateDesc {
MemoryLocation Location;
};

class Buffer {
GPUAPI API;

public:
virtual ~Buffer();

Buffer(const Buffer &) = delete;
Buffer &operator=(const Buffer &) = delete;

GPUAPI getAPI() const { return API; }

protected:
Buffer() = default;
explicit Buffer(GPUAPI API) : API(API) {}
};

} // namespace offloadtest
Expand Down
8 changes: 4 additions & 4 deletions include/API/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ class Device {
virtual llvm::Expected<std::unique_ptr<Fence>>
createFence(llvm::StringRef Name) = 0;

virtual llvm::Expected<std::shared_ptr<Buffer>>
virtual llvm::Expected<std::unique_ptr<Buffer>>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just want to make a note that I still think we should go down the shared_ptr route. However that is currently being discussed in issue #1099.
I am okay with going unique_ptr for now because the keep alive system is not place yet. However, we are close to implementing and PRing that and will probably need to switch back to shared_ptr there.

createBuffer(std::string Name, BufferCreateDesc &Desc,
size_t SizeInBytes) = 0;

virtual llvm::Expected<std::shared_ptr<Texture>>
virtual llvm::Expected<std::unique_ptr<Texture>>
createTexture(std::string Name, TextureCreateDesc &Desc) = 0;

virtual void printExtra(llvm::raw_ostream &OS) {}
Expand Down Expand Up @@ -111,11 +111,11 @@ initializeDevices(const DeviceConfig Config);
// Creates a render target texture using the format and dimensions from a
// CPUBuffer. Does not upload the buffer's data — only uses its description to
// configure the texture.
llvm::Expected<std::shared_ptr<Texture>>
llvm::Expected<std::unique_ptr<Texture>>
createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf);

// Creates a depth/stencil texture matching the dimensions of a render target.
llvm::Expected<std::shared_ptr<Texture>>
llvm::Expected<std::unique_ptr<Texture>>
createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height);

} // namespace offloadtest
Expand Down
9 changes: 7 additions & 2 deletions include/API/Texture.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
#ifndef OFFLOADTEST_API_TEXTURE_H
#define OFFLOADTEST_API_TEXTURE_H

#include "API/API.h"
#include "API/Resources.h"

#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"

#include <cstdint>
Expand Down Expand Up @@ -140,14 +142,17 @@ inline llvm::Error validateTextureCreateDesc(const TextureCreateDesc &Desc) {
}

class Texture {
GPUAPI API;

public:
virtual ~Texture();

Texture(const Texture &) = delete;
Texture &operator=(const Texture &) = delete;

GPUAPI getAPI() const { return API; }

protected:
Texture() = default;
explicit Texture(GPUAPI API) : API(API) {}
};

} // namespace offloadtest
Expand Down
71 changes: 44 additions & 27 deletions lib/API/DX/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,12 @@ class DXBuffer : public offloadtest::Buffer {

DXBuffer(ComPtr<ID3D12Resource> Buffer, llvm::StringRef Name,
BufferCreateDesc Desc, size_t SizeInBytes)
: Buffer(Buffer), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {}
: offloadtest::Buffer(GPUAPI::DirectX), Buffer(Buffer), Name(Name),
Desc(Desc), SizeInBytes(SizeInBytes) {}

static bool classof(const offloadtest::Buffer *B) {
return B->getAPI() == GPUAPI::DirectX;
}
};

class DXTexture : public offloadtest::Texture {
Expand All @@ -313,7 +318,12 @@ class DXTexture : public offloadtest::Texture {

DXTexture(ComPtr<ID3D12Resource> Resource, llvm::StringRef Name,
TextureCreateDesc Desc)
: Resource(Resource), Name(Name), Desc(Desc) {}
: offloadtest::Texture(GPUAPI::DirectX), Resource(Resource), Name(Name),
Desc(Desc) {}

static bool classof(const offloadtest::Texture *T) {
return T->getAPI() == GPUAPI::DirectX;
}
};

class DXFence : public offloadtest::Fence {
Expand Down Expand Up @@ -414,10 +424,6 @@ class DXQueue : public offloadtest::Queue {

class DXCommandBuffer : public offloadtest::CommandBuffer {
public:
static bool classof(const CommandBuffer *CB) {
return CB->getKind() == GPUAPI::DirectX;
}

ComPtr<ID3D12CommandAllocator> Allocator;
ComPtr<ID3D12GraphicsCommandList> CmdList;

Expand All @@ -440,6 +446,10 @@ class DXCommandBuffer : public offloadtest::CommandBuffer {

~DXCommandBuffer() override = default;

static bool classof(const CommandBuffer *CB) {
return CB->getKind() == GPUAPI::DirectX;
}

private:
DXCommandBuffer() : CommandBuffer(GPUAPI::DirectX) {}
};
Expand Down Expand Up @@ -479,9 +489,9 @@ class DXDevice : public offloadtest::Device {
std::unique_ptr<offloadtest::Fence> CompletionFence;

// Resources for graphics pipelines.
std::shared_ptr<DXTexture> RT;
std::shared_ptr<DXBuffer> RTReadback;
std::shared_ptr<DXTexture> DS;
std::unique_ptr<offloadtest::Texture> RT;
std::unique_ptr<offloadtest::Buffer> RTReadback;
std::unique_ptr<offloadtest::Texture> DS;
Comment on lines +492 to +494
Copy link
Copy Markdown
Contributor

@manon-traverse manon-traverse Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fantastic, using the abstract types brings InvocationState closer to being identical between render backends. The downcasting is needed for now, as we don't have a complete interface yet, but the places where we do cast act as TODO items for what has highest priority to be added to the interface.

ComPtr<ID3D12Resource> VB;

llvm::SmallVector<DescriptorTable> DescTables;
Expand All @@ -508,7 +518,7 @@ class DXDevice : public offloadtest::Device {
return DXFence::create(Device.Get(), Name);
}

llvm::Expected<std::shared_ptr<offloadtest::Buffer>>
llvm::Expected<std::unique_ptr<offloadtest::Buffer>>
createBuffer(std::string Name, BufferCreateDesc &Desc,
size_t SizeInBytes) override {
const D3D12_HEAP_TYPE HeapType = getDXHeapType(Desc.Location);
Expand Down Expand Up @@ -539,10 +549,10 @@ class DXDevice : public offloadtest::Device {
"Failed to create buffer."))
return Err;

return std::make_shared<DXBuffer>(DeviceBuffer, Name, Desc, SizeInBytes);
return std::make_unique<DXBuffer>(DeviceBuffer, Name, Desc, SizeInBytes);
}

llvm::Expected<std::shared_ptr<offloadtest::Texture>>
llvm::Expected<std::unique_ptr<offloadtest::Texture>>
createTexture(std::string Name, TextureCreateDesc &Desc) override {
if (auto Err = validateTextureCreateDesc(Desc))
return Err;
Expand Down Expand Up @@ -596,7 +606,7 @@ class DXDevice : public offloadtest::Device {
"Failed to create texture."))
return Err;

auto Tex = std::make_shared<DXTexture>(DeviceTexture, Name, Desc);
auto Tex = std::make_unique<DXTexture>(DeviceTexture, Name, Desc);

const bool IsRT = (Desc.Usage & TextureUsage::RenderTarget) != 0;
const bool IsDS = (Desc.Usage & TextureUsage::DepthStencil) != 0;
Expand Down Expand Up @@ -1534,13 +1544,15 @@ class DXDevice : public offloadtest::Device {
// while our image writer expects bottom-left.
const CPUBuffer &B = *P.Bindings.RTargetBufferPtr;
void *Mapped = nullptr;
if (auto Err = HR::toError(IS.RTReadback->Buffer->Map(0, nullptr, &Mapped),
auto &Readback = llvm::cast<DXBuffer>(*IS.RTReadback);
if (auto Err = HR::toError(Readback.Buffer->Map(0, nullptr, &Mapped),
"Failed to map render target readback"))
return Err;

// Query the copy footprint to get the actual padded row pitch used by the
// copy operation.
const D3D12_RESOURCE_DESC RTDesc = IS.RT->Resource->GetDesc();
auto &RT = llvm::cast<DXTexture>(*IS.RT);
const D3D12_RESOURCE_DESC RTDesc = RT.Resource->GetDesc();
D3D12_PLACED_SUBRESOURCE_FOOTPRINT Placed = {};
uint32_t NumRows = 0;
uint64_t RowSizeInBytes = 0;
Expand All @@ -1565,7 +1577,7 @@ class DXDevice : public offloadtest::Device {
memcpy(DstRow, SrcRow, RowBytes);
}

IS.RTReadback->Buffer->Unmap(0, nullptr);
Readback.Buffer->Unmap(0, nullptr);
return llvm::Error::success();
}

Expand All @@ -1580,15 +1592,15 @@ class DXDevice : public offloadtest::Device {
if (!TexOrErr)
return TexOrErr.takeError();

IS.RT = std::static_pointer_cast<DXTexture>(*TexOrErr);
IS.RT = std::move(*TexOrErr);

// Create readback buffer sized for the pixel data (raw bytes).
BufferCreateDesc BufDesc = {};
BufDesc.Location = MemoryLocation::GpuToCpu;
auto BufOrErr = createBuffer("RTReadback", BufDesc, OutBuf.size());
if (!BufOrErr)
return BufOrErr.takeError();
IS.RTReadback = std::static_pointer_cast<DXBuffer>(*BufOrErr);
IS.RTReadback = std::move(*BufOrErr);

return llvm::Error::success();
}
Expand All @@ -1599,7 +1611,7 @@ class DXDevice : public offloadtest::Device {
P.Bindings.RTargetBufferPtr->OutputProps.Height);
if (!TexOrErr)
return TexOrErr.takeError();
IS.DS = std::static_pointer_cast<DXTexture>(*TexOrErr);
IS.DS = std::move(*TexOrErr);
return llvm::Error::success();
}

Expand Down Expand Up @@ -1682,7 +1694,8 @@ class DXDevice : public offloadtest::Device {
PSODesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
PSODesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
PSODesc.DepthStencilState.StencilEnable = false;
PSODesc.DSVFormat = getDXGIFormat(IS.DS->Desc.Fmt);
auto &DS = llvm::cast<DXTexture>(*IS.DS);
PSODesc.DSVFormat = getDXGIFormat(DS.Desc.Fmt);
PSODesc.SampleMask = UINT_MAX;
PSODesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
PSODesc.NumRenderTargets = 1;
Expand All @@ -1699,6 +1712,10 @@ class DXDevice : public offloadtest::Device {
}

llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) {
auto &RT = llvm::cast<DXTexture>(*IS.RT);
auto &DS = llvm::cast<DXTexture>(*IS.DS);
auto &RTReadback = llvm::cast<DXBuffer>(*IS.RTReadback);

IS.CB->CmdList->SetGraphicsRootSignature(IS.RootSig.Get());
if (IS.DescHeap) {
ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()};
Expand All @@ -1708,17 +1725,17 @@ class DXDevice : public offloadtest::Device {
}
IS.CB->CmdList->SetPipelineState(IS.PSO.Get());

IS.CB->CmdList->OMSetRenderTargets(1, &IS.RT->ViewHandle, false,
&IS.DS->ViewHandle);
IS.CB->CmdList->OMSetRenderTargets(1, &RT.ViewHandle, false,
&DS.ViewHandle);

const auto *DepthCV =
std::get_if<ClearDepthStencil>(&*IS.DS->Desc.OptimizedClearValue);
std::get_if<ClearDepthStencil>(&*DS.Desc.OptimizedClearValue);
if (!DepthCV)
return llvm::createStringError(
std::errc::invalid_argument,
"Depth/stencil clear value must be a ClearDepthStencil.");
IS.CB->CmdList->ClearDepthStencilView(
IS.DS->ViewHandle, D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL,
DS.ViewHandle, D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL,
DepthCV->Depth, DepthCV->Stencil, 0, nullptr);

D3D12_VIEWPORT VP = {};
Expand All @@ -1740,7 +1757,7 @@ class DXDevice : public offloadtest::Device {
// Transition the render target to copy source and copy to the readback
// buffer.
const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(
IS.RT->Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET,
RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_COPY_SOURCE);
IS.CB->CmdList->ResourceBarrier(1, &Barrier);

Expand All @@ -1750,9 +1767,9 @@ class DXDevice : public offloadtest::Device {
CD3DX12_SUBRESOURCE_FOOTPRINT(
getDXFormat(B.Format, B.Channels), B.OutputProps.Width,
B.OutputProps.Height, 1, B.OutputProps.Width * B.getElementSize())};
const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback->Buffer.Get(),
const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(),
Footprint);
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT->Resource.Get(), 0);
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), 0);

IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr);

Expand Down
4 changes: 2 additions & 2 deletions lib/API/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ offloadtest::initializeDevices(const DeviceConfig Config) {
return Devices;
}

llvm::Expected<std::shared_ptr<Texture>>
llvm::Expected<std::unique_ptr<Texture>>
offloadtest::createRenderTargetFromCPUBuffer(Device &Dev,
const CPUBuffer &Buf) {
auto TexFmtOrErr = toFormat(Buf.Format, Buf.Channels);
Expand All @@ -87,7 +87,7 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev,
return Dev.createTexture("RenderTarget", Desc);
}

llvm::Expected<std::shared_ptr<Texture>>
llvm::Expected<std::unique_ptr<Texture>>
offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width,
uint32_t Height) {
TextureCreateDesc Desc = {};
Expand Down
Loading
Loading