-
Notifications
You must be signed in to change notification settings - Fork 17
Image upload benchmark #238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
CrabExtra
wants to merge
10
commits into
master
Choose a base branch
from
image_upload_benchamark
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 9 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
6635ba9
Add 73_ImageUploadBenchmark example
CrabExtra 951e2fd
Simple benchmark HOST_VISIBLE vs HOST_VISIBLE & DEVICE_LOCAL
CrabExtra 141295b
Measurement was weird, added some detail and also fixed a bug related to…
CrabExtra 874814a
Resolved PR comments + adding timestamp query
CrabExtra ddb7bfc
Adding more logs to release build
CrabExtra f1fc8d5
Added image to image copy
CrabExtra 7abe408
compute shader added
CrabExtra 717f6ae
fixing PR comments
CrabExtra 244802c
Address image upload benchmark review comments
CrabExtra 0e4347f
Address image upload benchmark review comments
CrabExtra File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| # Pull in the shared `common` CMake module. RES receives the result of include() and is | ||
| # checked right below so a missing module fails configuration with a clear message. | ||
| include(common RESULT_VARIABLE RES) | ||
| if(NOT RES) | ||
| message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") | ||
| endif() | ||
|
|
||
| # Create this example's executable through the project helper, forwarding the PCH target. | ||
| # NOTE(review): the four empty arguments are presumably "no extra sources/includes/libs/etc." - | ||
| # confirm against the definition of nbl_create_executable_project. | ||
| nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") | ||
|
|
||
| # Only when NBL_EMBED_BUILTIN_RESOURCES is set: register every file under app_resources/ | ||
| # as a builtin resource and link the resulting resource target into the executable. | ||
| if(NBL_EMBED_BUILTIN_RESOURCES) | ||
| set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) | ||
| set(RESOURCE_DIR "app_resources") | ||
|
|
||
| # Absolute paths handed to ADD_CUSTOM_BUILTIN_RESOURCES below: the search root is this | ||
| # example's source dir, the generated sources/headers go under the binary dir. | ||
| get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) | ||
| get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) | ||
| get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) | ||
|
|
||
| # Collect resource paths relative to app_resources/; CONFIGURE_DEPENDS re-checks the glob at build time. | ||
| file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") | ||
| foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) | ||
| LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") | ||
| endforeach() | ||
|
|
||
| # Generate the builtin-resource target; "nbl::this_example::builtin" is passed as its namespace argument. | ||
| ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") | ||
|
|
||
| LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) | ||
| endif() | ||
|
|
||
| # Output directory for the shader build steps below (passed as BINARY_DIR / BIND). | ||
| set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") | ||
| # HLSL sources the shader compile rule depends on. | ||
| set(DEPENDS | ||
| app_resources/common.hlsl | ||
| app_resources/tile_upload.comp.hlsl | ||
| ) | ||
| # Attach the HLSL files to the executable target but mark them HEADER_FILE_ONLY so the | ||
| # C++ toolchain does not try to compile them. | ||
| target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) | ||
| set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) | ||
|
|
||
| # Shader model suffix; expanded into the "-T lib_${SM}" compile option below. | ||
| set(SM 6_8) | ||
| # Compile inputs as a JSON array: one entry per shader, giving its source file and lookup key. | ||
| set(JSON [=[ | ||
| [ | ||
| { | ||
| "INPUT": "app_resources/tile_upload.comp.hlsl", | ||
| "KEY": "snakeStore" | ||
| } | ||
| ] | ||
| ]=]) | ||
| # Expand any CMake variable references inside the JSON text (none are present right now). | ||
| string(CONFIGURE "${JSON}" JSON) | ||
|
|
||
| # Register the shader compile rules for the inputs above and link them to the executable. | ||
| # The generated key list is returned in KEYS and consumed by NBL_CREATE_RESOURCE_ARCHIVE. | ||
| # NOTE(review): exact semantics of each argument live in the NBL_CREATE_NSC_COMPILE_RULES definition - confirm there. | ||
| NBL_CREATE_NSC_COMPILE_RULES( | ||
| TARGET ${EXECUTABLE_NAME}SPIRV | ||
| LINK_TO ${EXECUTABLE_NAME} | ||
| DEPENDS ${DEPENDS} | ||
| BINARY_DIR ${OUTPUT_DIRECTORY} | ||
| MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT | ||
| COMMON_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" -T lib_${SM} | ||
| OUTPUT_VAR KEYS | ||
| INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp | ||
| NAMESPACE nbl::this_example::builtin::build | ||
| INPUTS ${JSON} | ||
| ) | ||
|
|
||
| # Build a resource archive from the compiled shader outputs: BUILTINS takes the KEYS list | ||
| # produced by the compile rules, BIND points at the same auto-gen output directory. | ||
| NBL_CREATE_RESOURCE_ARCHIVE( | ||
| NAMESPACE nbl::this_example::builtin::build | ||
| TARGET ${EXECUTABLE_NAME}_builtinsBuild | ||
| LINK_TO ${EXECUTABLE_NAME} | ||
| BIND ${OUTPUT_DIRECTORY} | ||
| BUILTINS ${KEYS} | ||
| ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| #include <nbl/builtin/hlsl/morton.hlsl> | ||
|
|
||
| // Push constants consumed by the entry points in tile_upload.comp.hlsl. | ||
| struct PushConstantData | ||
| { | ||
| // Device address of the packed 32-bit-per-pixel buffer the shaders RawBufferLoad from / RawBufferStore to. | ||
| uint64_t deviceBufferAddress; | ||
| // Device address of a uint32_t array indexed by tile index; each entry packs the | ||
| // image-side tile x in the low 16 bits and tile y in the high 16 bits. | ||
| uint64_t dstTileLocationsAddress; | ||
| // Texel offset added to every image position the shaders compute. | ||
| uint32_t2 dstOffset; | ||
| // NOTE(review): srcWidth / srcHeight are not read by the entry points in tile_upload.comp.hlsl; | ||
| // presumably the source extent in texels - confirm how the host side uses them. | ||
| uint32_t srcWidth; | ||
| uint32_t srcHeight; | ||
| // Tiles per row; used to flatten a 2D tile coordinate into a linear tile index. | ||
| uint32_t tilesPerRow; | ||
| }; |
99 changes: 99 additions & 0 deletions
99
73_ImageUploadBenchmark/app_resources/tile_upload.comp.hlsl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| #include "common.hlsl" | ||
|
|
||
| // Storage image at binding 0 of set 0: written by the *Store kernels, read by the *Load kernels. | ||
| [[vk::binding(0,0)]] RWTexture2D<float32_t4> dstImage; | ||
| // Per-dispatch parameters (buffer addresses, image offset, tiles per row). | ||
| [[vk::push_constant]] PushConstantData pc; | ||
|
|
||
| using namespace nbl::hlsl; | ||
|
|
||
| // Tiling constants. A tile is TILE_SIZE x TILE_SIZE texels; the Morton kernels further split | ||
| // each tile into BLOCK_SIZE x BLOCK_SIZE blocks (one workgroup per block). | ||
| // The *_LOG2 values must stay in sync with the sizes: they are used for shifts instead of mul/div. | ||
| static const uint32_t TILE_SIZE = 128u; | ||
| static const uint32_t TILE_SIZE_LOG2 = 7u; | ||
| // Mask extracting the in-tile coordinate from a global coordinate. | ||
| static const uint32_t TILE_SIZE_MASK = TILE_SIZE - 1u; | ||
| // log2 of the texel count of one tile (TILE_SIZE * TILE_SIZE). | ||
| static const uint32_t TILE_PIXELS_LOG2 = TILE_SIZE_LOG2 * 2u; | ||
| static const uint32_t BLOCK_SIZE = 16u; | ||
| static const uint32_t BLOCK_SIZE_LOG2 = 4u; | ||
| // log2 of the texel count of one block (BLOCK_SIZE * BLOCK_SIZE). | ||
| static const uint32_t BLOCK_PIXELS_LOG2 = BLOCK_SIZE_LOG2 * 2u; | ||
| // Blocks along one side of a tile, and its log2. | ||
| static const uint32_t BLOCKS_PER_TILE_LOG2 = TILE_SIZE_LOG2 - BLOCK_SIZE_LOG2; | ||
| static const uint32_t BLOCKS_PER_TILE = TILE_SIZE / BLOCK_SIZE; | ||
|
|
||
| // SnakeStore: one thread per texel; copies a packed 32-bit pixel from the tile-linear buffer | ||
| // at pc.deviceBufferAddress into dstImage. | ||
| // | ||
| // Buffer layout: tiles are stored back to back (TILE_SIZE * TILE_SIZE pixels each, indexed by | ||
| // tileIdx), and pixels inside a tile are row-major. The image-side tile position is looked up | ||
| // per tile from the table at pc.dstTileLocationsAddress (x in the low 16 bits, y in the high 16). | ||
| // | ||
| // NOTE(review): there is no bounds check against pc.srcWidth / pc.srcHeight; this assumes the | ||
| // dispatch covers whole tiles only - confirm on the host side. | ||
| [numthreads(128, 4, 1)] | ||
| [shader("compute")] | ||
| void SnakeStore(uint32_t3 ID : SV_DispatchThreadID) | ||
| { | ||
| const uint32_t2 globalPos = ID.xy; | ||
| const uint32_t2 tileCoord = globalPos >> TILE_SIZE_LOG2; | ||
| const uint32_t2 localPos = globalPos & TILE_SIZE_MASK; | ||
| const uint32_t tileIdx = tileCoord.y * pc.tilesPerRow + tileCoord.x; | ||
| const uint32_t localLinearIdx = (localPos.y << TILE_SIZE_LOG2) + localPos.x; | ||
| // Index and byte offsets are computed in 64 bits: in 32 bits the pixel index times 4 wraps | ||
| // once the buffer reaches 4 GiB, which would silently address the wrong memory. | ||
| const uint64_t srcPixelIdx = (uint64_t(tileIdx) << TILE_PIXELS_LOG2) + localLinearIdx; | ||
| const uint32_t packedDstTile = vk::RawBufferLoad<uint32_t>(pc.dstTileLocationsAddress + uint64_t(tileIdx) * 4u); | ||
| const uint32_t2 dstTile = uint32_t2(packedDstTile & 0xffffu, packedDstTile >> 16u); | ||
| const uint32_t2 pixelPos = pc.dstOffset + (dstTile << TILE_SIZE_LOG2) + localPos; | ||
|
|
||
| const uint32_t packed = vk::RawBufferLoad<uint32_t>(pc.deviceBufferAddress + srcPixelIdx * 4u); | ||
|
|
||
| dstImage[pixelPos] = unpackUnorm4x8(int32_t(packed)); | ||
| } | ||
|
|
||
| // SnakeLoad: inverse of the store path; one thread per texel reads dstImage and writes the | ||
| // texel, packed to 32 bits, into the tile-linear buffer at pc.deviceBufferAddress. | ||
| // | ||
| // Buffer layout: tiles back to back (TILE_SIZE * TILE_SIZE pixels each), row-major inside a | ||
| // tile. The image-side tile position comes from the table at pc.dstTileLocationsAddress | ||
| // (x in the low 16 bits, y in the high 16). | ||
| // | ||
| // NOTE(review): no bounds check against pc.srcWidth / pc.srcHeight; assumes the dispatch | ||
| // covers whole tiles only - confirm on the host side. | ||
| [numthreads(128, 4, 1)] | ||
| [shader("compute")] | ||
| void SnakeLoad(uint32_t3 ID : SV_DispatchThreadID) | ||
| { | ||
| const uint32_t2 globalPos = ID.xy; | ||
| const uint32_t2 tileCoord = globalPos >> TILE_SIZE_LOG2; | ||
| const uint32_t2 localPos = globalPos & TILE_SIZE_MASK; | ||
| const uint32_t tileIdx = tileCoord.y * pc.tilesPerRow + tileCoord.x; | ||
| const uint32_t localLinearIdx = (localPos.y << TILE_SIZE_LOG2) + localPos.x; | ||
| // Index and byte offsets are computed in 64 bits: in 32 bits the pixel index times 4 wraps | ||
| // once the buffer reaches 4 GiB, which would silently address the wrong memory. | ||
| const uint64_t dstPixelIdx = (uint64_t(tileIdx) << TILE_PIXELS_LOG2) + localLinearIdx; | ||
| const uint32_t packedDstTile = vk::RawBufferLoad<uint32_t>(pc.dstTileLocationsAddress + uint64_t(tileIdx) * 4u); | ||
| const uint32_t2 dstTile = uint32_t2(packedDstTile & 0xffffu, packedDstTile >> 16u); | ||
| const uint32_t2 pixelPos = pc.dstOffset + (dstTile << TILE_SIZE_LOG2) + localPos; | ||
|
|
||
| vk::RawBufferStore<uint32_t>(pc.deviceBufferAddress + dstPixelIdx * 4u, uint32_t(packUnorm4x8(dstImage[pixelPos]))); | ||
| } | ||
|
|
||
| // MortonStore: one workgroup per BLOCK_SIZE x BLOCK_SIZE block, one thread per texel; copies | ||
| // packed 32-bit pixels from the buffer at pc.deviceBufferAddress into dstImage. | ||
| // | ||
| // Buffer layout: tiles back to back; inside a tile the blocks are row-major; inside a block the | ||
| // pixels are read linearly by the thread's y * BLOCK_SIZE + x index. That same linear index is | ||
| // decoded as a Morton code to pick the texel inside the block on the image side. | ||
| // | ||
| // The thread-in-group parameter is named GroupThreadID (not ID) so it is not confused with the | ||
| // SV_DispatchThreadID parameter named ID in the Snake kernels. | ||
| [numthreads(16, 16, 1)] | ||
| [shader("compute")] | ||
| void MortonStore(uint32_t3 GroupThreadID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. confusing semantics, you're using ID for GroupThreadID here, but above in SnakeStore you're using the same name for global disaptch ID |
||
| { | ||
| const uint32_t2 globalBlock = GroupID.xy; | ||
| const uint32_t2 threadPos = GroupThreadID.xy; | ||
| const uint32_t2 tileCoord = globalBlock >> BLOCKS_PER_TILE_LOG2; | ||
| const uint32_t2 blockCoordInTile = globalBlock & (BLOCKS_PER_TILE - 1u); | ||
| const uint32_t tileIdx = tileCoord.y * pc.tilesPerRow + tileCoord.x; | ||
| const uint32_t blockIdxInTile = (blockCoordInTile.y << BLOCKS_PER_TILE_LOG2) + blockCoordInTile.x; | ||
| const uint32_t localLinearIdx = (threadPos.y << BLOCK_SIZE_LOG2) + threadPos.x; | ||
| // Index and byte offsets are computed in 64 bits: in 32 bits the pixel index times 4 wraps | ||
| // once the buffer reaches 4 GiB, which would silently address the wrong memory. | ||
| const uint64_t srcPixelIdx = (uint64_t(tileIdx) << TILE_PIXELS_LOG2) + (blockIdxInTile << BLOCK_PIXELS_LOG2) + localLinearIdx; | ||
| const uint32_t packedDstTile = vk::RawBufferLoad<uint32_t>(pc.dstTileLocationsAddress + uint64_t(tileIdx) * 4u); | ||
| const uint32_t2 dstTile = uint32_t2(packedDstTile & 0xffffu, packedDstTile >> 16u); | ||
|
|
||
| // Decode the linear in-block index as a Morton code to get the 2D texel position in the block. | ||
| // NOTE(review): template arguments presumably mean unsigned, 4 bits per component, 2 dimensions - confirm in morton.hlsl. | ||
| morton::code<false, 4, 2> mc; | ||
| mc.value = uint16_t(localLinearIdx); | ||
| const uint32_t2 mortonLocalPos = _static_cast<uint32_t2>(mc); | ||
| const uint32_t2 pixelPos = pc.dstOffset + (dstTile << TILE_SIZE_LOG2) + (blockCoordInTile << BLOCK_SIZE_LOG2) + mortonLocalPos; | ||
|
|
||
| const uint32_t packed = vk::RawBufferLoad<uint32_t>(pc.deviceBufferAddress + srcPixelIdx * 4u); | ||
| dstImage[pixelPos] = unpackUnorm4x8(int32_t(packed)); | ||
| } | ||
|
|
||
| // MortonLoad: inverse of the Morton store path; one workgroup per BLOCK_SIZE x BLOCK_SIZE | ||
| // block, one thread per texel. Reads dstImage at the Morton-decoded position inside the block | ||
| // and writes the texel, packed to 32 bits, to the buffer at pc.deviceBufferAddress. | ||
| // | ||
| // Buffer layout: tiles back to back; blocks row-major inside a tile; pixels inside a block | ||
| // written linearly by the thread's y * BLOCK_SIZE + x index. | ||
| // | ||
| // The thread-in-group parameter is named GroupThreadID (not ID) so it is not confused with the | ||
| // SV_DispatchThreadID parameter named ID in the Snake kernels. | ||
| [numthreads(16, 16, 1)] | ||
| [shader("compute")] | ||
| void MortonLoad(uint32_t3 GroupThreadID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) | ||
| { | ||
| const uint32_t2 globalBlock = GroupID.xy; | ||
| const uint32_t2 threadPos = GroupThreadID.xy; | ||
| const uint32_t2 tileCoord = globalBlock >> BLOCKS_PER_TILE_LOG2; | ||
| const uint32_t2 blockCoordInTile = globalBlock & (BLOCKS_PER_TILE - 1u); | ||
| const uint32_t tileIdx = tileCoord.y * pc.tilesPerRow + tileCoord.x; | ||
| const uint32_t blockIdxInTile = (blockCoordInTile.y << BLOCKS_PER_TILE_LOG2) + blockCoordInTile.x; | ||
| const uint32_t localLinearIdx = (threadPos.y << BLOCK_SIZE_LOG2) + threadPos.x; | ||
| // Index and byte offsets are computed in 64 bits: in 32 bits the pixel index times 4 wraps | ||
| // once the buffer reaches 4 GiB, which would silently address the wrong memory. | ||
| const uint64_t dstPixelIdx = (uint64_t(tileIdx) << TILE_PIXELS_LOG2) + (blockIdxInTile << BLOCK_PIXELS_LOG2) + localLinearIdx; | ||
| const uint32_t packedDstTile = vk::RawBufferLoad<uint32_t>(pc.dstTileLocationsAddress + uint64_t(tileIdx) * 4u); | ||
| const uint32_t2 dstTile = uint32_t2(packedDstTile & 0xffffu, packedDstTile >> 16u); | ||
|
|
||
| // Decode the linear in-block index as a Morton code to get the 2D texel position in the block. | ||
| // NOTE(review): template arguments presumably mean unsigned, 4 bits per component, 2 dimensions - confirm in morton.hlsl. | ||
| morton::code<false, 4, 2> mc; | ||
| mc.value = uint16_t(localLinearIdx); | ||
| const uint32_t2 mortonLocalPos = _static_cast<uint32_t2>(mc); | ||
| const uint32_t2 pixelPos = pc.dstOffset + (dstTile << TILE_SIZE_LOG2) + (blockCoordInTile << BLOCK_SIZE_LOG2) + mortonLocalPos; | ||
|
|
||
| vk::RawBufferStore<uint32_t>(pc.deviceBufferAddress + dstPixelIdx * 4u, uint32_t(packUnorm4x8(dstImage[pixelPos]))); | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| { | ||
| "enableParallelBuild": true, | ||
| "threadsPerBuildProcess" : 2, | ||
| "isExecuted": false, | ||
| "scriptPath": "", | ||
| "cmake": { | ||
| "configurations": [ "Release", "Debug", "RelWithDebInfo" ], | ||
| "buildModes": [], | ||
| "requiredOptions": [] | ||
| }, | ||
| "profiles": [ | ||
| { | ||
| "backend": "vulkan", // should be none | ||
| "platform": "windows", | ||
| "buildModes": [], | ||
| "runConfiguration": "Release", // we also need to run in Debug and RWDI because this is a foundational example | ||
| "gpuArchitectures": [] | ||
| } | ||
| ], | ||
| "dependencies": [], | ||
| "data": [ | ||
| { | ||
| "dependencies": [], | ||
| "command": [""], | ||
| "outputs": [] | ||
| } | ||
| ] | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove the unused load function. I don't think they are used anymore, right?