Skip to content

Commit 56b5f39

Browse files
introduce TLAS export
1 parent e2f981f commit 56b5f39

2 files changed

Lines changed: 167 additions & 7 deletions

File tree

include/nbl/asset/ICPUScene.h

Lines changed: 166 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,11 @@ class ICPUScene final : public IAsset, public IScene
110110
struct SInstanceStorage final
111111
{
112112
public:
113-
inline SInstanceStorage(const size_t size=1) : morphTargets(size), materials(size), initialTransforms(size) {}
113+
// Builds the storage with `size` entries in each of the three per-instance arrays (none by default).
// NOTE(review): the entries created here are default-filled, whereas `resize` fills new materials with
// `InvalidMaterialTable` and new transforms with `SGeometryReference{}.transform` - confirm this is intended.
inline SInstanceStorage(const size_t size=0) :
	morphTargets(size),
	materials(size),
	initialTransforms(size)
{
}
114114

115115
// Discards every stored initial transform; morph targets and materials are left alone.
inline void clearInitialTransforms()
{
	initialTransforms.clear();
}
116116

117-
inline operator bool() const
117+
explicit inline operator bool() const
118118
{
119119
if (morphTargets.size()!=materials.size())
120120
return false;
@@ -131,23 +131,24 @@ class ICPUScene final : public IAsset, public IScene
131131
initialTransforms.reserve(newSize);
132132
}
133133

134-
inline void resize(const size_t newSize)
134+
inline void resize(const size_t newSize, const bool forceTransformStorage=false)
135135
{
136136
morphTargets.resize(newSize);
137137
materials.resize(newSize,InvalidMaterialTable);
138-
if (!initialTransforms.empty())
138+
if (forceTransformStorage || !initialTransforms.empty())
139139
initialTransforms.resize(newSize,ICPUGeometryCollection::SGeometryReference{}.transform);
140140
}
141141

142142
inline void erase(const size_t first, const size_t last)
143143
{
144144
morphTargets.erase(morphTargets.begin()+first,morphTargets.begin()+last);
145145
materials.erase(materials.begin()+first, materials.begin()+last);
146-
initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last);
146+
if (!initialTransforms.empty())
147+
initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last);
147148
}
148149
// Removes the single instance at index `ix` by forwarding to the range overload with [ix,ix+1).
inline void erase(const size_t ix)
{
	const size_t onePastIx = ix+1;
	erase(ix,onePastIx);
}
149150

150-
inline size_t size() const {return morphTargets.size();}
151+
// Number of instances, taken from the morph target array.
inline uint64_t size() const
{
	const uint64_t instanceCount = morphTargets.size();
	return instanceCount;
}
151152

152153
// Mutable view over the per-instance morph targets.
inline std::span<core::smart_refctd_ptr<ICPUMorphTargets>> getMorphTargets()
{
	return morphTargets;
}
153154
// Read-only view over the per-instance morph targets.
inline std::span<const core::smart_refctd_ptr<ICPUMorphTargets>> getMorphTargets() const
{
	return morphTargets;
}
@@ -169,6 +170,165 @@ class ICPUScene final : public IAsset, public IScene
169170
// TODO: animations (keyframed transforms, skeleton instance)
170171
};
171172

173+
// utility
174+
class ITLASExporter
175+
{
176+
protected:
177+
using instance_flags_t = asset::ICPUTopLevelAccelerationStructure::INSTANCE_FLAGS;
178+
179+
inline ITLASExporter(const SInstanceStorage& _storage) : m_storage(_storage) {}
180+
181+
const SInstanceStorage& m_storage;
182+
183+
public:
184+
virtual inline ICPUMorphTargets::index_t getTargetIndex(const uint32_t instanceIx) {return ICPUMorphTargets::index_t{0u};}
185+
186+
virtual inline instance_flags_t getInstanceFlags(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx)
187+
{
188+
// TODO: could derive from the material table if we want FORCE_OPAQUE_BIT or FORCE_NO_OPAQUE_BIT but its a whole instance thing
189+
return instance_flags_t::TRIANGLE_FACING_CULL_DISABLE_BIT;
190+
}
191+
192+
virtual inline uint32_t getInstanceIndex(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) {return instanceIx;}
193+
194+
// default
195+
virtual inline uint32_t getSBTOffset(const material_table_offset_t materialsBeginIndex)
196+
{
197+
return 0;
198+
}
199+
200+
virtual inline uint8_t getMask(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx)
201+
{
202+
return 0xFF;
203+
}
204+
205+
virtual inline hlsl::float32_t3x4 getTransform(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx)
206+
{
207+
if (m_storage.initialTransforms.empty())
208+
return hlsl::math::linalg::diagonal<hlsl::float32_t3x4>(1.f);
209+
else
210+
return m_storage.initialTransforms[instanceIx];
211+
}
212+
213+
// TODO: when we allow non-polygon geometries in the collection, we need to return a named pair, one BLAS for tris and one for AABBs
214+
virtual core::smart_refctd_ptr<ICPUBottomLevelAccelerationStructure> getBLAS(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) = 0;
215+
216+
struct SResult
217+
{
218+
explicit inline operator bool() const {return instances && !instances->empty();}
219+
220+
core::smart_refctd_dynamic_array<ICPUTopLevelAccelerationStructure::PolymorphicInstance> instances = nullptr;
221+
bool allInstancesValid = false;
222+
};
223+
// TODO: SBT stuff
224+
inline SResult operator()()
225+
{
226+
// this is because most GPUs report 16M as max instance count, and there's only 24 bits in `instanceCustomIndex`
227+
constexpr uint64_t MaxInstanceCount = 0x1u<<24;
228+
const uint64_t instanceCount = m_storage.size();
229+
if (instanceCount>MaxInstanceCount)
230+
return {};
231+
232+
std::vector<ICPUTopLevelAccelerationStructure::PolymorphicInstance> instances;
233+
instances.reserve(instanceCount*2);
234+
bool allInstancesValid = true;
235+
for (auto i=0u; i<instanceCount; i++)
236+
{
237+
// TODO: deal with SRT motion later when we add keyframed animations
238+
const auto targetIx = getTargetIndex(i);
239+
const auto* const targets = m_storage.morphTargets[i].get();
240+
if (!targets || !targets->valid())
241+
{
242+
allInstancesValid = false;
243+
continue;
244+
}
245+
const auto* const collection = targets->getTargets()[targetIx.value].geoCollection.get();
246+
ICPUTopLevelAccelerationStructure::StaticInstance inst;
247+
inst.base.blas = getBLAS(i,targetIx);
248+
if (!inst.base.blas)
249+
{
250+
allInstancesValid = false;
251+
continue;
252+
}
253+
inst.transform = getTransform(i,targetIx);
254+
const uint32_t customIndex = getInstanceIndex(i,targetIx);
255+
if (customIndex>=MaxInstanceCount)
256+
{
257+
allInstancesValid = false;
258+
continue;
259+
}
260+
inst.base.instanceCustomIndex = customIndex;
261+
inst.base.mask = getMask(i,targetIx);
262+
const auto targetTableOffset = m_storage.materials[i]+targets->getGeometryExclusiveCount(targetIx);
263+
const auto sbtOffset = getSBTOffset(targetTableOffset);
264+
if (sbtOffset>MaxInstanceCount+collection->getGeometries().size())
265+
{
266+
allInstancesValid = false;
267+
continue;
268+
}
269+
inst.base.instanceShaderBindingTableRecordOffset = sbtOffset;
270+
inst.base.flags = static_cast<uint32_t>(getInstanceFlags(i,targetIx));
271+
instances.emplace_back().instance = std::move(inst);
272+
}
273+
// TODO: adjust BLAS geometry flags according to materials set opaqueness and NO_DUPLICATE_ANY_HIT_INVOCATION_BIT
274+
SResult retval = {.instances=core::make_refctd_dynamic_array<decltype(SResult::instances)>(instanceCount),.allInstancesValid=allInstancesValid};
275+
std::move(instances.begin(),instances.end(),retval.instances->begin());
276+
return retval;
277+
}
278+
};
279+
class CDefaultTLASExporter final : public ITLASExporter
280+
{
281+
using triangles_t = ICPUBottomLevelAccelerationStructure::Triangles<ICPUBuffer>;
282+
core::vector<triangles_t> triangleScratch;
283+
core::vector<uint32_t> primitiveCountScratch;
284+
285+
public:
286+
inline CDefaultTLASExporter(const SInstanceStorage& _storage) : ITLASExporter(_storage) {}
287+
288+
inline core::smart_refctd_ptr<ICPUBottomLevelAccelerationStructure> getBLAS(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) override
289+
{
290+
const auto* const targets = m_storage.morphTargets[instanceIx].get();
291+
const auto* const collection = targets->getTargets()[targetIx.value].geoCollection.get();
292+
// TODO: use emplace so erase can be faster
293+
auto& entry = m_blasCache[collection];
294+
if (!entry)
295+
{
296+
entry = core::make_smart_refctd_ptr<ICPUBottomLevelAccelerationStructure>();
297+
//
298+
const auto& geometries = collection->getGeometries();
299+
// deal with triangles
300+
{
301+
triangleScratch.resize(geometries.size());
302+
primitiveCountScratch.resize(geometries.size());
303+
const auto usedScratchEnd = ICPUGeometryCollection::CBLASExporter(geometries)(triangleScratch.begin(),primitiveCountScratch.data());
304+
// TODO: report some error that a there was an unsupported geometry
305+
//triangleScratch.end()!=usedScratchEnd
306+
const auto actualGeoCount = std::distance(triangleScratch.begin(),usedScratchEnd);
307+
if (actualGeoCount==0)
308+
{
309+
m_blasCache.erase(m_blasCache.find(collection));
310+
return nullptr;
311+
}
312+
auto triGeos = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<triangles_t>>(actualGeoCount);
313+
std::move(triangleScratch.begin(),usedScratchEnd,triGeos->begin());
314+
auto primCounts = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<uint32_t>>(actualGeoCount);
315+
std::copy_n(primitiveCountScratch.data(),actualGeoCount,primCounts->data());
316+
entry->setGeometries(std::move(triGeos),std::move(primCounts));
317+
}
318+
using build_f = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
319+
// no virtual callbacks because its easy to tell what geometry collection the BLAS came from by looking at the cache after the export
320+
// TODO: Allow Update when we figure out morph targets/skinning
321+
// TODO: GEOMETRY_TYPE_IS_AABB_BIT for non-polygon geometry collections
322+
entry->setBuildFlags(build_f::PREFER_FAST_TRACE_BIT|build_f::ALLOW_COMPACTION_BIT);
323+
entry->setContentHash(entry->computeContentHash());
324+
}
325+
return entry;
326+
}
327+
328+
// when doing animations, it good to copy and reuse this with dummy BLASes but where content hashes are already the same
329+
core::unordered_map<const ICPUGeometryCollection*,core::smart_refctd_ptr<ICPUBottomLevelAccelerationStructure>> m_blasCache;
330+
};
331+
172332
//
173333
// Mutable access to the scene's instance storage.
inline SInstanceStorage& getInstances()
{
	return m_instances;
}
174334
// Read-only access to the scene's instance storage.
inline const SInstanceStorage& getInstances() const
{
	return m_instances;
}

0 commit comments

Comments
 (0)