Skip to content

Commit 7458174

Browse files
committed
Support for bitcode and other compilation options
1 parent 8aa523c commit 7458174

3 files changed

Lines changed: 18 additions & 20 deletions

File tree

ParallelPrimitives/RadixSort.cpp

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,11 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
157157
Kernel kernelType;
158158
};
159159

160-
const std::vector<Record> records{
161-
{ "SortSinglePassKernel", Kernel::SORT_SINGLE_PASS }, { "SortSinglePassKVKernel", Kernel::SORT_SINGLE_PASS_KV },
162-
};
160+
const std::vector<Record> records{ { "SortSinglePassKernel", Kernel::SORT_SINGLE_PASS },
161+
{ "SortSinglePassKVKernel", Kernel::SORT_SINGLE_PASS_KV },
162+
{ "GHistogram", Kernel::SORT_GHISTOGRAM },
163+
{ "OnesweepReorderKey64", Kernel::SORT_ONESWEEP_REORDER_KEY_64 },
164+
{ "OnesweepReorderKeyPair64", Kernel::SORT_ONESWEEP_REORDER_KEY_PAIR_64 } };
163165

164166

165167
for( const auto& record : records )
@@ -187,13 +189,6 @@ void RadixSort::compileKernels( const std::string& kernelPath, const std::string
187189
}
188190
}
189191

190-
// TODO: bit code support?
191-
#define LOAD_FUNC( var, kernel ) var = m_oroutils.getFunctionFromFile( m_device, currentKernelPath.c_str(), kernel, &opts );
192-
LOAD_FUNC( m_gHistogram, "gHistogram" );
193-
LOAD_FUNC( m_onesweep_reorderKey64, "onesweep_reorderKey64" );
194-
LOAD_FUNC( m_onesweep_reorderKeyPair64, "onesweep_reorderKeyPair64" );
195-
#undef LOAD_FUNC
196-
197192
}
198193

199194
void RadixSort::configure( const std::string& kernelPath, const std::string& includeDir, oroStream stream ) noexcept
@@ -245,11 +240,11 @@ void RadixSort::sort( const KeyValueSoA& src, const KeyValueSoA& dst, uint32_t n
245240
// counter for gHistogram.
246241
{
247242
int maxBlocksPerMP = 0;
248-
oroError e = oroOccupancyMaxActiveBlocksPerMultiprocessor( &maxBlocksPerMP, m_gHistogram, GHISTOGRAM_THREADS_PER_BLOCK, 0 );
243+
oroError e = oroOccupancyMaxActiveBlocksPerMultiprocessor( &maxBlocksPerMP, oroFunctions[Kernel::SORT_GHISTOGRAM], GHISTOGRAM_THREADS_PER_BLOCK, 0 );
249244
const int nBlocks = e == oroSuccess ? maxBlocksPerMP * m_props.multiProcessorCount : 2048;
250245

251246
const void* args[] = { &src.key, &n, arg_cast( m_gpSumBuffer.address() ), &startBit, arg_cast( m_gpSumCounter.address() ) };
252-
OrochiUtils::launch1D( m_gHistogram, nBlocks * GHISTOGRAM_THREADS_PER_BLOCK, args, GHISTOGRAM_THREADS_PER_BLOCK, 0, stream );
247+
OrochiUtils::launch1D( oroFunctions[Kernel::SORT_GHISTOGRAM], nBlocks * GHISTOGRAM_THREADS_PER_BLOCK, args, GHISTOGRAM_THREADS_PER_BLOCK, 0, stream );
253248
}
254249

255250
auto s = src;
@@ -264,12 +259,12 @@ void RadixSort::sort( const KeyValueSoA& src, const KeyValueSoA& dst, uint32_t n
264259
if( keyPair )
265260
{
266261
const void* args[] = { &s.key, &d.key, &s.value, &d.value, &n, arg_cast( m_gpSumBuffer.address() ), arg_cast( m_lookbackBuffer.address() ), arg_cast( m_tailIterator.address() ), &startBit, &i };
267-
OrochiUtils::launch1D( m_onesweep_reorderKeyPair64, numberOfBlocks * REORDER_NUMBER_OF_THREADS_PER_BLOCK, args, REORDER_NUMBER_OF_THREADS_PER_BLOCK, 0, stream );
262+
OrochiUtils::launch1D( oroFunctions[Kernel::SORT_ONESWEEP_REORDER_KEY_PAIR_64], numberOfBlocks * REORDER_NUMBER_OF_THREADS_PER_BLOCK, args, REORDER_NUMBER_OF_THREADS_PER_BLOCK, 0, stream );
268263
}
269264
else
270265
{
271266
const void* args[] = { &s.key, &d.key, &n, arg_cast( m_gpSumBuffer.address() ), arg_cast( m_lookbackBuffer.address() ), arg_cast( m_tailIterator.address() ), &startBit, &i };
272-
OrochiUtils::launch1D( m_onesweep_reorderKey64, numberOfBlocks * REORDER_NUMBER_OF_THREADS_PER_BLOCK, args, REORDER_NUMBER_OF_THREADS_PER_BLOCK, 0, stream );
267+
OrochiUtils::launch1D( oroFunctions[Kernel::SORT_ONESWEEP_REORDER_KEY_64], numberOfBlocks * REORDER_NUMBER_OF_THREADS_PER_BLOCK, args, REORDER_NUMBER_OF_THREADS_PER_BLOCK, 0, stream );
273268
}
274269
std::swap( s, d );
275270
}

ParallelPrimitives/RadixSort.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ class RadixSort final
6565
{
6666
SORT_SINGLE_PASS,
6767
SORT_SINGLE_PASS_KV,
68+
SORT_GHISTOGRAM,
69+
SORT_ONESWEEP_REORDER_KEY_64,
70+
SORT_ONESWEEP_REORDER_KEY_PAIR_64
6871
};
6972

7073
std::unordered_map<Kernel, oroFunction> oroFunctions;

ParallelPrimitives/RadixSortKernels.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ __device__ inline T scanExclusive( T prefix, T* sMemIO, int nElement )
322322
return sum;
323323
}
324324

325-
extern "C" __global__ void gHistogram( RADIX_SORT_KEY_TYPE* inputs, u32 numberOfInputs, u32* gpSumBuffer, u32 startBits, u32* counter )
325+
extern "C" __global__ void GHistogram( RADIX_SORT_KEY_TYPE* inputs, u32 numberOfInputs, u32* gpSumBuffer, u32 startBits, u32* counter )
326326
{
327327
__shared__ u32 localCounters[sizeof( RADIX_SORT_KEY_TYPE )][BIN_SIZE];
328328

@@ -381,7 +381,7 @@ extern "C" __global__ void gHistogram( RADIX_SORT_KEY_TYPE* inputs, u32 numberOf
381381
}
382382

383383
template <bool keyPair>
384-
__device__ __forceinline__ void onesweep_reorder( RADIX_SORT_KEY_TYPE* inputKeys, RADIX_SORT_KEY_TYPE* outputKeys, RADIX_SORT_VALUE_TYPE* inputValues, RADIX_SORT_VALUE_TYPE* outputValues, u32 numberOfInputs, u32* gpSumBuffer,
384+
__device__ __forceinline__ void OnesweepReorder( RADIX_SORT_KEY_TYPE* inputKeys, RADIX_SORT_KEY_TYPE* outputKeys, RADIX_SORT_VALUE_TYPE* inputValues, RADIX_SORT_VALUE_TYPE* outputValues, u32 numberOfInputs, u32* gpSumBuffer,
385385
volatile u64* lookBackBuffer, u32* tailIterator, u32 startBits, u32 iteration )
386386
{
387387
__shared__ u32 pSum[BIN_SIZE];
@@ -671,15 +671,15 @@ __device__ __forceinline__ void onesweep_reorder( RADIX_SORT_KEY_TYPE* inputKeys
671671
}
672672
}
673673
}
674-
extern "C" __global__ void __launch_bounds__( REORDER_NUMBER_OF_THREADS_PER_BLOCK ) onesweep_reorderKey64( RADIX_SORT_KEY_TYPE* inputKeys, RADIX_SORT_KEY_TYPE* outputKeys, u32 numberOfInputs, u32* gpSumBuffer, volatile u64* lookBackBuffer, u32* tailIterator, u32 startBits,
674+
extern "C" __global__ void __launch_bounds__( REORDER_NUMBER_OF_THREADS_PER_BLOCK ) OnesweepReorderKey64( RADIX_SORT_KEY_TYPE* inputKeys, RADIX_SORT_KEY_TYPE* outputKeys, u32 numberOfInputs, u32* gpSumBuffer, volatile u64* lookBackBuffer, u32* tailIterator, u32 startBits,
675675
u32 iteration )
676676
{
677-
onesweep_reorder<false /*keyPair*/>( inputKeys, outputKeys, nullptr, nullptr, numberOfInputs, gpSumBuffer, lookBackBuffer, tailIterator, startBits, iteration );
677+
OnesweepReorder<false /*keyPair*/>( inputKeys, outputKeys, nullptr, nullptr, numberOfInputs, gpSumBuffer, lookBackBuffer, tailIterator, startBits, iteration );
678678
}
679-
extern "C" __global__ void __launch_bounds__( REORDER_NUMBER_OF_THREADS_PER_BLOCK ) onesweep_reorderKeyPair64( RADIX_SORT_KEY_TYPE* inputKeys, RADIX_SORT_KEY_TYPE* outputKeys, RADIX_SORT_VALUE_TYPE* inputValues, RADIX_SORT_VALUE_TYPE* outputValues,
679+
extern "C" __global__ void __launch_bounds__( REORDER_NUMBER_OF_THREADS_PER_BLOCK ) OnesweepReorderKeyPair64( RADIX_SORT_KEY_TYPE* inputKeys, RADIX_SORT_KEY_TYPE* outputKeys, RADIX_SORT_VALUE_TYPE* inputValues, RADIX_SORT_VALUE_TYPE* outputValues,
680680
u32 numberOfInputs,
681681
u32* gpSumBuffer,
682682
volatile u64* lookBackBuffer, u32* tailIterator, u32 startBits, u32 iteration )
683683
{
684-
onesweep_reorder<true /*keyPair*/>( inputKeys, outputKeys, inputValues, outputValues, numberOfInputs, gpSumBuffer, lookBackBuffer, tailIterator, startBits, iteration );
684+
OnesweepReorder<true /*keyPair*/>( inputKeys, outputKeys, inputValues, outputValues, numberOfInputs, gpSumBuffer, lookBackBuffer, tailIterator, startBits, iteration );
685685
}

0 commit comments

Comments
 (0)