@@ -38,6 +38,10 @@ int ppInitialize( Api api, ppU32 flags )
3838 return hipewInit ( HIPEW_INIT_HIP );
3939 return PP_ERROR_OPEN_FAILED;
4040}
41+ Api ppGetCurAPI (ppU32 flags)
42+ {
43+ return s_api;
44+ }
4145
4246
4347// =================================
@@ -53,6 +57,11 @@ ppError cu2pp( CUresult a )
5357 return (ppError)a;
5458}
5559inline
60+ ppError cuda2pp (cudaError_t a)
61+ {
62+ return (ppError)a;
63+ }
64+ inline
5665CUcontext* ppCtx2cu ( ppCtx* a )
5766{
5867 return (CUcontext*)a;
@@ -74,12 +83,18 @@ pprtcResult nvrtc2pp( nvrtcResult a )
7483}
7584
// Backend dispatch helpers.
// Each macro expands to `if( s_api == ... ) return ...;` statements, so it has to
// be used inside a function body and be followed by a fallback `return` for the
// case where s_api matches neither API_CUDA nor API_HIP.

// Dispatch with separate call expressions: `cu`-prefixed on the CUDA side
// (status converted by cu2pp), `hip`-prefixed on the HIP side (hip2pp).
#define __PP_FUNC1( cuname, hipname ) \
	if( s_api == API_CUDA ) return cu2pp( cu##cuname ); \
	if( s_api == API_HIP ) return hip2pp( hip##hipname );
// Same shape as __PP_FUNC1, but the CUDA side is `cuda`-prefixed and converted by cuda2pp.
#define __PP_FUNC2( cudaname, hipname ) \
	if( s_api == API_CUDA ) return cuda2pp( cuda##cudaname ); \
	if( s_api == API_HIP ) return hip2pp( hip##hipname );
// #define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA || API == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP || API == API_HIP ) return hip2pp( hip##hipname );
// Single-name form: the same call expression is pasted after both `cu` and `hip`.
#define __PP_FUNC( name ) \
	if( s_api == API_CUDA ) return cu2pp( cu##name ); \
	if( s_api == API_HIP ) return hip2pp( hip##name );
// Context calls: the CUDA side gets an extra `Ctx` in front of the name, the HIP side does not.
#define __PP_CTXT_FUNC( name ) __PP_FUNC1( Ctx##name, name )
// #define __PP_CTXT_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cuCtx##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
// Runtime-compilation dispatch: `nvrtc`-prefixed vs `hiprtc`-prefixed calls,
// converted by nvrtc2pp / hiprtc2pp respectively.
#define __PPRTC_FUNC1( cuname, hipname ) \
	if( s_api == API_CUDA ) return nvrtc2pp( nvrtc##cuname ); \
	if( s_api == API_HIP ) return hiprtc2pp( hiprtc##hipname );
8292
// Emits three explicit template instantiations of `funcName`, one for each of
// API_AUTOMATIC, API_CUDA and API_HIP, all returning ppError with PPAPI linkage.
// `args` is the parenthesised parameter list, e.g. `( unsigned int Flags )`.
// The expansion already ends with `;`.
#define __PP_FUNC_INSTANCE( funcName, args ) \
	template ppError PPAPI funcName<API_AUTOMATIC> args; \
	template ppError PPAPI funcName<API_CUDA> args; \
	template ppError PPAPI funcName<API_HIP> args;
97+
8398
8499ppError PPAPI ppGetErrorName (ppError error, const char ** pStr)
85100{
@@ -106,6 +121,9 @@ ppError PPAPI ppInit(unsigned int Flags)
106121 __PP_FUNC ( Init (Flags) );
107122 return ppErrorUnknown;
108123}
124+
125+ __PP_FUNC_INSTANCE ( ppInit, (unsigned int Flags) );
126+
109127ppError PPAPI ppDriverGetVersion (int * driverVersion)
110128{
111129 __PP_FUNC ( DriverGetVersion (driverVersion) );
@@ -125,12 +143,14 @@ ppError PPAPI ppGetDeviceProperties(ppDeviceProp* props, int deviceId)
125143{
126144 if ( s_api == API_CUDA )
127145 {
128- CUdevprop p;
129- cuDeviceGetProperties ( &p, deviceId );
146+ cudaDeviceProp p;
147+ cudaError_t e = cudaGetDeviceProperties ( &p, deviceId );
148+ if (e != CUDA_SUCCESS)
149+ return ppErrorUnknown;
130150 char name[128 ];
131- cuDeviceGetName ( name, 128 , deviceId );
132- strcpy ( props->name , name );
151+ strcpy ( props->name , p.name );
133152 strcpy ( props->gcnArchName , " " );
153+ props->totalGlobalMem = p.totalGlobalMem ;
134154 printf (" todo. implement me\n " );
135155 return ppSuccess;
136156 }
@@ -268,6 +288,11 @@ ppError PPAPI ppMalloc(ppDeviceptr* dptr, size_t bytesize)
268288 __PP_FUNC1 ( MemAlloc (dptr, bytesize), Malloc ( dptr, bytesize ) );
269289 return ppErrorUnknown;
270290}
291+ ppError PPAPI ppMalloc2 (ppDeviceptr* dptr, size_t bytesize)
292+ {
293+ __PP_FUNC2 ( Malloc ((CUdeviceptr*)dptr, bytesize), Malloc (dptr, bytesize) );
294+ return ppErrorUnknown;
295+ }
271296ppError PPAPI ppMemAllocPitch (ppDeviceptr* dptr, size_t * pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes)
272297{
273298 return ppErrorUnknown;
@@ -277,8 +302,20 @@ ppError PPAPI ppFree(ppDeviceptr dptr)
277302 __PP_FUNC1 ( MemFree ( dptr ), Free ( dptr ) );
278303 return ppErrorUnknown;
279304}
305+ ppError PPAPI ppFree2 (ppDeviceptr dptr)
306+ {
307+ __PP_FUNC2 ( Free ((CUdeviceptr)dptr), Free (dptr) );
308+ return ppErrorUnknown;
309+ }
280310
281311// -------------------
312+ ppError PPAPI ppMemcpy (void *dstDevice, void * srcHost, size_t ByteCount, ppMemcpyKind kind)
313+ {
314+ __PP_FUNC2 ( Memcpy (dstDevice, srcHost, ByteCount, (cudaMemcpyKind)kind),
315+ Memcpy (dstDevice, srcHost, ByteCount, (hipMemcpyKind)kind) );
316+ return ppErrorUnknown;
317+ }
318+
282319ppError PPAPI ppMemcpyHtoD (ppDeviceptr dstDevice, void * srcHost, size_t ByteCount)
283320{
284321 __PP_FUNC1 ( MemcpyHtoD ( dstDevice, srcHost, ByteCount ),
@@ -299,7 +336,7 @@ ppError PPAPI ppMemcpyDtoD(ppDeviceptr dstDevice, ppDeviceptr srcDevice, size_t
299336
300337ppError PPAPI ppMemset (ppDeviceptr dstDevice, unsigned int ui, size_t N)
301338{
302- __PP_FUNC ( MemsetD32 ( dstDevice, ui, N ) );
339+ __PP_FUNC1 ( MemsetD8 ( (CUdeviceptr) dstDevice, ui, N ), Memset (( void *)dstDevice, ui, N) );
303340 return ppErrorUnknown;
304341}
305342
@@ -326,6 +363,12 @@ ppError PPAPI ppModuleLaunchKernel(ppFunction f, unsigned int gridDimX, unsigned
326363 ModuleLaunchKernel ( (hipFunction_t)f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, (hipStream_t)hStream, kernelParams, extra ) );
327364 return ppErrorUnknown;
328365}
366+ ppError PPAPI ppGetLastError (ppError pp_error)
367+ {
368+ __PP_FUNC2 (GetLastError ((cudaError_t)pp_error),
369+ GetLastError ((hipError_t)pp_error));
370+ return ppErrorUnknown;
371+ }
329372// -------------------
330373pprtcResult PPAPI pprtcGetErrorString (pprtcResult result)
331374{
@@ -401,8 +444,9 @@ ppError PPAPI ppPointerGetAttributes(ppPointerAttribute* attr, ppDeviceptr dptr)
401444// -----------------
402445ppError PPAPI ppStreamCreate (ppStream* stream)
403446{
404- __PP_FUNC1 ( StreamCreate ((CUstream*)stream, CU_STREAM_DEFAULT),
405- StreamCreate ((hipStream_t*)stream) );
447+ __PP_FUNC2 (StreamCreate ((cudaStream_t*)stream),
448+ StreamCreate ((hipStream_t*)stream));
449+
406450 return ppErrorUnknown;
407451}
408452
0 commit comments