-
Notifications
You must be signed in to change notification settings - Fork 43
Expand file tree
/
Copy pathcuew.h
More file actions
1555 lines (1507 loc) · 127 KB
/
cuew.h
File metadata and controls
1555 lines (1507 loc) · 127 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright 2011-2014 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*
* Modifications made by Advanced Micro Devices, Inc.:
* Copyright(C) 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* The modifications include updates to maintain an up-to-date API,
* enhancing compatibility in response to evolving technical standards.
* These changes are designed to augment the original work by the Blender Foundation,
* ensuring the software remains relevant and efficient for its intended applications.
*
* The modified software is provided under the Apache License, Version 2.0.
* For more details, see the License above.
*/
#ifndef __CUEW_H__
#define __CUEW_H__
#include <stdlib.h>
#include <stdint.h>
#ifdef OROCHI_ENABLE_CUEW
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <cuda_profiler_api.h>
#include <nvrtc.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _WIN32
# define CUDAAPI __stdcall
# define CUDA_CB __stdcall
#else
# define CUDAAPI
# define CUDA_CB
#endif
// Local stand-ins for the OpenGL scalar types; both are plain unsigned int.
// NOTE(review): presumably these let GL-interop signatures elsewhere in this
// header compile without including the GL headers — confirm.
typedef unsigned int GLuint;
typedef unsigned int GLenum;
#pragma region OROCHI_SUMMONER_REGION_cuew_h
/////
///// THIS REGION HAS BEEN AUTOMATICALLY GENERATED BY OROCHI SUMMONER.
///// Manual modification of this region is not recommended.
/////
// CUDA version used to make this Orochi:
// (check the github of Orochi if you want it based on a different version)
// CUDART_VERSION = 12020
// CUDA_VERSION = 12020
// Function-type aliases for the cuArray* family. Each alias names a function
// type (not a pointer type) that returns CUresult and carries the CUDAAPI
// calling-convention macro.
// NOTE(review): the t<Name> aliases presumably type the like-named driver
// symbols resolved at run time — confirm against the loader code.
typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray * pHandle, const CUDA_ARRAY3D_DESCRIPTOR * pAllocateArray);
typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR * pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray * pHandle, const CUDA_ARRAY_DESCRIPTOR * pAllocateArray);
typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR * pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS * memoryRequirements, CUarray array, CUdevice device);
typedef CUresult CUDAAPI tcuArrayGetPlane(CUarray * pPlaneArray, CUarray hArray, unsigned int planeIdx);
typedef CUresult CUDAAPI tcuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES * sparseProperties, CUarray array);
// Function-type aliases for the cuCoredump* get/set attribute family. All four
// share the same parameter list: a CUcoredumpSettings selector, an untyped
// value buffer, and a pointer to its size.
typedef CUresult CUDAAPI tcuCoredumpGetAttribute(CUcoredumpSettings attrib, void * value, size_t * size);
typedef CUresult CUDAAPI tcuCoredumpGetAttributeGlobal(CUcoredumpSettings attrib, void * value, size_t * size);
typedef CUresult CUDAAPI tcuCoredumpSetAttribute(CUcoredumpSettings attrib, void * value, size_t * size);
typedef CUresult CUDAAPI tcuCoredumpSetAttributeGlobal(CUcoredumpSettings attrib, void * value, size_t * size);
// Function-type aliases for the cuCtx* family. Each alias names a function
// type (not a pointer type) that returns CUresult and carries the CUDAAPI
// calling-convention macro.
// NOTE(review): the t<Name> aliases presumably type the like-named driver
// symbols resolved at run time — confirm against the loader code.
typedef CUresult CUDAAPI tcuCtxAttach(CUcontext * pctx, unsigned int flags);
typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext * pctx, unsigned int flags, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxCreate_v3(CUcontext * pctx, CUexecAffinityParam * paramsArray, int numParams, unsigned int flags, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int * version);
typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache * pconfig);
typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext * pctx);
typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice * device);
typedef CUresult CUDAAPI tcuCtxGetExecAffinity(CUexecAffinityParam * pExecAffinity, CUexecAffinityType type);
typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned int * flags);
typedef CUresult CUDAAPI tcuCtxGetId(CUcontext ctx, unsigned long long * ctxId);
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t * pvalue, CUlimit limit);
typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig * pConfig);
typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int * leastPriority, int * greatestPriority);
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext * pctx);
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
// Explicit (void): before C23 an empty () in C leaves the parameters
// unspecified, so stray arguments would not be diagnosed. (void) is identical
// to () in C++.
typedef CUresult CUDAAPI tcuCtxResetPersistingL2Cache(void);
typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxSetFlags(unsigned int flags);
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
// Explicit (void) for the same reason as tcuCtxResetPersistingL2Cache.
typedef CUresult CUDAAPI tcuCtxSynchronize(void);
// Function-type aliases for the cuDestroyExternal* pair; each takes the
// external-memory or external-semaphore handle by value and returns CUresult.
typedef CUresult CUDAAPI tcuDestroyExternalMemory(CUexternalMemory extMem);
typedef CUresult CUDAAPI tcuDestroyExternalSemaphore(CUexternalSemaphore extSem);
// Function-type aliases for the cuDevice* family (including the
// cuDevicePrimaryCtx* variants). Each alias names a function type (not a
// pointer type) returning CUresult under the CUDAAPI calling-convention macro.
// NOTE(review): the t<Name> aliases presumably type the like-named driver
// symbols resolved at run time — confirm against the loader code.
typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int * canAccessPeer, CUdevice dev, CUdevice peerDev);
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int * major, int * minor, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice * device, int ordinal);
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int * pi, CUdevice_attribute attrib, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice * dev, const char * pciBusId);
typedef CUresult CUDAAPI tcuDeviceGetCount(int * count);
typedef CUresult CUDAAPI tcuDeviceGetDefaultMemPool(CUmemoryPool * pool_out, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetExecAffinitySupport(int * pi, CUexecAffinityType type, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void * value);
typedef CUresult CUDAAPI tcuDeviceGetLuid(char * luid, unsigned int * deviceNodeMask, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetMemPool(CUmemoryPool * pool, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetName(char * name, int len, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetNvSciSyncAttributes(void * nvSciSyncAttrList, CUdevice dev, int flags);
typedef CUresult CUDAAPI tcuDeviceGetP2PAttribute(int * value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char * pciBusId, int len, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop * prop, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetTexture1DLinearMaxWidth(size_t * maxWidthInElements, CUarray_format format, unsigned int numChannels, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetUuid(CUuuid * uuid, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetUuid_v2(CUuuid * uuid, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGraphMemTrim(CUdevice device);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned int * flags, int * active);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease_v2(CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset_v2(CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext * pctx, CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags);
typedef CUresult CUDAAPI tcuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void * value);
typedef CUresult CUDAAPI tcuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool);
typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t * bytes, CUdevice dev);
// Function-type aliases for cuDriverGetVersion, the cuEvent* family, the
// cuExternalMemoryGetMapped* pair and cuFlushGPUDirectRDMAWrites. Each alias
// names a function type (not a pointer type) returning CUresult under the
// CUDAAPI calling-convention macro.
typedef CUresult CUDAAPI tcuDriverGetVersion(int * driverVersion);
typedef CUresult CUDAAPI tcuEventCreate(CUevent * phEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventElapsedTime(float * pMilliseconds, CUevent hStart, CUevent hEnd);
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
typedef CUresult CUDAAPI tcuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags);
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedBuffer(CUdeviceptr * devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC * bufferDesc);
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray * mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC * mipmapDesc);
typedef CUresult CUDAAPI tcuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope);
// Function-type aliases for the cuFunc* family; every signature takes a
// CUfunction handle and returns CUresult under the CUDAAPI calling-convention
// macro.
typedef CUresult CUDAAPI tcuFuncGetAttribute(int * pi, CUfunction_attribute attrib, CUfunction hfunc);
typedef CUresult CUDAAPI tcuFuncGetModule(CUmodule * hmod, CUfunction hfunc);
typedef CUresult CUDAAPI tcuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
// Function-type aliases for the cuGet* lookups: error name/string (both write
// a const char * through pStr), export table, and proc-address resolution.
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char ** pStr);
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char ** pStr);
typedef CUresult CUDAAPI tcuGetExportTable(const void ** ppExportTable, const CUuuid * pExportTableId);
typedef CUresult CUDAAPI tcuGetProcAddress_v2(const char * symbol, void ** pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult * symbolStatus);
// Function-type aliases for the cuGraph* family (graph construction, node
// get/set parameters, executable-graph updates, launch/upload). Each alias
// names a function type (not a pointer type) returning CUresult under the
// CUDAAPI calling-convention macro. The cuGraphAdd*Node signatures share a
// common prefix: output node, owning graph, dependency array and its length.
// NOTE(review): the t<Name> aliases presumably type the like-named driver
// symbols resolved at run time — confirm against the loader code.
typedef CUresult CUDAAPI tcuGraphAddBatchMemOpNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphAddChildGraphNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, CUgraph childGraph);
typedef CUresult CUDAAPI tcuGraphAddDependencies(CUgraph hGraph, const CUgraphNode * from, const CUgraphNode * to, size_t numDependencies);
typedef CUresult CUDAAPI tcuGraphAddEmptyNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies);
typedef CUresult CUDAAPI tcuGraphAddEventRecordNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, CUevent event);
typedef CUresult CUDAAPI tcuGraphAddEventWaitNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, CUevent event);
typedef CUresult CUDAAPI tcuGraphAddExternalSemaphoresSignalNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphAddExternalSemaphoresWaitNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphAddHostNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphAddKernelNode_v2(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphAddMemAllocNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphAddMemFreeNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuGraphAddMemcpyNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_MEMCPY3D * copyParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphAddMemsetNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS * memsetParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphAddNode(CUgraphNode * phGraphNode, CUgraph hGraph, const CUgraphNode * dependencies, size_t numDependencies, CUgraphNodeParams * nodeParams);
typedef CUresult CUDAAPI tcuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS * nodeParams_out);
typedef CUresult CUDAAPI tcuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph * phGraph);
typedef CUresult CUDAAPI tcuGraphClone(CUgraph * phGraphClone, CUgraph originalGraph);
typedef CUresult CUDAAPI tcuGraphCreate(CUgraph * phGraph, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphDebugDotPrint(CUgraph hGraph, const char * path, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphDestroy(CUgraph hGraph);
typedef CUresult CUDAAPI tcuGraphDestroyNode(CUgraphNode hNode);
typedef CUresult CUDAAPI tcuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent * event_out);
typedef CUresult CUDAAPI tcuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent * event_out);
typedef CUresult CUDAAPI tcuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph);
typedef CUresult CUDAAPI tcuGraphExecDestroy(CUgraphExec hGraphExec);
typedef CUresult CUDAAPI tcuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t * flags);
typedef CUresult CUDAAPI tcuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphExecKernelNodeSetParams_v2(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D * copyParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS * memsetParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams * nodeParams);
typedef CUresult CUDAAPI tcuGraphExecUpdate_v2(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphExecUpdateResultInfo * resultInfo);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS * params_out);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS * params_out);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphGetEdges(CUgraph hGraph, CUgraphNode * from, CUgraphNode * to, size_t * numEdges);
typedef CUresult CUDAAPI tcuGraphGetNodes(CUgraph hGraph, CUgraphNode * nodes, size_t * numNodes);
typedef CUresult CUDAAPI tcuGraphGetRootNodes(CUgraph hGraph, CUgraphNode * rootNodes, size_t * numRootNodes);
typedef CUresult CUDAAPI tcuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphInstantiateWithFlags(CUgraphExec * phGraphExec, CUgraph hGraph, unsigned long long flags);
typedef CUresult CUDAAPI tcuGraphInstantiateWithParams(CUgraphExec * phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS * instantiateParams);
typedef CUresult CUDAAPI tcuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src);
typedef CUresult CUDAAPI tcuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue * value_out);
typedef CUresult CUDAAPI tcuGraphKernelNodeGetParams_v2(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue * value);
typedef CUresult CUDAAPI tcuGraphKernelNodeSetParams_v2(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS * params_out);
typedef CUresult CUDAAPI tcuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr * dptr_out);
typedef CUresult CUDAAPI tcuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D * nodeParams);
typedef CUresult CUDAAPI tcuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D * nodeParams);
typedef CUresult CUDAAPI tcuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS * nodeParams);
typedef CUresult CUDAAPI tcuGraphNodeFindInClone(CUgraphNode * phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph);
typedef CUresult CUDAAPI tcuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode * dependencies, size_t * numDependencies);
typedef CUresult CUDAAPI tcuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode * dependentNodes, size_t * numDependentNodes);
typedef CUresult CUDAAPI tcuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int * isEnabled);
typedef CUresult CUDAAPI tcuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType * type);
typedef CUresult CUDAAPI tcuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled);
typedef CUresult CUDAAPI tcuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams * nodeParams);
typedef CUresult CUDAAPI tcuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count);
typedef CUresult CUDAAPI tcuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode * from, const CUgraphNode * to, size_t numDependencies);
typedef CUresult CUDAAPI tcuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphUpload(CUgraphExec hGraphExec, CUstream hStream);
// Function-type aliases for the cuGraphics* family; every signature operates
// on CUgraphicsResource handles and returns CUresult under the CUDAAPI
// calling-convention macro.
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource * resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray * pMipmappedArray, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr * pDevPtr, size_t * pSize, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray * pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource * resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
// Function-type aliases for cuImportExternal*, cuInit and the cuIpc* family.
// Each alias names a function type (not a pointer type) returning CUresult
// under the CUDAAPI calling-convention macro.
typedef CUresult CUDAAPI tcuImportExternalMemory(CUexternalMemory * extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC * memHandleDesc);
typedef CUresult CUDAAPI tcuImportExternalSemaphore(CUexternalSemaphore * extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC * semHandleDesc);
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle * pHandle, CUevent event);
typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle * pHandle, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent * phEvent, CUipcEventHandle handle);
typedef CUresult CUDAAPI tcuIpcOpenMemHandle_v2(CUdeviceptr * pdptr, CUipcMemHandle handle, unsigned int Flags);
// Function-type aliases for the cuKernel* family; every signature takes a
// CUkernel handle and returns CUresult under the CUDAAPI calling-convention
// macro.
typedef CUresult CUDAAPI tcuKernelGetAttribute(int * pi, CUfunction_attribute attrib, CUkernel kernel, CUdevice dev);
typedef CUresult CUDAAPI tcuKernelGetFunction(CUfunction * pFunc, CUkernel kernel);
typedef CUresult CUDAAPI tcuKernelSetAttribute(CUfunction_attribute attrib, int val, CUkernel kernel, CUdevice dev);
typedef CUresult CUDAAPI tcuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, CUdevice dev);
// Function-type aliases for the cuLaunch* family. Each alias names a function
// type (not a pointer type) returning CUresult under the CUDAAPI
// calling-convention macro.
// NOTE(review): the t<Name> aliases presumably type the like-named driver
// symbols resolved at run time — confirm against the loader code.
typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
typedef CUresult CUDAAPI tcuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void ** kernelParams);
typedef CUresult CUDAAPI tcuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS * launchParamsList, unsigned int numDevices, unsigned int flags);
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
typedef CUresult CUDAAPI tcuLaunchHostFunc(CUstream hStream, CUhostFn fn, void * userData);
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void ** kernelParams, void ** extra);
typedef CUresult CUDAAPI tcuLaunchKernelEx(const CUlaunchConfig * config, CUfunction f, void ** kernelParams, void ** extra);
// Function-type aliases for the cuLibrary* family; every signature involves a
// CUlibrary handle (by value, or by pointer for the two load variants) and
// returns CUresult under the CUDAAPI calling-convention macro.
typedef CUresult CUDAAPI tcuLibraryGetGlobal(CUdeviceptr * dptr, size_t * bytes, CUlibrary library, const char * name);
typedef CUresult CUDAAPI tcuLibraryGetKernel(CUkernel * pKernel, CUlibrary library, const char * name);
typedef CUresult CUDAAPI tcuLibraryGetManaged(CUdeviceptr * dptr, size_t * bytes, CUlibrary library, const char * name);
typedef CUresult CUDAAPI tcuLibraryGetModule(CUmodule * pMod, CUlibrary library);
typedef CUresult CUDAAPI tcuLibraryGetUnifiedFunction(void ** fptr, CUlibrary library, const char * symbol);
typedef CUresult CUDAAPI tcuLibraryLoadData(CUlibrary * library, const void * code, CUjit_option * jitOptions, void ** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption * libraryOptions, void ** libraryOptionValues, unsigned int numLibraryOptions);
typedef CUresult CUDAAPI tcuLibraryLoadFromFile(CUlibrary * library, const char * fileName, CUjit_option * jitOptions, void ** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption * libraryOptions, void ** libraryOptionValues, unsigned int numLibraryOptions);
typedef CUresult CUDAAPI tcuLibraryUnload(CUlibrary library);
// Function-type aliases for the cuLink* family; every signature involves a
// CUlinkState (by value, or as the stateOut pointer of tcuLinkCreate_v2) and
// returns CUresult under the CUDAAPI calling-convention macro.
typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void * data, size_t size, const char * name, unsigned int numOptions, CUjit_option * options, void ** optionValues);
typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char * path, unsigned int numOptions, CUjit_option * options, void ** optionValues);
typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void ** cubinOut, size_t * sizeOut);
typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned int numOptions, CUjit_option * options, void ** optionValues, CUlinkState * stateOut);
typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
typedef CUresult CUDAAPI tcuMemAddressFree(CUdeviceptr ptr, size_t size);
typedef CUresult CUDAAPI tcuMemAddressReserve(CUdeviceptr * ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device);
typedef CUresult CUDAAPI tcuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location);
typedef CUresult CUDAAPI tcuMemAllocAsync(CUdeviceptr * dptr, size_t bytesize, CUstream hStream);
typedef CUresult CUDAAPI tcuMemAllocFromPoolAsync(CUdeviceptr * dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream);
typedef CUresult CUDAAPI tcuMemAllocHost_v2(void ** pp, size_t bytesize);
typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr * dptr, size_t bytesize, unsigned int flags);
typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr * dptr, size_t * pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr * dptr, size_t bytesize);
typedef CUresult CUDAAPI tcuMemCreate(CUmemGenericAllocationHandle * handle, size_t size, const CUmemAllocationProp * prop, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemExportToShareableHandle(void * shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemFreeAsync(CUdeviceptr dptr, CUstream hStream);
typedef CUresult CUDAAPI tcuMemFreeHost(void * p);
typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemGetAccess(unsigned long long * flags, const CUmemLocation * location, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr * pbase, size_t * psize, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemGetAllocationGranularity(size_t * granularity, const CUmemAllocationProp * prop, CUmemAllocationGranularity_flags option);
typedef CUresult CUDAAPI tcuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp * prop, CUmemGenericAllocationHandle handle);
typedef CUresult CUDAAPI tcuMemGetHandleForAddressRange(void * handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t * free, size_t * total);
typedef CUresult CUDAAPI tcuMemHostAlloc(void ** pp, size_t bytesize, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr * pdptr, void * p, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int * pFlags, void * p);
typedef CUresult CUDAAPI tcuMemHostRegister_v2(void * p, size_t bytesize, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostUnregister(void * p);
typedef CUresult CUDAAPI tcuMemImportFromShareableHandle(CUmemGenericAllocationHandle * handle, void * osHandle, CUmemAllocationHandleType shHandleType);
typedef CUresult CUDAAPI tcuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemMapArrayAsync(CUarrayMapInfo * mapInfoList, unsigned int count, CUstream hStream);
/* Function types for the cuMemPool* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuMemPoolCreate(CUmemoryPool *pool, const CUmemPoolProps *poolProps);
typedef CUresult CUDAAPI tcuMemPoolDestroy(CUmemoryPool pool);
typedef CUresult CUDAAPI tcuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuMemPoolExportToShareableHandle(void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool, CUmemLocation *location);
typedef CUresult CUDAAPI tcuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value);
typedef CUresult CUDAAPI tcuMemPoolImportFromShareableHandle(CUmemoryPool *pool_out, void *handle, CUmemAllocationHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData *shareData);
typedef CUresult CUDAAPI tcuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc *map, size_t count);
typedef CUresult CUDAAPI tcuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value);
typedef CUresult CUDAAPI tcuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep);
/* Function types for the remaining cuMem* entry points (prefetch, range
 * attributes, handle release/retain, access, unmap); all return CUresult. */
typedef CUresult CUDAAPI tcuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream);
typedef CUresult CUDAAPI tcuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream);
typedef CUresult CUDAAPI tcuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count);
typedef CUresult CUDAAPI tcuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count);
typedef CUresult CUDAAPI tcuMemRelease(CUmemGenericAllocationHandle handle);
typedef CUresult CUDAAPI tcuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr);
typedef CUresult CUDAAPI tcuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc *desc, size_t count);
typedef CUresult CUDAAPI tcuMemUnmap(CUdeviceptr ptr, size_t size);
/* Function types for the cuMemcpy* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D *pCopy);
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pCopy);
typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy);
typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy3D_v2(const CUDA_MEMCPY3D *pCopy);
typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyAtoH_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
/* Function types for the cuMemset* entry points. The value parameter is
 * unsigned char / unsigned short / unsigned int for the D8 / D16 / D32
 * variants respectively. */
typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N);
typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
/* Function types for the cuMipmappedArray* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, unsigned int numMipmapLevels);
typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray *pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
typedef CUresult CUDAAPI tcuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUmipmappedArray mipmap, CUdevice device);
typedef CUresult CUDAAPI tcuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap);
/* Function types for the cuModule* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleGetLoadingMode(CUmoduleLoadingMode *mode);
typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname);
typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
/* Function types for the cuMulticast* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuMulticastAddDevice(CUmemGenericAllocationHandle mcHandle, CUdevice dev);
typedef CUresult CUDAAPI tcuMulticastBindAddr(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUdeviceptr memptr, size_t size, unsigned long long flags);
typedef CUresult CUDAAPI tcuMulticastBindMem(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUmemGenericAllocationHandle memHandle, size_t memOffset, size_t size, unsigned long long flags);
typedef CUresult CUDAAPI tcuMulticastCreate(CUmemGenericAllocationHandle *mcHandle, const CUmulticastObjectProp *prop);
typedef CUresult CUDAAPI tcuMulticastGetGranularity(size_t *granularity, const CUmulticastObjectProp *prop, CUmulticastGranularity_flags option);
typedef CUresult CUDAAPI tcuMulticastUnbind(CUmemGenericAllocationHandle mcHandle, CUdevice dev, size_t mcOffset, size_t size);
/* Function types for the cuOccupancy* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize);
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
typedef CUresult CUDAAPI tcuOccupancyMaxActiveClusters(int *numClusters, CUfunction func, const CUlaunchConfig *config);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int *minGridSize, int *blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int *minGridSize, int *blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialClusterSize(int *clusterSize, CUfunction func, const CUlaunchConfig *config);
/* Function types for the cuParamSet*, cuPointer*Attribute* and
 * cuSignalExternalSemaphoresAsync entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
typedef CUresult CUDAAPI tcuPointerGetAttribute(void *data, CUpointer_attribute attribute, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute *attributes, void **data, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuPointerSetAttribute(const void *value, CUpointer_attribute attribute, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void * userData, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams * paramArray, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode);
typedef CUresult CUDAAPI tcuStreamCopyAttributes(CUstream dst, CUstream src);
typedef CUresult CUDAAPI tcuStreamCreate(CUstream * phStream, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream * phStream, unsigned int flags, int priority);
typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamEndCapture(CUstream hStream, CUgraph * phGraph);
typedef CUresult CUDAAPI tcuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue * value_out);
typedef CUresult CUDAAPI tcuStreamGetCaptureInfo_v2(CUstream hStream, CUstreamCaptureStatus * captureStatus_out, cuuint64_t * id_out, CUgraph * graph_out, const CUgraphNode ** dependencies_out, size_t * numDependencies_out);
typedef CUresult CUDAAPI tcuStreamGetCtx(CUstream hStream, CUcontext * pctx);
typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned int * flags);
typedef CUresult CUDAAPI tcuStreamGetId(CUstream hStream, unsigned long long * streamId);
typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int * priority);
typedef CUresult CUDAAPI tcuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus * captureStatus);
typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue * value);
typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode * dependencies, size_t numDependencies, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamWaitValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWaitValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWriteValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWriteValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
/* Function types for the cuSurfObject* and cuSurfRef* entry points; all
 * return CUresult. */
typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject *pSurfObject, const CUDA_RESOURCE_DESC *pResDesc);
typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, CUsurfObject surfObject);
typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
/* Function types for the cuTensorMap* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuTensorMapEncodeIm2col(CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, const cuuint64_t *globalStrides, const int *pixelBoxLowerCorner, const int *pixelBoxUpperCorner, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill);
typedef CUresult CUDAAPI tcuTensorMapEncodeTiled(CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, const cuuint64_t *globalStrides, const cuuint32_t *boxDim, const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill);
typedef CUresult CUDAAPI tcuTensorMapReplaceAddress(CUtensorMap *tensorMap, void *globalAddress);
/* Function types for the cuTexObject* entry points; all return CUresult. */
typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, const CUDA_TEXTURE_DESC *pTexDesc, const CUDA_RESOURCE_VIEW_DESC *pResViewDesc);
typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
typedef CUresult CUDAAPI tcuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, CUtexObject texObject);
typedef CUresult CUDAAPI tcuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject);
typedef CUresult CUDAAPI tcuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, CUtexObject texObject);
/* Function types for the cuTexRef* entry points (create/destroy, getters,
 * setters); all return CUresult. */
typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr *pdptr, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetBorderColor(float *pBorderColor, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int *pmaxAniso, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, float *pmaxMipmapLevelClamp, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray *phMipmappedArray, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefSetBorderColor(CUtexref hTexRef, float *pBorderColor);
typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso);
typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags);
/* Function types for cuThreadExchangeStreamCaptureMode, cuUserObject* and
 * cuWaitExternalSemaphoresAsync; all return CUresult. */
typedef CUresult CUDAAPI tcuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode);
typedef CUresult CUDAAPI tcuUserObjectCreate(CUuserObject *object_out, void *ptr, CUhostFn destroy, unsigned int initialRefcount, unsigned int flags);
typedef CUresult CUDAAPI tcuUserObjectRelease(CUuserObject object, unsigned int count);
typedef CUresult CUDAAPI tcuUserObjectRetain(CUuserObject object, unsigned int count);
typedef CUresult CUDAAPI tcuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
/* Function types for the cudaArray*, cudaChooseDevice, cudaCreate*,
 * cudaCtxResetPersistingL2Cache and cudaDestroy* entry points. All return
 * cudaError_t except tcudaCreateChannelDesc, which returns a
 * struct cudaChannelFormatDesc by value. */
typedef cudaError_t CUDAAPI tcudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, unsigned int *flags, cudaArray_t array);
typedef cudaError_t CUDAAPI tcudaArrayGetMemoryRequirements(struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, int device);
typedef cudaError_t CUDAAPI tcudaArrayGetPlane(cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx);
typedef cudaError_t CUDAAPI tcudaArrayGetSparseProperties(struct cudaArraySparseProperties *sparseProperties, cudaArray_t array);
typedef cudaError_t CUDAAPI tcudaChooseDevice(int *device, const struct cudaDeviceProp *prop);
typedef struct cudaChannelFormatDesc CUDAAPI tcudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f);
typedef cudaError_t CUDAAPI tcudaCreateSurfaceObject(cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc);
typedef cudaError_t CUDAAPI tcudaCreateTextureObject(cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, const struct cudaTextureDesc *pTexDesc, const struct cudaResourceViewDesc *pResViewDesc);
/* (void), not (): before C23 an empty list leaves the parameters unspecified,
 * so calls through this type would not be argument-checked. */
typedef cudaError_t CUDAAPI tcudaCtxResetPersistingL2Cache(void);
typedef cudaError_t CUDAAPI tcudaDestroyExternalMemory(cudaExternalMemory_t extMem);
typedef cudaError_t CUDAAPI tcudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem);
typedef cudaError_t CUDAAPI tcudaDestroySurfaceObject(cudaSurfaceObject_t surfObject);
typedef cudaError_t CUDAAPI tcudaDestroyTextureObject(cudaTextureObject_t texObject);
/* Function types for the cudaDevice* entry points and cudaDriverGetVersion;
 * all return cudaError_t. Parameterless entry points are declared with
 * (void): before C23 an empty list () leaves the parameters unspecified, so
 * calls through the type would not be argument-checked. */
typedef cudaError_t CUDAAPI tcudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice);
typedef cudaError_t CUDAAPI tcudaDeviceDisablePeerAccess(int peerDevice);
typedef cudaError_t CUDAAPI tcudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaDeviceFlushGPUDirectRDMAWrites(enum cudaFlushGPUDirectRDMAWritesTarget target, enum cudaFlushGPUDirectRDMAWritesScope scope);
typedef cudaError_t CUDAAPI tcudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
typedef cudaError_t CUDAAPI tcudaDeviceGetByPCIBusId(int *device, const char *pciBusId);
typedef cudaError_t CUDAAPI tcudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
typedef cudaError_t CUDAAPI tcudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device);
typedef cudaError_t CUDAAPI tcudaDeviceGetGraphMemAttribute(int device, enum cudaGraphMemAttributeType attr, void *value);
typedef cudaError_t CUDAAPI tcudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
typedef cudaError_t CUDAAPI tcudaDeviceGetMemPool(cudaMemPool_t *memPool, int device);
typedef cudaError_t CUDAAPI tcudaDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, int device, int flags);
typedef cudaError_t CUDAAPI tcudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, int srcDevice, int dstDevice);
typedef cudaError_t CUDAAPI tcudaDeviceGetPCIBusId(char *pciBusId, int len, int device);
typedef cudaError_t CUDAAPI tcudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
typedef cudaError_t CUDAAPI tcudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority);
typedef cudaError_t CUDAAPI tcudaDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, int device);
typedef cudaError_t CUDAAPI tcudaDeviceGraphMemTrim(int device);
typedef cudaError_t CUDAAPI tcudaDeviceReset(void);
typedef cudaError_t CUDAAPI tcudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig);
typedef cudaError_t CUDAAPI tcudaDeviceSetGraphMemAttribute(int device, enum cudaGraphMemAttributeType attr, void *value);
typedef cudaError_t CUDAAPI tcudaDeviceSetLimit(enum cudaLimit limit, size_t value);
typedef cudaError_t CUDAAPI tcudaDeviceSetMemPool(int device, cudaMemPool_t memPool);
typedef cudaError_t CUDAAPI tcudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config);
typedef cudaError_t CUDAAPI tcudaDeviceSynchronize(void);
typedef cudaError_t CUDAAPI tcudaDriverGetVersion(int *driverVersion);
/* Function types for the cudaEvent* and cudaExternalMemoryGetMapped* entry
 * points; all return cudaError_t. */
typedef cudaError_t CUDAAPI tcudaEventCreate(cudaEvent_t *event);
typedef cudaError_t CUDAAPI tcudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaEventDestroy(cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaEventElapsedTime(float *ms, cudaEvent_t start, cudaEvent_t end);
typedef cudaError_t CUDAAPI tcudaEventQuery(cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaEventRecord(cudaEvent_t event, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaEventSynchronize(cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaExternalMemoryGetMappedBuffer(void **devPtr, cudaExternalMemory_t extMem, const struct cudaExternalMemoryBufferDesc *bufferDesc);
typedef cudaError_t CUDAAPI tcudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc);
/* Function types for the cudaFree* and cudaFunc* entry points; all return
 * cudaError_t. */
typedef cudaError_t CUDAAPI tcudaFree(void *devPtr);
typedef cudaError_t CUDAAPI tcudaFreeArray(cudaArray_t array);
typedef cudaError_t CUDAAPI tcudaFreeAsync(void *devPtr, cudaStream_t hStream);
typedef cudaError_t CUDAAPI tcudaFreeHost(void *ptr);
typedef cudaError_t CUDAAPI tcudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray);
typedef cudaError_t CUDAAPI tcudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
typedef cudaError_t CUDAAPI tcudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value);
typedef cudaError_t CUDAAPI tcudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig);
typedef cudaError_t CUDAAPI tcudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config);
/* Function types for the cudaGet* entry points. All return cudaError_t
 * except tcudaGetErrorName and tcudaGetErrorString, which return
 * const char *. */
typedef cudaError_t CUDAAPI tcudaGetChannelDesc(struct cudaChannelFormatDesc *desc, cudaArray_const_t array);
typedef cudaError_t CUDAAPI tcudaGetDevice(int *device);
typedef cudaError_t CUDAAPI tcudaGetDeviceCount(int *count);
typedef cudaError_t CUDAAPI tcudaGetDeviceFlags(unsigned int *flags);
typedef cudaError_t CUDAAPI tcudaGetDeviceProperties_v2(struct cudaDeviceProp *prop, int device);
typedef cudaError_t CUDAAPI tcudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags, enum cudaDriverEntryPointQueryResult *driverStatus);
typedef const char * CUDAAPI tcudaGetErrorName(cudaError_t error);
typedef const char * CUDAAPI tcudaGetErrorString(cudaError_t error);
typedef cudaError_t CUDAAPI tcudaGetExportTable(const void **ppExportTable, const cudaUUID_t *pExportTableId);
typedef cudaError_t CUDAAPI tcudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr);
typedef cudaError_t CUDAAPI tcudaGetKernel(cudaKernel_t *kernelPtr, const void *entryFuncAddr);
/* (void), not (): before C23 an empty list leaves the parameters unspecified,
 * so calls through this type would not be argument-checked. */
typedef cudaError_t CUDAAPI tcudaGetLastError(void);
typedef cudaError_t CUDAAPI tcudaGetMipmappedArrayLevel(cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level);
typedef cudaError_t CUDAAPI tcudaGetSurfaceObjectResourceDesc(struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject);
typedef cudaError_t CUDAAPI tcudaGetSymbolAddress(void **devPtr, const void *symbol);
typedef cudaError_t CUDAAPI tcudaGetSymbolSize(size_t *size, const void *symbol);
typedef cudaError_t CUDAAPI tcudaGetTextureObjectResourceDesc(struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject);
typedef cudaError_t CUDAAPI tcudaGetTextureObjectResourceViewDesc(struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject);
typedef cudaError_t CUDAAPI tcudaGetTextureObjectTextureDesc(struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject);
/* Function types for the cudaGraphAdd* entry points; all return cudaError_t.
 * Every node-adding variant starts with the same four parameters
 * (pGraphNode, graph, pDependencies, numDependencies); cudaGraphAddDependencies
 * takes (graph, from, to, numDependencies) instead. */
typedef cudaError_t CUDAAPI tcudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaGraph_t childGraph);
typedef cudaError_t CUDAAPI tcudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies);
typedef cudaError_t CUDAAPI tcudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies);
typedef cudaError_t CUDAAPI tcudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaExternalSemaphoreSignalNodeParams *nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaExternalSemaphoreWaitNodeParams *nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaHostNodeParams *pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaKernelNodeParams *pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphAddMemAllocNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, struct cudaMemAllocNodeParams *nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphAddMemFreeNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr);
typedef cudaError_t CUDAAPI tcudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemcpy3DParms *pCopyParams);
typedef cudaError_t CUDAAPI tcudaGraphAddMemcpyNode1D(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphAddMemcpyNodeFromSymbol(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphAddMemcpyNodeToSymbol(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const void *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemsetParams *pMemsetParams);
typedef cudaError_t CUDAAPI tcudaGraphAddNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, struct cudaGraphNodeParams *nodeParams);
/* Function types for the cudaGraph child-graph, clone/create/destroy, debug
 * print and event-node entry points; all return cudaError_t. */
typedef cudaError_t CUDAAPI tcudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph);
typedef cudaError_t CUDAAPI tcudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph);
typedef cudaError_t CUDAAPI tcudaGraphCreate(cudaGraph_t *pGraph, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaGraphDebugDotPrint(cudaGraph_t graph, const char *path, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaGraphDestroy(cudaGraph_t graph);
typedef cudaError_t CUDAAPI tcudaGraphDestroyNode(cudaGraphNode_t node);
typedef cudaError_t CUDAAPI tcudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out);
typedef cudaError_t CUDAAPI tcudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out);
typedef cudaError_t CUDAAPI tcudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event);
/*
 * Function types for the cudaGraphExec* family. Every type takes a
 * cudaGraphExec_t handle as its first argument; most additionally take the
 * node to update and a pointer to (or the fields of) the new parameters.
 * All return cudaError_t.
 */
typedef cudaError_t CUDAAPI tcudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph);
typedef cudaError_t CUDAAPI tcudaGraphExecDestroy(cudaGraphExec_t graphExec);
typedef cudaError_t CUDAAPI tcudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const struct cudaExternalSemaphoreSignalNodeParams * nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const struct cudaExternalSemaphoreWaitNodeParams * nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long * flags);
typedef cudaError_t CUDAAPI tcudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaHostNodeParams * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaKernelNodeParams * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemcpy3DParms * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void * dst, const void * src, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void * dst, const void * symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphExecMemcpyNodeSetParamsToSymbol(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void * symbol, const void * src, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemsetParams * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, struct cudaGraphNodeParams * nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphExecUpdateResultInfo * resultInfo);
/*
 * Function types for reading and writing the parameters of external-semaphore
 * signal and wait graph nodes. Get variants fill a caller-provided struct via
 * `params_out`; Set variants read from a const struct.
 */
typedef cudaError_t CUDAAPI tcudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, struct cudaExternalSemaphoreSignalNodeParams * params_out);
typedef cudaError_t CUDAAPI tcudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const struct cudaExternalSemaphoreSignalNodeParams * nodeParams);
typedef cudaError_t CUDAAPI tcudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, struct cudaExternalSemaphoreWaitNodeParams * params_out);
typedef cudaError_t CUDAAPI tcudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const struct cudaExternalSemaphoreWaitNodeParams * nodeParams);
/*
 * Function types for enumerating a graph's edges, nodes and root nodes.
 * Each takes caller-supplied output array pointer(s) plus a size_t pointer
 * for the element count.
 */
typedef cudaError_t CUDAAPI tcudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t * from, cudaGraphNode_t * to, size_t * numEdges);
typedef cudaError_t CUDAAPI tcudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t * nodes, size_t * numNodes);
typedef cudaError_t CUDAAPI tcudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t * pRootNodes, size_t * pNumRootNodes);
/* Function types for getting / setting the cudaHostNodeParams of a host graph node. */
typedef cudaError_t CUDAAPI tcudaGraphHostNodeGetParams(cudaGraphNode_t node, struct cudaHostNodeParams * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphHostNodeSetParams(cudaGraphNode_t node, const struct cudaHostNodeParams * pNodeParams);
/*
 * Function types for the three graph-instantiation variants. All write the
 * resulting cudaGraphExec_t through `pGraphExec`; the first two take a
 * 64-bit flags word, the third a cudaGraphInstantiateParams pointer.
 */
typedef cudaError_t CUDAAPI tcudaGraphInstantiate(cudaGraphExec_t * pGraphExec, cudaGraph_t graph, unsigned long long flags);
typedef cudaError_t CUDAAPI tcudaGraphInstantiateWithFlags(cudaGraphExec_t * pGraphExec, cudaGraph_t graph, unsigned long long flags);
typedef cudaError_t CUDAAPI tcudaGraphInstantiateWithParams(cudaGraphExec_t * pGraphExec, cudaGraph_t graph, cudaGraphInstantiateParams * instantiateParams);
/*
 * Function types for kernel graph nodes: copying attributes between two
 * nodes, getting/setting a single launch attribute, and getting/setting the
 * cudaKernelNodeParams.
 */
typedef cudaError_t CUDAAPI tcudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst);
typedef cudaError_t CUDAAPI tcudaGraphKernelNodeGetAttribute(cudaGraphNode_t hNode, cudaLaunchAttributeID attr, cudaLaunchAttributeValue * value_out);
typedef cudaError_t CUDAAPI tcudaGraphKernelNodeGetParams(cudaGraphNode_t node, struct cudaKernelNodeParams * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, cudaLaunchAttributeID attr, const cudaLaunchAttributeValue * value);
typedef cudaError_t CUDAAPI tcudaGraphKernelNodeSetParams(cudaGraphNode_t node, const struct cudaKernelNodeParams * pNodeParams);
/* Function type for cudaGraphLaunch: takes a cudaGraphExec_t and the stream argument. */
typedef cudaError_t CUDAAPI tcudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream);
/*
 * Function types for memory-related graph nodes: mem-alloc / mem-free
 * parameter queries, and get/set of memcpy and memset node parameters
 * (including the 1D and symbol-based memcpy setters).
 * Note: tcudaGraphMemFreeNodeGetParams takes its output as a plain `void *`.
 */
typedef cudaError_t CUDAAPI tcudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, struct cudaMemAllocNodeParams * params_out);
typedef cudaError_t CUDAAPI tcudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void * dptr_out);
typedef cudaError_t CUDAAPI tcudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, struct cudaMemcpy3DParms * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const struct cudaMemcpy3DParms * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void * dst, const void * src, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphMemcpyNodeSetParamsFromSymbol(cudaGraphNode_t node, void * dst, const void * symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphMemcpyNodeSetParamsToSymbol(cudaGraphNode_t node, const void * symbol, const void * src, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaGraphMemsetNodeGetParams(cudaGraphNode_t node, struct cudaMemsetParams * pNodeParams);
typedef cudaError_t CUDAAPI tcudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const struct cudaMemsetParams * pNodeParams);
/*
 * Function types for per-node queries and updates: locating a node in a
 * cloned graph, listing dependencies / dependent nodes, reading the node
 * type, getting/setting the enabled flag (these two also take a
 * cudaGraphExec_t), and setting generic node parameters.
 */
typedef cudaError_t CUDAAPI tcudaGraphNodeFindInClone(cudaGraphNode_t * pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph);
typedef cudaError_t CUDAAPI tcudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t * pDependencies, size_t * pNumDependencies);
typedef cudaError_t CUDAAPI tcudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t * pDependentNodes, size_t * pNumDependentNodes);
typedef cudaError_t CUDAAPI tcudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int * isEnabled);
typedef cudaError_t CUDAAPI tcudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType * pType);
typedef cudaError_t CUDAAPI tcudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled);
typedef cudaError_t CUDAAPI tcudaGraphNodeSetParams(cudaGraphNode_t node, struct cudaGraphNodeParams * nodeParams);
/*
 * Function types for retaining/releasing a cudaUserObject_t on a graph,
 * removing dependency edges (parallel `from`/`to` arrays of length
 * numDependencies), and uploading a cudaGraphExec_t on a stream.
 */
typedef cudaError_t CUDAAPI tcudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count);
typedef cudaError_t CUDAAPI tcudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t * from, const cudaGraphNode_t * to, size_t numDependencies);
typedef cudaError_t CUDAAPI tcudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream);
/*
 * Function types for the cudaGraphics* resource API: map/unmap a counted
 * array of resources on a stream, fetch the mapped pointer / array /
 * mipmapped array of a resource, set map flags, and unregister a resource.
 */
typedef cudaError_t CUDAAPI tcudaGraphicsMapResources(int count, cudaGraphicsResource_t * resources, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t * mipmappedArray, cudaGraphicsResource_t resource);
typedef cudaError_t CUDAAPI tcudaGraphicsResourceGetMappedPointer(void ** devPtr, size_t * size, cudaGraphicsResource_t resource);
typedef cudaError_t CUDAAPI tcudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaGraphicsSubResourceGetMappedArray(cudaArray_t * array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel);
typedef cudaError_t CUDAAPI tcudaGraphicsUnmapResources(int count, cudaGraphicsResource_t * resources, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaGraphicsUnregisterResource(cudaGraphicsResource_t resource);
/*
 * Function types for the cudaHost* family: allocation, device-pointer and
 * flag queries for a host pointer, and register/unregister of an existing
 * host range.
 */
typedef cudaError_t CUDAAPI tcudaHostAlloc(void ** pHost, size_t size, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaHostGetDevicePointer(void ** pDevice, void * pHost, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaHostGetFlags(unsigned int * pFlags, void * pHost);
typedef cudaError_t CUDAAPI tcudaHostRegister(void * ptr, size_t size, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaHostUnregister(void * ptr);
/*
 * Function types for importing external memory / semaphore objects. Each
 * writes the imported handle through its first argument and reads a const
 * handle-descriptor struct.
 */
typedef cudaError_t CUDAAPI tcudaImportExternalMemory(cudaExternalMemory_t * extMem_out, const struct cudaExternalMemoryHandleDesc * memHandleDesc);
typedef cudaError_t CUDAAPI tcudaImportExternalSemaphore(cudaExternalSemaphore_t * extSem_out, const struct cudaExternalSemaphoreHandleDesc * semHandleDesc);
/* Function type for cudaInitDevice: device ordinal plus two separate flag words. */
typedef cudaError_t CUDAAPI tcudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags);
/*
 * Function types for the cudaIpc* family: obtaining event / memory handles,
 * opening them (handles are passed by value to the Open variants), and
 * closing an opened memory handle.
 */
typedef cudaError_t CUDAAPI tcudaIpcCloseMemHandle(void * devPtr);
typedef cudaError_t CUDAAPI tcudaIpcGetEventHandle(cudaIpcEventHandle_t * handle, cudaEvent_t event);
typedef cudaError_t CUDAAPI tcudaIpcGetMemHandle(cudaIpcMemHandle_t * handle, void * devPtr);
typedef cudaError_t CUDAAPI tcudaIpcOpenEventHandle(cudaEvent_t * event, cudaIpcEventHandle_t handle);
typedef cudaError_t CUDAAPI tcudaIpcOpenMemHandle(void ** devPtr, cudaIpcMemHandle_t handle, unsigned int flags);
/*
 * Function types for the cudaLaunch* family: kernel launches (regular,
 * cooperative, cooperative multi-device, and config-struct based) and
 * host-function launches on a stream. Grid and block sizes are passed as
 * dim3 by value.
 */
typedef cudaError_t CUDAAPI tcudaLaunchCooperativeKernel(const void * func, dim3 gridDim, dim3 blockDim, void ** args, size_t sharedMem, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaLaunchCooperativeKernelMultiDevice(struct cudaLaunchParams * launchParamsList, unsigned int numDevices, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void * userData);
typedef cudaError_t CUDAAPI tcudaLaunchKernel(const void * func, dim3 gridDim, dim3 blockDim, void ** args, size_t sharedMem, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaLaunchKernelExC(const cudaLaunchConfig_t * config, const void * func, void ** args);
/*
 * Function types for the cudaMalloc* family: linear, pitched, 3D, array,
 * mipmapped-array, host, managed, and stream-ordered (Async / FromPoolAsync)
 * allocation. The allocated pointer or handle is returned through the first
 * argument in every case.
 */
typedef cudaError_t CUDAAPI tcudaMalloc(void ** devPtr, size_t size);
typedef cudaError_t CUDAAPI tcudaMalloc3D(struct cudaPitchedPtr * pitchedDevPtr, struct cudaExtent extent);
typedef cudaError_t CUDAAPI tcudaMalloc3DArray(cudaArray_t * array, const struct cudaChannelFormatDesc * desc, struct cudaExtent extent, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaMallocArray(cudaArray_t * array, const struct cudaChannelFormatDesc * desc, size_t width, size_t height, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaMallocAsync(void ** devPtr, size_t size, cudaStream_t hStream);
typedef cudaError_t CUDAAPI tcudaMallocFromPoolAsync(void ** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMallocHost(void ** ptr, size_t size);
typedef cudaError_t CUDAAPI tcudaMallocManaged(void ** devPtr, size_t size, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaMallocMipmappedArray(cudaMipmappedArray_t * mipmappedArray, const struct cudaChannelFormatDesc * desc, struct cudaExtent extent, unsigned int numLevels, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaMallocPitch(void ** devPtr, size_t * pitch, size_t width, size_t height);
/*
 * Function types for cudaMemAdvise (target given as an int device), its _v2
 * variant (target given as a struct cudaMemLocation by value), and
 * cudaMemGetInfo (two size_t outputs).
 */
typedef cudaError_t CUDAAPI tcudaMemAdvise(const void * devPtr, size_t count, enum cudaMemoryAdvise advice, int device);
typedef cudaError_t CUDAAPI tcudaMemAdvise_v2(const void * devPtr, size_t count, enum cudaMemoryAdvise advice, struct cudaMemLocation location);
typedef cudaError_t CUDAAPI tcudaMemGetInfo(size_t * free, size_t * total);
/*
 * Function types for the cudaMemPool* family: pool create/destroy, pointer
 * and shareable-handle export/import, access get/set, attribute get/set
 * (attribute values are passed as untyped `void *`), and trimming.
 */
typedef cudaError_t CUDAAPI tcudaMemPoolCreate(cudaMemPool_t * memPool, const struct cudaMemPoolProps * poolProps);
typedef cudaError_t CUDAAPI tcudaMemPoolDestroy(cudaMemPool_t memPool);
typedef cudaError_t CUDAAPI tcudaMemPoolExportPointer(struct cudaMemPoolPtrExportData * exportData, void * ptr);
typedef cudaError_t CUDAAPI tcudaMemPoolExportToShareableHandle(void * shareableHandle, cudaMemPool_t memPool, enum cudaMemAllocationHandleType handleType, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaMemPoolGetAccess(enum cudaMemAccessFlags * flags, cudaMemPool_t memPool, struct cudaMemLocation * location);
typedef cudaError_t CUDAAPI tcudaMemPoolGetAttribute(cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void * value);
typedef cudaError_t CUDAAPI tcudaMemPoolImportFromShareableHandle(cudaMemPool_t * memPool, void * shareableHandle, enum cudaMemAllocationHandleType handleType, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaMemPoolImportPointer(void ** ptr, cudaMemPool_t memPool, struct cudaMemPoolPtrExportData * exportData);
typedef cudaError_t CUDAAPI tcudaMemPoolSetAccess(cudaMemPool_t memPool, const struct cudaMemAccessDesc * descList, size_t count);
typedef cudaError_t CUDAAPI tcudaMemPoolSetAttribute(cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void * value);
typedef cudaError_t CUDAAPI tcudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep);
/*
 * Function types for cudaMemPrefetchAsync (int destination device), its _v2
 * variant (struct cudaMemLocation plus flags), and the single / multiple
 * attribute memory-range queries.
 */
typedef cudaError_t CUDAAPI tcudaMemPrefetchAsync(const void * devPtr, size_t count, int dstDevice, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemPrefetchAsync_v2(const void * devPtr, size_t count, struct cudaMemLocation location, unsigned int flags, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemRangeGetAttribute(void * data, size_t dataSize, enum cudaMemRangeAttribute attribute, const void * devPtr, size_t count);
typedef cudaError_t CUDAAPI tcudaMemRangeGetAttributes(void ** data, size_t * dataSizes, enum cudaMemRangeAttribute * attributes, size_t numAttributes, const void * devPtr, size_t count);
/*
 * Function types for the cudaMemcpy* family: linear, 2D, 3D, peer, array and
 * symbol copies. Variants whose name ends in `Async` take a trailing
 * cudaStream_t; all variants except the 3D and peer ones carry an
 * `enum cudaMemcpyKind kind` argument.
 */
typedef cudaError_t CUDAAPI tcudaMemcpy(void * dst, const void * src, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpy2D(void * dst, size_t dpitch, const void * src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpy2DAsync(void * dst, size_t dpitch, const void * src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpy2DFromArray(void * dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpy2DFromArrayAsync(void * dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void * src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void * src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpy3D(const struct cudaMemcpy3DParms * p);
typedef cudaError_t CUDAAPI tcudaMemcpy3DAsync(const struct cudaMemcpy3DParms * p, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms * p);
typedef cudaError_t CUDAAPI tcudaMemcpy3DPeerAsync(const struct cudaMemcpy3DPeerParms * p, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpyAsync(void * dst, const void * src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpyFromArray(void * dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpyFromArrayAsync(void * dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpyFromSymbol(void * dst, const void * symbol, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpyFromSymbolAsync(void * dst, const void * symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpyPeer(void * dst, int dstDevice, const void * src, int srcDevice, size_t count);
typedef cudaError_t CUDAAPI tcudaMemcpyPeerAsync(void * dst, int dstDevice, const void * src, int srcDevice, size_t count, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void * src, size_t count, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void * src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemcpyToSymbol(const void * symbol, const void * src, size_t count, size_t offset, enum cudaMemcpyKind kind);
typedef cudaError_t CUDAAPI tcudaMemcpyToSymbolAsync(const void * symbol, const void * src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream);
/*
 * Function types for the cudaMemset* family (linear, 2D, 3D) and their
 * `Async` counterparts, which add a trailing cudaStream_t. The fill value is
 * an `int` in every variant.
 */
typedef cudaError_t CUDAAPI tcudaMemset(void * devPtr, int value, size_t count);
typedef cudaError_t CUDAAPI tcudaMemset2D(void * devPtr, size_t pitch, int value, size_t width, size_t height);
typedef cudaError_t CUDAAPI tcudaMemset2DAsync(void * devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemset3D(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent);
typedef cudaError_t CUDAAPI tcudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaMemsetAsync(void * devPtr, int value, size_t count, cudaStream_t stream);
/*
 * Function types for querying a mipmapped array: memory requirements (per
 * device) and sparse properties. Results are written to the struct passed as
 * the first argument.
 */
typedef cudaError_t CUDAAPI tcudaMipmappedArrayGetMemoryRequirements(struct cudaArrayMemoryRequirements * memoryRequirements, cudaMipmappedArray_t mipmap, int device);
typedef cudaError_t CUDAAPI tcudaMipmappedArrayGetSparseProperties(struct cudaArraySparseProperties * sparseProperties, cudaMipmappedArray_t mipmap);
/*
 * Function types for the cudaOccupancy* queries. Each writes its result
 * through the first (pointer) argument and identifies the kernel by an
 * untyped `const void * func`.
 */
typedef cudaError_t CUDAAPI tcudaOccupancyAvailableDynamicSMemPerBlock(size_t * dynamicSmemSize, const void * func, int numBlocks, int blockSize);
typedef cudaError_t CUDAAPI tcudaOccupancyMaxActiveBlocksPerMultiprocessor(int * numBlocks, const void * func, int blockSize, size_t dynamicSMemSize);
typedef cudaError_t CUDAAPI tcudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int * numBlocks, const void * func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaOccupancyMaxActiveClusters(int * numClusters, const void * func, const cudaLaunchConfig_t * launchConfig);
typedef cudaError_t CUDAAPI tcudaOccupancyMaxPotentialClusterSize(int * clusterSize, const void * func, const cudaLaunchConfig_t * launchConfig);
/*
 * Function types for miscellaneous runtime entry points: last-error peek,
 * pointer attribute query, profiler start/stop, runtime version query, and
 * device selection / configuration.
 *
 * The zero-argument types are spelled `(void)` rather than `()`: in C before
 * C23 an empty parameter list declares a function with unspecified
 * parameters, so calls through such a type are not argument-checked. With
 * `(void)` the type is a real prototype in C and is unchanged in C++.
 */
typedef cudaError_t CUDAAPI tcudaPeekAtLastError(void);
typedef cudaError_t CUDAAPI tcudaPointerGetAttributes(struct cudaPointerAttributes * attributes, const void * ptr);
typedef cudaError_t CUDAAPI tcudaProfilerStart(void);
typedef cudaError_t CUDAAPI tcudaProfilerStop(void);
typedef cudaError_t CUDAAPI tcudaRuntimeGetVersion(int * runtimeVersion);
typedef cudaError_t CUDAAPI tcudaSetDevice(int device);
typedef cudaError_t CUDAAPI tcudaSetDeviceFlags(unsigned int flags);
typedef cudaError_t CUDAAPI tcudaSetDoubleForDevice(double * d);
typedef cudaError_t CUDAAPI tcudaSetDoubleForHost(double * d);
typedef cudaError_t CUDAAPI tcudaSetValidDevices(int * device_arr, int len);
/*
 * Function type for cudaSignalExternalSemaphoresAsync_v2: parallel arrays of
 * semaphores and signal parameters, `numExtSems` entries, on a stream.
 */
typedef cudaError_t CUDAAPI tcudaSignalExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t * extSemArray, const struct cudaExternalSemaphoreSignalParams * paramsArray, unsigned int numExtSems, cudaStream_t stream);
/*
 * Function types for the cudaStream* family: creation/destruction,
 * callbacks, managed-memory attachment, capture begin/end/status/info,
 * attribute get/set/copy, flag/id/priority queries, query/synchronize, and
 * waiting on an event.
 */
typedef cudaError_t CUDAAPI tcudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, void * userData, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaStreamAttachMemAsync(cudaStream_t stream, void * devPtr, size_t length, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode);
typedef cudaError_t CUDAAPI tcudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src);
typedef cudaError_t CUDAAPI tcudaStreamCreate(cudaStream_t * pStream);
typedef cudaError_t CUDAAPI tcudaStreamCreateWithFlags(cudaStream_t * pStream, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaStreamCreateWithPriority(cudaStream_t * pStream, unsigned int flags, int priority);
typedef cudaError_t CUDAAPI tcudaStreamDestroy(cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaStreamEndCapture(cudaStream_t stream, cudaGraph_t * pGraph);
typedef cudaError_t CUDAAPI tcudaStreamGetAttribute(cudaStream_t hStream, cudaLaunchAttributeID attr, cudaLaunchAttributeValue * value_out);
typedef cudaError_t CUDAAPI tcudaStreamGetCaptureInfo_v2(cudaStream_t stream, enum cudaStreamCaptureStatus * captureStatus_out, unsigned long long * id_out, cudaGraph_t * graph_out, const cudaGraphNode_t ** dependencies_out, size_t * numDependencies_out);
typedef cudaError_t CUDAAPI tcudaStreamGetFlags(cudaStream_t hStream, unsigned int * flags);
typedef cudaError_t CUDAAPI tcudaStreamGetId(cudaStream_t hStream, unsigned long long * streamId);
typedef cudaError_t CUDAAPI tcudaStreamGetPriority(cudaStream_t hStream, int * priority);
typedef cudaError_t CUDAAPI tcudaStreamIsCapturing(cudaStream_t stream, enum cudaStreamCaptureStatus * pCaptureStatus);
typedef cudaError_t CUDAAPI tcudaStreamQuery(cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaStreamSetAttribute(cudaStream_t hStream, cudaLaunchAttributeID attr, const cudaLaunchAttributeValue * value);
typedef cudaError_t CUDAAPI tcudaStreamSynchronize(cudaStream_t stream);
typedef cudaError_t CUDAAPI tcudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t * dependencies, size_t numDependencies, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
/*
 * Function types for the cudaThread* family: stream-capture-mode exchange,
 * exit, cache-config and limit get/set, and synchronize.
 *
 * The zero-argument types (tcudaThreadExit, tcudaThreadSynchronize) are
 * spelled `(void)` rather than `()`: in C before C23 an empty parameter list
 * leaves the parameters unspecified and disables argument checking, whereas
 * `(void)` is a real prototype in C and is unchanged in C++.
 */
typedef cudaError_t CUDAAPI tcudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode * mode);
typedef cudaError_t CUDAAPI tcudaThreadExit(void);
typedef cudaError_t CUDAAPI tcudaThreadGetCacheConfig(enum cudaFuncCache * pCacheConfig);
typedef cudaError_t CUDAAPI tcudaThreadGetLimit(size_t * pValue, enum cudaLimit limit);
typedef cudaError_t CUDAAPI tcudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig);
typedef cudaError_t CUDAAPI tcudaThreadSetLimit(enum cudaLimit limit, size_t value);
typedef cudaError_t CUDAAPI tcudaThreadSynchronize(void);
/*
 * Function types for cudaUserObject*: creation (takes an opaque pointer, a
 * cudaHostFn_t `destroy` callback and an initial reference count) and
 * retain/release by a given count.
 */
typedef cudaError_t CUDAAPI tcudaUserObjectCreate(cudaUserObject_t * object_out, void * ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags);
typedef cudaError_t CUDAAPI tcudaUserObjectRelease(cudaUserObject_t object, unsigned int count);
typedef cudaError_t CUDAAPI tcudaUserObjectRetain(cudaUserObject_t object, unsigned int count);
/*
 * Function type for cudaWaitExternalSemaphoresAsync_v2: parallel arrays of
 * semaphores and wait parameters, `numExtSems` entries, on a stream.
 */
typedef cudaError_t CUDAAPI tcudaWaitExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t * extSemArray, const struct cudaExternalSemaphoreWaitParams * paramsArray, unsigned int numExtSems, cudaStream_t stream);
/*
 * Function types for registering OpenGL objects as graphics resources. They
 * depend on the GL types GLuint / GLenum being declared before this point,
 * and return the resource through `struct cudaGraphicsResource**`.
 */
typedef cudaError_t CUDAAPI tcudaGraphicsGLRegisterImage( struct cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags );
typedef cudaError_t CUDAAPI tcudaGraphicsGLRegisterBuffer( struct cudaGraphicsResource** resource, GLuint buffer, unsigned int flags );
/*
 * Function types for the nvrtc* API: program create/compile/destroy, name
 * expressions and lowered names, retrieval of PTX / CUBIN / LTOIR / NVVM /
 * OptiX IR output and their sizes, program log, supported architectures,
 * and version query. All return nvrtcResult except tnvrtcGetErrorString,
 * which returns `const char *`. The Get<X>Size types report a size_t that
 * presumably sizes the buffer passed to the matching Get<X> call -- confirm
 * against the NVRTC documentation.
 */
typedef nvrtcResult CUDAAPI tnvrtcAddNameExpression(nvrtcProgram prog, const char *const name_expression);
typedef nvrtcResult CUDAAPI tnvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char *const * options);
typedef nvrtcResult CUDAAPI tnvrtcCreateProgram(nvrtcProgram * prog, const char * src, const char * name, int numHeaders, const char *const * headers, const char *const * includeNames);
typedef nvrtcResult CUDAAPI tnvrtcDestroyProgram(nvrtcProgram * prog);
typedef nvrtcResult CUDAAPI tnvrtcGetCUBIN(nvrtcProgram prog, char * cubin);
typedef nvrtcResult CUDAAPI tnvrtcGetCUBINSize(nvrtcProgram prog, size_t * cubinSizeRet);
typedef const char * CUDAAPI tnvrtcGetErrorString(nvrtcResult result);
typedef nvrtcResult CUDAAPI tnvrtcGetLTOIR(nvrtcProgram prog, char * LTOIR);
typedef nvrtcResult CUDAAPI tnvrtcGetLTOIRSize(nvrtcProgram prog, size_t * LTOIRSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetLoweredName(nvrtcProgram prog, const char *const name_expression, const char ** lowered_name);
typedef nvrtcResult CUDAAPI tnvrtcGetNVVM(nvrtcProgram prog, char * nvvm);
typedef nvrtcResult CUDAAPI tnvrtcGetNVVMSize(nvrtcProgram prog, size_t * nvvmSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetNumSupportedArchs(int * numArchs);
typedef nvrtcResult CUDAAPI tnvrtcGetOptiXIR(nvrtcProgram prog, char * optixir);
typedef nvrtcResult CUDAAPI tnvrtcGetOptiXIRSize(nvrtcProgram prog, size_t * optixirSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetPTX(nvrtcProgram prog, char * ptx);
typedef nvrtcResult CUDAAPI tnvrtcGetPTXSize(nvrtcProgram prog, size_t * ptxSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetProgramLog(nvrtcProgram prog, char * log);
typedef nvrtcResult CUDAAPI tnvrtcGetProgramLogSize(nvrtcProgram prog, size_t * logSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetSupportedArchs(int * supportedArchs);
typedef nvrtcResult CUDAAPI tnvrtcVersion(int * major, int * minor);
/*
 * Extern declarations of the driver-API function pointers. Each
 * `<name>_oro` variable is a pointer to the function type `t<name>`; the
 * definitions live in another translation unit. This group: cuArray*.
 */
extern tcuArray3DCreate_v2 *cuArray3DCreate_v2_oro;
extern tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2_oro;
extern tcuArrayCreate_v2 *cuArrayCreate_v2_oro;
extern tcuArrayDestroy *cuArrayDestroy_oro;
extern tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2_oro;
extern tcuArrayGetMemoryRequirements *cuArrayGetMemoryRequirements_oro;
extern tcuArrayGetPlane *cuArrayGetPlane_oro;
extern tcuArrayGetSparseProperties *cuArrayGetSparseProperties_oro;
/* Extern function-pointer declarations for the cuCoredump* get/set attribute entry points. */
extern tcuCoredumpGetAttribute *cuCoredumpGetAttribute_oro;
extern tcuCoredumpGetAttributeGlobal *cuCoredumpGetAttributeGlobal_oro;
extern tcuCoredumpSetAttribute *cuCoredumpSetAttribute_oro;
extern tcuCoredumpSetAttributeGlobal *cuCoredumpSetAttributeGlobal_oro;
/*
 * Extern function-pointer declarations for the cuCtx* (context) entry
 * points. Where the pointer name carries a `_v2` / `_v3` suffix, the
 * matching `t...` type carries the same suffix.
 */
extern tcuCtxAttach *cuCtxAttach_oro;
extern tcuCtxCreate_v2 *cuCtxCreate_v2_oro;
extern tcuCtxCreate_v3 *cuCtxCreate_v3_oro;
extern tcuCtxDestroy_v2 *cuCtxDestroy_v2_oro;
extern tcuCtxDetach *cuCtxDetach_oro;
extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess_oro;
extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess_oro;
extern tcuCtxGetApiVersion *cuCtxGetApiVersion_oro;
extern tcuCtxGetCacheConfig *cuCtxGetCacheConfig_oro;
extern tcuCtxGetCurrent *cuCtxGetCurrent_oro;
extern tcuCtxGetDevice *cuCtxGetDevice_oro;
extern tcuCtxGetExecAffinity *cuCtxGetExecAffinity_oro;
extern tcuCtxGetFlags *cuCtxGetFlags_oro;
extern tcuCtxGetId *cuCtxGetId_oro;
extern tcuCtxGetLimit *cuCtxGetLimit_oro;
extern tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig_oro;
extern tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange_oro;
extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2_oro;
extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2_oro;
extern tcuCtxResetPersistingL2Cache *cuCtxResetPersistingL2Cache_oro;
extern tcuCtxSetCacheConfig *cuCtxSetCacheConfig_oro;
extern tcuCtxSetCurrent *cuCtxSetCurrent_oro;
extern tcuCtxSetFlags *cuCtxSetFlags_oro;
extern tcuCtxSetLimit *cuCtxSetLimit_oro;
extern tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig_oro;
extern tcuCtxSynchronize *cuCtxSynchronize_oro;
/* Extern function-pointer declarations for destroying external memory / semaphore objects. */
extern tcuDestroyExternalMemory *cuDestroyExternalMemory_oro;
extern tcuDestroyExternalSemaphore *cuDestroyExternalSemaphore_oro;
/*
 * Extern function-pointer declarations for the cuDevice* entry points
 * (device enumeration and attribute queries, memory pools, graph memory
 * attributes, and primary-context management).
 */
extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer_oro;
extern tcuDeviceComputeCapability *cuDeviceComputeCapability_oro;
extern tcuDeviceGet *cuDeviceGet_oro;
extern tcuDeviceGetAttribute *cuDeviceGetAttribute_oro;
extern tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId_oro;
extern tcuDeviceGetCount *cuDeviceGetCount_oro;
extern tcuDeviceGetDefaultMemPool *cuDeviceGetDefaultMemPool_oro;
extern tcuDeviceGetExecAffinitySupport *cuDeviceGetExecAffinitySupport_oro;
extern tcuDeviceGetGraphMemAttribute *cuDeviceGetGraphMemAttribute_oro;
extern tcuDeviceGetLuid *cuDeviceGetLuid_oro;
extern tcuDeviceGetMemPool *cuDeviceGetMemPool_oro;
extern tcuDeviceGetName *cuDeviceGetName_oro;
extern tcuDeviceGetNvSciSyncAttributes *cuDeviceGetNvSciSyncAttributes_oro;
extern tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute_oro;
extern tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId_oro;
extern tcuDeviceGetProperties *cuDeviceGetProperties_oro;
extern tcuDeviceGetTexture1DLinearMaxWidth *cuDeviceGetTexture1DLinearMaxWidth_oro;
extern tcuDeviceGetUuid *cuDeviceGetUuid_oro;
extern tcuDeviceGetUuid_v2 *cuDeviceGetUuid_v2_oro;
extern tcuDeviceGraphMemTrim *cuDeviceGraphMemTrim_oro;
extern tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState_oro;
extern tcuDevicePrimaryCtxRelease_v2 *cuDevicePrimaryCtxRelease_v2_oro;
extern tcuDevicePrimaryCtxReset_v2 *cuDevicePrimaryCtxReset_v2_oro;
extern tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain_oro;
extern tcuDevicePrimaryCtxSetFlags_v2 *cuDevicePrimaryCtxSetFlags_v2_oro;
extern tcuDeviceSetGraphMemAttribute *cuDeviceSetGraphMemAttribute_oro;
extern tcuDeviceSetMemPool *cuDeviceSetMemPool_oro;
extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2_oro;
/* Extern function-pointer declaration for cuDriverGetVersion. */
extern tcuDriverGetVersion *cuDriverGetVersion_oro;
/* Extern function-pointer declarations for the cuEvent* entry points. */
extern tcuEventCreate *cuEventCreate_oro;
extern tcuEventDestroy_v2 *cuEventDestroy_v2_oro;
extern tcuEventElapsedTime *cuEventElapsedTime_oro;
extern tcuEventQuery *cuEventQuery_oro;
extern tcuEventRecord *cuEventRecord_oro;
extern tcuEventRecordWithFlags *cuEventRecordWithFlags_oro;
extern tcuEventSynchronize *cuEventSynchronize_oro;
/* Extern function-pointer declarations for mapping external memory as a buffer or mipmapped array. */
extern tcuExternalMemoryGetMappedBuffer *cuExternalMemoryGetMappedBuffer_oro;
extern tcuExternalMemoryGetMappedMipmappedArray *cuExternalMemoryGetMappedMipmappedArray_oro;
/* Extern function-pointer declaration for cuFlushGPUDirectRDMAWrites. */
extern tcuFlushGPUDirectRDMAWrites *cuFlushGPUDirectRDMAWrites_oro;
/* Extern function-pointer declarations for the cuFunc* (function attribute / configuration) entry points. */
extern tcuFuncGetAttribute *cuFuncGetAttribute_oro;
extern tcuFuncGetModule *cuFuncGetModule_oro;
extern tcuFuncSetAttribute *cuFuncSetAttribute_oro;
extern tcuFuncSetBlockShape *cuFuncSetBlockShape_oro;
extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig_oro;
extern tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig_oro;
extern tcuFuncSetSharedSize *cuFuncSetSharedSize_oro;
/*
 * Extern function-pointer declarations for the cuGet* entry points: error
 * name / string lookup, export table, and proc-address lookup (_v2).
 */
extern tcuGetErrorName *cuGetErrorName_oro;
extern tcuGetErrorString *cuGetErrorString_oro;
extern tcuGetExportTable *cuGetExportTable_oro;
extern tcuGetProcAddress_v2 *cuGetProcAddress_v2_oro;
/*
 * Extern function-pointer declarations for the cuGraphAdd* entry points
 * (one per node kind, plus dependency edges and the generic AddNode).
 */
extern tcuGraphAddBatchMemOpNode *cuGraphAddBatchMemOpNode_oro;
extern tcuGraphAddChildGraphNode *cuGraphAddChildGraphNode_oro;
extern tcuGraphAddDependencies *cuGraphAddDependencies_oro;
extern tcuGraphAddEmptyNode *cuGraphAddEmptyNode_oro;
extern tcuGraphAddEventRecordNode *cuGraphAddEventRecordNode_oro;
extern tcuGraphAddEventWaitNode *cuGraphAddEventWaitNode_oro;
extern tcuGraphAddExternalSemaphoresSignalNode *cuGraphAddExternalSemaphoresSignalNode_oro;
extern tcuGraphAddExternalSemaphoresWaitNode *cuGraphAddExternalSemaphoresWaitNode_oro;
extern tcuGraphAddHostNode *cuGraphAddHostNode_oro;
extern tcuGraphAddKernelNode_v2 *cuGraphAddKernelNode_v2_oro;
extern tcuGraphAddMemAllocNode *cuGraphAddMemAllocNode_oro;
extern tcuGraphAddMemFreeNode *cuGraphAddMemFreeNode_oro;
extern tcuGraphAddMemcpyNode *cuGraphAddMemcpyNode_oro;
extern tcuGraphAddMemsetNode *cuGraphAddMemsetNode_oro;
extern tcuGraphAddNode *cuGraphAddNode_oro;
/*
 * Extern function-pointer declarations for graph-level driver entry points:
 * batch-mem-op node params, child-graph lookup, clone/create/debug-print/
 * destroy, and event record/wait node accessors.
 */
extern tcuGraphBatchMemOpNodeGetParams *cuGraphBatchMemOpNodeGetParams_oro;
extern tcuGraphBatchMemOpNodeSetParams *cuGraphBatchMemOpNodeSetParams_oro;
extern tcuGraphChildGraphNodeGetGraph *cuGraphChildGraphNodeGetGraph_oro;
extern tcuGraphClone *cuGraphClone_oro;
extern tcuGraphCreate *cuGraphCreate_oro;
extern tcuGraphDebugDotPrint *cuGraphDebugDotPrint_oro;
extern tcuGraphDestroy *cuGraphDestroy_oro;
extern tcuGraphDestroyNode *cuGraphDestroyNode_oro;
extern tcuGraphEventRecordNodeGetEvent *cuGraphEventRecordNodeGetEvent_oro;
extern tcuGraphEventRecordNodeSetEvent *cuGraphEventRecordNodeSetEvent_oro;
extern tcuGraphEventWaitNodeGetEvent *cuGraphEventWaitNodeGetEvent_oro;
extern tcuGraphEventWaitNodeSetEvent *cuGraphEventWaitNodeSetEvent_oro;
/*
 * cuGraphExec* pointers. Extern declarations only; each pointee type is the
 * matching t<api> typedef.
 * NOTE(review): by name these operate on an instantiated (executable) graph
 * rather than on a graph template -- confirm against the CUDA driver API docs.
 */
extern tcuGraphExecBatchMemOpNodeSetParams *cuGraphExecBatchMemOpNodeSetParams_oro;
extern tcuGraphExecChildGraphNodeSetParams *cuGraphExecChildGraphNodeSetParams_oro;
extern tcuGraphExecDestroy *cuGraphExecDestroy_oro;
extern tcuGraphExecEventRecordNodeSetEvent *cuGraphExecEventRecordNodeSetEvent_oro;
extern tcuGraphExecEventWaitNodeSetEvent *cuGraphExecEventWaitNodeSetEvent_oro;
extern tcuGraphExecExternalSemaphoresSignalNodeSetParams *cuGraphExecExternalSemaphoresSignalNodeSetParams_oro;
extern tcuGraphExecExternalSemaphoresWaitNodeSetParams *cuGraphExecExternalSemaphoresWaitNodeSetParams_oro;
extern tcuGraphExecGetFlags *cuGraphExecGetFlags_oro;
extern tcuGraphExecHostNodeSetParams *cuGraphExecHostNodeSetParams_oro;
/* Kernel-node params and ExecUpdate are declared with their _v2 names only. */
extern tcuGraphExecKernelNodeSetParams_v2 *cuGraphExecKernelNodeSetParams_v2_oro;
extern tcuGraphExecMemcpyNodeSetParams *cuGraphExecMemcpyNodeSetParams_oro;
extern tcuGraphExecMemsetNodeSetParams *cuGraphExecMemsetNodeSetParams_oro;
extern tcuGraphExecNodeSetParams *cuGraphExecNodeSetParams_oro;
extern tcuGraphExecUpdate_v2 *cuGraphExecUpdate_v2_oro;
/*
 * Remaining cuGraph* pointers (ExternalSemaphores* through Upload), in sorted
 * order. Extern declarations only; each pointee type is the matching t<api>
 * typedef.
 */
extern tcuGraphExternalSemaphoresSignalNodeGetParams *cuGraphExternalSemaphoresSignalNodeGetParams_oro;
extern tcuGraphExternalSemaphoresSignalNodeSetParams *cuGraphExternalSemaphoresSignalNodeSetParams_oro;
extern tcuGraphExternalSemaphoresWaitNodeGetParams *cuGraphExternalSemaphoresWaitNodeGetParams_oro;
extern tcuGraphExternalSemaphoresWaitNodeSetParams *cuGraphExternalSemaphoresWaitNodeSetParams_oro;
/* Graph topology queries. */
extern tcuGraphGetEdges *cuGraphGetEdges_oro;
extern tcuGraphGetNodes *cuGraphGetNodes_oro;
extern tcuGraphGetRootNodes *cuGraphGetRootNodes_oro;
extern tcuGraphHostNodeGetParams *cuGraphHostNodeGetParams_oro;
extern tcuGraphHostNodeSetParams *cuGraphHostNodeSetParams_oro;
/* Two instantiate variants are declared: WithFlags and WithParams. */
extern tcuGraphInstantiateWithFlags *cuGraphInstantiateWithFlags_oro;
extern tcuGraphInstantiateWithParams *cuGraphInstantiateWithParams_oro;
/* Kernel-node Get/SetParams carry a _v2 suffix; the attribute calls do not. */
extern tcuGraphKernelNodeCopyAttributes *cuGraphKernelNodeCopyAttributes_oro;
extern tcuGraphKernelNodeGetAttribute *cuGraphKernelNodeGetAttribute_oro;
extern tcuGraphKernelNodeGetParams_v2 *cuGraphKernelNodeGetParams_v2_oro;
extern tcuGraphKernelNodeSetAttribute *cuGraphKernelNodeSetAttribute_oro;
extern tcuGraphKernelNodeSetParams_v2 *cuGraphKernelNodeSetParams_v2_oro;
extern tcuGraphLaunch *cuGraphLaunch_oro;
extern tcuGraphMemAllocNodeGetParams *cuGraphMemAllocNodeGetParams_oro;
extern tcuGraphMemFreeNodeGetParams *cuGraphMemFreeNodeGetParams_oro;
extern tcuGraphMemcpyNodeGetParams *cuGraphMemcpyNodeGetParams_oro;
extern tcuGraphMemcpyNodeSetParams *cuGraphMemcpyNodeSetParams_oro;
extern tcuGraphMemsetNodeGetParams *cuGraphMemsetNodeGetParams_oro;
extern tcuGraphMemsetNodeSetParams *cuGraphMemsetNodeSetParams_oro;
/* Generic (node-kind independent) node entry points. */
extern tcuGraphNodeFindInClone *cuGraphNodeFindInClone_oro;
extern tcuGraphNodeGetDependencies *cuGraphNodeGetDependencies_oro;
extern tcuGraphNodeGetDependentNodes *cuGraphNodeGetDependentNodes_oro;
extern tcuGraphNodeGetEnabled *cuGraphNodeGetEnabled_oro;
extern tcuGraphNodeGetType *cuGraphNodeGetType_oro;
extern tcuGraphNodeSetEnabled *cuGraphNodeSetEnabled_oro;
extern tcuGraphNodeSetParams *cuGraphNodeSetParams_oro;
extern tcuGraphReleaseUserObject *cuGraphReleaseUserObject_oro;
extern tcuGraphRemoveDependencies *cuGraphRemoveDependencies_oro;
extern tcuGraphRetainUserObject *cuGraphRetainUserObject_oro;
extern tcuGraphUpload *cuGraphUpload_oro;
/*
 * cuGraphics* pointers. Extern declarations only; each pointee type is the
 * matching t<api> typedef.
 * These sort after cuGraphUpload because the list is byte-wise sorted
 * ('U' < 'i'); they are a separate name family from the cuGraph* entries.
 * NOTE(review): presumably the graphics-interop resource API -- confirm
 * against the CUDA driver API docs.
 */
extern tcuGraphicsMapResources *cuGraphicsMapResources_oro;
extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray_oro;
extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2_oro;
extern tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2_oro;
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray_oro;
extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources_oro;
extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource_oro;
/*
 * cuImportExternal* and cuInit pointers. Extern declarations only; each
 * pointee type is the matching t<api> typedef.
 */
extern tcuImportExternalMemory *cuImportExternalMemory_oro;
extern tcuImportExternalSemaphore *cuImportExternalSemaphore_oro;
/* NOTE(review): presumably the driver initialisation entry point that must
 * be called before the others -- confirm against the CUDA driver API docs. */
extern tcuInit *cuInit_oro;
/*
 * cuIpc* pointers (event and memory handle get/open/close). Extern
 * declarations only; each pointee type is the matching t<api> typedef.
 * Only cuIpcOpenMemHandle carries a _v2 suffix in this group.
 */
extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle_oro;
extern tcuIpcGetEventHandle *cuIpcGetEventHandle_oro;
extern tcuIpcGetMemHandle *cuIpcGetMemHandle_oro;
extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle_oro;
extern tcuIpcOpenMemHandle_v2 *cuIpcOpenMemHandle_v2_oro;
/*
 * cuKernel* pointers. Extern declarations only; each pointee type is the
 * matching t<api> typedef.
 */
extern tcuKernelGetAttribute *cuKernelGetAttribute_oro;
extern tcuKernelGetFunction *cuKernelGetFunction_oro;
extern tcuKernelSetAttribute *cuKernelSetAttribute_oro;
extern tcuKernelSetCacheConfig *cuKernelSetCacheConfig_oro;
/*
 * cuLaunch* pointers. Extern declarations only; each pointee type is the
 * matching t<api> typedef.
 * NOTE(review): cuLaunch, cuLaunchGrid and cuLaunchGridAsync look like the
 * legacy launch entry points, with cuLaunchKernel/cuLaunchKernelEx the
 * current ones -- confirm against the CUDA driver API docs before relying on
 * either set.
 */
extern tcuLaunch *cuLaunch_oro;
extern tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel_oro;
extern tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice_oro;
extern tcuLaunchGrid *cuLaunchGrid_oro;
extern tcuLaunchGridAsync *cuLaunchGridAsync_oro;
extern tcuLaunchHostFunc *cuLaunchHostFunc_oro;
extern tcuLaunchKernel *cuLaunchKernel_oro;
extern tcuLaunchKernelEx *cuLaunchKernelEx_oro;
/*
 * cuLibrary* pointers (lookups, load from data or file, unload). Extern
 * declarations only; each pointee type is the matching t<api> typedef.
 */
extern tcuLibraryGetGlobal *cuLibraryGetGlobal_oro;
extern tcuLibraryGetKernel *cuLibraryGetKernel_oro;
extern tcuLibraryGetManaged *cuLibraryGetManaged_oro;
extern tcuLibraryGetModule *cuLibraryGetModule_oro;
extern tcuLibraryGetUnifiedFunction *cuLibraryGetUnifiedFunction_oro;
extern tcuLibraryLoadData *cuLibraryLoadData_oro;
extern tcuLibraryLoadFromFile *cuLibraryLoadFromFile_oro;
extern tcuLibraryUnload *cuLibraryUnload_oro;
/*
 * cuLink* pointers. Extern declarations only; each pointee type is the
 * matching t<api> typedef.
 * AddData, AddFile and Create are declared with their _v2 names; Complete and
 * Destroy have no version suffix.
 */
extern tcuLinkAddData_v2 *cuLinkAddData_v2_oro;
extern tcuLinkAddFile_v2 *cuLinkAddFile_v2_oro;
extern tcuLinkComplete *cuLinkComplete_oro;
extern tcuLinkCreate_v2 *cuLinkCreate_v2_oro;
extern tcuLinkDestroy *cuLinkDestroy_oro;
/*
 * cuMem* pointers. Extern declarations only; each pointee type is the
 * matching t<api> typedef.
 * Both cuMemAdvise and cuMemAdvise_v2 are declared as distinct pointers with
 * distinct pointee types, so callers must pick the variant explicitly.
 */
extern tcuMemAddressFree *cuMemAddressFree_oro;
extern tcuMemAddressReserve *cuMemAddressReserve_oro;
extern tcuMemAdvise *cuMemAdvise_oro;
extern tcuMemAdvise_v2 *cuMemAdvise_v2_oro;
extern tcuMemAllocAsync *cuMemAllocAsync_oro;