ToolDAQFramework
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
helper_cuda.h
Go to the documentation of this file.
1 
12 // These are CUDA Helper functions for initialization and error checking
14 
15 #ifndef HELPER_CUDA_H
16 #define HELPER_CUDA_H
17 
18 #pragma once
19 
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 
24 #include <helper_string.h>
25 
26 #ifndef EXIT_WAIVED
27 #define EXIT_WAIVED 2
28 #endif
29 
30 // Note: your SDK sample is required to include the proper header files; please
31 // refer to the CUDA examples for the CUDA headers needed, which may change depending
32 // on which CUDA functions are used.
33 
34 // CUDA Runtime error messages
35 #ifdef __DRIVER_TYPES_H__
// CUDA Runtime API: map a cudaError_t enumerator to its identifier string.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(cudaError_t error)
{
    // Every case returns the stringified enumerator name, so generate the
    // "case X: return \"X\";" pairs with a local stringification macro
    // instead of writing each one out by hand.
#define HELPER_CUDA_ENUM_CASE(e) \
    case e:                      \
        return #e;

    switch (error)
    {
        HELPER_CUDA_ENUM_CASE(cudaSuccess)
        HELPER_CUDA_ENUM_CASE(cudaErrorMissingConfiguration)
        HELPER_CUDA_ENUM_CASE(cudaErrorMemoryAllocation)
        HELPER_CUDA_ENUM_CASE(cudaErrorInitializationError)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchFailure)
        HELPER_CUDA_ENUM_CASE(cudaErrorPriorLaunchFailure)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchTimeout)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchOutOfResources)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidDeviceFunction)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidConfiguration)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidDevice)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidValue)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidPitchValue)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidSymbol)
        HELPER_CUDA_ENUM_CASE(cudaErrorMapBufferObjectFailed)
        HELPER_CUDA_ENUM_CASE(cudaErrorUnmapBufferObjectFailed)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidHostPointer)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidDevicePointer)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidTexture)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidTextureBinding)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidChannelDescriptor)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidMemcpyDirection)
        HELPER_CUDA_ENUM_CASE(cudaErrorAddressOfConstant)
        HELPER_CUDA_ENUM_CASE(cudaErrorTextureFetchFailed)
        HELPER_CUDA_ENUM_CASE(cudaErrorTextureNotBound)
        HELPER_CUDA_ENUM_CASE(cudaErrorSynchronizationError)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidFilterSetting)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidNormSetting)
        HELPER_CUDA_ENUM_CASE(cudaErrorMixedDeviceExecution)
        HELPER_CUDA_ENUM_CASE(cudaErrorCudartUnloading)
        HELPER_CUDA_ENUM_CASE(cudaErrorUnknown)
        HELPER_CUDA_ENUM_CASE(cudaErrorNotYetImplemented)
        HELPER_CUDA_ENUM_CASE(cudaErrorMemoryValueTooLarge)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidResourceHandle)
        HELPER_CUDA_ENUM_CASE(cudaErrorNotReady)
        HELPER_CUDA_ENUM_CASE(cudaErrorInsufficientDriver)
        HELPER_CUDA_ENUM_CASE(cudaErrorSetOnActiveProcess)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidSurface)
        HELPER_CUDA_ENUM_CASE(cudaErrorNoDevice)
        HELPER_CUDA_ENUM_CASE(cudaErrorECCUncorrectable)
        HELPER_CUDA_ENUM_CASE(cudaErrorSharedObjectSymbolNotFound)
        HELPER_CUDA_ENUM_CASE(cudaErrorSharedObjectInitFailed)
        HELPER_CUDA_ENUM_CASE(cudaErrorUnsupportedLimit)
        HELPER_CUDA_ENUM_CASE(cudaErrorDuplicateVariableName)
        HELPER_CUDA_ENUM_CASE(cudaErrorDuplicateTextureName)
        HELPER_CUDA_ENUM_CASE(cudaErrorDuplicateSurfaceName)
        HELPER_CUDA_ENUM_CASE(cudaErrorDevicesUnavailable)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidKernelImage)
        HELPER_CUDA_ENUM_CASE(cudaErrorNoKernelImageForDevice)
        HELPER_CUDA_ENUM_CASE(cudaErrorIncompatibleDriverContext)
        HELPER_CUDA_ENUM_CASE(cudaErrorPeerAccessAlreadyEnabled)
        HELPER_CUDA_ENUM_CASE(cudaErrorPeerAccessNotEnabled)
        HELPER_CUDA_ENUM_CASE(cudaErrorDeviceAlreadyInUse)
        HELPER_CUDA_ENUM_CASE(cudaErrorProfilerDisabled)
        HELPER_CUDA_ENUM_CASE(cudaErrorProfilerNotInitialized)
        HELPER_CUDA_ENUM_CASE(cudaErrorProfilerAlreadyStarted)
        HELPER_CUDA_ENUM_CASE(cudaErrorProfilerAlreadyStopped)

        /* Since CUDA 4.0*/
        HELPER_CUDA_ENUM_CASE(cudaErrorAssert)
        HELPER_CUDA_ENUM_CASE(cudaErrorTooManyPeers)
        HELPER_CUDA_ENUM_CASE(cudaErrorHostMemoryAlreadyRegistered)
        HELPER_CUDA_ENUM_CASE(cudaErrorHostMemoryNotRegistered)

        /* Since CUDA 5.0 */
        HELPER_CUDA_ENUM_CASE(cudaErrorOperatingSystem)
        HELPER_CUDA_ENUM_CASE(cudaErrorPeerAccessUnsupported)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchMaxDepthExceeded)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchFileScopedTex)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchFileScopedSurf)
        HELPER_CUDA_ENUM_CASE(cudaErrorSyncDepthExceeded)
        HELPER_CUDA_ENUM_CASE(cudaErrorLaunchPendingCountExceeded)
        HELPER_CUDA_ENUM_CASE(cudaErrorNotPermitted)
        HELPER_CUDA_ENUM_CASE(cudaErrorNotSupported)

        /* Since CUDA 6.0 */
        HELPER_CUDA_ENUM_CASE(cudaErrorHardwareStackError)
        HELPER_CUDA_ENUM_CASE(cudaErrorIllegalInstruction)
        HELPER_CUDA_ENUM_CASE(cudaErrorMisalignedAddress)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidAddressSpace)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidPc)
        HELPER_CUDA_ENUM_CASE(cudaErrorIllegalAddress)

        /* Since CUDA 6.5*/
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidPtx)
        HELPER_CUDA_ENUM_CASE(cudaErrorInvalidGraphicsContext)
        HELPER_CUDA_ENUM_CASE(cudaErrorStartupFailure)
        HELPER_CUDA_ENUM_CASE(cudaErrorApiFailureBase)
    }

#undef HELPER_CUDA_ENUM_CASE

    return "<unknown>";
}
287 #endif
288 
289 #ifdef __cuda_cuda_h__
290 // CUDA Driver API errors
// CUDA Driver API: map a CUresult enumerator to its identifier string.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(CUresult error)
{
    // Every case returns the stringified enumerator name; generate the case
    // labels with a local stringification macro.
#define HELPER_CUDA_DRV_CASE(e) \
    case e:                     \
        return #e;

    switch (error)
    {
        HELPER_CUDA_DRV_CASE(CUDA_SUCCESS)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_VALUE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_OUT_OF_MEMORY)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_INITIALIZED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_DEINITIALIZED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_DISABLED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_NOT_INITIALIZED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_ALREADY_STARTED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_ALREADY_STOPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NO_DEVICE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_DEVICE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_IMAGE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_CONTEXT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_CONTEXT_ALREADY_CURRENT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_MAP_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_UNMAP_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ARRAY_IS_MAPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ALREADY_MAPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NO_BINARY_FOR_GPU)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ALREADY_ACQUIRED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_MAPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_MAPPED_AS_ARRAY)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_MAPPED_AS_POINTER)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ECC_UNCORRECTABLE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_UNSUPPORTED_LIMIT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_CONTEXT_ALREADY_IN_USE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_SOURCE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_FILE_NOT_FOUND)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_OPERATING_SYSTEM)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_HANDLE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_FOUND)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_READY)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_TIMEOUT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_CONTEXT_IS_DESTROYED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ASSERT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_TOO_MANY_PEERS)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_UNKNOWN)
    }

#undef HELPER_CUDA_DRV_CASE

    return "<unknown>";
}
439 #endif
440 
441 #ifdef CUBLAS_API_H_
442 // cuBLAS API errors
// cuBLAS: map a cublasStatus_t enumerator to its identifier string.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(cublasStatus_t error)
{
    // Each case returns the stringified enumerator name.
#define HELPER_CUBLAS_CASE(e) \
    case e:                   \
        return #e;

    switch (error)
    {
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_SUCCESS)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_NOT_INITIALIZED)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_ALLOC_FAILED)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_INVALID_VALUE)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_ARCH_MISMATCH)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_MAPPING_ERROR)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_EXECUTION_FAILED)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_INTERNAL_ERROR)
    }

#undef HELPER_CUBLAS_CASE

    return "<unknown>";
}
474 #endif
475 
476 #ifdef _CUFFT_H_
477 // cuFFT API errors
// cuFFT: map a cufftResult enumerator to its identifier string.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(cufftResult error)
{
    // Each case returns the stringified enumerator name.
#define HELPER_CUFFT_CASE(e) \
    case e:                  \
        return #e;

    switch (error)
    {
        HELPER_CUFFT_CASE(CUFFT_SUCCESS)
        HELPER_CUFFT_CASE(CUFFT_INVALID_PLAN)
        HELPER_CUFFT_CASE(CUFFT_ALLOC_FAILED)
        HELPER_CUFFT_CASE(CUFFT_INVALID_TYPE)
        HELPER_CUFFT_CASE(CUFFT_INVALID_VALUE)
        HELPER_CUFFT_CASE(CUFFT_INTERNAL_ERROR)
        HELPER_CUFFT_CASE(CUFFT_EXEC_FAILED)
        HELPER_CUFFT_CASE(CUFFT_SETUP_FAILED)
        HELPER_CUFFT_CASE(CUFFT_INVALID_SIZE)
        HELPER_CUFFT_CASE(CUFFT_UNALIGNED_DATA)
        HELPER_CUFFT_CASE(CUFFT_INCOMPLETE_PARAMETER_LIST)
        HELPER_CUFFT_CASE(CUFFT_INVALID_DEVICE)
        HELPER_CUFFT_CASE(CUFFT_PARSE_ERROR)
        HELPER_CUFFT_CASE(CUFFT_NO_WORKSPACE)
        HELPER_CUFFT_CASE(CUFFT_NOT_IMPLEMENTED)
        HELPER_CUFFT_CASE(CUFFT_LICENSE_ERROR)
    }

#undef HELPER_CUFFT_CASE

    return "<unknown>";
}
533 #endif
534 
535 
536 #ifdef CUSPARSEAPI
537 // cuSPARSE API errors
// cuSPARSE: map a cusparseStatus_t enumerator to its identifier string.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(cusparseStatus_t error)
{
    // Each case returns the stringified enumerator name.
#define HELPER_CUSPARSE_CASE(e) \
    case e:                     \
        return #e;

    switch (error)
    {
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_SUCCESS)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_NOT_INITIALIZED)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_ALLOC_FAILED)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_INVALID_VALUE)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_ARCH_MISMATCH)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_MAPPING_ERROR)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_EXECUTION_FAILED)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_INTERNAL_ERROR)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)
    }

#undef HELPER_CUSPARSE_CASE

    return "<unknown>";
}
572 #endif
573 
574 #ifdef CURAND_H_
575 // cuRAND API errors
// cuRAND: map a curandStatus_t enumerator to its identifier string.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(curandStatus_t error)
{
    // Each case returns the stringified enumerator name.
#define HELPER_CURAND_CASE(e) \
    case e:                   \
        return #e;

    switch (error)
    {
        HELPER_CURAND_CASE(CURAND_STATUS_SUCCESS)
        HELPER_CURAND_CASE(CURAND_STATUS_VERSION_MISMATCH)
        HELPER_CURAND_CASE(CURAND_STATUS_NOT_INITIALIZED)
        HELPER_CURAND_CASE(CURAND_STATUS_ALLOCATION_FAILED)
        HELPER_CURAND_CASE(CURAND_STATUS_TYPE_ERROR)
        HELPER_CURAND_CASE(CURAND_STATUS_OUT_OF_RANGE)
        HELPER_CURAND_CASE(CURAND_STATUS_LENGTH_NOT_MULTIPLE)
        HELPER_CURAND_CASE(CURAND_STATUS_DOUBLE_PRECISION_REQUIRED)
        HELPER_CURAND_CASE(CURAND_STATUS_LAUNCH_FAILURE)
        HELPER_CURAND_CASE(CURAND_STATUS_PREEXISTING_FAILURE)
        HELPER_CURAND_CASE(CURAND_STATUS_INITIALIZATION_FAILED)
        HELPER_CURAND_CASE(CURAND_STATUS_ARCH_MISMATCH)
        HELPER_CURAND_CASE(CURAND_STATUS_INTERNAL_ERROR)
    }

#undef HELPER_CURAND_CASE

    return "<unknown>";
}
622 #endif
623 
624 #ifdef NV_NPPIDEFS_H
625 // NPP API errors
// NPP: map an NppStatus code to its identifier string. For NPP builds older
// than 5.5, several legacy enumerators are deliberately reported under their
// modern (5.5+) names so the output string is version-independent.
// Returns "<unknown>" for any value not covered by this build's switch.
static const char *_cudaGetErrorEnum(NppStatus error)
{
    // Most cases return the stringified enumerator name; generate those with
    // a local macro and spell out only the legacy-alias cases.
#define HELPER_NPP_CASE(e) \
    case e:                \
        return #e;

    switch (error)
    {
        HELPER_NPP_CASE(NPP_NOT_SUPPORTED_MODE_ERROR)
        HELPER_NPP_CASE(NPP_ROUND_MODE_NOT_SUPPORTED_ERROR)
        HELPER_NPP_CASE(NPP_RESIZE_NO_OPERATION_ERROR)
        HELPER_NPP_CASE(NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY)

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        // Pre-5.5 enumerators, reported under their modern names.
        case NPP_BAD_ARG_ERROR:
            return "NPP_BAD_ARGUMENT_ERROR";

        case NPP_COEFF_ERROR:
            return "NPP_COEFFICIENT_ERROR";

        case NPP_RECT_ERROR:
            return "NPP_RECTANGLE_ERROR";

        case NPP_QUAD_ERROR:
            return "NPP_QUADRANGLE_ERROR";

        case NPP_MEM_ALLOC_ERR:
            return "NPP_MEMORY_ALLOCATION_ERROR";

        case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";

        HELPER_NPP_CASE(NPP_INVALID_INPUT)
        HELPER_NPP_CASE(NPP_POINTER_ERROR)
        HELPER_NPP_CASE(NPP_WARNING)
        HELPER_NPP_CASE(NPP_ODD_ROI_WARNING)
#else
        // These are for CUDA 5.5 or higher
        HELPER_NPP_CASE(NPP_BAD_ARGUMENT_ERROR)
        HELPER_NPP_CASE(NPP_COEFFICIENT_ERROR)
        HELPER_NPP_CASE(NPP_RECTANGLE_ERROR)
        HELPER_NPP_CASE(NPP_QUADRANGLE_ERROR)

        case NPP_MEMORY_ALLOCATION_ERR:
            return "NPP_MEMORY_ALLOCATION_ERROR";

        HELPER_NPP_CASE(NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR)
        HELPER_NPP_CASE(NPP_INVALID_HOST_POINTER_ERROR)
        HELPER_NPP_CASE(NPP_INVALID_DEVICE_POINTER_ERROR)
#endif

        HELPER_NPP_CASE(NPP_LUT_NUMBER_OF_LEVELS_ERROR)
        HELPER_NPP_CASE(NPP_TEXTURE_BIND_ERROR)
        HELPER_NPP_CASE(NPP_WRONG_INTERSECTION_ROI_ERROR)
        HELPER_NPP_CASE(NPP_NOT_EVEN_STEP_ERROR)
        HELPER_NPP_CASE(NPP_INTERPOLATION_ERROR)
        HELPER_NPP_CASE(NPP_RESIZE_FACTOR_ERROR)
        HELPER_NPP_CASE(NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR)

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        HELPER_NPP_CASE(NPP_MEMFREE_ERR)
        HELPER_NPP_CASE(NPP_MEMSET_ERR)

        case NPP_MEMCPY_ERR:
            return "NPP_MEMCPY_ERROR";

        HELPER_NPP_CASE(NPP_MIRROR_FLIP_ERR)
#else
        HELPER_NPP_CASE(NPP_MEMFREE_ERROR)
        HELPER_NPP_CASE(NPP_MEMSET_ERROR)
        HELPER_NPP_CASE(NPP_MEMCPY_ERROR)
        HELPER_NPP_CASE(NPP_MIRROR_FLIP_ERROR)
#endif

        HELPER_NPP_CASE(NPP_ALIGNMENT_ERROR)
        HELPER_NPP_CASE(NPP_STEP_ERROR)
        HELPER_NPP_CASE(NPP_SIZE_ERROR)
        HELPER_NPP_CASE(NPP_NULL_POINTER_ERROR)
        HELPER_NPP_CASE(NPP_CUDA_KERNEL_EXECUTION_ERROR)
        HELPER_NPP_CASE(NPP_NOT_IMPLEMENTED_ERROR)
        HELPER_NPP_CASE(NPP_ERROR)
        HELPER_NPP_CASE(NPP_SUCCESS)
        HELPER_NPP_CASE(NPP_WRONG_INTERSECTION_QUAD_WARNING)
        HELPER_NPP_CASE(NPP_MISALIGNED_DST_ROI_WARNING)
        HELPER_NPP_CASE(NPP_AFFINE_QUAD_INCORRECT_WARNING)
        HELPER_NPP_CASE(NPP_DOUBLE_SIZE_WARNING)
        HELPER_NPP_CASE(NPP_WRONG_INTERSECTION_ROI_WARNING)

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
        /* These are 6.0 or higher */
        HELPER_NPP_CASE(NPP_LUT_PALETTE_BITSIZE_ERROR)
        HELPER_NPP_CASE(NPP_ZC_MODE_NOT_SUPPORTED_ERROR)
        HELPER_NPP_CASE(NPP_QUALITY_INDEX_ERROR)
        HELPER_NPP_CASE(NPP_CHANNEL_ORDER_ERROR)
        HELPER_NPP_CASE(NPP_ZERO_MASK_VALUE_ERROR)
        HELPER_NPP_CASE(NPP_NUMBER_OF_CHANNELS_ERROR)
        HELPER_NPP_CASE(NPP_COI_ERROR)
        HELPER_NPP_CASE(NPP_DIVISOR_ERROR)
        HELPER_NPP_CASE(NPP_CHANNEL_ERROR)
        HELPER_NPP_CASE(NPP_STRIDE_ERROR)
        HELPER_NPP_CASE(NPP_ANCHOR_ERROR)
        HELPER_NPP_CASE(NPP_MASK_SIZE_ERROR)
        HELPER_NPP_CASE(NPP_MOMENT_00_ZERO_ERROR)
        HELPER_NPP_CASE(NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR)
        HELPER_NPP_CASE(NPP_THRESHOLD_ERROR)
        HELPER_NPP_CASE(NPP_CONTEXT_MATCH_ERROR)
        HELPER_NPP_CASE(NPP_FFT_FLAG_ERROR)
        HELPER_NPP_CASE(NPP_FFT_ORDER_ERROR)
        HELPER_NPP_CASE(NPP_SCALE_RANGE_ERROR)
        HELPER_NPP_CASE(NPP_DATA_TYPE_ERROR)
        HELPER_NPP_CASE(NPP_OUT_OFF_RANGE_ERROR)
        HELPER_NPP_CASE(NPP_DIVIDE_BY_ZERO_ERROR)
        HELPER_NPP_CASE(NPP_RANGE_ERROR)
        HELPER_NPP_CASE(NPP_NO_MEMORY_ERROR)
        HELPER_NPP_CASE(NPP_ERROR_RESERVED)
        HELPER_NPP_CASE(NPP_NO_OPERATION_WARNING)
        HELPER_NPP_CASE(NPP_DIVIDE_BY_ZERO_WARNING)
#endif
    }

#undef HELPER_NPP_CASE

    return "<unknown>";
}
878 #endif
879 
880 #ifdef __DRIVER_TYPES_H__
881 #ifndef DEVICE_RESET
882 #define DEVICE_RESET cudaDeviceReset();
883 #endif
884 #else
885 #ifndef DEVICE_RESET
886 #define DEVICE_RESET
887 #endif
888 #endif
889 
// Abort helper used by the checkCudaErrors() macro: if `result` is a nonzero
// (i.e. failing) status code from any CUDA API/library call, print the call
// site, the numeric code and its enum name, reset the device, and exit.
//
//   result - status returned by the call (cudaError_t, CUresult, cublasStatus_t, ...)
//   func   - stringified source expression that produced `result`
//   file   - __FILE__ of the call site
//   line   - __LINE__ of the call site
template< typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
    if (result)
    {
        // %u matches the static_cast<unsigned int> argument (the original
        // format used %d for an unsigned value).
        fprintf(stderr, "CUDA error at %s:%d code=%u(%s) \"%s\" \n",
                file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
#ifdef DEVICE_RESET
        // Make sure we call CUDA Device Reset before exiting, so profiling
        // and trace data are flushed (DEVICE_RESET is defined above; the
        // original listing's comment promised this call but it was missing).
        DEVICE_RESET
#endif
        exit(EXIT_FAILURE);
    }
}
902 
903 #ifdef __DRIVER_TYPES_H__
904 // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
905 #define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
906 
907 // This will output the proper error string when calling cudaGetLastError
908 #define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
909 
// Implementation behind the getLastCudaError() macro: fetch (and clear) the
// CUDA runtime's sticky error state; on error, report `errorMessage` with the
// call site, reset the device, and exit. Useful after kernel launches, which
// do not return an error code directly.
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
    cudaError_t err = cudaGetLastError();

    if (cudaSuccess != err)
    {
        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
                file, line, errorMessage, (int)err, cudaGetErrorString(err));
#ifdef DEVICE_RESET
        // Reset the device before exiting so profiling/trace buffers are
        // flushed (this call was missing from the listed body).
        DEVICE_RESET
#endif
        exit(EXIT_FAILURE);
    }
}
922 #endif
923 
#ifndef MAX
// Parenthesize both arguments and the whole expansion so MAX() composes
// safely with lower-precedence operand expressions (the original
// `(a > b ? a : b)` mis-parses arguments such as `MAX(x ? 1 : 2, y)`).
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
927 
928 // Float To Int conversion
// Round a float to the nearest int, with halfway cases rounded away from
// zero (2.5f -> 3, -2.5f -> -3). Caller must ensure the result fits in int.
inline int ftoi(float value)
{
    // 0.5f (not 0.5) keeps the arithmetic in single precision instead of
    // silently promoting to double.
    return (value >= 0.0f ? (int)(value + 0.5f) : (int)(value - 0.5f));
}
933 
934 // Beginning of GPU Architecture definitions
// Beginning of GPU Architecture definitions
// Map an SM version to the number of CUDA cores per multiprocessor.
//   major, minor - compute capability (e.g. 5 and 2 for SM 5.2)
// Returns the core count for a known SM; for unknown versions it prints a
// warning and falls back to the last (newest) known entry.
inline int _ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
    typedef struct
    {
        int SM;    // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
        int Cores; // CUDA cores per multiprocessor for that architecture
    } sSMtoCores;

    // Table extended beyond the original (which stopped at Maxwell GM20x)
    // with Maxwell 5.3, Pascal and Volta entries per NVIDIA's architecture
    // documentation; unknown SMs previously fell back to 128 (SM 5.2) and
    // still resolve to 128 via the last GP10x entry... except SM 7.0 which
    // now resolves correctly to 64.
    static const sSMtoCores nGpuArchCoresPerSM[] =
    {
        { 0x20, 32  }, // Fermi Generation   (SM 2.0) GF100 class
        { 0x21, 48  }, // Fermi Generation   (SM 2.1) GF10x class
        { 0x30, 192 }, // Kepler Generation  (SM 3.0) GK10x class
        { 0x32, 192 }, // Kepler Generation  (SM 3.2) GK10x class
        { 0x35, 192 }, // Kepler Generation  (SM 3.5) GK11x class
        { 0x37, 192 }, // Kepler Generation  (SM 3.7) GK21x class
        { 0x50, 128 }, // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128 }, // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128 }, // Maxwell Generation (SM 5.3) GM20x class (Tegra X1)
        { 0x60, 64  }, // Pascal Generation  (SM 6.0) GP100 class
        { 0x61, 128 }, // Pascal Generation  (SM 6.1) GP10x class
        { 0x62, 128 }, // Pascal Generation  (SM 6.2) GP10x class (Tegra)
        { 0x70, 64  }, // Volta Generation   (SM 7.0) GV100 class
        { -1, -1 }     // sentinel
    };

    const int sm = (major << 4) + minor;

    for (int index = 0; nGpuArchCoresPerSM[index].SM != -1; index++)
    {
        if (nGpuArchCoresPerSM[index].SM == sm)
        {
            return nGpuArchCoresPerSM[index].Cores;
        }
    }

    // If we don't find the values, default to the newest known entry so the
    // caller can still run.
    const int last = (int)(sizeof(nGpuArchCoresPerSM) / sizeof(nGpuArchCoresPerSM[0])) - 2;
    printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[last].Cores);
    return nGpuArchCoresPerSM[last].Cores;
}
// end of GPU Architecture definitions
973 // end of GPU Architecture definitions
974 
975 #ifdef __CUDA_RUNTIME_H__
976 // General GPU Device CUDA Initialization
// General GPU Device CUDA Initialization.
// Validate and select CUDA device `devID` (negative requests are clamped
// to device 0). Returns the selected device id on success; returns a
// negative value when devID is out of range or the device is in
// Compute Mode Prohibited; exits the process when no CUDA device exists
// or the device predates CUDA support.
inline int gpuDeviceInit(int devID)
{
    int device_count;
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    if (device_count == 0)
    {
        fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    if (devID < 0)
    {
        devID = 0;
    }

    if (devID > device_count-1)
    {
        fprintf(stderr, "\n");
        fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
        fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
        fprintf(stderr, "\n");
        return -devID;
    }

    cudaDeviceProp deviceProp;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));

    if (deviceProp.computeMode == cudaComputeModeProhibited)
    {
        fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
        return -1;
    }

    if (deviceProp.major < 1)
    {
        fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
        exit(EXIT_FAILURE);
    }

    checkCudaErrors(cudaSetDevice(devID));
    // Fixed: the original format string lacked the closing quote after %s.
    printf("gpuDeviceInit() CUDA Device [%d]: \"%s\"\n", devID, deviceProp.name);

    return devID;
}
1022 
1023 // This function returns the best GPU (with maximum GFLOPS)
// This function returns the device id of the "best" GPU: the one with the
// maximum (multiprocessors * cores/SM * clock rate), restricted to the
// highest SM major architecture present when that major version is > 2.
// Exits the process if no CUDA device exists or all devices are in
// Compute Mode Prohibited.
inline int gpuGetMaxGflopsDeviceId()
{
    int current_device = 0, sm_per_multiproc = 0;
    int max_perf_device = 0;
    int device_count = 0, best_SM_arch = 0;
    int devices_prohibited = 0;

    unsigned long long max_compute_perf = 0;
    cudaDeviceProp deviceProp;

    // (The original called cudaGetDeviceCount twice, once unchecked; a
    // single checked call is sufficient.)
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    if (device_count == 0)
    {
        fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    // Pass 1: find the best (highest) SM major architecture among devices
    // that are not in Compute Mode Prohibited.
    while (current_device < device_count)
    {
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));

        if (deviceProp.computeMode != cudaComputeModeProhibited)
        {
            if (deviceProp.major > 0 && deviceProp.major < 9999)
            {
                best_SM_arch = MAX(best_SM_arch, deviceProp.major);
            }
        }
        else
        {
            devices_prohibited++;
        }

        current_device++;
    }

    if (devices_prohibited == device_count)
    {
        fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
        exit(EXIT_FAILURE);
    }

    // Pass 2: among usable devices, pick the highest estimated throughput.
    current_device = 0;

    while (current_device < device_count)
    {
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));

        if (deviceProp.computeMode != cudaComputeModeProhibited)
        {
            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
            {
                // Device emulation / unknown architecture: count one core.
                sm_per_multiproc = 1;
            }
            else
            {
                sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
            }

            unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;

            if (compute_perf > max_compute_perf)
            {
                // If we found a GPU with SM major > 2, only consider devices
                // of that best architecture.
                if (best_SM_arch > 2)
                {
                    if (deviceProp.major == best_SM_arch)
                    {
                        max_compute_perf = compute_perf;
                        max_perf_device = current_device;
                    }
                }
                else
                {
                    max_compute_perf = compute_perf;
                    max_perf_device = current_device;
                }
            }
        }

        ++current_device;
    }

    return max_perf_device;
}
1116 
1117 
1118 // Initialization code to find the best CUDA Device
// Initialization code to find the best CUDA Device.
// Honors an explicit "-device=N" command-line request when present;
// otherwise selects the highest-Gflops/s device. Exits the process on an
// invalid or failed explicit selection. Returns the chosen device id.
inline int findCudaDevice(int argc, const char **argv)
{
    int devID = 0;

    if (!checkCmdLineFlag(argc, argv, "device"))
    {
        // No device requested on the command line: pick the device with
        // highest Gflops/s and report it.
        cudaDeviceProp deviceProp;
        devID = gpuGetMaxGflopsDeviceId();
        checkCudaErrors(cudaSetDevice(devID));
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
        printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
        return devID;
    }

    // The command line named a device: validate and initialize it.
    devID = getCmdLineArgumentInt(argc, argv, "device=");

    if (devID < 0)
    {
        printf("Invalid command line parameter\n ");
        exit(EXIT_FAILURE);
    }

    devID = gpuDeviceInit(devID);

    if (devID < 0)
    {
        printf("exiting...\n");
        exit(EXIT_FAILURE);
    }

    return devID;
}
1156 
1157 // General check for CUDA GPU SM Capabilities
// General check for CUDA GPU SM Capabilities.
// Returns true when the currently selected CUDA device supports at least
// compute capability <major_version>.<minor_version>; prints a diagnostic
// line either way.
inline bool checkCudaCapabilities(int major_version, int minor_version)
{
    int dev;
    cudaDeviceProp deviceProp;
    deviceProp.major = 0;
    deviceProp.minor = 0;

    checkCudaErrors(cudaGetDevice(&dev));
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));

    const bool capable =
        (deviceProp.major > major_version) ||
        (deviceProp.major == major_version && deviceProp.minor >= minor_version);

    if (capable)
    {
        printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
    }
    else
    {
        printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
    }

    return capable;
}
1180 #endif
1181 
1182 // end of CUDA Helper Functions
1183 
1184 
1185 #endif
int ftoi(float value)
Definition: helper_cuda.h:929
bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
int _ConvertSMVer2Cores(int major, int minor)
Definition: helper_cuda.h:935
#define MAX(a, b)
Definition: helper_cuda.h:925
#define DEVICE_RESET
Definition: helper_cuda.h:886
void check(T result, char const *const func, const char *const file, int const line)
Definition: helper_cuda.h:891
int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)