// Call-site wrappers: each macro forwards to the matching __cu* helper and
// appends __FILE__ and __LINE__ so the caller does not have to pass them.

// Forwards a driver API result code to __cuSafeCallNoSync.
21 #define cutilDrvSafeCallNoSync(err) __cuSafeCallNoSync (err, __FILE__, __LINE__)
// Forwards a driver API result code to __cuSafeCall.
22 #define cutilDrvSafeCall(err) __cuSafeCall (err, __FILE__, __LINE__)
// Invokes __cuCtxSync with the location of the call site.
23 #define cutilDrvCtxSync() __cuCtxSync (__FILE__, __LINE__)
// Invokes __cuCheckMsg with a caller-supplied message and the call-site location.
24 #define cutilDrvCheckMsg(msg) __cuCheckMsg (msg, __FILE__, __LINE__)
// Rounds `offset` up to the next multiple of `alignment` and stores the result
// back into `offset`; the whole expression also evaluates to the new value.
// The mask arithmetic is only correct when `alignment` is a power of two.
// Every use of a macro argument is parenthesized so that compound arguments
// such as `1 << 4` expand with the intended precedence. The arguments are
// still evaluated more than once, so they must not have side effects.
#define cutilDrvAlignOffset(offset, alignment) ( (offset) = (((offset) + ((alignment) - 1)) & ~((alignment) - 1)) )
// Checks a driver API result code and reports a failure on stderr.
//   err  - result code to check against CUDA_SUCCESS
//   file - source file name inserted into the message
//   line - source line number inserted into the message
// When `err` differs from CUDA_SUCCESS a message with placeholders for an
// error number, a file name and a line number is written to stderr.
// NOTE(review): the fprintf argument list and anything that follows the
// message (e.g. program termination) are not visible in this listing.
28 inline void __cuSafeCallNoSync( CUresult err,
const char *file,
const int line )
30 if( CUDA_SUCCESS != err) {
31 fprintf(stderr,
"cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
// Checks a driver API result code by delegating to __cuSafeCallNoSync with
// the same `err`, `file` and `line`; no other work is visible in this listing.
36 inline void __cuSafeCall( CUresult err,
const char *file,
const int line )
38 __cuSafeCallNoSync( err, file, line );
// Calls cuCtxSynchronize() and reports a failure on stderr.
//   file - source file name inserted into the message
//   line - source line number inserted into the message
// NOTE(review): the fprintf argument list and whatever follows the message
// are not visible in this listing.
41 inline void __cuCtxSync(
const char *file,
const int line )
43 CUresult err = cuCtxSynchronize();
// Anything other than CUDA_SUCCESS is treated as an error.
44 if( CUDA_SUCCESS != err ) {
45 fprintf(stderr,
"cuCtxSynchronize() API error = %04d in file <%s>, line %i.\n",
// Smaller / larger of two values.
// Each argument is parenthesized wherever it is used so that expressions with
// low-precedence operators (for example `x & mask`) compare and select as a
// whole. The arguments are still evaluated more than once, so they must not
// have side effects.
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
// Maps a compute-capability version to the `Cores` value stored for it in the
// local nGpuArchCoresPerSM table.
//   major - compute-capability major version
//   minor - compute-capability minor version
// Returns the `Cores` field of the first table entry whose `SM` field equals
// (major << 4) + minor. When no entry matches, a diagnostic is printed to stdout.
// NOTE(review): the table contents, the declaration of `index`, its increment
// and the value returned on the not-found path are not visible in this listing.
55 inline int _ConvertSMVer2CoresDrvApi(
int major,
int minor)
63 sSMtoCores nGpuArchCoresPerSM[] =
// The table is scanned until an entry with SM == -1 (end marker) is reached.
74 while (nGpuArchCoresPerSM[index].SM != -1) {
// Lookup key packs the version as (major << 4) + minor.
75 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
76 return nGpuArchCoresPerSM[index].Cores;
80 printf(
"MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
// Selects the device with the largest estimated compute performance and
// returns it (max_perf_device, which starts at 0).
// The estimate is multiProcessorCount * sm_per_multiproc * clockRate, where
// sm_per_multiproc comes from _ConvertSMVer2CoresDrvApi(major, minor), or is 1
// when both major and minor equal 9999.
// NOTE(review): several declarations (device_count, best_SM_arch, major,
// minor, clockRate), the loop increments, the reset of current_device between
// the two loops and the closing braces are not visible in this listing.
86 inline int cutilDrvGetMaxGflopsDeviceId()
88 CUdevice current_device = 0;
89 CUdevice max_perf_device = 0;
91 int max_compute_perf = 0;
95 cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
// First loop: record the highest compute-capability major version seen.
98 while ( current_device < device_count ) {
101 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
102 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
// Majors outside 1..9998 are ignored when tracking the best architecture.
103 if (major > 0 && major < 9999) {
104 best_SM_arch = MAX(best_SM_arch, major);
// Second loop: estimate the performance of each device and keep the best one.
111 while( current_device < device_count ) {
112 int multiProcessorCount;
116 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
117 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
119 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
120 CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
122 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
123 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
125 int sm_per_multiproc = (major == 9999 && minor == 9999) ? 1 : _ConvertSMVer2CoresDrvApi(major, minor);
// NOTE(review): the product is computed in int; if the clock rate is reported
// in kHz this could overflow on large devices — confirm against the driver docs.
127 int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
128 if( compute_perf > max_compute_perf ) {
// When the best major version is above 2, only devices of that major version
// may replace the current best.
130 if ( best_SM_arch > 2 ) {
132 if (major == best_SM_arch) {
133 max_compute_perf = compute_perf;
134 max_perf_device = current_device;
// Presumably the else branch of the best_SM_arch test (braces not visible):
// the faster device is accepted unconditionally.
138 max_compute_perf = compute_perf;
139 max_perf_device = current_device;
144 return max_perf_device;
// Selects the device with the largest estimated compute performance and
// returns it (max_perf_device, which starts at 0). In addition to the
// compute-capability, multiprocessor-count and clock-rate attributes, each
// device's name and CU_DEVICE_ATTRIBUTE_TCC_DRIVER value are queried.
// NOTE(review): how bTCC and deviceName influence the selection is not
// visible in this listing, nor are several declarations (major, minor, bTCC,
// clockRate), the loop increments, the reset of current_device between the
// two loops and the closing braces.
148 inline int cutilDrvGetMaxGflopsGraphicsDeviceId()
150 CUdevice current_device = 0;
151 CUdevice max_perf_device = 0;
152 int device_count = 0;
153 int max_compute_perf = 0;
154 int best_SM_arch = 0;
157 cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
// First loop: record the highest compute-capability major version seen.
160 while ( current_device < device_count ) {
161 char deviceName[256];
165 cutilDrvSafeCallNoSync( cuDeviceGetName(deviceName, 256, current_device) );
166 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
167 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
168 cutilDrvSafeCallNoSync( cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device) );
// Majors outside 1..9998 are ignored when tracking the best architecture.
171 if (major > 0 && major < 9999) {
172 best_SM_arch = MAX(best_SM_arch, major);
// Second loop: estimate the performance of each device and keep the best one.
180 while( current_device < device_count ) {
181 int multiProcessorCount;
186 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
187 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
189 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
190 CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
192 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
193 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
195 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
// A device reporting 9999.9999 is counted with a factor of 1 instead of a table lookup.
197 int sm_per_multiproc = (major == 9999 && minor == 9999) ? 1 : _ConvertSMVer2CoresDrvApi(major, minor);
// NOTE(review): the product is computed in int; if the clock rate is reported
// in kHz this could overflow on large devices — confirm against the driver docs.
202 int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
203 if( compute_perf > max_compute_perf ) {
// When the best major version is above 2, only devices of that major version
// may replace the current best.
205 if ( best_SM_arch > 2 ) {
207 if (major == best_SM_arch) {
208 max_compute_perf = compute_perf;
209 max_perf_device = current_device;
// Presumably the else branch of the best_SM_arch test (braces not visible):
// the faster device is accepted unconditionally.
213 max_compute_perf = compute_perf;
214 max_perf_device = current_device;
220 return max_perf_device;
// Calls cuCtxSynchronize() and, when the result is not CUDA_SUCCESS, writes
// two lines to stderr: the caller's message, then a line with placeholders
// for the error number, file name and line number.
//   msg  - caller-supplied text printed after the "cutilDrvCheckMsg -> " prefix
//   file - source file name inserted into the second message
//   line - source line number inserted into the second message
// NOTE(review): the second fprintf's argument list and whatever follows it
// are not visible in this listing.
223 inline void __cuCheckMsg(
const char * msg,
const char *file,
const int line )
225 CUresult err = cuCtxSynchronize();
226 if( CUDA_SUCCESS != err) {
227 fprintf(stderr,
"cutilDrvCheckMsg -> %s", msg);
228 fprintf(stderr,
"cutilDrvCheckMsg -> cuCtxSynchronize API error = %04d in file <%s>, line %i.\n",
235 #if __DEVICE_EMULATION__
// Stub variant of cutilDeviceInitDrv, selected by the preceding
// `#if __DEVICE_EMULATION__`. It performs no driver calls and ignores the
// command-line arguments.
//   ARGC - argument count (unused)
//   ARGV - argument vector (unused)
// Returns 0. The function is declared to return int, so falling off the end
// of an empty body would be undefined behaviour for any caller that reads the
// result; 0 is returned as a non-negative (valid) device index.
inline int cutilDeviceInitDrv(
int ARGC,
char **ARGV)
{
    (void)ARGC;
    (void)ARGV;
    return 0;
}
// Initialises the driver API and selects the device named by the "device"
// command-line argument.
//   ARGC - argument count forwarded to the command-line helpers
//   ARGV - argument vector forwarded to the command-line helpers
// Visible steps: cuInit(0); query the device count; report an error when no
// device exists; read the "device" argument into `dev`; clamp negative values
// to 0; report an out-of-range index; obtain the device handle and name; and
// print the selection unless the "quiet" flag is present.
// NOTE(review): local declarations (deviceCount, dev, cuDevice, name), the
// statements following each error message and the return value are not
// visible in this listing.
238 inline int cutilDeviceInitDrv(
int ARGC,
char ** ARGV)
242 CUresult err = cuInit(0);
// NOTE(review): as listed, this test appears to guard only the device-count
// query; a cuInit failure would then fall through to the deviceCount check —
// confirm against the full source.
243 if (CUDA_SUCCESS == err)
244 cutilDrvSafeCallNoSync(cuDeviceGetCount(&deviceCount));
245 if (deviceCount == 0) {
246 fprintf(stderr,
"CUTIL DeviceInitDrv error: no devices supporting CUDA\n");
// Read the requested device index from the "device" command-line argument.
250 cutGetCmdLineArgumenti(ARGC, (
const char **) ARGV,
"device", &dev);
251 if (dev < 0) dev = 0;
// Reject indices beyond the last available device.
252 if (dev > deviceCount-1) {
253 fprintf(stderr,
"\n");
254 fprintf(stderr,
">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
255 fprintf(stderr,
">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
256 fprintf(stderr,
"\n");
259 cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, dev));
// NOTE(review): the result of cuDeviceGetName is not checked here, unlike the
// neighbouring driver calls.
261 cuDeviceGetName(name, 100, cuDevice);
// The selection banner is suppressed when the "quiet" flag is given.
262 if (cutCheckCmdLineFlag(ARGC, (
const char **) ARGV,
"quiet") == CUTFalse) {
263 printf(
"> Using CUDA Device [%d]: %s\n", dev, name);
270 #if __DEVICE_EMULATION__
// Variant of cutilChooseCudaDeviceDrv selected by the preceding
// `#if __DEVICE_EMULATION__`.
// NOTE(review): only the signature is visible in this listing; the body (if
// any) cannot be described from here.
271 inline CUdevice cutilChooseCudaDeviceDrv(
int argc,
char **argv,
int *p_devID)
// Chooses a CUDA device, either from the command line or by estimated
// performance.
//   argc     - argument count
//   argv     - argument vector
//   p_devID  - optional output; when non-null it receives the chosen device index
// When the "device" flag is present the index comes from cutilDeviceInitDrv;
// the other visible path takes it from cutilDrvGetMaxGflopsDeviceId and
// prints the device name.
// NOTE(review): local declarations (devID, cuDevice, name), the condition
// guarding the "exiting..." message, the else structure and the return
// statement are not visible in this listing.
273 inline CUdevice cutilChooseCudaDeviceDrv(
int argc,
char **argv,
int *p_devID)
278 if( cutCheckCmdLineFlag(argc, (
const char**)argv,
"device") ) {
279 devID = cutilDeviceInitDrv(argc, argv);
281 printf(
"exiting...\n");
// Presumably the branch taken when no "device" flag was given (braces not visible).
287 devID = cutilDrvGetMaxGflopsDeviceId();
288 cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, devID));
289 cuDeviceGetName(name, 100, cuDevice);
290 printf(
"> Using CUDA Device [%d]: %s\n", devID, name);
// NOTE(review): the result of this cuDeviceGet call is not checked.
292 cuDeviceGet(&cuDevice, devID);
// Report the chosen index to the caller only when an output pointer was supplied.
293 if (p_devID) *p_devID = devID;
// Synchronises the current context twice and prints `errorMessage` with the
// file and line to stderr depending on the results.
//   errorMessage - text inserted into the diagnostic
//   file         - source file name inserted into the diagnostic
//   line         - source line number inserted into the diagnostic
// NOTE(review): whatever follows each fprintf (e.g. program termination) is
// not visible in this listing.
300 inline void cutilDrvCudaCheckCtxLost(
const char *errorMessage,
const char *file,
const int line )
302 CUresult err = cuCtxSynchronize();
// NOTE(review): this reports for every result other than
// CUDA_ERROR_INVALID_CONTEXT, which includes CUDA_SUCCESS — confirm intent.
303 if( CUDA_ERROR_INVALID_CONTEXT != err) {
304 fprintf(stderr,
"Cuda error: %s in file '%s' in line %i\n",
305 errorMessage, file, line );
// Second synchronisation: any result other than CUDA_SUCCESS is reported.
308 err = cuCtxSynchronize();
309 if( CUDA_SUCCESS != err) {
310 fprintf(stderr,
"Cuda error: %s in file '%s' in line %i\n",
311 errorMessage, file, line );
// Aliases for case-insensitive string comparison. Two alternative definitions
// are listed for each name; the preprocessor conditionals that choose between
// them are not visible in this listing (presumably a platform split between
// the _stricmp/_strnicmp and strcasecmp/strncasecmp families — confirm).
318 #define STRCASECMP _stricmp
320 #define STRCASECMP strcasecmp
326 #define STRNCASECMP _strnicmp
328 #define STRNCASECMP strncasecmp
// Prints a test result line for the program named by argv[0].
//   argc    - argument count
//   argv    - argument vector; argv[0] is printed as the test name
//   bStatus - used as an index into sStatus, so false selects "FAILED" and
//             true selects "PASSED"
// Two output formats are visible: an "&&&& <status> <argv...>" line and a
// "[<argv[0]>] test result" block.
// NOTE(review): the statements inside the -qatest/-noprompt match and the
// condition selecting between the two formats are not visible in this
// listing. With a bool index the "WAIVED" entry is not selectable from here.
332 inline void __cutilDrvQAFinish(
int argc,
char **argv,
bool bStatus)
334 const char *sStatus[] = {
"FAILED",
"PASSED",
"WAIVED", NULL };
// Scan the arguments (skipping argv[0]) for "-qatest" or "-noprompt",
// compared case-insensitively.
337 for (
int i=1; i < argc; i++) {
338 if (!STRCASECMP(argv[i],
"-qatest") || !STRCASECMP(argv[i],
"-noprompt")) {
344 printf(
"&&&& %s %s", sStatus[bStatus], argv[0]);
// Echo the remaining command-line arguments after the status.
345 for (
int i=1; i < argc; i++) printf(
" %s", argv[i]);
347 printf(
"[%s] test result\n%s\n", argv[0], sStatus[bStatus]);
// Checks whether device `deviceNum` has at least the requested compute capability.
//   major_version - required compute-capability major version
//   minor_version - required compute-capability minor version
//   deviceNum     - device index passed to cuDeviceGet
//   argc, argv    - forwarded to __cutilDrvQAFinish on the failure path
// A device qualifies when its major version is greater than major_version, or
// equal with a minor version of at least minor_version.
// NOTE(review): the return statements and the #else/#endif matching the
// #ifdef are not visible in this listing.
352 inline bool cutilDrvCudaDevCapabilities(
int major_version,
int minor_version,
int deviceNum,
int argc,
char** argv)
354 int major, minor, dev;
355 char device_name[256];
357 #ifdef __DEVICE_EMULATION__
358 printf(
"> Compute Device Emulation Mode \n");
// Query the device handle, its compute capability and its name.
361 cutilDrvSafeCallNoSync( cuDeviceGet(&dev, deviceNum) );
362 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev));
363 cutilDrvSafeCallNoSync (cuDeviceGetAttribute (&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev));
364 cutilDrvSafeCallNoSync( cuDeviceGetName(device_name, 256, dev) );
366 if((major > major_version) ||
367 (major == major_version && minor >= minor_version))
369 printf(
"> Device %d: < %s >, Compute SM %d.%d detected\n", dev, device_name, major, minor);
// Presumably the branch taken when the device does not qualify (braces not visible).
374 printf(
"There is no device supporting CUDA compute capability %d.%d.\n", major_version, minor_version);
// NOTE(review): `true` selects the "PASSED" entry in __cutilDrvQAFinish even
// though this is the unsupported-device path — confirm this is intended.
375 __cutilDrvQAFinish(argc, argv,
true);
// Convenience form of cutilDrvCudaDevCapabilities that always checks device 0.
//   major_version - required compute-capability major version
//   minor_version - required compute-capability minor version
//   argc, argv    - forwarded unchanged
// Returns whatever cutilDrvCudaDevCapabilities returns for device index 0.
381 inline bool cutilDrvCudaCapabilities(
int major_version,
int minor_version,
int argc,
char **argv)
383 return cutilDrvCudaDevCapabilities(major_version, minor_version, 0, argc, argv);