// Debug-only section: the lines below are compiled only when _DEBUG is defined.
// NOTE(review): the Windows SDK macro that trims <windows.h> is spelled
// WIN32_LEAN_AND_MEAN; WINDOWS_LEAN_AND_MEAN is presumably a typo and would then
// have no effect on the Windows headers - confirm before changing.
// NOTE(review): the rest of this conditional (includes, #endif) is not visible in this listing.
15 #ifdef _DEBUG // Do this only in debug mode...
16 # define WINDOWS_LEAN_AND_MEAN
// Public convenience macros. Each one forwards to the double-underscore helper of
// the same purpose; all except cutilExit append __FILE__ and __LINE__ so that the
// helper receives the location of the call site.
//   cutilSafeCallNoSync / cutilSafeCall : pass a CUDA runtime call result (err) to the checker.
//   cutilSafeThreadSync                 : forwards to __cudaSafeThreadSync (no user arguments).
//   cufftSafeCall                       : pass a CUFFT result to the checker.
//   cutilCheckError                     : pass a CUTIL boolean result to the checker.
//   cutilCheckMsg / cutilCheckMsgAndSync: forward a message to __cutilGetLastError /
//                                         __cutilGetLastErrorAndSync.
//   cutilSafeMalloc                     : forwards the (parenthesized) malloc expression.
//   cutilCondition                      : forwards val to __cutilCondition.
//   cutilExit                           : forwards argc/argv to __cutilExit.
32 #define cutilSafeCallNoSync(err) __cudaSafeCallNoSync(err, __FILE__, __LINE__)
33 #define cutilSafeCall(err) __cudaSafeCall (err, __FILE__, __LINE__)
34 #define cutilSafeThreadSync() __cudaSafeThreadSync(__FILE__, __LINE__)
35 #define cufftSafeCall(err) __cufftSafeCall (err, __FILE__, __LINE__)
36 #define cutilCheckError(err) __cutilCheckError (err, __FILE__, __LINE__)
37 #define cutilCheckMsg(msg) __cutilGetLastError (msg, __FILE__, __LINE__)
38 #define cutilCheckMsgAndSync(msg) __cutilGetLastErrorAndSync (msg, __FILE__, __LINE__)
39 #define cutilSafeMalloc(mallocCall) __cutilSafeMalloc ((mallocCall), __FILE__, __LINE__)
40 #define cutilCondition(val) __cutilCondition (val, __FILE__, __LINE__)
41 #define cutilExit(argc, argv) __cutilExit (argc, argv)
// Thin wrapper around cudaDeviceSynchronize(); returns that call's cudaError unchanged.
43 inline cudaError cutilDeviceSynchronize()
45 return cudaDeviceSynchronize();
// Thin wrapper around cudaDeviceReset(); returns that call's cudaError unchanged.
48 inline cudaError cutilDeviceReset()
50 return cudaDeviceReset();
// Checks a condition value through cutCheckCondition(val, file, line).
//   val  : value to check.
//   file : source file name of the call site (supplied by the cutilCondition macro).
//   line : source line of the call site.
// NOTE(review): the body of the CUTFalse branch is not visible in this listing.
53 inline void __cutilCondition(
int val,
char *file,
int line)
55 if( CUTFalse == cutCheckCondition( val, file, line ) ) {
// Exit helper: unless cutCheckCmdLineFlag finds the "noprompt" flag in argv,
// prints a "Press ENTER to exit..." prompt to stdout.
//   argc, argv : the program's command-line arguments.
// NOTE(review): the statements after the prompt (waiting for input, exiting) are
// not visible in this listing.
60 inline void __cutilExit(
int argc,
char **argv)
62 if (!cutCheckCmdLineFlag(argc, (
const char**)argv,
"noprompt")) {
63 printf(
"\nPress ENTER to exit...\n");
// MIN / MAX: yield the smaller / larger of two values.
// Every use of an argument is parenthesized so that operands containing
// lower-precedence operators (for example MIN(x | 1, y) or MAX(a, b ? c : d))
// expand to the intended comparison and result.
// NOTE: an argument may still be evaluated twice; do not pass expressions with
// side effects such as MIN(i++, j).
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
// Looks up a core count for a compute-capability version in a local table.
//   major, minor : SM version of the device.
// The table is searched for an entry whose SM field equals (major << 4) + minor;
// the search stops at the sentinel entry whose SM field is -1. On a match the
// entry's Cores field is returned. If no entry matches, an
// "undefined SMversion" message is printed.
// NOTE(review): the table initializer, the declaration/increment of index and the
// value returned after the message are not visible in this listing.
75 inline int _ConvertSMVer2Cores(
int major,
int minor)
83 sSMtoCores nGpuArchCoresPerSM[] =
94 while (nGpuArchCoresPerSM[index].SM != -1) {
95 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
96 return nGpuArchCoresPerSM[index].Cores;
100 printf(
"MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
// Returns the id of the device with the highest estimated compute throughput.
// Two passes over the devices reported by cudaGetDeviceCount:
//   1. find the largest deviceProp.major among devices with 0 < major < 9999;
//   2. score each device as multiProcessorCount * cores-per-SM * clockRate and
//      remember the best one.
// Returns max_perf_device, which stays 0 if no device scores above 0.
// NOTE(review): the return values of cudaGetDeviceCount / cudaGetDeviceProperties
// are not checked here.
// NOTE(review): the loop increments, the reset of current_device between the two
// passes and the else keyword are not visible in this listing.
106 inline int cutGetMaxGflopsDeviceId()
108 int current_device = 0;
109 int max_compute_perf = 0;
110 int max_perf_device = 0;
111 int device_count = 0;
112 int best_SM_arch = 0;
114 cudaGetDeviceCount( &device_count );
// Pass 1: highest major SM architecture among real devices.
116 while ( current_device < device_count ) {
117 cudaDeviceProp deviceProp;
118 cudaGetDeviceProperties( &deviceProp, current_device );
119 if (deviceProp.major > 0 && deviceProp.major < 9999) {
120 best_SM_arch = MAX(best_SM_arch, deviceProp.major);
// Pass 2: pick the device with the best score.
127 while( current_device < device_count ) {
128 cudaDeviceProp deviceProp;
129 cudaGetDeviceProperties( &deviceProp, current_device );
// A device reporting version 9999.9999 is counted with 1 core per multiprocessor.
130 int sm_per_multiproc = (deviceProp.major == 9999 && deviceProp.minor == 9999) ? 1 : _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
// NOTE(review): the product is computed in int; for large multiprocessor counts
// and clock rates it may overflow - consider a wider type.
132 int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
133 if( compute_perf > max_compute_perf ) {
// When the best architecture found is newer than major 2, only devices of that
// best major version are accepted.
135 if ( best_SM_arch > 2 ) {
137 if (deviceProp.major == best_SM_arch) {
138 max_compute_perf = compute_perf;
139 max_perf_device = current_device;
// Presumably the else branch (best_SM_arch <= 2): accept any better-scoring device.
142 max_compute_perf = compute_perf;
143 max_perf_device = current_device;
148 return max_perf_device;
// Variant of the max-throughput device search that also inspects
// deviceProp.tccDriver. Uses the same two-pass scheme as the visible code of
// cutGetMaxGflopsDeviceId: first find the largest major version among devices
// with 0 < major < 9999, then score devices as
// multiProcessorCount * cores-per-SM * clockRate and return the best id.
// NOTE(review): the declaration of bTCC and the code that acts on it are not
// visible in this listing, so how TCC devices influence the choice cannot be
// stated from here.
// NOTE(review): return values of cudaGetDeviceCount / cudaGetDeviceProperties are
// not checked; loop increments and the else keyword are not visible.
152 inline int cutGetMaxGflopsGraphicsDeviceId()
154 int current_device = 0;
155 int max_compute_perf = 0;
156 int max_perf_device = 0;
157 int device_count = 0;
158 int best_SM_arch = 0;
161 cudaGetDeviceCount( &device_count );
// Pass 1: highest major SM architecture; also records whether the device uses the TCC driver.
163 while ( current_device < device_count ) {
164 cudaDeviceProp deviceProp;
165 cudaGetDeviceProperties( &deviceProp, current_device );
167 if (deviceProp.tccDriver) bTCC = 1;
170 if (deviceProp.major > 0 && deviceProp.major < 9999) {
171 best_SM_arch = MAX(best_SM_arch, deviceProp.major);
// Pass 2: pick the device with the best score.
179 while( current_device < device_count ) {
180 cudaDeviceProp deviceProp;
181 cudaGetDeviceProperties( &deviceProp, current_device );
// A device reporting version 9999.9999 is counted with 1 core per multiprocessor.
182 int sm_per_multiproc = (deviceProp.major == 9999 && deviceProp.minor == 9999) ? 1 : _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
184 if (deviceProp.tccDriver) bTCC = 1;
// NOTE(review): int product; may overflow for large counts and clock rates.
188 int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
189 if( compute_perf > max_compute_perf ) {
// When the best architecture found is newer than major 2, only devices of that
// best major version are accepted.
191 if ( best_SM_arch > 2 ) {
193 if (deviceProp.major == best_SM_arch) {
194 max_compute_perf = compute_perf;
195 max_perf_device = current_device;
// Presumably the else branch (best_SM_arch <= 2): accept any better-scoring device.
199 max_compute_perf = compute_perf;
200 max_perf_device = current_device;
206 return max_perf_device;
// Debug-build printf replacement plus the FPRINTF macro that selects it.
// VSPrintf formats fmt and its variadic arguments into a heap buffer (initially
// 2048 bytes) with _vsnprintf, allocating a new buffer of fmt2_sz bytes whenever
// _vsnprintf returns a negative value, and sends the result to
// OutputDebugStringA.
//   file : output stream parameter; its use is not visible in this listing.
//   fmt  : printf-style format string.
// NOTE(review): the va_list declaration, the update of fmt2_sz, any free() of the
// old buffer, va_end and any write to file are not visible in this listing.
// NOTE(review): vlist is passed to _vsnprintf again on each retry with no visible
// va_end/va_start in between; reusing a consumed va_list is not portable - confirm.
// NOTE(review): the malloc results are not checked in the visible lines.
212 # ifdef _DEBUG // Do this only in debug mode...
213 inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
215 std::size_t fmt2_sz = 2048;
216 char *fmt2 = (
char*)malloc(fmt2_sz);
218 va_start(vlist, fmt);
219 while((_vsnprintf(fmt2, fmt2_sz, fmt, vlist)) < 0)
223 fmt2 = (
char*)malloc(fmt2_sz);
225 OutputDebugStringA(fmt2);
// FPRINTF takes a single parenthesized argument list, e.g. FPRINTF((stderr, "...", x)),
// and expands to VSPrintf or fprintf applied to that list.
// NOTE(review): the preprocessor conditionals choosing between these definitions
// are not visible in this listing.
229 #define FPRINTF(a) VSPrintf a
231 #define FPRINTF(a) fprintf a
236 #define FPRINTF(a) fprintf a
239 #define FPRINTF(a) fprintf a
// Reports a failed CUDA runtime call.
//   err  : result of the runtime call being checked.
//   file : source file of the call site.
//   line : source line of the call site.
// When err is not cudaSuccess, prints file, line and cudaGetErrorString(err) to
// stderr through FPRINTF.
// NOTE(review): what follows the message (e.g. program exit) is not visible in this listing.
245 inline void __cudaSafeCallNoSync( cudaError err,
const char *file,
const int line )
247 if( cudaSuccess != err) {
248 FPRINTF((stderr,
"%s(%i) : cudaSafeCallNoSync() Runtime API error : %s.\n",
249 file, line, cudaGetErrorString( err) ));
// Reports a failed CUDA runtime call.
//   err  : result of the runtime call being checked.
//   file : source file of the call site.
//   line : source line of the call site.
// When err is not cudaSuccess, prints file, line and cudaGetErrorString(err) to
// stderr through FPRINTF. The visible lines differ from __cudaSafeCallNoSync only
// in the message text.
// NOTE(review): what follows the message is not visible in this listing.
254 inline void __cudaSafeCall( cudaError err,
const char *file,
const int line )
256 if( cudaSuccess != err) {
257 FPRINTF((stderr,
"%s(%i) : cudaSafeCall() Runtime API error : %s.\n",
258 file, line, cudaGetErrorString( err) ));
// Synchronizes the device via cutilDeviceSynchronize() and reports a failure.
//   file : source file of the call site.
//   line : source line of the call site.
// When the synchronize call does not return cudaSuccess, prints file, line and
// cudaGetErrorString(err) to stderr through FPRINTF.
// NOTE(review): what follows the message is not visible in this listing.
263 inline void __cudaSafeThreadSync(
const char *file,
const int line )
265 cudaError err = cutilDeviceSynchronize();
266 if ( cudaSuccess != err) {
267 FPRINTF((stderr,
"%s(%i) : cudaDeviceSynchronize() Runtime API error : %s.\n",
268 file, line, cudaGetErrorString( err) ));
// Reports a failed CUFFT call.
//   err  : CUFFT result being checked.
//   file : source file of the call site.
//   line : source line of the call site.
// When err is not CUFFT_SUCCESS, prints a generic "CUFFT error" message to stderr
// through FPRINTF; the format has no field for the specific CUFFT code.
// NOTE(review): the argument list of the FPRINTF call and what follows it are not
// visible in this listing.
273 inline void __cufftSafeCall( cufftResult err,
const char *file,
const int line )
275 if( CUFFT_SUCCESS != err) {
276 FPRINTF((stderr,
"%s(%i) : cufftSafeCall() CUFFT error.\n",
// Reports a failed CUTIL call.
//   err  : CUTBoolean result being checked; anything other than CUTTrue is a failure.
//   file : source file of the call site.
//   line : source line of the call site.
// On failure prints a generic "CUTIL CUDA error" message to stderr through FPRINTF.
// NOTE(review): the argument list of the FPRINTF call and what follows it are not
// visible in this listing.
282 inline void __cutilCheckError( CUTBoolean err,
const char *file,
const int line )
284 if( CUTTrue != err) {
285 FPRINTF((stderr,
"%s(%i) : CUTIL CUDA error.\n",
// Checks the error state reported by cudaGetLastError() and reports a failure.
//   errorMessage : caller-supplied context text included in the report.
//   file         : source file of the call site.
//   line         : source line of the call site.
// When the result is not cudaSuccess, prints file, line, errorMessage and
// cudaGetErrorString(err) to stderr through FPRINTF.
// NOTE(review): what follows the message is not visible in this listing.
291 inline void __cutilGetLastError(
const char *errorMessage,
const char *file,
const int line )
293 cudaError_t err = cudaGetLastError();
294 if( cudaSuccess != err) {
295 FPRINTF((stderr,
"%s(%i) : cutilCheckMsg() CUTIL CUDA error : %s : %s.\n",
296 file, line, errorMessage, cudaGetErrorString( err) ));
// Checks cudaGetLastError(), then synchronizes the device and checks that result too.
//   errorMessage : caller-supplied context text included in the reports.
//   file         : source file of the call site.
//   line         : source line of the call site.
// Each failing check prints file, line, errorMessage and cudaGetErrorString(err)
// to stderr through FPRINTF.
// NOTE(review): what follows each message is not visible in this listing.
301 inline void __cutilGetLastErrorAndSync(
const char *errorMessage,
const char *file,
const int line )
// First check: error state reported by cudaGetLastError().
303 cudaError_t err = cudaGetLastError();
304 if( cudaSuccess != err) {
305 FPRINTF((stderr,
"%s(%i) : cutilCheckMsg() CUTIL CUDA error : %s : %s.\n",
306 file, line, errorMessage, cudaGetErrorString( err) ));
// Second check: result of the device synchronize.
310 err = cutilDeviceSynchronize();
311 if( cudaSuccess != err) {
312 FPRINTF((stderr,
"%s(%i) : cutilCheckMsg cudaDeviceSynchronize error: %s : %s.\n",
313 file, line, errorMessage, cudaGetErrorString( err) ));
// Reports a failed host allocation.
//   pointer : result of the allocation expression passed to cutilSafeMalloc.
//   file    : source file of the call site.
//   line    : source line of the call site.
// Prints a "host malloc failure" message to stderr through FPRINTF.
// NOTE(review): the condition guarding the message (presumably a null check on
// pointer), the FPRINTF arguments and what follows are not visible in this listing.
318 inline void __cutilSafeMalloc(
void *pointer,
const char *file,
const int line )
321 FPRINTF((stderr,
"%s(%i) : cutilSafeMalloc host malloc failure\n",
327 #if __DEVICE_EMULATION__
// Device-emulation stub for cutilDeviceInit: performs no device selection.
//   ARGC, ARGV : command-line arguments (unused in emulation mode).
// Returns 0. The previous empty body flowed off the end of a function declared to
// return int, which is undefined behavior in C++ as soon as the function is called.
// NOTE(review): 0 is used as "device 0"; confirm this matches what emulation-mode
// callers expect.
inline int cutilDeviceInit(int ARGC, char **ARGV)
{
    (void)ARGC;
    (void)ARGV;
    return 0;
}
// Device-emulation stub for cutilChooseCudaDevice: performs no device selection.
//   ARGC, ARGV : command-line arguments (unused in emulation mode).
// Returns 0. The previous empty body flowed off the end of a function declared to
// return int, which is undefined behavior in C++ as soon as the function is called.
// NOTE(review): 0 is used as "device 0"; confirm this matches what emulation-mode
// callers expect.
inline int cutilChooseCudaDevice(int ARGC, char **ARGV)
{
    (void)ARGC;
    (void)ARGV;
    return 0;
}
// Selects the CUDA device named by the "device" command-line argument.
//   ARGC, ARGV : the program's command-line arguments.
// Steps visible here: query the device count (an error is reported if there are
// none), read the "device" argument into dev, report an out-of-range index,
// fetch the device properties (an error is reported if major < 1), print the
// chosen device and make it current with cudaSetDevice.
// NOTE(review): the declarations of deviceCount/dev, the actions taken after each
// error message and the returned value are not visible in this listing.
331 inline int cutilDeviceInit(
int ARGC,
char **ARGV)
334 cutilSafeCallNoSync(cudaGetDeviceCount(&deviceCount));
335 if (deviceCount == 0) {
336 FPRINTF((stderr,
"CUTIL CUDA error: no devices supporting CUDA.\n"));
340 cutGetCmdLineArgumenti(ARGC, (
const char **) ARGV,
"device", &dev);
// Upper-bound check on the requested index; any lower-bound handling is not visible here.
343 if (dev > deviceCount-1) {
344 fprintf(stderr,
"\n");
345 fprintf(stderr,
">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
346 fprintf(stderr,
">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
347 fprintf(stderr,
"\n");
350 cudaDeviceProp deviceProp;
351 cutilSafeCallNoSync(cudaGetDeviceProperties(&deviceProp, dev));
352 if (deviceProp.major < 1) {
353 FPRINTF((stderr,
"cutil error: GPU device does not support CUDA.\n"));
356 printf(
"> Using CUDA device [%d]: %s\n", dev, deviceProp.name);
357 cutilSafeCall(cudaSetDevice(dev));
// Chooses the CUDA device to run on.
//   argc, argv : the program's command-line arguments.
// If the "device" flag is present on the command line, the choice is delegated to
// cutilDeviceInit. The other visible path picks the device returned by
// cutGetMaxGflopsDeviceId(), makes it current with cudaSetDevice and prints its name.
// NOTE(review): the declaration of devID, the condition under which "exiting..."
// is printed and cutilExit is called, the else keyword and the returned value
// are not visible in this listing.
363 inline int cutilChooseCudaDevice(
int argc,
char **argv)
365 cudaDeviceProp deviceProp;
368 if( cutCheckCmdLineFlag(argc, (
const char**)argv,
"device") ) {
369 devID = cutilDeviceInit(argc, argv);
371 printf(
"exiting...\n");
372 cutilExit(argc, argv);
// Presumably the else branch: no explicit device requested, so pick the fastest one.
377 devID = cutGetMaxGflopsDeviceId();
378 cutilSafeCallNoSync( cudaSetDevice( devID ) );
379 cutilSafeCallNoSync( cudaGetDeviceProperties(&deviceProp, devID) );
380 printf(
"> Using CUDA device [%d]: %s\n", devID, deviceProp.name);
// Checks cudaGetLastError(), then synchronizes the device and checks that result too.
//   errorMessage : caller-supplied context text included in the reports.
//   file         : source file of the call site.
//   line         : source line of the call site.
// Each failing check prints file, line, errorMessage and cudaGetErrorString(err)
// to stderr through FPRINTF, using the same message format for both checks.
// NOTE(review): what follows each message is not visible in this listing.
388 inline void cutilCudaCheckCtxLost(
const char *errorMessage,
const char *file,
const int line )
// First check: error state reported by cudaGetLastError().
390 cudaError_t err = cudaGetLastError();
391 if( cudaSuccess != err) {
392 FPRINTF((stderr,
"%s(%i) : CUDA error: %s : %s.\n",
393 file, line, errorMessage, cudaGetErrorString( err) ));
// Second check: result of the device synchronize.
396 err = cutilDeviceSynchronize();
397 if( cudaSuccess != err) {
398 FPRINTF((stderr,
"%s(%i) : CUDA error: %s : %s.\n",
399 file, line, errorMessage, cudaGetErrorString( err) ));
// Aliases for string comparison functions: STRCASECMP maps to _stricmp or
// strcasecmp, STRNCASECMP to _strnicmp or strncasecmp.
// NOTE(review): the preprocessor conditionals selecting each pair are not visible
// in this listing; presumably the underscore-prefixed names are used on Windows
// and the others elsewhere - confirm.
406 #define STRCASECMP _stricmp
408 #define STRCASECMP strcasecmp
414 #define STRNCASECMP _strnicmp
416 #define STRNCASECMP strncasecmp
// Prints the final test status line.
//   argc, argv : the program's command-line arguments.
//   bStatus    : test outcome; used as an index into sStatus, so false prints
//                "FAILED" and true prints "PASSED". Because the index is a bool,
//                the "WAIVED" entry cannot be selected through this parameter.
// The arguments are scanned with STRCASECMP for "-qatest" or "-noprompt". Two
// output formats are visible: "&&&& <status> <argv[0]> <args...>" and
// "[<argv[0]>] test result" followed by the status on its own line.
// NOTE(review): the action taken when a flag matches and the condition choosing
// between the two formats are not visible in this listing.
420 inline void __cutilQAFinish(
int argc,
char **argv,
bool bStatus)
422 const char *sStatus[] = {
"FAILED",
"PASSED",
"WAIVED", NULL };
425 for (
int i=1; i < argc; i++) {
426 if (!STRCASECMP(argv[i],
"-qatest") || !STRCASECMP(argv[i],
"-noprompt")) {
// Format 1: status, program name, then every remaining argument.
432 printf(
"&&&& %s %s", sStatus[bStatus], argv[0]);
433 for (
int i=1; i < argc; i++) printf(
" %s", argv[i]);
// Format 2: bracketed program name and the status on a separate line.
436 printf(
"[%s] test result\n%s\n", argv[0], sStatus[bStatus]);
441 inline bool cutilCudaCapabilities(
int major_version,
int minor_version,
int argc,
char **argv)
443 cudaDeviceProp deviceProp;
444 deviceProp.major = 0;
445 deviceProp.minor = 0;
448 #ifdef __DEVICE_EMULATION__
449 printf(
"> Compute Device Emulation Mode \n");
452 cutilSafeCall( cudaGetDevice(&dev) );
453 cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev));
455 if((deviceProp.major > major_version) ||
456 (deviceProp.major == major_version && deviceProp.minor >= minor_version)) {
457 printf(
"> Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
461 printf(
"There is no device supporting CUDA compute capability %d.%d.\n", major_version, minor_version);
462 __cutilQAFinish(argc, argv,
true);