#include "gpu.h"
// NOTE(review): the header names of the two includes below were missing from
// the source as received; they are reconstructed from the symbols used
// (clog/endl and the CUDA runtime API) -- confirm against the original file.
#include <iostream>
using namespace std;
#include <cuda_runtime.h>

/*
 * Logs "GPU Init", asks the CUDA runtime how many devices are present and
 * logs that count.
 *
 * If the query fails, the CUDA error string is logged and the count is
 * reported as 0 instead of printing an uninitialised value.
 */
void gpu_init()
{
    clog << "GPU Init" << endl;

    int ndev = 0;  // stays 0 if the runtime cannot report a device count
    const cudaError_t err = cudaGetDeviceCount(&ndev);
    if (err != cudaSuccess) {
        clog << "cudaGetDeviceCount failed: " << cudaGetErrorString(err) << endl;
        ndev = 0;  // do not trust whatever the failed call may have written
    }

    clog << "There are " << ndev << " GPUs" << endl;
}

/*
 * Logs "GPU Fini". No other work is performed here.
 */
void gpu_fini()
{
    clog << "GPU Fini" << endl;
}

/*
 * Logs "GPU Run". The arguments u, v, w and n are currently not used.
 */
void gpu_run(float* u, float* v, float* w, int n)
{
    clog << "GPU Run" << endl;
}

/*
 * Disabled stand-alone device-query program, kept for reference.
 * It enumerates the CUDA devices and prints selected cudaDeviceProp fields.
 */
#if 0
// NOTE(review): the six header names below were missing from the source as
// received; this list is a reconstruction -- confirm before enabling.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda.h>
#include <cuda_runtime.h>

int main(int argc, char* argv[])
{
    // Runtime API calls return cudaError_t (CUresult is the driver-API type
    // and is not assignable from cudaError_t in C++).
    // NOTE(review): res is stored but never inspected.
    cudaError_t res;
    int ndev;

    res = cudaGetDeviceCount(&ndev);
    // NOTE(review): cudaThreadSynchronize is deprecated in favour of
    // cudaDeviceSynchronize.
    res = cudaThreadSynchronize();
    if (ndev == 0)
        printf("There is no device supporting CUDA\n");

#if 1
    int dev;
    for (dev = 0; dev < ndev; ++dev) {
        cudaDeviceProp deviceProp;
        res = cudaGetDeviceProperties(&deviceProp, dev);
        res = cudaThreadSynchronize();

        // Print the summary line once, before the first device's details.
        if (dev == 0) {
            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
                printf("There is no device supporting CUDA.\n");
            else if (ndev == 1)
                printf("There is 1 device supporting CUDA\n");
            else
                printf("There are %d devices supporting CUDA\n", ndev);
        }

        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
        printf(" Major revision number: %d\n", deviceProp.major);
        printf(" Minor revision number: %d\n", deviceProp.minor);
        // The size_t members are printed with %zu (the original %u mismatches
        // size_t on 64-bit targets).
        printf(" Total amount of global memory: %zu bytes\n", deviceProp.totalGlobalMem);
#if CUDART_VERSION >= 2000
        printf(" Number of multiprocessors: %d\n", deviceProp.multiProcessorCount);
        // NOTE(review): the factor 8 is a hard-coded cores-per-multiprocessor
        // value -- confirm it matches the targeted hardware.
        printf(" Number of cores: %d\n", 8 * deviceProp.multiProcessorCount);
#endif
        printf(" Total amount of constant memory: %zu bytes\n", deviceProp.totalConstMem);
        printf(" Total amount of shared memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock);
        printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
        printf(" Warp size: %d\n", deviceProp.warpSize);
        printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
        printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n",
               deviceProp.maxThreadsDim[0],
               deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n",
               deviceProp.maxGridSize[0],
               deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf(" Maximum memory pitch: %zu bytes\n", deviceProp.memPitch);
        printf(" Texture alignment: %zu bytes\n", deviceProp.textureAlignment);
        printf(" Clock rate: %.2f GHz\n", deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 2000
        printf(" Concurrent copy and execution: %s\n", deviceProp.deviceOverlap ? "Yes" : "No");
#endif
    }
    printf("\nTest PASSED\n");
#endif

    return 0;
}
#endif