#include "gpu.h"
#include <iostream>

using namespace std;

#include <cuda_runtime.h>

void gpu_init()
{
	clog << "GPU Init" << endl;

	int	ndev;

	cudaGetDeviceCount(&ndev);

	clog << "There are " << ndev << " GPUs" << endl;
}

// Log that the GPU layer is shutting down. Nothing else happens here.
void gpu_fini()
{
	ostream&	log = clog;

	log << "GPU Fini";
	log << endl;	// newline + flush
}

// Placeholder for the GPU computation step: it only logs that it was called.
// The arguments u, v, w and n are accepted but are not read or written.
void gpu_run(float* u,float* v,float* w,int n)
{
	// Mark the parameters as intentionally unused.
	(void)u;
	(void)v;
	(void)w;
	(void)n;

	ostream&	log = clog;

	log << "GPU Run";
	log << endl;	// newline + flush
}

#if 0
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <driver_types.h>

int main(int argc,char* argv[])
{
	CUresult res;
    int ndev;

	res = cudaGetDeviceCount(&ndev);
	res = cudaThreadSynchronize();
    if (ndev == 0)
        printf("There is no device supporting CUDA\n");
#if 1
    int dev;
    for (dev = 0; dev < ndev; ++dev) {
        cudaDeviceProp deviceProp;
        res = cudaGetDeviceProperties(&deviceProp, dev);
		res = cudaThreadSynchronize();
        if (dev == 0) {
            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
                printf("There is no device supporting CUDA.\n");
            else if (ndev == 1)
                printf("There is 1 device supporting CUDA\n");
            else
                printf("There are %d devices supporting CUDA\n", ndev);
        }
        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
        printf("  Major revision number:                         %d\n",
               deviceProp.major);
        printf("  Minor revision number:                         %d\n",
               deviceProp.minor);
        printf("  Total amount of global memory:                 %u bytes\n",
               deviceProp.totalGlobalMem);
    #if CUDART_VERSION >= 2000
        printf("  Number of multiprocessors:                     %d\n",
               deviceProp.multiProcessorCount);
        printf("  Number of cores:                               %d\n",
               8 * deviceProp.multiProcessorCount);
    #endif
        printf("  Total amount of constant memory:               %u bytes\n",
               deviceProp.totalConstMem); 
        printf("  Total amount of shared memory per block:       %u bytes\n",
               deviceProp.sharedMemPerBlock);
        printf("  Total number of registers available per block: %d\n",
               deviceProp.regsPerBlock);
        printf("  Warp size:                                     %d\n",
               deviceProp.warpSize);
        printf("  Maximum number of threads per block:           %d\n",
               deviceProp.maxThreadsPerBlock);
        printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n",
               deviceProp.maxThreadsDim[0],
               deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n",
               deviceProp.maxGridSize[0],
               deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf("  Maximum memory pitch:                          %u bytes\n",
               deviceProp.memPitch);
        printf("  Texture alignment:                             %u bytes\n",
               deviceProp.textureAlignment);
        printf("  Clock rate:                                    %.2f GHz\n",
               deviceProp.clockRate * 1e-6f);
    #if CUDART_VERSION >= 2000
        printf("  Concurrent copy and execution:                 %s\n",
               deviceProp.deviceOverlap ? "Yes" : "No");
    #endif
    }
    printf("\nTest PASSED\n");
#endif
	return 0;
}
#endif