General-purpose programming on GPU

First steps in CUDA

Giuseppe Bilotta, Eugenio Rustico, Alexis Hérault

DMI — Università di Catania
Sezione di Catania — INGV

First steps in CUDA

Initialize every element in a vector

/* usual C/C++ includes */
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

/* CUDA includes */
#include <cuda_runtime_api.h>

/* Kernel: set each vector element to the number of the block
   running the corresponding thread.

   vec  : device pointer to the vector to fill
   size : number of elements in vec

   Only the x components of blockDim/blockIdx/threadIdx are used, so the
   kernel expects a one-dimensional launch. Threads whose global index is
   not smaller than size do nothing, so the grid may contain more threads
   than there are elements. */
__global__ void initVector(float *vec, const size_t size) {
    /* Compute the global index in size_t so that blockDim.x*blockIdx.x
       cannot wrap around in 32-bit arithmetic */
    const size_t idx = (size_t)blockDim.x*blockIdx.x + threadIdx.x;

    /* Guard against out-of-bounds writes from surplus threads */
    if (idx < size)
        vec[idx] = (float)blockIdx.x;
}

/* Number of float elements in the vector */
static const unsigned int vector_size = 256U;
/* Number of threads in each block of the kernel launch */
static const unsigned int block_size = 32U;

/* Program entry point: select CUDA device 0, fill a device vector with the
   index of the block that wrote each element, copy it back to the host and
   print it, 16 elements per line.

   Returns 0 on success, 1 if any allocation or CUDA call fails. Host and
   device buffers are released on every exit path. */
int main (int argc, char **argv) {
    int deviceCount = 0; // number of devices
    int dev = 0;
    int status = 1; // exit code; set to 0 only once everything succeeded

    /* Host and device vector pointers (NULL until allocated, so that the
       cleanup code at the end knows what has to be released) */
    float *hVector = NULL, *dVector = NULL;

    const size_t vector_bytes = vector_size*sizeof(float);
    cudaError_t error;

    (void)argc;
    (void)argv;

    /* Single-pass block: any failure breaks out to the common cleanup */
    do {
        error = cudaGetDeviceCount(&deviceCount);
        if (error != cudaSuccess) {
            fprintf(stderr, "Unable to get device count: %s\n",
                cudaGetErrorString(error));
            break;
        }

        if (deviceCount <= 0) {
            fprintf(stderr, "No CUDA devices found\n");
            break;
        }

        error = cudaSetDevice(dev);
        if (error != cudaSuccess) {
            fprintf(stderr, "Error setting device to %d: %s\n",
                dev, cudaGetErrorString(error));
            break;
        }

        /* Allocate host vector */
        hVector = (float *)malloc(vector_bytes);
        if (hVector == NULL) {
            fprintf(stderr, "Unable to allocate host vector\n");
            break;
        }

        /* Allocate device vector (explicit cast: the C API takes void **) */
        error = cudaMalloc((void **)&dVector, vector_bytes);
        if (error != cudaSuccess) {
            fprintf(stderr, "Unable to allocate device memory: %s\n",
                cudaGetErrorString(error));
            dVector = NULL;
            break;
        }

        /* Zero device vector */
        error = cudaMemset(dVector, 0, vector_bytes);
        if (error != cudaSuccess) {
            fprintf(stderr, "cudaMemset failed: %s\n",
                cudaGetErrorString(error));
            break;
        }

        /* Launch kernel: the first parameter is the number of blocks, the
           second the block size. vector_size (256) is an exact multiple of
           block_size (32), so the integer division covers every element. */
        initVector<<<vector_size/block_size, block_size>>>(dVector, vector_size);

        /* A kernel launch returns nothing: launch errors (e.g. an invalid
           configuration) must be fetched explicitly */
        error = cudaGetLastError();
        if (error != cudaSuccess) {
            fprintf(stderr, "initVector launch failed: %s\n",
                cudaGetErrorString(error));
            break;
        }

        /* Wait for kernel execution to finish; errors raised while the
           kernel was running are reported here */
        error = cudaDeviceSynchronize();
        if (error != cudaSuccess) {
            fprintf(stderr, "initVector failed: %s\n",
                cudaGetErrorString(error));
            break;
        }

        /* Copy data from device to host */
        error = cudaMemcpy(hVector, dVector, vector_bytes, cudaMemcpyDeviceToHost);
        if (error != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy failed: %s\n",
                cudaGetErrorString(error));
            break;
        }

        /* Print "index: value" pairs, 16 per line */
        for (unsigned int i=0; i < vector_size; ++i) {
            printf("%3u: %g", i, hVector[i]);
            if ((i+1) & 0x0f)
                printf(", ");
            else
                printf("\n");
        }

        status = 0;
    } while (0);

    /* Common cleanup: release whatever was allocated */
    if (dVector != NULL)
        cudaFree(dVector);
    free(hVector);

    return status;

}

  0: 0,   1: 0,   2: 0,   3: 0,   4: 0,   5: 0,   6: 0,   7: 0,   8: 0,   9: 0,  10: 0,  11: 0,  12: 0,  13: 0,  14: 0,  15: 0
 16: 0,  17: 0,  18: 0,  19: 0,  20: 0,  21: 0,  22: 0,  23: 0,  24: 0,  25: 0,  26: 0,  27: 0,  28: 0,  29: 0,  30: 0,  31: 0
 32: 1,  33: 1,  34: 1,  35: 1,  36: 1,  37: 1,  38: 1,  39: 1,  40: 1,  41: 1,  42: 1,  43: 1,  44: 1,  45: 1,  46: 1,  47: 1
 48: 1,  49: 1,  50: 1,  51: 1,  52: 1,  53: 1,  54: 1,  55: 1,  56: 1,  57: 1,  58: 1,  59: 1,  60: 1,  61: 1,  62: 1,  63: 1
 64: 2,  65: 2,  66: 2,  67: 2,  68: 2,  69: 2,  70: 2,  71: 2,  72: 2,  73: 2,  74: 2,  75: 2,  76: 2,  77: 2,  78: 2,  79: 2
 80: 2,  81: 2,  82: 2,  83: 2,  84: 2,  85: 2,  86: 2,  87: 2,  88: 2,  89: 2,  90: 2,  91: 2,  92: 2,  93: 2,  94: 2,  95: 2
 96: 3,  97: 3,  98: 3,  99: 3, 100: 3, 101: 3, 102: 3, 103: 3, 104: 3, 105: 3, 106: 3, 107: 3, 108: 3, 109: 3, 110: 3, 111: 3
112: 3, 113: 3, 114: 3, 115: 3, 116: 3, 117: 3, 118: 3, 119: 3, 120: 3, 121: 3, 122: 3, 123: 3, 124: 3, 125: 3, 126: 3, 127: 3
128: 4, 129: 4, 130: 4, 131: 4, 132: 4, 133: 4, 134: 4, 135: 4, 136: 4, 137: 4, 138: 4, 139: 4, 140: 4, 141: 4, 142: 4, 143: 4
144: 4, 145: 4, 146: 4, 147: 4, 148: 4, 149: 4, 150: 4, 151: 4, 152: 4, 153: 4, 154: 4, 155: 4, 156: 4, 157: 4, 158: 4, 159: 4
160: 5, 161: 5, 162: 5, 163: 5, 164: 5, 165: 5, 166: 5, 167: 5, 168: 5, 169: 5, 170: 5, 171: 5, 172: 5, 173: 5, 174: 5, 175: 5
176: 5, 177: 5, 178: 5, 179: 5, 180: 5, 181: 5, 182: 5, 183: 5, 184: 5, 185: 5, 186: 5, 187: 5, 188: 5, 189: 5, 190: 5, 191: 5
192: 6, 193: 6, 194: 6, 195: 6, 196: 6, 197: 6, 198: 6, 199: 6, 200: 6, 201: 6, 202: 6, 203: 6, 204: 6, 205: 6, 206: 6, 207: 6
208: 6, 209: 6, 210: 6, 211: 6, 212: 6, 213: 6, 214: 6, 215: 6, 216: 6, 217: 6, 218: 6, 219: 6, 220: 6, 221: 6, 222: 6, 223: 6
224: 7, 225: 7, 226: 7, 227: 7, 228: 7, 229: 7, 230: 7, 231: 7, 232: 7, 233: 7, 234: 7, 235: 7, 236: 7, 237: 7, 238: 7, 239: 7
240: 7, 241: 7, 242: 7, 243: 7, 244: 7, 245: 7, 246: 7, 247: 7, 248: 7, 249: 7, 250: 7, 251: 7, 252: 7, 253: 7, 254: 7, 255: 7