General-purpose programming on GPU
First steps in CUDA
Giuseppe Bilotta, Eugenio Rustico, Alexis Hérault
DMI — Università di Catania
Sezione di Catania — INGV
First steps in CUDA
Initialize every element in a vector
/* usual C/C++ includes */
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
/* CUDA includes */
#include <cuda_runtime_api.h>
/* Kernel: set each vector element to the number of the block
   running the corresponding thread.

   vec  - device pointer to the vector to fill
   size - number of elements in vec

   Only the x components of blockDim/blockIdx/threadIdx are used, so the
   kernel is meant for a one-dimensional launch. Each thread handles one
   element, at global index blockDim.x*blockIdx.x + threadIdx.x. Threads
   whose index is >= size do nothing, so a grid that covers more than
   size elements never writes past the end of vec. */
__global__ void initVector(float *vec, const size_t size) {
	/* Widen before multiplying so the product cannot wrap in 32 bits */
	const size_t idx = (size_t)blockDim.x*blockIdx.x + threadIdx.x;
	if (idx < size)
		vec[idx] = blockIdx.x;
}
/* Number of float elements in the host and device vectors */
const unsigned int vector_size = 256u;
/* Number of threads per block used when launching the kernel */
const unsigned int block_size = 32u;
/* Entry point: select CUDA device 0, allocate a vector_size-element vector
   on both host and device, zero the device copy, run initVector on it,
   copy the result back and print it 16 entries per line.

   Returns 0 on success and 1 on any failure, after printing a diagnostic
   to stderr. Every CUDA runtime call is checked, and the host and device
   buffers are released on all paths that follow their allocation. */
int main (int argc, char **argv) {
	/* Command-line arguments are not used */
	(void)argc;
	(void)argv;

	const size_t bytes = vector_size*sizeof(float);
	int deviceCount = 0; // number of devices
	int dev = 0;
	int status = 1; // exit code; switched to 0 only after the full run succeeds
	/* Host and device vector pointers */
	float *hVector = NULL, *dVector = NULL;
	cudaError_t error;

	error = cudaGetDeviceCount(&deviceCount);
	if (error == cudaErrorNoDevice ||
	    (error == cudaSuccess && deviceCount == 0)) {
		fprintf(stderr, "No CUDA devices found\n");
		return 1;
	}
	if (error != cudaSuccess) {
		fprintf(stderr, "Unable to count CUDA devices: %s\n",
			cudaGetErrorString(error));
		return 1;
	}

	error = cudaSetDevice(dev);
	if (error != cudaSuccess) {
		fprintf(stderr, "Error setting device to %d: %s\n",
			dev, cudaGetErrorString(error));
		return 1;
	}

	/* Allocate host vector */
	hVector = (float *)malloc(bytes);
	if (hVector == NULL) {
		fprintf(stderr, "Unable to allocate host vector\n");
		goto cleanup;
	}

	/* Allocate device vector */
	error = cudaMalloc((void **)&dVector, bytes);
	if (error != cudaSuccess) {
		fprintf(stderr, "Unable to allocate device memory: %s\n",
			cudaGetErrorString(error));
		dVector = NULL; /* nothing to free on the device */
		goto cleanup;
	}

	/* Zero device vector */
	error = cudaMemset(dVector, 0, bytes);
	if (error != cudaSuccess) {
		fprintf(stderr, "Unable to zero device memory: %s\n",
			cudaGetErrorString(error));
		goto cleanup;
	}

	/* Launch kernel: the first parameter is the number of blocks, the second
	   the block size. vector_size is an exact multiple of block_size, so the
	   integer division yields a grid with exactly one thread per element. */
	initVector<<<vector_size/block_size, block_size>>>(dVector, vector_size);

	/* A launch returns no status itself: ask the runtime whether the
	   launch was accepted (e.g. valid grid/block configuration) */
	error = cudaGetLastError();
	if (error != cudaSuccess) {
		fprintf(stderr, "initVector launch failed: %s\n",
			cudaGetErrorString(error));
		goto cleanup;
	}

	/* Wait for kernel execution to finish; errors raised while the kernel
	   was running are reported here */
	error = cudaDeviceSynchronize();
	if (error != cudaSuccess) {
		fprintf(stderr, "initVector failed: %s\n",
			cudaGetErrorString(error));
		goto cleanup;
	}

	/* Copy data from device to host */
	error = cudaMemcpy(hVector, dVector, bytes, cudaMemcpyDeviceToHost);
	if (error != cudaSuccess) {
		fprintf(stderr, "cudaMemcpy failed: %s\n",
			cudaGetErrorString(error));
		goto cleanup;
	}

	/* Print "index: value" pairs, comma-separated, breaking the line after
	   every 16th entry ((i+1) & 0x0f is zero exactly when i+1 is a
	   multiple of 16) */
	for (unsigned int i=0; i < vector_size; ++i) {
		printf("%3u: %g", i, hVector[i]);
		if ((i+1) & 0x0f)
			printf(", ");
		else
			printf("\n");
	}

	status = 0;

cleanup:
	/* Release whatever was allocated before the jump here */
	if (dVector != NULL) {
		error = cudaFree(dVector);
		if (error != cudaSuccess) {
			fprintf(stderr, "cudaFree failed: %s\n",
				cudaGetErrorString(error));
			status = 1;
		}
	}
	free(hVector); /* free(NULL) is a no-op */
	return status;
}
0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0
16: 0, 17: 0, 18: 0, 19: 0, 20: 0, 21: 0, 22: 0, 23: 0, 24: 0, 25: 0, 26: 0, 27: 0, 28: 0, 29: 0, 30: 0, 31: 0
32: 1, 33: 1, 34: 1, 35: 1, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 1, 47: 1
48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 1, 60: 1, 61: 1, 62: 1, 63: 1
64: 2, 65: 2, 66: 2, 67: 2, 68: 2, 69: 2, 70: 2, 71: 2, 72: 2, 73: 2, 74: 2, 75: 2, 76: 2, 77: 2, 78: 2, 79: 2
80: 2, 81: 2, 82: 2, 83: 2, 84: 2, 85: 2, 86: 2, 87: 2, 88: 2, 89: 2, 90: 2, 91: 2, 92: 2, 93: 2, 94: 2, 95: 2
96: 3, 97: 3, 98: 3, 99: 3, 100: 3, 101: 3, 102: 3, 103: 3, 104: 3, 105: 3, 106: 3, 107: 3, 108: 3, 109: 3, 110: 3, 111: 3
112: 3, 113: 3, 114: 3, 115: 3, 116: 3, 117: 3, 118: 3, 119: 3, 120: 3, 121: 3, 122: 3, 123: 3, 124: 3, 125: 3, 126: 3, 127: 3
128: 4, 129: 4, 130: 4, 131: 4, 132: 4, 133: 4, 134: 4, 135: 4, 136: 4, 137: 4, 138: 4, 139: 4, 140: 4, 141: 4, 142: 4, 143: 4
144: 4, 145: 4, 146: 4, 147: 4, 148: 4, 149: 4, 150: 4, 151: 4, 152: 4, 153: 4, 154: 4, 155: 4, 156: 4, 157: 4, 158: 4, 159: 4
160: 5, 161: 5, 162: 5, 163: 5, 164: 5, 165: 5, 166: 5, 167: 5, 168: 5, 169: 5, 170: 5, 171: 5, 172: 5, 173: 5, 174: 5, 175: 5
176: 5, 177: 5, 178: 5, 179: 5, 180: 5, 181: 5, 182: 5, 183: 5, 184: 5, 185: 5, 186: 5, 187: 5, 188: 5, 189: 5, 190: 5, 191: 5
192: 6, 193: 6, 194: 6, 195: 6, 196: 6, 197: 6, 198: 6, 199: 6, 200: 6, 201: 6, 202: 6, 203: 6, 204: 6, 205: 6, 206: 6, 207: 6
208: 6, 209: 6, 210: 6, 211: 6, 212: 6, 213: 6, 214: 6, 215: 6, 216: 6, 217: 6, 218: 6, 219: 6, 220: 6, 221: 6, 222: 6, 223: 6
224: 7, 225: 7, 226: 7, 227: 7, 228: 7, 229: 7, 230: 7, 231: 7, 232: 7, 233: 7, 234: 7, 235: 7, 236: 7, 237: 7, 238: 7, 239: 7
240: 7, 241: 7, 242: 7, 243: 7, 244: 7, 245: 7, 246: 7, 247: 7, 248: 7, 249: 7, 250: 7, 251: 7, 252: 7, 253: 7, 254: 7, 255: 7