General-purpose programming on GPU

C and C++ programming basics

Giuseppe Bilotta, Eugenio Rustico, Alexis Hérault

DMI — Università di Catania
Sezione di Catania — INGV

C++ programming basics

Introduction

C++ is a C-based object-oriented programming language. It is mostly compatible with C. Some differences:

syntax:
- one-line comments (from // to end-of-line);
- variables can be declared anywhere in the body of a function;
- struct somename ⇒ somename;
- actual pass-by-reference;
language:
- namespaces;
- classes;
- new operator to allocate structs, classes, etc;
- function overloading;
- templates;
standard library:
- strings, vectors, maps;
- I/O streams.

New syntax

/* C-style variable declaration */
struct point pt;

// C++-style variable declaration
point pt2;

/* it can be achieved in C with: */
typedef struct point point;

/* C-style (still valid in C++) */
int i;
for (i = 0; i < N; ++i) {
    /* actual loop code */
}

// C++-style
for (int i = 0; i < N; ++i) {
    // actual loop code
}

Note: C++ source files have a .cpp or .cc extension. The executable must be linked against the C++ standard library:

gcc sourcefile.cc -o executablename -lstdc++

Pass by reference
#include <stdio.h> void swap(int a, int b) { int tmp = a; a = b; b = tmp; } int main(void) { int x = 1; int y = 0; swap(x, y); /* nothing happened! */ printf("x=%d,y=%d\n", x, y); }	#include <stdio.h> void swap(int a, int b) { int tmp = a; a = b; b = tmp; } int main(void) { int x = 1; int y = 0; swap(&x, &y); /* the values are now swapped */ printf("x=%d,y=%d\n", x, y); }	#include <stdio.h> void swap(int &a, int &b) { int tmp = a; a = b; b = tmp; } int main(void) { int x = 1; int y = 0; swap(x, y); /* the values are now swapped */ printf("x=%d,y=%d\n", x, y); }
`x=1,y=0`	`x=0,y=1`	`x=0,y=1`

#include <stdio.h>

void swap(int a, int b)
{
    int tmp = a;
    a = b;
    b = tmp;
}

int main(void)
{
    int x = 1;
    int y = 0;
    swap(x, y);
    /* nothing happened! */
    printf("x=%d,y=%d\n", x, y);
}

#include <stdio.h>

void swap(int *a, int *b)
{
    int tmp = *a;
    *a = *b;
    *b = tmp;
}

int main(void)
{
    int x = 1;
    int y = 0;
    swap(&x, &y);
    /* the values are now swapped */
    printf("x=%d,y=%d\n", x, y);
}

#include <stdio.h>

void swap(int &a, int &b)
{
    int tmp = a;
    a = b;
    b = tmp;
}

int main(void)
{
    int x = 1;
    int y = 0;
    swap(x, y);
    /* the values are now swapped */
    printf("x=%d,y=%d\n", x, y);
}

x=1,y=0

x=0,y=1

x=0,y=1

Overloading

In C, each function has one signature. You must change the name even when conceptually it does the same thing on different types.

“Overloading” addition in `C`
#include <stdio.h> typedef struct point2d { float x; float y; } point2d; typedef struct point3d { float x; float y; float z;} point3d; point2d plus2d(const point2d pt1, const point2d pt2) { point2d result; result.x = pt1->x + pt2->x; result.y = pt1->y + pt2->y; return result; } point3d plus3d(const point3d pt1, const point3d pt2) { point3d result; result.x = pt1->x + pt2->x; result.y = pt1->y + pt2->y; result.z = pt1->z + pt2->z; return result; } int main(void) { point2d e1_2d = { 1, 0 }, e2_2d = { 0, 1 }; point2d sum_2d = plus2d(&e1_2d, &e2_2d); point3d e1_3d = { 1, 0, 0}, e2_3d = { 0, 1, 0 }; point3d sum_3d = plus3d(&e1_3d, &e2_3d); printf("the 2D sum is (%f, %f)\n", sum_2d.x, sum_2d.y); printf("the 3D sum is (%f, %f, %f)\n", sum_3d.x, sum_3d.y, sum_3d.z); }	`the 2D sum is (1.000000, 1.000000) the 3D sum is (1.000000, 1.000000, 0.000000)`

#include <stdio.h>

typedef struct point2d { float x; float y; } point2d;
typedef struct point3d { float x; float y; float z;} point3d;

point2d plus2d(const point2d *pt1, const point2d *pt2) {
    point2d result;
    result.x = pt1->x + pt2->x;
    result.y = pt1->y + pt2->y;
    return result;
}

point3d plus3d(const point3d *pt1, const point3d *pt2) {
    point3d result;
    result.x = pt1->x + pt2->x;
    result.y = pt1->y + pt2->y;
    result.z = pt1->z + pt2->z;
    return result;
}

int main(void) {
    point2d e1_2d = { 1, 0 }, e2_2d = { 0, 1 };
    point2d sum_2d = plus2d(&e1_2d, &e2_2d);

    point3d e1_3d = { 1, 0, 0}, e2_3d = { 0, 1, 0 };
    point3d sum_3d = plus3d(&e1_3d, &e2_3d);

    printf("the 2D sum is (%f, %f)\n", sum_2d.x, sum_2d.y);
    printf("the 3D sum is (%f, %f, %f)\n", sum_3d.x, sum_3d.y, sum_3d.z);
}

the 2D sum is (1.000000, 1.000000)
the 3D sum is (1.000000, 1.000000, 0.000000)

In C++, you can use the same name as long as the compiler can understand which one to use

Overloading addition in `C++`, with function
#include <stdio.h> struct point2d { float x; float y; }; struct point3d { float x; float y; float z;}; point2d plus(const point2d &pt1, const point2d &pt2) { point2d result; result.x = pt1.x + pt2.x; result.y = pt1.y + pt2.y; return result; } point3d plus(const point3d &pt1, const point3d &pt2) { point3d result; result.x = pt1.x + pt2.x; result.y = pt1.y + pt2.y; result.z = pt1.z + pt2.z; return result; } int main(void) { point2d e1_2d = { 1, 0 }, e2_2d = { 0, 1 }; point2d sum_2d = plus(e1_2d, e2_2d); point3d e1_3d = { 1, 0, 0}, e2_3d = { 0, 1, 0 }; point3d sum_3d = plus(e1_3d, e2_3d); printf("the 2D sum is (%f, %f)\n", sum_2d.x, sum_2d.y); printf("the 3D sum is (%f, %f, %f)\n", sum_3d.x, sum_3d.y, sum_3d.z); }	`the 2D sum is (1.000000, 1.000000) the 3D sum is (1.000000, 1.000000, 0.000000)`

#include <stdio.h>

struct point2d { float x; float y; };
struct point3d { float x; float y; float z;};

point2d plus(const point2d &pt1, const point2d &pt2) {
    point2d result;
    result.x = pt1.x + pt2.x;
    result.y = pt1.y + pt2.y;
    return result;
}

point3d plus(const point3d &pt1, const point3d &pt2) {
    point3d result;
    result.x = pt1.x + pt2.x;
    result.y = pt1.y + pt2.y;
    result.z = pt1.z + pt2.z;
    return result;
}

int main(void) {
    point2d e1_2d = { 1, 0 }, e2_2d = { 0, 1 };
    point2d sum_2d = plus(e1_2d, e2_2d);

    point3d e1_3d = { 1, 0, 0}, e2_3d = { 0, 1, 0 };
    point3d sum_3d = plus(e1_3d, e2_3d);

    printf("the 2D sum is (%f, %f)\n", sum_2d.x, sum_2d.y);
    printf("the 3D sum is (%f, %f, %f)\n", sum_3d.x, sum_3d.y, sum_3d.z);
}

the 2D sum is (1.000000, 1.000000)
the 3D sum is (1.000000, 1.000000, 0.000000)

In C++, you can even overload the standard operators:

Overloading addition in `C++`, with operators
#include <stdio.h> struct point2d { float x; float y; }; struct point3d { float x; float y; float z;}; point2d operator +(const point2d &pt1, const point2d &pt2) { point2d result; result.x = pt1.x + pt2.x; result.y = pt1.y + pt2.y; return result; } point3d operator +(const point3d &pt1, const point3d &pt2) { point3d result; result.x = pt1.x + pt2.x; result.y = pt1.y + pt2.y; result.z = pt1.z + pt2.z; return result; } int main(void) { point2d e1_2d = { 1, 0 }, e2_2d = { 0, 1 }; point2d sum_2d = e1_2d + e2_2d; point3d e1_3d = { 1, 0, 0}, e2_3d = { 0, 1, 0 }; point3d sum_3d = e1_3d + e2_3d; printf("the 2D sum is (%f, %f)\n", sum_2d.x, sum_2d.y); printf("the 3D sum is (%f, %f, %f)\n", sum_3d.x, sum_3d.y, sum_3d.z); }	`the 2D sum is (1.000000, 1.000000) the 3D sum is (1.000000, 1.000000, 0.000000)`

#include <stdio.h>

struct point2d { float x; float y; };
struct point3d { float x; float y; float z;};

point2d operator +(const point2d &pt1, const point2d &pt2) {
    point2d result;
    result.x = pt1.x + pt2.x;
    result.y = pt1.y + pt2.y;
    return result;
}

point3d operator +(const point3d &pt1, const point3d &pt2) {
    point3d result;
    result.x = pt1.x + pt2.x;
    result.y = pt1.y + pt2.y;
    result.z = pt1.z + pt2.z;
    return result;
}

int main(void) {
    point2d e1_2d = { 1, 0 }, e2_2d = { 0, 1 };
    point2d sum_2d = e1_2d + e2_2d;

    point3d e1_3d = { 1, 0, 0}, e2_3d = { 0, 1, 0 };
    point3d sum_3d = e1_3d + e2_3d;

    printf("the 2D sum is (%f, %f)\n", sum_2d.x, sum_2d.y);
    printf("the 3D sum is (%f, %f, %f)\n", sum_3d.x, sum_3d.y, sum_3d.z);
}

the 2D sum is (1.000000, 1.000000)
the 3D sum is (1.000000, 1.000000, 0.000000)

Templating

In some cases, the operations to be done are exactly the same. In C, you still need different function names.

#include <math.h>

/* return the biggest of the two values */
double fmax(double x, double y);
float fmaxf(float x, float y);
long double fmaxl(long double x, long double y);

/* no max() function in C for integer types */

Can we do something with macros perhaps?

`MAX()` macro
#include <stdio.h> #define MAX(a, b) ((b < a) ? a : b) int main(void) { int i = 1, j = 5, k = MAX(i, j); float fi = 1.1f, fj = 2.3f, fk = MAX(fi, fj); printf("i=%d, j=%d, k=%d\n", i, j, k); printf("fi=%f, fj=%f, fk=%f\n", fi, fj, fk); }	int main(void) { int i = 1, j = 5, k = ((j < i) ? i : j); float fi = 1.1f, fj = 2.3f, fk = ((fj < fi) ? fi : fj); printf("i=%d, j=%d, k=%d\n", i, j, k); printf("fi=%f, fj=%f, fk=%f\n", fi, fj, fk); }
`i=1, j=5, k=5 fi=1.100000, fj=2.300000, fk=2.300000`

#include <stdio.h>

#define MAX(a, b) ((b < a) ? a : b)

int main(void) {
    int i = 1, j = 5, k = MAX(i, j);
    float fi = 1.1f, fj = 2.3f, fk = MAX(fi, fj);

    printf("i=%d, j=%d, k=%d\n", i, j, k);
    printf("fi=%f, fj=%f, fk=%f\n", fi, fj, fk);
}

int main(void) {
 int i = 1, j = 5, k = ((j < i) ? i : j);
 float fi = 1.1f, fj = 2.3f, fk = ((fj < fi) ? fi : fj);

 printf("i=%d, j=%d, k=%d\n", i, j, k);
 printf("fi=%f, fj=%f, fk=%f\n", fi, fj, fk);
}

i=1, j=5, k=5
fi=1.100000, fj=2.300000, fk=2.300000

Double evaluation of arguments!

`MAX()` macro with double evaluation
#include <stdio.h> /* DANGEROUS! Double evaluation: the biggest argument gets evaluated twice / #define MAX(a, b) ((b < a) ? a : b) int main(void) { int i = 1, j = 5; int k = MAX(++i, ++j); / j gets incremented twice! */ printf("i=%d, j=%d, k=%d\n", i, j, k); }	int main(void) { int i = 1, j = 5; int k = ((++j < ++i) ? ++i : ++j); printf("i=%d, j=%d, k=%d\n", i, j, k); }
`i=2, j=7, k=7`

#include <stdio.h>

/* DANGEROUS! Double evaluation: the biggest argument gets evaluated twice */
#define MAX(a, b) ((b < a) ? a : b)

int main(void) {
    int i = 1, j = 5;
    int k = MAX(++i, ++j); /* j gets incremented twice! */

    printf("i=%d, j=%d, k=%d\n", i, j, k);
}

int main(void) {
 int i = 1, j = 5;
 int k = ((++j < ++i) ? ++i : ++j);

 printf("i=%d, j=%d, k=%d\n", i, j, k);
}

i=2, j=7, k=7

In C++ we can use templates

`max()` function template
#include <stdio.h> /* no double evaluation here / template <typename T> const T& max(const T& a, const T&b) { return (b < a) ? a : b; } int main(void) { int i = 1, j = 5; int k = max(++i, ++j); / j only gets incremented once */ printf("i=%d, j=%d, k=%d\n", i, j, k); }	`i=2, j=6, k=6`

#include <stdio.h>

/* no double evaluation here */
template <typename T> const T& max(const T& a, const T&b) {
    return (b < a) ? a : b;
}

int main(void) {
    int i = 1, j = 5;
    int k = max(++i, ++j); /* j only gets incremented once */

    printf("i=%d, j=%d, k=%d\n", i, j, k);
}

i=2, j=6, k=6

Actually, max is already defined in the C++ standard library, in a slightly more complex way. It is found in the algorithm include file, together with a lot of other useful functions such as sort, merge, etc.

We will not discuss the C++ standard library here, and only mention a small set of the object-oriented features of C++ (CUDA has very limited support for them).

`struct` enhancements

`struct` initialization
#include <stdio.h> /* a structure collecting the physical parameters for a simulation / struct PhysParams { float gravity; // system gravity, in m/s^2 float density; // fluid density, in kg/m^3 float viscosity; // kinematic viscosity, in m^2/s PhysParams() { / default to water on Earth / gravity = 9.81f; density = 1000.0f; viscosity = 1.0e-6f; } }; int main(void) { / default / PhysParams p1; / oil */ PhysParams p2; p2.density = 920.0f; p2.viscosity = 8.3e-5f; printf("Parameters #1: g = %g, rho = %g, nu = %g\n", p1.gravity, p1.density, p1.viscosity); printf("Parameters #2: g = %g, rho = %g, nu = %g\n", p2.gravity, p2.density, p2.viscosity); }	`Parameters #1: g = 9.81, rho = 1000, nu = 1e-06 Parameters #2: g = 9.81, rho = 920, nu = 8.3e-05`

#include <stdio.h>

/* a structure collecting the physical parameters for a simulation */
struct PhysParams {
    float gravity; // system gravity, in m/s^2
    float density; // fluid density, in kg/m^3
    float viscosity; // kinematic viscosity, in m^2/s
    PhysParams() {
        /* default to water on Earth */
        gravity = 9.81f;
        density = 1000.0f;
        viscosity = 1.0e-6f;
    }
};

int main(void) {
    /* default */
    PhysParams p1;
    /* oil */
    PhysParams p2;
    p2.density = 920.0f;
    p2.viscosity = 8.3e-5f;

    printf("Parameters #1: g = %g, rho = %g, nu = %g\n",
        p1.gravity, p1.density, p1.viscosity);

    printf("Parameters #2: g = %g, rho = %g, nu = %g\n",
        p2.gravity, p2.density, p2.viscosity);
}

Parameters #1: g = 9.81, rho = 1000, nu = 1e-06
Parameters #2: g = 9.81, rho = 920, nu = 8.3e-05

`struct` allocation and initialization (`new` and `delete`)
#include <stdio.h> #include <stdlib.h> #include <new> template <typename T> T random_value(const T &min, const T &max) { T rnd = T(rand())/T(RAND_MAX); /* rnd is now a value of type T between 0 and 1 / return (T(1) - rnd)min + rndmax; } / a structure collecting the physical parameters for a simulation / struct PhysParams { float gravity, density, viscosity; PhysParams() { / random fluid on Earth / gravity = 9.81f; density = random_value(900.0f, 2700.0f); viscosity = random_value(0.0f, 1.0e-3f); } }; / new is the equivalent of malloc + init. By default it throws an exception but it can be used with the nothrow option to return NULL just like malloc / // nothrow is defined in the std namespace using namespace std; #define N 10 int main(void) { PhysParams fluid = new (nothrow) PhysParams; PhysParams *fluidlist = new (nothrow) PhysParams[N]; if (!fluid \|\| !fluidlist) { fprintf(stderr, "failed to allocate fluids\n"); return 1; } printf("Parameters #X: g = %g, rho = %e, nu = %e\n", fluid->gravity, fluid->density, fluid->viscosity); for (int i=0; i < N; ++i) { printf("Parameters #%d: g = %g, rho = %e, nu = %e\n", i, fluidlist[i].gravity, fluidlist[i].density, fluidlist[i].viscosity); } delete fluid; delete[] fluidlist; }	Parameters #X: g = 9.81, rho = 2.412338e+03, nu = 3.943830e-04 Parameters #0: g = 9.81, rho = 2.309579e+03, nu = 7.984401e-04 Parameters #1: g = 9.81, rho = 2.540965e+03, nu = 1.975514e-04 Parameters #2: g = 9.81, rho = 1.503401e+03, nu = 7.682297e-04 Parameters #3: g = 9.81, rho = 1.399995e+03, nu = 5.539700e-04 Parameters #4: g = 9.81, rho = 1.759315e+03, nu = 6.288709e-04 Parameters #5: g = 9.81, rho = 1.556612e+03, nu = 5.134009e-04 Parameters #6: g = 9.81, rho = 2.614013e+03, nu = 9.161952e-04 Parameters #7: g = 9.81, rho = 2.044281e+03, nu = 7.172970e-04 Parameters #8: g = 9.81, rho = 1.154885e+03, nu = 6.069689e-04 Parameters #9: g = 9.81, rho = 9.293410e+02, nu = 2.428868e-04

#include <stdio.h>
#include <stdlib.h>
#include <new>

template <typename T>
T random_value(const T &min, const T &max) {
    T rnd = T(rand())/T(RAND_MAX);
    /* rnd is now a value of type T between 0 and 1 */
    return (T(1) - rnd)*min + rnd*max;
}

/* a structure collecting the physical parameters for a simulation */
struct PhysParams {
    float gravity, density, viscosity;
    PhysParams() {
        /* random fluid on Earth */
        gravity = 9.81f;
        density = random_value(900.0f, 2700.0f);
        viscosity = random_value(0.0f, 1.0e-3f);
    }
};

/* new is the equivalent of malloc + init. By default it throws an exception
   but it can be used with the nothrow option to return NULL just like
   malloc */

// nothrow is defined in the std namespace
using namespace std;

#define N 10

int main(void) {
    PhysParams *fluid = new (nothrow) PhysParams;
    PhysParams *fluidlist = new (nothrow) PhysParams[N];

    if (!fluid || !fluidlist) {
        fprintf(stderr, "failed to allocate fluids\n");
        return 1;
    }

    printf("Parameters #X: g = %g, rho = %e, nu = %e\n",
        fluid->gravity, fluid->density, fluid->viscosity);
    for (int i=0; i < N; ++i) {
        printf("Parameters #%d: g = %g, rho = %e, nu = %e\n",
            i, fluidlist[i].gravity, fluidlist[i].density,
            fluidlist[i].viscosity);
    }

    delete fluid; delete[] fluidlist;
}

Parameters #X: g = 9.81, rho = 2.412338e+03, nu = 3.943830e-04
Parameters #0: g = 9.81, rho = 2.309579e+03, nu = 7.984401e-04
Parameters #1: g = 9.81, rho = 2.540965e+03, nu = 1.975514e-04
Parameters #2: g = 9.81, rho = 1.503401e+03, nu = 7.682297e-04
Parameters #3: g = 9.81, rho = 1.399995e+03, nu = 5.539700e-04
Parameters #4: g = 9.81, rho = 1.759315e+03, nu = 6.288709e-04
Parameters #5: g = 9.81, rho = 1.556612e+03, nu = 5.134009e-04
Parameters #6: g = 9.81, rho = 2.614013e+03, nu = 9.161952e-04
Parameters #7: g = 9.81, rho = 2.044281e+03, nu = 7.172970e-04
Parameters #8: g = 9.81, rho = 1.154885e+03, nu = 6.069689e-04
Parameters #9: g = 9.81, rho = 9.293410e+02, nu = 2.428868e-04