General-purpose programming on GPU

C and C++ programming basics

Giuseppe Bilotta, Eugenio Rustico, Alexis Hérault

DMI — Università di Catania
Sezione di Catania — INGV

C programming basics (part 2)

Returning an array from a function

Question: can we return an array from a function?

A statically allocated array fails:

Returning a pointer to a local variable (wrong)
#include <stdio.h>

#define N 32

int* calc_avg(const int in[]) {
    int out[N];
    int i = 0;

    out[i] = (in[i] + in[i+1])/2;

    for (++i; i < N-1; ++i) {
        out[i] = (in[i-1] + in[i] + in[i+1])/3;
    }

    out[i] = (in[i-1] + in[i])/2;

    return out;
}

int main(void) {
    int vec[N];
    int *avg;
    int i;

    for (i=0; i < N; ++i)
        vec[i] = i & 1 ? (3*i +1) : (i>>1);

    avg = calc_avg(vec);

    for (i=0; i < N; ++i)
        printf("%u\t%d\t%d\n", i, vec[i], avg[i]);
}
code/return-array.c: In function ‘calc_avg’:
code/return-array.c:17: warning: function returns address of local variable
0    0   2
1   4   11174
2   1   4196309
3   10  0
4   2   9
5   16  7
6   3   1034195664
7   22  11174
8   4   18
9   28  12
10  5   0
11  34  0
12  6   1275768192
13  40  32767
14  7   4195328
15  46  0
16  8   1275768416
17  52  32767
18  9   1032016629
19  58  11174
20  10  0
21  64  0
22  11  2
23  70  0
24  12  0
25  76  0
26  13  0
27  82  0
28  14  4196300
29  88  0
30  15  1037693696
31  94  11174

It works if it's dynamically allocated:

Returning a pointer to a local variable (fixed)
#include <stdio.h>
#include <stdlib.h>

#define N 32

int* calc_avg(const int in[]) {
    int *out = calloc(N,sizeof(*in));
    int i = 0;

    out[i] = (in[i] + in[i+1])/2;

    for (++i; i < N-1; ++i) {
        out[i] = (in[i-1] + in[i] + in[i+1])/3;
    }

    out[i] = (in[i-1] + in[i])/2;

    return out;
}

int main(void) {
    int vec[N];
    int *avg;
    int i;

    for (i=0; i < N; ++i)
        vec[i] = i & 1 ? (3*i +1) : (i>>1);

    avg = calc_avg(vec);

    for (i=0; i < N; ++i)
        printf("%u\t%d\t%d\n", i, vec[i], avg[i]);
}
0    0   2
1   4   1
2   1   5
3   10  4
4   2   9
5   16  7
6   3   13
7   22  9
8   4   18
9   28  12
10  5   22
11  34  15
12  6   26
13  40  17
14  7   31
15  46  20
16  8   35
17  52  23
18  9   39
19  58  25
20  10  44
21  64  28
22  11  48
23  70  31
24  12  52
25  76  33
26  13  57
27  82  36
28  14  61
29  88  39
30  15  65
31  94  54

Local variables are allocated on the stack, which gets freed when the function returns. If you want to return a pointer to something, you must use malloc and friends.

Better idea: let the caller pass you a non-const pointer to pre-allocated memory instead, and return void.

Overflowing malloc

Overflowing malloc
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    long int *vector;
    /* size_t is unsigned. Assuming 2's complement, -1 cast to size_t
      means a size_t with all bits set to 1, i.e. the highest unsigned
      value it can represent */
    size_t numels = (size_t)-1 / sizeof(*vector) + 1;

    /* this overflows! */
    size_t mem = numels*sizeof(*vector);

    printf("allocating %lu bytes for %lu elements of size %lu\n",
        mem, numels, sizeof(*vector));

    vector = malloc(mem);

    if (vector == NULL) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }

    /* allocation succeeds, but we got less memory than we wanted.
      we can now access memory beyond what was assigned to us:
      security risk, might segfault, etc */
    printf("the last element is %d\n", vector[numels-1]);
}
allocating 0 bytes for 2305843009213693952 elements of size 8
the last element is 33

Multidimensional arrays with malloc

A two-dimensional array There are at least three ways to allocate and use multidimensional arrays with size decided at runtime.

If we want to use double-index notation A[i][j] we need to allocate memory for the data but also for the indices. We can be more efficient if we limit ourselves to the A[i*ncols+j] notation.


Per-row allocation Indexed allocation Linear allocation


Multidimensional arrays (worst)
/* file: multi-malloc0.h */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int uint;

/* allocate each row, returning the number of rows for which
   allocation succeeded */
int init_rows(int **matrix, uint rows, uint cols) {
    size_t memsize = cols*sizeof(**matrix);
    uint row;
    for (row = 0; row < rows; ++row) {
        printf("allocating %lu bytes for row %u\n",
            memsize, row);
        matrix[row] = (int *)malloc(memsize);
        if (matrix[row] == NULL)
            break;
    }
    return row;
}

/* free each row */
void free_rows(int **matrix, uint rows) {
    uint row;
    for (row = 0; row < rows; ++row)
        free(matrix[row]);
}

void init_matrix(int **matrix, uint rows, uint cols) {
    uint row, col;
    for (row = 0; row < rows; ++row)
        for (col = 0; col < cols; ++col)
            matrix[row][col] = row - col;
}

void print_matrix(int **matrix, uint rows, uint cols)
{
    uint row, col;
    for (row = 0; row < rows; ++row) {
        for (col = 0; col < cols; ++col) {
            printf("%d", matrix[row][col]);
            if (col < cols - 1)
                printf("\t");
        }
        printf("\n");
    }
}
/* file: multi-malloc0.c */
#include "multi-malloc0.h"

int main(int argc, char **argv) {
    int **A;
    uint rows, cols, els;
    size_t memsize;

    if (argc != 3) {
        fprintf(stderr, "please specify two numbers\n");
        return 1;
    }

    rows = atoi(argv[1]); cols = atoi(argv[2]);

    /* check overflow for rows*sizeof(*A) */
    if ((size_t)-1 / sizeof(**A) < rows) {
        fprintf(stderr, "too many elements!\n");
        return 1;
    }

    memsize = rows*sizeof(*A);
    printf("allocating %lu bytes for %u row indices\n", memsize, rows);
    A = (int **)malloc(memsize);
    if (A == NULL) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }

    /* check overflow for cols*sizeof(**A) */
    if ((size_t)-1 / sizeof(**A) < cols) {
        fprintf(stderr, "too many elements!\n");
        return 1;
    }

    /* allocate rows */
    els = init_rows(A, rows, cols);
    if (els != rows) {
        fprintf(stderr, "allocation failed at row %d", els);
        free_rows(A, els);
        return 1;
    }
    init_matrix(A, rows, cols);
    print_matrix(A, rows, cols);

    free_rows(A, els); free(A);
}
allocating 24 bytes for 3 row indices
allocating 16 bytes for row 0
allocating 16 bytes for row 1
allocating 16 bytes for row 2
0   -1  -2  -3
1   0   -1  -2
2   1   0   -1

Multidimensional arrays (decent)
/* file: multi-malloc.h */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int uint;

void init_rows(int **matrix, uint rows, uint cols, int *mem) {
    uint row;
    for (row = 0; row < rows; ++ row)
        matrix[row] = mem + row*cols;
}

void init_matrix(int **matrix, uint rows, uint cols) {
    uint row, col;
    for (row = 0; row < rows; ++row)
        for (col = 0; col < cols; ++col)
            matrix[row][col] = row - col;
}

void print_matrix(int **matrix, uint rows, uint cols)
{
    uint row, col;
    for (row = 0; row < rows; ++row) {
        for (col = 0; col < cols; ++col) {
            printf("%d", matrix[row][col]);
            if (col < cols - 1)
                printf("\t");
        }
        printf("\n");
    }
}
/* file: multi-malloc.c */
#include "multi-malloc.h"

int main(int argc, char **argv) {
    int **A; int *A_mem;
    uint rows, cols, els;
    size_t memsize;

    if (argc != 3) {
        fprintf(stderr, "please specify two numbers\n");
        return 1;
    }

    rows = atoi(argv[1]); cols = atoi(argv[2]);
    els = cols * rows; /* this can overflow too, actually */

    /* check overflow for rows*sizeof(*A) */
    if ((size_t)-1 / sizeof(*A) < rows) {
        fprintf(stderr, "too many elements!\n");
        return 1;
    }

    memsize = rows*sizeof(*A);
    printf("allocating %lu bytes for %u row indices\n", memsize, rows);
    A = (int **)malloc(memsize);
    if (A == NULL) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }

    /* check overflow for els*sizeof(*A) */
    if ((size_t)-1 / sizeof(**A) < els) {
        fprintf(stderr, "too many elements!\n");
        return 1;
    }

    memsize = els*sizeof(**A);
    printf("allocating %lu bytes for %u (%ux%u) elements\n",
        memsize, els, rows, cols);

    A_mem = (int *)malloc(memsize);
    if (A_mem == NULL) {
        fprintf(stderr, "allocation failed\n");
        free(A);
        return 1;
    }

    init_rows(A, rows, cols, A_mem);
    init_matrix(A, rows, cols);
    print_matrix(A, rows, cols);

    free(A_mem); free(A);
}
allocating 24 bytes for 3 row indices
allocating 48 bytes for 12 (3x4) elements
0   -1  -2  -3
1   0   -1  -2
2   1   0   -1

Multidimensional arrays (best)
/* file: multi-malloc2.h */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int uint;

void init_matrix(int *matrix, uint rows, uint cols) {
    uint row, col;
    for (row = 0; row < rows; ++row)
        for (col = 0; col < cols; ++col)
            matrix[row*cols+col] = row - col;
}

void print_matrix(int *matrix, uint rows, uint cols)
{
    uint row, col;
    for (row = 0; row < rows; ++row) {
        for (col = 0; col < cols; ++col) {
            printf("%d", matrix[row*cols+col]);
            if (col < cols - 1)
                printf("\t");
        }
        printf("\n");
    }
}
/* file: multi-malloc2.c */
#include "multi-malloc2.h"

int main(int argc, char **argv) {
    int *A;
    uint rows, cols, els;
    size_t mem = sizeof(*A);

    if (argc != 3) {
        fprintf(stderr, "please specify two numbers\n");
        return 1;
    }

    rows = atoi(argv[1]); cols = atoi(argv[2]);
    els = cols * rows; /* this can overflow too! */

    if ((size_t)-1 / mem < els) {
        fprintf(stderr, "too many elements!\n");
        return 1;
    }
    mem *= els;
    printf("allocating %lu bytes for %u (%ux%u) elements\n",
        mem, els, rows, cols);

    A = (int *)malloc(mem);
    if (A == NULL) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }

    init_matrix(A, rows, cols);
    print_matrix(A, rows, cols);

    free(A);
}
allocating 48 bytes for 12 (3x4) elements
0   -1  -2  -3
1   0   -1  -2
2   1   0   -1

Basic input/output

Hello world (write to file)
#include <stdio.h>

char fname[] = "hello-write.txt";

int main(void)
{
    FILE *fd = fopen(fname, "w");
    if (fd == NULL) {
        fprintf(stderr, "failed to open %s for writing!\n", fname);
        return 1;
    }
    fprintf(fd, "Hello world!\n");
    if (ferror(fd)) {
        fprintf(stderr, "error writing to %s!\n", fname);
        fclose(fd);
        return 1;
    }
    fclose(fd);
    return 0;
}
Hello world!

Read vector
#include <stdio.h>
#include <stdlib.h>

char fname[] = "read-vec.dat";

int main(void)
{
    int *vec, min, max;
    unsigned int size;
    unsigned int i;
    int scancheck;
    FILE *fd;

    fd = fopen(fname, "rb");
    if (fd == NULL) {
        fprintf(stderr, "failed to open %s!\n", fname); return 1;
    }

    /* read the intial "N=..." sequence */
    scancheck = fscanf(fd, "N=%u", &size);
    if (scancheck != 1) {
        fprintf(stderr, "file does not specify number of elements\n"); return 1;
    }
    if ((size_t)-1 / size < sizeof(*vec)) {
        fprintf(stderr, "too many elements\n"); return 1;
    }
    vec = (int *)calloc(size, sizeof(*vec));
    if (vec == NULL) {
        fprintf(stderr, "failed to allocate memory\n"); return 1;
    }
    for (i = 0; i < size; ++i) {
        /* fscanf automatically skips whitespace */
        scancheck = fscanf(fd, "%d", vec + i);
        if (scancheck != 1) {
            fprintf(stderr, "failed to read element %u\n", i); return 1;
        } else if(ferror(fd)) {
            fprintf(stderr, "error in input file\n"); return 1;
        } else if (feof(fd)) {
            fprintf(stderr, "file ended prematurely\n"); return 1;
        }
    }
    close(fd);

    printf("%u elements read\n");

    return 0;
}
N=16 0 2 4 6
failed to read element 4

Read comma-separated data
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct student {
    char famname[256]; char givname[256]; char regnum[256];
    unsigned int mark;
};

char fname[] = "read-ws.dat";

#define MAXSTUD 256

int main(void)
{
    unsigned int i, best, worst;
    FILE *fd;
    /* use calloc() to ensure data is all 0 at the beginning */
    struct student *studs = calloc(MAXSTUD,sizeof(struct student));
    if (studs == NULL) {
        fprintf(stderr, "failed to allocate memory for student records!\n");
        return 1;
    }

    fd = fopen(fname, "rb");
    if (fd == NULL) {
        fprintf(stderr, "failed to open %s!\n", fname);
        return 1;
    }

    for (i=0; i < MAXSTUD; ++i) {
        char line[256];
        size_t last = 0;
        /* reads a line, or up to sizeof(line) bytes */
        char *ptr = fgets(line, sizeof(line), fd);
        /* ptr is NULL if an error or EOF was met */
        if (ptr == NULL) {
            if (feof(fd)) break;
            fprintf(stderr, "error reading from %s!\n", fname);
            return 1;
        }
        /* strlen gives the length of a C string */
        last = strlen(ptr);
        /* if fgets read 255 characters, it means it didn't manage to get a
       full line. this should be handled properly, we just quit */
        if (last == 255) {
            fprintf(stderr, "line %d too long in %s!\n", i, fname);
            return 1;
        }

        last -= 1;
        /* now last points to the last character in the string
          (before the null byte). If it's a newline, we want to
          stop before it */
        while (ptr[last] == 0x0D || ptr[last] == 0x0A)
            ptr[last--] = '\0';
        /* split a line at given delimiters */
        ptr = strtok(line, ",");
        strcpy(studs[i].famname, ptr);
        if (ptr = strtok(NULL, ","))
                strcpy(studs[i].givname, ptr);
        if (ptr = strtok(NULL, ","))
            strcpy(studs[i].regnum, ptr);
        if (ptr = strtok(NULL, ","))
            studs[i].mark = atoi(ptr);

        if (i == 0) {
            best = worst = 0;
        } else if (studs[i].mark < studs[worst].mark) {
            worst = i;
        } else if (studs[i].mark > studs[best].mark) {
            best = i;
        }
    }

    if (!feof(fd)) {
        fprintf(stderr, "too many records, stopping at %d\n", i);
    } else {
        fprintf(stderr, "%d records read\n", i);
    }
    fclose(fd);

    printf("best student: %s %s (%s) with mark %u\n",
        studs[best].givname, studs[best].famname,
        studs[best].regnum, studs[best].mark);
    printf("worst student: %s %s (%s) with mark %u\n",
        studs[worst].givname, studs[worst].famname,
        studs[worst].regnum, studs[worst].mark);

    return 0;
}
Pallino,Pinco,A01/2345,18
Tizio,Altro,M03/6661,30
Caio,Pure,B52/85743,15
Sempronio,Non Puo' Mancare,C06/C1F41,22
4 records read
best student: Altro Tizio (M03/6661) with mark 30
worst student: Pure Caio (B52/85743) with mark 15

Binary read/write:

#include <stdio.h>

size_t fread(void *ptr, size_t size, site_t nitems, FILE* fd);
size_t fwrite(const void *ptr, size_t size, site_t nitems, FILE* fd);

/* examples */

int vec[N];

bytes_written = fwrite(vec, sizeof(*vec), N, fd);
if (bytes_written / N < sizeof(*vec))
    error();

bytes_read = fread(vec, sizeof(*vec), N, fd);
if (bytes_read / N < sizeof(*vec))
    error();

Peeking at the next character (getc, ungetc)

/* skip blanks and comments */
int skip_junk(FILE *fd) {
    int comment = 0;
    int peek;
    int skippable = 1;
    while (skippable) {
        peek = getc(fd);
        if (ferror(fd) || feof(fd))
            break;
        switch (peek) {
        case 0x0D: /* carriage-return */
        case 0x0A: /* newline */
            comment = 0;
            break;
        case '#':
            comment = 1;
        case 0x20: /* space */
        case 0x09: /* tab */
            break;
        default:
            skippable = comment;
            break;
        }
    }
    ungetc(peek, fd);
    return ferror(fd) || feof(fd);
}

Get your feet wet/3

Transpose PBM/PGM images

Write a program that can read PBM and PGM file(s) specified on the command line, and write a transposed version of them.


PBM and PGM examples


Transposed images