Home CPSC 425

Introduction to Pthreads

Overview

POSIX threads (Pthreads) is a library for writing multi-threaded programs. It is available on POSIX systems (Unix, Linux, macOS), and can also be used on Windows through compatibility libraries.

It is a C library, but can be used with C++ as well.


Hello World

The following program illustrates the very basics of Pthreads:


#include <pthread.h>
#include <stdio.h>

#define THREADS 4

/* the function called for each thread */
/*
 * Thread entry point: prints a greeting, then ends the calling thread.
 * The argument is not used; the thread's exit value is NULL.
 */
void* f(void* data) {
    (void) data; /* this example passes no data to its threads */

    fputs("Hello from a thread!\n", stdout);
    pthread_exit(NULL);
}

/*
 * Starts THREADS threads that each run f, then ends the main thread.
 */
int main() {
    /* one handle per spawned thread */
    pthread_t threads[THREADS];

    /* launch every thread on f, with default attributes and no argument */
    int t = 0;
    while (t < THREADS) {
        pthread_create(threads + t, NULL, f, NULL);
        t++;
    }

    /* end only the main thread, rather than returning from main */
    pthread_exit(NULL);
}
pthread_create takes 4 parameters:
  1. A pointer to the pthread_t object that will identify the new thread.
  2. The "thread attributes" which is often left NULL.
  3. A pointer to a function to begin executing. The function must take and return a void*.
  4. The data to pass into the thread (can be NULL).

Compiling Pthreads

Pthreads can be compiled with gcc or clang, as long as we pass the "-pthread" option:

$ gcc hello.c -pthread
$ ./a.out

Without the -pthread option, we may get linker errors.


Pthreads and C++

Pthreads can be used with C++ programs as well:


#include <pthread.h>
#include <iostream>

const int THREADS = 4;

/* the function called for each thread */
/*
 * Thread entry point: writes a greeting to std::cout, then ends the
 * calling thread. The argument is not used; the exit value is NULL.
 */
void* f(void* data) {
    (void) data; /* this example passes no data to its threads */

    const char* greeting = "Hello from a thread!";
    std::cout << greeting << std::endl;
    pthread_exit(NULL);
}

/*
 * Starts THREADS threads that each run f, then ends the main thread.
 */
int main() {
    /* one handle per spawned thread */
    pthread_t threads[THREADS];

    /* launch every thread on f, with default attributes and no argument */
    int started = 0;
    while (started < THREADS) {
        pthread_create(threads + started, NULL, f, NULL);
        ++started;
    }

    /* end only the main thread, rather than returning from main */
    pthread_exit(NULL);
}

Compile as above, except with g++ or clang++.


Passing Data to a Thread

To pass data to your thread, you need to pass it as a void* to the last parameter of pthread_create.

The example below shows how this can be done:


#include <pthread.h>
#include <stdio.h>

#define THREADS 4

/*
 * Thread entry point: idp points to an int holding this thread's id.
 * Prints the id, then ends the calling thread with a NULL exit value.
 */
void* f(void* idp) {
    /* a void* converts back to a typed pointer without a cast in C */
    const int* id_ptr = idp;

    printf("Thread %d checking in!\n", *id_ptr);
    pthread_exit(NULL);
}

/*
 * Starts THREADS threads, handing each one a pointer to its own id.
 */
int main() {
    pthread_t threads[THREADS];
    /* one id slot per thread, so no two threads read the same int */
    int ids[THREADS];

    int i = 0;
    while (i < THREADS) {
        int* slot = &ids[i];
        *slot = i;
        pthread_create(&threads[i], NULL, f, slot);
        i++;
    }

    /* end only the main thread, rather than returning from main */
    pthread_exit(NULL);
}

Here we pass a pointer to an int, which is converted to a void* when it is handed to pthread_create. A void* is used so that any type of data can be passed into a thread.

You must also ensure that each thread's input parameters are kept safe. For instance, if we tried to reuse one int for all of the threads, it would not work properly, because main may overwrite the value before a thread has read it:


#include <pthread.h>
#include <stdio.h>

#define THREADS 4

/*
 * Thread entry point: reads the int that idp points to, prints it as
 * this thread's id, then ends the calling thread with a NULL exit value.
 */
void* f(void* idp) {
    int* id_ptr = (int*) idp;

    /* the value is read once, at this point */
    int seen = *id_ptr;

    printf("Thread %d checking in!\n", seen);
    pthread_exit(NULL);
}

/*
 * Counter-example: every thread is handed the address of the same int.
 */
int main() { 
    pthread_t threads[THREADS];
    /* now they all share an id -- this will not work well */
    int id; 

    for (int i = 0; i < THREADS; i++) {
        /* id is overwritten on every iteration, yet each thread created so
           far still points at it, so a thread may read a later value than
           the one that was current when it was created */
        id = i;
        pthread_create(&threads[i], NULL, f, &id);
    }

    pthread_exit(NULL);
}


Joining Threads

The main function does not automatically wait for the threads it spawns to finish:


#include <unistd.h>
#include <pthread.h>
#include <stdio.h>

#define THREADS 4

/* the function called for each thread */
/*
 * Thread entry point: reads its id through idp, sleeps for one second,
 * prints the id, then ends the calling thread with a NULL exit value.
 */
void* f(void* idp) {
    /* copy the id out before sleeping */
    const int id = *(const int*) idp;

    sleep(1);

    printf("Thread %d checking in!\n", id);
    pthread_exit(NULL);
}

/*
 * Counter-example: spawns THREADS threads and then returns from main
 * without waiting for any of them.
 */
int main() {
    /* an array of threads */
    pthread_t threads[THREADS];
    int ids[THREADS];

    for (int i = 0; i < THREADS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, f, &ids[i]);
    }

    /* nothing above waits for the threads, so this message is printed
       regardless of whether they have finished */
    printf("All threads finished!\n");
    /* returning from main (instead of calling pthread_exit) does not wait
       for the spawned threads; they are likely cut off before they print */
    return 0;
}

(If we have the main function call pthread_exit(NULL) instead of return 0, then the threads will at least get a chance to finish.)

Most of the time, however, there will be points where we want to wait for all worker threads to finish. This can be done by joining each thread with the pthread_join function:


#include <unistd.h>
#include <pthread.h>
#include <stdio.h>

#define THREADS 4

/* the function called for each thread */
/*
 * Thread entry point: reads its id through idp, sleeps for one second,
 * prints the id, then ends the calling thread with a NULL exit value.
 */
void* f(void* idp) {
    int* id_ptr = idp;
    int id = *id_ptr; /* read before the sleep, as a private copy */

    sleep(1);

    printf("Thread %d checking in!\n", id);
    pthread_exit(NULL);
}

/*
 * Spawns THREADS threads, waits for every one of them with pthread_join,
 * and only then reports that they have finished.
 */
int main() {
    /* an array of threads */
    pthread_t threads[THREADS];
    int ids[THREADS];

    /* spawn the threads; each one gets a pointer to its own slot in ids */
    for (int i = 0; i < THREADS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, f, &ids[i]);
    }

    /* wait for the threads to finish; the loop variable above is scoped to
       its own loop, so this loop has to declare its own */
    for (int i = 0; i < THREADS; i++) {
        /* NULL: the threads' exit values are not needed here */
        pthread_join(threads[i], NULL);
    }

    printf("All threads finished!\n");

    pthread_exit(NULL);
}

Returning Data from a Thread

Pthreads also uses void* types to return data from a thread. An issue with this is that we cannot return the address of a local variable. What is wrong with this?


/*
 * Counter-example: returns the address of a local variable.
 */
void* f(void* idp) {
    /* answer is a local variable of this call */
    int answer;

    /* do calculation ... */  

    /* answer stops existing once f returns, so the pointer handed back
       here is dangling by the time anyone can use it */
    return (void*) &answer;
}

In order to get around this, we can use malloc/new to allocate memory for each thread:


/*
 * Thread function skeleton that hands its result back in heap memory.
 * Returns the malloc'd int as a void*; whoever receives it must free it.
 */
void* f(void* idp) {
    /* heap storage stays valid after this function returns */
    int* answer = malloc(sizeof *answer);

    /* do calculation ... */

    /* an int* converts to void* implicitly in C */
    return answer;
}

But then we must free/delete the memory in the main function!

The last parameter to pthread_join is a pointer in which to place the return value:


int* result;
pthread_join(threads[i], (void**) &result);
free(result);

Example: Parallel Sum

The following example shows how we could use Pthreads to compute the sum of a range of numbers:


#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>

#define THREADS 4
#define START 0
#define END 10000

/*
 * Thread entry point: sums this thread's share of the integers in
 * [START, END] (both ends inclusive).
 *
 * idp points to an int holding the thread id, in the range 0..THREADS-1.
 * The thread exits with a pointer to a malloc'd int holding its partial
 * sum, which the joining thread must free, or with NULL if the allocation
 * failed.
 */
void* sum_part(void* idp) {
    /* get our thread id */
    int id = * (int*) idp;

    /* how many numbers each thread handles (the last one may get more) */
    int chunk = (END - START) / THREADS;

    /* calculate the start and end points by evenly dividing the range;
       offset by START so the split is still right when START is not 0 */
    int start = START + chunk * id;
    int end = start + chunk - 1;

    /* the last thread needs to do all remaining ones */
    if (id == (THREADS - 1)) {
        end = END;
    }

    /* allocate space for the answer */
    int* answer = malloc(sizeof *answer);
    if (answer == NULL) {
        pthread_exit(NULL);
    }

    /* malloc does not zero its memory, so the running total must be
       set before anything is added to it */
    *answer = 0;

    /* do the calculation */
    for (int i = start; i <= end; i++) {
        *answer += i;
    }

    /* debugging output */
    printf("Thread %d: sum(%d, %d) = %d\n", id, start, end, *answer);

    pthread_exit(answer);
}

/*
 * Splits the sum across THREADS threads running sum_part, adds up their
 * partial results and prints the total.
 *
 * Returns 1 from main if a thread cannot be created or joined, or if a
 * thread hands back no result.
 */
int main() {
    /* an array of threads */
    pthread_t threads[THREADS];
    int ids[THREADS];

    /* spawn all threads */
    for (int i = 0; i < THREADS; i++) {
        ids[i] = i;
        int rc = pthread_create(&threads[i], NULL, sum_part, &ids[i]);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for thread %d (error %d)\n", i, rc);
            return 1;
        }
    }

    /* join all threads collecting answer */
    int answer = 0;
    for (int i = 0; i < THREADS; i++) {
        /* pthread_join writes a void*, so receive it in one rather than
           casting the address of an int* */
        void* ret = NULL;
        int rc = pthread_join(threads[i], &ret);
        if (rc != 0) {
            fprintf(stderr, "pthread_join failed for thread %d (error %d)\n", i, rc);
            return 1;
        }

        /* a thread that produced no result would otherwise be dereferenced */
        if (ret == NULL) {
            fprintf(stderr, "thread %d returned no result\n", i);
            return 1;
        }

        int* partial = ret;
        answer += *partial;

        /* the partial sum was allocated by the thread; release it here */
        free(partial);
    }

    /* now all results are in */
    printf("Final answer = %d.\n", answer);
    pthread_exit(NULL);
}

Copyright © 2018 Ian Finlayson | Licensed under a Creative Commons Attribution 4.0 International License.