Critical Sections & Mutexes

Critical Sections

Recall that a critical section is section of code which accesses some shared resource and which must not be accessed by multiple threads at the same time.

For example, the following version of the sum program is non-deterministic due to an un-handled critical section:


#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>

#define THREADS 32
#define START 0
#define END 100000

/* a global variable */
unsigned long sum = 0;

/* the function called for each thread */
void* sum_part(void* idp) {
    /* get our thread id */
    int id = * (int*) idp;

    /* calculate the start and end points by evenly dividing the range */
    unsigned long start = ((END - START) / THREADS) * id;
    unsigned long end = start + ((END - START) / THREADS) - 1;

    /* the last thread needs to do all remaining ones */
    if (id == (THREADS - 1)) {
        end = END;
    }

    /* do the calculation */
    unsigned long i;
    for (i = start; i <= end; i++) {
        sum += i;
    }

    return NULL;
}

int main() {
    /* an array of threads */
    pthread_t threads[THREADS];
    int ids[THREADS];
    int i;

    /* spawn all threads */
    for (i = 0; i < THREADS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, sum_part, &ids[i]);
    }

    /* join all threads collecting answer */
    for (i = 0; i < THREADS; i++) {
        pthread_join(threads[i], NULL);
    }

    /* now all results are in */
    printf("Final answer = %lu.\n", sum);
    pthread_exit(NULL);
}

This program may get the correct answer (5000050000), but it may not.

Mutexes

A mutex is a structure for forcing a critical section to be executed by only one thread at a time. First we must create a mutex variable:


pthread_mutex_t sum_lock;

It must then be initialized with the following code:


pthread_mutex_init(&sum_lock, NULL);

Note that, we cannot do this inside the threads, because then each thread will have its own mutex, defeating the purpose. The mutex has to be shared amongst all threads.

Finally, we wrap our critical section inside of lock/unlock calls:


pthread_mutex_lock(&sum_lock);

/* critical section here */

pthread_mutex_unlock(&sum_lock);

Mutex Example

We can fix the above program by wrapping the updates to sum inside of a mutex:


#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>

#define THREADS 32
#define START 0
#define END 100000

/* a global variable */
unsigned long sum = 0;

/* a mutex protecting it */
pthread_mutex_t sum_lock;

/* the function called for each thread */
void* sum_part(void* idp) {
    /* get our thread id */
    int id = * (int*) idp;

    /* calculate the start and end points by evenly dividing the range */
    unsigned long start = ((END - START) / THREADS) * id;
    unsigned long end = start + ((END - START) / THREADS) - 1;

    /* the last thread needs to do all remaining ones */
    if (id == (THREADS - 1)) {
        end = END;
    }

    /* do the calculation */
    unsigned long i;
    for (i = start; i <= end; i++) {
        /* critical section! */
        pthread_mutex_lock(&sum_lock);
        sum += i;
        pthread_mutex_unlock(&sum_lock);
    }

    return NULL;
}

int main() {
    /* an array of threads */
    pthread_t threads[THREADS];
    int ids[THREADS];
    int i;

    /* initialize the mutex */
    pthread_mutex_init(&sum_lock, NULL);

    /* spawn all threads */
    for (i = 0; i < THREADS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, sum_part, &ids[i]);
    }

    /* join all threads collecting answer */
    for (i = 0; i < THREADS; i++) {
        pthread_join(threads[i], NULL);
    }

    /* now all results are in */
    printf("Final answer = %lu.\n", sum);
    pthread_exit(NULL);
}

Now the program will always produce the correct answer. However, the mutex slows the program down, and not using shared state at all would be faster!

Command Line Arguments

One handy trick for testing threaded programs is to make the number of threads a command line parameter. So rather than having to recompile our code to change the number of threads, we can pass it to the program on the command line:

The following example illustrates how to do this:


#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>

/* the number of threads */
int num_threads;
#define START 0
#define END 10000

/* the function called for each thread */
void* sum_part(void* idp) {
    /* get our thread id */
    int id = * (int*) idp;

    /* calculate the start and end points by evenly dividing the range */
    int start = ((END - START) / num_threads) * id;
    int end = start + ((END - START) / num_threads) - 1;

    /* the last thread needs to do all remaining ones */
    if (id == (num_threads - 1)) {
        end = END;
    }

    /* allocate space for the answer */
    int* answer = malloc(sizeof(int));

    /* do the calculation */
    int i;
    for (i = start; i <= end; i++) {
        *answer += i;
    }

    /* debugging output */
    printf("Thread %d: sum(%d, %d) = %d\n", id, start, end, *answer);

    return answer;
}

int main (int argc, char** argv) {
    /* get the number of threads */
    if (argc < 2) {
        printf("Pass the number of threads to use!\n");
        return 0;
    } else {
        num_threads = atoi(argv[1]);
    }

    /* an array of threads */
    pthread_t threads[num_threads];
    int ids[num_threads];
    int i;

    /* spawn all threads */
    for (i = 0; i < num_threads; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, sum_part, &ids[i]);
    }

    /* join all threads collecting answer */
    int answer = 0;
    for (i = 0; i < num_threads; i++) {
        int* partial;
        pthread_join(threads[i], (void**) &partial);
        answer += *partial;
        free(partial);
    }

    /* now all results are in */
    printf("Final answer = %d.\n", answer);
    pthread_exit(NULL);
}

Note that this example uses variable length arrays. This is a feature of C, but officially not of C++. g++ does support variable length arrays in C++ code, however.

Example: Parallel Output

The following program searches an array for a particular string, printing out each match. How can we change it so that the output is never mangled?


#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <stdio.h>

/* max size of arrays */
#define STRING_SIZE 64
#define NUM_STRINGS 100000

/* the number of threads */
int num_threads;

/* global variables, used to specify what to search for each thread */
char strings[NUM_STRINGS][STRING_SIZE];
int num_strings;
char target[STRING_SIZE];

/* the function called for each thread */
void* search_thread(void* idp) {
    /* get our thread id */
    int id = * (int*) idp;

    /* calculate the start and end points by evenly dividing the range */
    int start = (num_strings / num_threads) * id;
    int end = start + (num_strings / num_threads) - 1;

    /* the last thread needs to do all remaining ones */
    if (id == (num_threads - 1)) {
        end = num_strings;
    }

    /* search the array for the given string */
    int i;
    for (i = start; i < end; i++) {
        if (strstr(strings[i], target)) {
            printf("%s", strings[i]);
            printf(" matches ");
            printf("%s", target);
            printf(" on line ");
            printf("%d", i);
            printf("\n"); 
        }
    }

    return NULL;
}

/* the main thread loads up a file of strings and asks the user what to search for */
int main (int argc, char** argv) {
    /* get the number of threads */
    if (argc < 2) {
        printf("Pass the number of threads to use!\n");
        return 0;
    } else {
        num_threads = atoi(argv[1]);
    }

    /* read in the dictionary file! */
    FILE* fp = fopen("words.txt", "r");
    int i = 0;
    do {
        fscanf(fp, "%s", strings[i]);
        i++;
    } while (!feof(fp));
    num_strings = i;

    /* ask the user what to search for */
    printf("What text would you like to search for?\n");
    scanf("%s", target);

    /* an array of threads */
    pthread_t threads[num_threads];
    int ids[num_threads];

    /* spawn all threads */
    for (i = 0; i < num_threads; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, search_thread, &ids[i]);
    }

    /* join all threads */
    for (i = 0; i < num_threads; i++) {
        pthread_join(threads[i], NULL);
    }

    return 0;
}