LF MPMC queue improvements

Small improvements of the LF MPMC queue

Making the LF MPMC queue generic and in a separate header file
This commit is contained in:
2026-03-09 13:21:45 +01:00
parent b2f444af00
commit a299c4a1e1
9 changed files with 230 additions and 148 deletions

1
.gitignore vendored
View File

@@ -4,3 +4,4 @@ file_hasher.rdi
file_hasher.exe file_hasher.exe
file_hashes.txt file_hashes.txt
file_list.txt file_list.txt
temp.c

71
arena.c
View File

@@ -1,6 +1,6 @@
#include "arena.h" #pragma once
#include "base.h"
#include "arena.h"
/* ============================================================ /* ============================================================
Helper functions Helper functions
============================================================ */ ============================================================ */
@@ -820,70 +820,3 @@ mem_arena_temp arena_scratch_get(mem_arena **conflicts, u32 num_conflicts) {
} }
void arena_scratch_release(mem_arena_temp scratch) { arena_temp_end(scratch); } void arena_scratch_release(mem_arena_temp scratch) { arena_temp_end(scratch); }
/* Platform virtual-memory layer used by the arena.
   reserve = claim address space without backing pages,
   commit  = make pages readable/writable,
   decommit = give pages back while keeping the range reserved,
   release = unmap the whole range.
   NOTE(review): this block also appears (as static functions) in base.h and
   another header in this commit — consider keeping a single definition to
   avoid divergence. */
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
/* Returns the VM page size in bytes. */
u32 plat_get_pagesize(void) {
SYSTEM_INFO sysinfo = {0};
GetSystemInfo(&sysinfo);
return sysinfo.dwPageSize;
}
/* Reserves (but does not commit) `size` bytes; NULL on failure. */
void *plat_mem_reserve(u64 size) {
return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_READWRITE);
}
/* Commits previously reserved pages; returns nonzero on success. */
b32 plat_mem_commit(void *ptr, u64 size) {
void *ret = VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE);
return ret != NULL;
}
b32 plat_mem_decommit(void *ptr, u64 size) {
return VirtualFree(ptr, size, MEM_DECOMMIT);
}
/* NOTE(review): MEM_RELEASE requires size == 0 per the VirtualFree docs;
   passing the original `size` here makes the call fail — verify. */
b32 plat_mem_release(void *ptr, u64 size) {
return VirtualFree(ptr, size, MEM_RELEASE);
}
#elif defined(__linux__)
/* NOTE(review): defining _DEFAULT_SOURCE this late has no effect if any libc
   header was already included in this translation unit — prefer a compiler
   flag (-D_DEFAULT_SOURCE); it is needed for madvise/MADV_DONTNEED. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif
#include <sys/mman.h>
#include <unistd.h>
u32 plat_get_pagesize(void) { return (u32)sysconf(_SC_PAGESIZE); }
/* Reserve via PROT_NONE mapping: address space only, no accessible pages. */
void *plat_mem_reserve(u64 size) {
void *out = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (out == MAP_FAILED) {
return NULL;
}
return out;
}
/* Commit by enabling read/write access on the reserved range. */
b32 plat_mem_commit(void *ptr, u64 size) {
i32 ret = mprotect(ptr, size, PROT_READ | PROT_WRITE);
return ret == 0;
}
/* Decommit: drop access, then tell the kernel the pages can be reclaimed. */
b32 plat_mem_decommit(void *ptr, u64 size) {
i32 ret = mprotect(ptr, size, PROT_NONE);
if (ret != 0)
return false;
ret = madvise(ptr, size, MADV_DONTNEED);
return ret == 0;
}
b32 plat_mem_release(void *ptr, u64 size) {
i32 ret = munmap(ptr, size);
return ret == 0;
}
#endif

47
arena.h
View File

@@ -1,45 +1,7 @@
#pragma once #pragma once
#include "base.h" #include "base.h"
// #define _CRT_SECURE_NO_WARNINGS
//
// #include <assert.h>
// #include <stdbool.h>
// #include <stdint.h>
// #include <stdio.h>
// #include <string.h>
//
// /* ------------------------------------------------------------
// Base types
// ------------------------------------------------------------ */
//
// typedef uint8_t u8;
// typedef uint32_t u32;
// typedef uint64_t u64;
// typedef int32_t i32;
// typedef int b32;
//
// /* ------------------------------------------------------------
// Size helpers
// ------------------------------------------------------------ */
//
// #define KiB(x) ((u64)(x) * 1024ULL)
// #define MiB(x) (KiB(x) * 1024ULL)
//
// /* ------------------------------------------------------------
// Alignment helpers
// ------------------------------------------------------------ */
//
// #define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
//
// /* ------------------------------------------------------------
// Assert
// ------------------------------------------------------------ */
//
// #ifndef ASSERT
// #define ASSERT(x) assert(x)
// #endif
//
/* /*
=============================================================================== ===============================================================================
ARENA USAGE GUIDE ARENA USAGE GUIDE
@@ -417,10 +379,3 @@ void arena_scratch_release(mem_arena_temp scratch);
#define ARENA_PUSH_NZ(arena, size) arena_push((arena), (size), false) #define ARENA_PUSH_NZ(arena, size) arena_push((arena), (size), false)
#define arena_pop(arena_ptr) arena_pop_to((arena_ptr), 1) #define arena_pop(arena_ptr) arena_pop_to((arena_ptr), 1)
u32 plat_get_pagesize(void);
void *plat_mem_reserve(u64 size);
b32 plat_mem_commit(void *ptr, u64 size);
b32 plat_mem_decommit(void *ptr, u64 size);
b32 plat_mem_release(void *ptr, u64 size);

View File

@@ -1,5 +1,4 @@
#ifndef BASE_H #pragma once
#define BASE_H
#include <assert.h> #include <assert.h>
#include <stdbool.h> #include <stdbool.h>
@@ -58,4 +57,73 @@ typedef double f64;
#define ASSERT(x) assert(x) #define ASSERT(x) assert(x)
#endif #endif
#endif // Base.h /* ------------------------------------------------------------
Some helper functions
------------------------------------------------------------ */
/* Header-local (static) platform VM helpers: reserve/commit/decommit/release.
   NOTE(review): duplicated verbatim in arena.c and base.h in this same
   commit — one copy should be the single source of truth. */
#if defined(_WIN32) || defined(_WIN64)
#include <Windows.h>
/* VM page size in bytes. */
static u32 plat_get_pagesize(void) {
SYSTEM_INFO sysinfo = {0};
GetSystemInfo(&sysinfo);
return sysinfo.dwPageSize;
}
/* Reserve address space without committing pages; NULL on failure. */
static void *plat_mem_reserve(u64 size) {
return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_READWRITE);
}
static b32 plat_mem_commit(void *ptr, u64 size) {
void *ret = VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE);
return ret != NULL;
}
static b32 plat_mem_decommit(void *ptr, u64 size) {
return VirtualFree(ptr, size, MEM_DECOMMIT);
}
/* NOTE(review): VirtualFree with MEM_RELEASE requires size == 0 — passing
   `size` makes the release fail; verify against the Win32 docs. */
static b32 plat_mem_release(void *ptr, u64 size) {
return VirtualFree(ptr, size, MEM_RELEASE);
}
#elif defined(__linux__)
/* NOTE(review): too late if libc headers precede this include — prefer
   -D_DEFAULT_SOURCE on the command line (needed for madvise). */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif
#include <sys/mman.h>
#include <unistd.h>
static u32 plat_get_pagesize(void) { return (u32)sysconf(_SC_PAGESIZE); }
/* Reserve via inaccessible PROT_NONE mapping. */
static void *plat_mem_reserve(u64 size) {
void *out = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (out == MAP_FAILED) {
return NULL;
}
return out;
}
/* Commit = make the range read/write. */
static b32 plat_mem_commit(void *ptr, u64 size) {
i32 ret = mprotect(ptr, size, PROT_READ | PROT_WRITE);
return ret == 0;
}
/* Decommit = revoke access, then let the kernel reclaim the pages. */
static b32 plat_mem_decommit(void *ptr, u64 size) {
i32 ret = mprotect(ptr, size, PROT_NONE);
if (ret != 0)
return false;
ret = madvise(ptr, size, MADV_DONTNEED);
return ret == 0;
}
static b32 plat_mem_release(void *ptr, u64 size) {
i32 ret = munmap(ptr, size);
return ret == 0;
}
#endif

69
base.h
View File

@@ -90,3 +90,72 @@ typedef double f64;
#endif #endif
#define NDEBUG // Comment to enable asserts #define NDEBUG // Comment to enable asserts
/* ------------------------------------------------------------
Some helper functions
------------------------------------------------------------ */
/* Static platform VM helpers added to base.h (third copy in this commit —
   NOTE(review): consolidate with the arena.c and header copies). */
#if defined(_WIN32) || defined(_WIN64)
/* NOTE(review): this branch uses SYSTEM_INFO/VirtualAlloc but no
   <windows.h> include is visible in this hunk — confirm base.h (or every
   includer) pulls it in first, otherwise this fails to compile on Windows. */
static u32 plat_get_pagesize(void) {
SYSTEM_INFO sysinfo = {0};
GetSystemInfo(&sysinfo);
return sysinfo.dwPageSize;
}
/* Reserve address space only; pages are not yet accessible. */
static void *plat_mem_reserve(u64 size) {
return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_READWRITE);
}
static b32 plat_mem_commit(void *ptr, u64 size) {
void *ret = VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE);
return ret != NULL;
}
static b32 plat_mem_decommit(void *ptr, u64 size) {
return VirtualFree(ptr, size, MEM_DECOMMIT);
}
/* NOTE(review): MEM_RELEASE requires the size argument to be 0 — verify. */
static b32 plat_mem_release(void *ptr, u64 size) {
return VirtualFree(ptr, size, MEM_RELEASE);
}
#elif defined(__linux__)
/* NOTE(review): ineffective if libc headers were included earlier; prefer
   defining _DEFAULT_SOURCE via the build system (needed for madvise). */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif
#include <sys/mman.h>
#include <unistd.h>
static u32 plat_get_pagesize(void) { return (u32)sysconf(_SC_PAGESIZE); }
/* PROT_NONE anonymous mapping = reserved-but-inaccessible range. */
static void *plat_mem_reserve(u64 size) {
void *out = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (out == MAP_FAILED) {
return NULL;
}
return out;
}
static b32 plat_mem_commit(void *ptr, u64 size) {
i32 ret = mprotect(ptr, size, PROT_READ | PROT_WRITE);
return ret == 0;
}
/* Revoke access, then advise the kernel it may drop the backing pages. */
static b32 plat_mem_decommit(void *ptr, u64 size) {
i32 ret = mprotect(ptr, size, PROT_NONE);
if (ret != 0)
return false;
ret = madvise(ptr, size, MADV_DONTNEED);
return ret == 0;
}
static b32 plat_mem_release(void *ptr, u64 size) {
i32 ret = munmap(ptr, size);
return ret == 0;
}
#endif

View File

@@ -20,8 +20,10 @@ Reorder helper functions
v3.4: Rewriting hash_worker() to export file_hashes.txt v3.4: Rewriting hash_worker() to export file_hashes.txt
v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end 3.5: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end
Using #pragma once to ensure that a given header file is included only once in a single compilation unit Using #pragma once to ensure that a given header file is included only once in a single compilation unit
Forcing xxhash to use the stack instead of the heap Forcing xxhash to use the stack instead of the heap
Making the hashing buffer reusable instead of malloc every file Making the hashing buffer reusable instead of malloc every file
Implementing a general purpose arena to replace small allocations Implementing a general purpose arena to replace small allocations
Small improvements of the LF MPMC queue
Making the LF MPMC queue generic and in a separate header file Making the LF MPMC queue generic and in a separate header file

Binary file not shown.

105
lf_mpmc.h
View File

@@ -1,26 +1,37 @@
#pragma once #pragma once
/*note:
After producers finishes, push N poison pills where N = number of consumer
threads.
for (size_t i = 0; i < num_threads; i++) {
mpmc_push(&g_file_queue, NULL);
}
*/
#include "base.h" #include "base.h"
typedef struct { #define CACHELINE 64
#if defined(_MSC_VER)
#define CACHE_ALIGN __declspec(align(CACHELINE))
#else
#define CACHE_ALIGN __attribute__((aligned(CACHELINE)))
#endif
#if defined(__GNUC__) || defined(__clang__)
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
/* Spin-wait hint for busy-wait loops: reduces power use and pipeline
   penalties while spinning. Compiles to a no-op on non-x86 targets. */
static void cpu_pause(void) {
/* Gate on x86/x64 ARCHITECTURE macros, not on _MSC_VER: the original
   `defined(_MSC_VER) || defined(__x86_64__) || defined(__i386__)` guard
   called _mm_pause() for ANY MSVC target, including ARM/ARM64 where the
   intrinsic does not exist. _M_X64/_M_IX86 are MSVC's x86 arch macros. */
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
_mm_pause();
#endif
}
typedef struct CACHE_ALIGN {
atomic_size_t seq; atomic_size_t seq;
void *data; void *data;
char pad[64 - sizeof(atomic_size_t) - sizeof(void *)]; char pad[64 - sizeof(atomic_size_t) - sizeof(void *)];
} MPMCSlot; } MPMCSlot;
typedef struct { typedef struct {
atomic_size_t head; CACHE_ALIGN atomic_size_t head;
char pad1[64]; CACHE_ALIGN atomic_size_t tail;
atomic_size_t tail;
char pad2[64];
size_t capacity; size_t capacity;
size_t mask; size_t mask;
@@ -38,12 +49,19 @@ typedef struct {
// files can lead to ODR violations (multiple definition errors if included in // files can lead to ODR violations (multiple definition errors if included in
// more than one file) // more than one file)
/* ----------------------------------------------------------- */
/* INIT */
/* ----------------------------------------------------------- */
static void mpmc_init(MPMCQueue *q, size_t max_capacity) { static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
if ((max_capacity & (max_capacity - 1)) != 0) { if (!max_capacity) {
fprintf(stderr, "capacity must be power of two\n"); fprintf(stderr, "capacity must positive\n");
exit(1); exit(1);
} }
u32 pagesize = plat_get_pagesize();
max_capacity = ALIGN_UP_POW2(max_capacity, pagesize);
q->capacity = max_capacity; q->capacity = max_capacity;
q->mask = max_capacity - 1; q->mask = max_capacity - 1;
@@ -56,11 +74,10 @@ static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
exit(1); exit(1);
} }
q->commit_step = (64ull * 1024 * 1024) / sizeof(MPMCSlot); q->commit_step = pagesize;
atomic_flag_clear(&q->commit_lock); atomic_flag_clear(&q->commit_lock);
q->committed = q->commit_step; q->committed = q->commit_step;
VirtualAlloc(q->slots, q->commit_step * sizeof(MPMCSlot), MEM_COMMIT, VirtualAlloc(q->slots, q->commit_step * sizeof(MPMCSlot), MEM_COMMIT,
PAGE_READWRITE); PAGE_READWRITE);
@@ -73,6 +90,9 @@ static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
atomic_init(&q->tail, 0); atomic_init(&q->tail, 0);
} }
/* ----------------------------------------------------------- */
/* COMMIT MORE MEMORY */
/* ----------------------------------------------------------- */
static void mpmc_commit_more(MPMCQueue *q) { static void mpmc_commit_more(MPMCQueue *q) {
if (atomic_flag_test_and_set(&q->commit_lock)) if (atomic_flag_test_and_set(&q->commit_lock))
@@ -111,6 +131,9 @@ static void mpmc_commit_more(MPMCQueue *q) {
atomic_flag_clear(&q->commit_lock); atomic_flag_clear(&q->commit_lock);
} }
/* ----------------------------------------------------------- */
/* PUSH */
/* ----------------------------------------------------------- */
static void mpmc_push(MPMCQueue *q, void *item) { static void mpmc_push(MPMCQueue *q, void *item) {
MPMCSlot *slot; MPMCSlot *slot;
size_t pos; size_t pos;
@@ -123,7 +146,7 @@ static void mpmc_push(MPMCQueue *q, void *item) {
size_t committed = size_t committed =
atomic_load_explicit(&q->committed, memory_order_relaxed); atomic_load_explicit(&q->committed, memory_order_relaxed);
if (pos >= committed) { if (unlikely(pos >= committed)) {
mpmc_commit_more(q); mpmc_commit_more(q);
continue; continue;
} }
@@ -133,7 +156,7 @@ static void mpmc_push(MPMCQueue *q, void *item) {
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire); size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
intptr_t diff = (intptr_t)seq - (intptr_t)pos; intptr_t diff = (intptr_t)seq - (intptr_t)pos;
if (diff == 0) { if (likely(diff == 0)) {
if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1, if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1,
memory_order_relaxed, memory_order_relaxed,
@@ -155,6 +178,9 @@ static void mpmc_push(MPMCQueue *q, void *item) {
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release); atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
} }
/* ----------------------------------------------------------- */
/* POP */
/* ----------------------------------------------------------- */
static void *mpmc_pop(MPMCQueue *q) { static void *mpmc_pop(MPMCQueue *q) {
MPMCSlot *slot; MPMCSlot *slot;
size_t pos; size_t pos;
@@ -169,7 +195,7 @@ static void *mpmc_pop(MPMCQueue *q) {
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire); size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1); intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1);
if (diff == 0) { if (likely(diff == 0)) {
if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1, if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1,
memory_order_relaxed, memory_order_relaxed,
@@ -183,13 +209,10 @@ static void *mpmc_pop(MPMCQueue *q) {
} else { // slot is still transitioning (written by another thread) } else { // slot is still transitioning (written by another thread)
if (++spins > 10) { if (++spins > 10) {
SwitchToThread(); // yield CPU SwitchToThread(); // yield CPU
spins = 0; spins = 0;
} else { } else {
cpu_pause();
_mm_pause(); // busy waiting
} }
} }
} }
@@ -200,3 +223,37 @@ static void *mpmc_pop(MPMCQueue *q) {
return data; return data;
} }
/* ----------------------------------------------------------- */
/* PUSH POISON */
/* ----------------------------------------------------------- */
/* Signals end-of-stream to consumers.
   After all producers finish, push N poison pills (NULL items) where
   N = number of consumer threads; each consumer pops exactly one NULL
   and stops, so every consumer is guaranteed to terminate. */
static void mpmc_producers_finished(MPMCQueue *q, u8 consumer_count) {
for (u8 i = 0; i < consumer_count; i++) {
mpmc_push(q, NULL);
}
}
/* ----------------------------------------------------------- */
/* MPMC Cleanup */
/* ----------------------------------------------------------- */
/* Releases the queue's slot memory and resets all fields to a zeroed state.
   Safe to call with NULL or with an already-finished queue (slots == NULL).
   Must only be called after all producers and consumers have stopped —
   the relaxed stores below provide no synchronization.
   NOTE(review): VirtualFree is Windows-only, which contradicts this
   header's generic/portable intent — presumably this should go through
   plat_mem_release; confirm and unify with the plat_* layer. */
static void mpmc_finish(MPMCQueue *q) {
if (!q)
return;
if (q->slots) {
VirtualFree(q->slots, 0, MEM_RELEASE);
q->slots = NULL;
}
q->capacity = 0;
q->mask = 0;
atomic_store_explicit(&q->head, 0, memory_order_relaxed);
atomic_store_explicit(&q->tail, 0, memory_order_relaxed);
atomic_store_explicit(&q->committed, 0, memory_order_relaxed);
}

View File

@@ -1,4 +1,3 @@
#include "arena.h"
#include "platform.h" #include "platform.h"
// ----------------------------- Globals ------------------------------------ // ----------------------------- Globals ------------------------------------
@@ -475,7 +474,7 @@ int main(int argc, char **argv) {
arena_free(&gp_arena, (u8 **)&buf, len); arena_free(&gp_arena, (u8 **)&buf, len);
// Add some extra threads to overlap I/O more aggressively // Add some extra threads to overlap I/O more aggressively
size_t num_threads = hw_threads * 2; u8 num_threads = hw_threads * 2;
if (num_threads < 2) if (num_threads < 2)
num_threads = 2; num_threads = 2;
@@ -527,9 +526,7 @@ int main(int argc, char **argv) {
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE); WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
for (size_t i = 0; i < num_threads; i++) { mpmc_producers_finished(&g_file_queue, num_threads);
mpmc_push(&g_file_queue, NULL);
}
atomic_store(&g_scan_done, 1); atomic_store(&g_scan_done, 1);