hashers now use thread local arena

Instead of writing directly to file_hashes.txt, hash_workers now are
using a local arena, writing everything once at the end

using #pragma once to ensure that a given header file is included only
once in a single compilation unit
This commit is contained in:
2026-03-08 10:46:05 +01:00
parent ee02b83094
commit dd0797df79
7 changed files with 177 additions and 73 deletions

15
arena.c
View File

@@ -1,6 +1,5 @@
#include "base.h"
#include "arena.h"
#include "base.h"
/* ============================================================
Helper functions
@@ -132,9 +131,6 @@ mem_arena *arena_create(arena_params *params) { // mk create
u32 pagesize = arena_pagesize();
u64 align = params->align ? params->align : ARENA_ALIGN;
ASSERT((align & (align - 1)) == 0);
u64 reserve_size = ALIGN_UP_POW2(params->reserve_size, pagesize);
u64 commit_size =
params->commit_size ? ALIGN_UP_POW2(params->commit_size, pagesize) : 0;
@@ -181,7 +177,7 @@ mem_arena *arena_create(arena_params *params) { // mk create
arena->commit_size = commit_size;
arena->commit_pos = commit_size;
arena->align = align;
arena->align = params->align;
arena->push_size = 0;
arena->allow_free_list = params->allow_free_list;
@@ -360,7 +356,8 @@ void *arena_push(mem_arena **arena_ptr, u64 size, bool zero) { // mk push
u64 local_pre = ALIGN_UP_POW2(local_pos, selected->align);
u64 local_post = local_pre + size;
if (local_post > selected->reserve_size) {
if (local_post > selected->reserve_size -
ALIGN_UP_POW2(sizeof(mem_arena), selected->align)) {
if (arena->allow_free_list && arena->push_size == 0) {
u64 tail_start = selected->pos;
@@ -605,8 +602,8 @@ void *arena_swapback_pop(mem_arena **arena_ptr, u64 index) { // mk swapback
mem_arena *owner = arena_block_from_index(arena, index);
if (!owner) {
fprintf(stderr, "ERROR: Swapback pop failed, index out of range");
return NULL;
fprintf(stderr, "ERROR: Swapback pop failed, index out of range");
return NULL;
}
u8 *owner_base = (u8 *)owner + ALIGN_UP_POW2(sizeof(mem_arena), owner->align);
u8 *arena_base = (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);

View File

@@ -1,6 +1,4 @@
#ifndef BASE_ARENA_H
#define BASE_ARENA_H
#pragma once
#include "base.h"
// #define _CRT_SECURE_NO_WARNINGS
@@ -425,5 +423,3 @@ void *plat_mem_reserve(u64 size);
b32 plat_mem_commit(void *ptr, u64 size);
b32 plat_mem_decommit(void *ptr, u64 size);
b32 plat_mem_release(void *ptr, u64 size);
#endif // BASE_ARENA_H

61
arena_base.h Normal file
View File

@@ -0,0 +1,61 @@
#ifndef BASE_H
#define BASE_H
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/* ------------------------------------------------------------
Base types
------------------------------------------------------------ */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef i8 b8;
typedef int b32;
typedef float f32;
typedef double f64;
/* ------------------------------------------------------------
Size helpers
------------------------------------------------------------ */
#define KiB(x) ((u64)(x) * 1024ULL)
#define MiB(x) (KiB(x) * 1024ULL)
/* ------------------------------------------------------------
Min / Max helpers
------------------------------------------------------------ */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* ------------------------------------------------------------
Alignment helpers
------------------------------------------------------------ */
#define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
/* ------------------------------------------------------------
Assert
------------------------------------------------------------ */
#ifndef ASSERT
#define ASSERT(x) assert(x)
#endif
#endif // Base.h

39
base.h
View File

@@ -1,11 +1,40 @@
#ifndef BASE_H
#define BASE_H
#pragma once
#include <assert.h>
#include <immintrin.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#if defined(_WIN32) || defined(_WIN64)
#define PLATFORM_WINDOWS 1
#include <aclapi.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
#define strdup _strdup
#else
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#define XXH_VECTOR \
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"
/* ------------------------------------------------------------
Base types
@@ -25,12 +54,14 @@ typedef int b32;
typedef float f32;
typedef double f64;
/* ------------------------------------------------------------
Size helpers
------------------------------------------------------------ */
#define KiB(x) ((u64)(x) * 1024ULL)
#define MiB(x) (KiB(x) * 1024ULL)
#define GiB(x) (MiB(x) * 1024ULL)
/* ------------------------------------------------------------
Min / Max helpers
@@ -48,7 +79,7 @@ typedef double f64;
Alignment helpers
------------------------------------------------------------ */
#define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
#define ALIGN_UP_POW2(x, a) ((a) ? (((x) + ((a) - 1)) & ~((a) - 1)) : (x))
/* ------------------------------------------------------------
Assert
@@ -58,4 +89,4 @@ typedef double f64;
#define ASSERT(x) assert(x)
#endif
#endif // Base.h
#define NDEBUG // Comment to enable asserts

View File

@@ -20,3 +20,7 @@ Fix bug multiple threads committing at the same time, fixed by using atomic_flag
Reorder helper functions
v3.4: Rewriting hash_worker() to export file_hashes.txt
v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end
using #pragma once to ensure that a given header file is included only once in a single compilation unit
forcing xxhash to use the stack instead of the heap

View File

@@ -1,39 +1,10 @@
#pragma once
#pragma once // ensure that a given header file is included only once in a
// single compilation unit
#include <immintrin.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#if defined(_WIN32) || defined(_WIN64)
#define PLATFORM_WINDOWS 1
#include <aclapi.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
#define strdup _strdup
#else
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#define XXH_VECTOR \
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"
#include "arena.h"
#include "base.h"
#include "arena.c"
// ----------------------------- Config -------------------------------------
#define FILE_HASHES_TXT "file_hashes.txt"
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
@@ -124,6 +95,11 @@ typedef struct {
static MPMCQueue g_file_queue;
typedef struct {
MPMCQueue *queue;
mem_arena *arena;
} WorkerContext;
/* Scan folders */
typedef struct DirQueue DirQueue;

View File

@@ -1,5 +1,4 @@
#include "platform.h"
#include <stdio.h>
// ----------------------------- Globals ------------------------------------
static atomic_uint_fast64_t g_files_found = 0;
@@ -409,14 +408,10 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) {
// ------------------------- Hash worker --------------------------------
static DWORD WINAPI hash_worker(LPVOID arg) {
MPMCQueue *q = (MPMCQueue *)arg;
static CRITICAL_SECTION append_cs;
static LONG init = 0;
if (InterlockedCompareExchange(&init, 1, 0) == 0) {
InitializeCriticalSection(&append_cs);
}
WorkerContext *ctx = (WorkerContext *)arg;
MPMCQueue *q = ctx->queue;
mem_arena *local_arena = ctx->arena;
for (;;) {
FileEntry *fe = mpmc_pop(q);
@@ -432,16 +427,14 @@ static DWORD WINAPI hash_worker(LPVOID arg) {
double size_kib = (double)fe->size_bytes / 1024.0;
EnterCriticalSection(&append_cs);
char stack_buf[1024];
FILE *hf = fopen(FILE_HASHES_TXT, "a");
if (hf) {
fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hash, fe->path, size_kib,
created, modified, fe->owner);
fclose(hf);
}
int len =
snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
hash, fe->path, size_kib, created, modified, fe->owner);
LeaveCriticalSection(&append_cs);
char *dst = arena_push(&local_arena, len, false);
memcpy(dst, stack_buf, len);
atomic_fetch_add(&g_files_hashed, 1);
@@ -627,7 +620,7 @@ int main(int argc, char **argv) {
// Step 1: Scan all folders
// -------------------------------
mpmc_init(&g_file_queue, 1024 * 1024 * 1024);
mpmc_init(&g_file_queue, GiB(1));
DirQueue q;
memset(&q, 0, sizeof(q));
@@ -636,11 +629,30 @@ int main(int argc, char **argv) {
q.active = 0;
// starting hash threads
arena_params params = {
.reserve_size = GiB(1),
.commit_size = MiB(16),
.align = 0,
.push_size = 0,
.allow_free_list = true,
.allow_swapback = false,
.growth_policy = ARENA_GROWTH_NORMAL,
.commit_policy = ARENA_COMMIT_LAZY,
.max_nbre_blocks = 0,
};
WorkerContext workers[num_threads];
for (int i = 0; i < num_threads; i++) {
workers[i].queue = &g_file_queue;
workers[i].arena = arena_create(&params);
}
HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads);
for (size_t i = 0; i < num_threads; ++i) {
hash_threads[i] =
CreateThread(NULL, 0, hash_worker, &g_file_queue, 0, NULL);
hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL);
}
// starting scan threads
@@ -662,7 +674,6 @@ int main(int argc, char **argv) {
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
// mpmc_finish(&g_file_queue);
// debug
for (size_t i = 0; i < num_threads; i++) {
mpmc_push(&g_file_queue, NULL);
@@ -694,11 +705,39 @@ int main(int argc, char **argv) {
CloseHandle(hash_threads[i]);
free(hash_threads);
// free(g_file_queue.items);
WaitForSingleObject(progress, INFINITE);
CloseHandle(progress);
// write file_hashes.txt
// FILE *f = fopen(FILE_HASHES_TXT, "wb");
//
// for (int i = 0; i < num_threads; i++) {
// mem_arena *arena = workers[i].arena;
//
// u8 *arena_base =
// (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
// fwrite(arena_base, 1, arena->pos, f);
// }
//
// fclose(f);
HANDLE h = CreateFileA(FILE_HASHES_TXT, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
for (int i = 0; i < num_threads; i++) {
mem_arena *arena = workers[i].arena;
DWORD written;
u8 *arena_base =
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
WriteFile(h, arena_base, (DWORD)arena->pos, &written, NULL);
}
// done time
double total_seconds = timer_stop(&total_timer);