Using xxhash xxh_x86dispatch to select the best SIMD instruction set at runtime

This dispatcher cannot be included in a unity build, and we must remove the
AVX2 or AVX512 compilation flags and link xxh_x86dispatch.c in the
compilation command. The compilation emits two warnings about functions
with internal linkage that are declared but not defined; they are defined in
xxh_x86dispatch.c, so these warnings are harmless.
This commit is contained in:
2026-03-13 16:24:31 +01:00
parent c1abada7ba
commit d35858df01
3 changed files with 56 additions and 56 deletions

View File

@@ -41,3 +41,5 @@ Replacing Malloc and strdup in scan helper function with FileEntry and path aren
Making the MPMC queue support producers that are also consumers by adding a variable work: mpmc_push_work() increments work, mpmc_task_done() decrements work and, when work = 0, calls mpmc_producers_finished(), which pushes poisons to wake up sleeping threads and make them return NULL
Replacing DirQueue (a queue grown with realloc) with the MPMC queue
4.1: Using xxhash xxh_x86dispatch to select the best SIMD instruction set at runtime. This dispatcher cannot be included in a unity build, and we must remove the AVX2 or AVX512 compilation flags and link xxh_x86dispatch.c in the compilation command. The compilation emits two warnings about functions with internal linkage that are declared but not defined; they are defined in xxh_x86dispatch.c, so these warnings are harmless

View File

@@ -7,12 +7,9 @@
#include "arena.c"
#define XXH_VECTOR \
XXH_AVX2 // not recommended to compile with gcc, see xxhash.h line 4082
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
// xxhash include
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"
#include "xxh_x86dispatch.h"
// ----------------------------- Config -------------------------------------
// Name of the output file opened by the export step in main().
#define FILE_HASHES_TXT "file_hashes.txt"
@@ -21,6 +18,15 @@
#define READ_BLOCK (64 * 1024) // block size in bytes: 64 KiB (65536)
// ----------------------------- Data types ---------------------------------
// Timer
// Pair of performance-counter readings that delimit one timed interval.
typedef struct {
LARGE_INTEGER start; // counter value captured by timer_start()
LARGE_INTEGER end; // counter value captured by timer_stop()
} HiResTimer;
// Performance-counter frequency; filled in by timer_init() and used as the
// divisor in timer_stop().
static LARGE_INTEGER g_qpc_freq;
// File entry
typedef struct FileEntry {
char *path;
@@ -30,31 +36,7 @@ typedef struct FileEntry {
char owner[128]; // resolved owner name
} FileEntry;
// Presumably reports the creation and modification times of `path` through
// `out_created` and `out_modified`; the time unit and epoch are not visible
// here — TODO confirm against the implementation.
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified);
// Presumably writes the owner name of `path` into `out_owner`, a buffer of
// `out_owner_size` bytes — TODO confirm truncation/termination behavior
// against the implementation.
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size);
/* scan folder timer*/
// Pair of performance-counter readings that delimit one timed interval.
typedef struct {
LARGE_INTEGER start; // counter value captured by timer_start()
LARGE_INTEGER end; // counter value captured by timer_stop()
} HiResTimer;
// Performance-counter frequency; filled in by timer_init() and used as the
// divisor in timer_stop().
static LARGE_INTEGER g_qpc_freq;
// Caches the performance-counter frequency in g_qpc_freq. Call this before
// timer_stop(), which divides by the cached value.
static void timer_init(void)
{
    QueryPerformanceFrequency(&g_qpc_freq);
}
// Stores the current performance-counter reading in t->start, marking the
// beginning of the interval measured by `t`.
static void timer_start(HiResTimer *t)
{
    QueryPerformanceCounter(&t->start);
}
// Stores the current performance-counter reading in t->end and returns the
// interval since t->start, expressed as counter ticks divided by the
// frequency cached in g_qpc_freq (i.e. seconds).
static double timer_stop(HiResTimer *t)
{
    QueryPerformanceCounter(&t->end);

    const double elapsed_ticks = (double)(t->end.QuadPart - t->start.QuadPart);
    const double ticks_per_second = (double)g_qpc_freq.QuadPart;
    return elapsed_ticks / ticks_per_second;
}
// Workers context
// Threads context
typedef struct {
u8 num_threads;

View File

@@ -7,6 +7,35 @@ static atomic_uint_fast64_t g_bytes_processed = 0;
static atomic_int g_scan_done = 0;
// ============================= Utils ======================================
// ----------------------------- Timer functions --------------
// Caches the performance-counter frequency in g_qpc_freq. Call this before
// timer_stop(), which divides by the cached value.
static void timer_init(void)
{
    QueryPerformanceFrequency(&g_qpc_freq);
}
// Stores the current performance-counter reading in t->start, marking the
// beginning of the interval measured by `t`.
static void timer_start(HiResTimer *t)
{
    QueryPerformanceCounter(&t->start);
}
// Stores the current performance-counter reading in t->end and returns the
// interval since t->start, expressed as counter ticks divided by the
// frequency cached in g_qpc_freq (i.e. seconds).
static double timer_stop(HiResTimer *t)
{
    QueryPerformanceCounter(&t->end);

    const double elapsed_ticks = (double)(t->end.QuadPart - t->start.QuadPart);
    const double ticks_per_second = (double)g_qpc_freq.QuadPart;
    return elapsed_ticks / ticks_per_second;
}
// ----------------------------- Get instruction set --------------
// Maps the vector-implementation ID returned by XXH_featureTest() to a
// printable name. Returns a pointer to a string literal (never NULL); any ID
// other than the four handled below yields "Unknown".
const char *get_xxhash_instruction_set(void)
{
    const int vec_id = XXH_featureTest();

    if (vec_id == XXH_SCALAR) {
        return "Scalar (portable C)";
    }
    if (vec_id == XXH_SSE2) {
        return "SSE2";
    }
    if (vec_id == XXH_AVX2) {
        return "AVX2";
    }
    if (vec_id == XXH_AVX512) {
        return "AVX-512";
    }
    return "Unknown";
}
// ----------------------------- Normalize path --------------
static void normalize_path(char *p) {
char *src = p;
@@ -427,6 +456,10 @@ int main(int argc, char **argv) {
// Logical threads = CPU cores * 2
size_t num_threads = hw_threads * 2;
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
hw_threads);
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
// -------------------------------
// Scanning and hashing
// -------------------------------
@@ -492,9 +525,7 @@ int main(int argc, char **argv) {
for (size_t i = 0; i < num_scan_threads; ++i)
CloseHandle(scan_threads[i]);
for (size_t i = 0; i < num_hash_threads; i++) {
mpmc_push(&file_queue, NULL);
}
mpmc_producers_finished(&file_queue, num_hash_threads);
atomic_store(&g_scan_done, 1);
@@ -531,33 +562,18 @@ int main(int argc, char **argv) {
// Export file_hashes.txt
// -------------------------------
// FILE *f = fopen(FILE_HASHES_TXT, "wb");
//
// for (int i = 0; i < num_threads; i++) {
// mem_arena *arena = workers[i].arena;
//
// u8 *arena_base =
// (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
// fwrite(arena_base, 1, arena->pos, f);
// }
//
// fclose(f);
FILE *f = fopen(FILE_HASHES_TXT, "wb");
HANDLE h = CreateFileA(FILE_HASHES_TXT, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
for (int i = 0; i < num_threads; i++) {
mem_arena *arena = workers[i].arena;
for (int i = 0; i < num_hash_threads; i++) {
mem_arena *local_hash_arena = workers[i].arena;
DWORD written;
u8 *arena_base = (u8 *)local_hash_arena +
ALIGN_UP_POW2(sizeof(mem_arena), local_hash_arena->align);
WriteFile(h, arena_base, (DWORD)local_hash_arena->pos, &written, NULL);
u8 *arena_base =
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
fwrite(arena_base, 1, arena->pos, f);
}
fclose(f);
// -------------------------------
// Print summary
// -------------------------------