Files
filehasher/platform.h
amir 0cf0d6c26a Bug fixes in the lock free mpmc queue
Fix bug: slots were used before initialization. Compare-and-swap protects
the update of committed, but it does not protect the memory initialization.
Add atomic_flag commit_lock to protect against that.

Fix bug: multiple threads could commit at the same time. Fixed by using
atomic_flag commit_lock and re-checking committed after acquiring the
lock.
2026-03-07 10:29:48 +01:00

176 lines
3.8 KiB
C

#pragma once
#include <immintrin.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#if defined(_WIN32) || defined(_WIN64)
#define PLATFORM_WINDOWS 1
#include <aclapi.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
#define strdup _strdup
#else
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#define XXH_VECTOR \
XXH_AVX2 // not recommended to compile with gcc, see xxhash.h line 4082
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"
// ----------------------------- Config -------------------------------------
// File name of the hashes text file.
// NOTE(review): how this file is read or written is not visible in this header.
#define FILE_HASHES_TXT "file_hashes.txt"
// Buffer size needed to hold one hash rendered as a hex string.
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
// Path length limit used by the project.
// NOTE(review): whether this counts the terminating null depends on callers.
#define MAX_PATHLEN 4096
// Block size in bytes (65536); presumably the file read chunk size - confirm.
#define READ_BLOCK (64 * 1024) // 64KB blocks
// ----------------------------- Data types ---------------------------------
/*
 * Metadata record describing a single file: its path, size, two timestamps
 * and the owner name stored inline in a fixed 128-byte buffer.
 *
 * NOTE(review): ownership of `path` is not visible in this header; it is
 * presumably heap-allocated by whoever fills the entry and must be freed by
 * the entry's owner - confirm against the code that creates FileEntry.
 */
typedef struct FileEntry {
char *path;
uint64_t size_bytes;
uint64_t created_time; // epoch
uint64_t modified_time; // epoch seconds
char owner[128]; // resolved owner name
} FileEntry;
/* File path and metadata */
/*
 * Normalizes a path string in place:
 *   - every backslash is turned into a forward slash;
 *   - any run of consecutive separators ('/' or '\\') collapses to one '/'.
 *
 * The result is never longer than the input, so the rewrite happens inside
 * the caller's buffer. `p` must be a valid, writable, null-terminated string.
 */
static void normalize_path(char *p) {
  size_t out = 0;           /* next write position, always <= read position */
  int in_separator_run = 0; /* non-zero while the last kept char was a '/' */

  for (size_t in = 0; p[in] != '\0'; in++) {
    const char ch = p[in];
    const int is_sep = (ch == '/' || ch == '\\');

    /* Second and later separators of a run are dropped. */
    if (is_sep && in_separator_run) {
      continue;
    }

    p[out++] = is_sep ? '/' : ch;
    in_separator_run = is_sep;
  }

  p[out] = '\0';
}
/*
 * Declaration only; the definition is not in this header.
 * Takes a file path and two output pointers.
 * NOTE(review): presumably stores the file's creation and last-modification
 * timestamps (epoch-based, like the FileEntry fields) through the output
 * pointers - confirm units and failure behavior against the implementation.
 */
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified);
/*
 * Declaration only; the definition is not in this header.
 * Takes a file path, an output character buffer and that buffer's size.
 * NOTE(review): presumably writes the resolved owner name into `out_owner`,
 * bounded by `out_owner_size` - confirm truncation and failure behavior
 * against the implementation.
 */
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size);
/*
 * Scan-folder timer.
 *
 * Usage: call timer_init() once, then timer_start(&t) ... timer_stop(&t).
 * timer_stop() returns the elapsed time in seconds as a double.
 *
 * The Windows variant uses the performance counter. The non-Windows variant
 * uses struct timespec so that this header also compiles on POSIX, in line
 * with the platform split used elsewhere in this file.
 */
#if PLATFORM_WINDOWS
typedef struct {
  LARGE_INTEGER start;
  LARGE_INTEGER end;
} HiResTimer;

/* Counter ticks per second; filled in by timer_init(). */
static LARGE_INTEGER g_qpc_freq;

/* Must be called before timer_stop(), otherwise the divisor below is zero. */
static void timer_init(void) { QueryPerformanceFrequency(&g_qpc_freq); }

static void timer_start(HiResTimer *t) { QueryPerformanceCounter(&t->start); }

static double timer_stop(HiResTimer *t) {
  QueryPerformanceCounter(&t->end);
  return (double)(t->end.QuadPart - t->start.QuadPart) /
         (double)g_qpc_freq.QuadPart;
}
#else
typedef struct {
  struct timespec start;
  struct timespec end;
} HiResTimer;

/*
 * Reads the current time. Prefers the monotonic clock when <time.h> exposes
 * it; under a strict ISO C11 build it is hidden, so fall back to the C11
 * timespec_get() wall clock.
 */
static void timer_now(struct timespec *ts) {
#if defined(CLOCK_MONOTONIC)
  clock_gettime(CLOCK_MONOTONIC, ts);
#else
  timespec_get(ts, TIME_UTC);
#endif
}

/* Nothing to set up here; kept so callers are identical on all platforms. */
static void timer_init(void) {}

static void timer_start(HiResTimer *t) { timer_now(&t->start); }

static double timer_stop(HiResTimer *t) {
  timer_now(&t->end);
  return (double)(t->end.tv_sec - t->start.tv_sec) +
         (double)(t->end.tv_nsec - t->start.tv_nsec) / 1e9;
}
#endif
// ============================================================
// Atomic MPMC Queue (FileEntry*): sequence-numbered slots, no mutex
// ============================================================
/*
 * One slot of the MPMC queue: an atomic sequence value plus the FileEntry
 * pointer held by the slot.
 *
 * `pad` is sized so that seq + data + pad add up to 64 bytes.
 * NOTE(review): 64 is presumably the cache-line size, to keep neighbouring
 * slots from sharing a line. There is no alignment specifier on the struct,
 * so that only holds if the slot array itself is 64-byte aligned - confirm
 * at the allocation site.
 */
typedef struct {
atomic_size_t seq;
FileEntry *data;
char pad[64 - sizeof(atomic_size_t) - sizeof(FileEntry *)];
} MPMCSlot;
/*
 * Queue control block for the MPMC queue of FileEntry pointers.
 *
 * head / tail   atomic positions, each followed by 64 bytes of padding so
 *               the two counters do not sit next to each other in memory.
 * capacity      number of slots.
 * mask          NOTE(review): presumably capacity - 1 with capacity a power
 *               of two, used to wrap positions - confirm in the queue code.
 * committed,
 * commit_step,
 * commit_lock   NOTE(review): these look like bookkeeping for making slot
 *               memory usable in steps of `commit_step`, with `commit_lock`
 *               serializing the threads that perform that step - confirm
 *               against the queue implementation.
 * slots         pointer to the slot array.
 */
typedef struct {
atomic_size_t head;
char pad1[64];
atomic_size_t tail;
char pad2[64];
size_t capacity;
size_t mask;
atomic_size_t committed;
size_t commit_step;
atomic_flag commit_lock;
MPMCSlot *slots;
} MPMCQueue;
/*
 * NOTE(review): this is a `static` object defined in a header, so every
 * translation unit that includes the header gets its own separate queue.
 * That is only correct if the header is included from a single .c file.
 */
static MPMCQueue g_file_queue;
/* Scan folders */
/* Forward declaration so the prototypes below can take a DirQueue pointer
 * before the struct body is defined further down. */
typedef struct DirQueue DirQueue;
/*
 * Per-platform folder scanners; declarations only, defined elsewhere.
 * Both take a base path and a directory queue.
 * NOTE(review): presumably each call scans `base` and uses `q` to hand
 * sub-directories to other workers - confirm against the implementations.
 */
void scan_folder_windows_parallel(const char *base, DirQueue *q);
void scan_folder_posix_parallel(const char *base, DirQueue *q);
/*
 * Singly linked list node carrying one path string.
 * NOTE(review): nothing else in this header references DirJob, and the
 * ownership of `path` is not visible here - confirm both against its users.
 */
typedef struct DirJob {
char *path;
struct DirJob *next;
} DirJob;
/*
 * Shared queue of directory path strings plus its synchronization objects.
 *
 * items / count / cap   array of strings with its current element count and
 *                       allocated capacity.
 * active                NOTE(review): presumably the number of workers
 *                       currently processing a directory - confirm.
 * stop                  NOTE(review): presumably a shutdown flag checked by
 *                       workers - confirm.
 *
 * Synchronization is a CRITICAL_SECTION + CONDITION_VARIABLE pair on
 * Windows and a pthread mutex + condition variable otherwise.
 */
typedef struct DirQueue {
char **items;
size_t count;
size_t cap;
size_t active;
int stop;
#if PLATFORM_WINDOWS
CRITICAL_SECTION cs;
CONDITION_VARIABLE cv;
#else
pthread_mutex_t mutex;
pthread_cond_t cond;
#endif
} DirQueue;
/* Hashing */
/*
 * Singly linked list node for one hashing job: a pointer to the file entry
 * to process and a link to the next job.
 * NOTE(review): whether the job owns `file` is not visible here - confirm.
 */
typedef struct Job {
FileEntry *file;
struct Job *next;
} Job;
/*
 * Linked-list queue of hashing jobs with its synchronization objects.
 *
 * head / tail   first and last Job of the list.
 * count         number of queued jobs (atomic).
 * stop          NOTE(review): presumably a shutdown flag for the workers;
 *               it is a plain int, so it should only be touched while the
 *               queue lock is held - confirm against the queue code.
 *
 * The lock and condition variable follow the same platform split as
 * DirQueue: Windows primitives on Windows, pthread primitives elsewhere.
 * Without this split the header cannot compile on POSIX.
 */
typedef struct {
  Job *head;
  Job *tail;
#if PLATFORM_WINDOWS
  CRITICAL_SECTION cs;
  CONDITION_VARIABLE cv;
#else
  pthread_mutex_t mutex;
  pthread_cond_t cond;
#endif
  atomic_size_t count; // queued jobs
  int stop;
} JobQueue;
/*
 * Argument bundle for a worker: the job queue to use, pointers to two shared
 * atomic counters, and a job total.
 *
 * NOTE(review): presumably passed to each hashing worker thread, with
 * `done_counter` incremented per finished job (against `total_jobs`) and
 * `live_workers` tracking how many workers are still running - confirm
 * against the worker function.
 */
typedef struct {
JobQueue *queue;
atomic_size_t *done_counter;
size_t total_jobs;
atomic_int *live_workers;
} WorkerArg;