Files
filehasher/platform.h
amir dd0797df79 hashers now use thread local arena
Instead of writing directly to file_hashes.txt, hash workers now write
into a local arena and emit everything once at the end

using #pragma once to ensure that a given header file is included only
once in a single compilation unit
2026-03-08 10:46:05 +01:00

152 lines
3.2 KiB
C

#pragma once // ensure that a given header file is included only once in a
// single compilation unit
#include "arena.h"
#include "base.h"
#include "arena.c"
// ----------------------------- Config -------------------------------------
// File name used for the hash listing; how the file is opened and written is
// not visible in this header.
#define FILE_HASHES_TXT "file_hashes.txt"
// Buffer size for a hash rendered as text: 32 hex characters for a 128-bit
// value plus the terminating NUL.
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
// Upper bound used for path buffers, in chars (presumably including the
// terminating NUL -- confirm at the use sites).
#define MAX_PATHLEN 4096
// Size of one read chunk: 64 * 1024 = 65536 bytes.
#define READ_BLOCK (64 * 1024) // 64KB blocks
// ----------------------------- Data types ---------------------------------
/* Metadata record describing a single file. */
typedef struct FileEntry {
char *path; // path string; who allocates/frees it is not visible here -- TODO confirm
uint64_t size_bytes; // file size in bytes
uint64_t created_time; // epoch (presumably seconds, like modified_time -- confirm)
uint64_t modified_time; // epoch seconds
char owner[128]; // resolved owner name
} FileEntry;
/* File path and metadata */
/**
 * Normalize the separators of a path string in place.
 *
 * Every backslash is rewritten as a forward slash, and each run of
 * consecutive separators ('/' and '\\' in any mix) is collapsed into a single
 * '/'. All other characters are copied through unchanged. The output is never
 * longer than the input, so the rewrite happens inside the caller's buffer
 * and the string is re-terminated at its new, possibly shorter, length.
 *
 * NOTE(review): a leading double separator (e.g. a UNC prefix "\\\\server")
 * is collapsed as well and becomes "/server" -- confirm that is intended.
 *
 * @param p  Writable, NUL-terminated path buffer. A null pointer is accepted
 *           and ignored.
 */
static void normalize_path(char *p) {
    /* Nothing to normalize; avoids dereferencing a null pointer below. */
    if (!p) {
        return;
    }

    char *src = p;      /* read cursor */
    char *dst = p;      /* write cursor; never runs ahead of src */
    int prev_slash = 0; /* non-zero while inside a run of separators */

    while (*src != '\0') {
        char c = *src++;
        if (c == '\\' || c == '/') {
            /* Emit only the first separator of a run. */
            if (!prev_slash) {
                *dst++ = '/';
                prev_slash = 1;
            }
        } else {
            *dst++ = c;
            prev_slash = 0;
        }
    }
    *dst = '\0';
}
/*
 * Platform metadata lookups. Only declared here; the implementations are not
 * part of this header.
 *
 * platform_get_file_times: takes a path and two output pointers, out_created
 * and out_modified (presumably filled with the epoch values stored in
 * FileEntry -- confirm against the implementation).
 *
 * platform_get_file_owner: takes a path and an output buffer out_owner of
 * out_owner_size bytes (presumably receives a NUL-terminated owner name,
 * truncated to fit -- confirm against the implementation).
 */
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified);
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size);
/* scan folder timer*/
/*
 * Pair of performance-counter readings bracketing a timed interval.
 * timer_start() writes `start`; timer_stop() writes `end`.
 *
 * NOTE(review): LARGE_INTEGER is a Win32 type and is used here without a
 * PLATFORM_WINDOWS guard, unlike DirQueue further down -- confirm whether
 * this header is expected to compile on non-Windows targets.
 */
typedef struct {
LARGE_INTEGER start; // counter value captured by timer_start()
LARGE_INTEGER end; // counter value captured by timer_stop()
} HiResTimer;
/*
 * Performance-counter frequency (counts per second) cached by timer_init().
 * Because it is `static` in a header, every translation unit that includes
 * this file has its own copy, zero until that unit calls timer_init().
 */
static LARGE_INTEGER g_qpc_freq;

/** Query the performance-counter frequency and cache it in g_qpc_freq. */
static void timer_init(void) { QueryPerformanceFrequency(&g_qpc_freq); }

/**
 * Capture the current performance-counter value as the start of an interval.
 *
 * @param t  Timer whose `start` field is overwritten.
 */
static void timer_start(HiResTimer *t) { QueryPerformanceCounter(&t->start); }

/**
 * Capture the current performance-counter value as the end of an interval and
 * return the time elapsed since timer_start().
 *
 * @param t  Timer previously passed to timer_start(); its `end` field is
 *           overwritten.
 * @return   (end - start) counter ticks divided by the counter frequency,
 *           i.e. elapsed seconds.
 */
static double timer_stop(HiResTimer *t) {
    QueryPerformanceCounter(&t->end);

    /*
     * If timer_init() has not run in this translation unit the cached
     * frequency is still 0 and the division below would produce inf/NaN.
     * Query it into a local in that case; the global is deliberately not
     * written from here so this function adds no shared-state writes.
     */
    LARGE_INTEGER freq = g_qpc_freq;
    if (freq.QuadPart == 0) {
        QueryPerformanceFrequency(&freq);
    }

    return (double)(t->end.QuadPart - t->start.QuadPart) /
           (double)freq.QuadPart;
}
// ============================================================
// MPMC Queue (FileEntry*): atomic head/tail, per-slot sequence counters
// ============================================================
/*
 * One cell of the MPMC queue: an atomic sequence counter plus the FileEntry
 * pointer the cell carries. `pad` is sized so that seq + data + pad add up to
 * 64 bytes, barring compiler-inserted padding (presumably one cache line per
 * slot to avoid false sharing -- confirm for the target CPU).
 */
typedef struct {
atomic_size_t seq; // per-slot sequence counter; protocol lives in the queue code, not shown here
FileEntry *data; // payload pointer stored in this slot
char pad[64 - sizeof(atomic_size_t) - sizeof(FileEntry *)]; // filler up to 64 bytes
} MPMCSlot;
/*
 * Queue state for the FileEntry* MPMC queue. The operations that use these
 * fields are not defined in this header, so the notes below describe only
 * what the declarations show.
 */
typedef struct {
atomic_size_t head; // atomic position counter (which end it tracks is set by the queue code)
char pad1[64]; // 64 bytes of spacing between head and tail (presumably to keep them on separate cache lines)
atomic_size_t tail; // atomic position counter for the other end
char pad2[64]; // 64 bytes of spacing between tail and the remaining fields
size_t capacity; // number of slots (presumably the length of `slots` -- confirm)
size_t mask; // index mask (presumably capacity - 1 with a power-of-two capacity -- confirm)
atomic_size_t committed; // atomic counter; meaning defined by the queue implementation -- TODO confirm
size_t commit_step; // step size associated with `committed` -- TODO confirm
atomic_flag commit_lock; // atomic flag (presumably a spin-style guard around commit work -- confirm)
MPMCSlot *slots; // pointer to the slot storage
} MPMCQueue;
/*
 * File-scope queue instance.
 * NOTE(review): `static` in a header gives every including translation unit
 * its own separate g_file_queue -- confirm this header is only included from
 * a single translation unit.
 */
static MPMCQueue g_file_queue;
/*
 * Bundle of a queue pointer and an arena pointer (presumably the per-worker
 * context handed to each hashing thread -- confirm at the thread-creation
 * site).
 */
typedef struct {
MPMCQueue *queue; // queue this context refers to
mem_arena *arena; // arena this context refers to (presumably thread-local to the worker -- confirm)
} WorkerContext;
/* Scan folders */
/* Forward declaration so the prototypes below can take a DirQueue pointer
 * before the struct is defined. */
typedef struct DirQueue DirQueue;
/*
 * Folder scanners, one per platform family. Both take a base path and a
 * DirQueue; they are only declared here, so how they use the queue is defined
 * by their implementations.
 */
void scan_folder_windows_parallel(const char *base, DirQueue *q);
void scan_folder_posix_parallel(const char *base, DirQueue *q);
/* Singly linked list node carrying a directory path. */
typedef struct DirJob {
char *path; // directory path string; ownership not visible here -- TODO confirm
struct DirJob *next; // next node in the list
} DirJob;
/*
 * Queue of directory paths stored as an array of strings, together with the
 * platform's lock and condition variable: CRITICAL_SECTION and
 * CONDITION_VARIABLE when PLATFORM_WINDOWS is set, a pthread mutex and
 * condition variable otherwise.
 */
typedef struct DirQueue {
char **items; // array of path strings
size_t count; // number of entries currently in `items`
size_t cap; // allocated length of `items` (presumably grown on demand -- confirm)
size_t active; // counter (presumably workers currently processing a directory -- confirm)
int stop; // stop flag (presumably tells workers to exit -- confirm)
#if PLATFORM_WINDOWS
CRITICAL_SECTION cs;
CONDITION_VARIABLE cv;
#else
pthread_mutex_t mutex;
pthread_cond_t cond;
#endif
} DirQueue;
/* Hashing */
/* Singly linked list node carrying one FileEntry pointer. */
typedef struct Job {
FileEntry *file; // file this job refers to
struct Job *next; // next job in the list
} Job;
/*
 * Linked-list job queue with head and tail pointers, a Win32 critical section
 * and condition variable, an atomic job count and a stop flag.
 *
 * NOTE(review): CRITICAL_SECTION and CONDITION_VARIABLE are used here without
 * a PLATFORM_WINDOWS guard, unlike DirQueue -- confirm whether this struct is
 * meant to be Windows-only.
 */
typedef struct {
Job *head; // first job in the list
Job *tail; // last job in the list
CRITICAL_SECTION cs;
CONDITION_VARIABLE cv;
atomic_size_t count; // queued jobs
int stop; // stop flag; not atomic (presumably only accessed while holding cs -- confirm)
} JobQueue;
/*
 * Argument bundle referencing a JobQueue plus shared progress counters
 * (presumably passed to each job-queue worker thread -- confirm at the
 * thread-creation site).
 */
typedef struct {
JobQueue *queue; // queue this argument refers to
atomic_size_t *done_counter; // shared atomic counter (presumably incremented per finished job -- confirm)
size_t total_jobs; // total number of jobs (presumably used for progress reporting -- confirm)
atomic_int *live_workers; // shared atomic counter (presumably workers still running -- confirm)
} WorkerArg;