Files
filehasher/platform_windows.c
amir 0cf0d6c26a Bug fixes in the lock free mpmc queue
Fix bug: slots were used before initialization. The compare-and-swap
protects the update of committed, but it does not protect the memory
initialization. Added an atomic_flag commit_lock to protect against that.

Fix bug: multiple threads could commit at the same time. Fixed by using
the atomic_flag commit_lock and re-checking committed after acquiring the
lock.
2026-03-07 10:29:48 +01:00

691 lines
17 KiB
C

#include "platform.h"
// ----------------------------- Globals ------------------------------------
// Progress counters shared between the scan, hash and progress threads.
// Incremented once per regular file discovered by the directory scan.
static atomic_uint_fast64_t g_files_found = 0;
// Incremented by a hash worker after it has processed one file.
static atomic_uint_fast64_t g_files_hashed = 0;
// Running total of bytes read from disk and fed to the hasher.
static atomic_uint_fast64_t g_bytes_processed = 0;
// Set to 1 by main() after all scan threads have been waited on.
static atomic_int g_scan_done = 0;
// __________________________________________________________________________
// ----------------------------- Utils --------------------------------------
// Print `msg` followed by the text for the current errno on stderr, then
// terminate the process with exit status 1. This function never returns.
static void perror_exit(const char *msg)
{
    perror(msg);
    exit(1);
}
// malloc() wrapper that never returns NULL: when the allocation fails the
// error is reported through perror_exit("malloc"), which ends the process.
static void *xmalloc(size_t n)
{
    void *block = malloc(n);
    if (block == NULL) {
        perror_exit("malloc");
    }
    return block;
}
// ----------------------------- Convert filetime to epoch --------------
// Convert a Windows FILETIME (100-nanosecond ticks since 1601-01-01) into
// whole seconds since the Unix epoch (1970-01-01).
//
// Returns 0 for any timestamp that lies before the Unix epoch. Without this
// guard the unsigned subtraction below would wrap around and produce a huge
// bogus value (for example for a zeroed FILETIME).
static uint64_t filetime_to_epoch(const FILETIME *ft) {
    // Number of 100-ns ticks between 1601-01-01 and 1970-01-01.
    const uint64_t epoch_offset_ticks = 116444736000000000ULL;
    const uint64_t ticks_per_second = 10000000ULL;

    ULARGE_INTEGER ticks;
    ticks.LowPart = ft->dwLowDateTime;
    ticks.HighPart = ft->dwHighDateTime;

    if (ticks.QuadPart < epoch_offset_ticks) {
        return 0;
    }
    return (ticks.QuadPart - epoch_offset_ticks) / ticks_per_second;
}
// ----------------------------- Resolve file owner ---------------------
// Write the owner of `path` into `out` (at most `out_sz` bytes) formatted as
// "DOMAIN\name". If either the security descriptor cannot be read or the
// owner SID cannot be translated to an account name, "UNKNOWN" is written
// instead.
static void get_file_owner(const char *path, char *out, size_t out_sz) {
    PSID owner_sid = NULL;
    PSECURITY_DESCRIPTOR descriptor = NULL;
    int resolved = 0;

    if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                              &owner_sid, NULL, NULL, NULL,
                              &descriptor) == ERROR_SUCCESS) {
        char account[64];
        char domain[64];
        DWORD account_len = sizeof(account);
        DWORD domain_len = sizeof(domain);
        SID_NAME_USE sid_type;

        if (LookupAccountSidA(NULL, owner_sid, account, &account_len, domain,
                              &domain_len, &sid_type)) {
            snprintf(out, out_sz, "%s\\%s", domain, account);
            resolved = 1;
        }
    }

    if (!resolved) {
        snprintf(out, out_sz, "UNKNOWN");
    }

    // The descriptor returned by GetNamedSecurityInfoA is released with
    // LocalFree once we are done with it.
    if (descriptor) {
        LocalFree(descriptor);
    }
}
// ----------------------------- Format time helper -------------------------
// Format the Unix timestamp `t` (seconds) as local time
// "YYYY-MM-DD HH:MM:SS" into `out` (capacity `out_sz` bytes).
//
// "N/A" is written when:
//   - `t` is 0 (used by this file as "no timestamp available"),
//   - the local-time conversion fails, or
//   - the formatted text does not fit into `out`.
// Nothing is written when `out` is NULL or `out_sz` is 0.
static void format_time(uint64_t t, char *out, size_t out_sz) {
    if (out == NULL || out_sz == 0) {
        return;
    }
    if (t == 0) {
        snprintf(out, out_sz, "N/A");
        return;
    }

    time_t tt = (time_t)t;
    struct tm tm;
    int converted;
#if PLATFORM_WINDOWS
    // localtime_s returns zero on success.
    converted = (localtime_s(&tm, &tt) == 0);
#else
    // localtime_r returns NULL on failure.
    converted = (localtime_r(&tt, &tm) != NULL);
#endif

    // strftime returns 0 when the result does not fit; the buffer contents
    // are then unspecified, so overwrite them with a well-defined string.
    if (!converted ||
        strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
        snprintf(out, out_sz, "N/A");
    }
}
// --------------- parallel directory scanning ----------------
// Initialise the bounded MPMC queue `q` with room for `max_capacity` slots.
//
// `max_capacity` must be a non-zero power of two. Address space for the
// whole slot array is reserved up front; only the first chunk (about 64 MiB
// worth of slots, never more than the capacity) is committed and
// initialised here. Further chunks are committed on demand by
// mpmc_commit_more().
//
// Any invalid argument or allocation failure prints a message to stderr and
// terminates the process with exit status 1.
void mpmc_init(MPMCQueue *q, size_t max_capacity) {
    // Zero would pass the bit test below, so reject it explicitly.
    if (max_capacity == 0 || (max_capacity & (max_capacity - 1)) != 0) {
        fprintf(stderr, "capacity must be power of two\n");
        exit(1);
    }
    // Guard the byte-count multiplication against wrap-around.
    if (max_capacity > SIZE_MAX / sizeof(MPMCSlot)) {
        fprintf(stderr, "capacity too large\n");
        exit(1);
    }

    q->capacity = max_capacity;
    q->mask = max_capacity - 1;

    size_t bytes = sizeof(MPMCSlot) * max_capacity;
    q->slots = VirtualAlloc(NULL, bytes, MEM_RESERVE, PAGE_READWRITE);
    if (!q->slots) {
        fprintf(stderr, "VirtualAlloc reserve failed\n");
        exit(1);
    }

    q->commit_step = (64ull * 1024 * 1024) / sizeof(MPMCSlot);
    atomic_flag_clear(&q->commit_lock);

    // Never commit past the reserved region: a small queue may hold fewer
    // slots than one commit step.
    size_t initial = q->commit_step;
    if (initial > max_capacity)
        initial = max_capacity;

    // The slots are written right below, so the commit must have succeeded.
    if (!VirtualAlloc(q->slots, initial * sizeof(MPMCSlot), MEM_COMMIT,
                      PAGE_READWRITE)) {
        fprintf(stderr, "VirtualAlloc commit failed\n");
        exit(1);
    }
    q->committed = initial;

    // Each slot starts with seq == its own index, i.e. "free for the
    // producer that claims this position".
    for (size_t i = 0; i < initial; i++) {
        atomic_init(&q->slots[i].seq, i);
        q->slots[i].data = NULL;
    }

    atomic_init(&q->head, 0);
    atomic_init(&q->tail, 0);
}
// Commit and initialise the next chunk of slots of `q`.
//
// Called by mpmc_push() when the tail has reached the end of the committed
// region. `commit_lock` ensures only one thread grows the queue at a time;
// a thread that finds the lock taken returns immediately and lets its
// caller retry. After taking the lock the state is re-checked, because
// another thread may already have committed enough.
//
// If the commit itself fails the process is terminated with exit status 1,
// matching mpmc_init(): continuing would write to uncommitted memory.
static void mpmc_commit_more(MPMCQueue *q) {
    if (atomic_flag_test_and_set(&q->commit_lock))
        return;

    size_t start = atomic_load_explicit(&q->committed, memory_order_acquire);
    size_t tail = atomic_load_explicit(&q->tail, memory_order_relaxed);

    // Nothing to do if another thread already committed enough, or if the
    // whole reserved region is committed.
    if (tail < start || start >= q->capacity) {
        atomic_flag_clear(&q->commit_lock);
        return;
    }

    size_t new_commit = start + q->commit_step;
    if (new_commit > q->capacity)
        new_commit = q->capacity;
    size_t count = new_commit - start;

    if (!VirtualAlloc(&q->slots[start], count * sizeof(MPMCSlot), MEM_COMMIT,
                      PAGE_READWRITE)) {
        fprintf(stderr, "VirtualAlloc commit failed\n");
        exit(1);
    }

    for (size_t i = start; i < new_commit; i++) {
        atomic_init(&q->slots[i].seq, i);
        q->slots[i].data = NULL;
    }

    // Publish the new slots only after they are fully initialised.
    atomic_store_explicit(&q->committed, new_commit, memory_order_release);
    atomic_flag_clear(&q->commit_lock);
}
// Append `item` to the queue `q`, blocking (by sleeping and retrying) while
// the queue is full. `item` may be NULL; main() uses NULL as a poison pill
// for the hash workers.
//
// Differences from a textbook bounded MPMC push:
//   - The slot array is committed lazily, so the slot index is checked
//     against `committed` before the slot is touched. The load uses acquire
//     ordering so that it synchronises with the release store in
//     mpmc_commit_more() that publishes freshly initialised slots.
//   - The check uses the masked slot index rather than the ever-growing
//     position, so it stays correct after the position wraps past
//     `capacity`.
void mpmc_push(MPMCQueue *q, FileEntry *item) {
    MPMCSlot *slot;
    size_t pos;

    for (;;) {
        pos = atomic_load_explicit(&q->tail, memory_order_relaxed);
        size_t index = pos & q->mask;

        // Ensure the slot is committed BEFORE accessing it.
        size_t committed =
            atomic_load_explicit(&q->committed, memory_order_acquire);
        if (index >= committed) {
            mpmc_commit_more(q);
            continue;
        }

        slot = &q->slots[index];
        size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
        intptr_t diff = (intptr_t)seq - (intptr_t)pos;

        if (diff == 0) {
            // Slot is free for this position: try to claim it.
            if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed))
                break;
        } else if (diff < 0) {
            Sleep(100); // queue actually full: wait for a consumer
        } else {
            // Our view of tail was stale (another producer already took
            // this position); reload and retry without sleeping.
            _mm_pause();
        }
    }

    slot->data = item;
    // seq == pos + 1 marks the slot as filled for the consumer at `pos`.
    atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
}
// Remove and return the oldest entry of `q`, waiting (by sleeping and
// retrying) while the queue is empty. The returned pointer is whatever was
// pushed, including NULL.
//
// The slot array is committed lazily by the producers. When head has caught
// up with tail exactly at the end of the committed region, the slot at head
// has not been committed yet and must not be read; that case is treated
// like an empty queue.
FileEntry *mpmc_pop(MPMCQueue *q) {
    MPMCSlot *slot;
    size_t pos;
    int spins = 0;

    for (;;) {
        pos = atomic_load_explicit(&q->head, memory_order_relaxed);
        size_t index = pos & q->mask;

        // Acquire pairs with the release store in mpmc_commit_more(), so
        // the slot initialisation is visible before the slot is read.
        size_t committed =
            atomic_load_explicit(&q->committed, memory_order_acquire);
        if (index >= committed) {
            Sleep(500); // nothing can have been pushed here yet
            continue;
        }

        slot = &q->slots[index];
        size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
        intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1);

        if (diff == 0) {
            // Slot holds data for this position: try to claim it.
            if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed))
                break;
        } else if (diff < 0) {
            Sleep(500); // queue empty: wait for a producer
        } else {
            // Another consumer already took this position; back off
            // briefly and retry.
            if (++spins > 10) {
                SwitchToThread(); // yield CPU
                spins = 0;
            } else {
                _mm_pause();
            }
        }
    }

    FileEntry *data = slot->data;
    // Hand the slot back to the producers for the next lap of the ring.
    atomic_store_explicit(&slot->seq, pos + q->capacity, memory_order_release);
    return data;
}
// Directory work-queue helper functions
// Push a private copy of `path` onto the directory work queue `q` and wake
// one waiting scan worker. The copy is owned by the queue until it is
// handed out by dirqueue_pop().
//
// Running out of memory is reported through perror_exit(), which terminates
// the process; the queue is never left holding a NULL entry or a lost
// `items` pointer.
static void dirqueue_push(DirQueue *q, const char *path) {
    // Copy outside the lock: it needs no shared state.
    char *copy = _strdup(path);
    if (!copy)
        perror_exit("strdup");

    EnterCriticalSection(&q->cs);
    if (q->count + 1 > q->cap) {
        size_t new_cap = q->cap ? (size_t)q->cap * 2 : 1024;
        // Keep the old pointer until realloc is known to have succeeded.
        void *grown = realloc(q->items, new_cap * sizeof(char *));
        if (!grown) {
            LeaveCriticalSection(&q->cs);
            perror_exit("realloc");
        }
        q->items = grown;
        q->cap = new_cap;
    }
    q->items[q->count++] = copy;
    WakeConditionVariable(&q->cv);
    LeaveCriticalSection(&q->cs);
}
// Take the most recently pushed directory from `q`.
//
// While the queue is empty but some worker is still active (and may push
// more directories), the caller sleeps on the condition variable. Returns
// NULL once the queue is empty and no worker is active, meaning the scan is
// finished. Otherwise the returned string is owned by the caller, and the
// caller is counted as active until it calls dirqueue_done().
static char *dirqueue_pop(DirQueue *q) {
    char *next = NULL;

    EnterCriticalSection(&q->cs);

    while (q->count == 0 && q->active > 0) {
        SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
    }

    if (!(q->count == 0 && q->active == 0)) {
        q->count--;
        next = q->items[q->count];
        q->active++;
    }
    // else: truly done, `next` stays NULL

    LeaveCriticalSection(&q->cs);
    return next;
}
// Mark one worker as no longer active on `q` and wake every waiter so each
// can re-evaluate the "queue empty and nobody active" exit condition in
// dirqueue_pop().
static void dirqueue_done(DirQueue *q)
{
    EnterCriticalSection(&q->cs);
    q->active -= 1;
    WakeAllConditionVariable(&q->cv);
    LeaveCriticalSection(&q->cs);
}
// Thread entry point for a directory scan worker. `arg` is the shared
// DirQueue. The worker keeps taking directories from the queue, scans each
// one, frees the path string it received and reports completion, until
// dirqueue_pop() returns NULL.
static DWORD WINAPI scan_worker(LPVOID arg) {
    DirQueue *queue = (DirQueue *)arg;
    char *dir;

    while ((dir = dirqueue_pop(queue)) != NULL) {
        scan_folder_windows_parallel(dir, queue);
        free(dir);
        dirqueue_done(queue);
    }
    return 0;
}
// Scanning directory function
// Scan the single directory `base` (non-recursively).
//
//   - "." and ".." and every reparse point are skipped.
//   - Each sub-directory is pushed onto the directory queue `q` so that any
//     scan worker can pick it up.
//   - Each regular file is counted in g_files_found, described by a newly
//     allocated FileEntry (normalised path, times, owner, size) and pushed
//     onto g_file_queue. The entry and its path are freed by the consumer.
//
// Entries whose full path does not fit into MAX_PATHLEN are skipped rather
// than processed under a truncated (and therefore wrong) name. Allocation
// failures terminate the process through perror_exit().
void scan_folder_windows_parallel(const char *base, DirQueue *q) {
    char search[MAX_PATHLEN];
    int len = snprintf(search, sizeof(search), "%s\\*", base);
    if (len < 0 || (size_t)len >= sizeof(search))
        return; // search pattern would be truncated

    WIN32_FIND_DATAA fd;
    HANDLE h = FindFirstFileA(search, &fd);
    if (h == INVALID_HANDLE_VALUE)
        return;

    do {
        if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
            continue;
        if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
            continue;

        char full[MAX_PATHLEN];
        len = snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
        if (len < 0 || (size_t)len >= sizeof(full))
            continue; // path too long for our buffers

        if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
            dirqueue_push(q, full);
            continue;
        }

        atomic_fetch_add(&g_files_found, 1);

        FileEntry *fe = calloc(1, sizeof(FileEntry));
        if (!fe)
            perror_exit("calloc");

        char norm[MAX_PATHLEN];
        snprintf(norm, sizeof(norm), "%s", full);
        normalize_path(norm);
        fe->path = _strdup(norm);
        if (!fe->path)
            perror_exit("strdup");

        platform_get_file_times(full, &fe->created_time, &fe->modified_time);
        platform_get_file_owner(full, fe->owner, sizeof(fe->owner));

        // size_bytes stays 0 (from calloc) if the file cannot be opened.
        HANDLE hf =
            CreateFileA(full, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
                        NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
        if (hf != INVALID_HANDLE_VALUE) {
            LARGE_INTEGER size;
            if (GetFileSizeEx(hf, &size))
                fe->size_bytes = (uint64_t)size.QuadPart;
            CloseHandle(hf);
        }

        mpmc_push(&g_file_queue, fe);
    } while (FindNextFileA(h, &fd));

    FindClose(h);
}
// ----------------------------- Hashing helpers -----------------------------
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
// compute XXH3_128 over file. POSIX and Windows use standard reads in this
// helper.
// On Windows try to use overlapped synchronous chunked reads for higher
// throughput.
HANDLE hFile =
CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
strcpy(out_hex, "ERROR");
return;
}
XXH128_hash_t h;
XXH3_state_t *state = XXH3_createState();
XXH3_128bits_reset(state);
BYTE *buf = (BYTE *)malloc(READ_BLOCK);
DWORD read = 0;
BOOL ok;
while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) {
XXH3_128bits_update(state, buf, (size_t)read);
atomic_fetch_add(&g_bytes_processed, (uint64_t)read);
}
h = XXH3_128bits_digest(state);
XXH3_freeState(state);
CloseHandle(hFile);
free(buf);
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
(unsigned long long)h.low64);
}
// ------------------------- Hash worker --------------------------------
// Thread entry point for a hashing worker. `arg` is the MPMC file queue.
// Each popped entry is hashed, counted in g_files_hashed and then freed
// together with its path. A NULL entry is the poison pill that makes the
// worker exit. The computed hash string is not used further here.
static DWORD WINAPI hash_worker(LPVOID arg) {
    MPMCQueue *queue = (MPMCQueue *)arg;
    FileEntry *entry;

    while ((entry = mpmc_pop(queue)) != NULL) {
        char digest[HASH_STRLEN];
        xxh3_hash_file_stream(entry->path, digest);
        atomic_fetch_add(&g_files_hashed, 1);

        free(entry->path);
        free(entry);
    }
    return 0;
}
// ----------------------------- Progress display ---------------------------
// Thread entry point that redraws a one-line progress display on stdout
// roughly every 100 ms. `arg` is unused.
//
// While scanning is in progress it shows the running file counts; once
// g_scan_done is set it shows a progress bar with the percentage hashed.
// The throughput figure is re-sampled at most every 0.5 s. The thread exits
// after scanning is done and every found file has been hashed.
DWORD WINAPI progress_thread(void *arg) {
    (void)arg;

    enum { BAR_WIDTH = 40 };

    LARGE_INTEGER freq, start;
    QueryPerformanceFrequency(&freq);
    QueryPerformanceCounter(&start);

    uint64_t last_bytes = atomic_load(&g_bytes_processed);
    double last_time = 0.0;
    double displayed_speed = 0.0;
    const double sample_interval = 0.5;

    for (;;) {
        // Read the done flag BEFORE the counters: main() sets it only after
        // all scan threads have finished, so when it is observed as set the
        // `found` value read afterwards is already final.
        int scan_done = atomic_load(&g_scan_done);
        uint64_t found = atomic_load(&g_files_found);
        uint64_t hashed = atomic_load(&g_files_hashed);
        uint64_t bytes = atomic_load(&g_bytes_processed);

        LARGE_INTEGER now;
        QueryPerformanceCounter(&now);
        double t =
            (double)(now.QuadPart - start.QuadPart) / (double)freq.QuadPart;

        if (last_time == 0.0) {
            last_time = t;
            last_bytes = bytes;
        }

        double dt = t - last_time;
        if (dt >= sample_interval) {
            uint64_t db = bytes - last_bytes;
            if (db > 0 && dt > 0.0001) {
                displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
            }
            last_bytes = bytes;
            last_time = t;
        }

        if (!scan_done) {
            printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
                   (unsigned long long)found, (unsigned long long)hashed,
                   displayed_speed);
        } else {
            double pct = found ? (double)hashed / (double)found : 0.0;
            // Defensive clamp so the bar can never be over-filled.
            if (pct > 1.0)
                pct = 1.0;

            int filled = (int)(pct * BAR_WIDTH);
            char bar[BAR_WIDTH + 3]; // '[' + bar + ']' + NUL
            bar[0] = '[';
            memset(bar + 1, '#', (size_t)filled);
            memset(bar + 1 + filled, '.', (size_t)(BAR_WIDTH - filled));
            bar[BAR_WIDTH + 1] = ']';
            bar[BAR_WIDTH + 2] = 0;

            printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
                   (unsigned long long)hashed, (unsigned long long)found,
                   displayed_speed);
        }
        fflush(stdout);

        if (scan_done && hashed == found)
            break;
        Sleep(100);
    }

    printf("\n");
    return 0;
}
// ----------------------------- Get file metadata -------------------------
// Look up the creation and last-write times of `path` and store them, as
// converted by filetime_to_epoch(), in `*out_created` and `*out_modified`.
// Both outputs are set to 0 when the attributes cannot be queried.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
    WIN32_FILE_ATTRIBUTE_DATA attrs;
    uint64_t created = 0;
    uint64_t modified = 0;

    if (GetFileAttributesExA(path, GetFileExInfoStandard, &attrs)) {
        created = filetime_to_epoch(&attrs.ftCreationTime);
        modified = filetime_to_epoch(&attrs.ftLastWriteTime);
    }

    *out_created = created;
    *out_modified = modified;
}
// Externally visible wrapper around the file-local get_file_owner():
// forwards `path`, the output buffer `out_owner` and its size
// `out_owner_size` unchanged.
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size) {
get_file_owner(path, out_owner, out_owner_size);
}
// ----------------------------- Main ---------------------------------------
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
// -------------------------------
// Scanning and total timer init
// -------------------------------
timer_init();
HiResTimer total_timer;
HiResTimer scan_timer;
timer_start(&total_timer);
timer_start(&scan_timer);
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0) {
printf("Enter folder to process (Enter = current folder): ");
fflush(stdout);
char buf[MAX_PATHLEN];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0)
strcpy(folders[0], ".");
else
strncpy(folders[0], buf, MAX_PATHLEN - 1);
folder_count = 1;
}
// -------------------------------
// Display selected folders
// -------------------------------
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Detect hardware threads (CPU cores)
// -------------------------------
size_t hw_threads = 1;
// --- Windows: detect PHYSICAL cores (not logical threads) ---
DWORD len = 0;
GetLogicalProcessorInformation(NULL, &len);
SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)malloc(len);
if (GetLogicalProcessorInformation(buf, &len)) {
DWORD count = 0;
DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
for (DWORD i = 0; i < n; i++) {
if (buf[i].Relationship == RelationProcessorCore)
count++;
}
if (count > 0)
hw_threads = count;
}
free(buf);
// Add some extra threads to overlap I/O more aggressively
size_t num_threads = hw_threads * 2;
if (num_threads < 2)
num_threads = 2;
// -------------------------------
// Step 1: Scan all folders
// -------------------------------
mpmc_init(&g_file_queue, 1024 * 1024 * 1024);
DirQueue q;
memset(&q, 0, sizeof(q));
InitializeCriticalSection(&q.cs);
InitializeConditionVariable(&q.cv);
q.active = 0;
// starting hash threads
HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads);
for (size_t i = 0; i < num_threads; ++i) {
hash_threads[i] =
CreateThread(NULL, 0, hash_worker, &g_file_queue, 0, NULL);
}
// starting scan threads
HANDLE progress = CreateThread(NULL, 0, progress_thread, NULL, 0, NULL);
for (int i = 0; i < folder_count; ++i) {
dirqueue_push(&q, folders[i]);
}
size_t scan_threads = hw_threads;
if (scan_threads < 2)
scan_threads = 2;
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
for (size_t i = 0; i < scan_threads; ++i) {
scan_tids[i] =
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL);
}
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
// mpmc_finish(&g_file_queue);
// debug
for (size_t i = 0; i < num_threads; i++) {
mpmc_push(&g_file_queue, NULL);
}
atomic_store(&g_scan_done, 1);
for (size_t i = 0; i < scan_threads; ++i)
CloseHandle(scan_tids[i]);
free(scan_tids);
double scan_seconds = timer_stop(&scan_timer);
size_t total_found = atomic_load(&g_files_found);
printf("\r%*s\r", 120, ""); // clear_console_line
printf("Completed scanning in %.2f seconds, found %zu files\n\n",
scan_seconds, total_found);
// if no files found
if (total_found == 0) {
printf("No files found.\n");
return 0;
}
// stop hashing threads
WaitForMultipleObjects((DWORD)num_threads, hash_threads, TRUE, INFINITE);
for (size_t i = 0; i < num_threads; ++i)
CloseHandle(hash_threads[i]);
free(hash_threads);
// free(g_file_queue.items);
WaitForSingleObject(progress, INFINITE);
CloseHandle(progress);
// done time
double total_seconds = timer_stop(&total_timer);
printf("Completed hashing %zu files\n", total_found);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / total_seconds;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
printf(" Total time : %.2f seconds\n", total_seconds);
return 0;
}