diff --git a/arena.h b/arena.h index 85e1bbe..282ab70 100644 --- a/arena.h +++ b/arena.h @@ -293,7 +293,8 @@ typedef enum arena_commit_policy { typedef struct arena_params { u64 reserve_size; // size of one arena block u64 commit_size; // initial commit size - u64 align; // allocation alignment (0 = default) + u64 align; // allocation alignment, 0 to disable and ARENA_ALIGN to align + // according to architecture // Element size rules: // - stack mode : push_size > 0 (mandatory) diff --git a/binaries/changelog.txt b/binaries/changelog.txt index 9c3bd29..ddceca4 100644 --- a/binaries/changelog.txt +++ b/binaries/changelog.txt @@ -7,7 +7,6 @@ v2.0: Multi threaded scan v2.1: Uses AVX2 instead of SSE2 v3.0: Simple mutex/critical section based MPMC queue -reusable hashing buffer v3.1: Lock free MPMC queue Vyukov-style @@ -22,5 +21,7 @@ Reorder helper functions v3.4: Rewriting hash_worker() to export file_hashes.txt v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end -using #pragma once to ensure that a given header file is included only once in a single compilation unit -forcing xxhash to use the stack instead of the heap +Using #pragma once to ensure that a given header file is included only once in a single compilation unit +Forcing xxhash to use the stack instead of the heap +Making the hashing buffer reusable instead of malloc every file +Implementing a general purpose arena to replace small allocations diff --git a/platform.h b/platform.h index 8fb5d30..db5b7fa 100644 --- a/platform.h +++ b/platform.h @@ -21,29 +21,6 @@ typedef struct FileEntry { char owner[128]; // resolved owner name } FileEntry; -/* File path and metadata */ -static void normalize_path(char *p) { - char *src = p; - char *dst = p; - int prev_slash = 0; - - while (*src) { - char c = *src++; - - if (c == '\\' || c == '/') { - if (!prev_slash) { - *dst++ = '/'; - prev_slash = 1; - } - } else { - *dst++ = c; - prev_slash = 
0; - } - } - - *dst = '\0'; -} - void platform_get_file_times(const char *path, uint64_t *out_created, uint64_t *out_modified); void platform_get_file_owner(const char *path, char *out_owner, @@ -68,7 +45,7 @@ static double timer_stop(HiResTimer *t) { } // ============================================================ -// Simple Mutex-Based MPMC Queue (FileEntry*) +// Simple lock free MPMC Queue // ============================================================ typedef struct { @@ -127,25 +104,3 @@ typedef struct DirQueue { pthread_cond_t cond; #endif } DirQueue; - -/* Hashing */ -typedef struct Job { - FileEntry *file; - struct Job *next; -} Job; - -typedef struct { - Job *head; - Job *tail; - CRITICAL_SECTION cs; - CONDITION_VARIABLE cv; - atomic_size_t count; // queued jobs - int stop; -} JobQueue; - -typedef struct { - JobQueue *queue; - atomic_size_t *done_counter; - size_t total_jobs; - atomic_int *live_workers; -} WorkerArg; diff --git a/platform_windows.c b/platform_windows.c index 290d4f4..8894cab 100644 --- a/platform_windows.c +++ b/platform_windows.c @@ -1,3 +1,4 @@ +#include "arena.h" #include "platform.h" // ----------------------------- Globals ------------------------------------ @@ -5,19 +6,29 @@ static atomic_uint_fast64_t g_files_found = 0; static atomic_uint_fast64_t g_files_hashed = 0; static atomic_uint_fast64_t g_bytes_processed = 0; static atomic_int g_scan_done = 0; -// __________________________________________________________________________ -// ----------------------------- Utils -------------------------------------- -static void perror_exit(const char *msg) { - perror(msg); - exit(1); -} +// ============================= Utils ====================================== +// ----------------------------- Normalize path -------------- +static void normalize_path(char *p) { + char *src = p; + char *dst = p; + int prev_slash = 0; -static void *xmalloc(size_t n) { - void *p = malloc(n); - if (!p) - perror_exit("malloc"); - return p; + while (*src) { 
+ char c = *src++; + + if (c == '\\' || c == '/') { + if (!prev_slash) { + *dst++ = '/'; + prev_slash = 1; + } + } else { + *dst++ = c; + prev_slash = 0; + } + } + + *dst = '\0'; } // ----------------------------- Convert filetime to epoch -------------- @@ -29,7 +40,6 @@ static uint64_t filetime_to_epoch(const FILETIME *ft) { // Windows epoch (1601) → Unix epoch (1970) return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL; } - // ----------------------------- Format time helper ------------------------- static void format_time(uint64_t t, char *out, size_t out_sz) { if (t == 0) { @@ -258,6 +268,7 @@ FileEntry *mpmc_pop(MPMCQueue *q) { return data; } +// --------------- parallel directory scanning ---------------- // Add queue helper functions static void dirqueue_push(DirQueue *q, const char *path) { EnterCriticalSection(&q->cs); @@ -375,7 +386,7 @@ void scan_folder_windows_parallel(const char *base, DirQueue *q) { } // ----------------------------- Hashing helpers ----------------------------- -static void xxh3_hash_file_stream(const char *path, char *out_hex) { +static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) { // compute XXH3_128 over file. POSIX and Windows use standard reads in this // helper. 
// On Windows try to use overlapped synchronous chunked reads for higher @@ -391,7 +402,6 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) { XXH3_state_t state; XXH3_128bits_reset(&state); - BYTE *buf = (BYTE *)malloc(READ_BLOCK); DWORD read = 0; BOOL ok; while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) { @@ -400,7 +410,6 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) { } h = XXH3_128bits_digest(&state); CloseHandle(hFile); - free(buf); snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, (unsigned long long)h.low64); } @@ -411,6 +420,7 @@ static DWORD WINAPI hash_worker(LPVOID arg) { WorkerContext *ctx = (WorkerContext *)arg; MPMCQueue *q = ctx->queue; mem_arena *local_arena = ctx->arena; + BYTE *buf = (BYTE *)malloc(READ_BLOCK); for (;;) { FileEntry *fe = mpmc_pop(q); @@ -418,7 +428,7 @@ static DWORD WINAPI hash_worker(LPVOID arg) { break; char hash[HASH_STRLEN]; - xxh3_hash_file_stream(fe->path, hash); + xxh3_hash_file_stream(fe->path, hash, buf); char created[32], modified[32]; format_time(fe->created_time, created, sizeof(created)); @@ -440,6 +450,7 @@ static DWORD WINAPI hash_worker(LPVOID arg) { free(fe->path); free(fe); } + free(buf); return 0; } @@ -587,6 +598,23 @@ int main(int argc, char **argv) { printf(" - %s\n", folders[i]); } + // ------------------------------- + // Creating a general purpose arena + // ------------------------------- + arena_params params = { + .reserve_size = GiB(1), + .commit_size = MiB(16), + .align = 0, + .push_size = 0, + .allow_free_list = true, + .allow_swapback = false, + .growth_policy = ARENA_GROWTH_NORMAL, + .commit_policy = ARENA_COMMIT_LAZY, + .max_nbre_blocks = 1, + }; + + mem_arena *gp_arena = arena_create(&params); + // ------------------------------- // Detect hardware threads (CPU cores) // ------------------------------- @@ -596,7 +624,7 @@ int main(int argc, char **argv) { GetLogicalProcessorInformation(NULL, &len); 
SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf = - (SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)malloc(len); + (SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)arena_push(&gp_arena, len, true); if (GetLogicalProcessorInformation(buf, &len)) { DWORD count = 0; @@ -608,7 +636,7 @@ int main(int argc, char **argv) { if (count > 0) hw_threads = count; } - free(buf); + arena_free(&gp_arena, (u8 **)&buf, len); // Add some extra threads to overlap I/O more aggressively size_t num_threads = hw_threads * 2; @@ -628,19 +656,6 @@ int main(int argc, char **argv) { q.active = 0; // starting hash threads - - arena_params params = { - .reserve_size = GiB(1), - .commit_size = MiB(16), - .align = 0, - .push_size = 0, - .allow_free_list = true, - .allow_swapback = false, - .growth_policy = ARENA_GROWTH_NORMAL, - .commit_policy = ARENA_COMMIT_LAZY, - .max_nbre_blocks = 0, - }; - WorkerContext workers[num_threads]; for (int i = 0; i < num_threads; i++) { @@ -648,7 +663,8 @@ int main(int argc, char **argv) { workers[i].arena = arena_create(&params); } - HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads); + HANDLE *hash_threads = + arena_push(&gp_arena, sizeof(HANDLE) * num_threads, true); for (size_t i = 0; i < num_threads; ++i) { hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL); @@ -665,7 +681,9 @@ int main(int argc, char **argv) { if (scan_threads < 2) scan_threads = 2; - HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads); + HANDLE *scan_tids = + arena_push(&gp_arena, sizeof(HANDLE) * scan_threads, true); + for (size_t i = 0; i < scan_threads; ++i) { scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL); @@ -673,7 +691,6 @@ int main(int argc, char **argv) { WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE); - // debug for (size_t i = 0; i < num_threads; i++) { mpmc_push(&g_file_queue, NULL); } @@ -682,7 +699,8 @@ int main(int argc, char **argv) { for (size_t i = 0; i < scan_threads; ++i) 
CloseHandle(scan_tids[i]); - free(scan_tids); + + arena_free(&gp_arena, (u8 **)&scan_tids, sizeof(HANDLE) * scan_threads); double scan_seconds = timer_stop(&scan_timer); size_t total_found = atomic_load(&g_files_found); @@ -703,7 +721,7 @@ int main(int argc, char **argv) { for (size_t i = 0; i < num_threads; ++i) CloseHandle(hash_threads[i]); - free(hash_threads); + arena_free(&gp_arena, (u8 **)&hash_threads, sizeof(HANDLE) * num_threads); WaitForSingleObject(progress, INFINITE); CloseHandle(progress); @@ -727,14 +745,14 @@ int main(int argc, char **argv) { for (int i = 0; i < num_threads; i++) { - mem_arena *arena = workers[i].arena; + mem_arena *local_hash_arena = workers[i].arena; DWORD written; - u8 *arena_base = - (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align); + u8 *arena_base = (u8 *)local_hash_arena + + ALIGN_UP_POW2(sizeof(mem_arena), local_hash_arena->align); - WriteFile(h, arena_base, (DWORD)arena->pos, &written, NULL); + WriteFile(h, arena_base, (DWORD)local_hash_arena->pos, &written, NULL); } // done time