Making the hashing buffer reusable instead of calling malloc for every file

This commit is contained in:
2026-03-08 10:59:13 +01:00
parent c846952cbf
commit 2fc9bf31b0
2 changed files with 9 additions and 7 deletions

View File

@@ -7,7 +7,6 @@ v2.0: Multi threaded scan
v2.1: Uses AVX2 instead of SSE2 v2.1: Uses AVX2 instead of SSE2
v3.0: Simple mutex/critical section based MPMC queue v3.0: Simple mutex/critical section based MPMC queue
reusable hashing buffer
v3.1: Lock free MPMC queue Vyukov-style v3.1: Lock free MPMC queue Vyukov-style
@@ -22,5 +21,6 @@ Reorder helper functions
v3.4: Rewriting hash_worker() to export file_hashes.txt v3.4: Rewriting hash_worker() to export file_hashes.txt
v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end
using #pragma once to ensure that a given header file is included only once in a single compilation unit Using #pragma once to ensure that a given header file is included only once in a single compilation unit
forcing xxhash to use the stack instead of the heap Forcing xxhash to use the stack instead of the heap
Making the hashing buffer reusable instead of calling malloc for every file

View File

@@ -375,7 +375,7 @@ void scan_folder_windows_parallel(const char *base, DirQueue *q) {
} }
// ----------------------------- Hashing helpers ----------------------------- // ----------------------------- Hashing helpers -----------------------------
static void xxh3_hash_file_stream(const char *path, char *out_hex) { static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) {
// compute XXH3_128 over file. POSIX and Windows use standard reads in this // compute XXH3_128 over file. POSIX and Windows use standard reads in this
// helper. // helper.
// On Windows try to use overlapped synchronous chunked reads for higher // On Windows try to use overlapped synchronous chunked reads for higher
@@ -391,7 +391,7 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) {
XXH3_state_t state; XXH3_state_t state;
XXH3_128bits_reset(&state); XXH3_128bits_reset(&state);
BYTE *buf = (BYTE *)malloc(READ_BLOCK); // BYTE *buf = (BYTE *)malloc(READ_BLOCK);
DWORD read = 0; DWORD read = 0;
BOOL ok; BOOL ok;
while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) { while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) {
@@ -400,7 +400,7 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) {
} }
h = XXH3_128bits_digest(&state); h = XXH3_128bits_digest(&state);
CloseHandle(hFile); CloseHandle(hFile);
free(buf); // free(buf);
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
(unsigned long long)h.low64); (unsigned long long)h.low64);
} }
@@ -411,6 +411,7 @@ static DWORD WINAPI hash_worker(LPVOID arg) {
WorkerContext *ctx = (WorkerContext *)arg; WorkerContext *ctx = (WorkerContext *)arg;
MPMCQueue *q = ctx->queue; MPMCQueue *q = ctx->queue;
mem_arena *local_arena = ctx->arena; mem_arena *local_arena = ctx->arena;
BYTE *buf = (BYTE *)malloc(READ_BLOCK);
for (;;) { for (;;) {
FileEntry *fe = mpmc_pop(q); FileEntry *fe = mpmc_pop(q);
@@ -418,7 +419,7 @@ static DWORD WINAPI hash_worker(LPVOID arg) {
break; break;
char hash[HASH_STRLEN]; char hash[HASH_STRLEN];
xxh3_hash_file_stream(fe->path, hash); xxh3_hash_file_stream(fe->path, hash, buf);
char created[32], modified[32]; char created[32], modified[32];
format_time(fe->created_time, created, sizeof(created)); format_time(fe->created_time, created, sizeof(created));
@@ -440,6 +441,7 @@ static DWORD WINAPI hash_worker(LPVOID arg) {
free(fe->path); free(fe->path);
free(fe); free(fe);
} }
free(buf);
return 0; return 0;
} }