Linux porting

Porting to linux
Reorganising the code
Improving the scan function
This commit is contained in:
2026-03-16 23:20:22 +01:00
parent ed0326d796
commit bdfae3110b
9 changed files with 1194 additions and 1384 deletions

View File

@@ -1,3 +1,24 @@
# filehasher
Collects some metadata and hashes files.
Collects some metadata and hashes files.
## Building:
### Windows:
#### Release:
clang-cl /O3 file_hasher.c xxh_x86dispatch.c advapi32.lib
clang -O3 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
gcc -O3 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
#### Debug:
clang-cl /Zi /Od file_hasher.c xxh_x86dispatch.c advapi32.lib
clang -g -O0 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
gcc -g -O0 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
### Linux:
#### Release:
clang -O3 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
gcc -O3 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
#### Debug:
clang -g -O0 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
gcc -g -O0 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher

6
base.h
View File

@@ -146,6 +146,9 @@ static void plat_sem_destroy(plat_sem *s) {
}
}
// Sleep
// Suspends the calling thread for `ms` milliseconds by forwarding to the
// Win32 Sleep() call.
static void sleep_ms(int ms) { Sleep(ms); }
#elif defined(__linux__)
// Memory allocation
@@ -211,4 +214,7 @@ static void plat_sem_post(plat_sem *s, u32 count) {
// Releases the semaphore embedded in `s` by calling sem_destroy() on it.
static void plat_sem_destroy(plat_sem *s) { sem_destroy(&s->sem); }
// Sleep
// Suspends the calling thread for at least `ms` milliseconds (negative values
// are treated as 0).
// nanosleep() is used rather than usleep() because POSIX only guarantees
// usleep() for arguments below 1,000,000 microseconds, and callers pass
// 1000 ms. Splitting into seconds + nanoseconds also avoids the `ms * 1000`
// overflow for large values.
static void sleep_ms(int ms) {
  if (ms < 0) {
    ms = 0;
  }
  struct timespec req;
  req.tv_sec = ms / 1000;
  req.tv_nsec = (long)(ms % 1000) * 1000000L;
  // An early wake-up by a signal is accepted, as it was with usleep().
  nanosleep(&req, NULL);
}
#endif

View File

@@ -45,3 +45,7 @@ Replacing DirQueue, a queue growable with realloc with the MPMC queue
4.1: Using the xxhash xxh_x86dispatch dispatcher to select the best SIMD instruction set at runtime. This dispatcher cannot be added to a unity build, and the AVX2 or AVX512 compilation flags must be removed; instead, link xxh_x86dispatch.c in the compilation command. The compilation emits two warnings about functions with internal linkage that are not defined; they are defined in xxh_x86dispatch.c, so the warnings are harmless.
Fixing user prompt parsing
4.5: Porting to linux
Reorganising the code
Improving the scan function

View File

@@ -1,7 +1,225 @@
#define _CRT_SECURE_NO_WARNINGS
#include "platform.c"
#if defined(_WIN32) || defined(_WIN64)
#include "platform_windows.c"
#else
#include "platform_posix.c"
#endif
// ----------------------------- Main ---------------------------------------
// Program entry point.
// Collects the input folders (from argv, or interactively when none are
// given), scans them with a pool of scanner threads, hashes every file found
// with a pool of hash threads, writes the per-worker results to
// FILE_HASHES_TXT and prints a timing summary.
// Returns 0 on success (including "no files found") and 1 when no usable
// folder was supplied or the output file could not be written.
int main(int argc, char **argv) {
  enum { MAX_FOLDERS = 64 };
  char folders[MAX_FOLDERS][MAX_PATHLEN]; // up to 64 input folders
  int folder_count = 0;

  // -------------------------------
  // Parse arguments
  // -------------------------------
  for (int i = 1; i < argc && folder_count < MAX_FOLDERS; ++i) {
    // An empty path would later be expanded to "/" by the path builder.
    if (argv[i][0] == '\0')
      continue;
    normalize_path(argv[i]);
    snprintf(folders[folder_count], MAX_PATHLEN, "%s", argv[i]);
    folder_count++;
  }

  // -------------------------------
  // Ask user if no folders provided
  // -------------------------------
  if (folder_count == 0) {
    printf("Enter folders to process (Enter = current folder): ");
    fflush(stdout);
    char buf[KiB(32)];
    if (!fgets(buf, sizeof(buf), stdin))
      return 1;
    buf[strcspn(buf, "\r\n")] = 0;
    if (buf[0] == 0) {
      strcpy(folders[0], ".");
      folder_count = 1;
    } else {
      folder_count = parse_paths(buf, folders, MAX_FOLDERS);
    }
  }

  // With nothing to scan no directory would ever be queued and the scanner
  // threads would have no work to complete, so stop here.
  if (folder_count == 0) {
    fprintf(stderr, "No valid folder given.\n");
    return 1;
  }

  // Display selected folders
  printf("Processing %d folder(s):\n", folder_count);
  for (int i = 0; i < folder_count; ++i) {
    printf(" - %s\n", folders[i]);
  }

  // -------------------------------
  // Scanning and total timer init
  // -------------------------------
  timer_init();
  HiResTimer total_timer;
  HiResTimer scan_timer;
  timer_start(&total_timer);
  timer_start(&scan_timer);

  // -------------------------------
  // Creating a general purpose arena
  // -------------------------------
  arena_params params = {
      .reserve_size = GiB(1),
      .commit_size = MiB(16),
      .align = 0,
      .push_size = 0,
      .allow_free_list = true,
      .allow_swapback = false,
      .growth_policy = ARENA_GROWTH_NORMAL,
      .commit_policy = ARENA_COMMIT_LAZY,
      .max_nbre_blocks = 1,
  };
  mem_arena *gp_arena = arena_create(&params);

  // -------------------------------
  // Detect hardware threads
  // -------------------------------
  size_t hw_threads = platform_physical_cores();
  // Logical threads = CPU cores * 2
  size_t num_threads = hw_threads * 2;
  printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
         hw_threads);
  printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());

  // -------------------------------
  // Scanning and hashing
  // -------------------------------
  MPMCQueue dir_queue;
  mpmc_init(&dir_queue, MiB(1));
  MPMCQueue file_queue;
  mpmc_init(&file_queue, MiB(1));

  // Starting hash threads
  size_t num_hash_threads = num_threads;
  WorkerContext workers[num_hash_threads];
  Thread *hash_threads =
      arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
  for (size_t i = 0; i < num_hash_threads; ++i) {
    workers[i].arena = arena_create(&params);
    workers[i].file_queue = &file_queue;
    if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker,
                      &workers[i]) != 0) {
      fprintf(stderr, "Failed to create hash thread %zu\n", i);
      exit(1);
    }
  }

  // Starting progress printing thread
  Thread progress_thread_handle;
  if (thread_create(&progress_thread_handle, (ThreadFunc)progress_thread,
                    NULL) != 0) {
    fprintf(stderr, "Failed to create progress thread\n");
    exit(1);
  }

  // Starting scan threads
  size_t num_scan_threads = num_threads;
  ScannerContext scanners[num_scan_threads];
  Thread *scan_threads =
      arena_push(&gp_arena, sizeof(Thread) * num_scan_threads, true);
  for (size_t i = 0; i < num_scan_threads; i++) {
    scanners[i].num_threads = num_scan_threads;
    scanners[i].path_arena = arena_create(&params);
    scanners[i].meta_arena = arena_create(&params);
    scanners[i].dir_queue = &dir_queue;
    scanners[i].file_queue = &file_queue;
    if (thread_create(&scan_threads[i], (ThreadFunc)scan_worker,
                      &scanners[i]) != 0) {
      fprintf(stderr, "Failed to create scan thread %zu\n", i);
      exit(1);
    }
  }

  // Initial folder push.
  // The root paths are queued straight from `folders`, which outlives every
  // scanner thread (they are joined below). Allocating them from
  // scanners[0].path_arena, as was done before, would race with scanner 0,
  // which pushes into that same arena as soon as the first folder is queued.
  for (int i = 0; i < folder_count; i++) {
    mpmc_push_work(&dir_queue, folders[i]);
  }

  // Stop scan threads
  thread_wait_multiple(scan_threads, num_scan_threads);
  for (size_t i = 0; i < num_scan_threads; ++i) {
    thread_close(&scan_threads[i]);
  }
  mpmc_producers_finished(&file_queue, num_hash_threads);
  atomic_store(&g_scan_done, 1);
  arena_free(&gp_arena, (u8 **)&scan_threads,
             sizeof(Thread) * num_scan_threads);

  double scan_seconds = timer_elapsed(&scan_timer);
  size_t total_found = atomic_load(&g_files_found);
  printf("\r%*s\r", 120, ""); // clear_console_line
  printf("Completed scanning in %.2f seconds, found %zu files\n\n",
         scan_seconds, total_found);

  // If no files found
  if (total_found == 0) {
    printf("No files found.\n");
    return 0;
  }

  // Stop hashing threads
  thread_wait_multiple(hash_threads, num_hash_threads);
  for (size_t i = 0; i < num_hash_threads; ++i) {
    thread_close(&hash_threads[i]);
  }
  arena_free(&gp_arena, (u8 **)&hash_threads,
             sizeof(Thread) * num_hash_threads);

  // Stop progress printing thread
  thread_join(&progress_thread_handle);
  thread_close(&progress_thread_handle);

  // -------------------------------
  // Export file_hashes.txt
  // -------------------------------
  FILE *f = fopen(FILE_HASHES_TXT, "wb");
  if (!f) {
    perror(FILE_HASHES_TXT);
    return 1;
  }
  bool write_ok = true;
  // One arena per hash worker: dump each worker's formatted lines in turn.
  for (size_t i = 0; i < num_hash_threads; i++) {
    mem_arena *arena = workers[i].arena;
    u8 *arena_base =
        (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
    size_t written = fwrite(arena_base, 1, arena->pos, f);
    if (written != (size_t)arena->pos)
      write_ok = false;
  }
  if (fclose(f) != 0)
    write_ok = false;
  if (!write_ok) {
    fprintf(stderr, "Failed to write %s\n", FILE_HASHES_TXT);
    return 1;
  }

  // -------------------------------
  // Print summary
  // -------------------------------
  double total_seconds = timer_elapsed(&total_timer);
  printf("Completed hashing %zu files\n", total_found);
  uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
  double total_mb = (double)total_bytes / (1024.0 * 1024.0);
  double avg_mbps = total_seconds > 0.0 ? total_mb / total_seconds : 0.0;
  printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
  printf(" Total time : %.2f seconds\n\n", total_seconds);
  return 0;
}

View File

@@ -173,11 +173,11 @@ static void mpmc_push(MPMCQueue *q, void *item) {
} else if (diff < 0) { // queue actually full
Sleep(1000);
sleep_ms(1000);
} else { // waiting to grow
Sleep(0);
sleep_ms(0);
}
}
@@ -220,11 +220,11 @@ static void mpmc_push_work(MPMCQueue *q, void *item) {
} else if (diff < 0) { // queue actually full
Sleep(1000);
sleep_ms(1000);
} else { // waiting to grow
Sleep(0);
sleep_ms(0);
}
}
@@ -264,7 +264,7 @@ static void *mpmc_pop(MPMCQueue *q) {
} else { // slot is still transitioning (written by another thread)
if (++spins > 10) {
Sleep(0); // yield CPU
sleep_ms(0); // yield CPU
spins = 0;
} else {
cpu_pause();

933
platform.c Normal file
View File

@@ -0,0 +1,933 @@
#pragma once // ensure that a given header file is included only once in a
// single compilation unit
#define _CRT_SECURE_NO_WARNINGS
#include "arena.h"
#include "base.h"
#include "lf_mpmc.h"
#include "arena.c"
// xxhash include
#define XXH_INLINE_ALL
#include "xxh_x86dispatch.h"
// ----------------------------- Config -------------------------------------
// Name of the report file that main() opens for writing.
#define FILE_HASHES_TXT "file_hashes.txt"
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
// Size of the fixed path buffers (input folders, PathBuilder, scratch copies).
#define MAX_PATHLEN 4096
// Size in bytes of the buffer each hash worker reads files through.
#define READ_BLOCK (KiB(64))
// ----------------------------- Globals ------------------------------------
// Progress counters shared by the scanner, hash and progress threads.
// Incremented by scan_folder() for every file it queues.
static atomic_uint_fast64_t g_files_found = 0;
// Incremented by hash_worker() after each file entry has been processed.
static atomic_uint_fast64_t g_files_hashed = 0;
// Bytes fed to the hasher so far; updated in xxh3_hash_file_stream().
static atomic_uint_fast64_t g_bytes_processed = 0;
// Set to 1 by main() once all scanner threads have been joined.
static atomic_int g_scan_done = 0;
// ================== OS-agnostic functions abstraction =====================
// ----------------------------- Timer functions --------------
// Stopwatch state used by timer_start() / timer_elapsed().
// The unit of both fields is platform specific: performance-counter ticks in
// the Windows implementation, nanoseconds in the Linux one.
typedef struct {
u64 start; // value captured by timer_start()
u64 now; // last value sampled by the Windows timer_elapsed()
} HiResTimer;
#if defined(_WIN32) || defined(_WIN64)
// Performance-counter frequency in ticks per second; filled by timer_init().
static LARGE_INTEGER g_freq;

// Queries the performance-counter frequency. Must run before timer_elapsed()
// is used, since that function divides by g_freq.
static void timer_init(void) {
  QueryPerformanceFrequency(&g_freq);
}

// Stores the current performance-counter value as the timer's start point.
static void timer_start(HiResTimer *t) {
  LARGE_INTEGER ticks;
  QueryPerformanceCounter(&ticks);
  t->start = ticks.QuadPart;
}

// Returns the seconds elapsed since timer_start(); the sampled counter value
// is also kept in t->now.
static double timer_elapsed(HiResTimer *t) {
  LARGE_INTEGER ticks;
  QueryPerformanceCounter(&ticks);
  t->now = ticks.QuadPart;
  const double delta_ticks = (double)(t->now - t->start);
  const double ticks_per_second = (double)g_freq.QuadPart;
  return delta_ticks / ticks_per_second;
}
#elif defined(__linux__)
void timer_init(void) {}
void timer_start(HiResTimer *t) {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
t->start = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
double timer_elapsed(HiResTimer *t) {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
uint64_t now = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
return (double)(now - t->start) / 1e9;
}
#endif
// ----------------------------- Get HW info --------------
#if defined(_WIN32) || defined(_WIN64)
// Returns the number of physical processor cores reported by
// GetLogicalProcessorInformation(), or 1 when the query fails.
size_t platform_physical_cores(void) {
  DWORD len = 0;
  // The first call, with a NULL buffer, only reports the required size in
  // BYTES (not in elements).
  GetLogicalProcessorInformation(NULL, &len);
  if (len == 0) {
    return 1;
  }
  SYSTEM_LOGICAL_PROCESSOR_INFORMATION *info = malloc(len);
  if (!info) {
    return 1;
  }
  size_t cores = 0;
  if (GetLogicalProcessorInformation(info, &len)) {
    DWORD n = len / sizeof(*info);
    for (DWORD i = 0; i < n; i++) {
      if (info[i].Relationship == RelationProcessorCore) {
        cores++;
      }
    }
  }
  free(info);
  return cores ? cores : 1;
}
#elif defined(__linux__)
// Returns the number of physical processor cores (never less than 1).
// sysconf(_SC_NPROCESSORS_ONLN) counts logical CPUs, so on SMT machines it
// over-reports cores and the caller's "cores * 2" sizing doubles again.
// Instead, each CPU's sysfs sibling list is read and a core is counted once,
// through the CPU that appears first in its own list. When sysfs is not
// available the online logical CPU count is used as a fallback.
size_t platform_physical_cores(void) {
  long online = sysconf(_SC_NPROCESSORS_ONLN);
  long configured = sysconf(_SC_NPROCESSORS_CONF);
  size_t cores = 0;
  for (long cpu = 0; cpu < configured; cpu++) {
    char path[96];
    snprintf(path, sizeof(path),
             "/sys/devices/system/cpu/cpu%ld/topology/thread_siblings_list",
             cpu);
    FILE *f = fopen(path, "r");
    if (!f) {
      continue; // CPU offline or topology not exported
    }
    long first_sibling = -1;
    // The list looks like "0,4" or "0-1"; only the first number is needed.
    if (fscanf(f, "%ld", &first_sibling) == 1 && first_sibling == cpu) {
      cores++;
    }
    fclose(f);
  }
  if (cores > 0) {
    return cores;
  }
  return online > 0 ? (size_t)online : 1;
}
#endif
// Returns a human-readable name for the vector instruction set reported by
// XXH_featureTest(), or "Unknown" for a value not listed here.
const char *get_xxhash_instruction_set(void) {
  const int detected = XXH_featureTest();
  if (detected == XXH_SCALAR) {
    return "Scalar (portable C)";
  }
  if (detected == XXH_SSE2) {
    return "SSE2";
  }
  if (detected == XXH_AVX2) {
    return "AVX2";
  }
  if (detected == XXH_AVX512) {
    return "AVX-512";
  }
  return "Unknown";
}
// -------------------- File IO -------------------
#if defined(_WIN32) || defined(_WIN64)
typedef HANDLE FileHandle;
#define INVALID_FILE_HANDLE INVALID_HANDLE_VALUE

// File open function
// Opens `path` read-only with sequential-scan hinting; other processes may
// keep reading and writing the file. Returns INVALID_FILE_HANDLE on failure.
static FileHandle os_file_open(const char *path) {
  return CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
                     NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
}

// File read function
// Reads up to `count` bytes into `buf` and stores the number of bytes read in
// `*bytes_read`. Returns 0 on success and -1 on a read error.
// End of file is a success with `*bytes_read == 0`, the same contract as the
// POSIX implementation (it used to be reported as -1 here).
static int os_file_read(FileHandle handle, void *buf, size_t count,
                        uint64_t *bytes_read) {
  DWORD got = 0;
  if (!ReadFile(handle, buf, (DWORD)count, &got, NULL)) {
    *bytes_read = 0;
    return -1;
  }
  *bytes_read = got;
  return 0;
}

// File close function
static void os_file_close(FileHandle handle) { CloseHandle(handle); }
#elif defined(__linux__)
typedef int FileHandle;
#define INVALID_FILE_HANDLE (-1)

// File open function
// Opens `path` read-only; O_NOFOLLOW makes the call fail when the last path
// component is a symbolic link. Returns INVALID_FILE_HANDLE on failure.
static FileHandle os_file_open(const char *path) {
  const int flags = O_RDONLY | O_NOFOLLOW;
  return open(path, flags);
}

// File read function
// Reads up to `count` bytes into `buf` and stores the number of bytes read in
// `*bytes_read` (0 at end of file). Returns 0 on success, -1 on error.
static int os_file_read(FileHandle handle, void *buf, size_t count,
                        uint64_t *bytes_read) {
  const ssize_t got = read(handle, buf, count);
  if (got < 0) {
    *bytes_read = 0;
    return -1;
  }
  *bytes_read = (uint64_t)got;
  return 0;
}

// File close function
static void os_file_close(FileHandle handle) {
  close(handle);
}
#endif
// -------------------- Thread abstraction -------------------
// Threads context
typedef struct {
u8 num_threads;
mem_arena *path_arena;
mem_arena *meta_arena;
MPMCQueue *dir_queue;
MPMCQueue *file_queue;
} ScannerContext;
typedef struct {
mem_arena *arena;
MPMCQueue *file_queue;
} WorkerContext;
#if defined(_WIN32) || defined(_WIN64)
typedef HANDLE ThreadHandle;
typedef DWORD(WINAPI *ThreadFunc)(void *);
#define THREAD_RETURN DWORD WINAPI
#define THREAD_RETURN_VALUE 0;
typedef struct {
ThreadHandle handle;
int valid; // Track if thread was successfully created
} Thread;
// Thread function wrapper to handle different return types
#define THREAD_FUNCTION(name) DWORD WINAPI name(LPVOID arg)
// Thread creation function
static int thread_create(Thread *thread, ThreadFunc func, void *arg) {
thread->handle =
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, NULL);
return (thread->handle != NULL) ? 0 : -1;
}
// Thread join function
static int thread_join(Thread *thread) {
return (WaitForSingleObject(thread->handle, INFINITE) == WAIT_OBJECT_0) ? 0
: -1;
}
// Thread close/detach function
static void thread_close(Thread *thread) { CloseHandle(thread->handle); }
// Wait for multiple threads
static int thread_wait_multiple(Thread *threads, size_t count) {
HANDLE handles[64]; // Max 64 threads for Windows
for (size_t i = 0; i < count; i++) {
handles[i] = threads[i].handle;
}
return (WaitForMultipleObjects((DWORD)count, handles, TRUE, INFINITE) ==
WAIT_OBJECT_0)
? 0
: -1;
}
#elif defined(__linux__)
typedef pthread_t ThreadHandle;
typedef void *(*ThreadFunc)(void *);
#define THREAD_RETURN void *
// NOTE: the expansion carries its own trailing semicolon.
#define THREAD_RETURN_VALUE NULL;

typedef struct {
  ThreadHandle handle;
  int valid; // Track if thread was successfully created
} Thread;

// Thread creation function
// Starts `func(arg)` on a new thread. Returns 0 on success or the
// pthread_create() error code; `thread->valid` records the outcome.
static int thread_create(Thread *thread, ThreadFunc func, void *arg) {
  int ret = pthread_create(&thread->handle, NULL, func, arg);
  thread->valid = (ret == 0);
  return ret;
}

// Thread join function
// Blocks until the thread has finished and marks it as no longer valid, so a
// later thread_close() does nothing. Returns the pthread_join() result.
static int thread_join(Thread *thread) {
  int ret = pthread_join(thread->handle, NULL);
  thread->valid = 0;
  return ret;
}

// Thread close/detach function
// Detaches a thread that was created but never joined.
static void thread_close(Thread *thread) {
  if (thread->valid) {
    pthread_detach(thread->handle);
    thread->valid = 0;
  }
}

// Wait for multiple threads
// Joins all `count` threads. Every thread is joined even when an earlier join
// fails, so no worker is left running. Returns 0 when all joins succeeded,
// -1 otherwise.
static int thread_wait_multiple(Thread *threads, size_t count) {
  int status = 0;
  for (size_t i = 0; i < count; i++) {
    if (thread_join(&threads[i]) != 0) {
      status = -1;
    }
  }
  return status;
}
#endif
// ======================== Get file metadata ========================
// -------------------- Path parsing -------------------
// Rewrites `p` in place so that every run of '/' and '\\' characters becomes
// a single '/'. All other characters are kept unchanged.
static void normalize_path(char *p) {
  size_t out = 0;
  int last_was_sep = 0;
  for (size_t in = 0; p[in] != '\0'; in++) {
    const char ch = p[in];
    const int is_sep = (ch == '/' || ch == '\\');
    if (is_sep && last_was_sep) {
      continue; // collapse repeated separators
    }
    p[out++] = is_sep ? '/' : ch;
    last_was_sep = is_sep;
  }
  p[out] = '\0';
}
// Splits `line` into folder paths and stores them, normalised, in `folders`.
// Paths are separated by whitespace; a path may be wrapped in single or
// double quotes to include spaces (an unterminated quote runs to the end of
// the line). Paths longer than MAX_PATHLEN - 1 characters are truncated.
// Empty tokens such as "" are skipped: an empty folder would later be
// expanded to the filesystem root by the path builder.
// Returns the number of paths stored, at most `max_folders`.
static int parse_paths(char *line, char folders[][MAX_PATHLEN],
                       int max_folders) {
  int count = 0;
  char *p = line;
  while (*p && count < max_folders) {
    while (*p && isspace((unsigned char)*p))
      p++;
    if (!*p)
      break;

    char quote = 0;
    if (*p == '"' || *p == '\'')
      quote = *p++;
    char *start = p;
    if (quote) {
      while (*p && *p != quote)
        p++;
    } else {
      while (*p && !isspace((unsigned char)*p))
        p++;
    }

    size_t len = (size_t)(p - start);
    // Step over the closing quote before deciding whether to keep the token,
    // so that skipping an empty token still makes progress.
    if (quote && *p == quote)
      p++;
    if (len == 0)
      continue;

    if (len >= MAX_PATHLEN)
      len = MAX_PATHLEN - 1;
    memcpy(folders[count], start, len);
    folders[count][len] = 0;
    normalize_path(folders[count]);
    count++;
  }
  return count;
}
// ----------------------------- File time -------------------------
#if defined(_WIN32) || defined(_WIN64)
// Formats the epoch time `t` (seconds) as local "YYYY-MM-DD HH:MM:SS" into
// `out`. Writes "N/A" when `t` is 0 or cannot be converted to a local time.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  // localtime_s() returns non-zero on failure and leaves `tm` unusable.
  if (localtime_s(&tm, &tt) != 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm);
}
// ----------------------------- Convert filetime to epoch --------------
// Converts a Windows FILETIME (100-nanosecond intervals since 1601-01-01) to
// Unix epoch seconds. Timestamps before 1970, including an all-zero FILETIME,
// return 0 instead of wrapping around to a huge unsigned value.
static uint64_t filetime_to_epoch(const FILETIME *ft) {
  // Offset between the Windows epoch (1601) and the Unix epoch (1970),
  // expressed in 100-nanosecond intervals.
  const uint64_t epoch_offset = 116444736000000000ULL;
  ULARGE_INTEGER ull;
  ull.LowPart = ft->dwLowDateTime;
  ull.HighPart = ft->dwHighDateTime;
  if (ull.QuadPart < epoch_offset) {
    return 0;
  }
  return (ull.QuadPart - epoch_offset) / 10000000ULL;
}
// Stores the creation and last-write times of `path`, as epoch seconds, in
// `*out_created` and `*out_modified`. Both are set to 0 when the attributes
// cannot be read.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  WIN32_FILE_ATTRIBUTE_DATA attrs;
  if (!GetFileAttributesExA(path, GetFileExInfoStandard, &attrs)) {
    *out_created = 0;
    *out_modified = 0;
    return;
  }
  *out_created = filetime_to_epoch(&attrs.ftCreationTime);
  *out_modified = filetime_to_epoch(&attrs.ftLastWriteTime);
}
#elif defined(__linux__)
// Formats the epoch time `t` (seconds) as local "YYYY-MM-DD HH:MM:SS" into
// `out`. Writes "N/A" when `t` is 0 or cannot be converted to a local time.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  // localtime_r() returns NULL on failure and leaves `tm` unusable.
  if (!localtime_r(&tt, &tm)) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm);
}
// Stores st_ctime and st_mtime of `path`, as reported by stat(), in
// `*out_created` and `*out_modified`. Both are set to 0 when stat() fails.
// NOTE(review): st_ctime is the time of the last status change, not a true
// creation time.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  struct stat info;
  const int ok = (stat(path, &info) == 0);
  *out_created = ok ? (uint64_t)info.st_ctime : 0;
  *out_modified = ok ? (uint64_t)info.st_mtime : 0;
}
#endif
// ----------------------------- File owner ---------------------
#if defined(_WIN32) || defined(_WIN64)
// Writes the owner of `path` into `out` as "DOMAIN\name". Writes "UNKNOWN"
// when the security information or the account name cannot be resolved.
static void get_file_owner(const char *path, char *out, size_t out_sz) {
  PSID owner_sid = NULL;
  PSECURITY_DESCRIPTOR descriptor = NULL;
  int resolved = 0;

  if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                            &owner_sid, NULL, NULL, NULL,
                            &descriptor) == ERROR_SUCCESS) {
    char account[64], authority[64];
    DWORD account_len = sizeof(account);
    DWORD authority_len = sizeof(authority);
    SID_NAME_USE kind;
    if (LookupAccountSidA(NULL, owner_sid, account, &account_len, authority,
                          &authority_len, &kind)) {
      snprintf(out, out_sz, "%s\\%s", authority, account);
      resolved = 1;
    }
  }
  if (!resolved) {
    snprintf(out, out_sz, "UNKNOWN");
  }
  // The descriptor returned by GetNamedSecurityInfoA is released here.
  if (descriptor) {
    LocalFree(descriptor);
  }
}

// Platform-neutral entry point: resolves the owner of `path` into
// `out_owner` (at most `out_owner_size` bytes).
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  get_file_owner(path, out_owner, out_owner_size);
}
#elif defined(__linux__)
// Writes the login name for `uid` into `out`, or "UNKNOWN" when the user
// database has no entry for it (or the lookup fails).
// getpwuid_r() is used because this function is reached from several scanner
// threads at once; getpwuid() returns a pointer to shared static storage and
// is not safe to call concurrently.
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
  struct passwd entry;
  struct passwd *found = NULL;
  char scratch[4096]; // backing storage for the strings in `entry`
  if (getpwuid_r(uid, &entry, scratch, sizeof(scratch), &found) == 0 &&
      found) {
    snprintf(out, out_sz, "%s", found->pw_name);
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }
}
// Platform-neutral entry point: resolves the owner of `path` into
// `out_owner` (at most `out_owner_size` bytes). Writes "UNKNOWN" when stat()
// fails on the path.
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  struct stat info;
  if (stat(path, &info) != 0) {
    snprintf(out_owner, out_owner_size, "UNKNOWN");
    return;
  }
  get_file_owner(info.st_uid, out_owner, out_owner_size);
}
#endif
// ----------------------------- Scan helpers -----------------------------
// Metadata for one file found by scan_folder(); the scanner queues it for the
// hash workers, which format it into an output line.
typedef struct FileEntry {
char *path; // normalised file path, stored in the scanner's path arena
uint64_t size_bytes; // file size in bytes
uint64_t created_time; // epoch
uint64_t modified_time; // epoch seconds
char owner[128]; // resolved owner name
} FileEntry;
// Scratch buffer holding "<base><separator><filename>". path_builder_init()
// writes the base and separator once; path_builder_set_filename() then
// overwrites only the filename part for each directory entry.
typedef struct {
char buffer[MAX_PATHLEN];
char *base_end; // Points to end of base path
char *filename_pos; // Points to where filename should be written
size_t base_len; // length of the base path, without the separator
} PathBuilder;
// Prepares `pb` for building paths below `base`: copies the base, appends the
// platform's path separator and positions `filename_pos` right after it.
// A base that would not leave room for the separator and the terminating NUL
// is truncated to MAX_PATHLEN - 2 characters; it used to overflow the buffer.
static void path_builder_init(PathBuilder *pb, const char *base) {
  size_t len = strlen(base);
  if (len > MAX_PATHLEN - 2) {
    len = MAX_PATHLEN - 2;
  }
  pb->base_len = len;
  memcpy(pb->buffer, base, len);
  pb->base_end = pb->buffer + len;
#if defined(_WIN32) || defined(_WIN64)
  *pb->base_end = '\\';
#elif defined(__linux__)
  *pb->base_end = '/';
#endif
  // Ensure null termination
  *(pb->base_end + 1) = '\0';
  pb->filename_pos = pb->base_end + 1;
}
// Writes `filename` (`name_len` characters) after the base path held in `pb`,
// replacing any previous filename, and NUL-terminates the result.
// The name is truncated if it would run past the end of the buffer; callers
// that need the full path should check the length before calling.
static void path_builder_set_filename(PathBuilder *pb, const char *filename,
                                      size_t name_len) {
  const size_t used = (size_t)(pb->filename_pos - pb->buffer);
  if (used >= MAX_PATHLEN) {
    return; // no room at all, leave the buffer untouched
  }
  const size_t room = MAX_PATHLEN - 1 - used; // keep one byte for the NUL
  if (name_len > room) {
    name_len = room;
  }
  memcpy(pb->filename_pos, filename, name_len);
  pb->filename_pos[name_len] = '\0'; // Ensure null termination
}
// Copies the full path currently held in `pb` (base, separator, filename and
// terminating NUL) into memory pushed on `arena` and returns the copy.
// `zero` is forwarded unchanged to arena_push().
static char *path_builder_dup_arena(PathBuilder *pb, mem_arena *arena,
                                    bool zero) {
  const size_t prefix_len = (size_t)(pb->filename_pos - pb->buffer);
  const size_t name_len = strlen(pb->filename_pos);
  const size_t bytes = prefix_len + name_len + 1; // +1 for the NUL
  char *copy = arena_push(&arena, bytes, zero);
  memcpy(copy, pb->buffer, bytes);
  return copy;
}
#if defined(_WIN32) || defined(_WIN64)
// Lists the entries of directory `base` (Windows implementation).
// Sub-directories are queued on ctx->dir_queue for further scanning; every
// other entry gets a FileEntry (path, times, owner, size) that is queued on
// ctx->file_queue. "." and "..", and anything flagged as a reparse point,
// are skipped.
void scan_folder(const char *base, ScannerContext *ctx) {
  PathBuilder pb;
  path_builder_init(&pb, base);

  // Search pattern: base + separator + "*".
  char pattern[MAX_PATHLEN];
  const size_t prefix_len = pb.base_len + 1;
  memcpy(pattern, pb.buffer, prefix_len);
  memcpy(pattern + prefix_len, "*", 2); // "*" plus its NUL

  WIN32_FIND_DATAA found;
  HANDLE find = FindFirstFileA(pattern, &found);
  if (find == INVALID_HANDLE_VALUE) {
    return;
  }

  do {
    const char *name = found.cFileName;
    const DWORD attrs = found.dwFileAttributes;

    // Skip . and ..
    const int is_dot = (name[0] == '.' && name[1] == 0);
    const int is_dotdot = (name[0] == '.' && name[1] == '.' && name[2] == 0);
    if (is_dot || is_dotdot) {
      continue;
    }
    if (attrs & FILE_ATTRIBUTE_REPARSE_POINT) {
      continue;
    }

    const size_t name_len = strlen(name);
    path_builder_set_filename(&pb, name, name_len);

    if (attrs & FILE_ATTRIBUTE_DIRECTORY) {
      char *subdir = path_builder_dup_arena(&pb, ctx->path_arena, false);
      mpmc_push_work(ctx->dir_queue, subdir);
      continue;
    }

    atomic_fetch_add(&g_files_found, 1);
    FileEntry *entry = arena_push(&ctx->meta_arena, sizeof(FileEntry), true);

    // Normalise a copy so that pb.buffer keeps its native separators for the
    // metadata lookups below.
    char normalized[MAX_PATHLEN];
    memcpy(normalized, pb.buffer,
           (pb.filename_pos - pb.buffer) + name_len + 1);
    normalize_path(normalized);
    entry->path = arena_push(&ctx->path_arena, strlen(normalized) + 1, false);
    strcpy(entry->path, normalized);

    platform_get_file_times(pb.buffer, &entry->created_time,
                            &entry->modified_time);
    platform_get_file_owner(pb.buffer, entry->owner, sizeof(entry->owner));
    entry->size_bytes =
        ((uint64_t)found.nFileSizeHigh << 32) | found.nFileSizeLow;
    mpmc_push(ctx->file_queue, entry);
  } while (FindNextFileA(find, &found));

  FindClose(find);
}
#elif defined(__linux__)
// To test
// Choice 1
// static int platform_get_file_times_fd(int dir_fd, const char *name,
// time_t *created, time_t *modified) {
// struct stat st;
// if (fstatat(dir_fd, name, &st, 0) == 0) {
// *created = st.st_ctime; // or st.st_birthtime on systems that support it
// *modified = st.st_mtime;
// return 0;
// }
// return -1;
// }
//
// static int platform_get_file_owner_fd(int dir_fd, const char *name, char
// *owner,
// size_t owner_size) {
// struct stat st;
// if (fstatat(dir_fd, name, &st, 0) == 0) {
// // You'll need to implement uid to username conversion
// // This is just a placeholder
// snprintf(owner, owner_size, "uid:%d", st.st_uid);
// return 0;
// }
// return -1;
// }
//
// void scan_folder(const char *base, ScannerContext *ctx) {
// PathBuilder pb;
// path_builder_init(&pb, base);
//
// int dir_fd = open(base, O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
// if (dir_fd == -1)
// return;
//
// DIR *dir = fdopendir(dir_fd);
// if (!dir) {
// close(dir_fd);
// return;
// }
//
// struct dirent *entry;
//
// while ((entry = readdir(dir)) != NULL) {
// if (entry->d_name[0] == '.' &&
// (entry->d_name[1] == 0 ||
// (entry->d_name[1] == '.' && entry->d_name[2] == 0)))
// continue;
//
// size_t name_len = strlen(entry->d_name);
// path_builder_set_filename(&pb, entry->d_name, name_len);
//
// int file_type = DT_UNKNOWN;
// #ifdef _DIRENT_HAVE_D_TYPE
// file_type = entry->d_type;
// #endif
//
// // Fast path using d_type
// if (file_type != DT_UNKNOWN) {
// if (file_type == DT_LNK)
// continue; // Skip symlinks
//
// if (file_type == DT_DIR) {
// char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false);
// mpmc_push_work(ctx->dir_queue, dir_path);
// continue;
// }
//
// if (file_type == DT_REG) {
// atomic_fetch_add(&g_files_found, 1);
// FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry),
// true);
//
// // Use fstatat for file info
// struct stat st;
// if (fstatat(dir_fd, entry->d_name, &st, 0) == 0) {
// // Convert times using fd variant
// platform_get_file_times_fd(dir_fd, entry->d_name,
// &fe->created_time,
// &fe->modified_time);
// platform_get_file_owner_fd(dir_fd, entry->d_name, fe->owner,
// sizeof(fe->owner));
// fe->size_bytes = (uint64_t)st.st_size;
//
// // Normalize path
// char temp_path[MAX_PATHLEN];
// memcpy(temp_path, pb.buffer,
// (pb.filename_pos - pb.buffer) + name_len + 1);
// normalize_path(temp_path);
//
// fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1,
// false); strcpy(fe->path, temp_path);
//
// mpmc_push(ctx->file_queue, fe);
// }
// continue;
// }
// }
//
// // Fallback for unknown types
// struct stat st;
// if (fstatat(dir_fd, entry->d_name, &st, AT_SYMLINK_NOFOLLOW) == 0) {
// if (S_ISLNK(st.st_mode))
// continue;
//
// if (S_ISDIR(st.st_mode)) {
// char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false);
// mpmc_push_work(ctx->dir_queue, dir_path);
// } else if (S_ISREG(st.st_mode)) {
// atomic_fetch_add(&g_files_found, 1);
// FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry),
// true);
//
// platform_get_file_times(pb.buffer, &fe->created_time,
// &fe->modified_time);
// platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner));
// fe->size_bytes = (uint64_t)st.st_size;
//
// char temp_path[MAX_PATHLEN];
// memcpy(temp_path, pb.buffer,
// (pb.filename_pos - pb.buffer) + name_len + 1);
// normalize_path(temp_path);
//
// fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1,
// false); strcpy(fe->path, temp_path);
//
// mpmc_push(ctx->file_queue, fe);
// }
// }
// }
//
// closedir(dir); // Closes dir_fd automatically
// }
// Choice 2
// Lists the entries of directory `base` (Linux implementation).
// Sub-directories are queued on ctx->dir_queue for further scanning and
// regular files get a FileEntry (path, times, owner, size) that is queued on
// ctx->file_queue. "." and "..", symbolic links and every other file type
// (FIFOs, sockets, device nodes) are skipped: opening and reading those for
// hashing can block forever. Entries whose full path would not fit in
// MAX_PATHLEN are skipped as well.
void scan_folder(const char *base, ScannerContext *ctx) {
  PathBuilder pb;
  path_builder_init(&pb, base);

  DIR *dir = opendir(base);
  if (!dir) {
    return;
  }

  const size_t prefix_len = (size_t)(pb.filename_pos - pb.buffer);
  struct dirent *entry;
  while ((entry = readdir(dir)) != NULL) {
    const char *name = entry->d_name;
    // Skip . and ..
    if (name[0] == '.' &&
        (name[1] == 0 || (name[1] == '.' && name[2] == 0))) {
      continue;
    }

    const size_t name_len = strlen(name);
    // prefix + name + NUL must fit both in pb.buffer and in the scratch copy.
    if (prefix_len + name_len + 1 > MAX_PATHLEN) {
      continue;
    }
    path_builder_set_filename(&pb, name, name_len);

    // A single lstat() is enough: symlinks are skipped, and for anything that
    // is not a symlink lstat() and stat() report the same data.
    struct stat st;
    if (lstat(pb.buffer, &st) != 0) {
      continue;
    }

    if (S_ISDIR(st.st_mode)) {
      char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false);
      mpmc_push_work(ctx->dir_queue, dir_path);
      continue;
    }
    if (!S_ISREG(st.st_mode)) {
      continue; // symlink, FIFO, socket, device node, ...
    }

    atomic_fetch_add(&g_files_found, 1);
    FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true);

    // Create a temporary copy for normalization.
    // NOTE(review): normalize_path() also turns '\\' into '/', although a
    // backslash is a legal file-name character on Linux; such a file would be
    // recorded under a path that cannot be opened - confirm whether this
    // matters for the intended inputs.
    char temp_path[MAX_PATHLEN];
    memcpy(temp_path, pb.buffer, prefix_len + name_len + 1);
    normalize_path(temp_path);
    fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, false);
    strcpy(fe->path, temp_path);

    // Metadata comes from the lstat() result above instead of two more
    // stat() calls on the same path.
    fe->created_time = (uint64_t)st.st_ctime;
    fe->modified_time = (uint64_t)st.st_mtime;
    get_file_owner(st.st_uid, fe->owner, sizeof(fe->owner));
    fe->size_bytes = (uint64_t)st.st_size;
    mpmc_push(ctx->file_queue, fe);
  }
  closedir(dir);
}
#endif
// ------------------------- Scan worker --------------------------------
// Scanner thread body: keeps popping directories from the queue and scanning
// them until mpmc_pop() returns NULL. `arg` is this thread's ScannerContext.
static THREAD_RETURN scan_worker(void *arg) {
  ScannerContext *scanner = (ScannerContext *)arg;
  char *folder;
  while ((folder = mpmc_pop(scanner->dir_queue)) != NULL) {
    scan_folder(folder, scanner);
    mpmc_task_done(scanner->dir_queue, scanner->num_threads);
  }
  return THREAD_RETURN_VALUE;
}
// ----------------------------- Hashing helpers -----------------------------
// Streams the file at `path` through XXH3-128 using `buf` (READ_BLOCK bytes)
// as the read buffer, and writes the digest to `out_hex` as 32 lowercase hex
// characters plus NUL. Writes "ERROR" when the file cannot be opened.
// Every chunk read is added to g_bytes_processed.
static void xxh3_hash_file_stream(const char *path, char *out_hex,
                                  unsigned char *buf) {
  FileHandle file = os_file_open(path);
  if (file == INVALID_FILE_HANDLE) {
    strcpy(out_hex, "ERROR");
    return;
  }

  XXH3_state_t hasher;
  XXH3_128bits_reset(&hasher);

  for (;;) {
    uint64_t chunk = 0;
    // Stop on a failed read as well as on a zero-byte read.
    if (os_file_read(file, buf, READ_BLOCK, &chunk) != 0 || chunk == 0) {
      break;
    }
    XXH3_128bits_update(&hasher, buf, (size_t)chunk);
    atomic_fetch_add(&g_bytes_processed, chunk);
  }
  os_file_close(file);

  const XXH128_hash_t digest = XXH3_128bits_digest(&hasher);
  snprintf(out_hex, HASH_STRLEN, "%016llx%016llx",
           (unsigned long long)digest.high64,
           (unsigned long long)digest.low64);
}
// ------------------------- Hash worker --------------------------------
static THREAD_RETURN hash_worker(void *arg) {
WorkerContext *ctx = (WorkerContext *)arg;
unsigned char *buf = (unsigned char *)malloc(READ_BLOCK);
for (;;) {
FileEntry *fe = mpmc_pop(ctx->file_queue);
if (!fe)
break;
char hash[HASH_STRLEN];
xxh3_hash_file_stream(fe->path, hash, buf);
char created[32], modified[32];
format_time(fe->created_time, created, sizeof(created));
format_time(fe->modified_time, modified, sizeof(modified));
double size_kib = (double)fe->size_bytes / 1024.0;
char stack_buf[1024];
int len =
snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
hash, fe->path, size_kib, created, modified, fe->owner);
char *dst = arena_push(&ctx->arena, len, false);
memcpy(dst, stack_buf, len);
atomic_fetch_add(&g_files_hashed, 1);
}
free(buf);
return THREAD_RETURN_VALUE;
}
// ----------------------------- Progress display ---------------------------
// Progress thread body: every 100 ms prints either the scanning counters or,
// once scanning is done, a progress bar, together with a throughput figure
// that is refreshed every 0.5 s. Ends when scanning is done and every file
// found has been hashed.
static THREAD_RETURN progress_thread(void *arg) {
  (void)arg; // Unused parameter
  HiResTimer progress_timer;
  timer_start(&progress_timer);
  uint64_t last_bytes = atomic_load(&g_bytes_processed);
  double last_time = 0.0;
  double displayed_speed = 0.0;
  const double sample_interval = 0.5;
  for (;;) {
    // Read the "scan done" flag first: once it is seen as set, the found
    // counter read afterwards is final, so `hashed` cannot exceed it. Reading
    // the counters first could pair a stale `found` with a newer `hashed`,
    // which ended the loop early and could overrun the bar buffer.
    int scan_done = atomic_load(&g_scan_done);
    uint64_t found = atomic_load(&g_files_found);
    uint64_t hashed = atomic_load(&g_files_hashed);
    uint64_t bytes = atomic_load(&g_bytes_processed);
    double t = timer_elapsed(&progress_timer);
    if (last_time == 0.0) {
      last_time = t;
      last_bytes = bytes;
    }
    double dt = t - last_time;
    if (dt >= sample_interval) {
      uint64_t db = bytes - last_bytes;
      if (db > 0 && dt > 0.0001) {
        displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
      }
      last_bytes = bytes;
      last_time = t;
    }
    if (!scan_done) {
      printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
             (unsigned long long)found, (unsigned long long)hashed,
             displayed_speed);
    } else {
      double pct = found ? (double)hashed / (double)found : 0.0;
      if (pct > 1.0)
        pct = 1.0; // defensive: never draw more than a full bar
      int barw = 40;
      int filled = (int)(pct * barw);
      char bar[64];
      int p = 0;
      bar[p++] = '[';
      for (int i = 0; i < filled; i++)
        bar[p++] = '#';
      for (int i = filled; i < barw; i++)
        bar[p++] = '.';
      bar[p++] = ']';
      bar[p] = 0;
      printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
             (unsigned long long)hashed, (unsigned long long)found,
             displayed_speed);
    }
    fflush(stdout);
    if (scan_done && hashed == found)
      break;
    sleep_ms(100);
  }
  printf("\n");
  return THREAD_RETURN_VALUE;
}

View File

@@ -1,53 +0,0 @@
#pragma once // ensure that a given header file is included only once in a
// single compilation unit
#include "arena.h"
#include "base.h"
#include "lf_mpmc.h"
#include "arena.c"
// xxhash include
#define XXH_INLINE_ALL
#include "xxh_x86dispatch.h"
// ----------------------------- Config -------------------------------------
#define FILE_HASHES_TXT "file_hashes.txt"
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
#define MAX_PATHLEN 4096
#define READ_BLOCK (64 * 1024) // 64KB blocks
// ----------------------------- Data types ---------------------------------
// Timer
// Holds a start and an end performance-counter reading.
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER end;
} HiResTimer;
// NOTE(review): presumably the performance-counter frequency used to convert
// readings to seconds - its initialisation is not visible here.
static LARGE_INTEGER g_qpc_freq;
// File entry
// Metadata recorded for one file.
typedef struct FileEntry {
char *path; // file path
uint64_t size_bytes; // file size in bytes
uint64_t created_time; // epoch
uint64_t modified_time; // epoch seconds
char owner[128]; // resolved owner name
} FileEntry;
// Threads context
// State handed to a scanner thread.
typedef struct {
u8 num_threads; // NOTE(review): 8-bit field, cannot hold counts above 255
mem_arena *path_arena; // NOTE(review): presumably stores path strings
mem_arena *meta_arena; // NOTE(review): presumably stores FileEntry records
MPMCQueue *dir_queue; // queue of directories
MPMCQueue *file_queue; // queue of files
} ScannerContext;
// State handed to a hash worker thread.
typedef struct {
mem_arena *arena;
MPMCQueue *file_queue;
} WorkerContext;

View File

@@ -1,678 +0,0 @@
#include "platform.h"
// ----------------------------- Globals ------------------------------------
static atomic_uint_fast64_t g_bytes_processed = 0;
// Growable array of file entries managed by add_entry() / free_entries().
FileEntry *g_entries = NULL;
size_t g_entry_count = 0; // number of entries in use
size_t g_entry_capacity = 0; // number of entries allocated
// ----------------------------- Utils --------------------------------------
// Print `msg` followed by the description of the current errno value on
// stderr (via perror), then terminate the process with exit status 1.
// This function does not return.
static void perror_exit(const char *msg)
{
  perror(msg);
  exit(1);
}
// malloc wrapper with an abort-on-failure policy: returns a block of `n`
// bytes, or reports the failure through perror_exit("malloc"), which
// terminates the process.
static void *xmalloc(size_t n) {
  void *block = malloc(n);
  if (block != NULL)
    return block;
  perror_exit("malloc");
  return block;
}
// Append a deep copy of `src` to the global g_entries array.
//
// The path string is duplicated (the copy is owned by the array and released
// by free_entries); the owner name is copied into the fixed-size field and
// always NUL-terminated. The array grows by doubling, starting at 1024 slots.
//
// This function is called concurrently by every scan thread, so the shared
// array, count and capacity are protected by a function-local mutex.
// Allocation failures terminate the process through perror_exit.
static void add_entry(const FileEntry *src) {
  static pthread_mutex_t entries_lock = PTHREAD_MUTEX_INITIALIZER;

  // Duplicate the path before taking the lock to keep the critical section
  // short.
  char *path_copy = NULL;
  if (src->path) {
    path_copy = strdup(src->path);
    if (!path_copy)
      perror_exit("strdup");
  }

  pthread_mutex_lock(&entries_lock);
  if (g_entry_count + 1 > g_entry_capacity) {
    size_t new_capacity = g_entry_capacity ? g_entry_capacity * 2 : 1024;
    FileEntry *grown = realloc(g_entries, sizeof(FileEntry) * new_capacity);
    if (!grown)
      perror_exit("realloc");
    g_entries = grown;
    g_entry_capacity = new_capacity;
  }
  FileEntry *dst = &g_entries[g_entry_count++];
  memset(dst, 0, sizeof(*dst));
  dst->size_bytes = src->size_bytes;
  dst->created_time = src->created_time;
  dst->modified_time = src->modified_time;
  dst->path = path_copy;
  strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
  dst->owner[sizeof(dst->owner) - 1] = '\0';
  pthread_mutex_unlock(&entries_lock);
}
// Release every path string owned by g_entries, then the array itself, and
// reset the global bookkeeping so the array reads as empty afterwards.
static void free_entries(void) {
  size_t idx = 0;
  while (idx < g_entry_count) {
    free(g_entries[idx].path);
    idx++;
  }
  free(g_entries);
  g_entry_capacity = 0;
  g_entry_count = 0;
  g_entries = NULL;
}
// ----------------------------- Owner lookup ------------------------------
// Resolve a numeric user id to its login name.
//
// Writes the name (truncated to fit, always NUL-terminated when out_sz > 0)
// into `out`, or the literal "UNKNOWN" when the uid has no passwd entry.
//
// getpwuid() returns a pointer to storage shared by all callers, and this
// function is invoked from several scan threads at once, so the lookup and
// the copy out of that storage are serialized with a function-local mutex.
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
  static pthread_mutex_t pw_lock = PTHREAD_MUTEX_INITIALIZER;

  pthread_mutex_lock(&pw_lock);
  const struct passwd *pw = getpwuid(uid);
  if (pw && pw->pw_name) {
    snprintf(out, out_sz, "%s", pw->pw_name);
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }
  pthread_mutex_unlock(&pw_lock);
}
// ----------------------------- Format time helper -------------------------
// Format an epoch-seconds timestamp as local time "YYYY-MM-DD HH:MM:SS".
//
// `out` receives the text (out_sz is its capacity). A timestamp of 0 is
// rendered as "N/A". "N/A" is also written when the time cannot be converted
// to local time or when the formatted text does not fit in `out`, so the
// buffer never holds indeterminate content.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = localtime_r(&tt, &tm) ? 1 : 0;
#endif
  // strftime returns 0 when the result does not fit and leaves `out`
  // unspecified in that case.
  if (!converted || strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}
// --------------- parallel directory scanning ----------------
// Add queue helper functions
// Append a copy of `path` to the tail of the directory queue and wake one
// waiting scan worker.
//
// The job node and the duplicated path are heap-allocated; dirqueue_pop frees
// the node and hands the path to its caller. Allocation failure terminates
// the process through perror_exit instead of dereferencing NULL.
static void dirqueue_push(DirQueue *q, const char *path) {
  DirJob *job = malloc(sizeof(*job));
  if (!job)
    perror_exit("malloc");
  job->path = strdup(path);
  if (!job->path)
    perror_exit("strdup");
  job->next = NULL;

  pthread_mutex_lock(&q->mutex);
  if (q->tail)
    q->tail->next = job;
  else
    q->head = job;
  q->tail = job;
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->mutex);
}
// Block until a directory is available or the queue has been stopped.
//
// Returns the heap-allocated path of the next directory (the caller frees
// it) and counts the caller as an active worker, or returns NULL once the
// stop flag is set.
static char *dirqueue_pop(DirQueue *q) {
  pthread_mutex_lock(&q->mutex);
  for (;;) {
    if (q->head || q->stop)
      break;
    pthread_cond_wait(&q->cond, &q->mutex);
  }

  if (!q->stop) {
    DirJob *front = q->head;
    q->head = front->next;
    if (q->head == NULL)
      q->tail = NULL;
    q->active_workers++;
    pthread_mutex_unlock(&q->mutex);

    // The node is no longer reachable from the queue; only its path
    // survives.
    char *dir_path = front->path;
    free(front);
    return dir_path;
  }

  pthread_mutex_unlock(&q->mutex);
  return NULL;
}
// Mark one directory as fully processed. When the queue is empty and no
// worker is still active, set the stop flag and wake every waiter so all
// scan workers can exit.
static void dirqueue_done(DirQueue *q) {
  pthread_mutex_lock(&q->mutex);
  q->active_workers--;
  int drained = (q->head == NULL) && (q->active_workers == 0);
  if (drained) {
    q->stop = 1;
    pthread_cond_broadcast(&q->cond);
  }
  pthread_mutex_unlock(&q->mutex);
}
// Scanning directory worker thread function
// Declared here because the definition appears later in the file.
void scan_folder_posix_parallel(const char *base, DirQueue *q);

// Thread entry point for directory scanning.
//
// `arg` is the shared DirQueue. The worker repeatedly pops a directory, scans
// it (which may push sub-directories back onto the queue), frees the path and
// reports completion; it exits when dirqueue_pop returns NULL.
//
// The function has the exact signature pthread_create expects
// (void *(*)(void *)) so that it is not invoked through an incompatible
// function-pointer type. Always returns NULL.
static void *scan_worker(void *arg) {
  DirQueue *q = arg;
  for (;;) {
    char *dir = dirqueue_pop(q);
    if (!dir)
      break;
    scan_folder_posix_parallel(dir, q);
    free(dir);
    dirqueue_done(q);
  }
  return NULL;
}
// Scanning directory function
// Scan one directory (non-recursively).
//
// Sub-directories are pushed onto `q` for other workers; regular files are
// recorded through add_entry with their size, timestamps and owner name.
// Entries of any other type (symlinks, devices, ...) are ignored because
// lstat is used and only S_ISDIR / S_ISREG are handled. Directories that
// cannot be opened and entries that cannot be stat'ed are silently skipped.
//
// Entries whose full path would not fit in MAX_PATHLEN are skipped rather
// than processed under a truncated (and therefore different) path.
void scan_folder_posix_parallel(const char *base, DirQueue *q) {
  DIR *d = opendir(base);
  if (!d)
    return;
  struct dirent *ent;
  while ((ent = readdir(d))) {
    if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
      continue;
    char full[MAX_PATHLEN];
    int n = snprintf(full, sizeof(full), "%s/%s", base, ent->d_name);
    if (n < 0 || (size_t)n >= sizeof(full))
      continue; // path too long for the buffer
    struct stat st;
    if (lstat(full, &st) != 0)
      continue;
    if (S_ISDIR(st.st_mode)) {
      dirqueue_push(q, full);
    } else if (S_ISREG(st.st_mode)) {
      FileEntry fe;
      memset(&fe, 0, sizeof(fe));
      normalize_path(full);
      fe.path = full; // add_entry duplicates the string
      fe.size_bytes = (uint64_t)st.st_size;
      // st_ctime is the inode-change time; it is stored in the "created"
      // field as the closest value stat provides.
      fe.created_time = (uint64_t)st.st_ctime;
      fe.modified_time = (uint64_t)st.st_mtime;
      get_file_owner(st.st_uid, fe.owner, sizeof(fe.owner));
      add_entry(&fe);
    }
  }
  closedir(d);
}
// ----------------------------- Job queue ----------------------------------
// Put a JobQueue into its empty, running state: no nodes, zero count, stop
// flag cleared, mutex and condition variable initialized with default
// attributes.
static void jobqueue_init(JobQueue *q) {
  pthread_mutex_init(&q->mutex, NULL);
  pthread_cond_init(&q->cond, NULL);
  q->head = NULL;
  q->tail = NULL;
  q->stop = 0;
  atomic_store(&q->count, 0);
}
// Link `job` at the tail of the queue, bump the job counter and wake one
// waiting worker. The queue takes over the node; it is handed back by
// jobqueue_pop.
static void jobqueue_push(JobQueue *q, Job *job) {
  pthread_mutex_lock(&q->mutex);
  job->next = NULL;
  if (q->tail == NULL) {
    q->head = job;
  } else {
    q->tail->next = job;
  }
  q->tail = job;
  atomic_fetch_add(&q->count, 1);
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->mutex);
}
// Block until a job is available or the queue is stopped.
//
// Returns the job at the head of the queue (decrementing the job counter),
// or NULL when the queue is empty and has been stopped. Jobs still queued at
// stop time are handed out before NULL is returned.
static Job *jobqueue_pop(JobQueue *q) {
  pthread_mutex_lock(&q->mutex);
  while (q->head == NULL && !q->stop) {
    pthread_cond_wait(&q->cond, &q->mutex);
  }

  // The wait loop ends with either a job present or the stop flag set, so an
  // empty head here means "stopped and drained".
  Job *front = q->head;
  if (front == NULL) {
    pthread_mutex_unlock(&q->mutex);
    return NULL;
  }

  q->head = front->next;
  if (q->head == NULL) {
    q->tail = NULL;
  }
  pthread_mutex_unlock(&q->mutex);

  atomic_fetch_sub(&q->count, 1);
  return front;
}
// Raise the stop flag under the queue lock and wake every waiting worker so
// each can observe it.
static void jobqueue_stop(JobQueue *q)
{
  pthread_mutex_lock(&q->mutex);
  q->stop = 1;
  pthread_cond_broadcast(&q->cond);
  pthread_mutex_unlock(&q->mutex);
}
// ----------------------------- Hashing helpers -----------------------------
// Compute the XXH3 128-bit hash of the file at `path` by streaming it in
// READ_BLOCK-sized chunks.
//
// out_hex must have room for HASH_STRLEN bytes. On success it receives the
// 32-character lowercase hex digest (high 64 bits first). If the file cannot
// be opened, a buffer or hash state cannot be allocated, or a read fails
// part-way through, it receives the string "ERROR" instead of a digest of
// partial data.
//
// Every chunk read is added to the global g_bytes_processed counter.
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
  int ok = 0;
  XXH3_state_t *state = NULL;
  unsigned char *buf = NULL;
  XXH128_hash_t h;
  ssize_t r;

  int fd = open(path, O_RDONLY);
  if (fd < 0)
    goto done;
  state = XXH3_createState();
  buf = malloc(READ_BLOCK);
  if (!state || !buf)
    goto done;

  XXH3_128bits_reset(state);
  while ((r = read(fd, buf, READ_BLOCK)) > 0) {
    XXH3_128bits_update(state, buf, (size_t)r);
    atomic_fetch_add(&g_bytes_processed, (uint64_t)r);
  }
  if (r < 0)
    goto done; // read error: do not report a hash of partial content

  h = XXH3_128bits_digest(state);
  snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
           (unsigned long long)h.low64);
  ok = 1;

done:
  if (!ok)
    strcpy(out_hex, "ERROR");
  if (state)
    XXH3_freeState(state);
  free(buf);
  if (fd >= 0)
    close(fd);
}
// ----------------------------- Worker --------------------------------------
// Hash-pool thread entry point.
//
// `argp` is the shared WorkerArg. Each iteration pops a job, hashes the
// job's file, appends one tab-separated record
// (hash, path, size in KiB, created, modified, owner) to FILE_HASHES_TXT,
// bumps the done counter and frees the job node. The output file is opened in
// append mode and closed again for every record while holding a mutex shared
// by all workers, so records from different threads never interleave.
// If the file cannot be opened the record is dropped, but the job still
// counts as done. On exit the live-worker counter is decremented.
static void *worker_thread_posix(void *argp) {
  // One lock for the whole process: serializes appends to the output file.
  static pthread_mutex_t append_mutex = PTHREAD_MUTEX_INITIALIZER;

  WorkerArg *w = (WorkerArg *)argp;
  JobQueue *q = w->queue;
  Job *job;

  while ((job = jobqueue_pop(q)) != NULL) {
    char hex[HASH_STRLEN];
    xxh3_hash_file_stream(job->file->path, hex);

    pthread_mutex_lock(&append_mutex);
    FILE *hf = fopen(FILE_HASHES_TXT, "a");
    if (hf != NULL) {
      char created[32], modified[32];
      format_time(job->file->created_time, created, sizeof(created));
      format_time(job->file->modified_time, modified, sizeof(modified));
      double size_kib = (double)job->file->size_bytes / (1024.0);
      fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib,
              created, modified, job->file->owner);
      fclose(hf);
    }
    pthread_mutex_unlock(&append_mutex);

    atomic_fetch_add(w->done_counter, 1);
    free(job);
  }

  atomic_fetch_sub(w->live_workers, 1);
  return NULL;
}
// ----------------------------- Progress display ---------------------------
// Redraw a single-line progress bar on stdout: a carriage return, a
// 40-column bar ('#' for the completed share, rounded to the nearest column,
// spaces for the rest), the percentage and the done/total counts. A total of
// zero is shown as 0%. Output is flushed immediately.
static void print_progress(size_t done, size_t total) {
  const int barw = 40;
  double pct = 0.0;
  if (total) {
    pct = (double)done / (double)total;
  }
  int filled = (int)(pct * barw + 0.5);

  printf("\r[");
  int col = 0;
  while (col < filled) {
    putchar('#');
    ++col;
  }
  // `col` continues from `filled`, so the padding stops at the bar width.
  while (col < barw) {
    putchar(' ');
    ++col;
  }
  printf("] %6.2f%% (%zu / %zu) ", pct * 100.0, done, total);
  fflush(stdout);
}
// ----------------------------- Helpers: load/save --------------------------
// Return 1 when stat() succeeds on `path`, 0 otherwise (the path is missing
// or cannot be examined).
static int file_exists(const char *path) {
  struct stat info;
  if (stat(path, &info) != 0) {
    return 0;
  }
  return 1;
}
// Write the path of every entry in g_entries to `list_path`, one per line,
// replacing any existing file. If the file cannot be opened the failure is
// reported with perror and nothing is written.
static void save_file_list(const char *list_path) {
  FILE *out = fopen(list_path, "w");
  if (out == NULL) {
    perror("fopen file_list");
    return;
  }
  size_t idx = 0;
  while (idx < g_entry_count) {
    fprintf(out, "%s\n", g_entries[idx].path);
    idx++;
  }
  fclose(out);
}
static void load_file_list(const char *list_path) {
FILE *f = fopen(list_path, "r");
if (!f)
return;
char line[MAX_PATHLEN];
while (fgets(line, sizeof(line), f)) {
line[strcspn(line, "\r\n")] = 0;
FileEntry fe;
memset(&fe, 0, sizeof(fe));
fe.path = line;
/* Populate metadata from filesystem */
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
add_entry(&fe);
}
fclose(f);
}
// Look up an existing hash for resume.
// Linear search of the hash file: returns 1 if a hash is recorded for `path`
// (and writes it into out_hex), 0 otherwise.
// Search `hashfile` for a record whose path column equals `path`.
//
// Records are the lines the hash workers write:
//   hash \t path \t size \t created \t modified \t owner
// so the hash is the first tab-separated field and the path the second.
// Splitting on tabs (rather than on whitespace) keeps paths that contain
// spaces intact.
//
// Returns 1 and copies the hash into out_hex (capacity HASH_STRLEN, always
// NUL-terminated) when a match is found; returns 0 when there is no match or
// the file cannot be opened. Lines without a tab and lines too long for the
// line buffer are ignored.
static int find_hash_in_file(const char *hashfile, const char *path,
                             char *out_hex) {
  FILE *f = fopen(hashfile, "r");
  if (!f)
    return 0;
  // Room for the longest path plus the fixed-size columns around it.
  char line[MAX_PATHLEN + 512];
  int found = 0;
  while (!found && fgets(line, sizeof(line), f)) {
    size_t eol = strcspn(line, "\r\n");
    if (line[eol] == '\0' && !feof(f)) {
      // Over-long record: discard the rest of the physical line.
      int c;
      while ((c = fgetc(f)) != EOF && c != '\n') {
      }
      continue;
    }
    line[eol] = '\0';
    char *tab = strchr(line, '\t');
    if (!tab)
      continue;
    *tab = '\0'; // `line` now holds only the hash column
    char *rec_path = tab + 1;
    rec_path[strcspn(rec_path, "\t")] = '\0';
    if (strcmp(rec_path, path) == 0) {
      snprintf(out_hex, HASH_STRLEN, "%s", line);
      found = 1;
    }
  }
  fclose(f);
  return found;
}
// ----------------------------- Get file metadata -------------------------
// Report the timestamps of `path` as epoch seconds.
//
// *out_created receives st_ctime and *out_modified receives st_mtime from
// stat(); both are set to 0 when stat() fails.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  struct stat info;
  if (stat(path, &info) != 0) {
    *out_created = 0;
    *out_modified = 0;
    return;
  }
  *out_created = (uint64_t)info.st_ctime;
  *out_modified = (uint64_t)info.st_mtime;
}
// Write the owner name of `path` into out_owner (capacity out_owner_size).
// The owning uid comes from stat() and is resolved by get_file_owner; when
// stat() fails the literal "UNKNOWN" is written.
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  struct stat info;
  if (stat(path, &info) != 0) {
    snprintf(out_owner, out_owner_size, "UNKNOWN");
    return;
  }
  get_file_owner(info.st_uid, out_owner, out_owner_size);
}
// ----------------------------- Main ---------------------------------------
// Entry point of the POSIX build.
//
// Usage: any argument other than "-resume" is taken as a folder to process
// (at most 64 are kept). Without folders the user is prompted for one; an
// empty answer, or "-resume" with no folders, selects ".".
//
// Flow:
//   1. Without -resume: scan all folders in parallel into g_entries and save
//      the paths to FILE_LIST_TXT. With -resume: reload that list instead.
//   2. With -resume: look up each entry in FILE_HASHES_TXT so files that
//      already have a record are skipped.
//   3. Queue one job per remaining file, start the hashing thread pool and
//      redraw a progress line (bar, counts, MB/s) every 100 ms.
//   4. Join the workers, print totals and release memory.
//
// Returns 0 on success or when there is nothing to do, 1 when stdin cannot be
// read or a resume is requested without a saved file list.
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
int resume = 0;
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-resume") == 0) {
resume = 1;
} else {
// Folders beyond the 64th are silently ignored.
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0 && !resume) {
printf("Enter folder to process (Enter = current folder): ");
fflush(stdout);
char buf[MAX_PATHLEN];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0)
strcpy(folders[0], ".");
else
// NOTE(review): unlike the argv path above, no explicit terminator is
// written at folders[0][MAX_PATHLEN - 1]; a maximum-length answer would
// leave the entry unterminated.
strncpy(folders[0], buf, MAX_PATHLEN - 1);
folder_count = 1;
} else if (folder_count == 0 && resume) {
strcpy(folders[0], ".");
folder_count = 1;
}
// -------------------------------
// Display selected folders
// -------------------------------
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Detect hardware threads (CPU cores)
// -------------------------------
size_t hw_threads = 1;
long cpus = sysconf(_SC_NPROCESSORS_ONLN);
if (cpus > 0)
hw_threads = (size_t)cpus;
// Add some extra threads to overlap I/O more aggressively
size_t num_threads = hw_threads * 2;
if (num_threads < 2)
num_threads = 2;
// -------------------------------
// Step 1: Scan all folders
// -------------------------------
if (!resume) {
DirQueue q = {0};
pthread_mutex_init(&q.mutex, NULL);
pthread_cond_init(&q.cond, NULL);
// Seed queue
for (int i = 0; i < folder_count; ++i)
dirqueue_push(&q, folders[i]);
// NOTE(review): the malloc and pthread_create results are not checked.
pthread_t *threads = malloc(sizeof(pthread_t) * num_threads);
for (size_t i = 0; i < num_threads; ++i)
pthread_create(&threads[i], NULL, (void *(*)(void *))scan_worker, &q);
// The scan is complete once every worker has seen the queue's stop flag
// and returned.
for (size_t i = 0; i < num_threads; ++i)
pthread_join(threads[i], NULL);
free(threads);
pthread_mutex_destroy(&q.mutex);
pthread_cond_destroy(&q.cond);
printf("Found %zu files. Saving to %s\n", g_entry_count, FILE_LIST_TXT);
save_file_list(FILE_LIST_TXT);
} else {
if (!file_exists(FILE_LIST_TXT)) {
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
return 1;
}
load_file_list(FILE_LIST_TXT);
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
}
if (g_entry_count == 0) {
printf("No files to process.\n");
return 0;
}
// If resume: create map of which files are already hashed
// existing_hash[i] is the previously stored hash of g_entries[i], or NULL.
// NOTE(review): the calloc result is not checked before use.
char **existing_hash = calloc(g_entry_count, sizeof(char *));
for (size_t i = 0; i < g_entry_count; ++i)
existing_hash[i] = NULL;
if (resume && file_exists(FILE_HASHES_TXT)) {
// For simplicity we parse hash file and match lines to list entries.
// NOTE(review): find_hash_in_file reopens and rescans the hash file for
// every entry, so this step costs (entries x records).
for (size_t i = 0; i < g_entry_count; ++i) {
char hex[HASH_STRLEN] = {0};
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
existing_hash[i] = strdup(hex);
}
}
}
// Prepare job queue of only missing files (or all if not resume)
JobQueue queue;
jobqueue_init(&queue);
size_t total_jobs = 0;
for (size_t i = 0; i < g_entry_count; ++i) {
if (resume && existing_hash[i])
continue;
Job *j = (Job *)malloc(sizeof(Job));
// Jobs point into g_entries, so the array must stay alive and unmoved
// until the workers are done.
j->file = &g_entries[i];
j->next = NULL;
jobqueue_push(&queue, j);
++total_jobs;
}
if (total_jobs == 0) {
printf("Nothing to do — all files already hashed.\n");
// NOTE(review): this early return skips the cleanup at the end of main.
return 0;
}
// Remove old hashes file if we're recomputing from scratch.
if (!resume) {
// create/overwrite hashes file
FILE *hf = fopen(FILE_HASHES_TXT, "w");
if (hf)
fclose(hf);
} // if resume, we append only missing
// Starting thread pool
atomic_size_t done_counter;
atomic_store(&done_counter, 0);
atomic_int live_workers;
atomic_store(&live_workers, (int)num_threads);
// A single WorkerArg is shared by all workers.
WorkerArg warg = {.queue = &queue,
.done_counter = &done_counter,
.total_jobs = total_jobs,
.live_workers = &live_workers};
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
hw_threads);
// Launch threads
pthread_t *tids = malloc(sizeof(pthread_t) * num_threads);
for (size_t i = 0; i < num_threads; ++i) {
pthread_create(&tids[i], NULL, worker_thread_posix, &warg);
}
// Progress / timer
struct timespec tstart, tnow;
clock_gettime(CLOCK_MONOTONIC, &tstart);
// NOTE(review): last_done is never read.
size_t last_done = 0;
// ---------- Correct real-time MB/s (stable & accurate) ----------
uint64_t last_bytes = atomic_load(&g_bytes_processed);
double last_time = 0.0;
double displayed_speed = 0.0;
// The displayed speed is recomputed at most once per sample_interval
// seconds, from the bytes processed since the previous sample.
const double sample_interval = 0.5;
char linebuf[256];
for (;;) {
size_t done = (size_t)atomic_load(&done_counter);
// ---- monotonic time ----
clock_gettime(CLOCK_MONOTONIC, &tnow);
double now =
(tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9;
// ---- bytes so far ----
uint64_t bytes = atomic_load(&g_bytes_processed);
// ---- real sampler (independent of UI sleep) ----
// last_time == 0.0 marks "no sample taken yet".
if (last_time == 0.0) {
last_time = now;
last_bytes = bytes;
}
double dt = now - last_time;
if (dt >= sample_interval) {
uint64_t db = bytes - last_bytes;
// When no bytes were processed in the interval the previous speed stays
// on screen.
if (db > 0 && dt > 0.0001) {
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
}
last_bytes = bytes;
last_time = now;
}
// ---- progress bar build ----
const int barw = 40;
double pct = total_jobs ? (double)done / (double)total_jobs : 0.0;
int filled = (int)(pct * barw + 0.5);
int p = 0;
p += snprintf(linebuf + p, sizeof(linebuf) - p, "[");
for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i)
p += snprintf(linebuf + p, sizeof(linebuf) - p, "#");
for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i)
p += snprintf(linebuf + p, sizeof(linebuf) - p, ".");
snprintf(linebuf + p, sizeof(linebuf) - p,
"] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs,
displayed_speed);
printf("\r%s", linebuf);
fflush(stdout);
if (done >= total_jobs)
break;
// Redraw roughly ten times per second.
usleep(100000);
}
printf("\n\n");
// stop queue and join threads
jobqueue_stop(&queue);
for (size_t i = 0; i < num_threads; ++i)
pthread_join(tids[i], NULL);
// done time
clock_gettime(CLOCK_MONOTONIC, &tnow);
double elapsed =
(tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9;
printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / elapsed;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
// If resume: we appended missing entries. If not resume: we wrote all results
// during workers. Note: This program appends hashes as workers finish. This
// avoids holding all hashes in RAM.
// Cleanup
// NOTE(review): the tids array allocated above is not freed here.
for (size_t i = 0; i < g_entry_count; ++i)
if (existing_hash[i])
free(existing_hash[i]);
free(existing_hash);
free_entries();
return 0;
}

View File

@@ -1,641 +0,0 @@
#include "base.h"
#include "platform.h"
// ----------------------------- Globals ------------------------------------
// Number of files discovered so far; incremented by the directory scanner.
static atomic_uint_fast64_t g_files_found = 0;
// Number of files whose hash record has been produced; incremented by
// hash_worker.
static atomic_uint_fast64_t g_files_hashed = 0;
// Total bytes read and hashed; incremented by xxh3_hash_file_stream.
static atomic_uint_fast64_t g_bytes_processed = 0;
// Set to 1 by main once all scan threads have finished; read by
// progress_thread to switch from the scanning line to the progress bar.
static atomic_int g_scan_done = 0;
// ============================= Utils ======================================
// ----------------------------- Timer functions --------------
// Query the performance-counter frequency once and cache it in g_qpc_freq,
// which timer_stop() uses to convert ticks to seconds.
static void timer_init(void) {
  QueryPerformanceFrequency(&g_qpc_freq);
}
// Record the current performance-counter value as the start of the interval
// measured by `t`.
static void timer_start(HiResTimer *t) {
  QueryPerformanceCounter(&t->start);
}
// Record the current performance-counter value as the end of the interval
// and return the elapsed time since timer_start(), in seconds. timer_init()
// must have filled g_qpc_freq beforehand.
static double timer_stop(HiResTimer *t) {
  QueryPerformanceCounter(&t->end);
  LONGLONG elapsed_ticks = t->end.QuadPart - t->start.QuadPart;
  return (double)elapsed_ticks / (double)g_qpc_freq.QuadPart;
}
// ----------------------------- Get instruction set --------------
// Return a human-readable name for the vector instruction set reported by
// XXH_featureTest(). Values other than scalar, SSE2, AVX2 and AVX-512 are
// reported as "Unknown". The returned string is a literal and must not be
// freed.
const char *get_xxhash_instruction_set(void) {
  const int vec_id = XXH_featureTest();
  if (vec_id == XXH_SCALAR)
    return "Scalar (portable C)";
  if (vec_id == XXH_SSE2)
    return "SSE2";
  if (vec_id == XXH_AVX2)
    return "AVX2";
  if (vec_id == XXH_AVX512)
    return "AVX-512";
  return "Unknown";
}
// -------------------- Path parsing -------------------
// Normalize path separators in place: every backslash becomes a forward
// slash and each run of consecutive separators collapses into a single '/'.
// The result is never longer than the input, so the rewrite reuses the same
// buffer.
static void normalize_path(char *p) {
  size_t out = 0;
  int in_separator_run = 0;
  for (size_t in = 0; p[in] != '\0'; in++) {
    const char ch = p[in];
    const int is_separator = (ch == '/' || ch == '\\');
    if (is_separator && in_separator_run)
      continue; // drop repeated separators
    p[out++] = is_separator ? '/' : ch;
    in_separator_run = is_separator;
  }
  p[out] = '\0';
}
// Split a line of user input into folder paths.
//
// Tokens are separated by whitespace; a token that starts with a single or
// double quote runs to the matching quote (or to the end of the line when
// the quote is never closed), which allows paths containing spaces. Each
// token is copied into folders[i] (truncated to MAX_PATHLEN - 1 characters)
// and passed through normalize_path.
//
// Returns the number of folders stored, at most max_folders.
static int parse_paths(char *line, char folders[][MAX_PATHLEN],
                       int max_folders) {
  int stored = 0;
  char *cur = line;
  for (;;) {
    if (*cur == '\0' || stored >= max_folders)
      break;

    // Skip the whitespace in front of the next token.
    while (*cur && isspace((unsigned char)*cur))
      cur++;
    if (*cur == '\0')
      break;

    char delim = 0;
    if (*cur == '"' || *cur == '\'')
      delim = *cur++;
    char *token = cur;
    if (delim) {
      while (*cur && *cur != delim)
        cur++;
    } else {
      while (*cur && !isspace((unsigned char)*cur))
        cur++;
    }

    size_t token_len = cur - token;
    if (token_len > MAX_PATHLEN - 1)
      token_len = MAX_PATHLEN - 1;
    memcpy(folders[stored], token, token_len);
    folders[stored][token_len] = 0;
    normalize_path(folders[stored]);
    stored++;

    // Step over the closing quote, if there is one.
    if (delim && *cur == delim)
      cur++;
  }
  return stored;
}
// ----------------------------- Convert filetime to epoch --------------
// Convert a Windows FILETIME (100-nanosecond intervals since 1601-01-01) to
// Unix epoch seconds.
//
// Timestamps earlier than 1970-01-01, including an all-zero FILETIME, are
// returned as 0 instead of wrapping around to a huge unsigned value; 0 is
// what format_time renders as "N/A".
static uint64_t filetime_to_epoch(const FILETIME *ft) {
  // Offset between the Windows epoch (1601) and the Unix epoch (1970), in
  // 100 ns ticks.
  const uint64_t epoch_diff_ticks = 116444736000000000ULL;
  const uint64_t ticks_per_second = 10000000ULL;

  ULARGE_INTEGER ull;
  ull.LowPart = ft->dwLowDateTime;
  ull.HighPart = ft->dwHighDateTime;
  if (ull.QuadPart < epoch_diff_ticks)
    return 0;
  return (ull.QuadPart - epoch_diff_ticks) / ticks_per_second;
}
// ----------------------------- Format time helper -------------------------
// Format an epoch-seconds timestamp as local time "YYYY-MM-DD HH:MM:SS".
//
// `out` receives the text (out_sz is its capacity). A timestamp of 0 is
// rendered as "N/A". "N/A" is also written when the time cannot be converted
// to local time or when the formatted text does not fit in `out`, so the
// buffer never holds indeterminate content.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = localtime_r(&tt, &tm) ? 1 : 0;
#endif
  // strftime returns 0 when the result does not fit and leaves `out`
  // unspecified in that case.
  if (!converted || strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}
// ----------------------------- Resolve file owner ---------------------
// Resolve the owner of `path` to "DOMAIN\name".
//
// The owner SID is read from the file's security descriptor and translated
// with LookupAccountSidA. If either step fails, "UNKNOWN" is written
// instead. The security descriptor returned by the system is released before
// returning.
static void get_file_owner(const char *path, char *out, size_t out_sz) {
  PSECURITY_DESCRIPTOR descriptor = NULL;
  PSID owner_sid = NULL;
  int resolved = 0;

  if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                            &owner_sid, NULL, NULL, NULL,
                            &descriptor) == ERROR_SUCCESS) {
    char name[64], domain[64];
    DWORD name_len = sizeof(name);
    DWORD domain_len = sizeof(domain);
    SID_NAME_USE use;
    if (LookupAccountSidA(NULL, owner_sid, name, &name_len, domain,
                          &domain_len, &use)) {
      snprintf(out, out_sz, "%s\\%s", domain, name);
      resolved = 1;
    }
  }

  if (!resolved)
    snprintf(out, out_sz, "UNKNOWN");
  if (descriptor)
    LocalFree(descriptor);
}
// ----------------------------- Get file metadata -------------------------
// Report the creation and last-write times of `path` as epoch seconds.
// Both outputs are set to 0 when the file attributes cannot be read.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  WIN32_FILE_ATTRIBUTE_DATA attrs;
  if (!GetFileAttributesExA(path, GetFileExInfoStandard, &attrs)) {
    *out_created = 0;
    *out_modified = 0;
    return;
  }
  *out_created = filetime_to_epoch(&attrs.ftCreationTime);
  *out_modified = filetime_to_epoch(&attrs.ftLastWriteTime);
}
// Platform-neutral entry point for owner lookup: forwards to the Windows
// get_file_owner, which writes "DOMAIN\name" or "UNKNOWN" into out_owner.
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size)
{
  get_file_owner(path, out_owner, out_owner_size);
}
// ----------------------------- Scan helpers -----------------------------
// Scan one directory (non-recursively) with FindFirstFileA/FindNextFileA.
//
// Reparse points are skipped. Sub-directories are copied into the scanner's
// path arena and pushed onto the directory queue. For every other entry a
// FileEntry is allocated from the metadata arena, filled with the normalized
// path, timestamps, owner and size, and pushed onto the file queue;
// g_files_found is incremented for each such entry.
//
// A directory whose search pattern, or an entry whose full path, does not fit
// in MAX_PATHLEN is skipped rather than processed under a truncated (and
// therefore different) path.
void scan_folder_windows_parallel(const char *base, ScannerContext *ctx) {
  char search[MAX_PATHLEN];
  int n = snprintf(search, sizeof(search), "%s\\*", base);
  if (n < 0 || (size_t)n >= sizeof(search))
    return;
  WIN32_FIND_DATAA fd;
  HANDLE h = FindFirstFileA(search, &fd);
  if (h == INVALID_HANDLE_VALUE)
    return;
  do {
    if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
      continue;
    if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
      continue;
    char full[MAX_PATHLEN];
    n = snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
    if (n < 0 || (size_t)n >= sizeof(full))
      continue; // path too long for the buffer
    size_t full_size = (size_t)n + 1; // including the terminator
    if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      char *dir = arena_push(&ctx->path_arena, full_size, false);
      memcpy(dir, full, full_size);
      mpmc_push_work(ctx->dir_queue, dir);
    } else {
      atomic_fetch_add(&g_files_found, 1);
      FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true);
      // The stored path is normalized; the native path is kept for the
      // metadata queries below.
      char norm[MAX_PATHLEN];
      memcpy(norm, full, full_size);
      normalize_path(norm);
      size_t len = strlen(norm) + 1;
      char *path = arena_push(&ctx->path_arena, len, false);
      memcpy(path, norm, len);
      fe->path = path;
      platform_get_file_times(full, &fe->created_time, &fe->modified_time);
      platform_get_file_owner(full, fe->owner, sizeof(fe->owner));
      fe->size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow;
      mpmc_push(ctx->file_queue, fe);
    }
  } while (FindNextFileA(h, &fd));
  FindClose(h);
}
// ------------------------- Scan worker --------------------------------
// Scan-thread entry point. `arg` is this thread's ScannerContext.
// Pops directories from the shared directory queue and scans each one,
// reporting completion through mpmc_task_done; exits with 0 when mpmc_pop
// returns NULL.
static DWORD WINAPI scan_worker(LPVOID arg) {
  ScannerContext *ctx = arg;
  char *dir;
  while ((dir = mpmc_pop(ctx->dir_queue)) != NULL) {
    scan_folder_windows_parallel(dir, ctx);
    mpmc_task_done(ctx->dir_queue, ctx->num_threads);
  }
  return 0;
}
// ----------------------------- Hashing helpers -----------------------------
// Compute the XXH3 128-bit hash of the file at `path`, reading it
// sequentially in READ_BLOCK-sized chunks into the caller-supplied `buf`
// (which must be at least READ_BLOCK bytes).
//
// out_hex must have room for HASH_STRLEN bytes. On success it receives the
// 32-character lowercase hex digest (high 64 bits first). If the file cannot
// be opened or a read fails part-way through, it receives "ERROR" instead of
// a digest of partial data.
//
// Every chunk read is added to the global g_bytes_processed counter.
static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) {
  HANDLE hFile =
      CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
                  OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
  if (hFile == INVALID_HANDLE_VALUE) {
    strcpy(out_hex, "ERROR");
    return;
  }
  XXH3_state_t state;
  XXH3_128bits_reset(&state);
  int read_failed = 0;
  for (;;) {
    DWORD read = 0;
    if (!ReadFile(hFile, buf, READ_BLOCK, &read, NULL)) {
      read_failed = 1; // an I/O error, not end of file
      break;
    }
    if (read == 0)
      break; // end of file
    XXH3_128bits_update(&state, buf, (size_t)read);
    atomic_fetch_add(&g_bytes_processed, (uint64_t)read);
  }
  CloseHandle(hFile);
  if (read_failed) {
    strcpy(out_hex, "ERROR");
    return;
  }
  XXH128_hash_t h = XXH3_128bits_digest(&state);
  snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
           (unsigned long long)h.low64);
}
// ------------------------- Hash worker --------------------------------
// Hash-thread entry point. `arg` is this thread's WorkerContext.
//
// Pops FileEntry pointers from the file queue until mpmc_pop returns NULL.
// Each file is hashed and one tab-separated record
// (hash, path, size in KiB, created, modified, owner) is appended to the
// worker's own arena, after which g_files_hashed is incremented.
//
// The record buffer is sized for the longest possible path plus the other
// columns; the length actually copied is clamped to what the buffer holds, so
// the copy never reads past the formatted text.
// Returns 0 normally, 1 if the read buffer cannot be allocated.
static DWORD WINAPI hash_worker(LPVOID arg) {
  WorkerContext *ctx = (WorkerContext *)arg;
  BYTE *buf = (BYTE *)malloc(READ_BLOCK);
  if (!buf) {
    // NOTE(review): entries left in the queue then depend on the remaining
    // workers; verify the progress thread's termination condition still
    // holds if every worker fails here.
    fprintf(stderr, "hash_worker: out of memory\n");
    return 1;
  }
  for (;;) {
    FileEntry *fe = mpmc_pop(ctx->file_queue);
    if (!fe)
      break;
    char hash[HASH_STRLEN];
    xxh3_hash_file_stream(fe->path, hash, buf);
    char created[32], modified[32];
    format_time(fe->created_time, created, sizeof(created));
    format_time(fe->modified_time, modified, sizeof(modified));
    double size_kib = (double)fe->size_bytes / 1024.0;
    // Path (up to MAX_PATHLEN) plus hash, size, two timestamps and owner.
    char record[MAX_PATHLEN + 512];
    int len = snprintf(record, sizeof(record), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
                       hash, fe->path, size_kib, created, modified, fe->owner);
    if (len >= (int)sizeof(record)) {
      // Truncated: keep what fits and make sure the record still ends with a
      // newline.
      len = (int)sizeof(record) - 1;
      record[len - 1] = '\n';
    }
    if (len > 0) {
      char *dst = arena_push(&ctx->arena, len, false);
      memcpy(dst, record, len);
    }
    atomic_fetch_add(&g_files_hashed, 1);
  }
  // free(buf); It will be freed by the system when the program exits
  return 0;
}
// ----------------------------- Progress display ---------------------------
// Progress-display thread. `arg` is unused.
//
// Every 100 ms it redraws one console line. While scanning is still running
// the line shows the files found, files hashed and throughput; once
// g_scan_done is set it shows a 40-column bar with the hashed/found ratio.
// Throughput is recomputed at most every 0.5 s from the bytes processed
// since the previous sample. The thread exits, after printing a newline,
// when scanning is done and every found file has been hashed.
DWORD WINAPI progress_thread(void *arg) {
  const double sample_interval = 0.5;
  const int barw = 40;

  LARGE_INTEGER freq, start;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&start);

  uint64_t last_bytes = atomic_load(&g_bytes_processed);
  double last_time = 0.0; // 0.0 means "no sample taken yet"
  double displayed_speed = 0.0;

  while (1) {
    uint64_t found = atomic_load(&g_files_found);
    uint64_t hashed = atomic_load(&g_files_hashed);
    uint64_t bytes = atomic_load(&g_bytes_processed);
    int scan_done = atomic_load(&g_scan_done);

    LARGE_INTEGER now;
    QueryPerformanceCounter(&now);
    double t = (double)(now.QuadPart - start.QuadPart) / (double)freq.QuadPart;

    if (last_time == 0.0) {
      last_time = t;
      last_bytes = bytes;
    }
    double dt = t - last_time;
    if (dt >= sample_interval) {
      uint64_t db = bytes - last_bytes;
      // An interval without progress keeps the previous speed on screen.
      if (db > 0 && dt > 0.0001) {
        displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
      }
      last_bytes = bytes;
      last_time = t;
    }

    if (scan_done) {
      double pct = 0.0;
      if (found) {
        pct = (double)hashed / (double)found;
      }
      int filled = (int)(pct * barw);
      char bar[64];
      int p = 0;
      int col = 0;
      bar[p++] = '[';
      while (col < filled) {
        bar[p++] = '#';
        col++;
      }
      for (col = filled; col < barw; col++) {
        bar[p++] = '.';
      }
      bar[p++] = ']';
      bar[p] = 0;
      printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
             (unsigned long long)hashed, (unsigned long long)found,
             displayed_speed);
    } else {
      printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
             (unsigned long long)found, (unsigned long long)hashed,
             displayed_speed);
    }
    fflush(stdout);

    if (scan_done && hashed == found)
      break;
    Sleep(100);
  }
  printf("\n");
  return 0;
}
// ----------------------------- Main ---------------------------------------
// Entry point of the Windows build.
//
// Every command-line argument is taken as a folder to process (at most 64).
// Without arguments the user is prompted for a whitespace-separated, optionally
// quoted list; an empty answer selects ".".
//
// Flow:
//   1. Start the hash threads, the progress thread and the scan threads, then
//      seed the directory queue with the selected folders.
//   2. Wait for the scan threads, mark the file queue as finished and report
//      the scan time.
//   3. Wait for the hash threads and the progress thread.
//   4. Concatenate every hash worker's arena into FILE_HASHES_TXT and print a
//      summary.
//
// Returns 0 on success (including "no files found"), 1 when stdin cannot be
// read or the output file cannot be created.
int main(int argc, char **argv) {
  char folders[64][MAX_PATHLEN]; // up to 64 input folders
  int folder_count = 0;
  // -------------------------------
  // Parse arguments
  // -------------------------------
  for (int i = 1; i < argc; ++i) {
    if (folder_count < 64) {
      normalize_path(argv[i]);
      strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
      folders[folder_count][MAX_PATHLEN - 1] = 0;
      folder_count++;
    }
  }
  // -------------------------------
  // Ask user if no folders provided
  // -------------------------------
  if (folder_count == 0) {
    printf("Enter folders to process (Enter = current folder): ");
    fflush(stdout);
    char buf[KiB(32)];
    if (!fgets(buf, sizeof(buf), stdin))
      return 1;
    buf[strcspn(buf, "\r\n")] = 0;
    if (buf[0] == 0) {
      strcpy(folders[0], ".");
      folder_count = 1;
    } else {
      folder_count = parse_paths(buf, folders, 64);
    }
  }
  // Display selected folders
  printf("Processing %d folder(s):\n", folder_count);
  for (int i = 0; i < folder_count; ++i) {
    printf(" - %s\n", folders[i]);
  }
  // -------------------------------
  // Scanning and total timer init
  // -------------------------------
  timer_init();
  HiResTimer total_timer;
  HiResTimer scan_timer;
  timer_start(&total_timer);
  timer_start(&scan_timer);
  // -------------------------------
  // Creating a general purpose arena
  // -------------------------------
  arena_params params = {
      .reserve_size = GiB(1),
      .commit_size = MiB(16),
      .align = 0,
      .push_size = 0,
      .allow_free_list = true,
      .allow_swapback = false,
      .growth_policy = ARENA_GROWTH_NORMAL,
      .commit_policy = ARENA_COMMIT_LAZY,
      .max_nbre_blocks = 1,
  };
  mem_arena *gp_arena = arena_create(&params);
  // -------------------------------
  // Detect hardware threads
  // -------------------------------
  size_t hw_threads = 1;
  // --- Windows: detect PHYSICAL cores (not logical threads) ---
  // The first call only reports the buffer size needed in `len`.
  DWORD len = 0;
  GetLogicalProcessorInformation(NULL, &len);
  SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf =
      (SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)arena_push(&gp_arena, len, true);
  if (GetLogicalProcessorInformation(buf, &len)) {
    DWORD count = 0;
    DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    for (DWORD i = 0; i < n; i++) {
      if (buf[i].Relationship == RelationProcessorCore)
        count++;
    }
    if (count > 0)
      hw_threads = count;
  }
  arena_free(&gp_arena, (u8 **)&buf, len);
  // Logical threads = CPU cores * 2
  size_t num_threads = hw_threads * 2;
  printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
         hw_threads);
  printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
  // -------------------------------
  // Scanning and hashing
  // -------------------------------
  MPMCQueue dir_queue;
  mpmc_init(&dir_queue, MiB(1));
  MPMCQueue file_queue;
  mpmc_init(&file_queue, MiB(1));
  // starting hash threads
  size_t num_hash_threads = num_threads;
  WorkerContext workers[num_hash_threads];
  HANDLE *hash_threads =
      arena_push(&gp_arena, sizeof(HANDLE) * num_hash_threads, true);
  for (size_t i = 0; i < num_hash_threads; ++i) {
    // Each worker gets its own arena for its output records, so workers
    // never contend on a shared buffer.
    workers[i].arena = arena_create(&params);
    workers[i].file_queue = &file_queue;
    hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL);
  }
  // starting progress printing thread
  HANDLE progress = CreateThread(NULL, 0, progress_thread, NULL, 0, NULL);
  // starting scan threads
  size_t num_scan_threads = num_threads;
  ScannerContext scanners[num_scan_threads];
  HANDLE *scan_threads =
      arena_push(&gp_arena, sizeof(HANDLE) * num_scan_threads, true);
  for (size_t i = 0; i < num_scan_threads; i++) {
    // NOTE(review): num_threads is a u8 field; counts above 255 would be
    // truncated here.
    scanners[i].num_threads = num_scan_threads;
    scanners[i].path_arena = arena_create(&params);
    scanners[i].meta_arena = arena_create(&params);
    scanners[i].dir_queue = &dir_queue;
    scanners[i].file_queue = &file_queue;
    scan_threads[i] = CreateThread(NULL, 0, scan_worker, &scanners[i], 0, NULL);
  }
  // Initial folder push
  for (int i = 0; i < folder_count; i++) {
    size_t len = strlen(folders[i]) + 1;
    char *path = arena_push(&scanners[0].path_arena, len, false);
    memcpy(path, folders[i], len);
    mpmc_push_work(&dir_queue, path);
  }
  // Stop scan threads
  // NOTE(review): WaitForMultipleObjects accepts at most
  // MAXIMUM_WAIT_OBJECTS handles; machines with many cores may exceed that.
  WaitForMultipleObjects((DWORD)num_scan_threads, scan_threads, TRUE, INFINITE);
  for (size_t i = 0; i < num_scan_threads; ++i)
    CloseHandle(scan_threads[i]);
  mpmc_producers_finished(&file_queue, num_hash_threads);
  atomic_store(&g_scan_done, 1);
  arena_free(&gp_arena, (u8 **)&scan_threads,
             sizeof(HANDLE) * num_scan_threads);
  double scan_seconds = timer_stop(&scan_timer);
  size_t total_found = atomic_load(&g_files_found);
  printf("\r%*s\r", 120, ""); // clear_console_line
  printf("Completed scanning in %.2f seconds, found %zu files\n\n",
         scan_seconds, total_found);
  // If no files found
  if (total_found == 0) {
    printf("No files found.\n");
    return 0;
  }
  // Stop hashing threads
  WaitForMultipleObjects((DWORD)num_hash_threads, hash_threads, TRUE, INFINITE);
  for (size_t i = 0; i < num_hash_threads; ++i)
    CloseHandle(hash_threads[i]);
  arena_free(&gp_arena, (u8 **)&hash_threads,
             sizeof(HANDLE) * num_hash_threads);
  // Stop progress printing thread
  WaitForSingleObject(progress, INFINITE);
  CloseHandle(progress);
  // -------------------------------
  // Export file_hashes.txt
  // -------------------------------
  FILE *f = fopen(FILE_HASHES_TXT, "wb");
  if (!f) {
    // Without this check a failed open would pass NULL to fwrite/fclose.
    perror("fopen " FILE_HASHES_TXT);
    return 1;
  }
  for (size_t i = 0; i < num_hash_threads; i++) {
    mem_arena *arena = workers[i].arena;
    // The records start right after the (aligned) arena header.
    u8 *arena_base =
        (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
    fwrite(arena_base, 1, arena->pos, f);
  }
  fclose(f);
  // -------------------------------
  // Print summary
  // -------------------------------
  double total_seconds = timer_stop(&total_timer);
  printf("Completed hashing %zu files\n", total_found);
  uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
  double total_mb = (double)total_bytes / (1024.0 * 1024.0);
  double avg_mbps = total_mb / total_seconds;
  printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
  printf(" Total time : %.2f seconds\n\n", total_seconds);
  return 0;
}