// Sleep
// Suspends the calling thread for roughly `ms` milliseconds (POSIX build).
//
// ms <= 0 requests a zero-length sleep, which is how callers ask for a
// short back-off (the Windows build maps the same call to Sleep(0)).
//
// nanosleep() is used instead of usleep(): POSIX allows usleep() to fail
// with EINVAL for intervals of one second or more, and this project calls
// sleep_ms(1000). Splitting into seconds + nanoseconds also avoids the
// `ms * 1000` multiplication.
//
// The call is not restarted when a signal interrupts it; the return value is
// deliberately ignored, so the sleep may end early.
// NOTE(review): assumes every caller re-checks its condition in a loop -
// confirm. Needs <time.h>, which the POSIX timer code of this project
// already relies on.
static void sleep_ms(int ms) {
  if (ms < 0) {
    ms = 0;
  }

  struct timespec req;
  req.tv_sec = ms / 1000;
  req.tv_nsec = (long)(ms % 1000) * 1000000L;

  (void)nanosleep(&req, NULL);
}
The compilaiton throws two warnings about function with internal linkage but not defined, they are defined in xxh_x86dispatch.c so it's harmless warnings Fixing user prompt parsing + +4.5: Porting to linux +Reorganising the code +Improving the scan function diff --git a/file_hasher.c b/file_hasher.c index 9080a36..8a2b57f 100644 --- a/file_hasher.c +++ b/file_hasher.c @@ -1,7 +1,223 @@ -#define _CRT_SECURE_NO_WARNINGS +#include "platform.c" -#if defined(_WIN32) || defined(_WIN64) -#include "platform_windows.c" -#else -#include "platform_posix.c" -#endif +// ----------------------------- Main --------------------------------------- +int main(int argc, char **argv) { + char folders[64][MAX_PATHLEN]; // up to 64 input folders + int folder_count = 0; + + // ------------------------------- + // Parse arguments + // ------------------------------- + for (int i = 1; i < argc; ++i) { + if (folder_count < 64) { + normalize_path(argv[i]); + strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); + folders[folder_count][MAX_PATHLEN - 1] = 0; + folder_count++; + } + } + + // ------------------------------- + // Ask user if no folders provided + // ------------------------------- + if (folder_count == 0) { + printf("Enter folders to process (Enter = current folder): "); + fflush(stdout); + + char buf[KiB(32)]; + + if (!fgets(buf, sizeof(buf), stdin)) + return 1; + + buf[strcspn(buf, "\r\n")] = 0; + + if (buf[0] == 0) { + strcpy(folders[0], "."); + folder_count = 1; + } else { + folder_count = parse_paths(buf, folders, 64); + } + } + + // Display selected folders + printf("Processing %d folder(s):\n", folder_count); + for (int i = 0; i < folder_count; ++i) { + printf(" - %s\n", folders[i]); + } + + // ------------------------------- + // Scanning and total timer init + // ------------------------------- + timer_init(); + + HiResTimer total_timer; + HiResTimer scan_timer; + + timer_start(&total_timer); + timer_start(&scan_timer); + + // ------------------------------- + // 
Creating a general purpose arena + // ------------------------------- + arena_params params = { + .reserve_size = GiB(1), + .commit_size = MiB(16), + .align = 0, + .push_size = 0, + .allow_free_list = true, + .allow_swapback = false, + .growth_policy = ARENA_GROWTH_NORMAL, + .commit_policy = ARENA_COMMIT_LAZY, + .max_nbre_blocks = 1, + }; + + mem_arena *gp_arena = arena_create(¶ms); + + // ------------------------------- + // Detect hardware threads + // ------------------------------- + // --- Windows: detect PHYSICAL cores (not logical threads) --- + size_t hw_threads = platform_physical_cores(); + + // Logical threads = CPU cores * 2 + size_t num_threads = hw_threads * 2; + + printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads, + hw_threads); + printf(" Selected instruction set: %s\n", get_xxhash_instruction_set()); + + // ------------------------------- + // Scanning and hashing + // ------------------------------- + MPMCQueue dir_queue; + mpmc_init(&dir_queue, MiB(1)); + + MPMCQueue file_queue; + mpmc_init(&file_queue, MiB(1)); + + // Starting hash threads + size_t num_hash_threads = num_threads; + + WorkerContext workers[num_hash_threads]; + Thread *hash_threads = + arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true); + + for (size_t i = 0; i < num_hash_threads; ++i) { + workers[i].arena = arena_create(¶ms); + workers[i].file_queue = &file_queue; + + if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker, &workers[i]) != + 0) { + fprintf(stderr, "Failed to create hash thread %zu\n", i); + exit(1); + } + } + + // Starting progress printing thread + Thread progress_thread_handle; + if (thread_create(&progress_thread_handle, (ThreadFunc)progress_thread, + NULL) != 0) { + fprintf(stderr, "Failed to create progress thread\n"); + exit(1); + } + + // Starting scan threads + size_t num_scan_threads = num_threads; + + ScannerContext scanners[num_scan_threads]; + Thread *scan_threads = + arena_push(&gp_arena, sizeof(Thread) * 
num_scan_threads, true); + + for (size_t i = 0; i < num_scan_threads; i++) { + scanners[i].num_threads = num_scan_threads; + scanners[i].path_arena = arena_create(¶ms); + scanners[i].meta_arena = arena_create(¶ms); + scanners[i].dir_queue = &dir_queue; + scanners[i].file_queue = &file_queue; + + if (thread_create(&scan_threads[i], (ThreadFunc)scan_worker, + &scanners[i]) != 0) { + fprintf(stderr, "Failed to create scan thread %zu\n", i); + exit(1); + } + } + + // Initial folder push + for (int i = 0; i < folder_count; i++) { + size_t len = strlen(folders[i]) + 1; + char *path = arena_push(&scanners[0].path_arena, len, false); + memcpy(path, folders[i], len); + mpmc_push_work(&dir_queue, path); + } + + // Stop scan threads + thread_wait_multiple(scan_threads, num_scan_threads); + + for (size_t i = 0; i < num_scan_threads; ++i) { + thread_close(&scan_threads[i]); + } + + mpmc_producers_finished(&file_queue, num_hash_threads); + + atomic_store(&g_scan_done, 1); + + arena_free(&gp_arena, (u8 **)&scan_threads, + sizeof(Thread) * num_scan_threads); + + double scan_seconds = timer_elapsed(&scan_timer); + size_t total_found = atomic_load(&g_files_found); + + printf("\r%*s\r", 120, ""); // clear_console_line + printf("Completed scanning in %.2f seconds, found %zu files\n\n", + scan_seconds, total_found); + + // If no files found + if (total_found == 0) { + printf("No files found.\n"); + return 0; + } + + // Stop hashing threads + thread_wait_multiple(hash_threads, num_hash_threads); + + for (size_t i = 0; i < num_hash_threads; ++i) { + thread_close(&hash_threads[i]); + } + + arena_free(&gp_arena, (u8 **)&hash_threads, + sizeof(Thread) * num_hash_threads); + + // Stop progress printing thread + thread_join(&progress_thread_handle); + thread_close(&progress_thread_handle); + + // ------------------------------- + // Export file_hashes.txt + // ------------------------------- + + FILE *f = fopen(FILE_HASHES_TXT, "wb"); + + for (int i = 0; i < num_threads; i++) { + mem_arena 
*arena = workers[i].arena; + u8 *arena_base = + (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align); + fwrite(arena_base, 1, arena->pos, f); + } + + fclose(f); + + // ------------------------------- + // Print summary + // ------------------------------- + double total_seconds = timer_elapsed(&total_timer); + + printf("Completed hashing %zu files\n", total_found); + + uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed); + double total_mb = (double)total_bytes / (1024.0 * 1024.0); + double avg_mbps = total_mb / total_seconds; + printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps); + printf(" Total time : %.2f seconds\n\n", total_seconds); + + return 0; +} diff --git a/lf_mpmc.h b/lf_mpmc.h index 0524923..2288f26 100644 --- a/lf_mpmc.h +++ b/lf_mpmc.h @@ -173,11 +173,11 @@ static void mpmc_push(MPMCQueue *q, void *item) { } else if (diff < 0) { // queue actually full - Sleep(1000); + sleep_ms(1000); } else { // waiting to grow - Sleep(0); + sleep_ms(0); } } @@ -220,11 +220,11 @@ static void mpmc_push_work(MPMCQueue *q, void *item) { } else if (diff < 0) { // queue actually full - Sleep(1000); + sleep_ms(1000); } else { // waiting to grow - Sleep(0); + sleep_ms(0); } } @@ -264,7 +264,7 @@ static void *mpmc_pop(MPMCQueue *q) { } else { // slot is still transitioning (written by another thread) if (++spins > 10) { - Sleep(0); // yield CPU + sleep_ms(0); // yield CPU spins = 0; } else { cpu_pause(); diff --git a/platform.c b/platform.c new file mode 100644 index 0000000..162e985 --- /dev/null +++ b/platform.c @@ -0,0 +1,942 @@ +#pragma once // ensure that a given header file is included only once in a + // single compilation unit +#define _CRT_SECURE_NO_WARNINGS + +#include "arena.h" +#include "base.h" +#include "lf_mpmc.h" + +#include "arena.c" + +// xxhash include +#define XXH_INLINE_ALL +#include "xxh_x86dispatch.h" + +// ----------------------------- Config ------------------------------------- +#define FILE_HASHES_TXT 
// ----------------------------- Get HW info --------------
#if defined(_WIN32) || defined(_WIN64)

// Returns the number of RelationProcessorCore entries reported by
// GetLogicalProcessorInformation, or 1 when the information cannot be
// obtained.
size_t platform_physical_cores(void) {
  DWORD len = 0;

  // The first call only reports the required buffer size, in BYTES
  GetLogicalProcessorInformation(NULL, &len);
  if (len < sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION))
    return 1;

  // Heap buffer of exactly `len` bytes: a stack array of `len` elements
  // would be sizeof(element) times larger than needed.
  SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf = malloc(len);
  if (!buf)
    return 1;

  size_t count = 0;
  if (GetLogicalProcessorInformation(buf, &len)) {
    DWORD n = len / sizeof(*buf);
    for (DWORD i = 0; i < n; i++) {
      if (buf[i].Relationship == RelationProcessorCore)
        count++;
    }
  }

  free(buf);
  return count ? count : 1;
}

#elif defined(__linux__)

// Returns sysconf(_SC_NPROCESSORS_ONLN), or 1 when that value is not
// positive.
// NOTE(review): this is the number of processors currently online as seen by
// the OS, i.e. logical CPUs; on SMT machines it is larger than the physical
// core count the function name suggests.
size_t platform_physical_cores(void) {
  long n = sysconf(_SC_NPROCESSORS_ONLN);
  return n > 0 ? (size_t)n : 1;
}

#endif
0 : -1; +} + +// File close function +static void os_file_close(FileHandle handle) { CloseHandle(handle); } + +#elif defined(__linux__) +typedef int FileHandle; +#define INVALID_FILE_HANDLE (-1) + +// File open function +static FileHandle os_file_open(const char *path) { + return open(path, O_RDONLY | O_NOFOLLOW); +} + +// File read function +static int os_file_read(FileHandle handle, void *buf, size_t count, + uint64_t *bytes_read) { + ssize_t result = read(handle, buf, count); + if (result >= 0) { + *bytes_read = (uint64_t)result; + return 0; + } + *bytes_read = 0; + return -1; +} + +// File close function +static void os_file_close(FileHandle handle) { close(handle); } + +#endif + +// -------------------- Thread abstraction ------------------- +// Threads context +typedef struct { + u8 num_threads; + + mem_arena *path_arena; + mem_arena *meta_arena; + + MPMCQueue *dir_queue; + MPMCQueue *file_queue; +} ScannerContext; + +typedef struct { + mem_arena *arena; + MPMCQueue *file_queue; +} WorkerContext; + +#if defined(_WIN32) || defined(_WIN64) +typedef HANDLE ThreadHandle; +typedef DWORD(WINAPI *ThreadFunc)(void *); +#define THREAD_RETURN DWORD WINAPI +#define THREAD_RETURN_VALUE 0; + +typedef struct { + ThreadHandle handle; + int valid; // Track if thread was successfully created +} Thread; + +// Thread function wrapper to handle different return types +#define THREAD_FUNCTION(name) DWORD WINAPI name(LPVOID arg) + +// Thread creation function +static int thread_create(Thread *thread, ThreadFunc func, void *arg) { + thread->handle = + CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, NULL); + return (thread->handle != NULL) ? 0 : -1; +} + +// Thread join function +static int thread_join(Thread *thread) { + return (WaitForSingleObject(thread->handle, INFINITE) == WAIT_OBJECT_0) ? 
0 + : -1; +} + +// Thread close/detach function +static void thread_close(Thread *thread) { CloseHandle(thread->handle); } + +// Wait for multiple threads +static int thread_wait_multiple(Thread *threads, size_t count) { + HANDLE handles[64]; // Max 64 threads for Windows + for (size_t i = 0; i < count; i++) { + handles[i] = threads[i].handle; + } + return (WaitForMultipleObjects((DWORD)count, handles, TRUE, INFINITE) == + WAIT_OBJECT_0) + ? 0 + : -1; +} + +#elif defined(__linux__) +typedef pthread_t ThreadHandle; +typedef void *(*ThreadFunc)(void *); +#define THREAD_RETURN void * +#define THREAD_RETURN_VALUE NULL; + +typedef struct { + ThreadHandle handle; + int valid; // Track if thread was successfully created +} Thread; + +// Thread function wrapper to handle different return types +typedef struct { + void *(*func)(void *); + void *arg; +} ThreadWrapper; + +static void *thread_start_routine(void *arg) { + ThreadWrapper *wrapper = (ThreadWrapper *)arg; + void *result = wrapper->func(wrapper->arg); + free(wrapper); + return result; +} + +// Thread creation function +static int thread_create(Thread *thread, ThreadFunc func, void *arg) { + int ret = pthread_create(&thread->handle, NULL, func, arg); + if (ret == 0) { + thread->valid = 1; + } + return ret; +} + +// Thread join function +static int thread_join(Thread *thread) { + int ret = pthread_join(thread->handle, NULL); + thread->valid = 0; + return ret; +} + +// Thread close/detach function +static void thread_close(Thread *thread) { + if (thread->valid) { + pthread_detach(thread->handle); + thread->valid = 0; + } +} + +// Wait for multiple threads +static int thread_wait_multiple(Thread *threads, size_t count) { + for (size_t i = 0; i < count; i++) { + if (thread_join(&threads[i]) != 0) { + return -1; + } + } + return 0; +} + +#endif + +// ======================== Get file metadata ======================== +// -------------------- Path parsing ------------------- +static void normalize_path(char *p) { + char 
*src = p; + char *dst = p; + int prev_slash = 0; + + while (*src) { + char c = *src++; + + if (c == '\\' || c == '/') { + if (!prev_slash) { + *dst++ = '/'; + prev_slash = 1; + } + } else { + *dst++ = c; + prev_slash = 0; + } + } + + *dst = '\0'; +} + +static int parse_paths(char *line, char folders[][MAX_PATHLEN], + int max_folders) { + int count = 0; + char *p = line; + + while (*p && count < max_folders) { + + while (*p && isspace((unsigned char)*p)) + p++; + + if (!*p) + break; + + char *start; + char quote = 0; + + if (*p == '"' || *p == '\'') { + quote = *p++; + start = p; + + while (*p && *p != quote) + p++; + } else { + start = p; + + while (*p && !isspace((unsigned char)*p)) + p++; + } + + size_t len = p - start; + if (len >= MAX_PATHLEN) + len = MAX_PATHLEN - 1; + + memcpy(folders[count], start, len); + folders[count][len] = 0; + + normalize_path(folders[count]); + + count++; + + if (quote && *p == quote) + p++; + } + return count; +} + +// ----------------------------- File time ------------------------- +#if defined(_WIN32) || defined(_WIN64) +static void format_time(uint64_t t, char *out, size_t out_sz) { + if (t == 0) { + snprintf(out, out_sz, "N/A"); + return; + } + + time_t tt = (time_t)t; + struct tm tm; + + localtime_s(&tm, &tt); + + strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm); +} +// ----------------------------- Convert filetime to epoch -------------- +static uint64_t filetime_to_epoch(const FILETIME *ft) { + ULARGE_INTEGER ull; + ull.LowPart = ft->dwLowDateTime; + ull.HighPart = ft->dwHighDateTime; + + // Windows epoch (1601) ¬ニメ Unix epoch (1970) + return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL; +} + +void platform_get_file_times(const char *path, uint64_t *out_created, + uint64_t *out_modified) { + WIN32_FILE_ATTRIBUTE_DATA fad; + if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) { + *out_created = filetime_to_epoch(&fad.ftCreationTime); + *out_modified = filetime_to_epoch(&fad.ftLastWriteTime); + } else { + 
// Writes the login name of `uid` into `out` (at most out_sz bytes including
// the terminating NUL), or "UNKNOWN" when the lookup fails.
//
// Uses getpwuid_r with a caller-owned buffer: getpwuid returns a pointer to
// static storage, and this function is reached from several scan threads at
// once through platform_get_file_owner.
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
  struct passwd pw;
  struct passwd *found = NULL;
  char scratch[4096]; // backing storage for the strings inside `pw`

  if (getpwuid_r(uid, &pw, scratch, sizeof(scratch), &found) == 0 &&
      found != NULL && found->pw_name != NULL) {
    snprintf(out, out_sz, "%s", found->pw_name);
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }
}
snprintf(out_owner, out_owner_size, "UNKNOWN"); + } +} + +#endif + +// ----------------------------- Scan helpers ----------------------------- +typedef struct FileEntry { + char *path; + + uint64_t size_bytes; + uint64_t created_time; // epoch + uint64_t modified_time; // epoch seconds + char owner[128]; // resolved owner name +} FileEntry; + +typedef struct { + char buffer[MAX_PATHLEN]; + char *base_end; // Points to end of base path + char *filename_pos; // Points to where filename should be written + size_t base_len; +} PathBuilder; + +static void path_builder_init(PathBuilder *pb, const char *base) { + pb->base_len = strlen(base); + memcpy(pb->buffer, base, pb->base_len); + pb->base_end = pb->buffer + pb->base_len; + +#if defined(_WIN32) || defined(_WIN64) + *pb->base_end = '\\'; +#elif defined(__linux__) + *pb->base_end = '/'; +#endif + + // Ensure null termination + *(pb->base_end + 1) = '\0'; + pb->filename_pos = pb->base_end + 1; +} + +static void path_builder_set_filename(PathBuilder *pb, const char *filename, + size_t name_len) { + memcpy(pb->filename_pos, filename, name_len); + pb->filename_pos[name_len] = '\0'; // Ensure null termination +} + +static char *path_builder_dup_arena(PathBuilder *pb, mem_arena *arena, + bool zero) { + // Calculate total length including base + separator + filename + null + // terminator + size_t total_len = + (pb->filename_pos - pb->buffer) + strlen(pb->filename_pos) + 1; + char *dup = arena_push(&arena, total_len, zero); + memcpy(dup, pb->buffer, total_len); + return dup; +} + +#if defined(_WIN32) || defined(_WIN64) +void scan_folder(const char *base, ScannerContext *ctx) { + PathBuilder pb; + path_builder_init(&pb, base); + + char search[MAX_PATHLEN]; + memcpy(search, pb.buffer, pb.base_len + 1); // Copy base + separator + memcpy(search + pb.base_len + 1, "*", 2); // Add "*" and null + + WIN32_FIND_DATAA fd; + HANDLE h = FindFirstFileA(search, &fd); + if (h == INVALID_HANDLE_VALUE) + return; + + do { + // Skip . and .. 
+ if (fd.cFileName[0] == '.' && + (fd.cFileName[1] == 0 || + (fd.cFileName[1] == '.' && fd.cFileName[2] == 0))) + continue; + + if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + continue; + + size_t name_len = strlen(fd.cFileName); + path_builder_set_filename(&pb, fd.cFileName, name_len); + + if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + char *dir = path_builder_dup_arena(&pb, ctx->path_arena, false); + mpmc_push_work(ctx->dir_queue, dir); + } else { + atomic_fetch_add(&g_files_found, 1); + + FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true); + + // Create a temporary copy for normalization to avoid corrupting pb.buffer + char temp_path[MAX_PATHLEN]; + memcpy(temp_path, pb.buffer, + (pb.filename_pos - pb.buffer) + name_len + 1); + normalize_path(temp_path); + + fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, false); + strcpy(fe->path, temp_path); + + platform_get_file_times(pb.buffer, &fe->created_time, &fe->modified_time); + platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner)); + fe->size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow; + + mpmc_push(ctx->file_queue, fe); + } + + } while (FindNextFileA(h, &fd)); + + FindClose(h); +} + +#elif defined(__linux__) +To test +Choice 1 +static int platform_get_file_times_fd(int dir_fd, const char *name, + time_t *created, time_t *modified) { + struct stat st; + if (fstatat(dir_fd, name, &st, 0) == 0) { + *created = st.st_ctime; // or st.st_birthtime on systems that support it + *modified = st.st_mtime; + return 0; + } + return -1; +} + +static int platform_get_file_owner_fd(int dir_fd, const char *name, char *owner, + size_t owner_size) { + struct stat st; + if (fstatat(dir_fd, name, &st, 0) == 0) { + struct passwd pw; + struct passwd *result; + char buffer[4096]; // Sufficiently large buffer for passwd data + + // Reentrant version (thread-safe) + if (getpwuid_r(st.st_uid, &pw, buffer, sizeof(buffer), &result) == 0 && + result != NULL && 
result->pw_name != NULL) { + strncpy(owner, result->pw_name, owner_size - 1); + owner[owner_size - 1] = '\0'; + } else { + // Fallback to uid + snprintf(owner, owner_size, "uid:%d", st.st_uid); + } + return 0; + } + return -1; + + +void scan_folder(const char *base, ScannerContext *ctx) { + PathBuilder pb; + path_builder_init(&pb, base); + + int dir_fd = open(base, O_RDONLY | O_DIRECTORY | O_NOFOLLOW); + if (dir_fd == -1) + return; + + DIR *dir = fdopendir(dir_fd); + if (!dir) { + close(dir_fd); + return; + } + + struct dirent *entry; + + while ((entry = readdir(dir)) != NULL) { + if (entry->d_name[0] == '.' && + (entry->d_name[1] == 0 || + (entry->d_name[1] == '.' && entry->d_name[2] == 0))) + continue; + + size_t name_len = strlen(entry->d_name); + path_builder_set_filename(&pb, entry->d_name, name_len); + + int file_type = DT_UNKNOWN; +#ifdef _DIRENT_HAVE_D_TYPE + file_type = entry->d_type; +#endif + + // Fast path using d_type + if (file_type != DT_UNKNOWN) { + if (file_type == DT_LNK) + continue; // Skip symlinks + + if (file_type == DT_DIR) { + char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false); + mpmc_push_work(ctx->dir_queue, dir_path); + continue; + } + + if (file_type == DT_REG) { + atomic_fetch_add(&g_files_found, 1); + FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), + true); + + // Use fstatat for file info + struct stat st; + if (fstatat(dir_fd, entry->d_name, &st, 0) == 0) { + // Convert times using fd variant + platform_get_file_times_fd(dir_fd, entry->d_name, + &fe->created_time, + &fe->modified_time); + platform_get_file_owner_fd(dir_fd, entry->d_name, fe->owner, + sizeof(fe->owner)); + fe->size_bytes = (uint64_t)st.st_size; + + // Normalize path + char temp_path[MAX_PATHLEN]; + memcpy(temp_path, pb.buffer, + (pb.filename_pos - pb.buffer) + name_len + 1); + normalize_path(temp_path); + + fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, + false); strcpy(fe->path, temp_path); + + 
mpmc_push(ctx->file_queue, fe); + } + continue; + } + } + + // Fallback for unknown types + struct stat st; + if (fstatat(dir_fd, entry->d_name, &st, AT_SYMLINK_NOFOLLOW) == 0) { + if (S_ISLNK(st.st_mode)) + continue; + + if (S_ISDIR(st.st_mode)) { + char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false); + mpmc_push_work(ctx->dir_queue, dir_path); + } else if (S_ISREG(st.st_mode)) { + atomic_fetch_add(&g_files_found, 1); + FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), + true); + + platform_get_file_times(pb.buffer, &fe->created_time, + &fe->modified_time); + platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner)); + fe->size_bytes = (uint64_t)st.st_size; + + char temp_path[MAX_PATHLEN]; + memcpy(temp_path, pb.buffer, + (pb.filename_pos - pb.buffer) + name_len + 1); + normalize_path(temp_path); + + fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, + false); strcpy(fe->path, temp_path); + + mpmc_push(ctx->file_queue, fe); + } + } + } + + closedir(dir); // Closes dir_fd automatically +} + +// Choice 2 + +// void scan_folder(const char *base, ScannerContext *ctx) { +// PathBuilder pb; +// path_builder_init(&pb, base); +// +// DIR *dir = opendir(base); +// if (!dir) +// return; +// +// struct dirent *entry; +// struct stat st; +// +// while ((entry = readdir(dir)) != NULL) { +// if (entry->d_name[0] == '.' && +// (entry->d_name[1] == 0 || +// (entry->d_name[1] == '.' 
&& entry->d_name[2] == 0))) +// continue; +// +// size_t name_len = strlen(entry->d_name); +// path_builder_set_filename(&pb, entry->d_name, name_len); +// +// if (lstat(pb.buffer, &st) == 0 && S_ISLNK(st.st_mode)) +// continue; +// +// if (stat(pb.buffer, &st) == 0) { +// if (S_ISDIR(st.st_mode)) { +// char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false); +// mpmc_push_work(ctx->dir_queue, dir_path); +// } else { +// atomic_fetch_add(&g_files_found, 1); +// +// FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true); +// +// // Create a temporary copy for normalization +// char temp_path[MAX_PATHLEN]; +// memcpy(temp_path, pb.buffer, +// (pb.filename_pos - pb.buffer) + name_len + 1); +// normalize_path(temp_path); +// +// fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, false); +// strcpy(fe->path, temp_path); +// +// platform_get_file_times(pb.buffer, &fe->created_time, +// &fe->modified_time); +// platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner)); +// fe->size_bytes = (uint64_t)st.st_size; +// +// mpmc_push(ctx->file_queue, fe); +// } +// } +// } +// +// closedir(dir); +// } + +#endif + +// ------------------------- Scan worker -------------------------------- +static THREAD_RETURN scan_worker(void *arg) { + ScannerContext *ctx = (ScannerContext *)arg; + + for (;;) { + char *dir = mpmc_pop(ctx->dir_queue); + if (!dir) + break; + + scan_folder(dir, ctx); + + mpmc_task_done(ctx->dir_queue, ctx->num_threads); + } + + return THREAD_RETURN_VALUE; +} + +// ----------------------------- Hashing helpers ----------------------------- +static void xxh3_hash_file_stream(const char *path, char *out_hex, + unsigned char *buf) { + XXH128_hash_t h; + XXH3_state_t state; + XXH3_128bits_reset(&state); + + FileHandle handle = os_file_open(path); + if (handle == INVALID_FILE_HANDLE) { + strcpy(out_hex, "ERROR"); + return; + } + + uint64_t bytes_read; + while (os_file_read(handle, buf, READ_BLOCK, &bytes_read) == 0 && + 
bytes_read > 0) { + XXH3_128bits_update(&state, buf, (size_t)bytes_read); + atomic_fetch_add(&g_bytes_processed, bytes_read); + } + + os_file_close(handle); + + h = XXH3_128bits_digest(&state); + snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, + (unsigned long long)h.low64); +} + +// ------------------------- Hash worker -------------------------------- +static THREAD_RETURN hash_worker(void *arg) { + WorkerContext *ctx = (WorkerContext *)arg; + unsigned char *buf = (unsigned char *)malloc(READ_BLOCK); + + for (;;) { + FileEntry *fe = mpmc_pop(ctx->file_queue); + if (!fe) + break; + + char hash[HASH_STRLEN]; + xxh3_hash_file_stream(fe->path, hash, buf); + + char created[32], modified[32]; + format_time(fe->created_time, created, sizeof(created)); + format_time(fe->modified_time, modified, sizeof(modified)); + + double size_kib = (double)fe->size_bytes / 1024.0; + + char stack_buf[1024]; + + int len = + snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n", + hash, fe->path, size_kib, created, modified, fe->owner); + + char *dst = arena_push(&ctx->arena, len, false); + memcpy(dst, stack_buf, len); + + atomic_fetch_add(&g_files_hashed, 1); + } + + free(buf); + + return THREAD_RETURN_VALUE; +} + +// ----------------------------- Progress display --------------------------- +static THREAD_RETURN progress_thread(void *arg) { + (void)arg; // Unused parameter + + HiResTimer progress_timer; + timer_start(&progress_timer); + + uint64_t last_bytes = atomic_load(&g_bytes_processed); + double last_time = 0.0; + + double displayed_speed = 0.0; + const double sample_interval = 0.5; + + for (;;) { + uint64_t found = atomic_load(&g_files_found); + uint64_t hashed = atomic_load(&g_files_hashed); + uint64_t bytes = atomic_load(&g_bytes_processed); + int scan_done = atomic_load(&g_scan_done); + + double t = timer_elapsed(&progress_timer); + + if (last_time == 0.0) { + last_time = t; + last_bytes = bytes; + } + + double dt = t - last_time; 
+ + if (dt >= sample_interval) { + uint64_t db = bytes - last_bytes; + + if (db > 0 && dt > 0.0001) { + displayed_speed = (double)db / (1024.0 * 1024.0) / dt; + } + + last_bytes = bytes; + last_time = t; + } + + if (!scan_done) { + printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ", + (unsigned long long)found, (unsigned long long)hashed, + displayed_speed); + } else { + double pct = found ? (double)hashed / (double)found : 0.0; + int barw = 40; + int filled = (int)(pct * barw); + + char bar[64]; + int p = 0; + + bar[p++] = '['; + for (int i = 0; i < filled; i++) + bar[p++] = '#'; + for (int i = filled; i < barw; i++) + bar[p++] = '.'; + bar[p++] = ']'; + bar[p] = 0; + + printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0, + (unsigned long long)hashed, (unsigned long long)found, + displayed_speed); + } + + fflush(stdout); + + if (scan_done && hashed == found) + break; + + sleep_ms(100); + } + + printf("\n"); + + return THREAD_RETURN_VALUE; +} diff --git a/platform.h b/platform.h deleted file mode 100644 index 7b89cfc..0000000 --- a/platform.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once // ensure that a given header file is included only once in a - // single compilation unit - -#include "arena.h" -#include "base.h" -#include "lf_mpmc.h" - -#include "arena.c" - -// xxhash include -#define XXH_INLINE_ALL -#include "xxh_x86dispatch.h" - -// ----------------------------- Config ------------------------------------- -#define FILE_HASHES_TXT "file_hashes.txt" -#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null -#define MAX_PATHLEN 4096 -#define READ_BLOCK (64 * 1024) // 64KB blocks - -// ----------------------------- Data types --------------------------------- -// Timer -typedef struct { - LARGE_INTEGER start; - LARGE_INTEGER end; -} HiResTimer; - -static LARGE_INTEGER g_qpc_freq; - -// File entry -typedef struct FileEntry { - char *path; - - uint64_t size_bytes; - uint64_t created_time; // epoch - uint64_t modified_time; // epoch seconds - 
char owner[128]; // resolved owner name -} FileEntry; - -// Threads context -typedef struct { - u8 num_threads; - - mem_arena *path_arena; - mem_arena *meta_arena; - - MPMCQueue *dir_queue; - MPMCQueue *file_queue; -} ScannerContext; - -typedef struct { - mem_arena *arena; - MPMCQueue *file_queue; -} WorkerContext; diff --git a/platform_posix.c b/platform_posix.c deleted file mode 100644 index ede307a..0000000 --- a/platform_posix.c +++ /dev/null @@ -1,678 +0,0 @@ -#include "platform.h" - -// ----------------------------- Globals ------------------------------------ -static atomic_uint_fast64_t g_bytes_processed = 0; -FileEntry *g_entries = NULL; -size_t g_entry_count = 0; -size_t g_entry_capacity = 0; - -// ----------------------------- Utils -------------------------------------- -static void perror_exit(const char *msg) { - perror(msg); - exit(1); -} - -static void *xmalloc(size_t n) { - void *p = malloc(n); - if (!p) - perror_exit("malloc"); - return p; -} - -static void add_entry(const FileEntry *src) { - if (g_entry_count + 1 > g_entry_capacity) { - g_entry_capacity = g_entry_capacity ? 
g_entry_capacity * 2 : 1024; - g_entries = realloc(g_entries, sizeof(FileEntry) * g_entry_capacity); - if (!g_entries) - perror_exit("realloc"); - } - - FileEntry *dst = &g_entries[g_entry_count++]; - memset(dst, 0, sizeof(*dst)); - - dst->size_bytes = src->size_bytes; - dst->created_time = src->created_time; - dst->modified_time = src->modified_time; - - if (src->path) - dst->path = strdup(src->path); - - strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1); - dst->owner[sizeof(dst->owner) - 1] = '\0'; -} - -static void free_entries(void) { - for (size_t i = 0; i < g_entry_count; ++i) { - free(g_entries[i].path); - } - - free(g_entries); - g_entries = NULL; - g_entry_count = 0; - g_entry_capacity = 0; -} - -// ----------------------------- Owner lookup ------------------------------ -static void get_file_owner(uid_t uid, char *out, size_t out_sz) { - struct passwd *pw = getpwuid(uid); - if (pw) { - snprintf(out, out_sz, "%s", pw->pw_name); - } else { - snprintf(out, out_sz, "UNKNOWN"); - } -} - -// ----------------------------- Format time helper ------------------------- -static void format_time(uint64_t t, char *out, size_t out_sz) { - if (t == 0) { - snprintf(out, out_sz, "N/A"); - return; - } - - time_t tt = (time_t)t; - struct tm tm; - -#if PLATFORM_WINDOWS - localtime_s(&tm, &tt); -#else - localtime_r(&tt, &tm); -#endif - - strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm); -} - -// --------------- parallel directory scanning ---------------- - -// Add queue helper functions -static void dirqueue_push(DirQueue *q, const char *path) { - DirJob *job = malloc(sizeof(*job)); - job->path = strdup(path); - job->next = NULL; - - pthread_mutex_lock(&q->mutex); - - if (q->tail) - q->tail->next = job; - else - q->head = job; - - q->tail = job; - - pthread_cond_signal(&q->cond); - pthread_mutex_unlock(&q->mutex); -} - -static char *dirqueue_pop(DirQueue *q) { - pthread_mutex_lock(&q->mutex); - while (!q->head && !q->stop) - pthread_cond_wait(&q->cond, &q->mutex); - - 
if (q->stop) { - pthread_mutex_unlock(&q->mutex); - return NULL; - } - - DirJob *job = q->head; - q->head = job->next; - if (!q->head) - q->tail = NULL; - - q->active_workers++; - pthread_mutex_unlock(&q->mutex); - - char *path = job->path; - free(job); - return path; -} - -static void dirqueue_done(DirQueue *q) { - pthread_mutex_lock(&q->mutex); - q->active_workers--; - - if (!q->head && q->active_workers == 0) { - q->stop = 1; - pthread_cond_broadcast(&q->cond); - } - pthread_mutex_unlock(&q->mutex); -} - -// Scanning directory worker thread function -static void scan_worker(void *arg) { - DirQueue *q = arg; - - for (;;) { - char *dir = dirqueue_pop(q); - if (!dir) - break; - - scan_folder_posix_parallel(dir, q); - - free(dir); - dirqueue_done(q); - } -} - -// Scanning directory function -void scan_folder_posix_parallel(const char *base, DirQueue *q) { - DIR *d = opendir(base); - if (!d) - return; - - struct dirent *ent; - while ((ent = readdir(d))) { - if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) - continue; - - char full[MAX_PATHLEN]; - snprintf(full, sizeof(full), "%s/%s", base, ent->d_name); - - struct stat st; - if (lstat(full, &st) != 0) - continue; - - if (S_ISDIR(st.st_mode)) { - dirqueue_push(q, full); - } else if (S_ISREG(st.st_mode)) { - FileEntry fe; - memset(&fe, 0, sizeof(fe)); - - normalize_path(full); - - fe.path = full; - fe.size_bytes = (uint64_t)st.st_size; - fe.created_time = (uint64_t)st.st_ctime; - fe.modified_time = (uint64_t)st.st_mtime; - - get_file_owner(st.st_uid, fe.owner, sizeof(fe.owner)); - - add_entry(&fe); - } - } - closedir(d); -} - -// ----------------------------- Job queue ---------------------------------- -static void jobqueue_init(JobQueue *q) { - q->head = q->tail = NULL; - atomic_store(&q->count, 0); - q->stop = 0; - pthread_mutex_init(&q->mutex, NULL); - pthread_cond_init(&q->cond, NULL); -} - -static void jobqueue_push(JobQueue *q, Job *job) { - pthread_mutex_lock(&q->mutex); - job->next = NULL; - if 
(q->tail) - q->tail->next = job; - else - q->head = job; - q->tail = job; - atomic_fetch_add(&q->count, 1); - pthread_cond_signal(&q->cond); - pthread_mutex_unlock(&q->mutex); -} - -static Job *jobqueue_pop(JobQueue *q) { - pthread_mutex_lock(&q->mutex); - while (!q->head && !q->stop) - pthread_cond_wait(&q->cond, &q->mutex); - if (q->stop && !q->head) { - pthread_mutex_unlock(&q->mutex); - return NULL; - } - Job *j = q->head; - q->head = j->next; - if (!q->head) - q->tail = NULL; - pthread_mutex_unlock(&q->mutex); - if (j) - atomic_fetch_sub(&q->count, 1); - return j; -} - -static void jobqueue_stop(JobQueue *q) { - pthread_mutex_lock(&q->mutex); - q->stop = 1; - pthread_cond_broadcast(&q->cond); - pthread_mutex_unlock(&q->mutex); -} - -// ----------------------------- Hashing helpers ----------------------------- -static void xxh3_hash_file_stream(const char *path, char *out_hex) { - // compute XXH3_128 over file. POSIX and Windows use standard reads in this - // helper. - int fd = open(path, O_RDONLY); - if (fd < 0) { - strcpy(out_hex, "ERROR"); - return; - } - XXH128_hash_t h; - XXH3_state_t *state = XXH3_createState(); - XXH3_128bits_reset(state); - unsigned char *buf = (unsigned char *)malloc(READ_BLOCK); - ssize_t r; - while ((r = read(fd, buf, READ_BLOCK)) > 0) { - XXH3_128bits_update(state, buf, (size_t)r); - atomic_fetch_add(&g_bytes_processed, (uint64_t)r); - } - - h = XXH3_128bits_digest(state); - XXH3_freeState(state); - close(fd); - free(buf); - snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, - (unsigned long long)h.low64); -} - -// ----------------------------- Worker -------------------------------------- -static void *worker_thread_posix(void *argp) { - WorkerArg *w = (WorkerArg *)argp; - JobQueue *q = w->queue; - for (;;) { - Job *job = jobqueue_pop(q); - if (!job) - break; - char hex[HASH_STRLEN]; - xxh3_hash_file_stream(job->file->path, hex); - - // append to file_hashes.txt atomically: we will store results to a 
temp - // buffer and write them at the end (to avoid synchronization issues). But - // for simplicity, here we append directly using a file lock (fopen+fwrite - // guarded by mutex). We'll store results in job->file->path? Instead, - // simple global append with a mutex. Using a file-level append lock: - static pthread_mutex_t append_mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_lock(&append_mutex); - FILE *hf = fopen(FILE_HASHES_TXT, "a"); - if (hf) { - char created[32], modified[32]; - - format_time(job->file->created_time, created, sizeof(created)); - format_time(job->file->modified_time, modified, sizeof(modified)); - double size_kib = (double)job->file->size_bytes / (1024.0); - - fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib, - created, modified, job->file->owner); - fclose(hf); - } - pthread_mutex_unlock(&append_mutex); - - atomic_fetch_add(w->done_counter, 1); - free(job); - } - atomic_fetch_sub(w->live_workers, 1); - return NULL; -} - -// ----------------------------- Progress display --------------------------- -static void print_progress(size_t done, size_t total) { - const int barw = 40; - double pct = total ? 
(double)done / (double)total : 0.0; - int filled = (int)(pct * barw + 0.5); - printf("\r["); - for (int i = 0; i < filled; ++i) - putchar('#'); - for (int i = filled; i < barw; ++i) - putchar(' '); - printf("] %6.2f%% (%zu / %zu) ", pct * 100.0, done, total); - fflush(stdout); -} - -// ----------------------------- Helpers: load/save -------------------------- -static int file_exists(const char *path) { - struct stat st; - return (stat(path, &st) == 0); -} - -static void save_file_list(const char *list_path) { - FILE *f = fopen(list_path, "w"); - if (!f) { - perror("fopen file_list"); - return; - } - for (size_t i = 0; i < g_entry_count; ++i) { - fprintf(f, "%s\n", g_entries[i].path); - } - fclose(f); -} - -static void load_file_list(const char *list_path) { - FILE *f = fopen(list_path, "r"); - if (!f) - return; - - char line[MAX_PATHLEN]; - - while (fgets(line, sizeof(line), f)) { - line[strcspn(line, "\r\n")] = 0; - - FileEntry fe; - memset(&fe, 0, sizeof(fe)); - - fe.path = line; - - /* Populate metadata from filesystem */ - platform_get_file_times(line, &fe.created_time, &fe.modified_time); - - platform_get_file_owner(line, fe.owner, sizeof(fe.owner)); - - add_entry(&fe); - } - - fclose(f); -} - -// Read existing hashes into memory map for resume -// Simple linear search mapping: returns 1 if path has hash found (and writes -// into out_hex) -static int find_hash_in_file(const char *hashfile, const char *path, - char *out_hex) { - FILE *f = fopen(hashfile, "r"); - if (!f) - return 0; - char p[MAX_PATHLEN]; - char h[128]; - int found = 0; - while (fscanf(f, "%4095s %127s", p, h) == 2) { - if (strcmp(p, path) == 0) { - strncpy(out_hex, h, HASH_STRLEN); - out_hex[HASH_STRLEN - 1] = 0; - found = 1; - break; - } - } - fclose(f); - return found; -} -// ----------------------------- Get file metadata ------------------------- -void platform_get_file_times(const char *path, uint64_t *out_created, - uint64_t *out_modified) { - struct stat st; - if (stat(path, &st) == 0) 
{ - *out_created = (uint64_t)st.st_ctime; - *out_modified = (uint64_t)st.st_mtime; - } else { - *out_created = 0; - *out_modified = 0; - } -} - -void platform_get_file_owner(const char *path, char *out_owner, - size_t out_owner_size) { - struct stat st; - if (stat(path, &st) == 0) { - get_file_owner(st.st_uid, out_owner, out_owner_size); - } else { - snprintf(out_owner, out_owner_size, "UNKNOWN"); - } -} - -// ----------------------------- Main --------------------------------------- -int main(int argc, char **argv) { - char folders[64][MAX_PATHLEN]; // up to 64 input folders - int folder_count = 0; - int resume = 0; - - // ------------------------------- - // Parse arguments - // ------------------------------- - for (int i = 1; i < argc; ++i) { - if (strcmp(argv[i], "-resume") == 0) { - resume = 1; - } else { - if (folder_count < 64) { - strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); - folders[folder_count][MAX_PATHLEN - 1] = 0; - folder_count++; - } - } - } - - // ------------------------------- - // Ask user if no folders provided - // ------------------------------- - if (folder_count == 0 && !resume) { - printf("Enter folder to process (Enter = current folder): "); - fflush(stdout); - - char buf[MAX_PATHLEN]; - if (!fgets(buf, sizeof(buf), stdin)) - return 1; - buf[strcspn(buf, "\r\n")] = 0; - - if (buf[0] == 0) - strcpy(folders[0], "."); - else - strncpy(folders[0], buf, MAX_PATHLEN - 1); - - folder_count = 1; - } else if (folder_count == 0 && resume) { - strcpy(folders[0], "."); - folder_count = 1; - } - - // ------------------------------- - // Display selected folders - // ------------------------------- - printf("Processing %d folder(s):\n", folder_count); - for (int i = 0; i < folder_count; ++i) { - printf(" - %s\n", folders[i]); - } - - // ------------------------------- - // Detect hardware threads (CPU cores) - // ------------------------------- - size_t hw_threads = 1; - long cpus = sysconf(_SC_NPROCESSORS_ONLN); - if (cpus > 0) - 
hw_threads = (size_t)cpus; - - // Add some extra threads to overlap I/O more aggressively - size_t num_threads = hw_threads * 2; - if (num_threads < 2) - num_threads = 2; - - // ------------------------------- - // Step 1: Scan all folders - // ------------------------------- - if (!resume) { - DirQueue q = {0}; - pthread_mutex_init(&q.mutex, NULL); - pthread_cond_init(&q.cond, NULL); - - // Seed queue - for (int i = 0; i < folder_count; ++i) - dirqueue_push(&q, folders[i]); - - pthread_t *threads = malloc(sizeof(pthread_t) * num_threads); - - for (size_t i = 0; i < num_threads; ++i) - pthread_create(&threads[i], NULL, (void *(*)(void *))scan_worker, &q); - - for (size_t i = 0; i < num_threads; ++i) - pthread_join(threads[i], NULL); - - free(threads); - - pthread_mutex_destroy(&q.mutex); - pthread_cond_destroy(&q.cond); - - printf("Found %zu files. Saving to %s\n", g_entry_count, FILE_LIST_TXT); - save_file_list(FILE_LIST_TXT); - } else { - if (!file_exists(FILE_LIST_TXT)) { - fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT); - return 1; - } - load_file_list(FILE_LIST_TXT); - printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT); - } - - if (g_entry_count == 0) { - printf("No files to process.\n"); - return 0; - } - - // If resume: create map of which files are already hashed - char **existing_hash = calloc(g_entry_count, sizeof(char *)); - for (size_t i = 0; i < g_entry_count; ++i) - existing_hash[i] = NULL; - - if (resume && file_exists(FILE_HASHES_TXT)) { - // For simplicity we parse hash file and match lines to list entries. 
- for (size_t i = 0; i < g_entry_count; ++i) { - char hex[HASH_STRLEN] = {0}; - if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) { - existing_hash[i] = strdup(hex); - } - } - } - - // Prepare job queue of only missing files (or all if not resume) - JobQueue queue; - jobqueue_init(&queue); - - size_t total_jobs = 0; - for (size_t i = 0; i < g_entry_count; ++i) { - if (resume && existing_hash[i]) - continue; - Job *j = (Job *)malloc(sizeof(Job)); - j->file = &g_entries[i]; - j->next = NULL; - jobqueue_push(&queue, j); - ++total_jobs; - } - - if (total_jobs == 0) { - printf("Nothing to do — all files already hashed.\n"); - return 0; - } - - // Remove old hashes file if we're recomputing from scratch. - if (!resume) { - // create/overwrite hashes file - FILE *hf = fopen(FILE_HASHES_TXT, "w"); - if (hf) - fclose(hf); - } // if resume, we append only missing - - // Starting thread pool - - atomic_size_t done_counter; - atomic_store(&done_counter, 0); - atomic_int live_workers; - atomic_store(&live_workers, (int)num_threads); - - WorkerArg warg = {.queue = &queue, - .done_counter = &done_counter, - .total_jobs = total_jobs, - .live_workers = &live_workers}; - - printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads, - hw_threads); - - // Launch threads - pthread_t *tids = malloc(sizeof(pthread_t) * num_threads); - for (size_t i = 0; i < num_threads; ++i) { - pthread_create(&tids[i], NULL, worker_thread_posix, &warg); - } - - // Progress / timer - struct timespec tstart, tnow; - clock_gettime(CLOCK_MONOTONIC, &tstart); - - size_t last_done = 0; - - // ---------- Correct real-time MB/s (stable & accurate) ---------- - uint64_t last_bytes = atomic_load(&g_bytes_processed); - double last_time = 0.0; - double displayed_speed = 0.0; - const double sample_interval = 0.5; - char linebuf[256]; - - for (;;) { - size_t done = (size_t)atomic_load(&done_counter); - - // ---- monotonic time ---- - clock_gettime(CLOCK_MONOTONIC, &tnow); - double now = 
- (tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9; - - // ---- bytes so far ---- - uint64_t bytes = atomic_load(&g_bytes_processed); - - // ---- real sampler (independent of UI sleep) ---- - if (last_time == 0.0) { - last_time = now; - last_bytes = bytes; - } - - double dt = now - last_time; - if (dt >= sample_interval) { - uint64_t db = bytes - last_bytes; - - if (db > 0 && dt > 0.0001) { - displayed_speed = (double)db / (1024.0 * 1024.0) / dt; - } - - last_bytes = bytes; - last_time = now; - } - - // ---- progress bar build ---- - const int barw = 40; - double pct = total_jobs ? (double)done / (double)total_jobs : 0.0; - int filled = (int)(pct * barw + 0.5); - - int p = 0; - p += snprintf(linebuf + p, sizeof(linebuf) - p, "["); - for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i) - p += snprintf(linebuf + p, sizeof(linebuf) - p, "#"); - for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i) - p += snprintf(linebuf + p, sizeof(linebuf) - p, "."); - - snprintf(linebuf + p, sizeof(linebuf) - p, - "] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs, - displayed_speed); - - printf("\r%s", linebuf); - fflush(stdout); - - if (done >= total_jobs) - break; - - usleep(100000); - } - - printf("\n\n"); - - // stop queue and join threads - jobqueue_stop(&queue); - for (size_t i = 0; i < num_threads; ++i) - pthread_join(tids[i], NULL); - - // done time - clock_gettime(CLOCK_MONOTONIC, &tnow); - double elapsed = - (tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9; - - printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed); - uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed); - double total_mb = (double)total_bytes / (1024.0 * 1024.0); - double avg_mbps = total_mb / elapsed; - printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps); - - // If resume: we appended missing entries. If not resume: we wrote all results - // during workers. 
Note: This program appends hashes as workers finish. This - // avoids holding all hashes in RAM. - - // Cleanup - for (size_t i = 0; i < g_entry_count; ++i) - if (existing_hash[i]) - free(existing_hash[i]); - free(existing_hash); - - free_entries(); - - return 0; -} diff --git a/platform_windows.c b/platform_windows.c deleted file mode 100644 index 36323df..0000000 --- a/platform_windows.c +++ /dev/null @@ -1,641 +0,0 @@ -#include "base.h" -#include "platform.h" - -// ----------------------------- Globals ------------------------------------ -static atomic_uint_fast64_t g_files_found = 0; -static atomic_uint_fast64_t g_files_hashed = 0; -static atomic_uint_fast64_t g_bytes_processed = 0; -static atomic_int g_scan_done = 0; - -// ============================= Utils ====================================== -// ----------------------------- Timer functions -------------- -static void timer_init(void) { QueryPerformanceFrequency(&g_qpc_freq); } - -static void timer_start(HiResTimer *t) { QueryPerformanceCounter(&t->start); } - -static double timer_stop(HiResTimer *t) { - QueryPerformanceCounter(&t->end); - return (double)(t->end.QuadPart - t->start.QuadPart) / - (double)g_qpc_freq.QuadPart; -} - -// ----------------------------- Get instruction set -------------- -const char *get_xxhash_instruction_set(void) { - int vecID = XXH_featureTest(); - - switch (vecID) { - case XXH_SCALAR: - return "Scalar (portable C)"; - case XXH_SSE2: - return "SSE2"; - case XXH_AVX2: - return "AVX2"; - case XXH_AVX512: - return "AVX-512"; - default: - return "Unknown"; - } -} - -// -------------------- Path parsing ------------------- -static void normalize_path(char *p) { - char *src = p; - char *dst = p; - int prev_slash = 0; - - while (*src) { - char c = *src++; - - if (c == '\\' || c == '/') { - if (!prev_slash) { - *dst++ = '/'; - prev_slash = 1; - } - } else { - *dst++ = c; - prev_slash = 0; - } - } - - *dst = '\0'; -} - -static int parse_paths(char *line, char folders[][MAX_PATHLEN], 
- int max_folders) { - int count = 0; - char *p = line; - - while (*p && count < max_folders) { - - while (*p && isspace((unsigned char)*p)) - p++; - - if (!*p) - break; - - char *start; - char quote = 0; - - if (*p == '"' || *p == '\'') { - quote = *p++; - start = p; - - while (*p && *p != quote) - p++; - } else { - start = p; - - while (*p && !isspace((unsigned char)*p)) - p++; - } - - size_t len = p - start; - if (len >= MAX_PATHLEN) - len = MAX_PATHLEN - 1; - - memcpy(folders[count], start, len); - folders[count][len] = 0; - - normalize_path(folders[count]); - - count++; - - if (quote && *p == quote) - p++; - } - return count; -} - -// ----------------------------- Convert filetime to epoch -------------- -static uint64_t filetime_to_epoch(const FILETIME *ft) { - ULARGE_INTEGER ull; - ull.LowPart = ft->dwLowDateTime; - ull.HighPart = ft->dwHighDateTime; - - // Windows epoch (1601) → Unix epoch (1970) - return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL; -} -// ----------------------------- Format time helper ------------------------- -static void format_time(uint64_t t, char *out, size_t out_sz) { - if (t == 0) { - snprintf(out, out_sz, "N/A"); - return; - } - - time_t tt = (time_t)t; - struct tm tm; - -#if PLATFORM_WINDOWS - localtime_s(&tm, &tt); -#else - localtime_r(&tt, &tm); -#endif - - strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm); -} - -// ----------------------------- Resolve file owner --------------------- -static void get_file_owner(const char *path, char *out, size_t out_sz) { - PSID sid = NULL; - PSECURITY_DESCRIPTOR sd = NULL; - - if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION, - &sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) { - - char name[64], domain[64]; - DWORD name_len = sizeof(name); - DWORD domain_len = sizeof(domain); - SID_NAME_USE use; - - if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len, - &use)) { - snprintf(out, out_sz, "%s\\%s", domain, name); - } else { - snprintf(out, 
out_sz, "UNKNOWN"); - } - } else { - snprintf(out, out_sz, "UNKNOWN"); - } - - if (sd) - LocalFree(sd); -} - -// ----------------------------- Get file metadata ------------------------- -void platform_get_file_times(const char *path, uint64_t *out_created, - uint64_t *out_modified) { - WIN32_FILE_ATTRIBUTE_DATA fad; - if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) { - *out_created = filetime_to_epoch(&fad.ftCreationTime); - *out_modified = filetime_to_epoch(&fad.ftLastWriteTime); - } else { - *out_created = 0; - *out_modified = 0; - } -} - -void platform_get_file_owner(const char *path, char *out_owner, - size_t out_owner_size) { - get_file_owner(path, out_owner, out_owner_size); -} - -// ----------------------------- Scan helpers ----------------------------- -void scan_folder_windows_parallel(const char *base, ScannerContext *ctx) { - char search[MAX_PATHLEN]; - snprintf(search, sizeof(search), "%s\\*", base); - - WIN32_FIND_DATAA fd; - HANDLE h = FindFirstFileA(search, &fd); - if (h == INVALID_HANDLE_VALUE) - return; - - do { - if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, "..")) - continue; - - char full[MAX_PATHLEN]; - snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName); - - if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) - continue; - - if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - size_t len = strlen(full) + 1; - - char *dir = arena_push(&ctx->path_arena, len, false); - memcpy(dir, full, len); - - mpmc_push_work(ctx->dir_queue, dir); - } else { - - atomic_fetch_add(&g_files_found, 1); - - FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true); - - char norm[MAX_PATHLEN]; - strncpy(norm, full, sizeof(norm) - 1); - norm[sizeof(norm) - 1] = 0; - normalize_path(norm); - - size_t len = strlen(norm) + 1; - - char *path = arena_push(&ctx->path_arena, len, false); - memcpy(path, norm, len); - - fe->path = path; - - platform_get_file_times(full, &fe->created_time, &fe->modified_time); - - 
platform_get_file_owner(full, fe->owner, sizeof(fe->owner)); - - fe->size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow; - - mpmc_push(ctx->file_queue, fe); - } - - } while (FindNextFileA(h, &fd)); - - FindClose(h); -} - -// ------------------------- Scan worker -------------------------------- -static DWORD WINAPI scan_worker(LPVOID arg) { - ScannerContext *ctx = arg; - - for (;;) { - char *dir = mpmc_pop(ctx->dir_queue); - if (!dir) - break; - - scan_folder_windows_parallel(dir, ctx); - - mpmc_task_done(ctx->dir_queue, ctx->num_threads); - } - return 0; -} - -// ----------------------------- Hashing helpers ----------------------------- -static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) { - // compute XXH3_128 over file. POSIX and Windows use standard reads in this - // helper. - // On Windows try to use overlapped synchronous chunked reads for higher - // throughput. - HANDLE hFile = - CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, - OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL); - if (hFile == INVALID_HANDLE_VALUE) { - strcpy(out_hex, "ERROR"); - return; - } - XXH128_hash_t h; - XXH3_state_t state; - XXH3_128bits_reset(&state); - - DWORD read = 0; - BOOL ok; - while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) { - XXH3_128bits_update(&state, buf, (size_t)read); - atomic_fetch_add(&g_bytes_processed, (uint64_t)read); - } - h = XXH3_128bits_digest(&state); - CloseHandle(hFile); - snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, - (unsigned long long)h.low64); -} - -// ------------------------- Hash worker -------------------------------- -static DWORD WINAPI hash_worker(LPVOID arg) { - - WorkerContext *ctx = (WorkerContext *)arg; - BYTE *buf = (BYTE *)malloc(READ_BLOCK); - - for (;;) { - FileEntry *fe = mpmc_pop(ctx->file_queue); - if (!fe) - break; - - char hash[HASH_STRLEN]; - xxh3_hash_file_stream(fe->path, hash, buf); - - char created[32], 
modified[32]; - format_time(fe->created_time, created, sizeof(created)); - format_time(fe->modified_time, modified, sizeof(modified)); - - double size_kib = (double)fe->size_bytes / 1024.0; - - char stack_buf[1024]; - - int len = - snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n", - hash, fe->path, size_kib, created, modified, fe->owner); - - char *dst = arena_push(&ctx->arena, len, false); - memcpy(dst, stack_buf, len); - - atomic_fetch_add(&g_files_hashed, 1); - } - // free(buf); It will be freed by the system when the program exits - - return 0; -} - -// ----------------------------- Progress display --------------------------- -DWORD WINAPI progress_thread(void *arg) { - - LARGE_INTEGER freq, start; - QueryPerformanceFrequency(&freq); - QueryPerformanceCounter(&start); - - uint64_t last_bytes = atomic_load(&g_bytes_processed); - double last_time = 0.0; - - double displayed_speed = 0.0; - const double sample_interval = 0.5; - - for (;;) { - - uint64_t found = atomic_load(&g_files_found); - uint64_t hashed = atomic_load(&g_files_hashed); - uint64_t bytes = atomic_load(&g_bytes_processed); - int scan_done = atomic_load(&g_scan_done); - - LARGE_INTEGER now; - QueryPerformanceCounter(&now); - - double t = (double)(now.QuadPart - start.QuadPart) / (double)freq.QuadPart; - - if (last_time == 0.0) { - last_time = t; - last_bytes = bytes; - } - - double dt = t - last_time; - - if (dt >= sample_interval) { - uint64_t db = bytes - last_bytes; - - if (db > 0 && dt > 0.0001) { - displayed_speed = (double)db / (1024.0 * 1024.0) / dt; - } - - last_bytes = bytes; - last_time = t; - } - - if (!scan_done) { - - printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ", - (unsigned long long)found, (unsigned long long)hashed, - displayed_speed); - - } else { - - double pct = found ? 
(double)hashed / (double)found : 0.0; - - int barw = 40; - int filled = (int)(pct * barw); - - char bar[64]; - int p = 0; - - bar[p++] = '['; - - for (int i = 0; i < filled; i++) - bar[p++] = '#'; - - for (int i = filled; i < barw; i++) - bar[p++] = '.'; - - bar[p++] = ']'; - bar[p] = 0; - - printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0, - (unsigned long long)hashed, (unsigned long long)found, - displayed_speed); - } - - fflush(stdout); - - if (scan_done && hashed == found) - break; - - Sleep(100); - } - - printf("\n"); - - return 0; -} - -// ----------------------------- Main --------------------------------------- -int main(int argc, char **argv) { - char folders[64][MAX_PATHLEN]; // up to 64 input folders - int folder_count = 0; - - // ------------------------------- - // Parse arguments - // ------------------------------- - for (int i = 1; i < argc; ++i) { - if (folder_count < 64) { - normalize_path(argv[i]); - strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); - folders[folder_count][MAX_PATHLEN - 1] = 0; - folder_count++; - } - } - - // ------------------------------- - // Ask user if no folders provided - // ------------------------------- - if (folder_count == 0) { - printf("Enter folders to process (Enter = current folder): "); - fflush(stdout); - - char buf[KiB(32)]; - - if (!fgets(buf, sizeof(buf), stdin)) - return 1; - - buf[strcspn(buf, "\r\n")] = 0; - - if (buf[0] == 0) { - strcpy(folders[0], "."); - folder_count = 1; - } else { - folder_count = parse_paths(buf, folders, 64); - } - } - - // Display selected folders - printf("Processing %d folder(s):\n", folder_count); - for (int i = 0; i < folder_count; ++i) { - printf(" - %s\n", folders[i]); - } - - // ------------------------------- - // Scanning and total timer init - // ------------------------------- - timer_init(); - - HiResTimer total_timer; - HiResTimer scan_timer; - - timer_start(&total_timer); - timer_start(&scan_timer); - - // ------------------------------- - // 
Creating a general purpose arena - // ------------------------------- - arena_params params = { - .reserve_size = GiB(1), - .commit_size = MiB(16), - .align = 0, - .push_size = 0, - .allow_free_list = true, - .allow_swapback = false, - .growth_policy = ARENA_GROWTH_NORMAL, - .commit_policy = ARENA_COMMIT_LAZY, - .max_nbre_blocks = 1, - }; - - mem_arena *gp_arena = arena_create(¶ms); - - // ------------------------------- - // Detect hardware threads - // ------------------------------- - size_t hw_threads = 1; - // --- Windows: detect PHYSICAL cores (not logical threads) --- - DWORD len = 0; - GetLogicalProcessorInformation(NULL, &len); - - SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf = - (SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)arena_push(&gp_arena, len, true); - - if (GetLogicalProcessorInformation(buf, &len)) { - DWORD count = 0; - DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); - for (DWORD i = 0; i < n; i++) { - if (buf[i].Relationship == RelationProcessorCore) - count++; - } - if (count > 0) - hw_threads = count; - } - arena_free(&gp_arena, (u8 **)&buf, len); - - // Logical threads = CPU cores * 2 - size_t num_threads = hw_threads * 2; - - printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads, - hw_threads); - printf(" Selected instruction set: %s\n", get_xxhash_instruction_set()); - - // ------------------------------- - // Scanning and hashing - // ------------------------------- - MPMCQueue dir_queue; - mpmc_init(&dir_queue, MiB(1)); - - MPMCQueue file_queue; - mpmc_init(&file_queue, MiB(1)); - - // starting hash threads - size_t num_hash_threads = num_threads; - - WorkerContext workers[num_hash_threads]; - - HANDLE *hash_threads = - arena_push(&gp_arena, sizeof(HANDLE) * num_hash_threads, true); - - for (size_t i = 0; i < num_hash_threads; ++i) { - - workers[i].arena = arena_create(¶ms); - workers[i].file_queue = &file_queue; - - hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL); - } - - // starting 
progress printing thread - HANDLE progress = CreateThread(NULL, 0, progress_thread, NULL, 0, NULL); - - // starting scan threads - size_t num_scan_threads = num_threads; - - ScannerContext scanners[num_scan_threads]; - - HANDLE *scan_threads = - arena_push(&gp_arena, sizeof(HANDLE) * num_scan_threads, true); - - for (size_t i = 0; i < num_scan_threads; i++) { - scanners[i].num_threads = num_scan_threads; - - scanners[i].path_arena = arena_create(¶ms); - scanners[i].meta_arena = arena_create(¶ms); - - scanners[i].dir_queue = &dir_queue; - scanners[i].file_queue = &file_queue; - - scan_threads[i] = CreateThread(NULL, 0, scan_worker, &scanners[i], 0, NULL); - } - - // Initial folder push - for (int i = 0; i < folder_count; i++) { - - size_t len = strlen(folders[i]) + 1; - - char *path = arena_push(&scanners[0].path_arena, len, false); - memcpy(path, folders[i], len); - - mpmc_push_work(&dir_queue, path); - } - - // Stop scan threads - WaitForMultipleObjects((DWORD)num_scan_threads, scan_threads, TRUE, INFINITE); - - for (size_t i = 0; i < num_scan_threads; ++i) - CloseHandle(scan_threads[i]); - - mpmc_producers_finished(&file_queue, num_hash_threads); - - atomic_store(&g_scan_done, 1); - - arena_free(&gp_arena, (u8 **)&scan_threads, - sizeof(HANDLE) * num_scan_threads); - - double scan_seconds = timer_stop(&scan_timer); - size_t total_found = atomic_load(&g_files_found); - - printf("\r%*s\r", 120, ""); // clear_console_line - printf("Completed scanning in %.2f seconds, found %zu files\n\n", - scan_seconds, total_found); - - // If no files found - if (total_found == 0) { - printf("No files found.\n"); - return 0; - } - - // Stop hashing threads - WaitForMultipleObjects((DWORD)num_hash_threads, hash_threads, TRUE, INFINITE); - - for (size_t i = 0; i < num_hash_threads; ++i) - CloseHandle(hash_threads[i]); - - arena_free(&gp_arena, (u8 **)&hash_threads, - sizeof(HANDLE) * num_hash_threads); - - // Stop progress printing thread - WaitForSingleObject(progress, INFINITE); - 
CloseHandle(progress); - - // ------------------------------- - // Export file_hashes.txt - // ------------------------------- - - FILE *f = fopen(FILE_HASHES_TXT, "wb"); - - for (int i = 0; i < num_threads; i++) { - mem_arena *arena = workers[i].arena; - - u8 *arena_base = - (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align); - fwrite(arena_base, 1, arena->pos, f); - } - - fclose(f); - - // ------------------------------- - // Print summary - // ------------------------------- - double total_seconds = timer_stop(&total_timer); - - printf("Completed hashing %zu files\n", total_found); - - uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed); - double total_mb = (double)total_bytes / (1024.0 * 1024.0); - double avg_mbps = total_mb / total_seconds; - printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps); - printf(" Total time : %.2f seconds\n\n", total_seconds); - - return 0; -}