#include "platform.h"

// ----------------------------- Globals ------------------------------------
// Progress counters shared between the scan, hash and progress threads.
static atomic_uint_fast64_t g_files_found = 0;
static atomic_uint_fast64_t g_files_hashed = 0;
static atomic_uint_fast64_t g_bytes_processed = 0;
static atomic_int g_scan_done = 0; // set to 1 by main() once scanning ended

// Upper bound on folders accepted from the command line.
enum { MAX_INPUT_FOLDERS = 64 };

// Buffer size for account / domain names returned by LookupAccountSidA.
enum { ACCOUNT_NAME_BUF = 257 };

// scan_worker() calls this before its definition further down.
void scan_folder_windows_parallel(const char *base, DirQueue *q);

// ============================= Utils ======================================

// ----------------------------- Normalize path --------------
// Rewrites `p` in place: every run of '\\' and/or '/' becomes a single '/'.
// The result is never longer than the input.
static void normalize_path(char *p) {
  char *src = p;
  char *dst = p;
  int prev_slash = 0;

  while (*src) {
    char c = *src++;
    if (c == '\\' || c == '/') {
      if (!prev_slash) {
        *dst++ = '/';
        prev_slash = 1;
      }
    } else {
      *dst++ = c;
      prev_slash = 0;
    }
  }
  *dst = '\0';
}

// ----------------------------- Convert filetime to epoch --------------
// Converts a FILETIME (100 ns ticks since 1601-01-01) to whole seconds since
// 1970-01-01. Timestamps before 1970 (including an all-zero FILETIME) yield 0
// instead of wrapping around to a huge unsigned value.
static uint64_t filetime_to_epoch(const FILETIME *ft) {
  // Windows epoch (1601) -> Unix epoch (1970), in 100 ns ticks
  const uint64_t epoch_delta = 116444736000000000ULL;

  ULARGE_INTEGER ull;
  ull.LowPart = ft->dwLowDateTime;
  ull.HighPart = ft->dwHighDateTime;

  if (ull.QuadPart < epoch_delta)
    return 0;
  return (ull.QuadPart - epoch_delta) / 10000000ULL;
}

// ----------------------------- Format time helper -------------------------
// Formats `t` (seconds since the Unix epoch) as local "YYYY-MM-DD HH:MM:SS"
// into `out`. Writes "N/A" when `t` is 0 or when the conversion fails.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }

  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = (localtime_r(&tt, &tm) != NULL);
#endif

  // strftime returns 0 when the result does not fit; `out` is then
  // indeterminate, so overwrite it with the fallback text.
  if (!converted || strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}

// ----------------------------- Resolve file owner ---------------------
// Writes "DOMAIN\name" of the owner of `path` into `out`, or "UNKNOWN" when
// the security descriptor or the account lookup is unavailable.
static void get_file_owner(const char *path, char *out, size_t out_sz) {
  PSID sid = NULL;
  PSECURITY_DESCRIPTOR sd = NULL;

  if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                            &sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) {
    char name[ACCOUNT_NAME_BUF], domain[ACCOUNT_NAME_BUF];
    DWORD name_len = sizeof(name);
    DWORD domain_len = sizeof(domain);
    SID_NAME_USE use;

    if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len,
                          &use)) {
      snprintf(out, out_sz, "%s\\%s", domain, name);
    } else {
      snprintf(out, out_sz, "UNKNOWN");
    }
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }

  // `sid` points into `sd`; only the descriptor itself is released.
  if (sd)
    LocalFree(sd);
}

// ----------------------------- Get file metadata -------------------------
// Stores creation and last-write time of `path` (Unix seconds) in the output
// parameters; both are set to 0 when the attributes cannot be read.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  WIN32_FILE_ATTRIBUTE_DATA fad;
  if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) {
    *out_created = filetime_to_epoch(&fad.ftCreationTime);
    *out_modified = filetime_to_epoch(&fad.ftLastWriteTime);
  } else {
    *out_created = 0;
    *out_modified = 0;
  }
}

// Public wrapper around get_file_owner().
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  get_file_owner(path, out_owner, out_owner_size);
}

// --------------- parallel directory scanning ----------------

// Appends a heap copy of `path` to the queue and wakes one waiting worker.
// On allocation failure the directory is skipped (with a warning) and the
// queue is left unchanged.
static void dirqueue_push(DirQueue *q, const char *path) {
  // Copy outside the lock to keep the critical section short.
  char *copy = _strdup(path);
  if (!copy) {
    fprintf(stderr, "warning: out of memory, skipping directory %s\n", path);
    return;
  }

  EnterCriticalSection(&q->cs);
  if (q->count + 1 > q->cap) {
    size_t new_cap = q->cap ? (size_t)q->cap * 2 : 1024;
    // Keep the old array reachable if realloc fails.
    void *grown = realloc(q->items, new_cap * sizeof(char *));
    if (!grown) {
      LeaveCriticalSection(&q->cs);
      free(copy);
      fprintf(stderr, "warning: out of memory, skipping directory %s\n",
              path);
      return;
    }
    q->items = grown;
    q->cap = new_cap;
  }
  q->items[q->count++] = copy;
  WakeConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}

// Removes and returns the most recently pushed directory (caller frees it)
// and marks the caller as active. Blocks while the queue is empty but some
// worker is still active, since that worker may push more directories.
// Returns NULL once the queue is empty and no worker is active.
static char *dirqueue_pop(DirQueue *q) {
  EnterCriticalSection(&q->cs);
  while (q->count == 0 && q->active > 0) {
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
  }
  if (q->count == 0 && q->active == 0) {
    LeaveCriticalSection(&q->cs);
    return NULL; // truly done
  }
  char *dir = q->items[--q->count];
  q->active++;
  LeaveCriticalSection(&q->cs);
  return dir;
}

// Marks the calling worker as idle again and wakes every waiter so they can
// re-evaluate the termination condition in dirqueue_pop().
static void dirqueue_done(DirQueue *q) {
  EnterCriticalSection(&q->cs);
  q->active--;
  WakeAllConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}

// Scan thread entry point: pops directories and scans them until
// dirqueue_pop() reports that no work is left.
static DWORD WINAPI scan_worker(LPVOID arg) {
  DirQueue *q = (DirQueue *)arg;
  for (;;) {
    char *dir = dirqueue_pop(q);
    if (!dir)
      break;
    scan_folder_windows_parallel(dir, q);
    free(dir);
    dirqueue_done(q);
  }
  return 0;
}

// Scanning directory function.
// Enumerates the direct children of `base`: sub-directories are pushed onto
// `q`, regular files are described by a heap-allocated FileEntry that is
// handed to g_file_queue. Reparse points are skipped. Entries whose full path
// does not fit in MAX_PATHLEN are skipped rather than used truncated.
void scan_folder_windows_parallel(const char *base, DirQueue *q) {
  char search[MAX_PATHLEN];
  int n = snprintf(search, sizeof(search), "%s\\*", base);
  if (n < 0 || (size_t)n >= sizeof(search))
    return; // a truncated pattern would enumerate the wrong location

  WIN32_FIND_DATAA fd;
  HANDLE h = FindFirstFileA(search, &fd);
  if (h == INVALID_HANDLE_VALUE)
    return;

  do {
    if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
      continue;
    if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
      continue;

    char full[MAX_PATHLEN];
    n = snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
    if (n < 0 || (size_t)n >= sizeof(full))
      continue; // path too long for our buffers

    if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      dirqueue_push(q, full);
      continue;
    }

    FileEntry *fe = calloc(1, sizeof(*fe));
    if (!fe)
      continue;

    char norm[MAX_PATHLEN];
    snprintf(norm, sizeof(norm), "%s", full);
    normalize_path(norm);
    fe->path = _strdup(norm);
    if (!fe->path) {
      free(fe);
      continue;
    }

    // Count the file only once the entry is certain to be enqueued; the
    // progress thread waits for hashed == found.
    atomic_fetch_add(&g_files_found, 1);

    platform_get_file_times(full, &fe->created_time, &fe->modified_time);
    platform_get_file_owner(full, fe->owner, sizeof(fe->owner));

    LARGE_INTEGER size;
    HANDLE hf =
        CreateFileA(full, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
                    NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hf != INVALID_HANDLE_VALUE) {
      if (GetFileSizeEx(hf, &size))
        fe->size_bytes = (uint64_t)size.QuadPart;
      CloseHandle(hf);
    }

    mpmc_push(&g_file_queue, fe);
  } while (FindNextFileA(h, &fd));

  FindClose(h);
}

// ----------------------------- Hashing helpers -----------------------------
// Computes XXH3-128 over the file at `path` using sequential synchronous
// reads of READ_BLOCK bytes into `buf`, and writes the 32-digit lowercase hex
// digest into `out_hex` (at least HASH_STRLEN bytes). Writes "ERROR" when
// `buf` is NULL, the file cannot be opened, or a read fails part-way.
static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) {
  if (!buf) {
    snprintf(out_hex, HASH_STRLEN, "ERROR");
    return;
  }

  HANDLE hFile =
      CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
                  OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
  if (hFile == INVALID_HANDLE_VALUE) {
    snprintf(out_hex, HASH_STRLEN, "ERROR");
    return;
  }

  XXH3_state_t state;
  XXH3_128bits_reset(&state);

  BOOL ok;
  for (;;) {
    DWORD read = 0;
    ok = ReadFile(hFile, buf, READ_BLOCK, &read, NULL);
    if (!ok || read == 0)
      break; // failure, or end of file (success with 0 bytes)
    XXH3_128bits_update(&state, buf, (size_t)read);
    atomic_fetch_add(&g_bytes_processed, (uint64_t)read);
  }
  CloseHandle(hFile);

  if (!ok) {
    // A digest over a partially read file would look valid but be wrong.
    snprintf(out_hex, HASH_STRLEN, "ERROR");
    return;
  }

  XXH128_hash_t h = XXH3_128bits_digest(&state);
  snprintf(out_hex, HASH_STRLEN, "%016llx%016llx",
           (unsigned long long)h.high64, (unsigned long long)h.low64);
}

// Formats one tab-separated output record for `fe` into `out`.
// Returns snprintf's result: the untruncated length, or negative on error.
static int format_record(char *out, size_t out_sz, const char *hash,
                         const FileEntry *fe, const char *created,
                         const char *modified) {
  double size_kib = (double)fe->size_bytes / 1024.0;
  return snprintf(out, out_sz, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hash, fe->path,
                  size_kib, created, modified, fe->owner);
}

// ------------------------- Hash worker --------------------------------
// Hash thread entry point. Pops FileEntry items from the context's queue
// until mpmc_pop() returns NULL, hashes each file, appends one text record
// per file to the context's arena, and frees the entry.
static DWORD WINAPI hash_worker(LPVOID arg) {
  WorkerContext *ctx = (WorkerContext *)arg;
  MPMCQueue *q = ctx->queue;
  mem_arena *local_arena = ctx->arena;

  // If this allocation fails the worker still drains the queue (recording
  // "ERROR" hashes) so the progress thread can reach hashed == found.
  BYTE *buf = (BYTE *)malloc(READ_BLOCK);

  for (;;) {
    FileEntry *fe = mpmc_pop(q);
    if (!fe)
      break;

    char hash[HASH_STRLEN];
    xxh3_hash_file_stream(fe->path, hash, buf);

    char created[32], modified[32];
    format_time(fe->created_time, created, sizeof(created));
    format_time(fe->modified_time, modified, sizeof(modified));

    char stack_buf[1024];
    int len = format_record(stack_buf, sizeof(stack_buf), hash, fe, created,
                            modified);

    const char *line = stack_buf;
    char *heap_line = NULL;
    if (len >= (int)sizeof(stack_buf)) {
      // Record did not fit: snprintf returned the length it WOULD have
      // written, so copying `len` bytes from stack_buf would over-read it.
      heap_line = malloc((size_t)len + 1);
      if (heap_line) {
        format_record(heap_line, (size_t)len + 1, hash, fe, created,
                      modified);
        line = heap_line;
      } else {
        // Last resort: emit the truncated record, still newline-terminated.
        len = (int)sizeof(stack_buf) - 1;
        stack_buf[len - 1] = '\n';
      }
    }

    if (len > 0) {
      char *dst = (char *)arena_push(&local_arena, (size_t)len, false);
      if (dst)
        memcpy(dst, line, (size_t)len);
    }
    free(heap_line);

    atomic_fetch_add(&g_files_hashed, 1);

    free(fe->path);
    free(fe);
  }

  free(buf);
  return 0;
}

// ----------------------------- Progress display ---------------------------
// Progress thread entry point. Redraws a status line roughly every 100 ms:
// running counters while scanning, a 40-column bar afterwards. Throughput is
// re-sampled every 0.5 s. Exits once scanning is done and every found file
// has been hashed.
DWORD WINAPI progress_thread(void *arg) {
  (void)arg;

  LARGE_INTEGER freq, start;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&start);

  uint64_t last_bytes = atomic_load(&g_bytes_processed);
  double last_time = 0.0;
  double displayed_speed = 0.0;
  const double sample_interval = 0.5;

  for (;;) {
    // Read the flag BEFORE the counters: if it is already set, the found
    // count read afterwards is final, so hashed == found cannot be satisfied
    // by a stale found value.
    int scan_done = atomic_load(&g_scan_done);
    uint64_t found = atomic_load(&g_files_found);
    uint64_t hashed = atomic_load(&g_files_hashed);
    uint64_t bytes = atomic_load(&g_bytes_processed);

    LARGE_INTEGER now;
    QueryPerformanceCounter(&now);
    double t = (double)(now.QuadPart - start.QuadPart) / (double)freq.QuadPart;

    if (last_time == 0.0) {
      last_time = t;
      last_bytes = bytes;
    }

    double dt = t - last_time;
    if (dt >= sample_interval) {
      uint64_t db = bytes - last_bytes;
      if (db > 0 && dt > 0.0001) {
        displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
      }
      last_bytes = bytes;
      last_time = t;
    }

    if (!scan_done) {
      printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
             (unsigned long long)found, (unsigned long long)hashed,
             displayed_speed);
    } else {
      double pct = found ? (double)hashed / (double)found : 0.0;
      int barw = 40;
      int filled = (int)(pct * barw);
      // Keep the fill inside the bar so bar[] can never be overrun.
      if (filled < 0)
        filled = 0;
      if (filled > barw)
        filled = barw;

      char bar[64];
      int p = 0;
      bar[p++] = '[';
      for (int i = 0; i < filled; i++)
        bar[p++] = '#';
      for (int i = filled; i < barw; i++)
        bar[p++] = '.';
      bar[p++] = ']';
      bar[p] = 0;

      printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
             (unsigned long long)hashed, (unsigned long long)found,
             displayed_speed);
    }
    fflush(stdout);

    if (scan_done && hashed == found)
      break;
    Sleep(100);
  }

  printf("\n");
  return 0;
}

// ----------------------------- Main helpers -------------------------------

// Returns the number of physical processor cores reported by
// GetLogicalProcessorInformation, or 1 when it cannot be determined.
// Scratch memory is taken from and returned to `*arena`.
static size_t detect_physical_cores(mem_arena **arena) {
  DWORD len = 0;
  GetLogicalProcessorInformation(NULL, &len); // size query
  if (len == 0)
    return 1;

  SYSTEM_LOGICAL_PROCESSOR_INFORMATION *info =
      (SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)arena_push(arena, len, true);
  if (!info)
    return 1;

  size_t cores = 0;
  if (GetLogicalProcessorInformation(info, &len)) {
    DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    for (DWORD i = 0; i < n; i++) {
      if (info[i].Relationship == RelationProcessorCore)
        cores++;
    }
  }
  arena_free(arena, (u8 **)&info, len);

  return cores > 0 ? cores : 1;
}

// Writes the records collected in each worker's arena to FILE_HASHES_TXT,
// in worker order. Returns 0 on success, -1 on any create/write failure.
// NOTE(review): assumes the record bytes start right after the aligned
// mem_arena header and that `pos` is their byte count, as the original
// writer did; confirm against the arena implementation.
static int write_hash_results(const WorkerContext *workers,
                              size_t worker_count) {
  HANDLE h = CreateFileA(FILE_HASHES_TXT, GENERIC_WRITE, 0, NULL,
                         CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
  if (h == INVALID_HANDLE_VALUE) {
    fprintf(stderr, "Failed to create %s (error %lu)\n", FILE_HASHES_TXT,
            (unsigned long)GetLastError());
    return -1;
  }

  int rc = 0;
  for (size_t i = 0; i < worker_count && rc == 0; i++) {
    mem_arena *local_hash_arena = workers[i].arena;
    const u8 *cursor =
        (const u8 *)local_hash_arena +
        ALIGN_UP_POW2(sizeof(mem_arena), local_hash_arena->align);
    uint64_t remaining = (uint64_t)local_hash_arena->pos;

    // WriteFile takes a 32-bit length and may write less than requested.
    while (remaining > 0) {
      DWORD chunk =
          remaining > 0x40000000ULL ? 0x40000000UL : (DWORD)remaining;
      DWORD written = 0;
      if (!WriteFile(h, cursor, chunk, &written, NULL) || written == 0) {
        fprintf(stderr, "Failed to write %s (error %lu)\n", FILE_HASHES_TXT,
                (unsigned long)GetLastError());
        rc = -1;
        break;
      }
      cursor += written;
      remaining -= written;
    }
  }

  CloseHandle(h);
  return rc;
}

// ----------------------------- Main ---------------------------------------
// Usage: <program> [folder ...]   (prompts for a folder when none is given)
// Scans the folders in parallel, hashes every file found and writes one
// record per file to FILE_HASHES_TXT. Returns 0 on success, 1 on failure.
int main(int argc, char **argv) {
  // Static so that a large MAX_PATHLEN cannot exhaust the main thread stack.
  static char folders[MAX_INPUT_FOLDERS][MAX_PATHLEN];
  int folder_count = 0;

  // -------------------------------
  // Parse arguments
  // -------------------------------
  for (int i = 1; i < argc; ++i) {
    if (folder_count < MAX_INPUT_FOLDERS) {
      snprintf(folders[folder_count], MAX_PATHLEN, "%s", argv[i]);
      folder_count++;
    }
  }

  // -------------------------------
  // Ask user if no folders provided
  // -------------------------------
  if (folder_count == 0) {
    printf("Enter folder to process (Enter = current folder): ");
    fflush(stdout);

    char line[MAX_PATHLEN];
    if (!fgets(line, sizeof(line), stdin))
      return 1;
    line[strcspn(line, "\r\n")] = 0;

    // snprintf always NUL-terminates, unlike the strncpy it replaces.
    snprintf(folders[0], MAX_PATHLEN, "%s", line[0] ? line : ".");
    folder_count = 1;
  }

  // -------------------------------
  // Display selected folders
  // -------------------------------
  printf("Processing %d folder(s):\n", folder_count);
  for (int i = 0; i < folder_count; ++i) {
    printf(" - %s\n", folders[i]);
  }

  // -------------------------------
  // Scanning and total timer init
  // (started after the prompt so typing time is not measured)
  // -------------------------------
  timer_init();
  HiResTimer total_timer;
  HiResTimer scan_timer;
  timer_start(&total_timer);
  timer_start(&scan_timer);

  // -------------------------------
  // Creating a general purpose arena
  // -------------------------------
  arena_params params = {
      .reserve_size = GiB(1),
      .commit_size = MiB(16),
      .align = 0,
      .push_size = 0,
      .allow_free_list = true,
      .allow_swapback = false,
      .growth_policy = ARENA_GROWTH_NORMAL,
      .commit_policy = ARENA_COMMIT_LAZY,
      .max_nbre_blocks = 1,
  };
  mem_arena *gp_arena = arena_create(&params);
  if (!gp_arena) {
    fprintf(stderr, "Failed to create memory arena\n");
    return 1;
  }

  // -------------------------------
  // Detect hardware threads (PHYSICAL cores, not logical threads)
  // -------------------------------
  size_t hw_threads = detect_physical_cores(&gp_arena);

  // Add some extra threads to overlap I/O more aggressively.
  // WaitForMultipleObjects accepts at most MAXIMUM_WAIT_OBJECTS handles, so
  // both pools are capped at that limit.
  size_t num_threads = hw_threads * 2;
  if (num_threads < 2)
    num_threads = 2;
  if (num_threads > MAXIMUM_WAIT_OBJECTS)
    num_threads = MAXIMUM_WAIT_OBJECTS;

  size_t scan_threads = hw_threads;
  if (scan_threads < 2)
    scan_threads = 2;
  if (scan_threads > MAXIMUM_WAIT_OBJECTS)
    scan_threads = MAXIMUM_WAIT_OBJECTS;

  // -------------------------------
  // Step 1: Scan all folders
  // -------------------------------
  mpmc_init(&g_file_queue, MiB(1));

  DirQueue q;
  memset(&q, 0, sizeof(q));
  InitializeCriticalSection(&q.cs);
  InitializeConditionVariable(&q.cv);
  q.active = 0;

  // starting hash threads
  WorkerContext workers[MAXIMUM_WAIT_OBJECTS];
  memset(workers, 0, sizeof(workers));
  for (size_t i = 0; i < num_threads; i++) {
    workers[i].queue = &g_file_queue;
    workers[i].arena = arena_create(&params);
    if (!workers[i].arena) {
      fprintf(stderr, "Failed to create worker arena\n");
      return 1;
    }
  }

  HANDLE *hash_threads =
      (HANDLE *)arena_push(&gp_arena, sizeof(HANDLE) * num_threads, true);
  if (!hash_threads) {
    fprintf(stderr, "Failed to allocate thread handles\n");
    return 1;
  }
  for (size_t i = 0; i < num_threads; ++i) {
    hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL);
    if (!hash_threads[i]) {
      fprintf(stderr, "Failed to start hash thread (error %lu)\n",
              (unsigned long)GetLastError());
      return 1;
    }
  }

  // starting progress and scan threads
  HANDLE progress = CreateThread(NULL, 0, progress_thread, NULL, 0, NULL);

  for (int i = 0; i < folder_count; ++i) {
    dirqueue_push(&q, folders[i]);
  }

  HANDLE *scan_tids =
      (HANDLE *)arena_push(&gp_arena, sizeof(HANDLE) * scan_threads, true);
  if (!scan_tids) {
    fprintf(stderr, "Failed to allocate thread handles\n");
    return 1;
  }
  for (size_t i = 0; i < scan_threads; ++i) {
    scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker,
                                &q, 0, NULL);
    if (!scan_tids[i]) {
      fprintf(stderr, "Failed to start scan thread (error %lu)\n",
              (unsigned long)GetLastError());
      return 1;
    }
  }

  WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);

  mpmc_producers_finished(&g_file_queue, num_threads);
  atomic_store(&g_scan_done, 1);

  for (size_t i = 0; i < scan_threads; ++i)
    CloseHandle(scan_tids[i]);
  arena_free(&gp_arena, (u8 **)&scan_tids, sizeof(HANDLE) * scan_threads);

  // All scan threads have exited; release the directory queue.
  DeleteCriticalSection(&q.cs);
  free(q.items);
  q.items = NULL;

  double scan_seconds = timer_stop(&scan_timer);
  size_t total_found = atomic_load(&g_files_found);

  printf("\r%*s\r", 120, ""); // clear_console_line
  printf("Completed scanning in %.2f seconds, found %zu files\n\n",
         scan_seconds, total_found);

  // if no files found
  if (total_found == 0) {
    printf("No files found.\n");
    return 0;
  }

  // stop hashing threads
  WaitForMultipleObjects((DWORD)num_threads, hash_threads, TRUE, INFINITE);
  for (size_t i = 0; i < num_threads; ++i)
    CloseHandle(hash_threads[i]);
  arena_free(&gp_arena, (u8 **)&hash_threads, sizeof(HANDLE) * num_threads);

  if (progress) {
    WaitForSingleObject(progress, INFINITE);
    CloseHandle(progress);
  }

  // write file_hashes.txt
  if (write_hash_results(workers, num_threads) != 0)
    return 1;

  // done time
  double total_seconds = timer_stop(&total_timer);

  printf("Completed hashing %zu files\n", total_found);

  uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
  double total_mb = (double)total_bytes / (1024.0 * 1024.0);
  double avg_mbps = total_seconds > 0.0 ? total_mb / total_seconds : 0.0;

  printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
  printf(" Total time : %.2f seconds\n", total_seconds);

  return 0;
}