#include "platform.h"

// ----------------------------- Globals ------------------------------------
// Counters shared between the scan workers, the hash workers and the
// progress thread; all accesses go through the C11 atomic API.
static atomic_uint_fast64_t g_files_found = 0;
static atomic_uint_fast64_t g_files_hashed = 0;
static atomic_uint_fast64_t g_bytes_processed = 0;
static atomic_int g_scan_done = 0;
// __________________________________________________________________________

// ----------------------------- Utils --------------------------------------

// Print `msg` together with the current errno description and terminate the
// process with a failure status.
static void perror_exit(const char *msg) {
  perror(msg);
  exit(EXIT_FAILURE);
}

// malloc() wrapper that never returns NULL: the process exits on allocation
// failure. A zero-byte request is rounded up to one byte because malloc(0)
// is allowed to return NULL, which must not be mistaken for out-of-memory.
static void *xmalloc(size_t n) {
  void *p = malloc(n ? n : 1);
  if (!p) {
    perror_exit("malloc");
  }
  return p;
}

// ----------------------------- Convert filetime to epoch --------------
// Convert a FILETIME (100 ns ticks since 1601-01-01) to whole seconds since
// the Unix epoch (1970-01-01).
//
// Returns 0 for any timestamp earlier than 1970, including the all-zero
// FILETIME; without this guard the unsigned subtraction would wrap around
// to a huge bogus value. format_time() renders 0 as "N/A".
static uint64_t filetime_to_epoch(const FILETIME *ft) {
  // Windows epoch (1601) -> Unix epoch (1970), expressed in 100 ns ticks.
  const uint64_t epoch_offset = 116444736000000000ULL;
  const uint64_t ticks_per_second = 10000000ULL;

  ULARGE_INTEGER ull;
  ull.LowPart = ft->dwLowDateTime;
  ull.HighPart = ft->dwHighDateTime;

  if (ull.QuadPart < epoch_offset) {
    return 0;
  }
  return (ull.QuadPart - epoch_offset) / ticks_per_second;
}

// ----------------------------- Resolve file owner ---------------------
// Write the owner of `path` into `out` as "DOMAIN\name" (at most `out_sz`
// bytes including the terminator, truncated by snprintf if necessary).
// Writes "UNKNOWN" when the security descriptor cannot be read or the SID
// cannot be resolved to an account name.
static void get_file_owner(const char *path, char *out, size_t out_sz) {
  PSID sid = NULL;
  PSECURITY_DESCRIPTOR sd = NULL;

  if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                            &sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) {
    // Sized for the 256-character account-name limit plus the terminator;
    // the previous 64-byte buffers made longer names fall back to "UNKNOWN".
    char name[257];
    char domain[257];
    DWORD name_len = sizeof(name);
    DWORD domain_len = sizeof(domain);
    SID_NAME_USE use;

    if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len,
                          &use)) {
      snprintf(out, out_sz, "%s\\%s", domain, name);
    } else {
      snprintf(out, out_sz, "UNKNOWN");
    }
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }

  // `sid` points into `sd`, so releasing the descriptor releases both.
  if (sd) {
    LocalFree(sd);
  }
}

// ----------------------------- Format time helper -------------------------
// Render epoch seconds `t` as local time "YYYY-MM-DD HH:MM:SS" into `out`.
// Writes "N/A" when `t` is 0, when the value cannot be converted to local
// time, or when the formatted text does not fit in `out_sz` bytes (strftime
// leaves the buffer indeterminate in that case).
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }

  time_t tt = (time_t)t;
  struct tm tm;
#if PLATFORM_WINDOWS
  int failed = (localtime_s(&tm, &tt) != 0);
#else
  int failed = (localtime_r(&tt, &tm) == NULL);
#endif

  if (failed || strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}

// --------------- parallel
directory scanning ---------------- void mpmc_init(MPMCQueue *q, size_t max_capacity) { if ((max_capacity & (max_capacity - 1)) != 0) { fprintf(stderr, "capacity must be power of two\n"); exit(1); } q->capacity = max_capacity; q->mask = max_capacity - 1; size_t bytes = sizeof(MPMCSlot) * max_capacity; q->slots = VirtualAlloc(NULL, bytes, MEM_RESERVE, PAGE_READWRITE); if (!q->slots) { fprintf(stderr, "VirtualAlloc reserve failed\n"); exit(1); } q->commit_step = (64ull * 1024 * 1024) / sizeof(MPMCSlot); q->committed = q->commit_step; VirtualAlloc(q->slots, q->commit_step * sizeof(MPMCSlot), MEM_COMMIT, PAGE_READWRITE); for (size_t i = 0; i < q->committed; i++) { atomic_init(&q->slots[i].seq, i); q->slots[i].data = NULL; } atomic_init(&q->head, 0); atomic_init(&q->tail, 0); } static void mpmc_commit_more(MPMCQueue *q) { size_t start = q->committed; size_t new_commit = start + q->commit_step; if (new_commit > q->capacity) new_commit = q->capacity; if (!atomic_compare_exchange_strong(&q->committed, &start, new_commit)) return; // another thread already committed size_t count = new_commit - start; VirtualAlloc(&q->slots[start], count * sizeof(MPMCSlot), MEM_COMMIT, PAGE_READWRITE); for (size_t i = start; i < new_commit; i++) { atomic_init(&q->slots[i].seq, i); q->slots[i].data = NULL; } } void mpmc_push(MPMCQueue *q, FileEntry *item) { MPMCSlot *slot; size_t pos; for (;;) { pos = atomic_load_explicit(&q->tail, memory_order_relaxed); slot = &q->slots[pos & q->mask]; size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire); intptr_t diff = (intptr_t)seq - (intptr_t)pos; if (diff == 0) { if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1, memory_order_relaxed, memory_order_relaxed)) break; } else if (diff < 0) { size_t committed = atomic_load_explicit(&q->committed, memory_order_relaxed); if (pos >= committed) { mpmc_commit_more(q); continue; } Sleep(1000); // queue actually full } else { Sleep(1000); } } slot->data = item; 
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release); } FileEntry *mpmc_pop(MPMCQueue *q) { MPMCSlot *slot; size_t pos; int spins = 0; for (;;) { pos = atomic_load_explicit(&q->head, memory_order_relaxed); slot = &q->slots[pos & q->mask]; size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire); intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1); if (diff == 0) { if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1, memory_order_relaxed, memory_order_relaxed)) break; } else if (diff < 0) { Sleep(1000); } else { if (++spins > 10) { SwitchToThread(); // yield CPU spins = 0; } else { _mm_pause(); } } } FileEntry *data = slot->data; atomic_store_explicit(&slot->seq, pos + q->capacity, memory_order_release); return data; } // Add queue helper functions static void dirqueue_push(DirQueue *q, const char *path) { EnterCriticalSection(&q->cs); if (q->count + 1 > q->cap) { q->cap = q->cap ? q->cap * 2 : 1024; q->items = realloc(q->items, q->cap * sizeof(char *)); } q->items[q->count++] = _strdup(path); WakeConditionVariable(&q->cv); LeaveCriticalSection(&q->cs); } static char *dirqueue_pop(DirQueue *q) { EnterCriticalSection(&q->cs); while (q->count == 0 && q->active > 0) { SleepConditionVariableCS(&q->cv, &q->cs, INFINITE); } if (q->count == 0 && q->active == 0) { LeaveCriticalSection(&q->cs); return NULL; // truly done } char *dir = q->items[--q->count]; q->active++; LeaveCriticalSection(&q->cs); return dir; } static void dirqueue_done(DirQueue *q) { EnterCriticalSection(&q->cs); q->active--; WakeAllConditionVariable(&q->cv); LeaveCriticalSection(&q->cs); } static DWORD WINAPI scan_worker(LPVOID arg) { DirQueue *q = (DirQueue *)arg; for (;;) { char *dir = dirqueue_pop(q); if (!dir) break; scan_folder_windows_parallel(dir, q); free(dir); dirqueue_done(q); } return 0; } // Scanning directory function void scan_folder_windows_parallel(const char *base, DirQueue *q) { char search[MAX_PATHLEN]; snprintf(search, sizeof(search), "%s\\*", 
base); WIN32_FIND_DATAA fd; HANDLE h = FindFirstFileA(search, &fd); if (h == INVALID_HANDLE_VALUE) return; do { if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, "..")) continue; char full[MAX_PATHLEN]; snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName); if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) continue; if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { dirqueue_push(q, full); } else { atomic_fetch_add(&g_files_found, 1); FileEntry *fe = malloc(sizeof(FileEntry)); memset(fe, 0, sizeof(FileEntry)); char norm[MAX_PATHLEN]; strncpy(norm, full, sizeof(norm) - 1); norm[sizeof(norm) - 1] = 0; normalize_path(norm); fe->path = _strdup(norm); platform_get_file_times(full, &fe->created_time, &fe->modified_time); platform_get_file_owner(full, fe->owner, sizeof(fe->owner)); LARGE_INTEGER size; HANDLE hf = CreateFileA(full, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hf != INVALID_HANDLE_VALUE) { if (GetFileSizeEx(hf, &size)) fe->size_bytes = (uint64_t)size.QuadPart; CloseHandle(hf); } mpmc_push(&g_file_queue, fe); } } while (FindNextFileA(h, &fd)); FindClose(h); } // ----------------------------- Hashing helpers ----------------------------- static void xxh3_hash_file_stream(const char *path, char *out_hex) { // compute XXH3_128 over file. POSIX and Windows use standard reads in this // helper. // On Windows try to use overlapped synchronous chunked reads for higher // throughput. 
HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL); if (hFile == INVALID_HANDLE_VALUE) { strcpy(out_hex, "ERROR"); return; } XXH128_hash_t h; XXH3_state_t *state = XXH3_createState(); XXH3_128bits_reset(state); BYTE *buf = (BYTE *)malloc(READ_BLOCK); DWORD read = 0; BOOL ok; while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) { XXH3_128bits_update(state, buf, (size_t)read); atomic_fetch_add(&g_bytes_processed, (uint64_t)read); } h = XXH3_128bits_digest(state); XXH3_freeState(state); CloseHandle(hFile); free(buf); snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, (unsigned long long)h.low64); } // ------------------------- Hash worker -------------------------------- static DWORD WINAPI hash_worker(LPVOID arg) { MPMCQueue *q = (MPMCQueue *)arg; for (;;) { FileEntry *fe = mpmc_pop(q); if (!fe) break; // poison pill char hash[HASH_STRLEN]; xxh3_hash_file_stream(fe->path, hash); atomic_fetch_add(&g_files_hashed, 1); free(fe->path); free(fe); } return 0; } // ----------------------------- Progress display --------------------------- DWORD WINAPI progress_thread(void *arg) { LARGE_INTEGER freq, start; QueryPerformanceFrequency(&freq); QueryPerformanceCounter(&start); uint64_t last_bytes = atomic_load(&g_bytes_processed); double last_time = 0.0; double displayed_speed = 0.0; const double sample_interval = 0.5; for (;;) { uint64_t found = atomic_load(&g_files_found); uint64_t hashed = atomic_load(&g_files_hashed); uint64_t bytes = atomic_load(&g_bytes_processed); int scan_done = atomic_load(&g_scan_done); LARGE_INTEGER now; QueryPerformanceCounter(&now); double t = (double)(now.QuadPart - start.QuadPart) / (double)freq.QuadPart; if (last_time == 0.0) { last_time = t; last_bytes = bytes; } double dt = t - last_time; if (dt >= sample_interval) { uint64_t db = bytes - last_bytes; if (db > 0 && dt > 0.0001) { displayed_speed = (double)db / (1024.0 * 
1024.0) / dt; } last_bytes = bytes; last_time = t; } if (!scan_done) { printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ", (unsigned long long)found, (unsigned long long)hashed, displayed_speed); } else { double pct = found ? (double)hashed / (double)found : 0.0; int barw = 40; int filled = (int)(pct * barw); char bar[64]; int p = 0; bar[p++] = '['; for (int i = 0; i < filled; i++) bar[p++] = '#'; for (int i = filled; i < barw; i++) bar[p++] = '.'; bar[p++] = ']'; bar[p] = 0; printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0, (unsigned long long)hashed, (unsigned long long)found, displayed_speed); } fflush(stdout); if (scan_done && hashed == found) break; Sleep(100); } printf("\n"); return 0; } // ----------------------------- Get file metadata ------------------------- void platform_get_file_times(const char *path, uint64_t *out_created, uint64_t *out_modified) { WIN32_FILE_ATTRIBUTE_DATA fad; if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) { *out_created = filetime_to_epoch(&fad.ftCreationTime); *out_modified = filetime_to_epoch(&fad.ftLastWriteTime); } else { *out_created = 0; *out_modified = 0; } } void platform_get_file_owner(const char *path, char *out_owner, size_t out_owner_size) { get_file_owner(path, out_owner, out_owner_size); } // ----------------------------- Main --------------------------------------- int main(int argc, char **argv) { char folders[64][MAX_PATHLEN]; // up to 64 input folders int folder_count = 0; // ------------------------------- // Scanning and total timer init // ------------------------------- timer_init(); HiResTimer total_timer; HiResTimer scan_timer; timer_start(&total_timer); timer_start(&scan_timer); // ------------------------------- // Parse arguments // ------------------------------- for (int i = 1; i < argc; ++i) { if (folder_count < 64) { strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); folders[folder_count][MAX_PATHLEN - 1] = 0; folder_count++; } } // 
------------------------------- // Ask user if no folders provided // ------------------------------- if (folder_count == 0) { printf("Enter folder to process (Enter = current folder): "); fflush(stdout); char buf[MAX_PATHLEN]; if (!fgets(buf, sizeof(buf), stdin)) return 1; buf[strcspn(buf, "\r\n")] = 0; if (buf[0] == 0) strcpy(folders[0], "."); else strncpy(folders[0], buf, MAX_PATHLEN - 1); folder_count = 1; } // ------------------------------- // Display selected folders // ------------------------------- printf("Processing %d folder(s):\n", folder_count); for (int i = 0; i < folder_count; ++i) { printf(" - %s\n", folders[i]); } // ------------------------------- // Detect hardware threads (CPU cores) // ------------------------------- size_t hw_threads = 1; // --- Windows: detect PHYSICAL cores (not logical threads) --- DWORD len = 0; GetLogicalProcessorInformation(NULL, &len); SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)malloc(len); if (GetLogicalProcessorInformation(buf, &len)) { DWORD count = 0; DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); for (DWORD i = 0; i < n; i++) { if (buf[i].Relationship == RelationProcessorCore) count++; } if (count > 0) hw_threads = count; } free(buf); // Add some extra threads to overlap I/O more aggressively size_t num_threads = hw_threads * 2; if (num_threads < 2) num_threads = 2; // ------------------------------- // Step 1: Scan all folders // ------------------------------- mpmc_init(&g_file_queue, 1024 * 1024 * 1024); DirQueue q; memset(&q, 0, sizeof(q)); InitializeCriticalSection(&q.cs); InitializeConditionVariable(&q.cv); q.active = 0; // starting hash threads HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads); for (size_t i = 0; i < num_threads; ++i) { hash_threads[i] = CreateThread(NULL, 0, hash_worker, &g_file_queue, 0, NULL); } // starting scan threads HANDLE progress = CreateThread(NULL, 0, progress_thread, NULL, 0, NULL); for (int i = 0; i < 
folder_count; ++i) { dirqueue_push(&q, folders[i]); } size_t scan_threads = hw_threads; if (scan_threads < 2) scan_threads = 2; HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads); for (size_t i = 0; i < scan_threads; ++i) { scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL); } WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE); // mpmc_finish(&g_file_queue); // debug for (size_t i = 0; i < num_threads; i++) { mpmc_push(&g_file_queue, NULL); } atomic_store(&g_scan_done, 1); for (size_t i = 0; i < scan_threads; ++i) CloseHandle(scan_tids[i]); free(scan_tids); double scan_seconds = timer_stop(&scan_timer); size_t total_found = atomic_load(&g_files_found); printf("\r%*s\r", 120, ""); // clear_console_line printf("Completed scanning in %.2f seconds, found %zu files\n\n", scan_seconds, total_found); // if no files found if (total_found == 0) { printf("No files found.\n"); return 0; } // stop hashing threads WaitForMultipleObjects((DWORD)num_threads, hash_threads, TRUE, INFINITE); for (size_t i = 0; i < num_threads; ++i) CloseHandle(hash_threads[i]); free(hash_threads); // free(g_file_queue.items); WaitForSingleObject(progress, INFINITE); CloseHandle(progress); // done time double total_seconds = timer_stop(&total_timer); printf("Completed hashing %zu files\n", total_found); uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed); double total_mb = (double)total_bytes / (1024.0 * 1024.0); double avg_mbps = total_mb / total_seconds; printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps); printf(" Total time : %.2f seconds\n", total_seconds); return 0; }