#include "platform.h"

// ----------------------------- Globals ------------------------------------
FileEntry *g_entries = NULL;
size_t g_entry_count = 0;
size_t g_entry_capacity = 0;

static atomic_int g_scan_done = 0;
static atomic_size_t g_files_found = 0;
static atomic_uint_fast64_t g_bytes_processed = 0;

// Held by scan workers while they merge their local results into g_entries.
static CRITICAL_SECTION g_entries_cs;
// Serializes appends to FILE_HASHES_TXT. Initialized in main() before any
// hashing worker is started, so workers never race on its initialization.
static CRITICAL_SECTION g_append_cs;

// ----------------------------- Utils --------------------------------------

// Print msg with the current errno text and terminate the process.
static void perror_exit(const char *msg) {
  perror(msg);
  exit(1);
}

// malloc() that terminates the process instead of returning NULL.
static void *xmalloc(size_t n) {
  void *p = malloc(n);
  if (!p)
    perror_exit("malloc");
  return p;
}

// Heap-duplicate a NUL-terminated string; terminates the process on OOM.
// The caller owns the returned buffer and releases it with free().
static char *xstrdup(const char *s) {
  size_t n = strlen(s) + 1;
  char *copy = xmalloc(n);
  memcpy(copy, s, n);
  return copy;
}

// Deep-copy *src into *dst. dst->path becomes a fresh heap copy owned by dst.
static void file_entry_copy(FileEntry *dst, const FileEntry *src) {
  memset(dst, 0, sizeof(*dst));
  dst->size_bytes = src->size_bytes;
  dst->created_time = src->created_time;
  dst->modified_time = src->modified_time;
  dst->path = xstrdup(src->path);
  // dst was zeroed above, so copying at most size-1 bytes keeps the
  // terminating NUL in place.
  strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
}

// Append a deep copy of *src to the global entry array, doubling its capacity
// when full (initial capacity 1024). Takes no lock itself.
static void global_entries_push(const FileEntry *src) {
  if (g_entry_count == g_entry_capacity) {
    size_t newcap = g_entry_capacity ? g_entry_capacity * 2 : 1024;
    FileEntry *grown = realloc(g_entries, newcap * sizeof(FileEntry));
    if (!grown)
      perror_exit("realloc");
    g_entries = grown;
    g_entry_capacity = newcap;
  }
  file_entry_copy(&g_entries[g_entry_count++], src);
}

// Release every path string and the global entry array, then reset the
// globals to their empty state.
static void free_entries(void) {
  for (size_t i = 0; i < g_entry_count; ++i) {
    free(g_entries[i].path);
  }
  free(g_entries);
  g_entries = NULL;
  g_entry_count = 0;
  g_entry_capacity = 0;
}

// ----------------------------- Convert filetime to epoch --------------

// Convert a FILETIME (100 ns ticks since 1601-01-01) to whole seconds since
// the Unix epoch. Timestamps earlier than 1970 (including an all-zero
// FILETIME) yield 0 instead of wrapping around; format_time() prints 0 as
// "N/A".
static uint64_t filetime_to_epoch(const FILETIME *ft) {
  // Windows epoch (1601) -> Unix epoch (1970), in 100 ns ticks.
  const uint64_t epoch_diff = 116444736000000000ULL;
  ULARGE_INTEGER ull;
  ull.LowPart = ft->dwLowDateTime;
  ull.HighPart = ft->dwHighDateTime;
  if (ull.QuadPart < epoch_diff)
    return 0;
  return (ull.QuadPart - epoch_diff) / 10000000ULL;
}

// ----------------------------- Resolve file owner ---------------------

// Write "DOMAIN\name" for the owner of `path` into out (at most out_sz bytes,
// always NUL-terminated by snprintf). Writes "UNKNOWN" if the security
// descriptor cannot be read or the SID cannot be resolved to an account.
static void get_file_owner(const char *path, char *out, size_t out_sz) {
  PSID sid = NULL;
  PSECURITY_DESCRIPTOR sd = NULL;

  if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                            &sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) {
    char name[256], domain[256];
    DWORD name_len = sizeof(name);
    DWORD domain_len = sizeof(domain);
    SID_NAME_USE use;
    if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len,
                          &use)) {
      snprintf(out, out_sz, "%s\\%s", domain, name);
    } else {
      snprintf(out, out_sz, "UNKNOWN");
    }
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }

  // sid points into sd, so only the descriptor itself is released.
  if (sd)
    LocalFree(sd);
}

// ----------------------------- Format time helper -------------------------

// Format Unix time t as local "YYYY-MM-DD HH:MM:SS" into out. Writes "N/A"
// when t is 0 or when the value cannot be converted or formatted.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = (localtime_r(&tt, &tm) != NULL);
#endif
  if (!converted || strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}

// --------------- parallel directory scanning ----------------

// Reset an entry buffer to the empty state (no storage allocated).
static void entrybuf_init(EntryBuffer *b) {
  b->entries = NULL;
  b->count = 0;
  b->capacity = 0;
}

// Append a deep copy of *src to b, doubling its capacity when full (initial
// capacity 256).
static void entrybuf_push(EntryBuffer *b, const FileEntry *src) {
  if (b->count == b->capacity) {
    size_t newcap = b->capacity ? b->capacity * 2 : 256;
    FileEntry *grown = realloc(b->entries, newcap * sizeof(FileEntry));
    if (!grown)
      perror_exit("realloc");
    b->entries = grown;
    b->capacity = newcap;
  }
  file_entry_copy(&b->entries[b->count++], src);
}

// Queue a private copy of `path` for scanning and wake one waiting worker.
static void dirqueue_push(DirQueue *q, const char *path) {
  char *copy = xstrdup(path); // allocate before taking the lock

  EnterCriticalSection(&q->cs);
  if (q->count + 1 > q->cap) {
    size_t newcap = q->cap ? q->cap * 2 : 1024;
    void *grown = realloc(q->items, newcap * sizeof(char *));
    if (!grown)
      perror_exit("realloc");
    q->items = grown;
    q->cap = newcap; // only updated once the allocation has succeeded
  }
  q->items[q->count++] = copy;
  WakeConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}

// Take one directory from the queue (most recently pushed first), blocking
// while the queue is empty but some worker is still active and may push more.
// Returns NULL once the queue is empty and no worker is active. On success
// the caller owns the returned string and must call dirqueue_done() after
// processing it.
static char *dirqueue_pop(DirQueue *q) {
  EnterCriticalSection(&q->cs);
  while (q->count == 0 && q->active > 0) {
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
  }
  if (q->count == 0 && q->active == 0) {
    LeaveCriticalSection(&q->cs);
    return NULL; // truly done
  }
  char *dir = q->items[--q->count];
  q->active++;
  LeaveCriticalSection(&q->cs);
  return dir;
}

// Mark one popped directory as fully processed and wake every waiter so each
// can re-evaluate the termination condition.
static void dirqueue_done(DirQueue *q) {
  EnterCriticalSection(&q->cs);
  q->active--;
  WakeAllConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}

// Scan worker: drains the directory queue into a thread-local buffer, then
// merges that buffer into g_entries once, under g_entries_cs. The entries are
// moved bitwise, so ownership of each path string transfers to g_entries.
static DWORD WINAPI scan_worker(LPVOID arg) {
  DirQueue *q = (DirQueue *)arg;
  EntryBuffer local;
  entrybuf_init(&local);

  for (;;) {
    char *dir = dirqueue_pop(q);
    if (!dir)
      break;
    scan_folder_windows_parallel(dir, q, &local);
    free(dir);
    dirqueue_done(q);
  }

  // merge once at end (skipped when this worker found nothing, which also
  // avoids passing a NULL source to memcpy)
  if (local.count > 0) {
    EnterCriticalSection(&g_entries_cs);
    size_t needed = g_entry_count + local.count;
    if (needed > g_entry_capacity) {
      FileEntry *grown = realloc(g_entries, needed * sizeof(FileEntry));
      if (!grown)
        perror_exit("realloc");
      g_entries = grown;
      g_entry_capacity = needed;
    }
    memcpy(&g_entries[g_entry_count], local.entries,
           local.count * sizeof(FileEntry));
    g_entry_count = needed;
    LeaveCriticalSection(&g_entries_cs);
  }

  free(local.entries);
  return 0;
}

// Enumerate the immediate children of `base`. Subdirectories are pushed onto
// q for later scanning; regular files are recorded in buf. Reparse points are
// skipped. Paths that do not fit in MAX_PATHLEN are skipped rather than
// processed under a truncated name.
void scan_folder_windows_parallel(const char *base, DirQueue *q,
                                  EntryBuffer *buf) {
  char search[MAX_PATHLEN];
  int n = snprintf(search, sizeof(search), "%s\\*", base);
  if (n < 0 || (size_t)n >= sizeof(search))
    return;

  WIN32_FIND_DATAA fd;
  HANDLE h = FindFirstFileA(search, &fd);
  if (h == INVALID_HANDLE_VALUE)
    return;

  do {
    if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
      continue;
    if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
      continue;

    char full[MAX_PATHLEN];
    n = snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
    if (n < 0 || (size_t)n >= sizeof(full))
      continue;

    if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      dirqueue_push(q, full);
      continue;
    }

    atomic_fetch_add(&g_files_found, 1);

    FileEntry fe;
    memset(&fe, 0, sizeof(fe));

    char norm[MAX_PATHLEN];
    snprintf(norm, sizeof(norm), "%s", full);
    normalize_path(norm);
    fe.path = norm; // entrybuf_push() stores its own heap copy

    platform_get_file_times(full, &fe.created_time, &fe.modified_time);
    platform_get_file_owner(full, fe.owner, sizeof(fe.owner));

    // The directory enumeration already carries the file size, so the file
    // does not have to be opened just to query it.
    fe.size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow;

    entrybuf_push(buf, &fe);
  } while (FindNextFileA(h, &fd));

  FindClose(h);
}

// Scan progress thread: prints the running file count until g_scan_done is
// set.
static DWORD WINAPI scan_progress_thread(LPVOID arg) {
  (void)arg;
  for (;;) {
    if (atomic_load(&g_scan_done))
      break;
    Sleep(100); // 0.1 seconds
    size_t count = atomic_load(&g_files_found);
    printf("\rScanning... %zu files found", count);
    fflush(stdout);
  }
  return 0;
}

// ----------------------------- Job queue ----------------------------------

// Initialize an empty job queue together with its lock and condition variable.
static void jobqueue_init(JobQueue *q) {
  q->head = q->tail = NULL;
  atomic_store(&q->count, 0);
  q->stop = 0;
  InitializeCriticalSection(&q->cs);
  InitializeConditionVariable(&q->cv);
}

// Append job to the tail of the queue and wake one waiting worker.
static void jobqueue_push(JobQueue *q, Job *job) {
  EnterCriticalSection(&q->cs);
  job->next = NULL;
  if (q->tail)
    q->tail->next = job;
  else
    q->head = job;
  q->tail = job;
  atomic_fetch_add(&q->count, 1);
  WakeConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}

// Remove and return the job at the head of the queue, blocking while the
// queue is empty and not stopped. Returns NULL once the queue has been
// stopped and drained.
static Job *jobqueue_pop(JobQueue *q) {
  EnterCriticalSection(&q->cs);
  while (!q->head && !q->stop)
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
  if (q->stop && !q->head) {
    LeaveCriticalSection(&q->cs);
    return NULL;
  }
  Job *j = q->head;
  q->head = j->next;
  if (!q->head)
    q->tail = NULL;
  LeaveCriticalSection(&q->cs);
  atomic_fetch_sub(&q->count, 1);
  return j;
}

// Flag the queue as stopped and wake every waiting worker so it can exit
// once the remaining jobs are drained.
static void jobqueue_stop(JobQueue *q) {
  EnterCriticalSection(&q->cs);
  q->stop = 1;
  WakeAllConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}

// ----------------------------- Hashing helpers -----------------------------

// Compute the XXH3 128-bit hash of the file at `path` using sequential
// synchronous ReadFile calls of READ_BLOCK bytes, and write it to out_hex as
// 32 lowercase hex digits. out_hex must have room for HASH_STRLEN bytes.
// Writes "ERROR" if the file cannot be opened, a buffer or hash state cannot
// be allocated, or a read fails part-way (so a hash of partial data is never
// reported). Every byte read is added to g_bytes_processed.
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
  HANDLE hFile =
      CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
                  OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
  if (hFile == INVALID_HANDLE_VALUE) {
    snprintf(out_hex, HASH_STRLEN, "ERROR");
    return;
  }

  XXH3_state_t *state = XXH3_createState();
  BYTE *buf = malloc(READ_BLOCK);
  int failed = (state == NULL || buf == NULL);

  if (!failed) {
    XXH3_128bits_reset(state);
    for (;;) {
      DWORD got = 0;
      if (!ReadFile(hFile, buf, READ_BLOCK, &got, NULL)) {
        failed = 1;
        break;
      }
      if (got == 0)
        break; // end of file
      XXH3_128bits_update(state, buf, (size_t)got);
      atomic_fetch_add(&g_bytes_processed, (uint64_t)got);
    }
  }

  if (failed) {
    snprintf(out_hex, HASH_STRLEN, "ERROR");
  } else {
    XXH128_hash_t h = XXH3_128bits_digest(state);
    snprintf(out_hex, HASH_STRLEN, "%016llx%016llx",
             (unsigned long long)h.high64, (unsigned long long)h.low64);
  }

  if (state)
    XXH3_freeState(state);
  free(buf);
  CloseHandle(hFile);
}

// ----------------------------- Worker --------------------------------------

// Hashing worker: pops jobs until the queue is stopped and drained, hashes
// each file and appends one tab-separated record to FILE_HASHES_TXT while
// holding g_append_cs. Increments *done_counter per finished job and
// decrements *live_workers on exit.
static DWORD WINAPI worker_thread_windows(LPVOID argp) {
  WorkerArg *w = (WorkerArg *)argp;
  JobQueue *q = w->queue;

  for (;;) {
    Job *job = jobqueue_pop(q);
    if (!job)
      break;

    char hex[HASH_STRLEN];
    xxh3_hash_file_stream(job->file->path, hex);

    // append to hashes file under the shared lock to avoid interleaved lines
    EnterCriticalSection(&g_append_cs);
    FILE *hf = fopen(FILE_HASHES_TXT, "a");
    if (hf) {
      char created[32], modified[32];
      format_time(job->file->created_time, created, sizeof(created));
      format_time(job->file->modified_time, modified, sizeof(modified));
      double size_kib = (double)job->file->size_bytes / (1024.0);
      fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib,
              created, modified, job->file->owner);
      fclose(hf);
    }
    LeaveCriticalSection(&g_append_cs);

    atomic_fetch_add(w->done_counter, 1);
    free(job);
  }

  atomic_fetch_sub(w->live_workers, 1);
  return 0;
}

// ----------------------------- Progress display ---------------------------

// Redraw a 40-column "[####    ]" progress bar with percentage and counts on
// the current console line.
static void print_progress(size_t done, size_t total) {
  const int barw = 40;
  double pct = total ? (double)done / (double)total : 0.0;
  int filled = (int)(pct * barw + 0.5);
  printf("\r[");
  for (int i = 0; i < filled; ++i)
    putchar('#');
  for (int i = filled; i < barw; ++i)
    putchar(' ');
  printf("] %6.2f%% (%zu / %zu) ", pct * 100.0, done, total);
  fflush(stdout);
}

// ----------------------------- Helpers: load/save --------------------------

// Return non-zero if GetFileAttributesA() can query `path`.
static int file_exists(const char *path) {
  DWORD attr = GetFileAttributesA(path);
  return attr != INVALID_FILE_ATTRIBUTES;
}

// Write every scanned path to list_path, one per line. Failures are reported
// with perror() and are not fatal.
static void save_file_list(const char *list_path) {
  FILE *f = fopen(list_path, "w");
  if (!f) {
    perror("fopen file_list");
    return;
  }
  for (size_t i = 0; i < g_entry_count; ++i) {
    fprintf(f, "%s\n", g_entries[i].path);
  }
  if (fclose(f) != 0)
    perror("fclose file_list");
}

// ----------------------------- Get file metadata -------------------------

// Fetch creation and last-write time of `path` as Unix seconds. Both outputs
// are set to 0 when the attributes cannot be read.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  WIN32_FILE_ATTRIBUTE_DATA fad;
  if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) {
    *out_created = filetime_to_epoch(&fad.ftCreationTime);
    *out_modified = filetime_to_epoch(&fad.ftLastWriteTime);
  } else {
    *out_created = 0;
    *out_modified = 0;
  }
}

// Public wrapper around get_file_owner(); see that function for the format.
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  get_file_owner(path, out_owner, out_owner_size);
}

// ----------------------------- Thread helpers ------------------------------

// Start up to `wanted` threads running fn(arg). Only handles of threads that
// were actually created are stored (packed at the front of `handles`).
// Returns the number of threads started.
static size_t spawn_threads(HANDLE *handles, size_t wanted,
                            LPTHREAD_START_ROUTINE fn, LPVOID arg) {
  size_t started = 0;
  for (size_t i = 0; i < wanted; ++i) {
    HANDLE h = CreateThread(NULL, 0, fn, arg, 0, NULL);
    if (h) {
      handles[started++] = h;
    } else {
      fprintf(stderr, "CreateThread failed (error %lu)\n",
              (unsigned long)GetLastError());
    }
  }
  return started;
}

// Wait for every thread in `handles` to finish and close its handle. Waiting
// one handle at a time avoids the MAXIMUM_WAIT_OBJECTS limit of
// WaitForMultipleObjects on machines with many cores.
static void join_and_close_threads(HANDLE *handles, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    WaitForSingleObject(handles[i], INFINITE);
    CloseHandle(handles[i]);
  }
}

// ----------------------------- Main ---------------------------------------

// Entry point. Takes the folders to process from argv (or prompts for one),
// scans them in parallel, writes the file list to FILE_LIST_TXT, then hashes
// every file with a worker pool, writing results to FILE_HASHES_TXT while
// showing progress and throughput.
int main(int argc, char **argv) {
  enum { MAX_FOLDERS = 64 };
  char folders[MAX_FOLDERS][MAX_PATHLEN]; // up to 64 input folders
  int folder_count = 0;

  timer_init();

  // -------------------------------
  // Parse arguments
  // -------------------------------
  for (int i = 1; i < argc; ++i) {
    if (folder_count >= MAX_FOLDERS) {
      fprintf(stderr, "Warning: only the first %d folders are processed\n",
              (int)MAX_FOLDERS);
      break;
    }
    snprintf(folders[folder_count], MAX_PATHLEN, "%s", argv[i]);
    folder_count++;
  }

  // -------------------------------
  // Ask user if no folders provided
  // -------------------------------
  if (folder_count == 0) {
    printf("Enter folder to process (Enter = current folder): ");
    fflush(stdout);

    char line[MAX_PATHLEN];
    if (!fgets(line, sizeof(line), stdin))
      return 1;
    line[strcspn(line, "\r\n")] = 0;

    // snprintf always NUL-terminates, unlike strncpy into an uninitialized
    // array.
    snprintf(folders[0], MAX_PATHLEN, "%s", line[0] ? line : ".");
    folder_count = 1;
  }

  // -------------------------------
  // Display selected folders
  // -------------------------------
  printf("Processing %d folder(s):\n", folder_count);
  for (int i = 0; i < folder_count; ++i) {
    printf(" - %s\n", folders[i]);
  }

  // -------------------------------
  // Start timers (after the prompt, so time spent typing is not measured)
  // -------------------------------
  HiResTimer total_timer;
  HiResTimer scan_timer;
  timer_start(&total_timer);
  timer_start(&scan_timer);

  // -------------------------------
  // Detect hardware threads (CPU cores)
  // -------------------------------
  size_t hw_threads = 1;

  // --- Windows: detect PHYSICAL cores (not logical threads) ---
  DWORD len = 0;
  GetLogicalProcessorInformation(NULL, &len); // query the required size
  if (len > 0) {
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *info = malloc(len);
    if (info && GetLogicalProcessorInformation(info, &len)) {
      DWORD cores = 0;
      DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
      for (DWORD i = 0; i < n; i++) {
        if (info[i].Relationship == RelationProcessorCore)
          cores++;
      }
      if (cores > 0)
        hw_threads = cores;
    }
    free(info);
  }

  // Add some extra threads to overlap I/O more aggressively
  size_t num_threads = hw_threads * 2;
  if (num_threads < 2)
    num_threads = 2;

  // -------------------------------
  // Step 1: Scan all folders
  // -------------------------------
  InitializeCriticalSection(&g_entries_cs);

  DirQueue q;
  memset(&q, 0, sizeof(q));
  InitializeCriticalSection(&q.cs);
  InitializeConditionVariable(&q.cv);
  q.active = 0;

  HANDLE scan_progress =
      CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);

  for (int i = 0; i < folder_count; ++i) {
    dirqueue_push(&q, folders[i]);
  }

  size_t scan_threads = hw_threads;
  if (scan_threads < 2)
    scan_threads = 2;

  HANDLE *scan_tids = xmalloc(sizeof(HANDLE) * scan_threads);
  size_t scan_started = spawn_threads(scan_tids, scan_threads, scan_worker, &q);
  if (scan_started == 0) {
    fprintf(stderr, "Could not start any scan thread.\n");
    return 1;
  }
  join_and_close_threads(scan_tids, scan_started);
  free(scan_tids);

  atomic_store(&g_scan_done, 1);
  if (scan_progress) {
    WaitForSingleObject(scan_progress, INFINITE);
    CloseHandle(scan_progress);
  }

  // Every queued path was popped and freed by the workers; only the slot
  // array and the locks remain.
  free(q.items);
  DeleteCriticalSection(&q.cs);
  DeleteCriticalSection(&g_entries_cs);

  double scan_seconds = timer_stop(&scan_timer);
  double scan_rate =
      scan_seconds > 0.0 ? (double)g_entry_count / scan_seconds : 0.0;
  printf(". Scan rate : %.1f files/sec\n", scan_rate);
  printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
         FILE_LIST_TXT);
  save_file_list(FILE_LIST_TXT);

  if (g_entry_count == 0) {
    printf("No files to process.\n");
    free_entries();
    return 0;
  }

  // -------------------------------
  // Step 2: Hash all files
  // -------------------------------

  // Truncate the output file; workers append to it afterwards.
  FILE *hf = fopen(FILE_HASHES_TXT, "w");
  if (!hf) {
    perror("fopen hashes file");
    free_entries();
    return 1;
  }
  fclose(hf);

  // Prepare job queue
  JobQueue queue;
  jobqueue_init(&queue);
  size_t total_jobs = 0;
  for (size_t i = 0; i < g_entry_count; ++i) {
    Job *j = xmalloc(sizeof(*j));
    j->file = &g_entries[i];
    j->next = NULL;
    jobqueue_push(&queue, j);
    ++total_jobs;
  }

  if (total_jobs == 0) {
    printf("Nothing to do — all files already hashed.\n");
    DeleteCriticalSection(&queue.cs);
    free_entries();
    return 0;
  }

  // Starting thread pool
  atomic_size_t done_counter;
  atomic_store(&done_counter, 0);
  atomic_int live_workers;
  atomic_store(&live_workers, (int)num_threads);

  WorkerArg warg = {.queue = &queue,
                    .done_counter = &done_counter,
                    .total_jobs = total_jobs,
                    .live_workers = &live_workers};

  printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
         hw_threads);

  // The append lock must exist before the first worker runs.
  InitializeCriticalSection(&g_append_cs);

  // Launch threads
  HANDLE *tids = xmalloc(sizeof(HANDLE) * num_threads);
  size_t workers_started =
      spawn_threads(tids, num_threads, worker_thread_windows, &warg);
  if (workers_started == 0) {
    // Without workers the progress loop below would never terminate.
    fprintf(stderr, "Could not start any hashing thread.\n");
    return 1;
  }
  if (workers_started < num_threads) {
    atomic_fetch_sub(&live_workers, (int)(num_threads - workers_started));
  }

  // Progress / timer
  LARGE_INTEGER freq, start_li;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&start_li);

  // --------------- Hashing speed MB/s ----------------
  uint64_t last_bytes = atomic_load(&g_bytes_processed);
  double last_time = 0.0;
  double displayed_speed = 0.0;
  const double sample_interval = 0.5;

  enum { BAR_WIDTH = 40 };
  char bar[BAR_WIDTH + 1];
  char linebuf[256];

  for (;;) {
    size_t done = (size_t)atomic_load(&done_counter);

    // ---- monotonic time ----
    LARGE_INTEGER now_li;
    QueryPerformanceCounter(&now_li);
    double now = (double)(now_li.QuadPart - start_li.QuadPart) /
                 (double)freq.QuadPart;

    // ---- total processed bytes ----
    uint64_t bytes = atomic_load(&g_bytes_processed);

    // ---- real sampler (independent of UI sleep) ----
    if (last_time == 0.0) {
      last_time = now;
      last_bytes = bytes;
    }
    double dt = now - last_time;
    if (dt >= sample_interval) {
      uint64_t db = bytes - last_bytes;
      if (db > 0 && dt > 0.0001) {
        displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
      }
      last_bytes = bytes;
      last_time = now;
    }

    // ---- progress bar build ----
    double pct = total_jobs ? (double)done / (double)total_jobs : 0.0;
    int filled = (int)(pct * BAR_WIDTH + 0.5);
    if (filled > BAR_WIDTH)
      filled = BAR_WIDTH;
    for (int i = 0; i < BAR_WIDTH; ++i)
      bar[i] = (i < filled) ? '#' : '.';
    bar[BAR_WIDTH] = 0;

    snprintf(linebuf, sizeof(linebuf), "[%s] %6.2f%% (%zu / %zu) %8.2f MB/s",
             bar, pct * 100.0, done, total_jobs, displayed_speed);
    printf("\r%s", linebuf);
    fflush(stdout);

    if (done >= total_jobs)
      break;
    Sleep(100);
  }
  printf("\n\n");

  // stop queue and join threads
  jobqueue_stop(&queue);
  join_and_close_threads(tids, workers_started);
  free(tids);
  DeleteCriticalSection(&queue.cs);
  DeleteCriticalSection(&g_append_cs);

  // done time
  LARGE_INTEGER end_li;
  QueryPerformanceCounter(&end_li);
  double elapsed =
      (double)(end_li.QuadPart - start_li.QuadPart) / (double)freq.QuadPart;
  double total_seconds = timer_stop(&total_timer);

  printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed);

  uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
  double total_mb = (double)total_bytes / (1024.0 * 1024.0);
  double avg_mbps = elapsed > 0.0 ? total_mb / elapsed : 0.0;
  printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
  printf(" Total time : %.2f seconds\n", total_seconds);

  // Jobs reference g_entries, so release it only after all workers joined.
  free_entries();
  return 0;
}