#include "platform.h"

// ----------------------------- Globals ------------------------------------
// Running total of bytes fed to the hasher; only touched through atomic ops.
static atomic_uint_fast64_t g_bytes_processed = 0;

// Growable array of discovered files plus its length and allocated capacity.
FileEntry *g_entries = NULL;
size_t g_entry_count = 0;
size_t g_entry_capacity = 0;

// Serializes appends to g_entries: add_entry() is reached from several
// directory-scanning threads at the same time.
static pthread_mutex_t g_entries_mutex = PTHREAD_MUTEX_INITIALIZER;

// ----------------------------- Utils --------------------------------------
// Prints `msg` together with the current errno text and terminates with
// exit status 1.
static void perror_exit(const char *msg) {
  perror(msg);
  exit(1);
}

// malloc() wrapper that never returns NULL: allocation failure terminates
// the process through perror_exit().
static void *xmalloc(size_t n) {
  void *p = malloc(n);
  if (!p) perror_exit("malloc");
  return p;
}

// Appends a deep copy of `src` to the global entry array.
//
// The path string is duplicated (the copy is released by free_entries()), so
// the caller may pass a pointer to a temporary buffer. The owner field is
// copied with a bounded read and is always NUL-terminated. Any allocation
// failure terminates the process. Safe to call from multiple threads.
static void add_entry(const FileEntry *src) {
  // Duplicate the path before taking the lock to keep the critical section
  // short.
  char *path_copy = NULL;
  if (src->path) {
    path_copy = strdup(src->path);
    if (!path_copy) perror_exit("strdup");
  }

  pthread_mutex_lock(&g_entries_mutex);

  if (g_entry_count + 1 > g_entry_capacity) {
    size_t new_capacity = g_entry_capacity ? g_entry_capacity * 2 : 1024;
    // Keep the old pointer until realloc is known to have succeeded.
    FileEntry *grown = realloc(g_entries, sizeof(FileEntry) * new_capacity);
    if (!grown) perror_exit("realloc");
    g_entries = grown;
    g_entry_capacity = new_capacity;
  }

  FileEntry *dst = &g_entries[g_entry_count++];
  memset(dst, 0, sizeof(*dst));
  dst->size_bytes = src->size_bytes;
  dst->created_time = src->created_time;
  dst->modified_time = src->modified_time;
  dst->path = path_copy;
  strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
  dst->owner[sizeof(dst->owner) - 1] = '\0';

  pthread_mutex_unlock(&g_entries_mutex);
}

// Releases every stored path and the entry array itself, then resets the
// global bookkeeping so the array can be rebuilt from scratch.
static void free_entries(void) {
  for (size_t i = 0; i < g_entry_count; ++i) {
    free(g_entries[i].path);
  }
  free(g_entries);
  g_entries = NULL;
  g_entry_count = 0;
  g_entry_capacity = 0;
}

// ----------------------------- Owner lookup ------------------------------
// Writes the login name for `uid` into `out` (at most `out_sz` bytes,
// NUL-terminated), or "UNKNOWN" when the lookup fails.
//
// Uses the reentrant getpwuid_r(): this helper runs on several scanner
// threads concurrently, and getpwuid() hands back shared static storage.
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
  struct passwd pwd;
  struct passwd *result = NULL;
  char scratch[4096];  // string storage for the passwd record

  if (getpwuid_r(uid, &pwd, scratch, sizeof(scratch), &result) == 0 &&
      result) {
    snprintf(out, out_sz, "%s", result->pw_name);
  } else {
    // No such user, lookup error, or record larger than `scratch`.
    snprintf(out, out_sz, "UNKNOWN");
  }
}

// ----------------------------- Format time helper -------------------------
// Formats the epoch timestamp `t` as local time "YYYY-MM-DD HH:MM:SS" into
// `out`. Writes "N/A" when `t` is 0, when the conversion to local time
// fails, or when the formatted text does not fit in `out_sz` bytes.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }

  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = (localtime_r(&tt, &tm) != NULL);
#endif

  // strftime() returns 0 and leaves `out` indeterminate if the result does
  // not fit, so fall back to the placeholder in that case as well.
  if (!converted || strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}

// --------------- parallel directory scanning ----------------
// Add queue helper
functions static void dirqueue_push(DirQueue *q, const char *path) { DirJob *job = malloc(sizeof(*job)); job->path = strdup(path); job->next = NULL; pthread_mutex_lock(&q->mutex); if (q->tail) q->tail->next = job; else q->head = job; q->tail = job; pthread_cond_signal(&q->cond); pthread_mutex_unlock(&q->mutex); } static char *dirqueue_pop(DirQueue *q) { pthread_mutex_lock(&q->mutex); while (!q->head && !q->stop) pthread_cond_wait(&q->cond, &q->mutex); if (q->stop) { pthread_mutex_unlock(&q->mutex); return NULL; } DirJob *job = q->head; q->head = job->next; if (!q->head) q->tail = NULL; q->active_workers++; pthread_mutex_unlock(&q->mutex); char *path = job->path; free(job); return path; } static void dirqueue_done(DirQueue *q) { pthread_mutex_lock(&q->mutex); q->active_workers--; if (!q->head && q->active_workers == 0) { q->stop = 1; pthread_cond_broadcast(&q->cond); } pthread_mutex_unlock(&q->mutex); } // Scanning directory worker thread function static void scan_worker(void *arg) { DirQueue *q = arg; for (;;) { char *dir = dirqueue_pop(q); if (!dir) break; scan_folder_posix_parallel(dir, q); free(dir); dirqueue_done(q); } } // Scanning directory function void scan_folder_posix_parallel(const char *base, DirQueue *q) { DIR *d = opendir(base); if (!d) return; struct dirent *ent; while ((ent = readdir(d))) { if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) continue; char full[MAX_PATHLEN]; snprintf(full, sizeof(full), "%s/%s", base, ent->d_name); struct stat st; if (lstat(full, &st) != 0) continue; if (S_ISDIR(st.st_mode)) { dirqueue_push(q, full); } else if (S_ISREG(st.st_mode)) { FileEntry fe; memset(&fe, 0, sizeof(fe)); normalize_path(full); fe.path = full; fe.size_bytes = (uint64_t)st.st_size; fe.created_time = (uint64_t)st.st_ctime; fe.modified_time = (uint64_t)st.st_mtime; get_file_owner(st.st_uid, fe.owner, sizeof(fe.owner)); add_entry(&fe); } } closedir(d); } // ----------------------------- Job queue ---------------------------------- static void 
jobqueue_init(JobQueue *q) { q->head = q->tail = NULL; atomic_store(&q->count, 0); q->stop = 0; pthread_mutex_init(&q->mutex, NULL); pthread_cond_init(&q->cond, NULL); } static void jobqueue_push(JobQueue *q, Job *job) { pthread_mutex_lock(&q->mutex); job->next = NULL; if (q->tail) q->tail->next = job; else q->head = job; q->tail = job; atomic_fetch_add(&q->count, 1); pthread_cond_signal(&q->cond); pthread_mutex_unlock(&q->mutex); } static Job *jobqueue_pop(JobQueue *q) { pthread_mutex_lock(&q->mutex); while (!q->head && !q->stop) pthread_cond_wait(&q->cond, &q->mutex); if (q->stop && !q->head) { pthread_mutex_unlock(&q->mutex); return NULL; } Job *j = q->head; q->head = j->next; if (!q->head) q->tail = NULL; pthread_mutex_unlock(&q->mutex); if (j) atomic_fetch_sub(&q->count, 1); return j; } static void jobqueue_stop(JobQueue *q) { pthread_mutex_lock(&q->mutex); q->stop = 1; pthread_cond_broadcast(&q->cond); pthread_mutex_unlock(&q->mutex); } // ----------------------------- Hashing helpers ----------------------------- static void xxh3_hash_file_stream(const char *path, char *out_hex) { // compute XXH3_128 over file. POSIX and Windows use standard reads in this // helper. 
int fd = open(path, O_RDONLY); if (fd < 0) { strcpy(out_hex, "ERROR"); return; } XXH128_hash_t h; XXH3_state_t *state = XXH3_createState(); XXH3_128bits_reset(state); unsigned char *buf = (unsigned char *)malloc(READ_BLOCK); ssize_t r; while ((r = read(fd, buf, READ_BLOCK)) > 0) { XXH3_128bits_update(state, buf, (size_t)r); atomic_fetch_add(&g_bytes_processed, (uint64_t)r); } h = XXH3_128bits_digest(state); XXH3_freeState(state); close(fd); free(buf); snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, (unsigned long long)h.low64); } // ----------------------------- Worker -------------------------------------- static void *worker_thread_posix(void *argp) { WorkerArg *w = (WorkerArg *)argp; JobQueue *q = w->queue; for (;;) { Job *job = jobqueue_pop(q); if (!job) break; char hex[HASH_STRLEN]; xxh3_hash_file_stream(job->file->path, hex); // append to file_hashes.txt atomically: we will store results to a temp // buffer and write them at the end (to avoid synchronization issues). But // for simplicity, here we append directly using a file lock (fopen+fwrite // guarded by mutex). We'll store results in job->file->path? Instead, // simple global append with a mutex. 
Using a file-level append lock: static pthread_mutex_t append_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock(&append_mutex); FILE *hf = fopen(FILE_HASHES_TXT, "a"); if (hf) { char created[32], modified[32]; format_time(job->file->created_time, created, sizeof(created)); format_time(job->file->modified_time, modified, sizeof(modified)); double size_kib = (double)job->file->size_bytes / (1024.0); fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib, created, modified, job->file->owner); fclose(hf); } pthread_mutex_unlock(&append_mutex); atomic_fetch_add(w->done_counter, 1); free(job); } atomic_fetch_sub(w->live_workers, 1); return NULL; } // ----------------------------- Progress display --------------------------- static void print_progress(size_t done, size_t total) { const int barw = 40; double pct = total ? (double)done / (double)total : 0.0; int filled = (int)(pct * barw + 0.5); printf("\r["); for (int i = 0; i < filled; ++i) putchar('#'); for (int i = filled; i < barw; ++i) putchar(' '); printf("] %6.2f%% (%zu / %zu) ", pct * 100.0, done, total); fflush(stdout); } // ----------------------------- Helpers: load/save -------------------------- static int file_exists(const char *path) { struct stat st; return (stat(path, &st) == 0); } static void save_file_list(const char *list_path) { FILE *f = fopen(list_path, "w"); if (!f) { perror("fopen file_list"); return; } for (size_t i = 0; i < g_entry_count; ++i) { fprintf(f, "%s\n", g_entries[i].path); } fclose(f); } static void load_file_list(const char *list_path) { FILE *f = fopen(list_path, "r"); if (!f) return; char line[MAX_PATHLEN]; while (fgets(line, sizeof(line), f)) { line[strcspn(line, "\r\n")] = 0; FileEntry fe; memset(&fe, 0, sizeof(fe)); fe.path = line; /* Populate metadata from filesystem */ platform_get_file_times(line, &fe.created_time, &fe.modified_time); platform_get_file_owner(line, fe.owner, sizeof(fe.owner)); add_entry(&fe); } fclose(f); } // Read existing hashes into 
memory map for resume // Simple linear search mapping: returns 1 if path has hash found (and writes // into out_hex) static int find_hash_in_file(const char *hashfile, const char *path, char *out_hex) { FILE *f = fopen(hashfile, "r"); if (!f) return 0; char p[MAX_PATHLEN]; char h[128]; int found = 0; while (fscanf(f, "%4095s %127s", p, h) == 2) { if (strcmp(p, path) == 0) { strncpy(out_hex, h, HASH_STRLEN); out_hex[HASH_STRLEN - 1] = 0; found = 1; break; } } fclose(f); return found; } // ----------------------------- Get file metadata ------------------------- void platform_get_file_times(const char *path, uint64_t *out_created, uint64_t *out_modified) { struct stat st; if (stat(path, &st) == 0) { *out_created = (uint64_t)st.st_ctime; *out_modified = (uint64_t)st.st_mtime; } else { *out_created = 0; *out_modified = 0; } } void platform_get_file_owner(const char *path, char *out_owner, size_t out_owner_size) { struct stat st; if (stat(path, &st) == 0) { get_file_owner(st.st_uid, out_owner, out_owner_size); } else { snprintf(out_owner, out_owner_size, "UNKNOWN"); } } // ----------------------------- Main --------------------------------------- int main(int argc, char **argv) { char folders[64][MAX_PATHLEN]; // up to 64 input folders int folder_count = 0; int resume = 0; // ------------------------------- // Parse arguments // ------------------------------- for (int i = 1; i < argc; ++i) { if (strcmp(argv[i], "-resume") == 0) { resume = 1; } else { if (folder_count < 64) { strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); folders[folder_count][MAX_PATHLEN - 1] = 0; folder_count++; } } } // ------------------------------- // Ask user if no folders provided // ------------------------------- if (folder_count == 0 && !resume) { printf("Enter folder to process (Enter = current folder): "); fflush(stdout); char buf[MAX_PATHLEN]; if (!fgets(buf, sizeof(buf), stdin)) return 1; buf[strcspn(buf, "\r\n")] = 0; if (buf[0] == 0) strcpy(folders[0], "."); else 
strncpy(folders[0], buf, MAX_PATHLEN - 1); folder_count = 1; } else if (folder_count == 0 && resume) { strcpy(folders[0], "."); folder_count = 1; } // ------------------------------- // Display selected folders // ------------------------------- printf("Processing %d folder(s):\n", folder_count); for (int i = 0; i < folder_count; ++i) { printf(" - %s\n", folders[i]); } // ------------------------------- // Detect hardware threads (CPU cores) // ------------------------------- size_t hw_threads = 1; long cpus = sysconf(_SC_NPROCESSORS_ONLN); if (cpus > 0) hw_threads = (size_t)cpus; // Add some extra threads to overlap I/O more aggressively size_t num_threads = hw_threads * 2; if (num_threads < 2) num_threads = 2; // ------------------------------- // Step 1: Scan all folders // ------------------------------- if (!resume) { DirQueue q = {0}; pthread_mutex_init(&q.mutex, NULL); pthread_cond_init(&q.cond, NULL); // Seed queue for (int i = 0; i < folder_count; ++i) dirqueue_push(&q, folders[i]); pthread_t *threads = malloc(sizeof(pthread_t) * num_threads); for (size_t i = 0; i < num_threads; ++i) pthread_create(&threads[i], NULL, (void *(*)(void *))scan_worker, &q); for (size_t i = 0; i < num_threads; ++i) pthread_join(threads[i], NULL); free(threads); pthread_mutex_destroy(&q.mutex); pthread_cond_destroy(&q.cond); printf("Found %zu files. 
Saving to %s\n", g_entry_count, FILE_LIST_TXT); save_file_list(FILE_LIST_TXT); } else { if (!file_exists(FILE_LIST_TXT)) { fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT); return 1; } load_file_list(FILE_LIST_TXT); printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT); } if (g_entry_count == 0) { printf("No files to process.\n"); return 0; } // If resume: create map of which files are already hashed char **existing_hash = calloc(g_entry_count, sizeof(char *)); for (size_t i = 0; i < g_entry_count; ++i) existing_hash[i] = NULL; if (resume && file_exists(FILE_HASHES_TXT)) { // For simplicity we parse hash file and match lines to list entries. for (size_t i = 0; i < g_entry_count; ++i) { char hex[HASH_STRLEN] = {0}; if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) { existing_hash[i] = strdup(hex); } } } // Prepare job queue of only missing files (or all if not resume) JobQueue queue; jobqueue_init(&queue); size_t total_jobs = 0; for (size_t i = 0; i < g_entry_count; ++i) { if (resume && existing_hash[i]) continue; Job *j = (Job *)malloc(sizeof(Job)); j->file = &g_entries[i]; j->next = NULL; jobqueue_push(&queue, j); ++total_jobs; } if (total_jobs == 0) { printf("Nothing to do — all files already hashed.\n"); return 0; } // Remove old hashes file if we're recomputing from scratch. 
if (!resume) { // create/overwrite hashes file FILE *hf = fopen(FILE_HASHES_TXT, "w"); if (hf) fclose(hf); } // if resume, we append only missing // Starting thread pool atomic_size_t done_counter; atomic_store(&done_counter, 0); atomic_int live_workers; atomic_store(&live_workers, (int)num_threads); WorkerArg warg = {.queue = &queue, .done_counter = &done_counter, .total_jobs = total_jobs, .live_workers = &live_workers}; printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads, hw_threads); // Launch threads pthread_t *tids = malloc(sizeof(pthread_t) * num_threads); for (size_t i = 0; i < num_threads; ++i) { pthread_create(&tids[i], NULL, worker_thread_posix, &warg); } // Progress / timer struct timespec tstart, tnow; clock_gettime(CLOCK_MONOTONIC, &tstart); size_t last_done = 0; // ---------- Correct real-time MB/s (stable & accurate) ---------- uint64_t last_bytes = atomic_load(&g_bytes_processed); double last_time = 0.0; double displayed_speed = 0.0; const double sample_interval = 0.5; char linebuf[256]; for (;;) { size_t done = (size_t)atomic_load(&done_counter); // ---- monotonic time ---- clock_gettime(CLOCK_MONOTONIC, &tnow); double now = (tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9; // ---- bytes so far ---- uint64_t bytes = atomic_load(&g_bytes_processed); // ---- real sampler (independent of UI sleep) ---- if (last_time == 0.0) { last_time = now; last_bytes = bytes; } double dt = now - last_time; if (dt >= sample_interval) { uint64_t db = bytes - last_bytes; if (db > 0 && dt > 0.0001) { displayed_speed = (double)db / (1024.0 * 1024.0) / dt; } last_bytes = bytes; last_time = now; } // ---- progress bar build ---- const int barw = 40; double pct = total_jobs ? 
(double)done / (double)total_jobs : 0.0; int filled = (int)(pct * barw + 0.5); int p = 0; p += snprintf(linebuf + p, sizeof(linebuf) - p, "["); for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i) p += snprintf(linebuf + p, sizeof(linebuf) - p, "#"); for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i) p += snprintf(linebuf + p, sizeof(linebuf) - p, "."); snprintf(linebuf + p, sizeof(linebuf) - p, "] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs, displayed_speed); printf("\r%s", linebuf); fflush(stdout); if (done >= total_jobs) break; usleep(100000); } printf("\n\n"); // stop queue and join threads jobqueue_stop(&queue); for (size_t i = 0; i < num_threads; ++i) pthread_join(tids[i], NULL); // done time clock_gettime(CLOCK_MONOTONIC, &tnow); double elapsed = (tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9; printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed); uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed); double total_mb = (double)total_bytes / (1024.0 * 1024.0); double avg_mbps = total_mb / elapsed; printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps); // If resume: we appended missing entries. If not resume: we wrote all results // during workers. Note: This program appends hashes as workers finish. This // avoids holding all hashes in RAM. // Cleanup for (size_t i = 0; i < g_entry_count; ++i) if (existing_hash[i]) free(existing_hash[i]); free(existing_hash); free_entries(); return 0; }