Files
filehasher/file_hasher.c
amir 81d47fb675 Linux porting
Porting to linux
Reorganising the code
Improving the scan function
2026-03-18 23:38:54 +01:00

224 lines
6.2 KiB
C

#include "platform.c"
// ----------------------------- Main ---------------------------------------
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (folder_count < 64) {
normalize_path(argv[i]);
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0) {
printf("Enter folders to process (Enter = current folder): ");
fflush(stdout);
char buf[KiB(32)];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0) {
strcpy(folders[0], ".");
folder_count = 1;
} else {
folder_count = parse_paths(buf, folders, 64);
}
}
// Display selected folders
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Scanning and total timer init
// -------------------------------
timer_init();
HiResTimer total_timer;
HiResTimer scan_timer;
timer_start(&total_timer);
timer_start(&scan_timer);
// -------------------------------
// Creating a general purpose arena
// -------------------------------
arena_params params = {
.reserve_size = GiB(1),
.commit_size = MiB(16),
.align = 0,
.push_size = 0,
.allow_free_list = true,
.allow_swapback = false,
.growth_policy = ARENA_GROWTH_NORMAL,
.commit_policy = ARENA_COMMIT_LAZY,
.max_nbre_blocks = 1,
};
mem_arena *gp_arena = arena_create(&params);
// -------------------------------
// Detect hardware threads
// -------------------------------
// --- Windows: detect PHYSICAL cores (not logical threads) ---
size_t hw_threads = platform_physical_cores();
// Logical threads = CPU cores * 2
size_t num_threads = hw_threads * 2;
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
hw_threads);
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
// -------------------------------
// Scanning and hashing
// -------------------------------
MPMCQueue dir_queue;
mpmc_init(&dir_queue, MiB(1));
MPMCQueue file_queue;
mpmc_init(&file_queue, MiB(1));
// Starting hash threads
size_t num_hash_threads = num_threads;
WorkerContext workers[num_hash_threads];
Thread *hash_threads =
arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
for (size_t i = 0; i < num_hash_threads; ++i) {
workers[i].arena = arena_create(&params);
workers[i].file_queue = &file_queue;
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker, &workers[i]) !=
0) {
fprintf(stderr, "Failed to create hash thread %zu\n", i);
exit(1);
}
}
// Starting progress printing thread
Thread progress_thread_handle;
if (thread_create(&progress_thread_handle, (ThreadFunc)progress_thread,
NULL) != 0) {
fprintf(stderr, "Failed to create progress thread\n");
exit(1);
}
// Starting scan threads
size_t num_scan_threads = num_threads;
ScannerContext scanners[num_scan_threads];
Thread *scan_threads =
arena_push(&gp_arena, sizeof(Thread) * num_scan_threads, true);
for (size_t i = 0; i < num_scan_threads; i++) {
scanners[i].num_threads = num_scan_threads;
scanners[i].path_arena = arena_create(&params);
scanners[i].meta_arena = arena_create(&params);
scanners[i].dir_queue = &dir_queue;
scanners[i].file_queue = &file_queue;
if (thread_create(&scan_threads[i], (ThreadFunc)scan_worker,
&scanners[i]) != 0) {
fprintf(stderr, "Failed to create scan thread %zu\n", i);
exit(1);
}
}
// Initial folder push
for (int i = 0; i < folder_count; i++) {
size_t len = strlen(folders[i]) + 1;
char *path = arena_push(&scanners[0].path_arena, len, false);
memcpy(path, folders[i], len);
mpmc_push_work(&dir_queue, path);
}
// Stop scan threads
thread_wait_multiple(scan_threads, num_scan_threads);
for (size_t i = 0; i < num_scan_threads; ++i) {
thread_close(&scan_threads[i]);
}
mpmc_producers_finished(&file_queue, num_hash_threads);
atomic_store(&g_scan_done, 1);
arena_free(&gp_arena, (u8 **)&scan_threads,
sizeof(Thread) * num_scan_threads);
double scan_seconds = timer_elapsed(&scan_timer);
size_t total_found = atomic_load(&g_files_found);
printf("\r%*s\r", 120, ""); // clear_console_line
printf("Completed scanning in %.2f seconds, found %zu files\n\n",
scan_seconds, total_found);
// If no files found
if (total_found == 0) {
printf("No files found.\n");
return 0;
}
// Stop hashing threads
thread_wait_multiple(hash_threads, num_hash_threads);
for (size_t i = 0; i < num_hash_threads; ++i) {
thread_close(&hash_threads[i]);
}
arena_free(&gp_arena, (u8 **)&hash_threads,
sizeof(Thread) * num_hash_threads);
// Stop progress printing thread
thread_join(&progress_thread_handle);
thread_close(&progress_thread_handle);
// -------------------------------
// Export file_hashes.txt
// -------------------------------
FILE *f = fopen(FILE_HASHES_TXT, "wb");
for (int i = 0; i < num_threads; i++) {
mem_arena *arena = workers[i].arena;
u8 *arena_base =
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
fwrite(arena_base, 1, arena->pos, f);
}
fclose(f);
// -------------------------------
// Print summary
// -------------------------------
double total_seconds = timer_elapsed(&total_timer);
printf("Completed hashing %zu files\n", total_found);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / total_seconds;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
printf(" Total time : %.2f seconds\n\n", total_seconds);
return 0;
}