From b89526d724ea7f3aecf7a37c56294cdce76206d8 Mon Sep 17 00:00:00 2001 From: amir Date: Sat, 28 Feb 2026 19:09:28 +0100 Subject: [PATCH] Removing -resume functionality --- .gitignore | 3 + platform.h | 2 +- platform_windows.c | 192 +++++++++++---------------------------------- xxhash.h | 2 +- 4 files changed, 51 insertions(+), 148 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6fc1500 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +file_hasher.exe +file_hashes.txt +file_list.txt diff --git a/platform.h b/platform.h index d3a15cd..4a6cec7 100644 --- a/platform.h +++ b/platform.h @@ -31,7 +31,7 @@ #include #endif -#define XXH_VECTOR XXH_AVX2 //don't compile with gcc see xxhash.h line 4082 +// #define XXH_VECTOR XXH_AVX2 // don't compile with gcc see xxhash.h line 4082 #define XXH_INLINE_ALL #include "xxhash.c" #include "xxhash.h" diff --git a/platform_windows.c b/platform_windows.c index 461156e..b106ca1 100644 --- a/platform_windows.c +++ b/platform_windows.c @@ -446,57 +446,7 @@ static void save_file_list(const char *list_path) { fclose(f); } -static void load_file_list(const char *list_path) { - FILE *f = fopen(list_path, "r"); - if (!f) - return; - - char line[MAX_PATHLEN]; - - while (fgets(line, sizeof(line), f)) { - line[strcspn(line, "\r\n")] = 0; - - FileEntry fe; - memset(&fe, 0, sizeof(fe)); - - fe.path = line; - normalize_path(fe.path); - - /* Populate metadata from filesystem */ - platform_get_file_times(line, &fe.created_time, &fe.modified_time); - - platform_get_file_owner(line, fe.owner, sizeof(fe.owner)); - - global_entries_push(&fe); - } - - fclose(f); -} - -// Read existing hashes into memory map for resume -// Simple linear search mapping: returns 1 if path has hash found (and writes -// into out_hex) - // ----------------------------- Get file metadata ------------------------- -static int find_hash_in_file(const char *hashfile, const char *path, - char *out_hex) { - FILE *f = fopen(hashfile, "r"); - if (!f) - return 0; - char p[MAX_PATHLEN]; - char h[128]; - int found = 0; - while (fscanf(f, "%4095s %127s", p, h) == 2) { - if (strcmp(p, path) == 0) { - strncpy(out_hex, h, HASH_STRLEN); - out_hex[HASH_STRLEN - 1] = 0; - found = 1; - break; - } - } - fclose(f); - return found; -} void platform_get_file_times(const char *path, uint64_t *out_created, uint64_t *out_modified) { WIN32_FILE_ATTRIBUTE_DATA fad; @@ -518,7 +468,6 @@ void platform_get_file_owner(const char *path, char *out_owner, int main(int argc, char **argv) { char folders[64][MAX_PATHLEN]; // up to 64 input folders int folder_count = 0; - int resume = 0; // ------------------------------- // Scanning and total timer init @@ -536,21 +485,17 @@ int main(int argc, char **argv) { // Parse arguments // ------------------------------- for (int i = 1; i < argc; ++i) { - if (strcmp(argv[i], "-resume") == 0) { - resume = 1; - } else { - if (folder_count < 64) { - strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); - folders[folder_count][MAX_PATHLEN - 1] = 0; - folder_count++; - } + if (folder_count < 64) { + strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1); + folders[folder_count][MAX_PATHLEN - 1] = 0; + folder_count++; } } // ------------------------------- // Ask user if no folders provided // ------------------------------- - if (folder_count == 0 && !resume) { + if (folder_count == 0) { printf("Enter folder to process (Enter = current folder): "); fflush(stdout); @@ -565,9 +510,6 @@ int main(int argc, char **argv) { strncpy(folders[0], buf, MAX_PATHLEN - 1); folder_count = 1; - } else if (folder_count == 0 && resume) { - strcpy(folders[0], "."); - folder_count = 1; } // ------------------------------- @@ -611,86 +553,60 @@ int main(int argc, char **argv) { // ------------------------------- InitializeCriticalSection(&g_entries_cs); - if (!resume) { - DirQueue q; - memset(&q, 0, sizeof(q)); - InitializeCriticalSection(&q.cs); - InitializeConditionVariable(&q.cv); - q.active = 0; + DirQueue q; + memset(&q, 0, sizeof(q)); + InitializeCriticalSection(&q.cs); + InitializeConditionVariable(&q.cv); + q.active = 0; - HANDLE scan_progress = - CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL); + HANDLE scan_progress = + CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL); - for (int i = 0; i < folder_count; ++i) { - dirqueue_push(&q, folders[i]); - } - - size_t scan_threads = hw_threads; - if (scan_threads < 2) - scan_threads = 2; - - HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads); - for (size_t i = 0; i < scan_threads; ++i) { - scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, - &q, 0, NULL); - } - - WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE); - - atomic_store(&g_scan_done, 1); - WaitForSingleObject(scan_progress, INFINITE); - CloseHandle(scan_progress); - - for (size_t i = 0; i < scan_threads; ++i) - CloseHandle(scan_tids[i]); - free(scan_tids); - - double scan_seconds = timer_stop(&scan_timer); - double scan_rate = (double)g_entry_count / scan_seconds; - - printf(". Scan rate : %.1f files/sec\n", scan_rate); - printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds, - FILE_LIST_TXT); - save_file_list(FILE_LIST_TXT); - - } else { - if (!file_exists(FILE_LIST_TXT)) { - fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT); - return 1; - } - load_file_list(FILE_LIST_TXT); - printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT); + for (int i = 0; i < folder_count; ++i) { + dirqueue_push(&q, folders[i]); } + size_t scan_threads = hw_threads; + if (scan_threads < 2) + scan_threads = 2; + + HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads); + for (size_t i = 0; i < scan_threads; ++i) { + scan_tids[i] = + CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL); + } + + WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE); + + atomic_store(&g_scan_done, 1); + WaitForSingleObject(scan_progress, INFINITE); + CloseHandle(scan_progress); + + for (size_t i = 0; i < scan_threads; ++i) + CloseHandle(scan_tids[i]); + free(scan_tids); + + double scan_seconds = timer_stop(&scan_timer); + double scan_rate = (double)g_entry_count / scan_seconds; + + printf(". Scan rate : %.1f files/sec\n", scan_rate); + printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds, + FILE_LIST_TXT); + save_file_list(FILE_LIST_TXT); + if (g_entry_count == 0) { printf("No files to process.\n"); return 0; } DeleteCriticalSection(&g_entries_cs); - // If resume: create map of which files are already hashed - char **existing_hash = calloc(g_entry_count, sizeof(char *)); - for (size_t i = 0; i < g_entry_count; ++i) - existing_hash[i] = NULL; - if (resume && file_exists(FILE_HASHES_TXT)) { - // For simplicity we parse hash file and match lines to list entries. - for (size_t i = 0; i < g_entry_count; ++i) { - char hex[HASH_STRLEN] = {0}; - if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) { - existing_hash[i] = strdup(hex); - } - } - } - - // Prepare job queue of only missing files (or all if not resume) + // Prepare job queue JobQueue queue; jobqueue_init(&queue); size_t total_jobs = 0; for (size_t i = 0; i < g_entry_count; ++i) { - if (resume && existing_hash[i]) - continue; Job *j = (Job *)malloc(sizeof(Job)); j->file = &g_entries[i]; j->next = NULL; @@ -703,13 +619,9 @@ int main(int argc, char **argv) { return 0; } - // Remove old hashes file if we're recomputing from scratch. - if (!resume) { - // create/overwrite hashes file - FILE *hf = fopen(FILE_HASHES_TXT, "w"); - if (hf) - fclose(hf); - } // if resume, we append only missing + FILE *hf = fopen(FILE_HASHES_TXT, "w"); + if (hf) + fclose(hf); // Starting thread pool atomic_size_t done_counter; @@ -825,17 +737,5 @@ int main(int argc, char **argv) { printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps); printf(" Total time : %.2f seconds\n", total_seconds); - // If resume: we appended missing entries. If not resume: we wrote all results - // during workers. Note: This program appends hashes as workers finish. This - // avoids holding all hashes in RAM. - - // Cleanup - for (size_t i = 0; i < g_entry_count; ++i) - if (existing_hash[i]) - free(existing_hash[i]); - free(existing_hash); - - free_entries(); - return 0; } diff --git a/xxhash.h b/xxhash.h index f21ff1f..77fa932 100644 --- a/xxhash.h +++ b/xxhash.h @@ -5101,7 +5101,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { //DEBUG - // printf("\nUsing AVX2\n"); + printf("\nUsing AVX2\n"); XXH_ASSERT((((size_t)acc) & 31) == 0); { __m256i* const xacc = (__m256i *) acc; /* Unaligned. This is mainly for pointer arithmetic, and because