Removing -resume functionality

This commit is contained in:
2026-02-28 19:09:28 +01:00
parent 1744309b50
commit b89526d724
4 changed files with 51 additions and 148 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
file_hasher.exe
file_hashes.txt
file_list.txt

View File

@@ -31,7 +31,7 @@
#include <unistd.h>
#endif
#define XXH_VECTOR XXH_AVX2 //don't compile with gcc see xxhash.h line 4082
// #define XXH_VECTOR XXH_AVX2 // don't compile with gcc see xxhash.h line 4082
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"

View File

@@ -446,57 +446,7 @@ static void save_file_list(const char *list_path) {
fclose(f);
}
/*
 * Load a saved file list and register every path it contains.
 *
 * The list is read as text, one path per line (trailing CR/LF stripped).
 * For each path a zeroed FileEntry is filled in: the path is passed through
 * normalize_path(), the timestamps and owner are queried with
 * platform_get_file_times() / platform_get_file_owner(), and the entry is
 * handed to global_entries_push().
 *
 * Blank lines are ignored. A line that does not fit into the MAX_PATHLEN
 * buffer is skipped as a whole (with a warning on stderr) instead of being
 * split into several bogus paths.
 *
 * list_path: path of the list file. If it cannot be opened, the function
 *            returns without registering anything.
 */
static void load_file_list(const char *list_path) {
  FILE *f = fopen(list_path, "r");
  if (!f)
    return;

  char line[MAX_PATHLEN];
  while (fgets(line, sizeof(line), f)) {
    size_t len = strcspn(line, "\r\n");
    int saw_eol = (line[len] != '\0');
    line[len] = 0;

    if (!saw_eol) {
      /* No line terminator in the buffer: this is either the last line of
       * the file or a line longer than the buffer. Consume the remainder so
       * it is not parsed as a separate path on the next iteration. */
      int truncated = 0;
      int c;
      while ((c = fgetc(f)) != EOF && c != '\n') {
        if (c != '\r')
          truncated = 1;
      }
      if (truncated) {
        fprintf(stderr, "Skipping overlong path in %s\n", list_path);
        continue;
      }
    }

    if (len == 0)
      continue; /* blank line: nothing to register */

    FileEntry fe;
    memset(&fe, 0, sizeof(fe));

    /* NOTE(review): fe.path aliases the local line buffer, which is
     * overwritten on the next iteration. This is only safe if
     * global_entries_push() copies the path string -- confirm. */
    fe.path = line;
    normalize_path(fe.path);

    /* Populate metadata from filesystem */
    platform_get_file_times(line, &fe.created_time, &fe.modified_time);
    platform_get_file_owner(line, fe.owner, sizeof(fe.owner));

    global_entries_push(&fe);
  }

  fclose(f);
}
// Look up an existing hash for resume.
// Simple linear scan of the hash file: returns 1 if a hash for `path` is
// found (and writes it into out_hex), 0 otherwise.
// ----------------------------- Get file metadata -------------------------
/*
 * Search a hash file for the entry belonging to `path`.
 *
 * The file is read as whitespace-separated "<path> <hash>" token pairs and
 * scanned linearly; the first pair whose path token equals `path` (strcmp)
 * is used.
 *
 * hashfile: path of the hash file to scan.
 * path:     path to look for (exact string comparison).
 * out_hex:  receives the hash, truncated to HASH_STRLEN - 1 characters and
 *           always NUL-terminated on success; must provide at least
 *           HASH_STRLEN bytes. Left untouched when nothing is found.
 *
 * Returns 1 if an entry was found, 0 if not (or if the file cannot be
 * opened).
 *
 * NOTE(review): tokens are split on whitespace, so a path containing spaces
 * can never match here -- confirm against the format the writer produces.
 */
static int find_hash_in_file(const char *hashfile, const char *path,
                             char *out_hex) {
  FILE *f = fopen(hashfile, "r");
  if (!f)
    return 0;

  char p[MAX_PATHLEN];
  char h[128];

  /* Derive the scan widths from the actual buffer sizes so the format can
   * never write past p or h, whatever MAX_PATHLEN is defined as. */
  char fmt[32];
  snprintf(fmt, sizeof(fmt), "%%%ds %%%ds", (int)(sizeof(p) - 1),
           (int)(sizeof(h) - 1));

  int found = 0;
  while (fscanf(f, fmt, p, h) == 2) {
    if (strcmp(p, path) == 0) {
      /* strncpy does not terminate on truncation; terminate explicitly. */
      strncpy(out_hex, h, HASH_STRLEN);
      out_hex[HASH_STRLEN - 1] = 0;
      found = 1;
      break;
    }
  }

  fclose(f);
  return found;
}
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified) {
WIN32_FILE_ATTRIBUTE_DATA fad;
@@ -518,7 +468,6 @@ void platform_get_file_owner(const char *path, char *out_owner,
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
int resume = 0;
// -------------------------------
// Scanning and total timer init
@@ -536,21 +485,17 @@ int main(int argc, char **argv) {
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-resume") == 0) {
resume = 1;
} else {
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0 && !resume) {
if (folder_count == 0) {
printf("Enter folder to process (Enter = current folder): ");
fflush(stdout);
@@ -565,9 +510,6 @@ int main(int argc, char **argv) {
strncpy(folders[0], buf, MAX_PATHLEN - 1);
folder_count = 1;
} else if (folder_count == 0 && resume) {
strcpy(folders[0], ".");
folder_count = 1;
}
// -------------------------------
@@ -611,86 +553,60 @@ int main(int argc, char **argv) {
// -------------------------------
InitializeCriticalSection(&g_entries_cs);
if (!resume) {
DirQueue q;
memset(&q, 0, sizeof(q));
InitializeCriticalSection(&q.cs);
InitializeConditionVariable(&q.cv);
q.active = 0;
DirQueue q;
memset(&q, 0, sizeof(q));
InitializeCriticalSection(&q.cs);
InitializeConditionVariable(&q.cv);
q.active = 0;
HANDLE scan_progress =
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
HANDLE scan_progress =
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
for (int i = 0; i < folder_count; ++i) {
dirqueue_push(&q, folders[i]);
}
size_t scan_threads = hw_threads;
if (scan_threads < 2)
scan_threads = 2;
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
for (size_t i = 0; i < scan_threads; ++i) {
scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker,
&q, 0, NULL);
}
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
atomic_store(&g_scan_done, 1);
WaitForSingleObject(scan_progress, INFINITE);
CloseHandle(scan_progress);
for (size_t i = 0; i < scan_threads; ++i)
CloseHandle(scan_tids[i]);
free(scan_tids);
double scan_seconds = timer_stop(&scan_timer);
double scan_rate = (double)g_entry_count / scan_seconds;
printf(". Scan rate : %.1f files/sec\n", scan_rate);
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
FILE_LIST_TXT);
save_file_list(FILE_LIST_TXT);
} else {
if (!file_exists(FILE_LIST_TXT)) {
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
return 1;
}
load_file_list(FILE_LIST_TXT);
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
for (int i = 0; i < folder_count; ++i) {
dirqueue_push(&q, folders[i]);
}
size_t scan_threads = hw_threads;
if (scan_threads < 2)
scan_threads = 2;
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
for (size_t i = 0; i < scan_threads; ++i) {
scan_tids[i] =
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL);
}
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
atomic_store(&g_scan_done, 1);
WaitForSingleObject(scan_progress, INFINITE);
CloseHandle(scan_progress);
for (size_t i = 0; i < scan_threads; ++i)
CloseHandle(scan_tids[i]);
free(scan_tids);
double scan_seconds = timer_stop(&scan_timer);
double scan_rate = (double)g_entry_count / scan_seconds;
printf(". Scan rate : %.1f files/sec\n", scan_rate);
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
FILE_LIST_TXT);
save_file_list(FILE_LIST_TXT);
if (g_entry_count == 0) {
printf("No files to process.\n");
return 0;
}
DeleteCriticalSection(&g_entries_cs);
// If resume: create map of which files are already hashed
char **existing_hash = calloc(g_entry_count, sizeof(char *));
for (size_t i = 0; i < g_entry_count; ++i)
existing_hash[i] = NULL;
if (resume && file_exists(FILE_HASHES_TXT)) {
// For simplicity we parse hash file and match lines to list entries.
for (size_t i = 0; i < g_entry_count; ++i) {
char hex[HASH_STRLEN] = {0};
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
existing_hash[i] = strdup(hex);
}
}
}
// Prepare job queue of only missing files (or all if not resume)
// Prepare job queue
JobQueue queue;
jobqueue_init(&queue);
size_t total_jobs = 0;
for (size_t i = 0; i < g_entry_count; ++i) {
if (resume && existing_hash[i])
continue;
Job *j = (Job *)malloc(sizeof(Job));
j->file = &g_entries[i];
j->next = NULL;
@@ -703,13 +619,9 @@ int main(int argc, char **argv) {
return 0;
}
// Remove old hashes file if we're recomputing from scratch.
if (!resume) {
// create/overwrite hashes file
FILE *hf = fopen(FILE_HASHES_TXT, "w");
if (hf)
fclose(hf);
} // if resume, we append only missing
FILE *hf = fopen(FILE_HASHES_TXT, "w");
if (hf)
fclose(hf);
// Starting thread pool
atomic_size_t done_counter;
@@ -825,17 +737,5 @@ int main(int argc, char **argv) {
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
printf(" Total time : %.2f seconds\n", total_seconds);
// If resume: we appended missing entries. If not resume: we wrote all results
// during workers. Note: This program appends hashes as workers finish. This
// avoids holding all hashes in RAM.
// Cleanup
for (size_t i = 0; i < g_entry_count; ++i)
if (existing_hash[i])
free(existing_hash[i]);
free(existing_hash);
free_entries();
return 0;
}

View File

@@ -5101,7 +5101,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
const void* XXH_RESTRICT secret)
{
//DEBUG
// printf("\nUsing AVX2\n");
printf("\nUsing AVX2\n");
XXH_ASSERT((((size_t)acc) & 31) == 0);
{ __m256i* const xacc = (__m256i *) acc;
/* Unaligned. This is mainly for pointer arithmetic, and because