forked from amir/filehasher
Removing -resume functionality
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
file_hasher.exe
|
||||
file_hashes.txt
|
||||
file_list.txt
|
||||
@@ -31,7 +31,7 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#define XXH_VECTOR XXH_AVX2 //don't compile with gcc see xxhash.h line 4082
|
||||
// #define XXH_VECTOR XXH_AVX2 // don't compile with gcc see xxhash.h line 4082
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.c"
|
||||
#include "xxhash.h"
|
||||
|
||||
@@ -446,57 +446,7 @@ static void save_file_list(const char *list_path) {
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
static void load_file_list(const char *list_path) {
|
||||
FILE *f = fopen(list_path, "r");
|
||||
if (!f)
|
||||
return;
|
||||
|
||||
char line[MAX_PATHLEN];
|
||||
|
||||
while (fgets(line, sizeof(line), f)) {
|
||||
line[strcspn(line, "\r\n")] = 0;
|
||||
|
||||
FileEntry fe;
|
||||
memset(&fe, 0, sizeof(fe));
|
||||
|
||||
fe.path = line;
|
||||
normalize_path(fe.path);
|
||||
|
||||
/* Populate metadata from filesystem */
|
||||
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
|
||||
|
||||
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
|
||||
|
||||
global_entries_push(&fe);
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
// Read existing hashes into memory map for resume
|
||||
// Simple linear search mapping: returns 1 if path has hash found (and writes
|
||||
// into out_hex)
|
||||
|
||||
// ----------------------------- Get file metadata -------------------------
|
||||
static int find_hash_in_file(const char *hashfile, const char *path,
|
||||
char *out_hex) {
|
||||
FILE *f = fopen(hashfile, "r");
|
||||
if (!f)
|
||||
return 0;
|
||||
char p[MAX_PATHLEN];
|
||||
char h[128];
|
||||
int found = 0;
|
||||
while (fscanf(f, "%4095s %127s", p, h) == 2) {
|
||||
if (strcmp(p, path) == 0) {
|
||||
strncpy(out_hex, h, HASH_STRLEN);
|
||||
out_hex[HASH_STRLEN - 1] = 0;
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
return found;
|
||||
}
|
||||
void platform_get_file_times(const char *path, uint64_t *out_created,
|
||||
uint64_t *out_modified) {
|
||||
WIN32_FILE_ATTRIBUTE_DATA fad;
|
||||
@@ -518,7 +468,6 @@ void platform_get_file_owner(const char *path, char *out_owner,
|
||||
int main(int argc, char **argv) {
|
||||
char folders[64][MAX_PATHLEN]; // up to 64 input folders
|
||||
int folder_count = 0;
|
||||
int resume = 0;
|
||||
|
||||
// -------------------------------
|
||||
// Scanning and total timer init
|
||||
@@ -536,21 +485,17 @@ int main(int argc, char **argv) {
|
||||
// Parse arguments
|
||||
// -------------------------------
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (strcmp(argv[i], "-resume") == 0) {
|
||||
resume = 1;
|
||||
} else {
|
||||
if (folder_count < 64) {
|
||||
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
|
||||
folders[folder_count][MAX_PATHLEN - 1] = 0;
|
||||
folder_count++;
|
||||
}
|
||||
if (folder_count < 64) {
|
||||
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
|
||||
folders[folder_count][MAX_PATHLEN - 1] = 0;
|
||||
folder_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------
|
||||
// Ask user if no folders provided
|
||||
// -------------------------------
|
||||
if (folder_count == 0 && !resume) {
|
||||
if (folder_count == 0) {
|
||||
printf("Enter folder to process (Enter = current folder): ");
|
||||
fflush(stdout);
|
||||
|
||||
@@ -565,9 +510,6 @@ int main(int argc, char **argv) {
|
||||
strncpy(folders[0], buf, MAX_PATHLEN - 1);
|
||||
|
||||
folder_count = 1;
|
||||
} else if (folder_count == 0 && resume) {
|
||||
strcpy(folders[0], ".");
|
||||
folder_count = 1;
|
||||
}
|
||||
|
||||
// -------------------------------
|
||||
@@ -611,86 +553,60 @@ int main(int argc, char **argv) {
|
||||
// -------------------------------
|
||||
InitializeCriticalSection(&g_entries_cs);
|
||||
|
||||
if (!resume) {
|
||||
DirQueue q;
|
||||
memset(&q, 0, sizeof(q));
|
||||
InitializeCriticalSection(&q.cs);
|
||||
InitializeConditionVariable(&q.cv);
|
||||
q.active = 0;
|
||||
DirQueue q;
|
||||
memset(&q, 0, sizeof(q));
|
||||
InitializeCriticalSection(&q.cs);
|
||||
InitializeConditionVariable(&q.cv);
|
||||
q.active = 0;
|
||||
|
||||
HANDLE scan_progress =
|
||||
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
|
||||
HANDLE scan_progress =
|
||||
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
|
||||
|
||||
for (int i = 0; i < folder_count; ++i) {
|
||||
dirqueue_push(&q, folders[i]);
|
||||
}
|
||||
|
||||
size_t scan_threads = hw_threads;
|
||||
if (scan_threads < 2)
|
||||
scan_threads = 2;
|
||||
|
||||
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
|
||||
for (size_t i = 0; i < scan_threads; ++i) {
|
||||
scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker,
|
||||
&q, 0, NULL);
|
||||
}
|
||||
|
||||
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
||||
|
||||
atomic_store(&g_scan_done, 1);
|
||||
WaitForSingleObject(scan_progress, INFINITE);
|
||||
CloseHandle(scan_progress);
|
||||
|
||||
for (size_t i = 0; i < scan_threads; ++i)
|
||||
CloseHandle(scan_tids[i]);
|
||||
free(scan_tids);
|
||||
|
||||
double scan_seconds = timer_stop(&scan_timer);
|
||||
double scan_rate = (double)g_entry_count / scan_seconds;
|
||||
|
||||
printf(". Scan rate : %.1f files/sec\n", scan_rate);
|
||||
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
|
||||
FILE_LIST_TXT);
|
||||
save_file_list(FILE_LIST_TXT);
|
||||
|
||||
} else {
|
||||
if (!file_exists(FILE_LIST_TXT)) {
|
||||
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
|
||||
return 1;
|
||||
}
|
||||
load_file_list(FILE_LIST_TXT);
|
||||
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
|
||||
for (int i = 0; i < folder_count; ++i) {
|
||||
dirqueue_push(&q, folders[i]);
|
||||
}
|
||||
|
||||
size_t scan_threads = hw_threads;
|
||||
if (scan_threads < 2)
|
||||
scan_threads = 2;
|
||||
|
||||
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
|
||||
for (size_t i = 0; i < scan_threads; ++i) {
|
||||
scan_tids[i] =
|
||||
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL);
|
||||
}
|
||||
|
||||
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
||||
|
||||
atomic_store(&g_scan_done, 1);
|
||||
WaitForSingleObject(scan_progress, INFINITE);
|
||||
CloseHandle(scan_progress);
|
||||
|
||||
for (size_t i = 0; i < scan_threads; ++i)
|
||||
CloseHandle(scan_tids[i]);
|
||||
free(scan_tids);
|
||||
|
||||
double scan_seconds = timer_stop(&scan_timer);
|
||||
double scan_rate = (double)g_entry_count / scan_seconds;
|
||||
|
||||
printf(". Scan rate : %.1f files/sec\n", scan_rate);
|
||||
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
|
||||
FILE_LIST_TXT);
|
||||
save_file_list(FILE_LIST_TXT);
|
||||
|
||||
if (g_entry_count == 0) {
|
||||
printf("No files to process.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
DeleteCriticalSection(&g_entries_cs);
|
||||
// If resume: create map of which files are already hashed
|
||||
char **existing_hash = calloc(g_entry_count, sizeof(char *));
|
||||
for (size_t i = 0; i < g_entry_count; ++i)
|
||||
existing_hash[i] = NULL;
|
||||
|
||||
if (resume && file_exists(FILE_HASHES_TXT)) {
|
||||
// For simplicity we parse hash file and match lines to list entries.
|
||||
for (size_t i = 0; i < g_entry_count; ++i) {
|
||||
char hex[HASH_STRLEN] = {0};
|
||||
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
|
||||
existing_hash[i] = strdup(hex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare job queue of only missing files (or all if not resume)
|
||||
// Prepare job queue
|
||||
JobQueue queue;
|
||||
jobqueue_init(&queue);
|
||||
|
||||
size_t total_jobs = 0;
|
||||
for (size_t i = 0; i < g_entry_count; ++i) {
|
||||
if (resume && existing_hash[i])
|
||||
continue;
|
||||
Job *j = (Job *)malloc(sizeof(Job));
|
||||
j->file = &g_entries[i];
|
||||
j->next = NULL;
|
||||
@@ -703,13 +619,9 @@ int main(int argc, char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Remove old hashes file if we're recomputing from scratch.
|
||||
if (!resume) {
|
||||
// create/overwrite hashes file
|
||||
FILE *hf = fopen(FILE_HASHES_TXT, "w");
|
||||
if (hf)
|
||||
fclose(hf);
|
||||
} // if resume, we append only missing
|
||||
FILE *hf = fopen(FILE_HASHES_TXT, "w");
|
||||
if (hf)
|
||||
fclose(hf);
|
||||
|
||||
// Starting thread pool
|
||||
atomic_size_t done_counter;
|
||||
@@ -825,17 +737,5 @@ int main(int argc, char **argv) {
|
||||
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
|
||||
printf(" Total time : %.2f seconds\n", total_seconds);
|
||||
|
||||
// If resume: we appended missing entries. If not resume: we wrote all results
|
||||
// during workers. Note: This program appends hashes as workers finish. This
|
||||
// avoids holding all hashes in RAM.
|
||||
|
||||
// Cleanup
|
||||
for (size_t i = 0; i < g_entry_count; ++i)
|
||||
if (existing_hash[i])
|
||||
free(existing_hash[i]);
|
||||
free(existing_hash);
|
||||
|
||||
free_entries();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
2
xxhash.h
2
xxhash.h
@@ -5101,7 +5101,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
||||
const void* XXH_RESTRICT secret)
|
||||
{
|
||||
//DEBUG
|
||||
// printf("\nUsing AVX2\n");
|
||||
printf("\nUsing AVX2\n");
|
||||
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
||||
{ __m256i* const xacc = (__m256i *) acc;
|
||||
/* Unaligned. This is mainly for pointer arithmetic, and because
|
||||
|
||||
Reference in New Issue
Block a user