Removing -resume functionality
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
file_hasher.exe
|
||||||
|
file_hashes.txt
|
||||||
|
file_list.txt
|
||||||
@@ -31,7 +31,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define XXH_VECTOR XXH_AVX2 //don't compile with gcc see xxhash.h line 4082
|
// #define XXH_VECTOR XXH_AVX2 // don't compile with gcc see xxhash.h line 4082
|
||||||
#define XXH_INLINE_ALL
|
#define XXH_INLINE_ALL
|
||||||
#include "xxhash.c"
|
#include "xxhash.c"
|
||||||
#include "xxhash.h"
|
#include "xxhash.h"
|
||||||
|
|||||||
@@ -446,57 +446,7 @@ static void save_file_list(const char *list_path) {
|
|||||||
fclose(f);
|
fclose(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void load_file_list(const char *list_path) {
|
|
||||||
FILE *f = fopen(list_path, "r");
|
|
||||||
if (!f)
|
|
||||||
return;
|
|
||||||
|
|
||||||
char line[MAX_PATHLEN];
|
|
||||||
|
|
||||||
while (fgets(line, sizeof(line), f)) {
|
|
||||||
line[strcspn(line, "\r\n")] = 0;
|
|
||||||
|
|
||||||
FileEntry fe;
|
|
||||||
memset(&fe, 0, sizeof(fe));
|
|
||||||
|
|
||||||
fe.path = line;
|
|
||||||
normalize_path(fe.path);
|
|
||||||
|
|
||||||
/* Populate metadata from filesystem */
|
|
||||||
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
|
|
||||||
|
|
||||||
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
|
|
||||||
|
|
||||||
global_entries_push(&fe);
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(f);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read existing hashes into memory map for resume
|
|
||||||
// Simple linear search mapping: returns 1 if path has hash found (and writes
|
|
||||||
// into out_hex)
|
|
||||||
|
|
||||||
// ----------------------------- Get file metadata -------------------------
|
// ----------------------------- Get file metadata -------------------------
|
||||||
static int find_hash_in_file(const char *hashfile, const char *path,
|
|
||||||
char *out_hex) {
|
|
||||||
FILE *f = fopen(hashfile, "r");
|
|
||||||
if (!f)
|
|
||||||
return 0;
|
|
||||||
char p[MAX_PATHLEN];
|
|
||||||
char h[128];
|
|
||||||
int found = 0;
|
|
||||||
while (fscanf(f, "%4095s %127s", p, h) == 2) {
|
|
||||||
if (strcmp(p, path) == 0) {
|
|
||||||
strncpy(out_hex, h, HASH_STRLEN);
|
|
||||||
out_hex[HASH_STRLEN - 1] = 0;
|
|
||||||
found = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fclose(f);
|
|
||||||
return found;
|
|
||||||
}
|
|
||||||
void platform_get_file_times(const char *path, uint64_t *out_created,
|
void platform_get_file_times(const char *path, uint64_t *out_created,
|
||||||
uint64_t *out_modified) {
|
uint64_t *out_modified) {
|
||||||
WIN32_FILE_ATTRIBUTE_DATA fad;
|
WIN32_FILE_ATTRIBUTE_DATA fad;
|
||||||
@@ -518,7 +468,6 @@ void platform_get_file_owner(const char *path, char *out_owner,
|
|||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
char folders[64][MAX_PATHLEN]; // up to 64 input folders
|
char folders[64][MAX_PATHLEN]; // up to 64 input folders
|
||||||
int folder_count = 0;
|
int folder_count = 0;
|
||||||
int resume = 0;
|
|
||||||
|
|
||||||
// -------------------------------
|
// -------------------------------
|
||||||
// Scanning and total timer init
|
// Scanning and total timer init
|
||||||
@@ -536,21 +485,17 @@ int main(int argc, char **argv) {
|
|||||||
// Parse arguments
|
// Parse arguments
|
||||||
// -------------------------------
|
// -------------------------------
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
if (strcmp(argv[i], "-resume") == 0) {
|
if (folder_count < 64) {
|
||||||
resume = 1;
|
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
|
||||||
} else {
|
folders[folder_count][MAX_PATHLEN - 1] = 0;
|
||||||
if (folder_count < 64) {
|
folder_count++;
|
||||||
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
|
|
||||||
folders[folder_count][MAX_PATHLEN - 1] = 0;
|
|
||||||
folder_count++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------
|
// -------------------------------
|
||||||
// Ask user if no folders provided
|
// Ask user if no folders provided
|
||||||
// -------------------------------
|
// -------------------------------
|
||||||
if (folder_count == 0 && !resume) {
|
if (folder_count == 0) {
|
||||||
printf("Enter folder to process (Enter = current folder): ");
|
printf("Enter folder to process (Enter = current folder): ");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
|
|
||||||
@@ -565,9 +510,6 @@ int main(int argc, char **argv) {
|
|||||||
strncpy(folders[0], buf, MAX_PATHLEN - 1);
|
strncpy(folders[0], buf, MAX_PATHLEN - 1);
|
||||||
|
|
||||||
folder_count = 1;
|
folder_count = 1;
|
||||||
} else if (folder_count == 0 && resume) {
|
|
||||||
strcpy(folders[0], ".");
|
|
||||||
folder_count = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------
|
// -------------------------------
|
||||||
@@ -611,86 +553,60 @@ int main(int argc, char **argv) {
|
|||||||
// -------------------------------
|
// -------------------------------
|
||||||
InitializeCriticalSection(&g_entries_cs);
|
InitializeCriticalSection(&g_entries_cs);
|
||||||
|
|
||||||
if (!resume) {
|
DirQueue q;
|
||||||
DirQueue q;
|
memset(&q, 0, sizeof(q));
|
||||||
memset(&q, 0, sizeof(q));
|
InitializeCriticalSection(&q.cs);
|
||||||
InitializeCriticalSection(&q.cs);
|
InitializeConditionVariable(&q.cv);
|
||||||
InitializeConditionVariable(&q.cv);
|
q.active = 0;
|
||||||
q.active = 0;
|
|
||||||
|
|
||||||
HANDLE scan_progress =
|
HANDLE scan_progress =
|
||||||
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
|
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
|
||||||
|
|
||||||
for (int i = 0; i < folder_count; ++i) {
|
for (int i = 0; i < folder_count; ++i) {
|
||||||
dirqueue_push(&q, folders[i]);
|
dirqueue_push(&q, folders[i]);
|
||||||
}
|
|
||||||
|
|
||||||
size_t scan_threads = hw_threads;
|
|
||||||
if (scan_threads < 2)
|
|
||||||
scan_threads = 2;
|
|
||||||
|
|
||||||
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
|
|
||||||
for (size_t i = 0; i < scan_threads; ++i) {
|
|
||||||
scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker,
|
|
||||||
&q, 0, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
|
||||||
|
|
||||||
atomic_store(&g_scan_done, 1);
|
|
||||||
WaitForSingleObject(scan_progress, INFINITE);
|
|
||||||
CloseHandle(scan_progress);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < scan_threads; ++i)
|
|
||||||
CloseHandle(scan_tids[i]);
|
|
||||||
free(scan_tids);
|
|
||||||
|
|
||||||
double scan_seconds = timer_stop(&scan_timer);
|
|
||||||
double scan_rate = (double)g_entry_count / scan_seconds;
|
|
||||||
|
|
||||||
printf(". Scan rate : %.1f files/sec\n", scan_rate);
|
|
||||||
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
|
|
||||||
FILE_LIST_TXT);
|
|
||||||
save_file_list(FILE_LIST_TXT);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
if (!file_exists(FILE_LIST_TXT)) {
|
|
||||||
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
load_file_list(FILE_LIST_TXT);
|
|
||||||
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t scan_threads = hw_threads;
|
||||||
|
if (scan_threads < 2)
|
||||||
|
scan_threads = 2;
|
||||||
|
|
||||||
|
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
|
||||||
|
for (size_t i = 0; i < scan_threads; ++i) {
|
||||||
|
scan_tids[i] =
|
||||||
|
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
||||||
|
|
||||||
|
atomic_store(&g_scan_done, 1);
|
||||||
|
WaitForSingleObject(scan_progress, INFINITE);
|
||||||
|
CloseHandle(scan_progress);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < scan_threads; ++i)
|
||||||
|
CloseHandle(scan_tids[i]);
|
||||||
|
free(scan_tids);
|
||||||
|
|
||||||
|
double scan_seconds = timer_stop(&scan_timer);
|
||||||
|
double scan_rate = (double)g_entry_count / scan_seconds;
|
||||||
|
|
||||||
|
printf(". Scan rate : %.1f files/sec\n", scan_rate);
|
||||||
|
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
|
||||||
|
FILE_LIST_TXT);
|
||||||
|
save_file_list(FILE_LIST_TXT);
|
||||||
|
|
||||||
if (g_entry_count == 0) {
|
if (g_entry_count == 0) {
|
||||||
printf("No files to process.\n");
|
printf("No files to process.\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
DeleteCriticalSection(&g_entries_cs);
|
DeleteCriticalSection(&g_entries_cs);
|
||||||
// If resume: create map of which files are already hashed
|
|
||||||
char **existing_hash = calloc(g_entry_count, sizeof(char *));
|
|
||||||
for (size_t i = 0; i < g_entry_count; ++i)
|
|
||||||
existing_hash[i] = NULL;
|
|
||||||
|
|
||||||
if (resume && file_exists(FILE_HASHES_TXT)) {
|
// Prepare job queue
|
||||||
// For simplicity we parse hash file and match lines to list entries.
|
|
||||||
for (size_t i = 0; i < g_entry_count; ++i) {
|
|
||||||
char hex[HASH_STRLEN] = {0};
|
|
||||||
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
|
|
||||||
existing_hash[i] = strdup(hex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prepare job queue of only missing files (or all if not resume)
|
|
||||||
JobQueue queue;
|
JobQueue queue;
|
||||||
jobqueue_init(&queue);
|
jobqueue_init(&queue);
|
||||||
|
|
||||||
size_t total_jobs = 0;
|
size_t total_jobs = 0;
|
||||||
for (size_t i = 0; i < g_entry_count; ++i) {
|
for (size_t i = 0; i < g_entry_count; ++i) {
|
||||||
if (resume && existing_hash[i])
|
|
||||||
continue;
|
|
||||||
Job *j = (Job *)malloc(sizeof(Job));
|
Job *j = (Job *)malloc(sizeof(Job));
|
||||||
j->file = &g_entries[i];
|
j->file = &g_entries[i];
|
||||||
j->next = NULL;
|
j->next = NULL;
|
||||||
@@ -703,13 +619,9 @@ int main(int argc, char **argv) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove old hashes file if we're recomputing from scratch.
|
FILE *hf = fopen(FILE_HASHES_TXT, "w");
|
||||||
if (!resume) {
|
if (hf)
|
||||||
// create/overwrite hashes file
|
fclose(hf);
|
||||||
FILE *hf = fopen(FILE_HASHES_TXT, "w");
|
|
||||||
if (hf)
|
|
||||||
fclose(hf);
|
|
||||||
} // if resume, we append only missing
|
|
||||||
|
|
||||||
// Starting thread pool
|
// Starting thread pool
|
||||||
atomic_size_t done_counter;
|
atomic_size_t done_counter;
|
||||||
@@ -825,17 +737,5 @@ int main(int argc, char **argv) {
|
|||||||
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
|
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
|
||||||
printf(" Total time : %.2f seconds\n", total_seconds);
|
printf(" Total time : %.2f seconds\n", total_seconds);
|
||||||
|
|
||||||
// If resume: we appended missing entries. If not resume: we wrote all results
|
|
||||||
// during workers. Note: This program appends hashes as workers finish. This
|
|
||||||
// avoids holding all hashes in RAM.
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
for (size_t i = 0; i < g_entry_count; ++i)
|
|
||||||
if (existing_hash[i])
|
|
||||||
free(existing_hash[i]);
|
|
||||||
free(existing_hash);
|
|
||||||
|
|
||||||
free_entries();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
2
xxhash.h
2
xxhash.h
@@ -5101,7 +5101,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
|||||||
const void* XXH_RESTRICT secret)
|
const void* XXH_RESTRICT secret)
|
||||||
{
|
{
|
||||||
//DEBUG
|
//DEBUG
|
||||||
// printf("\nUsing AVX2\n");
|
printf("\nUsing AVX2\n");
|
||||||
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
||||||
{ __m256i* const xacc = (__m256i *) acc;
|
{ __m256i* const xacc = (__m256i *) acc;
|
||||||
/* Unaligned. This is mainly for pointer arithmetic, and because
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
||||||
|
|||||||
Reference in New Issue
Block a user