842 lines
23 KiB
C
842 lines
23 KiB
C
#include "platform.h"
|
|
|
|
// ----------------------------- Globals ------------------------------------
|
|
FileEntry *g_entries = NULL;
|
|
size_t g_entry_count = 0;
|
|
size_t g_entry_capacity = 0;
|
|
static atomic_int g_scan_done = 0;
|
|
static atomic_size_t g_files_found = 0;
|
|
static atomic_uint_fast64_t g_bytes_processed = 0;
|
|
// __________________________________________________________________________
|
|
static CRITICAL_SECTION g_entries_cs;
|
|
|
|
// ----------------------------- Utils --------------------------------------
|
|
// Report `msg` together with the current errno text on stderr (via perror)
// and terminate the process with exit status 1.
static void perror_exit(const char *msg) {
  perror(msg);
  exit(1);
}
|
|
|
|
// malloc() wrapper with an abort-on-failure policy: a NULL result is handed
// to perror_exit("malloc") instead of being returned to the caller.
static void *xmalloc(size_t n) {
  void *block = malloc(n);
  if (block != NULL)
    return block;

  perror_exit("malloc");
  return block;
}
|
|
|
|
static void global_entries_push(const FileEntry *src) {
|
|
if (g_entry_count == g_entry_capacity) {
|
|
size_t newcap = g_entry_capacity ? g_entry_capacity * 2 : 1024;
|
|
g_entries = realloc(g_entries, newcap * sizeof(FileEntry));
|
|
if (!g_entries)
|
|
perror_exit("realloc");
|
|
g_entry_capacity = newcap;
|
|
}
|
|
|
|
FileEntry *dst = &g_entries[g_entry_count++];
|
|
memset(dst, 0, sizeof(*dst));
|
|
|
|
dst->size_bytes = src->size_bytes;
|
|
dst->created_time = src->created_time;
|
|
dst->modified_time = src->modified_time;
|
|
dst->path = strdup(src->path);
|
|
strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
|
|
}
|
|
|
|
static void free_entries(void) {
|
|
for (size_t i = 0; i < g_entry_count; ++i) {
|
|
free(g_entries[i].path);
|
|
}
|
|
|
|
free(g_entries);
|
|
g_entries = NULL;
|
|
g_entry_count = 0;
|
|
g_entry_capacity = 0;
|
|
}
|
|
|
|
// ----------------------------- Convert filetime to epoch --------------
|
|
static uint64_t filetime_to_epoch(const FILETIME *ft) {
|
|
ULARGE_INTEGER ull;
|
|
ull.LowPart = ft->dwLowDateTime;
|
|
ull.HighPart = ft->dwHighDateTime;
|
|
|
|
// Windows epoch (1601) → Unix epoch (1970)
|
|
return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL;
|
|
}
|
|
|
|
// ----------------------------- Resolve file owner ---------------------
|
|
static void get_file_owner(const char *path, char *out, size_t out_sz) {
|
|
PSID sid = NULL;
|
|
PSECURITY_DESCRIPTOR sd = NULL;
|
|
|
|
if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
|
|
&sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) {
|
|
|
|
char name[64], domain[64];
|
|
DWORD name_len = sizeof(name);
|
|
DWORD domain_len = sizeof(domain);
|
|
SID_NAME_USE use;
|
|
|
|
if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len,
|
|
&use)) {
|
|
snprintf(out, out_sz, "%s\\%s", domain, name);
|
|
} else {
|
|
snprintf(out, out_sz, "UNKNOWN");
|
|
}
|
|
} else {
|
|
snprintf(out, out_sz, "UNKNOWN");
|
|
}
|
|
|
|
if (sd)
|
|
LocalFree(sd);
|
|
}
|
|
|
|
// ----------------------------- Format time helper -------------------------
|
|
// Format an epoch timestamp (seconds) as local time "YYYY-MM-DD HH:MM:SS".
//
// t:      seconds since the Unix epoch; 0 means "no timestamp".
// out:    destination buffer (20 bytes are needed for a full timestamp).
// out_sz: size of `out` in bytes.
//
// Writes "N/A" when t is 0, when the value cannot be converted to local time,
// or when the formatted text does not fit in `out`.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  struct tm tm;
  time_t tt = (time_t)t;
  int ok = (t != 0);

  if (ok) {
#if PLATFORM_WINDOWS
    ok = (localtime_s(&tm, &tt) == 0);
#else
    ok = localtime_r(&tt, &tm) ? 1 : 0;
#endif
  }

  // strftime returns 0 when the result does not fit; the buffer contents are
  // then unspecified, so fall through to the placeholder.
  if (ok)
    ok = (strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) != 0);

  if (!ok)
    snprintf(out, out_sz, "N/A");
}
|
|
|
|
// --------------- parallel directory scanning ----------------
|
|
// Put an EntryBuffer into its empty state: no storage, zero entries.
static void entrybuf_init(EntryBuffer *b) {
  b->capacity = 0;
  b->count = 0;
  b->entries = NULL;
}
|
|
|
|
// Append a deep copy of `src` to the buffer `b`.
//
// The buffer grows geometrically (256 slots first, then doubling). The path
// string is duplicated, so the caller keeps ownership of src->path. Any
// allocation failure terminates the process.
//
// b:   destination buffer.
// src: entry to copy; src->path must be a valid NUL-terminated string.
static void entrybuf_push(EntryBuffer *b, const FileEntry *src) {
  if (b->count == b->capacity) {
    size_t newcap = b->capacity ? b->capacity * 2 : 256;

    // Refuse a capacity whose byte size would wrap around size_t.
    if (newcap < b->capacity || newcap > SIZE_MAX / sizeof(FileEntry)) {
      fprintf(stderr, "realloc: entry buffer too large\n");
      exit(1);
    }

    // Keep the old pointer until the new block is known to be valid.
    FileEntry *grown = realloc(b->entries, newcap * sizeof(FileEntry));
    if (!grown)
      perror_exit("realloc");
    b->entries = grown;
    b->capacity = newcap;
  }

  // Duplicate the path before claiming a slot so a failed copy never leaves
  // an entry with a NULL path in the buffer.
  char *path_copy = strdup(src->path);
  if (!path_copy)
    perror_exit("strdup");

  FileEntry *dst = &b->entries[b->count++];
  memset(dst, 0, sizeof(*dst));

  dst->size_bytes = src->size_bytes;
  dst->created_time = src->created_time;
  dst->modified_time = src->modified_time;
  dst->path = path_copy;
  // The memset above guarantees the terminator after a size-1 bounded copy.
  strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
}
|
|
|
|
// Directory work-queue helper functions
|
|
// Add a copy of `path` to the directory queue and wake one waiting worker.
//
// The queue stores its own duplicate of the string; dirqueue_pop() hands that
// duplicate to the caller, who frees it. Allocation failures terminate the
// process instead of storing or dereferencing a NULL pointer.
static void dirqueue_push(DirQueue *q, const char *path) {
  // Copy outside the lock: it touches no shared state.
  char *copy = _strdup(path);
  if (!copy)
    perror_exit("strdup");

  EnterCriticalSection(&q->cs);

  if (q->count + 1 > q->cap) {
    size_t newcap = q->cap ? q->cap * 2 : 1024;
    // Commit the new capacity only after the allocation succeeded.
    char **grown = realloc(q->items, newcap * sizeof(char *));
    if (!grown)
      perror_exit("realloc");
    q->items = grown;
    q->cap = newcap;
  }

  q->items[q->count++] = copy;

  WakeConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}
|
|
|
|
// Take the most recently queued directory (the queue is used as a stack).
//
// Blocks while the queue is empty but some directory is still being
// processed (q->active > 0), since that work may enqueue more directories.
// Returns NULL once the queue is empty and nothing is active. Otherwise
// returns the queued string and counts the caller as active; the caller must
// free() the string and call dirqueue_done() afterwards.
static char *dirqueue_pop(DirQueue *q) {
  char *next = NULL;

  EnterCriticalSection(&q->cs);

  while (q->count == 0 && q->active > 0)
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);

  int drained = (q->count == 0 && q->active == 0);
  if (!drained) {
    next = q->items[--q->count];
    q->active++;
  }

  LeaveCriticalSection(&q->cs);
  return next;
}
|
|
|
|
// Mark one popped directory as finished and wake every waiter so each can
// re-check whether more work exists or the scan is complete.
static void dirqueue_done(DirQueue *q) {
  EnterCriticalSection(&q->cs);
  q->active -= 1;
  WakeAllConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}
|
|
// Thread entry point for one directory-scan worker.
//
// arg: the shared DirQueue.
//
// Pops directories until the queue reports completion, scanning each into a
// thread-local buffer so the hot path takes no lock on the global table. The
// local results are merged into g_entries once, at the end, under
// g_entries_cs. The merge is a shallow copy: ownership of each path string
// moves to the global table, so only the local array itself is freed here.
// Always returns 0.
static DWORD WINAPI scan_worker(LPVOID arg) {
  DirQueue *q = (DirQueue *)arg;

  EntryBuffer local;
  entrybuf_init(&local);

  char *dir;
  while ((dir = dirqueue_pop(q)) != NULL) {
    scan_folder_windows_parallel(dir, q, &local);
    free(dir);
    dirqueue_done(q);
  }

  // Skip the merge when nothing was found: memcpy must not be called with a
  // NULL source/destination, even for zero bytes.
  if (local.count > 0) {
    EnterCriticalSection(&g_entries_cs);

    size_t needed = g_entry_count + local.count;
    if (needed > g_entry_capacity) {
      FileEntry *grown = realloc(g_entries, needed * sizeof(FileEntry));
      if (!grown)
        perror_exit("realloc");
      g_entries = grown;
      g_entry_capacity = needed;
    }

    memcpy(&g_entries[g_entry_count], local.entries,
           local.count * sizeof(FileEntry));
    g_entry_count = needed;

    LeaveCriticalSection(&g_entries_cs);
  }

  free(local.entries);
  return 0;
}
|
|
|
|
// Scanning directory function
|
|
void scan_folder_windows_parallel(const char *base, DirQueue *q,
|
|
EntryBuffer *buf) {
|
|
char search[MAX_PATHLEN];
|
|
snprintf(search, sizeof(search), "%s\\*", base);
|
|
|
|
WIN32_FIND_DATAA fd;
|
|
HANDLE h = FindFirstFileA(search, &fd);
|
|
if (h == INVALID_HANDLE_VALUE)
|
|
return;
|
|
|
|
do {
|
|
if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
|
|
continue;
|
|
|
|
char full[MAX_PATHLEN];
|
|
snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
|
|
|
|
if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
|
|
continue;
|
|
|
|
if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
|
|
dirqueue_push(q, full);
|
|
} else {
|
|
|
|
atomic_fetch_add(&g_files_found, 1);
|
|
|
|
FileEntry fe;
|
|
memset(&fe, 0, sizeof(fe));
|
|
|
|
char norm[MAX_PATHLEN];
|
|
strncpy(norm, full, sizeof(norm) - 1);
|
|
norm[sizeof(norm) - 1] = 0;
|
|
normalize_path(norm);
|
|
fe.path = norm;
|
|
|
|
platform_get_file_times(full, &fe.created_time, &fe.modified_time);
|
|
|
|
platform_get_file_owner(full, fe.owner, sizeof(fe.owner));
|
|
|
|
LARGE_INTEGER size;
|
|
HANDLE hf =
|
|
CreateFileA(full, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
|
|
|
if (hf != INVALID_HANDLE_VALUE) {
|
|
if (GetFileSizeEx(hf, &size))
|
|
fe.size_bytes = (uint64_t)size.QuadPart;
|
|
CloseHandle(hf);
|
|
}
|
|
|
|
entrybuf_push(buf, &fe);
|
|
}
|
|
} while (FindNextFileA(h, &fd));
|
|
|
|
FindClose(h);
|
|
}
|
|
|
|
// Scan progress thread
|
|
// Thread entry point that redraws the "files found" counter on one console
// line until g_scan_done becomes non-zero. The argument is unused.
// Always returns 0.
static DWORD WINAPI scan_progress_thread(LPVOID arg) {
  (void)arg;

  while (!atomic_load(&g_scan_done)) {
    Sleep(100); // refresh interval: 100 ms

    size_t seen = atomic_load(&g_files_found);
    printf("\rScanning... %zu files found", seen);
    fflush(stdout);
  }

  return 0;
}
|
|
|
|
// ----------------------------- Job queue ----------------------------------
|
|
// Prepare an empty job queue: no jobs, count 0, not stopped, with its
// critical section and condition variable initialised.
static void jobqueue_init(JobQueue *q) {
  InitializeCriticalSection(&q->cs);
  InitializeConditionVariable(&q->cv);

  q->head = NULL;
  q->tail = NULL;
  q->stop = 0;
  atomic_store(&q->count, 0);
}
|
|
|
|
// Append `job` at the tail of the FIFO queue, bump the job counter and wake
// one waiting consumer.
static void jobqueue_push(JobQueue *q, Job *job) {
  EnterCriticalSection(&q->cs);

  job->next = NULL;
  if (q->tail == NULL) {
    q->head = job; // queue was empty
  } else {
    q->tail->next = job;
  }
  q->tail = job;

  atomic_fetch_add(&q->count, 1);
  WakeConditionVariable(&q->cv);

  LeaveCriticalSection(&q->cs);
}
|
|
|
|
// Remove and return the job at the head of the queue.
//
// Blocks while the queue is empty and not stopped. Returns NULL only when the
// queue has been stopped and holds no more jobs; jobs still queued at stop
// time are handed out first.
static Job *jobqueue_pop(JobQueue *q) {
  Job *taken = NULL;

  EnterCriticalSection(&q->cs);

  while (q->head == NULL && !q->stop)
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);

  // The wait ends with either a job available or, if not, the stop flag set.
  if (q->head != NULL) {
    taken = q->head;
    q->head = taken->next;
    if (q->head == NULL)
      q->tail = NULL;
  }

  LeaveCriticalSection(&q->cs);

  if (taken != NULL)
    atomic_fetch_sub(&q->count, 1);
  return taken;
}
|
|
|
|
// Raise the stop flag and wake every consumer blocked in jobqueue_pop() so
// each can drain remaining jobs and then observe the stop.
static void jobqueue_stop(JobQueue *q) {
  EnterCriticalSection(&q->cs);
  q->stop = 1;
  WakeAllConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}
|
|
|
|
// ----------------------------- Hashing helpers -----------------------------
|
|
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
|
|
// compute XXH3_128 over file. POSIX and Windows use standard reads in this
|
|
// helper.
|
|
// On Windows try to use overlapped synchronous chunked reads for higher
|
|
// throughput.
|
|
HANDLE hFile =
|
|
CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
|
|
OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
|
|
if (hFile == INVALID_HANDLE_VALUE) {
|
|
strcpy(out_hex, "ERROR");
|
|
return;
|
|
}
|
|
XXH128_hash_t h;
|
|
XXH3_state_t *state = XXH3_createState();
|
|
XXH3_128bits_reset(state);
|
|
|
|
BYTE *buf = (BYTE *)malloc(READ_BLOCK);
|
|
DWORD read = 0;
|
|
BOOL ok;
|
|
while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) {
|
|
XXH3_128bits_update(state, buf, (size_t)read);
|
|
atomic_fetch_add(&g_bytes_processed, (uint64_t)read);
|
|
}
|
|
h = XXH3_128bits_digest(state);
|
|
XXH3_freeState(state);
|
|
CloseHandle(hFile);
|
|
free(buf);
|
|
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
|
|
(unsigned long long)h.low64);
|
|
}
|
|
|
|
// ----------------------------- Worker --------------------------------------
|
|
// Thread entry point for one hashing worker.
//
// argp: the shared WorkerArg (job queue, done counter, live-worker counter).
//
// Repeatedly pops a job, hashes its file and appends one tab-separated record
// to FILE_HASHES_TXT:
//   hash, path, size in KiB, created time, modified time, owner.
// The done counter is incremented for every job, even if the record could
// not be appended. When the queue reports completion the live-worker counter
// is decremented and the thread returns 0.
static DWORD WINAPI worker_thread_windows(LPVOID argp) {
  // Lock serialising appends to the hashes file, shared by all workers.
  // State: 0 = untouched, 1 = being initialised, 2 = ready.
  static CRITICAL_SECTION append_cs;
  static volatile LONG append_cs_state = 0;

  WorkerArg *w = (WorkerArg *)argp;
  JobQueue *q = w->queue;

  // Exactly one thread wins the 0 -> 1 transition and initialises the lock;
  // all others wait until it is marked ready. (Comparing against 1, as the
  // previous code did, let several threads initialise it concurrently.)
  if (InterlockedCompareExchange(&append_cs_state, 1, 0) == 0) {
    InitializeCriticalSection(&append_cs);
    InterlockedExchange(&append_cs_state, 2);
  } else {
    while (InterlockedCompareExchange(&append_cs_state, 2, 2) != 2)
      Sleep(0);
  }

  for (;;) {
    Job *job = jobqueue_pop(q);
    if (!job)
      break;

    // Hashing runs outside the lock so workers overlap their file reads.
    char hex[HASH_STRLEN];
    xxh3_hash_file_stream(job->file->path, hex);

    EnterCriticalSection(&append_cs);
    FILE *hf = fopen(FILE_HASHES_TXT, "a");
    if (hf) {
      char created[32], modified[32];

      format_time(job->file->created_time, created, sizeof(created));
      format_time(job->file->modified_time, modified, sizeof(modified));
      double size_kib = (double)job->file->size_bytes / (1024.0);

      fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib,
              created, modified, job->file->owner);
      fclose(hf);
    }
    LeaveCriticalSection(&append_cs);

    atomic_fetch_add(w->done_counter, 1);
    free(job);
  }

  atomic_fetch_sub(w->live_workers, 1);
  return 0;
}
|
|
|
|
// ----------------------------- Progress display ---------------------------
|
|
// Redraw a 40-column text progress bar on the current console line.
//
// done:  number of finished items.
// total: number of items overall; 0 is shown as 0%.
static void print_progress(size_t done, size_t total) {
  enum { BAR_WIDTH = 40 };

  double fraction = 0.0;
  if (total != 0)
    fraction = (double)done / (double)total;

  // Round to the nearest whole column.
  int hashes = (int)(fraction * BAR_WIDTH + 0.5);

  printf("\r[");
  int col = 0;
  while (col < hashes) {
    putchar('#');
    ++col;
  }
  while (col < BAR_WIDTH) {
    putchar(' ');
    ++col;
  }
  printf("] %6.2f%% (%zu / %zu) ", fraction * 100.0, done, total);
  fflush(stdout);
}
|
|
|
|
// ----------------------------- Helpers: load/save --------------------------
|
|
static int file_exists(const char *path) {
|
|
DWORD attr = GetFileAttributesA(path);
|
|
return attr != INVALID_FILE_ATTRIBUTES;
|
|
}
|
|
|
|
static void save_file_list(const char *list_path) {
|
|
FILE *f = fopen(list_path, "w");
|
|
if (!f) {
|
|
perror("fopen file_list");
|
|
return;
|
|
}
|
|
for (size_t i = 0; i < g_entry_count; ++i) {
|
|
fprintf(f, "%s\n", g_entries[i].path);
|
|
}
|
|
fclose(f);
|
|
}
|
|
|
|
static void load_file_list(const char *list_path) {
|
|
FILE *f = fopen(list_path, "r");
|
|
if (!f)
|
|
return;
|
|
|
|
char line[MAX_PATHLEN];
|
|
|
|
while (fgets(line, sizeof(line), f)) {
|
|
line[strcspn(line, "\r\n")] = 0;
|
|
|
|
FileEntry fe;
|
|
memset(&fe, 0, sizeof(fe));
|
|
|
|
fe.path = line;
|
|
normalize_path(fe.path);
|
|
|
|
/* Populate metadata from filesystem */
|
|
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
|
|
|
|
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
|
|
|
|
global_entries_push(&fe);
|
|
}
|
|
|
|
fclose(f);
|
|
}
|
|
|
|
// Resume support: look up whether a path already has a hash recorded in the
// hashes file. This is a simple linear search of the file; it returns 1 if
// the path is found (and writes the hash into out_hex).

// ----------------------------- Resume lookup / file metadata ---------------
|
|
// Look up the hash already recorded for `path` in the hashes file.
//
// hashfile: file of tab-separated records whose first field is the hash and
//           whose second field is the path (the layout the hashing workers
//           append); any further fields are ignored.
// path:     path to search for; compared exactly, may contain spaces.
// out_hex:  buffer of at least HASH_STRLEN bytes; receives the hash (NUL
//           terminated, truncated to fit) when the path is found.
//
// Returns 1 when a record for `path` exists, 0 when it does not or when the
// file cannot be opened. The file is scanned linearly and the first match
// wins. Records longer than the line buffer are read in pieces and will
// simply not match.
static int find_hash_in_file(const char *hashfile, const char *path,
                             char *out_hex) {
  FILE *f = fopen(hashfile, "r");
  if (!f)
    return 0;

  // Room for a maximum-length path plus the hash and metadata fields.
  char line[MAX_PATHLEN + 256];
  int found = 0;

  while (!found && fgets(line, sizeof(line), f)) {
    line[strcspn(line, "\r\n")] = '\0';

    // Field 1: hash, terminated by the first tab.
    char *hash_end = strchr(line, '\t');
    if (!hash_end)
      continue; // not a record
    *hash_end = '\0';

    // Field 2: path, terminated by the next tab (or end of line).
    char *rec_path = hash_end + 1;
    char *path_end = strchr(rec_path, '\t');
    if (path_end)
      *path_end = '\0';

    if (strcmp(rec_path, path) == 0) {
      snprintf(out_hex, HASH_STRLEN, "%s", line);
      found = 1;
    }
  }

  fclose(f);
  return found;
}
|
|
void platform_get_file_times(const char *path, uint64_t *out_created,
|
|
uint64_t *out_modified) {
|
|
WIN32_FILE_ATTRIBUTE_DATA fad;
|
|
if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) {
|
|
*out_created = filetime_to_epoch(&fad.ftCreationTime);
|
|
*out_modified = filetime_to_epoch(&fad.ftLastWriteTime);
|
|
} else {
|
|
*out_created = 0;
|
|
*out_modified = 0;
|
|
}
|
|
}
|
|
|
|
// Platform-layer entry point for owner lookup: forwards to get_file_owner(),
// which fills `out_owner` (at most out_owner_size bytes) for `path`.
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  get_file_owner(path, out_owner, out_owner_size);
}
|
|
|
|
// ----------------------------- Main ---------------------------------------
|
|
int main(int argc, char **argv) {
|
|
char folders[64][MAX_PATHLEN]; // up to 64 input folders
|
|
int folder_count = 0;
|
|
int resume = 0;
|
|
|
|
// -------------------------------
|
|
// Scanning and total timer init
|
|
// -------------------------------
|
|
timer_init();
|
|
|
|
HiResTimer total_timer;
|
|
HiResTimer scan_timer;
|
|
HiResTimer hash_timer;
|
|
|
|
timer_start(&total_timer);
|
|
timer_start(&scan_timer);
|
|
|
|
// -------------------------------
|
|
// Parse arguments
|
|
// -------------------------------
|
|
for (int i = 1; i < argc; ++i) {
|
|
if (strcmp(argv[i], "-resume") == 0) {
|
|
resume = 1;
|
|
} else {
|
|
if (folder_count < 64) {
|
|
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
|
|
folders[folder_count][MAX_PATHLEN - 1] = 0;
|
|
folder_count++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// -------------------------------
|
|
// Ask user if no folders provided
|
|
// -------------------------------
|
|
if (folder_count == 0 && !resume) {
|
|
printf("Enter folder to process (Enter = current folder): ");
|
|
fflush(stdout);
|
|
|
|
char buf[MAX_PATHLEN];
|
|
if (!fgets(buf, sizeof(buf), stdin))
|
|
return 1;
|
|
buf[strcspn(buf, "\r\n")] = 0;
|
|
|
|
if (buf[0] == 0)
|
|
strcpy(folders[0], ".");
|
|
else
|
|
strncpy(folders[0], buf, MAX_PATHLEN - 1);
|
|
|
|
folder_count = 1;
|
|
} else if (folder_count == 0 && resume) {
|
|
strcpy(folders[0], ".");
|
|
folder_count = 1;
|
|
}
|
|
|
|
// -------------------------------
|
|
// Display selected folders
|
|
// -------------------------------
|
|
printf("Processing %d folder(s):\n", folder_count);
|
|
for (int i = 0; i < folder_count; ++i) {
|
|
printf(" - %s\n", folders[i]);
|
|
}
|
|
|
|
// -------------------------------
|
|
// Detect hardware threads (CPU cores)
|
|
// -------------------------------
|
|
size_t hw_threads = 1;
|
|
// --- Windows: detect PHYSICAL cores (not logical threads) ---
|
|
DWORD len = 0;
|
|
GetLogicalProcessorInformation(NULL, &len);
|
|
|
|
SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf =
|
|
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)malloc(len);
|
|
|
|
if (GetLogicalProcessorInformation(buf, &len)) {
|
|
DWORD count = 0;
|
|
DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
|
|
for (DWORD i = 0; i < n; i++) {
|
|
if (buf[i].Relationship == RelationProcessorCore)
|
|
count++;
|
|
}
|
|
if (count > 0)
|
|
hw_threads = count;
|
|
}
|
|
free(buf);
|
|
|
|
// Add some extra threads to overlap I/O more aggressively
|
|
size_t num_threads = hw_threads * 2;
|
|
if (num_threads < 2)
|
|
num_threads = 2;
|
|
|
|
// -------------------------------
|
|
// Step 1: Scan all folders
|
|
// -------------------------------
|
|
InitializeCriticalSection(&g_entries_cs);
|
|
|
|
if (!resume) {
|
|
DirQueue q;
|
|
memset(&q, 0, sizeof(q));
|
|
InitializeCriticalSection(&q.cs);
|
|
InitializeConditionVariable(&q.cv);
|
|
q.active = 0;
|
|
|
|
HANDLE scan_progress =
|
|
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
|
|
|
|
for (int i = 0; i < folder_count; ++i) {
|
|
dirqueue_push(&q, folders[i]);
|
|
}
|
|
|
|
size_t scan_threads = hw_threads;
|
|
if (scan_threads < 2)
|
|
scan_threads = 2;
|
|
|
|
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
|
|
for (size_t i = 0; i < scan_threads; ++i) {
|
|
scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker,
|
|
&q, 0, NULL);
|
|
}
|
|
|
|
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
|
|
|
atomic_store(&g_scan_done, 1);
|
|
WaitForSingleObject(scan_progress, INFINITE);
|
|
CloseHandle(scan_progress);
|
|
|
|
for (size_t i = 0; i < scan_threads; ++i)
|
|
CloseHandle(scan_tids[i]);
|
|
free(scan_tids);
|
|
|
|
double scan_seconds = timer_stop(&scan_timer);
|
|
double scan_rate = (double)g_entry_count / scan_seconds;
|
|
|
|
printf(". Scan rate : %.1f files/sec\n", scan_rate);
|
|
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
|
|
FILE_LIST_TXT);
|
|
save_file_list(FILE_LIST_TXT);
|
|
|
|
} else {
|
|
if (!file_exists(FILE_LIST_TXT)) {
|
|
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
|
|
return 1;
|
|
}
|
|
load_file_list(FILE_LIST_TXT);
|
|
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
|
|
}
|
|
|
|
if (g_entry_count == 0) {
|
|
printf("No files to process.\n");
|
|
return 0;
|
|
}
|
|
|
|
DeleteCriticalSection(&g_entries_cs);
|
|
// If resume: create map of which files are already hashed
|
|
char **existing_hash = calloc(g_entry_count, sizeof(char *));
|
|
for (size_t i = 0; i < g_entry_count; ++i)
|
|
existing_hash[i] = NULL;
|
|
|
|
if (resume && file_exists(FILE_HASHES_TXT)) {
|
|
// For simplicity we parse hash file and match lines to list entries.
|
|
for (size_t i = 0; i < g_entry_count; ++i) {
|
|
char hex[HASH_STRLEN] = {0};
|
|
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
|
|
existing_hash[i] = strdup(hex);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Prepare job queue of only missing files (or all if not resume)
|
|
JobQueue queue;
|
|
jobqueue_init(&queue);
|
|
|
|
size_t total_jobs = 0;
|
|
for (size_t i = 0; i < g_entry_count; ++i) {
|
|
if (resume && existing_hash[i])
|
|
continue;
|
|
Job *j = (Job *)malloc(sizeof(Job));
|
|
j->file = &g_entries[i];
|
|
j->next = NULL;
|
|
jobqueue_push(&queue, j);
|
|
++total_jobs;
|
|
}
|
|
|
|
if (total_jobs == 0) {
|
|
printf("Nothing to do — all files already hashed.\n");
|
|
return 0;
|
|
}
|
|
|
|
// Remove old hashes file if we're recomputing from scratch.
|
|
if (!resume) {
|
|
// create/overwrite hashes file
|
|
FILE *hf = fopen(FILE_HASHES_TXT, "w");
|
|
if (hf)
|
|
fclose(hf);
|
|
} // if resume, we append only missing
|
|
|
|
// Starting thread pool
|
|
atomic_size_t done_counter;
|
|
atomic_store(&done_counter, 0);
|
|
atomic_int live_workers;
|
|
atomic_store(&live_workers, (int)num_threads);
|
|
|
|
WorkerArg warg = {.queue = &queue,
|
|
.done_counter = &done_counter,
|
|
.total_jobs = total_jobs,
|
|
.live_workers = &live_workers};
|
|
|
|
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
|
|
hw_threads);
|
|
|
|
// Launch threads
|
|
HANDLE *tids = malloc(sizeof(HANDLE) * num_threads);
|
|
for (size_t i = 0; i < num_threads; ++i) {
|
|
tids[i] = CreateThread(NULL, 0, worker_thread_windows, &warg, 0, NULL);
|
|
}
|
|
|
|
// Progress / timer
|
|
struct timespec tstart, tnow;
|
|
// fallback for windows
|
|
LARGE_INTEGER freq, start_li;
|
|
QueryPerformanceFrequency(&freq);
|
|
QueryPerformanceCounter(&start_li);
|
|
|
|
size_t last_done = 0;
|
|
|
|
// --------------- Hashing speed MB/s ----------------
|
|
uint64_t last_bytes = atomic_load(&g_bytes_processed);
|
|
double last_time = 0.0;
|
|
double displayed_speed = 0.0;
|
|
const double sample_interval = 0.5;
|
|
char linebuf[256];
|
|
|
|
for (;;) {
|
|
size_t done = (size_t)atomic_load(&done_counter);
|
|
|
|
// ---- monotonic time ----
|
|
LARGE_INTEGER now_li;
|
|
QueryPerformanceCounter(&now_li);
|
|
double now =
|
|
(double)(now_li.QuadPart - start_li.QuadPart) / (double)freq.QuadPart;
|
|
|
|
// ---- total processed bytes ----
|
|
uint64_t bytes = atomic_load(&g_bytes_processed);
|
|
|
|
// ---- real sampler (independent of UI sleep) ----
|
|
if (last_time == 0.0) {
|
|
last_time = now;
|
|
last_bytes = bytes;
|
|
}
|
|
|
|
double dt = now - last_time;
|
|
if (dt >= sample_interval) {
|
|
uint64_t db = bytes - last_bytes;
|
|
|
|
if (db > 0 && dt > 0.0001) {
|
|
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
|
|
}
|
|
|
|
last_bytes = bytes;
|
|
last_time = now;
|
|
}
|
|
|
|
// ---- progress bar build ----
|
|
const int barw = 40;
|
|
double pct = total_jobs ? (double)done / (double)total_jobs : 0.0;
|
|
int filled = (int)(pct * barw + 0.5);
|
|
|
|
int p = 0;
|
|
p += snprintf(linebuf + p, sizeof(linebuf) - p, "[");
|
|
for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i)
|
|
p += snprintf(linebuf + p, sizeof(linebuf) - p, "#");
|
|
for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i)
|
|
p += snprintf(linebuf + p, sizeof(linebuf) - p, ".");
|
|
|
|
snprintf(linebuf + p, sizeof(linebuf) - p,
|
|
"] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs,
|
|
displayed_speed);
|
|
|
|
printf("\r%s", linebuf);
|
|
fflush(stdout);
|
|
|
|
if (done >= total_jobs)
|
|
break;
|
|
|
|
Sleep(100);
|
|
}
|
|
|
|
printf("\n\n");
|
|
|
|
// stop queue and join threads
|
|
jobqueue_stop(&queue);
|
|
WaitForMultipleObjects((DWORD)num_threads, tids, TRUE, INFINITE);
|
|
for (size_t i = 0; i < num_threads; ++i)
|
|
CloseHandle(tids[i]);
|
|
|
|
// done time
|
|
LARGE_INTEGER end_li;
|
|
QueryPerformanceCounter(&end_li);
|
|
double elapsed =
|
|
(double)(end_li.QuadPart - start_li.QuadPart) / (double)freq.QuadPart;
|
|
double total_seconds = timer_stop(&total_timer);
|
|
|
|
printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed);
|
|
|
|
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
|
|
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
|
|
double avg_mbps = total_mb / elapsed;
|
|
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
|
|
printf(" Total time : %.2f seconds\n", total_seconds);
|
|
|
|
// If resume: we appended missing entries. If not resume: we wrote all results
|
|
// during workers. Note: This program appends hashes as workers finish. This
|
|
// avoids holding all hashes in RAM.
|
|
|
|
// Cleanup
|
|
for (size_t i = 0; i < g_entry_count; ++i)
|
|
if (existing_hash[i])
|
|
free(existing_hash[i]);
|
|
free(existing_hash);
|
|
|
|
free_entries();
|
|
|
|
return 0;
|
|
}
|