// Source listing header (viewer residue): 942 lines, 25 KiB, C
#pragma once // ensure that a given header file is included only once in a
|
|
// single compilation unit
|
|
#define _CRT_SECURE_NO_WARNINGS
|
|
|
|
#include "arena.h"
|
|
#include "base.h"
|
|
#include "lf_mpmc.h"
|
|
|
|
#include "arena.c"
|
|
|
|
// xxhash include
|
|
#define XXH_INLINE_ALL
|
|
#include "xxh_x86dispatch.h"
|
|
#include <ctype.h>
|
|
|
|
// ----------------------------- Config -------------------------------------
|
|
// File name for the hash listing (its consumer is outside this section).
#define FILE_HASHES_TXT "file_hashes.txt"

// Bytes needed for a 128-bit digest printed as 32 hex digits plus the NUL.
#define HASH_STRLEN 33

// Capacity in bytes of the fixed-size path buffers used in this file.
#define MAX_PATHLEN 4096

// Number of bytes requested per read while streaming a file into the hasher.
#define READ_BLOCK (KiB(64))
|
|
|
|
// ----------------------------- Globals ------------------------------------
|
|
// Shared progress counters. They are updated by the scan and hash code below
// and read by progress_thread().

// Regular files discovered by scan_folder().
static atomic_uint_fast64_t g_files_found = 0;

// Files for which hash_worker() has produced an output line.
static atomic_uint_fast64_t g_files_hashed = 0;

// Total bytes fed to the hasher so far.
static atomic_uint_fast64_t g_bytes_processed = 0;

// Non-zero once scanning has finished (the flag is set outside this section).
static atomic_int g_scan_done = 0;
|
|
|
|
// ================== OS-agnostic functions abstraction =====================
|
|
// ----------------------------- Timer functions --------------
|
|
typedef struct {
|
|
u64 start;
|
|
u64 now;
|
|
} HiResTimer;
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
|
|
// Performance-counter frequency in ticks per second; filled by timer_init().
static LARGE_INTEGER g_freq;

// Caches the performance-counter frequency in g_freq. timer_elapsed() divides
// by that value, so this must run before any elapsed time is requested.
static void timer_init(void) {
  QueryPerformanceFrequency(&g_freq);
}
|
|
|
|
// Records the current performance-counter reading as the timer's start point.
static void timer_start(HiResTimer *t) {
  LARGE_INTEGER ticks;

  QueryPerformanceCounter(&ticks);
  t->start = ticks.QuadPart;
}
|
|
|
|
// Returns the seconds elapsed since timer_start(), and stores the fresh
// counter reading in t->now. Relies on g_freq having been set by timer_init().
static double timer_elapsed(HiResTimer *t) {
  LARGE_INTEGER ticks;

  QueryPerformanceCounter(&ticks);
  t->now = ticks.QuadPart;

  const double elapsed_ticks = (double)(t->now - t->start);
  return elapsed_ticks / (double)g_freq.QuadPart;
}
|
|
|
|
#elif defined(__linux__)
|
|
|
|
// Nothing to set up on Linux; present so callers can invoke timer_init() on
// every platform.
void timer_init(void) { /* intentionally empty */ }
|
|
|
|
// Records the current CLOCK_MONOTONIC reading, in nanoseconds, as the timer's
// start point.
void timer_start(HiResTimer *t) {
  struct timespec mono;

  clock_gettime(CLOCK_MONOTONIC, &mono);

  // seconds -> nanoseconds, plus the sub-second part
  t->start = mono.tv_sec * 1000000000ULL + mono.tv_nsec;
}
|
|
|
|
// Returns the seconds elapsed since timer_start().
//
// The fresh CLOCK_MONOTONIC reading (in nanoseconds) is also stored in t->now.
// The Windows implementation already does this; the Linux one previously left
// the field untouched, so any reader of t->now saw a stale or uninitialised
// value.
double timer_elapsed(HiResTimer *t) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);

  t->now = ts.tv_sec * 1000000000ULL + ts.tv_nsec;

  return (double)(t->now - t->start) / 1e9;
}
|
|
|
|
#endif
|
|
|
|
// ----------------------------- Get HW info --------------
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
|
|
size_t platform_physical_cores(void) {
|
|
DWORD len = 0;
|
|
GetLogicalProcessorInformation(NULL, &len);
|
|
|
|
SYSTEM_LOGICAL_PROCESSOR_INFORMATION buf[len];
|
|
|
|
GetLogicalProcessorInformation(buf, &len);
|
|
DWORD count = 0;
|
|
DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
|
|
for (DWORD i = 0; i < n; i++) {
|
|
if (buf[i].Relationship == RelationProcessorCore)
|
|
count++;
|
|
}
|
|
return count ? count : 1;
|
|
}
|
|
|
|
#elif defined(__linux__)
|
|
|
|
// Returns the value of sysconf(_SC_NPROCESSORS_ONLN), or 1 when that call
// reports zero or an error.
// NOTE(review): despite the name, this is the count of online processors as
// seen by the OS, which is not necessarily the number of physical cores.
size_t platform_physical_cores(void) {
  const long online = sysconf(_SC_NPROCESSORS_ONLN);

  if (online <= 0) {
    return 1;
  }
  return (size_t)online;
}
|
|
|
|
#endif
|
|
|
|
const char *get_xxhash_instruction_set(void) {
|
|
int vecID = XXH_featureTest();
|
|
|
|
switch (vecID) {
|
|
case XXH_SCALAR:
|
|
return "Scalar (portable C)";
|
|
case XXH_SSE2:
|
|
return "SSE2";
|
|
case XXH_AVX2:
|
|
return "AVX2";
|
|
case XXH_AVX512:
|
|
return "AVX-512";
|
|
default:
|
|
return "Unknown";
|
|
}
|
|
}
|
|
|
|
// -------------------- File IO -------------------
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
typedef HANDLE FileHandle;
|
|
#define INVALID_FILE_HANDLE INVALID_HANDLE_VALUE
|
|
|
|
// File open function
|
|
static FileHandle os_file_open(const char *path) {
|
|
return CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
|
|
}
|
|
|
|
// File read function
|
|
// Reads up to `count` bytes from `handle` into `buf`.
//
// *bytes_read receives the number of bytes actually read.
// Returns 0 when at least one byte was read, and -1 on failure OR at end of
// file. Note that the Linux implementation in this file returns 0 at end of
// file; callers must therefore also test *bytes_read.
//
// ReadFile takes a 32-bit length. A plain (DWORD) cast silently wrapped larger
// requests (4 GiB became 0), so the request is now clamped and a short read is
// performed instead.
static int os_file_read(FileHandle handle, void *buf, size_t count,
                        uint64_t *bytes_read) {
  DWORD want = (count > (size_t)MAXDWORD) ? MAXDWORD : (DWORD)count;
  DWORD read = 0;

  BOOL result = ReadFile(handle, buf, want, &read, NULL);
  *bytes_read = read;

  return (result && read > 0) ? 0 : -1;
}
|
|
|
|
// File close function
|
|
// Releases a handle obtained from os_file_open(); the CloseHandle result is
// not reported.
static void os_file_close(FileHandle handle) {
  CloseHandle(handle);
}
|
|
|
|
#elif defined(__linux__)
|
|
// Native file handle type (a POSIX file descriptor) and the value that
// signals "open failed".
typedef int FileHandle;
#define INVALID_FILE_HANDLE (-1)

// Opens `path` read-only. O_NOFOLLOW makes the call fail when the final path
// component is a symbolic link.
// Returns INVALID_FILE_HANDLE on failure.
static FileHandle os_file_open(const char *path) {
  const int flags = O_RDONLY | O_NOFOLLOW;

  return open(path, flags);
}
|
|
|
|
// File read function
|
|
// Reads up to `count` bytes from `handle` into `buf` with a single read() call.
//
// Returns 0 on success, including end of file (where *bytes_read is 0), and
// -1 when read() fails (in which case *bytes_read is also 0).
static int os_file_read(FileHandle handle, void *buf, size_t count,
                        uint64_t *bytes_read) {
  const ssize_t got = read(handle, buf, count);

  if (got < 0) {
    *bytes_read = 0;
    return -1;
  }

  *bytes_read = (uint64_t)got;
  return 0;
}
|
|
|
|
// File close function
|
|
// Closes a descriptor obtained from os_file_open(); the close() result is not
// reported.
static void os_file_close(FileHandle handle) {
  close(handle);
}
|
|
|
|
#endif
|
|
|
|
// -------------------- Thread abstraction -------------------
|
|
// Threads context
|
|
typedef struct {
|
|
u8 num_threads;
|
|
|
|
mem_arena *path_arena;
|
|
mem_arena *meta_arena;
|
|
|
|
MPMCQueue *dir_queue;
|
|
MPMCQueue *file_queue;
|
|
} ScannerContext;
|
|
|
|
typedef struct {
|
|
mem_arena *arena;
|
|
MPMCQueue *file_queue;
|
|
} WorkerContext;
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
typedef HANDLE ThreadHandle;
|
|
typedef DWORD(WINAPI *ThreadFunc)(void *);
|
|
#define THREAD_RETURN DWORD WINAPI
|
|
#define THREAD_RETURN_VALUE 0;
|
|
|
|
typedef struct {
|
|
ThreadHandle handle;
|
|
int valid; // Track if thread was successfully created
|
|
} Thread;
|
|
|
|
// Thread function wrapper to handle different return types
|
|
#define THREAD_FUNCTION(name) DWORD WINAPI name(LPVOID arg)
|
|
|
|
// Thread creation function
|
|
// Starts a new thread running func(arg).
//
// Returns 0 on success and -1 when CreateThread fails. thread->valid is now
// set to match the outcome, as the Linux implementation does on success;
// previously the field was never written on Windows.
static int thread_create(Thread *thread, ThreadFunc func, void *arg) {
  thread->handle =
      CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, NULL);
  thread->valid = (thread->handle != NULL);

  return thread->valid ? 0 : -1;
}
|
|
|
|
// Thread join function
|
|
// Blocks until the thread finishes.
// Returns 0 when the wait reports WAIT_OBJECT_0 and -1 for any other result.
static int thread_join(Thread *thread) {
  const DWORD wait_result = WaitForSingleObject(thread->handle, INFINITE);

  if (wait_result == WAIT_OBJECT_0) {
    return 0;
  }
  return -1;
}
|
|
|
|
// Thread close/detach function
|
|
// Releases the thread's handle; the CloseHandle result is not reported.
static void thread_close(Thread *thread) {
  CloseHandle(thread->handle);
}
|
|
|
|
// Wait for multiple threads
|
|
// Waits until all `count` threads have finished.
//
// Returns 0 when every wait succeeded and -1 otherwise. A count of 0 returns
// 0, matching the Linux implementation.
//
// WaitForMultipleObjects accepts at most MAXIMUM_WAIT_OBJECTS handles per
// call. The previous code copied every handle into a 64-entry array, which
// overflowed the stack buffer for larger counts, so the wait is now performed
// in batches.
static int thread_wait_multiple(Thread *threads, size_t count) {
  int status = 0;
  size_t done = 0;

  while (done < count) {
    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
    size_t batch = count - done;
    if (batch > MAXIMUM_WAIT_OBJECTS) {
      batch = MAXIMUM_WAIT_OBJECTS;
    }

    for (size_t i = 0; i < batch; i++) {
      handles[i] = threads[done + i].handle;
    }

    DWORD rc = WaitForMultipleObjects((DWORD)batch, handles, TRUE, INFINITE);

    // Success is any value in [WAIT_OBJECT_0, WAIT_OBJECT_0 + batch - 1].
    if (rc >= WAIT_OBJECT_0 + (DWORD)batch) {
      status = -1;
    }

    done += batch;
  }

  return status;
}
|
|
|
|
#elif defined(__linux__)
|
|
// POSIX flavour of the thread abstraction types.
typedef pthread_t ThreadHandle;
typedef void *(*ThreadFunc)(void *);

// Return type to use when declaring a thread entry point.
#define THREAD_RETURN void *

// Value a thread entry point returns. The macro no longer carries its own
// trailing ';': `return THREAD_RETURN_VALUE;` used to expand to
// `return NULL;;` and the macro could not be used inside an expression.
#define THREAD_RETURN_VALUE NULL
|
|
|
|
typedef struct {
|
|
ThreadHandle handle;
|
|
int valid; // Track if thread was successfully created
|
|
} Thread;
|
|
|
|
// Thread function wrapper to handle different return types
|
|
// Pairs an entry point with its argument so both can travel through the single
// void * that a thread start routine receives (see thread_start_routine()).
typedef struct {
  void *(*func)(void *); // function to run on the new thread
  void *arg;             // argument passed to func
} ThreadWrapper;
|
|
|
|
static void *thread_start_routine(void *arg) {
|
|
ThreadWrapper *wrapper = (ThreadWrapper *)arg;
|
|
void *result = wrapper->func(wrapper->arg);
|
|
free(wrapper);
|
|
return result;
|
|
}
|
|
|
|
// Thread creation function
|
|
// Starts a new thread running func(arg).
//
// Returns 0 on success, otherwise the error number from pthread_create.
// thread->valid is always written. Previously it was only set on success, so
// a failed create on an uninitialised Thread left an indeterminate flag that
// thread_close() would later act on.
static int thread_create(Thread *thread, ThreadFunc func, void *arg) {
  int ret = pthread_create(&thread->handle, NULL, func, arg);

  thread->valid = (ret == 0);

  return ret;
}
|
|
|
|
// Thread join function
|
|
// Waits for the thread to finish and discards its return value.
// The valid flag is cleared whether or not the join succeeded.
// Returns the pthread_join result (0 on success).
static int thread_join(Thread *thread) {
  const int join_status = pthread_join(thread->handle, NULL);

  thread->valid = 0;

  return join_status;
}
|
|
|
|
// Thread close/detach function
|
|
// Detaches the thread if it is still marked valid, then clears the flag.
// Does nothing for a thread that was already joined or closed.
static void thread_close(Thread *thread) {
  if (!thread->valid) {
    return;
  }

  pthread_detach(thread->handle);
  thread->valid = 0;
}
|
|
|
|
// Wait for multiple threads
|
|
// Joins all `count` threads.
//
// Returns 0 when every join succeeded and -1 if any failed. The previous
// version returned at the first failure and left the remaining threads
// unjoined; every thread is now joined regardless.
static int thread_wait_multiple(Thread *threads, size_t count) {
  int status = 0;

  for (size_t i = 0; i < count; i++) {
    if (thread_join(&threads[i]) != 0) {
      status = -1;
    }
  }

  return status;
}
|
|
|
|
#endif
|
|
|
|
// ======================== Get file metadata ========================
|
|
// -------------------- Path parsing -------------------
|
|
// Rewrites a path in place so that every run of '/' and '\\' characters
// becomes a single '/'. All other characters are kept unchanged. The result is
// never longer than the input.
static void normalize_path(char *p) {
  char *write = p;
  int last_was_sep = 0;

  for (const char *read = p; *read != '\0'; ++read) {
    const int is_sep = (*read == '/' || *read == '\\');

    // Drop every separator after the first one in a run.
    if (is_sep && last_was_sep) {
      continue;
    }

    *write++ = is_sep ? '/' : *read;
    last_was_sep = is_sep;
  }

  *write = '\0';
}
|
|
|
|
static int parse_paths(char *line, char folders[][MAX_PATHLEN],
|
|
int max_folders) {
|
|
int count = 0;
|
|
char *p = line;
|
|
|
|
while (*p && count < max_folders) {
|
|
|
|
while (*p && isspace((unsigned char)*p))
|
|
p++;
|
|
|
|
if (!*p)
|
|
break;
|
|
|
|
char *start;
|
|
char quote = 0;
|
|
|
|
if (*p == '"' || *p == '\'') {
|
|
quote = *p++;
|
|
start = p;
|
|
|
|
while (*p && *p != quote)
|
|
p++;
|
|
} else {
|
|
start = p;
|
|
|
|
while (*p && !isspace((unsigned char)*p))
|
|
p++;
|
|
}
|
|
|
|
size_t len = p - start;
|
|
if (len >= MAX_PATHLEN)
|
|
len = MAX_PATHLEN - 1;
|
|
|
|
memcpy(folders[count], start, len);
|
|
folders[count][len] = 0;
|
|
|
|
normalize_path(folders[count]);
|
|
|
|
count++;
|
|
|
|
if (quote && *p == quote)
|
|
p++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// ----------------------------- File time -------------------------
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
// Formats epoch seconds `t` as local time "YYYY-MM-DD HH:MM:SS" into `out`.
//
// "N/A" is written when t is 0, when the time cannot be converted, or when
// the formatted text does not fit in `out_sz` bytes. The conversion and
// formatting results were previously unchecked, so a failure left `out` with
// unspecified contents.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (out == NULL || out_sz == 0) {
    return;
  }

  time_t tt = (time_t)t;
  struct tm tm;

  if (t == 0 || localtime_s(&tm, &tt) != 0 ||
      strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}
|
|
// ----------------------------- Convert filetime to epoch --------------
|
|
static uint64_t filetime_to_epoch(const FILETIME *ft) {
|
|
ULARGE_INTEGER ull;
|
|
ull.LowPart = ft->dwLowDateTime;
|
|
ull.HighPart = ft->dwHighDateTime;
|
|
|
|
// Windows epoch (1601) ¬ニメ Unix epoch (1970)
|
|
return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL;
|
|
}
|
|
|
|
void platform_get_file_times(const char *path, uint64_t *out_created,
|
|
uint64_t *out_modified) {
|
|
WIN32_FILE_ATTRIBUTE_DATA fad;
|
|
if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) {
|
|
*out_created = filetime_to_epoch(&fad.ftCreationTime);
|
|
*out_modified = filetime_to_epoch(&fad.ftLastWriteTime);
|
|
} else {
|
|
*out_created = 0;
|
|
*out_modified = 0;
|
|
}
|
|
}
|
|
|
|
#elif defined(__linux__)
|
|
// Formats epoch seconds `t` as local time "YYYY-MM-DD HH:MM:SS" into `out`.
//
// "N/A" is written when t is 0, when localtime_r cannot convert the value, or
// when the formatted text does not fit in `out_sz` bytes. Both failures were
// previously unchecked: a failed conversion passed an uninitialised struct tm
// to strftime, and a too-small buffer was left with unspecified contents.
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (out == NULL || out_sz == 0) {
    return;
  }

  time_t tt = (time_t)t;
  struct tm tm;

  if (t == 0 || localtime_r(&tt, &tm) == NULL ||
      strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}
|
|
|
|
// Fetches two timestamps of `path` as epoch seconds using stat().
// Both outputs are set to 0 when stat() fails.
// NOTE(review): the "created" value is taken from st_ctime, which POSIX
// defines as the time of the last status change rather than a creation time.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  struct stat info;
  const int ok = (stat(path, &info) == 0);

  *out_created = ok ? (uint64_t)info.st_ctime : 0;
  *out_modified = ok ? (uint64_t)info.st_mtime : 0;
}
|
|
|
|
#endif
|
|
|
|
// ----------------------------- File owner ---------------------
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
static void get_file_owner(const char *path, char *out, size_t out_sz) {
|
|
PSID sid = NULL;
|
|
PSECURITY_DESCRIPTOR sd = NULL;
|
|
|
|
if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
|
|
&sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) {
|
|
|
|
char name[64], domain[64];
|
|
DWORD name_len = sizeof(name);
|
|
DWORD domain_len = sizeof(domain);
|
|
SID_NAME_USE use;
|
|
|
|
if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len,
|
|
&use)) {
|
|
snprintf(out, out_sz, "%s\\%s", domain, name);
|
|
} else {
|
|
snprintf(out, out_sz, "UNKNOWN");
|
|
}
|
|
} else {
|
|
snprintf(out, out_sz, "UNKNOWN");
|
|
}
|
|
|
|
if (sd)
|
|
LocalFree(sd);
|
|
}
|
|
|
|
// Public entry point for owner lookup; forwards all arguments to
// get_file_owner().
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  get_file_owner(path, out_owner, out_owner_size);
}
|
|
|
|
#elif defined(__linux__)
|
|
// Writes the user name for `uid` to `out`, or "UNKNOWN" when the lookup fails
// or finds no entry.
//
// Uses the reentrant getpwuid_r instead of getpwuid. getpwuid may return a
// pointer to static storage shared between threads, and this function is
// reached from scan_folder(), which scan_worker() runs as a thread function.
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
  struct passwd pw;
  struct passwd *found = NULL;
  char scratch[4096]; // string storage for the passwd entry

  if (getpwuid_r(uid, &pw, scratch, sizeof(scratch), &found) == 0 &&
      found != NULL && found->pw_name != NULL) {
    snprintf(out, out_sz, "%s", found->pw_name);
  } else {
    snprintf(out, out_sz, "UNKNOWN");
  }
}
|
|
|
|
// Writes the name of the user owning `path` to `out_owner`.
// "UNKNOWN" is written when stat() fails; otherwise the uid is resolved by
// get_file_owner().
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  struct stat info;

  if (stat(path, &info) != 0) {
    snprintf(out_owner, out_owner_size, "UNKNOWN");
    return;
  }

  get_file_owner(info.st_uid, out_owner, out_owner_size);
}
|
|
|
|
#endif
|
|
|
|
// ----------------------------- Scan helpers -----------------------------
|
|
// Metadata for one discovered file. Records are created by scan_folder() and
// consumed by hash_worker().
typedef struct FileEntry {
  char *path; // normalised path ('/' separators)

  uint64_t size_bytes;    // file size in bytes
  uint64_t created_time;  // epoch seconds, 0 when unavailable
  uint64_t modified_time; // epoch seconds, 0 when unavailable
  char owner[128];        // resolved owner name
} FileEntry;
|
|
|
|
typedef struct {
|
|
char buffer[MAX_PATHLEN];
|
|
char *base_end; // Points to end of base path
|
|
char *filename_pos; // Points to where filename should be written
|
|
size_t base_len;
|
|
} PathBuilder;
|
|
|
|
// Initialises `pb` with `base` followed by the platform path separator
// ('\\' on Windows, '/' elsewhere) and points filename_pos just past that
// separator.
//
// The base is truncated if needed so that base + separator + NUL always fits
// in pb->buffer. The copy was previously unbounded and overflowed the buffer
// for a base of MAX_PATHLEN - 1 bytes or more.
static void path_builder_init(PathBuilder *pb, const char *base) {
  size_t len = strlen(base);

  // Keep two bytes free: one for the separator, one for the terminator.
  if (len > sizeof(pb->buffer) - 2) {
    len = sizeof(pb->buffer) - 2;
  }

  pb->base_len = len;
  memcpy(pb->buffer, base, len);
  pb->base_end = pb->buffer + len;

#if defined(_WIN32) || defined(_WIN64)
  *pb->base_end = '\\';
#else
  // Previously only written under __linux__, leaving the byte unset elsewhere.
  *pb->base_end = '/';
#endif

  // Ensure null termination
  *(pb->base_end + 1) = '\0';
  pb->filename_pos = pb->base_end + 1;
}
|
|
|
|
// Writes `filename` (`name_len` bytes, no terminator required) after the base
// path in `pb` and NUL-terminates the result.
//
// The name is truncated to the space left in pb->buffer. The copy was
// previously unbounded and could write past the end of the buffer.
static void path_builder_set_filename(PathBuilder *pb, const char *filename,
                                      size_t name_len) {
  // Bytes available from filename_pos to the end of the buffer, including the
  // byte needed for the terminator.
  ptrdiff_t room = (pb->buffer + sizeof(pb->buffer)) - pb->filename_pos;
  if (room <= 0) {
    return;
  }

  if (name_len >= (size_t)room) {
    name_len = (size_t)room - 1;
  }

  memcpy(pb->filename_pos, filename, name_len);
  pb->filename_pos[name_len] = '\0'; // Ensure null termination
}
|
|
|
|
// Copies the full path currently held in `pb` (base, separator, filename and
// terminator) into memory obtained from arena_push() and returns the copy.
// `zero` is forwarded to arena_push().
// NOTE(review): arena_push() receives the address of the local `arena`
// parameter, so if it ever updates the pointer it is given, the caller's arena
// pointer will not see that update — confirm against arena_push()'s contract.
static char *path_builder_dup_arena(PathBuilder *pb, mem_arena *arena,
                                    bool zero) {
  const size_t prefix_len = (size_t)(pb->filename_pos - pb->buffer);
  const size_t name_len = strlen(pb->filename_pos);
  const size_t bytes = prefix_len + name_len + 1; // +1 for the terminator

  char *copy = arena_push(&arena, bytes, zero);
  memcpy(copy, pb->buffer, bytes);

  return copy;
}
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
// Scans one directory (non-recursively).
//
// Each sub-directory path is pushed to ctx->dir_queue. For each other entry a
// FileEntry (normalised path, size, times, owner) is pushed to
// ctx->file_queue and g_files_found is incremented. "." and "..", and any
// entry flagged as a reparse point, are skipped.
//
// Fix: the base path and every entry name are now checked against MAX_PATHLEN
// before being copied. The `search` and PathBuilder buffers previously
// overflowed for long paths. Paths that do not fit are skipped.
void scan_folder(const char *base, ScannerContext *ctx) {
  // search[] must hold base + separator + "*" + NUL.
  if (strlen(base) + 3 > MAX_PATHLEN) {
    return;
  }

  PathBuilder pb;
  path_builder_init(&pb, base);

  char search[MAX_PATHLEN];
  memcpy(search, pb.buffer, pb.base_len + 1); // Copy base + separator
  memcpy(search + pb.base_len + 1, "*", 2);   // Add "*" and null

  WIN32_FIND_DATAA fd;
  HANDLE h = FindFirstFileA(search, &fd);
  if (h == INVALID_HANDLE_VALUE) {
    return;
  }

  do {
    // Skip . and ..
    if (fd.cFileName[0] == '.' &&
        (fd.cFileName[1] == 0 ||
         (fd.cFileName[1] == '.' && fd.cFileName[2] == 0))) {
      continue;
    }

    if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
      continue;
    }

    size_t name_len = strlen(fd.cFileName);

    // base + separator + name + NUL must fit in the path buffers.
    if (pb.base_len + 1 + name_len + 1 > MAX_PATHLEN) {
      continue;
    }

    path_builder_set_filename(&pb, fd.cFileName, name_len);

    if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      char *dir = path_builder_dup_arena(&pb, ctx->path_arena, false);
      mpmc_push_work(ctx->dir_queue, dir);
    } else {
      atomic_fetch_add(&g_files_found, 1);

      FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true);

      // Normalise a copy so pb.buffer keeps its native separators for the
      // metadata calls below.
      char temp_path[MAX_PATHLEN];
      memcpy(temp_path, pb.buffer,
             (size_t)(pb.filename_pos - pb.buffer) + name_len + 1);
      normalize_path(temp_path);

      fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, false);
      strcpy(fe->path, temp_path);

      platform_get_file_times(pb.buffer, &fe->created_time, &fe->modified_time);
      platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner));
      fe->size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow;

      mpmc_push(ctx->file_queue, fe);
    }

  } while (FindNextFileA(h, &fd));

  FindClose(h);
}
|
|
|
|
#elif defined(__linux__)
|
|
// Looks up `name` relative to the directory descriptor `dir_fd` and reports
// two of its timestamps.
// Returns 0 on success, or -1 when fstatat() fails (outputs are then left
// untouched).
// NOTE(review): *created receives st_ctime, which POSIX defines as the time of
// the last status change rather than a creation time.
static int platform_get_file_times_fd(int dir_fd, const char *name,
                                      time_t *created, time_t *modified) {
  struct stat info;

  if (fstatat(dir_fd, name, &info, 0) != 0) {
    return -1;
  }

  *created = info.st_ctime;
  *modified = info.st_mtime;
  return 0;
}
|
|
|
|
// Looks up `name` relative to the directory descriptor `dir_fd` and writes the
// owning user's name to `owner`. When the uid has no resolvable name, the text
// "uid:<number>" is written instead.
//
// Returns 0 on success, or -1 when fstatat() fails (`owner` is then left
// untouched).
//
// Fixes: the uid fallback used "%d" for the unsigned uid_t, which printed
// large uids as negative numbers. The name copy used strncpy with
// owner_size - 1, which underflows for a zero-sized buffer; snprintf handles
// every size.
static int platform_get_file_owner_fd(int dir_fd, const char *name, char *owner,
                                      size_t owner_size) {
  struct stat st;
  if (fstatat(dir_fd, name, &st, 0) != 0) {
    return -1;
  }

  struct passwd pw;
  struct passwd *result = NULL;
  char buffer[4096]; // string storage for the passwd entry

  // Reentrant variant of getpwuid.
  if (getpwuid_r(st.st_uid, &pw, buffer, sizeof(buffer), &result) == 0 &&
      result != NULL && result->pw_name != NULL) {
    snprintf(owner, owner_size, "%s", result->pw_name);
  } else {
    // Fallback to the numeric uid
    snprintf(owner, owner_size, "uid:%lu", (unsigned long)st.st_uid);
  }

  return 0;
}
|
|
|
|
void scan_folder(const char *base, ScannerContext *ctx) {
|
|
PathBuilder pb;
|
|
path_builder_init(&pb, base);
|
|
|
|
int dir_fd = open(base, O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
|
|
if (dir_fd == -1)
|
|
return;
|
|
|
|
DIR *dir = fdopendir(dir_fd);
|
|
if (!dir) {
|
|
close(dir_fd);
|
|
return;
|
|
}
|
|
|
|
struct dirent *entry;
|
|
|
|
while ((entry = readdir(dir)) != NULL) {
|
|
if (entry->d_name[0] == '.' &&
|
|
(entry->d_name[1] == 0 ||
|
|
(entry->d_name[1] == '.' && entry->d_name[2] == 0)))
|
|
continue;
|
|
|
|
size_t name_len = strlen(entry->d_name);
|
|
path_builder_set_filename(&pb, entry->d_name, name_len);
|
|
|
|
int file_type = DT_UNKNOWN;
|
|
#ifdef _DIRENT_HAVE_D_TYPE
|
|
file_type = entry->d_type;
|
|
#endif
|
|
|
|
// Fast path using d_type
|
|
if (file_type != DT_UNKNOWN) {
|
|
if (file_type == DT_LNK)
|
|
continue; // Skip symlinks
|
|
|
|
if (file_type == DT_DIR) {
|
|
char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false);
|
|
mpmc_push_work(ctx->dir_queue, dir_path);
|
|
continue;
|
|
}
|
|
|
|
if (file_type == DT_REG) {
|
|
atomic_fetch_add(&g_files_found, 1);
|
|
FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry),
|
|
true);
|
|
|
|
// Use fstatat for file info
|
|
struct stat st;
|
|
if (fstatat(dir_fd, entry->d_name, &st, 0) == 0) {
|
|
// Convert times using fd variant
|
|
platform_get_file_times_fd(dir_fd, entry->d_name,
|
|
&fe->created_time,
|
|
&fe->modified_time);
|
|
platform_get_file_owner_fd(dir_fd, entry->d_name, fe->owner,
|
|
sizeof(fe->owner));
|
|
fe->size_bytes = (uint64_t)st.st_size;
|
|
|
|
// Normalize path
|
|
char temp_path[MAX_PATHLEN];
|
|
memcpy(temp_path, pb.buffer,
|
|
(pb.filename_pos - pb.buffer) + name_len + 1);
|
|
normalize_path(temp_path);
|
|
|
|
fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1,
|
|
false); strcpy(fe->path, temp_path);
|
|
|
|
mpmc_push(ctx->file_queue, fe);
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Fallback for unknown types
|
|
struct stat st;
|
|
if (fstatat(dir_fd, entry->d_name, &st, AT_SYMLINK_NOFOLLOW) == 0) {
|
|
if (S_ISLNK(st.st_mode))
|
|
continue;
|
|
|
|
if (S_ISDIR(st.st_mode)) {
|
|
char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false);
|
|
mpmc_push_work(ctx->dir_queue, dir_path);
|
|
} else if (S_ISREG(st.st_mode)) {
|
|
atomic_fetch_add(&g_files_found, 1);
|
|
FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry),
|
|
true);
|
|
|
|
platform_get_file_times(pb.buffer, &fe->created_time,
|
|
&fe->modified_time);
|
|
platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner));
|
|
fe->size_bytes = (uint64_t)st.st_size;
|
|
|
|
char temp_path[MAX_PATHLEN];
|
|
memcpy(temp_path, pb.buffer,
|
|
(pb.filename_pos - pb.buffer) + name_len + 1);
|
|
normalize_path(temp_path);
|
|
|
|
fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1,
|
|
false); strcpy(fe->path, temp_path);
|
|
|
|
mpmc_push(ctx->file_queue, fe);
|
|
}
|
|
}
|
|
}
|
|
|
|
closedir(dir); // Closes dir_fd automatically
|
|
}
|
|
|
|
// Choice 2: alternative scan_folder() based on opendir/lstat/stat, kept for
// reference only (disabled).
|
|
|
|
// void scan_folder(const char *base, ScannerContext *ctx) {
|
|
// PathBuilder pb;
|
|
// path_builder_init(&pb, base);
|
|
//
|
|
// DIR *dir = opendir(base);
|
|
// if (!dir)
|
|
// return;
|
|
//
|
|
// struct dirent *entry;
|
|
// struct stat st;
|
|
//
|
|
// while ((entry = readdir(dir)) != NULL) {
|
|
// if (entry->d_name[0] == '.' &&
|
|
// (entry->d_name[1] == 0 ||
|
|
// (entry->d_name[1] == '.' && entry->d_name[2] == 0)))
|
|
// continue;
|
|
//
|
|
// size_t name_len = strlen(entry->d_name);
|
|
// path_builder_set_filename(&pb, entry->d_name, name_len);
|
|
//
|
|
// if (lstat(pb.buffer, &st) == 0 && S_ISLNK(st.st_mode))
|
|
// continue;
|
|
//
|
|
// if (stat(pb.buffer, &st) == 0) {
|
|
// if (S_ISDIR(st.st_mode)) {
|
|
// char *dir_path = path_builder_dup_arena(&pb, ctx->path_arena, false);
|
|
// mpmc_push_work(ctx->dir_queue, dir_path);
|
|
// } else {
|
|
// atomic_fetch_add(&g_files_found, 1);
|
|
//
|
|
// FileEntry *fe = arena_push(&ctx->meta_arena, sizeof(FileEntry), true);
|
|
//
|
|
// // Create a temporary copy for normalization
|
|
// char temp_path[MAX_PATHLEN];
|
|
// memcpy(temp_path, pb.buffer,
|
|
// (pb.filename_pos - pb.buffer) + name_len + 1);
|
|
// normalize_path(temp_path);
|
|
//
|
|
// fe->path = arena_push(&ctx->path_arena, strlen(temp_path) + 1, false);
|
|
// strcpy(fe->path, temp_path);
|
|
//
|
|
// platform_get_file_times(pb.buffer, &fe->created_time,
|
|
// &fe->modified_time);
|
|
// platform_get_file_owner(pb.buffer, fe->owner, sizeof(fe->owner));
|
|
// fe->size_bytes = (uint64_t)st.st_size;
|
|
//
|
|
// mpmc_push(ctx->file_queue, fe);
|
|
// }
|
|
// }
|
|
// }
|
|
//
|
|
// closedir(dir);
|
|
// }
|
|
|
|
#endif
|
|
|
|
// ------------------------- Scan worker --------------------------------
|
|
static THREAD_RETURN scan_worker(void *arg) {
|
|
ScannerContext *ctx = (ScannerContext *)arg;
|
|
|
|
for (;;) {
|
|
char *dir = mpmc_pop(ctx->dir_queue);
|
|
if (!dir)
|
|
break;
|
|
|
|
scan_folder(dir, ctx);
|
|
|
|
mpmc_task_done(ctx->dir_queue, ctx->num_threads);
|
|
}
|
|
|
|
return THREAD_RETURN_VALUE;
|
|
}
|
|
|
|
// ----------------------------- Hashing helpers -----------------------------
|
|
static void xxh3_hash_file_stream(const char *path, char *out_hex,
|
|
unsigned char *buf) {
|
|
XXH128_hash_t h;
|
|
XXH3_state_t state;
|
|
XXH3_128bits_reset(&state);
|
|
|
|
FileHandle handle = os_file_open(path);
|
|
if (handle == INVALID_FILE_HANDLE) {
|
|
strcpy(out_hex, "ERROR");
|
|
return;
|
|
}
|
|
|
|
uint64_t bytes_read;
|
|
while (os_file_read(handle, buf, READ_BLOCK, &bytes_read) == 0 &&
|
|
bytes_read > 0) {
|
|
XXH3_128bits_update(&state, buf, (size_t)bytes_read);
|
|
atomic_fetch_add(&g_bytes_processed, bytes_read);
|
|
}
|
|
|
|
os_file_close(handle);
|
|
|
|
h = XXH3_128bits_digest(&state);
|
|
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
|
|
(unsigned long long)h.low64);
|
|
}
|
|
|
|
// ------------------------- Hash worker --------------------------------
|
|
static THREAD_RETURN hash_worker(void *arg) {
|
|
WorkerContext *ctx = (WorkerContext *)arg;
|
|
unsigned char *buf = (unsigned char *)malloc(READ_BLOCK);
|
|
|
|
for (;;) {
|
|
FileEntry *fe = mpmc_pop(ctx->file_queue);
|
|
if (!fe)
|
|
break;
|
|
|
|
char hash[HASH_STRLEN];
|
|
xxh3_hash_file_stream(fe->path, hash, buf);
|
|
|
|
char created[32], modified[32];
|
|
format_time(fe->created_time, created, sizeof(created));
|
|
format_time(fe->modified_time, modified, sizeof(modified));
|
|
|
|
double size_kib = (double)fe->size_bytes / 1024.0;
|
|
|
|
char stack_buf[1024];
|
|
|
|
int len =
|
|
snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
|
|
hash, fe->path, size_kib, created, modified, fe->owner);
|
|
|
|
char *dst = arena_push(&ctx->arena, len, false);
|
|
memcpy(dst, stack_buf, len);
|
|
|
|
atomic_fetch_add(&g_files_hashed, 1);
|
|
}
|
|
|
|
free(buf);
|
|
|
|
return THREAD_RETURN_VALUE;
|
|
}
|
|
|
|
// ----------------------------- Progress display ---------------------------
|
|
static THREAD_RETURN progress_thread(void *arg) {
|
|
(void)arg; // Unused parameter
|
|
|
|
HiResTimer progress_timer;
|
|
timer_start(&progress_timer);
|
|
|
|
uint64_t last_bytes = atomic_load(&g_bytes_processed);
|
|
double last_time = 0.0;
|
|
|
|
double displayed_speed = 0.0;
|
|
const double sample_interval = 0.5;
|
|
|
|
for (;;) {
|
|
uint64_t found = atomic_load(&g_files_found);
|
|
uint64_t hashed = atomic_load(&g_files_hashed);
|
|
uint64_t bytes = atomic_load(&g_bytes_processed);
|
|
int scan_done = atomic_load(&g_scan_done);
|
|
|
|
double t = timer_elapsed(&progress_timer);
|
|
|
|
if (last_time == 0.0) {
|
|
last_time = t;
|
|
last_bytes = bytes;
|
|
}
|
|
|
|
double dt = t - last_time;
|
|
|
|
if (dt >= sample_interval) {
|
|
uint64_t db = bytes - last_bytes;
|
|
|
|
if (db > 0 && dt > 0.0001) {
|
|
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
|
|
}
|
|
|
|
last_bytes = bytes;
|
|
last_time = t;
|
|
}
|
|
|
|
if (!scan_done) {
|
|
printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
|
|
(unsigned long long)found, (unsigned long long)hashed,
|
|
displayed_speed);
|
|
} else {
|
|
double pct = found ? (double)hashed / (double)found : 0.0;
|
|
int barw = 40;
|
|
int filled = (int)(pct * barw);
|
|
|
|
char bar[64];
|
|
int p = 0;
|
|
|
|
bar[p++] = '[';
|
|
for (int i = 0; i < filled; i++)
|
|
bar[p++] = '#';
|
|
for (int i = filled; i < barw; i++)
|
|
bar[p++] = '.';
|
|
bar[p++] = ']';
|
|
bar[p] = 0;
|
|
|
|
printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
|
|
(unsigned long long)hashed, (unsigned long long)found,
|
|
displayed_speed);
|
|
}
|
|
|
|
fflush(stdout);
|
|
|
|
if (scan_done && hashed == found)
|
|
break;
|
|
|
|
sleep_ms(100);
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
return THREAD_RETURN_VALUE;
|
|
}
|