diff --git a/platform.c b/platform.c index 559a0d7..ea6efdf 100644 --- a/platform.c +++ b/platform.c @@ -19,7 +19,7 @@ static atomic_uint_fast64_t g_files_hashed = 0; static atomic_uint_fast64_t g_bytes_processed = 0; static atomic_int g_scan_done = 0; -#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null terminator +#define HASH_STRLEN 32 // 128-bit hex (32 chars) #define MAX_PATHLEN KiB(4) // ================== OS-agnostic functions abstraction ===================== // --------------------- Timer functions --------------------- @@ -94,7 +94,7 @@ size_t platform_physical_cores(void) { } #endif -const char *get_xxhash_instruction_set(void) { +char *get_xxhash_instruction_set(void) { int vecID = XXH_featureTest(); switch (vecID) { @@ -120,7 +120,7 @@ typedef HANDLE FileHandle; #define INVALID_FILE_HANDLE INVALID_HANDLE_VALUE // File open function -static FileHandle os_file_open(const char *path, DWORD flags) { +static FileHandle os_file_open(char *path, DWORD flags) { return CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, flags, NULL); } @@ -144,7 +144,7 @@ typedef int FileHandle; #define INVALID_FILE_HANDLE (-1) // File open function -static FileHandle os_file_open(const char *path, int flags) { +static FileHandle os_file_open(char *path, int flags) { // Combine your mandatory flags with the user-provided flag int fd = open(path, O_RDONLY | O_NOFOLLOW | flags); @@ -283,6 +283,67 @@ static int thread_wait_multiple(Thread *threads, size_t count) { #endif // ======================== Get file metadata ======================== +// ----------------------------- Formaters ----------------------------- +static void format_hash(char *dst, XXH128_hash_t h) { + + static const char hex[] = "0123456789abcdef"; + + // High 64 bits + for (int i = 0; i < 16; i++) { + dst[i] = hex[(h.high64 >> ((15 - i) * 4)) & 0xF]; + } + + // Low 64 bits + for (int i = 0; i < 16; i++) { + dst[16 + i] = hex[(h.low64 >> ((15 - i) * 4)) & 0xF]; + } +} + +static void format_size_kib(char *dst, uint64_t size_bytes) { + + // Reserve worst-case space + uint64_t kib_int = size_bytes / 1024; + uint64_t frac = ((size_bytes % 1024) * 100ULL) / 1024ULL; + + char *p = dst; + + // Count digits + uint64_t tmp = kib_int; + int digits = 1; + + while (tmp >= 10) { + tmp /= 10; + digits++; + } + + // Write integer part directly + p += digits; + + uint64_t v = kib_int; + + do { + *--p = (char)('0' + (v % 10)); + v /= 10; + } while (v); + + p = dst + digits; + + // Decimal part + *p++ = '.'; + *p++ = (char)('0' + (frac / 10)); + *p++ = (char)('0' + (frac % 10)); + *p = 0; +} + +static void str_copy(char *dst, const char *src, size_t dst_size) { + size_t len = strlen(src); + if (len >= dst_size) { + len = dst_size - 1; + } + memcpy(dst, src, len); + dst[len] = '\0'; +} + // -------------------- Path helpers ------------------- static void normalize_path(char *p) { char *src = p; @@ -376,7 +437,7 @@ typedef struct { size_t base_len; } PathBuilder; -static void path_builder_init(PathBuilder *pb, const char *base) { +static void path_builder_init(PathBuilder *pb, char *base) { pb->base_len = strlen(base); memcpy(pb->buffer, base, pb->base_len); pb->base_end = pb->buffer + pb->base_len; @@ -393,7 +454,7 @@ static void path_builder_init(PathBuilder *pb, const char *base) { pb->filename_pos = pb->base_end; } -static void path_builder_set_filename(PathBuilder *pb, const char *filename, +static void path_builder_set_filename(PathBuilder *pb, char *filename, size_t name_len) { memcpy(pb->filename_pos, filename, name_len); pb->filename_pos[name_len] = '\0'; // Ensure null termination @@ -412,22 +473,111 @@ static char *path_builder_dup_arena(PathBuilder *pb, mem_arena *arena, // ------------------------- File time ------------------------- #if FILE_TIMES -#if defined(_WIN32) || defined(_WIN64) -static void format_time(uint64_t t, char *out, size_t out_sz) { - if (t == 0) { - snprintf(out, out_sz, "N/A"); +static int is_leap_year(int year) { + return ((year % 4 == 0) && (year % 100 != 0)) || (year % 400 == 0); +} + +static void format_time(uint64_t unix_time, + char *out) { // Format time will output UTC time + + if (unix_time == 0) { + memcpy(out, "N/A", 3); return; } - time_t tt = (time_t)t; - struct tm tm; + static const int days_per_month[12] = {31, 28, 31, 30, 31, 30, + 31, 31, 30, 31, 30, 31}; - localtime_s(&tm, &tt); + uint64_t t = unix_time; - strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm); + // --------------------------- + // Time of day + // --------------------------- + uint64_t secs_of_day = t % 86400ULL; + + int hour = (int)(secs_of_day / 3600ULL); + int minute = (int)((secs_of_day % 3600ULL) / 60ULL); + int second = (int)(secs_of_day % 60ULL); + + // --------------------------- + // Days since epoch + // --------------------------- + uint64_t days = t / 86400ULL; + + int year = 1970; + + while (1) { + int year_days = is_leap_year(year) ? 366 : 365; + + if (days < (uint64_t)year_days) + break; + + days -= year_days; + year++; + } + + // --------------------------- + // Month/day + // --------------------------- + int month = 0; + + while (1) { + int dim = days_per_month[month]; + + if (month == 1 && is_leap_year(year)) + dim = 29; + + if (days < (uint64_t)dim) + break; + + days -= dim; + month++; + } + + int day = (int)days + 1; + + // --------------------------- + // YYYY-MM-DD HH:MM:SS + // exactly 19 chars + // --------------------------- + + out[0] = '0' + ((year / 1000) % 10); + out[1] = '0' + ((year / 100) % 10); + out[2] = '0' + ((year / 10) % 10); + out[3] = '0' + (year % 10); + + out[4] = '-'; + + int mon = month + 1; + out[5] = '0' + (mon / 10); + out[6] = '0' + (mon % 10); + + out[7] = '-'; + + out[8] = '0' + (day / 10); + out[9] = '0' + (day % 10); + + out[10] = ' '; + + out[11] = '0' + (hour / 10); + out[12] = '0' + (hour % 10); + + out[13] = ':'; + + out[14] = '0' + (minute / 10); + out[15] = '0' + (minute % 10); + + out[16] = ':'; + + out[17] = '0' + (second / 10); + out[18] = '0' + (second % 10); + + // out[19] = '\0'; } + +#if defined(_WIN32) || defined(_WIN64) // ------------------ Convert filetime to epoch ------------------- -static uint64_t filetime_to_epoch(const FILETIME *ft) { +static uint64_t filetime_to_epoch(FILETIME *ft) { ULARGE_INTEGER ull; ull.LowPart = ft->dwLowDateTime; ull.HighPart = ft->dwHighDateTime; @@ -436,7 +586,7 @@ static uint64_t filetime_to_epoch(const FILETIME *ft) { return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL; } -void platform_get_file_times(const char *path, uint64_t *out_created, +void platform_get_file_times(char *path, uint64_t *out_created, uint64_t *out_modified) { WIN32_FILE_ATTRIBUTE_DATA fad; if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) { @@ -463,7 +613,7 @@ static void format_time(uint64_t t, char *out, size_t out_sz) { strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm); } -void platform_get_file_times(const char *path, uint64_t *out_created, +void platform_get_file_times(char *path, uint64_t *out_created, uint64_t *out_modified) { struct stat st; if (stat(path, &st) == 0) { @@ -475,8 +625,8 @@ void platform_get_file_times(const char *path, uint64_t *out_created, } } -static int platform_get_file_times_fd(int dir_fd, const char *name, - uint64_t *created, uint64_t *modified) { +static int platform_get_file_times_fd(int dir_fd, char *name, uint64_t *created, + uint64_t *modified) { struct stat st; if (fstatat(dir_fd, name, &st, 0) == 0) { *created = st.st_ctime; // or st.st_birthtime on systems that support it @@ -491,7 +641,7 @@ static int platform_get_file_times_fd(int dir_fd, const char *name, // -------------------- File owner --------------------- #if defined(_WIN32) || defined(_WIN64) #if FILE_OWNER -void platform_get_file_owner(const char *path, char *out_owner, +void platform_get_file_owner(char *path, char *out_owner, size_t out_owner_size) { PSID sid = NULL; PSECURITY_DESCRIPTOR sd = NULL; @@ -506,12 +656,23 @@ void platform_get_file_owner(const char *path, char *out_owner, if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len, &use)) { - snprintf(out_owner, out_owner_size, "%s\\%s", domain, name); + // Format: "domain\name" + size_t domain_len_actual = strlen(domain); + size_t name_len_actual = strlen(name); + size_t total = domain_len_actual + 1 + name_len_actual; + + if (total < out_owner_size) { + memcpy(out_owner, domain, domain_len_actual); + out_owner[domain_len_actual] = '\\'; + memcpy(out_owner + domain_len_actual + 1, name, name_len_actual + 1); + } else { + str_copy(out_owner, domain, out_owner_size); + } } else { - snprintf(out_owner, out_owner_size, "UNKNOWN"); + str_copy(out_owner, "UNKNOWN", out_owner_size); } } else { - snprintf(out_owner, out_owner_size, "UNKNOWN"); + str_copy(out_owner, "UNKNOWN", out_owner_size); } if (sd) @@ -521,7 +682,7 @@ void platform_get_file_owner(const char *path, char *out_owner, #elif defined(__linux__) #if FILE_OWNER -void platform_get_file_owner(const char *path, char *out_owner, +void platform_get_file_owner(char *path, char *out_owner, size_t out_owner_size) { struct stat st; const char *owner = "UNKNOWN"; @@ -533,25 +694,41 @@ void platform_get_file_owner(const char *path, char *out_owner, } } - snprintf(out_owner, out_owner_size, "%s", owner); + str_copy(out_owner, owner, out_owner_size); } -static int platform_get_file_owner_fd(int dir_fd, const char *name, char *owner, +static int platform_get_file_owner_fd(int dir_fd, char *name, char *owner, size_t owner_size) { struct stat st; if (fstatat(dir_fd, name, &st, 0) == 0) { struct passwd pw; struct passwd *result; - char buffer[4096]; // Sufficiently large buffer for passwd data + char buffer[4096]; - // Reentrant version (thread-safe) if (getpwuid_r(st.st_uid, &pw, buffer, sizeof(buffer), &result) == 0 && result != NULL && result->pw_name != NULL) { - strncpy(owner, result->pw_name, owner_size - 1); - owner[owner_size - 1] = '\0'; + str_copy(owner, result->pw_name, owner_size); } else { - // Fallback to uid - snprintf(owner, owner_size, "uid:%d", st.st_uid); + // Format: "uid:12345" + char tmp[16]; + char *tp = tmp + sizeof(tmp) - 1; + uint32_t uid = st.st_uid; + + *tp = '\0'; + do { + *--tp = (char)('0' + (uid % 10)); + uid /= 10; + } while (uid > 0); + + size_t prefix_len = 4; // "uid:" + if (prefix_len < owner_size) { + memcpy(owner, "uid:", prefix_len); + size_t uid_len = strlen(tp); + size_t remain = owner_size - prefix_len; + size_t copy_len = uid_len < remain ? uid_len : remain - 1; + memcpy(owner + prefix_len, tp, copy_len); + owner[prefix_len + copy_len] = '\0'; + } } return 0; } @@ -680,7 +857,7 @@ typedef struct FileEntry { } FileEntry; #if defined(_WIN32) || defined(_WIN64) -void scan_folder(const char *base, ScannerContext *ctx) { +void scan_folder(char *base, ScannerContext *ctx) { PathBuilder pb; path_builder_init(&pb, base); @@ -742,7 +919,7 @@ void scan_folder(const char *base, ScannerContext *ctx) { } #elif defined(__linux__) -void scan_folder(const char *base, ScannerContext *ctx) { +void scan_folder(char *base, ScannerContext *ctx) { PathBuilder pb; path_builder_init(&pb, base); @@ -891,7 +1068,7 @@ static THREAD_RETURN scan_worker(void *arg) { } // ----------------------------- Hashing helpers ----------------------------- -static void xxh3_hash_file_stream(const char *path, char *out_hex, +static void xxh3_hash_file_stream(char *path, char *out_hex, unsigned char *buf) { XXH128_hash_t h; XXH3_state_t state; @@ -913,14 +1090,14 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex, os_file_close(handle); h = XXH3_128bits_digest(&state); - snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64, - (unsigned long long)h.low64); + format_hash(out_hex, h); } // ------------------------- Hash worker -------------------------------- static THREAD_RETURN hash_worker(void *arg) { HasherContext *ctx = (HasherContext *)arg; void *buf = malloc(READ_BLOCK); + char *separator; for (;;) { FileEntry *fe = mpmc_pop(ctx->file_queue); @@ -930,7 +1107,8 @@ static THREAD_RETURN hash_worker(void *arg) { // Hash char *hash = arena_push(&ctx->arena, HASH_STRLEN, false); xxh3_hash_file_stream(fe->path, hash, buf); - arena_trim_string(&ctx->arena, hash, ARENA_TRIM_TAB); + separator = arena_push(&ctx->arena, 1, false); + *separator = '\t'; // Path u64 path_len = strlen(fe->path) + 1; @@ -939,27 +1117,22 @@ static THREAD_RETURN hash_worker(void *arg) { arena_trim_string(&ctx->arena, path, ARENA_TRIM_TAB); // Size - double size_kib = (double)fe->size_bytes / 1024.0; char *size = arena_push(&ctx->arena, 32, false); - snprintf(size, 32, "%.2f", size_kib); + format_size_kib(size, fe->size_bytes); arena_trim_string(&ctx->arena, size, ARENA_TRIM_NONE); // Times - char *separator; - #if FILE_TIMES separator = arena_push(&ctx->arena, 1, false); *separator = '\t'; - u64 time_size = 32; + char *created = arena_push(&ctx->arena, 19, false); + format_time(fe->created_time, created); + separator = arena_push(&ctx->arena, 1, false); + *separator = '\t'; - char *created = arena_push(&ctx->arena, time_size, false); - format_time(fe->created_time, created, time_size); - arena_trim_string(&ctx->arena, created, ARENA_TRIM_TAB); - - char *modified = arena_push(&ctx->arena, time_size, false); - format_time(fe->modified_time, modified, time_size); - arena_trim_string(&ctx->arena, modified, ARENA_TRIM_NONE); + char *modified = arena_push(&ctx->arena, 19, false); + format_time(fe->modified_time, modified); #endif // Owner @@ -985,21 +1158,103 @@ static THREAD_RETURN hash_worker(void *arg) { } // ------------------------- Progress display --------------------------- +// ============================================================ +// Console abstraction +// ============================================================ + +#if defined(_WIN32) || defined(_WIN64) +static HANDLE g_console_handle = NULL; + +static void console_init(void) { + g_console_handle = GetStdHandle(STD_OUTPUT_HANDLE); +} + +static void console_write(const char *buf, size_t len) { + DWORD written; + WriteConsoleA(g_console_handle, buf, (DWORD)len, &written, NULL); +} + +#elif defined(__linux__) + +static void console_init(void) {} + +static void console_write(const char *buf, size_t len) { + write(STDOUT_FILENO, buf, len); +} +#endif + +// -------------------------- Append helpers --------------------------- +static inline void buf_append_char(char **p, char c) { *(*p)++ = c; } + +static inline void buf_append_str(char **p, const char *s) { + while (*s) { + *(*p)++ = *s++; + } +} + +static inline void buf_append_u64(char **p, uint64_t v) { + + char tmp[32]; + int n = 0; + + do { + tmp[n++] = (char)('0' + (v % 10)); + v /= 10; + } while (v); + + while (n--) { + *(*p)++ = tmp[n]; + } +} + +static inline void buf_append_size_kib(char **p, uint64_t bytes) { + + uint64_t kib_int = bytes / 1024; + uint64_t frac = ((bytes % 1024) * 100ULL) / 1024ULL; + + buf_append_u64(p, kib_int); + + *(*p)++ = '.'; + *(*p)++ = (char)('0' + (frac / 10)); + *(*p)++ = (char)('0' + (frac % 10)); +} + +static inline void buf_append_percent_2(char **p, uint64_t num, uint64_t den) { + + if (den == 0) { + buf_append_str(p, "0.00"); + return; + } + + uint64_t scaled = (num * 10000ULL) / den; + + uint64_t whole = scaled / 100; + uint64_t frac = scaled % 100; + + buf_append_u64(p, whole); + + *(*p)++ = '.'; + *(*p)++ = (char)('0' + (frac / 10)); + *(*p)++ = (char)('0' + (frac % 10)); +} + static THREAD_RETURN progress_thread(void *arg) { (void)arg; + console_init(); + HiResTimer progress_timer; timer_start(&progress_timer); uint64_t last_bytes = 0; double last_time = 0.0; double displayed_speed = 0.0; - const double sample_interval = 0.5; + double sample_interval = 0.5; - // Hide cursor to prevent flickering - printf("\033[?25l"); + console_write("\033[?25l", 6); for (;;) { + uint64_t found = atomic_load(&g_files_found); uint64_t hashed = atomic_load(&g_files_hashed); uint64_t bytes = atomic_load(&g_bytes_processed); @@ -1010,47 +1265,91 @@ static THREAD_RETURN progress_thread(void *arg) { if (dt >= sample_interval) { uint64_t db = (bytes > last_bytes) ? bytes - last_bytes : 0; + displayed_speed = (double)db / (1024.0 * 1024.0) / dt; + last_bytes = bytes; last_time = t; } - printf("\r"); + uint64_t speed_x100 = (uint64_t)(displayed_speed * 100.0); + + char buffer[512]; + char *p = buffer; + + buf_append_char(&p, '\r'); if (!scan_done) { - printf("\033[1mScanning:\033[0m %llu files | Hashed: %llu | \033[32m%.2f " - "MB/s\033[0m ", - (unsigned long long)found, (unsigned long long)hashed, - displayed_speed); + + buf_append_str(&p, "\033[1mScanning:\033[0m "); + + buf_append_u64(&p, found); + + buf_append_str(&p, " files | Hashed: "); + + buf_append_u64(&p, hashed); + + buf_append_str(&p, " | \033[32m"); + + buf_append_u64(&p, speed_x100 / 100); + + buf_append_char(&p, '.'); + + buf_append_char(&p, '0' + ((speed_x100 / 10) % 10)); + buf_append_char(&p, '0' + (speed_x100 % 10)); + + buf_append_str(&p, " MB/s\033[0m "); + } else { - double pct = found ? (double)hashed / (double)found : 0.0; + int barw = 40; - int filled = (int)(pct * barw); + int filled = found ? (int)((hashed * barw) / found) : 0; + + buf_append_char(&p, '['); + + buf_append_str(&p, "\033[32m"); - printf("["); - // Print filled part in Green (\033[32m) - printf("\033[32m"); for (int i = 0; i < filled; i++) - putchar('#'); - // Reset color for empty part - printf("\033[0m"); - for (int i = filled; i < barw; i++) - putchar('.'); + buf_append_char(&p, '#'); - printf("] %6.2f%% (%llu/%llu) \033[32m%.2f MB/s\033[0m ", pct * 100.0, - (unsigned long long)hashed, (unsigned long long)found, - displayed_speed); + buf_append_str(&p, "\033[0m"); + + for (int i = filled; i < barw; i++) + buf_append_char(&p, '.'); + + buf_append_str(&p, "] "); + + buf_append_percent_2(&p, hashed, found); + + buf_append_str(&p, "% ("); + + buf_append_u64(&p, hashed); + + buf_append_char(&p, '/'); + + buf_append_u64(&p, found); + + buf_append_str(&p, ") \033[32m"); + + buf_append_u64(&p, speed_x100 / 100); + + buf_append_char(&p, '.'); + + buf_append_char(&p, '0' + ((speed_x100 / 10) % 10)); + buf_append_char(&p, '0' + (speed_x100 % 10)); + + buf_append_str(&p, " MB/s\033[0m "); } - fflush(stdout); + console_write(buffer, (size_t)(p - buffer)); if (scan_done && hashed == found) break; + sleep_ms(100); } - // Restore cursor (\033[?25h) and move to next line - printf("\033[?25h\n"); + console_write("\033[?25h\n", 7); return THREAD_RETURN_VALUE; } @@ -1872,6 +2171,7 @@ static void finalize_file(ThreadIoContext *restrict thread_ctx, FileReadContext *restrict file) { FileEntry *restrict fe = file->fe; + char *separator; os_file_close(file->file_handle); @@ -1881,13 +2181,10 @@ static void finalize_file(ThreadIoContext *restrict thread_ctx, if (file->use_incremental_hash) { // Large file: digest the accumulated hash state XXH128_hash_t h = XXH3_128bits_digest(&file->hash_state); - snprintf(hash, HASH_STRLEN, "%016llx%016llx", - (unsigned long long)h.high64, (unsigned long long)h.low64); + format_hash(hash, h); } else { // Small file: hash already computed, stored directly in single_hash - snprintf(hash, HASH_STRLEN, "%016llx%016llx", - (unsigned long long)file->single_hash.high64, - (unsigned long long)file->single_hash.low64); + format_hash(hash, file->single_hash); } } else { #if IORING_DEBUG_PRINTS @@ -1897,34 +2194,31 @@ static void finalize_file(ThreadIoContext *restrict thread_ctx, atomic_fetch_add(&g_io_ring_fallbacks, 1); xxh3_hash_file_stream(fe->path, hash, thread_ctx->fallback_buffer); } - arena_trim_string(&worker_ctx->arena, hash, ARENA_TRIM_TAB); + separator = arena_push(&worker_ctx->arena, 1, false); + *separator = '\t'; // Path - u64 path_len = strlen(fe->path) + 1; - char *path = arena_push(&worker_ctx->arena, path_len, ARENA_TRIM_TAB); - memcpy(path, fe->path, path_len); + char *path = arena_push(&worker_ctx->arena, MAX_PATHLEN, ARENA_TRIM_TAB); + memcpy(path, fe->path, MAX_PATHLEN); arena_trim_string(&worker_ctx->arena, path, ARENA_TRIM_TAB); // Size - double size_kib = (double)fe->size_bytes / 1024.0; char *size = arena_push(&worker_ctx->arena, 32, false); - snprintf(size, 32, "%.2f", size_kib); + format_size_kib(size, fe->size_bytes); arena_trim_string(&worker_ctx->arena, size, ARENA_TRIM_NONE); // Time - char *separator; #if FILE_TIMES separator = arena_push(&worker_ctx->arena, 1, false); *separator = '\t'; - u64 time_size = 32; - char *created = arena_push(&worker_ctx->arena, time_size, false); - format_time(fe->created_time, created, time_size); - arena_trim_string(&worker_ctx->arena, created, ARENA_TRIM_TAB); + char *created = arena_push(&worker_ctx->arena, 19, false); + format_time(fe->created_time, created); + separator = arena_push(&worker_ctx->arena, 1, false); + *separator = '\t'; - char *modified = arena_push(&worker_ctx->arena, time_size, false); - format_time(fe->modified_time, modified, time_size); - arena_trim_string(&worker_ctx->arena, modified, ARENA_TRIM_NONE); + char *modified = arena_push(&worker_ctx->arena, 19, false); + format_time(fe->modified_time, modified); #endif // Owner