diff --git a/.cache/clangd/index/arena.c.9D15F3F90FD5376F.idx b/.cache/clangd/index/arena.c.9D15F3F90FD5376F.idx deleted file mode 100644 index c010d84..0000000 Binary files a/.cache/clangd/index/arena.c.9D15F3F90FD5376F.idx and /dev/null differ diff --git a/.cache/clangd/index/arena.h.D70DBB2C6778A245.idx b/.cache/clangd/index/arena.h.D70DBB2C6778A245.idx deleted file mode 100644 index fc1a62e..0000000 Binary files a/.cache/clangd/index/arena.h.D70DBB2C6778A245.idx and /dev/null differ diff --git a/.cache/clangd/index/base.h.2DF61E974E1BB064.idx b/.cache/clangd/index/base.h.2DF61E974E1BB064.idx deleted file mode 100644 index cd6107e..0000000 Binary files a/.cache/clangd/index/base.h.2DF61E974E1BB064.idx and /dev/null differ diff --git a/.cache/clangd/index/file_hasher.c.AC1BEF31045A7497.idx b/.cache/clangd/index/file_hasher.c.AC1BEF31045A7497.idx deleted file mode 100644 index bdbec4d..0000000 Binary files a/.cache/clangd/index/file_hasher.c.AC1BEF31045A7497.idx and /dev/null differ diff --git a/.cache/clangd/index/lf_mpmc.h.FB4A8CD7AC664EBA.idx b/.cache/clangd/index/lf_mpmc.h.FB4A8CD7AC664EBA.idx deleted file mode 100644 index 26a0ed2..0000000 Binary files a/.cache/clangd/index/lf_mpmc.h.FB4A8CD7AC664EBA.idx and /dev/null differ diff --git a/.cache/clangd/index/platform.c.2B562A3FE6816950.idx b/.cache/clangd/index/platform.c.2B562A3FE6816950.idx deleted file mode 100644 index 4a35d14..0000000 Binary files a/.cache/clangd/index/platform.c.2B562A3FE6816950.idx and /dev/null differ diff --git a/.cache/clangd/index/xxh_x86dispatch.h.3E8D4826C191778C.idx b/.cache/clangd/index/xxh_x86dispatch.h.3E8D4826C191778C.idx deleted file mode 100644 index d56cdc9..0000000 Binary files a/.cache/clangd/index/xxh_x86dispatch.h.3E8D4826C191778C.idx and /dev/null differ diff --git a/.cache/clangd/index/xxhash.h.11DA710B069D4A59.idx b/.cache/clangd/index/xxhash.h.11DA710B069D4A59.idx deleted file mode 100644 index 349da00..0000000 Binary files a/.cache/clangd/index/xxhash.h.11DA710B069D4A59.idx and /dev/null differ diff --git a/.gitignore b/.gitignore index bb895bb..96c65c4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ file_hashes.txt Binaries/file_hashes.txt file_list.txt temp_code.c +/.cache/clangd/index diff --git a/binaries/changelog.txt b/binaries/changelog.txt index 049d2ff..1be6243 100644 --- a/binaries/changelog.txt +++ b/binaries/changelog.txt @@ -49,3 +49,7 @@ Fixing user prompt parsing 4.5: Porting to linux Reorganising the code Improving the scan function + +5.0: Implementing the IO Ring instead of buffered hashing, huge performance gains. +fixing the xxh_x86dispatch warnings +Updating the progress printing function diff --git a/file_hasher.c b/file_hasher.c index 682f7f7..772559d 100644 --- a/file_hasher.c +++ b/file_hasher.c @@ -74,7 +74,7 @@ int main(int argc, char **argv) { mem_arena *gp_arena = arena_create(¶ms); // ------------------------------- - // Detect hardware threads + // Detect hardware // ------------------------------- // --- Windows: detect PHYSICAL cores (not logical threads) --- size_t hw_threads = platform_physical_cores(); @@ -86,6 +86,8 @@ int main(int argc, char **argv) { hw_threads); printf(" Selected instruction set: %s\n", get_xxhash_instruction_set()); + // Align IO Ring block size to the system page size + u64 g_ioring_read_block = ALIGN_UP_POW2(IORING_READ_BLOCK, g_pagesize); // ------------------------------- // Scanning and hashing // ------------------------------- @@ -127,9 +129,8 @@ int main(int argc, char **argv) { workers[i].arena = arena_create(¶ms); workers[i].file_queue = &file_queue; - if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring, &workers[i]) - != - 0) { + if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring, + &workers[i]) != 0) { fprintf(stderr, "Failed to create hash thread %zu\n", i); exit(1); } diff --git a/platform.c b/platform.c index dd92946..0d611e7 100644 --- a/platform.c +++ b/platform.c @@ -803,17 +803,19 @@ static THREAD_RETURN hash_worker(void *arg) { // ------------------------- Progress display --------------------------- static THREAD_RETURN progress_thread(void *arg) { - (void)arg; // Unused parameter + (void)arg; HiResTimer progress_timer; timer_start(&progress_timer); - uint64_t last_bytes = atomic_load(&g_bytes_processed); + uint64_t last_bytes = 0; double last_time = 0.0; - double displayed_speed = 0.0; const double sample_interval = 0.5; + // Hide cursor to prevent flickering + printf("\033[?25l"); + for (;;) { uint64_t found = atomic_load(&g_files_found); uint64_t hashed = atomic_load(&g_files_hashed); @@ -821,27 +823,20 @@ static THREAD_RETURN progress_thread(void *arg) { int scan_done = atomic_load(&g_scan_done); double t = timer_elapsed(&progress_timer); - - if (last_time == 0.0) { - last_time = t; - last_bytes = bytes; - } - double dt = t - last_time; if (dt >= sample_interval) { - uint64_t db = bytes - last_bytes; - - if (db > 0 && dt > 0.0001) { - displayed_speed = (double)db / (1024.0 * 1024.0) / dt; - } - + uint64_t db = (bytes > last_bytes) ? bytes - last_bytes : 0; + displayed_speed = (double)db / (1024.0 * 1024.0) / dt; last_bytes = bytes; last_time = t; } + printf("\r"); + if (!scan_done) { - printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ", + printf("\033[1mScanning:\033[0m %llu files | Hashed: %llu | \033[32m%.2f " + "MB/s\033[0m ", (unsigned long long)found, (unsigned long long)hashed, displayed_speed); } else { @@ -849,18 +844,17 @@ static THREAD_RETURN progress_thread(void *arg) { int barw = 40; int filled = (int)(pct * barw); - char bar[64]; - int p = 0; - - bar[p++] = '['; + printf("["); + // Print filled part in Green (\033[32m) + printf("\033[32m"); for (int i = 0; i < filled; i++) - bar[p++] = '#'; + putchar('#'); + // Reset color for empty part + printf("\033[0m"); for (int i = filled; i < barw; i++) - bar[p++] = '.'; - bar[p++] = ']'; - bar[p] = 0; + putchar('.'); - printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0, + printf("] %6.2f%% (%llu/%llu) \033[32m%.2f MB/s\033[0m ", pct * 100.0, (unsigned long long)hashed, (unsigned long long)found, displayed_speed); } @@ -869,23 +863,23 @@ static THREAD_RETURN progress_thread(void *arg) { if (scan_done && hashed == found) break; - sleep_ms(100); } - printf("\n"); + // Restore cursor (\033[?25h) and move to next line + printf("\033[?25h\n"); return THREAD_RETURN_VALUE; } // ======================== Hash worker IO Ring ======================== // -------------------------- Configuration --------------------------- -#define IORING_READ_BLOCK (4096 * 64) #define NUM_BUFFERS_PER_THREAD 16 #define SUBMIT_TIMEOUT_MS 30000 #define USERDATA_REGISTER 1 - -// Global stats +#define IORING_READ_BLOCK (KiB(1024)) +// Globals +u64 g_ioring_read_block = 4096 * 64; static atomic_uint_fast64_t g_io_ring_fallbacks = 0; // -------------------------- Buffer structure --------------------------- @@ -964,28 +958,34 @@ static ThreadIoContext *io_ring_init_thread(void) { } // Initialize buffer pool - for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) { + IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD]; - // 4096 alignment - void *ptr = _aligned_malloc(IORING_READ_BLOCK, 4096); - if (!ptr) { + u64 buf_pool_size = g_ioring_read_block * NUM_BUFFERS_PER_THREAD; + + // Reserve and Commit the entire memory chunk + void *base_ptr = plat_mem_reserve(buf_pool_size); + if (base_ptr) { + if (!plat_mem_commit(base_ptr, buf_pool_size)) { + plat_mem_release(base_ptr, 0); return NULL; } + } else { + return NULL; + } - g_thread_ctx->buffers[i].data = ptr; + for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) { + + g_thread_ctx->buffers[i].data = (u8 *)base_ptr + (i * g_ioring_read_block); g_thread_ctx->buffer_pool[i] = i; g_thread_ctx->buffers[i].buffer_id = i; - } - g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD; - IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD]; - - for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) { buf_info[i].Address = g_thread_ctx->buffers[i].data; - buf_info[i].Length = IORING_READ_BLOCK; + buf_info[i].Length = (ULONG)g_ioring_read_block; } + g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD; + HRESULT hb = BuildIoRingRegisterBuffers( g_thread_ctx->ring, NUM_BUFFERS_PER_THREAD, buf_info, USERDATA_REGISTER); @@ -1010,9 +1010,7 @@ static void io_ring_cleanup_thread(void) { CloseHandle(g_thread_ctx->completion_event); if (g_thread_ctx->ring) CloseIoRing(g_thread_ctx->ring); - for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) { - _aligned_free(g_thread_ctx->buffers[i].data); - } + plat_mem_release(g_thread_ctx->buffers[0].data, 0); free(g_thread_ctx); g_thread_ctx = NULL; } @@ -1183,11 +1181,11 @@ static int submit_pending_reads(ThreadIoContext *ctx, size_t bytes_to_read; - if (remaining >= IORING_READ_BLOCK) { - bytes_to_read = IORING_READ_BLOCK; + if (remaining >= g_ioring_read_block) { + bytes_to_read = g_ioring_read_block; } else { // Round UP to sector size (4096) - bytes_to_read = (remaining + 4095) & ~4095; + bytes_to_read = ALIGN_UP_POW2(remaining, g_pagesize); } HRESULT hr = submit_read(ctx, file_ctx, buf, current_offset, bytes_to_read); @@ -1302,7 +1300,7 @@ static void xxh3_hash_file_parallel(ThreadIoContext *ctx, const char *path, // -------------------------- Hash worker I/O Ring --------------------------- static THREAD_RETURN hash_worker_io_ring(void *arg) { WorkerContext *ctx = (WorkerContext *)arg; - unsigned char *temp_buffer = (unsigned char *)malloc(IORING_READ_BLOCK); + unsigned char *temp_buffer = (unsigned char *)malloc(READ_BLOCK); char hash[HASH_STRLEN]; if (!temp_buffer) diff --git a/xxh_x86dispatch.c b/xxh_x86dispatch.c index ddb71e1..0c15820 100644 --- a/xxh_x86dispatch.c +++ b/xxh_x86dispatch.c @@ -1,5 +1,3 @@ -#define XXH_INLINE_ALL - /* * xxHash - Extremely Fast Hash algorithm * Copyright (C) 2020-2021 Yann Collet