Updating the IO Ring and the progress printing function

This commit is contained in:
2026-03-31 19:33:39 +01:00
parent d4ba121b56
commit 41ac164881
13 changed files with 56 additions and 54 deletions

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@ file_hashes.txt
Binaries/file_hashes.txt Binaries/file_hashes.txt
file_list.txt file_list.txt
temp_code.c temp_code.c
/.cache/clangd/index

View File

@@ -49,3 +49,7 @@ Fixing user prompt parsing
4.5: Porting to linux 4.5: Porting to linux
Reorganising the code Reorganising the code
Improving the scan function Improving the scan function
5.0: Implementing the IO Ring instead of buffered hashing, for huge performance gains.
Fixing the xxh_x86dispatch warnings
Updating the progress printing function

View File

@@ -74,7 +74,7 @@ int main(int argc, char **argv) {
mem_arena *gp_arena = arena_create(&params); mem_arena *gp_arena = arena_create(&params);
// ------------------------------- // -------------------------------
// Detect hardware threads // Detect hardware
// ------------------------------- // -------------------------------
// --- Windows: detect PHYSICAL cores (not logical threads) --- // --- Windows: detect PHYSICAL cores (not logical threads) ---
size_t hw_threads = platform_physical_cores(); size_t hw_threads = platform_physical_cores();
@@ -86,6 +86,8 @@ int main(int argc, char **argv) {
hw_threads); hw_threads);
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set()); printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
// Align IO Ring block size to the system page size
u64 g_ioring_read_block = ALIGN_UP_POW2(IORING_READ_BLOCK, g_pagesize);
// ------------------------------- // -------------------------------
// Scanning and hashing // Scanning and hashing
// ------------------------------- // -------------------------------
@@ -127,9 +129,8 @@ int main(int argc, char **argv) {
workers[i].arena = arena_create(&params); workers[i].arena = arena_create(&params);
workers[i].file_queue = &file_queue; workers[i].file_queue = &file_queue;
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring, &workers[i]) if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring,
!= &workers[i]) != 0) {
0) {
fprintf(stderr, "Failed to create hash thread %zu\n", i); fprintf(stderr, "Failed to create hash thread %zu\n", i);
exit(1); exit(1);
} }

View File

@@ -803,17 +803,19 @@ static THREAD_RETURN hash_worker(void *arg) {
// ------------------------- Progress display --------------------------- // ------------------------- Progress display ---------------------------
static THREAD_RETURN progress_thread(void *arg) { static THREAD_RETURN progress_thread(void *arg) {
(void)arg; // Unused parameter (void)arg;
HiResTimer progress_timer; HiResTimer progress_timer;
timer_start(&progress_timer); timer_start(&progress_timer);
uint64_t last_bytes = atomic_load(&g_bytes_processed); uint64_t last_bytes = 0;
double last_time = 0.0; double last_time = 0.0;
double displayed_speed = 0.0; double displayed_speed = 0.0;
const double sample_interval = 0.5; const double sample_interval = 0.5;
// Hide cursor to prevent flickering
printf("\033[?25l");
for (;;) { for (;;) {
uint64_t found = atomic_load(&g_files_found); uint64_t found = atomic_load(&g_files_found);
uint64_t hashed = atomic_load(&g_files_hashed); uint64_t hashed = atomic_load(&g_files_hashed);
@@ -821,27 +823,20 @@ static THREAD_RETURN progress_thread(void *arg) {
int scan_done = atomic_load(&g_scan_done); int scan_done = atomic_load(&g_scan_done);
double t = timer_elapsed(&progress_timer); double t = timer_elapsed(&progress_timer);
if (last_time == 0.0) {
last_time = t;
last_bytes = bytes;
}
double dt = t - last_time; double dt = t - last_time;
if (dt >= sample_interval) { if (dt >= sample_interval) {
uint64_t db = bytes - last_bytes; uint64_t db = (bytes > last_bytes) ? bytes - last_bytes : 0;
if (db > 0 && dt > 0.0001) {
displayed_speed = (double)db / (1024.0 * 1024.0) / dt; displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
}
last_bytes = bytes; last_bytes = bytes;
last_time = t; last_time = t;
} }
printf("\r");
if (!scan_done) { if (!scan_done) {
printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ", printf("\033[1mScanning:\033[0m %llu files | Hashed: %llu | \033[32m%.2f "
"MB/s\033[0m ",
(unsigned long long)found, (unsigned long long)hashed, (unsigned long long)found, (unsigned long long)hashed,
displayed_speed); displayed_speed);
} else { } else {
@@ -849,18 +844,17 @@ static THREAD_RETURN progress_thread(void *arg) {
int barw = 40; int barw = 40;
int filled = (int)(pct * barw); int filled = (int)(pct * barw);
char bar[64]; printf("[");
int p = 0; // Print filled part in Green (\033[32m)
printf("\033[32m");
bar[p++] = '[';
for (int i = 0; i < filled; i++) for (int i = 0; i < filled; i++)
bar[p++] = '#'; putchar('#');
// Reset color for empty part
printf("\033[0m");
for (int i = filled; i < barw; i++) for (int i = filled; i < barw; i++)
bar[p++] = '.'; putchar('.');
bar[p++] = ']';
bar[p] = 0;
printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0, printf("] %6.2f%% (%llu/%llu) \033[32m%.2f MB/s\033[0m ", pct * 100.0,
(unsigned long long)hashed, (unsigned long long)found, (unsigned long long)hashed, (unsigned long long)found,
displayed_speed); displayed_speed);
} }
@@ -869,23 +863,23 @@ static THREAD_RETURN progress_thread(void *arg) {
if (scan_done && hashed == found) if (scan_done && hashed == found)
break; break;
sleep_ms(100); sleep_ms(100);
} }
printf("\n"); // Restore cursor (\033[?25h) and move to next line
printf("\033[?25h\n");
return THREAD_RETURN_VALUE; return THREAD_RETURN_VALUE;
} }
// ======================== Hash worker IO Ring ======================== // ======================== Hash worker IO Ring ========================
// -------------------------- Configuration --------------------------- // -------------------------- Configuration ---------------------------
#define IORING_READ_BLOCK (4096 * 64)
#define NUM_BUFFERS_PER_THREAD 16 #define NUM_BUFFERS_PER_THREAD 16
#define SUBMIT_TIMEOUT_MS 30000 #define SUBMIT_TIMEOUT_MS 30000
#define USERDATA_REGISTER 1 #define USERDATA_REGISTER 1
#define IORING_READ_BLOCK (KiB(1024))
// Global stats // Globals
u64 g_ioring_read_block = 4096 * 64;
static atomic_uint_fast64_t g_io_ring_fallbacks = 0; static atomic_uint_fast64_t g_io_ring_fallbacks = 0;
// -------------------------- Buffer structure --------------------------- // -------------------------- Buffer structure ---------------------------
@@ -964,28 +958,34 @@ static ThreadIoContext *io_ring_init_thread(void) {
} }
// Initialize buffer pool // Initialize buffer pool
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) { IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD];
// 4096 alignment u64 buf_pool_size = g_ioring_read_block * NUM_BUFFERS_PER_THREAD;
void *ptr = _aligned_malloc(IORING_READ_BLOCK, 4096);
if (!ptr) { // Reserve and Commit the entire memory chunk
void *base_ptr = plat_mem_reserve(buf_pool_size);
if (base_ptr) {
if (!plat_mem_commit(base_ptr, buf_pool_size)) {
plat_mem_release(base_ptr, 0);
return NULL;
}
} else {
return NULL; return NULL;
} }
g_thread_ctx->buffers[i].data = ptr; for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
g_thread_ctx->buffers[i].data = (u8 *)base_ptr + (i * g_ioring_read_block);
g_thread_ctx->buffer_pool[i] = i; g_thread_ctx->buffer_pool[i] = i;
g_thread_ctx->buffers[i].buffer_id = i; g_thread_ctx->buffers[i].buffer_id = i;
}
g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD;
IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD];
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
buf_info[i].Address = g_thread_ctx->buffers[i].data; buf_info[i].Address = g_thread_ctx->buffers[i].data;
buf_info[i].Length = IORING_READ_BLOCK; buf_info[i].Length = (ULONG)g_ioring_read_block;
} }
g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD;
HRESULT hb = BuildIoRingRegisterBuffers( HRESULT hb = BuildIoRingRegisterBuffers(
g_thread_ctx->ring, NUM_BUFFERS_PER_THREAD, buf_info, USERDATA_REGISTER); g_thread_ctx->ring, NUM_BUFFERS_PER_THREAD, buf_info, USERDATA_REGISTER);
@@ -1010,9 +1010,7 @@ static void io_ring_cleanup_thread(void) {
CloseHandle(g_thread_ctx->completion_event); CloseHandle(g_thread_ctx->completion_event);
if (g_thread_ctx->ring) if (g_thread_ctx->ring)
CloseIoRing(g_thread_ctx->ring); CloseIoRing(g_thread_ctx->ring);
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) { plat_mem_release(g_thread_ctx->buffers[0].data, 0);
_aligned_free(g_thread_ctx->buffers[i].data);
}
free(g_thread_ctx); free(g_thread_ctx);
g_thread_ctx = NULL; g_thread_ctx = NULL;
} }
@@ -1183,11 +1181,11 @@ static int submit_pending_reads(ThreadIoContext *ctx,
size_t bytes_to_read; size_t bytes_to_read;
if (remaining >= IORING_READ_BLOCK) { if (remaining >= g_ioring_read_block) {
bytes_to_read = IORING_READ_BLOCK; bytes_to_read = g_ioring_read_block;
} else { } else {
// Round UP to sector size (4096) // Round UP to sector size (4096)
bytes_to_read = (remaining + 4095) & ~4095; bytes_to_read = ALIGN_UP_POW2(remaining, g_pagesize);
} }
HRESULT hr = submit_read(ctx, file_ctx, buf, current_offset, bytes_to_read); HRESULT hr = submit_read(ctx, file_ctx, buf, current_offset, bytes_to_read);
@@ -1302,7 +1300,7 @@ static void xxh3_hash_file_parallel(ThreadIoContext *ctx, const char *path,
// -------------------------- Hash worker I/O Ring --------------------------- // -------------------------- Hash worker I/O Ring ---------------------------
static THREAD_RETURN hash_worker_io_ring(void *arg) { static THREAD_RETURN hash_worker_io_ring(void *arg) {
WorkerContext *ctx = (WorkerContext *)arg; WorkerContext *ctx = (WorkerContext *)arg;
unsigned char *temp_buffer = (unsigned char *)malloc(IORING_READ_BLOCK); unsigned char *temp_buffer = (unsigned char *)malloc(READ_BLOCK);
char hash[HASH_STRLEN]; char hash[HASH_STRLEN];
if (!temp_buffer) if (!temp_buffer)

View File

@@ -1,5 +1,3 @@
#define XXH_INLINE_ALL
/* /*
* xxHash - Extremely Fast Hash algorithm * xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2020-2021 Yann Collet * Copyright (C) 2020-2021 Yann Collet