Updating the IO Ring, updating the progress printing function

This commit is contained in:
2026-03-31 19:33:39 +01:00
parent d4ba121b56
commit 41ac164881
13 changed files with 56 additions and 54 deletions

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@ file_hashes.txt
Binaries/file_hashes.txt
file_list.txt
temp_code.c
/.cache/clangd/index

View File

@@ -49,3 +49,7 @@ Fixing user prompt parsing
4.5: Porting to Linux
Reorganising the code
Improving the scan function
5.0: Implementing the IO Ring instead of buffered hashing, for huge performance gains.
Fixing the xxh_x86dispatch warnings
Updating the progress printing function

View File

@@ -74,7 +74,7 @@ int main(int argc, char **argv) {
mem_arena *gp_arena = arena_create(&params);
// -------------------------------
// Detect hardware threads
// Detect hardware
// -------------------------------
// --- Windows: detect PHYSICAL cores (not logical threads) ---
size_t hw_threads = platform_physical_cores();
@@ -86,6 +86,8 @@ int main(int argc, char **argv) {
hw_threads);
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
// Align IO Ring block size to the system page size
u64 g_ioring_read_block = ALIGN_UP_POW2(IORING_READ_BLOCK, g_pagesize);
// -------------------------------
// Scanning and hashing
// -------------------------------
@@ -127,9 +129,8 @@ int main(int argc, char **argv) {
workers[i].arena = arena_create(&params);
workers[i].file_queue = &file_queue;
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring, &workers[i])
!=
0) {
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring,
&workers[i]) != 0) {
fprintf(stderr, "Failed to create hash thread %zu\n", i);
exit(1);
}

View File

@@ -803,17 +803,19 @@ static THREAD_RETURN hash_worker(void *arg) {
// ------------------------- Progress display ---------------------------
static THREAD_RETURN progress_thread(void *arg) {
(void)arg; // Unused parameter
(void)arg;
HiResTimer progress_timer;
timer_start(&progress_timer);
uint64_t last_bytes = atomic_load(&g_bytes_processed);
uint64_t last_bytes = 0;
double last_time = 0.0;
double displayed_speed = 0.0;
const double sample_interval = 0.5;
// Hide cursor to prevent flickering
printf("\033[?25l");
for (;;) {
uint64_t found = atomic_load(&g_files_found);
uint64_t hashed = atomic_load(&g_files_hashed);
@@ -821,27 +823,20 @@ static THREAD_RETURN progress_thread(void *arg) {
int scan_done = atomic_load(&g_scan_done);
double t = timer_elapsed(&progress_timer);
if (last_time == 0.0) {
last_time = t;
last_bytes = bytes;
}
double dt = t - last_time;
if (dt >= sample_interval) {
uint64_t db = bytes - last_bytes;
if (db > 0 && dt > 0.0001) {
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
}
uint64_t db = (bytes > last_bytes) ? bytes - last_bytes : 0;
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
last_bytes = bytes;
last_time = t;
}
printf("\r");
if (!scan_done) {
printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
printf("\033[1mScanning:\033[0m %llu files | Hashed: %llu | \033[32m%.2f "
"MB/s\033[0m ",
(unsigned long long)found, (unsigned long long)hashed,
displayed_speed);
} else {
@@ -849,18 +844,17 @@ static THREAD_RETURN progress_thread(void *arg) {
int barw = 40;
int filled = (int)(pct * barw);
char bar[64];
int p = 0;
bar[p++] = '[';
printf("[");
// Print filled part in Green (\033[32m)
printf("\033[32m");
for (int i = 0; i < filled; i++)
bar[p++] = '#';
putchar('#');
// Reset color for empty part
printf("\033[0m");
for (int i = filled; i < barw; i++)
bar[p++] = '.';
bar[p++] = ']';
bar[p] = 0;
putchar('.');
printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
printf("] %6.2f%% (%llu/%llu) \033[32m%.2f MB/s\033[0m ", pct * 100.0,
(unsigned long long)hashed, (unsigned long long)found,
displayed_speed);
}
@@ -869,23 +863,23 @@ static THREAD_RETURN progress_thread(void *arg) {
if (scan_done && hashed == found)
break;
sleep_ms(100);
}
printf("\n");
// Restore cursor (\033[?25h) and move to next line
printf("\033[?25h\n");
return THREAD_RETURN_VALUE;
}
// ======================== Hash worker IO Ring ========================
// -------------------------- Configuration ---------------------------
#define IORING_READ_BLOCK (4096 * 64)
#define NUM_BUFFERS_PER_THREAD 16
#define SUBMIT_TIMEOUT_MS 30000
#define USERDATA_REGISTER 1
// Global stats
#define IORING_READ_BLOCK (KiB(1024))
// Globals
u64 g_ioring_read_block = 4096 * 64;
static atomic_uint_fast64_t g_io_ring_fallbacks = 0;
// -------------------------- Buffer structure ---------------------------
@@ -964,28 +958,34 @@ static ThreadIoContext *io_ring_init_thread(void) {
}
// Initialize buffer pool
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD];
// 4096 alignment
void *ptr = _aligned_malloc(IORING_READ_BLOCK, 4096);
if (!ptr) {
u64 buf_pool_size = g_ioring_read_block * NUM_BUFFERS_PER_THREAD;
// Reserve and Commit the entire memory chunk
void *base_ptr = plat_mem_reserve(buf_pool_size);
if (base_ptr) {
if (!plat_mem_commit(base_ptr, buf_pool_size)) {
plat_mem_release(base_ptr, 0);
return NULL;
}
} else {
return NULL;
}
g_thread_ctx->buffers[i].data = ptr;
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
g_thread_ctx->buffers[i].data = (u8 *)base_ptr + (i * g_ioring_read_block);
g_thread_ctx->buffer_pool[i] = i;
g_thread_ctx->buffers[i].buffer_id = i;
}
g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD;
IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD];
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
buf_info[i].Address = g_thread_ctx->buffers[i].data;
buf_info[i].Length = IORING_READ_BLOCK;
buf_info[i].Length = (ULONG)g_ioring_read_block;
}
g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD;
HRESULT hb = BuildIoRingRegisterBuffers(
g_thread_ctx->ring, NUM_BUFFERS_PER_THREAD, buf_info, USERDATA_REGISTER);
@@ -1010,9 +1010,7 @@ static void io_ring_cleanup_thread(void) {
CloseHandle(g_thread_ctx->completion_event);
if (g_thread_ctx->ring)
CloseIoRing(g_thread_ctx->ring);
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
_aligned_free(g_thread_ctx->buffers[i].data);
}
plat_mem_release(g_thread_ctx->buffers[0].data, 0);
free(g_thread_ctx);
g_thread_ctx = NULL;
}
@@ -1183,11 +1181,11 @@ static int submit_pending_reads(ThreadIoContext *ctx,
size_t bytes_to_read;
if (remaining >= IORING_READ_BLOCK) {
bytes_to_read = IORING_READ_BLOCK;
if (remaining >= g_ioring_read_block) {
bytes_to_read = g_ioring_read_block;
} else {
// Round UP to sector size (4096)
bytes_to_read = (remaining + 4095) & ~4095;
bytes_to_read = ALIGN_UP_POW2(remaining, g_pagesize);
}
HRESULT hr = submit_read(ctx, file_ctx, buf, current_offset, bytes_to_read);
@@ -1302,7 +1300,7 @@ static void xxh3_hash_file_parallel(ThreadIoContext *ctx, const char *path,
// -------------------------- Hash worker I/O Ring ---------------------------
static THREAD_RETURN hash_worker_io_ring(void *arg) {
WorkerContext *ctx = (WorkerContext *)arg;
unsigned char *temp_buffer = (unsigned char *)malloc(IORING_READ_BLOCK);
unsigned char *temp_buffer = (unsigned char *)malloc(READ_BLOCK);
char hash[HASH_STRLEN];
if (!temp_buffer)

View File

@@ -1,5 +1,3 @@
#define XXH_INLINE_ALL
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2020-2021 Yann Collet