Updating the IO Ring, Updating the progress printing fn
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,3 +6,4 @@ file_hashes.txt
|
||||
Binaries/file_hashes.txt
|
||||
file_list.txt
|
||||
temp_code.c
|
||||
/.cache/clangd/index
|
||||
|
||||
@@ -49,3 +49,7 @@ Fixing user prompt parsing
|
||||
4.5: Porting to linux
|
||||
Reorganising the code
|
||||
Improving the scan function
|
||||
|
||||
5.0: Implementing the IO Ring instead of buffered hashing, huge performance gains.
|
||||
fixing the xxh_x86dispatch warnings
|
||||
Updating the progress printing function
|
||||
|
||||
@@ -74,7 +74,7 @@ int main(int argc, char **argv) {
|
||||
mem_arena *gp_arena = arena_create(&params);
|
||||
|
||||
// -------------------------------
|
||||
// Detect hardware threads
|
||||
// Detect hardware
|
||||
// -------------------------------
|
||||
// --- Windows: detect PHYSICAL cores (not logical threads) ---
|
||||
size_t hw_threads = platform_physical_cores();
|
||||
@@ -86,6 +86,8 @@ int main(int argc, char **argv) {
|
||||
hw_threads);
|
||||
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
|
||||
|
||||
// Align IO Ring block size to the system page size
|
||||
u64 g_ioring_read_block = ALIGN_UP_POW2(IORING_READ_BLOCK, g_pagesize);
|
||||
// -------------------------------
|
||||
// Scanning and hashing
|
||||
// -------------------------------
|
||||
@@ -127,9 +129,8 @@ int main(int argc, char **argv) {
|
||||
workers[i].arena = arena_create(&params);
|
||||
workers[i].file_queue = &file_queue;
|
||||
|
||||
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring, &workers[i])
|
||||
!=
|
||||
0) {
|
||||
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring,
|
||||
&workers[i]) != 0) {
|
||||
fprintf(stderr, "Failed to create hash thread %zu\n", i);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
94
platform.c
94
platform.c
@@ -803,17 +803,19 @@ static THREAD_RETURN hash_worker(void *arg) {
|
||||
|
||||
// ------------------------- Progress display ---------------------------
|
||||
static THREAD_RETURN progress_thread(void *arg) {
|
||||
(void)arg; // Unused parameter
|
||||
(void)arg;
|
||||
|
||||
HiResTimer progress_timer;
|
||||
timer_start(&progress_timer);
|
||||
|
||||
uint64_t last_bytes = atomic_load(&g_bytes_processed);
|
||||
uint64_t last_bytes = 0;
|
||||
double last_time = 0.0;
|
||||
|
||||
double displayed_speed = 0.0;
|
||||
const double sample_interval = 0.5;
|
||||
|
||||
// Hide cursor to prevent flickering
|
||||
printf("\033[?25l");
|
||||
|
||||
for (;;) {
|
||||
uint64_t found = atomic_load(&g_files_found);
|
||||
uint64_t hashed = atomic_load(&g_files_hashed);
|
||||
@@ -821,27 +823,20 @@ static THREAD_RETURN progress_thread(void *arg) {
|
||||
int scan_done = atomic_load(&g_scan_done);
|
||||
|
||||
double t = timer_elapsed(&progress_timer);
|
||||
|
||||
if (last_time == 0.0) {
|
||||
last_time = t;
|
||||
last_bytes = bytes;
|
||||
}
|
||||
|
||||
double dt = t - last_time;
|
||||
|
||||
if (dt >= sample_interval) {
|
||||
uint64_t db = bytes - last_bytes;
|
||||
|
||||
if (db > 0 && dt > 0.0001) {
|
||||
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
|
||||
}
|
||||
|
||||
uint64_t db = (bytes > last_bytes) ? bytes - last_bytes : 0;
|
||||
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
|
||||
last_bytes = bytes;
|
||||
last_time = t;
|
||||
}
|
||||
|
||||
printf("\r");
|
||||
|
||||
if (!scan_done) {
|
||||
printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
|
||||
printf("\033[1mScanning:\033[0m %llu files | Hashed: %llu | \033[32m%.2f "
|
||||
"MB/s\033[0m ",
|
||||
(unsigned long long)found, (unsigned long long)hashed,
|
||||
displayed_speed);
|
||||
} else {
|
||||
@@ -849,18 +844,17 @@ static THREAD_RETURN progress_thread(void *arg) {
|
||||
int barw = 40;
|
||||
int filled = (int)(pct * barw);
|
||||
|
||||
char bar[64];
|
||||
int p = 0;
|
||||
|
||||
bar[p++] = '[';
|
||||
printf("[");
|
||||
// Print filled part in Green (\033[32m)
|
||||
printf("\033[32m");
|
||||
for (int i = 0; i < filled; i++)
|
||||
bar[p++] = '#';
|
||||
putchar('#');
|
||||
// Reset color for empty part
|
||||
printf("\033[0m");
|
||||
for (int i = filled; i < barw; i++)
|
||||
bar[p++] = '.';
|
||||
bar[p++] = ']';
|
||||
bar[p] = 0;
|
||||
putchar('.');
|
||||
|
||||
printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
|
||||
printf("] %6.2f%% (%llu/%llu) \033[32m%.2f MB/s\033[0m ", pct * 100.0,
|
||||
(unsigned long long)hashed, (unsigned long long)found,
|
||||
displayed_speed);
|
||||
}
|
||||
@@ -869,23 +863,23 @@ static THREAD_RETURN progress_thread(void *arg) {
|
||||
|
||||
if (scan_done && hashed == found)
|
||||
break;
|
||||
|
||||
sleep_ms(100);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
// Restore cursor (\033[?25h) and move to next line
|
||||
printf("\033[?25h\n");
|
||||
|
||||
return THREAD_RETURN_VALUE;
|
||||
}
|
||||
|
||||
// ======================== Hash worker IO Ring ========================
|
||||
// -------------------------- Configuration ---------------------------
|
||||
#define IORING_READ_BLOCK (4096 * 64)
|
||||
#define NUM_BUFFERS_PER_THREAD 16
|
||||
#define SUBMIT_TIMEOUT_MS 30000
|
||||
#define USERDATA_REGISTER 1
|
||||
|
||||
// Global stats
|
||||
#define IORING_READ_BLOCK (KiB(1024))
|
||||
// Globals
|
||||
u64 g_ioring_read_block = 4096 * 64;
|
||||
static atomic_uint_fast64_t g_io_ring_fallbacks = 0;
|
||||
|
||||
// -------------------------- Buffer structure ---------------------------
|
||||
@@ -964,28 +958,34 @@ static ThreadIoContext *io_ring_init_thread(void) {
|
||||
}
|
||||
|
||||
// Initialize buffer pool
|
||||
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
|
||||
IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD];
|
||||
|
||||
// 4096 alignment
|
||||
void *ptr = _aligned_malloc(IORING_READ_BLOCK, 4096);
|
||||
if (!ptr) {
|
||||
u64 buf_pool_size = g_ioring_read_block * NUM_BUFFERS_PER_THREAD;
|
||||
|
||||
// Reserve and Commit the entire memory chunk
|
||||
void *base_ptr = plat_mem_reserve(buf_pool_size);
|
||||
if (base_ptr) {
|
||||
if (!plat_mem_commit(base_ptr, buf_pool_size)) {
|
||||
plat_mem_release(base_ptr, 0);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
g_thread_ctx->buffers[i].data = ptr;
|
||||
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
|
||||
|
||||
g_thread_ctx->buffers[i].data = (u8 *)base_ptr + (i * g_ioring_read_block);
|
||||
|
||||
g_thread_ctx->buffer_pool[i] = i;
|
||||
g_thread_ctx->buffers[i].buffer_id = i;
|
||||
}
|
||||
g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD;
|
||||
|
||||
IORING_BUFFER_INFO buf_info[NUM_BUFFERS_PER_THREAD];
|
||||
|
||||
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
|
||||
buf_info[i].Address = g_thread_ctx->buffers[i].data;
|
||||
buf_info[i].Length = IORING_READ_BLOCK;
|
||||
buf_info[i].Length = (ULONG)g_ioring_read_block;
|
||||
}
|
||||
|
||||
g_thread_ctx->free_count = NUM_BUFFERS_PER_THREAD;
|
||||
|
||||
HRESULT hb = BuildIoRingRegisterBuffers(
|
||||
g_thread_ctx->ring, NUM_BUFFERS_PER_THREAD, buf_info, USERDATA_REGISTER);
|
||||
|
||||
@@ -1010,9 +1010,7 @@ static void io_ring_cleanup_thread(void) {
|
||||
CloseHandle(g_thread_ctx->completion_event);
|
||||
if (g_thread_ctx->ring)
|
||||
CloseIoRing(g_thread_ctx->ring);
|
||||
for (int i = 0; i < NUM_BUFFERS_PER_THREAD; i++) {
|
||||
_aligned_free(g_thread_ctx->buffers[i].data);
|
||||
}
|
||||
plat_mem_release(g_thread_ctx->buffers[0].data, 0);
|
||||
free(g_thread_ctx);
|
||||
g_thread_ctx = NULL;
|
||||
}
|
||||
@@ -1183,11 +1181,11 @@ static int submit_pending_reads(ThreadIoContext *ctx,
|
||||
|
||||
size_t bytes_to_read;
|
||||
|
||||
if (remaining >= IORING_READ_BLOCK) {
|
||||
bytes_to_read = IORING_READ_BLOCK;
|
||||
if (remaining >= g_ioring_read_block) {
|
||||
bytes_to_read = g_ioring_read_block;
|
||||
} else {
|
||||
// Round UP to sector size (4096)
|
||||
bytes_to_read = (remaining + 4095) & ~4095;
|
||||
bytes_to_read = ALIGN_UP_POW2(remaining, g_pagesize);
|
||||
}
|
||||
|
||||
HRESULT hr = submit_read(ctx, file_ctx, buf, current_offset, bytes_to_read);
|
||||
@@ -1302,7 +1300,7 @@ static void xxh3_hash_file_parallel(ThreadIoContext *ctx, const char *path,
|
||||
// -------------------------- Hash worker I/O Ring ---------------------------
|
||||
static THREAD_RETURN hash_worker_io_ring(void *arg) {
|
||||
WorkerContext *ctx = (WorkerContext *)arg;
|
||||
unsigned char *temp_buffer = (unsigned char *)malloc(IORING_READ_BLOCK);
|
||||
unsigned char *temp_buffer = (unsigned char *)malloc(READ_BLOCK);
|
||||
char hash[HASH_STRLEN];
|
||||
|
||||
if (!temp_buffer)
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
#define XXH_INLINE_ALL
|
||||
|
||||
/*
|
||||
* xxHash - Extremely Fast Hash algorithm
|
||||
* Copyright (C) 2020-2021 Yann Collet
|
||||
|
||||
Reference in New Issue
Block a user