Compare commits

..

1 Commits

Author SHA1 Message Date
f904337878 Using FindFirstFileA() instead of CreateFileA() to get the file size
Since we already call FindFirstFileA() and it returns the size there is
no need to open/close every file to get it's size
2026-03-10 19:54:52 +01:00
19 changed files with 1452 additions and 4170 deletions

6
.gitignore vendored
View File

@@ -3,9 +3,5 @@ file_hasher.ilk
file_hasher.rdi file_hasher.rdi
file_hasher.exe file_hasher.exe
file_hashes.txt file_hashes.txt
Binaries/file_hashes.txt
file_list.txt file_list.txt
temp_code.c temp.c
/.cache/clangd/index
/file_hasher
/io_uring_test

View File

@@ -1,26 +1,3 @@
# filehasher # filehasher
Collects some metadata and hashes files. Collects some metadata and hashes files.
## Building:
### Windows:
#### Release:
clang-cl /O3 file_hasher.c xxh_x86dispatch.c
Note: MinGW does not provide IO Ring headers yet, to fix that include ioringapi.c, this will dynamically load all the functions and define all the symbols necessary to replace the official header.
clang -O3 file_hasher.c xxh_x86dispatch.c -o file_hasher
gcc -O3 file_hasher.c xxh_x86dispatch.c -o file_hasher
#### Debug:
clang-cl /Zi /Od file_hasher.c xxh_x86dispatch.c
clang -g -O0 file_hasher.c xxh_x86dispatch.c -o file_hasher
gcc -g -O0 file_hasher.c xxh_x86dispatch.c -o file_hasher
### Linux:
#### Release:
clang -O3 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o file_hasher
gcc -O3 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o file_hasher
#### Debug:
clang -g -O0 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o file_hasher
gcc -g -O0 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o file_hasher

69
base.h
View File

@@ -1,52 +1,41 @@
#pragma once #pragma once
#define _CRT_SECURE_NO_WARNINGS
#if defined(_WIN32) || defined(_WIN64)
#if defined(_MSC_VER)
#pragma comment(lib, "advapi32.lib")
#endif
#include <aclapi.h>
#include <fcntl.h>
#include <io.h>
#include <ioringapi.h>
#include <ntioring_x.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
#include <winerror.h>
#elif defined(__linux__)
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dirent.h>
#include <fcntl.h>
#include <liburing.h>
#include <pthread.h>
#include <pwd.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>
#include <poll.h>
#include <sys/eventfd.h>
#endif
#include <assert.h> #include <assert.h>
#include <ctype.h>
#include <immintrin.h> #include <immintrin.h>
#include <stdatomic.h> #include <stdatomic.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
#if defined(_WIN32) || defined(_WIN64)
#define PLATFORM_WINDOWS 1
#include <aclapi.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
#define strdup _strdup
#else
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#define XXH_VECTOR \
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"
/* ------------------------------------------------------------ /* ------------------------------------------------------------
Base types Base types
------------------------------------------------------------ */ ------------------------------------------------------------ */
@@ -164,9 +153,6 @@ static void plat_sem_destroy(plat_sem *s) {
} }
} }
// Sleep
static void sleep_ms(int ms) { Sleep(ms); }
#elif defined(__linux__) #elif defined(__linux__)
// Memory allocation // Memory allocation
@@ -232,7 +218,4 @@ static void plat_sem_post(plat_sem *s, u32 count) {
static void plat_sem_destroy(plat_sem *s) { sem_destroy(&s->sem); } static void plat_sem_destroy(plat_sem *s) { sem_destroy(&s->sem); }
// Sleep
static void sleep_ms(int ms) { usleep(ms * 1000); }
#endif #endif

View File

@@ -35,23 +35,3 @@ Making the MPMC queue platform agnostic
Align the MPMC queue to pagesize Align the MPMC queue to pagesize
Getting file size from FindFirstFileA() instead of CreateFileA(), since we already call FindFirstFileA() and it returns the size there is no need to open/close every file to get it's size Getting file size from FindFirstFileA() instead of CreateFileA(), since we already call FindFirstFileA() and it returns the size there is no need to open/close every file to get it's size
Replacing Malloc and strdup in scan helper function with FileEntry and path arenas
Making the MPMC queue support when producers are consumers at the same time by adding a variable work, mpmc_push_work() that increments work and mpmc_task_done() that decrements work, and if work = 0 calls mpmc_producers_finished() that pushes poinsons to wake up sleeping threads and make them return NULL
Replacing DirQueue, a queue growable with realloc with the MPMC queue
4.1: Using xxhash xxh_x86dispatch to select the best SIMD instruction set at runtime, this dispatcher can not be added in a unity build and we must remove AVX2 or AVX512 compilation flags, link xxh_x86dispatch.c in the compilation command. The compilaiton throws two warnings about function with internal linkage but not defined, they are defined in xxh_x86dispatch.c so it's harmless warnings
Fixing user prompt parsing
4.5: Porting to linux
Reorganising the code
Improving the scan function
5.0: Implementing the IO Ring for windows and ui_uring for linux instead of buffered hashing, huge performance gains. The IO Ring is event driven, thread local, uses DMA and direct disk I/O, bypassing the OS cash completely, registred buffers, it supports bashing multiple submissions and can handle multiple files at the same time.
Hashing small files using XXH3_128bits() instead of the streaming pipeline(XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest()), this reduses the overhead of creating a state and digest, coupled with the IO Ring it improves the hashing of small files whose size is inferior to the size of IO Ring buffers
fixing the xxh_x86dispatch warnings
Updating the progress printing function

Binary file not shown.

View File

@@ -1,7 +0,0 @@
[
{
"directory": "D:/Code/c/filehasher",
"command": "clang-cl /O2 file_hasher.c xxh_x86dispatch.c",
"file": "file_hasher.c"
}
]

Binary file not shown.

View File

@@ -1,286 +1,7 @@
#include "platform.c" #define _CRT_SECURE_NO_WARNINGS
// ----------------------------- Main --------------------------------------- #if defined(_WIN32) || defined(_WIN64)
int main(int argc, char **argv) { #include "platform_windows.c"
char folders[64][MAX_PATHLEN]; // up to 64 input folders #else
int folder_count = 0; #include "platform_posix.c"
#endif
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (folder_count < 64) {
normalize_path(argv[i]);
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0) {
printf("Enter folders to process (Enter = current folder): ");
fflush(stdout);
char buf[KiB(32)];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0) {
strcpy(folders[0], ".");
folder_count = 1;
} else {
folder_count = parse_paths(buf, folders, 64);
}
}
// Display selected folders
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Scanning and total timer init
// -------------------------------
timer_init();
HiResTimer total_timer;
HiResTimer scan_timer;
timer_start(&total_timer);
timer_start(&scan_timer);
// -------------------------------
// Creating a general purpose arena
// -------------------------------
arena_params params = {
.reserve_size = GiB(1),
.commit_size = MiB(16),
.align = 0,
.push_size = 0,
.allow_free_list = true,
.allow_swapback = false,
.growth_policy = ARENA_GROWTH_NORMAL,
.commit_policy = ARENA_COMMIT_LAZY,
.max_nbre_blocks = 1,
};
mem_arena *gp_arena = arena_create(&params);
// -------------------------------
// Detect hardware
// -------------------------------
// --- Windows: detect PHYSICAL cores (not logical threads) ---
size_t hw_threads = platform_physical_cores();
// Logical threads = CPU cores * 2
size_t num_threads = hw_threads * 2;
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
hw_threads);
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
// Align IO Ring block size to the system page size
g_ioring_buffer_size = ALIGN_UP_POW2(g_ioring_buffer_size, g_pagesize);
// -------------------------------
// Scanning and hashing
// -------------------------------
// test_io_ring();
MPMCQueue dir_queue;
mpmc_init(&dir_queue, MiB(1));
MPMCQueue file_queue;
mpmc_init(&file_queue, MiB(1));
// Starting hash threads
// size_t num_hash_threads = num_threads;
//
// WorkerContext workers[num_hash_threads];
// Thread *hash_threads =
// arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
//
// for (size_t i = 0; i < num_hash_threads; ++i) {
// workers[i].arena = arena_create(&params);
// workers[i].file_queue = &file_queue;
//
// if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker, &workers[i])
// !=
// 0) {
// fprintf(stderr, "Failed to create hash thread %zu\n", i);
// exit(1);
// }
// }
// Starting hash threads
size_t num_hash_threads = num_threads;
// size_t num_hash_threads = 1;
WorkerContext workers[num_hash_threads];
Thread *hash_threads =
arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
for (size_t i = 0; i < num_hash_threads; ++i) {
workers[i].arena = arena_create(&params);
workers[i].file_queue = &file_queue;
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_io_ring,
&workers[i]) != 0) {
fprintf(stderr, "Failed to create hash thread %zu\n", i);
exit(1);
}
}
// Starting hash threads
// size_t num_hash_threads = num_threads;
//
// WorkerContext workers[num_hash_threads];
// Thread *hash_threads =
// arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
//
// // Check if I/O Ring is available
// bool io_ring_available = false;
// HIORING test_ring = io_ring_init();
// if (test_ring) {
// io_ring_available = true;
// io_ring_cleanup(test_ring);
// // printf("I/O Ring is available, using high-performance async I/O\n");
// } else {
// printf("I/O Ring not available, using buffered I/O\n");
// }
//
// for (size_t i = 0; i < num_hash_threads; ++i) {
// workers[i].arena = arena_create(&params);
// workers[i].file_queue = &file_queue;
//
// // Select the appropriate worker function
// ThreadFunc fn = io_ring_available ? (ThreadFunc)hash_worker_io_ring
// : (ThreadFunc)hash_worker;
//
// if (thread_create(&hash_threads[i], fn, &workers[i]) != 0) {
// fprintf(stderr, "Failed to create hash thread %zu\n", i);
// exit(1);
// }
// }
// Starting progress printing thread
Thread progress_thread_handle;
if (thread_create(&progress_thread_handle, (ThreadFunc)progress_thread,
NULL) != 0) {
fprintf(stderr, "Failed to create progress thread\n");
exit(1);
}
// Starting scan threads
size_t num_scan_threads = num_threads;
ScannerContext scanners[num_scan_threads];
Thread *scan_threads =
arena_push(&gp_arena, sizeof(Thread) * num_scan_threads, true);
for (size_t i = 0; i < num_scan_threads; i++) {
scanners[i].num_threads = num_scan_threads;
scanners[i].path_arena = arena_create(&params);
scanners[i].meta_arena = arena_create(&params);
scanners[i].dir_queue = &dir_queue;
scanners[i].file_queue = &file_queue;
if (thread_create(&scan_threads[i], (ThreadFunc)scan_worker,
&scanners[i]) != 0) {
fprintf(stderr, "Failed to create scan thread %zu\n", i);
exit(1);
}
}
// Initial folder push
for (int i = 0; i < folder_count; i++) {
size_t len = strlen(folders[i]) + 1;
char *path = arena_push(&scanners[0].path_arena, len, false);
memcpy(path, folders[i], len);
mpmc_push_work(&dir_queue, path);
}
// Stop scan threads
thread_wait_multiple(scan_threads, num_scan_threads);
for (size_t i = 0; i < num_scan_threads; ++i) {
thread_close(&scan_threads[i]);
}
mpmc_producers_finished(&file_queue, num_hash_threads);
atomic_store(&g_scan_done, 1);
arena_free(&gp_arena, (u8 **)&scan_threads,
sizeof(Thread) * num_scan_threads);
double scan_seconds = timer_elapsed(&scan_timer);
size_t total_found = atomic_load(&g_files_found);
printf("\r%*s\r", 120, ""); // clear_console_line
printf("Completed scanning in %.2f seconds, found %zu files\n\n",
scan_seconds, total_found);
// If no files found
if (total_found == 0) {
printf("No files found.\n");
return 0;
}
// Stop hashing threads
thread_wait_multiple(hash_threads, num_hash_threads);
for (size_t i = 0; i < num_hash_threads; ++i) {
thread_close(&hash_threads[i]);
}
arena_free(&gp_arena, (u8 **)&hash_threads,
sizeof(Thread) * num_hash_threads);
// Stop progress printing thread
thread_join(&progress_thread_handle);
thread_close(&progress_thread_handle);
// -------------------------------
// Export file_hashes.txt
// -------------------------------
FILE *f = fopen(FILE_HASHES_TXT, "wb");
for (int i = 0; i < num_hash_threads; i++) {
mem_arena *arena = workers[i].arena;
u8 *arena_base =
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
fwrite(arena_base, 1, arena->pos, f);
}
fclose(f);
// -------------------------------
// Print summary
// -------------------------------
uint64_t incomplete = atomic_load(&g_io_ring_fallbacks);
if (incomplete > 0) {
printf("\nWARNING: I/O Ring incomplete files: %llu (fallback to buffered "
"I/O used)\n",
(unsigned long long)incomplete);
}
double total_seconds = timer_elapsed(&total_timer);
printf("Completed hashing %zu files\n", total_found);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / total_seconds;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
printf(" Total time : %.2f seconds\n\n", total_seconds);
return 0;
}

View File

@@ -1,147 +0,0 @@
#pragma once
#include <ioringapi.h>
#include <ntioring_x.h>
// #include "ioringapi.c"
#include <winerror.h>
// Initialize I/O Ring
HIORING io_ring_init(void) {
// if (!io_ring_load_functions()) {
// printf("[I/O Ring] Failed to load functions\n");
// return NULL;
// }
IORING_CAPABILITIES caps;
ZeroMemory(&caps, sizeof(caps));
HRESULT hr = QueryIoRingCapabilities(&caps);
if (FAILED(hr)) {
printf("[I/O Ring] QueryIoRingCapabilities failed: 0x%08lx\n", hr);
return NULL;
}
// printf("[I/O Ring] MaxVersion=%d, MaxSubmission=%u, MaxCompletion=%u\n",
// (int)caps.MaxVersion, caps.MaxSubmissionQueueSize,
// caps.MaxCompletionQueueSize);
if (caps.MaxVersion < IORING_VERSION_1) {
printf("[I/O Ring] Version too old\n");
return NULL;
}
IORING_CREATE_FLAGS flags = {0};
HIORING ring = NULL;
// hr = CreateIoRing(IORING_VERSION_1, flags, 256, 512, &ring);
hr = CreateIoRing(caps.MaxVersion, flags, 256, 512, &ring);
if (FAILED(hr)) {
printf("[I/O Ring] CreateIoRing failed: 0x%08lx\n", hr);
return NULL;
}
// printf("[I/O Ring] Created successfully\n");
// Check if read operation is supported
// HRESULT io_ring_support = IsIoRingOpSupported(ring, IORING_OP_READ);
// if (io_ring_support == S_FALSE) {
// printf("[I/O Ring] Not supported, %ld /n", io_ring_support);
// }
// Get ring info
IORING_INFO info;
ZeroMemory(&info, sizeof(info));
GetIoRingInfo(ring, &info);
// printf("[I/O Ring] Submission: %u, Completion: %u\n",
// info.SubmissionQueueSize, info.CompletionQueueSize);
return ring;
}
void io_ring_cleanup(HIORING ring) {
if (ring) {
CloseIoRing(ring);
// printf("[I/O Ring] Closed\n");
}
}
// Read file using I/O Ring
int io_ring_read_file(HIORING ring, HANDLE hFile, void *buffer, DWORD size,
UINT64 offset) {
IORING_HANDLE_REF file_ref = IoRingHandleRefFromHandle(hFile);
IORING_BUFFER_REF buf_ref = IoRingBufferRefFromPointer(buffer);
HRESULT hr = BuildIoRingReadFile(ring, file_ref, buf_ref, size, offset,
(UINT_PTR)buffer, IOSQE_FLAGS_NONE);
if (FAILED(hr))
return -1;
UINT32 submitted = 0;
hr = SubmitIoRing(ring, 1, INFINITE, &submitted);
if (FAILED(hr) || submitted == 0)
return -1;
for (;;) {
IORING_CQE cqe;
hr = PopIoRingCompletion(ring, &cqe);
if (FAILED(hr))
continue;
if (cqe.UserData != (UINT_PTR)buffer)
continue;
if (FAILED(cqe.ResultCode))
return -1;
return (int)cqe.Information;
}
}
// Test function
void test_io_ring(void) {
printf("\n=== Testing I/O Ring ===\n");
HIORING ring = io_ring_init();
if (!ring) {
printf("I/O Ring not available\n");
return;
}
// Create test file
HANDLE hFile = CreateFileA("test.txt", GENERIC_READ | GENERIC_WRITE, 0, NULL,
CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile != INVALID_HANDLE_VALUE) {
char test_data[] =
"Hello, I/O Ring! This is a test of the Windows I/O Ring API.";
DWORD written;
WriteFile(hFile, test_data, sizeof(test_data), &written, NULL);
CloseHandle(hFile);
}
// Read using I/O Ring
hFile = CreateFileA("test.txt", GENERIC_READ, FILE_SHARE_READ, NULL,
OPEN_EXISTING, FILE_FLAG_OVERLAPPED, NULL);
if (hFile != INVALID_HANDLE_VALUE) {
char buffer[512] = {0};
int bytes = io_ring_read_file(ring, hFile, buffer, sizeof(buffer), 0);
if (bytes > 0) {
printf("Read %d bytes: %s\n", bytes, buffer);
} else {
printf("Failed to read file\n");
}
CloseHandle(hFile);
} else {
printf("Failed to open test file\n");
}
// Cleanup
DeleteFileA("test.txt");
io_ring_cleanup(ring);
printf("=== Test complete ===\n\n");
}

Binary file not shown.

View File

@@ -1,454 +0,0 @@
/*
# Compile
gcc -o io_uring_test io_uring_test.c -luring
# Run
./io_uring_test
*/
#include "base.h"
#include <stdint.h>
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#define TEST_FILE "test_io_uring.txt"
#define BUFFER_SIZE 4096
#define NUM_BUFFERS 4
// Colors for output
#define COLOR_GREEN "\033[0;32m"
#define COLOR_RED "\033[0;31m"
#define COLOR_YELLOW "\033[0;33m"
#define COLOR_BLUE "\033[0;34m"
#define COLOR_RESET "\033[0m"
// Test result tracking
typedef struct {
int passed;
int failed;
} TestResults;
static void print_success(const char *step) {
printf(COLOR_GREEN "[✓] SUCCESS: %s" COLOR_RESET "\n", step);
}
static void print_failure(const char *step, const char *error) {
printf(COLOR_RED "[✗] FAILED: %s - %s" COLOR_RESET "\n", step, error);
}
static void print_info(const char *msg) {
printf(COLOR_BLUE "[i] INFO: %s" COLOR_RESET "\n", msg);
}
static void print_step(const char *step) {
printf(COLOR_YELLOW "\n>>> Testing: %s" COLOR_RESET "\n", step);
}
// Create a test file with known content
static int create_test_file(void) {
const char *test_content =
"Hello, io_uring! This is a test file for async I/O operations.\n"
"Line 2: Testing reads with registered buffers.\n"
"Line 3: The quick brown fox jumps over the lazy dog.\n"
"Line 4: ABCDEFGHIJKLMNOPQRSTUVWXYZ\n"
"Line 5: 0123456789\n";
FILE *f = fopen(TEST_FILE, "w");
if (!f) {
perror("Failed to create test file");
return -1;
}
fprintf(f, "%s", test_content);
fclose(f);
print_info("Test file created successfully");
return 0;
}
// Test 1: Create io_uring instance
static int test_io_uring_create(struct io_uring *ring, TestResults *results) {
print_step("io_uring creation");
int ret = io_uring_queue_init(256, ring, 0);
if (ret < 0) {
print_failure("io_uring_queue_init", strerror(-ret));
results->failed++;
return -1;
}
print_success("io_uring instance created");
results->passed++;
return 0;
}
// Test 2: Register buffers
static int test_register_buffers(struct io_uring *ring, void **buffers,
struct iovec *iovs, TestResults *results) {
print_step("Buffer registration");
// Allocate and prepare buffers
size_t total_size = BUFFER_SIZE * NUM_BUFFERS;
*buffers = aligned_alloc(4096, total_size); // Page-aligned for O_DIRECT
if (!*buffers) {
print_failure("Buffer allocation", strerror(errno));
results->failed++;
return -1;
}
// Initialize iovecs
for (int i = 0; i < NUM_BUFFERS; i++) {
iovs[i].iov_base = (char *)*buffers + (i * BUFFER_SIZE);
iovs[i].iov_len = BUFFER_SIZE;
memset(iovs[i].iov_base, 0, BUFFER_SIZE);
}
int ret = io_uring_register_buffers(ring, iovs, NUM_BUFFERS);
if (ret < 0) {
print_failure("io_uring_register_buffers", strerror(-ret));
results->failed++;
return -1;
}
print_success("Buffers registered successfully");
results->passed++;
return 0;
}
// Test 3: Open file
// Modified test_open_file function
static int test_open_file(int *fd, TestResults *results) {
print_step("File opening");
// Get file size
struct stat st;
if (stat(TEST_FILE, &st) != 0) {
print_failure("stat", strerror(errno));
results->failed++;
return -1;
}
// Check if file size is page-aligned
int page_size = plat_get_pagesize();
size_t file_size = st.st_size;
printf(" File size: %zu bytes\n", file_size);
printf(" Page size: %d bytes\n", page_size);
if (file_size % page_size != 0) {
printf(" Extending read size from %zu to %zu bytes\n", file_size,
ALIGN_UP_POW2(file_size, page_size));
}
// Try O_DIRECT first
*fd = open(TEST_FILE, O_RDONLY | O_DIRECT);
if (*fd < 0) {
print_info("O_DIRECT failed, trying without it");
*fd = open(TEST_FILE, O_RDONLY);
if (*fd < 0) {
print_failure("open", strerror(errno));
results->failed++;
return -1;
}
print_info("Using buffered I/O (O_DIRECT not available)");
} else {
print_success("File opened with O_DIRECT");
}
results->passed++;
return 0;
}
// Test 4: Build and submit read operation
static int test_submit_read(struct io_uring *ring, int fd, struct iovec *iovs,
int buffer_id, uint64_t user_data,
TestResults *results) {
print_step("Building and submitting read operation");
// Get file size for proper alignment
struct stat st;
if (fstat(fd, &st) != 0) {
print_failure("fstat", strerror(errno));
results->failed++;
return -1;
}
u32 page_size = plat_get_pagesize();
size_t file_size = st.st_size;
size_t read_size = BUFFER_SIZE;
// For O_DIRECT, ensure read size is sector-aligned
if (read_size > file_size) {
read_size = ALIGN_UP_POW2(file_size, page_size);
printf(" Adjusted read size to %zu bytes for O_DIRECT alignment\n",
read_size);
}
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
if (!sqe) {
print_failure("io_uring_get_sqe", "No available SQE");
results->failed++;
return -1;
}
// Prepare read operation using registered buffer
io_uring_prep_read_fixed(sqe, fd, iovs[buffer_id].iov_base, read_size, 0,
buffer_id);
io_uring_sqe_set_data64(sqe, user_data);
int ret = io_uring_submit(ring);
if (ret < 0) {
print_failure("io_uring_submit", strerror(-ret));
results->failed++;
return -1;
}
print_success("Read operation submitted successfully");
results->passed++;
return 0;
}
// Test 5: Wait for completion
static int test_wait_completion(struct io_uring *ring,
struct io_uring_cqe **cqe,
TestResults *results) {
print_step("Waiting for completion");
int ret = io_uring_wait_cqe(ring, cqe);
if (ret < 0) {
print_failure("io_uring_wait_cqe", strerror(-ret));
results->failed++;
return -1;
}
print_success("Completion received");
results->passed++;
return 0;
}
// Test 6: Process completion
static int test_process_completion(struct io_uring_cqe *cqe,
uint64_t expected_user_data,
TestResults *results) {
print_step("Processing completion");
uint64_t user_data = io_uring_cqe_get_data64(cqe);
int res = cqe->res;
printf(" Completion data:\n");
printf(" User data: %lu (expected: %lu)\n", user_data, expected_user_data);
printf(" Result: %d bytes read\n", res);
if (user_data != expected_user_data) {
print_failure("User data mismatch",
"User data doesn't match expected value");
results->failed++;
return -1;
}
if (res < 0) {
print_failure("Read operation", strerror(-res));
results->failed++;
return -1;
}
print_success("Completion processed successfully");
results->passed++;
return res; // Return number of bytes read
}
// Test 7: Verify read data
static int test_verify_data(struct iovec *iovs, int buffer_id, int bytes_read,
TestResults *results) {
print_step("Data verification");
char *data = (char *)iovs[buffer_id].iov_base;
printf(" Read data (first 200 chars):\n");
printf(" ---\n");
for (int i = 0; i < bytes_read && i < 200; i++) {
putchar(data[i]);
}
if (bytes_read > 200)
printf("...");
printf("\n ---\n");
// Check if data is not empty
if (bytes_read == 0) {
print_failure("Data verification", "No data read");
results->failed++;
return -1;
}
// Check if data contains expected content
if (strstr(data, "io_uring") == NULL) {
print_failure("Data verification", "Expected content not found");
results->failed++;
return -1;
}
print_success("Data verified successfully");
results->passed++;
return 0;
}
// Test 8: Test multiple concurrent reads
static int test_concurrent_reads(struct io_uring *ring, int fd,
struct iovec *iovs, TestResults *results) {
print_step("Concurrent reads test");
int num_reads = 3;
int submitted = 0;
// Submit multiple reads
for (int i = 0; i < num_reads; i++) {
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
if (!sqe) {
print_failure("Getting SQE for concurrent read", "No available SQE");
results->failed++;
return -1;
}
off_t offset = i * 100; // Read from different offsets
io_uring_prep_read_fixed(sqe, fd, iovs[i].iov_base, BUFFER_SIZE, offset, i);
io_uring_sqe_set_data64(sqe, i);
submitted++;
}
int ret = io_uring_submit(ring);
if (ret != submitted) {
char msg[64];
snprintf(msg, sizeof(msg), "Expected %d, got %d", submitted, ret);
print_failure("Submitting concurrent reads", msg);
results->failed++;
return -1;
}
print_success("Concurrent reads submitted");
// Wait for and process completions
for (int i = 0; i < submitted; i++) {
struct io_uring_cqe *cqe;
ret = io_uring_wait_cqe(ring, &cqe);
if (ret < 0) {
print_failure("Waiting for concurrent read completion", strerror(-ret));
results->failed++;
return -1;
}
uint64_t user_data = io_uring_cqe_get_data64(cqe);
int res = cqe->res;
printf(" Concurrent read %lu completed: %d bytes read\n", user_data, res);
io_uring_cqe_seen(ring, cqe);
}
print_success("Concurrent reads completed successfully");
results->passed++;
return 0;
}
// Cleanup function
static void cleanup(struct io_uring *ring, int fd, void *buffers) {
if (fd >= 0)
close(fd);
if (buffers) {
io_uring_unregister_buffers(ring);
free(buffers);
}
io_uring_queue_exit(ring);
remove(TEST_FILE);
}
int main() {
TestResults results = {0, 0};
struct io_uring ring;
int fd = -1;
void *buffers = NULL;
struct iovec iovs[NUM_BUFFERS];
printf(COLOR_BLUE "\n========================================\n");
printf(" io_uring Test Suite\n");
printf("========================================\n" COLOR_RESET);
// Create test file
if (create_test_file() != 0) {
return 1;
}
// Test 1: Create io_uring
if (test_io_uring_create(&ring, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Test 2: Register buffers
if (test_register_buffers(&ring, &buffers, iovs, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Test 3: Open file
if (test_open_file(&fd, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Test 4: Submit read
uint64_t test_user_data = 12345;
if (test_submit_read(&ring, fd, iovs, 0, test_user_data, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Test 5: Wait for completion
struct io_uring_cqe *cqe;
if (test_wait_completion(&ring, &cqe, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Test 6: Process completion
int bytes_read = test_process_completion(cqe, test_user_data, &results);
if (bytes_read < 0) {
cleanup(&ring, fd, buffers);
return 1;
}
io_uring_cqe_seen(&ring, cqe);
// Test 7: Verify data
if (test_verify_data(iovs, 0, bytes_read, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Test 8: Concurrent reads
if (test_concurrent_reads(&ring, fd, iovs, &results) != 0) {
cleanup(&ring, fd, buffers);
return 1;
}
// Print summary
printf(COLOR_BLUE "\n========================================\n");
printf(" TEST SUMMARY\n");
printf("========================================\n" COLOR_RESET);
printf(" Total tests: %d\n", results.passed + results.failed);
printf(COLOR_GREEN " Passed: %d\n" COLOR_RESET, results.passed);
if (results.failed > 0) {
printf(COLOR_RED " Failed: %d\n" COLOR_RESET, results.failed);
} else {
printf(COLOR_GREEN " ✓ ALL TESTS PASSED!\n" COLOR_RESET);
}
// Cleanup
cleanup(&ring, fd, buffers);
return results.failed > 0 ? 1 : 0;
}

View File

@@ -1,285 +0,0 @@
#pragma once
#include <stdio.h>
#include <windows.h>
#include <winnt.h>
// Forward declarations
typedef struct IORING_HANDLE_REF IORING_HANDLE_REF;
typedef struct IORING_BUFFER_REF IORING_BUFFER_REF;
typedef void *HIORING;
/* --------------------- Types declaration --------------------- */
typedef enum IORING_CREATE_ADVISORY_FLAGS {
IORING_CREATE_ADVISORY_FLAGS_NONE,
IORING_CREATE_SKIP_BUILDER_PARAM_CHECKS
} IORING_CREATE_ADVISORY_FLAGS;
// Specifies advisory flags for creating an I/O ring with a call to
// CreateIoRing.
typedef enum IORING_CREATE_REQUIRED_FLAGS {
IORING_CREATE_REQUIRED_FLAGS_NONE
} IORING_CREATE_REQUIRED_FLAGS;
// Specifies required flags for creating an I/O ring with a call to
// CreateIoRing.
typedef enum IORING_REF_KIND {
IORING_REF_RAW = 0,
IORING_REF_REGISTERED = 1,
} IORING_REF_KIND;
// Specifies the type of an IORING_HANDLE_REF structure.
typedef enum IORING_SQE_FLAGS {
IOSQE_FLAGS_NONE,
IOSQE_FLAGS_DRAIN_PRECEDING_OPS
} IORING_SQE_FLAGS;
// Specifies kernel behavior options for I/O ring submission queue entries
// IORING_REGISTERED_BUFFER structure
typedef struct IORING_REGISTERED_BUFFER {
UINT32 Index;
UINT32 Offset;
} IORING_REGISTERED_BUFFER;
// IORING_HANDLE_REF
struct IORING_HANDLE_REF {
IORING_REF_KIND Kind;
union {
HANDLE Handle;
UINT32 Index;
} HandleUnion;
};
// Represents a reference to a file handle used in an I/O ring operation
// IORING_BUFFER_REF
struct IORING_BUFFER_REF {
IORING_REF_KIND Kind;
union {
void *Address;
IORING_REGISTERED_BUFFER IndexAndOffset;
} BufferUnion;
};
typedef struct IORING_BUFFER_INFO {
void *Address;
UINT32 Length;
} IORING_BUFFER_INFO;
// IORING_BUFFER_REF represents a reference to a buffer used in an I/O ring
// operation
// IORING_VERSION enumeration
typedef enum IORING_VERSION {
IORING_VERSION_INVALID = 0,
IORING_VERSION_1 = 1,
IORING_VERSION_2 = 2,
IORING_VERSION_3 = 3,
IORING_VERSION_4 = 4,
} IORING_VERSION;
typedef enum IORING_FEATURE_FLAGS {
IORING_FEATURE_FLAGS_NONE = 0,
IORING_FEATURE_UM_EMULATION = 1
} IORING_FEATURE_FLAGS;
// IORING_CAPABILITIES structure
typedef struct IORING_CAPABILITIES {
IORING_VERSION MaxVersion;
UINT32 MaxSubmissionQueueSize;
UINT32 MaxCompletionQueueSize;
IORING_FEATURE_FLAGS FeatureFlags;
} IORING_CAPABILITIES;
// Represents the IORING API capabilities.
// IORING_CQE structure
typedef struct IORING_CQE {
UINT_PTR UserData;
HRESULT ResultCode;
ULONG_PTR Information;
} IORING_CQE;
// Represents a completed I/O ring queue entry.
// IORING_CREATE_FLAGS structure
typedef struct IORING_CREATE_FLAGS {
IORING_CREATE_REQUIRED_FLAGS Required;
IORING_CREATE_ADVISORY_FLAGS Advisory;
} IORING_CREATE_FLAGS;
// Specifies flags for creating an I/O ring with a call to CreateIoRing.
// IORING_INFO structure
typedef struct IORING_INFO {
IORING_VERSION IoRingVersion;
IORING_CREATE_FLAGS Flags;
UINT32 SubmissionQueueSize;
UINT32 CompletionQueueSize;
} IORING_INFO;
// Represents the shape and version information for the specified I/O ring
// IORING_OP_CODE for IsIoRingOpSupported
typedef enum IORING_OP_CODE {
IORING_OP_NOP = 0,
IORING_OP_READ = 1,
IORING_OP_WRITE = 2,
IORING_OP_FLUSH = 3,
IORING_OP_REGISTER_BUFFERS = 4,
IORING_OP_REGISTER_FILES = 5,
IORING_OP_CANCEL = 6,
} IORING_OP_CODE;
/* --------------------- Dynamic loader --------------------- */
// Function pointer types
typedef BOOL(WINAPI *IsIoRingOpSupported_t)(HIORING, IORING_OP_CODE);
typedef HRESULT(WINAPI *QueryIoRingCapabilities_t)(IORING_CAPABILITIES *);
typedef HRESULT(WINAPI *GetIoRingInfo_t)(HIORING, IORING_INFO *);
typedef HRESULT(WINAPI *CreateIoRing_t)(IORING_VERSION, IORING_CREATE_FLAGS,
UINT32, UINT32, HIORING *);
typedef HRESULT(WINAPI *CloseIoRing_t)(HIORING);
typedef HRESULT(WINAPI *SubmitIoRing_t)(HIORING, UINT32, UINT32, UINT32 *);
typedef HRESULT(WINAPI *PopIoRingCompletion_t)(HIORING, IORING_CQE *);
typedef HRESULT(WINAPI *SetIoRingCompletionEvent_t)(HIORING, HANDLE);
typedef HRESULT(WINAPI *BuildIoRingCancelRequest_t)(HIORING, IORING_HANDLE_REF,
UINT_PTR, UINT_PTR);
typedef HRESULT(WINAPI *BuildIoRingReadFile_t)(HIORING, IORING_HANDLE_REF,
IORING_BUFFER_REF, UINT32,
UINT64, UINT_PTR,
IORING_SQE_FLAGS);
typedef HRESULT(WINAPI *BuildIoRingRegisterBuffers_t)(
HIORING, UINT32, IORING_BUFFER_INFO const[], UINT_PTR);
typedef HRESULT(WINAPI *BuildIoRingRegisterFileHandles_t)(HIORING, UINT32,
HANDLE const[],
UINT_PTR);
// Core:
// Queries the support of the specified operation for the specified I/O ring
static IsIoRingOpSupported_t IsIoRingOpSupported = NULL;
// Queries the OS for the supported capabilities for IORINGs
static QueryIoRingCapabilities_t QueryIoRingCapabilities = NULL;
// Gets information about the API version and queue sizes of an I/O ring
static GetIoRingInfo_t GetIoRingInfo = NULL;
// Creates a new instance of an I/O ring submission/completion queue pair and
// returns a handle for referencing the I/O ring
static CreateIoRing_t CreateIoRing = NULL;
// Closes an HIORING handle that was previously opened with a call to
// CreateIoRing
static CloseIoRing_t CloseIoRing = NULL;
// Submission / completion:
// Submits all constructed but not yet submitted entries to the kernels queue
// and optionally waits for a set of operations to complete
static SubmitIoRing_t SubmitIoRing = NULL;
// Pops a single entry from the completion queue, if one is available
static PopIoRingCompletion_t PopIoRingCompletion = NULL;
// Registers a completion queue event with an IORING
static SetIoRingCompletionEvent_t SetIoRingCompletionEvent = NULL;
// Operations:
// Performs an asynchronous read from a file using an I/O ring
static BuildIoRingReadFile_t BuildIoRingReadFile = NULL;
// Attempts to cancel a previously submitted I/O ring operation
static BuildIoRingCancelRequest_t BuildIoRingCancelRequest = NULL;
// Registers an array of buffers with the system for future I/O ring operations
static BuildIoRingRegisterBuffers_t BuildIoRingRegisterBuffers = NULL;
// Registers an array of file handles with the system for future I/O ring
// operations
static BuildIoRingRegisterFileHandles_t BuildIoRingRegisterFileHandles = NULL;
static int io_ring_loaded = 0;
static int io_ring_load_functions(void) {
if (io_ring_loaded)
return 1;
HMODULE hKernel = GetModuleHandleW(L"kernel32.dll");
if (!hKernel)
return 0;
IsIoRingOpSupported =
(IsIoRingOpSupported_t)GetProcAddress(hKernel, "IsIoRingOpSupported");
QueryIoRingCapabilities = (QueryIoRingCapabilities_t)GetProcAddress(
hKernel, "QueryIoRingCapabilities");
GetIoRingInfo = (GetIoRingInfo_t)GetProcAddress(hKernel, "GetIoRingInfo");
CreateIoRing = (CreateIoRing_t)GetProcAddress(hKernel, "CreateIoRing");
CloseIoRing = (CloseIoRing_t)GetProcAddress(hKernel, "CloseIoRing");
SubmitIoRing = (SubmitIoRing_t)GetProcAddress(hKernel, "SubmitIoRing");
PopIoRingCompletion =
(PopIoRingCompletion_t)GetProcAddress(hKernel, "PopIoRingCompletion");
SetIoRingCompletionEvent = (SetIoRingCompletionEvent_t)GetProcAddress(
hKernel, "SetIoRingCompletionEvent");
BuildIoRingReadFile =
(BuildIoRingReadFile_t)GetProcAddress(hKernel, "BuildIoRingReadFile");
BuildIoRingCancelRequest = (BuildIoRingCancelRequest_t)GetProcAddress(
hKernel, "BuildIoRingCancelRequest");
BuildIoRingRegisterBuffers = (BuildIoRingRegisterBuffers_t)GetProcAddress(
hKernel, "BuildIoRingRegisterBuffers");
BuildIoRingRegisterFileHandles =
(BuildIoRingRegisterFileHandles_t)GetProcAddress(
hKernel, "BuildIoRingRegisterFileHandles");
io_ring_loaded =
(IsIoRingOpSupported && QueryIoRingCapabilities && CreateIoRing &&
CloseIoRing && SubmitIoRing && PopIoRingCompletion &&
SetIoRingCompletionEvent && BuildIoRingReadFile &&
BuildIoRingCancelRequest && BuildIoRingRegisterBuffers &&
BuildIoRingRegisterFileHandles);
if (io_ring_loaded)
printf("[I/O Ring] Functions loaded\n");
else
printf("[I/O Ring] Some functions not available\n");
return io_ring_loaded;
}
/* ------------- Standard helper functions definition ------------- */
// Creates an instance of the IORING_BUFFER_REF structure with the provided
// buffer index and offset
static inline IORING_BUFFER_REF
IoRingBufferRefFromIndexAndOffset(UINT32 index, UINT32 offset) {
IORING_BUFFER_REF ref;
ref.Kind = IORING_REF_REGISTERED;
ref.BufferUnion.IndexAndOffset.Index = index;
ref.BufferUnion.IndexAndOffset.Offset = offset;
return ref;
}
// Creates an instance of the IORING_BUFFER_REF structure from the provided
// pointer
static IORING_BUFFER_REF IoRingBufferRefFromPointer(void *addr) {
IORING_BUFFER_REF ref;
ref.Kind = IORING_REF_RAW;
ref.BufferUnion.Address = addr;
return ref;
}
// Creates an instance of the IORING_HANDLE_REF structure from the provided file
// handle
static IORING_HANDLE_REF IoRingHandleRefFromHandle(HANDLE h) {
IORING_HANDLE_REF ref;
ref.Kind = IORING_REF_RAW;
ref.HandleUnion.Handle = h;
return ref;
}
// Creates an instance of the IORING_HANDLE_REF structure from the provided
// index
static inline IORING_HANDLE_REF IoRingHandleRefFromIndex(UINT32 index) {
IORING_HANDLE_REF ref;
ref.Kind = IORING_REF_REGISTERED; // MUST be registered
ref.HandleUnion.Index = index;
return ref;
}
// NOTE: If you are using index-based buffers or handles, make sure you have
// successfully called BuildIoRingRegisterBuffers or
// BuildIoRingRegisterFileHandles first so the kernel has a valid table to look
// into, otherwise the kernel will treat the index as an invalid memory
// address/handle.

115
lf_mpmc.h
View File

@@ -36,8 +36,6 @@ typedef struct {
CACHE_ALIGN atomic_size_t head; CACHE_ALIGN atomic_size_t head;
CACHE_ALIGN atomic_size_t tail; CACHE_ALIGN atomic_size_t tail;
CACHE_ALIGN atomic_size_t work_count;
size_t capacity; size_t capacity;
size_t mask; size_t mask;
@@ -93,7 +91,6 @@ static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
atomic_init(&q->head, 0); atomic_init(&q->head, 0);
atomic_init(&q->tail, 0); atomic_init(&q->tail, 0);
atomic_init(&q->work_count, 0);
plat_sem_init(&q->items_sem, 0); plat_sem_init(&q->items_sem, 0);
} }
@@ -141,7 +138,6 @@ static void mpmc_commit_more(MPMCQueue *q) {
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
/* PUSH */ /* PUSH */
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
// Does not increment work
static void mpmc_push(MPMCQueue *q, void *item) { static void mpmc_push(MPMCQueue *q, void *item) {
MPMCSlot *slot; MPMCSlot *slot;
size_t pos; size_t pos;
@@ -173,11 +169,11 @@ static void mpmc_push(MPMCQueue *q, void *item) {
} else if (diff < 0) { // queue actually full } else if (diff < 0) { // queue actually full
sleep_ms(1000); Sleep(1000);
} else { // waiting to grow } else { // waiting to grow
sleep_ms(0); Sleep(0);
} }
} }
@@ -188,55 +184,8 @@ static void mpmc_push(MPMCQueue *q, void *item) {
plat_sem_post(&q->items_sem, 1); plat_sem_post(&q->items_sem, 1);
} }
// Increment work
static void mpmc_push_work(MPMCQueue *q, void *item) {
MPMCSlot *slot;
size_t pos;
for (;;) {
pos = atomic_load_explicit(&q->tail, memory_order_relaxed);
// ensure the slot is committed BEFORE accessing it
size_t committed =
atomic_load_explicit(&q->committed, memory_order_relaxed);
if (unlikely(pos >= committed)) {
mpmc_commit_more(q);
continue;
}
slot = &q->slots[pos & q->mask];
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
intptr_t diff = (intptr_t)seq - (intptr_t)pos;
if (likely(diff == 0)) {
if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1,
memory_order_relaxed,
memory_order_relaxed))
break;
} else if (diff < 0) { // queue actually full
sleep_ms(1000);
} else { // waiting to grow
sleep_ms(0);
}
}
slot->data = item;
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
atomic_fetch_add(&q->work_count, 1);
plat_sem_post(&q->items_sem, 1);
}
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
/* POP */ /* POP (blocking with semaphore) */
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
static void *mpmc_pop(MPMCQueue *q) { static void *mpmc_pop(MPMCQueue *q) {
@@ -264,7 +213,7 @@ static void *mpmc_pop(MPMCQueue *q) {
} else { // slot is still transitioning (written by another thread) } else { // slot is still transitioning (written by another thread)
if (++spins > 10) { if (++spins > 10) {
sleep_ms(0); // yield CPU SwitchToThread(); // yield CPU
spins = 0; spins = 0;
} else { } else {
cpu_pause(); cpu_pause();
@@ -279,6 +228,52 @@ static void *mpmc_pop(MPMCQueue *q) {
return data; return data;
} }
/* ----------------------------------------------------------- */
/* TRY POP (non blocking) */
/* ----------------------------------------------------------- */
static b32 mpmc_try_pop(MPMCQueue *q, void **out) {
if (!plat_sem_trywait(&q->items_sem))
return false;
MPMCSlot *slot;
size_t pos;
int spins = 0;
for (;;) {
pos = atomic_load_explicit(&q->head, memory_order_relaxed);
slot = &q->slots[pos & q->mask];
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1);
if (likely(diff == 0)) {
if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1,
memory_order_relaxed,
memory_order_relaxed))
break;
} else {
if (++spins > 10) {
SwitchToThread();
spins = 0;
} else {
cpu_pause();
}
}
}
*out = slot->data;
atomic_store_explicit(&slot->seq, pos + q->capacity, memory_order_release);
return true;
}
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
/* PUSH POISON */ /* PUSH POISON */
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
@@ -293,16 +288,6 @@ static void mpmc_producers_finished(MPMCQueue *q, u8 consumer_count) {
} }
} }
/* ----------------------------------------------------------- */
/* Done */
/* ----------------------------------------------------------- */
static void mpmc_task_done(MPMCQueue *q, u8 consumer_count) {
size_t prev = atomic_fetch_sub(&q->work_count, 1);
if (prev == 1) {
mpmc_producers_finished(q, consumer_count);
}
}
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */
/* MPMC Cleanup */ /* MPMC Cleanup */
/* ----------------------------------------------------------- */ /* ----------------------------------------------------------- */

1921
platform.c

File diff suppressed because it is too large Load Diff

93
platform.h Normal file
View File

@@ -0,0 +1,93 @@
#pragma once // ensure that a given header file is included only once in a
// single compilation unit
#include "arena.h"
#include "base.h"
#include "lf_mpmc.h"
#include "arena.c"
// ----------------------------- Config -------------------------------------
#define FILE_HASHES_TXT "file_hashes.txt"
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
#define MAX_PATHLEN 4096
#define READ_BLOCK (64 * 1024) // 64KB blocks
// ----------------------------- Data types ---------------------------------
typedef struct FileEntry {
char *path;
uint64_t size_bytes;
uint64_t created_time; // epoch
uint64_t modified_time; // epoch seconds
char owner[128]; // resolved owner name
} FileEntry;
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified);
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size);
/* scan folder timer*/
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER end;
} HiResTimer;
static LARGE_INTEGER g_qpc_freq;
static void timer_init(void) { QueryPerformanceFrequency(&g_qpc_freq); }
static void timer_start(HiResTimer *t) { QueryPerformanceCounter(&t->start); }
static double timer_stop(HiResTimer *t) {
QueryPerformanceCounter(&t->end);
return (double)(t->end.QuadPart - t->start.QuadPart) /
(double)g_qpc_freq.QuadPart;
}
// MPMC Queue
static MPMCQueue g_dir_queue;
static MPMCQueue g_file_queue;
typedef struct {
mem_arena *path_arena;
mem_arena *meta_arena;
MPMCQueue *dir_queue;
MPMCQueue *file_queue;
} ScannerContext;
typedef struct {
MPMCQueue *queue;
mem_arena *arena;
} WorkerContext;
/* Scan folders */
typedef struct DirQueue DirQueue;
typedef struct DirJob {
char *path;
struct DirJob *next;
} DirJob;
typedef struct DirQueue {
char **items;
size_t count;
size_t cap;
size_t active;
int stop;
#if PLATFORM_WINDOWS
CRITICAL_SECTION cs;
CONDITION_VARIABLE cv;
#else
pthread_mutex_t mutex;
pthread_cond_t cond;
#endif
} DirQueue;
// void scan_folder_windows_parallel(const char *base, ScannerContext *ctx);
// void scan_folder_posix_parallel(const char *base, ScannerContext *ctx);
void scan_folder_windows_parallel(const char *base, DirQueue *q);

678
platform_posix.c Normal file
View File

@@ -0,0 +1,678 @@
#include "platform.h"
// ----------------------------- Globals ------------------------------------
static atomic_uint_fast64_t g_bytes_processed = 0;
FileEntry *g_entries = NULL;
size_t g_entry_count = 0;
size_t g_entry_capacity = 0;
// ----------------------------- Utils --------------------------------------
static void perror_exit(const char *msg) {
perror(msg);
exit(1);
}
static void *xmalloc(size_t n) {
void *p = malloc(n);
if (!p)
perror_exit("malloc");
return p;
}
static void add_entry(const FileEntry *src) {
if (g_entry_count + 1 > g_entry_capacity) {
g_entry_capacity = g_entry_capacity ? g_entry_capacity * 2 : 1024;
g_entries = realloc(g_entries, sizeof(FileEntry) * g_entry_capacity);
if (!g_entries)
perror_exit("realloc");
}
FileEntry *dst = &g_entries[g_entry_count++];
memset(dst, 0, sizeof(*dst));
dst->size_bytes = src->size_bytes;
dst->created_time = src->created_time;
dst->modified_time = src->modified_time;
if (src->path)
dst->path = strdup(src->path);
strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
dst->owner[sizeof(dst->owner) - 1] = '\0';
}
static void free_entries(void) {
for (size_t i = 0; i < g_entry_count; ++i) {
free(g_entries[i].path);
}
free(g_entries);
g_entries = NULL;
g_entry_count = 0;
g_entry_capacity = 0;
}
// ----------------------------- Owner lookup ------------------------------
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
struct passwd *pw = getpwuid(uid);
if (pw) {
snprintf(out, out_sz, "%s", pw->pw_name);
} else {
snprintf(out, out_sz, "UNKNOWN");
}
}
// ----------------------------- Format time helper -------------------------
static void format_time(uint64_t t, char *out, size_t out_sz) {
if (t == 0) {
snprintf(out, out_sz, "N/A");
return;
}
time_t tt = (time_t)t;
struct tm tm;
#if PLATFORM_WINDOWS
localtime_s(&tm, &tt);
#else
localtime_r(&tt, &tm);
#endif
strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm);
}
// --------------- parallel directory scanning ----------------
// Add queue helper functions
static void dirqueue_push(DirQueue *q, const char *path) {
DirJob *job = malloc(sizeof(*job));
job->path = strdup(path);
job->next = NULL;
pthread_mutex_lock(&q->mutex);
if (q->tail)
q->tail->next = job;
else
q->head = job;
q->tail = job;
pthread_cond_signal(&q->cond);
pthread_mutex_unlock(&q->mutex);
}
static char *dirqueue_pop(DirQueue *q) {
pthread_mutex_lock(&q->mutex);
while (!q->head && !q->stop)
pthread_cond_wait(&q->cond, &q->mutex);
if (q->stop) {
pthread_mutex_unlock(&q->mutex);
return NULL;
}
DirJob *job = q->head;
q->head = job->next;
if (!q->head)
q->tail = NULL;
q->active_workers++;
pthread_mutex_unlock(&q->mutex);
char *path = job->path;
free(job);
return path;
}
static void dirqueue_done(DirQueue *q) {
pthread_mutex_lock(&q->mutex);
q->active_workers--;
if (!q->head && q->active_workers == 0) {
q->stop = 1;
pthread_cond_broadcast(&q->cond);
}
pthread_mutex_unlock(&q->mutex);
}
// Scanning directory worker thread function
static void scan_worker(void *arg) {
DirQueue *q = arg;
for (;;) {
char *dir = dirqueue_pop(q);
if (!dir)
break;
scan_folder_posix_parallel(dir, q);
free(dir);
dirqueue_done(q);
}
}
// Scanning directory function
void scan_folder_posix_parallel(const char *base, DirQueue *q) {
DIR *d = opendir(base);
if (!d)
return;
struct dirent *ent;
while ((ent = readdir(d))) {
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
continue;
char full[MAX_PATHLEN];
snprintf(full, sizeof(full), "%s/%s", base, ent->d_name);
struct stat st;
if (lstat(full, &st) != 0)
continue;
if (S_ISDIR(st.st_mode)) {
dirqueue_push(q, full);
} else if (S_ISREG(st.st_mode)) {
FileEntry fe;
memset(&fe, 0, sizeof(fe));
normalize_path(full);
fe.path = full;
fe.size_bytes = (uint64_t)st.st_size;
fe.created_time = (uint64_t)st.st_ctime;
fe.modified_time = (uint64_t)st.st_mtime;
get_file_owner(st.st_uid, fe.owner, sizeof(fe.owner));
add_entry(&fe);
}
}
closedir(d);
}
// ----------------------------- Job queue ----------------------------------
static void jobqueue_init(JobQueue *q) {
q->head = q->tail = NULL;
atomic_store(&q->count, 0);
q->stop = 0;
pthread_mutex_init(&q->mutex, NULL);
pthread_cond_init(&q->cond, NULL);
}
static void jobqueue_push(JobQueue *q, Job *job) {
pthread_mutex_lock(&q->mutex);
job->next = NULL;
if (q->tail)
q->tail->next = job;
else
q->head = job;
q->tail = job;
atomic_fetch_add(&q->count, 1);
pthread_cond_signal(&q->cond);
pthread_mutex_unlock(&q->mutex);
}
static Job *jobqueue_pop(JobQueue *q) {
pthread_mutex_lock(&q->mutex);
while (!q->head && !q->stop)
pthread_cond_wait(&q->cond, &q->mutex);
if (q->stop && !q->head) {
pthread_mutex_unlock(&q->mutex);
return NULL;
}
Job *j = q->head;
q->head = j->next;
if (!q->head)
q->tail = NULL;
pthread_mutex_unlock(&q->mutex);
if (j)
atomic_fetch_sub(&q->count, 1);
return j;
}
static void jobqueue_stop(JobQueue *q) {
pthread_mutex_lock(&q->mutex);
q->stop = 1;
pthread_cond_broadcast(&q->cond);
pthread_mutex_unlock(&q->mutex);
}
// ----------------------------- Hashing helpers -----------------------------
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
// compute XXH3_128 over file. POSIX and Windows use standard reads in this
// helper.
int fd = open(path, O_RDONLY);
if (fd < 0) {
strcpy(out_hex, "ERROR");
return;
}
XXH128_hash_t h;
XXH3_state_t *state = XXH3_createState();
XXH3_128bits_reset(state);
unsigned char *buf = (unsigned char *)malloc(READ_BLOCK);
ssize_t r;
while ((r = read(fd, buf, READ_BLOCK)) > 0) {
XXH3_128bits_update(state, buf, (size_t)r);
atomic_fetch_add(&g_bytes_processed, (uint64_t)r);
}
h = XXH3_128bits_digest(state);
XXH3_freeState(state);
close(fd);
free(buf);
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
(unsigned long long)h.low64);
}
// ----------------------------- Worker --------------------------------------
static void *worker_thread_posix(void *argp) {
WorkerArg *w = (WorkerArg *)argp;
JobQueue *q = w->queue;
for (;;) {
Job *job = jobqueue_pop(q);
if (!job)
break;
char hex[HASH_STRLEN];
xxh3_hash_file_stream(job->file->path, hex);
// append to file_hashes.txt atomically: we will store results to a temp
// buffer and write them at the end (to avoid synchronization issues). But
// for simplicity, here we append directly using a file lock (fopen+fwrite
// guarded by mutex). We'll store results in job->file->path? Instead,
// simple global append with a mutex. Using a file-level append lock:
static pthread_mutex_t append_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&append_mutex);
FILE *hf = fopen(FILE_HASHES_TXT, "a");
if (hf) {
char created[32], modified[32];
format_time(job->file->created_time, created, sizeof(created));
format_time(job->file->modified_time, modified, sizeof(modified));
double size_kib = (double)job->file->size_bytes / (1024.0);
fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib,
created, modified, job->file->owner);
fclose(hf);
}
pthread_mutex_unlock(&append_mutex);
atomic_fetch_add(w->done_counter, 1);
free(job);
}
atomic_fetch_sub(w->live_workers, 1);
return NULL;
}
// ----------------------------- Progress display ---------------------------
static void print_progress(size_t done, size_t total) {
const int barw = 40;
double pct = total ? (double)done / (double)total : 0.0;
int filled = (int)(pct * barw + 0.5);
printf("\r[");
for (int i = 0; i < filled; ++i)
putchar('#');
for (int i = filled; i < barw; ++i)
putchar(' ');
printf("] %6.2f%% (%zu / %zu) ", pct * 100.0, done, total);
fflush(stdout);
}
// ----------------------------- Helpers: load/save --------------------------
static int file_exists(const char *path) {
struct stat st;
return (stat(path, &st) == 0);
}
static void save_file_list(const char *list_path) {
FILE *f = fopen(list_path, "w");
if (!f) {
perror("fopen file_list");
return;
}
for (size_t i = 0; i < g_entry_count; ++i) {
fprintf(f, "%s\n", g_entries[i].path);
}
fclose(f);
}
static void load_file_list(const char *list_path) {
FILE *f = fopen(list_path, "r");
if (!f)
return;
char line[MAX_PATHLEN];
while (fgets(line, sizeof(line), f)) {
line[strcspn(line, "\r\n")] = 0;
FileEntry fe;
memset(&fe, 0, sizeof(fe));
fe.path = line;
/* Populate metadata from filesystem */
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
add_entry(&fe);
}
fclose(f);
}
// Read existing hashes into memory map for resume
// Simple linear search mapping: returns 1 if path has hash found (and writes
// into out_hex)
static int find_hash_in_file(const char *hashfile, const char *path,
char *out_hex) {
FILE *f = fopen(hashfile, "r");
if (!f)
return 0;
char p[MAX_PATHLEN];
char h[128];
int found = 0;
while (fscanf(f, "%4095s %127s", p, h) == 2) {
if (strcmp(p, path) == 0) {
strncpy(out_hex, h, HASH_STRLEN);
out_hex[HASH_STRLEN - 1] = 0;
found = 1;
break;
}
}
fclose(f);
return found;
}
// ----------------------------- Get file metadata -------------------------
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified) {
struct stat st;
if (stat(path, &st) == 0) {
*out_created = (uint64_t)st.st_ctime;
*out_modified = (uint64_t)st.st_mtime;
} else {
*out_created = 0;
*out_modified = 0;
}
}
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size) {
struct stat st;
if (stat(path, &st) == 0) {
get_file_owner(st.st_uid, out_owner, out_owner_size);
} else {
snprintf(out_owner, out_owner_size, "UNKNOWN");
}
}
// ----------------------------- Main ---------------------------------------
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
int resume = 0;
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-resume") == 0) {
resume = 1;
} else {
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0 && !resume) {
printf("Enter folder to process (Enter = current folder): ");
fflush(stdout);
char buf[MAX_PATHLEN];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0)
strcpy(folders[0], ".");
else
strncpy(folders[0], buf, MAX_PATHLEN - 1);
folder_count = 1;
} else if (folder_count == 0 && resume) {
strcpy(folders[0], ".");
folder_count = 1;
}
// -------------------------------
// Display selected folders
// -------------------------------
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Detect hardware threads (CPU cores)
// -------------------------------
size_t hw_threads = 1;
long cpus = sysconf(_SC_NPROCESSORS_ONLN);
if (cpus > 0)
hw_threads = (size_t)cpus;
// Add some extra threads to overlap I/O more aggressively
size_t num_threads = hw_threads * 2;
if (num_threads < 2)
num_threads = 2;
// -------------------------------
// Step 1: Scan all folders
// -------------------------------
if (!resume) {
DirQueue q = {0};
pthread_mutex_init(&q.mutex, NULL);
pthread_cond_init(&q.cond, NULL);
// Seed queue
for (int i = 0; i < folder_count; ++i)
dirqueue_push(&q, folders[i]);
pthread_t *threads = malloc(sizeof(pthread_t) * num_threads);
for (size_t i = 0; i < num_threads; ++i)
pthread_create(&threads[i], NULL, (void *(*)(void *))scan_worker, &q);
for (size_t i = 0; i < num_threads; ++i)
pthread_join(threads[i], NULL);
free(threads);
pthread_mutex_destroy(&q.mutex);
pthread_cond_destroy(&q.cond);
printf("Found %zu files. Saving to %s\n", g_entry_count, FILE_LIST_TXT);
save_file_list(FILE_LIST_TXT);
} else {
if (!file_exists(FILE_LIST_TXT)) {
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
return 1;
}
load_file_list(FILE_LIST_TXT);
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
}
if (g_entry_count == 0) {
printf("No files to process.\n");
return 0;
}
// If resume: create map of which files are already hashed
char **existing_hash = calloc(g_entry_count, sizeof(char *));
for (size_t i = 0; i < g_entry_count; ++i)
existing_hash[i] = NULL;
if (resume && file_exists(FILE_HASHES_TXT)) {
// For simplicity we parse hash file and match lines to list entries.
for (size_t i = 0; i < g_entry_count; ++i) {
char hex[HASH_STRLEN] = {0};
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
existing_hash[i] = strdup(hex);
}
}
}
// Prepare job queue of only missing files (or all if not resume)
JobQueue queue;
jobqueue_init(&queue);
size_t total_jobs = 0;
for (size_t i = 0; i < g_entry_count; ++i) {
if (resume && existing_hash[i])
continue;
Job *j = (Job *)malloc(sizeof(Job));
j->file = &g_entries[i];
j->next = NULL;
jobqueue_push(&queue, j);
++total_jobs;
}
if (total_jobs == 0) {
printf("Nothing to do — all files already hashed.\n");
return 0;
}
// Remove old hashes file if we're recomputing from scratch.
if (!resume) {
// create/overwrite hashes file
FILE *hf = fopen(FILE_HASHES_TXT, "w");
if (hf)
fclose(hf);
} // if resume, we append only missing
// Starting thread pool
atomic_size_t done_counter;
atomic_store(&done_counter, 0);
atomic_int live_workers;
atomic_store(&live_workers, (int)num_threads);
WorkerArg warg = {.queue = &queue,
.done_counter = &done_counter,
.total_jobs = total_jobs,
.live_workers = &live_workers};
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
hw_threads);
// Launch threads
pthread_t *tids = malloc(sizeof(pthread_t) * num_threads);
for (size_t i = 0; i < num_threads; ++i) {
pthread_create(&tids[i], NULL, worker_thread_posix, &warg);
}
// Progress / timer
struct timespec tstart, tnow;
clock_gettime(CLOCK_MONOTONIC, &tstart);
size_t last_done = 0;
// ---------- Correct real-time MB/s (stable & accurate) ----------
uint64_t last_bytes = atomic_load(&g_bytes_processed);
double last_time = 0.0;
double displayed_speed = 0.0;
const double sample_interval = 0.5;
char linebuf[256];
for (;;) {
size_t done = (size_t)atomic_load(&done_counter);
// ---- monotonic time ----
clock_gettime(CLOCK_MONOTONIC, &tnow);
double now =
(tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9;
// ---- bytes so far ----
uint64_t bytes = atomic_load(&g_bytes_processed);
// ---- real sampler (independent of UI sleep) ----
if (last_time == 0.0) {
last_time = now;
last_bytes = bytes;
}
double dt = now - last_time;
if (dt >= sample_interval) {
uint64_t db = bytes - last_bytes;
if (db > 0 && dt > 0.0001) {
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
}
last_bytes = bytes;
last_time = now;
}
// ---- progress bar build ----
const int barw = 40;
double pct = total_jobs ? (double)done / (double)total_jobs : 0.0;
int filled = (int)(pct * barw + 0.5);
int p = 0;
p += snprintf(linebuf + p, sizeof(linebuf) - p, "[");
for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i)
p += snprintf(linebuf + p, sizeof(linebuf) - p, "#");
for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i)
p += snprintf(linebuf + p, sizeof(linebuf) - p, ".");
snprintf(linebuf + p, sizeof(linebuf) - p,
"] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs,
displayed_speed);
printf("\r%s", linebuf);
fflush(stdout);
if (done >= total_jobs)
break;
usleep(100000);
}
printf("\n\n");
// stop queue and join threads
jobqueue_stop(&queue);
for (size_t i = 0; i < num_threads; ++i)
pthread_join(tids[i], NULL);
// done time
clock_gettime(CLOCK_MONOTONIC, &tnow);
double elapsed =
(tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9;
printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / elapsed;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
// If resume: we appended missing entries. If not resume: we wrote all results
// during workers. Note: This program appends hashes as workers finish. This
// avoids holding all hashes in RAM.
// Cleanup
for (size_t i = 0; i < g_entry_count; ++i)
if (existing_hash[i])
free(existing_hash[i]);
free(existing_hash);
free_entries();
return 0;
}

597
platform_windows.c Normal file
View File

@@ -0,0 +1,597 @@
#include "arena.h"
#include "platform.h"
// ----------------------------- Globals ------------------------------------
static atomic_uint_fast64_t g_files_found = 0;
static atomic_uint_fast64_t g_files_hashed = 0;
static atomic_uint_fast64_t g_bytes_processed = 0;
static atomic_int g_scan_done = 0;
// ============================= Utils ======================================
// ----------------------------- Normalize path --------------
static void normalize_path(char *p) {
char *src = p;
char *dst = p;
int prev_slash = 0;
while (*src) {
char c = *src++;
if (c == '\\' || c == '/') {
if (!prev_slash) {
*dst++ = '/';
prev_slash = 1;
}
} else {
*dst++ = c;
prev_slash = 0;
}
}
*dst = '\0';
}
// ----------------------------- Convert filetime to epoch --------------
static uint64_t filetime_to_epoch(const FILETIME *ft) {
ULARGE_INTEGER ull;
ull.LowPart = ft->dwLowDateTime;
ull.HighPart = ft->dwHighDateTime;
// Windows epoch (1601) → Unix epoch (1970)
return (ull.QuadPart - 116444736000000000ULL) / 10000000ULL;
}
// ----------------------------- Format time helper -------------------------
static void format_time(uint64_t t, char *out, size_t out_sz) {
if (t == 0) {
snprintf(out, out_sz, "N/A");
return;
}
time_t tt = (time_t)t;
struct tm tm;
#if PLATFORM_WINDOWS
localtime_s(&tm, &tt);
#else
localtime_r(&tt, &tm);
#endif
strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm);
}
// ----------------------------- Resolve file owner ---------------------
static void get_file_owner(const char *path, char *out, size_t out_sz) {
PSID sid = NULL;
PSECURITY_DESCRIPTOR sd = NULL;
if (GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
&sid, NULL, NULL, NULL, &sd) == ERROR_SUCCESS) {
char name[64], domain[64];
DWORD name_len = sizeof(name);
DWORD domain_len = sizeof(domain);
SID_NAME_USE use;
if (LookupAccountSidA(NULL, sid, name, &name_len, domain, &domain_len,
&use)) {
snprintf(out, out_sz, "%s\\%s", domain, name);
} else {
snprintf(out, out_sz, "UNKNOWN");
}
} else {
snprintf(out, out_sz, "UNKNOWN");
}
if (sd)
LocalFree(sd);
}
// ----------------------------- Get file metadata -------------------------
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified) {
WIN32_FILE_ATTRIBUTE_DATA fad;
if (GetFileAttributesExA(path, GetFileExInfoStandard, &fad)) {
*out_created = filetime_to_epoch(&fad.ftCreationTime);
*out_modified = filetime_to_epoch(&fad.ftLastWriteTime);
} else {
*out_created = 0;
*out_modified = 0;
}
}
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size) {
get_file_owner(path, out_owner, out_owner_size);
}
// --------------- parallel directory scanning ----------------
// Add queue helper functions
static void dirqueue_push(DirQueue *q, const char *path) {
EnterCriticalSection(&q->cs);
if (q->count + 1 > q->cap) {
q->cap = q->cap ? q->cap * 2 : 1024;
q->items = realloc(q->items, q->cap * sizeof(char *));
}
q->items[q->count++] = _strdup(path);
WakeConditionVariable(&q->cv);
LeaveCriticalSection(&q->cs);
}
static char *dirqueue_pop(DirQueue *q) {
EnterCriticalSection(&q->cs);
while (q->count == 0 && q->active > 0) {
SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
}
if (q->count == 0 && q->active == 0) {
LeaveCriticalSection(&q->cs);
return NULL; // truly done
}
char *dir = q->items[--q->count];
q->active++;
LeaveCriticalSection(&q->cs);
return dir;
}
static void dirqueue_done(DirQueue *q) {
EnterCriticalSection(&q->cs);
q->active--;
WakeAllConditionVariable(&q->cv);
LeaveCriticalSection(&q->cs);
}
static DWORD WINAPI scan_worker(LPVOID arg) {
DirQueue *q = (DirQueue *)arg;
for (;;) {
char *dir = dirqueue_pop(q);
if (!dir)
break;
scan_folder_windows_parallel(dir, q);
free(dir);
dirqueue_done(q);
}
return 0;
}
// Scanning directory function
void scan_folder_windows_parallel(const char *base, DirQueue *q) {
char search[MAX_PATHLEN];
snprintf(search, sizeof(search), "%s\\*", base);
WIN32_FIND_DATAA fd;
HANDLE h = FindFirstFileA(search, &fd);
if (h == INVALID_HANDLE_VALUE)
return;
do {
if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
continue;
char full[MAX_PATHLEN];
snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
continue;
if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
dirqueue_push(q, full);
} else {
atomic_fetch_add(&g_files_found, 1);
FileEntry *fe = malloc(sizeof(FileEntry));
memset(fe, 0, sizeof(FileEntry));
char norm[MAX_PATHLEN];
strncpy(norm, full, sizeof(norm) - 1);
norm[sizeof(norm) - 1] = 0;
normalize_path(norm);
fe->path = _strdup(norm);
platform_get_file_times(full, &fe->created_time, &fe->modified_time);
platform_get_file_owner(full, fe->owner, sizeof(fe->owner));
fe->size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow;
mpmc_push(&g_file_queue, fe);
}
} while (FindNextFileA(h, &fd));
FindClose(h);
}
// ----------------------------- Hashing helpers -----------------------------
static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) {
// compute XXH3_128 over file. POSIX and Windows use standard reads in this
// helper.
// On Windows try to use overlapped synchronous chunked reads for higher
// throughput.
HANDLE hFile =
CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
strcpy(out_hex, "ERROR");
return;
}
XXH128_hash_t h;
XXH3_state_t state;
XXH3_128bits_reset(&state);
DWORD read = 0;
BOOL ok;
while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) {
XXH3_128bits_update(&state, buf, (size_t)read);
atomic_fetch_add(&g_bytes_processed, (uint64_t)read);
}
h = XXH3_128bits_digest(&state);
CloseHandle(hFile);
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
(unsigned long long)h.low64);
}
// ------------------------- Hash worker --------------------------------
static DWORD WINAPI hash_worker(LPVOID arg) {
WorkerContext *ctx = (WorkerContext *)arg;
MPMCQueue *q = ctx->queue;
mem_arena *local_arena = ctx->arena;
BYTE *buf = (BYTE *)malloc(READ_BLOCK);
for (;;) {
FileEntry *fe = mpmc_pop(q);
if (!fe)
break;
char hash[HASH_STRLEN];
xxh3_hash_file_stream(fe->path, hash, buf);
char created[32], modified[32];
format_time(fe->created_time, created, sizeof(created));
format_time(fe->modified_time, modified, sizeof(modified));
double size_kib = (double)fe->size_bytes / 1024.0;
char stack_buf[1024];
int len =
snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
hash, fe->path, size_kib, created, modified, fe->owner);
char *dst = arena_push(&local_arena, len, false);
memcpy(dst, stack_buf, len);
atomic_fetch_add(&g_files_hashed, 1);
free(fe->path);
free(fe);
}
free(buf);
return 0;
}
// ----------------------------- Progress display ---------------------------
DWORD WINAPI progress_thread(void *arg) {
LARGE_INTEGER freq, start;
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&start);
uint64_t last_bytes = atomic_load(&g_bytes_processed);
double last_time = 0.0;
double displayed_speed = 0.0;
const double sample_interval = 0.5;
for (;;) {
uint64_t found = atomic_load(&g_files_found);
uint64_t hashed = atomic_load(&g_files_hashed);
uint64_t bytes = atomic_load(&g_bytes_processed);
int scan_done = atomic_load(&g_scan_done);
LARGE_INTEGER now;
QueryPerformanceCounter(&now);
double t = (double)(now.QuadPart - start.QuadPart) / (double)freq.QuadPart;
if (last_time == 0.0) {
last_time = t;
last_bytes = bytes;
}
double dt = t - last_time;
if (dt >= sample_interval) {
uint64_t db = bytes - last_bytes;
if (db > 0 && dt > 0.0001) {
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
}
last_bytes = bytes;
last_time = t;
}
if (!scan_done) {
printf("\rScanning: %llu files | Hashed: %llu | %.2f MB/s ",
(unsigned long long)found, (unsigned long long)hashed,
displayed_speed);
} else {
double pct = found ? (double)hashed / (double)found : 0.0;
int barw = 40;
int filled = (int)(pct * barw);
char bar[64];
int p = 0;
bar[p++] = '[';
for (int i = 0; i < filled; i++)
bar[p++] = '#';
for (int i = filled; i < barw; i++)
bar[p++] = '.';
bar[p++] = ']';
bar[p] = 0;
printf("\r%s %6.2f%% (%llu / %llu) %.2f MB/s ", bar, pct * 100.0,
(unsigned long long)hashed, (unsigned long long)found,
displayed_speed);
}
fflush(stdout);
if (scan_done && hashed == found)
break;
Sleep(100);
}
printf("\n");
return 0;
}
// ----------------------------- Main ---------------------------------------
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
// -------------------------------
// Scanning and total timer init
// -------------------------------
timer_init();
HiResTimer total_timer;
HiResTimer scan_timer;
timer_start(&total_timer);
timer_start(&scan_timer);
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0) {
printf("Enter folder to process (Enter = current folder): ");
fflush(stdout);
char buf[MAX_PATHLEN];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0)
strcpy(folders[0], ".");
else
strncpy(folders[0], buf, MAX_PATHLEN - 1);
folder_count = 1;
}
// -------------------------------
// Display selected folders
// -------------------------------
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Creating a general purpose arena
// -------------------------------
arena_params params = {
.reserve_size = GiB(1),
.commit_size = MiB(16),
.align = 0,
.push_size = 0,
.allow_free_list = true,
.allow_swapback = false,
.growth_policy = ARENA_GROWTH_NORMAL,
.commit_policy = ARENA_COMMIT_LAZY,
.max_nbre_blocks = 1,
};
mem_arena *gp_arena = arena_create(&params);
// -------------------------------
// Detect hardware threads (CPU cores)
// -------------------------------
size_t hw_threads = 1;
// --- Windows: detect PHYSICAL cores (not logical threads) ---
DWORD len = 0;
GetLogicalProcessorInformation(NULL, &len);
SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)arena_push(&gp_arena, len, true);
if (GetLogicalProcessorInformation(buf, &len)) {
DWORD count = 0;
DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
for (DWORD i = 0; i < n; i++) {
if (buf[i].Relationship == RelationProcessorCore)
count++;
}
if (count > 0)
hw_threads = count;
}
arena_free(&gp_arena, (u8 **)&buf, len);
// Add some extra threads to overlap I/O more aggressively
size_t num_threads = hw_threads * 2;
if (num_threads < 2)
num_threads = 2;
// -------------------------------
// Step 1: Scan all folders
// -------------------------------
mpmc_init(&g_file_queue, MiB(1));
DirQueue q;
memset(&q, 0, sizeof(q));
InitializeCriticalSection(&q.cs);
InitializeConditionVariable(&q.cv);
q.active = 0;
// starting hash threads
WorkerContext workers[num_threads];
for (int i = 0; i < num_threads; i++) {
workers[i].queue = &g_file_queue;
workers[i].arena = arena_create(&params);
}
HANDLE *hash_threads =
arena_push(&gp_arena, sizeof(HANDLE) * num_threads, true);
for (size_t i = 0; i < num_threads; ++i) {
hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL);
}
// starting scan threads
HANDLE progress = CreateThread(NULL, 0, progress_thread, NULL, 0, NULL);
for (int i = 0; i < folder_count; ++i) {
dirqueue_push(&q, folders[i]);
}
size_t scan_threads = hw_threads;
if (scan_threads < 2)
scan_threads = 2;
HANDLE *scan_tids =
arena_push(&gp_arena, sizeof(HANDLE) * scan_threads, true);
for (size_t i = 0; i < scan_threads; ++i) {
scan_tids[i] =
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker, &q, 0, NULL);
}
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
for (size_t i = 0; i < num_threads; i++) {
mpmc_push(&g_file_queue, NULL);
}
atomic_store(&g_scan_done, 1);
for (size_t i = 0; i < scan_threads; ++i)
CloseHandle(scan_tids[i]);
arena_free(&gp_arena, (u8 **)&scan_tids, sizeof(HANDLE) * scan_threads);
double scan_seconds = timer_stop(&scan_timer);
size_t total_found = atomic_load(&g_files_found);
printf("\r%*s\r", 120, ""); // clear_console_line
printf("Completed scanning in %.2f seconds, found %zu files\n\n",
scan_seconds, total_found);
// if no files found
if (total_found == 0) {
printf("No files found.\n");
return 0;
}
// stop hashing threads
WaitForMultipleObjects((DWORD)num_threads, hash_threads, TRUE, INFINITE);
for (size_t i = 0; i < num_threads; ++i)
CloseHandle(hash_threads[i]);
arena_free(&gp_arena, (u8 **)&hash_threads, sizeof(HANDLE) * num_threads);
WaitForSingleObject(progress, INFINITE);
CloseHandle(progress);
// write file_hashes.txt
// FILE *f = fopen(FILE_HASHES_TXT, "wb");
//
// for (int i = 0; i < num_threads; i++) {
// mem_arena *arena = workers[i].arena;
//
// u8 *arena_base =
// (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
// fwrite(arena_base, 1, arena->pos, f);
// }
//
// fclose(f);
HANDLE h = CreateFileA(FILE_HASHES_TXT, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, NULL);
for (int i = 0; i < num_threads; i++) {
mem_arena *local_hash_arena = workers[i].arena;
DWORD written;
u8 *arena_base = (u8 *)local_hash_arena +
ALIGN_UP_POW2(sizeof(mem_arena), local_hash_arena->align);
WriteFile(h, arena_base, (DWORD)local_hash_arena->pos, &written, NULL);
}
// done time
double total_seconds = timer_stop(&total_timer);
printf("Completed hashing %zu files\n", total_found);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / total_seconds;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
printf(" Total time : %.2f seconds\n\n", total_seconds);
return 0;
}

View File

@@ -1,821 +0,0 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2020-2021 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*!
* @file xxh_x86dispatch.c
*
* Automatic dispatcher code for the @ref XXH3_family on x86-based targets.
*
* Optional add-on.
*
* **Compile this file with the default flags for your target.**
* Note that compiling with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`
* will make the resulting binary incompatible with cpus not supporting the requested instruction set.
*
* @defgroup dispatch x86 Dispatcher
* @{
*/
#if defined (__cplusplus)
extern "C" {
#endif
#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64))
# error "Dispatching is currently only supported on x86 and x86_64."
#endif
/*! @cond Doxygen ignores this part */
#ifndef XXH_HAS_INCLUDE
# ifdef __has_include
/*
* Not defined as XXH_HAS_INCLUDE(x) (function-like) because
* this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
*/
# define XXH_HAS_INCLUDE __has_include
# else
# define XXH_HAS_INCLUDE(x) 0
# endif
#endif
/*! @endcond */
/*!
* @def XXH_DISPATCH_SCALAR
* @brief Enables/dispatching the scalar code path.
*
* If this is defined to 0, SSE2 support is assumed. This reduces code size
* when the scalar path is not needed.
*
* This is automatically defined to 0 when...
* - SSE2 support is enabled in the compiler
* - Targeting x86_64
* - Targeting Android x86
* - Targeting macOS
*/
#ifndef XXH_DISPATCH_SCALAR
# if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
|| defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \
|| defined(__ANDROID__) || defined(__APPLE__) /* Android or macOS */
# define XXH_DISPATCH_SCALAR 0 /* disable */
# else
# define XXH_DISPATCH_SCALAR 1
# endif
#endif
/*!
* @def XXH_DISPATCH_AVX2
* @brief Enables/disables dispatching for AVX2.
*
* This is automatically detected if it is not defined.
* - GCC 4.7 and later are known to support AVX2, but >4.9 is required for
* to get the AVX2 intrinsics and typedefs without -mavx -mavx2.
* - Visual Studio 2013 Update 2 and later are known to support AVX2.
* - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is
* not allowed to be included directly, it still appears in the builtin
* include path and is detectable with `__has_include`.
*
* @see XXH_AVX2
*/
#ifndef XXH_DISPATCH_AVX2
# if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \
|| (defined(_MSC_VER) && _MSC_VER >= 1900) /* VS 2015+ */ \
|| (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501) /* VS 2013 Update 2 */ \
|| XXH_HAS_INCLUDE(<avx2intrin.h>) /* GCC/Clang internal header */
# define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */
# else
# define XXH_DISPATCH_AVX2 0
# endif
#endif /* XXH_DISPATCH_AVX2 */
/*!
* @def XXH_DISPATCH_AVX512
* @brief Enables/disables dispatching for AVX512.
*
* Automatically detected if one of the following conditions is met:
* - GCC 4.9 and later are known to support AVX512.
* - Visual Studio 2017 and later are known to support AVX2.
* - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this
* is not allowed to be included directly, it still appears in the builtin
* include path and is detectable with `__has_include`.
*
* @see XXH_AVX512
*/
#ifndef XXH_DISPATCH_AVX512
# if (defined(__GNUC__) \
&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \
|| (defined(_MSC_VER) && _MSC_VER >= 1910) /* VS 2017+ */ \
|| XXH_HAS_INCLUDE(<avx512fintrin.h>) /* GCC/Clang internal header */
# define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */
# else
# define XXH_DISPATCH_AVX512 0
# endif
#endif /* XXH_DISPATCH_AVX512 */
/*!
* @def XXH_TARGET_SSE2
* @brief Allows a function to be compiled with SSE2 intrinsics.
*
* Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used
* even with `-mno-sse2`.
*
* @def XXH_TARGET_AVX2
* @brief Like @ref XXH_TARGET_SSE2, but for AVX2.
*
* @def XXH_TARGET_AVX512
* @brief Like @ref XXH_TARGET_SSE2, but for AVX512.
*
*/
#if defined(__GNUC__)
# include <emmintrin.h> /* SSE2 */
# if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
# include <immintrin.h> /* AVX2, AVX512F */
# endif
# define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
# define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
# define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
#elif defined(__clang__) && defined(_MSC_VER) /* clang-cl.exe */
# include <emmintrin.h> /* SSE2 */
# if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
# include <immintrin.h> /* AVX2, AVX512F */
# include <smmintrin.h>
# include <avxintrin.h>
# include <avx2intrin.h>
# include <avx512fintrin.h>
# endif
# define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
# define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
# define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
#elif defined(_MSC_VER)
# include <intrin.h>
# define XXH_TARGET_SSE2
# define XXH_TARGET_AVX2
# define XXH_TARGET_AVX512
#else
# error "Dispatching is currently not supported for your compiler."
#endif
/*! @cond Doxygen ignores this part */
#ifdef XXH_DISPATCH_DEBUG
/* debug logging */
# include <stdio.h>
# define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); }
#else
# define XXH_debugPrint(str) ((void)0)
# undef NDEBUG /* avoid redefinition */
# define NDEBUG
#endif
/*! @endcond */
#include <assert.h>
#ifndef XXH_DOXYGEN
#define XXH_INLINE_ALL
#define XXH_X86DISPATCH
#include "xxhash.h"
#endif
/*! @cond Doxygen ignores this part */
#ifndef XXH_HAS_ATTRIBUTE
# ifdef __has_attribute
# define XXH_HAS_ATTRIBUTE(...) __has_attribute(__VA_ARGS__)
# else
# define XXH_HAS_ATTRIBUTE(...) 0
# endif
#endif
/*! @endcond */
/*! @cond Doxygen ignores this part */
#if XXH_HAS_ATTRIBUTE(constructor)
# define XXH_CONSTRUCTOR __attribute__((constructor))
# define XXH_DISPATCH_MAYBE_NULL 0
#else
# define XXH_CONSTRUCTOR
# define XXH_DISPATCH_MAYBE_NULL 1
#endif
/*! @endcond */
/*! @cond Doxygen ignores this part */
/*
* Support both AT&T and Intel dialects
*
* GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
* compiled with -masm=intel. Instead, it supports dialect switching with
* curly braces: { AT&T syntax | Intel syntax }
*
* Clang's integrated assembler automatically converts AT&T syntax to Intel if
* needed, making the dialect switching useless (it isn't even supported).
*
* Note: Comments are written in the inline assembly itself.
*/
#ifdef __clang__
# define XXH_I_ATT(intel, att) att "\n\t"
#else
# define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
#endif
/*! @endcond */
/*!
* @private
* @brief Runs CPUID.
*
* @param eax , ecx The parameters to pass to CPUID, %eax and %ecx respectively.
* @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
*/
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
{
#if defined(_MSC_VER)
__cpuidex((int*)abcd, eax, ecx);
#else
xxh_u32 ebx, edx;
# if defined(__i386__) && defined(__PIC__)
__asm__(
"# Call CPUID\n\t"
"#\n\t"
"# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
"# EBX, so we use EDI instead.\n\t"
XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
XXH_I_ATT("cpuid", "cpuid" )
XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
: "=D" (ebx),
# else
__asm__(
"# Call CPUID\n\t"
XXH_I_ATT("cpuid", "cpuid")
: "=b" (ebx),
# endif
"+a" (eax), "+c" (ecx), "=d" (edx));
abcd[0] = eax;
abcd[1] = ebx;
abcd[2] = ecx;
abcd[3] = edx;
#endif
}
/*
* Modified version of Intel's guide
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
*/
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
/*!
* @private
* @brief Runs `XGETBV`.
*
* While the CPU may support AVX2, the operating system might not properly save
* the full YMM/ZMM registers.
*
* xgetbv is used for detecting this: Any compliant operating system will define
* a set of flags in the xcr0 register indicating how it saves the AVX registers.
*
* You can manually disable this flag on Windows by running, as admin:
*
* bcdedit.exe /set xsavedisable 1
*
* and rebooting. Run the same command with 0 to re-enable it.
*/
static xxh_u64 XXH_xgetbv(void)
{
#if defined(_MSC_VER)
return _xgetbv(0); /* min VS2010 SP1 compiler is required */
#else
xxh_u32 xcr0_lo, xcr0_hi;
__asm__(
"# Call XGETBV\n\t"
"#\n\t"
"# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
"# the XGETBV opcode, so we encode it by hand instead.\n\t"
"# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
".byte 0x0f, 0x01, 0xd0\n\t"
: "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
#endif
}
#endif
/*! @cond Doxygen ignores this part */
#define XXH_SSE2_CPUID_MASK (1 << 26)
#define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27))
#define XXH_AVX2_CPUID_MASK (1 << 5)
#define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1))
#define XXH_AVX512F_CPUID_MASK (1 << 16)
#define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1))
/*! @endcond */
/*!
* @private
* @brief Returns the best XXH3 implementation.
*
* Runs various CPUID/XGETBV tests to try and determine the best implementation.
*
* @return The best @ref XXH_VECTOR implementation.
* @see XXH_VECTOR_TYPES
*/
int XXH_featureTest(void)
{
xxh_u32 abcd[4];
xxh_u32 max_leaves;
int best = XXH_SCALAR;
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
xxh_u64 xgetbv_val;
#endif
#if defined(__GNUC__) && defined(__i386__)
xxh_u32 cpuid_supported;
__asm__(
"# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
"# is supported in the EFLAGS on i386.\n\t"
"# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
"# The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
"# for the CPUID instruction. If a software procedure can set and\n\t"
"# clear this flag, the processor executing the procedure supports\n\t"
"# the CPUID instruction.\n\t"
"# <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
"#\n\t"
"# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
"# Save EFLAGS\n\t"
XXH_I_ATT("pushfd", "pushfl" )
"# Store EFLAGS\n\t"
XXH_I_ATT("pushfd", "pushfl" )
"# Invert the ID bit in stored EFLAGS\n\t"
XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)")
"# Load stored EFLAGS (with ID bit inverted)\n\t"
XXH_I_ATT("popfd", "popfl" )
"# Store EFLAGS again (ID bit may or not be inverted)\n\t"
XXH_I_ATT("pushfd", "pushfl" )
"# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
XXH_I_ATT("pop eax", "popl %%eax" )
"# eax = whichever bits were changed\n\t"
XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" )
"# Restore original EFLAGS\n\t"
XXH_I_ATT("popfd", "popfl" )
"# eax = zero if ID bit can't be changed, else non-zero\n\t"
XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" )
: "=a" (cpuid_supported) :: "cc");
if (XXH_unlikely(!cpuid_supported)) {
XXH_debugPrint("CPUID support is not detected!");
return best;
}
#endif
/* Check how many CPUID pages we have */
XXH_cpuid(0, 0, abcd);
max_leaves = abcd[0];
/* Shouldn't happen on hardware, but happens on some QEMU configs. */
if (XXH_unlikely(max_leaves == 0)) {
XXH_debugPrint("Max CPUID leaves == 0!");
return best;
}
/* Check for SSE2, OSXSAVE and xgetbv */
XXH_cpuid(1, 0, abcd);
/*
* Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
*/
if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
return best;
XXH_debugPrint("SSE2 support detected.");
best = XXH_SSE2;
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
/* Make sure we have enough leaves */
if (XXH_unlikely(max_leaves < 7))
return best;
/* Test for OSXSAVE and XGETBV */
if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
return best;
/* CPUID check for AVX features */
XXH_cpuid(7, 0, abcd);
xgetbv_val = XXH_xgetbv();
#if XXH_DISPATCH_AVX2
/* Validate that AVX2 is supported by the CPU */
if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
return best;
/* Validate that the OS supports YMM registers */
if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
return best;
}
/* AVX2 supported */
XXH_debugPrint("AVX2 support detected.");
best = XXH_AVX2;
#endif
#if XXH_DISPATCH_AVX512
/* Check if AVX512F is supported by the CPU */
if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
XXH_debugPrint("AVX512F not supported by CPU");
return best;
}
/* Validate that the OS supports ZMM registers */
if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
return best;
}
/* AVX512F supported */
XXH_debugPrint("AVX512F support detected.");
best = XXH_AVX512;
#endif
#endif
return best;
}
/* === Vector implementations === */
/*! @cond PRIVATE */
/*!
* @private
* @brief Defines the various dispatch functions.
*
* TODO: Consolidate?
*
* @param suffix The suffix for the functions, e.g. sse2 or scalar
* @param target XXH_TARGET_* or empty.
*/
#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target) \
\
/* === XXH3, default variants === */ \
\
XXH_NO_INLINE target XXH64_hash_t \
XXHL64_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
size_t len) \
{ \
return XXH3_hashLong_64b_internal( \
input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \
XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \
); \
} \
\
/* === XXH3, Seeded variants === */ \
\
XXH_NO_INLINE target XXH64_hash_t \
XXHL64_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len, \
XXH64_hash_t seed) \
{ \
return XXH3_hashLong_64b_withSeed_internal( \
input, len, seed, XXH3_accumulate_##suffix, \
XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \
); \
} \
\
/* === XXH3, Secret variants === */ \
\
XXH_NO_INLINE target XXH64_hash_t \
XXHL64_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
size_t len, XXH_NOESCAPE const void* secret, \
size_t secretLen) \
{ \
return XXH3_hashLong_64b_internal( \
input, len, secret, secretLen, \
XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \
); \
} \
\
/* === XXH3 update variants === */ \
\
XXH_NO_INLINE target XXH_errorcode \
XXH3_update_##suffix(XXH_NOESCAPE XXH3_state_t* state, \
XXH_NOESCAPE const void* input, size_t len) \
{ \
return XXH3_update(state, (const xxh_u8*)input, len, \
XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix); \
} \
\
/* === XXH128 default variants === */ \
\
XXH_NO_INLINE target XXH128_hash_t \
XXHL128_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
size_t len) \
{ \
return XXH3_hashLong_128b_internal( \
input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \
XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \
); \
} \
\
/* === XXH128 Secret variants === */ \
\
XXH_NO_INLINE target XXH128_hash_t \
XXHL128_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
size_t len, \
XXH_NOESCAPE const void* XXH_RESTRICT secret, \
size_t secretLen) \
{ \
return XXH3_hashLong_128b_internal( \
input, len, (const xxh_u8*)secret, secretLen, \
XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix); \
} \
\
/* === XXH128 Seeded variants === */ \
\
XXH_NO_INLINE target XXH128_hash_t \
XXHL128_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len,\
XXH64_hash_t seed) \
{ \
return XXH3_hashLong_128b_withSeed_internal(input, len, seed, \
XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix, \
XXH3_initCustomSecret_##suffix); \
}
/*! @endcond */
/* End XXH_DEFINE_DISPATCH_FUNCS */
/*! @cond Doxygen ignores this part */
#if XXH_DISPATCH_SCALAR
XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
#endif
XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
#if XXH_DISPATCH_AVX2
XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
#endif
#if XXH_DISPATCH_AVX512
XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
#endif
#undef XXH_DEFINE_DISPATCH_FUNCS
/*! @endcond */
/* ==== Dispatchers ==== */
/*! @cond Doxygen ignores this part */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t);
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH_NOESCAPE XXH3_state_t*, XXH_NOESCAPE const void*, size_t);
typedef struct {
XXH3_dispatchx86_hashLong64_default hashLong64_default;
XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed;
XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
XXH3_dispatchx86_update update;
} XXH_dispatchFunctions_s;
#define XXH_NB_DISPATCHES 4
/*! @endcond */
/*!
* @private
* @brief Table of dispatchers for @ref XXH3_64bits().
*
* @pre The indices must match @ref XXH_VECTOR_TYPE.
*/
static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
#if XXH_DISPATCH_SCALAR
/* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
#else
/* Scalar */ { NULL, NULL, NULL, NULL },
#endif
/* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 },
#if XXH_DISPATCH_AVX2
/* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 },
#else
/* AVX2 */ { NULL, NULL, NULL, NULL },
#endif
#if XXH_DISPATCH_AVX512
/* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
#else
/* AVX512 */ { NULL, NULL, NULL, NULL }
#endif
};
/*!
* @private
* @brief The selected dispatch table for @ref XXH3_64bits().
*/
static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };
/*! @cond Doxygen ignores this part */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t);
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
typedef struct {
XXH3_dispatchx86_hashLong128_default hashLong128_default;
XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed;
XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
XXH3_dispatchx86_update update;
} XXH_dispatch128Functions_s;
/*! @endcond */
/*!
* @private
* @brief Table of dispatchers for @ref XXH3_128bits().
*
* @pre The indices must match @ref XXH_VECTOR_TYPE.
*/
static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
#if XXH_DISPATCH_SCALAR
/* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
#else
/* Scalar */ { NULL, NULL, NULL, NULL },
#endif
/* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 },
#if XXH_DISPATCH_AVX2
/* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 },
#else
/* AVX2 */ { NULL, NULL, NULL, NULL },
#endif
#if XXH_DISPATCH_AVX512
/* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
#else
/* AVX512 */ { NULL, NULL, NULL, NULL }
#endif
};
/*!
* @private
* @brief The selected dispatch table for @ref XXH3_64bits().
*/
static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };
/*!
* @private
* @brief Runs a CPUID check and sets the correct dispatch tables.
*/
static XXH_CONSTRUCTOR void XXH_setDispatch(void)
{
int vecID = XXH_featureTest();
XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1);
assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
#if !XXH_DISPATCH_SCALAR
assert(vecID != XXH_SCALAR);
#endif
#if !XXH_DISPATCH_AVX512
assert(vecID != XXH_AVX512);
#endif
#if !XXH_DISPATCH_AVX2
assert(vecID != XXH_AVX2);
#endif
XXH_g_dispatch = XXH_kDispatch[vecID];
XXH_g_dispatch128 = XXH_kDispatch128[vecID];
}
/* ==== XXH3 public functions ==== */
/*! @cond Doxygen ignores this part */
static XXH64_hash_t
XXH3_hashLong_64b_defaultSecret_selection(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
(void)seed64; (void)secret; (void)secretLen;
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_default == NULL)
XXH_setDispatch();
return XXH_g_dispatch.hashLong64_default(input, len);
}
XXH64_hash_t XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len)
{
return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
}
static XXH64_hash_t
XXH3_hashLong_64b_withSeed_selection(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
(void)secret; (void)secretLen;
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_seed == NULL)
XXH_setDispatch();
return XXH_g_dispatch.hashLong64_seed(input, len, seed64);
}
XXH64_hash_t XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection);
}
static XXH64_hash_t
XXH3_hashLong_64b_withSecret_selection(const void* XXH_RESTRICT input, size_t len,
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
(void)seed64;
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_secret == NULL)
XXH_setDispatch();
return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen);
}
XXH64_hash_t XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen)
{
return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
}
XXH_errorcode
XXH3_64bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.update == NULL)
XXH_setDispatch();
return XXH_g_dispatch.update(state, (const xxh_u8*)input, len);
}
/*! @endcond */
/* ==== XXH128 public functions ==== */
/*! @cond Doxygen ignores this part */
static XXH128_hash_t
XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len,
XXH64_hash_t seed64, const void* secret, size_t secretLen)
{
(void)seed64; (void)secret; (void)secretLen;
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_default == NULL)
XXH_setDispatch();
return XXH_g_dispatch128.hashLong128_default(input, len);
}
XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len)
{
return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
}
static XXH128_hash_t
XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len,
XXH64_hash_t seed64, const void* secret, size_t secretLen)
{
(void)secret; (void)secretLen;
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_seed == NULL)
XXH_setDispatch();
return XXH_g_dispatch128.hashLong128_seed(input, len, seed64);
}
XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection);
}
static XXH128_hash_t
XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len,
XXH64_hash_t seed64, const void* secret, size_t secretLen)
{
(void)seed64;
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_secret == NULL)
XXH_setDispatch();
return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
}
XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen)
{
return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
}
XXH_errorcode
XXH3_128bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.update == NULL)
XXH_setDispatch();
return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len);
}
/*! @endcond */
#if defined (__cplusplus)
}
#endif
/*! @} */

View File

@@ -1,93 +0,0 @@
/*
* xxHash - XXH3 Dispatcher for x86-based targets
* Copyright (C) 2020-2024 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
#ifndef XXH_X86DISPATCH_H_13563687684
#define XXH_X86DISPATCH_H_13563687684
#include "xxhash.h" /* XXH64_hash_t, XXH3_state_t */
#if defined (__cplusplus)
extern "C" {
#endif
/*!
* @brief Returns the best XXH3 implementation for x86
*
* @return The best @ref XXH_VECTOR implementation.
* @see XXH_VECTOR_TYPES
*/
XXH_PUBLIC_API int XXH_featureTest(void);
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len);
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed);
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len);
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len);
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed);
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen);
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len);
#if defined (__cplusplus)
}
#endif
/* automatic replacement of XXH3 functions.
* can be disabled by setting XXH_DISPATCH_DISABLE_REPLACE */
#ifndef XXH_DISPATCH_DISABLE_REPLACE
# undef XXH3_64bits
# define XXH3_64bits XXH3_64bits_dispatch
# undef XXH3_64bits_withSeed
# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch
# undef XXH3_64bits_withSecret
# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch
# undef XXH3_64bits_update
# define XXH3_64bits_update XXH3_64bits_update_dispatch
# undef XXH128
# define XXH128 XXH3_128bits_withSeed_dispatch
# undef XXH3_128bits
# define XXH3_128bits XXH3_128bits_dispatch
# undef XXH3_128bits_withSeed
# define XXH3_128bits_withSeed XXH3_128bits_withSeed_dispatch
# undef XXH3_128bits_withSecret
# define XXH3_128bits_withSecret XXH3_128bits_withSecret_dispatch
# undef XXH3_128bits_update
# define XXH3_128bits_update XXH3_128bits_update_dispatch
#endif /* XXH_DISPATCH_DISABLE_REPLACE */
#endif /* XXH_X86DISPATCH_H_13563687684 */