forked from amir/filehasher
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2a7bed5036 | |||
| 4fac135dce | |||
| 16c6aeae65 | |||
| 7d2a24d0be | |||
| b8104b0fc7 | |||
| 759fdfda1e | |||
| 73aa4808f2 | |||
| fb83c3114f | |||
| 5cb47a17a2 | |||
| 0faf2bc792 | |||
| b4487cd3a6 | |||
| 3393129c5f | |||
| ab31776658 | |||
| b8e577b5bb | |||
| 0294498538 | |||
| 41ac164881 | |||
| d4ba121b56 |
8
.gitignore
vendored
8
.gitignore
vendored
@@ -3,5 +3,13 @@ file_hasher.ilk
|
||||
file_hasher.rdi
|
||||
file_hasher.exe
|
||||
file_hashes.txt
|
||||
/Binaries
|
||||
file_list.txt
|
||||
temp_code.c
|
||||
/.cache
|
||||
/file_hasher
|
||||
/io_uring_test
|
||||
/file_hasher
|
||||
/io_uring_test
|
||||
/compile_commands.json
|
||||
/build
|
||||
|
||||
284
CMakeLists.txt
Normal file
284
CMakeLists.txt
Normal file
@@ -0,0 +1,284 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(filehasher
|
||||
VERSION 1.0.0
|
||||
DESCRIPTION "High-performance file hasher with I/O Ring/io_uring support"
|
||||
LANGUAGES C
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Force compiler search order
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# On Windows, prefer clang-cl, then GCC, then Clang
|
||||
if(WIN32)
|
||||
# Try to force compiler order if not already set
|
||||
if(NOT CMAKE_C_COMPILER)
|
||||
# Search in preferred order
|
||||
find_program(CLANG_CL_COMPILER NAMES clang-cl)
|
||||
find_program(GCC_COMPILER NAMES gcc)
|
||||
find_program(CLANG_COMPILER NAMES clang)
|
||||
|
||||
if(CLANG_CL_COMPILER)
|
||||
message(STATUS "Found clang-cl as preferred compiler: ${CLANG_CL_COMPILER}")
|
||||
set(CMAKE_C_COMPILER "${CLANG_CL_COMPILER}" CACHE STRING "" FORCE)
|
||||
elseif(GCC_COMPILER)
|
||||
message(STATUS "Found GCC as fallback compiler: ${GCC_COMPILER}")
|
||||
set(CMAKE_C_COMPILER "${GCC_COMPILER}" CACHE STRING "" FORCE)
|
||||
elseif(CLANG_COMPILER)
|
||||
message(STATUS "Found Clang as last-resort compiler: ${CLANG_COMPILER}")
|
||||
set(CMAKE_C_COMPILER "${CLANG_COMPILER}" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
# On Linux, prefer GCC, then Clang
|
||||
if(NOT CMAKE_C_COMPILER)
|
||||
find_program(GCC_COMPILER NAMES gcc)
|
||||
find_program(CLANG_COMPILER NAMES clang)
|
||||
|
||||
if(GCC_COMPILER)
|
||||
message(STATUS "Found GCC as preferred compiler: ${GCC_COMPILER}")
|
||||
set(CMAKE_C_COMPILER "${GCC_COMPILER}" CACHE STRING "" FORCE)
|
||||
elseif(CLANG_COMPILER)
|
||||
message(STATUS "Found Clang as fallback compiler: ${CLANG_COMPILER}")
|
||||
set(CMAKE_C_COMPILER "${CLANG_COMPILER}" CACHE STRING "" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Now project() will use the compiler we found
|
||||
# However, since we needed project() first to get C support,
|
||||
# we check what we actually got
|
||||
message(STATUS "Using compiler: ${CMAKE_C_COMPILER} (${CMAKE_C_COMPILER_ID})")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Platform and Compiler Detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if(WIN32)
|
||||
set(PLATFORM_WINDOWS TRUE)
|
||||
set(PLATFORM_NAME "Windows")
|
||||
else()
|
||||
set(PLATFORM_LINUX TRUE)
|
||||
set(PLATFORM_NAME "Linux")
|
||||
endif()
|
||||
|
||||
# Compiler type
|
||||
if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
# Check if it's clang-cl
|
||||
if(CMAKE_C_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
|
||||
set(COMPILER_CLANG_CL TRUE)
|
||||
message(STATUS "Detected clang-cl (MSVC-compatible frontend)")
|
||||
else()
|
||||
set(COMPILER_CLANG_GNU TRUE)
|
||||
message(STATUS "Detected Clang (GNU-compatible frontend)")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||
set(COMPILER_GCC TRUE)
|
||||
message(STATUS "Detected GCC")
|
||||
elseif(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
|
||||
# We don't want MSVC, but if it's found, warn user
|
||||
message(FATAL_ERROR
|
||||
"MSVC (cl.exe) detected!\n"
|
||||
"This project requires clang-cl, GCC, or Clang.\n"
|
||||
"Please install one of these compilers or specify manually:\n"
|
||||
" cmake .. -DCMAKE_C_COMPILER=clang-cl\n"
|
||||
" cmake .. -DCMAKE_C_COMPILER=gcc\n"
|
||||
" cmake .. -DCMAKE_C_COMPILER=clang\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build System Selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if(NOT CMAKE_GENERATOR OR CMAKE_GENERATOR STREQUAL "")
|
||||
find_program(NINJA_EXECUTABLE NAMES ninja)
|
||||
if(NINJA_EXECUTABLE)
|
||||
message(STATUS "Using Ninja build system")
|
||||
set(CMAKE_GENERATOR "Ninja")
|
||||
else()
|
||||
message(STATUS "Ninja not found, using default generator: ${CMAKE_GENERATOR}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source Files
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
set(SOURCES
|
||||
file_hasher.c
|
||||
xxhash.c
|
||||
xxh_x86dispatch.c
|
||||
)
|
||||
|
||||
# Headers for dependency tracking and IDE
|
||||
set(HEADERS
|
||||
arena.h
|
||||
base.h
|
||||
xxhash.h
|
||||
mt_mpmc.h
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Create Executable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
add_executable(${PROJECT_NAME}
|
||||
${SOURCES}
|
||||
${HEADERS}
|
||||
)
|
||||
|
||||
# Include directories
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compiler Flags - Exact match to your commands
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if(PLATFORM_WINDOWS)
|
||||
if(COMPILER_CLANG_CL)
|
||||
# === clang-cl flags ===
|
||||
# Release: /O2
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Release>:/O2>
|
||||
)
|
||||
# Debug: /Zi /Od
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Debug>:/Zi /Od>
|
||||
)
|
||||
# Common warnings
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE /W4)
|
||||
|
||||
elseif(COMPILER_GCC)
|
||||
# === GCC flags (Windows/MinGW) ===
|
||||
# Release: -O3
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Release>:-O3>
|
||||
)
|
||||
# Debug: -g -O0
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Debug>:-g -O0>
|
||||
)
|
||||
# Common warnings
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wextra)
|
||||
|
||||
elseif(COMPILER_CLANG_GNU)
|
||||
# === Clang flags (Windows, GNU frontend) ===
|
||||
# Release: -O3
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Release>:-O3>
|
||||
)
|
||||
# Debug: -g -O0
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Debug>:-g -O0>
|
||||
)
|
||||
# Common warnings
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wextra)
|
||||
endif()
|
||||
|
||||
# Windows-specific libraries
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
kernel32
|
||||
)
|
||||
|
||||
# Windows-specific defines
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE
|
||||
WIN32_LEAN_AND_MEAN
|
||||
_WIN32_WINNT=0x0A00 # Windows 10+
|
||||
)
|
||||
|
||||
# Set output name with .exe
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES
|
||||
SUFFIX ".exe"
|
||||
)
|
||||
|
||||
elseif(PLATFORM_LINUX)
|
||||
# === Linux GCC/Clang flags ===
|
||||
if(COMPILER_GCC OR COMPILER_CLANG_GNU)
|
||||
# Release: -O3
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Release>:-O3>
|
||||
)
|
||||
# Debug: -g -O0
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CONFIG:Debug>:-g -O0>
|
||||
)
|
||||
# Common warnings
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wextra)
|
||||
endif()
|
||||
|
||||
# Linux-specific libraries
|
||||
find_package(Threads REQUIRED)
|
||||
find_library(LIBURING_LIBRARY NAMES uring)
|
||||
|
||||
if(LIBURING_LIBRARY)
|
||||
message(STATUS "Found liburing: ${LIBURING_LIBRARY}")
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
Threads::Threads
|
||||
${LIBURING_LIBRARY}
|
||||
)
|
||||
else()
|
||||
message(FATAL_ERROR "liburing not found! Install liburing-dev or equivalent")
|
||||
endif()
|
||||
|
||||
# Linux-specific defines
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE
|
||||
_GNU_SOURCE
|
||||
)
|
||||
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C Standard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES
|
||||
C_STANDARD 11
|
||||
C_STANDARD_REQUIRED ON
|
||||
C_EXTENSIONS OFF
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build Configurations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Set default build type if not specified (matching your Release command)
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
|
||||
"Choose the type of build: Release or Debug" FORCE)
|
||||
message(STATUS "No build type specified, defaulting to Release")
|
||||
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# IDE Support
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Info Target
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
add_custom_target(info
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "=== Build Configuration ==="
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Project: ${PROJECT_NAME}"
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Compiler: ${CMAKE_C_COMPILER} (${CMAKE_C_COMPILER_ID})"
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Frontend: $<IF:$<BOOL:${COMPILER_CLANG_CL}>,clang-cl,GNU>"
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Generator: ${CMAKE_GENERATOR}"
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Build Type: ${CMAKE_BUILD_TYPE}"
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "Platform: ${PLATFORM_NAME}"
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "============================"
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Print final configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
message(STATUS "----------------------------------------")
|
||||
message(STATUS "Configuration Summary:")
|
||||
message(STATUS " Compiler: ${CMAKE_C_COMPILER}")
|
||||
message(STATUS " Build Type: ${CMAKE_BUILD_TYPE}")
|
||||
message(STATUS " Generator: ${CMAKE_GENERATOR}")
|
||||
message(STATUS " Platform: ${PLATFORM_NAME}")
|
||||
message(STATUS "----------------------------------------")
|
||||
285
README.md
285
README.md
@@ -1,24 +1,273 @@
|
||||
# filehasher
|
||||
|
||||
Collects some metadata and hashes files.
|
||||
# Presentation
|
||||
Collects some metadata and hashes files. It outputs the path, hash, size, creation and
|
||||
last modification dates and the author in file_hasher.txt.
|
||||
Creation and modification dates and author can be disabled in the config file.
|
||||
|
||||
## Building:
|
||||
### Windows:
|
||||
#### Release:
|
||||
clang-cl /O3 file_hasher.c xxh_x86dispatch.c advapi32.lib
|
||||
clang -O3 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
|
||||
gcc -O3 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
|
||||
It is a high performance cross platform Windows and Linux compatible program, it uses:
|
||||
* Multiple threads for scanning and hashing (multi-threading can be disabled in the config file).
|
||||
* Stores the generated data in thread local configurable arenas that support growing
|
||||
by committing more memory and chaining blocks.
|
||||
* Two Multi Producer Multi Consumer queues, one for the scanners and one between the scanners and hashers.
|
||||
* xxh3_128bits algorithm from xxhash, that supports SIMD instruction sets (SSE2, AVX2, AVX512)
|
||||
and uses a runtime dispatcher to select the best available instruction set.
|
||||
* IO Ring for asynchronous I/O in Windows and the equivalent io_uring in Linux.
|
||||
The implementation is event driven, thread local, uses DMA and direct disk I/O,
|
||||
bypassing the OS cache completely, registered buffers (and registered files in io_uring),
|
||||
it supports bashing multiple submissions and can handle multiple files at the same time.
|
||||
It can be disabled in the config file.
|
||||
* Fallback to buffered I/O if there is errors in the IO Ring path.
|
||||
|
||||
#### Debug:
|
||||
clang-cl /Zi /Od file_hasher.c xxh_x86dispatch.c advapi32.lib
|
||||
clang -g -O0 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
|
||||
gcc -g -O0 file_hasher.c xxh_x86dispatch.c -ladvapi32 -o file_hasher
|
||||
# Building
|
||||
|
||||
### Linux:
|
||||
#### Release:
|
||||
clang -O3 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
|
||||
gcc -O3 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
|
||||
## Windows
|
||||
**Requirements**: Make sur to use UCRT64 environment from MSYS2 instead of the standard MinGW environment.
|
||||
UCRT64 uses the modern Universal C Runtime (ucrtbase.dll), which supports the newest APIs,
|
||||
the standard MSYS2 uses the legacy msvcrt.dll and does not support IO Ring.
|
||||
To install:
|
||||
pacman -S mingw-w64-ucrt-x86_64-clang
|
||||
or:
|
||||
pacman -S mingw-w64-ucrt-x86_64-gcc
|
||||
|
||||
#### Debug:
|
||||
clang -g -O0 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
|
||||
gcc -g -O0 -pthread file_hasher.c xxh_x86dispatch.c -o file_hasher
|
||||
pacman -Syu
|
||||
And add to path:
|
||||
C:\msys64\ucrt64\bin
|
||||
|
||||
Additionally, to use clang-cl install the latest version of Windows SDK and MSVC, or at least select these in Visual Studio Installer:
|
||||
* MSVC Build tools fo x64/86.
|
||||
* C++ Build tools core features.
|
||||
* MSBuild support for LLVM (clang-cl) toolset.
|
||||
* Windows Universal C runtime.
|
||||
* Windows Universal CRT SDK.
|
||||
* Windows 11 SDK.
|
||||
|
||||
And use the MSVC command prompt or run a script to add MSVC environment variables to current session.
|
||||
Ex: for PowerShell Terminal save as .ps1 (not persistent):
|
||||
```ps1
|
||||
# Add MS visual studio environment variables
|
||||
cmd /c '"C:\Program Files (x86)\Microsoft Visual Studio\18\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 && set' |
|
||||
ForEach-Object {
|
||||
if ($_ -match "^(.*?)=(.*)$") {
|
||||
Set-Item -Path "Env:$($matches[1])" -Value $matches[2]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Optional: to use the build system
|
||||
pacman -S mingw-w64-ucrt-x86_64-cmake
|
||||
The build system uses Ninja and fallsback to make, in Windows it prefers clang-cl > gcc > clang, and in Linux gcc > clang.
|
||||
|
||||
### Using a build system
|
||||
| Command | Description|
|
||||
| :--- | :--- |
|
||||
| ./build.bat | Build Release with best available compiler |
|
||||
| ./build.bat Debug | Build Debug |
|
||||
| ./build.bat clean | Clean and build Release |
|
||||
| ./build.bat Debug clean | Clean and build Debug |
|
||||
|
||||
### Release
|
||||
gcc -O3 file_hasher.c xxhash.c xxh_x86dispatch.c -o filehasher
|
||||
clang -O3 file_hasher.c xxhash.c xxh_x86dispatch.c -o filehasher
|
||||
clang-cl /O2 file_hasher.c xxhash.c xxh_x86dispatch.c
|
||||
|
||||
### Debug
|
||||
gcc -g -O0 file_hasher.c xxhash.c xxh_x86dispatch.c -o filehasher
|
||||
clang -g -O0 file_hasher.c xxhash.c xxh_x86dispatch.c -o filehasher
|
||||
clang-cl /Zi /Od file_hasher.c xxhash.c xxh_x86dispatch.c
|
||||
|
||||
## Linux
|
||||
**Requirements**: GCC or clang, optional CMake, Ninja or make.
|
||||
|
||||
### Using a build system
|
||||
| Command | Description|
|
||||
| :--- | :--- |
|
||||
| ./build.sh | Build Release with best available compiler |
|
||||
| ./build.sh Debug | Build Debug |
|
||||
| ./build.sh clean | Clean and build Release |
|
||||
| ./build.sh Debug clean | Clean and build Debug |
|
||||
|
||||
### Release
|
||||
gcc -O3 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o filehasher
|
||||
clang -O3 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o filehasher
|
||||
|
||||
### Debug
|
||||
gcc -g -O0 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o filehasher
|
||||
clang -g -O0 file_hasher.c xxhash.c xxh_x86dispatch.c -pthread -luring -o filehasher
|
||||
|
||||
# Notes about the IO Ring implementations
|
||||
## IO Ring
|
||||
|
||||
### File registration
|
||||
Registering files is a performance optimization that allows the kernel to allocate an array
|
||||
of descriptors/handles to pre-validate and maintain long-term references to file handles.
|
||||
Instead of passing a standard file descriptor/handle with every I/O request, you pass a simple integer
|
||||
index into a pre-registered table.
|
||||
|
||||
The Linux implementation has io_uring_register_files_scarse() to create an empty array of descriptors
|
||||
(initialized with -1) without having to create and initialize it in the user space, and we can
|
||||
use io_uring_register_files_update() to update one or more entries. Windows on the other hand
|
||||
is limited to BuildIoRingRegisterFileHandles() only, so we need to re register the entire array of handles
|
||||
each time. This is why there is a provided macro in config.h to disable or enable it.
|
||||
|
||||
#### *Why Register Files? (The Benefits)*
|
||||
When you use a standard file descriptor in a high-frequency I/O loop,
|
||||
the kernel must perform several "hidden" tasks for every single operation:
|
||||
* Permission Checks: Validating that the process still has the right to read/write
|
||||
that specific file.
|
||||
* Reference Counting: Incrementing the file's internal reference count at the start of
|
||||
the I/O and decrementing it at the end to ensure the file isn't closed while in use.
|
||||
* Object Lookup: Traversing the internal "file descriptor table" to find the actual
|
||||
kernel object associated with your integer ID.
|
||||
|
||||
Registering the files performs these checks once at registration time. Subsequent
|
||||
I/O operations skip these steps, significantly reducing CPU overhead and latency,
|
||||
especially when handling thousands of small I/O operations per second.
|
||||
|
||||
#### *Comparison: Linux vs. Windows Implementation*
|
||||
While both systems share the same core concept, their APIs and management styles differ significantly.
|
||||
|
||||
| Feature | Linux (`io_uring`) | Windows (`IoRing`) |
|
||||
| :--- | :--- | :--- |
|
||||
| **API Call** | `io_uring_register` | `BuildIoRingRegisterFileHandles` |
|
||||
| **Registration Method** | Synchronous system call; blocks until the table is set up. | Asynchronous request submitted to the ring like a read/write operation. |
|
||||
| **Partial Updates** | Supports `IORING_REGISTER_FILES_UPDATE` to swap specific indices. | No partial updates; a new registration replaces the entire table. |
|
||||
| **Scope of Operations** | Extremely broad (files, sockets, timers, signals, etc.). | Primarily focused on file storage (read, write, flush). |
|
||||
|
||||
### Completion Wait count and peek
|
||||
To avoid busy waiting when receiving CQEs, we can use io_uring_submit_and_wait() in Linux by entering a wait count,
|
||||
the threads sleep until the count of CQEs are received, in windows the wait_count is present in SubmitIoRing()
|
||||
but is not implemented yet, so we wait with a completion event for a single completion. Another limitation on the completion
|
||||
event is that the kernel will waik up the thread only when receiving the first CQE, after that we need to drain the completion
|
||||
queue completely before sleeping again, or we enter an eternal slumber.
|
||||
In the other hand, in Linux we can batch pop completions with io_uring_peek_batch_cqe() + io_uring_cq_advance(),
|
||||
in Windows we can only pop one completion at a time with PopIoRingCompletion() (equivalent to io_uring_peek_cqe() + io_uring_cqe_seen()).
|
||||
To simulate the same behavior as the Linux functions we use a double loop, an outer loop to control how much we wait
|
||||
and in inner loop to drain all the available completions.
|
||||
|
||||
### Filtering CQEs
|
||||
|
||||
Unlike Linux, The Windows implementation treats buffer and file registration
|
||||
as an asynchronous operation that we submit to the ring, similar to a read or write.
|
||||
Those operations produce CQEs (completion queue entries) that we filter here using
|
||||
cqe.UserData == USERDATA_REGISTER
|
||||
```c
|
||||
if (win_cqe.UserData == USERDATA_REGISTER)
|
||||
continue;
|
||||
```
|
||||
|
||||
## io_uring
|
||||
|
||||
### Creation flags
|
||||
io_uring provides a lot of configuration flags compared to IO Ring, some
|
||||
of them are at the creation and others during the operations, here what
|
||||
we use in this implementation at creation time and is lacking in the
|
||||
IO Ring implementation.
|
||||
|
||||
* IORING_SETUP_SINGLE_ISSUER: Since we are using a thread local io_uring, we can
|
||||
set this flag to remove the atomic operations.
|
||||
* IORING_SETUP_DEFER_TASKRUN: By default, the kernel sends an interrupts when a CQE
|
||||
is ready, we use this flag to disable this syscall and wait for a specific number of
|
||||
CQEs to be ready to group them, this reduces the number of syscall.
|
||||
|
||||
### Memlock limit warning
|
||||
|
||||
```c
|
||||
"WARNING: Buffer registration failed due to memlock limits (ENOMEM).\n"
|
||||
"Increase the limit to solve this warning.\n");
|
||||
```
|
||||
|
||||
The Memlock limit in Linux restricts the amount of memory that can be
|
||||
"locked" into physical RAM using the mlock() family of system calls. This
|
||||
prevents the operating system from swapping that memory out to disk.
|
||||
And registering buffers will lock the buffers memory so the hardware
|
||||
can access it directly without kernel intervention and prevents the kernel from
|
||||
swapping it to the SSD or HDD.
|
||||
This limit does not apply to a single process, but it applies to what all the runnig processes can lock, so in order
|
||||
to be able to register the buffers, we need to set it to unlimited or increase it to at least:
|
||||
num_hash_threads * NUM_BUFFERS_PER_THREAD * IORING_BUFFER_SIZE + extra memory reserved for other processes.
|
||||
|
||||
#### *Modifying the Limit*
|
||||
The method for changing the memlock limit depends on whether you are
|
||||
managing a user session or a system service.
|
||||
1. For Users and Interactive Sessions
|
||||
To permanently increase the limit for a specific user or group, modify
|
||||
the /etc/security/limits.conf file. Add the following lines:
|
||||
|
||||
```conf
|
||||
# Example for a specific user (replace 'username'), unlimited or a custom value in KB
|
||||
username soft memlock unlimited
|
||||
username hard memlock unlimitedhttps://wiki.postgresql.org/wiki/AIO
|
||||
```
|
||||
```conf
|
||||
# Example for all users
|
||||
* soft memlock unlimited
|
||||
* hard memlock unlimited
|
||||
```
|
||||
|
||||
Soft Limit: The value the user starts with; can be raised up to the
|
||||
hard limit.
|
||||
|
||||
Hard Limit: The absolute maximum; only a privileged user
|
||||
(root) can increase this. Values: Can be set in Kilobytes (KB) or as
|
||||
unlimited.
|
||||
|
||||
2. For Systemd Services
|
||||
Settings in limits.conf do not affect background services managed by
|
||||
systemd. To increase the limit for a service, edit its service file
|
||||
(e.g., /etc/systemd/system/myservice.service) and add:
|
||||
```conf
|
||||
[Service]
|
||||
LimitMEMLOCK=infinity
|
||||
```
|
||||
|
||||
#### *Why Register Buffers?*
|
||||
In a standard "unregistered" I/O operation, the kernel must perform several
|
||||
expensive steps for every single read or write:
|
||||
* Virtual-to-Physical Mapping: The kernel has to translate your application's
|
||||
virtual memory addresses into physical RAM addresses.
|
||||
* Page Pinning: The kernel must "pin" the memory pages (using get_user_pages)
|
||||
to prevent them from being swapped to disk or moved while the hardware
|
||||
(like your SSD) is writing to them.
|
||||
* TLB Overhead: Constant mapping and unmapping put pressure on the Translation
|
||||
Lookaside Buffer (TLB), which can slow down the CPU.
|
||||
|
||||
Registering the buffers performs all of this "pinning" and "mapping" once.
|
||||
|
||||
### Direct I/O: O_DIRECT (Linux) and FILE_FLAG_NO_BUFFERING (Windows)
|
||||
|
||||
Modern operating systems normally use a page cache when reading files. This means file
|
||||
data is first loaded into kernel memory and then copied to user space. While this improves
|
||||
performance for many workloads, it introduces extra memory usage and copy overhead.
|
||||
|
||||
Both Linux and Windows provide a way to bypass this cache and perform direct I/O:
|
||||
|
||||
Linux: O_DIRECT
|
||||
Windows: FILE_FLAG_NO_BUFFERING
|
||||
|
||||
These flags instruct the OS to transfer data directly between disk and user-provided buffers, avoiding the page cache.
|
||||
|
||||
#### *Benefits*
|
||||
1. Reduced memory overhead
|
||||
Avoids polluting the OS page cache
|
||||
Especially useful for large sequential reads (e.g. hashing, backups)
|
||||
2. Lower CPU usage
|
||||
Eliminates extra memory copies between kernel and user space
|
||||
3. Predictable performance
|
||||
No interference from cache eviction or readahead heuristics
|
||||
More consistent throughput for streaming workloads
|
||||
4. Better scalability
|
||||
Ideal for high-throughput, multi-threaded I/O pipelines
|
||||
Prevents cache contention between threads
|
||||
5. Avoids double caching
|
||||
Important when the application already manages its own buffering
|
||||
|
||||
#### *File system compatibility*
|
||||
Not all file systems are compatible with O_DIRECT, if we try to open files residing in an NTFS partition,
|
||||
most of the time it will fail, and some times it opens but the CQEs return with an error code bad
|
||||
descriptor, and it causes some lags.
|
||||
|
||||
To address this issue the program falls back to sequential read when the open fails, and falls back to
|
||||
buffered sequential hashing if we receive an error in the CQEs. There is also a file system detection
|
||||
that we can enable in the config file, it will enable the collection of the file system in scan_folder()
|
||||
and the file will be opened accordingly, but it costs one additional syscall / directory.
|
||||
|
||||
134
arena.c
134
arena.c
@@ -83,8 +83,7 @@ u64 arena_pos_from_ptr(mem_arena *arena, void *ptr) {
|
||||
|
||||
void *arena_ptr_from_pos(mem_arena *arena, u64 global_pos) {
|
||||
ASSERT(arena);
|
||||
ASSERT(global_pos >= 0);
|
||||
if (!arena || global_pos < 0) {
|
||||
if (!arena) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -197,7 +196,7 @@ mem_arena *arena_create(arena_params *params) { // mk create
|
||||
arena->free_list = arena_create(&(arena_params){
|
||||
.reserve_size = MiB(1),
|
||||
.commit_size = MiB(1),
|
||||
.align = ARENA_ALIGN,
|
||||
.align = ARENA_CACHE_ALIGN,
|
||||
.push_size = sizeof(arena_free_node),
|
||||
.allow_free_list = false,
|
||||
.free_list = NULL,
|
||||
@@ -437,12 +436,16 @@ void *arena_push(mem_arena **arena_ptr, u64 size, bool zero) { // mk push
|
||||
Commit memory if needed
|
||||
------------------------------------------------------------ */
|
||||
|
||||
if (local_post > selected->commit_pos) {
|
||||
u64 new_commit = ALIGN_UP_POW2(local_post, arena_pagesize());
|
||||
if (local_post > selected->commit_pos -
|
||||
ALIGN_UP_POW2(sizeof(mem_arena), selected->align)) {
|
||||
u64 new_commit = ALIGN_UP_POW2(
|
||||
local_post + ALIGN_UP_POW2(sizeof(mem_arena), selected->align),
|
||||
arena_pagesize());
|
||||
new_commit = MIN(new_commit, selected->reserve_size);
|
||||
|
||||
if (!plat_mem_commit((u8 *)selected + selected->commit_pos,
|
||||
new_commit - selected->commit_pos)) {
|
||||
printf("ERROR: Could not commit memory!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -486,7 +489,6 @@ void *arena_free(mem_arena **arena_ptr, u8 **ptr, u64 size) { // mk free
|
||||
Find owning block
|
||||
------------------------------------------------------------ */
|
||||
|
||||
mem_arena *selected = arena;
|
||||
mem_arena *owner = arena_block_from_ptr(arena, *ptr);
|
||||
ASSERT(owner);
|
||||
if (!owner) {
|
||||
@@ -498,7 +500,7 @@ void *arena_free(mem_arena **arena_ptr, u8 **ptr, u64 size) { // mk free
|
||||
------------------------------------------------------------ */
|
||||
|
||||
u64 global_offset = arena_pos_from_ptr(arena, *ptr);
|
||||
if (global_offset == -1) {
|
||||
if (global_offset == UINT64_MAX) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -605,9 +607,6 @@ void *arena_swapback_pop(mem_arena **arena_ptr, u64 index) { // mk swapback
|
||||
fprintf(stderr, "ERROR: Swapback pop failed, index out of range");
|
||||
return NULL;
|
||||
}
|
||||
u8 *owner_base = (u8 *)owner + ALIGN_UP_POW2(sizeof(mem_arena), owner->align);
|
||||
u8 *arena_base = (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||
|
||||
u8 *dst = arena_ptr_from_index(arena, index);
|
||||
u8 *src = arena_ptr_from_index(arena, count);
|
||||
|
||||
@@ -623,6 +622,119 @@ void *arena_swapback_pop(mem_arena **arena_ptr, u64 index) { // mk swapback
|
||||
/* ============================================================
|
||||
Utilities
|
||||
============================================================ */
|
||||
typedef enum arena_trim_flags {
|
||||
ARENA_TRIM_NONE = 0,
|
||||
|
||||
ARENA_TRIM_SPACE = 1 << 0,
|
||||
ARENA_TRIM_TAB = 1 << 1,
|
||||
ARENA_TRIM_LF = 1 << 2,
|
||||
ARENA_TRIM_CR = 1 << 3,
|
||||
ARENA_TRIM_NUL = 1 << 4,
|
||||
|
||||
} arena_trim_flags;
|
||||
|
||||
u64 arena_trim_string(mem_arena **arena_ptr, char *str, u8 termination_flags) {
|
||||
ASSERT(arena_ptr);
|
||||
ASSERT(*arena_ptr);
|
||||
ASSERT(str);
|
||||
|
||||
if (!arena_ptr || !*arena_ptr || !str) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
mem_arena *arena = *arena_ptr;
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Find owning block
|
||||
------------------------------------------------------------ */
|
||||
|
||||
mem_arena *owner = arena_block_from_ptr(arena, (u8 *)str);
|
||||
|
||||
ASSERT(owner);
|
||||
if (!owner) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Must be current block
|
||||
------------------------------------------------------------ */
|
||||
|
||||
if (owner != arena) {
|
||||
fprintf(stderr, "arena_trim_string(): string is not "
|
||||
"in current arena block.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Compute string position
|
||||
------------------------------------------------------------ */
|
||||
|
||||
u64 str_pos = arena_pos_from_ptr(arena, str);
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Original reserved size
|
||||
------------------------------------------------------------ */
|
||||
|
||||
u64 allocated_size = arena->pos - str_pos;
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Compute sizes
|
||||
------------------------------------------------------------ */
|
||||
|
||||
u64 str_size = strlen(str);
|
||||
|
||||
char *dst = str + str_size;
|
||||
u64 termination_size = 0;
|
||||
|
||||
if (termination_flags & ARENA_TRIM_SPACE) {
|
||||
*dst++ = ' ';
|
||||
termination_size++;
|
||||
}
|
||||
|
||||
if (termination_flags & ARENA_TRIM_TAB) {
|
||||
*dst++ = '\t';
|
||||
termination_size++;
|
||||
}
|
||||
|
||||
if (termination_flags & ARENA_TRIM_CR) {
|
||||
*dst++ = '\r';
|
||||
termination_size++;
|
||||
}
|
||||
|
||||
if (termination_flags & ARENA_TRIM_LF) {
|
||||
*dst++ = '\n';
|
||||
termination_size++;
|
||||
}
|
||||
|
||||
if (termination_flags & ARENA_TRIM_NUL) {
|
||||
*dst++ = '\0';
|
||||
termination_size++;
|
||||
}
|
||||
/* ------------------------------------------------------------
|
||||
Final used size
|
||||
------------------------------------------------------------ */
|
||||
|
||||
u64 used_size = str_size + termination_size;
|
||||
|
||||
used_size = ALIGN_UP_POW2(used_size, arena->align);
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Overflow detection
|
||||
------------------------------------------------------------ */
|
||||
|
||||
if (used_size > allocated_size) {
|
||||
fprintf(stderr, "arena_trim_string(): string overflow "
|
||||
"detected.\n");
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Update arena position
|
||||
------------------------------------------------------------ */
|
||||
|
||||
arena->pos = str_pos + used_size;
|
||||
|
||||
return used_size;
|
||||
}
|
||||
|
||||
void *arena_clear(mem_arena **arena_ptr) { // mk clear
|
||||
|
||||
@@ -804,7 +916,7 @@ mem_arena_temp arena_scratch_get(mem_arena **conflicts, u32 num_conflicts) {
|
||||
arena_params params = {
|
||||
.reserve_size = MiB(64),
|
||||
.commit_size = MiB(1),
|
||||
.align = ARENA_ALIGN,
|
||||
.align = ARENA_CACHE_ALIGN,
|
||||
.push_size = 8,
|
||||
.allow_free_list = false,
|
||||
.allow_swapback = true,
|
||||
|
||||
2
arena.h
2
arena.h
@@ -239,7 +239,7 @@ void *arena_ptr_from_index(mem_arena *arena, u64 index);
|
||||
*/
|
||||
|
||||
#define ARENA_HEADER_SIZE (sizeof(mem_arena))
|
||||
#define ARENA_ALIGN (sizeof(void *))
|
||||
#define ARENA_CACHE_ALIGN (sizeof(void *))
|
||||
|
||||
// arena config
|
||||
typedef enum arena_growth_policy {
|
||||
|
||||
149
base.h
149
base.h
@@ -1,9 +1,51 @@
|
||||
#pragma once
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma comment(lib, "advapi32.lib")
|
||||
#endif
|
||||
|
||||
#include <aclapi.h>
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#include <ioringapi.h> // Needs to be included before stdatomic to avoid errors
|
||||
#include <ntioring_x.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <windows.h>
|
||||
#include <winerror.h>
|
||||
|
||||
#elif defined(__linux__)
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <liburing.h>
|
||||
#include <poll.h>
|
||||
#include <pthread.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <immintrin.h>
|
||||
#include <limits.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -11,25 +53,6 @@
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define PLATFORM_WINDOWS 1
|
||||
#include <aclapi.h>
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <windows.h>
|
||||
|
||||
#define strdup _strdup
|
||||
#else
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Base types
|
||||
------------------------------------------------------------ */
|
||||
@@ -83,16 +106,16 @@ typedef double f64;
|
||||
#define ASSERT(x) assert(x)
|
||||
#endif
|
||||
|
||||
#define NDEBUG // Comment to enable asserts
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG 1 // 0 to enable asserts
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Some helper functions
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
||||
// Memory allocation
|
||||
|
||||
static u32 plat_get_pagesize(void) {
|
||||
SYSTEM_INFO sysinfo = {0};
|
||||
GetSystemInfo(&sysinfo);
|
||||
@@ -109,58 +132,23 @@ static b32 plat_mem_commit(void *ptr, u64 size) {
|
||||
return ret != NULL;
|
||||
}
|
||||
|
||||
static b32 plat_mem_decommit(void *ptr, u64 size) {
|
||||
return VirtualFree(ptr, size, MEM_DECOMMIT);
|
||||
}
|
||||
// static b32 plat_mem_decommit(void *ptr, u64 size) { // Comment to prevent warning: unused function
|
||||
// return VirtualFree(ptr, size, MEM_DECOMMIT);
|
||||
// }
|
||||
|
||||
static b32 plat_mem_release(void *ptr, u64 size) {
|
||||
return VirtualFree(ptr, size, MEM_RELEASE);
|
||||
}
|
||||
|
||||
// Semaphores
|
||||
typedef struct plat_sem {
|
||||
HANDLE handle;
|
||||
} plat_sem;
|
||||
|
||||
static b32 plat_sem_init(plat_sem *s, u32 initial) {
|
||||
s->handle = CreateSemaphore(NULL, initial, LONG_MAX, NULL);
|
||||
return s->handle != NULL;
|
||||
}
|
||||
|
||||
static void plat_sem_wait(plat_sem *s) {
|
||||
WaitForSingleObject(s->handle, INFINITE);
|
||||
}
|
||||
|
||||
static b32 plat_sem_trywait(HANDLE sem) {
|
||||
DWORD r = WaitForSingleObject(sem, 0);
|
||||
return r == WAIT_OBJECT_0;
|
||||
}
|
||||
|
||||
static void plat_sem_post(plat_sem *s, u32 count) {
|
||||
ReleaseSemaphore(s->handle, count, NULL);
|
||||
}
|
||||
|
||||
static void plat_sem_destroy(plat_sem *s) {
|
||||
if (s->handle) {
|
||||
CloseHandle(s->handle);
|
||||
s->handle = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Sleep
|
||||
static void sleep_ms(int ms) { Sleep(ms); }
|
||||
|
||||
#elif defined(__linux__)
|
||||
|
||||
// Memory allocation
|
||||
|
||||
#ifndef _DEFAULT_SOURCE
|
||||
#define _DEFAULT_SOURCE
|
||||
#endif
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static u32 plat_get_pagesize(void) { return (u32)sysconf(_SC_PAGESIZE); }
|
||||
|
||||
static void *plat_mem_reserve(u64 size) {
|
||||
@@ -176,46 +164,19 @@ static b32 plat_mem_commit(void *ptr, u64 size) {
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
static b32 plat_mem_decommit(void *ptr, u64 size) {
|
||||
i32 ret = mprotect(ptr, size, PROT_NONE);
|
||||
if (ret != 0)
|
||||
return false;
|
||||
ret = madvise(ptr, size, MADV_DONTNEED);
|
||||
return ret == 0;
|
||||
}
|
||||
// static b32 plat_mem_decommit(void *ptr, u64 size) { // Comment to prevent warning: unused function
|
||||
// i32 ret = mprotect(ptr, size, PROT_NONE);
|
||||
// if (ret != 0)
|
||||
// return false;
|
||||
// ret = madvise(ptr, size, MADV_DONTNEED);
|
||||
// return ret == 0;
|
||||
// }
|
||||
|
||||
static b32 plat_mem_release(void *ptr, u64 size) {
|
||||
i32 ret = munmap(ptr, size);
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
// Semaphores
|
||||
#include <semaphore.h>
|
||||
|
||||
typedef struct plat_sem {
|
||||
sem_t sem;
|
||||
} plat_sem;
|
||||
|
||||
static b32 plat_sem_init(plat_sem *s, u32 initial) {
|
||||
return sem_init(&s->sem, 0, initial) == 0;
|
||||
}
|
||||
|
||||
static void plat_sem_wait(plat_sem *s) {
|
||||
while (sem_wait(&s->sem) == -1 && errno == EINTR) {
|
||||
}
|
||||
}
|
||||
|
||||
static b32 plat_sem_trywait(sem_t *sem) { return sem_trywait(sem) == 0; }
|
||||
|
||||
static void plat_sem_post(plat_sem *s, u32 count) {
|
||||
for (u32 i = 0; i < count; i++) {
|
||||
sem_post(&s->sem);
|
||||
}
|
||||
}
|
||||
|
||||
static void plat_sem_destroy(plat_sem *s) { sem_destroy(&s->sem); }
|
||||
|
||||
// Sleep
|
||||
static void sleep_ms(int ms) { usleep(ms * 1000); }
|
||||
|
||||
#endif
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
172
build.bat
Normal file
172
build.bat
Normal file
@@ -0,0 +1,172 @@
|
||||
@echo off
|
||||
setlocal enabledelayedexpansion
|
||||
|
||||
set PROJECT_NAME=filehasher
|
||||
|
||||
:: ============================================================================
|
||||
:: build.bat
|
||||
:: ============================================================================
|
||||
|
||||
:: Get script directory (project root)
|
||||
set SCRIPT_DIR=%~dp0
|
||||
set SCRIPT_DIR=%SCRIPT_DIR:~0,-1%
|
||||
|
||||
:: ---------------------------------------------------------------------------
|
||||
:: Default values
|
||||
:: ---------------------------------------------------------------------------
|
||||
|
||||
set BUILD_TYPE=Release
|
||||
set CLEAN_BUILD=0
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Parse arguments
|
||||
:: --------------------------------------------------------------------------
|
||||
:parse_args
|
||||
if "%~1"=="" goto :main
|
||||
|
||||
if /i "%~1"=="Release" (
|
||||
set BUILD_TYPE=Release
|
||||
shift
|
||||
goto :parse_args
|
||||
)
|
||||
if /i "%~1"=="Debug" (
|
||||
set BUILD_TYPE=Debug
|
||||
shift
|
||||
goto :parse_args
|
||||
)
|
||||
if /i "%~1"=="clean" (
|
||||
set CLEAN_BUILD=1
|
||||
shift
|
||||
goto :parse_args
|
||||
)
|
||||
|
||||
echo Unknown argument: %~1
|
||||
echo Usage: build [Release^|Debug] [clean]
|
||||
exit /b 1
|
||||
|
||||
:main
|
||||
set BUILD_DIR=%SCRIPT_DIR%\build\windows\%BUILD_TYPE%
|
||||
|
||||
echo === Building %PROJECT_NAME% (%BUILD_TYPE%) ===
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Clean if requested
|
||||
:: --------------------------------------------------------------------------
|
||||
if %CLEAN_BUILD%==1 (
|
||||
echo Cleaning...
|
||||
if exist "%BUILD_DIR%" rmdir /s /q "%BUILD_DIR%" 2>nul
|
||||
)
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Create build directory
|
||||
:: --------------------------------------------------------------------------
|
||||
if not exist "%BUILD_DIR%" mkdir "%BUILD_DIR%"
|
||||
pushd "%BUILD_DIR%"
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Compiler selection
|
||||
:: --------------------------------------------------------------------------
|
||||
set CC=
|
||||
|
||||
where clang-cl >nul 2>&1
|
||||
if !ERRORLEVEL! equ 0 (
|
||||
echo Compiler: clang-cl ^(preferred^)
|
||||
set "CC=-DCMAKE_C_COMPILER=clang-cl"
|
||||
goto :find_generator
|
||||
)
|
||||
|
||||
where gcc >nul 2>&1
|
||||
if !ERRORLEVEL! equ 0 (
|
||||
echo Compiler: gcc ^(fallback^)
|
||||
set "CC=-DCMAKE_C_COMPILER=gcc"
|
||||
goto :find_generator
|
||||
)
|
||||
|
||||
where clang >nul 2>&1
|
||||
if !ERRORLEVEL! equ 0 (
|
||||
echo Compiler: clang ^(last resort^)
|
||||
set "CC=-DCMAKE_C_COMPILER=clang"
|
||||
goto :find_generator
|
||||
)
|
||||
|
||||
echo ERROR: No suitable compiler found!
|
||||
popd
|
||||
exit /b 1
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Generator selection (prefer ninja)
|
||||
:: --------------------------------------------------------------------------
|
||||
:find_generator
|
||||
set GEN=
|
||||
where ninja >nul 2>&1
|
||||
if !ERRORLEVEL! equ 0 (
|
||||
echo Generator: Ninja
|
||||
set "GEN=-G Ninja"
|
||||
) else (
|
||||
echo Generator: Default
|
||||
)
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Configure
|
||||
:: --------------------------------------------------------------------------
|
||||
echo.
|
||||
echo Configuring CMake...
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: compile_commands.json logic
|
||||
:: --------------------------------------------------------------------------
|
||||
set EXPORT_COMPILE_COMMANDS=OFF
|
||||
|
||||
if /i "%BUILD_TYPE%"=="Release" (
|
||||
if exist "%SCRIPT_DIR%\compile_commands.json" (
|
||||
echo compile_commands.json already exists - skipping generation
|
||||
) else (
|
||||
echo compile_commands.json will be generated
|
||||
set EXPORT_COMPILE_COMMANDS=ON
|
||||
)
|
||||
)
|
||||
|
||||
set CMD=cmake "%SCRIPT_DIR%" %GEN% %CC% -DCMAKE_BUILD_TYPE=%BUILD_TYPE% -DCMAKE_EXPORT_COMPILE_COMMANDS=%EXPORT_COMPILE_COMMANDS%
|
||||
|
||||
echo !CMD!
|
||||
!CMD!
|
||||
if !ERRORLEVEL! neq 0 (
|
||||
echo ERROR: Configuration failed
|
||||
popd
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Build
|
||||
:: --------------------------------------------------------------------------
|
||||
echo.
|
||||
echo Building...
|
||||
cmake --build . --config %BUILD_TYPE%
|
||||
if !ERRORLEVEL! neq 0 (
|
||||
echo ERROR: Build failed
|
||||
popd
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Copy compile_commands.json (only if generated)
|
||||
:: --------------------------------------------------------------------------
|
||||
if /i "%EXPORT_COMPILE_COMMANDS%"=="ON" (
|
||||
if exist "compile_commands.json" (
|
||||
echo.
|
||||
echo clangd: compile_commands.json generated
|
||||
|
||||
copy /Y "compile_commands.json" "%SCRIPT_DIR%\compile_commands.json" >nul 2>&1
|
||||
if !ERRORLEVEL! equ 0 (
|
||||
echo clangd: Copied to project root
|
||||
) else (
|
||||
echo clangd: Copy failed
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
popd
|
||||
|
||||
echo.
|
||||
echo === Build Complete ===
|
||||
echo Executable: %BUILD_DIR%\%PROJECT_NAME%.exe
|
||||
274
build.sh
Normal file
274
build.sh
Normal file
@@ -0,0 +1,274 @@
|
||||
#!/usr/bin/env bash
|
||||
# ============================================================================
|
||||
# build.sh - Build script (Linux)
|
||||
# Usage: ./build.sh [Release|Debug] [clean]
|
||||
#
|
||||
# Compiler preference: gcc > clang
|
||||
# Build system: Ninja (fallback to Make)
|
||||
# ============================================================================
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
PROJECT_NAME="filehasher"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Colors
|
||||
# ---------------------------------------------------------------------------
|
||||
readonly RED='\033[0;31m'
|
||||
readonly GREEN='\033[0;32m'
|
||||
readonly YELLOW='\033[1;33m'
|
||||
readonly CYAN='\033[0;36m'
|
||||
readonly NC='\033[0m'
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Default values
|
||||
# ---------------------------------------------------------------------------
|
||||
BUILD_TYPE="Release"
|
||||
CLEAN_BUILD=0
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parse arguments
|
||||
# ---------------------------------------------------------------------------
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
Release|release)
|
||||
BUILD_TYPE="Release"
|
||||
shift
|
||||
;;
|
||||
Debug|debug)
|
||||
BUILD_TYPE="Debug"
|
||||
shift
|
||||
;;
|
||||
clean|-clean|--clean)
|
||||
CLEAN_BUILD=1
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
echo -e "${RED}Unknown argument: $1${NC}"
|
||||
echo "Usage: $0 [Release|Debug] [clean]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Setup
|
||||
# ---------------------------------------------------------------------------
|
||||
readonly BUILD_DIR="build/linux/${BUILD_TYPE}"
|
||||
readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
echo -e "${GREEN}=== Building ${PROJECT_NAME} (${BUILD_TYPE}) ===${NC}"
|
||||
echo "Project: ${SCRIPT_DIR}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Clean if requested
|
||||
# ---------------------------------------------------------------------------
|
||||
if [[ $CLEAN_BUILD -eq 1 ]]; then
|
||||
echo -e "${YELLOW}Cleaning build directory...${NC}"
|
||||
rm -rf "${BUILD_DIR}"
|
||||
echo
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Create build directory
|
||||
# ---------------------------------------------------------------------------
|
||||
mkdir -p "${BUILD_DIR}"
|
||||
cd "${BUILD_DIR}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compiler selection (prefer gcc, fallback to clang)
|
||||
# ---------------------------------------------------------------------------
|
||||
echo -e "${YELLOW}Detecting compiler...${NC}"
|
||||
|
||||
CC_BINARY=""
|
||||
CC_NAME=""
|
||||
|
||||
if command -v gcc &> /dev/null; then
|
||||
CC_BINARY="gcc"
|
||||
CC_VERSION=$(gcc --version | head -n1)
|
||||
CC_NAME="GCC (${CC_VERSION})"
|
||||
echo -e " ${GREEN}[OK]${NC} Found GCC (preferred): ${CC_VERSION}"
|
||||
elif command -v clang &> /dev/null; then
|
||||
CC_BINARY="clang"
|
||||
CC_VERSION=$(clang --version | head -n1)
|
||||
CC_NAME="Clang (${CC_VERSION})"
|
||||
echo -e " ${YELLOW}[OK]${NC} Found Clang (fallback): ${CC_VERSION}"
|
||||
else
|
||||
echo -e "${RED}[FAIL] No suitable compiler found!${NC}"
|
||||
echo "Please install gcc or clang:"
|
||||
echo " Ubuntu/Debian: sudo apt install build-essential"
|
||||
echo " Fedora/RHEL: sudo dnf install gcc"
|
||||
echo " Arch: sudo pacman -S gcc"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Check dependencies
|
||||
# ---------------------------------------------------------------------------
|
||||
echo -e "${YELLOW}Checking dependencies...${NC}"
|
||||
|
||||
# Check for liburing
|
||||
HAVE_LIBURING=0
|
||||
if ldconfig -p | grep -q liburing 2>/dev/null; then
|
||||
HAVE_LIBURING=1
|
||||
echo -e " ${GREEN}[OK]${NC} Found liburing"
|
||||
elif pkg-config --exists liburing 2>/dev/null; then
|
||||
HAVE_LIBURING=1
|
||||
echo -e " ${GREEN}[OK]${NC} Found liburing (pkg-config)"
|
||||
elif [[ -f /usr/lib/liburing.so ]] || [[ -f /usr/lib64/liburing.so ]] || [[ -f /usr/local/lib/liburing.so ]]; then
|
||||
HAVE_LIBURING=1
|
||||
echo -e " ${GREEN}[OK]${NC} Found liburing (manual detection)"
|
||||
else
|
||||
echo -e "${RED}[FAIL] liburing not found!${NC}"
|
||||
echo "Please install liburing-dev:"
|
||||
echo " Ubuntu/Debian: sudo apt install liburing-dev"
|
||||
echo " Fedora/RHEL: sudo dnf install liburing-devel"
|
||||
echo " Arch: sudo pacman -S liburing"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for pthreads
|
||||
# Check if pthreads is available (either in ldconfig or merged into libc)
|
||||
if ldconfig -p | grep -q libpthread 2>/dev/null || ldd --version | grep -qP '2\.(3[4-9]|[4-9][0-9])'; then
|
||||
echo -e " ${GREEN}[OK]${NC} Found pthreads (merged or standalone)"
|
||||
else
|
||||
echo -e " ${YELLOW}[NOTE]${NC} pthreads not found, attempting link"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generator selection (prefer ninja)
|
||||
# ---------------------------------------------------------------------------
|
||||
echo -e "${YELLOW}Selecting build system...${NC}"
|
||||
|
||||
GENERATOR=""
|
||||
GENERATOR_NAME=""
|
||||
|
||||
if command -v ninja &> /dev/null; then
|
||||
GENERATOR="Ninja"
|
||||
GENERATOR_NAME="Ninja"
|
||||
echo -e " ${GREEN}[OK]${NC} Using Ninja"
|
||||
elif command -v make &> /dev/null; then
|
||||
GENERATOR="Unix Makefiles"
|
||||
GENERATOR_NAME="Make"
|
||||
echo -e " ${YELLOW}[OK]${NC} Ninja not found, using Make"
|
||||
else
|
||||
echo -e "${RED}[FAIL] No build system found!${NC}"
|
||||
echo "Please install ninja or make:"
|
||||
echo " Ubuntu/Debian: sudo apt install ninja-build"
|
||||
echo " Fedora/RHEL: sudo dnf install ninja-build"
|
||||
echo " Arch: sudo pacman -S ninja"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configure
|
||||
# ---------------------------------------------------------------------------
|
||||
echo -e "${YELLOW}Configuring CMake...${NC}"
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# compile_commands.json logic
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
EXPORT_COMPILE_COMMANDS=OFF
|
||||
|
||||
if [[ "$BUILD_TYPE" == "Release" ]]; then
|
||||
if [[ -f "${SCRIPT_DIR}/compile_commands.json" ]]; then
|
||||
echo -e " compile_commands.json already exists - skipping generation"
|
||||
else
|
||||
echo -e " compile_commands.json will be generated"
|
||||
EXPORT_COMPILE_COMMANDS=ON
|
||||
fi
|
||||
fi
|
||||
|
||||
echo -e " Build type: ${BUILD_TYPE}"
|
||||
echo -e " Compiler: ${CC_NAME}"
|
||||
echo -e " Generator: ${GENERATOR_NAME}"
|
||||
|
||||
cmake "${SCRIPT_DIR}" \
|
||||
-G "${GENERATOR}" \
|
||||
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
|
||||
-DCMAKE_C_COMPILER="${CC_BINARY}" \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=${EXPORT_COMPILE_COMMANDS}
|
||||
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo -e "${RED}CMake configuration failed!${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}Configuration successful!${NC}"
|
||||
echo
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build
|
||||
# ---------------------------------------------------------------------------
|
||||
echo -e "${YELLOW}Building...${NC}"
|
||||
|
||||
# Get number of CPU cores
|
||||
if command -v nproc &> /dev/null; then
|
||||
CORES=$(nproc)
|
||||
else
|
||||
CORES=4
|
||||
fi
|
||||
|
||||
cmake --build . --config "${BUILD_TYPE}" --parallel "${CORES}"
|
||||
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo -e "${RED}Build failed!${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}Build successful!${NC}"
|
||||
echo
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verify output
|
||||
# ---------------------------------------------------------------------------
|
||||
cd "${SCRIPT_DIR}"
|
||||
|
||||
if [[ -f "${BUILD_DIR}/${PROJECT_NAME}" ]]; then
|
||||
echo -e "${GREEN}Executable: ${BUILD_DIR}/${PROJECT_NAME}${NC}"
|
||||
|
||||
if command -v file &> /dev/null; then
|
||||
echo -e " Type: $(file -b ${BUILD_DIR}/${PROJECT_NAME})"
|
||||
fi
|
||||
|
||||
if command -v du &> /dev/null; then
|
||||
echo -e " Size: $(du -h ${BUILD_DIR}/${PROJECT_NAME} | cut -f1)"
|
||||
fi
|
||||
elif [[ -f "${BUILD_DIR}/${PROJECT_NAME}.exe" ]]; then
|
||||
echo -e "${GREEN}Executable: ${BUILD_DIR}/${PROJECT_NAME}.exe${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}Note: Could not locate executable${NC}"
|
||||
echo "Checking build directory:"
|
||||
find "${BUILD_DIR}" -type f -executable 2>/dev/null || echo " No executables found"
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Summary
|
||||
# ---------------------------------------------------------------------------
|
||||
echo
|
||||
echo -e "${CYAN}=== Build Complete ===${NC}"
|
||||
echo
|
||||
echo -e "${YELLOW}Build Information:${NC}"
|
||||
echo -e " Configuration: ${BUILD_TYPE}"
|
||||
echo -e " Compiler: ${CC_NAME}"
|
||||
echo -e " Generator: ${GENERATOR_NAME}"
|
||||
echo -e " Output: ${BUILD_DIR}/"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Copy compile_commands.json for clangd
|
||||
# ---------------------------------------------------------------------------
|
||||
if [[ "${EXPORT_COMPILE_COMMANDS}" == "ON" ]]; then
|
||||
if [[ -f "${BUILD_DIR}/compile_commands.json" ]]; then
|
||||
echo -e " clangd: compile_commands.json generated"
|
||||
|
||||
cp "${BUILD_DIR}/compile_commands.json" "${SCRIPT_DIR}/compile_commands.json"
|
||||
echo -e " clangd: Copied to project root"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo
|
||||
echo -e "${GREEN}Ready to run: ./${BUILD_DIR}/${PROJECT_NAME}${NC}"
|
||||
@@ -14,7 +14,7 @@ v3.2: Making the lock free MPMC queue growable
|
||||
Add padding to avoir false sharing
|
||||
Add sleep() and SwitchToThread() to limit spinning
|
||||
|
||||
v3.3: Fix bug slots used before initialization,compare and swap is protecting updating committed, but it is not protecting the memory initialization. Adding atomic_flag commit_lock to protect against that
|
||||
v3.3: Fix bug slots used before initialization, compare and swap is protecting updating committed, but it is not protecting the memory initialization. Adding atomic_flag commit_lock to protect against that
|
||||
Fix bug multiple threads committing at the same time, fixed by using atomic_flag commit_lock and re-checking committed after acquiring the lock
|
||||
Reorder helper functions
|
||||
|
||||
@@ -49,3 +49,10 @@ Fixing user prompt parsing
|
||||
4.5: Porting to linux
|
||||
Reorganising the code
|
||||
Improving the scan function
|
||||
|
||||
5.0: Implementing the IO Ring for windows and ui_uring for linux instead of buffered hashing, huge performance gains. The IO Ring is event driven, thread local, uses DMA and direct disk I/O, bypassing the OS cache completely, registered buffers (and registered files in io_uring), it supports bashing multiple submissions and can handle multiple files at the same time.
|
||||
Hashing small files using XXH3_128bits() instead of the streaming pipeline(XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest()), this reduses the overhead of creating a state and digest, coupled with the IO Ring it improves the hashing of small files whose size is inferior to the size of IO Ring buffers
|
||||
fixing the xxh_x86dispatch warnings
|
||||
Updating the progress printing function
|
||||
Implementing a config file
|
||||
Writing the README file
|
||||
32
config.h
Normal file
32
config.h
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
#define FILE_HASHES_TXT "file_hashes.txt"
|
||||
|
||||
// Metadata selection
|
||||
#define FILE_TIMES 1 // created and modified time
|
||||
#define FILE_OWNER 1
|
||||
|
||||
#define MULTI_THREADING 1
|
||||
#define READ_BLOCK KiB(64)
|
||||
|
||||
// -------------------- IO Ring Configuration ----------------------
|
||||
#define USE_IORING 1
|
||||
|
||||
#if USE_IORING
|
||||
#define IORING_BUFFER_SIZE KiB(256)
|
||||
#define NUM_BUFFERS_PER_THREAD 32
|
||||
#define MAX_ACTIVE_FILES 16
|
||||
#define MAX_WAIT_COUNT (NUM_BUFFERS_PER_THREAD / 2)
|
||||
|
||||
#define SUBMIT_TIMEOUT_MS 10000
|
||||
#define IORING_DEBUG_PRINTS 0
|
||||
#define IORING_DEBUG_STATS 0
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define USE_REGISTERED_FILES 1
|
||||
|
||||
#elif defined(__linux__)
|
||||
#define USE_REGISTERED_FILES 1
|
||||
#define CHECK_FILE_SYSTEM 0
|
||||
|
||||
#endif
|
||||
#endif
|
||||
147
experiments/io_ring_test.c
Normal file
147
experiments/io_ring_test.c
Normal file
@@ -0,0 +1,147 @@
|
||||
#pragma once
|
||||
|
||||
#include <ioringapi.h>
|
||||
#include <ntioring_x.h>
|
||||
// #include "ioringapi.c"
|
||||
#include <winerror.h>
|
||||
|
||||
// Initialize I/O Ring
|
||||
HIORING io_ring_init(void) {
|
||||
|
||||
// if (!io_ring_load_functions()) {
|
||||
// printf("[I/O Ring] Failed to load functions\n");
|
||||
// return NULL;
|
||||
// }
|
||||
|
||||
IORING_CAPABILITIES caps;
|
||||
ZeroMemory(&caps, sizeof(caps));
|
||||
|
||||
HRESULT hr = QueryIoRingCapabilities(&caps);
|
||||
if (FAILED(hr)) {
|
||||
printf("[I/O Ring] QueryIoRingCapabilities failed: 0x%08lx\n", hr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// printf("[I/O Ring] MaxVersion=%d, MaxSubmission=%u, MaxCompletion=%u\n",
|
||||
// (int)caps.MaxVersion, caps.MaxSubmissionQueueSize,
|
||||
// caps.MaxCompletionQueueSize);
|
||||
|
||||
if (caps.MaxVersion < IORING_VERSION_1) {
|
||||
printf("[I/O Ring] Version too old\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
IORING_CREATE_FLAGS flags = {0};
|
||||
HIORING ring = NULL;
|
||||
|
||||
// hr = CreateIoRing(IORING_VERSION_1, flags, 256, 512, &ring);
|
||||
hr = CreateIoRing(caps.MaxVersion, flags, 256, 512, &ring);
|
||||
if (FAILED(hr)) {
|
||||
printf("[I/O Ring] CreateIoRing failed: 0x%08lx\n", hr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// printf("[I/O Ring] Created successfully\n");
|
||||
|
||||
// Check if read operation is supported
|
||||
|
||||
// HRESULT io_ring_support = IsIoRingOpSupported(ring, IORING_OP_READ);
|
||||
// if (io_ring_support == S_FALSE) {
|
||||
// printf("[I/O Ring] Not supported, %ld /n", io_ring_support);
|
||||
// }
|
||||
|
||||
// Get ring info
|
||||
IORING_INFO info;
|
||||
ZeroMemory(&info, sizeof(info));
|
||||
GetIoRingInfo(ring, &info);
|
||||
// printf("[I/O Ring] Submission: %u, Completion: %u\n",
|
||||
// info.SubmissionQueueSize, info.CompletionQueueSize);
|
||||
|
||||
return ring;
|
||||
}
|
||||
|
||||
void io_ring_cleanup(HIORING ring) {
|
||||
if (ring) {
|
||||
CloseIoRing(ring);
|
||||
// printf("[I/O Ring] Closed\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Read file using I/O Ring
|
||||
int io_ring_read_file(HIORING ring, HANDLE hFile, void *buffer, DWORD size,
|
||||
UINT64 offset) {
|
||||
|
||||
IORING_HANDLE_REF file_ref = IoRingHandleRefFromHandle(hFile);
|
||||
IORING_BUFFER_REF buf_ref = IoRingBufferRefFromPointer(buffer);
|
||||
|
||||
HRESULT hr = BuildIoRingReadFile(ring, file_ref, buf_ref, size, offset,
|
||||
(UINT_PTR)buffer, IOSQE_FLAGS_NONE);
|
||||
|
||||
if (FAILED(hr))
|
||||
return -1;
|
||||
|
||||
UINT32 submitted = 0;
|
||||
hr = SubmitIoRing(ring, 1, INFINITE, &submitted);
|
||||
if (FAILED(hr) || submitted == 0)
|
||||
return -1;
|
||||
|
||||
for (;;) {
|
||||
IORING_CQE cqe;
|
||||
hr = PopIoRingCompletion(ring, &cqe);
|
||||
|
||||
if (FAILED(hr))
|
||||
continue;
|
||||
|
||||
if (cqe.UserData != (UINT_PTR)buffer)
|
||||
continue;
|
||||
|
||||
if (FAILED(cqe.ResultCode))
|
||||
return -1;
|
||||
|
||||
return (int)cqe.Information;
|
||||
}
|
||||
}
|
||||
|
||||
// Test function
|
||||
void test_io_ring(void) {
|
||||
printf("\n=== Testing I/O Ring ===\n");
|
||||
|
||||
HIORING ring = io_ring_init();
|
||||
if (!ring) {
|
||||
printf("I/O Ring not available\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// Create test file
|
||||
HANDLE hFile = CreateFileA("test.txt", GENERIC_READ | GENERIC_WRITE, 0, NULL,
|
||||
CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
char test_data[] =
|
||||
"Hello, I/O Ring! This is a test of the Windows I/O Ring API.";
|
||||
DWORD written;
|
||||
WriteFile(hFile, test_data, sizeof(test_data), &written, NULL);
|
||||
CloseHandle(hFile);
|
||||
}
|
||||
|
||||
// Read using I/O Ring
|
||||
hFile = CreateFileA("test.txt", GENERIC_READ, FILE_SHARE_READ, NULL,
|
||||
OPEN_EXISTING, FILE_FLAG_OVERLAPPED, NULL);
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
char buffer[512] = {0};
|
||||
int bytes = io_ring_read_file(ring, hFile, buffer, sizeof(buffer), 0);
|
||||
if (bytes > 0) {
|
||||
printf("Read %d bytes: %s\n", bytes, buffer);
|
||||
} else {
|
||||
printf("Failed to read file\n");
|
||||
}
|
||||
CloseHandle(hFile);
|
||||
} else {
|
||||
printf("Failed to open test file\n");
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
DeleteFileA("test.txt");
|
||||
io_ring_cleanup(ring);
|
||||
|
||||
printf("=== Test complete ===\n\n");
|
||||
}
|
||||
721
experiments/io_uring_test.c
Normal file
721
experiments/io_uring_test.c
Normal file
@@ -0,0 +1,721 @@
|
||||
/*
|
||||
# Compile
|
||||
gcc -o io_uring_test io_uring_test.c -luring
|
||||
|
||||
# Run
|
||||
./io_uring_test
|
||||
*/
|
||||
#include "base.h"
|
||||
#include <stdint.h>
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <liburing.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define TEST_FILE "test_io_uring.txt"
|
||||
#define BUFFER_SIZE 4096
|
||||
#define NUM_BUFFERS 4
|
||||
#define NUM_REGISTERED_FILES 8 // Maximum number of files to register
|
||||
|
||||
// Colors for output
|
||||
#define COLOR_GREEN "\033[0;32m"
|
||||
#define COLOR_RED "\033[0;31m"
|
||||
#define COLOR_YELLOW "\033[0;33m"
|
||||
#define COLOR_BLUE "\033[0;34m"
|
||||
#define COLOR_RESET "\033[0m"
|
||||
|
||||
// Test result tracking
|
||||
typedef struct {
|
||||
int passed;
|
||||
int failed;
|
||||
} TestResults;
|
||||
|
||||
static void print_success(const char *step) {
|
||||
printf(COLOR_GREEN "[✓] SUCCESS: %s" COLOR_RESET "\n", step);
|
||||
}
|
||||
|
||||
static void print_failure(const char *step, const char *error) {
|
||||
printf(COLOR_RED "[✗] FAILED: %s - %s" COLOR_RESET "\n", step, error);
|
||||
}
|
||||
|
||||
static void print_info(const char *msg) {
|
||||
printf(COLOR_BLUE "[i] INFO: %s" COLOR_RESET "\n", msg);
|
||||
}
|
||||
|
||||
static void print_step(const char *step) {
|
||||
printf(COLOR_YELLOW "\n>>> Testing: %s" COLOR_RESET "\n", step);
|
||||
}
|
||||
|
||||
// Create a test file with known content
|
||||
static int create_test_file(const char *filename, const char *content) {
|
||||
FILE *f = fopen(filename, "w");
|
||||
if (!f) {
|
||||
perror("Failed to create test file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(f, "%s", content);
|
||||
fclose(f);
|
||||
|
||||
printf(" Created test file: %s\n", filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 1: Create io_uring instance
|
||||
static int test_io_uring_create(struct io_uring *ring, TestResults *results) {
|
||||
print_step("io_uring creation");
|
||||
|
||||
int ret = io_uring_queue_init(256, ring, 0);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_queue_init", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("io_uring instance created");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 2: Register buffers
|
||||
static int test_register_buffers(struct io_uring *ring, void **buffers,
|
||||
struct iovec *iovs, TestResults *results) {
|
||||
print_step("Buffer registration");
|
||||
|
||||
// Allocate and prepare buffers
|
||||
size_t total_size = BUFFER_SIZE * NUM_BUFFERS;
|
||||
*buffers = aligned_alloc(4096, total_size); // Page-aligned for O_DIRECT
|
||||
if (!*buffers) {
|
||||
print_failure("Buffer allocation", strerror(errno));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize iovecs
|
||||
for (int i = 0; i < NUM_BUFFERS; i++) {
|
||||
iovs[i].iov_base = (char *)*buffers + (i * BUFFER_SIZE);
|
||||
iovs[i].iov_len = BUFFER_SIZE;
|
||||
memset(iovs[i].iov_base, 0, BUFFER_SIZE);
|
||||
}
|
||||
|
||||
int ret = io_uring_register_buffers(ring, iovs, NUM_BUFFERS);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_register_buffers", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Buffers registered successfully");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 2b: Register files
|
||||
static int test_register_files(struct io_uring *ring, int *fds, int num_fds,
|
||||
TestResults *results) {
|
||||
print_step("File registration");
|
||||
|
||||
if (num_fds == 0) {
|
||||
print_info("No files to register");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ret = io_uring_register_files(ring, fds, num_fds);
|
||||
if (ret < 0) {
|
||||
// File registration might not be supported on all kernels
|
||||
if (ret == -EOPNOTSUPP || ret == -EINVAL) {
|
||||
print_info("File registration not supported on this kernel, skipping");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
print_failure("io_uring_register_files", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf(" Registered %d files\n", num_fds);
|
||||
print_success("Files registered successfully");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 3: Open file
|
||||
static int test_open_file(const char *filename, int *fd, bool use_direct,
|
||||
TestResults *results) {
|
||||
print_step("File opening");
|
||||
|
||||
// Get file size
|
||||
struct stat st;
|
||||
if (stat(filename, &st) != 0) {
|
||||
print_failure("stat", strerror(errno));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int page_size = plat_get_pagesize();
|
||||
size_t file_size = st.st_size;
|
||||
|
||||
printf(" File: %s\n", filename);
|
||||
printf(" File size: %zu bytes\n", file_size);
|
||||
printf(" Page size: %d bytes\n", page_size);
|
||||
|
||||
if (file_size % page_size != 0) {
|
||||
printf(" Extending read size from %zu to %zu bytes\n", file_size,
|
||||
ALIGN_UP_POW2(file_size, page_size));
|
||||
}
|
||||
|
||||
// Try to open with specified flags
|
||||
int flags = O_RDONLY;
|
||||
if (use_direct) {
|
||||
flags |= O_DIRECT;
|
||||
}
|
||||
|
||||
*fd = open(filename, flags);
|
||||
if (*fd < 0) {
|
||||
if (use_direct) {
|
||||
print_info("O_DIRECT failed, trying without it");
|
||||
*fd = open(filename, O_RDONLY);
|
||||
if (*fd < 0) {
|
||||
print_failure("open", strerror(errno));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
print_info("Using buffered I/O (O_DIRECT not available)");
|
||||
} else {
|
||||
print_failure("open", strerror(errno));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
const char *io_type = use_direct ? "O_DIRECT" : "buffered I/O";
|
||||
printf(" File opened with %s\n", io_type);
|
||||
print_success("File opened successfully");
|
||||
}
|
||||
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 4: Build and submit read operation (using registered file)
|
||||
static int test_submit_read_registered(struct io_uring *ring, int file_index,
|
||||
struct iovec *iovs, int buffer_id,
|
||||
uint64_t user_data, size_t file_size,
|
||||
TestResults *results) {
|
||||
print_step("Building and submitting read operation (registered file)");
|
||||
|
||||
u32 page_size = plat_get_pagesize();
|
||||
size_t read_size = BUFFER_SIZE;
|
||||
|
||||
// For O_DIRECT, ensure read size is sector-aligned
|
||||
if (read_size > file_size) {
|
||||
read_size = ALIGN_UP_POW2(file_size, page_size);
|
||||
printf(" Adjusted read size to %zu bytes for O_DIRECT alignment\n",
|
||||
read_size);
|
||||
}
|
||||
|
||||
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
|
||||
if (!sqe) {
|
||||
print_failure("io_uring_get_sqe", "No available SQE");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Use fixed file descriptor
|
||||
io_uring_prep_read_fixed(sqe, file_index, iovs[buffer_id].iov_base, read_size,
|
||||
0, buffer_id);
|
||||
io_uring_sqe_set_data64(sqe, user_data);
|
||||
|
||||
int ret = io_uring_submit(ring);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_submit", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf(" Using registered file index: %d\n", file_index);
|
||||
print_success("Read operation submitted successfully (registered file)");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 4b: Build and submit read operation (using fd directly)
|
||||
static int test_submit_read(struct io_uring *ring, int fd, struct iovec *iovs,
|
||||
int buffer_id, uint64_t user_data,
|
||||
TestResults *results) {
|
||||
print_step("Building and submitting read operation");
|
||||
|
||||
// Get file size for proper alignment
|
||||
struct stat st;
|
||||
if (fstat(fd, &st) != 0) {
|
||||
print_failure("fstat", strerror(errno));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
u32 page_size = plat_get_pagesize();
|
||||
size_t file_size = st.st_size;
|
||||
size_t read_size = BUFFER_SIZE;
|
||||
|
||||
// For O_DIRECT, ensure read size is sector-aligned
|
||||
if (read_size > file_size) {
|
||||
read_size = ALIGN_UP_POW2(file_size, page_size);
|
||||
printf(" Adjusted read size to %zu bytes for O_DIRECT alignment\n",
|
||||
read_size);
|
||||
}
|
||||
|
||||
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
|
||||
if (!sqe) {
|
||||
print_failure("io_uring_get_sqe", "No available SQE");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Prepare read operation using registered buffer
|
||||
io_uring_prep_read_fixed(sqe, fd, iovs[buffer_id].iov_base, read_size, 0,
|
||||
buffer_id);
|
||||
io_uring_sqe_set_data64(sqe, user_data);
|
||||
|
||||
int ret = io_uring_submit(ring);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_submit", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Read operation submitted successfully");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 5: Wait for completion
|
||||
static int test_wait_completion(struct io_uring *ring,
|
||||
struct io_uring_cqe **cqe,
|
||||
TestResults *results) {
|
||||
print_step("Waiting for completion");
|
||||
|
||||
int ret = io_uring_wait_cqe(ring, cqe);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_wait_cqe", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Completion received");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 6: Process completion
|
||||
static int test_process_completion(struct io_uring_cqe *cqe,
|
||||
uint64_t expected_user_data,
|
||||
TestResults *results) {
|
||||
print_step("Processing completion");
|
||||
|
||||
uint64_t user_data = io_uring_cqe_get_data64(cqe);
|
||||
int res = cqe->res;
|
||||
|
||||
printf(" Completion data:\n");
|
||||
printf(" User data: %lu (expected: %lu)\n", user_data, expected_user_data);
|
||||
printf(" Result: %d bytes read\n", res);
|
||||
|
||||
if (user_data != expected_user_data) {
|
||||
print_failure("User data mismatch",
|
||||
"User data doesn't match expected value");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (res < 0) {
|
||||
print_failure("Read operation", strerror(-res));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Completion processed successfully");
|
||||
results->passed++;
|
||||
return res; // Return number of bytes read
|
||||
}
|
||||
|
||||
// Test 7: Verify read data
|
||||
static int test_verify_data(struct iovec *iovs, int buffer_id, int bytes_read,
|
||||
const char *expected_content,
|
||||
TestResults *results) {
|
||||
print_step("Data verification");
|
||||
|
||||
char *data = (char *)iovs[buffer_id].iov_base;
|
||||
|
||||
printf(" Read data (first 200 chars):\n");
|
||||
printf(" ---\n");
|
||||
for (int i = 0; i < bytes_read && i < 200; i++) {
|
||||
putchar(data[i]);
|
||||
}
|
||||
if (bytes_read > 200)
|
||||
printf("...");
|
||||
printf("\n ---\n");
|
||||
|
||||
// Check if data is not empty
|
||||
if (bytes_read == 0) {
|
||||
print_failure("Data verification", "No data read");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check if data contains expected content
|
||||
if (expected_content && strstr(data, expected_content) == NULL) {
|
||||
print_failure("Data verification", "Expected content not found");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Data verified successfully");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 8: Test multiple concurrent reads
|
||||
static int test_concurrent_reads(struct io_uring *ring, int fd,
|
||||
struct iovec *iovs, TestResults *results) {
|
||||
print_step("Concurrent reads test");
|
||||
|
||||
int num_reads = 3;
|
||||
int submitted = 0;
|
||||
|
||||
// Submit multiple reads
|
||||
for (int i = 0; i < num_reads; i++) {
|
||||
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
|
||||
if (!sqe) {
|
||||
print_failure("Getting SQE for concurrent read", "No available SQE");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
off_t offset = i * 100; // Read from different offsets
|
||||
io_uring_prep_read_fixed(sqe, fd, iovs[i].iov_base, BUFFER_SIZE, offset, i);
|
||||
io_uring_sqe_set_data64(sqe, i);
|
||||
submitted++;
|
||||
}
|
||||
|
||||
int ret = io_uring_submit(ring);
|
||||
if (ret != submitted) {
|
||||
char msg[64];
|
||||
snprintf(msg, sizeof(msg), "Expected %d, got %d", submitted, ret);
|
||||
|
||||
print_failure("Submitting concurrent reads", msg);
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Concurrent reads submitted");
|
||||
|
||||
// Wait for and process completions
|
||||
for (int i = 0; i < submitted; i++) {
|
||||
struct io_uring_cqe *cqe;
|
||||
ret = io_uring_wait_cqe(ring, &cqe);
|
||||
if (ret < 0) {
|
||||
print_failure("Waiting for concurrent read completion", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint64_t user_data = io_uring_cqe_get_data64(cqe);
|
||||
int res = cqe->res;
|
||||
|
||||
printf(" Concurrent read %lu completed: %d bytes read\n", user_data, res);
|
||||
io_uring_cqe_seen(ring, cqe);
|
||||
}
|
||||
|
||||
print_success("Concurrent reads completed successfully");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 9: Test file registration with multiple files
|
||||
static int test_file_registration(struct io_uring *ring, TestResults *results) {
|
||||
print_step("File registration with multiple files");
|
||||
|
||||
// Create multiple test files
|
||||
const char *filenames[] = {"test_file1.txt", "test_file2.txt",
|
||||
"test_file3.txt"};
|
||||
const char *contents[] = {"Content of file 1: Hello World!",
|
||||
"Content of file 2: io_uring is fast!",
|
||||
"Content of file 3: Registered files test."};
|
||||
|
||||
int fds[3];
|
||||
int num_files = 3;
|
||||
|
||||
// Create and open files
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
if (create_test_file(filenames[i], contents[i]) != 0) {
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
fds[i] = open(filenames[i], O_RDONLY);
|
||||
if (fds[i] < 0) {
|
||||
print_failure("Opening file for registration", strerror(errno));
|
||||
// Close previously opened files
|
||||
for (int j = 0; j < i; j++)
|
||||
close(fds[j]);
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Register files
|
||||
int ret = io_uring_register_files(ring, fds, num_files);
|
||||
if (ret < 0) {
|
||||
if (ret == -EOPNOTSUPP || ret == -EINVAL) {
|
||||
print_info("File registration not supported, skipping test");
|
||||
results->passed++;
|
||||
} else {
|
||||
print_failure("io_uring_register_files", strerror(-ret));
|
||||
results->failed++;
|
||||
}
|
||||
// Cleanup
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
close(fds[i]);
|
||||
remove(filenames[i]);
|
||||
}
|
||||
return (ret == -EOPNOTSUPP || ret == -EINVAL) ? 0 : -1;
|
||||
}
|
||||
|
||||
print_success("Multiple files registered successfully");
|
||||
|
||||
// Read from each registered file using fixed operations
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
struct iovec iov;
|
||||
char buf[256] = {0};
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = sizeof(buf);
|
||||
|
||||
// Register a single buffer for this test
|
||||
ret = io_uring_register_buffers(ring, &iov, 1);
|
||||
if (ret < 0) {
|
||||
print_failure("Registering buffer for file test", strerror(-ret));
|
||||
break;
|
||||
}
|
||||
|
||||
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
|
||||
if (!sqe) {
|
||||
print_failure("Getting SQE for registered file", "No available SQE");
|
||||
break;
|
||||
}
|
||||
|
||||
// Use fixed file and fixed buffer
|
||||
io_uring_prep_read_fixed(sqe, i, iov.iov_base, strlen(contents[i]), 0, 0);
|
||||
io_uring_sqe_set_data64(sqe, i);
|
||||
|
||||
ret = io_uring_submit(ring);
|
||||
if (ret < 0) {
|
||||
print_failure("Submitting read for registered file", strerror(-ret));
|
||||
break;
|
||||
}
|
||||
|
||||
struct io_uring_cqe *cqe;
|
||||
ret = io_uring_wait_cqe(ring, &cqe);
|
||||
if (ret < 0) {
|
||||
print_failure("Waiting for registered file read", strerror(-ret));
|
||||
break;
|
||||
}
|
||||
|
||||
if (cqe->res < 0) {
|
||||
print_failure("Reading registered file", strerror(-cqe->res));
|
||||
io_uring_cqe_seen(ring, cqe);
|
||||
break;
|
||||
}
|
||||
|
||||
printf(" File %d: Read %d bytes: %.*s\n", i, cqe->res, cqe->res, buf);
|
||||
io_uring_cqe_seen(ring, cqe);
|
||||
|
||||
// Unregister buffer for next iteration
|
||||
io_uring_unregister_buffers(ring);
|
||||
}
|
||||
|
||||
// Cleanup files
|
||||
io_uring_unregister_files(ring);
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
close(fds[i]);
|
||||
remove(filenames[i]);
|
||||
}
|
||||
|
||||
print_success("File registration test completed");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Cleanup function
|
||||
static void cleanup(struct io_uring *ring, int *fds, int num_fds,
|
||||
void *buffers) {
|
||||
if (fds) {
|
||||
io_uring_unregister_files(ring);
|
||||
for (int i = 0; i < num_fds; i++) {
|
||||
if (fds[i] >= 0)
|
||||
close(fds[i]);
|
||||
}
|
||||
}
|
||||
if (buffers) {
|
||||
io_uring_unregister_buffers(ring);
|
||||
free(buffers);
|
||||
}
|
||||
io_uring_queue_exit(ring);
|
||||
remove(TEST_FILE);
|
||||
}
|
||||
|
||||
int main() {
|
||||
TestResults results = {0, 0};
|
||||
struct io_uring ring;
|
||||
int fd = -1;
|
||||
int registered_fds[1] = {-1}; // For registered file test
|
||||
void *buffers = NULL;
|
||||
struct iovec iovs[NUM_BUFFERS];
|
||||
|
||||
printf(COLOR_BLUE "\n========================================\n");
|
||||
printf(" io_uring Test Suite with File Registration\n");
|
||||
printf("========================================\n" COLOR_RESET);
|
||||
|
||||
// Create main test file
|
||||
const char *test_content =
|
||||
"Hello, io_uring! This is a test file for async I/O operations.\n"
|
||||
"Line 2: Testing reads with registered buffers.\n"
|
||||
"Line 3: The quick brown fox jumps over the lazy dog.\n"
|
||||
"Line 4: ABCDEFGHIJKLMNOPQRSTUVWXYZ\n"
|
||||
"Line 5: 0123456789\n";
|
||||
|
||||
if (create_test_file(TEST_FILE, test_content) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 1: Create io_uring
|
||||
if (test_io_uring_create(&ring, &results) != 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 2: Register buffers
|
||||
if (test_register_buffers(&ring, &buffers, iovs, &results) != 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 3: Open file
|
||||
if (test_open_file(TEST_FILE, &fd, true, &results) != 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 4: Submit read with direct fd
|
||||
uint64_t test_user_data = 12345;
|
||||
if (test_submit_read(&ring, fd, iovs, 0, test_user_data, &results) != 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 5: Wait for completion
|
||||
struct io_uring_cqe *cqe;
|
||||
if (test_wait_completion(&ring, &cqe, &results) != 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 6: Process completion
|
||||
int bytes_read = test_process_completion(cqe, test_user_data, &results);
|
||||
if (bytes_read < 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
|
||||
// Test 7: Verify data
|
||||
if (test_verify_data(iovs, 0, bytes_read, "io_uring", &results) != 0) {
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Close the file for file registration test
|
||||
close(fd);
|
||||
|
||||
// Reopen and register the file
|
||||
registered_fds[0] = open(TEST_FILE, O_RDONLY);
|
||||
if (registered_fds[0] < 0) {
|
||||
print_failure("Reopening file for registration", strerror(errno));
|
||||
cleanup(&ring, NULL, 0, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 2b: Register files
|
||||
if (test_register_files(&ring, registered_fds, 1, &results) != 0) {
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get file size for the registered read test
|
||||
struct stat st;
|
||||
stat(TEST_FILE, &st);
|
||||
|
||||
// Test 4b: Submit read using registered file
|
||||
test_user_data = 67890;
|
||||
if (test_submit_read_registered(&ring, 0, iovs, 0, test_user_data, st.st_size,
|
||||
&results) != 0) {
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Wait for and process completion
|
||||
if (test_wait_completion(&ring, &cqe, &results) != 0) {
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
bytes_read = test_process_completion(cqe, test_user_data, &results);
|
||||
if (bytes_read < 0) {
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
return 1;
|
||||
}
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
|
||||
// Verify data from registered file read
|
||||
if (test_verify_data(iovs, 0, bytes_read, "io_uring", &results) != 0) {
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 8: Concurrent reads
|
||||
if (test_concurrent_reads(&ring, registered_fds[0], iovs, &results) != 0) {
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Test 9: File registration with multiple files (requires new ring)
|
||||
cleanup(&ring, registered_fds, 1, buffers);
|
||||
buffers = NULL;
|
||||
registered_fds[0] = -1;
|
||||
|
||||
if (test_io_uring_create(&ring, &results) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
test_file_registration(&ring, &results);
|
||||
|
||||
// Cleanup the second ring
|
||||
io_uring_queue_exit(&ring);
|
||||
|
||||
// Print summary
|
||||
printf(COLOR_BLUE "\n========================================\n");
|
||||
printf(" TEST SUMMARY\n");
|
||||
printf("========================================\n" COLOR_RESET);
|
||||
printf(" Total tests: %d\n", results.passed + results.failed);
|
||||
printf(COLOR_GREEN " Passed: %d\n" COLOR_RESET, results.passed);
|
||||
if (results.failed > 0) {
|
||||
printf(COLOR_RED " Failed: %d\n" COLOR_RESET, results.failed);
|
||||
} else {
|
||||
printf(COLOR_GREEN " ✓ ALL TESTS PASSED!\n" COLOR_RESET);
|
||||
}
|
||||
|
||||
return results.failed > 0 ? 1 : 0;
|
||||
}
|
||||
397
experiments/io_uring_test2.c
Normal file
397
experiments/io_uring_test2.c
Normal file
@@ -0,0 +1,397 @@
|
||||
/*
|
||||
# Compile
|
||||
gcc -o io_uring_test io_uring_test2.c -luring
|
||||
|
||||
# Run
|
||||
./io_uring_test
|
||||
*/
|
||||
#include "base.h"
|
||||
#include <stdint.h>
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <liburing.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define BUFFER_SIZE 4096
|
||||
#define NUM_BUFFERS 4
|
||||
#define NUM_REGISTERED_FILES 3 // Test with 3 files
|
||||
|
||||
// Colors for output
|
||||
#define COLOR_GREEN "\033[0;32m"
|
||||
#define COLOR_RED "\033[0;31m"
|
||||
#define COLOR_YELLOW "\033[0;33m"
|
||||
#define COLOR_BLUE "\033[0;34m"
|
||||
#define COLOR_RESET "\033[0m"
|
||||
|
||||
// Test result tracking
|
||||
typedef struct {
|
||||
int passed;
|
||||
int failed;
|
||||
} TestResults;
|
||||
|
||||
static void print_success(const char *step) {
|
||||
printf(COLOR_GREEN "[✓] SUCCESS: %s" COLOR_RESET "\n", step);
|
||||
}
|
||||
|
||||
static void print_failure(const char *step, const char *error) {
|
||||
printf(COLOR_RED "[✗] FAILED: %s - %s" COLOR_RESET "\n", step, error);
|
||||
}
|
||||
|
||||
static void print_info(const char *msg) {
|
||||
printf(COLOR_BLUE "[i] INFO: %s" COLOR_RESET "\n", msg);
|
||||
}
|
||||
|
||||
static void print_step(const char *step) {
|
||||
printf(COLOR_YELLOW "\n>>> Testing: %s" COLOR_RESET "\n", step);
|
||||
}
|
||||
|
||||
static int create_test_file(const char *filename, const char *content) {
|
||||
FILE *f = fopen(filename, "w");
|
||||
if (!f) {
|
||||
perror("Failed to create test file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(f, "%s", content);
|
||||
fclose(f);
|
||||
|
||||
printf(" Created test file: %s\n", filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 1: Create io_uring instance
|
||||
static int test_io_uring_create(struct io_uring *ring, TestResults *results) {
|
||||
print_step("io_uring creation");
|
||||
|
||||
int ret = io_uring_queue_init(256, ring, 0);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_queue_init", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("io_uring instance created");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 2: Register buffers
|
||||
static int test_register_buffers(struct io_uring *ring, void **buffers,
|
||||
struct iovec *iovs, TestResults *results) {
|
||||
print_step("Buffer registration");
|
||||
|
||||
size_t total_size = BUFFER_SIZE * NUM_BUFFERS;
|
||||
*buffers = aligned_alloc(4096, total_size);
|
||||
if (!*buffers) {
|
||||
print_failure("Buffer allocation", strerror(errno));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < NUM_BUFFERS; i++) {
|
||||
iovs[i].iov_base = (char *)*buffers + (i * BUFFER_SIZE);
|
||||
iovs[i].iov_len = BUFFER_SIZE;
|
||||
memset(iovs[i].iov_base, 0, BUFFER_SIZE);
|
||||
}
|
||||
|
||||
int ret = io_uring_register_buffers(ring, iovs, NUM_BUFFERS);
|
||||
if (ret < 0) {
|
||||
print_failure("io_uring_register_buffers", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
print_success("Buffers registered successfully");
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 3: Register files sparse (empty table)
|
||||
static int test_register_files_sparse(struct io_uring *ring, unsigned nr_files,
|
||||
TestResults *results) {
|
||||
print_step("Sparse file registration (empty table)");
|
||||
|
||||
int ret = io_uring_register_files_sparse(ring, nr_files);
|
||||
if (ret < 0) {
|
||||
if (ret == -EINVAL) {
|
||||
print_info(
|
||||
"io_uring_register_files_sparse not supported (kernel < 5.19)");
|
||||
print_info("Trying regular file registration with invalid fds...");
|
||||
|
||||
// Fallback: register with invalid fds
|
||||
int *invalid_fds = calloc(nr_files, sizeof(int));
|
||||
if (!invalid_fds) {
|
||||
print_failure("Allocating invalid fds array", "Out of memory");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < nr_files; i++) {
|
||||
invalid_fds[i] = -1; // Mark all as invalid
|
||||
}
|
||||
|
||||
ret = io_uring_register_files(ring, invalid_fds, nr_files);
|
||||
free(invalid_fds);
|
||||
|
||||
if (ret < 0) {
|
||||
print_failure("Regular file registration also failed", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
print_success("File table registered (regular, with invalid fds)");
|
||||
} else {
|
||||
print_failure("io_uring_register_files_sparse", strerror(-ret));
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
printf(" Registered empty file table with %u slots\n", nr_files);
|
||||
print_success("Sparse file table created");
|
||||
}
|
||||
|
||||
results->passed++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test 4: Update file slot and read from it
|
||||
static int test_file_read_loop(struct io_uring *ring, struct iovec *iovs,
|
||||
const char **filenames,
|
||||
const char **expected_contents, int num_files,
|
||||
TestResults *results) {
|
||||
print_step("File slot update and read loop");
|
||||
|
||||
int *fds = calloc(num_files, sizeof(int));
|
||||
if (!fds) {
|
||||
print_failure("Allocating fd array", "Out of memory");
|
||||
results->failed++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open all files first
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
fds[i] = open(filenames[i], O_RDONLY);
|
||||
if (fds[i] < 0) {
|
||||
print_failure("Opening file", filenames[i]);
|
||||
results->failed++;
|
||||
// Close already opened files
|
||||
for (int j = 0; j < i; j++)
|
||||
close(fds[j]);
|
||||
free(fds);
|
||||
return -1;
|
||||
}
|
||||
printf(" Opened %s (fd=%d)\n", filenames[i], fds[i]);
|
||||
}
|
||||
|
||||
// Test loop: update slot, submit read, verify
|
||||
for (int slot = 0; slot < num_files; slot++) {
|
||||
printf("\n --- Testing slot %d with file '%s' ---\n", slot,
|
||||
filenames[slot]);
|
||||
|
||||
// Update the file registration for this slot
|
||||
printf(" Updating slot %d with fd %d...\n", slot, fds[slot]);
|
||||
int ret = io_uring_register_files_update(ring, slot, &fds[slot], 1);
|
||||
|
||||
if (ret < 0) {
|
||||
print_failure("File registration update", strerror(-ret));
|
||||
results->failed++;
|
||||
continue;
|
||||
}
|
||||
printf(" Slot update result: %d (expected 1)\n", ret);
|
||||
|
||||
// Get file size for read size calculation
|
||||
struct stat st;
|
||||
if (fstat(fds[slot], &st) != 0) {
|
||||
print_failure("fstat", strerror(errno));
|
||||
results->failed++;
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t file_size = st.st_size;
|
||||
size_t read_size = BUFFER_SIZE;
|
||||
|
||||
// Adjust read size for O_DIRECT if needed
|
||||
int page_size = plat_get_pagesize();
|
||||
if (read_size > file_size) {
|
||||
read_size = ALIGN_UP_POW2(file_size, page_size);
|
||||
}
|
||||
|
||||
printf(" File size: %zu, read size: %zu\n", file_size, read_size);
|
||||
|
||||
// Clear buffer for this test
|
||||
memset(iovs[0].iov_base, 0, BUFFER_SIZE);
|
||||
|
||||
// Submit read using registered file
|
||||
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
|
||||
if (!sqe) {
|
||||
print_failure("Getting SQE", "No available SQE");
|
||||
results->failed++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Use slot index with fixed file flag
|
||||
io_uring_prep_read_fixed(sqe, slot, iovs[0].iov_base, read_size, 0, 0);
|
||||
sqe->flags |= IOSQE_FIXED_FILE;
|
||||
io_uring_sqe_set_data64(sqe, 100 + slot); // Unique user_data per slot
|
||||
|
||||
ret = io_uring_submit(ring);
|
||||
if (ret < 0) {
|
||||
print_failure("Submitting read", strerror(-ret));
|
||||
results->failed++;
|
||||
continue;
|
||||
}
|
||||
printf(" Submitted read (1 SQE)\n");
|
||||
|
||||
// Wait for completion
|
||||
struct io_uring_cqe *cqe;
|
||||
ret = io_uring_wait_cqe(ring, &cqe);
|
||||
if (ret < 0) {
|
||||
print_failure("Waiting for completion", strerror(-ret));
|
||||
results->failed++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Process completion
|
||||
uint64_t user_data = io_uring_cqe_get_data64(cqe);
|
||||
int bytes_read = cqe->res;
|
||||
|
||||
printf(" Completion: user_data=%lu, result=%d\n", (unsigned long)user_data,
|
||||
bytes_read);
|
||||
|
||||
if (bytes_read < 0) {
|
||||
print_failure("Read operation", strerror(-bytes_read));
|
||||
results->failed++;
|
||||
io_uring_cqe_seen(ring, cqe);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (user_data != 100 + slot) {
|
||||
print_failure("User data mismatch", "Wrong user_data value");
|
||||
results->failed++;
|
||||
io_uring_cqe_seen(ring, cqe);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Verify the data
|
||||
char *data = (char *)iovs[0].iov_base;
|
||||
printf(" Data read (%d bytes): %.*s\n", bytes_read,
|
||||
bytes_read < 100 ? bytes_read : 100, data);
|
||||
|
||||
if (strstr(data, expected_contents[slot]) == NULL) {
|
||||
print_failure("Data verification",
|
||||
"Expected content not found in read data");
|
||||
results->failed++;
|
||||
} else {
|
||||
print_success("Data verified successfully");
|
||||
results->passed++;
|
||||
}
|
||||
|
||||
io_uring_cqe_seen(ring, cqe);
|
||||
|
||||
// Invalidate the slot after use (mark as -1)
|
||||
int invalid_fd = -1;
|
||||
ret = io_uring_register_files_update(ring, slot, &invalid_fd, 1);
|
||||
if (ret < 0) {
|
||||
printf(" Warning: Could not invalidate slot %d: %s\n", slot,
|
||||
strerror(-ret));
|
||||
}
|
||||
}
|
||||
|
||||
// Close all files
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
if (fds[i] >= 0)
|
||||
close(fds[i]);
|
||||
}
|
||||
free(fds);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main() {
|
||||
TestResults results = {0, 0};
|
||||
struct io_uring ring;
|
||||
void *buffers = NULL;
|
||||
struct iovec iovs[NUM_BUFFERS];
|
||||
|
||||
printf(COLOR_BLUE "\n========================================\n");
|
||||
printf(" io_uring Sparse File Registration Test\n");
|
||||
printf("========================================\n" COLOR_RESET);
|
||||
|
||||
// Define test files and their content
|
||||
const char *filenames[] = {"test_file_0.txt", "test_file_1.txt",
|
||||
"test_file_2.txt"};
|
||||
|
||||
const char *contents[] = {
|
||||
"This is file 0: Hello World! The quick brown fox jumps over the lazy "
|
||||
"dog.",
|
||||
"This is file 1: io_uring is awesome for async I/O operations!",
|
||||
"This is file 2: Testing sparse file registration with multiple files."};
|
||||
|
||||
const char *expected_substrings[] = {"Hello World", "io_uring is awesome",
|
||||
"sparse file registration"};
|
||||
|
||||
int num_files = 3;
|
||||
|
||||
// Create all test files
|
||||
print_info("Creating test files...");
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
if (create_test_file(filenames[i], contents[i]) != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Test 1: Create io_uring
|
||||
if (test_io_uring_create(&ring, &results) != 0) {
|
||||
goto cleanup_files;
|
||||
}
|
||||
|
||||
// Test 2: Register buffers
|
||||
if (test_register_buffers(&ring, &buffers, iovs, &results) != 0) {
|
||||
io_uring_queue_exit(&ring);
|
||||
goto cleanup_files;
|
||||
}
|
||||
|
||||
// Test 3: Register empty file table (sparse)
|
||||
if (test_register_files_sparse(&ring, num_files, &results) != 0) {
|
||||
io_uring_unregister_buffers(&ring);
|
||||
free(buffers);
|
||||
io_uring_queue_exit(&ring);
|
||||
goto cleanup_files;
|
||||
}
|
||||
|
||||
// Test 4: Loop through files, update slots, read and verify
|
||||
test_file_read_loop(&ring, iovs, filenames, expected_substrings, num_files,
|
||||
&results);
|
||||
|
||||
// Cleanup
|
||||
io_uring_unregister_files(&ring);
|
||||
io_uring_unregister_buffers(&ring);
|
||||
free(buffers);
|
||||
io_uring_queue_exit(&ring);
|
||||
|
||||
cleanup_files:
|
||||
// Remove test files
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
remove(filenames[i]);
|
||||
}
|
||||
|
||||
// Print summary
|
||||
int total = results.passed + results.failed;
|
||||
printf(COLOR_BLUE "\n========================================\n");
|
||||
printf(" TEST SUMMARY\n");
|
||||
printf("========================================\n" COLOR_RESET);
|
||||
printf(" Total tests: %d\n", total);
|
||||
printf(COLOR_GREEN " Passed: %d\n" COLOR_RESET, results.passed);
|
||||
if (results.failed > 0) {
|
||||
printf(COLOR_RED " Failed: %d\n" COLOR_RESET, results.failed);
|
||||
printf(COLOR_RED "\n ✗ SOME TESTS FAILED!\n" COLOR_RESET);
|
||||
} else {
|
||||
printf(COLOR_GREEN "\n ✓ ALL TESTS PASSED!\n" COLOR_RESET);
|
||||
}
|
||||
|
||||
return results.failed > 0 ? 1 : 0;
|
||||
}
|
||||
285
experiments/ioringapi.c
Normal file
285
experiments/ioringapi.c
Normal file
@@ -0,0 +1,285 @@
|
||||
#pragma once
|
||||
#include <stdio.h>
|
||||
#include <windows.h>
|
||||
#include <winnt.h>
|
||||
|
||||
// Forward declarations
|
||||
typedef struct IORING_HANDLE_REF IORING_HANDLE_REF;
|
||||
typedef struct IORING_BUFFER_REF IORING_BUFFER_REF;
|
||||
typedef void *HIORING;
|
||||
|
||||
/* --------------------- Types declaration --------------------- */
|
||||
typedef enum IORING_CREATE_ADVISORY_FLAGS {
|
||||
IORING_CREATE_ADVISORY_FLAGS_NONE,
|
||||
IORING_CREATE_SKIP_BUILDER_PARAM_CHECKS
|
||||
} IORING_CREATE_ADVISORY_FLAGS;
|
||||
// Specifies advisory flags for creating an I/O ring with a call to
|
||||
// CreateIoRing.
|
||||
|
||||
typedef enum IORING_CREATE_REQUIRED_FLAGS {
|
||||
IORING_CREATE_REQUIRED_FLAGS_NONE
|
||||
} IORING_CREATE_REQUIRED_FLAGS;
|
||||
// Specifies required flags for creating an I/O ring with a call to
|
||||
// CreateIoRing.
|
||||
|
||||
typedef enum IORING_REF_KIND {
|
||||
IORING_REF_RAW = 0,
|
||||
IORING_REF_REGISTERED = 1,
|
||||
} IORING_REF_KIND;
|
||||
// Specifies the type of an IORING_HANDLE_REF structure.
|
||||
|
||||
typedef enum IORING_SQE_FLAGS {
|
||||
IOSQE_FLAGS_NONE,
|
||||
IOSQE_FLAGS_DRAIN_PRECEDING_OPS
|
||||
} IORING_SQE_FLAGS;
|
||||
// Specifies kernel behavior options for I/O ring submission queue entries
|
||||
|
||||
// IORING_REGISTERED_BUFFER structure
|
||||
typedef struct IORING_REGISTERED_BUFFER {
|
||||
UINT32 Index;
|
||||
UINT32 Offset;
|
||||
} IORING_REGISTERED_BUFFER;
|
||||
|
||||
// IORING_HANDLE_REF
|
||||
struct IORING_HANDLE_REF {
|
||||
IORING_REF_KIND Kind;
|
||||
union {
|
||||
HANDLE Handle;
|
||||
UINT32 Index;
|
||||
} HandleUnion;
|
||||
};
|
||||
// Represents a reference to a file handle used in an I/O ring operation
|
||||
|
||||
// IORING_BUFFER_REF
|
||||
struct IORING_BUFFER_REF {
|
||||
IORING_REF_KIND Kind;
|
||||
union {
|
||||
void *Address;
|
||||
IORING_REGISTERED_BUFFER IndexAndOffset;
|
||||
} BufferUnion;
|
||||
};
|
||||
|
||||
typedef struct IORING_BUFFER_INFO {
|
||||
void *Address;
|
||||
UINT32 Length;
|
||||
} IORING_BUFFER_INFO;
|
||||
|
||||
// IORING_BUFFER_REF represents a reference to a buffer used in an I/O ring
|
||||
// operation
|
||||
|
||||
// IORING_VERSION enumeration
|
||||
typedef enum IORING_VERSION {
|
||||
IORING_VERSION_INVALID = 0,
|
||||
IORING_VERSION_1 = 1,
|
||||
IORING_VERSION_2 = 2,
|
||||
IORING_VERSION_3 = 3,
|
||||
IORING_VERSION_4 = 4,
|
||||
} IORING_VERSION;
|
||||
|
||||
typedef enum IORING_FEATURE_FLAGS {
|
||||
IORING_FEATURE_FLAGS_NONE = 0,
|
||||
IORING_FEATURE_UM_EMULATION = 1
|
||||
} IORING_FEATURE_FLAGS;
|
||||
|
||||
// IORING_CAPABILITIES structure
|
||||
typedef struct IORING_CAPABILITIES {
|
||||
IORING_VERSION MaxVersion;
|
||||
UINT32 MaxSubmissionQueueSize;
|
||||
UINT32 MaxCompletionQueueSize;
|
||||
IORING_FEATURE_FLAGS FeatureFlags;
|
||||
} IORING_CAPABILITIES;
|
||||
// Represents the IORING API capabilities.
|
||||
|
||||
// IORING_CQE structure
|
||||
typedef struct IORING_CQE {
|
||||
UINT_PTR UserData;
|
||||
HRESULT ResultCode;
|
||||
ULONG_PTR Information;
|
||||
} IORING_CQE;
|
||||
// Represents a completed I/O ring queue entry.
|
||||
|
||||
// IORING_CREATE_FLAGS structure
|
||||
typedef struct IORING_CREATE_FLAGS {
|
||||
IORING_CREATE_REQUIRED_FLAGS Required;
|
||||
IORING_CREATE_ADVISORY_FLAGS Advisory;
|
||||
} IORING_CREATE_FLAGS;
|
||||
// Specifies flags for creating an I/O ring with a call to CreateIoRing.
|
||||
|
||||
// IORING_INFO structure
|
||||
typedef struct IORING_INFO {
|
||||
IORING_VERSION IoRingVersion;
|
||||
IORING_CREATE_FLAGS Flags;
|
||||
UINT32 SubmissionQueueSize;
|
||||
UINT32 CompletionQueueSize;
|
||||
} IORING_INFO;
|
||||
// Represents the shape and version information for the specified I/O ring
|
||||
|
||||
// IORING_OP_CODE for IsIoRingOpSupported
|
||||
typedef enum IORING_OP_CODE {
|
||||
IORING_OP_NOP = 0,
|
||||
IORING_OP_READ = 1,
|
||||
IORING_OP_WRITE = 2,
|
||||
IORING_OP_FLUSH = 3,
|
||||
IORING_OP_REGISTER_BUFFERS = 4,
|
||||
IORING_OP_REGISTER_FILES = 5,
|
||||
IORING_OP_CANCEL = 6,
|
||||
} IORING_OP_CODE;
|
||||
|
||||
/* --------------------- Dynamic loader --------------------- */
|
||||
// Function pointer types
|
||||
typedef BOOL(WINAPI *IsIoRingOpSupported_t)(HIORING, IORING_OP_CODE);
|
||||
typedef HRESULT(WINAPI *QueryIoRingCapabilities_t)(IORING_CAPABILITIES *);
|
||||
typedef HRESULT(WINAPI *GetIoRingInfo_t)(HIORING, IORING_INFO *);
|
||||
typedef HRESULT(WINAPI *CreateIoRing_t)(IORING_VERSION, IORING_CREATE_FLAGS,
|
||||
UINT32, UINT32, HIORING *);
|
||||
typedef HRESULT(WINAPI *CloseIoRing_t)(HIORING);
|
||||
typedef HRESULT(WINAPI *SubmitIoRing_t)(HIORING, UINT32, UINT32, UINT32 *);
|
||||
typedef HRESULT(WINAPI *PopIoRingCompletion_t)(HIORING, IORING_CQE *);
|
||||
typedef HRESULT(WINAPI *SetIoRingCompletionEvent_t)(HIORING, HANDLE);
|
||||
typedef HRESULT(WINAPI *BuildIoRingCancelRequest_t)(HIORING, IORING_HANDLE_REF,
|
||||
UINT_PTR, UINT_PTR);
|
||||
typedef HRESULT(WINAPI *BuildIoRingReadFile_t)(HIORING, IORING_HANDLE_REF,
|
||||
IORING_BUFFER_REF, UINT32,
|
||||
UINT64, UINT_PTR,
|
||||
IORING_SQE_FLAGS);
|
||||
typedef HRESULT(WINAPI *BuildIoRingRegisterBuffers_t)(
|
||||
HIORING, UINT32, IORING_BUFFER_INFO const[], UINT_PTR);
|
||||
|
||||
typedef HRESULT(WINAPI *BuildIoRingRegisterFileHandles_t)(HIORING, UINT32,
|
||||
HANDLE const[],
|
||||
UINT_PTR);
|
||||
|
||||
// Core:
|
||||
// Queries the support of the specified operation for the specified I/O ring
|
||||
static IsIoRingOpSupported_t IsIoRingOpSupported = NULL;
|
||||
|
||||
// Queries the OS for the supported capabilities for IORINGs
|
||||
static QueryIoRingCapabilities_t QueryIoRingCapabilities = NULL;
|
||||
|
||||
// Gets information about the API version and queue sizes of an I/O ring
|
||||
static GetIoRingInfo_t GetIoRingInfo = NULL;
|
||||
|
||||
// Creates a new instance of an I/O ring submission/completion queue pair and
|
||||
// returns a handle for referencing the I/O ring
|
||||
static CreateIoRing_t CreateIoRing = NULL;
|
||||
|
||||
// Closes an HIORING handle that was previously opened with a call to
|
||||
// CreateIoRing
|
||||
static CloseIoRing_t CloseIoRing = NULL;
|
||||
|
||||
// Submission / completion:
|
||||
// Submits all constructed but not yet submitted entries to the kernel’s queue
|
||||
// and optionally waits for a set of operations to complete
|
||||
static SubmitIoRing_t SubmitIoRing = NULL;
|
||||
|
||||
// Pops a single entry from the completion queue, if one is available
|
||||
static PopIoRingCompletion_t PopIoRingCompletion = NULL;
|
||||
|
||||
// Registers a completion queue event with an IORING
|
||||
static SetIoRingCompletionEvent_t SetIoRingCompletionEvent = NULL;
|
||||
|
||||
// Operations:
|
||||
// Performs an asynchronous read from a file using an I/O ring
|
||||
static BuildIoRingReadFile_t BuildIoRingReadFile = NULL;
|
||||
|
||||
// Attempts to cancel a previously submitted I/O ring operation
|
||||
static BuildIoRingCancelRequest_t BuildIoRingCancelRequest = NULL;
|
||||
|
||||
// Registers an array of buffers with the system for future I/O ring operations
|
||||
static BuildIoRingRegisterBuffers_t BuildIoRingRegisterBuffers = NULL;
|
||||
|
||||
// Registers an array of file handles with the system for future I/O ring
|
||||
// operations
|
||||
static BuildIoRingRegisterFileHandles_t BuildIoRingRegisterFileHandles = NULL;
|
||||
|
||||
static int io_ring_loaded = 0;
|
||||
|
||||
static int io_ring_load_functions(void) {
|
||||
if (io_ring_loaded)
|
||||
return 1;
|
||||
|
||||
HMODULE hKernel = GetModuleHandleW(L"kernel32.dll");
|
||||
if (!hKernel)
|
||||
return 0;
|
||||
|
||||
IsIoRingOpSupported =
|
||||
(IsIoRingOpSupported_t)GetProcAddress(hKernel, "IsIoRingOpSupported");
|
||||
QueryIoRingCapabilities = (QueryIoRingCapabilities_t)GetProcAddress(
|
||||
hKernel, "QueryIoRingCapabilities");
|
||||
GetIoRingInfo = (GetIoRingInfo_t)GetProcAddress(hKernel, "GetIoRingInfo");
|
||||
CreateIoRing = (CreateIoRing_t)GetProcAddress(hKernel, "CreateIoRing");
|
||||
CloseIoRing = (CloseIoRing_t)GetProcAddress(hKernel, "CloseIoRing");
|
||||
SubmitIoRing = (SubmitIoRing_t)GetProcAddress(hKernel, "SubmitIoRing");
|
||||
PopIoRingCompletion =
|
||||
(PopIoRingCompletion_t)GetProcAddress(hKernel, "PopIoRingCompletion");
|
||||
SetIoRingCompletionEvent = (SetIoRingCompletionEvent_t)GetProcAddress(
|
||||
hKernel, "SetIoRingCompletionEvent");
|
||||
BuildIoRingReadFile =
|
||||
(BuildIoRingReadFile_t)GetProcAddress(hKernel, "BuildIoRingReadFile");
|
||||
BuildIoRingCancelRequest = (BuildIoRingCancelRequest_t)GetProcAddress(
|
||||
hKernel, "BuildIoRingCancelRequest");
|
||||
BuildIoRingRegisterBuffers = (BuildIoRingRegisterBuffers_t)GetProcAddress(
|
||||
hKernel, "BuildIoRingRegisterBuffers");
|
||||
BuildIoRingRegisterFileHandles =
|
||||
(BuildIoRingRegisterFileHandles_t)GetProcAddress(
|
||||
hKernel, "BuildIoRingRegisterFileHandles");
|
||||
|
||||
io_ring_loaded =
|
||||
(IsIoRingOpSupported && QueryIoRingCapabilities && CreateIoRing &&
|
||||
CloseIoRing && SubmitIoRing && PopIoRingCompletion &&
|
||||
SetIoRingCompletionEvent && BuildIoRingReadFile &&
|
||||
BuildIoRingCancelRequest && BuildIoRingRegisterBuffers &&
|
||||
BuildIoRingRegisterFileHandles);
|
||||
|
||||
if (io_ring_loaded)
|
||||
printf("[I/O Ring] Functions loaded\n");
|
||||
else
|
||||
printf("[I/O Ring] Some functions not available\n");
|
||||
|
||||
return io_ring_loaded;
|
||||
}
|
||||
|
||||
/* ------------- Standard helper functions definition ------------- */
|
||||
// Creates an instance of the IORING_BUFFER_REF structure with the provided
|
||||
// buffer index and offset
|
||||
static inline IORING_BUFFER_REF
|
||||
IoRingBufferRefFromIndexAndOffset(UINT32 index, UINT32 offset) {
|
||||
IORING_BUFFER_REF ref;
|
||||
ref.Kind = IORING_REF_REGISTERED;
|
||||
ref.BufferUnion.IndexAndOffset.Index = index;
|
||||
ref.BufferUnion.IndexAndOffset.Offset = offset;
|
||||
return ref;
|
||||
}
|
||||
|
||||
// Creates an instance of the IORING_BUFFER_REF structure from the provided
|
||||
// pointer
|
||||
static IORING_BUFFER_REF IoRingBufferRefFromPointer(void *addr) {
|
||||
IORING_BUFFER_REF ref;
|
||||
ref.Kind = IORING_REF_RAW;
|
||||
ref.BufferUnion.Address = addr;
|
||||
return ref;
|
||||
}
|
||||
|
||||
// Creates an instance of the IORING_HANDLE_REF structure from the provided file
|
||||
// handle
|
||||
static IORING_HANDLE_REF IoRingHandleRefFromHandle(HANDLE h) {
|
||||
IORING_HANDLE_REF ref;
|
||||
ref.Kind = IORING_REF_RAW;
|
||||
ref.HandleUnion.Handle = h;
|
||||
return ref;
|
||||
}
|
||||
|
||||
// Creates an instance of the IORING_HANDLE_REF structure from the provided
|
||||
// index
|
||||
static inline IORING_HANDLE_REF IoRingHandleRefFromIndex(UINT32 index) {
|
||||
IORING_HANDLE_REF ref;
|
||||
ref.Kind = IORING_REF_REGISTERED; // MUST be registered
|
||||
ref.HandleUnion.Index = index;
|
||||
return ref;
|
||||
}
|
||||
|
||||
// NOTE: If you are using index-based buffers or handles, make sure you have
|
||||
// successfully called BuildIoRingRegisterBuffers or
|
||||
// BuildIoRingRegisterFileHandles first so the kernel has a valid table to look
|
||||
// into, otherwise the kernel will treat the index as an invalid memory
|
||||
// address/handle.
|
||||
@@ -32,7 +32,11 @@ int main(int argc, char **argv) {
|
||||
buf[strcspn(buf, "\r\n")] = 0;
|
||||
|
||||
if (buf[0] == 0) {
|
||||
strcpy(folders[0], ".");
|
||||
if (!platform_get_current_directory(folders[0], sizeof(folders[0]))) {
|
||||
fprintf(stderr, "Failed to get current directory\n");
|
||||
return 1;
|
||||
}
|
||||
normalize_path(folders[0]);
|
||||
folder_count = 1;
|
||||
} else {
|
||||
folder_count = parse_paths(buf, folders, 64);
|
||||
@@ -71,24 +75,60 @@ int main(int argc, char **argv) {
|
||||
.max_nbre_blocks = 1,
|
||||
};
|
||||
|
||||
mem_arena *gp_arena = arena_create(¶ms);
|
||||
arena_params params_caligned = {
|
||||
.reserve_size = GiB(1),
|
||||
.commit_size = MiB(16),
|
||||
.align = ARENA_CACHE_ALIGN,
|
||||
.push_size = 0,
|
||||
.allow_free_list = true,
|
||||
.allow_swapback = false,
|
||||
.growth_policy = ARENA_GROWTH_NORMAL,
|
||||
.commit_policy = ARENA_COMMIT_LAZY,
|
||||
.max_nbre_blocks = 1,
|
||||
};
|
||||
mem_arena *gp_arena = arena_create(¶ms_caligned);
|
||||
|
||||
// -------------------------------
|
||||
// Detect hardware threads
|
||||
// Detect hardware
|
||||
// -------------------------------
|
||||
// --- Windows: detect PHYSICAL cores (not logical threads) ---
|
||||
size_t hw_threads = platform_physical_cores();
|
||||
uint8_t cpu_cores = platform_physical_cores();
|
||||
|
||||
// Logical threads = CPU cores * 2
|
||||
size_t num_threads = hw_threads * 2;
|
||||
uint8_t cpu_threads = cpu_cores * 2;
|
||||
|
||||
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
|
||||
hw_threads);
|
||||
printf(" Selected instruction set: %s\n", get_xxhash_instruction_set());
|
||||
#if MULTI_THREADING
|
||||
uint8_t num_scan_threads = cpu_threads;
|
||||
uint8_t num_hash_threads = cpu_threads;
|
||||
|
||||
printf("%d cores %d threads CPU detected with %s instruction set\n"
|
||||
"Starting thread pool: %d scanning and %d hashing threads\n",
|
||||
cpu_cores, cpu_threads, get_xxhash_instruction_set(), num_scan_threads,
|
||||
num_hash_threads);
|
||||
#else
|
||||
uint32_t num_scan_threads = 1;
|
||||
uint32_t num_hash_threads = 1;
|
||||
|
||||
printf(
|
||||
"%d cores %d threads CPU detected with %s instruction set\n"
|
||||
"Starting thread pool: %d scanning and %d hashing threads(Debug mode)\n",
|
||||
cpu_cores, cpu_threads, get_xxhash_instruction_set(), num_scan_threads,
|
||||
num_hash_threads);
|
||||
|
||||
#endif
|
||||
|
||||
// Align IO Ring block size to the system page size
|
||||
g_pagesize = plat_get_pagesize();
|
||||
g_read_block = ALIGN_UP_POW2(READ_BLOCK, g_pagesize);
|
||||
#if USE_IORING
|
||||
g_ioring_buffer_size = ALIGN_UP_POW2(IORING_BUFFER_SIZE, g_pagesize);
|
||||
#endif
|
||||
|
||||
// -------------------------------
|
||||
// Scanning and hashing
|
||||
// -------------------------------
|
||||
|
||||
// test_io_ring();
|
||||
MPMCQueue dir_queue;
|
||||
mpmc_init(&dir_queue, MiB(1));
|
||||
|
||||
@@ -96,19 +136,23 @@ int main(int argc, char **argv) {
|
||||
mpmc_init(&file_queue, MiB(1));
|
||||
|
||||
// Starting hash threads
|
||||
size_t num_hash_threads = num_threads;
|
||||
|
||||
WorkerContext workers[num_hash_threads];
|
||||
HasherContext workers[num_hash_threads];
|
||||
Thread *hash_threads =
|
||||
arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
|
||||
|
||||
for (size_t i = 0; i < num_hash_threads; ++i) {
|
||||
for (uint8_t i = 0; i < num_hash_threads; ++i) {
|
||||
workers[i].arena = arena_create(¶ms);
|
||||
workers[i].file_queue = &file_queue;
|
||||
|
||||
#if USE_IORING
|
||||
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker_ioring,
|
||||
&workers[i]) != 0)
|
||||
#else
|
||||
if (thread_create(&hash_threads[i], (ThreadFunc)hash_worker, &workers[i]) !=
|
||||
0) {
|
||||
fprintf(stderr, "Failed to create hash thread %zu\n", i);
|
||||
0)
|
||||
#endif
|
||||
{
|
||||
fprintf(stderr, "Failed to create hash thread %d\n", i);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -122,22 +166,19 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
// Starting scan threads
|
||||
size_t num_scan_threads = num_threads;
|
||||
|
||||
ScannerContext scanners[num_scan_threads];
|
||||
Thread *scan_threads =
|
||||
arena_push(&gp_arena, sizeof(Thread) * num_scan_threads, true);
|
||||
|
||||
for (size_t i = 0; i < num_scan_threads; i++) {
|
||||
for (uint8_t i = 0; i < num_scan_threads; i++) {
|
||||
scanners[i].num_threads = num_scan_threads;
|
||||
scanners[i].path_arena = arena_create(¶ms);
|
||||
scanners[i].meta_arena = arena_create(¶ms);
|
||||
scanners[i].meta_arena = arena_create(¶ms_caligned);
|
||||
scanners[i].dir_queue = &dir_queue;
|
||||
scanners[i].file_queue = &file_queue;
|
||||
|
||||
if (thread_create(&scan_threads[i], (ThreadFunc)scan_worker,
|
||||
&scanners[i]) != 0) {
|
||||
fprintf(stderr, "Failed to create scan thread %zu\n", i);
|
||||
fprintf(stderr, "Failed to create scan thread %d\n", i);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -145,7 +186,7 @@ int main(int argc, char **argv) {
|
||||
// Initial folder push
|
||||
for (int i = 0; i < folder_count; i++) {
|
||||
size_t len = strlen(folders[i]) + 1;
|
||||
char *path = arena_push(&scanners[0].path_arena, len, false);
|
||||
char *path = arena_push(&scanners[0].meta_arena, len, false);
|
||||
memcpy(path, folders[i], len);
|
||||
mpmc_push_work(&dir_queue, path);
|
||||
}
|
||||
@@ -153,7 +194,7 @@ int main(int argc, char **argv) {
|
||||
// Stop scan threads
|
||||
thread_wait_multiple(scan_threads, num_scan_threads);
|
||||
|
||||
for (size_t i = 0; i < num_scan_threads; ++i) {
|
||||
for (uint8_t i = 0; i < num_scan_threads; ++i) {
|
||||
thread_close(&scan_threads[i]);
|
||||
}
|
||||
|
||||
@@ -180,7 +221,7 @@ int main(int argc, char **argv) {
|
||||
// Stop hashing threads
|
||||
thread_wait_multiple(hash_threads, num_hash_threads);
|
||||
|
||||
for (size_t i = 0; i < num_hash_threads; ++i) {
|
||||
for (uint8_t i = 0; i < num_hash_threads; ++i) {
|
||||
thread_close(&hash_threads[i]);
|
||||
}
|
||||
|
||||
@@ -197,7 +238,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
FILE *f = fopen(FILE_HASHES_TXT, "wb");
|
||||
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
for (uint8_t i = 0; i < num_hash_threads; i++) {
|
||||
mem_arena *arena = workers[i].arena;
|
||||
u8 *arena_base =
|
||||
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||
@@ -209,6 +250,15 @@ int main(int argc, char **argv) {
|
||||
// -------------------------------
|
||||
// Print summary
|
||||
// -------------------------------
|
||||
#if USE_IORING
|
||||
uint64_t incomplete = atomic_load(&g_io_ring_fallbacks);
|
||||
if (incomplete > 0) {
|
||||
printf("\nWARNING: I/O Ring incomplete files: %llu (fallback to buffered "
|
||||
"I/O used)\n",
|
||||
(unsigned long long)incomplete);
|
||||
}
|
||||
#endif
|
||||
|
||||
double total_seconds = timer_elapsed(&total_timer);
|
||||
|
||||
printf("Completed hashing %zu files\n", total_found);
|
||||
|
||||
55
lf_mpmc.h
55
lf_mpmc.h
@@ -23,9 +23,6 @@ static void cpu_pause(void) {
|
||||
_mm_pause();
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef struct plat_sem plat_sem;
|
||||
|
||||
typedef struct CACHE_ALIGN {
|
||||
atomic_size_t seq;
|
||||
void *data;
|
||||
@@ -45,8 +42,6 @@ typedef struct {
|
||||
size_t commit_step;
|
||||
atomic_flag commit_lock;
|
||||
|
||||
plat_sem items_sem;
|
||||
|
||||
MPMCSlot *slots;
|
||||
} MPMCQueue;
|
||||
|
||||
@@ -94,8 +89,6 @@ static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
|
||||
atomic_init(&q->head, 0);
|
||||
atomic_init(&q->tail, 0);
|
||||
atomic_init(&q->work_count, 0);
|
||||
|
||||
plat_sem_init(&q->items_sem, 0);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
@@ -141,7 +134,6 @@ static void mpmc_commit_more(MPMCQueue *q) {
|
||||
/* ----------------------------------------------------------- */
|
||||
/* PUSH */
|
||||
/* ----------------------------------------------------------- */
|
||||
// Does not increment work
|
||||
static void mpmc_push(MPMCQueue *q, void *item) {
|
||||
MPMCSlot *slot;
|
||||
size_t pos;
|
||||
@@ -184,8 +176,6 @@ static void mpmc_push(MPMCQueue *q, void *item) {
|
||||
slot->data = item;
|
||||
|
||||
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
|
||||
|
||||
plat_sem_post(&q->items_sem, 1);
|
||||
}
|
||||
|
||||
// Increment work
|
||||
@@ -233,15 +223,11 @@ static void mpmc_push_work(MPMCQueue *q, void *item) {
|
||||
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
|
||||
|
||||
atomic_fetch_add(&q->work_count, 1);
|
||||
plat_sem_post(&q->items_sem, 1);
|
||||
}
|
||||
/* ----------------------------------------------------------- */
|
||||
/* POP */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void *mpmc_pop(MPMCQueue *q) {
|
||||
|
||||
plat_sem_wait(&q->items_sem);
|
||||
|
||||
MPMCSlot *slot;
|
||||
size_t pos;
|
||||
|
||||
@@ -262,9 +248,14 @@ static void *mpmc_pop(MPMCQueue *q) {
|
||||
memory_order_relaxed))
|
||||
break;
|
||||
|
||||
} else if (diff < 0) { // queue is empty
|
||||
|
||||
Sleep(500);
|
||||
|
||||
} else { // slot is still transitioning (written by another thread)
|
||||
|
||||
if (++spins > 10) {
|
||||
sleep_ms(0); // yield CPU
|
||||
SwitchToThread(); // yield CPU
|
||||
spins = 0;
|
||||
} else {
|
||||
cpu_pause();
|
||||
@@ -306,21 +297,19 @@ static void mpmc_task_done(MPMCQueue *q, u8 consumer_count) {
|
||||
/* ----------------------------------------------------------- */
|
||||
/* MPMC Cleanup */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_finish(MPMCQueue *q) {
|
||||
if (!q)
|
||||
return;
|
||||
|
||||
if (q->slots) {
|
||||
plat_mem_release(q->slots, 0);
|
||||
q->slots = NULL;
|
||||
}
|
||||
|
||||
plat_sem_destroy(&q->items_sem);
|
||||
|
||||
q->capacity = 0;
|
||||
q->mask = 0;
|
||||
|
||||
atomic_store_explicit(&q->head, 0, memory_order_relaxed);
|
||||
atomic_store_explicit(&q->tail, 0, memory_order_relaxed);
|
||||
atomic_store_explicit(&q->committed, 0, memory_order_relaxed);
|
||||
}
|
||||
// static void mpmc_finish(MPMCQueue *q) { // Comment to prevent warning: unused function
|
||||
// if (!q)
|
||||
// return;
|
||||
//
|
||||
// if (q->slots) {
|
||||
// plat_mem_release(q->slots, 0);
|
||||
// q->slots = NULL;
|
||||
// }
|
||||
//
|
||||
// q->capacity = 0;
|
||||
// q->mask = 0;
|
||||
//
|
||||
// atomic_store_explicit(&q->head, 0, memory_order_relaxed);
|
||||
// atomic_store_explicit(&q->tail, 0, memory_order_relaxed);
|
||||
// atomic_store_explicit(&q->committed, 0, memory_order_relaxed);
|
||||
// }
|
||||
|
||||
254
mt_mpmc.h
Normal file
254
mt_mpmc.h
Normal file
@@ -0,0 +1,254 @@
|
||||
#pragma once
|
||||
|
||||
#include "base.h"
|
||||
|
||||
// Cache align abstraction
|
||||
#define CACHELINE 64
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define CACHE_ALIGN __declspec(align(CACHELINE))
|
||||
#else
|
||||
#define CACHE_ALIGN __attribute__((aligned(CACHELINE)))
|
||||
#endif
|
||||
|
||||
// Mutex/Critical section abstraction
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef CRITICAL_SECTION mtx_t;
|
||||
typedef CONDITION_VARIABLE cond_t;
|
||||
|
||||
#define mtx_init(m) InitializeCriticalSection(m)
|
||||
#define mtx_lock(m) EnterCriticalSection(m)
|
||||
#define mtx_unlock(m) LeaveCriticalSection(m)
|
||||
#define mtx_destroy(m) DeleteCriticalSection(m)
|
||||
|
||||
#define cond_init(c) InitializeConditionVariable(c)
|
||||
#define cond_wait(c, m) SleepConditionVariableCS(c, m, INFINITE)
|
||||
#define cond_signal(c) WakeConditionVariable(c)
|
||||
#define cond_broadcast(c) WakeAllConditionVariable(c)
|
||||
|
||||
#else
|
||||
#include <pthread.h>
|
||||
typedef pthread_mutex_t mtx_t;
|
||||
typedef pthread_cond_t cond_t;
|
||||
|
||||
#define mtx_init(m) pthread_mutex_init(m, NULL)
|
||||
#define mtx_lock(m) pthread_mutex_lock(m)
|
||||
#define mtx_unlock(m) pthread_mutex_unlock(m)
|
||||
#define mtx_destroy(m) pthread_mutex_destroy(m)
|
||||
|
||||
#define cond_init(c) pthread_cond_init(c, NULL)
|
||||
#define cond_wait(c, m) pthread_cond_wait(c, m)
|
||||
#define cond_signal(c) pthread_cond_signal(c)
|
||||
#define cond_broadcast(c) pthread_cond_broadcast(c)
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct CACHE_ALIGN {
|
||||
void *data;
|
||||
char pad[64 - sizeof(void *)];
|
||||
} MPMCSlot;
|
||||
|
||||
typedef struct {
|
||||
CACHE_ALIGN size_t head;
|
||||
CACHE_ALIGN size_t tail;
|
||||
|
||||
CACHE_ALIGN size_t count;
|
||||
CACHE_ALIGN size_t work_count;
|
||||
|
||||
size_t capacity;
|
||||
size_t mask;
|
||||
|
||||
size_t committed;
|
||||
size_t commit_step;
|
||||
|
||||
mtx_t lock;
|
||||
cond_t not_empty;
|
||||
cond_t not_full;
|
||||
|
||||
MPMCSlot *slots;
|
||||
} MPMCQueue;
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* INIT */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
|
||||
q->capacity = max_capacity;
|
||||
q->mask = max_capacity - 1;
|
||||
|
||||
size_t pagesize = plat_get_pagesize();
|
||||
size_t bytes = ALIGN_UP_POW2(sizeof(MPMCSlot) * max_capacity, pagesize);
|
||||
|
||||
q->slots = (MPMCSlot *)plat_mem_reserve(bytes);
|
||||
if (!q->slots) {
|
||||
fprintf(stderr, "plat_mem_reserve failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t commit_bytes = ALIGN_UP_POW2(pagesize, pagesize);
|
||||
q->commit_step = commit_bytes / sizeof(MPMCSlot);
|
||||
|
||||
q->committed = q->commit_step;
|
||||
plat_mem_commit(q->slots, commit_bytes);
|
||||
|
||||
for (size_t i = 0; i < q->committed; i++) {
|
||||
q->slots[i].data = NULL;
|
||||
}
|
||||
|
||||
q->head = 0;
|
||||
q->tail = 0;
|
||||
q->count = 0;
|
||||
q->work_count = 0;
|
||||
|
||||
mtx_init(&q->lock);
|
||||
cond_init(&q->not_empty);
|
||||
cond_init(&q->not_full);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* COMMIT MORE MEMORY */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_commit_more(MPMCQueue *q) {
|
||||
size_t start = q->committed;
|
||||
|
||||
if (start >= q->capacity)
|
||||
return;
|
||||
|
||||
size_t new_commit = start + q->commit_step;
|
||||
if (new_commit > q->capacity)
|
||||
new_commit = q->capacity;
|
||||
|
||||
size_t count = new_commit - start;
|
||||
|
||||
plat_mem_commit(&q->slots[start], count * sizeof(MPMCSlot));
|
||||
|
||||
for (size_t i = start; i < new_commit; i++) {
|
||||
q->slots[i].data = NULL;
|
||||
}
|
||||
|
||||
q->committed = new_commit;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* PUSH */
|
||||
/* ----------------------------------------------------------- */
|
||||
// Does not increment work
|
||||
static void mpmc_push(MPMCQueue *q, void *item) {
|
||||
mtx_lock(&q->lock);
|
||||
|
||||
while (q->count == q->capacity) {
|
||||
cond_wait(&q->not_full, &q->lock);
|
||||
}
|
||||
|
||||
// Ensure committed
|
||||
if (q->tail >= q->committed) {
|
||||
mpmc_commit_more(q);
|
||||
}
|
||||
|
||||
size_t pos = q->tail & q->mask;
|
||||
|
||||
q->slots[pos].data = item;
|
||||
q->tail++;
|
||||
q->count++;
|
||||
|
||||
cond_signal(&q->not_empty);
|
||||
mtx_unlock(&q->lock);
|
||||
}
|
||||
|
||||
// Increment work
|
||||
static void mpmc_push_work(MPMCQueue *q, void *item) {
|
||||
mtx_lock(&q->lock);
|
||||
|
||||
while (q->count == q->capacity) {
|
||||
cond_wait(&q->not_full, &q->lock);
|
||||
}
|
||||
|
||||
if (q->tail >= q->committed) {
|
||||
mpmc_commit_more(q);
|
||||
}
|
||||
|
||||
size_t pos = q->tail & q->mask;
|
||||
|
||||
q->slots[pos].data = item;
|
||||
q->tail++;
|
||||
q->count++;
|
||||
q->work_count++;
|
||||
|
||||
cond_signal(&q->not_empty);
|
||||
mtx_unlock(&q->lock);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* POP */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void *mpmc_pop(MPMCQueue *q) {
|
||||
mtx_lock(&q->lock);
|
||||
|
||||
while (q->count == 0) {
|
||||
cond_wait(&q->not_empty, &q->lock);
|
||||
}
|
||||
|
||||
size_t pos = q->head & q->mask;
|
||||
|
||||
void *data = q->slots[pos].data;
|
||||
|
||||
q->head++;
|
||||
q->count--;
|
||||
|
||||
cond_signal(&q->not_full);
|
||||
mtx_unlock(&q->lock);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* PUSH POISON */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_producers_finished(MPMCQueue *q, u8 consumer_count) {
|
||||
for (u8 i = 0; i < consumer_count; i++) {
|
||||
mpmc_push(q, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* Done */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_task_done(MPMCQueue *q, u8 consumer_count) {
|
||||
|
||||
bool finished = false;
|
||||
|
||||
mtx_lock(&q->lock);
|
||||
|
||||
if (--q->work_count == 0) {
|
||||
finished = true;
|
||||
}
|
||||
|
||||
mtx_unlock(&q->lock);
|
||||
|
||||
if (finished) {
|
||||
mpmc_producers_finished(q, consumer_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* MPMC Cleanup */
|
||||
/* ----------------------------------------------------------- */
|
||||
// static void mpmc_finish(MPMCQueue *q) { // Comment to prevent warning: unused
|
||||
// function
|
||||
// if (!q) return;
|
||||
//
|
||||
// if (q->slots) {
|
||||
// plat_mem_release(q->slots, 0);
|
||||
// q->slots = NULL;
|
||||
// }
|
||||
//
|
||||
// mtx_destroy(&q->lock);
|
||||
//
|
||||
// #if !defined(_WIN32)
|
||||
// pthread_cond_destroy(&q->not_empty);
|
||||
// pthread_cond_destroy(&q->not_full);
|
||||
// #endif
|
||||
//
|
||||
// q->capacity = 0;
|
||||
// q->mask = 0;
|
||||
// }
|
||||
2187
platform.c
2187
platform.c
File diff suppressed because it is too large
Load Diff
388
sm_mpmc.h
Normal file
388
sm_mpmc.h
Normal file
@@ -0,0 +1,388 @@
|
||||
#pragma once
|
||||
|
||||
#include "base.h"
|
||||
|
||||
// Cache align abstraction
|
||||
#define CACHELINE 64
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define CACHE_ALIGN __declspec(align(CACHELINE))
|
||||
#else
|
||||
#define CACHE_ALIGN __attribute__((aligned(CACHELINE)))
|
||||
#endif
|
||||
|
||||
// Compiler hints
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define likely(x) __builtin_expect((x), 1)
|
||||
#define unlikely(x) __builtin_expect((x), 0)
|
||||
#else
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif
|
||||
|
||||
static void cpu_pause(void) {
|
||||
#if defined(_MSC_VER) || defined(__x86_64__) || defined(__i386__)
|
||||
_mm_pause();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Semaphores
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
typedef struct plat_sem {
|
||||
HANDLE handle;
|
||||
} plat_sem;
|
||||
|
||||
static b32 plat_sem_init(plat_sem *s, u32 initial) {
|
||||
s->handle = CreateSemaphore(NULL, initial, LONG_MAX, NULL);
|
||||
return s->handle != NULL;
|
||||
}
|
||||
|
||||
static void plat_sem_wait(plat_sem *s) {
|
||||
WaitForSingleObject(s->handle, INFINITE);
|
||||
}
|
||||
|
||||
// static b32 plat_sem_trywait(HANDLE sem) { // Comment to prevent warning: unused function
|
||||
// DWORD r = WaitForSingleObject(sem, 0);
|
||||
// return r == WAIT_OBJECT_0;
|
||||
// }
|
||||
|
||||
static void plat_sem_post(plat_sem *s, u32 count) {
|
||||
ReleaseSemaphore(s->handle, count, NULL);
|
||||
}
|
||||
|
||||
// static void plat_sem_destroy(plat_sem *s) { // Comment to prevent warning: unused function
|
||||
// if (s->handle) {
|
||||
// CloseHandle(s->handle);
|
||||
// s->handle = NULL;
|
||||
// }
|
||||
// }
|
||||
|
||||
#elif defined(__linux__)
|
||||
#include <semaphore.h>
|
||||
|
||||
typedef struct plat_sem {
|
||||
sem_t sem;
|
||||
} plat_sem;
|
||||
|
||||
static b32 plat_sem_init(plat_sem *s, u32 initial) {
|
||||
return sem_init(&s->sem, 0, initial) == 0;
|
||||
}
|
||||
|
||||
static void plat_sem_wait(plat_sem *s) {
|
||||
while (sem_wait(&s->sem) == -1 && errno == EINTR) {
|
||||
}
|
||||
}
|
||||
|
||||
// static b32 plat_sem_trywait(sem_t *sem) { return sem_trywait(sem) == 0; } // Comment to prevent warning: unused function
|
||||
|
||||
static void plat_sem_post(plat_sem *s, u32 count) {
|
||||
for (u32 i = 0; i < count; i++) {
|
||||
sem_post(&s->sem);
|
||||
}
|
||||
}
|
||||
|
||||
// static void plat_sem_destroy(plat_sem *s) { sem_destroy(&s->sem); } // Comment to prevent warning: unused function
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct plat_sem plat_sem;
|
||||
|
||||
typedef struct CACHE_ALIGN {
|
||||
atomic_size_t seq;
|
||||
void *data;
|
||||
char pad[64 - sizeof(atomic_size_t) - sizeof(void *)];
|
||||
} MPMCSlot;
|
||||
|
||||
typedef struct {
|
||||
CACHE_ALIGN atomic_size_t head;
|
||||
CACHE_ALIGN atomic_size_t tail;
|
||||
|
||||
CACHE_ALIGN atomic_size_t work_count;
|
||||
|
||||
size_t capacity;
|
||||
size_t mask;
|
||||
|
||||
atomic_size_t committed;
|
||||
size_t commit_step;
|
||||
atomic_flag commit_lock;
|
||||
|
||||
plat_sem items_sem;
|
||||
|
||||
MPMCSlot *slots;
|
||||
} MPMCQueue;
|
||||
|
||||
// --------------- functions ----------------
|
||||
// static: each translation unit gets its own private copy this will solve the
|
||||
// error: Function defined in a header file; function definitions in header
|
||||
// files can lead to ODR violations (multiple definition errors if included in
|
||||
// more than one file)
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* INIT */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_init(MPMCQueue *q, size_t max_capacity) {
|
||||
|
||||
q->capacity = max_capacity;
|
||||
q->mask = max_capacity - 1;
|
||||
|
||||
u32 pagesize = plat_get_pagesize();
|
||||
|
||||
size_t bytes = ALIGN_UP_POW2(sizeof(MPMCSlot) * max_capacity, pagesize);
|
||||
|
||||
q->slots = (MPMCSlot *)plat_mem_reserve(bytes);
|
||||
|
||||
if (!q->slots) {
|
||||
fprintf(stderr, "VirtualAlloc reserve failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
u64 commit_bytes = pagesize;
|
||||
commit_bytes = ALIGN_UP_POW2(commit_bytes, pagesize);
|
||||
|
||||
q->commit_step = commit_bytes / sizeof(MPMCSlot);
|
||||
|
||||
atomic_flag_clear(&q->commit_lock);
|
||||
|
||||
q->committed = q->commit_step;
|
||||
|
||||
plat_mem_commit(q->slots, commit_bytes);
|
||||
|
||||
for (size_t i = 0; i < q->committed; i++) {
|
||||
atomic_init(&q->slots[i].seq, i);
|
||||
q->slots[i].data = NULL;
|
||||
}
|
||||
|
||||
atomic_init(&q->head, 0);
|
||||
atomic_init(&q->tail, 0);
|
||||
atomic_init(&q->work_count, 0);
|
||||
|
||||
plat_sem_init(&q->items_sem, 0);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* COMMIT MORE MEMORY */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_commit_more(MPMCQueue *q) {
|
||||
|
||||
if (atomic_flag_test_and_set(&q->commit_lock))
|
||||
return;
|
||||
|
||||
size_t start = atomic_load_explicit(&q->committed, memory_order_acquire);
|
||||
size_t tail = atomic_load_explicit(&q->tail, memory_order_relaxed);
|
||||
|
||||
// another thread already committed enough
|
||||
if (tail < start) {
|
||||
atomic_flag_clear(&q->commit_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (start >= q->capacity) {
|
||||
atomic_flag_clear(&q->commit_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t new_commit = start + q->commit_step;
|
||||
if (new_commit > q->capacity)
|
||||
new_commit = q->capacity;
|
||||
|
||||
size_t count = new_commit - start;
|
||||
|
||||
plat_mem_commit(&q->slots[start], count * sizeof(MPMCSlot));
|
||||
|
||||
for (size_t i = start; i < new_commit; i++) {
|
||||
atomic_init(&q->slots[i].seq, i);
|
||||
q->slots[i].data = NULL;
|
||||
}
|
||||
|
||||
atomic_store_explicit(&q->committed, new_commit, memory_order_release);
|
||||
|
||||
atomic_flag_clear(&q->commit_lock);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* PUSH */
|
||||
/* ----------------------------------------------------------- */
|
||||
// Does not increment work
|
||||
static void mpmc_push(MPMCQueue *q, void *item) {
|
||||
MPMCSlot *slot;
|
||||
size_t pos;
|
||||
|
||||
for (;;) {
|
||||
|
||||
pos = atomic_load_explicit(&q->tail, memory_order_relaxed);
|
||||
|
||||
// ensure the slot is committed BEFORE accessing it
|
||||
size_t committed =
|
||||
atomic_load_explicit(&q->committed, memory_order_relaxed);
|
||||
|
||||
if (unlikely(pos >= committed)) {
|
||||
mpmc_commit_more(q);
|
||||
continue;
|
||||
}
|
||||
|
||||
slot = &q->slots[pos & q->mask];
|
||||
|
||||
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
|
||||
intptr_t diff = (intptr_t)seq - (intptr_t)pos;
|
||||
|
||||
if (likely(diff == 0)) {
|
||||
|
||||
if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1,
|
||||
memory_order_relaxed,
|
||||
memory_order_relaxed))
|
||||
break;
|
||||
|
||||
} else if (diff < 0) { // queue actually full
|
||||
|
||||
sleep_ms(1000);
|
||||
|
||||
} else { // waiting to grow
|
||||
|
||||
sleep_ms(0);
|
||||
}
|
||||
}
|
||||
|
||||
slot->data = item;
|
||||
|
||||
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
|
||||
|
||||
plat_sem_post(&q->items_sem, 1);
|
||||
}
|
||||
|
||||
// Increment work
|
||||
static void mpmc_push_work(MPMCQueue *q, void *item) {
|
||||
MPMCSlot *slot;
|
||||
size_t pos;
|
||||
|
||||
for (;;) {
|
||||
|
||||
pos = atomic_load_explicit(&q->tail, memory_order_relaxed);
|
||||
|
||||
// ensure the slot is committed BEFORE accessing it
|
||||
size_t committed =
|
||||
atomic_load_explicit(&q->committed, memory_order_relaxed);
|
||||
|
||||
if (unlikely(pos >= committed)) {
|
||||
mpmc_commit_more(q);
|
||||
continue;
|
||||
}
|
||||
|
||||
slot = &q->slots[pos & q->mask];
|
||||
|
||||
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
|
||||
intptr_t diff = (intptr_t)seq - (intptr_t)pos;
|
||||
|
||||
if (likely(diff == 0)) {
|
||||
|
||||
if (atomic_compare_exchange_weak_explicit(&q->tail, &pos, pos + 1,
|
||||
memory_order_relaxed,
|
||||
memory_order_relaxed))
|
||||
break;
|
||||
|
||||
} else if (diff < 0) { // queue actually full
|
||||
|
||||
sleep_ms(1000);
|
||||
|
||||
} else { // waiting to grow
|
||||
|
||||
sleep_ms(0);
|
||||
}
|
||||
}
|
||||
|
||||
slot->data = item;
|
||||
|
||||
atomic_store_explicit(&slot->seq, pos + 1, memory_order_release);
|
||||
|
||||
atomic_fetch_add(&q->work_count, 1);
|
||||
plat_sem_post(&q->items_sem, 1);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* POP */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void *mpmc_pop(MPMCQueue *q) {
|
||||
|
||||
plat_sem_wait(&q->items_sem);
|
||||
|
||||
MPMCSlot *slot;
|
||||
size_t pos;
|
||||
|
||||
int spins = 0;
|
||||
|
||||
for (;;) {
|
||||
|
||||
pos = atomic_load_explicit(&q->head, memory_order_relaxed);
|
||||
slot = &q->slots[pos & q->mask];
|
||||
|
||||
size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire);
|
||||
intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1);
|
||||
|
||||
if (likely(diff == 0)) {
|
||||
|
||||
if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1,
|
||||
memory_order_relaxed,
|
||||
memory_order_relaxed))
|
||||
break;
|
||||
|
||||
} else { // slot is still transitioning (written by another thread)
|
||||
if (++spins > 10) {
|
||||
sleep_ms(0); // yield CPU
|
||||
spins = 0;
|
||||
} else {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void *data = slot->data;
|
||||
|
||||
atomic_store_explicit(&slot->seq, pos + q->capacity, memory_order_release);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* PUSH POISON */
|
||||
/* ----------------------------------------------------------- */
|
||||
/*note:
|
||||
After producers finishes, push N poison pills where N = number of consumer
|
||||
threads, this is necessary to stop the consumers.
|
||||
*/
|
||||
|
||||
static void mpmc_producers_finished(MPMCQueue *q, u8 consumer_count) {
|
||||
for (u8 i = 0; i < consumer_count; i++) {
|
||||
mpmc_push(q, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* Done */
|
||||
/* ----------------------------------------------------------- */
|
||||
static void mpmc_task_done(MPMCQueue *q, u8 consumer_count) {
|
||||
size_t prev = atomic_fetch_sub(&q->work_count, 1);
|
||||
if (prev == 1) {
|
||||
mpmc_producers_finished(q, consumer_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* MPMC Cleanup */
|
||||
/* ----------------------------------------------------------- */
|
||||
// static void mpmc_finish(MPMCQueue *q) { // Comment to prevent warning: unused function
|
||||
// if (!q)
|
||||
// return;
|
||||
//
|
||||
// if (q->slots) {
|
||||
// plat_mem_release(q->slots, 0);
|
||||
// q->slots = NULL;
|
||||
// }
|
||||
//
|
||||
// plat_sem_destroy(&q->items_sem);
|
||||
//
|
||||
// q->capacity = 0;
|
||||
// q->mask = 0;
|
||||
//
|
||||
// atomic_store_explicit(&q->head, 0, memory_order_relaxed);
|
||||
// atomic_store_explicit(&q->tail, 0, memory_order_relaxed);
|
||||
// atomic_store_explicit(&q->committed, 0, memory_order_relaxed);
|
||||
// }
|
||||
Reference in New Issue
Block a user