diff --git a/.gitignore b/.gitignore index 4ae4fd8..c15abe7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ file_hasher.rdi file_hasher.exe file_hashes.txt file_list.txt -temp.c +temp_code.c diff --git a/base.h b/base.h index 10dd18a..2ac8251 100644 --- a/base.h +++ b/base.h @@ -29,13 +29,6 @@ #include #endif -#define XXH_VECTOR \ - XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082 - // Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc -#define XXH_INLINE_ALL -#include "xxhash.c" -#include "xxhash.h" - /* ------------------------------------------------------------ Base types ------------------------------------------------------------ */ diff --git a/binaries/changelog.txt b/binaries/changelog.txt index d464b0c..f346008 100644 --- a/binaries/changelog.txt +++ b/binaries/changelog.txt @@ -37,3 +37,7 @@ Align the MPMC queue to pagesize Getting file size from FindFirstFileA() instead of CreateFileA(), since we already call FindFirstFileA() and it returns the size there is no need to open/close every file to get it's size Replacing Malloc and strdup in scan helper function with FileEntry and path arenas + +Making the MPMC queue support when producers are consumers at the same time by adding a variable work, mpmc_push_work() that increments work and mpmc_task_done() that decrements work, and if work = 0 calls mpmc_producers_finished() that pushes poinsons to wake up sleeping threads and make them return NULL + +Replacing DirQueue, a queue growable with realloc with the MPMC queue diff --git a/binaries/file_hasher_v4.0.exe b/binaries/file_hasher_v4.0.exe new file mode 100644 index 0000000..75e93c8 Binary files /dev/null and b/binaries/file_hasher_v4.0.exe differ diff --git a/lf_mpmc.h b/lf_mpmc.h index 640b874..0524923 100644 --- a/lf_mpmc.h +++ b/lf_mpmc.h @@ -279,52 +279,6 @@ static void *mpmc_pop(MPMCQueue *q) { return data; } -/* ----------------------------------------------------------- */ -/* TRY POP (non blocking) */ -/* ----------------------------------------------------------- */ -// static b32 mpmc_try_pop(MPMCQueue *q, void **out) { -// -// if (!plat_sem_trywait(&q->items_sem)) -// return false; -// -// MPMCSlot *slot; -// size_t pos; -// -// int spins = 0; -// -// for (;;) { -// -// pos = atomic_load_explicit(&q->head, memory_order_relaxed); -// slot = &q->slots[pos & q->mask]; -// -// size_t seq = atomic_load_explicit(&slot->seq, memory_order_acquire); -// intptr_t diff = (intptr_t)seq - (intptr_t)(pos + 1); -// -// if (likely(diff == 0)) { -// -// if (atomic_compare_exchange_weak_explicit(&q->head, &pos, pos + 1, -// memory_order_relaxed, -// memory_order_relaxed)) -// break; -// -// } else { -// -// if (++spins > 10) { -// SwitchToThread(); -// spins = 0; -// } else { -// cpu_pause(); -// } -// } -// } -// -// *out = slot->data; -// -// atomic_store_explicit(&slot->seq, pos + q->capacity, memory_order_release); -// -// return true; -// } - /* ----------------------------------------------------------- */ /* PUSH POISON */ /* ----------------------------------------------------------- */ diff --git a/platform.h b/platform.h index 088d564..72cf37b 100644 --- a/platform.h +++ b/platform.h @@ -6,6 +6,14 @@ #include "lf_mpmc.h" #include "arena.c" + +#define XXH_VECTOR \ + XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082 + // Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc +#define XXH_INLINE_ALL +#include "xxhash.c" +#include "xxhash.h" + // ----------------------------- Config ------------------------------------- #define FILE_HASHES_TXT "file_hashes.txt" #define HASH_STRLEN 33 // 128-bit hex (32 chars) + null @@ -45,43 +53,19 @@ static double timer_stop(HiResTimer *t) { (double)g_qpc_freq.QuadPart; } -/* Scan folders */ - -typedef struct DirQueue DirQueue; - -typedef struct DirQueue { - char **items; - size_t count; - size_t cap; - size_t active; - - int stop; - -#if PLATFORM_WINDOWS - CRITICAL_SECTION cs; - CONDITION_VARIABLE cv; -#else - pthread_mutex_t mutex; - pthread_cond_t cond; -#endif -} DirQueue; - -// MPMC Queue - -static MPMCQueue g_dir_queue; -static MPMCQueue g_file_queue; +// Workers context typedef struct { - DirQueue *dir_queue; + u8 num_threads; mem_arena *path_arena; mem_arena *meta_arena; + + MPMCQueue *dir_queue; + MPMCQueue *file_queue; } ScannerContext; typedef struct { mem_arena *arena; + MPMCQueue *file_queue; } WorkerContext; - -// void scan_folder_windows_parallel(const char *base, ScannerContext *ctx); -// void scan_folder_posix_parallel(const char *base, ScannerContext *ctx); -// diff --git a/platform_windows.c b/platform_windows.c index e54312f..68ade97 100644 --- a/platform_windows.c +++ b/platform_windows.c @@ -1,4 +1,3 @@ -#include "arena.h" #include "platform.h" // ----------------------------- Globals ------------------------------------ @@ -104,48 +103,6 @@ void platform_get_file_owner(const char *path, char *out_owner, get_file_owner(path, out_owner, out_owner_size); } -// --------------- parallel directory scanning ---------------- -// Add queue helper functions -static void dirqueue_push(DirQueue *q, const char *path) { - EnterCriticalSection(&q->cs); - - if (q->count + 1 > q->cap) { - q->cap = q->cap ? q->cap * 2 : 1024; - q->items = realloc(q->items, q->cap * sizeof(char *)); - } - - q->items[q->count++] = _strdup(path); - - WakeConditionVariable(&q->cv); - LeaveCriticalSection(&q->cs); -} - -static char *dirqueue_pop(DirQueue *q) { - EnterCriticalSection(&q->cs); - - while (q->count == 0 && q->active > 0) { - SleepConditionVariableCS(&q->cv, &q->cs, INFINITE); - } - - if (q->count == 0 && q->active == 0) { - LeaveCriticalSection(&q->cs); - return NULL; // truly done - } - - char *dir = q->items[--q->count]; - q->active++; - - LeaveCriticalSection(&q->cs); - return dir; -} - -static void dirqueue_done(DirQueue *q) { - EnterCriticalSection(&q->cs); - q->active--; - WakeAllConditionVariable(&q->cv); - LeaveCriticalSection(&q->cs); -} - // ----------------------------- Scan helpers ----------------------------- void scan_folder_windows_parallel(const char *base, ScannerContext *ctx) { char search[MAX_PATHLEN]; @@ -167,7 +124,12 @@ void scan_folder_windows_parallel(const char *base, ScannerContext *ctx) { continue; if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - dirqueue_push(ctx->dir_queue, full); + size_t len = strlen(full) + 1; + + char *dir = arena_push(&ctx->path_arena, len, false); + memcpy(dir, full, len); + + mpmc_push_work(ctx->dir_queue, dir); } else { atomic_fetch_add(&g_files_found, 1); @@ -192,7 +154,7 @@ void scan_folder_windows_parallel(const char *base, ScannerContext *ctx) { fe->size_bytes = ((uint64_t)fd.nFileSizeHigh << 32) | fd.nFileSizeLow; - mpmc_push(&g_file_queue, fe); + mpmc_push(ctx->file_queue, fe); } } while (FindNextFileA(h, &fd)); @@ -203,19 +165,16 @@ void scan_folder_windows_parallel(const char *base, ScannerContext *ctx) { // ------------------------- Scan worker -------------------------------- static DWORD WINAPI scan_worker(LPVOID arg) { ScannerContext *ctx = arg; - DirQueue *q = ctx->dir_queue; for (;;) { - char *dir = dirqueue_pop(q); + char *dir = mpmc_pop(ctx->dir_queue); if (!dir) break; scan_folder_windows_parallel(dir, ctx); - free(dir); - dirqueue_done(q); + mpmc_task_done(ctx->dir_queue, ctx->num_threads); } - return 0; } @@ -252,11 +211,10 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex, BYTE *buf) { static DWORD WINAPI hash_worker(LPVOID arg) { WorkerContext *ctx = (WorkerContext *)arg; - mem_arena *local_arena = ctx->arena; BYTE *buf = (BYTE *)malloc(READ_BLOCK); for (;;) { - FileEntry *fe = mpmc_pop(&g_file_queue); + FileEntry *fe = mpmc_pop(ctx->file_queue); if (!fe) break; @@ -275,12 +233,12 @@ static DWORD WINAPI hash_worker(LPVOID arg) { snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n", hash, fe->path, size_kib, created, modified, fe->owner); - char *dst = arena_push(&local_arena, len, false); + char *dst = arena_push(&ctx->arena, len, false); memcpy(dst, stack_buf, len); atomic_fetch_add(&g_files_hashed, 1); } - free(buf); + // free(buf); It will be freed by the system when the program exits return 0; } @@ -420,9 +378,7 @@ int main(int argc, char **argv) { folder_count = 1; } - // ------------------------------- // Display selected folders - // ------------------------------- printf("Processing %d folder(s):\n", folder_count); for (int i = 0; i < folder_count; ++i) { printf(" - %s\n", folders[i]); @@ -472,20 +428,13 @@ int main(int argc, char **argv) { size_t num_threads = hw_threads * 2; // ------------------------------- - // Step 1: Scan all folders + // Scanning and hashing // ------------------------------- + MPMCQueue dir_queue; + mpmc_init(&dir_queue, MiB(1)); - mpmc_init(&g_file_queue, MiB(1)); - - DirQueue q; - memset(&q, 0, sizeof(q)); - InitializeCriticalSection(&q.cs); - InitializeConditionVariable(&q.cv); - q.active = 0; - - for (int i = 0; i < folder_count; ++i) { - dirqueue_push(&q, folders[i]); - } + MPMCQueue file_queue; + mpmc_init(&file_queue, MiB(1)); // starting hash threads size_t num_hash_threads = num_threads; @@ -498,6 +447,7 @@ int main(int argc, char **argv) { for (size_t i = 0; i < num_hash_threads; ++i) { workers[i].arena = arena_create(¶ms); + workers[i].file_queue = &file_queue; hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL); } @@ -514,26 +464,40 @@ int main(int argc, char **argv) { arena_push(&gp_arena, sizeof(HANDLE) * num_scan_threads, true); for (size_t i = 0; i < num_scan_threads; i++) { - - scanners[i].dir_queue = &q; + scanners[i].num_threads = num_scan_threads; scanners[i].path_arena = arena_create(¶ms); scanners[i].meta_arena = arena_create(¶ms); + scanners[i].dir_queue = &dir_queue; + scanners[i].file_queue = &file_queue; + scan_threads[i] = CreateThread(NULL, 0, scan_worker, &scanners[i], 0, NULL); } - WaitForMultipleObjects((DWORD)num_scan_threads, scan_threads, TRUE, INFINITE); + // Initial folder push + for (int i = 0; i < folder_count; i++) { - for (size_t i = 0; i < num_hash_threads; i++) { - mpmc_push(&g_file_queue, NULL); + size_t len = strlen(folders[i]) + 1; + + char *path = arena_push(&scanners[0].path_arena, len, false); + memcpy(path, folders[i], len); + + mpmc_push_work(&dir_queue, path); } - atomic_store(&g_scan_done, 1); + // Stop scan threads + WaitForMultipleObjects((DWORD)num_scan_threads, scan_threads, TRUE, INFINITE); for (size_t i = 0; i < num_scan_threads; ++i) CloseHandle(scan_threads[i]); + for (size_t i = 0; i < num_hash_threads; i++) { + mpmc_push(&file_queue, NULL); + } + + atomic_store(&g_scan_done, 1); + arena_free(&gp_arena, (u8 **)&scan_threads, sizeof(HANDLE) * num_scan_threads); @@ -544,13 +508,13 @@ int main(int argc, char **argv) { printf("Completed scanning in %.2f seconds, found %zu files\n\n", scan_seconds, total_found); - // if no files found + // If no files found if (total_found == 0) { printf("No files found.\n"); return 0; } - // stop hashing threads + // Stop hashing threads WaitForMultipleObjects((DWORD)num_hash_threads, hash_threads, TRUE, INFINITE); for (size_t i = 0; i < num_hash_threads; ++i) @@ -559,10 +523,13 @@ int main(int argc, char **argv) { arena_free(&gp_arena, (u8 **)&hash_threads, sizeof(HANDLE) * num_hash_threads); + // Stop progress printing thread WaitForSingleObject(progress, INFINITE); CloseHandle(progress); - // write file_hashes.txt + // ------------------------------- + // Export file_hashes.txt + // ------------------------------- // FILE *f = fopen(FILE_HASHES_TXT, "wb"); // @@ -591,7 +558,9 @@ int main(int argc, char **argv) { WriteFile(h, arena_base, (DWORD)local_hash_arena->pos, &written, NULL); } - // done time + // ------------------------------- + // Print summary + // ------------------------------- double total_seconds = timer_stop(&total_timer); printf("Completed hashing %zu files\n", total_found); diff --git a/xxh_x86dispatch.c b/xxh_x86dispatch.c new file mode 100644 index 0000000..0c15820 --- /dev/null +++ b/xxh_x86dispatch.c @@ -0,0 +1,821 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2020-2021 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +/*! + * @file xxh_x86dispatch.c + * + * Automatic dispatcher code for the @ref XXH3_family on x86-based targets. + * + * Optional add-on. + * + * **Compile this file with the default flags for your target.** + * Note that compiling with flags like `-mavx*`, `-march=native`, or `/arch:AVX*` + * will make the resulting binary incompatible with cpus not supporting the requested instruction set. + * + * @defgroup dispatch x86 Dispatcher + * @{ + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)) +# error "Dispatching is currently only supported on x86 and x86_64." +#endif + +/*! @cond Doxygen ignores this part */ +#ifndef XXH_HAS_INCLUDE +# ifdef __has_include +/* + * Not defined as XXH_HAS_INCLUDE(x) (function-like) because + * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion) + */ +# define XXH_HAS_INCLUDE __has_include +# else +# define XXH_HAS_INCLUDE(x) 0 +# endif +#endif +/*! @endcond */ + +/*! + * @def XXH_DISPATCH_SCALAR + * @brief Enables/dispatching the scalar code path. + * + * If this is defined to 0, SSE2 support is assumed. This reduces code size + * when the scalar path is not needed. + * + * This is automatically defined to 0 when... + * - SSE2 support is enabled in the compiler + * - Targeting x86_64 + * - Targeting Android x86 + * - Targeting macOS + */ +#ifndef XXH_DISPATCH_SCALAR +# if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \ + || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \ + || defined(__ANDROID__) || defined(__APPLE__) /* Android or macOS */ +# define XXH_DISPATCH_SCALAR 0 /* disable */ +# else +# define XXH_DISPATCH_SCALAR 1 +# endif +#endif +/*! + * @def XXH_DISPATCH_AVX2 + * @brief Enables/disables dispatching for AVX2. + * + * This is automatically detected if it is not defined. + * - GCC 4.7 and later are known to support AVX2, but >4.9 is required for + * to get the AVX2 intrinsics and typedefs without -mavx -mavx2. + * - Visual Studio 2013 Update 2 and later are known to support AVX2. + * - The GCC/Clang internal header `` is detected. While this is + * not allowed to be included directly, it still appears in the builtin + * include path and is detectable with `__has_include`. + * + * @see XXH_AVX2 + */ +#ifndef XXH_DISPATCH_AVX2 +# if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \ + || (defined(_MSC_VER) && _MSC_VER >= 1900) /* VS 2015+ */ \ + || (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501) /* VS 2013 Update 2 */ \ + || XXH_HAS_INCLUDE() /* GCC/Clang internal header */ +# define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */ +# else +# define XXH_DISPATCH_AVX2 0 +# endif +#endif /* XXH_DISPATCH_AVX2 */ + +/*! + * @def XXH_DISPATCH_AVX512 + * @brief Enables/disables dispatching for AVX512. + * + * Automatically detected if one of the following conditions is met: + * - GCC 4.9 and later are known to support AVX512. + * - Visual Studio 2017 and later are known to support AVX2. + * - The GCC/Clang internal header `` is detected. While this + * is not allowed to be included directly, it still appears in the builtin + * include path and is detectable with `__has_include`. + * + * @see XXH_AVX512 + */ +#ifndef XXH_DISPATCH_AVX512 +# if (defined(__GNUC__) \ + && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \ + || (defined(_MSC_VER) && _MSC_VER >= 1910) /* VS 2017+ */ \ + || XXH_HAS_INCLUDE() /* GCC/Clang internal header */ +# define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */ +# else +# define XXH_DISPATCH_AVX512 0 +# endif +#endif /* XXH_DISPATCH_AVX512 */ + +/*! + * @def XXH_TARGET_SSE2 + * @brief Allows a function to be compiled with SSE2 intrinsics. + * + * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used + * even with `-mno-sse2`. + * + * @def XXH_TARGET_AVX2 + * @brief Like @ref XXH_TARGET_SSE2, but for AVX2. + * + * @def XXH_TARGET_AVX512 + * @brief Like @ref XXH_TARGET_SSE2, but for AVX512. + * + */ +#if defined(__GNUC__) +# include /* SSE2 */ +# if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 +# include /* AVX2, AVX512F */ +# endif +# define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) +# define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) +# define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) +#elif defined(__clang__) && defined(_MSC_VER) /* clang-cl.exe */ +# include /* SSE2 */ +# if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 +# include /* AVX2, AVX512F */ +# include +# include +# include +# include +# endif +# define XXH_TARGET_SSE2 __attribute__((__target__("sse2"))) +# define XXH_TARGET_AVX2 __attribute__((__target__("avx2"))) +# define XXH_TARGET_AVX512 __attribute__((__target__("avx512f"))) +#elif defined(_MSC_VER) +# include +# define XXH_TARGET_SSE2 +# define XXH_TARGET_AVX2 +# define XXH_TARGET_AVX512 +#else +# error "Dispatching is currently not supported for your compiler." +#endif + +/*! @cond Doxygen ignores this part */ +#ifdef XXH_DISPATCH_DEBUG +/* debug logging */ +# include +# define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); } +#else +# define XXH_debugPrint(str) ((void)0) +# undef NDEBUG /* avoid redefinition */ +# define NDEBUG +#endif +/*! @endcond */ +#include + +#ifndef XXH_DOXYGEN +#define XXH_INLINE_ALL +#define XXH_X86DISPATCH +#include "xxhash.h" +#endif + +/*! @cond Doxygen ignores this part */ +#ifndef XXH_HAS_ATTRIBUTE +# ifdef __has_attribute +# define XXH_HAS_ATTRIBUTE(...) __has_attribute(__VA_ARGS__) +# else +# define XXH_HAS_ATTRIBUTE(...) 0 +# endif +#endif +/*! @endcond */ + +/*! @cond Doxygen ignores this part */ +#if XXH_HAS_ATTRIBUTE(constructor) +# define XXH_CONSTRUCTOR __attribute__((constructor)) +# define XXH_DISPATCH_MAYBE_NULL 0 +#else +# define XXH_CONSTRUCTOR +# define XXH_DISPATCH_MAYBE_NULL 1 +#endif +/*! @endcond */ + + +/*! @cond Doxygen ignores this part */ +/* + * Support both AT&T and Intel dialects + * + * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if + * compiled with -masm=intel. Instead, it supports dialect switching with + * curly braces: { AT&T syntax | Intel syntax } + * + * Clang's integrated assembler automatically converts AT&T syntax to Intel if + * needed, making the dialect switching useless (it isn't even supported). + * + * Note: Comments are written in the inline assembly itself. + */ +#ifdef __clang__ +# define XXH_I_ATT(intel, att) att "\n\t" +#else +# define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t" +#endif +/*! @endcond */ + +/*! + * @private + * @brief Runs CPUID. + * + * @param eax , ecx The parameters to pass to CPUID, %eax and %ecx respectively. + * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }` + */ +static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd) +{ +#if defined(_MSC_VER) + __cpuidex((int*)abcd, eax, ecx); +#else + xxh_u32 ebx, edx; +# if defined(__i386__) && defined(__PIC__) + __asm__( + "# Call CPUID\n\t" + "#\n\t" + "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t" + "# EBX, so we use EDI instead.\n\t" + XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi") + XXH_I_ATT("cpuid", "cpuid" ) + XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi") + : "=D" (ebx), +# else + __asm__( + "# Call CPUID\n\t" + XXH_I_ATT("cpuid", "cpuid") + : "=b" (ebx), +# endif + "+a" (eax), "+c" (ecx), "=d" (edx)); + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; +#endif +} + +/* + * Modified version of Intel's guide + * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + */ + +#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 +/*! + * @private + * @brief Runs `XGETBV`. + * + * While the CPU may support AVX2, the operating system might not properly save + * the full YMM/ZMM registers. + * + * xgetbv is used for detecting this: Any compliant operating system will define + * a set of flags in the xcr0 register indicating how it saves the AVX registers. + * + * You can manually disable this flag on Windows by running, as admin: + * + * bcdedit.exe /set xsavedisable 1 + * + * and rebooting. Run the same command with 0 to re-enable it. + */ +static xxh_u64 XXH_xgetbv(void) +{ +#if defined(_MSC_VER) + return _xgetbv(0); /* min VS2010 SP1 compiler is required */ +#else + xxh_u32 xcr0_lo, xcr0_hi; + __asm__( + "# Call XGETBV\n\t" + "#\n\t" + "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t" + "# the XGETBV opcode, so we encode it by hand instead.\n\t" + "# See for details.\n\t" + ".byte 0x0f, 0x01, 0xd0\n\t" + : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | ((xxh_u64)xcr0_hi << 32); +#endif +} +#endif + +/*! @cond Doxygen ignores this part */ +#define XXH_SSE2_CPUID_MASK (1 << 26) +#define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27)) +#define XXH_AVX2_CPUID_MASK (1 << 5) +#define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1)) +#define XXH_AVX512F_CPUID_MASK (1 << 16) +#define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) +/*! @endcond */ + +/*! + * @private + * @brief Returns the best XXH3 implementation. + * + * Runs various CPUID/XGETBV tests to try and determine the best implementation. + * + * @return The best @ref XXH_VECTOR implementation. + * @see XXH_VECTOR_TYPES + */ +int XXH_featureTest(void) +{ + xxh_u32 abcd[4]; + xxh_u32 max_leaves; + int best = XXH_SCALAR; +#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 + xxh_u64 xgetbv_val; +#endif +#if defined(__GNUC__) && defined(__i386__) + xxh_u32 cpuid_supported; + __asm__( + "# For the sake of ruthless backwards compatibility, check if CPUID\n\t" + "# is supported in the EFLAGS on i386.\n\t" + "# This is not necessary on x86_64 - CPUID is mandatory.\n\t" + "# The ID flag (bit 21) in the EFLAGS register indicates support\n\t" + "# for the CPUID instruction. If a software procedure can set and\n\t" + "# clear this flag, the processor executing the procedure supports\n\t" + "# the CPUID instruction.\n\t" + "# \n\t" + "#\n\t" + "# Routine is from .\n\t" + + "# Save EFLAGS\n\t" + XXH_I_ATT("pushfd", "pushfl" ) + "# Store EFLAGS\n\t" + XXH_I_ATT("pushfd", "pushfl" ) + "# Invert the ID bit in stored EFLAGS\n\t" + XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)") + "# Load stored EFLAGS (with ID bit inverted)\n\t" + XXH_I_ATT("popfd", "popfl" ) + "# Store EFLAGS again (ID bit may or not be inverted)\n\t" + XXH_I_ATT("pushfd", "pushfl" ) + "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t" + XXH_I_ATT("pop eax", "popl %%eax" ) + "# eax = whichever bits were changed\n\t" + XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" ) + "# Restore original EFLAGS\n\t" + XXH_I_ATT("popfd", "popfl" ) + "# eax = zero if ID bit can't be changed, else non-zero\n\t" + XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" ) + : "=a" (cpuid_supported) :: "cc"); + + if (XXH_unlikely(!cpuid_supported)) { + XXH_debugPrint("CPUID support is not detected!"); + return best; + } + +#endif + /* Check how many CPUID pages we have */ + XXH_cpuid(0, 0, abcd); + max_leaves = abcd[0]; + + /* Shouldn't happen on hardware, but happens on some QEMU configs. */ + if (XXH_unlikely(max_leaves == 0)) { + XXH_debugPrint("Max CPUID leaves == 0!"); + return best; + } + + /* Check for SSE2, OSXSAVE and xgetbv */ + XXH_cpuid(1, 0, abcd); + + /* + * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt. + */ + if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK)) + return best; + + XXH_debugPrint("SSE2 support detected."); + + best = XXH_SSE2; +#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 + /* Make sure we have enough leaves */ + if (XXH_unlikely(max_leaves < 7)) + return best; + + /* Test for OSXSAVE and XGETBV */ + if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK) + return best; + + /* CPUID check for AVX features */ + XXH_cpuid(7, 0, abcd); + + xgetbv_val = XXH_xgetbv(); +#if XXH_DISPATCH_AVX2 + /* Validate that AVX2 is supported by the CPU */ + if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK) + return best; + + /* Validate that the OS supports YMM registers */ + if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) { + XXH_debugPrint("AVX2 supported by the CPU, but not the OS."); + return best; + } + + /* AVX2 supported */ + XXH_debugPrint("AVX2 support detected."); + best = XXH_AVX2; +#endif +#if XXH_DISPATCH_AVX512 + /* Check if AVX512F is supported by the CPU */ + if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) { + XXH_debugPrint("AVX512F not supported by CPU"); + return best; + } + + /* Validate that the OS supports ZMM registers */ + if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) { + XXH_debugPrint("AVX512F supported by the CPU, but not the OS."); + return best; + } + + /* AVX512F supported */ + XXH_debugPrint("AVX512F support detected."); + best = XXH_AVX512; +#endif +#endif + return best; +} + + +/* === Vector implementations === */ + +/*! @cond PRIVATE */ +/*! + * @private + * @brief Defines the various dispatch functions. + * + * TODO: Consolidate? + * + * @param suffix The suffix for the functions, e.g. sse2 or scalar + * @param target XXH_TARGET_* or empty. + */ + +#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target) \ + \ +/* === XXH3, default variants === */ \ + \ +XXH_NO_INLINE target XXH64_hash_t \ +XXHL64_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \ + size_t len) \ +{ \ + return XXH3_hashLong_64b_internal( \ + input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \ + XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \ + ); \ +} \ + \ +/* === XXH3, Seeded variants === */ \ + \ +XXH_NO_INLINE target XXH64_hash_t \ +XXHL64_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len, \ + XXH64_hash_t seed) \ +{ \ + return XXH3_hashLong_64b_withSeed_internal( \ + input, len, seed, XXH3_accumulate_##suffix, \ + XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \ + ); \ +} \ + \ +/* === XXH3, Secret variants === */ \ + \ +XXH_NO_INLINE target XXH64_hash_t \ +XXHL64_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \ + size_t len, XXH_NOESCAPE const void* secret, \ + size_t secretLen) \ +{ \ + return XXH3_hashLong_64b_internal( \ + input, len, secret, secretLen, \ + XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \ + ); \ +} \ + \ +/* === XXH3 update variants === */ \ + \ +XXH_NO_INLINE target XXH_errorcode \ +XXH3_update_##suffix(XXH_NOESCAPE XXH3_state_t* state, \ + XXH_NOESCAPE const void* input, size_t len) \ +{ \ + return XXH3_update(state, (const xxh_u8*)input, len, \ + XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix); \ +} \ + \ +/* === XXH128 default variants === */ \ + \ +XXH_NO_INLINE target XXH128_hash_t \ +XXHL128_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \ + size_t len) \ +{ \ + return XXH3_hashLong_128b_internal( \ + input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \ + XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \ + ); \ +} \ + \ +/* === XXH128 Secret variants === */ \ + \ +XXH_NO_INLINE target XXH128_hash_t \ +XXHL128_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \ + size_t len, \ + XXH_NOESCAPE const void* XXH_RESTRICT secret, \ + size_t secretLen) \ +{ \ + return XXH3_hashLong_128b_internal( \ + input, len, (const xxh_u8*)secret, secretLen, \ + XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix); \ +} \ + \ +/* === XXH128 Seeded variants === */ \ + \ +XXH_NO_INLINE target XXH128_hash_t \ +XXHL128_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len,\ + XXH64_hash_t seed) \ +{ \ + return XXH3_hashLong_128b_withSeed_internal(input, len, seed, \ + XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix, \ + XXH3_initCustomSecret_##suffix); \ +} + +/*! @endcond */ +/* End XXH_DEFINE_DISPATCH_FUNCS */ + +/*! @cond Doxygen ignores this part */ +#if XXH_DISPATCH_SCALAR +XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */) +#endif +XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2) +#if XXH_DISPATCH_AVX2 +XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2) +#endif +#if XXH_DISPATCH_AVX512 +XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512) +#endif +#undef XXH_DEFINE_DISPATCH_FUNCS +/*! @endcond */ + +/* ==== Dispatchers ==== */ + +/*! @cond Doxygen ignores this part */ +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t); + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t); + +typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH_NOESCAPE const void* XXH_RESTRICT, size_t); + +typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH_NOESCAPE XXH3_state_t*, XXH_NOESCAPE const void*, size_t); + +typedef struct { + XXH3_dispatchx86_hashLong64_default hashLong64_default; + XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed; + XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret; + XXH3_dispatchx86_update update; +} XXH_dispatchFunctions_s; + +#define XXH_NB_DISPATCHES 4 +/*! @endcond */ + +/*! + * @private + * @brief Table of dispatchers for @ref XXH3_64bits(). + * + * @pre The indices must match @ref XXH_VECTOR_TYPE. + */ +static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = { +#if XXH_DISPATCH_SCALAR + /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar }, +#else + /* Scalar */ { NULL, NULL, NULL, NULL }, +#endif + /* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 }, +#if XXH_DISPATCH_AVX2 + /* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 }, +#else + /* AVX2 */ { NULL, NULL, NULL, NULL }, +#endif +#if XXH_DISPATCH_AVX512 + /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 } +#else + /* AVX512 */ { NULL, NULL, NULL, NULL } +#endif +}; +/*! + * @private + * @brief The selected dispatch table for @ref XXH3_64bits(). + */ +static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL }; + + +/*! @cond Doxygen ignores this part */ +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t); + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t); + +typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH_NOESCAPE const void* XXH_RESTRICT, size_t); + +typedef struct { + XXH3_dispatchx86_hashLong128_default hashLong128_default; + XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed; + XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret; + XXH3_dispatchx86_update update; +} XXH_dispatch128Functions_s; +/*! @endcond */ + + +/*! + * @private + * @brief Table of dispatchers for @ref XXH3_128bits(). + * + * @pre The indices must match @ref XXH_VECTOR_TYPE. + */ +static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = { +#if XXH_DISPATCH_SCALAR + /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar }, +#else + /* Scalar */ { NULL, NULL, NULL, NULL }, +#endif + /* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 }, +#if XXH_DISPATCH_AVX2 + /* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 }, +#else + /* AVX2 */ { NULL, NULL, NULL, NULL }, +#endif +#if XXH_DISPATCH_AVX512 + /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 } +#else + /* AVX512 */ { NULL, NULL, NULL, NULL } +#endif +}; + +/*! + * @private + * @brief The selected dispatch table for @ref XXH3_64bits(). + */ +static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL }; + +/*! + * @private + * @brief Runs a CPUID check and sets the correct dispatch tables. + */ +static XXH_CONSTRUCTOR void XXH_setDispatch(void) +{ + int vecID = XXH_featureTest(); + XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1); + assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512); +#if !XXH_DISPATCH_SCALAR + assert(vecID != XXH_SCALAR); +#endif +#if !XXH_DISPATCH_AVX512 + assert(vecID != XXH_AVX512); +#endif +#if !XXH_DISPATCH_AVX2 + assert(vecID != XXH_AVX2); +#endif + XXH_g_dispatch = XXH_kDispatch[vecID]; + XXH_g_dispatch128 = XXH_kDispatch128[vecID]; +} + + +/* ==== XXH3 public functions ==== */ +/*! @cond Doxygen ignores this part */ + +static XXH64_hash_t +XXH3_hashLong_64b_defaultSecret_selection(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_default == NULL) + XXH_setDispatch(); + return XXH_g_dispatch.hashLong64_default(input, len); +} + +XXH64_hash_t XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len) +{ + return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection); +} + +static XXH64_hash_t +XXH3_hashLong_64b_withSeed_selection(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_seed == NULL) + XXH_setDispatch(); + return XXH_g_dispatch.hashLong64_seed(input, len, seed64); +} + +XXH64_hash_t XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection); +} + +static XXH64_hash_t +XXH3_hashLong_64b_withSecret_selection(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_secret == NULL) + XXH_setDispatch(); + return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen); +} + +XXH64_hash_t XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen) +{ + return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection); +} + +XXH_errorcode +XXH3_64bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) +{ + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.update == NULL) + XXH_setDispatch(); + + return XXH_g_dispatch.update(state, (const xxh_u8*)input, len); +} + +/*! @endcond */ + + +/* ==== XXH128 public functions ==== */ +/*! @cond Doxygen ignores this part */ + +static XXH128_hash_t +XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_default == NULL) + XXH_setDispatch(); + return XXH_g_dispatch128.hashLong128_default(input, len); +} + +XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection); +} + +static XXH128_hash_t +XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_seed == NULL) + XXH_setDispatch(); + return XXH_g_dispatch128.hashLong128_seed(input, len, seed64); +} + +XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection); +} + +static XXH128_hash_t +XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len, + XXH64_hash_t seed64, const void* secret, size_t secretLen) +{ + (void)seed64; + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_secret == NULL) + XXH_setDispatch(); + return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen); +} + +XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen) +{ + return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection); +} + +XXH_errorcode +XXH3_128bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) +{ + if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.update == NULL) + XXH_setDispatch(); + return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len); +} + +/*! @endcond */ + +#if defined (__cplusplus) +} +#endif +/*! @} */ diff --git a/xxh_x86dispatch.h b/xxh_x86dispatch.h new file mode 100644 index 0000000..7085221 --- /dev/null +++ b/xxh_x86dispatch.h @@ -0,0 +1,93 @@ +/* + * xxHash - XXH3 Dispatcher for x86-based targets + * Copyright (C) 2020-2024 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +#ifndef XXH_X86DISPATCH_H_13563687684 +#define XXH_X86DISPATCH_H_13563687684 + +#include "xxhash.h" /* XXH64_hash_t, XXH3_state_t */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*! + * @brief Returns the best XXH3 implementation for x86 + * + * @return The best @ref XXH_VECTOR implementation. + * @see XXH_VECTOR_TYPES + */ +XXH_PUBLIC_API int XXH_featureTest(void); + +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len); + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len); + +#if defined (__cplusplus) +} +#endif + + +/* automatic replacement of XXH3 functions. + * can be disabled by setting XXH_DISPATCH_DISABLE_REPLACE */ +#ifndef XXH_DISPATCH_DISABLE_REPLACE + +# undef XXH3_64bits +# define XXH3_64bits XXH3_64bits_dispatch +# undef XXH3_64bits_withSeed +# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch +# undef XXH3_64bits_withSecret +# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch +# undef XXH3_64bits_update +# define XXH3_64bits_update XXH3_64bits_update_dispatch + +# undef XXH128 +# define XXH128 XXH3_128bits_withSeed_dispatch +# undef XXH3_128bits +# define XXH3_128bits XXH3_128bits_dispatch +# undef XXH3_128bits_withSeed +# define XXH3_128bits_withSeed XXH3_128bits_withSeed_dispatch +# undef XXH3_128bits_withSecret +# define XXH3_128bits_withSecret XXH3_128bits_withSecret_dispatch +# undef XXH3_128bits_update +# define XXH3_128bits_update XXH3_128bits_update_dispatch + +#endif /* XXH_DISPATCH_DISABLE_REPLACE */ + +#endif /* XXH_X86DISPATCH_H_13563687684 */