forked from amir/filehasher
hashers now use thread local arena
Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end using #pragma once to ensure that a given header file is included only once in a single compilation unit
This commit is contained in:
15
arena.c
15
arena.c
@@ -1,6 +1,5 @@
|
||||
#include "base.h"
|
||||
|
||||
#include "arena.h"
|
||||
#include "base.h"
|
||||
|
||||
/* ============================================================
|
||||
Helper functions
|
||||
@@ -132,9 +131,6 @@ mem_arena *arena_create(arena_params *params) { // mk create
|
||||
|
||||
u32 pagesize = arena_pagesize();
|
||||
|
||||
u64 align = params->align ? params->align : ARENA_ALIGN;
|
||||
ASSERT((align & (align - 1)) == 0);
|
||||
|
||||
u64 reserve_size = ALIGN_UP_POW2(params->reserve_size, pagesize);
|
||||
u64 commit_size =
|
||||
params->commit_size ? ALIGN_UP_POW2(params->commit_size, pagesize) : 0;
|
||||
@@ -181,7 +177,7 @@ mem_arena *arena_create(arena_params *params) { // mk create
|
||||
arena->commit_size = commit_size;
|
||||
arena->commit_pos = commit_size;
|
||||
|
||||
arena->align = align;
|
||||
arena->align = params->align;
|
||||
arena->push_size = 0;
|
||||
|
||||
arena->allow_free_list = params->allow_free_list;
|
||||
@@ -360,7 +356,8 @@ void *arena_push(mem_arena **arena_ptr, u64 size, bool zero) { // mk push
|
||||
u64 local_pre = ALIGN_UP_POW2(local_pos, selected->align);
|
||||
u64 local_post = local_pre + size;
|
||||
|
||||
if (local_post > selected->reserve_size) {
|
||||
if (local_post > selected->reserve_size -
|
||||
ALIGN_UP_POW2(sizeof(mem_arena), selected->align)) {
|
||||
|
||||
if (arena->allow_free_list && arena->push_size == 0) {
|
||||
u64 tail_start = selected->pos;
|
||||
@@ -605,8 +602,8 @@ void *arena_swapback_pop(mem_arena **arena_ptr, u64 index) { // mk swapback
|
||||
mem_arena *owner = arena_block_from_index(arena, index);
|
||||
|
||||
if (!owner) {
|
||||
fprintf(stderr, "ERROR: Swapback pop failed, index out of range");
|
||||
return NULL;
|
||||
fprintf(stderr, "ERROR: Swapback pop failed, index out of range");
|
||||
return NULL;
|
||||
}
|
||||
u8 *owner_base = (u8 *)owner + ALIGN_UP_POW2(sizeof(mem_arena), owner->align);
|
||||
u8 *arena_base = (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||
|
||||
6
arena.h
6
arena.h
@@ -1,6 +1,4 @@
|
||||
#ifndef BASE_ARENA_H
|
||||
#define BASE_ARENA_H
|
||||
|
||||
#pragma once
|
||||
#include "base.h"
|
||||
|
||||
// #define _CRT_SECURE_NO_WARNINGS
|
||||
@@ -425,5 +423,3 @@ void *plat_mem_reserve(u64 size);
|
||||
b32 plat_mem_commit(void *ptr, u64 size);
|
||||
b32 plat_mem_decommit(void *ptr, u64 size);
|
||||
b32 plat_mem_release(void *ptr, u64 size);
|
||||
|
||||
#endif // BASE_ARENA_H
|
||||
|
||||
61
arena_base.h
Normal file
61
arena_base.h
Normal file
@@ -0,0 +1,61 @@
|
||||
#ifndef BASE_H
|
||||
#define BASE_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Base types
|
||||
------------------------------------------------------------ */
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
typedef uint64_t u64;
|
||||
typedef int8_t i8;
|
||||
typedef int16_t i16;
|
||||
typedef int32_t i32;
|
||||
typedef int64_t i64;
|
||||
|
||||
typedef i8 b8;
|
||||
typedef int b32;
|
||||
|
||||
typedef float f32;
|
||||
typedef double f64;
|
||||
/* ------------------------------------------------------------
|
||||
Size helpers
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#define KiB(x) ((u64)(x) * 1024ULL)
|
||||
#define MiB(x) (KiB(x) * 1024ULL)
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Min / Max helpers
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Alignment helpers
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Assert
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#ifndef ASSERT
|
||||
#define ASSERT(x) assert(x)
|
||||
#endif
|
||||
|
||||
#endif // Base.h
|
||||
39
base.h
39
base.h
@@ -1,11 +1,40 @@
|
||||
#ifndef BASE_H
|
||||
#define BASE_H
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <immintrin.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define PLATFORM_WINDOWS 1
|
||||
#include <aclapi.h>
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <windows.h>
|
||||
|
||||
#define strdup _strdup
|
||||
#else
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#define XXH_VECTOR \
|
||||
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
|
||||
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.c"
|
||||
#include "xxhash.h"
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Base types
|
||||
@@ -25,12 +54,14 @@ typedef int b32;
|
||||
|
||||
typedef float f32;
|
||||
typedef double f64;
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Size helpers
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#define KiB(x) ((u64)(x) * 1024ULL)
|
||||
#define MiB(x) (KiB(x) * 1024ULL)
|
||||
#define GiB(x) (MiB(x) * 1024ULL)
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Min / Max helpers
|
||||
@@ -48,7 +79,7 @@ typedef double f64;
|
||||
Alignment helpers
|
||||
------------------------------------------------------------ */
|
||||
|
||||
#define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
|
||||
#define ALIGN_UP_POW2(x, a) ((a) ? (((x) + ((a) - 1)) & ~((a) - 1)) : (x))
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
Assert
|
||||
@@ -58,4 +89,4 @@ typedef double f64;
|
||||
#define ASSERT(x) assert(x)
|
||||
#endif
|
||||
|
||||
#endif // Base.h
|
||||
#define NDEBUG // Comment to enable asserts
|
||||
|
||||
@@ -20,3 +20,7 @@ Fix bug multiple threads committing at the same time, fixed by using atomic_flag
|
||||
Reorder helper functions
|
||||
|
||||
v3.4: Rewriting hash_worker() to export file_hashes.txt
|
||||
|
||||
v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end
|
||||
using #pragma once to ensure that a given header file is included only once in a single compilation unit
|
||||
forcing xxhash to use the stack instead of the heap
|
||||
|
||||
44
platform.h
44
platform.h
@@ -1,39 +1,10 @@
|
||||
#pragma once
|
||||
#pragma once // ensure that a given header file is included only once in a
|
||||
// single compilation unit
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define PLATFORM_WINDOWS 1
|
||||
#include <aclapi.h>
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <windows.h>
|
||||
|
||||
#define strdup _strdup
|
||||
#else
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#define XXH_VECTOR \
|
||||
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
|
||||
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.c"
|
||||
#include "xxhash.h"
|
||||
#include "arena.h"
|
||||
#include "base.h"
|
||||
|
||||
#include "arena.c"
|
||||
// ----------------------------- Config -------------------------------------
|
||||
#define FILE_HASHES_TXT "file_hashes.txt"
|
||||
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
|
||||
@@ -124,6 +95,11 @@ typedef struct {
|
||||
|
||||
static MPMCQueue g_file_queue;
|
||||
|
||||
typedef struct {
|
||||
MPMCQueue *queue;
|
||||
mem_arena *arena;
|
||||
} WorkerContext;
|
||||
|
||||
/* Scan folders */
|
||||
typedef struct DirQueue DirQueue;
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#include "platform.h"
|
||||
#include <stdio.h>
|
||||
|
||||
// ----------------------------- Globals ------------------------------------
|
||||
static atomic_uint_fast64_t g_files_found = 0;
|
||||
@@ -409,14 +408,10 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) {
|
||||
|
||||
// ------------------------- Hash worker --------------------------------
|
||||
static DWORD WINAPI hash_worker(LPVOID arg) {
|
||||
MPMCQueue *q = (MPMCQueue *)arg;
|
||||
|
||||
static CRITICAL_SECTION append_cs;
|
||||
static LONG init = 0;
|
||||
|
||||
if (InterlockedCompareExchange(&init, 1, 0) == 0) {
|
||||
InitializeCriticalSection(&append_cs);
|
||||
}
|
||||
WorkerContext *ctx = (WorkerContext *)arg;
|
||||
MPMCQueue *q = ctx->queue;
|
||||
mem_arena *local_arena = ctx->arena;
|
||||
|
||||
for (;;) {
|
||||
FileEntry *fe = mpmc_pop(q);
|
||||
@@ -432,16 +427,14 @@ static DWORD WINAPI hash_worker(LPVOID arg) {
|
||||
|
||||
double size_kib = (double)fe->size_bytes / 1024.0;
|
||||
|
||||
EnterCriticalSection(&append_cs);
|
||||
char stack_buf[1024];
|
||||
|
||||
FILE *hf = fopen(FILE_HASHES_TXT, "a");
|
||||
if (hf) {
|
||||
fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hash, fe->path, size_kib,
|
||||
created, modified, fe->owner);
|
||||
fclose(hf);
|
||||
}
|
||||
int len =
|
||||
snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
|
||||
hash, fe->path, size_kib, created, modified, fe->owner);
|
||||
|
||||
LeaveCriticalSection(&append_cs);
|
||||
char *dst = arena_push(&local_arena, len, false);
|
||||
memcpy(dst, stack_buf, len);
|
||||
|
||||
atomic_fetch_add(&g_files_hashed, 1);
|
||||
|
||||
@@ -627,7 +620,7 @@ int main(int argc, char **argv) {
|
||||
// Step 1: Scan all folders
|
||||
// -------------------------------
|
||||
|
||||
mpmc_init(&g_file_queue, 1024 * 1024 * 1024);
|
||||
mpmc_init(&g_file_queue, GiB(1));
|
||||
|
||||
DirQueue q;
|
||||
memset(&q, 0, sizeof(q));
|
||||
@@ -636,11 +629,30 @@ int main(int argc, char **argv) {
|
||||
q.active = 0;
|
||||
|
||||
// starting hash threads
|
||||
|
||||
arena_params params = {
|
||||
.reserve_size = GiB(1),
|
||||
.commit_size = MiB(16),
|
||||
.align = 0,
|
||||
.push_size = 0,
|
||||
.allow_free_list = true,
|
||||
.allow_swapback = false,
|
||||
.growth_policy = ARENA_GROWTH_NORMAL,
|
||||
.commit_policy = ARENA_COMMIT_LAZY,
|
||||
.max_nbre_blocks = 0,
|
||||
};
|
||||
|
||||
WorkerContext workers[num_threads];
|
||||
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
workers[i].queue = &g_file_queue;
|
||||
workers[i].arena = arena_create(¶ms);
|
||||
}
|
||||
|
||||
HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads);
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i) {
|
||||
hash_threads[i] =
|
||||
CreateThread(NULL, 0, hash_worker, &g_file_queue, 0, NULL);
|
||||
hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL);
|
||||
}
|
||||
|
||||
// starting scan threads
|
||||
@@ -662,7 +674,6 @@ int main(int argc, char **argv) {
|
||||
|
||||
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
||||
|
||||
// mpmc_finish(&g_file_queue);
|
||||
// debug
|
||||
for (size_t i = 0; i < num_threads; i++) {
|
||||
mpmc_push(&g_file_queue, NULL);
|
||||
@@ -694,11 +705,39 @@ int main(int argc, char **argv) {
|
||||
CloseHandle(hash_threads[i]);
|
||||
|
||||
free(hash_threads);
|
||||
// free(g_file_queue.items);
|
||||
|
||||
WaitForSingleObject(progress, INFINITE);
|
||||
CloseHandle(progress);
|
||||
|
||||
// write file_hashes.txt
|
||||
|
||||
// FILE *f = fopen(FILE_HASHES_TXT, "wb");
|
||||
//
|
||||
// for (int i = 0; i < num_threads; i++) {
|
||||
// mem_arena *arena = workers[i].arena;
|
||||
//
|
||||
// u8 *arena_base =
|
||||
// (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||
// fwrite(arena_base, 1, arena->pos, f);
|
||||
// }
|
||||
//
|
||||
// fclose(f);
|
||||
|
||||
HANDLE h = CreateFileA(FILE_HASHES_TXT, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
|
||||
FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
|
||||
mem_arena *arena = workers[i].arena;
|
||||
|
||||
DWORD written;
|
||||
|
||||
u8 *arena_base =
|
||||
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||
|
||||
WriteFile(h, arena_base, (DWORD)arena->pos, &written, NULL);
|
||||
}
|
||||
|
||||
// done time
|
||||
double total_seconds = timer_stop(&total_timer);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user