forked from amir/filehasher
hashers now use thread local arena
Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end using #pragma once to ensure that a given header file is included only once in a single compilation unit
This commit is contained in:
11
arena.c
11
arena.c
@@ -1,6 +1,5 @@
|
|||||||
#include "base.h"
|
|
||||||
|
|
||||||
#include "arena.h"
|
#include "arena.h"
|
||||||
|
#include "base.h"
|
||||||
|
|
||||||
/* ============================================================
|
/* ============================================================
|
||||||
Helper functions
|
Helper functions
|
||||||
@@ -132,9 +131,6 @@ mem_arena *arena_create(arena_params *params) { // mk create
|
|||||||
|
|
||||||
u32 pagesize = arena_pagesize();
|
u32 pagesize = arena_pagesize();
|
||||||
|
|
||||||
u64 align = params->align ? params->align : ARENA_ALIGN;
|
|
||||||
ASSERT((align & (align - 1)) == 0);
|
|
||||||
|
|
||||||
u64 reserve_size = ALIGN_UP_POW2(params->reserve_size, pagesize);
|
u64 reserve_size = ALIGN_UP_POW2(params->reserve_size, pagesize);
|
||||||
u64 commit_size =
|
u64 commit_size =
|
||||||
params->commit_size ? ALIGN_UP_POW2(params->commit_size, pagesize) : 0;
|
params->commit_size ? ALIGN_UP_POW2(params->commit_size, pagesize) : 0;
|
||||||
@@ -181,7 +177,7 @@ mem_arena *arena_create(arena_params *params) { // mk create
|
|||||||
arena->commit_size = commit_size;
|
arena->commit_size = commit_size;
|
||||||
arena->commit_pos = commit_size;
|
arena->commit_pos = commit_size;
|
||||||
|
|
||||||
arena->align = align;
|
arena->align = params->align;
|
||||||
arena->push_size = 0;
|
arena->push_size = 0;
|
||||||
|
|
||||||
arena->allow_free_list = params->allow_free_list;
|
arena->allow_free_list = params->allow_free_list;
|
||||||
@@ -360,7 +356,8 @@ void *arena_push(mem_arena **arena_ptr, u64 size, bool zero) { // mk push
|
|||||||
u64 local_pre = ALIGN_UP_POW2(local_pos, selected->align);
|
u64 local_pre = ALIGN_UP_POW2(local_pos, selected->align);
|
||||||
u64 local_post = local_pre + size;
|
u64 local_post = local_pre + size;
|
||||||
|
|
||||||
if (local_post > selected->reserve_size) {
|
if (local_post > selected->reserve_size -
|
||||||
|
ALIGN_UP_POW2(sizeof(mem_arena), selected->align)) {
|
||||||
|
|
||||||
if (arena->allow_free_list && arena->push_size == 0) {
|
if (arena->allow_free_list && arena->push_size == 0) {
|
||||||
u64 tail_start = selected->pos;
|
u64 tail_start = selected->pos;
|
||||||
|
|||||||
6
arena.h
6
arena.h
@@ -1,6 +1,4 @@
|
|||||||
#ifndef BASE_ARENA_H
|
#pragma once
|
||||||
#define BASE_ARENA_H
|
|
||||||
|
|
||||||
#include "base.h"
|
#include "base.h"
|
||||||
|
|
||||||
// #define _CRT_SECURE_NO_WARNINGS
|
// #define _CRT_SECURE_NO_WARNINGS
|
||||||
@@ -425,5 +423,3 @@ void *plat_mem_reserve(u64 size);
|
|||||||
b32 plat_mem_commit(void *ptr, u64 size);
|
b32 plat_mem_commit(void *ptr, u64 size);
|
||||||
b32 plat_mem_decommit(void *ptr, u64 size);
|
b32 plat_mem_decommit(void *ptr, u64 size);
|
||||||
b32 plat_mem_release(void *ptr, u64 size);
|
b32 plat_mem_release(void *ptr, u64 size);
|
||||||
|
|
||||||
#endif // BASE_ARENA_H
|
|
||||||
|
|||||||
61
arena_base.h
Normal file
61
arena_base.h
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
#ifndef BASE_H
|
||||||
|
#define BASE_H
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------
|
||||||
|
Base types
|
||||||
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
|
typedef uint8_t u8;
|
||||||
|
typedef uint16_t u16;
|
||||||
|
typedef uint32_t u32;
|
||||||
|
typedef uint64_t u64;
|
||||||
|
typedef int8_t i8;
|
||||||
|
typedef int16_t i16;
|
||||||
|
typedef int32_t i32;
|
||||||
|
typedef int64_t i64;
|
||||||
|
|
||||||
|
typedef i8 b8;
|
||||||
|
typedef int b32;
|
||||||
|
|
||||||
|
typedef float f32;
|
||||||
|
typedef double f64;
|
||||||
|
/* ------------------------------------------------------------
|
||||||
|
Size helpers
|
||||||
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
|
#define KiB(x) ((u64)(x) * 1024ULL)
|
||||||
|
#define MiB(x) (KiB(x) * 1024ULL)
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------
|
||||||
|
Min / Max helpers
|
||||||
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
|
#ifndef MIN
|
||||||
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef MAX
|
||||||
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------
|
||||||
|
Alignment helpers
|
||||||
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
|
#define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------
|
||||||
|
Assert
|
||||||
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
|
#ifndef ASSERT
|
||||||
|
#define ASSERT(x) assert(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // Base.h
|
||||||
39
base.h
39
base.h
@@ -1,11 +1,40 @@
|
|||||||
#ifndef BASE_H
|
#pragma once
|
||||||
#define BASE_H
|
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <stdatomic.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
#define PLATFORM_WINDOWS 1
|
||||||
|
#include <aclapi.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <io.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <windows.h>
|
||||||
|
|
||||||
|
#define strdup _strdup
|
||||||
|
#else
|
||||||
|
#include <dirent.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <pwd.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define XXH_VECTOR \
|
||||||
|
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
|
||||||
|
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
|
||||||
|
#define XXH_INLINE_ALL
|
||||||
|
#include "xxhash.c"
|
||||||
|
#include "xxhash.h"
|
||||||
|
|
||||||
/* ------------------------------------------------------------
|
/* ------------------------------------------------------------
|
||||||
Base types
|
Base types
|
||||||
@@ -25,12 +54,14 @@ typedef int b32;
|
|||||||
|
|
||||||
typedef float f32;
|
typedef float f32;
|
||||||
typedef double f64;
|
typedef double f64;
|
||||||
|
|
||||||
/* ------------------------------------------------------------
|
/* ------------------------------------------------------------
|
||||||
Size helpers
|
Size helpers
|
||||||
------------------------------------------------------------ */
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
#define KiB(x) ((u64)(x) * 1024ULL)
|
#define KiB(x) ((u64)(x) * 1024ULL)
|
||||||
#define MiB(x) (KiB(x) * 1024ULL)
|
#define MiB(x) (KiB(x) * 1024ULL)
|
||||||
|
#define GiB(x) (MiB(x) * 1024ULL)
|
||||||
|
|
||||||
/* ------------------------------------------------------------
|
/* ------------------------------------------------------------
|
||||||
Min / Max helpers
|
Min / Max helpers
|
||||||
@@ -48,7 +79,7 @@ typedef double f64;
|
|||||||
Alignment helpers
|
Alignment helpers
|
||||||
------------------------------------------------------------ */
|
------------------------------------------------------------ */
|
||||||
|
|
||||||
#define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
|
#define ALIGN_UP_POW2(x, a) ((a) ? (((x) + ((a) - 1)) & ~((a) - 1)) : (x))
|
||||||
|
|
||||||
/* ------------------------------------------------------------
|
/* ------------------------------------------------------------
|
||||||
Assert
|
Assert
|
||||||
@@ -58,4 +89,4 @@ typedef double f64;
|
|||||||
#define ASSERT(x) assert(x)
|
#define ASSERT(x) assert(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // Base.h
|
#define NDEBUG // Comment to enable asserts
|
||||||
|
|||||||
@@ -20,3 +20,7 @@ Fix bug multiple threads committing at the same time, fixed by using atomic_flag
|
|||||||
Reorder helper functions
|
Reorder helper functions
|
||||||
|
|
||||||
v3.4: Rewriting hash_worker() to export file_hashes.txt
|
v3.4: Rewriting hash_worker() to export file_hashes.txt
|
||||||
|
|
||||||
|
v4.0: Instead of writing directly to file_hashes.txt, hash_workers now are using a local arena, writing everything once at the end
|
||||||
|
using #pragma once to ensure that a given header file is included only once in a single compilation unit
|
||||||
|
forcing xxhash to use the stack instead of the heap
|
||||||
|
|||||||
44
platform.h
44
platform.h
@@ -1,39 +1,10 @@
|
|||||||
#pragma once
|
#pragma once // ensure that a given header file is included only once in a
|
||||||
|
// single compilation unit
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include "arena.h"
|
||||||
#include <stdatomic.h>
|
#include "base.h"
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
|
||||||
#define PLATFORM_WINDOWS 1
|
|
||||||
#include <aclapi.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <io.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <windows.h>
|
|
||||||
|
|
||||||
#define strdup _strdup
|
|
||||||
#else
|
|
||||||
#include <dirent.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <pthread.h>
|
|
||||||
#include <pwd.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define XXH_VECTOR \
|
|
||||||
XXH_AVX2 // not recommanded to compile with gcc see xxhash.h line 4082
|
|
||||||
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
|
|
||||||
#define XXH_INLINE_ALL
|
|
||||||
#include "xxhash.c"
|
|
||||||
#include "xxhash.h"
|
|
||||||
|
|
||||||
|
#include "arena.c"
|
||||||
// ----------------------------- Config -------------------------------------
|
// ----------------------------- Config -------------------------------------
|
||||||
#define FILE_HASHES_TXT "file_hashes.txt"
|
#define FILE_HASHES_TXT "file_hashes.txt"
|
||||||
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
|
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
|
||||||
@@ -124,6 +95,11 @@ typedef struct {
|
|||||||
|
|
||||||
static MPMCQueue g_file_queue;
|
static MPMCQueue g_file_queue;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
MPMCQueue *queue;
|
||||||
|
mem_arena *arena;
|
||||||
|
} WorkerContext;
|
||||||
|
|
||||||
/* Scan folders */
|
/* Scan folders */
|
||||||
typedef struct DirQueue DirQueue;
|
typedef struct DirQueue DirQueue;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
#include "platform.h"
|
#include "platform.h"
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
// ----------------------------- Globals ------------------------------------
|
// ----------------------------- Globals ------------------------------------
|
||||||
static atomic_uint_fast64_t g_files_found = 0;
|
static atomic_uint_fast64_t g_files_found = 0;
|
||||||
@@ -409,14 +408,10 @@ static void xxh3_hash_file_stream(const char *path, char *out_hex) {
|
|||||||
|
|
||||||
// ------------------------- Hash worker --------------------------------
|
// ------------------------- Hash worker --------------------------------
|
||||||
static DWORD WINAPI hash_worker(LPVOID arg) {
|
static DWORD WINAPI hash_worker(LPVOID arg) {
|
||||||
MPMCQueue *q = (MPMCQueue *)arg;
|
|
||||||
|
|
||||||
static CRITICAL_SECTION append_cs;
|
WorkerContext *ctx = (WorkerContext *)arg;
|
||||||
static LONG init = 0;
|
MPMCQueue *q = ctx->queue;
|
||||||
|
mem_arena *local_arena = ctx->arena;
|
||||||
if (InterlockedCompareExchange(&init, 1, 0) == 0) {
|
|
||||||
InitializeCriticalSection(&append_cs);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
FileEntry *fe = mpmc_pop(q);
|
FileEntry *fe = mpmc_pop(q);
|
||||||
@@ -432,16 +427,14 @@ static DWORD WINAPI hash_worker(LPVOID arg) {
|
|||||||
|
|
||||||
double size_kib = (double)fe->size_bytes / 1024.0;
|
double size_kib = (double)fe->size_bytes / 1024.0;
|
||||||
|
|
||||||
EnterCriticalSection(&append_cs);
|
char stack_buf[1024];
|
||||||
|
|
||||||
FILE *hf = fopen(FILE_HASHES_TXT, "a");
|
int len =
|
||||||
if (hf) {
|
snprintf(stack_buf, sizeof(stack_buf), "%s\t%s\t%.2f\t%s\t%s\t%s\n",
|
||||||
fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hash, fe->path, size_kib,
|
hash, fe->path, size_kib, created, modified, fe->owner);
|
||||||
created, modified, fe->owner);
|
|
||||||
fclose(hf);
|
|
||||||
}
|
|
||||||
|
|
||||||
LeaveCriticalSection(&append_cs);
|
char *dst = arena_push(&local_arena, len, false);
|
||||||
|
memcpy(dst, stack_buf, len);
|
||||||
|
|
||||||
atomic_fetch_add(&g_files_hashed, 1);
|
atomic_fetch_add(&g_files_hashed, 1);
|
||||||
|
|
||||||
@@ -627,7 +620,7 @@ int main(int argc, char **argv) {
|
|||||||
// Step 1: Scan all folders
|
// Step 1: Scan all folders
|
||||||
// -------------------------------
|
// -------------------------------
|
||||||
|
|
||||||
mpmc_init(&g_file_queue, 1024 * 1024 * 1024);
|
mpmc_init(&g_file_queue, GiB(1));
|
||||||
|
|
||||||
DirQueue q;
|
DirQueue q;
|
||||||
memset(&q, 0, sizeof(q));
|
memset(&q, 0, sizeof(q));
|
||||||
@@ -636,11 +629,30 @@ int main(int argc, char **argv) {
|
|||||||
q.active = 0;
|
q.active = 0;
|
||||||
|
|
||||||
// starting hash threads
|
// starting hash threads
|
||||||
|
|
||||||
|
arena_params params = {
|
||||||
|
.reserve_size = GiB(1),
|
||||||
|
.commit_size = MiB(16),
|
||||||
|
.align = 0,
|
||||||
|
.push_size = 0,
|
||||||
|
.allow_free_list = true,
|
||||||
|
.allow_swapback = false,
|
||||||
|
.growth_policy = ARENA_GROWTH_NORMAL,
|
||||||
|
.commit_policy = ARENA_COMMIT_LAZY,
|
||||||
|
.max_nbre_blocks = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
WorkerContext workers[num_threads];
|
||||||
|
|
||||||
|
for (int i = 0; i < num_threads; i++) {
|
||||||
|
workers[i].queue = &g_file_queue;
|
||||||
|
workers[i].arena = arena_create(¶ms);
|
||||||
|
}
|
||||||
|
|
||||||
HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads);
|
HANDLE *hash_threads = malloc(sizeof(HANDLE) * num_threads);
|
||||||
|
|
||||||
for (size_t i = 0; i < num_threads; ++i) {
|
for (size_t i = 0; i < num_threads; ++i) {
|
||||||
hash_threads[i] =
|
hash_threads[i] = CreateThread(NULL, 0, hash_worker, &workers[i], 0, NULL);
|
||||||
CreateThread(NULL, 0, hash_worker, &g_file_queue, 0, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// starting scan threads
|
// starting scan threads
|
||||||
@@ -662,7 +674,6 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
|
||||||
|
|
||||||
// mpmc_finish(&g_file_queue);
|
|
||||||
// debug
|
// debug
|
||||||
for (size_t i = 0; i < num_threads; i++) {
|
for (size_t i = 0; i < num_threads; i++) {
|
||||||
mpmc_push(&g_file_queue, NULL);
|
mpmc_push(&g_file_queue, NULL);
|
||||||
@@ -694,11 +705,39 @@ int main(int argc, char **argv) {
|
|||||||
CloseHandle(hash_threads[i]);
|
CloseHandle(hash_threads[i]);
|
||||||
|
|
||||||
free(hash_threads);
|
free(hash_threads);
|
||||||
// free(g_file_queue.items);
|
|
||||||
|
|
||||||
WaitForSingleObject(progress, INFINITE);
|
WaitForSingleObject(progress, INFINITE);
|
||||||
CloseHandle(progress);
|
CloseHandle(progress);
|
||||||
|
|
||||||
|
// write file_hashes.txt
|
||||||
|
|
||||||
|
// FILE *f = fopen(FILE_HASHES_TXT, "wb");
|
||||||
|
//
|
||||||
|
// for (int i = 0; i < num_threads; i++) {
|
||||||
|
// mem_arena *arena = workers[i].arena;
|
||||||
|
//
|
||||||
|
// u8 *arena_base =
|
||||||
|
// (u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||||
|
// fwrite(arena_base, 1, arena->pos, f);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// fclose(f);
|
||||||
|
|
||||||
|
HANDLE h = CreateFileA(FILE_HASHES_TXT, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
|
||||||
|
FILE_ATTRIBUTE_NORMAL, NULL);
|
||||||
|
|
||||||
|
for (int i = 0; i < num_threads; i++) {
|
||||||
|
|
||||||
|
mem_arena *arena = workers[i].arena;
|
||||||
|
|
||||||
|
DWORD written;
|
||||||
|
|
||||||
|
u8 *arena_base =
|
||||||
|
(u8 *)arena + ALIGN_UP_POW2(sizeof(mem_arena), arena->align);
|
||||||
|
|
||||||
|
WriteFile(h, arena_base, (DWORD)arena->pos, &written, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
// done time
|
// done time
|
||||||
double total_seconds = timer_stop(&total_timer);
|
double total_seconds = timer_stop(&total_timer);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user