first commit

This commit is contained in:
2026-02-28 10:54:16 +01:00
parent 92aac64cf1
commit 1744309b50
9 changed files with 10602 additions and 0 deletions

892
arena.c Normal file
View File

@@ -0,0 +1,892 @@
#include "base.h"
#include "arena.h"
/* ============================================================
Helper functions
============================================================ */
/* Page size */
/* Cached system page size; 0 means it has not been queried yet. */
static u32 g_pagesize = 0;

/* Returns the system page size, asking the platform layer only on first use
 * and serving the cached value afterwards. */
static inline u32 arena_pagesize(void) {
  if (g_pagesize != 0) {
    return g_pagesize;
  }
  g_pagesize = plat_get_pagesize();
  return g_pagesize;
}
/* Finds the block that covers a global position.
 * Walks the chain backwards (through prev) starting at `arena` and returns the
 * first block whose range [base_pos, pos] (both ends inclusive) contains
 * global_pos, or NULL when no block in the chain does. */
mem_arena *arena_block_from_pos(mem_arena *arena, u64 global_pos) {
  for (mem_arena *block = arena; block != NULL; block = block->prev) {
    if (block->base_pos <= global_pos && global_pos <= block->pos) {
      return block;
    }
  }
  return NULL;
}
/* Finds the block that stores the element with the given 1-based index in a
 * fixed-size arena.
 *
 * arena : current (last) block of the chain
 * index : 1-based element index
 *
 * Returns the owning block, or NULL when the arena is NULL, is not fixed-size
 * (push_size == 0), index is 0, or no block holds that element.
 *
 * Every block starts one position after the end of the previous one, so an
 * element stored in block k sits at (index - 1) * push_size + (k - 1). The
 * candidate position is therefore computed with the block_index of the block
 * being tested (the same formula arena_ptr_from_index() uses), not with the
 * index of the current block. */
mem_arena *arena_block_from_index(mem_arena *arena, u64 index) {
  /* index == 0 would underflow (index - 1) below */
  if (!arena || index == 0 || arena->push_size == 0) {
    return NULL;
  }
  for (mem_arena *selected = arena; selected; selected = selected->prev) {
    u64 pos = (index - 1) * arena->push_size + (selected->block_index - 1);
    if (pos >= selected->base_pos && pos < selected->pos) {
      return selected;
    }
  }
  return NULL;
}
/* Finds the block whose used data range contains ptr.
 * A block's data starts right after its (alignment-padded) header and spans
 * pos - base_pos bytes. The chain is searched from `arena` backwards through
 * prev; NULL is returned when ptr lies in none of the blocks. */
mem_arena *arena_block_from_ptr(mem_arena *arena, u8 *ptr) {
  for (mem_arena *block = arena; block != NULL; block = block->prev) {
    u8 *data_begin =
        (u8 *)block + ALIGN_UP_POW2(sizeof(mem_arena), block->align);
    u8 *data_end = data_begin + block->pos - block->base_pos;
    if (data_begin <= ptr && ptr < data_end) {
      return block;
    }
  }
  return NULL;
}
/* Converts a pointer inside the arena into its global position.
 * Returns (u64)-1 when arena or ptr is NULL, when no block contains ptr, or
 * when the computed position is not below the owning block's pos. */
u64 arena_pos_from_ptr(mem_arena *arena, void *ptr) {
  const u64 invalid_pos = (u64)-1;
  ASSERT(arena);
  ASSERT(ptr);
  if (arena == NULL || ptr == NULL) {
    return invalid_pos;
  }
  mem_arena *block = arena_block_from_ptr(arena, ptr);
  ASSERT(block);
  if (block == NULL) {
    return invalid_pos;
  }
  /* Data begins right after the alignment-padded block header. */
  u8 *data_begin = (u8 *)block + ALIGN_UP_POW2(sizeof(mem_arena), block->align);
  u64 offset_in_block = (u8 *)ptr - data_begin;
  u64 global_pos = block->base_pos + offset_in_block;
  return (global_pos < block->pos) ? global_pos : invalid_pos;
}
/* Converts a global position into a pointer.
 *
 * arena      : current (last) block of the chain
 * global_pos : global position to resolve
 *
 * Returns the address inside the owning block, or NULL when arena is NULL or
 * no block covers global_pos (see arena_block_from_pos()).
 *
 * global_pos is unsigned, so the former "global_pos >= 0" assertion and
 * "global_pos < 0" guard could never trigger and have been dropped. */
void *arena_ptr_from_pos(mem_arena *arena, u64 global_pos) {
  ASSERT(arena);
  if (!arena) {
    return NULL;
  }
  mem_arena *owner = arena_block_from_pos(arena, global_pos);
  if (!owner) {
    return NULL;
  }
  /* Data begins right after the alignment-padded block header. */
  u8 *block_base = (u8 *)owner + ALIGN_UP_POW2(sizeof(mem_arena), owner->align);
  u64 local_offset = global_pos - owner->base_pos;
  return (void *)(block_base + local_offset);
}
/* Returns the address of the element with the given 1-based index in a
 * fixed-size arena, or NULL when arena is NULL, index is 0, the arena has no
 * fixed element size, or no block holds that element. */
void *arena_ptr_from_index(mem_arena *arena, u64 index) {
  ASSERT(arena);
  ASSERT(index);
  if (arena == NULL || index == 0 || arena->push_size == 0) {
    return NULL;
  }
  mem_arena *block = arena_block_from_index(arena, index);
  if (block == NULL) {
    return NULL;
  }
  /* Global position of the element: element bytes plus one position for each
   * block preceding the owner. */
  u64 global_pos = (index - 1) * block->push_size + block->block_index - 1;
  u64 offset_in_block = global_pos - block->base_pos;
  u8 *data_begin = (u8 *)block + ALIGN_UP_POW2(sizeof(mem_arena), block->align);
  return (void *)(data_begin + offset_in_block);
}
/* ============================================================
Arena creation / destruction
============================================================ */
/* Creates one arena block according to params.
 *
 * params : configuration; reserve_size must be > 0, align must be 0 (default
 *          ARENA_ALIGN) or a power of two.
 *
 * Returns the new block, or NULL when params is invalid or the platform
 * reserve/commit call fails.
 *
 * - reserve_size and commit_size are rounded up to the page size; the header
 *   lives at the start of the reservation, so at least the header pages are
 *   always committed.
 * - Pointer mode (allow_free_list): a private free-list arena of
 *   arena_free_node entries is created unless params->free_list is given.
 * - Stack mode: push_size is expected to be > 0 and allow_swapback is honoured.
 *
 * Invalid parameters are rejected (or clamped) even when ASSERT compiles to
 * nothing, so a release build never commits beyond its own reservation. */
mem_arena *arena_create(arena_params *params) { // mk create
  ASSERT(params);
  if (!params) {
    return NULL;
  }
  ASSERT(params->reserve_size > 0);
  if (params->reserve_size == 0) {
    return NULL;
  }
  /* Held in 64 bits so that a mask built by ALIGN_UP_POW2 from the page size
   * cannot be narrower than the sizes it is applied to. */
  u64 pagesize = arena_pagesize();
  u64 align = params->align ? params->align : ARENA_ALIGN;
  ASSERT((align & (align - 1)) == 0);
  if ((align & (align - 1)) != 0) {
    return NULL;
  }
  u64 reserve_size = ALIGN_UP_POW2(params->reserve_size, pagesize);
  u64 commit_size =
      params->commit_size ? ALIGN_UP_POW2(params->commit_size, pagesize) : 0;
  ASSERT(commit_size <= reserve_size);
  if (commit_size > reserve_size) {
    /* Never commit past the end of the reservation */
    commit_size = reserve_size;
  }
  mem_arena *arena = (mem_arena *)plat_mem_reserve(reserve_size);
  if (!arena)
    return NULL;
  /* ------------------------------------------------------------
  Commit policy
  ------------------------------------------------------------ */
  u64 header_commit = ALIGN_UP_POW2(sizeof(mem_arena), pagesize);
  if (params->commit_policy == ARENA_COMMIT_FULL) {
    /* Commit everything */
    if (!plat_mem_commit(arena, reserve_size)) {
      plat_mem_release(arena, reserve_size);
      return NULL;
    }
    commit_size = reserve_size;
  } else {
    /* Lazy / partial commit: the header must be writable right away */
    if (commit_size < header_commit) {
      commit_size = header_commit;
    }
    if (!plat_mem_commit(arena, commit_size)) {
      plat_mem_release(arena, reserve_size);
      return NULL;
    }
  }
  /* Initialize header */
  arena->prev = NULL;
  arena->next = NULL;
  arena->base_pos = 0;
  arena->pos = 0;
  arena->reserve_size = reserve_size;
  arena->commit_size = commit_size;
  arena->commit_pos = commit_size;
  arena->align = align;
  arena->push_size = 0;
  arena->allow_free_list = params->allow_free_list;
  arena->free_list = NULL;
  arena->allow_swapback = 0;
  arena->growth_policy = params->growth_policy;
  arena->commit_policy = params->commit_policy;
  arena->max_nbre_blocks = params->max_nbre_blocks;
  arena->block_index = 1;
  if (arena->allow_free_list) {
    /* Pointer mode: push_size may be 0 (variable-size elements) */
    arena->push_size = ALIGN_UP_POW2(params->push_size, arena->align);
    if (!params->free_list) {
      arena->free_list = arena_create(&(arena_params){
          .reserve_size = MiB(1),
          .commit_size = MiB(1),
          .align = ARENA_ALIGN,
          .push_size = sizeof(arena_free_node),
          .allow_free_list = false,
          .free_list = NULL,
          .allow_swapback = true,
          .growth_policy = ARENA_GROWTH_NORMAL,
          .commit_policy = ARENA_COMMIT_LAZY,
          .max_nbre_blocks = 0,
      });
      if (!arena->free_list) {
        plat_mem_release(arena, arena->reserve_size);
        return NULL;
      }
    } else {
      arena->free_list = params->free_list;
    }
  } else {
    /* Stack mode */
    ASSERT(params->push_size > 0);
    arena->push_size = ALIGN_UP_POW2(params->push_size, arena->align);
    arena->allow_swapback = params->allow_swapback;
  }
  return arena;
}
/* Releases every block of the arena (and its free-list arena) and sets
 * *arena_ptr to NULL.
 *
 * Returns (void *)1 on success, NULL when arena_ptr or *arena_ptr is NULL.
 *
 * Two details matter here:
 * - the links of a block are read BEFORE the block is released, because the
 *   header lives inside the memory being unmapped;
 * - release starts from the very last block of the chain (following next), so
 *   spare blocks kept after a pop or a clear are released as well. */
void *arena_destroy(mem_arena **arena_ptr) { // mk destroy
  ASSERT(arena_ptr);
  if (!arena_ptr)
    return NULL;
  mem_arena *arena = *arena_ptr;
  if (!arena)
    return NULL;
  if (arena->free_list) {
    arena_destroy(&arena->free_list);
  }
  /* Go to the tail of the chain, spare blocks included */
  mem_arena *selected = arena;
  while (selected->next) {
    selected = selected->next;
  }
  /* Release backwards, saving what we need from the header first */
  while (selected) {
    mem_arena *prev = selected->prev;
    u64 reserve_size = selected->reserve_size;
    plat_mem_release(selected, reserve_size);
    selected = prev;
  }
  *arena_ptr = NULL;
  return (void *)1;
}
/* ============================================================
Allocation
============================================================ */
/* Allocates one element from the arena.
 *
 * arena_ptr : address of the caller's arena pointer; updated when the
 *             allocation moves to another block
 * size      : requested bytes; ignored when the arena has a fixed push_size,
 *             otherwise rounded up to the arena alignment (0 is rejected)
 * zero      : when true the returned memory is cleared
 *
 * Returns the element address, or NULL on failure (NULL arguments, zero size,
 * block limit reached, request larger than a block, reserve/commit failure).
 *
 * Order of attempts:
 *  1. fixed-size arena with a non-empty free list: reuse the most recently
 *     freed slot;
 *  2. variable-size arena (when max_nbre_blocks is 0 or the limit is
 *     reached): scan the free list, newest entry first, for a range that is
 *     large enough and give any remainder back to the free list;
 *  3. bump allocation in the current block, moving to the spare next block or
 *     creating a new one when the current block is exhausted.
 *
 * The block header occupies the first bytes of the reservation, so both the
 * capacity check and the commit check are done on header + offset; comparing
 * the bare data offset would let the last allocations of a block land on
 * uncommitted memory or past the end of the reservation. */
void *arena_push(mem_arena **arena_ptr, u64 size, bool zero) { // mk push
  ASSERT(arena_ptr);
  if (!arena_ptr)
    return NULL;
  mem_arena *arena = *arena_ptr;
  ASSERT(arena);
  if (!arena)
    return NULL;
  /* ------------------------------------------------------------
  Resolve allocation size
  ------------------------------------------------------------ */
  if (arena->push_size > 0) {
    size = arena->push_size;
  } else {
    if (size == 0)
      return NULL;
    size = ALIGN_UP_POW2(size, arena->align);
  }
  /* ------------------------------------------------------------
  Free-list reuse (pointer mode)
  ------------------------------------------------------------ */
  if (arena->free_list) {
    mem_arena *fl = arena->free_list;
    u64 fl_base = ALIGN_UP_POW2(sizeof(mem_arena), fl->align);
    /* Global entry count; also the 1-based index of the newest entry */
    u64 count = fl->pos / fl->push_size;
    b32 arena_full = arena->max_nbre_blocks > 0 &&
                     arena->block_index >= arena->max_nbre_blocks;
    /* -------- Fixed-size: O(1) reuse -------- */
    if (arena->push_size > 0 && count > 0) {
      /* The newest entry lives in the last non-empty free-list block; it must
       * be addressed with that block's local entry count, not the global one */
      mem_arena *top = fl;
      while (top && top->pos == top->base_pos) {
        top = top->prev;
      }
      u64 top_count = top ? (top->pos - top->base_pos) / top->push_size : 0;
      if (top_count > 0) {
        arena_free_node *node =
            (arena_free_node *)((u8 *)top + fl_base +
                                (top_count - 1) * sizeof(arena_free_node));
        mem_arena *owner = arena_block_from_pos(arena, node->offset);
        ASSERT(owner);
        if (!owner)
          return NULL;
        void *result = (u8 *)owner +
                       ALIGN_UP_POW2(sizeof(mem_arena), owner->align) +
                       (node->offset - owner->base_pos);
        arena_pop(&arena->free_list);
        if (zero)
          memset(result, 0, size);
        return result;
      }
    }
    /* -------- Variable-size reuse -------- */
    if (arena->push_size == 0 && count > 0 &&
        (arena_full || arena->max_nbre_blocks == 0)) {
      for (mem_arena *selected_fl = fl; selected_fl;
           selected_fl = selected_fl->prev) {
        u64 selected_fl_count =
            (selected_fl->pos - selected_fl->base_pos) / selected_fl->push_size;
        /* count tracks the global 1-based index of entry j */
        for (u64 j = selected_fl_count; j >= 1; j--) {
          arena_free_node *node =
              (arena_free_node *)((u8 *)selected_fl + fl_base +
                                  (j - 1) * sizeof(arena_free_node));
          if (node->size >= size) {
            mem_arena *owner = arena_block_from_pos(arena, node->offset);
            ASSERT(owner);
            if (!owner)
              return NULL;
            u8 *result = (u8 *)owner +
                         ALIGN_UP_POW2(sizeof(mem_arena), owner->align) +
                         (node->offset - owner->base_pos);
            /* Read everything needed from the node before it is removed */
            u64 remaining = node->size - size;
            u64 rem_offset = node->offset + size;
            arena_swapback_pop(&arena->free_list, count);
            if (remaining > 0) {
              arena_free_node *rem =
                  (arena_free_node *)arena_push(&arena->free_list, 0, true);
              ASSERT(rem);
              if (!rem)
                return NULL;
              rem->offset = rem_offset;
              rem->size = remaining;
            }
            if (zero)
              memset(result, 0, size);
            return result;
          }
          count--;
        }
      }
    }
  }
  /* ------------------------------------------------------------
  Normal allocation (last block)
  ------------------------------------------------------------ */
  u64 pagesize = arena_pagesize();
  mem_arena *selected = arena;
  u64 header = ALIGN_UP_POW2(sizeof(mem_arena), selected->align);
  u64 local_pos = selected->pos - selected->base_pos;
  u64 local_pre = ALIGN_UP_POW2(local_pos, selected->align);
  u64 local_post = local_pre + size;
  if (header + local_post > selected->reserve_size) {
    /* Refuse requests that cannot fit even in an empty block, before any
     * state is modified */
    u64 target_reserve = selected->reserve_size;
    if (selected->next) {
      target_reserve = selected->next->reserve_size;
    } else if (arena->growth_policy == ARENA_GROWTH_DOUBLE) {
      target_reserve *= 2;
    }
    if (header + size > target_reserve) {
      return NULL;
    }
    if (arena->allow_free_list && arena->push_size == 0) {
      /* Hand the unused tail of the block to the free list. Skipped when the
       * block is already closed (nothing left), which also prevents the
       * subtraction below from wrapping around. */
      u64 tail_start = selected->pos;
      u64 used = local_pos + header;
      if (used < selected->reserve_size) {
        u64 tail_size = selected->reserve_size - used;
        arena_free_node *node =
            (arena_free_node *)arena_push(&arena->free_list, 0, true);
        ASSERT(node);
        if (!node)
          return NULL;
        node->offset = tail_start;
        node->size = tail_size;
        selected->pos = selected->base_pos + selected->reserve_size;
      }
    }
    if (selected->next) {
      /* Reuse the spare block left behind by an earlier pop */
      selected->next->base_pos = selected->pos + 1;
      selected->next->pos = selected->pos + 1;
      selected->next->free_list = selected->free_list;
      selected = selected->next;
      *arena_ptr = selected;
      local_pre = 0;
      local_post = size;
    } else {
      /* ------------------------------------------------------------
      Grow arena if needed
      ------------------------------------------------------------ */
      if (arena->max_nbre_blocks &&
          arena->block_index >= arena->max_nbre_blocks) {
        printf("Arena full.\n");
        return NULL;
      }
      u64 new_reserve = selected->reserve_size;
      if (arena->growth_policy == ARENA_GROWTH_DOUBLE)
        new_reserve *= 2;
      arena_params p = {
          .reserve_size = new_reserve,
          .commit_size = selected->commit_size,
          .align = selected->align,
          .push_size = arena->push_size,
          .allow_free_list = arena->allow_free_list,
          .free_list = arena->free_list,
          .allow_swapback = arena->allow_swapback,
          .growth_policy = arena->growth_policy,
          .commit_policy = arena->commit_policy,
          .max_nbre_blocks = arena->max_nbre_blocks,
      };
      mem_arena *next = arena_create(&p);
      if (!next)
        return NULL;
      /* A new block starts one position after the end of the previous one */
      next->base_pos = selected->pos + 1;
      next->pos = selected->pos + 1;
      next->prev = selected;
      selected->next = next;
      next->block_index = selected->block_index + 1;
      selected = next;
      *arena_ptr = selected;
      local_pre = 0;
      local_post = size;
    }
    header = ALIGN_UP_POW2(sizeof(mem_arena), selected->align);
  }
  /* ------------------------------------------------------------
  Commit memory if needed
  ------------------------------------------------------------ */
  u64 needed = header + local_post; /* bytes from the start of the block */
  if (needed > selected->commit_pos) {
    u64 new_commit = ALIGN_UP_POW2(needed, pagesize);
    new_commit = MIN(new_commit, selected->reserve_size);
    if (!plat_mem_commit((u8 *)selected + selected->commit_pos,
                         new_commit - selected->commit_pos)) {
      return NULL;
    }
    selected->commit_pos = new_commit;
  }
  /* ------------------------------------------------------------
  Finalize allocation
  ------------------------------------------------------------ */
  u8 *result = (u8 *)selected + header + local_pre;
  selected->pos = selected->base_pos + local_post;
  if (zero)
    memset(result, 0, size);
  return result;
}
/* ============================================================
Free (pointer mode): pop to free list
============================================================ */
/* Gives one element back to a pointer-mode arena.
 *
 * arena_ptr : address of the caller's arena pointer
 * ptr       : address of the element pointer; set to NULL on success
 * size      : element size for variable-size arenas (ignored when the arena
 *             has a fixed push_size)
 *
 * Returns (void *)1 on success, NULL on failure (NULL arguments, arena without
 * a free list, zero size, pointer not owned by the arena, free list full).
 *
 * If the element is the last allocation of the current block the block
 * position is simply moved back; otherwise an (offset, size) entry is pushed
 * on the free list. The memory itself is never cleared here.
 *
 * For variable-size arenas the size is rounded up to the arena alignment,
 * exactly as arena_push() did when the element was allocated, so the recorded
 * range matches the bytes that were really handed out. */
void *arena_free(mem_arena **arena_ptr, u8 **ptr, u64 size) { // mk free
  ASSERT(arena_ptr);
  if (!arena_ptr)
    return NULL;
  mem_arena *arena = *arena_ptr;
  ASSERT(arena);
  if (!arena)
    return NULL;
  ASSERT(arena->allow_free_list);
  ASSERT(ptr && *ptr);
  if (!arena->allow_free_list || !ptr || !*ptr)
    return NULL;
  u64 elem_size =
      arena->push_size ? arena->push_size : ALIGN_UP_POW2(size, arena->align);
  ASSERT(elem_size > 0);
  if (elem_size == 0)
    return NULL;
  /* ------------------------------------------------------------
  Find owning block
  ------------------------------------------------------------ */
  mem_arena *owner = arena_block_from_ptr(arena, *ptr);
  ASSERT(owner);
  if (!owner) {
    return NULL;
  }
  /* ------------------------------------------------------------
  Compute global offset using arena_pos_from_ptr()
  ------------------------------------------------------------ */
  u64 global_offset = arena_pos_from_ptr(arena, *ptr);
  if (global_offset == (u64)-1) {
    return NULL;
  }
  /* ------------------------------------------------------------
  Fast path: pop only if this is the LAST element of the LAST block
  ------------------------------------------------------------ */
  if (owner == arena && global_offset + elem_size == arena->pos) {
    arena->pos -= elem_size;
    /* NOTE(review): global_offset is never below base_pos for a pointer owned
     * by this block, so this branch looks unreachable; it is kept (with a
     * guard on prev) in case the intent was to step back once the block
     * becomes empty -- confirm. */
    if (arena->pos < arena->base_pos) {
      u64 crossed_pos = arena->pos;
      arena->pos = arena->base_pos;
      if (arena->prev) {
        arena->prev->pos = crossed_pos - 1;
        arena->prev->free_list = arena->free_list;
        *arena_ptr = arena->prev;
      }
    }
    *ptr = NULL;
    return (void *)1;
  }
  /* ------------------------------------------------------------
  Otherwise push into free list
  ------------------------------------------------------------ */
  arena_free_node *node = (arena_free_node *)arena_push(
      &arena->free_list, sizeof(arena_free_node), false);
  if (!node)
    return NULL;
  node->offset = global_offset;
  node->size = elem_size;
  *ptr = NULL;
  return (void *)1;
}
/* ============================================================
Stack operations
============================================================ */
/* Removes `count` elements from the top of a fixed-size stack arena.
 *
 * arena_ptr : address of the caller's arena pointer; moved back to an earlier
 *             block when the pop crosses a block boundary
 * count     : number of elements to remove; popping more than the arena holds
 *             empties it
 *
 * Returns (void *)1 on success, NULL when the arguments are NULL, the arena is
 * in pointer mode, or it has no fixed element size. Memory is not cleared and
 * emptied blocks stay linked through next for later reuse.
 *
 * Each block starts one position after the end of the previous block, so one
 * position is subtracted per boundary crossed. The target is kept at 0 once
 * it has been clamped (subtracting from 0 would wrap around) and is never
 * allowed to fall below the first block's base position. */
void *arena_pop_to(mem_arena **arena_ptr, u64 count) { // mk pop to
  ASSERT(arena_ptr);
  if (!arena_ptr || !*arena_ptr) {
    return NULL;
  }
  mem_arena *arena = *arena_ptr;
  ASSERT(!arena->allow_free_list);
  ASSERT(arena->push_size > 0);
  if (arena->allow_free_list || arena->push_size == 0) {
    return NULL;
  }
  u64 pop_bytes = count * arena->push_size;
  u64 target_pos = (arena->pos < pop_bytes) ? 0 : arena->pos - pop_bytes;
  mem_arena *selected = arena;
  while (selected->base_pos > target_pos && selected->prev) {
    /* This block becomes empty; continue in the previous one */
    selected->pos = selected->base_pos;
    if (target_pos > 0) {
      target_pos -= 1;
    }
    selected = selected->prev;
  }
  if (target_pos < selected->base_pos) {
    target_pos = selected->base_pos;
  }
  selected->pos = target_pos;
  selected->free_list = arena->free_list;
  *arena_ptr = selected;
  return (void *)1;
}
/* Removes the element at a 1-based index from a fixed-size stack arena by
 * overwriting it with the last element and popping the last element.
 *
 * arena_ptr : address of the caller's arena pointer; updated when the pop
 *             moves back to an earlier block
 * index     : 1-based index of the element to remove (1 .. element count)
 *
 * Returns the result of the final pop ((void *)1 on success), or NULL when
 * the arguments are invalid, swap-back is not enabled, or index is out of
 * range. Element order is not preserved.
 *
 * Indices are 1-based throughout (see arena_ptr_from_index()), so the last
 * element is index == count; it is popped without copying. */
void *arena_swapback_pop(mem_arena **arena_ptr, u64 index) { // mk swapback
  ASSERT(arena_ptr);
  if (!arena_ptr) {
    return NULL;
  }
  mem_arena *arena = *arena_ptr;
  ASSERT(arena);
  if (!arena) {
    return NULL;
  }
  ASSERT(arena->push_size > 0);
  ASSERT(arena->allow_swapback);
  if (arena->push_size == 0 || !arena->allow_swapback) {
    return NULL;
  }
  u64 count = arena->pos / arena->push_size;
  ASSERT(index >= 1 && index <= count);
  if (index == 0 || index > count) {
    return NULL;
  }
  if (index != count) {
    /* Move the last element into the freed slot */
    u8 *dst = arena_ptr_from_index(arena, index);
    u8 *src = arena_ptr_from_index(arena, count);
    if (!dst || !src) {
      fprintf(stderr, "ERROR: Swapback pop failed, index out of range");
      return NULL;
    }
    memcpy(dst, src, arena->push_size);
  }
  void *r = arena_pop(&arena);
  *arena_ptr = arena;
  return r;
}
/* ============================================================
Utilities
============================================================ */
/* Empties the arena without releasing or clearing any memory.
 *
 * Resets the position of every block from the current one back to the first,
 * moves *arena_ptr to the first block and empties the free-list arena, if
 * there is one.
 *
 * Returns (void *)1 on success, NULL when arena_ptr or *arena_ptr is NULL or
 * the free list could not be cleared.
 *
 * The free list is cleared first: clearing rewinds its pointer to its own
 * first block, and it is that rewound pointer that must be stored in the
 * blocks of this arena. */
void *arena_clear(mem_arena **arena_ptr) { // mk clear
  ASSERT(arena_ptr);
  if (!arena_ptr)
    return NULL;
  mem_arena *arena = *arena_ptr;
  if (!arena)
    return NULL;
  mem_arena *free_list = arena->free_list;
  if (free_list) {
    if (!arena_clear(&free_list))
      return NULL;
  }
  arena->free_list = free_list;
  mem_arena *selected = arena;
  while (selected->prev) {
    selected->pos = 0;
    selected->base_pos = 0;
    selected = selected->prev;
  }
  selected->pos = 0;
  selected->free_list = free_list;
  *arena_ptr = selected;
  return (void *)1;
}
/* Appends the blocks of *src_ptr after the current block of *dst_ptr.
 *
 * dst_ptr : destination arena; on success points to the current block of the
 *           merged chain (the former current block of src)
 * src_ptr : source arena; set to NULL on success
 *
 * Returns the merged arena, or NULL when an argument is NULL, both arguments
 * are the same arena, the configurations differ (align, push_size,
 * allow_free_list, allow_swapback, growth_policy) or the free lists cannot be
 * merged.
 *
 * Steps:
 *  - free-list entries of src are shifted into dst's position space and the
 *    two free-list arenas are merged;
 *  - pos, base_pos and block_index of every src block are shifted so that src
 *    starts one position after dst->pos;
 *  - max_nbre_blocks of both arenas is summed and stored in every block;
 *  - spare (empty) blocks that followed dst are moved behind the src blocks
 *    and renumbered sequentially.
 *
 * Block indices are kept in u32, like mem_arena.block_index, so chains longer
 * than 255 blocks are not truncated.
 *
 * NOTE(review): free-list offsets are shifted before the free-list merge is
 * attempted; if that merge fails on a configuration mismatch the src free
 * list keeps the shifted offsets. */
mem_arena *arena_merge(mem_arena **dst_ptr, mem_arena **src_ptr) { // mk merge
  if (!dst_ptr || !src_ptr)
    return NULL;
  mem_arena *dst = *dst_ptr;
  mem_arena *src = *src_ptr;
  if (!dst || !src || dst == src)
    return NULL;
  /* ------------------------------------------------------------
  Config compatibility
  ------------------------------------------------------------ */
  if (dst->align != src->align || dst->push_size != src->push_size ||
      dst->allow_free_list != src->allow_free_list ||
      dst->allow_swapback != src->allow_swapback ||
      dst->growth_policy != src->growth_policy) {
    return NULL;
  }
  /* ------------------------------------------------------------
  Merge free lists
  ------------------------------------------------------------ */
  u64 block_base_pos = dst->pos + 1;
  if (dst->allow_free_list) {
    /* Checked before any entry is modified: the merge below would fail */
    if (!dst->free_list || !src->free_list ||
        dst->free_list == src->free_list) {
      return NULL;
    }
    u64 fl_base = ALIGN_UP_POW2(sizeof(mem_arena), src->free_list->align);
    for (mem_arena *selected_fl = src->free_list; selected_fl;
         selected_fl = selected_fl->prev) {
      u64 selected_fl_count =
          (selected_fl->pos - selected_fl->base_pos) / selected_fl->push_size;
      for (u64 j = selected_fl_count; j >= 1; j--) {
        arena_free_node *node =
            (arena_free_node *)((u8 *)selected_fl + fl_base +
                                (j - 1) * sizeof(arena_free_node));
        node->offset += block_base_pos;
      }
    }
    src->free_list = arena_merge(&dst->free_list, &src->free_list);
    if (!src->free_list)
      return NULL;
  }
  /* ------------------------------------------------------------
  Walk src blocks once: find the first block of src
  ------------------------------------------------------------ */
  mem_arena *src_first = NULL;
  for (mem_arena *b = src; b; b = b->prev) {
    src_first = b;
  }
  /* ------------------------------------------------------------
  Update global metadata
  ------------------------------------------------------------ */
  u32 selected_block_index = dst->block_index;
  dst->max_nbre_blocks += src->max_nbre_blocks;
  mem_arena *selected = src_first;
  while (selected) {
    selected->pos += block_base_pos;
    selected->base_pos += block_base_pos;
    selected->block_index += selected_block_index;
    selected->max_nbre_blocks = dst->max_nbre_blocks;
    selected = selected->next;
  }
  for (mem_arena *a = dst; a; a = a->prev) {
    a->max_nbre_blocks = dst->max_nbre_blocks;
  }
  if (dst->next) {
    mem_arena *src_last = NULL;
    for (mem_arena *b = src; b; b = b->next) {
      src_last = b;
    }
    mem_arena *dst_fst_empty_block = dst->next;
    mem_arena *dst_empty_block = dst_fst_empty_block;
    u32 dst_empty_block_index = src_last->block_index;
    while (dst_empty_block) {
      /* Spare blocks continue the numbering after the last src block */
      dst_empty_block->block_index = ++dst_empty_block_index;
      dst_empty_block->max_nbre_blocks = dst->max_nbre_blocks;
      dst_empty_block = dst_empty_block->next;
    }
    /* ------------------------------------------------------------
    Stitch block chains
    ------------------------------------------------------------ */
    src_last->next = dst_fst_empty_block;
    dst_fst_empty_block->prev = src_last;
  }
  dst->next = src_first;
  src_first->prev = dst;
  *dst_ptr = src;
  *src_ptr = NULL;
  return *dst_ptr;
}
/* ============================================================
Temp arenas
============================================================ */
/* Opens a temporary scope on a stack-mode arena by recording the arena and
 * its current position; pass the result to arena_temp_end() to close it. */
mem_arena_temp arena_temp_begin(mem_arena *arena) {
  ASSERT(arena);
  ASSERT(!arena->allow_free_list);
  mem_arena_temp mark = {.arena = arena, .pos = arena->pos};
  return mark;
}
/* Closes a temporary scope: pops every element pushed since the mark taken by
 * arena_temp_begin().
 *
 * temp.arena must be the arena's current block and temp.pos the position
 * recorded at begin. A zeroed mark (NULL arena) is ignored.
 *
 * arena_pop_to() takes a number of elements, so the count is the distance
 * between the current position and the mark divided by the element size --
 * not the mark itself, which would pop as many elements as existed BEFORE the
 * scope was opened (and nothing at all for a mark taken on an empty arena).
 *
 * NOTE(review): when the scope spans several blocks the distance also counts
 * one position per block boundary; the integer division absorbs it as long as
 * fewer than push_size boundaries were crossed. */
void arena_temp_end(mem_arena_temp temp) {
  ASSERT(temp.arena);
  if (!temp.arena) {
    return;
  }
  ASSERT(!temp.arena->allow_free_list);
  ASSERT(temp.arena->push_size > 0);
  if (temp.arena->push_size == 0 || temp.arena->pos <= temp.pos) {
    return; /* nothing was pushed since the mark */
  }
  u64 pushed = (temp.arena->pos - temp.pos) / temp.arena->push_size;
  arena_pop_to(&temp.arena, pushed);
}
/* Per-thread scratch arenas, created on first use by arena_scratch_get(). */
static THREAD_LOCAL mem_arena *_scratch_arenas[2] = {NULL, NULL};

/* Returns a temporary scope on a per-thread scratch arena that is not listed
 * in `conflicts`.
 *
 * conflicts     : arenas the caller is already using (may be NULL)
 * num_conflicts : number of entries in conflicts
 *
 * Returns a zeroed mem_arena_temp (arena == NULL) when both scratch arenas
 * conflict or the scratch arena cannot be created; otherwise the mark taken
 * by arena_temp_begin(). Release it with arena_scratch_release().
 *
 * Scratch arenas are stack-mode arenas of 8-byte elements with 64 MiB blocks. */
mem_arena_temp arena_scratch_get(mem_arena **conflicts, u32 num_conflicts) {
  if (!conflicts) {
    num_conflicts = 0; /* nothing to compare against */
  }
  i32 scratch_index = -1;
  for (i32 i = 0; i < 2; i++) {
    b32 conflict_found = false;
    for (u32 j = 0; j < num_conflicts; j++) {
      if (_scratch_arenas[i] == conflicts[j]) {
        conflict_found = true;
        break;
      }
    }
    if (!conflict_found) {
      scratch_index = i;
      break;
    }
  }
  if (scratch_index == -1) {
    return (mem_arena_temp){0};
  }
  mem_arena **selected = &_scratch_arenas[scratch_index];
  if (*selected == NULL) {
    arena_params params = {
        .reserve_size = MiB(64),
        .commit_size = MiB(1),
        .align = ARENA_ALIGN,
        .push_size = 8,
        .allow_free_list = false,
        .allow_swapback = true,
        .growth_policy = ARENA_GROWTH_NORMAL,
        .commit_policy = ARENA_COMMIT_LAZY,
        .max_nbre_blocks = 0,
    };
    *selected = arena_create(&params);
    if (*selected == NULL) {
      /* Creation failed: report it instead of marking a NULL arena */
      return (mem_arena_temp){0};
    }
  }
  return arena_temp_begin(*selected);
}
/* Closes a scratch scope obtained from arena_scratch_get(); the mark is simply
 * forwarded to arena_temp_end(). */
void arena_scratch_release(mem_arena_temp scratch) {
  arena_temp_end(scratch);
}
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
/* Windows: returns the page size reported by GetSystemInfo(). */
u32 plat_get_pagesize(void) {
  SYSTEM_INFO info = {0};
  GetSystemInfo(&info);
  u32 page_bytes = info.dwPageSize;
  return page_bytes;
}
/* Windows: reserves `size` bytes of address space with VirtualAlloc
 * (MEM_RESERVE); returns NULL on failure. */
void *plat_mem_reserve(u64 size) {
  void *region = VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_READWRITE);
  return region;
}
/* Windows: commits `size` bytes at ptr as read/write memory (MEM_COMMIT);
 * returns non-zero on success. */
b32 plat_mem_commit(void *ptr, u64 size) {
  if (VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE) == NULL) {
    return false;
  }
  return true;
}
/* Windows: decommits `size` bytes at ptr (MEM_DECOMMIT), keeping the address
 * range reserved; returns the VirtualFree() result. */
b32 plat_mem_decommit(void *ptr, u64 size) {
  BOOL ok = VirtualFree(ptr, size, MEM_DECOMMIT);
  return ok;
}
/* Windows: releases a whole reservation made by plat_mem_reserve().
 *
 * ptr  : base address returned by plat_mem_reserve()
 * size : size of the reservation; unused here, kept for symmetry with the
 *        other platform implementations
 *
 * Returns non-zero on success.
 *
 * VirtualFree() requires dwSize to be 0 with MEM_RELEASE; passing the
 * reservation size makes the call fail and the memory is never released. */
b32 plat_mem_release(void *ptr, u64 size) {
  (void)size;
  return VirtualFree(ptr, 0, MEM_RELEASE) != 0;
}
#elif defined(__linux__)
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif
#include <sys/mman.h>
#include <unistd.h>
u32 plat_get_pagesize(void) { return (u32)sysconf(_SC_PAGESIZE); }
/* Linux: reserves `size` bytes as an inaccessible (PROT_NONE) private
 * anonymous mapping; returns NULL when mmap() fails. */
void *plat_mem_reserve(u64 size) {
  void *region =
      mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return (region != MAP_FAILED) ? region : NULL;
}
/* Linux: makes `size` bytes at ptr readable and writable; returns non-zero
 * when mprotect() succeeds. */
b32 plat_mem_commit(void *ptr, u64 size) {
  return mprotect(ptr, size, PROT_READ | PROT_WRITE) == 0;
}
/* Linux: makes `size` bytes at ptr inaccessible again and tells the kernel
 * the pages are no longer needed (MADV_DONTNEED); returns non-zero only when
 * both calls succeed. */
b32 plat_mem_decommit(void *ptr, u64 size) {
  if (mprotect(ptr, size, PROT_NONE) != 0) {
    return false;
  }
  return madvise(ptr, size, MADV_DONTNEED) == 0;
}
/* Linux: unmaps `size` bytes at ptr; returns non-zero when munmap()
 * succeeds. */
b32 plat_mem_release(void *ptr, u64 size) {
  return munmap(ptr, size) == 0;
}
#endif

432
arena.h Normal file
View File

@@ -0,0 +1,432 @@
#ifndef BASE_ARENA_H
#define BASE_ARENA_H
#include "base.h"
// #define _CRT_SECURE_NO_WARNINGS
//
// #include <assert.h>
// #include <stdbool.h>
// #include <stdint.h>
// #include <stdio.h>
// #include <string.h>
//
// /* ------------------------------------------------------------
// Base types
// ------------------------------------------------------------ */
//
// typedef uint8_t u8;
// typedef uint32_t u32;
// typedef uint64_t u64;
// typedef int32_t i32;
// typedef int b32;
//
// /* ------------------------------------------------------------
// Size helpers
// ------------------------------------------------------------ */
//
// #define KiB(x) ((u64)(x) * 1024ULL)
// #define MiB(x) (KiB(x) * 1024ULL)
//
// /* ------------------------------------------------------------
// Alignment helpers
// ------------------------------------------------------------ */
//
// #define ALIGN_UP_POW2(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
//
// /* ------------------------------------------------------------
// Assert
// ------------------------------------------------------------ */
//
// #ifndef ASSERT
// #define ASSERT(x) assert(x)
// #endif
//
/*
===============================================================================
ARENA USAGE GUIDE
===============================================================================
OVERVIEW
--------
The arena allocator is a high-performance memory allocator designed for
predictable allocation patterns. It supports:
- Multiple chained memory blocks
- Pointer-style allocation with free-list reuse
- Stack-style allocation
- Optional Swap-back removal for unordered data
- Merging compatible arenas
Memory is committed in page-sized chunks. The arena never commits less than
the system page size and grows by allocating new blocks when needed.
CORE CONCEPTS
-------------
Arena Blocks
~~~~~~~~~~~~
An arena consists of one or more memory blocks linked together.
Each block contains:
- base_pos : Global position at which the block starts (bytes)
- pos : Global position (bytes)
- commit_size : Total committed size (bytes)
- reserve_size : Total usable size of the block (bytes)
- prev/next: Links to neighboring blocks
- the arena pointer points to the current block
The arena allocates from the current block where the global position is or from the free list.
Blocks form a single logical address space:
global_offset = pos
Global Position Model
~~~~~~~~~~~~~~~~~~~~~
All allocations are addressed using a global offset. This allows:
- Popping across block boundaries
- Cross-block swap-back operations
- Arena merging by linking blocks
ARENA CONFIGURATION
-------------------
An arena is configured using the struct arena_params before creation.
Important parameters:
- push_size : Fixed element size in bytes (0 = variable-size arena)
- allow_free_list : Enables arena_free() and pointer mode, disabling it will
enable stack mode
- allow_swapback : Enables arena_swapback_pop(), works in stack mode only
- max_nbre_blocks : Maximum number of blocks (0 = unlimited)
- growth_policy : How blocks grow (next created block will have the same size
or double the size)
- commit_policy : How memory is committed (lazily if needed or commit all at
block creation)
ARENA CREATION AND DESTRUCTION
------------------------------
We create arenas using
mem_arena *arena_create(arena_params *params);
Behavior:
- Create an arena and return the pointer to this arena or NULL if it fails
Requirements:
- The configuration of the arena needs to be injected as an argument using
arena_params
We destroy arenas using
void *arena_destroy(mem_arena **arena_ptr);
Behavior:
- Return (void *)1 if it succeeds or NULL if it fails
ALLOCATION (arena_push)
-----------------------
We allocate using one function:
void *arena_push(mem_arena **arena_ptr, u64 size, bool zero);
Behavior:
- zero can be set to true or false to zero the allocated memory or not
- If the current block is full and the block limit is not reached
(max_nbre_blocks == 0 or block_index < max_nbre_blocks), the arena grows into a
new block: the new block is linked as arena->next and the arena pointer is set
to the new block
- The size can be set only if the arena has variable-size elements (push_size ==
0)
- Return the pointer of the pushed element if it succeeds or NULL if it fails
Variable-size allocation (allow_free_list == true, pointer mode only):
- Allocates 'size' bytes
- Only available in pointer mode with a free list
- Since the size of the elements is variable, when using the free list we loop
through all the elements until finding one with enough size, after allocation if
there is a remaining we store it as a new entry in the free list. The allocation
is slower than fixed-size arenas
- If there is not enough memory to push an element we try to create a new block
and push the element to it, if there is some memory remaining in the previous
block we add it to the free list
- max_nbre_blocks determines the behavior of the free list
- If max_nbre_blocks > 0 we push directly to the arena until it's full then
we use the free list
- If max_nbre_blocks = 0 (unlimited number of blocks) we use the free list
directly
Fixed-size allocation (allow_free_list can be true or false):
- Size is ignored, push_size defines element size (arena config)
- Available for both in stack and pointer modes
- Caller-provided size is ignored
- Required for swap-back correctness
- Faster and safer than variable-size mode
- If allow_free_list == true (pointer mode), uses the free list first then push
DEALLOCATION
------------
POINTER MODE (WITH A FREE LIST)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Popping requires the pointer that points to the element
- The only function allowed to pop is arena_free()
void *arena_free(mem_arena **arena_ptr, u8 **ptr, u64 size);
Behavior:
- Stores (offset, size) in the free list
- Memory is reused on future allocations
- Sets the pointer to NULL
- The freed memory is not zeroed, we can zero the memory only when allocating
- If the element is last, behaves like a pop, decrementing pos, if the position
crosses to the previous arena block, set the arena pointer to arena->prev
- Return (void *)1 if it succeeds or NULL if it fails
Requirements:
- allow_free_list == true
- Correct element size for variable-size arenas
STACK MODE (HAS NO FREE LIST)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- If allow_swapback = false, we can only pop with arena_pop_to()
- If allow_swapback = true, we can pop with arena_pop_to() and
arena_swapback_pop()
Pop to position:
void *arena_pop_to(mem_arena **arena_ptr, u64 count);
- Removes count elements from the top of the stack
- Can remove data across blocks
- Pops the element by moving the position (pos)
- Does not zero memory; memory can only be zeroed when allocating
- If we pop to the previous arena block, set the arena pointer to arena->prev
- Return (void *)1 if it succeeds or NULL if it fails
Requirements:
- allow_free_list == false
- Fixed-size elements (push_size > 0)
- The number of elements to pop
A macro arena_pop(arena) is available to pop one element
void *arena_swapback_pop(mem_arena **arena_ptr, u64 index);
- The top of the stack element is copied into the removed slot, the top of the
stack is then reduced by one element
- Works across multiple blocks
- Return (void *)1 if it succeeds or NULL if it fails
- Swapping back the last element will only pop it
Cases:
- Middle of block -> swapback
- End of non-last block -> swapback
- Last element overall -> pop only
Requirements:
- allow_free_list == false
- Fixed-size elements (push_size > 0)
- allow_swapback == true
CLEARING THE ARENA
------------------
void *arena_clear(mem_arena **arena_ptr);
- Resets pos for all blocks
- Keeps all committed memory
- Does NOT zero memory
- Resets the arena pointer to the first arena block
- Resets the free-list
- Return (void *)1 if it succeeds or NULL if it fails
MERGING ARENAS
--------------
mem_arena *arena_merge(mem_arena **dst_ptr, mem_arena **src_ptr);
Behavior:
Merges arena B into arena A by:
- Updating the pos and base_pos and block_index of all the blocks of B
- Linking the two arenas by setting arena->prev of the first block of B to the
last block of A
- If there are empty blocks in arena A, append them to the end of arena B
- Resets the arena pointer to arena B
- Merges the free list arenas if available
- max_nbre_blocks is summed
- Return the pointer to the new arena or NULL if it fails
Requirements:
- Configurations must match exactly
HELPER FUNCTIONS
------------------
mem_arena *arena_block_from_pos(mem_arena *last, u64 global_pos);
mem_arena *arena_block_from_index(mem_arena *last, u64 index);
mem_arena *arena_block_from_ptr(mem_arena *last, u8 *ptr);
u64 arena_pos_from_ptr(mem_arena *arena, void *ptr);
void *arena_ptr_from_pos(mem_arena *arena, u64 global_pos);
void *arena_ptr_from_index(mem_arena *arena, u64 index);
===============================================================================
*/
/* Size in bytes of the mem_arena header stored at the start of every block. */
#define ARENA_HEADER_SIZE (sizeof(mem_arena))
/* Default allocation alignment (the size of a pointer); arena_create() uses it
 * when arena_params.align is 0. */
#define ARENA_ALIGN (sizeof(void *))
// arena config
/* Reserve size of the block created when the arena grows (read by
 * arena_push() when it has to create a new block). */
typedef enum arena_growth_policy {
ARENA_GROWTH_NORMAL = 0, // new block reserves the same size as the current block
ARENA_GROWTH_DOUBLE // new block reserves twice the size of the current block
} arena_growth_policy;
/* How the memory of a block is committed (read by arena_create()). */
typedef enum arena_commit_policy {
ARENA_COMMIT_LAZY = 0, // commit commit_size (at least the header pages) at creation, the rest on demand
ARENA_COMMIT_FULL // commit the entire reserve at creation
} arena_commit_policy;
/* Configuration passed to arena_create(). Fields left out of a designated
 * initializer are zero, which selects the defaults noted below. */
typedef struct arena_params {
u64 reserve_size; // size of one arena block in bytes (rounded up to the page size, must be > 0)
u64 commit_size; // initial commit size in bytes (rounded up to the page size, 0 = header pages only)
u64 align; // allocation alignment, power of two (0 = ARENA_ALIGN)
// Element size rules:
// - stack mode : push_size > 0 (mandatory)
// - pointer fixed : push_size > 0
// - pointer variable : push_size == 0
// The value is rounded up to the alignment by arena_create().
u64 push_size;
// Pointer mode only: existing free-list arena to use; when NULL,
// arena_create() creates a private one.
struct mem_arena *free_list;
b32 allow_free_list; // pointer mode if true
b32 allow_swapback; // stack mode only (ignored in pointer mode)
arena_growth_policy growth_policy;
arena_commit_policy commit_policy;
u32 max_nbre_blocks; // 0 = unlimited
} arena_params;
/* One entry of the free list: a freed range of the owning arena. Entries are
 * stored as the elements of the free-list arena. */
typedef struct arena_free_node {
u64 offset; // global position of the freed range (as returned by arena_pos_from_ptr())
u64 size; // size of the freed range in bytes
} arena_free_node;
// arena definition struct
/* Header of one arena block. It is stored at the very start of the block's
 * reservation; element data begins right after it, at
 * ALIGN_UP_POW2(sizeof(mem_arena), align). Callers hold a pointer to the
 * current (last used) block of the chain. */
typedef struct mem_arena {
// block chaining
struct mem_arena *prev; // previous block, NULL on the first block
struct mem_arena *next; // following block, if any; set when the arena grows and kept after a pop so the block can be reused
// positions
u64 base_pos; // global position at which this block starts
u64 pos; // global position of the end of the used data in this block
// memory limits
u64 reserve_size; // bytes reserved for this block, header included
u64 commit_size; // bytes committed when the block was created
u64 commit_pos; // bytes committed so far, counted from the start of the block
// configuration
u64 align;
// Element size:
// - stack mode : fixed > 0
// - pointer fixed : fixed > 0
// - pointer variable : 0
u64 push_size;
// Pointer mode only
struct mem_arena *free_list; // current block of the free-list arena (elements are arena_free_node)
b32 allow_free_list;
// Stack mode only
b32 allow_swapback;
arena_growth_policy growth_policy;
arena_commit_policy commit_policy;
u32 max_nbre_blocks; // 0 = unlimited
u32 block_index; // 1-based index of this block in the chain
} mem_arena;
/* Mark returned by arena_temp_begin() / arena_scratch_get() and consumed by
 * arena_temp_end() / arena_scratch_release(). */
typedef struct mem_arena_temp {
mem_arena *arena; // arena the mark was taken on (NULL when arena_scratch_get() found no free scratch arena)
u64 pos; // value of arena->pos when the mark was taken
} mem_arena_temp;
// helper functions
// All helpers take the current (last) block of the arena and search the chain
// backwards through prev.
// Block whose [base_pos, pos] range contains global_pos, or NULL.
mem_arena *arena_block_from_pos(mem_arena *last, u64 global_pos);
// Block holding the element with the given 1-based index (fixed-size arenas
// only), or NULL.
mem_arena *arena_block_from_index(mem_arena *last, u64 index);
// Block whose used data range contains ptr, or NULL.
mem_arena *arena_block_from_ptr(mem_arena *last, u8 *ptr);
// Global position of ptr, or (u64)-1 when ptr does not belong to the arena.
u64 arena_pos_from_ptr(mem_arena *arena, void *ptr);
// Address of a global position, or NULL when no block contains it.
void *arena_ptr_from_pos(mem_arena *arena, u64 global_pos);
// Address of the element with the given 1-based index (fixed-size arenas
// only), or NULL.
void *arena_ptr_from_index(mem_arena *arena, u64 index);
// arena core functions
// creation / destruction
mem_arena *arena_create(arena_params *params);
void *arena_destroy(mem_arena **arena_ptr);
// allocation
void *arena_push(mem_arena **arena_ptr, u64 size, bool non_zero);
// pointer mode only
// - fixed-size arena : size is ignored
// - variable-size arena: size is mandatory
void *arena_free(mem_arena **arena_ptr, u8 **ptr, u64 size);
// stack mode only
void *arena_pop_to(mem_arena **arena_ptr, u64 count);
void *arena_swapback_pop(mem_arena **arena_ptr, u64 index);
// utilities
void *arena_clear(mem_arena **arena_ptr);
mem_arena *arena_merge(mem_arena **dst_ptr, mem_arena **src_ptr);
// temp arenas
/* create a temporary arena on top of an existing arena, it:
* takes an existing arena, marks it's position, allocate beyond this position
* and free it until the marked position when not needed*/
mem_arena_temp arena_temp_begin(mem_arena *arena);
void arena_temp_end(mem_arena_temp temp);
mem_arena_temp arena_scratch_get(mem_arena **conflicts, u32 num_conflicts);
void arena_scratch_release(mem_arena_temp scratch);
// Thread-local storage qualifier: MSVC spelling when _MSC_VER is defined,
// the __thread extension otherwise.
#if defined(_MSC_VER)
#define THREAD_LOCAL __declspec(thread)
#else
#define THREAD_LOCAL __thread
#endif
// Helpers
// Pointer mode only
/* Fixed-size arena: size is implicit, can pass 0.
 * All four macros pass size 0 and only use T to cast the result.
 * NOTE(review): PUSH_ARRAY / PUSH_ARRAY_NZ do not use `n` at all - confirm
 * that this is intended for fixed-size arenas. */
#define PUSH_STRUCT(arena, T) \
((T *)arena_push((arena), 0, true)) // Zeroes the memory
#define PUSH_STRUCT_NZ(arena, T) ((T *)arena_push((arena), 0, false))
#define PUSH_ARRAY(arena, T, n) ((T *)arena_push((arena), 0, true))
#define PUSH_ARRAY_NZ(arena, T, n) ((T *)arena_push((arena), 0, false))
/* Variable-size arena helpers: REQUIRE explicit size via arena_push() */
#define ARENA_PUSH(arena, size) arena_push((arena), (size), true)
#define ARENA_PUSH_NZ(arena, size) arena_push((arena), (size), false)
/* Shorthand for arena_pop_to() with a count of 1. */
#define arena_pop(arena_ptr) arena_pop_to((arena_ptr), 1)
// Platform memory layer used by the arena (implemented elsewhere).
// The b32 results presumably signal success/failure - confirm in the platform
// implementation.
u32 plat_get_pagesize(void);
void *plat_mem_reserve(u64 size);
b32 plat_mem_commit(void *ptr, u64 size);
b32 plat_mem_decommit(void *ptr, u64 size);
b32 plat_mem_release(void *ptr, u64 size);
#endif // BASE_ARENA_H

61
base.h Normal file
View File

@@ -0,0 +1,61 @@
#ifndef BASE_H
#define BASE_H
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/* ------------------------------------------------------------
Base types: short aliases for the <stdint.h> fixed-width integers,
boolean-like integers (b8, b32) and floating point types.
------------------------------------------------------------ */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef i8 b8; /* boolean stored in an i8 */
typedef int b32; /* boolean stored in a plain int */
typedef float f32;
typedef double f64;
/* ------------------------------------------------------------
Size helpers: convert a count of KiB / MiB to bytes. The argument is
widened to u64 before multiplying.
------------------------------------------------------------ */
#define KiB(x) ((u64)(x) * 1024ULL)
#define MiB(x) (KiB(x) * 1024ULL)
/* ------------------------------------------------------------
Min / Max helpers (only defined if the platform has not already
provided them). Each argument appears twice in the expansion, so
arguments with side effects (e.g. i++) are evaluated more than once.
------------------------------------------------------------ */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* ------------------------------------------------------------
Alignment helpers
------------------------------------------------------------ */
/* Round x up to the next multiple of a, where a must be a non-zero power of
 * two. Both operands are widened to 64 bits before the mask is built:
 * building the mask in the type of `a` would, for a 32-bit alignment such as
 * a u32 page size, zero-extend the mask and clear the upper 32 bits of a
 * 64-bit x. The result has type uint64_t. Each argument is evaluated more
 * than once, so avoid side effects. */
#define ALIGN_UP_POW2(x, a) \
  (((uint64_t)(x) + ((uint64_t)(a) - 1)) & ~((uint64_t)(a) - 1))
/* ------------------------------------------------------------
Assert: defaults to the standard assert() unless the including
code has already defined its own ASSERT.
------------------------------------------------------------ */
#ifndef ASSERT
#define ASSERT(x) assert(x)
#endif
#endif // Base.h

7
file_hasher.c Normal file
View File

@@ -0,0 +1,7 @@
#define _CRT_SECURE_NO_WARNINGS
#if defined(_WIN32) || defined(_WIN64)
#include "platform_windows.c"
#else
#include "platform_posix.c"
#endif

157
platform.h Normal file
View File

@@ -0,0 +1,157 @@
#pragma once
#if defined(_WIN32) || defined(_WIN64)
#define PLATFORM_WINDOWS 1
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <aclapi.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <windows.h>
#else
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <dirent.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#define XXH_VECTOR XXH_AVX2 //don't compile with gcc see xxhash.h line 4082
#define XXH_INLINE_ALL
#include "xxhash.c"
#include "xxhash.h"
// ----------------------------- Config -------------------------------------
#define FILE_LIST_TXT "file_list.txt" // list of scanned paths, one per line
#define FILE_HASHES_TXT "file_hashes.txt" // one result line per hashed file
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
#define MAX_PATHLEN 4096 // size of path buffers, including the terminator
#define READ_BLOCK (64 * 1024) // 64KB blocks
// ----------------------------- Data types ---------------------------------
// Metadata recorded for one regular file found by the scanner.
typedef struct FileEntry {
char *path; // heap-allocated copy in the global/entry buffers (freed by free_entries)
uint64_t size_bytes;
uint64_t created_time; // epoch seconds (the POSIX code stores st_ctime here)
uint64_t modified_time; // epoch seconds
char owner[128]; // resolved owner name
} FileEntry;
/* File path and metadata */
/* Rewrite `p` in place so that every run of '/' and '\\' characters becomes a
 * single '/'. All other characters are copied unchanged; the result is never
 * longer than the input and is always NUL-terminated. */
static void normalize_path(char *p) {
  char *out = p;
  for (const char *in = p; *in != '\0'; ++in) {
    char ch = *in;
    int is_separator = (ch == '/' || ch == '\\');
    if (!is_separator) {
      *out++ = ch;
      continue;
    }
    /* Emit one '/' per separator run: skip if the previous output char is
     * already the '/' we wrote for this run. */
    if (out == p || out[-1] != '/') {
      *out++ = '/';
    }
  }
  *out = '\0';
}
// Per-platform metadata queries, implemented in platform_posix.c /
// platform_windows.c.
// Writes the creation and modification times (epoch seconds) of `path`.
void platform_get_file_times(const char *path, uint64_t *out_created,
uint64_t *out_modified);
// Writes the owner name of `path` into out_owner (at most out_owner_size bytes).
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size);
/* scan folder timer */
/* Elapsed-time stopwatch. The Windows build uses the performance counter; any
 * other platform uses struct timespec so that this header also compiles when
 * it is included from platform_posix.c (LARGE_INTEGER does not exist there).
 * Usage: timer_init() once, timer_start(&t), then timer_stop(&t) returns the
 * elapsed time in seconds. */
#if PLATFORM_WINDOWS
typedef struct {
  LARGE_INTEGER start;
  LARGE_INTEGER end;
} HiResTimer;
static LARGE_INTEGER g_qpc_freq;
/* Cache the performance-counter frequency; call once before timing. */
static void timer_init(void) { QueryPerformanceFrequency(&g_qpc_freq); }
static void timer_start(HiResTimer *t) { QueryPerformanceCounter(&t->start); }
/* Record the end tick and return seconds elapsed since timer_start(). */
static double timer_stop(HiResTimer *t) {
  QueryPerformanceCounter(&t->end);
  return (double)(t->end.QuadPart - t->start.QuadPart) /
         (double)g_qpc_freq.QuadPart;
}
#else
typedef struct {
  struct timespec start;
  struct timespec end;
} HiResTimer;
/* Read the monotonic clock when the platform exposes it, otherwise fall back
 * to the C11 calendar clock. */
static void hires_timer_now(struct timespec *ts) {
#if defined(CLOCK_MONOTONIC)
  clock_gettime(CLOCK_MONOTONIC, ts);
#else
  timespec_get(ts, TIME_UTC);
#endif
}
/* Nothing to prepare on this platform; kept so callers stay portable. */
static void timer_init(void) {}
static void timer_start(HiResTimer *t) { hires_timer_now(&t->start); }
/* Record the end time and return seconds elapsed since timer_start(). */
static double timer_stop(HiResTimer *t) {
  hires_timer_now(&t->end);
  return (double)(t->end.tv_sec - t->start.tv_sec) +
         (double)(t->end.tv_nsec - t->start.tv_nsec) / 1e9;
}
#endif
/* Scan folders */
/* Growable array of FileEntry records (used as a per-worker buffer by the
 * Windows scanner). */
typedef struct EntryBuffer {
FileEntry *entries; /* heap array, NULL while empty */
size_t count; /* entries in use */
size_t capacity; /* entries allocated */
} EntryBuffer;
/* Forward declaration so the scan prototypes can take a DirQueue pointer. */
typedef struct DirQueue DirQueue;
/* Scan one directory: sub-directories are pushed onto q, files are recorded
 * (into buf on Windows, into the global entry list on POSIX). */
void scan_folder_windows_parallel(const char *base, DirQueue *q,
EntryBuffer *buf);
void scan_folder_posix_parallel(const char *base, DirQueue *q);
/* Singly linked list node holding one directory path waiting to be scanned
 * (used by the POSIX directory queue). */
typedef struct DirJob {
char *path; /* heap-allocated; ownership passes to whoever pops the job */
struct DirJob *next;
} DirJob;
/* Work queue of directories still to be scanned, shared by the scan workers.
 * The Windows implementation uses the items/count/cap stack and `active`;
 * the POSIX implementation uses a DirJob linked list (head/tail) and
 * `active_workers`. The POSIX members were missing even though
 * platform_posix.c accesses them. */
typedef struct DirQueue {
  char **items;  /* stack of pending paths (Windows implementation) */
  size_t count;  /* number of paths in items */
  size_t cap;    /* allocated slots in items */
  size_t active; /* workers currently scanning (Windows implementation) */
  int stop;      /* set once the queue is drained and no worker is active */
#if PLATFORM_WINDOWS
  CRITICAL_SECTION cs;
  CONDITION_VARIABLE cv;
#else
  struct DirJob *head;   /* oldest pending job, NULL when empty */
  struct DirJob *tail;   /* newest pending job, NULL when empty */
  size_t active_workers; /* workers currently scanning a directory */
  pthread_mutex_t mutex;
  pthread_cond_t cond;
#endif
} DirQueue;
/* Hashing */
/* One hashing job: a file to hash plus the link to the next queued job. */
typedef struct Job {
FileEntry *file; /* points into the global entry array; not owned by the job */
struct Job *next;
} Job;
/* FIFO queue of hashing jobs shared by the worker threads. The lock and
 * condition variable are platform specific: the Windows code uses cs/cv, the
 * POSIX code uses mutex/cond (previously only the Windows members existed,
 * so the POSIX build could not compile). */
typedef struct {
  struct Job *head; /* next job to pop, NULL when empty */
  struct Job *tail; /* last job pushed, NULL when empty */
#if PLATFORM_WINDOWS
  CRITICAL_SECTION cs;
  CONDITION_VARIABLE cv;
#else
  pthread_mutex_t mutex;
  pthread_cond_t cond;
#endif
  atomic_size_t count; // queued jobs
  int stop;            /* non-zero tells idle workers to exit */
} JobQueue;
/* Arguments shared by all hashing worker threads. */
typedef struct {
JobQueue *queue; /* queue the worker pops jobs from */
atomic_size_t *done_counter; /* incremented once per finished job */
size_t total_jobs; /* number of jobs queued for this run */
atomic_int *live_workers; /* decremented when a worker exits */
} WorkerArg;

678
platform_posix.c Normal file
View File

@@ -0,0 +1,678 @@
#include "platform.h"
// ----------------------------- Globals ------------------------------------
// Total bytes read by the hashing workers (drives the MB/s display).
static atomic_uint_fast64_t g_bytes_processed = 0;
// Growable global array of discovered files; see add_entry()/free_entries().
FileEntry *g_entries = NULL;
size_t g_entry_count = 0; // entries in use
size_t g_entry_capacity = 0; // entries allocated
// ----------------------------- Utils --------------------------------------
/* Print `msg` followed by the current errno description to stderr, then
 * terminate the process with exit status 1. Does not return. */
static void perror_exit(const char *msg) {
  perror(msg);
  exit(1);
}
/* malloc() that never returns NULL: on allocation failure it reports
 * "malloc" via perror and exits with status 1. */
static void *xmalloc(size_t n) {
  void *block = malloc(n);
  if (block != NULL) {
    return block;
  }
  perror("malloc");
  exit(1);
}
/* Append a deep copy of `src` to the global g_entries array, growing it as
 * needed. The path string is duplicated, so `src->path` may point at a
 * temporary buffer.
 *
 * The directory scan calls this from several worker threads at once, so the
 * shared array, count and capacity are updated under a mutex. Allocation
 * failures terminate the program via perror_exit(). */
static void add_entry(const FileEntry *src) {
  static pthread_mutex_t entries_mutex = PTHREAD_MUTEX_INITIALIZER;

  /* Duplicate the path before taking the lock to keep the critical section
   * short. */
  char *path_copy = NULL;
  if (src->path) {
    path_copy = strdup(src->path);
    if (!path_copy)
      perror_exit("strdup");
  }

  pthread_mutex_lock(&entries_mutex);
  if (g_entry_count + 1 > g_entry_capacity) {
    size_t new_capacity = g_entry_capacity ? g_entry_capacity * 2 : 1024;
    FileEntry *grown = realloc(g_entries, sizeof(FileEntry) * new_capacity);
    if (!grown)
      perror_exit("realloc");
    g_entries = grown;
    g_entry_capacity = new_capacity;
  }
  FileEntry *dst = &g_entries[g_entry_count++];
  memset(dst, 0, sizeof(*dst));
  dst->size_bytes = src->size_bytes;
  dst->created_time = src->created_time;
  dst->modified_time = src->modified_time;
  dst->path = path_copy;
  strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
  dst->owner[sizeof(dst->owner) - 1] = '\0';
  pthread_mutex_unlock(&entries_mutex);
}
/* Release every path string and the global entry array itself, then reset
 * the array to its empty state. */
static void free_entries(void) {
  size_t remaining = g_entry_count;
  while (remaining > 0) {
    --remaining;
    free(g_entries[remaining].path);
  }
  free(g_entries);
  g_entry_capacity = 0;
  g_entry_count = 0;
  g_entries = NULL;
}
// ----------------------------- Owner lookup ------------------------------
/* Write the login name for `uid` into `out` (at most out_sz bytes, always
 * NUL-terminated), or "UNKNOWN" if the user cannot be resolved.
 *
 * Uses getpwuid_r() with a caller-owned buffer: the directory scan resolves
 * owners from several threads at once, and getpwuid() returns a pointer to
 * shared static storage that concurrent calls overwrite. */
static void get_file_owner(uid_t uid, char *out, size_t out_sz) {
  struct passwd entry;
  struct passwd *found = NULL;
  char scratch[16384]; /* backing store for the strings inside `entry` */

  int rc = getpwuid_r(uid, &entry, scratch, sizeof(scratch), &found);
  if (rc == 0 && found != NULL) {
    snprintf(out, out_sz, "%s", found->pw_name);
  } else {
    /* No such user, lookup error, or record larger than `scratch`. */
    snprintf(out, out_sz, "UNKNOWN");
  }
}
// ----------------------------- Format time helper -------------------------
/* Format epoch seconds `t` as local time "YYYY-MM-DD HH:MM:SS" into `out`
 * (buffer of out_sz bytes). Writes "N/A" when t is 0, when the time cannot be
 * converted, or when the formatted text does not fit: previously a failed
 * conversion left `tm` uninitialised and a failed strftime() left `out` with
 * indeterminate contents. */
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = (localtime_r(&tt, &tm) != NULL);
#endif
  if (!converted ||
      strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}
// --------------- parallel directory scanning ----------------
// Add queue helper functions
/* Append a copy of `path` to the tail of the directory queue and wake one
 * waiting worker. Allocation failures terminate the program instead of
 * dereferencing a NULL job or queueing a NULL path. */
static void dirqueue_push(DirQueue *q, const char *path) {
  DirJob *job = malloc(sizeof(*job));
  if (!job)
    perror_exit("malloc");
  job->path = strdup(path);
  if (!job->path)
    perror_exit("strdup");
  job->next = NULL;

  pthread_mutex_lock(&q->mutex);
  if (q->tail)
    q->tail->next = job;
  else
    q->head = job;
  q->tail = job;
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->mutex);
}
/* Block until a directory is queued or the queue is stopped. Returns the
 * heap-allocated path of the popped job (the caller frees it) and counts the
 * caller as an active worker, or returns NULL once the queue is stopped. */
static char *dirqueue_pop(DirQueue *q) {
  DirJob *taken = NULL;
  char *result = NULL;

  pthread_mutex_lock(&q->mutex);
  for (;;) {
    if (q->head || q->stop)
      break;
    pthread_cond_wait(&q->cond, &q->mutex);
  }
  if (!q->stop) {
    taken = q->head;
    q->head = taken->next;
    if (q->head == NULL)
      q->tail = NULL;
    q->active_workers++;
  }
  pthread_mutex_unlock(&q->mutex);

  if (taken) {
    result = taken->path;
    free(taken);
  }
  return result;
}
/* Mark the calling worker as finished with its directory. When the queue is
 * empty and no worker is active any more, the scan is complete: set `stop`
 * and wake every waiting worker so they can exit. */
static void dirqueue_done(DirQueue *q) {
  pthread_mutex_lock(&q->mutex);
  q->active_workers--;
  int scan_finished = (q->head == NULL) && (q->active_workers == 0);
  if (scan_finished) {
    q->stop = 1;
    pthread_cond_broadcast(&q->cond);
  }
  pthread_mutex_unlock(&q->mutex);
}
// Scanning directory worker thread function
/* Thread entry point for the directory scan: repeatedly pops a directory,
 * scans it (which may queue sub-directories) and reports completion, until
 * the queue signals that the scan is finished.
 *
 * Declared with the exact pthread start-routine signature: calling a
 * void-returning function through a `void *(*)(void *)` pointer is undefined
 * behaviour. Always returns NULL. */
static void *scan_worker(void *arg) {
  DirQueue *q = arg;
  for (;;) {
    char *dir = dirqueue_pop(q);
    if (!dir)
      break;
    scan_folder_posix_parallel(dir, q);
    free(dir);
    dirqueue_done(q);
  }
  return NULL;
}
// Scanning directory function
/* Scan one directory (non-recursively). Sub-directories are pushed onto `q`
 * for other workers; regular files are recorded with add_entry(). Entries
 * that cannot be lstat()ed, and anything that is neither a directory nor a
 * regular file (including symlinks, since lstat is used), are skipped.
 * Entries whose full path would not fit in MAX_PATHLEN are skipped too,
 * rather than operating on a silently truncated path. */
void scan_folder_posix_parallel(const char *base, DirQueue *q) {
  DIR *d = opendir(base);
  if (!d)
    return;
  struct dirent *ent;
  while ((ent = readdir(d))) {
    if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
      continue;
    char full[MAX_PATHLEN];
    int written = snprintf(full, sizeof(full), "%s/%s", base, ent->d_name);
    if (written < 0 || (size_t)written >= sizeof(full))
      continue; /* path too long for the buffer */
    struct stat st;
    if (lstat(full, &st) != 0)
      continue;
    if (S_ISDIR(st.st_mode)) {
      dirqueue_push(q, full);
    } else if (S_ISREG(st.st_mode)) {
      FileEntry fe;
      memset(&fe, 0, sizeof(fe));
      normalize_path(full);
      fe.path = full; /* add_entry() makes its own copy */
      fe.size_bytes = (uint64_t)st.st_size;
      fe.created_time = (uint64_t)st.st_ctime;
      fe.modified_time = (uint64_t)st.st_mtime;
      get_file_owner(st.st_uid, fe.owner, sizeof(fe.owner));
      add_entry(&fe);
    }
  }
  closedir(d);
}
// ----------------------------- Job queue ----------------------------------
/* Put a job queue into its empty, running state and create its mutex and
 * condition variable with default attributes. */
static void jobqueue_init(JobQueue *q) {
  q->tail = NULL;
  q->head = NULL;
  atomic_store(&q->count, 0);
  q->stop = 0;
  pthread_mutex_init(&q->mutex, NULL);
  pthread_cond_init(&q->cond, NULL);
}
/* Append `job` to the tail of the queue, bump the queued-job counter and wake
 * one waiting worker. */
static void jobqueue_push(JobQueue *q, Job *job) {
  pthread_mutex_lock(&q->mutex);
  job->next = NULL;
  if (q->tail == NULL) {
    q->head = job; /* queue was empty */
  } else {
    q->tail->next = job;
  }
  q->tail = job;
  atomic_fetch_add(&q->count, 1);
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->mutex);
}
/* Block until a job is available or the queue has been stopped. Returns the
 * oldest queued job, or NULL once the queue is stopped and drained. Jobs that
 * are still queued when stop is set are handed out before NULL is returned. */
static Job *jobqueue_pop(JobQueue *q) {
  Job *next_job = NULL;

  pthread_mutex_lock(&q->mutex);
  while (q->head == NULL && !q->stop) {
    pthread_cond_wait(&q->cond, &q->mutex);
  }
  /* The loop exits with either a job queued or stop set on an empty queue. */
  if (q->head != NULL) {
    next_job = q->head;
    q->head = next_job->next;
    if (q->head == NULL)
      q->tail = NULL;
  }
  pthread_mutex_unlock(&q->mutex);

  if (next_job != NULL)
    atomic_fetch_sub(&q->count, 1);
  return next_job;
}
/* Ask all workers to exit once the queue is empty: set the stop flag under
 * the lock and wake every thread waiting in jobqueue_pop(). */
static void jobqueue_stop(JobQueue *q) {
  pthread_mutex_lock(&q->mutex);
  {
    q->stop = 1;
    pthread_cond_broadcast(&q->cond);
  }
  pthread_mutex_unlock(&q->mutex);
}
// ----------------------------- Hashing helpers -----------------------------
/* Compute the XXH3 128-bit hash of the file at `path`, reading it in
 * READ_BLOCK chunks, and write it to `out_hex` as 32 lowercase hex digits
 * (out_hex must hold HASH_STRLEN bytes). Every byte read is added to
 * g_bytes_processed.
 *
 * Writes "ERROR" instead of a hash when the file cannot be opened, when the
 * hash state or read buffer cannot be allocated, or when a read fails.
 * Previously those last two cases produced a hash of a truncated or empty
 * stream. */
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
  int fd = open(path, O_RDONLY);
  if (fd < 0) {
    strcpy(out_hex, "ERROR");
    return;
  }

  XXH3_state_t *state = XXH3_createState();
  unsigned char *buf = malloc(READ_BLOCK);
  int ok = (state != NULL && buf != NULL);

  if (ok) {
    XXH3_128bits_reset(state);
    ssize_t r;
    while ((r = read(fd, buf, READ_BLOCK)) > 0) {
      XXH3_128bits_update(state, buf, (size_t)r);
      atomic_fetch_add(&g_bytes_processed, (uint64_t)r);
    }
    if (r < 0)
      ok = 0; /* read error: the digest would cover only part of the file */
  }

  if (ok) {
    XXH128_hash_t h = XXH3_128bits_digest(state);
    snprintf(out_hex, HASH_STRLEN, "%016llx%016llx",
             (unsigned long long)h.high64, (unsigned long long)h.low64);
  } else {
    strcpy(out_hex, "ERROR");
  }

  if (state)
    XXH3_freeState(state);
  free(buf);
  close(fd);
}
// ----------------------------- Worker --------------------------------------
static void *worker_thread_posix(void *argp) {
WorkerArg *w = (WorkerArg *)argp;
JobQueue *q = w->queue;
for (;;) {
Job *job = jobqueue_pop(q);
if (!job)
break;
char hex[HASH_STRLEN];
xxh3_hash_file_stream(job->file->path, hex);
// append to file_hashes.txt atomically: we will store results to a temp
// buffer and write them at the end (to avoid synchronization issues). But
// for simplicity, here we append directly using a file lock (fopen+fwrite
// guarded by mutex). We'll store results in job->file->path? Instead,
// simple global append with a mutex. Using a file-level append lock:
static pthread_mutex_t append_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&append_mutex);
FILE *hf = fopen(FILE_HASHES_TXT, "a");
if (hf) {
char created[32], modified[32];
format_time(job->file->created_time, created, sizeof(created));
format_time(job->file->modified_time, modified, sizeof(modified));
double size_kib = (double)job->file->size_bytes / (1024.0);
fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib,
created, modified, job->file->owner);
fclose(hf);
}
pthread_mutex_unlock(&append_mutex);
atomic_fetch_add(w->done_counter, 1);
free(job);
}
atomic_fetch_sub(w->live_workers, 1);
return NULL;
}
// ----------------------------- Progress display ---------------------------
/* Redraw a 40-column progress bar on the current stdout line:
 * '#' for the completed fraction, spaces for the rest, followed by the
 * percentage and the done/total counters. A total of 0 is shown as 0%. */
static void print_progress(size_t done, size_t total) {
  const int barw = 40;
  double fraction = 0.0;
  if (total)
    fraction = (double)done / (double)total;
  int filled = (int)(fraction * barw + 0.5);

  printf("\r[");
  int col = 0;
  while (col < filled) {
    putchar('#');
    ++col;
  }
  col = filled;
  while (col < barw) {
    putchar(' ');
    ++col;
  }
  printf("] %6.2f%% (%zu / %zu) ", fraction * 100.0, done, total);
  fflush(stdout);
}
// ----------------------------- Helpers: load/save --------------------------
/* Return 1 if stat() succeeds on `path` (any file type), 0 otherwise. */
static int file_exists(const char *path) {
  struct stat info;
  if (stat(path, &info) != 0) {
    return 0;
  }
  return 1;
}
/* Write the path of every global entry to `list_path`, one per line,
 * replacing any existing file. If the file cannot be opened the error is
 * reported with perror and nothing is written. */
static void save_file_list(const char *list_path) {
  FILE *out = fopen(list_path, "w");
  if (out == NULL) {
    perror("fopen file_list");
    return;
  }
  size_t idx = 0;
  while (idx < g_entry_count) {
    fprintf(out, "%s\n", g_entries[idx].path);
    ++idx;
  }
  fclose(out);
}
static void load_file_list(const char *list_path) {
FILE *f = fopen(list_path, "r");
if (!f)
return;
char line[MAX_PATHLEN];
while (fgets(line, sizeof(line), f)) {
line[strcspn(line, "\r\n")] = 0;
FileEntry fe;
memset(&fe, 0, sizeof(fe));
fe.path = line;
/* Populate metadata from filesystem */
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
add_entry(&fe);
}
fclose(f);
}
// Look up `path` in the hashes file (used when resuming).
// Each line of the file is written by the hashing workers as
//   hash \t path \t size \t created \t modified \t owner
// so the hash is the first tab-separated field and the path the second.
// Returns 1 and copies the hash into out_hex (HASH_STRLEN bytes, always
// NUL-terminated) if a line for `path` exists, 0 otherwise. This is a linear
// scan of the file on every call.
//
// The previous version read two whitespace-separated tokens and compared the
// FIRST one (the hash) against the path, so it never found a match; it also
// could not handle paths containing spaces.
static int find_hash_in_file(const char *hashfile, const char *path,
                             char *out_hex) {
  FILE *f = fopen(hashfile, "r");
  if (!f)
    return 0;

  char line[MAX_PATHLEN + 256]; /* path plus hash and metadata columns */
  const size_t wanted_len = strlen(path);
  int found = 0;

  while (fgets(line, sizeof(line), f)) {
    char *hash_end = strchr(line, '\t');
    if (!hash_end)
      continue; /* not a result line */

    const char *path_field = hash_end + 1;
    size_t path_len = strcspn(path_field, "\t\r\n");
    if (path_len != wanted_len || strncmp(path_field, path, path_len) != 0)
      continue;

    size_t hash_len = (size_t)(hash_end - line);
    if (hash_len > HASH_STRLEN - 1)
      hash_len = HASH_STRLEN - 1;
    memcpy(out_hex, line, hash_len);
    out_hex[hash_len] = '\0';
    found = 1;
    break;
  }
  fclose(f);
  return found;
}
// ----------------------------- Get file metadata -------------------------
/* Store the st_ctime and st_mtime of `path` (as epoch seconds) in
 * *out_created and *out_modified. Both are set to 0 if stat() fails. */
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  struct stat info;
  int have_info = (stat(path, &info) == 0);

  *out_created = have_info ? (uint64_t)info.st_ctime : 0;
  *out_modified = have_info ? (uint64_t)info.st_mtime : 0;
}
/* Write the owner name of `path` into out_owner (at most out_owner_size
 * bytes). Writes "UNKNOWN" when the file cannot be stat()ed. */
void platform_get_file_owner(const char *path, char *out_owner,
                             size_t out_owner_size) {
  struct stat info;
  if (stat(path, &info) != 0) {
    snprintf(out_owner, out_owner_size, "UNKNOWN");
    return;
  }
  get_file_owner(info.st_uid, out_owner, out_owner_size);
}
// ----------------------------- Main ---------------------------------------
/* Program entry point (POSIX build).
 *
 * Usage: file_hasher [-resume] [folder ...]
 *   - Without -resume: scans the given folders (prompting for one if none is
 *     given), writes the file list, then hashes every file.
 *   - With -resume: reloads the saved file list and hashes only files that do
 *     not yet appear in the hashes file.
 * Returns 0 on success (including "nothing to do"), 1 on setup errors.
 *
 * Fixes relative to the previous version: the prompted folder name is always
 * NUL-terminated; thread arrays, jobs and the resume map are allocation
 * checked; threads that fail to start are neither joined nor waited for; the
 * average speed no longer divides by a zero elapsed time; the worker id array
 * is freed. */
int main(int argc, char **argv) {
  char folders[64][MAX_PATHLEN]; // up to 64 input folders
  int folder_count = 0;
  int resume = 0;

  // -------------------------------
  // Parse arguments
  // -------------------------------
  for (int i = 1; i < argc; ++i) {
    if (strcmp(argv[i], "-resume") == 0) {
      resume = 1;
    } else if (folder_count < 64) {
      /* snprintf truncates like the old strncpy but always terminates. */
      snprintf(folders[folder_count], MAX_PATHLEN, "%s", argv[i]);
      folder_count++;
    }
  }

  // -------------------------------
  // Ask user if no folders provided
  // -------------------------------
  if (folder_count == 0 && !resume) {
    printf("Enter folder to process (Enter = current folder): ");
    fflush(stdout);
    char buf[MAX_PATHLEN];
    if (!fgets(buf, sizeof(buf), stdin))
      return 1;
    buf[strcspn(buf, "\r\n")] = 0;
    snprintf(folders[0], MAX_PATHLEN, "%s", buf[0] ? buf : ".");
    folder_count = 1;
  } else if (folder_count == 0 && resume) {
    strcpy(folders[0], ".");
    folder_count = 1;
  }

  // -------------------------------
  // Display selected folders
  // -------------------------------
  printf("Processing %d folder(s):\n", folder_count);
  for (int i = 0; i < folder_count; ++i) {
    printf(" - %s\n", folders[i]);
  }

  // -------------------------------
  // Detect hardware threads (CPU cores)
  // -------------------------------
  size_t hw_threads = 1;
  long cpus = sysconf(_SC_NPROCESSORS_ONLN);
  if (cpus > 0)
    hw_threads = (size_t)cpus;
  // Add some extra threads to overlap I/O more aggressively
  size_t num_threads = hw_threads * 2;
  if (num_threads < 2)
    num_threads = 2;

  // -------------------------------
  // Step 1: Scan all folders
  // -------------------------------
  if (!resume) {
    DirQueue q = {0};
    pthread_mutex_init(&q.mutex, NULL);
    pthread_cond_init(&q.cond, NULL);
    // Seed queue
    for (int i = 0; i < folder_count; ++i)
      dirqueue_push(&q, folders[i]);

    pthread_t *threads = xmalloc(sizeof(pthread_t) * num_threads);
    size_t scanners = 0; /* threads that actually started */
    for (size_t i = 0; i < num_threads; ++i) {
      if (pthread_create(&threads[scanners], NULL,
                         (void *(*)(void *))scan_worker, &q) == 0)
        ++scanners;
    }
    if (scanners == 0) {
      fprintf(stderr, "Failed to start any scan thread\n");
      free(threads);
      return 1;
    }
    for (size_t i = 0; i < scanners; ++i)
      pthread_join(threads[i], NULL);
    free(threads);
    pthread_mutex_destroy(&q.mutex);
    pthread_cond_destroy(&q.cond);

    printf("Found %zu files. Saving to %s\n", g_entry_count, FILE_LIST_TXT);
    save_file_list(FILE_LIST_TXT);
  } else {
    if (!file_exists(FILE_LIST_TXT)) {
      fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
      return 1;
    }
    load_file_list(FILE_LIST_TXT);
    printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
  }

  if (g_entry_count == 0) {
    printf("No files to process.\n");
    return 0;
  }

  // If resume: create map of which files are already hashed
  char **existing_hash = calloc(g_entry_count, sizeof(char *));
  if (!existing_hash) {
    perror("calloc");
    return 1;
  }
  if (resume && file_exists(FILE_HASHES_TXT)) {
    // For simplicity we parse hash file and match lines to list entries.
    for (size_t i = 0; i < g_entry_count; ++i) {
      char hex[HASH_STRLEN] = {0};
      if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
        existing_hash[i] = strdup(hex);
      }
    }
  }

  // Prepare job queue of only missing files (or all if not resume)
  JobQueue queue;
  jobqueue_init(&queue);
  size_t total_jobs = 0;
  for (size_t i = 0; i < g_entry_count; ++i) {
    if (resume && existing_hash[i])
      continue;
    Job *j = xmalloc(sizeof(*j));
    j->file = &g_entries[i];
    j->next = NULL;
    jobqueue_push(&queue, j);
    ++total_jobs;
  }
  if (total_jobs == 0) {
    printf("Nothing to do — all files already hashed.\n");
    return 0;
  }

  // Remove old hashes file if we're recomputing from scratch.
  if (!resume) {
    // create/overwrite hashes file
    FILE *hf = fopen(FILE_HASHES_TXT, "w");
    if (hf)
      fclose(hf);
  } // if resume, we append only missing

  // Starting thread pool
  atomic_size_t done_counter;
  atomic_store(&done_counter, 0);
  atomic_int live_workers;
  atomic_store(&live_workers, (int)num_threads);
  WorkerArg warg = {.queue = &queue,
                    .done_counter = &done_counter,
                    .total_jobs = total_jobs,
                    .live_workers = &live_workers};
  printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
         hw_threads);

  // Launch threads
  pthread_t *tids = xmalloc(sizeof(pthread_t) * num_threads);
  size_t workers = 0; /* threads that actually started */
  for (size_t i = 0; i < num_threads; ++i) {
    if (pthread_create(&tids[workers], NULL, worker_thread_posix, &warg) == 0)
      ++workers;
  }
  if (workers < num_threads)
    atomic_fetch_sub(&live_workers, (int)(num_threads - workers));
  if (workers == 0) {
    /* Without workers the progress loop below would never finish. */
    fprintf(stderr, "Failed to start any hashing thread\n");
    free(tids);
    return 1;
  }

  // Progress / timer
  struct timespec tstart, tnow;
  clock_gettime(CLOCK_MONOTONIC, &tstart);

  // ---------- Correct real-time MB/s (stable & accurate) ----------
  uint64_t last_bytes = atomic_load(&g_bytes_processed);
  double last_time = 0.0;
  double displayed_speed = 0.0;
  const double sample_interval = 0.5;
  char linebuf[256];
  for (;;) {
    size_t done = (size_t)atomic_load(&done_counter);
    // ---- monotonic time ----
    clock_gettime(CLOCK_MONOTONIC, &tnow);
    double now =
        (tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9;
    // ---- bytes so far ----
    uint64_t bytes = atomic_load(&g_bytes_processed);
    // ---- real sampler (independent of UI sleep) ----
    if (last_time == 0.0) {
      last_time = now;
      last_bytes = bytes;
    }
    double dt = now - last_time;
    if (dt >= sample_interval) {
      uint64_t db = bytes - last_bytes;
      if (db > 0 && dt > 0.0001) {
        displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
      }
      last_bytes = bytes;
      last_time = now;
    }
    // ---- progress bar build ----
    const int barw = 40;
    double pct = total_jobs ? (double)done / (double)total_jobs : 0.0;
    int filled = (int)(pct * barw + 0.5);
    int p = 0;
    p += snprintf(linebuf + p, sizeof(linebuf) - p, "[");
    for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i)
      p += snprintf(linebuf + p, sizeof(linebuf) - p, "#");
    for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i)
      p += snprintf(linebuf + p, sizeof(linebuf) - p, ".");
    snprintf(linebuf + p, sizeof(linebuf) - p,
             "] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs,
             displayed_speed);
    printf("\r%s", linebuf);
    fflush(stdout);
    if (done >= total_jobs)
      break;
    usleep(100000);
  }
  printf("\n\n");

  // stop queue and join threads
  jobqueue_stop(&queue);
  for (size_t i = 0; i < workers; ++i)
    pthread_join(tids[i], NULL);
  free(tids);

  // done time
  clock_gettime(CLOCK_MONOTONIC, &tnow);
  double elapsed =
      (tnow.tv_sec - tstart.tv_sec) + (tnow.tv_nsec - tstart.tv_nsec) / 1e9;
  printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed);
  uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
  double total_mb = (double)total_bytes / (1024.0 * 1024.0);
  double avg_mbps = elapsed > 0.0 ? total_mb / elapsed : 0.0;
  printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);

  // If resume: we appended missing entries. If not resume: we wrote all results
  // during workers. Note: This program appends hashes as workers finish. This
  // avoids holding all hashes in RAM.
  // Cleanup
  for (size_t i = 0; i < g_entry_count; ++i)
    free(existing_hash[i]);
  free(existing_hash);
  free_entries();
  return 0;
}

841
platform_windows.c Normal file
View File

@@ -0,0 +1,841 @@
#include "platform.h"
// ----------------------------- Globals ------------------------------------
// Growable global array of discovered files.
FileEntry *g_entries = NULL;
size_t g_entry_count = 0; // entries in use
size_t g_entry_capacity = 0; // entries allocated
// Set to non-zero to make the scan progress thread exit.
static atomic_int g_scan_done = 0;
// Number of files seen so far by the scan workers (for the progress display).
static atomic_size_t g_files_found = 0;
// Total bytes read by the hashing workers.
static atomic_uint_fast64_t g_bytes_processed = 0;
// __________________________________________________________________________
// Guards g_entries / g_entry_count / g_entry_capacity while scan workers merge
// their local buffers.
static CRITICAL_SECTION g_entries_cs;
// ----------------------------- Utils --------------------------------------
/* Print `msg` followed by the current errno description to stderr, then
 * terminate the process with exit status 1. Does not return. */
static void perror_exit(const char *msg) {
  perror(msg);
  exit(1);
}
/* malloc() that never returns NULL: on allocation failure it reports
 * "malloc" via perror and exits with status 1. */
static void *xmalloc(size_t n) {
  void *block = malloc(n);
  if (block != NULL) {
    return block;
  }
  perror("malloc");
  exit(1);
}
static void global_entries_push(const FileEntry *src) {
if (g_entry_count == g_entry_capacity) {
size_t newcap = g_entry_capacity ? g_entry_capacity * 2 : 1024;
g_entries = realloc(g_entries, newcap * sizeof(FileEntry));
if (!g_entries)
perror_exit("realloc");
g_entry_capacity = newcap;
}
FileEntry *dst = &g_entries[g_entry_count++];
memset(dst, 0, sizeof(*dst));
dst->size_bytes = src->size_bytes;
dst->created_time = src->created_time;
dst->modified_time = src->modified_time;
dst->path = strdup(src->path);
strncpy(dst->owner, src->owner, sizeof(dst->owner) - 1);
}
/* Release every path string and the global entry array itself, then reset
 * the array to its empty state. */
static void free_entries(void) {
  size_t remaining = g_entry_count;
  while (remaining > 0) {
    --remaining;
    free(g_entries[remaining].path);
  }
  free(g_entries);
  g_entry_capacity = 0;
  g_entry_count = 0;
  g_entries = NULL;
}
// ----------------------------- Convert filetime to epoch --------------
/* Convert a Windows FILETIME (100 ns ticks since 1601-01-01) to Unix epoch
 * seconds. Timestamps earlier than 1970-01-01 - including an all-zero
 * FILETIME - return 0, which format_time() prints as "N/A"; previously the
 * unsigned subtraction wrapped around and produced a huge bogus value. */
static uint64_t filetime_to_epoch(const FILETIME *ft) {
  /* Ticks between the Windows epoch (1601) and the Unix epoch (1970). */
  const uint64_t epoch_delta_ticks = 116444736000000000ULL;
  const uint64_t ticks_per_second = 10000000ULL;

  ULARGE_INTEGER ull;
  ull.LowPart = ft->dwLowDateTime;
  ull.HighPart = ft->dwHighDateTime;
  if (ull.QuadPart < epoch_delta_ticks)
    return 0;
  return (ull.QuadPart - epoch_delta_ticks) / ticks_per_second;
}
// ----------------------------- Resolve file owner ---------------------
/* Write the owner of `path` into `out` as "DOMAIN\name" (at most out_sz
 * bytes). Writes "UNKNOWN" if the security info cannot be read or the owner
 * SID cannot be resolved to an account name. */
static void get_file_owner(const char *path, char *out, size_t out_sz) {
  PSECURITY_DESCRIPTOR descriptor = NULL;
  PSID owner_sid = NULL;
  int resolved = 0;

  DWORD status =
      GetNamedSecurityInfoA(path, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION,
                            &owner_sid, NULL, NULL, NULL, &descriptor);
  if (status == ERROR_SUCCESS) {
    char name[64], domain[64];
    DWORD name_len = sizeof(name);
    DWORD domain_len = sizeof(domain);
    SID_NAME_USE use;
    if (LookupAccountSidA(NULL, owner_sid, name, &name_len, domain,
                          &domain_len, &use)) {
      snprintf(out, out_sz, "%s\\%s", domain, name);
      resolved = 1;
    }
  }
  if (!resolved) {
    snprintf(out, out_sz, "UNKNOWN");
  }

  /* The owner SID points into the descriptor, so release it last. */
  if (descriptor)
    LocalFree(descriptor);
}
// ----------------------------- Format time helper -------------------------
/* Format epoch seconds `t` as local time "YYYY-MM-DD HH:MM:SS" into `out`
 * (buffer of out_sz bytes). Writes "N/A" when t is 0, when the time cannot be
 * converted, or when the formatted text does not fit: previously a failed
 * conversion left `tm` uninitialised and a failed strftime() left `out` with
 * indeterminate contents. */
static void format_time(uint64_t t, char *out, size_t out_sz) {
  if (t == 0) {
    snprintf(out, out_sz, "N/A");
    return;
  }
  time_t tt = (time_t)t;
  struct tm tm;
  int converted;
#if PLATFORM_WINDOWS
  converted = (localtime_s(&tm, &tt) == 0);
#else
  converted = (localtime_r(&tt, &tm) != NULL);
#endif
  if (!converted ||
      strftime(out, out_sz, "%Y-%m-%d %H:%M:%S", &tm) == 0) {
    snprintf(out, out_sz, "N/A");
  }
}
// --------------- parallel directory scanning ----------------
/* Reset an entry buffer to the empty state (no storage allocated). */
static void entrybuf_init(EntryBuffer *b) {
  *b = (EntryBuffer){.entries = NULL, .count = 0, .capacity = 0};
}
/* Append a deep copy of `src` to the buffer, doubling its capacity (starting
 * at 256) when it is full. The path string is duplicated. Exits the program
 * if the buffer cannot be grown. */
static void entrybuf_push(EntryBuffer *b, const FileEntry *src) {
  int is_full = (b->count == b->capacity);
  if (is_full) {
    size_t grown_cap = 256;
    if (b->capacity)
      grown_cap = b->capacity * 2;
    b->entries = realloc(b->entries, grown_cap * sizeof(FileEntry));
    if (b->entries == NULL)
      perror_exit("realloc");
    b->capacity = grown_cap;
  }

  FileEntry *slot = &b->entries[b->count];
  b->count++;
  /* Zero first so the owner copy below is always NUL-terminated. */
  memset(slot, 0, sizeof(*slot));
  slot->path = strdup(src->path);
  slot->size_bytes = src->size_bytes;
  slot->created_time = src->created_time;
  slot->modified_time = src->modified_time;
  strncpy(slot->owner, src->owner, sizeof(slot->owner) - 1);
}
// Add queue helper functions
/* Push a copy of `path` onto the directory queue and wake one waiting
 * worker. Allocation failures terminate the program: previously a failed
 * realloc() was dereferenced and a failed _strdup() queued a NULL path. */
static void dirqueue_push(DirQueue *q, const char *path) {
  /* Copy outside the lock to keep the critical section short. */
  char *copy = _strdup(path);
  if (!copy)
    perror_exit("_strdup");

  EnterCriticalSection(&q->cs);
  if (q->count + 1 > q->cap) {
    size_t new_cap = q->cap ? q->cap * 2 : 1024;
    char **grown = realloc(q->items, new_cap * sizeof(char *));
    if (!grown)
      perror_exit("realloc");
    q->items = grown;
    q->cap = new_cap;
  }
  q->items[q->count++] = copy;
  WakeConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}
/* Take the most recently pushed directory from the queue and count the caller
 * as an active worker. While the queue is empty but other workers are still
 * active (they may push more directories) the call blocks. Returns NULL when
 * the queue is empty and no worker is active, i.e. the scan is complete. The
 * caller owns the returned string. */
static char *dirqueue_pop(DirQueue *q) {
  char *next_dir = NULL;

  EnterCriticalSection(&q->cs);
  for (;;) {
    if (q->count != 0 || q->active == 0)
      break;
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
  }
  if (q->count != 0) {
    q->count--;
    next_dir = q->items[q->count];
    q->active++;
  }
  /* else: empty queue with no active workers - truly done */
  LeaveCriticalSection(&q->cs);

  return next_dir;
}
/* Mark the calling worker as finished with its directory and wake every
 * waiter so they can re-check whether work or completion is available. */
static void dirqueue_done(DirQueue *q) {
  EnterCriticalSection(&q->cs);
  {
    q->active--;
    WakeAllConditionVariable(&q->cv);
  }
  LeaveCriticalSection(&q->cs);
}
/* Scan worker thread. Pops directories until the queue reports completion,
 * collecting files in a thread-local buffer, then merges that buffer into the
 * global entry array once under g_entries_cs. The FileEntry records (and
 * their path strings) are moved, not copied, so only the local array itself
 * is freed. Always returns 0.
 *
 * A worker that found no files now skips the merge: it used to call memcpy()
 * with a NULL source (and possibly a NULL destination), which is undefined
 * even for a length of zero. */
static DWORD WINAPI scan_worker(LPVOID arg) {
  DirQueue *q = (DirQueue *)arg;
  EntryBuffer local;
  entrybuf_init(&local);
  for (;;) {
    char *dir = dirqueue_pop(q);
    if (!dir)
      break;
    scan_folder_windows_parallel(dir, q, &local);
    free(dir);
    dirqueue_done(q);
  }

  // merge once at end
  if (local.count > 0) {
    EnterCriticalSection(&g_entries_cs);
    size_t needed = g_entry_count + local.count;
    if (needed > g_entry_capacity) {
      FileEntry *grown = realloc(g_entries, needed * sizeof(FileEntry));
      if (!grown)
        perror_exit("realloc");
      g_entries = grown;
      g_entry_capacity = needed;
    }
    memcpy(&g_entries[g_entry_count], local.entries,
           local.count * sizeof(FileEntry));
    g_entry_count = needed;
    LeaveCriticalSection(&g_entries_cs);
  }
  free(local.entries);
  return 0;
}
// Scanning directory function
/* Scan one directory (non-recursively). Sub-directories are pushed onto `q`
 * for other workers; files are appended to `buf` with a '/'-normalised path.
 * Reparse points (junctions, symlinks) are skipped.
 *
 * The file size is taken from the directory enumeration data instead of
 * opening every file with CreateFileA(): that saves a handle open/close per
 * file and still reports the size of files that cannot be opened for reading.
 * Paths that would not fit in MAX_PATHLEN are skipped rather than used in
 * truncated form. */
void scan_folder_windows_parallel(const char *base, DirQueue *q,
                                  EntryBuffer *buf) {
  char search[MAX_PATHLEN];
  int search_len = snprintf(search, sizeof(search), "%s\\*", base);
  if (search_len < 0 || (size_t)search_len >= sizeof(search))
    return; /* directory path too long to enumerate */

  WIN32_FIND_DATAA fd;
  HANDLE h = FindFirstFileA(search, &fd);
  if (h == INVALID_HANDLE_VALUE)
    return;
  do {
    if (!strcmp(fd.cFileName, ".") || !strcmp(fd.cFileName, ".."))
      continue;
    char full[MAX_PATHLEN];
    int full_len = snprintf(full, sizeof(full), "%s\\%s", base, fd.cFileName);
    if (full_len < 0 || (size_t)full_len >= sizeof(full))
      continue; /* path too long for the buffer */
    if (fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
      continue;
    if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      dirqueue_push(q, full);
    } else {
      atomic_fetch_add(&g_files_found, 1);
      FileEntry fe;
      memset(&fe, 0, sizeof(fe));
      char norm[MAX_PATHLEN];
      strncpy(norm, full, sizeof(norm) - 1);
      norm[sizeof(norm) - 1] = 0;
      normalize_path(norm);
      fe.path = norm; /* entrybuf_push() makes its own copy */
      platform_get_file_times(full, &fe.created_time, &fe.modified_time);
      platform_get_file_owner(full, fe.owner, sizeof(fe.owner));
      fe.size_bytes =
          ((uint64_t)fd.nFileSizeHigh << 32) | (uint64_t)fd.nFileSizeLow;
      entrybuf_push(buf, &fe);
    }
  } while (FindNextFileA(h, &fd));
  FindClose(h);
}
// Scan progress thread
/* Progress thread for the scan phase: until g_scan_done becomes non-zero,
 * sleeps 100 ms and then redraws the running count of files found on the
 * current console line. The argument is unused. Always returns 0. */
static DWORD WINAPI scan_progress_thread(LPVOID arg) {
  (void)arg;
  while (!atomic_load(&g_scan_done)) {
    Sleep(100); // 0.1 seconds between refreshes
    size_t found_so_far = atomic_load(&g_files_found);
    printf("\rScanning... %zu files found", found_so_far);
    fflush(stdout);
  }
  return 0;
}
// ----------------------------- Job queue ----------------------------------
/* Put a job queue into its empty, running state and initialise its critical
 * section and condition variable. */
static void jobqueue_init(JobQueue *q) {
  q->tail = NULL;
  q->head = NULL;
  atomic_store(&q->count, 0);
  q->stop = 0;
  InitializeCriticalSection(&q->cs);
  InitializeConditionVariable(&q->cv);
}
/* Append `job` to the tail of the queue, bump the queued-job counter and wake
 * one waiting worker. */
static void jobqueue_push(JobQueue *q, Job *job) {
  EnterCriticalSection(&q->cs);
  job->next = NULL;
  if (q->tail == NULL) {
    q->head = job; /* queue was empty */
  } else {
    q->tail->next = job;
  }
  q->tail = job;
  atomic_fetch_add(&q->count, 1);
  WakeConditionVariable(&q->cv);
  LeaveCriticalSection(&q->cs);
}
/* Block until a job is available or the queue has been stopped. Returns the
 * oldest queued job, or NULL once the queue is stopped and drained. Jobs that
 * are still queued when stop is set are handed out before NULL is returned. */
static Job *jobqueue_pop(JobQueue *q) {
  Job *next_job = NULL;

  EnterCriticalSection(&q->cs);
  while (q->head == NULL && !q->stop) {
    SleepConditionVariableCS(&q->cv, &q->cs, INFINITE);
  }
  /* The loop exits with either a job queued or stop set on an empty queue. */
  if (q->head != NULL) {
    next_job = q->head;
    q->head = next_job->next;
    if (q->head == NULL)
      q->tail = NULL;
  }
  LeaveCriticalSection(&q->cs);

  if (next_job != NULL)
    atomic_fetch_sub(&q->count, 1);
  return next_job;
}
/* Ask all workers to exit once the queue is empty: set the stop flag under
 * the lock and wake every thread waiting in jobqueue_pop(). */
static void jobqueue_stop(JobQueue *q) {
  EnterCriticalSection(&q->cs);
  {
    q->stop = 1;
    WakeAllConditionVariable(&q->cv);
  }
  LeaveCriticalSection(&q->cs);
}
// ----------------------------- Hashing helpers -----------------------------
static void xxh3_hash_file_stream(const char *path, char *out_hex) {
// compute XXH3_128 over file. POSIX and Windows use standard reads in this
// helper.
// On Windows try to use overlapped synchronous chunked reads for higher
// throughput.
HANDLE hFile =
CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
strcpy(out_hex, "ERROR");
return;
}
XXH128_hash_t h;
XXH3_state_t *state = XXH3_createState();
XXH3_128bits_reset(state);
BYTE *buf = (BYTE *)malloc(READ_BLOCK);
DWORD read = 0;
BOOL ok;
while (ReadFile(hFile, buf, READ_BLOCK, &read, NULL) && read > 0) {
XXH3_128bits_update(state, buf, (size_t)read);
atomic_fetch_add(&g_bytes_processed, (uint64_t)read);
}
h = XXH3_128bits_digest(state);
XXH3_freeState(state);
CloseHandle(hFile);
free(buf);
snprintf(out_hex, HASH_STRLEN, "%016llx%016llx", (unsigned long long)h.high64,
(unsigned long long)h.low64);
}
// ----------------------------- Worker --------------------------------------
static DWORD WINAPI worker_thread_windows(LPVOID argp) {
WorkerArg *w = (WorkerArg *)argp;
JobQueue *q = w->queue;
for (;;) {
Job *job = jobqueue_pop(q);
if (!job)
break;
char hex[HASH_STRLEN];
// On Windows we use overlapped ReadFile for large files would be better,
// but ReadFile with NULL overlapped is sufficient inside parallel threads.
xxh3_hash_file_stream(job->file->path, hex);
// append to hashes file using a critical section to avoid races
static CRITICAL_SECTION append_cs;
static LONG init = 0;
if (InterlockedCompareExchange(&init, 1, 1) == 0) {
// first time initialize
InitializeCriticalSection(&append_cs);
InterlockedExchange(&init, 1);
}
EnterCriticalSection(&append_cs);
FILE *hf = fopen(FILE_HASHES_TXT, "a");
if (hf) {
char created[32], modified[32];
format_time(job->file->created_time, created, sizeof(created));
format_time(job->file->modified_time, modified, sizeof(modified));
double size_kib = (double)job->file->size_bytes / (1024.0);
fprintf(hf, "%s\t%s\t%.2f\t%s\t%s\t%s\n", hex, job->file->path, size_kib,
created, modified, job->file->owner);
fclose(hf);
}
LeaveCriticalSection(&append_cs);
atomic_fetch_add(w->done_counter, 1);
free(job);
}
atomic_fetch_sub(w->live_workers, 1);
return 0;
}
// ----------------------------- Progress display ---------------------------
// Redraw a single-line text progress bar on stdout.
//
// done / total: completed and overall item counts. A total of 0 is shown as
// 0%. The line starts with '\r' so each call overwrites the previous bar, and
// stdout is flushed so the update is visible immediately.
static void print_progress(size_t done, size_t total) {
  const int barw = 40;

  double pct = 0.0;
  if (total != 0)
    pct = (double)done / (double)total;

  const int filled = (int)(pct * barw + 0.5);
  // Number of cells to draw: the bar width, or more if `filled` exceeds it.
  const int cells = filled > barw ? filled : barw;

  fputs("\r[", stdout);
  for (int col = 0; col < cells; ++col)
    putchar(col < filled ? '#' : ' ');
  printf("] %6.2f%% (%zu / %zu) ", pct * 100.0, done, total);
  fflush(stdout);
}
// ----------------------------- Helpers: load/save --------------------------
// Return non-zero when GetFileAttributesA() can obtain attributes for `path`
// (i.e. it does not report INVALID_FILE_ATTRIBUTES), zero otherwise.
static int file_exists(const char *path) {
  return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
}
// Write the path of every entry in g_entries to `list_path`, one per line,
// replacing any existing file.
//
// Failures are reported on stderr: an open failure via perror(), and a
// failed write or close with a message naming the file, because a silently
// truncated list would later be trusted by -resume.
static void save_file_list(const char *list_path) {
  FILE *f = fopen(list_path, "w");
  if (!f) {
    perror("fopen file_list");
    return;
  }

  int write_failed = 0;
  for (size_t i = 0; i < g_entry_count && !write_failed; ++i) {
    if (fprintf(f, "%s\n", g_entries[i].path) < 0)
      write_failed = 1;
  }

  // fclose flushes buffered data, so its result matters for a write stream.
  if (fclose(f) != 0)
    write_failed = 1;

  if (write_failed)
    fprintf(stderr, "Failed to write file list %s\n", list_path);
}
static void load_file_list(const char *list_path) {
FILE *f = fopen(list_path, "r");
if (!f)
return;
char line[MAX_PATHLEN];
while (fgets(line, sizeof(line), f)) {
line[strcspn(line, "\r\n")] = 0;
FileEntry fe;
memset(&fe, 0, sizeof(fe));
fe.path = line;
normalize_path(fe.path);
/* Populate metadata from filesystem */
platform_get_file_times(line, &fe.created_time, &fe.modified_time);
platform_get_file_owner(line, fe.owner, sizeof(fe.owner));
global_entries_push(&fe);
}
fclose(f);
}
// Look up an already-computed hash in the hashes file (used for resume).
// Simple linear search of the file: returns 1 if a hash for the given path is
// found (and writes it into out_hex), 0 otherwise.
// ----------------------------- Get file metadata -------------------------
// Search the hashes file for a record belonging to `path`.
//
// Records are the tab-separated lines appended by the hashing workers:
//   hash \t path \t size \t created \t modified \t owner
// so the file is read line by line and split on tabs. The previous
// whitespace-token parsing expected "path hash" pairs, which never matched
// this layout and broke on paths containing spaces.
//
// hashfile: path of the hashes file; a file that cannot be opened counts as
//           "not found".
// path:     exact path string to look for (compared with strcmp).
// out_hex:  receives the stored hash, truncated to HASH_STRLEN - 1
//           characters and always NUL-terminated, when a record is found.
//
// Returns 1 if a record was found, 0 otherwise.
//
// NOTE(review): a record whose hash field is "ERROR" is returned like any
// other, so such files are not retried on resume - confirm that is intended.
static int find_hash_in_file(const char *hashfile, const char *path,
                             char *out_hex) {
  FILE *f = fopen(hashfile, "r");
  if (!f)
    return 0;

  // Room for the path plus the hash and the remaining metadata fields.
  char line[MAX_PATHLEN + 512];
  int found = 0;

  while (!found && fgets(line, sizeof(line), f)) {
    char *hash_end = strchr(line, '\t');
    if (!hash_end)
      continue; // not a record line

    char *rec_path = hash_end + 1;
    size_t path_len = strcspn(rec_path, "\t\r\n");

    *hash_end = '\0';          // terminate the hash field
    rec_path[path_len] = '\0'; // terminate the path field

    if (strcmp(rec_path, path) != 0)
      continue;

    snprintf(out_hex, HASH_STRLEN, "%s", line);
    found = 1;
  }

  fclose(f);
  return found;
}
// Fetch the creation and last-write times of `path`.
//
// out_created / out_modified: receive the two timestamps converted with
// filetime_to_epoch(). Both are set to 0 when the file's attributes cannot
// be read.
void platform_get_file_times(const char *path, uint64_t *out_created,
                             uint64_t *out_modified) {
  WIN32_FILE_ATTRIBUTE_DATA info;

  // Start from the failure result and overwrite it on success.
  *out_created = 0;
  *out_modified = 0;

  if (!GetFileAttributesExA(path, GetFileExInfoStandard, &info))
    return;

  *out_created = filetime_to_epoch(&info.ftCreationTime);
  *out_modified = filetime_to_epoch(&info.ftLastWriteTime);
}
// Platform-layer wrapper for file ownership lookup: forwards `path`, the
// output buffer `out_owner` and its capacity `out_owner_size` unchanged to
// get_file_owner().
// NOTE(review): what is written on failure is decided by get_file_owner(),
// which is defined outside this section - check there.
void platform_get_file_owner(const char *path, char *out_owner,
size_t out_owner_size) {
get_file_owner(path, out_owner, out_owner_size);
}
// ----------------------------- Main ---------------------------------------
int main(int argc, char **argv) {
char folders[64][MAX_PATHLEN]; // up to 64 input folders
int folder_count = 0;
int resume = 0;
// -------------------------------
// Scanning and total timer init
// -------------------------------
timer_init();
HiResTimer total_timer;
HiResTimer scan_timer;
HiResTimer hash_timer;
timer_start(&total_timer);
timer_start(&scan_timer);
// -------------------------------
// Parse arguments
// -------------------------------
for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-resume") == 0) {
resume = 1;
} else {
if (folder_count < 64) {
strncpy(folders[folder_count], argv[i], MAX_PATHLEN - 1);
folders[folder_count][MAX_PATHLEN - 1] = 0;
folder_count++;
}
}
}
// -------------------------------
// Ask user if no folders provided
// -------------------------------
if (folder_count == 0 && !resume) {
printf("Enter folder to process (Enter = current folder): ");
fflush(stdout);
char buf[MAX_PATHLEN];
if (!fgets(buf, sizeof(buf), stdin))
return 1;
buf[strcspn(buf, "\r\n")] = 0;
if (buf[0] == 0)
strcpy(folders[0], ".");
else
strncpy(folders[0], buf, MAX_PATHLEN - 1);
folder_count = 1;
} else if (folder_count == 0 && resume) {
strcpy(folders[0], ".");
folder_count = 1;
}
// -------------------------------
// Display selected folders
// -------------------------------
printf("Processing %d folder(s):\n", folder_count);
for (int i = 0; i < folder_count; ++i) {
printf(" - %s\n", folders[i]);
}
// -------------------------------
// Detect hardware threads (CPU cores)
// -------------------------------
size_t hw_threads = 1;
// --- Windows: detect PHYSICAL cores (not logical threads) ---
DWORD len = 0;
GetLogicalProcessorInformation(NULL, &len);
SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION *)malloc(len);
if (GetLogicalProcessorInformation(buf, &len)) {
DWORD count = 0;
DWORD n = len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
for (DWORD i = 0; i < n; i++) {
if (buf[i].Relationship == RelationProcessorCore)
count++;
}
if (count > 0)
hw_threads = count;
}
free(buf);
// Add some extra threads to overlap I/O more aggressively
size_t num_threads = hw_threads * 2;
if (num_threads < 2)
num_threads = 2;
// -------------------------------
// Step 1: Scan all folders
// -------------------------------
InitializeCriticalSection(&g_entries_cs);
if (!resume) {
DirQueue q;
memset(&q, 0, sizeof(q));
InitializeCriticalSection(&q.cs);
InitializeConditionVariable(&q.cv);
q.active = 0;
HANDLE scan_progress =
CreateThread(NULL, 0, scan_progress_thread, NULL, 0, NULL);
for (int i = 0; i < folder_count; ++i) {
dirqueue_push(&q, folders[i]);
}
size_t scan_threads = hw_threads;
if (scan_threads < 2)
scan_threads = 2;
HANDLE *scan_tids = malloc(sizeof(HANDLE) * scan_threads);
for (size_t i = 0; i < scan_threads; ++i) {
scan_tids[i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)scan_worker,
&q, 0, NULL);
}
WaitForMultipleObjects((DWORD)scan_threads, scan_tids, TRUE, INFINITE);
atomic_store(&g_scan_done, 1);
WaitForSingleObject(scan_progress, INFINITE);
CloseHandle(scan_progress);
for (size_t i = 0; i < scan_threads; ++i)
CloseHandle(scan_tids[i]);
free(scan_tids);
double scan_seconds = timer_stop(&scan_timer);
double scan_rate = (double)g_entry_count / scan_seconds;
printf(". Scan rate : %.1f files/sec\n", scan_rate);
printf("Completed scanning in %.2f seconds. Saving to %s\n\n", scan_seconds,
FILE_LIST_TXT);
save_file_list(FILE_LIST_TXT);
} else {
if (!file_exists(FILE_LIST_TXT)) {
fprintf(stderr, "Resume requested but %s not found\n", FILE_LIST_TXT);
return 1;
}
load_file_list(FILE_LIST_TXT);
printf("Loaded %zu files from %s\n", g_entry_count, FILE_LIST_TXT);
}
if (g_entry_count == 0) {
printf("No files to process.\n");
return 0;
}
DeleteCriticalSection(&g_entries_cs);
// If resume: create map of which files are already hashed
char **existing_hash = calloc(g_entry_count, sizeof(char *));
for (size_t i = 0; i < g_entry_count; ++i)
existing_hash[i] = NULL;
if (resume && file_exists(FILE_HASHES_TXT)) {
// For simplicity we parse hash file and match lines to list entries.
for (size_t i = 0; i < g_entry_count; ++i) {
char hex[HASH_STRLEN] = {0};
if (find_hash_in_file(FILE_HASHES_TXT, g_entries[i].path, hex)) {
existing_hash[i] = strdup(hex);
}
}
}
// Prepare job queue of only missing files (or all if not resume)
JobQueue queue;
jobqueue_init(&queue);
size_t total_jobs = 0;
for (size_t i = 0; i < g_entry_count; ++i) {
if (resume && existing_hash[i])
continue;
Job *j = (Job *)malloc(sizeof(Job));
j->file = &g_entries[i];
j->next = NULL;
jobqueue_push(&queue, j);
++total_jobs;
}
if (total_jobs == 0) {
printf("Nothing to do — all files already hashed.\n");
return 0;
}
// Remove old hashes file if we're recomputing from scratch.
if (!resume) {
// create/overwrite hashes file
FILE *hf = fopen(FILE_HASHES_TXT, "w");
if (hf)
fclose(hf);
} // if resume, we append only missing
// Starting thread pool
atomic_size_t done_counter;
atomic_store(&done_counter, 0);
atomic_int live_workers;
atomic_store(&live_workers, (int)num_threads);
WorkerArg warg = {.queue = &queue,
.done_counter = &done_counter,
.total_jobs = total_jobs,
.live_workers = &live_workers};
printf("Starting thread pool: %zu threads (CPU cores: %zu)\n", num_threads,
hw_threads);
// Launch threads
HANDLE *tids = malloc(sizeof(HANDLE) * num_threads);
for (size_t i = 0; i < num_threads; ++i) {
tids[i] = CreateThread(NULL, 0, worker_thread_windows, &warg, 0, NULL);
}
// Progress / timer
struct timespec tstart, tnow;
// fallback for windows
LARGE_INTEGER freq, start_li;
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&start_li);
size_t last_done = 0;
// --------------- Hashing speed MB/s ----------------
uint64_t last_bytes = atomic_load(&g_bytes_processed);
double last_time = 0.0;
double displayed_speed = 0.0;
const double sample_interval = 0.5;
char linebuf[256];
for (;;) {
size_t done = (size_t)atomic_load(&done_counter);
// ---- monotonic time ----
LARGE_INTEGER now_li;
QueryPerformanceCounter(&now_li);
double now =
(double)(now_li.QuadPart - start_li.QuadPart) / (double)freq.QuadPart;
// ---- total processed bytes ----
uint64_t bytes = atomic_load(&g_bytes_processed);
// ---- real sampler (independent of UI sleep) ----
if (last_time == 0.0) {
last_time = now;
last_bytes = bytes;
}
double dt = now - last_time;
if (dt >= sample_interval) {
uint64_t db = bytes - last_bytes;
if (db > 0 && dt > 0.0001) {
displayed_speed = (double)db / (1024.0 * 1024.0) / dt;
}
last_bytes = bytes;
last_time = now;
}
// ---- progress bar build ----
const int barw = 40;
double pct = total_jobs ? (double)done / (double)total_jobs : 0.0;
int filled = (int)(pct * barw + 0.5);
int p = 0;
p += snprintf(linebuf + p, sizeof(linebuf) - p, "[");
for (int i = 0; i < filled && p < (int)sizeof(linebuf); ++i)
p += snprintf(linebuf + p, sizeof(linebuf) - p, "#");
for (int i = filled; i < barw && p < (int)sizeof(linebuf); ++i)
p += snprintf(linebuf + p, sizeof(linebuf) - p, ".");
snprintf(linebuf + p, sizeof(linebuf) - p,
"] %6.2f%% (%zu / %zu) %8.2f MB/s", pct * 100.0, done, total_jobs,
displayed_speed);
printf("\r%s", linebuf);
fflush(stdout);
if (done >= total_jobs)
break;
Sleep(100);
}
printf("\n\n");
// stop queue and join threads
jobqueue_stop(&queue);
WaitForMultipleObjects((DWORD)num_threads, tids, TRUE, INFINITE);
for (size_t i = 0; i < num_threads; ++i)
CloseHandle(tids[i]);
// done time
LARGE_INTEGER end_li;
QueryPerformanceCounter(&end_li);
double elapsed =
(double)(end_li.QuadPart - start_li.QuadPart) / (double)freq.QuadPart;
double total_seconds = timer_stop(&total_timer);
printf("Completed hashing %zu files in %.2f seconds\n", total_jobs, elapsed);
uint64_t total_bytes = (uint64_t)atomic_load(&g_bytes_processed);
double total_mb = (double)total_bytes / (1024.0 * 1024.0);
double avg_mbps = total_mb / elapsed;
printf("Total: %.2f MB, Average: %.2f MB/s\n", total_mb, avg_mbps);
printf(" Total time : %.2f seconds\n", total_seconds);
// If resume: we appended missing entries. If not resume: we wrote all results
// during workers. Note: This program appends hashes as workers finish. This
// avoids holding all hashes in RAM.
// Cleanup
for (size_t i = 0; i < g_entry_count; ++i)
if (existing_hash[i])
free(existing_hash[i]);
free(existing_hash);
free_entries();
return 0;
}

42
xxhash.c Normal file
View File

@@ -0,0 +1,42 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2023 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

7492
xxhash.h Normal file

File diff suppressed because it is too large Load Diff