Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 0029179fc0 | | |
| | f37e915489 | | |
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(filehasher
|
||||
project(dfin
|
||||
VERSION 1.0.0
|
||||
DESCRIPTION "High-performance file hasher with I/O Ring/io_uring support"
|
||||
DESCRIPTION "High-performance duplicate finder with I/O Ring/io_uring support"
|
||||
LANGUAGES C
|
||||
)
|
||||
|
||||
@@ -106,7 +106,7 @@ endif()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
set(SOURCES
|
||||
file_hasher.c
|
||||
duplicate_finder.c
|
||||
xxhash.c
|
||||
xxh_x86dispatch.c
|
||||
)
|
||||
@@ -116,7 +116,7 @@ set(HEADERS
|
||||
arena.h
|
||||
base.h
|
||||
xxhash.h
|
||||
lf_mpmc.h
|
||||
mt_mpmc.h
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -281,4 +281,4 @@ message(STATUS " Compiler: ${CMAKE_C_COMPILER}")
|
||||
message(STATUS " Build Type: ${CMAKE_BUILD_TYPE}")
|
||||
message(STATUS " Generator: ${CMAKE_GENERATOR}")
|
||||
message(STATUS " Platform: ${PLATFORM_NAME}")
|
||||
message(STATUS "----------------------------------------")
|
||||
message(STATUS "----------------------------------------")
|
||||
|
||||
117
arena.c
117
arena.c
@@ -196,7 +196,7 @@ mem_arena *arena_create(arena_params *params) { // mk create
|
||||
arena->free_list = arena_create(&(arena_params){
|
||||
.reserve_size = MiB(1),
|
||||
.commit_size = MiB(1),
|
||||
.align = ARENA_ALIGN,
|
||||
.align = ARENA_CACHE_ALIGN,
|
||||
.push_size = sizeof(arena_free_node),
|
||||
.allow_free_list = false,
|
||||
.free_list = NULL,
|
||||
@@ -620,6 +620,119 @@ void *arena_swapback_pop(mem_arena **arena_ptr, u64 index) { // mk swapback
|
||||
/* ============================================================
|
||||
Utilities
|
||||
============================================================ */
|
||||
/*
 * Bit flags accepted by arena_trim_string() in its `termination_flags`
 * argument. Each set bit makes that function append the matching character
 * after the string; bits may be OR-ed together.
 */
typedef enum arena_trim_flags {
  ARENA_TRIM_NONE = 0x00, /* no terminator appended */

  ARENA_TRIM_SPACE = 0x01, /* ' '  */
  ARENA_TRIM_TAB = 0x02,   /* '\t' */
  ARENA_TRIM_LF = 0x04,    /* '\n' */
  ARENA_TRIM_CR = 0x08,    /* '\r' */
  ARENA_TRIM_NUL = 0x10,   /* '\0' */
} arena_trim_flags;
|
||||
|
||||
/*
 * Appends the terminator characters selected by `termination_flags` to `str`
 * and pulls the arena position back so that only the bytes actually used by
 * the string stay allocated.
 *
 * Everything between `str` and the arena's current position is treated as
 * the string's reservation. The terminators start at the string's NUL byte
 * (overwriting it) and are written in the fixed order SPACE, TAB, CR, LF,
 * NUL, so ARENA_TRIM_CR | ARENA_TRIM_LF produces "\r\n". Unless
 * ARENA_TRIM_NUL is set, the result is no longer NUL-terminated.
 *
 * Parameters:
 *   arena_ptr          address of the arena handle; `str` must live in the
 *                      block *arena_ptr points to.
 *   str                NUL-terminated string inside that block.
 *   termination_flags  OR-ed arena_trim_flags bits.
 *
 * Returns the number of bytes the string now occupies, rounded up to
 * arena->align but never more than the original reservation, or 0 on error
 * (NULL argument, string not in the current block, or string plus
 * terminators not fitting in the reservation). On error neither the string
 * nor the arena position is modified.
 */
u64 arena_trim_string(mem_arena **arena_ptr, char *str, u8 termination_flags) {
  /* Flag -> character table, listed in the order the bytes are appended. */
  static const struct {
    u8 flag;
    char ch;
  } terminators[] = {
      {ARENA_TRIM_SPACE, ' '}, {ARENA_TRIM_TAB, '\t'}, {ARENA_TRIM_CR, '\r'},
      {ARENA_TRIM_LF, '\n'},   {ARENA_TRIM_NUL, '\0'},
  };
  const size_t terminator_count = sizeof terminators / sizeof terminators[0];

  ASSERT(arena_ptr);
  ASSERT(*arena_ptr);
  ASSERT(str);

  if (!arena_ptr || !*arena_ptr || !str) {
    return 0;
  }

  mem_arena *arena = *arena_ptr;

  /* ------------------------------------------------------------
     Find owning block; it must be the current one
     ------------------------------------------------------------ */

  mem_arena *owner = arena_block_from_ptr(arena, (u8 *)str);

  ASSERT(owner);
  if (!owner) {
    return 0;
  }

  if (owner != arena) {
    fprintf(stderr, "arena_trim_string(): string is not "
                    "in current arena block.\n");
    return 0;
  }

  /* ------------------------------------------------------------
     Reservation: [str_pos, arena->pos)
     ------------------------------------------------------------ */

  u64 str_pos = arena_pos_from_ptr(arena, str);

  /* Guard the unsigned subtraction below: a wrapped size would disable
     every bounds check that follows. */
  if (str_pos > arena->pos) {
    fprintf(stderr, "arena_trim_string(): string starts beyond "
                    "the arena position.\n");
    return 0;
  }

  u64 allocated_size = arena->pos - str_pos;

  /* ------------------------------------------------------------
     Compute sizes (bounded scan: never read past the reservation)
     ------------------------------------------------------------ */

  const char *nul = memchr(str, '\0', (size_t)allocated_size);
  if (!nul) {
    fprintf(stderr, "arena_trim_string(): string overflow "
                    "detected.\n");
    return 0;
  }

  u64 str_size = (u64)(nul - str);

  u64 termination_size = 0;
  for (size_t i = 0; i < terminator_count; i++) {
    if (termination_flags & terminators[i].flag) {
      termination_size++;
    }
  }

  /* ------------------------------------------------------------
     Overflow detection -- before anything is written
     ------------------------------------------------------------ */

  u64 raw_size = str_size + termination_size;

  if (raw_size > allocated_size) {
    fprintf(stderr, "arena_trim_string(): string overflow "
                    "detected.\n");
    return 0;
  }

  /* ------------------------------------------------------------
     Append terminators
     ------------------------------------------------------------ */

  char *dst = str + str_size;
  for (size_t i = 0; i < terminator_count; i++) {
    if (termination_flags & terminators[i].flag) {
      *dst++ = terminators[i].ch;
    }
  }

  /* ------------------------------------------------------------
     Final used size
     ------------------------------------------------------------ */

  u64 used_size = ALIGN_UP_POW2(raw_size, arena->align);

  /* Trimming may only move the position backwards; if alignment padding
     would exceed the reservation, keep the reservation as it was. */
  if (used_size > allocated_size) {
    used_size = allocated_size;
  }

  /* ------------------------------------------------------------
     Update arena position
     ------------------------------------------------------------ */

  arena->pos = str_pos + used_size;

  return used_size;
}
|
||||
|
||||
void *arena_clear(mem_arena **arena_ptr) { // mk clear
|
||||
|
||||
@@ -801,7 +914,7 @@ mem_arena_temp arena_scratch_get(mem_arena **conflicts, u32 num_conflicts) {
|
||||
arena_params params = {
|
||||
.reserve_size = MiB(64),
|
||||
.commit_size = MiB(1),
|
||||
.align = ARENA_ALIGN,
|
||||
.align = ARENA_CACHE_ALIGN,
|
||||
.push_size = 8,
|
||||
.allow_free_list = false,
|
||||
.allow_swapback = true,
|
||||
|
||||
2
arena.h
2
arena.h
@@ -239,7 +239,7 @@ void *arena_ptr_from_index(mem_arena *arena, u64 index);
|
||||
*/
|
||||
|
||||
#define ARENA_HEADER_SIZE (sizeof(mem_arena))
|
||||
#define ARENA_ALIGN (sizeof(void *))
|
||||
#define ARENA_CACHE_ALIGN (sizeof(void *))
|
||||
|
||||
// arena config
|
||||
typedef enum arena_growth_policy {
|
||||
|
||||
4
base.h
4
base.h
@@ -35,6 +35,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
@@ -147,9 +148,6 @@ static void sleep_ms(int ms) { Sleep(ms); }
|
||||
#define _DEFAULT_SOURCE
|
||||
#endif
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static u32 plat_get_pagesize(void) { return (u32)sysconf(_SC_PAGESIZE); }
|
||||
|
||||
static void *plat_mem_reserve(u64 size) {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
@echo off
|
||||
setlocal enabledelayedexpansion
|
||||
|
||||
set PROJECT_NAME=dfin
|
||||
|
||||
:: ============================================================================
|
||||
:: build.bat
|
||||
:: ============================================================================
|
||||
@@ -45,7 +47,7 @@ exit /b 1
|
||||
:main
|
||||
set BUILD_DIR=%SCRIPT_DIR%\build\windows\%BUILD_TYPE%
|
||||
|
||||
echo === Building filehasher (%BUILD_TYPE%) ===
|
||||
echo === Building %PROJECT_NAME% (%BUILD_TYPE%) ===
|
||||
|
||||
:: --------------------------------------------------------------------------
|
||||
:: Clean if requested
|
||||
@@ -167,4 +169,4 @@ popd
|
||||
|
||||
echo.
|
||||
echo === Build Complete ===
|
||||
echo Executable: %BUILD_DIR%\filehasher.exe
|
||||
echo Executable: %BUILD_DIR%\%PROJECT_NAME%.exe
|
||||
|
||||
20
build.sh
20
build.sh
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
# ============================================================================
|
||||
# build.sh - Build script for filehasher (Linux)
|
||||
# build.sh - Build script (Linux)
|
||||
# Usage: ./build.sh [Release|Debug] [clean]
|
||||
#
|
||||
# Compiler preference: gcc > clang
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
PROJECT_NAME="dfin"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Colors
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -55,7 +57,7 @@ done
|
||||
readonly BUILD_DIR="build/linux/${BUILD_TYPE}"
|
||||
readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
echo -e "${GREEN}=== Building filehasher (${BUILD_TYPE}) ===${NC}"
|
||||
echo -e "${GREEN}=== Building ${PROJECT_NAME} (${BUILD_TYPE}) ===${NC}"
|
||||
echo "Project: ${SCRIPT_DIR}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -226,18 +228,18 @@ echo
|
||||
# ---------------------------------------------------------------------------
|
||||
cd "${SCRIPT_DIR}"
|
||||
|
||||
if [[ -f "${BUILD_DIR}/filehasher" ]]; then
|
||||
echo -e "${GREEN}Executable: ${BUILD_DIR}/filehasher${NC}"
|
||||
if [[ -f "${BUILD_DIR}/${PROJECT_NAME}" ]]; then
|
||||
echo -e "${GREEN}Executable: ${BUILD_DIR}/${PROJECT_NAME}${NC}"
|
||||
|
||||
if command -v file &> /dev/null; then
|
||||
echo -e " Type: $(file -b ${BUILD_DIR}/filehasher)"
|
||||
echo -e " Type: $(file -b ${BUILD_DIR}/${PROJECT_NAME})"
|
||||
fi
|
||||
|
||||
if command -v du &> /dev/null; then
|
||||
echo -e " Size: $(du -h ${BUILD_DIR}/filehasher | cut -f1)"
|
||||
echo -e " Size: $(du -h ${BUILD_DIR}/${PROJECT_NAME} | cut -f1)"
|
||||
fi
|
||||
elif [[ -f "${BUILD_DIR}/filehasher.exe" ]]; then
|
||||
echo -e "${GREEN}Executable: ${BUILD_DIR}/filehasher.exe${NC}"
|
||||
elif [[ -f "${BUILD_DIR}/${PROJECT_NAME}.exe" ]]; then
|
||||
echo -e "${GREEN}Executable: ${BUILD_DIR}/${PROJECT_NAME}.exe${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}Note: Could not locate executable${NC}"
|
||||
echo "Checking build directory:"
|
||||
@@ -269,4 +271,4 @@ if [[ "${EXPORT_COMPILE_COMMANDS}" == "ON" ]]; then
|
||||
fi
|
||||
|
||||
echo
|
||||
echo -e "${GREEN}Ready to run: ./${BUILD_DIR}/filehasher${NC}"
|
||||
echo -e "${GREEN}Ready to run: ./${BUILD_DIR}/${PROJECT_NAME}${NC}"
|
||||
|
||||
@@ -32,7 +32,11 @@ int main(int argc, char **argv) {
|
||||
buf[strcspn(buf, "\r\n")] = 0;
|
||||
|
||||
if (buf[0] == 0) {
|
||||
strcpy(folders[0], ".");
|
||||
if (!platform_get_current_directory(folders[0], sizeof(folders[0]))) {
|
||||
fprintf(stderr, "Failed to get current directory\n");
|
||||
return 1;
|
||||
}
|
||||
normalize_path(folders[0]);
|
||||
folder_count = 1;
|
||||
} else {
|
||||
folder_count = parse_paths(buf, folders, 64);
|
||||
@@ -71,7 +75,19 @@ int main(int argc, char **argv) {
|
||||
.max_nbre_blocks = 1,
|
||||
};
|
||||
|
||||
mem_arena *gp_arena = arena_create(¶ms);
|
||||
arena_params params_caligned = {
|
||||
.reserve_size = GiB(1),
|
||||
.commit_size = MiB(16),
|
||||
.align = ARENA_CACHE_ALIGN,
|
||||
.push_size = 0,
|
||||
.allow_free_list = true,
|
||||
.allow_swapback = false,
|
||||
.growth_policy = ARENA_GROWTH_NORMAL,
|
||||
.commit_policy = ARENA_COMMIT_LAZY,
|
||||
.max_nbre_blocks = 1,
|
||||
};
|
||||
|
||||
mem_arena *gp_arena = arena_create(¶ms_caligned);
|
||||
|
||||
// -------------------------------
|
||||
// Detect hardware
|
||||
@@ -119,7 +135,7 @@ int main(int argc, char **argv) {
|
||||
mpmc_init(&file_queue, MiB(1));
|
||||
|
||||
// Starting hash threads
|
||||
WorkerContext workers[num_hash_threads];
|
||||
HasherContext workers[num_hash_threads];
|
||||
Thread *hash_threads =
|
||||
arena_push(&gp_arena, sizeof(Thread) * num_hash_threads, true);
|
||||
|
||||
@@ -155,8 +171,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
for (uint8_t i = 0; i < num_scan_threads; i++) {
|
||||
scanners[i].num_threads = num_scan_threads;
|
||||
scanners[i].path_arena = arena_create(¶ms);
|
||||
scanners[i].meta_arena = arena_create(¶ms);
|
||||
scanners[i].meta_arena = arena_create(¶ms_caligned);
|
||||
scanners[i].dir_queue = &dir_queue;
|
||||
scanners[i].file_queue = &file_queue;
|
||||
|
||||
@@ -170,7 +185,7 @@ int main(int argc, char **argv) {
|
||||
// Initial folder push
|
||||
for (int i = 0; i < folder_count; i++) {
|
||||
size_t len = strlen(folders[i]) + 1;
|
||||
char *path = arena_push(&scanners[0].path_arena, len, false);
|
||||
char *path = arena_push(&scanners[0].meta_arena, len, false);
|
||||
memcpy(path, folders[i], len);
|
||||
mpmc_push_work(&dir_queue, path);
|
||||
}
|
||||
14
mt_mpmc.h
14
mt_mpmc.h
@@ -1,4 +1,4 @@
|
||||
#pragma once
|
||||
#pragma once
|
||||
|
||||
#include "base.h"
|
||||
|
||||
@@ -214,19 +214,27 @@ static void mpmc_producers_finished(MPMCQueue *q, u8 consumer_count) {
|
||||
/* Done */
|
||||
/* ----------------------------------------------------------- */
|
||||
/*
 * Records that one unit of work on `q` has completed.
 *
 * q->work_count is decremented while q->lock is held. The caller whose
 * decrement brings the count to zero then invokes
 * mpmc_producers_finished(q, consumer_count) exactly once, after the lock
 * has been released.
 */
static void mpmc_task_done(MPMCQueue *q, u8 consumer_count) {
  bool finished = false;

  mtx_lock(&q->lock);
  finished = (--q->work_count == 0);
  mtx_unlock(&q->lock);

  /* NOTE(review): the call is deferred until after the unlock, presumably
     because mpmc_producers_finished() takes q->lock itself -- confirm
     against its definition. */
  if (finished) {
    mpmc_producers_finished(q, consumer_count);
  }
}
|
||||
|
||||
/* ----------------------------------------------------------- */
|
||||
/* MPMC Cleanup */
|
||||
/* ----------------------------------------------------------- */
|
||||
// static void mpmc_finish(MPMCQueue *q) { // Comment to prevent warning: unused function
|
||||
// static void mpmc_finish(MPMCQueue *q) { // Comment to prevent warning: unused
|
||||
// function
|
||||
// if (!q) return;
|
||||
//
|
||||
// if (q->slots) {
|
||||
|
||||
743
platform.c
743
platform.c
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user