forked from amir/filehasher
159 lines
3.4 KiB
C
159 lines
3.4 KiB
C
#pragma once
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
#define PLATFORM_WINDOWS 1
|
|
#include <stdatomic.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
|
|
#include <aclapi.h>
|
|
#include <fcntl.h>
|
|
#include <io.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <windows.h>
|
|
#else
|
|
#include <stdatomic.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
|
|
#include <dirent.h>
|
|
#include <fcntl.h>
|
|
#include <pthread.h>
|
|
#include <pwd.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#define XXH_VECTOR XXH_AVX2 // not recommended to compile with gcc; see xxhash.h line 4082
|
|
// Must compile with /arch:AVX2 in clang-cl or -mavx2 in clang/gcc
|
|
#define XXH_INLINE_ALL
|
|
#include "xxhash.c"
|
|
#include "xxhash.h"
|
|
|
|
// ----------------------------- Config -------------------------------------
|
|
#define FILE_LIST_TXT "file_list.txt"
|
|
#define FILE_HASHES_TXT "file_hashes.txt"
|
|
#define HASH_STRLEN 33 // 128-bit hex (32 chars) + null
|
|
#define MAX_PATHLEN 4096
|
|
#define READ_BLOCK (64 * 1024) // 64KB blocks
|
|
|
|
// ----------------------------- Data types ---------------------------------
|
|
/**
 * Metadata record describing one file.
 *
 * The struct only stores the values; how they are obtained and who owns the
 * `path` string is decided by the code that fills the record in.
 */
struct FileEntry {
  char *path; /* path of the file (NUL-terminated string) */

  uint64_t size_bytes;    /* file size in bytes */
  uint64_t created_time;  /* creation time, epoch-based (presumably seconds,
                             like modified_time -- confirm) */
  uint64_t modified_time; /* last modification time, epoch seconds */
  char owner[128];        /* resolved owner name */
};
typedef struct FileEntry FileEntry;
|
|
|
|
/* File path and metadata */
|
|
/**
 * Normalize path separators in place.
 *
 * Every backslash is turned into a forward slash, and each run of consecutive
 * separators (of either kind) is collapsed into a single '/'. The result is
 * never longer than the input, so the rewrite happens inside the caller's
 * buffer and the string is re-terminated at its new length.
 *
 * NOTE(review): because every run is collapsed, a leading UNC prefix (two
 * backslashes followed by a server name) also becomes a single '/'. Confirm
 * that callers never pass UNC paths, or special-case the prefix.
 *
 * @param p  Writable NUL-terminated path; NULL is accepted and ignored.
 */
static inline void normalize_path(char *p) {
  if (p == NULL) {
    return; /* nothing to normalize */
  }

  char *src = p;
  char *dst = p;
  int prev_slash = 0; /* non-zero while inside a run of separators */

  while (*src) {
    char c = *src++;

    if (c == '\\' || c == '/') {
      /* Emit only the first separator of a run. */
      if (!prev_slash) {
        *dst++ = '/';
        prev_slash = 1;
      }
    } else {
      *dst++ = c;
      prev_slash = 0;
    }
  }

  *dst = '\0';
}
|
|
|
|
void platform_get_file_times(const char *path, uint64_t *out_created,
|
|
uint64_t *out_modified);
|
|
void platform_get_file_owner(const char *path, char *out_owner,
|
|
size_t out_owner_size);
|
|
|
|
/* scan folder timer*/
|
|
/**
 * Stopwatch holding a start and an end timestamp.
 *
 * On Windows the timestamps are QueryPerformanceCounter ticks; elsewhere they
 * are `struct timespec` values, so the header also builds on the POSIX side
 * of the include block above.
 */
typedef struct {
#if PLATFORM_WINDOWS
  LARGE_INTEGER start;
  LARGE_INTEGER end;
#else
  struct timespec start;
  struct timespec end;
#endif
} HiResTimer;

#if PLATFORM_WINDOWS

/* Performance-counter frequency (ticks per second). Being `static` in a
 * header, every translation unit gets its own copy. */
static LARGE_INTEGER g_qpc_freq;

/** Query and cache the performance-counter frequency. */
static inline void timer_init(void) { QueryPerformanceFrequency(&g_qpc_freq); }

/** Record the start timestamp in `t`. */
static inline void timer_start(HiResTimer *t) {
  QueryPerformanceCounter(&t->start);
}

/**
 * Record the end timestamp in `t` and return the elapsed time.
 *
 * @return Seconds between timer_start() and this call.
 */
static inline double timer_stop(HiResTimer *t) {
  QueryPerformanceCounter(&t->end);

  /* timer_init() may not have run in this translation unit; query the
   * frequency lazily instead of dividing by zero. */
  if (g_qpc_freq.QuadPart == 0) {
    timer_init();
  }

  return (double)(t->end.QuadPart - t->start.QuadPart) /
         (double)g_qpc_freq.QuadPart;
}

#else /* !PLATFORM_WINDOWS */

/* Read the current time: the monotonic clock when <time.h> exposes it,
 * otherwise the C11 wall clock. */
static inline void hires_timer_now_(struct timespec *ts) {
#if defined(CLOCK_MONOTONIC)
  clock_gettime(CLOCK_MONOTONIC, ts);
#else
  timespec_get(ts, TIME_UTC);
#endif
}

/** No set-up is needed on this platform; kept so callers stay portable. */
static inline void timer_init(void) {}

/** Record the start timestamp in `t`. */
static inline void timer_start(HiResTimer *t) { hires_timer_now_(&t->start); }

/**
 * Record the end timestamp in `t` and return the elapsed time.
 *
 * @return Seconds between timer_start() and this call.
 */
static inline double timer_stop(HiResTimer *t) {
  hires_timer_now_(&t->end);
  return (double)(t->end.tv_sec - t->start.tv_sec) +
         (double)(t->end.tv_nsec - t->start.tv_nsec) / 1e9;
}

#endif /* PLATFORM_WINDOWS */
|
|
|
|
/* Scan folders */
|
|
typedef struct EntryBuffer {
|
|
FileEntry *entries;
|
|
size_t count;
|
|
size_t capacity;
|
|
} EntryBuffer;
|
|
|
|
typedef struct DirQueue DirQueue;
|
|
|
|
void scan_folder_windows_parallel(const char *base, DirQueue *q,
|
|
EntryBuffer *buf);
|
|
void scan_folder_posix_parallel(const char *base, DirQueue *q);
|
|
|
|
/**
 * Singly linked node that carries one directory path.
 */
struct DirJob {
  char *path;          /* directory path string */
  struct DirJob *next; /* following node, or NULL at the end of the chain */
};
typedef struct DirJob DirJob;
|
|
|
|
/**
 * Shared queue of directory paths plus the primitives that guard it.
 *
 * The lock and condition variable are platform specific: a CRITICAL_SECTION /
 * CONDITION_VARIABLE pair on Windows, a pthread mutex / condition variable
 * everywhere else.
 */
struct DirQueue {
  char **items;  /* array of path strings */
  size_t count;  /* presumably the number of queued items */
  size_t cap;    /* presumably the allocated length of `items` */
  size_t active; /* presumably the number of workers currently busy --
                    confirm against the scanner code */

  int stop; /* stop flag read by the workers */

#if PLATFORM_WINDOWS
  CRITICAL_SECTION cs;
  CONDITION_VARIABLE cv;
#else
  pthread_mutex_t mutex;
  pthread_cond_t cond;
#endif
};
typedef struct DirQueue DirQueue;
|
|
|
|
/* Hashing */
|
|
typedef struct Job {
|
|
FileEntry *file;
|
|
struct Job *next;
|
|
} Job;
|
|
|
|
/**
 * Linked queue of Job nodes plus the primitives that guard it.
 *
 * The lock and condition variable follow the same platform split as DirQueue:
 * CRITICAL_SECTION / CONDITION_VARIABLE on Windows, a pthread mutex and
 * condition variable elsewhere. Without this split the header only compiled
 * on Windows.
 */
typedef struct {
  struct Job *head; /* first node of the chain (struct Job is the Job typedef) */
  struct Job *tail; /* last node of the chain */
#if PLATFORM_WINDOWS
  CRITICAL_SECTION cs;
  CONDITION_VARIABLE cv;
#else
  pthread_mutex_t mutex;
  pthread_cond_t cond;
#endif
  atomic_size_t count; // queued jobs
  int stop;            /* stop flag read by the workers */
} JobQueue;
|
|
|
|
typedef struct {
|
|
JobQueue *queue;
|
|
atomic_size_t *done_counter;
|
|
size_t total_jobs;
|
|
atomic_int *live_workers;
|
|
} WorkerArg;
|