From 24348a0d1868698e46e1e39eb0fafde9bb47ea69 Mon Sep 17 00:00:00 2001 From: Souvik Banerjee Date: Sat, 14 Mar 2026 19:50:56 -0700 Subject: [PATCH] use native page size instead of hardcoded 4096 block size Replace all hardcoded DAXFS_BLOCK_SIZE (4096) references with the native page size queried at mkfs time via sysconf(_SC_PAGESIZE) and validated at mount time against PAGE_SIZE. This enables daxfs to work correctly on architectures with non-4K pages (e.g. 16K on Apple Silicon). Because mount rejects any image whose block_size differs from the kernel's PAGE_SIZE, an image must be created by mkdaxfs on a host whose page size matches the kernel that will mount it. Key changes: - mkdaxfs: detect native page size at runtime, store in superblock - mount: validate superblock block_size matches kernel PAGE_SIZE - overlay/pcache/validate: use info->block_size instead of DAXFS_BLOCK_SIZE - daxfs_format.h: bump version to 8 (version 7 images are rejected at mount and must be regenerated), flexible-array for ovl_data_entry - tests: use runtime page size instead of hardcoded 4096 --- daxfs/daxfs.h | 5 ++ daxfs/file.c | 6 +-- daxfs/overlay.c | 17 +++++-- daxfs/pcache.c | 23 +++++---- daxfs/super.c | 19 ++++++-- daxfs/validate.c | 10 ++-- include/daxfs_format.h | 18 +++---- tests/test_mmap.c | 3 +- tests/test_overlay.sh | 5 +- tools/mkdaxfs.c | 105 ++++++++++++++++++++++++----------------- 10 files changed, 128 insertions(+), 83 deletions(-) diff --git a/daxfs/daxfs.h b/daxfs/daxfs.h index 6f8c885..e7bd205 100644 --- a/daxfs/daxfs.h +++ b/daxfs/daxfs.h @@ -39,6 +39,8 @@ struct daxfs_pcache { void *data; /* On-DAX slot data area */ u32 slot_count; u32 hash_mask; /* slot_count - 1 */ + u32 block_size; /* Cached from daxfs_info */ + u32 block_shift; /* ilog2(block_size) */ struct list_head backing_files; /* List of daxfs_pcache_backing */ struct task_struct *fill_thread; /* Host kthread, NULL for spawn */ struct file **backing_array; /* O(1) lookup by ino, [0..max_ino] */ @@ -83,6 +85,9 @@ struct daxfs_info { /* On-DAX Superblock */ struct daxfs_super *super; + /* Cached block_size from superblock (== PAGE_SIZE, validated at mount) */ + u32 block_size; + /* Base image access */ struct daxfs_base_inode *base_inodes; u64
base_data_offset; /* Absolute offset to data region */ diff --git a/daxfs/file.c b/daxfs/file.c index 608f493..a0df7d7 100644 --- a/daxfs/file.c +++ b/daxfs/file.c @@ -47,7 +47,7 @@ static void daxfs_refresh_isize(struct inode *inode, struct daxfs_info *info) * * Resolution order: overlay page → pcache → inline base image data * - * COW granularity is per-page (4KB). After a partial-page write, only + * COW granularity is per-page (block_size). After a partial-page write, only * the written page has an overlay entry; adjacent pages still resolve * through pcache or base image. This is intentional — each page is * independently versioned. @@ -174,8 +174,8 @@ static ssize_t daxfs_read_iter(struct kiocb *iocb, struct iov_iter *to) * Fast path: check per-inode overlay cache directly. * Avoids the full daxfs_base_file_data() call overhead * and prefetches the next page for sequential reads. - * Overlay pages are 4104 bytes apart in the pool (8-byte - * header + 4096 data), a stride the HW prefetcher + * Overlay pages are (8 + PAGE_SIZE) bytes apart in the + * pool (8-byte header + data), a stride the HW prefetcher * doesn't recognise, so SW prefetch matters here. */ if (info->overlay) { diff --git a/daxfs/overlay.c b/daxfs/overlay.c index 2b2b2a7..a7e7093 100644 --- a/daxfs/overlay.c +++ b/daxfs/overlay.c @@ -500,9 +500,17 @@ int daxfs_overlay_set_inode(struct daxfs_info *info, u64 ino, return 0; } +/* + * Size of an overlay data entry: 8-byte header + one page of file data. + */ +static size_t ovl_data_entry_size(struct daxfs_info *info) +{ + return sizeof(struct daxfs_ovl_data_entry) + info->block_size; +} + /* * Get a data page from overlay. - * Returns pointer to 4KB data, or NULL if not found. + * Returns pointer to block_size data, or NULL if not found. */ void *daxfs_overlay_get_page(struct daxfs_info *info, u64 ino, u64 pgoff) { @@ -529,7 +537,7 @@ void *daxfs_overlay_get_page(struct daxfs_info *info, u64 ino, u64 pgoff) * to the hash table. 
The caller must initialise the data (COW) and * then call daxfs_overlay_publish_page() to make it visible. * - * Returns pointer to 4KB data area, or NULL on failure. + * Returns pointer to block_size data area, or NULL on failure. * On success, *pool_off_out receives the pool offset (needed for publish). */ void *daxfs_overlay_alloc_page(struct daxfs_info *info, u64 ino, u64 pgoff, @@ -545,7 +553,8 @@ void *daxfs_overlay_alloc_page(struct daxfs_info *info, u64 ino, u64 pgoff, if (pgoff > DAXFS_OVL_MAX_PGOFF) return NULL; - pool_off = overlay_pool_alloc(ovl, sizeof(*de), DAXFS_OVL_DATA); + pool_off = overlay_pool_alloc(ovl, ovl_data_entry_size(info), + DAXFS_OVL_DATA); if (pool_off == (u64)-1) return NULL; @@ -606,7 +615,7 @@ int daxfs_overlay_alloc_pages_batch(struct daxfs_info *info, u64 ino, void **pages, u64 *pool_offs) { struct daxfs_overlay *ovl = info->overlay; - size_t entry_size = sizeof(struct daxfs_ovl_data_entry); + size_t entry_size = ovl_data_entry_size(info); size_t aligned = ALIGN(entry_size, 8); u64 base_pool_off; u32 i, ok = 0; diff --git a/daxfs/pcache.c b/daxfs/pcache.c index 391594c..cd23c52 100644 --- a/daxfs/pcache.c +++ b/daxfs/pcache.c @@ -73,8 +73,9 @@ static int pcache_fill_slot(struct daxfs_pcache *pc, u32 slot_idx, u64 tag) { u64 ino = PCACHE_TAG_INO(tag); u64 pgoff = PCACHE_TAG_PGOFF(tag); - loff_t pos = (loff_t)pgoff << PAGE_SHIFT; - void *dst = pc->data + (u64)slot_idx * PAGE_SIZE; + u32 bsize = pc->block_size; + loff_t pos = (loff_t)pgoff << pc->block_shift; + void *dst = pc->data + (u64)slot_idx * bsize; struct file *backing; ssize_t n; u64 old_val, new_val; @@ -90,7 +91,7 @@ static int pcache_fill_slot(struct daxfs_pcache *pc, u32 slot_idx, u64 tag) return -ENOENT; } - n = kernel_read(backing, dst, PAGE_SIZE, &pos); + n = kernel_read(backing, dst, bsize, &pos); if (n < 0) { pr_err_ratelimited("daxfs: pcache read error ino=%llu pgoff=%llu: %zd\n", ino, pgoff, n); @@ -103,8 +104,8 @@ static int pcache_fill_slot(struct daxfs_pcache 
*pc, u32 slot_idx, u64 tag) return (int)n; } - if (n < PAGE_SIZE) - memset(dst + n, 0, PAGE_SIZE - n); + if (n < bsize) + memset(dst + n, 0, bsize - n); smp_wmb(); @@ -238,7 +239,7 @@ static void *pcache_wait_valid(struct daxfs_pcache *pc, u32 target_idx, pcache_touch(&pc->slots[target_idx]); if (pinned_slot) *pinned_slot = (s32)target_idx; - return pc->data + (u64)target_idx * PAGE_SIZE; + return pc->data + (u64)target_idx * pc->block_size; } if (PCACHE_STATE(val) == PCACHE_STATE_FREE) return ERR_PTR(-EAGAIN); @@ -292,7 +293,7 @@ void *daxfs_pcache_get_page(struct daxfs_info *info, u64 ino, u64 pgoff, pcache_touch(&pc->slots[slot_idx]); if (pinned_slot) *pinned_slot = (s32)slot_idx; - return pc->data + (u64)slot_idx * PAGE_SIZE; + return pc->data + (u64)slot_idx * pc->block_size; } } @@ -334,7 +335,7 @@ static noinline void *pcache_slow_path(struct daxfs_pcache *pc, pcache_touch(&pc->slots[idx]); if (pinned_slot) *pinned_slot = (s32)idx; - return pc->data + (u64)idx * PAGE_SIZE; + return pc->data + (u64)idx * pc->block_size; } break; @@ -383,7 +384,7 @@ static noinline void *pcache_slow_path(struct daxfs_pcache *pc, pcache_touch(&pc->slots[free_idx]); if (pinned_slot) *pinned_slot = (s32)free_idx; - return pc->data + (u64)free_idx * PAGE_SIZE; + return pc->data + (u64)free_idx * pc->block_size; } goto restart; } @@ -473,7 +474,7 @@ bool daxfs_is_pcache_data(struct daxfs_info *info, void *ptr) if (!pc || !ptr) return false; return ptr >= pc->data && - ptr < pc->data + (u64)pc->slot_count * PAGE_SIZE; + ptr < pc->data + (u64)pc->slot_count * pc->block_size; } /* @@ -692,6 +693,8 @@ int daxfs_pcache_init(struct daxfs_info *info, const char *backing_path) return -ENOMEM; INIT_LIST_HEAD(&pc->backing_files); + pc->block_size = info->block_size; + pc->block_shift = ilog2(info->block_size); hdr = daxfs_mem_ptr(info, pcache_offset); if (le32_to_cpu(hdr->magic) != DAXFS_PCACHE_MAGIC) { diff --git a/daxfs/super.c b/daxfs/super.c index d98d702..489ec4e 100644 --- 
a/daxfs/super.c +++ b/daxfs/super.c @@ -154,8 +154,17 @@ static int daxfs_fill_super(struct super_block *sb, struct fs_context *fc) /* Validate version */ if (le32_to_cpu(info->super->version) != DAXFS_VERSION) { - pr_err("daxfs: unsupported version %u\n", - le32_to_cpu(info->super->version)); + pr_err("daxfs: unsupported version %u (expected %u)\n", + le32_to_cpu(info->super->version), DAXFS_VERSION); + ret = -EINVAL; + goto err_unmap; + } + + /* Validate block_size matches native page size */ + info->block_size = le32_to_cpu(info->super->block_size); + if (info->block_size != PAGE_SIZE) { + pr_err("daxfs: block_size %u does not match PAGE_SIZE %lu\n", + info->block_size, PAGE_SIZE); ret = -EINVAL; goto err_unmap; } @@ -320,8 +329,8 @@ static int daxfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct daxfs_info *info = DAXFS_SB(dentry->d_sb); buf->f_type = DAXFS_SUPER_MAGIC; - buf->f_bsize = DAXFS_BLOCK_SIZE; - buf->f_blocks = info->size / DAXFS_BLOCK_SIZE; + buf->f_bsize = info->block_size; + buf->f_blocks = info->size / info->block_size; buf->f_bfree = 0; buf->f_bavail = 0; buf->f_files = info->base_inode_count; @@ -334,7 +343,7 @@ static int daxfs_statfs(struct dentry *dentry, struct kstatfs *buf) u64 pool_size = le64_to_cpu(ovl->header->pool_size); if (pool_size > pool_used) { - buf->f_bfree = (pool_size - pool_used) / DAXFS_BLOCK_SIZE; + buf->f_bfree = (pool_size - pool_used) / info->block_size; buf->f_bavail = buf->f_bfree; } buf->f_ffree = UINT_MAX; diff --git a/daxfs/validate.c b/daxfs/validate.c index 7028620..443c6ab 100644 --- a/daxfs/validate.c +++ b/daxfs/validate.c @@ -211,8 +211,8 @@ int daxfs_validate_super(struct daxfs_info *info) u64 bucket_array_size = ALIGN( (u64)bucket_count * sizeof(struct daxfs_overlay_bucket), - DAXFS_BLOCK_SIZE); - u64 min_size = DAXFS_BLOCK_SIZE + bucket_array_size; + info->block_size); + u64 min_size = info->block_size + bucket_array_size; if (overlay_size < min_size) { pr_err("daxfs: overlay region too small 
(%llu < %llu)\n", @@ -245,9 +245,9 @@ int daxfs_validate_super(struct daxfs_info *info) { u64 meta_size = ALIGN((u64)slot_count * sizeof(struct daxfs_pcache_slot), - DAXFS_BLOCK_SIZE); - u64 data_size = (u64)slot_count * DAXFS_BLOCK_SIZE; - u64 min_size = DAXFS_BLOCK_SIZE + meta_size + data_size; + info->block_size); + u64 data_size = (u64)slot_count * info->block_size; + u64 min_size = info->block_size + meta_size + data_size; if (pcache_size < min_size) { pr_err("daxfs: pcache region too small (%llu < %llu)\n", diff --git a/include/daxfs_format.h b/include/daxfs_format.h index 433cf5f..29b0f27 100644 --- a/include/daxfs_format.h +++ b/include/daxfs_format.h @@ -16,18 +16,18 @@ #define DAXFS_IOC_GET_DMABUF _IO('D', 1) /* Get dma-buf fd for this mount */ #define DAXFS_SUPER_MAGIC 0x64617835 /* "dax5" */ -#define DAXFS_VERSION 7 -#define DAXFS_BLOCK_SIZE 4096 +#define DAXFS_VERSION 8 +#define DAXFS_MIN_BLOCK_SIZE 4096 /* Minimum (superblock/header padding) */ #define DAXFS_INODE_SIZE 64 #define DAXFS_NAME_MAX 255 #define DAXFS_DIRENT_SIZE (16 + DAXFS_NAME_MAX) /* ino + mode + name_len + reserved + name */ #define DAXFS_ROOT_INO 1 /* - * Superblock - at offset 0, 4KB + * Superblock - 4KB struct at offset 0, occupies one block (block_size bytes) * * On-DAX Layout: - * [ Superblock (4KB) | Base Image (optional) | Overlay (optional) | Page Cache (optional) ] + * [ Superblock (block_size) | Base Image (optional) | Overlay (optional) | Page Cache (optional) ] * * All layout metadata lives here - region headers only carry magic/version * for validation, not duplicated layout fields. 
@@ -35,7 +35,7 @@ struct daxfs_super { __le32 magic; /* DAXFS_SUPER_MAGIC */ __le32 version; /* DAXFS_VERSION */ - __le32 block_size; /* 4096 */ + __le32 block_size; /* Native page size at mkfs time */ __le32 reserved0; __le64 total_size; @@ -217,7 +217,7 @@ struct daxfs_ovl_inode_entry { struct daxfs_ovl_data_entry { __le32 type; /* DAXFS_OVL_DATA */ __le32 reserved; - __u8 data[4096]; /* One page of file data */ + __u8 data[]; /* One page of file data (block_size bytes) */ }; struct daxfs_ovl_dirent_entry { @@ -261,9 +261,9 @@ struct daxfs_ovl_dirlist_entry { * Slot count/hash_shift are in the main superblock. * * Region layout: - * [pcache_header (4KB)] - * [slot_metadata (slot_count * 16B, padded to 4KB)] - * [slot_data (slot_count * 4KB)] + * [pcache_header (block_size)] + * [slot_metadata (slot_count * 16B, padded to block_size)] + * [slot_data (slot_count * block_size)] */ #define DAXFS_PCACHE_MAGIC 0x70636163 /* "pcac" */ diff --git a/tests/test_mmap.c b/tests/test_mmap.c index 3e724ac..054a538 100644 --- a/tests/test_mmap.c +++ b/tests/test_mmap.c @@ -28,7 +28,7 @@ static int tests_run = 0; static int tests_passed = 0; #define TEST_FILE "mmap_test_file" -#define PAGE_SIZE 4096 +static long PAGE_SIZE; #define TEST_START(name) do { \ printf(" TEST: %s ... 
", name); \ @@ -691,6 +691,7 @@ int main(int argc, char *argv[]) return 1; } mountpoint = argv[1]; + PAGE_SIZE = sysconf(_SC_PAGESIZE); printf("DAXFS mmap test suite\n"); printf("Mountpoint: %s\n\n", mountpoint); diff --git a/tests/test_overlay.sh b/tests/test_overlay.sh index ad6d003..c9c5244 100755 --- a/tests/test_overlay.sh +++ b/tests/test_overlay.sh @@ -256,8 +256,9 @@ setup_split() { echo "Nested split file" > "$SPLIT_SOURCE/subdir/nested.txt" ln -s hello.txt "$SPLIT_SOURCE/link_to_hello" - # Create a multi-page file (larger than 4KB) - dd if=/dev/urandom of="$SPLIT_SOURCE/largefile.bin" bs=4096 count=8 2>/dev/null + # Create a multi-page file (at least 4 pages on any page size) + PAGE_SIZE=$(getconf PAGESIZE) + dd if=/dev/urandom of="$SPLIT_SOURCE/largefile.bin" bs="$PAGE_SIZE" count=4 2>/dev/null LARGE_CKSUM=$(md5sum "$SPLIT_SOURCE/largefile.bin" | awk '{print $1}') # Create more files to exercise multiple cache slots diff --git a/tools/mkdaxfs.c b/tools/mkdaxfs.c index 863b36b..c5eb9c9 100644 --- a/tools/mkdaxfs.c +++ b/tools/mkdaxfs.c @@ -38,6 +38,9 @@ #define DAXFS_DEFAULT_OVERLAY_POOL (64ULL * 1024 * 1024) /* 64MB default pool */ #define DAXFS_DEFAULT_BUCKET_COUNT 65536 /* 64K buckets = 1MB */ +/* Block size = native page size (stored in superblock, validated at mount) */ +static uint32_t block_size; + /* From linux/dma-heap.h */ struct dma_heap_allocation_data { uint64_t len; @@ -280,7 +283,7 @@ static void calculate_offsets(bool split, bool export) struct hardlink_entry *hl; uint64_t inode_offset = 0; /* relative to base start */ uint64_t base_data_offset = ALIGN(inode_offset + file_count * DAXFS_INODE_SIZE, - DAXFS_BLOCK_SIZE); + block_size); uint64_t back_offset = 0; for (e = files_head; e; e = e->next) { @@ -294,24 +297,24 @@ static void calculate_offsets(bool split, bool export) e->data_offset = 0; } else if (split) { e->data_offset = back_offset; - back_offset += ALIGN(e->st.st_size, DAXFS_BLOCK_SIZE); + back_offset += ALIGN(e->st.st_size, 
block_size); hl = find_hardlink(e->st.st_dev, e->st.st_ino); if (hl) hl->data_offset = e->data_offset; } else { e->data_offset = base_data_offset; - base_data_offset += ALIGN(e->st.st_size, DAXFS_BLOCK_SIZE); + base_data_offset += ALIGN(e->st.st_size, block_size); hl = find_hardlink(e->st.st_dev, e->st.st_ino); if (hl) hl->data_offset = e->data_offset; } } else if (S_ISLNK(e->st.st_mode)) { e->data_offset = base_data_offset; - base_data_offset += ALIGN(e->st.st_size + 1, DAXFS_BLOCK_SIZE); + base_data_offset += ALIGN(e->st.st_size + 1, block_size); } else if (S_ISDIR(e->st.st_mode) && e->child_count > 0) { e->data_offset = base_data_offset; base_data_offset += ALIGN(e->child_count * DAXFS_DIRENT_SIZE, - DAXFS_BLOCK_SIZE); + block_size); } } @@ -328,17 +331,17 @@ static size_t calculate_base_size(bool split) struct file_entry *e; uint64_t inode_offset = 0; uint64_t data_offset = ALIGN(inode_offset + file_count * DAXFS_INODE_SIZE, - DAXFS_BLOCK_SIZE); + block_size); size_t total = data_offset; for (e = files_head; e; e = e->next) { if (S_ISREG(e->st.st_mode)) { if (!split && !e->is_hardlink) - total += ALIGN(e->st.st_size, DAXFS_BLOCK_SIZE); + total += ALIGN(e->st.st_size, block_size); } else if (S_ISLNK(e->st.st_mode)) { - total += ALIGN(e->st.st_size + 1, DAXFS_BLOCK_SIZE); + total += ALIGN(e->st.st_size + 1, block_size); } else if (S_ISDIR(e->st.st_mode) && e->child_count > 0) { - total += ALIGN(e->child_count * DAXFS_DIRENT_SIZE, DAXFS_BLOCK_SIZE); + total += ALIGN(e->child_count * DAXFS_DIRENT_SIZE, block_size); } } @@ -367,18 +370,18 @@ static uint32_t calc_ilog2(uint32_t v) static size_t calculate_pcache_region_size(uint32_t slot_count) { uint64_t meta_size = ALIGN((uint64_t)slot_count * sizeof(struct daxfs_pcache_slot), - DAXFS_BLOCK_SIZE); - uint64_t data_size = (uint64_t)slot_count * DAXFS_BLOCK_SIZE; + block_size); + uint64_t data_size = (uint64_t)slot_count * block_size; - return DAXFS_BLOCK_SIZE + meta_size + data_size; + return block_size + meta_size + 
data_size; } static size_t calculate_overlay_region_size(uint32_t bucket_count, size_t pool_size) { uint64_t bucket_array_size = ALIGN((uint64_t)bucket_count * sizeof(struct daxfs_overlay_bucket), - DAXFS_BLOCK_SIZE); - return DAXFS_BLOCK_SIZE + bucket_array_size + pool_size; + block_size); + return block_size + bucket_array_size + pool_size; } static int write_overlay_region(void *overlay_mem, uint32_t bucket_count, @@ -387,15 +390,15 @@ static int write_overlay_region(void *overlay_mem, uint32_t bucket_count, struct daxfs_overlay_header *hdr = overlay_mem; uint64_t bucket_array_size = ALIGN((uint64_t)bucket_count * sizeof(struct daxfs_overlay_bucket), - DAXFS_BLOCK_SIZE); - size_t total = DAXFS_BLOCK_SIZE + bucket_array_size + pool_size; + block_size); + size_t total = block_size + bucket_array_size + pool_size; memset(overlay_mem, 0, total); hdr->magic = htole32(DAXFS_OVERLAY_MAGIC); hdr->version = htole32(DAXFS_OVERLAY_VERSION); - hdr->bucket_offset = htole64(DAXFS_BLOCK_SIZE); - hdr->pool_offset = htole64(DAXFS_BLOCK_SIZE + bucket_array_size); + hdr->bucket_offset = htole64(block_size); + hdr->pool_offset = htole64(block_size + bucket_array_size); hdr->pool_size = htole64(pool_size); hdr->pool_alloc = htole64(0); hdr->next_ino = htole64(next_ino_val); @@ -557,10 +560,10 @@ static int write_pcache_region(void *pcache_mem, uint32_t slot_count) { struct daxfs_pcache_header *hdr = pcache_mem; uint64_t meta_size = ALIGN((uint64_t)slot_count * sizeof(struct daxfs_pcache_slot), - DAXFS_BLOCK_SIZE); - uint64_t slot_meta_offset = DAXFS_BLOCK_SIZE; - uint64_t slot_data_offset = DAXFS_BLOCK_SIZE + meta_size; - size_t total = slot_data_offset + (uint64_t)slot_count * DAXFS_BLOCK_SIZE; + block_size); + uint64_t slot_meta_offset = block_size; + uint64_t slot_data_offset = block_size + meta_size; + size_t total = slot_data_offset + (uint64_t)slot_count * block_size; memset(pcache_mem, 0, total); @@ -581,7 +584,7 @@ static void fill_super_common(struct daxfs_super *super, 
uint64_t total_size) { super->magic = htole32(DAXFS_SUPER_MAGIC); super->version = htole32(DAXFS_VERSION); - super->block_size = htole32(DAXFS_BLOCK_SIZE); + super->block_size = htole32(block_size); /* Native page size */ super->total_size = htole64(total_size); } @@ -593,7 +596,7 @@ static void fill_super_base(struct daxfs_super *super, uint64_t base_offset, { uint64_t inode_offset = 0; uint64_t data_offset = ALIGN(inode_offset + file_count * DAXFS_INODE_SIZE, - DAXFS_BLOCK_SIZE); + block_size); super->base_offset = htole64(base_offset); super->base_size = htole64(base_size); @@ -612,12 +615,12 @@ static int write_split_image(void *mem, size_t mem_size, const char *src_dir, size_t overlay_pool_size, uint32_t pcache_slots) { struct daxfs_super *super = mem; - uint64_t base_offset = DAXFS_BLOCK_SIZE; - uint64_t overlay_offset = ALIGN(base_offset + base_size, DAXFS_BLOCK_SIZE); + uint64_t base_offset = block_size; + uint64_t overlay_offset = ALIGN(base_offset + base_size, block_size); size_t overlay_region_size = calculate_overlay_region_size(overlay_buckets, overlay_pool_size); uint64_t pcache_offset = ALIGN(overlay_offset + overlay_region_size, - DAXFS_BLOCK_SIZE); + block_size); size_t pcache_region_size = calculate_pcache_region_size(pcache_slots); uint64_t total = pcache_offset + pcache_region_size; @@ -654,7 +657,7 @@ static int write_split_image(void *mem, size_t mem_size, const char *src_dir, write_pcache_region(mem + pcache_offset, pcache_slots); printf("Image layout (split mode):\n"); - printf(" Superblock: 0x%x - 0x%x\n", 0, DAXFS_BLOCK_SIZE); + printf(" Superblock: 0x%x - 0x%x\n", 0, block_size); printf(" Base image: 0x%lx - 0x%lx (%zu bytes, metadata only)\n", (unsigned long)base_offset, (unsigned long)(base_offset + base_size), @@ -675,12 +678,12 @@ static size_t calculate_split_dax_size(size_t base_size, uint32_t overlay_bucket size_t overlay_pool_size, uint32_t pcache_slots) { - uint64_t base_offset = DAXFS_BLOCK_SIZE; - uint64_t overlay_offset = 
ALIGN(base_offset + base_size, DAXFS_BLOCK_SIZE); + uint64_t base_offset = block_size; + uint64_t overlay_offset = ALIGN(base_offset + base_size, block_size); size_t overlay_region_size = calculate_overlay_region_size(overlay_buckets, overlay_pool_size); uint64_t pcache_offset = ALIGN(overlay_offset + overlay_region_size, - DAXFS_BLOCK_SIZE); + block_size); size_t pcache_region_size = calculate_pcache_region_size(pcache_slots); return pcache_offset + pcache_region_size; @@ -695,7 +698,7 @@ static int write_empty_image(void *mem, size_t mem_size, uint32_t pcache_slots) { struct daxfs_super *super = mem; - uint64_t overlay_offset = DAXFS_BLOCK_SIZE; + uint64_t overlay_offset = block_size; size_t overlay_region_size = calculate_overlay_region_size(overlay_buckets, overlay_pool_size); uint64_t pcache_offset = 0; @@ -704,7 +707,7 @@ static int write_empty_image(void *mem, size_t mem_size, if (pcache_slots) { pcache_offset = ALIGN(overlay_offset + overlay_region_size, - DAXFS_BLOCK_SIZE); + block_size); pcache_region_size = calculate_pcache_region_size(pcache_slots); total = pcache_offset + pcache_region_size; } else { @@ -751,9 +754,9 @@ static int write_empty_image(void *mem, size_t mem_size, uint64_t bucket_array_size = ALIGN( (uint64_t)overlay_buckets * sizeof(struct daxfs_overlay_bucket), - DAXFS_BLOCK_SIZE); + block_size); void *pool_base = mem + overlay_offset + - DAXFS_BLOCK_SIZE + bucket_array_size; + block_size + bucket_array_size; struct daxfs_ovl_inode_entry *ie; struct daxfs_overlay_bucket *buckets; uint64_t key, pool_off; @@ -774,7 +777,7 @@ static int write_empty_image(void *mem, size_t mem_size, /* Insert into hash table */ key = DAXFS_OVL_KEY_INODE(DAXFS_ROOT_INO); - buckets = mem + overlay_offset + DAXFS_BLOCK_SIZE; + buckets = mem + overlay_offset + block_size; idx = (uint32_t)(key & (overlay_buckets - 1)); buckets[idx].state_key = htole64(DAXFS_OVL_MAKE( DAXFS_OVL_USED, key)); @@ -785,7 +788,7 @@ static int write_empty_image(void *mem, size_t 
mem_size, write_pcache_region(mem + pcache_offset, pcache_slots); printf("Image layout (empty mode):\n"); - printf(" Superblock: 0x%x - 0x%x\n", 0, DAXFS_BLOCK_SIZE); + printf(" Superblock: 0x%x - 0x%x\n", 0, block_size); printf(" Overlay: 0x%lx - 0x%lx (%zu bytes, %u buckets, %zu pool)\n", (unsigned long)overlay_offset, (unsigned long)(overlay_offset + overlay_region_size), @@ -803,13 +806,13 @@ static size_t calculate_empty_size(uint32_t overlay_buckets, size_t overlay_pool_size, uint32_t pcache_slots) { - uint64_t overlay_offset = DAXFS_BLOCK_SIZE; + uint64_t overlay_offset = block_size; size_t overlay_region_size = calculate_overlay_region_size(overlay_buckets, overlay_pool_size); if (pcache_slots) { uint64_t pcache_offset = ALIGN(overlay_offset + overlay_region_size, - DAXFS_BLOCK_SIZE); + block_size); return pcache_offset + calculate_pcache_region_size(pcache_slots); } @@ -1060,7 +1063,7 @@ static int write_static_image(void *mem, size_t mem_size, const char *src_dir, size_t base_size) { struct daxfs_super *super = mem; - uint64_t base_offset = DAXFS_BLOCK_SIZE; + uint64_t base_offset = block_size; if (base_offset + base_size > mem_size) { fprintf(stderr, "Error: image too large for allocated space\n"); @@ -1079,7 +1082,7 @@ static int write_static_image(void *mem, size_t mem_size, const char *src_dir, write_base_image(mem + base_offset, base_size, src_dir, false); printf("Image layout (static):\n"); - printf(" Superblock: 0x%x - 0x%x\n", 0, DAXFS_BLOCK_SIZE); + printf(" Superblock: 0x%x - 0x%x\n", 0, block_size); printf(" Base image: 0x%lx - 0x%lx (%zu bytes)\n", (unsigned long)base_offset, (unsigned long)(base_offset + base_size), @@ -1090,7 +1093,7 @@ static int write_static_image(void *mem, size_t mem_size, const char *src_dir, static size_t calculate_static_size(size_t base_size) { - return DAXFS_BLOCK_SIZE + base_size; + return block_size + base_size; } static void print_usage(const char *prog) @@ -1168,6 +1171,20 @@ int main(int argc, char *argv[]) 
size_t overlay_pool_size = 0; uint32_t overlay_buckets = 0; + /* Set block_size to the native page size of the host running mkdaxfs; mount rejects the image unless this equals the kernel's PAGE_SIZE. NOTE(review): block_size is uint32_t - if ALIGN(x, a) expands to ((x) + (a) - 1) & ~((a) - 1), the mask is computed in 32 bits and would clear bits 32+ of 64-bit sizes; confirm ALIGN casts the mask to the type of x. */ + { + long ps = sysconf(_SC_PAGESIZE); + + if (ps <= 0) { + fprintf(stderr, "Warning: sysconf(_SC_PAGESIZE) failed, " + "defaulting to %u\n", DAXFS_MIN_BLOCK_SIZE); + ps = DAXFS_MIN_BLOCK_SIZE; + } + block_size = (uint32_t)ps; + if (block_size < DAXFS_MIN_BLOCK_SIZE) + block_size = DAXFS_MIN_BLOCK_SIZE; + } + while ((opt = getopt_long(argc, argv, "d:o:H:D:m:p:s:C:O:B:EXVh", long_options, NULL)) != -1) { switch (opt) { case 'd': @@ -1338,8 +1355,8 @@ int main(int argc, char *argv[]) prev_power_of_2(file_count) : 16; } else { uint32_t backing_pages = (backing_file_size + - DAXFS_BLOCK_SIZE - 1) / - DAXFS_BLOCK_SIZE; + block_size - 1) / + block_size; pcache_slots = backing_pages > 0 ? prev_power_of_2(backing_pages) : 16; }