Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions daxfs/daxfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ struct daxfs_pcache {
void *data; /* On-DAX slot data area */
u32 slot_count;
u32 hash_mask; /* slot_count - 1 */
u32 block_size; /* Cached from daxfs_info */
u32 block_shift; /* ilog2(block_size) */
struct list_head backing_files; /* List of daxfs_pcache_backing */
struct task_struct *fill_thread; /* Host kthread, NULL for spawn */
struct file **backing_array; /* O(1) lookup by ino, [0..max_ino] */
Expand Down Expand Up @@ -83,6 +85,9 @@ struct daxfs_info {
/* On-DAX Superblock */
struct daxfs_super *super;

/* Cached block_size from superblock (== PAGE_SIZE, validated at mount) */
u32 block_size;

/* Base image access */
struct daxfs_base_inode *base_inodes;
u64 base_data_offset; /* Absolute offset to data region */
Expand Down
6 changes: 3 additions & 3 deletions daxfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static void daxfs_refresh_isize(struct inode *inode, struct daxfs_info *info)
*
* Resolution order: overlay page → pcache → inline base image data
*
* COW granularity is per-page (4KB). After a partial-page write, only
* COW granularity is per-page (block_size). After a partial-page write, only
* the written page has an overlay entry; adjacent pages still resolve
* through pcache or base image. This is intentional — each page is
* independently versioned.
Expand Down Expand Up @@ -174,8 +174,8 @@ static ssize_t daxfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
* Fast path: check per-inode overlay cache directly.
* Avoids the full daxfs_base_file_data() call overhead
* and prefetches the next page for sequential reads.
* Overlay pages are 4104 bytes apart in the pool (8-byte
* header + 4096 data), a stride the HW prefetcher
* Overlay pages are (8 + PAGE_SIZE) bytes apart in the
* pool (8-byte header + data), a stride the HW prefetcher
* doesn't recognise, so SW prefetch matters here.
*/
if (info->overlay) {
Expand Down
17 changes: 13 additions & 4 deletions daxfs/overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,17 @@ int daxfs_overlay_set_inode(struct daxfs_info *info, u64 ino,
return 0;
}

/*
 * Total bytes taken by one overlay data entry in the pool: the fixed
 * struct daxfs_ovl_data_entry header followed by info->block_size bytes
 * of file data.
 */
static size_t ovl_data_entry_size(struct daxfs_info *info)
{
	size_t header_len = sizeof(struct daxfs_ovl_data_entry);
	size_t payload_len = info->block_size;

	return header_len + payload_len;
}

/*
* Get a data page from overlay.
* Returns pointer to 4KB data, or NULL if not found.
* Returns pointer to block_size data, or NULL if not found.
*/
void *daxfs_overlay_get_page(struct daxfs_info *info, u64 ino, u64 pgoff)
{
Expand All @@ -529,7 +537,7 @@ void *daxfs_overlay_get_page(struct daxfs_info *info, u64 ino, u64 pgoff)
* to the hash table. The caller must initialise the data (COW) and
* then call daxfs_overlay_publish_page() to make it visible.
*
* Returns pointer to 4KB data area, or NULL on failure.
* Returns pointer to block_size data area, or NULL on failure.
* On success, *pool_off_out receives the pool offset (needed for publish).
*/
void *daxfs_overlay_alloc_page(struct daxfs_info *info, u64 ino, u64 pgoff,
Expand All @@ -545,7 +553,8 @@ void *daxfs_overlay_alloc_page(struct daxfs_info *info, u64 ino, u64 pgoff,
if (pgoff > DAXFS_OVL_MAX_PGOFF)
return NULL;

pool_off = overlay_pool_alloc(ovl, sizeof(*de), DAXFS_OVL_DATA);
pool_off = overlay_pool_alloc(ovl, ovl_data_entry_size(info),
DAXFS_OVL_DATA);
if (pool_off == (u64)-1)
return NULL;

Expand Down Expand Up @@ -606,7 +615,7 @@ int daxfs_overlay_alloc_pages_batch(struct daxfs_info *info, u64 ino,
void **pages, u64 *pool_offs)
{
struct daxfs_overlay *ovl = info->overlay;
size_t entry_size = sizeof(struct daxfs_ovl_data_entry);
size_t entry_size = ovl_data_entry_size(info);
size_t aligned = ALIGN(entry_size, 8);
u64 base_pool_off;
u32 i, ok = 0;
Expand Down
23 changes: 13 additions & 10 deletions daxfs/pcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ static int pcache_fill_slot(struct daxfs_pcache *pc, u32 slot_idx, u64 tag)
{
u64 ino = PCACHE_TAG_INO(tag);
u64 pgoff = PCACHE_TAG_PGOFF(tag);
loff_t pos = (loff_t)pgoff << PAGE_SHIFT;
void *dst = pc->data + (u64)slot_idx * PAGE_SIZE;
u32 bsize = pc->block_size;
loff_t pos = (loff_t)pgoff << pc->block_shift;
void *dst = pc->data + (u64)slot_idx * bsize;
struct file *backing;
ssize_t n;
u64 old_val, new_val;
Expand All @@ -90,7 +91,7 @@ static int pcache_fill_slot(struct daxfs_pcache *pc, u32 slot_idx, u64 tag)
return -ENOENT;
}

n = kernel_read(backing, dst, PAGE_SIZE, &pos);
n = kernel_read(backing, dst, bsize, &pos);
if (n < 0) {
pr_err_ratelimited("daxfs: pcache read error ino=%llu pgoff=%llu: %zd\n",
ino, pgoff, n);
Expand All @@ -103,8 +104,8 @@ static int pcache_fill_slot(struct daxfs_pcache *pc, u32 slot_idx, u64 tag)
return (int)n;
}

if (n < PAGE_SIZE)
memset(dst + n, 0, PAGE_SIZE - n);
if (n < bsize)
memset(dst + n, 0, bsize - n);

smp_wmb();

Expand Down Expand Up @@ -238,7 +239,7 @@ static void *pcache_wait_valid(struct daxfs_pcache *pc, u32 target_idx,
pcache_touch(&pc->slots[target_idx]);
if (pinned_slot)
*pinned_slot = (s32)target_idx;
return pc->data + (u64)target_idx * PAGE_SIZE;
return pc->data + (u64)target_idx * pc->block_size;
}
if (PCACHE_STATE(val) == PCACHE_STATE_FREE)
return ERR_PTR(-EAGAIN);
Expand Down Expand Up @@ -292,7 +293,7 @@ void *daxfs_pcache_get_page(struct daxfs_info *info, u64 ino, u64 pgoff,
pcache_touch(&pc->slots[slot_idx]);
if (pinned_slot)
*pinned_slot = (s32)slot_idx;
return pc->data + (u64)slot_idx * PAGE_SIZE;
return pc->data + (u64)slot_idx * pc->block_size;
}
}

Expand Down Expand Up @@ -334,7 +335,7 @@ static noinline void *pcache_slow_path(struct daxfs_pcache *pc,
pcache_touch(&pc->slots[idx]);
if (pinned_slot)
*pinned_slot = (s32)idx;
return pc->data + (u64)idx * PAGE_SIZE;
return pc->data + (u64)idx * pc->block_size;
}
break;

Expand Down Expand Up @@ -383,7 +384,7 @@ static noinline void *pcache_slow_path(struct daxfs_pcache *pc,
pcache_touch(&pc->slots[free_idx]);
if (pinned_slot)
*pinned_slot = (s32)free_idx;
return pc->data + (u64)free_idx * PAGE_SIZE;
return pc->data + (u64)free_idx * pc->block_size;
}
goto restart;
}
Expand Down Expand Up @@ -473,7 +474,7 @@ bool daxfs_is_pcache_data(struct daxfs_info *info, void *ptr)
if (!pc || !ptr)
return false;
return ptr >= pc->data &&
ptr < pc->data + (u64)pc->slot_count * PAGE_SIZE;
ptr < pc->data + (u64)pc->slot_count * pc->block_size;
}

/*
Expand Down Expand Up @@ -692,6 +693,8 @@ int daxfs_pcache_init(struct daxfs_info *info, const char *backing_path)
return -ENOMEM;

INIT_LIST_HEAD(&pc->backing_files);
pc->block_size = info->block_size;
pc->block_shift = ilog2(info->block_size);

hdr = daxfs_mem_ptr(info, pcache_offset);
if (le32_to_cpu(hdr->magic) != DAXFS_PCACHE_MAGIC) {
Expand Down
19 changes: 14 additions & 5 deletions daxfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,17 @@ static int daxfs_fill_super(struct super_block *sb, struct fs_context *fc)

/* Validate version */
if (le32_to_cpu(info->super->version) != DAXFS_VERSION) {
pr_err("daxfs: unsupported version %u\n",
le32_to_cpu(info->super->version));
pr_err("daxfs: unsupported version %u (expected %u)\n",
le32_to_cpu(info->super->version), DAXFS_VERSION);
ret = -EINVAL;
goto err_unmap;
}

/* Validate block_size matches native page size */
info->block_size = le32_to_cpu(info->super->block_size);
if (info->block_size != PAGE_SIZE) {
pr_err("daxfs: block_size %u does not match PAGE_SIZE %lu\n",
info->block_size, PAGE_SIZE);
ret = -EINVAL;
goto err_unmap;
}
Expand Down Expand Up @@ -320,8 +329,8 @@ static int daxfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct daxfs_info *info = DAXFS_SB(dentry->d_sb);

buf->f_type = DAXFS_SUPER_MAGIC;
buf->f_bsize = DAXFS_BLOCK_SIZE;
buf->f_blocks = info->size / DAXFS_BLOCK_SIZE;
buf->f_bsize = info->block_size;
buf->f_blocks = info->size / info->block_size;
buf->f_bfree = 0;
buf->f_bavail = 0;
buf->f_files = info->base_inode_count;
Expand All @@ -334,7 +343,7 @@ static int daxfs_statfs(struct dentry *dentry, struct kstatfs *buf)
u64 pool_size = le64_to_cpu(ovl->header->pool_size);

if (pool_size > pool_used) {
buf->f_bfree = (pool_size - pool_used) / DAXFS_BLOCK_SIZE;
buf->f_bfree = (pool_size - pool_used) / info->block_size;
buf->f_bavail = buf->f_bfree;
}
buf->f_ffree = UINT_MAX;
Expand Down
10 changes: 5 additions & 5 deletions daxfs/validate.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ int daxfs_validate_super(struct daxfs_info *info)
u64 bucket_array_size = ALIGN(
(u64)bucket_count *
sizeof(struct daxfs_overlay_bucket),
DAXFS_BLOCK_SIZE);
u64 min_size = DAXFS_BLOCK_SIZE + bucket_array_size;
info->block_size);
u64 min_size = info->block_size + bucket_array_size;

if (overlay_size < min_size) {
pr_err("daxfs: overlay region too small (%llu < %llu)\n",
Expand Down Expand Up @@ -245,9 +245,9 @@ int daxfs_validate_super(struct daxfs_info *info)

{
u64 meta_size = ALIGN((u64)slot_count * sizeof(struct daxfs_pcache_slot),
DAXFS_BLOCK_SIZE);
u64 data_size = (u64)slot_count * DAXFS_BLOCK_SIZE;
u64 min_size = DAXFS_BLOCK_SIZE + meta_size + data_size;
info->block_size);
u64 data_size = (u64)slot_count * info->block_size;
u64 min_size = info->block_size + meta_size + data_size;

if (pcache_size < min_size) {
pr_err("daxfs: pcache region too small (%llu < %llu)\n",
Expand Down
18 changes: 9 additions & 9 deletions include/daxfs_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,26 @@
#define DAXFS_IOC_GET_DMABUF _IO('D', 1) /* Get dma-buf fd for this mount */

#define DAXFS_SUPER_MAGIC 0x64617835 /* "dax5" */
#define DAXFS_VERSION 7
#define DAXFS_BLOCK_SIZE 4096
#define DAXFS_VERSION 8
#define DAXFS_MIN_BLOCK_SIZE 4096 /* Minimum (superblock/header padding) */
#define DAXFS_INODE_SIZE 64
#define DAXFS_NAME_MAX 255
#define DAXFS_DIRENT_SIZE (16 + DAXFS_NAME_MAX) /* ino + mode + name_len + reserved + name */
#define DAXFS_ROOT_INO 1

/*
* Superblock - at offset 0, 4KB
* Superblock - 4KB struct at offset 0, occupies one block (block_size bytes)
*
* On-DAX Layout:
* [ Superblock (4KB) | Base Image (optional) | Overlay (optional) | Page Cache (optional) ]
* [ Superblock (block_size) | Base Image (optional) | Overlay (optional) | Page Cache (optional) ]
*
* All layout metadata lives here - region headers only carry magic/version
* for validation, not duplicated layout fields.
*/
struct daxfs_super {
__le32 magic; /* DAXFS_SUPER_MAGIC */
__le32 version; /* DAXFS_VERSION */
__le32 block_size; /* 4096 */
__le32 block_size; /* Native page size at mkfs time */
__le32 reserved0;
__le64 total_size;

Expand Down Expand Up @@ -217,7 +217,7 @@ struct daxfs_ovl_inode_entry {
/*
 * Overlay pool entry that carries file data: a type tag, a reserved
 * word, and the data bytes for one page.
 *
 * NOTE(review): two declarations of `data` appear below; presumably the
 * fixed 4096-byte array is the pre-change line and the flexible array
 * member is its replacement, shown side by side. Only one can exist in
 * the real header; confirm against the file.
 */
struct daxfs_ovl_data_entry {
__le32 type; /* DAXFS_OVL_DATA */
__le32 reserved;
__u8 data[4096]; /* One page of file data */
__u8 data[]; /* One page of file data (block_size bytes) */
};

struct daxfs_ovl_dirent_entry {
Expand Down Expand Up @@ -261,9 +261,9 @@ struct daxfs_ovl_dirlist_entry {
* Slot count/hash_shift are in the main superblock.
*
* Region layout:
* [pcache_header (4KB)]
* [slot_metadata (slot_count * 16B, padded to 4KB)]
* [slot_data (slot_count * 4KB)]
* [pcache_header (block_size)]
* [slot_metadata (slot_count * 16B, padded to block_size)]
* [slot_data (slot_count * block_size)]
*/

#define DAXFS_PCACHE_MAGIC 0x70636163 /* "pcac" */
Expand Down
3 changes: 2 additions & 1 deletion tests/test_mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static int tests_run = 0;
static int tests_passed = 0;

#define TEST_FILE "mmap_test_file"
#define PAGE_SIZE 4096
static long PAGE_SIZE;

#define TEST_START(name) do { \
printf(" TEST: %s ... ", name); \
Expand Down Expand Up @@ -691,6 +691,7 @@ int main(int argc, char *argv[])
return 1;
}
mountpoint = argv[1];
PAGE_SIZE = sysconf(_SC_PAGESIZE);

printf("DAXFS mmap test suite\n");
printf("Mountpoint: %s\n\n", mountpoint);
Expand Down
5 changes: 3 additions & 2 deletions tests/test_overlay.sh
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,9 @@ setup_split() {
echo "Nested split file" > "$SPLIT_SOURCE/subdir/nested.txt"
ln -s hello.txt "$SPLIT_SOURCE/link_to_hello"

# Create a multi-page file (larger than 4KB)
dd if=/dev/urandom of="$SPLIT_SOURCE/largefile.bin" bs=4096 count=8 2>/dev/null
# Create a multi-page file (exactly 4 pages at the native page size)
PAGE_SIZE=$(getconf PAGESIZE)
dd if=/dev/urandom of="$SPLIT_SOURCE/largefile.bin" bs="$PAGE_SIZE" count=4 2>/dev/null
LARGE_CKSUM=$(md5sum "$SPLIT_SOURCE/largefile.bin" | awk '{print $1}')

# Create more files to exercise multiple cache slots
Expand Down
Loading