From f59bccd5fa90f54599eed49a18ff7fb7db3e8135 Mon Sep 17 00:00:00 2001
From: Ryan Breen <ryan@ryanbreen.com>
Date: Mon, 22 Dec 2025 14:18:18 -0500
Subject: [PATCH 1/4] Fix scheduler desync in yield_current() causing fork
 return corruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

yield_current() was calling schedule() which updates self.current_thread,
but no actual context switch happened. This caused the scheduler to get
out of sync with reality:

1. Thread A runs, calls yield (via sys_yield or pipe blocking)
2. yield_current() calls schedule(), returns (A, B)
3. schedule() sets self.current_thread = B
4. No actual context switch - thread A continues running
5. Timer fires, schedule() returns (B, C), thinks B is current
6. Context save stores thread A's registers to thread B's context
7. Thread B's fork return value (RAX) is corrupted with thread A's RAX

The fix is to just set need_resched flag in yield_current(). The actual
scheduling decision and context switch will happen at the next interrupt
return via check_need_resched_and_switch(), which properly:
- Identifies the currently running thread from interrupt context
- Saves that thread's registers
- Restores the target thread's context

This was causing the pipe_fork_test parent to run child code because
the parent's RAX (fork return value = child PID) was being overwritten
with 0 from another thread's register state.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 kernel/src/task/scheduler.rs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/kernel/src/task/scheduler.rs b/kernel/src/task/scheduler.rs
index 24fcd80..9edf174 100644
--- a/kernel/src/task/scheduler.rs
+++ b/kernel/src/task/scheduler.rs
@@ -415,12 +415,18 @@ pub fn current_thread_id() -> Option<u64> {
 
 /// Yield the current thread
 pub fn yield_current() {
-    // This will be called from timer interrupt or sys_yield
-    // The actual context switch happens in the interrupt handler
-    if let Some((old_id, new_id)) = schedule() {
-        crate::serial_println!("Scheduling: {} -> {}", old_id, new_id);
-        // Context switch will be performed by caller
-    }
+    // CRITICAL FIX: Do NOT call schedule() here!
+    // schedule() updates self.current_thread, but no actual context switch happens.
+    // This caused the scheduler to get out of sync with reality:
+    //   1. Thread A is running
+    //   2. yield_current() calls schedule(), returns (A, B), sets current_thread = B
+    //   3. No actual context switch - thread A continues running
+    //   4. Timer fires, schedule() returns (B, C), saves thread A's regs to thread B's context
+    //   5. Thread B's context is now corrupted with thread A's registers
+    //
+    // Instead, just set need_resched flag. The actual scheduling decision and context
+    // switch will happen at the next interrupt return via check_need_resched_and_switch.
+    set_need_resched();
 }
 
 /// Get pending context switch if any

From a1cf4ee2e9e22673cb78c7bfb3ce6586b53eacd7 Mon Sep 17 00:00:00 2001
From: Ryan Breen <ryan@ryanbreen.com>
Date: Mon, 29 Dec 2025 07:56:15 -0500
Subject: [PATCH 2/4] Remove get_pending_switch() to prevent scheduler state
 corruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function had the same bug as yield_current() - it called schedule()
which mutates self.current_thread. Calling get_pending_switch() "just to
peek" at what scheduling decision would be made would corrupt the scheduler
state, causing the same context corruption bug.

Since the function was unused (#[allow(dead_code)]), removing it is the
safest fix. If a peek function is needed in the future, it must be
implemented without calling schedule().

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 kernel/src/task/scheduler.rs | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/kernel/src/task/scheduler.rs b/kernel/src/task/scheduler.rs
index 9edf174..2a459a4 100644
--- a/kernel/src/task/scheduler.rs
+++ b/kernel/src/task/scheduler.rs
@@ -429,14 +429,9 @@ pub fn yield_current() {
     set_need_resched();
 }
 
-/// Get pending context switch if any
-/// Returns Some((old_thread_id, new_thread_id)) if a switch is pending
-#[allow(dead_code)]
-pub fn get_pending_switch() -> Option<(u64, u64)> {
-    // For now, just call schedule to see if we would switch
-    // In a real implementation, we might cache this decision
-    schedule()
-}
+// NOTE: get_pending_switch() was removed because it called schedule() which mutates
+// self.current_thread. Calling it "just to peek" would corrupt scheduler state.
+// If needed in future, implement a true peek function that doesn't mutate state.
 
 /// Allocate a new thread ID
 #[allow(dead_code)]

From 8df81c080b1d10e43dbc9a98fc39e6f31725f505 Mon Sep 17 00:00:00 2001
From: Ryan Breen <ryan@ryanbreen.com>
Date: Mon, 29 Dec 2025 14:28:48 -0500
Subject: [PATCH 3/4] Fix pipe/fork issues and add comprehensive IPC tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit addresses multiple interconnected issues in process forking,
pipe handling, and resource cleanup:

Kernel Fixes:
- Fix register preservation in timer_entry.asm: RCX was being overwritten
  with CS value during privilege check, causing RAX=3 on userspace entry
- Add close_all_fds() to Process::terminate() to properly decrement pipe
  reader/writer counts when processes exit
- Add FdTable::drop() as safety net for fd cleanup
- Increase HEAP_SIZE from 1 MiB to 4 MiB to support concurrent process tests
- Add scheduler unit test for yield_current() to prevent regression
- Add architectural constraint comment warning against schedule() for peeking
- Add dup2 syscall (number 33) for file descriptor duplication

New Tests:
- pipe_fork_test.rs: Tests pipe IPC across fork with EOF detection
- pipe_concurrent_test.rs: Tests 4 concurrent writers to single pipe
- pipe_refcount_test.rs: Tests reference counting, EOF, EPIPE, dup2

Test Improvements:
- Add EAGAIN retry loops to pipe tests for proper non-blocking handling
- Update register_init_test to validate all GP registers are zeroed
- Add boot stages 77-78 for pipe fork and concurrent tests

All 78/78 boot stages now pass.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 kernel/src/interrupts/context_switch.rs |   4 +-
 kernel/src/interrupts/timer_entry.asm   |  16 +-
 kernel/src/ipc/fd.rs                    |  43 ++
 kernel/src/ipc/mod.rs                   |   2 +-
 kernel/src/main.rs                      |   8 +
 kernel/src/memory/heap.rs               |  21 +-
 kernel/src/process/manager.rs           | 271 +++++++++-
 kernel/src/process/process.rs           |  40 ++
 kernel/src/syscall/dispatcher.rs        |   1 +
 kernel/src/syscall/handler.rs           |   1 +
 kernel/src/syscall/handlers.rs          | 103 +++-
 kernel/src/syscall/mod.rs               |   2 +
 kernel/src/syscall/pipe.rs              |  26 +-
 kernel/src/task/scheduler.rs            |  90 ++++
 kernel/src/task/thread.rs               |  26 +
 kernel/src/test_exec.rs                 |  72 ++-
 userspace/tests/Cargo.toml              |  12 +
 userspace/tests/build.sh                |   3 +
 userspace/tests/pipe_concurrent_test.rs | 348 ++++++++++++
 userspace/tests/pipe_fork_test.rs       | 272 ++++++++++
 userspace/tests/pipe_refcount_test.rs   | 676 ++++++++++++++++++++++++
 userspace/tests/register_init_test.rs   | 106 ++--
 xtask/src/main.rs                       |  14 +
 23 files changed, 2059 insertions(+), 98 deletions(-)
 create mode 100644 userspace/tests/pipe_concurrent_test.rs
 create mode 100644 userspace/tests/pipe_fork_test.rs
 create mode 100644 userspace/tests/pipe_refcount_test.rs

diff --git a/kernel/src/interrupts/context_switch.rs b/kernel/src/interrupts/context_switch.rs
index 2cb842f..06d190c 100644
--- a/kernel/src/interrupts/context_switch.rs
+++ b/kernel/src/interrupts/context_switch.rs
@@ -608,8 +608,8 @@ fn setup_first_userspace_entry(
     saved_regs.r14 = 0;
     saved_regs.r15 = 0;
 
-    // DEBUG: Log that we're zeroing RDI for first entry
-    log::info!("FIRST_ENTRY t{}: zeroing rdi to 0", thread_id);
+    // DEBUG: Log that registers were zeroed for first entry
+    log::info!("FIRST_ENTRY t{}: zeroed all registers", thread_id);
 
     // CRITICAL: Now set up CR3 and kernel stack for this thread
     // This must happen BEFORE we iretq to userspace
diff --git a/kernel/src/interrupts/timer_entry.asm b/kernel/src/interrupts/timer_entry.asm
index 2e9f238..55f098f 100644
--- a/kernel/src/interrupts/timer_entry.asm
+++ b/kernel/src/interrupts/timer_entry.asm
@@ -149,12 +149,16 @@ timer_interrupt_entry:
     pop rdx
     pop rcx
     pop rax
-    
+
     ; Check if we're returning to ring 3 (userspace)
     ; Frame is now: [RIP][CS][RFLAGS][RSP][SS] at RSP
-    mov rcx, [rsp + 8]         ; Get CS from interrupt frame (use RCX instead of RAX)
+    ; CRITICAL FIX: Save RCX before using it for privilege check!
+    ; We just restored RCX from saved_regs, don't clobber it.
+    push rcx                   ; Save userspace RCX value
+    mov rcx, [rsp + 16]        ; Get CS from interrupt frame (offset +8 for our push)
     and rcx, 3                 ; Check privilege level
     cmp rcx, 3                 ; Ring 3?
+    pop rcx                    ; Restore userspace RCX (before conditional jump!)
     jne .no_userspace_return
     
     ; FIXED: CR3 switching now happens in the scheduler during context switch
@@ -217,7 +221,8 @@ timer_interrupt_entry:
     ; Swap back to kernel GS temporarily
     swapgs
 
-    ; Save registers we need
+    ; Save registers we need (including RAX which is caller-saved)
+    push rax
     push rdi
     push rsi
     push rdx
@@ -227,9 +232,9 @@ timer_interrupt_entry:
     push r10
     push r11
 
-    ; Pass pointer to IRETQ frame (RIP is at RSP+64 after our pushes)
+    ; Pass pointer to IRETQ frame (RIP is at RSP+72 after our 9 pushes)
     mov rdi, rsp
-    add rdi, 64         ; Skip 8 pushed registers to point to RIP
+    add rdi, 72         ; Skip 9 pushed registers to point to RIP
     call trace_iretq_to_ring3
 
     ; Restore registers
@@ -241,6 +246,7 @@ timer_interrupt_entry:
     pop rdx
     pop rsi
     pop rdi
+    pop rax
 
     ; Debug: Output marker after register restore
     push rax
diff --git a/kernel/src/ipc/fd.rs b/kernel/src/ipc/fd.rs
index bc366d4..0479939 100644
--- a/kernel/src/ipc/fd.rs
+++ b/kernel/src/ipc/fd.rs
@@ -102,6 +102,14 @@ impl Default for FdTable {
 
 impl Clone for FdTable {
     fn clone(&self) -> Self {
+        // Log what we're cloning
+        log::debug!("FdTable::clone() - cloning fd_table with entries:");
+        for i in 0..10 {
+            if let Some(fd_entry) = self.fds[i].as_ref() {
+                log::debug!("  fd[{}] = {:?}", i, fd_entry.kind);
+            }
+        }
+
         let cloned_fds = alloc::boxed::Box::new((*self.fds).clone());
 
         // Increment pipe reference counts for all cloned pipe fds
@@ -255,3 +263,38 @@ impl FdTable {
         Err(24) // EMFILE
     }
 }
+
+/// Drop implementation for FdTable
+///
+/// When a process exits and its FdTable is dropped, we need to properly
+/// decrement pipe reader/writer counts for any open pipe fds. This ensures
+/// that when all writers close, readers get EOF instead of EAGAIN.
+///
+/// Note: UdpSocket cleanup is handled by UdpSocket's own Drop impl when the
+/// Arc reference count goes to zero.
+impl Drop for FdTable {
+    fn drop(&mut self) {
+        log::debug!("FdTable::drop() - closing all fds and decrementing pipe counts");
+        for i in 0..MAX_FDS {
+            if let Some(fd_entry) = self.fds[i].take() {
+                match fd_entry.kind {
+                    FdKind::PipeRead(buffer) => {
+                        buffer.lock().close_read();
+                        log::debug!("FdTable::drop() - closed pipe read fd {}", i);
+                    }
+                    FdKind::PipeWrite(buffer) => {
+                        buffer.lock().close_write();
+                        log::debug!("FdTable::drop() - closed pipe write fd {}", i);
+                    }
+                    FdKind::UdpSocket(_) => {
+                        // Socket cleanup handled by UdpSocket::Drop when Arc refcount reaches 0
+                        log::debug!("FdTable::drop() - releasing UDP socket fd {}", i);
+                    }
+                    FdKind::StdIo(_) => {
+                        // StdIo doesn't need cleanup
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/kernel/src/ipc/mod.rs b/kernel/src/ipc/mod.rs
index af2a78f..1ccedfc 100644
--- a/kernel/src/ipc/mod.rs
+++ b/kernel/src/ipc/mod.rs
@@ -8,5 +8,5 @@ pub mod fd;
 pub mod pipe;
 
 // Re-export public API - some of these are not used yet but are part of the public API
-pub use fd::{FdKind, FdTable};
+pub use fd::{FdKind, FdTable, MAX_FDS};
 pub use pipe::create_pipe;
diff --git a/kernel/src/main.rs b/kernel/src/main.rs
index 112c6d9..975c16d 100644
--- a/kernel/src/main.rs
+++ b/kernel/src/main.rs
@@ -622,6 +622,14 @@ fn kernel_main_continue() -> ! {
         log::info!("=== IPC TEST: Pipe syscall functionality ===");
         test_exec::test_pipe();
 
+        // Test pipe + fork IPC
+        log::info!("=== IPC TEST: Pipe + fork concurrency ===");
+        test_exec::test_pipe_fork();
+
+        // Test concurrent pipe writes from multiple processes
+        log::info!("=== IPC TEST: Concurrent pipe writes ===");
+        test_exec::test_pipe_concurrent();
+
         // Run fault tests to validate privilege isolation
         log::info!("=== FAULT TEST: Running privilege violation tests ===");
         userspace_fault_tests::run_fault_tests();
diff --git a/kernel/src/memory/heap.rs b/kernel/src/memory/heap.rs
index bc7c6aa..9c8320b 100644
--- a/kernel/src/memory/heap.rs
+++ b/kernel/src/memory/heap.rs
@@ -3,7 +3,26 @@ use x86_64::structures::paging::{Mapper, OffsetPageTable, Page, PageTableFlags,
 use x86_64::VirtAddr;
 
 pub const HEAP_START: u64 = 0x_4444_4444_0000;
-pub const HEAP_SIZE: u64 = 1024 * 1024; // 1 MiB
+
+/// Heap size of 4 MiB.
+///
+/// This size was chosen to support concurrent process tests which require:
+/// - Multiple child processes (4+) running simultaneously after fork()
+/// - Each process needs: fd table (~6KB), pipe buffers (4KB each), ProcessInfo struct,
+///   Thread structs, page tables, and kernel stack allocations
+/// - Total per-process overhead is approximately 50-100KB depending on fd usage
+///
+/// IMPORTANT: We use a bump allocator which only reclaims memory when ALL allocations
+/// are freed. This means memory fragmentation is effectively permanent during a test run.
+/// The 4 MiB size provides sufficient headroom for:
+/// - Boot initialization allocations (~500KB)
+/// - Running 10+ concurrent processes with full fd tables
+/// - Pipe buffers for IPC testing
+/// - Safety margin for test variations
+///
+/// Reduced sizes (1-2 MiB) caused OOM during concurrent fork/pipe tests.
+/// Increased from 1 MiB based on empirical testing of pipe_concurrent_test scenarios.
+pub const HEAP_SIZE: u64 = 4 * 1024 * 1024;
 
 /// A simple bump allocator
 struct BumpAllocator {
diff --git a/kernel/src/process/manager.rs b/kernel/src/process/manager.rs
index 0bc8bff..db42605 100644
--- a/kernel/src/process/manager.rs
+++ b/kernel/src/process/manager.rs
@@ -535,10 +535,12 @@ impl ProcessManager {
 
     /// Fork a process with a pre-allocated page table
     /// This version accepts a page table created outside the lock to avoid deadlock
+    #[allow(dead_code)] // Part of public fork API - available for deadlock-free fork patterns
     pub fn fork_process_with_page_table(
         &mut self,
         parent_pid: ProcessId,
         #[cfg_attr(not(feature = "testing"), allow(unused_variables))] userspace_rsp: Option<u64>,
+        #[cfg_attr(not(feature = "testing"), allow(unused_variables))] return_rip: Option<u64>,
         #[cfg_attr(not(feature = "testing"), allow(unused_variables, unused_mut))] mut child_page_table: Box<ProcessPageTable>,
     ) -> Result<ProcessId, &'static str> {
         // Get the parent process info we need
@@ -580,9 +582,10 @@ impl ProcessManager {
         child_process.parent = Some(parent_pid);
 
         // Load the same program into the child page table
+        // Use the parent's name to load the correct binary (not hardcoded fork_test)
         #[cfg(feature = "testing")]
         {
-            let elf_buf = crate::userspace_test::get_test_binary("fork_test");
+            let elf_buf = crate::userspace_test::get_test_binary(&parent_name);
             let loaded_elf =
                 crate::elf::load_elf_into_page_table(&elf_buf, child_page_table.as_mut())?;
 
@@ -595,7 +598,8 @@ impl ProcessManager {
             child_process.heap_end = heap_base;
 
             log::info!(
-                "fork_process: Loaded fork_test.elf into child, entry point: {:#x}, heap_start: {:#x}",
+                "fork_process: Loaded {}.elf into child, entry point: {:#x}, heap_start: {:#x}",
+                parent_name,
                 loaded_elf.entry_point,
                 heap_base
             );
@@ -616,12 +620,106 @@ impl ProcessManager {
                 child_pid,
                 &parent_thread_info,
                 userspace_rsp,
+                return_rip,
+                None, // No parent context - will use parent_thread.context
+                child_process,
+            )
+        }
+    }
+
+    /// Fork a process with the ACTUAL parent register state from syscall frame
+    /// This is the preferred method as it captures the exact register values at fork time,
+    /// not the stale values from the last context switch.
+    pub fn fork_process_with_parent_context(
+        &mut self,
+        parent_pid: ProcessId,
+        parent_context: crate::task::thread::CpuContext,
+        #[cfg_attr(not(feature = "testing"), allow(unused_variables, unused_mut))]
+        mut child_page_table: Box<ProcessPageTable>,
+    ) -> Result<ProcessId, &'static str> {
+        // Get the parent process info we need
+        #[cfg_attr(not(feature = "testing"), allow(unused_variables))]
+        let (parent_name, parent_entry_point, parent_thread_info) = {
+            let parent = self
+                .processes
+                .get(&parent_pid)
+                .ok_or("Parent process not found")?;
+
+            let _parent_thread = parent
+                .main_thread
+                .as_ref()
+                .ok_or("Parent process has no main thread")?;
+
+            // Clone what we need to avoid borrow issues
+            (
+                parent.name.clone(),
+                parent.entry_point,
+                _parent_thread.clone(),
+            )
+        };
+
+        // Allocate a new PID for the child
+        let child_pid = ProcessId::new(self.next_pid.fetch_add(1, atomic::Ordering::SeqCst));
+
+        log::info!(
+            "Forking process {} '{}' -> child PID {}",
+            parent_pid.as_u64(),
+            parent_name,
+            child_pid.as_u64()
+        );
+
+        // Create child process name
+        let child_name = format!("{}_child_{}", parent_name, child_pid.as_u64());
+
+        // Create the child process with the same entry point
+        let mut child_process = Process::new(child_pid, child_name.clone(), parent_entry_point);
+        child_process.parent = Some(parent_pid);
+
+        // Load the same program into the child page table
+        #[cfg(feature = "testing")]
+        {
+            let elf_buf = crate::userspace_test::get_test_binary(&parent_name);
+            let loaded_elf =
+                crate::elf::load_elf_into_page_table(&elf_buf, child_page_table.as_mut())?;
+
+            child_process.entry_point = loaded_elf.entry_point;
+            let heap_base = loaded_elf.segments_end;
+            child_process.heap_start = heap_base;
+            child_process.heap_end = heap_base;
+
+            log::info!(
+                "fork_process: Loaded {}.elf into child, entry point: {:#x}, heap_start: {:#x}",
+                parent_name,
+                loaded_elf.entry_point,
+                heap_base
+            );
+        }
+        #[cfg(not(feature = "testing"))]
+        {
+            log::error!("fork_process: Cannot reload ELF - testing feature not enabled");
+            return Err("Cannot implement fork without testing feature");
+        }
+
+        #[cfg(feature = "testing")]
+        {
+            child_process.page_table = Some(child_page_table);
+
+            // Use the actual parent context from the syscall frame
+            self.complete_fork(
+                parent_pid,
+                child_pid,
+                &parent_thread_info,
+                Some(parent_context.rsp),
+                Some(parent_context.rip),
+                Some(parent_context), // Pass the actual parent context
                 child_process,
             )
         }
     }
 
     /// Complete the fork operation after page table is created
+    /// If `parent_context_override` is provided, it will be used for the child's context
+    /// instead of the stale values from `parent_thread.context`.
     #[allow(dead_code)]
     fn complete_fork(
         &mut self,
@@ -629,6 +727,8 @@ impl ProcessManager {
         child_pid: ProcessId,
         parent_thread: &Thread,
         userspace_rsp: Option<u64>,
+        return_rip: Option<u64>,
+        parent_context_override: Option<crate::task::thread::CpuContext>,
         mut child_process: Process,
     ) -> Result<ProcessId, &'static str> {
         log::info!(
@@ -736,7 +836,27 @@ impl ProcessManager {
         child_thread.kernel_stack_top = Some(child_kernel_stack_top);
 
         // Copy parent's thread context
-        child_thread.context = parent_thread.context.clone();
+        // CRITICAL: Use parent_context_override if provided - this contains the ACTUAL
+        // register values from the syscall frame, not the stale values from last context switch
+        child_thread.context = match parent_context_override {
+            Some(ctx) => {
+                log::debug!("fork: Using actual parent context from syscall frame");
+                log::debug!(
+                    "  Parent registers: rbx={:#x}, r12={:#x}, r13={:#x}, r14={:#x}, r15={:#x}",
+                    ctx.rbx, ctx.r12, ctx.r13, ctx.r14, ctx.r15
+                );
+                ctx
+            }
+            None => {
+                log::debug!("fork: Using stale parent_thread.context (may have incorrect register values!)");
+                parent_thread.context.clone()
+            }
+        };
+
+        // CRITICAL: Set has_started=true for forked children so they use the
+        // restore_userspace_context path which preserves the cloned register values
+        // instead of the first-run path which zeros all registers.
+        child_thread.has_started = true;
 
         // Log the child's context for debugging
         log::debug!("Child thread context after copy:");
@@ -749,21 +869,88 @@ impl ProcessManager {
         // In x86_64, system call return values go in RAX
         child_thread.context.rax = 0;
 
-        // Update child's stack pointer based on userspace RSP if provided
-        if let Some(user_rsp) = userspace_rsp {
-            child_thread.context.rsp = user_rsp;
-            log::info!("fork: Using userspace RSP {:#x} for child", user_rsp);
-        } else {
-            // Calculate the child's RSP based on the parent's stack usage
-            let parent_stack_used = parent_thread.stack_top.as_u64() - parent_thread.context.rsp;
-            child_thread.context.rsp = child_stack_top.as_u64() - parent_stack_used;
+        // Calculate the child's stack pointer based on parent's stack usage
+        // CRITICAL: We MUST calculate RSP relative to the child's stack, not use parent's RSP directly!
+        // The parent's RSP points into parent's stack address space, but the child has its own stack.
+        let parent_rsp = userspace_rsp.unwrap_or(parent_thread.context.rsp);
+        let parent_stack_used = parent_thread.stack_top.as_u64().saturating_sub(parent_rsp);
+        let child_rsp = child_stack_top.as_u64().saturating_sub(parent_stack_used);
+        child_thread.context.rsp = child_rsp;
+        log::info!(
+            "fork: parent_stack_top={:#x}, parent_rsp={:#x}, used={:#x}",
+            parent_thread.stack_top.as_u64(),
+            parent_rsp,
+            parent_stack_used
+        );
+        log::info!(
+            "fork: child_stack_top={:#x}, child_rsp={:#x}",
+            child_stack_top.as_u64(),
+            child_rsp
+        );
+
+        // CRITICAL: Copy the parent's stack contents to the child's stack
+        // This ensures local variables (like `write_fd`, `pipefd`, etc.) are preserved
+        if parent_stack_used > 0 && parent_stack_used <= (64 * 1024) {
+            // Access parent's stack through the kernel page table
+            // Both parent and child stacks are identity-mapped in kernel space
+            let parent_stack_src = parent_rsp as *const u8;
+            let child_stack_dst = child_rsp as *mut u8;
+
+            // Debug: Show first few words of parent's stack before copy
+            log::debug!(
+                "fork: Stack copy debug - parent_src={:#x}, child_dst={:#x}, bytes={}",
+                parent_rsp,
+                child_rsp,
+                parent_stack_used
+            );
+            unsafe {
+                let parent_words = parent_stack_src as *const u64;
+                for i in 0..core::cmp::min(16, (parent_stack_used / 8) as isize) {
+                    let val = *parent_words.offset(i);
+                    if val != 0 {
+                        log::debug!("  parent stack[{}]: {:#x}", i, val);
+                    }
+                }
+            }
+
+            unsafe {
+                core::ptr::copy_nonoverlapping(
+                    parent_stack_src,
+                    child_stack_dst,
+                    parent_stack_used as usize,
+                );
+            }
+
+            // Debug: Verify copy by checking first few words of child's stack
+            unsafe {
+                let child_words = child_stack_dst as *const u64;
+                for i in 0..core::cmp::min(16, (parent_stack_used / 8) as isize) {
+                    let val = *child_words.offset(i);
+                    if val != 0 {
+                        log::debug!("  child stack[{}]: {:#x}", i, val);
+                    }
+                }
+            }
+
             log::info!(
-                "fork: Calculated child RSP {:#x} based on parent stack usage {:#x}",
-                child_thread.context.rsp,
+                "fork: Copied {} bytes of stack from parent to child",
                 parent_stack_used
             );
         }
 
+        // Update child's instruction pointer to return to the instruction after fork syscall
+        // The return RIP comes from RCX which was saved by the syscall instruction
+        if let Some(rip) = return_rip {
+            child_thread.context.rip = rip;
+            log::info!("fork: Using return RIP {:#x} for child", rip);
+        } else {
+            // If no return RIP provided, keep the parent's RIP (fallback for sys_fork without frame)
+            log::info!(
+                "fork: No return RIP provided, using parent RIP {:#x}",
+                child_thread.context.rip
+            );
+        }
+
         log::info!(
             "Created child thread {} with entry point {:#x}",
             child_thread_id,
@@ -776,6 +963,34 @@ impl ProcessManager {
         // Store the stack in the child process
         child_process.stack = Some(Box::new(child_stack));
 
+        // Clone the file descriptor table from parent to child
+        // This is critical for pipes, sockets, and other shared resources
+        if let Some(parent) = self.processes.get(&parent_pid) {
+            // Debug: Log what's in the parent's fd_table BEFORE cloning
+            log::debug!(
+                "fork: Parent {} '{}' fd_table BEFORE clone:",
+                parent_pid.as_u64(),
+                parent.name
+            );
+            for i in 0..10 {
+                if let Some(fd_entry) = parent.fd_table.get(i) {
+                    log::debug!("  parent fd[{}] = {:?}", i, fd_entry.kind);
+                }
+            }
+
+            child_process.fd_table = parent.fd_table.clone();
+            log::debug!(
+                "fork: Cloned fd_table from parent {} to child {}",
+                parent_pid.as_u64(),
+                child_pid.as_u64()
+            );
+        } else {
+            log::error!(
+                "fork: CRITICAL - Parent {} not found when cloning fd_table!",
+                parent_pid.as_u64()
+            );
+        }
+
         // Add the child to the parent's children list
         if let Some(parent) = self.processes.get_mut(&parent_pid) {
             parent.children.push(child_pid);
@@ -890,17 +1105,24 @@ impl ProcessManager {
             })?;
         */
 
-        // Load the fork_test ELF into the child (same program the parent is running)
+        // Load the parent's ELF into the child (use parent's name, not hardcoded fork_test)
+        // We need to save the parent name before the mutable borrow
+        let parent_binary_name = {
+            let parent = self.processes.get(&parent_pid).ok_or("Parent process not found")?;
+            parent.name.clone()
+        };
+
         #[cfg(feature = "testing")]
         {
-            let elf_buf = crate::userspace_test::get_test_binary("fork_test");
+            let elf_buf = crate::userspace_test::get_test_binary(&parent_binary_name);
             let loaded_elf =
                 crate::elf::load_elf_into_page_table(&elf_buf, child_page_table.as_mut())?;
 
             // Update the child process entry point to match the loaded ELF
             child_process.entry_point = loaded_elf.entry_point;
             log::info!(
-                "fork_process: Loaded fork_test.elf into child, entry point: {:#x}",
+                "fork_process: Loaded {}.elf into child, entry point: {:#x}",
+                parent_binary_name,
                 loaded_elf.entry_point
             );
         }
@@ -990,7 +1212,11 @@ impl ProcessManager {
             time_slice: parent_thread.time_slice,
             entry_point: None, // Userspace threads don't have kernel entry points
             privilege: parent_thread.privilege,
-            has_started: false, // New thread hasn't run yet
+            // CRITICAL: Set has_started=true for forked children so they use the
+            // restore_userspace_context path which preserves the cloned register values
+            // instead of the first-run path which zeros all registers.
+            // The child should resume from the same context as the parent.
+            has_started: true,
         };
 
         // CRITICAL: Use the userspace RSP if provided (from syscall frame)
@@ -1029,6 +1255,17 @@ impl ProcessManager {
         )?;
         super::fork::copy_process_state(parent, &mut child_process)?;
 
+        // Clone the file descriptor table from parent to child
+        // This is critical for pipes, sockets, and other shared resources
+        // The FdTable::clone() implementation handles incrementing reference counts
+        // for pipes and other shared resources
+        child_process.fd_table = parent.fd_table.clone();
+        log::debug!(
+            "fork: Cloned fd_table from parent {} to child {}",
+            parent_pid.as_u64(),
+            child_pid.as_u64()
+        );
+
         // Set the child thread as the main thread of the child process
         child_process.set_main_thread(child_thread);
 
diff --git a/kernel/src/process/process.rs b/kernel/src/process/process.rs
index aead5ee..f42af28 100644
--- a/kernel/src/process/process.rs
+++ b/kernel/src/process/process.rs
@@ -160,11 +160,51 @@ impl Process {
     }
 
     /// Terminate the process
+    ///
+    /// This sets the process state to Terminated and closes all file descriptors
+    /// to properly release resources (e.g., decrement pipe reader/writer counts).
     pub fn terminate(&mut self, exit_code: i32) {
+        // Close all file descriptors before setting state to Terminated
+        // This ensures pipe counts are properly decremented so readers get EOF
+        self.close_all_fds();
+
         self.state = ProcessState::Terminated(exit_code);
         self.exit_code = Some(exit_code);
     }
 
+    /// Close all file descriptors in this process
+    ///
+    /// This properly decrements pipe reader/writer counts, ensuring that
+    /// when all writers close, readers get EOF instead of EAGAIN.
+    fn close_all_fds(&mut self) {
+        use crate::ipc::FdKind;
+
+        log::debug!("Process::close_all_fds() for process '{}'", self.name);
+
+        // Close each fd, which will decrement pipe counts
+        for fd in 0..crate::ipc::MAX_FDS {
+            if let Ok(fd_entry) = self.fd_table.close(fd as i32) {
+                match fd_entry.kind {
+                    FdKind::PipeRead(buffer) => {
+                        buffer.lock().close_read();
+                        log::debug!("Process::close_all_fds() - closed pipe read fd {}", fd);
+                    }
+                    FdKind::PipeWrite(buffer) => {
+                        buffer.lock().close_write();
+                        log::debug!("Process::close_all_fds() - closed pipe write fd {}", fd);
+                    }
+                    FdKind::UdpSocket(_) => {
+                        // Socket cleanup handled by UdpSocket::Drop when Arc refcount reaches 0
+                        log::debug!("Process::close_all_fds() - released UDP socket fd {}", fd);
+                    }
+                    FdKind::StdIo(_) => {
+                        // StdIo doesn't need cleanup
+                    }
+                }
+            }
+        }
+    }
+
     /// Check if process is terminated
     pub fn is_terminated(&self) -> bool {
         matches!(self.state, ProcessState::Terminated(_))
diff --git a/kernel/src/syscall/dispatcher.rs b/kernel/src/syscall/dispatcher.rs
index fd99a14..47bc256 100644
--- a/kernel/src/syscall/dispatcher.rs
+++ b/kernel/src/syscall/dispatcher.rs
@@ -55,5 +55,6 @@ pub fn dispatch_syscall(
         SyscallNumber::RecvFrom => super::socket::sys_recvfrom(arg1, arg2, arg3, arg4, arg5, arg6),
         SyscallNumber::Pipe => super::pipe::sys_pipe(arg1),
         SyscallNumber::Close => super::pipe::sys_close(arg1 as i32),
+        SyscallNumber::Dup2 => handlers::sys_dup2(arg1, arg2),
     }
 }
diff --git a/kernel/src/syscall/handler.rs b/kernel/src/syscall/handler.rs
index 2791e75..63cef0a 100644
--- a/kernel/src/syscall/handler.rs
+++ b/kernel/src/syscall/handler.rs
@@ -175,6 +175,7 @@ pub extern "C" fn rust_syscall_handler(frame: &mut SyscallFrame) {
         }
         Some(SyscallNumber::Pipe) => super::pipe::sys_pipe(args.0),
         Some(SyscallNumber::Close) => super::pipe::sys_close(args.0 as i32),
+        Some(SyscallNumber::Dup2) => super::handlers::sys_dup2(args.0, args.1),
         None => {
             log::warn!("Unknown syscall number: {} - returning ENOSYS", syscall_num);
             SyscallResult::Err(super::ErrorCode::NoSys as u64)
diff --git a/kernel/src/syscall/handlers.rs b/kernel/src/syscall/handlers.rs
index 8754398..cf0e2cf 100644
--- a/kernel/src/syscall/handlers.rs
+++ b/kernel/src/syscall/handlers.rs
@@ -391,19 +391,40 @@ pub fn sys_get_time() -> SyscallResult {
 /// sys_fork - Basic fork implementation
 /// sys_fork with syscall frame - provides access to actual userspace context
 pub fn sys_fork_with_frame(frame: &super::handler::SyscallFrame) -> SyscallResult {
-    // Store the userspace RSP for the child to inherit
-    let userspace_rsp = frame.rsp;
-    log::info!("sys_fork_with_frame: userspace RSP = {:#x}", userspace_rsp);
+    // Create a CpuContext from the syscall frame - this captures the ACTUAL register
+    // values at the time of the syscall, not the stale values from the last context switch
+    let parent_context = crate::task::thread::CpuContext::from_syscall_frame(frame);
 
-    // Call fork with the userspace context
-    sys_fork_with_rsp(userspace_rsp)
+    log::info!(
+        "sys_fork_with_frame: userspace RSP = {:#x}, return RIP = {:#x}",
+        parent_context.rsp,
+        parent_context.rip
+    );
+
+    // Debug: log some callee-saved registers that might hold local variables
+    log::debug!(
+        "sys_fork_with_frame: rbx={:#x}, rbp={:#x}, r12={:#x}, r13={:#x}, r14={:#x}, r15={:#x}",
+        parent_context.rbx,
+        parent_context.rbp,
+        parent_context.r12,
+        parent_context.r13,
+        parent_context.r14,
+        parent_context.r15
+    );
+
+    // Call fork with the complete parent context
+    sys_fork_with_parent_context(parent_context)
 }
 
-/// sys_fork with explicit RSP - used by sys_fork_with_frame
-fn sys_fork_with_rsp(userspace_rsp: u64) -> SyscallResult {
+/// sys_fork with full parent context - captures all registers from syscall frame
+fn sys_fork_with_parent_context(parent_context: crate::task::thread::CpuContext) -> SyscallResult {
     // Disable interrupts for the entire fork operation to ensure atomicity
     x86_64::instructions::interrupts::without_interrupts(|| {
-        log::info!("sys_fork_with_rsp called with RSP {:#x}", userspace_rsp);
+        log::info!(
+            "sys_fork_with_parent_context called with RSP {:#x}, RIP {:#x}",
+            parent_context.rsp,
+            parent_context.rip
+        );
 
         // Get current thread ID from scheduler
         let scheduler_thread_id = crate::task::scheduler::current_thread_id();
@@ -464,12 +485,7 @@ fn sys_fork_with_rsp(userspace_rsp: u64) -> SyscallResult {
         // Now re-acquire the lock and complete the fork
         let mut manager_guard = crate::process::manager();
         if let Some(ref mut manager) = *manager_guard {
-            let rsp_option = if userspace_rsp != 0 {
-                Some(userspace_rsp)
-            } else {
-                None
-            };
-            match manager.fork_process_with_page_table(parent_pid, rsp_option, child_page_table) {
+            match manager.fork_process_with_parent_context(parent_pid, parent_context, child_page_table) {
                 Ok(child_pid) => {
                     // Get the child's thread ID to add to scheduler
                     if let Some(child_process) = manager.get_process(child_pid) {
@@ -515,8 +531,11 @@ fn sys_fork_with_rsp(userspace_rsp: u64) -> SyscallResult {
 }
 
 pub fn sys_fork() -> SyscallResult {
-    // Call fork without userspace context (use calculated RSP)
-    sys_fork_with_rsp(0)
+    // DEPRECATED: This function should not be used - use sys_fork_with_frame instead
+    // to get the actual register values at syscall time.
+    log::error!("sys_fork() called without frame - this path is deprecated and broken!");
+    log::error!("The syscall handler should use sys_fork_with_frame() to capture registers correctly.");
+    SyscallResult::Err(22) // EINVAL - invalid argument
 }
 
 /// sys_exec - Replace the current process with a new program
@@ -708,3 +727,55 @@ pub fn sys_gettid() -> SyscallResult {
     log::error!("sys_gettid: No current thread");
     SyscallResult::Ok(0) // Return 0 as fallback
 }
+
+/// sys_dup2 - Duplicate a file descriptor to a specific number
+///
+/// dup2(old_fd, new_fd) creates a copy of old_fd using the file descriptor
+/// number specified in new_fd. If new_fd was previously open, it is silently
+/// closed before being reused.
+///
+/// Per POSIX: if old_fd == new_fd, dup2 just validates old_fd and returns it.
+/// This avoids a race condition where the reference count would temporarily
+/// go to zero.
+///
+/// Returns: new_fd on success, negative error code on failure
+pub fn sys_dup2(old_fd: u64, new_fd: u64) -> SyscallResult {
+    log::debug!("sys_dup2: old_fd={}, new_fd={}", old_fd, new_fd);
+
+    // Get current thread to find process
+    let thread_id = match crate::task::scheduler::current_thread_id() {
+        Some(id) => id,
+        None => {
+            log::error!("sys_dup2: No current thread");
+            return SyscallResult::Err(9); // EBADF
+        }
+    };
+
+    // Get mutable access to process manager
+    let mut manager_guard = crate::process::manager();
+    let process = match &mut *manager_guard {
+        Some(manager) => match manager.find_process_by_thread_mut(thread_id) {
+            Some((_pid, p)) => p,
+            None => {
+                log::error!("sys_dup2: Thread {} not in any process", thread_id);
+                return SyscallResult::Err(9); // EBADF
+            }
+        },
+        None => {
+            log::error!("sys_dup2: No process manager");
+            return SyscallResult::Err(9); // EBADF
+        }
+    };
+
+    // Call the fd_table's dup2 implementation
+    match process.fd_table.dup2(old_fd as i32, new_fd as i32) {
+        Ok(fd) => {
+            log::debug!("sys_dup2: Successfully duplicated fd {} to {}", old_fd, fd);
+            SyscallResult::Ok(fd as u64)
+        }
+        Err(e) => {
+            log::debug!("sys_dup2: Failed with error {}", e);
+            SyscallResult::Err(e as u64)
+        }
+    }
+}
diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs
index c5ba2cf..352f48a 100644
--- a/kernel/src/syscall/mod.rs
+++ b/kernel/src/syscall/mod.rs
@@ -37,6 +37,7 @@ pub enum SyscallNumber {
     Sigprocmask = 14,   // Linux syscall number for rt_sigprocmask
     Sigreturn = 15,     // Linux syscall number for rt_sigreturn
     Pipe = 22,          // Linux syscall number for pipe
+    Dup2 = 33,          // Linux syscall number for dup2
     GetPid = 39,        // Linux syscall number for getpid
     Socket = 41,        // Linux syscall number for socket
     SendTo = 44,        // Linux syscall number for sendto
@@ -68,6 +69,7 @@ impl SyscallNumber {
             14 => Some(Self::Sigprocmask),
             15 => Some(Self::Sigreturn),
             22 => Some(Self::Pipe),
+            33 => Some(Self::Dup2),
             39 => Some(Self::GetPid),
             41 => Some(Self::Socket),
             44 => Some(Self::SendTo),
diff --git a/kernel/src/syscall/pipe.rs b/kernel/src/syscall/pipe.rs
index 4a9c975..346a24f 100644
--- a/kernel/src/syscall/pipe.rs
+++ b/kernel/src/syscall/pipe.rs
@@ -116,9 +116,9 @@ pub fn sys_close(fd: i32) -> SyscallResult {
         }
     };
     let mut manager_guard = manager();
-    let process = match &mut *manager_guard {
+    let (process_pid, process) = match &mut *manager_guard {
         Some(manager) => match manager.find_process_by_thread_mut(thread_id) {
-            Some((_pid, p)) => p,
+            Some((pid, p)) => (pid, p),
             None => {
                 log::error!("sys_close: Process not found for thread {}", thread_id);
                 return SyscallResult::Err(3); // ESRCH
@@ -129,6 +129,13 @@ pub fn sys_close(fd: i32) -> SyscallResult {
             return SyscallResult::Err(3); // ESRCH
         }
     };
+    log::debug!(
+        "sys_close: thread {} -> process {} '{}', closing fd={}",
+        thread_id,
+        process_pid.as_u64(),
+        process.name,
+        fd
+    );
 
     // Close the file descriptor
     match process.fd_table.close(fd) {
@@ -158,7 +165,20 @@ pub fn sys_close(fd: i32) -> SyscallResult {
             SyscallResult::Ok(0)
         }
         Err(e) => {
-            log::error!("sys_close: Failed to close fd={}: error {}", fd, e);
+            log::error!(
+                "sys_close: Failed to close fd={} for process {} '{}' (thread {}): error {}",
+                fd,
+                process_pid.as_u64(),
+                process.name,
+                thread_id,
+                e
+            );
+            // Log what fds ARE present
+            for i in 0..10 {
+                if let Some(fd_entry) = process.fd_table.get(i) {
+                    log::debug!("  fd_table[{}] = {:?}", i, fd_entry.kind);
+                }
+            }
             SyscallResult::Err(e as u64)
         }
     }
diff --git a/kernel/src/task/scheduler.rs b/kernel/src/task/scheduler.rs
index 2a459a4..7c37595 100644
--- a/kernel/src/task/scheduler.rs
+++ b/kernel/src/task/scheduler.rs
@@ -432,6 +432,12 @@ pub fn yield_current() {
 // NOTE: get_pending_switch() was removed because it called schedule() which mutates
 // self.current_thread. Calling it "just to peek" would corrupt scheduler state.
 // If needed in future, implement a true peek function that doesn't mutate state.
+//
+// ARCHITECTURAL CONSTRAINT: Never add a function that calls schedule() "just to look"
+// at what would happen. The schedule() function MUST only be called when an actual
+// context switch will follow immediately. Violating this invariant will desync
+// scheduler.current_thread from reality, causing register corruption in child processes.
+// See commit f59bccd for the full bug investigation.
 
 /// Allocate a new thread ID
 #[allow(dead_code)]
@@ -476,3 +482,87 @@ pub fn switch_to_idle() {
         log::info!("Exception handler: Switched scheduler to idle thread {}", idle_id);
     });
 }
+
+/// Test module for scheduler state invariants
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+
+    /// Test that yield_current() does NOT modify scheduler.current_thread.
+    ///
+    /// This test validates the fix for the bug where yield_current() called schedule(),
+    /// which updated self.current_thread without an actual context switch occurring.
+    /// This caused scheduler state to desync from reality, corrupting child process
+    /// register state during fork.
+    ///
+    /// The fix changed yield_current() to only set the need_resched flag, deferring
+    /// the actual scheduling decision to the next interrupt return.
+    pub fn test_yield_current_does_not_modify_scheduler_state() {
+        log::info!("=== TEST: yield_current() scheduler state invariant ===");
+
+        // Capture the current thread ID before yield
+        let thread_id_before = current_thread_id();
+        log::info!("Thread ID before yield_current(): {:?}", thread_id_before);
+
+        // Call yield_current() - this should ONLY set need_resched flag
+        yield_current();
+
+        // Capture the current thread ID after yield
+        let thread_id_after = current_thread_id();
+        log::info!("Thread ID after yield_current(): {:?}", thread_id_after);
+
+        // CRITICAL ASSERTION: current_thread should NOT have changed
+        // If this fails, it means yield_current() is calling schedule() which
+        // would cause the register corruption bug to return.
+        assert_eq!(
+            thread_id_before, thread_id_after,
+            "BUG: yield_current() modified scheduler.current_thread! \
+             This will cause fork to corrupt child registers. \
+             yield_current() must ONLY set need_resched flag, not call schedule()."
+        );
+
+        // Verify that need_resched was set
+        let need_resched = crate::per_cpu::need_resched();
+        assert!(
+            need_resched,
+            "yield_current() should have set the need_resched flag"
+        );
+
+        // Clean up: clear the need_resched flag to avoid affecting other tests
+        crate::per_cpu::set_need_resched(false);
+
+        log::info!("=== TEST PASSED: yield_current() correctly preserves scheduler state ===");
+    }
+}
+
+/// Public wrapper for running scheduler tests (callable from kernel main)
+/// This is intentionally available but not automatically called - it can be
+/// invoked manually during debugging to verify scheduler invariants.
+#[allow(dead_code)]
+pub fn run_scheduler_tests() {
+    #[cfg(test)]
+    {
+        tests::test_yield_current_does_not_modify_scheduler_state();
+    }
+    #[cfg(not(test))]
+    {
+        // In non-test builds, run a simplified version that doesn't use assert
+        log::info!("=== Scheduler invariant check (non-test mode) ===");
+
+        let thread_id_before = current_thread_id();
+        yield_current();
+        let thread_id_after = current_thread_id();
+
+        if thread_id_before != thread_id_after {
+            log::error!(
+                "SCHEDULER BUG: yield_current() changed current_thread from {:?} to {:?}!",
+                thread_id_before, thread_id_after
+            );
+        } else {
+            log::info!("Scheduler invariant check passed: yield_current() preserves state");
+        }
+
+        // Clean up
+        crate::per_cpu::set_need_resched(false);
+    }
+}
diff --git a/kernel/src/task/thread.rs b/kernel/src/task/thread.rs
index 135df58..04d19fd 100644
--- a/kernel/src/task/thread.rs
+++ b/kernel/src/task/thread.rs
@@ -71,6 +71,32 @@ pub struct CpuContext {
 }
 
 impl CpuContext {
+    /// Create a CpuContext from a syscall frame (captures actual register values at syscall time)
+    pub fn from_syscall_frame(frame: &crate::syscall::handler::SyscallFrame) -> Self {
+        Self {
+            rax: frame.rax,
+            rbx: frame.rbx,
+            rcx: frame.rcx,
+            rdx: frame.rdx,
+            rsi: frame.rsi,
+            rdi: frame.rdi,
+            rbp: frame.rbp,
+            rsp: frame.rsp,
+            r8: frame.r8,
+            r9: frame.r9,
+            r10: frame.r10,
+            r11: frame.r11,
+            r12: frame.r12,
+            r13: frame.r13,
+            r14: frame.r14,
+            r15: frame.r15,
+            rip: frame.rip,
+            rflags: frame.rflags,
+            cs: frame.cs,
+            ss: frame.ss,
+        }
+    }
+
     /// Create a new CPU context for a thread entry point
     pub fn new(entry_point: VirtAddr, stack_pointer: VirtAddr, privilege: ThreadPrivilege) -> Self {
         Self {
diff --git a/kernel/src/test_exec.rs b/kernel/src/test_exec.rs
index 1aaf28f..21b60be 100644
--- a/kernel/src/test_exec.rs
+++ b/kernel/src/test_exec.rs
@@ -1050,7 +1050,7 @@ pub fn test_pipe() {
         Ok(pid) => {
             log::info!("Created pipe_test process with PID {:?}", pid);
             log::info!("Pipe test: process scheduled for execution.");
-            log::info!("    -> Should print 'PIPE_TEST_PASSED' if pipe operations succeed");
+            log::info!("    -> Emits pass marker on success (PIPE_TEST_...)");
         }
         Err(e) => {
             log::error!("Failed to create pipe_test process: {}", e);
@@ -1058,3 +1058,73 @@ pub fn test_pipe() {
         }
     }
 }
+
+/// Test pipe + fork concurrency
+///
+/// TWO-STAGE VALIDATION PATTERN:
+/// - Stage 1 (Checkpoint): Process creation
+///   - Marker: "Pipe+fork test: process scheduled for execution"
+///   - This is a CHECKPOINT confirming process creation succeeded
+/// - Stage 2 (Boot stage): Validates pipe operations across fork boundary
+///   - Marker: "PIPE_FORK_TEST_PASSED"
+///   - This PROVES pipes work correctly across fork, with proper IPC and EOF handling
+pub fn test_pipe_fork() {
+    log::info!("Testing pipe+fork concurrency");
+
+    #[cfg(feature = "testing")]
+    let pipe_fork_test_elf_buf = crate::userspace_test::get_test_binary("pipe_fork_test");
+    #[cfg(feature = "testing")]
+    let pipe_fork_test_elf: &[u8] = &pipe_fork_test_elf_buf;
+    #[cfg(not(feature = "testing"))]
+    let pipe_fork_test_elf = &create_hello_world_elf();
+
+    match crate::process::creation::create_user_process(
+        String::from("pipe_fork_test"),
+        pipe_fork_test_elf,
+    ) {
+        Ok(pid) => {
+            log::info!("Created pipe_fork_test process with PID {:?}", pid);
+            log::info!("Pipe+fork test: process scheduled for execution.");
+            log::info!("    -> Emits pass marker on success (PIPE_FORK_...)");
+        }
+        Err(e) => {
+            log::error!("Failed to create pipe_fork_test process: {}", e);
+            log::error!("Pipe+fork test cannot run without valid userspace process");
+        }
+    }
+}
+
+/// Test concurrent pipe writes from multiple processes
+///
+/// TWO-STAGE VALIDATION PATTERN:
+/// - Stage 1 (Checkpoint): Process creation
+///   - Marker: "Pipe concurrent test: process scheduled for execution"
+///   - This is a CHECKPOINT confirming process creation succeeded
+/// - Stage 2 (Boot stage): Validates concurrent pipe operations
+///   - Marker: "PIPE_CONCURRENT_TEST_PASSED"
+///   - This PROVES the pipe buffer handles concurrent writes correctly under Arc<Mutex<PipeBuffer>>
+pub fn test_pipe_concurrent() {
+    log::info!("Testing concurrent pipe writes from multiple processes");
+
+    #[cfg(feature = "testing")]
+    let pipe_concurrent_test_elf_buf = crate::userspace_test::get_test_binary("pipe_concurrent_test");
+    #[cfg(feature = "testing")]
+    let pipe_concurrent_test_elf: &[u8] = &pipe_concurrent_test_elf_buf;
+    #[cfg(not(feature = "testing"))]
+    let pipe_concurrent_test_elf = &create_hello_world_elf();
+
+    match crate::process::creation::create_user_process(
+        String::from("pipe_concurrent_test"),
+        pipe_concurrent_test_elf,
+    ) {
+        Ok(pid) => {
+            log::info!("Created pipe_concurrent_test process with PID {:?}", pid);
+            log::info!("Pipe concurrent test: process scheduled for execution.");
+            log::info!("    -> Emits pass marker on success (PIPE_CONCURRENT_...)");
+        }
+        Err(e) => {
+            log::error!("Failed to create pipe_concurrent_test process: {}", e);
+            log::error!("Pipe concurrent test cannot run without valid userspace process");
+        }
+    }
+}
diff --git a/userspace/tests/Cargo.toml b/userspace/tests/Cargo.toml
index 5549a02..0ec72fd 100644
--- a/userspace/tests/Cargo.toml
+++ b/userspace/tests/Cargo.toml
@@ -78,6 +78,18 @@ path = "udp_socket_test.rs"
 name = "pipe_test"
 path = "pipe_test.rs"
 
+[[bin]]
+name = "pipe_fork_test"
+path = "pipe_fork_test.rs"
+
+[[bin]]
+name = "pipe_concurrent_test"
+path = "pipe_concurrent_test.rs"
+
+[[bin]]
+name = "pipe_refcount_test"
+path = "pipe_refcount_test.rs"
+
 [profile.release]
 panic = "abort"
 lto = true
diff --git a/userspace/tests/build.sh b/userspace/tests/build.sh
index 4997c3a..2097034 100755
--- a/userspace/tests/build.sh
+++ b/userspace/tests/build.sh
@@ -48,6 +48,9 @@ BINARIES=(
     "signal_regs_test"
     "udp_socket_test"
     "pipe_test"
+    "pipe_fork_test"
+    "pipe_concurrent_test"
+    "pipe_refcount_test"
 )
 
 echo "Building ${#BINARIES[@]} userspace binaries with libbreenix..."
diff --git a/userspace/tests/pipe_concurrent_test.rs b/userspace/tests/pipe_concurrent_test.rs
new file mode 100644
index 0000000..f5b02a1
--- /dev/null
+++ b/userspace/tests/pipe_concurrent_test.rs
@@ -0,0 +1,348 @@
+//! Concurrent pipe test program
+//!
+//! Tests concurrent access to the pipe buffer from multiple processes.
+//! This stress-tests the Arc<Mutex<PipeBuffer>> under concurrent writes.
+
+#![no_std]
+#![no_main]
+
+use core::panic::PanicInfo;
+use libbreenix::io;
+use libbreenix::process;
+use libbreenix::types::fd;
+
+/// Number of concurrent writer processes
+const NUM_WRITERS: usize = 4;
+
+/// Number of messages each writer sends
+const MESSAGES_PER_WRITER: usize = 3;
+
+/// Message size in bytes (including marker and newline)
+const MESSAGE_SIZE: usize = 32;
+
+/// Buffer for number to string conversion
+static mut BUFFER: [u8; 64] = [0; 64];
+
+/// Convert number to string and write it
+unsafe fn write_num(n: u64) {
+    let mut num = n;
+    let mut i = 0;
+
+    if num == 0 {
+        BUFFER[0] = b'0';
+        i = 1;
+    } else {
+        while num > 0 {
+            BUFFER[i] = b'0' + (num % 10) as u8;
+            num /= 10;
+            i += 1;
+        }
+        // Reverse the digits
+        for j in 0..i / 2 {
+            let tmp = BUFFER[j];
+            BUFFER[j] = BUFFER[i - j - 1];
+            BUFFER[i - j - 1] = tmp;
+        }
+    }
+
+    io::write(fd::STDOUT, &BUFFER[..i]);
+}
+
+/// Print a number with prefix
+unsafe fn print_number(prefix: &str, num: u64) {
+    io::print(prefix);
+    write_num(num);
+    io::print("\n");
+}
+
+/// Helper to fail with an error message
+fn fail(msg: &str) -> ! {
+    io::print("PIPE_CONCURRENT: FAIL - ");
+    io::print(msg);
+    io::print("\n");
+    process::exit(1);
+}
+
+/// Helper to yield CPU multiple times
+fn yield_cpu() {
+    for _ in 0..10 {
+        process::yield_now();
+    }
+}
+
+/// Child writer process
+unsafe fn child_writer(writer_id: u64, write_fd: i32) -> ! {
+    io::print("  Writer ");
+    write_num(writer_id);
+    io::print(" started (PID ");
+    write_num(process::getpid());
+    io::print(")\n");
+
+    // Each writer sends MESSAGES_PER_WRITER messages
+    // Format: "W[id]M[msg]XXXXXXXXXXXXXXXX\n" (32 bytes total)
+    for msg_num in 0..MESSAGES_PER_WRITER {
+        // Build message: "W[id]M[msg]" followed by padding and newline
+        let mut msg_buf = [b'X'; MESSAGE_SIZE];
+        msg_buf[0] = b'W';
+        msg_buf[1] = b'0' + (writer_id as u8);
+        msg_buf[2] = b'M';
+        msg_buf[3] = b'0' + (msg_num as u8);
+        msg_buf[MESSAGE_SIZE - 1] = b'\n';
+
+        // Write to pipe
+        let ret = io::write(write_fd as u64, &msg_buf);
+        if ret < 0 {
+            io::print("  Writer ");
+            write_num(writer_id);
+            io::print(" FAILED to write message ");
+            write_num(msg_num as u64);
+            io::print("\n");
+            process::exit(1);
+        }
+
+        if ret != MESSAGE_SIZE as i64 {
+            io::print("  Writer ");
+            write_num(writer_id);
+            io::print(" wrote ");
+            write_num(ret as u64);
+            io::print(" bytes, expected ");
+            write_num(MESSAGE_SIZE as u64);
+            io::print("\n");
+            process::exit(1);
+        }
+
+        // Small yield to encourage interleaving
+        process::yield_now();
+    }
+
+    io::print("  Writer ");
+    write_num(writer_id);
+    io::print(" completed all messages\n");
+    process::exit(0);
+}
+
+/// Main entry point
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    io::print("=== Concurrent Pipe Test Program ===\n");
+    io::print("Testing concurrent writes from multiple processes\n");
+    io::print("\n");
+
+    unsafe {
+        // Phase 1: Create pipe
+        io::print("Phase 1: Creating pipe...\n");
+        let mut pipefd: [i32; 2] = [0, 0];
+        let ret = io::pipe(&mut pipefd);
+
+        if ret < 0 {
+            print_number("  pipe() returned error: ", (-ret) as u64);
+            fail("pipe() failed");
+        }
+
+        print_number("  Read fd: ", pipefd[0] as u64);
+        print_number("  Write fd: ", pipefd[1] as u64);
+
+        // Validate fd numbers
+        if pipefd[0] < 3 || pipefd[1] < 3 {
+            fail("Pipe fds should be >= 3");
+        }
+        if pipefd[0] == pipefd[1] {
+            fail("Read and write fds should be different");
+        }
+
+        io::print("  Pipe created successfully\n\n");
+
+        // Phase 2: Fork multiple writer processes
+        io::print("Phase 2: Forking ");
+        write_num(NUM_WRITERS as u64);
+        io::print(" writer processes...\n");
+
+        let mut child_pids: [i64; NUM_WRITERS] = [0; NUM_WRITERS];
+
+        for i in 0..NUM_WRITERS {
+            let fork_result = process::fork();
+
+            if fork_result < 0 {
+                print_number("  fork() failed with error: ", (-fork_result) as u64);
+                fail("fork() failed");
+            }
+
+            if fork_result == 0 {
+                // Child process - close read end and become a writer
+                let close_ret = io::close(pipefd[0] as u64);
+                if close_ret < 0 {
+                    fail("child failed to close read fd");
+                }
+                child_writer(i as u64, pipefd[1]);
+                // Never returns
+            }
+
+            // Parent: record child PID
+            child_pids[i] = fork_result;
+            io::print("  Forked writer ");
+            write_num(i as u64);
+            io::print(" with PID ");
+            write_num(fork_result as u64);
+            io::print("\n");
+        }
+
+        io::print("  All writers forked\n\n");
+
+        // Phase 3: Parent closes write end and reads all data
+        io::print("Phase 3: Parent closing write end and reading data...\n");
+        let close_ret = io::close(pipefd[1] as u64);
+        if close_ret < 0 {
+            fail("parent failed to close write fd");
+        }
+        io::print("  Parent closed write fd\n");
+
+        // Read all messages
+        // Total expected: NUM_WRITERS * MESSAGES_PER_WRITER messages of MESSAGE_SIZE bytes each
+        let total_expected_bytes = (NUM_WRITERS * MESSAGES_PER_WRITER * MESSAGE_SIZE) as u64;
+        let mut total_read: u64 = 0;
+        let mut read_buf = [0u8; MESSAGE_SIZE];
+
+        // Track which messages we received from each writer
+        let mut writer_msg_counts = [0u32; NUM_WRITERS];
+
+        io::print("  Reading messages (expecting ");
+        write_num(total_expected_bytes);
+        io::print(" bytes)...\n");
+
+        // Maximum retries on EAGAIN before giving up
+        const MAX_EAGAIN_RETRIES: u32 = 1000;
+        let mut consecutive_eagain: u32 = 0;
+
+        loop {
+            let ret = io::read(pipefd[0] as u64, &mut read_buf);
+
+            if ret == -11 {
+                // EAGAIN - buffer empty but writers still exist
+                // Yield and retry
+                consecutive_eagain += 1;
+                if consecutive_eagain >= MAX_EAGAIN_RETRIES {
+                    io::print("  Too many EAGAIN retries, giving up\n");
+                    print_number("  Total bytes read so far: ", total_read);
+                    fail("read timed out waiting for data");
+                }
+                yield_cpu();
+                continue;
+            }
+
+            // Reset counter on successful operation
+            consecutive_eagain = 0;
+
+            if ret < 0 {
+                print_number("  read() failed with error: ", (-ret) as u64);
+                fail("read from pipe failed");
+            }
+
+            if ret == 0 {
+                // EOF - all writers have closed
+                io::print("  EOF reached\n");
+                break;
+            }
+
+            if ret != MESSAGE_SIZE as i64 {
+                io::print("  WARNING: Read ");
+                write_num(ret as u64);
+                io::print(" bytes, expected ");
+                write_num(MESSAGE_SIZE as u64);
+                io::print(" bytes per message\n");
+            }
+
+            total_read += ret as u64;
+
+            // Validate message format: "W[id]M[msg]..."
+            if read_buf[0] != b'W' {
+                io::print("  Invalid message format: first byte is not 'W'\n");
+                io::print("  Got: ");
+                io::write(fd::STDOUT, &read_buf[..ret as usize]);
+                io::print("\n");
+                fail("Invalid message format");
+            }
+
+            let writer_id = (read_buf[1] - b'0') as usize;
+            if writer_id >= NUM_WRITERS {
+                io::print("  Invalid writer ID: ");
+                write_num(writer_id as u64);
+                io::print("\n");
+                fail("Writer ID out of range");
+            }
+
+            if read_buf[2] != b'M' {
+                io::print("  Invalid message format: byte 2 is not 'M'\n");
+                fail("Invalid message format");
+            }
+
+            // Track this message
+            writer_msg_counts[writer_id] += 1;
+        }
+
+        io::print("\n");
+        print_number("  Total bytes read: ", total_read);
+        print_number("  Expected bytes: ", total_expected_bytes);
+
+        if total_read != total_expected_bytes {
+            fail("Did not read expected number of bytes");
+        }
+
+        io::print("  Byte count matches!\n\n");
+
+        // Phase 4: Verify each writer sent the correct number of messages
+        io::print("Phase 4: Verifying message distribution...\n");
+        let mut all_correct = true;
+
+        for i in 0..NUM_WRITERS {
+            io::print("  Writer ");
+            write_num(i as u64);
+            io::print(": ");
+            write_num(writer_msg_counts[i] as u64);
+            io::print(" messages (expected ");
+            write_num(MESSAGES_PER_WRITER as u64);
+            io::print(")\n");
+
+            if writer_msg_counts[i] != MESSAGES_PER_WRITER as u32 {
+                all_correct = false;
+            }
+        }
+
+        if !all_correct {
+            fail("Message distribution incorrect");
+        }
+
+        io::print("  Message distribution verified!\n\n");
+
+        // Phase 5: Close read end
+        io::print("Phase 5: Closing read fd...\n");
+        let close_ret = io::close(pipefd[0] as u64);
+        if close_ret < 0 {
+            fail("parent failed to close read fd");
+        }
+        io::print("  Read fd closed\n\n");
+
+        // All tests passed
+        io::print("===========================================\n");
+        io::print("PIPE_CONCURRENT: ALL TESTS PASSED\n");
+        io::print("===========================================\n");
+        io::print("Successfully tested concurrent pipe writes from ");
+        write_num(NUM_WRITERS as u64);
+        io::print(" processes\n");
+        io::print("Total messages: ");
+        write_num((NUM_WRITERS * MESSAGES_PER_WRITER) as u64);
+        io::print("\n");
+        io::print("Total bytes: ");
+        write_num(total_expected_bytes);
+        io::print("\n");
+        io::print("PIPE_CONCURRENT_TEST_PASSED\n");
+
+        process::exit(0);
+    }
+}
+
+/// Panic handler
+#[panic_handler]
+fn panic(_info: &PanicInfo) -> ! {
+    io::print("PANIC in pipe concurrent test!\n");
+    process::exit(255);
+}
diff --git a/userspace/tests/pipe_fork_test.rs b/userspace/tests/pipe_fork_test.rs
new file mode 100644
index 0000000..b8aaaac
--- /dev/null
+++ b/userspace/tests/pipe_fork_test.rs
@@ -0,0 +1,272 @@
+//! Fork + Pipe concurrency test program
+//!
+//! Tests that pipes work correctly across fork boundaries:
+//! - Pipe created before fork is shared between parent and child
+//! - Parent can write, child can read (and vice versa)
+//! - EOF detection when writer closes
+//! - Data integrity across process boundaries
+
+#![no_std]
+#![no_main]
+
+use core::panic::PanicInfo;
+use libbreenix::io;
+use libbreenix::process;
+use libbreenix::types::fd;
+
+/// Buffer for number to string conversion
+static mut BUFFER: [u8; 32] = [0; 32];
+
+/// Convert number to string and print it
+unsafe fn print_number(prefix: &str, num: u64) {
+    io::print(prefix);
+
+    let mut n = num;
+    let mut i = 0;
+
+    if n == 0 {
+        BUFFER[0] = b'0';
+        i = 1;
+    } else {
+        while n > 0 {
+            BUFFER[i] = b'0' + (n % 10) as u8;
+            n /= 10;
+            i += 1;
+        }
+        // Reverse the digits
+        for j in 0..i / 2 {
+            let tmp = BUFFER[j];
+            BUFFER[j] = BUFFER[i - j - 1];
+            BUFFER[i - j - 1] = tmp;
+        }
+    }
+
+    io::write(fd::STDOUT, &BUFFER[..i]);
+    io::print("\n");
+}
+
+/// Helper to exit with error message
+fn fail(msg: &str) -> ! {
+    io::print("PIPE_FORK_TEST: FAIL - ");
+    io::print(msg);
+    io::print("\n");
+    process::exit(1);
+}
+
+/// Helper to yield CPU
+fn yield_cpu() {
+    for _ in 0..10 {
+        process::yield_now();
+    }
+}
+
+/// Main entry point
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    unsafe {
+        io::print("=== Pipe + Fork Concurrency Test ===\n");
+
+        // Phase 1: Create pipe before fork
+        io::print("Phase 1: Creating pipe...\n");
+        let mut pipefd: [i32; 2] = [0, 0];
+        let ret = io::pipe(&mut pipefd);
+
+        if ret < 0 {
+            print_number("  pipe() failed with error: ", (-ret) as u64);
+            fail("pipe creation failed");
+        }
+
+        let read_fd = pipefd[0] as u64;
+        let write_fd = pipefd[1] as u64;
+
+        print_number("  Read fd: ", read_fd);
+        print_number("  Write fd: ", write_fd);
+
+        // Validate FD numbers
+        if read_fd < 3 || write_fd < 3 {
+            fail("Pipe fds should be >= 3");
+        }
+        if read_fd == write_fd {
+            fail("Read and write fds should be different");
+        }
+
+        io::print("  Pipe created successfully\n");
+
+        // Phase 2: Fork to create parent/child
+        io::print("Phase 2: Forking process...\n");
+        let fork_result = process::fork();
+
+        if fork_result < 0 {
+            print_number("  fork() failed with error: ", (-fork_result) as u64);
+            fail("fork failed");
+        }
+
+        if fork_result == 0 {
+            // ========== CHILD PROCESS ==========
+            io::print("\n[CHILD] Process started\n");
+            print_number("[CHILD] PID: ", process::getpid());
+
+            // Phase 3a: Child closes write end (won't be writing)
+            io::print("[CHILD] Phase 3a: Closing write end of pipe...\n");
+            let ret = io::close(write_fd);
+            if ret < 0 {
+                print_number("[CHILD] close(write_fd) failed: ", (-ret) as u64);
+                fail("child close write_fd failed");
+            }
+            io::print("[CHILD] Write end closed\n");
+
+            // Phase 4a: Child reads message from parent
+            io::print("[CHILD] Phase 4a: Reading message from parent...\n");
+            let mut read_buf = [0u8; 64];
+
+            // Read with retry on EAGAIN (error 11 = would block)
+            // The pipe is non-blocking, so we need to poll
+            let mut bytes_read: i64 = -11; // Start with EAGAIN
+            let mut retries = 0;
+            while bytes_read == -11 && retries < 100 {
+                bytes_read = io::read(read_fd, &mut read_buf);
+                if bytes_read == -11 {
+                    // EAGAIN - yield and retry
+                    yield_cpu();
+                    retries += 1;
+                }
+            }
+
+            if bytes_read < 0 {
+                print_number("[CHILD] read() failed after retries: ", (-bytes_read) as u64);
+                fail("child read failed");
+            }
+
+            print_number("[CHILD] Read bytes: ", bytes_read as u64);
+
+            // Verify message content
+            let expected = b"Hello from parent!";
+            if bytes_read != expected.len() as i64 {
+                print_number("[CHILD] Expected bytes: ", expected.len() as u64);
+                print_number("[CHILD] Got bytes: ", bytes_read as u64);
+                fail("child read wrong number of bytes");
+            }
+
+            let read_slice = &read_buf[..bytes_read as usize];
+            if read_slice != expected {
+                io::print("[CHILD] Data mismatch!\n");
+                io::print("[CHILD] Expected: ");
+                io::write(fd::STDOUT, expected);
+                io::print("\n[CHILD] Got: ");
+                io::write(fd::STDOUT, read_slice);
+                io::print("\n");
+                fail("child data verification failed");
+            }
+
+            io::print("[CHILD] Received: '");
+            io::write(fd::STDOUT, read_slice);
+            io::print("'\n");
+
+            // Phase 5a: Test EOF detection - read again should get 0 (EOF)
+            // The parent needs time to close its write end. Since yield() only sets a flag
+            // and doesn't guarantee an immediate context switch (the actual switch happens
+            // on timer interrupt), we need to retry on EAGAIN until we get EOF.
+            io::print("[CHILD] Phase 5a: Testing EOF detection...\n");
+
+            let mut eof_read: i64 = -11; // Start with EAGAIN
+            let mut eof_retries = 0;
+            while eof_read == -11 && eof_retries < 100 {
+                eof_read = io::read(read_fd, &mut read_buf);
+                if eof_read == -11 {
+                    // EAGAIN - parent hasn't closed write end yet, yield and retry
+                    yield_cpu();
+                    eof_retries += 1;
+                }
+            }
+
+            if eof_read != 0 {
+                print_number("[CHILD] Expected EOF (0), got: ", eof_read as u64);
+                print_number("[CHILD] Retries: ", eof_retries as u64);
+                fail("child EOF detection failed");
+            }
+            io::print("[CHILD] EOF detected correctly (read returned 0)\n");
+            print_number("[CHILD] EOF detected after retries: ", eof_retries as u64);
+
+            // Phase 6a: Close read end
+            io::print("[CHILD] Phase 6a: Closing read end...\n");
+            let ret = io::close(read_fd);
+            if ret < 0 {
+                print_number("[CHILD] close(read_fd) failed: ", (-ret) as u64);
+                fail("child close read_fd failed");
+            }
+
+            io::print("[CHILD] All tests passed!\n");
+            io::print("[CHILD] Exiting with code 0\n");
+            process::exit(0);
+
+        } else {
+            // ========== PARENT PROCESS ==========
+            io::print("\n[PARENT] Process continuing\n");
+            print_number("[PARENT] PID: ", process::getpid());
+            print_number("[PARENT] Child PID: ", fork_result as u64);
+
+            // Phase 3b: Parent closes read end (won't be reading)
+            io::print("[PARENT] Phase 3b: Closing read end of pipe...\n");
+            let ret = io::close(read_fd);
+            if ret < 0 {
+                print_number("[PARENT] close(read_fd) failed: ", (-ret) as u64);
+                fail("parent close read_fd failed");
+            }
+            io::print("[PARENT] Read end closed\n");
+
+            // Phase 4b: Parent writes message to child
+            io::print("[PARENT] Phase 4b: Writing message to child...\n");
+            let message = b"Hello from parent!";
+            let bytes_written = io::write(write_fd, message);
+
+            if bytes_written < 0 {
+                print_number("[PARENT] write() failed: ", (-bytes_written) as u64);
+                fail("parent write failed");
+            }
+
+            print_number("[PARENT] Wrote bytes: ", bytes_written as u64);
+
+            if bytes_written != message.len() as i64 {
+                print_number("[PARENT] Expected to write: ", message.len() as u64);
+                print_number("[PARENT] Actually wrote: ", bytes_written as u64);
+                fail("parent write incomplete");
+            }
+
+            io::print("[PARENT] Message sent successfully\n");
+
+            // Phase 5b: Close write end to signal EOF to child
+            io::print("[PARENT] Phase 5b: Closing write end to signal EOF...\n");
+            let ret = io::close(write_fd);
+            if ret < 0 {
+                print_number("[PARENT] close(write_fd) failed: ", (-ret) as u64);
+                fail("parent close write_fd failed");
+            }
+            io::print("[PARENT] Write end closed (EOF sent to child)\n");
+
+            // Phase 6b: Wait a bit for child to complete
+            io::print("[PARENT] Phase 6b: Waiting for child to complete...\n");
+            for i in 0..10 {
+                yield_cpu();
+                if i % 3 == 0 {
+                    io::print("[PARENT] .\n");
+                }
+            }
+
+            io::print("\n[PARENT] All tests completed successfully!\n");
+
+            // Emit boot stage markers
+            io::print("PIPE_FORK_TEST: ALL TESTS PASSED\n");
+            io::print("PIPE_FORK_TEST_PASSED\n");
+
+            io::print("[PARENT] Exiting with code 0\n");
+            process::exit(0);
+        }
+    }
+}
+
+/// Panic handler
+#[panic_handler]
+fn panic(_info: &PanicInfo) -> ! {
+    io::print("PANIC in pipe_fork_test!\n");
+    process::exit(255);
+}
diff --git a/userspace/tests/pipe_refcount_test.rs b/userspace/tests/pipe_refcount_test.rs
new file mode 100644
index 0000000..28e05c4
--- /dev/null
+++ b/userspace/tests/pipe_refcount_test.rs
@@ -0,0 +1,676 @@
+//! Pipe Reference Counting Stress Test
+//!
+//! Tests pipe reference counting behavior with close and dup2 operations.
+//! This validates that pipes correctly track reader/writer counts and
+//! handle EOF/EPIPE conditions appropriately.
+//!
+//! Tests include:
+//! - Basic close behavior (Tests 1-6)
+//! - dup2 reference counting (Tests 7-10): verifies that duplicated fds
+//!   correctly increment/decrement ref counts and that dup2(fd, fd) is a no-op
+
+#![no_std]
+#![no_main]
+
+use core::panic::PanicInfo;
+
+// System call numbers
+const SYS_EXIT: u64 = 0;
+const SYS_WRITE: u64 = 1;
+const SYS_READ: u64 = 2;
+const SYS_CLOSE: u64 = 6;
+const SYS_PIPE: u64 = 22;
+const SYS_DUP2: u64 = 33;  // Linux standard dup2 syscall number
+
+// Error codes
+const EPIPE: i64 = -32;  // Broken pipe (write with no readers)
+const EBADF: i64 = -9;   // Bad file descriptor
+
+// Syscall wrappers
+#[inline(always)]
+unsafe fn syscall1(n: u64, arg1: u64) -> u64 {
+    let ret: u64;
+    core::arch::asm!(
+        "int 0x80",
+        inlateout("rax") n => ret,
+        inlateout("rdi") arg1 => _,
+        out("rcx") _,
+        out("rdx") _,
+        out("rsi") _,
+        out("r8") _,
+        out("r9") _,
+        out("r10") _,
+        out("r11") _,
+    );
+    ret
+}
+
+#[inline(always)]
+unsafe fn syscall2(n: u64, arg1: u64, arg2: u64) -> u64 {
+    let ret: u64;
+    core::arch::asm!(
+        "int 0x80",
+        inlateout("rax") n => ret,
+        inlateout("rdi") arg1 => _,
+        inlateout("rsi") arg2 => _,
+        out("rcx") _,
+        out("rdx") _,
+        out("r8") _,
+        out("r9") _,
+        out("r10") _,
+        out("r11") _,
+    );
+    ret
+}
+
+#[inline(always)]
+unsafe fn syscall3(n: u64, arg1: u64, arg2: u64, arg3: u64) -> u64 {
+    let ret: u64;
+    core::arch::asm!(
+        "int 0x80",
+        inlateout("rax") n => ret,
+        inlateout("rdi") arg1 => _,
+        inlateout("rsi") arg2 => _,
+        inlateout("rdx") arg3 => _,
+        out("rcx") _,
+        out("r8") _,
+        out("r9") _,
+        out("r10") _,
+        out("r11") _,
+    );
+    ret
+}
+
+// Helper functions
+#[inline(always)]
+fn write_str(s: &str) {
+    unsafe {
+        syscall3(SYS_WRITE, 1, s.as_ptr() as u64, s.len() as u64);
+    }
+}
+
+#[inline(always)]
+fn write_num(n: i64) {
+    if n < 0 {
+        write_str("-");
+        write_num_inner(-n as u64);
+    } else {
+        write_num_inner(n as u64);
+    }
+}
+
+#[inline(always)]
+fn write_num_inner(mut n: u64) {
+    let mut buf = [0u8; 20];
+    let mut i = 19;
+
+    if n == 0 {
+        write_str("0");
+        return;
+    }
+
+    while n > 0 {
+        buf[i] = b'0' + (n % 10) as u8;
+        n /= 10;
+        i -= 1;
+    }
+
+    let s = unsafe { core::str::from_utf8_unchecked(&buf[i + 1..]) };
+    write_str(s);
+}
+
+#[inline(always)]
+fn fail(msg: &str) -> ! {
+    write_str("PIPE_REFCOUNT: FAIL - ");
+    write_str(msg);
+    write_str("\n");
+    unsafe {
+        syscall1(SYS_EXIT, 1);
+    }
+    loop {}
+}
+
+#[inline(always)]
+fn assert_eq(actual: i64, expected: i64, msg: &str) {
+    if actual != expected {
+        write_str("ASSERTION FAILED: ");
+        write_str(msg);
+        write_str("\n  Expected: ");
+        write_num(expected);
+        write_str("\n  Got: ");
+        write_num(actual);
+        write_str("\n");
+        fail("Assertion failed");
+    }
+}
+
+/// Test 1: Basic write after closing write end should fail
+fn test_write_after_close_write() {
+    write_str("\n=== Test 1: Write After Close Write End ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+
+    if ret < 0 {
+        fail("pipe() failed in test 1");
+    }
+
+    write_str("  Pipe created: read_fd=");
+    write_num(pipefd[0] as i64);
+    write_str(", write_fd=");
+    write_num(pipefd[1] as i64);
+    write_str("\n");
+
+    // Close write end
+    let close_ret = unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) } as i64;
+    assert_eq(close_ret, 0, "close(write_fd) should succeed");
+    write_str("  Closed write end\n");
+
+    // Attempt to write to closed write fd - should get EBADF
+    let test_data = b"Should fail";
+    let write_ret = unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    } as i64;
+
+    assert_eq(write_ret, EBADF, "write to closed fd should return EBADF");
+    write_str("  Write to closed fd correctly returned EBADF\n");
+
+    // Clean up
+    unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) };
+    write_str("  Test 1: PASSED\n");
+}
+
+/// Test 2: Read should return EOF (0) when write end is closed and pipe is empty
+fn test_read_eof_after_close_write() {
+    write_str("\n=== Test 2: Read EOF After Close Write End ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+
+    if ret < 0 {
+        fail("pipe() failed in test 2");
+    }
+
+    write_str("  Pipe created\n");
+
+    // Close write end immediately (no data written)
+    unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) };
+    write_str("  Closed write end\n");
+
+    // Read should return EOF (0)
+    let mut buf = [0u8; 32];
+    let read_ret = unsafe {
+        syscall3(SYS_READ, pipefd[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+
+    assert_eq(read_ret, 0, "read should return EOF (0) when all writers closed");
+    write_str("  Read correctly returned EOF\n");
+
+    // Clean up
+    unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) };
+    write_str("  Test 2: PASSED\n");
+}
+
+/// Test 3: Read existing data, then get EOF on next read after write end closed
+fn test_read_data_then_eof() {
+    write_str("\n=== Test 3: Read Data Then EOF ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+
+    if ret < 0 {
+        fail("pipe() failed in test 3");
+    }
+
+    write_str("  Pipe created\n");
+
+    // Write some data
+    let test_data = b"Test data";
+    let write_ret = unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    } as i64;
+
+    assert_eq(write_ret, test_data.len() as i64, "write should succeed");
+    write_str("  Wrote ");
+    write_num(write_ret);
+    write_str(" bytes\n");
+
+    // Close write end
+    unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) };
+    write_str("  Closed write end\n");
+
+    // First read should get the data
+    let mut buf = [0u8; 32];
+    let read_ret = unsafe {
+        syscall3(SYS_READ, pipefd[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+
+    assert_eq(read_ret, test_data.len() as i64, "first read should get all data");
+    write_str("  First read got ");
+    write_num(read_ret);
+    write_str(" bytes\n");
+
+    // Verify data
+    if &buf[..read_ret as usize] != test_data {
+        fail("data mismatch");
+    }
+    write_str("  Data verified\n");
+
+    // Second read should return EOF
+    let read_ret2 = unsafe {
+        syscall3(SYS_READ, pipefd[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+
+    assert_eq(read_ret2, 0, "second read should return EOF");
+    write_str("  Second read correctly returned EOF\n");
+
+    // Clean up
+    unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) };
+    write_str("  Test 3: PASSED\n");
+}
+
+/// Test 4: Close read end, then write should get EPIPE
+fn test_write_epipe_after_close_read() {
+    write_str("\n=== Test 4: Write EPIPE After Close Read End ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+
+    if ret < 0 {
+        fail("pipe() failed in test 4");
+    }
+
+    write_str("  Pipe created\n");
+
+    // Close read end
+    unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) };
+    write_str("  Closed read end\n");
+
+    // Write should get EPIPE (broken pipe)
+    let test_data = b"Should get EPIPE";
+    let write_ret = unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    } as i64;
+
+    assert_eq(write_ret, EPIPE, "write should return EPIPE when all readers closed");
+    write_str("  Write correctly returned EPIPE\n");
+
+    // Clean up
+    unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) };
+    write_str("  Test 4: PASSED\n");
+}
+
+/// Test 5: Close both ends and verify both return success
+fn test_close_both_ends() {
+    write_str("\n=== Test 5: Close Both Ends ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+
+    if ret < 0 {
+        fail("pipe() failed in test 5");
+    }
+
+    write_str("  Pipe created\n");
+
+    // Close read end first
+    let close_read = unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) } as i64;
+    assert_eq(close_read, 0, "close(read_fd) should succeed");
+    write_str("  Closed read end\n");
+
+    // Close write end second
+    let close_write = unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) } as i64;
+    assert_eq(close_write, 0, "close(write_fd) should succeed");
+    write_str("  Closed write end\n");
+
+    // Verify double-close fails
+    let close_again = unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) } as i64;
+    assert_eq(close_again, EBADF, "closing already-closed fd should return EBADF");
+    write_str("  Double-close correctly returned EBADF\n");
+
+    write_str("  Test 5: PASSED\n");
+}
+
+/// Test 6: Multiple writes and reads
+fn test_multiple_operations() {
+    write_str("\n=== Test 6: Multiple Operations ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+
+    if ret < 0 {
+        fail("pipe() failed in test 6");
+    }
+
+    write_str("  Pipe created\n");
+
+    // Write multiple messages
+    let msg1 = b"First";
+    let msg2 = b"Second";
+    let msg3 = b"Third";
+
+    unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, msg1.as_ptr() as u64, msg1.len() as u64);
+        syscall3(SYS_WRITE, pipefd[1] as u64, msg2.as_ptr() as u64, msg2.len() as u64);
+        syscall3(SYS_WRITE, pipefd[1] as u64, msg3.as_ptr() as u64, msg3.len() as u64);
+    }
+    write_str("  Wrote 3 messages\n");
+
+    // Read all data
+    let mut buf = [0u8; 64];
+    let total_expected = msg1.len() + msg2.len() + msg3.len();
+    let mut total_read = 0;
+
+    while total_read < total_expected {
+        let read_ret = unsafe {
+            syscall3(SYS_READ, pipefd[0] as u64,
+                    (buf.as_mut_ptr() as u64) + total_read as u64,
+                    (buf.len() - total_read) as u64)
+        } as i64;
+
+        if read_ret <= 0 {
+            break;
+        }
+        total_read += read_ret as usize;
+    }
+
+    write_str("  Read ");
+    write_num(total_read as i64);
+    write_str(" bytes total\n");
+
+    if total_read != total_expected {
+        fail("did not read all expected data");
+    }
+
+    // Verify concatenated data
+    let expected = b"FirstSecondThird";
+    if &buf[..total_read] != &expected[..] {
+        fail("data corruption detected");
+    }
+    write_str("  Data integrity verified\n");
+
+    // Clean up
+    unsafe {
+        syscall1(SYS_CLOSE, pipefd[0] as u64);
+        syscall1(SYS_CLOSE, pipefd[1] as u64);
+    }
+    write_str("  Test 6: PASSED\n");
+}
+
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    write_str("=== Pipe Reference Counting Stress Test ===\n");
+    write_str("\nThis test validates pipe reference counting behavior.\n");
+    write_str("Includes dup2 tests for reference counting with duplicated fds.\n");
+
+    // Run basic close tests
+    test_write_after_close_write();
+    test_read_eof_after_close_write();
+    test_read_data_then_eof();
+    test_write_epipe_after_close_read();
+    test_close_both_ends();
+    test_multiple_operations();
+
+    // Run dup2 tests
+    test_dup2_write_end();
+    test_dup2_read_end();
+    test_dup2_same_fd();
+    test_dup2_overwrite_fd();
+
+    // All tests passed
+    write_str("\n=== ALL TESTS PASSED ===\n");
+    write_str("PIPE_REFCOUNT_TEST_PASSED\n");
+
+    unsafe {
+        syscall1(SYS_EXIT, 0);
+    }
+    loop {}
+}
+
+#[panic_handler]
+fn panic(_info: &PanicInfo) -> ! {
+    write_str("PANIC in pipe_refcount_test!\n");
+    unsafe {
+        syscall1(SYS_EXIT, 1);
+    }
+    loop {}
+}
+
+// === DUP2 STRESS TESTS ===
+// These tests validate pipe reference counting with dup2 operations
+
+/// Test 7: Duplicate write end, close original, verify pipe still works
+fn test_dup2_write_end() {
+    write_str("\n=== Test 7: Dup2 Write End ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+    if ret < 0 {
+        fail("pipe() failed in test 7");
+    }
+
+    write_str("  Pipe created: read_fd=");
+    write_num(pipefd[0] as i64);
+    write_str(", write_fd=");
+    write_num(pipefd[1] as i64);
+    write_str("\n");
+
+    // Duplicate write end to fd 10
+    let new_write_fd: i32 = 10;
+    let dup_ret = unsafe { syscall2(SYS_DUP2, pipefd[1] as u64, new_write_fd as u64) } as i64;
+    assert_eq(dup_ret, new_write_fd as i64, "dup2 should return new_fd");
+    write_str("  Duplicated write_fd to fd ");
+    write_num(new_write_fd as i64);
+    write_str("\n");
+
+    // Close original write end
+    unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) };
+    write_str("  Closed original write_fd\n");
+
+    // Write via duplicated fd should still work (ref count was incremented)
+    let test_data = b"via dup";
+    let write_ret = unsafe {
+        syscall3(SYS_WRITE, new_write_fd as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    } as i64;
+    assert_eq(write_ret, test_data.len() as i64, "write via dup'd fd should succeed");
+    write_str("  Write via duplicated fd succeeded\n");
+
+    // Read should get the data
+    let mut buf = [0u8; 32];
+    let read_ret = unsafe {
+        syscall3(SYS_READ, pipefd[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+    assert_eq(read_ret, test_data.len() as i64, "read should get data");
+    write_str("  Read got data correctly\n");
+
+    // Close dup'd write fd - now all writers are closed
+    unsafe { syscall1(SYS_CLOSE, new_write_fd as u64) };
+    write_str("  Closed duplicated write_fd\n");
+
+    // Read should now return EOF
+    let read_eof = unsafe {
+        syscall3(SYS_READ, pipefd[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+    assert_eq(read_eof, 0, "read should return EOF after all writers closed");
+    write_str("  Read correctly returned EOF\n");
+
+    // Clean up
+    unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) };
+    write_str("  Test 7: PASSED\n");
+}
+
+/// Test 8: Duplicate read end, close in various orders
+fn test_dup2_read_end() {
+    write_str("\n=== Test 8: Dup2 Read End ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+    if ret < 0 {
+        fail("pipe() failed in test 8");
+    }
+
+    write_str("  Pipe created\n");
+
+    // Duplicate read end to fd 11
+    let new_read_fd: i32 = 11;
+    let dup_ret = unsafe { syscall2(SYS_DUP2, pipefd[0] as u64, new_read_fd as u64) } as i64;
+    assert_eq(dup_ret, new_read_fd as i64, "dup2 should return new_fd");
+    write_str("  Duplicated read_fd to fd ");
+    write_num(new_read_fd as i64);
+    write_str("\n");
+
+    // Write some data
+    let test_data = b"test data";
+    unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    };
+    write_str("  Wrote data\n");
+
+    // Close original read end
+    unsafe { syscall1(SYS_CLOSE, pipefd[0] as u64) };
+    write_str("  Closed original read_fd\n");
+
+    // Read via duplicated fd should work
+    let mut buf = [0u8; 32];
+    let read_ret = unsafe {
+        syscall3(SYS_READ, new_read_fd as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+    assert_eq(read_ret, test_data.len() as i64, "read via dup'd fd should succeed");
+    write_str("  Read via duplicated fd succeeded\n");
+
+    // Close duplicated read fd - now all readers are closed
+    unsafe { syscall1(SYS_CLOSE, new_read_fd as u64) };
+    write_str("  Closed duplicated read_fd\n");
+
+    // Write should now get EPIPE
+    let write_ret = unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    } as i64;
+    assert_eq(write_ret, EPIPE, "write should return EPIPE after all readers closed");
+    write_str("  Write correctly returned EPIPE\n");
+
+    // Clean up
+    unsafe { syscall1(SYS_CLOSE, pipefd[1] as u64) };
+    write_str("  Test 8: PASSED\n");
+}
+
+/// Test 9: dup2(fd, fd) same-fd case - should be a no-op per POSIX
+fn test_dup2_same_fd() {
+    write_str("\n=== Test 9: Dup2 Same FD (No-op) ===\n");
+
+    let mut pipefd: [i32; 2] = [0, 0];
+    let ret = unsafe { syscall1(SYS_PIPE, pipefd.as_mut_ptr() as u64) } as i64;
+    if ret < 0 {
+        fail("pipe() failed in test 9");
+    }
+
+    write_str("  Pipe created\n");
+
+    // dup2(read_fd, read_fd) should just validate and return read_fd
+    let dup_ret = unsafe { syscall2(SYS_DUP2, pipefd[0] as u64, pipefd[0] as u64) } as i64;
+    assert_eq(dup_ret, pipefd[0] as i64, "dup2(fd, fd) should return fd unchanged");
+    write_str("  dup2(read_fd, read_fd) returned correctly\n");
+
+    // Pipe should still work normally - this validates that ref counts weren't corrupted
+    // by a close-then-add sequence (the race condition the POSIX check prevents)
+    let test_data = b"still works";
+    let write_ret = unsafe {
+        syscall3(SYS_WRITE, pipefd[1] as u64, test_data.as_ptr() as u64, test_data.len() as u64)
+    } as i64;
+    assert_eq(write_ret, test_data.len() as i64, "write should succeed");
+    write_str("  Write still works\n");
+
+    let mut buf = [0u8; 32];
+    let read_ret = unsafe {
+        syscall3(SYS_READ, pipefd[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+    assert_eq(read_ret, test_data.len() as i64, "read should succeed");
+    write_str("  Read still works\n");
+
+    // Clean up
+    unsafe {
+        syscall1(SYS_CLOSE, pipefd[0] as u64);
+        syscall1(SYS_CLOSE, pipefd[1] as u64);
+    }
+    write_str("  Test 9: PASSED\n");
+}
+
+/// Test 10: dup2 overwrites an existing fd
+fn test_dup2_overwrite_fd() {
+    write_str("\n=== Test 10: Dup2 Overwrite Existing FD ===\n");
+
+    // Create two pipes
+    let mut pipe1: [i32; 2] = [0, 0];
+    let mut pipe2: [i32; 2] = [0, 0];
+
+    let ret1 = unsafe { syscall1(SYS_PIPE, pipe1.as_mut_ptr() as u64) } as i64;
+    if ret1 < 0 {
+        fail("pipe() #1 failed in test 10");
+    }
+    let ret2 = unsafe { syscall1(SYS_PIPE, pipe2.as_mut_ptr() as u64) } as i64;
+    if ret2 < 0 {
+        fail("pipe() #2 failed in test 10");
+    }
+
+    write_str("  Created two pipes\n");
+    write_str("  Pipe1: read=");
+    write_num(pipe1[0] as i64);
+    write_str(", write=");
+    write_num(pipe1[1] as i64);
+    write_str("\n  Pipe2: read=");
+    write_num(pipe2[0] as i64);
+    write_str(", write=");
+    write_num(pipe2[1] as i64);
+    write_str("\n");
+
+    // Write to pipe1 before overwriting
+    let msg1 = b"pipe1";
+    unsafe {
+        syscall3(SYS_WRITE, pipe1[1] as u64, msg1.as_ptr() as u64, msg1.len() as u64)
+    };
+    write_str("  Wrote to pipe1\n");
+
+    // dup2(pipe2_write, pipe1_write) - this should:
+    // 1. Close pipe1's write fd (decrementing writer count)
+    // 2. Make pipe1[1] point to pipe2's write end
+    let dup_ret = unsafe { syscall2(SYS_DUP2, pipe2[1] as u64, pipe1[1] as u64) } as i64;
+    assert_eq(dup_ret, pipe1[1] as i64, "dup2 should return new_fd");
+    write_str("  dup2'd pipe2 write to pipe1 write fd\n");
+
+    // Now pipe1[1] writes to pipe2, not pipe1
+    let msg2 = b"pipe2";
+    unsafe {
+        syscall3(SYS_WRITE, pipe1[1] as u64, msg2.as_ptr() as u64, msg2.len() as u64)
+    };
+    write_str("  Wrote 'pipe2' via overwritten fd\n");
+
+    // Read from pipe2 should get the data we just wrote
+    let mut buf = [0u8; 32];
+    let read2 = unsafe {
+        syscall3(SYS_READ, pipe2[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+    assert_eq(read2, msg2.len() as i64, "should read from pipe2");
+    if &buf[..read2 as usize] != msg2 {
+        fail("read wrong data from pipe2");
+    }
+    write_str("  Read 'pipe2' from pipe2 correctly\n");
+
+    // Read from pipe1 should get the original data we wrote
+    let read1 = unsafe {
+        syscall3(SYS_READ, pipe1[0] as u64, buf.as_mut_ptr() as u64, buf.len() as u64)
+    } as i64;
+    assert_eq(read1, msg1.len() as i64, "should read from pipe1");
+    if &buf[..read1 as usize] != msg1 {
+        fail("read wrong data from pipe1");
+    }
+    write_str("  Read 'pipe1' from pipe1 correctly\n");
+
+    // Clean up - pipe1[1] is now a dup of pipe2[1], so we have 2 writers for pipe2
+    unsafe {
+        syscall1(SYS_CLOSE, pipe1[0] as u64);
+        syscall1(SYS_CLOSE, pipe1[1] as u64);  // Actually closes a pipe2 writer
+        syscall1(SYS_CLOSE, pipe2[0] as u64);
+        syscall1(SYS_CLOSE, pipe2[1] as u64);
+    }
+    write_str("  Test 10: PASSED\n");
+}
diff --git a/userspace/tests/register_init_test.rs b/userspace/tests/register_init_test.rs
index 8939eed..a7cf550 100644
--- a/userspace/tests/register_init_test.rs
+++ b/userspace/tests/register_init_test.rs
@@ -66,73 +66,79 @@ fn write_hex(n: u64) {
 }
 
 /// Entry point - captures registers in pure assembly BEFORE any Rust code runs
+///
+/// NOTE: We save registers to a static buffer instead of the stack because:
+/// 1. Calling conventions may clobber registers before we read them
+/// 2. Stack-based approach was giving incorrect results for RAX
 #[no_mangle]
 #[unsafe(naked)]
 pub unsafe extern "C" fn _start() -> ! {
     core::arch::naked_asm!(
-        // Save all registers to stack IMMEDIATELY at entry
-        // Stack layout after this: [rax][rbx][rcx][rdx][rsi][rdi][r8][r9][r10][r11][r12][r13][r14][r15]
-        "push rax",
-        "push rbx",
-        "push rcx",
-        "push rdx",
-        "push rsi",
-        "push rdi",
-        "push r8",
-        "push r9",
-        "push r10",
-        "push r11",
-        "push r12",
-        "push r13",
-        "push r14",
-        "push r15",
-
-        // Now call Rust function to check them (stack pointer points to saved regs)
+        // Save all registers to a static buffer IMMEDIATELY at entry
+        // This is simpler and more reliable than stack-based approach
+        "lea r11, [{saved_regs}]",  // Load address of static buffer into r11 (clobbers r11)
+        "mov [r11 + 0*8], rax",
+        "mov [r11 + 1*8], rbx",
+        "mov [r11 + 2*8], rcx",
+        "mov [r11 + 3*8], rdx",
+        "mov [r11 + 4*8], rsi",
+        "mov [r11 + 5*8], rdi",
+        "mov [r11 + 6*8], r8",
+        "mov [r11 + 7*8], r9",
+        "mov [r11 + 8*8], r10",
+        // r11 is already used as base pointer, can't save its original value
+        "mov [r11 + 10*8], r12",
+        "mov [r11 + 11*8], r13",
+        "mov [r11 + 12*8], r14",
+        "mov [r11 + 13*8], r15",
+
+        // Now call Rust function to check them
         "call {check_registers}",
 
         // Never returns
         "ud2",
 
-        check_registers = sym check_registers_from_stack,
+        saved_regs = sym SAVED_REGS,
+        check_registers = sym check_registers_from_buffer,
     );
 }
 
-/// Check register values that were saved on the stack
-unsafe extern "C" fn check_registers_from_stack() -> ! {
-    // Read values from stack
-    // Stack layout: [r15][r14][r13][r12][r11][r10][r9][r8][rdi][rsi][rdx][rcx][rbx][rax][return_addr]
-    //                                                                                        ^RSP
-    // So we need to skip return address (+8) and read backwards
-    let regs_ptr: *const u64;
-    core::arch::asm!("lea {}, [rsp + 8]", out(reg) regs_ptr);
-
-    // Read in reverse order (last pushed = first in memory going up)
-    let r15 = *regs_ptr.offset(0);
-    let r14 = *regs_ptr.offset(1);
-    let r13 = *regs_ptr.offset(2);
-    let r12 = *regs_ptr.offset(3);
-    let r11 = *regs_ptr.offset(4);
-    let r10 = *regs_ptr.offset(5);
-    let r9  = *regs_ptr.offset(6);
-    let r8  = *regs_ptr.offset(7);
-    let rdi = *regs_ptr.offset(8);
-    let rsi = *regs_ptr.offset(9);
-    let rdx = *regs_ptr.offset(10);
-    let rcx = *regs_ptr.offset(11);
-    let rbx = *regs_ptr.offset(12);
-    let rax = *regs_ptr.offset(13);
+/// Static buffer to hold saved registers
+/// Order: rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, (r11 skipped), r12, r13, r14, r15
+static mut SAVED_REGS: [u64; 14] = [0; 14];
+
+/// Check register values that were saved to the static buffer
+unsafe extern "C" fn check_registers_from_buffer() -> ! {
+    // Read values from SAVED_REGS static buffer
+    // Layout: rax at 0, rbx at 1, ..., r15 at 13 (r11 is at index 9 but was clobbered)
+    let rax = SAVED_REGS[0];
+    let rbx = SAVED_REGS[1];
+    let rcx = SAVED_REGS[2];
+    let rdx = SAVED_REGS[3];
+    let rsi = SAVED_REGS[4];
+    let rdi_saved = SAVED_REGS[5];
+    let r8  = SAVED_REGS[6];
+    let r9  = SAVED_REGS[7];
+    let r10 = SAVED_REGS[8];
+    // r11 was used as base pointer, skip it (SAVED_REGS[9] contains garbage)
+    let r12 = SAVED_REGS[10];
+    let r13 = SAVED_REGS[11];
+    let r14 = SAVED_REGS[12];
+    let r15 = SAVED_REGS[13];
 
     // Check if all registers are zero
+    // Note: rdi_saved is the original rdi value (before we used rdi for argument passing)
+    // Note: r11 is not checked because we used it as base pointer to save other registers
     let all_zero = rax == 0
         && rbx == 0
         && rcx == 0
         && rdx == 0
         && rsi == 0
-        && rdi == 0
+        && rdi_saved == 0
         && r8 == 0
         && r9 == 0
         && r10 == 0
-        && r11 == 0
+        // r11 cannot be checked - used as base pointer
         && r12 == 0
         && r13 == 0
         && r14 == 0
@@ -167,9 +173,9 @@ unsafe extern "C" fn check_registers_from_stack() -> ! {
             write_hex(rsi);
             write_str(" (expected 0)\n");
         }
-        if rdi != 0 {
+        if rdi_saved != 0 {
             write_str("  RDI = ");
-            write_hex(rdi);
+            write_hex(rdi_saved);
             write_str(" (expected 0)\n");
         }
         if r8 != 0 {
@@ -187,11 +193,7 @@ unsafe extern "C" fn check_registers_from_stack() -> ! {
             write_hex(r10);
             write_str(" (expected 0)\n");
         }
-        if r11 != 0 {
-            write_str("  R11 = ");
-            write_hex(r11);
-            write_str(" (expected 0)\n");
-        }
+        // r11 not checked - used as base pointer in _start
         if r12 != 0 {
             write_str("  R12 = ");
             write_hex(r12);
diff --git a/xtask/src/main.rs b/xtask/src/main.rs
index fcc8450..13bf23d 100644
--- a/xtask/src/main.rs
+++ b/xtask/src/main.rs
@@ -523,6 +523,20 @@ fn get_boot_stages() -> Vec<BootStage> {
             failure_meaning: "pipe() syscall test failed - pipe creation, read/write, or close broken",
             check_hint: "Check kernel/src/syscall/pipe.rs and kernel/src/ipc/pipe.rs - verify pipe creation, fd allocation, and read/write operations",
         },
+        // Pipe + fork test
+        BootStage {
+            name: "Pipe + fork test passed",
+            marker: "PIPE_FORK_TEST_PASSED",
+            failure_meaning: "pipe+fork test failed - pipe IPC across fork boundary broken",
+            check_hint: "Check fork fd_table cloning and pipe reference counting in kernel/src/ipc/fd.rs and kernel/src/process/manager.rs",
+        },
+        // Pipe concurrent test
+        BootStage {
+            name: "Pipe concurrent test passed",
+            marker: "PIPE_CONCURRENT_TEST_PASSED",
+            failure_meaning: "concurrent pipe test failed - pipe buffer concurrency broken",
+            check_hint: "Check pipe buffer locking in kernel/src/ipc/pipe.rs",
+        },
         // NOTE: ENOSYS syscall verification requires external_test_bins feature
         // which is not enabled by default. Add back when external binaries are integrated.
     ]

From 9500e1d1685226a25fa392705964117305aa4552 Mon Sep 17 00:00:00 2001
From: Ryan Breen <ryan@ryanbreen.com>
Date: Mon, 29 Dec 2025 14:43:35 -0500
Subject: [PATCH 4/4] Yeah, I'm still not using Gemini because it sucks.

---
 .claude/commands/validate-implementation.md |  22 +
 AGENTS.md                                   | 526 ++++++++++++++++++++
 Gemini.md                                   | 123 +++++
 3 files changed, 671 insertions(+)
 create mode 100644 .claude/commands/validate-implementation.md
 create mode 100644 AGENTS.md
 create mode 100644 Gemini.md

diff --git a/.claude/commands/validate-implementation.md b/.claude/commands/validate-implementation.md
new file mode 100644
index 0000000..5b195ba
--- /dev/null
+++ b/.claude/commands/validate-implementation.md
@@ -0,0 +1,22 @@
+# Technical Implementation Validation
+
+Spawn a fresh-context agent to validate implementation and test quality.
+
+**Arguments:** $ARGUMENTS
+
+---
+
+Use the Task tool to spawn an agent:
+
+```
+subagent_type: general-purpose
+model: opus
+prompt: |
+  Load and follow the collaboration:technical-implementation-validation skill.
+
+  Context: $ARGUMENTS
+
+  Report: spec-to-test mapping, gaming patterns detected, scores (technical accuracy, intellectual honesty), categorized issues, recommendation (APPROVE/REVISE/REJECT).
+```
+
+Report validation results back to me.
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..f20744e
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,526 @@
+# Breenix OS
+
+## Project Overview
+
+Breenix is a production-quality x86_64 operating system kernel written in Rust. This is not a toy or learning project - we follow Linux/FreeBSD standard practices and prioritize quality over speed.
+
+## Project Structure
+
+```
+kernel/          # Core kernel (no_std, no_main)
+src.legacy/      # Previous implementation (being phased out)
+libs/            # libbreenix, tiered_allocator
+tests/           # Integration tests
+docs/planning/   # Numbered phase directories (00-15)
+```
+
+## Build & Run
+
+### Standard Workflow: Boot Stages Testing
+
+For normal development, use the boot stages test to verify kernel health:
+
+```bash
+# Run boot stages test - verifies kernel progresses through all checkpoints
+cargo run -p xtask -- boot-stages
+
+# Build only (no execution)
+cargo build --release --features testing,external_test_bins --bin qemu-uefi
+```
+
+The boot stages test (`xtask boot-stages`) monitors serial output for expected markers at each boot phase. Add new stages to `xtask/src/main.rs` when adding new subsystems.
+
+### GDB Debugging (For Deep Technical Issues)
+
+Use GDB when you need to understand **why** something is failing, not just **that** it failed. GDB is the right tool when:
+- You need to examine register state or memory at a specific point
+- A panic occurs and you need to inspect the call stack
+- You're debugging timing-sensitive issues that log output can't capture
+- You need to step through code instruction-by-instruction
+
+**Do NOT use GDB** for routine testing or to avoid writing proper boot stage markers. If you find yourself adding debug log statements in a loop, that's a sign you should use GDB instead.
+
+```bash
+# Start interactive GDB session
+./breenix-gdb-chat/scripts/gdb_session.sh start
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "break kernel::kernel_main"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "continue"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "info registers"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "backtrace 10"
+./breenix-gdb-chat/scripts/gdb_session.sh serial
+./breenix-gdb-chat/scripts/gdb_session.sh stop
+```
+
+### Logs
+All runs are logged to `logs/breenix_YYYYMMDD_HHMMSS.log`
+
+```bash
+# View latest log
+ls -t logs/*.log | head -1 | xargs less
+```
+
+## Development Workflow
+
+### Agent-Based Development (MANDATORY)
+
+**The main conversation is for ORCHESTRATION ONLY.** Never execute tests, run builds, or perform iterative debugging directly in the top-level session. This burns token context and leads to session exhaustion.
+
+**ALWAYS dispatch to agents:**
+- Build verification and compilation checks
+- GDB debugging sessions (via gdb_chat.py)
+- Code exploration and codebase research
+- Any task that may require multiple iterations or produce verbose output
+
+**The orchestrator session should only:**
+- Plan and decompose work into agent-dispatchable tasks
+- Review agent reports and synthesize findings
+- Make high-level decisions based on agent results
+- Coordinate multiple parallel agent investigations
+- Communicate summaries and next steps to the user
+
+**Anti-pattern (NEVER DO THIS):**
+```
+# DON'T run kernel directly - ALWAYS use GDB
+cargo run --release --bin qemu-uefi
+cargo run -p xtask -- boot-stages
+# DON'T grep through large outputs in main session
+cat target/output.txt | grep ...
+```
+
+**Correct pattern:**
+```
+# DO use GDB for kernel debugging
+printf 'break kernel::kernel_main\ncontinue\ninfo registers\nquit\n' | python3 breenix-gdb-chat/scripts/gdb_chat.py
+
+# DO dispatch to an agent for GDB sessions
+Task(subagent_type="general-purpose", prompt="Use gdb_chat.py to debug the
+clock_gettime syscall. Set breakpoint, examine registers, report findings.")
+```
+
+When a debugging task requires multiple iterations, dispatch it ONCE to an agent with comprehensive instructions. The agent will iterate internally and return a summary. If more investigation is needed, dispatch another agent - don't bring the iteration into the main session.
+
+### Feature Branches (REQUIRED)
+Never push directly to main. Always:
+```bash
+git checkout main
+git pull origin main
+git checkout -b feature-name
+# ... do work ...
+git push -u origin feature-name
+gh pr create --title "Brief description" --body "Details"
+```
+
+### Code Quality - ZERO TOLERANCE FOR WARNINGS
+
+**Every build must be completely clean.** Zero warnings, zero errors. This is non-negotiable.
+
+When you run any build or test command and observe warnings or errors in the compile stage, you MUST fix them before proceeding. Do not continue with broken builds.
+
+**Honest fixes only.** Do NOT suppress warnings dishonestly:
+- `#[allow(dead_code)]` is NOT acceptable for code that should be removed or actually used
+- `#[allow(unused_variables)]` is NOT acceptable for variables that indicate incomplete implementation
+- Prefixing with `_` is NOT acceptable if the variable was meant to be used
+- These annotations hide problems instead of fixing them
+
+**When to use suppression attributes:**
+- `#[allow(dead_code)]` ONLY for legitimate public API functions that are intentionally available but not yet called (e.g., `SpinLock::try_lock()` as part of a complete lock API)
+- `#[cfg(never)]` for code intentionally disabled for debugging (must be in Cargo.toml check-cfg)
+- Never use suppressions to hide incomplete work or actual bugs
+
+**Proper fixes:**
+- Unused variable? Either use it (complete the implementation) or remove it entirely
+- Dead code? Either call it or delete it
+- Unnecessary `mut`? Remove the `mut`
+- Unnecessary `unsafe`? Remove the `unsafe` block
+
+**Before every commit, verify:**
+```bash
+# Build must complete with 0 warnings
+cargo build --release --features testing,external_test_bins --bin qemu-uefi 2>&1 | grep -E "^(warning|error)"
+# Should produce no output (no warnings/errors)
+```
+
+### Testing Integrity - CRITICAL
+
+**NEVER fake a passing test.** If a test fails, it fails. Do not:
+- Add fallbacks that accept weaker evidence than the test requires
+- Change test criteria to match broken behavior
+- Accept "process was created" as proof of "process executed correctly"
+- Let CI pass by detecting markers printed before the actual test runs
+
+If a test cannot pass because the underlying code is broken:
+1. **Fix the underlying code** - this is the job
+2. Or disable the test explicitly with documentation explaining why
+3. NEVER make the test pass by weakening its criteria
+
+A test that passes without testing what it claims to test is worse than a failing test - it gives false confidence and hides real bugs.
+
+### Testing
+- Most tests use shared QEMU (`tests/shared_qemu.rs`)
+- Special tests marked `#[ignore]` require specific configs
+- Tests wait for: `🎯 KERNEL_POST_TESTS_COMPLETE 🎯`
+- BIOS test: `cargo test test_bios_boot -- --ignored`
+
+### Commits
+All commits co-authored by Ryan Breen and Claude Code.
+
+## Documentation
+
+### Visual Progress Dashboard
+**Public Dashboard**: https://v0-breenix-dashboard.vercel.app/
+- Interactive visualization of POSIX compliance progress
+- 12 subsystem regions with completion percentages
+- Phase timeline showing current position (Phase 8.5)
+
+**Updating the Dashboard**: Use the `collaboration:ux-research` skill to update the v0.dev dashboard.
+When features are completed, invoke the skill to update progress percentages and feature lists.
+
+### Master Roadmap
+`docs/planning/PROJECT_ROADMAP.md` tracks:
+- Current development status
+- Completed phases (✅)
+- In progress (🚧)
+- Planned work (📋)
+
+Update after each PR merge and when starting new work.
+
+### Structure
+- `docs/planning/00-15/` - Phase directories
+- `docs/planning/legacy-migration/FEATURE_COMPARISON.md` - Track migration progress
+- Cross-cutting dirs: `posix-compliance/`, `legacy-migration/`
+
+## Userland Development Stages
+
+The path to full POSIX libc compatibility is broken into 5 stages:
+
+### Stage 1: libbreenix (Rust) - ✅ ~80% Complete
+Location: `libs/libbreenix/`
+
+Provides syscall wrappers for Rust programs:
+- `process.rs` - exit, fork, exec, getpid, gettid, yield
+- `io.rs` - read, write, stdout, stderr
+- `time.rs` - clock_gettime (REALTIME, MONOTONIC)
+- `memory.rs` - brk, sbrk
+- `errno.rs` - POSIX errno definitions
+- `syscall.rs` - raw syscall primitives (syscall0-6)
+
+**Usage in test programs:**
+```rust
+use libbreenix::{io::println, process::exit, time::now_monotonic};
+
+#[no_mangle]
+pub extern "C" fn _start() -> ! {
+    println("Hello from userspace!");
+    let ts = now_monotonic();
+    exit(0);
+}
+```
+
+### Stage 2: Rust Runtime - 📋 Planned
+- Panic handler for userspace
+- Global allocator (using brk/sbrk)
+- `#[no_std]` program template
+- Core abstractions (File, Process types)
+
+### Stage 3: C libc Port - 📋 Planned
+- C-compatible ABI wrappers
+- stdio (printf, scanf, etc.)
+- stdlib (malloc, free, etc.)
+- string.h, unistd.h functions
+- Option: Port musl-libc or write custom
+
+### Stage 4: Shell - 📋 Planned
+Requires: Stage 3, filesystem syscalls, pipe/dup
+- Command parsing
+- Built-in commands (cd, exit, echo)
+- External command execution
+- Piping and redirection
+- Job control (requires signals)
+
+### Stage 5: Coreutils - 📋 Planned
+Requires: Stage 4, full filesystem
+- Basic: cat, echo, true, false
+- File ops: ls, cp, mv, rm
+- Dir ops: mkdir, rmdir
+- Text: head, tail, wc
+
+## Legacy Code Removal
+
+When new implementation reaches parity:
+1. Remove code from `src.legacy/`
+2. Update `FEATURE_COMPARISON.md`
+3. Include removal in same commit as feature completion
+
+## Build Configuration
+
+- Custom target: `x86_64-breenix.json`
+- Nightly Rust with `rust-src` and `llvm-tools-preview`
+- Panic strategy: abort
+- Red zone: disabled for interrupt safety
+- Features: `-mmx,-sse,+soft-float`
+
+## 🚨 PROHIBITED CODE SECTIONS 🚨
+
+The following files are on the **prohibited modifications list**. Agents MUST NOT modify these files without explicit user approval.
+
+### Tier 1: Absolutely Forbidden (ask before ANY change)
+| File | Reason |
+|------|--------|
+| `kernel/src/syscall/handler.rs` | Syscall hot path - ANY logging breaks timing tests |
+| `kernel/src/syscall/time.rs` | clock_gettime precision - called in tight loops |
+| `kernel/src/syscall/entry.asm` | Assembly syscall entry - must be minimal |
+| `kernel/src/interrupts/timer.rs` | Timer fires every 1ms - <1000 cycles budget |
+| `kernel/src/interrupts/timer_entry.asm` | Assembly timer entry - must be minimal |
+
+### Tier 2: High Scrutiny (explain why GDB is insufficient)
+| File | Reason |
+|------|--------|
+| `kernel/src/interrupts/context_switch.rs` | Context switch path - timing sensitive |
+| `kernel/src/interrupts/mod.rs` | Interrupt dispatch - timing sensitive |
+| `kernel/src/gdt.rs` | GDT/TSS - rarely needs changes |
+| `kernel/src/per_cpu.rs` | Per-CPU data - used in hot paths |
+
+### When Modifying Prohibited Sections
+
+If you believe you must modify a prohibited file:
+
+1. **Explain why GDB debugging is insufficient** for this specific problem
+2. **Get explicit user approval** before making any changes
+3. **Never add logging** - use GDB breakpoints instead
+4. **Remove any temporary debug code** before committing
+5. **Test via GDB** to verify the fix works
+
+### Detecting Violations
+
+Look for these red flags in prohibited files:
+- `log::*` macros
+- `serial_println!`
+- `format!` or string formatting
+- Raw serial port writes (`out dx, al` to 0x3F8)
+- Any I/O operations
+
+## Interrupt and Syscall Development - CRITICAL PATH REQUIREMENTS
+
+**The interrupt and syscall paths MUST remain pristine.** This is non-negotiable architectural guidance.
+
+### Why This Matters
+
+Timer interrupts fire every ~1ms (1000 Hz). At 3 GHz, that's only 3 million cycles between interrupts. If the timer handler takes too long:
+- Nested interrupts pile up
+- Stack overflow occurs
+- Userspace never executes (timer fires before IRETQ completes)
+
+Real-world example: Adding 230 lines of page table diagnostics to `trace_iretq_to_ring3()` caused timer interrupts to fire within 100-500 cycles after IRETQ, before userspace could execute a single instruction. Result: 0 syscalls executed, infinite kernel loop.
+
+### MANDATORY RULES
+
+**In interrupt handlers (`kernel/src/interrupts/`):**
+- NO serial output (`serial_println!`, `log!`, `debug!`)
+- NO page table walks or memory mapping operations
+- NO locks that might contend (use `try_lock()` with direct hardware fallback)
+- NO heap allocations
+- NO string formatting
+- Target: <1000 cycles total
+
+**In syscall entry/exit (`kernel/src/syscall/entry.asm`, `handler.rs`):**
+- NO logging on the hot path
+- NO diagnostic tracing by default
+- Frame transitions must be minimal
+
+**Stub functions for assembly references:**
+If assembly code calls logging functions that were removed, provide empty `#[no_mangle]` stubs rather than modifying assembly. See `kernel/src/interrupts/timer.rs` for examples.
+
+### Approved Debugging Alternatives
+
+1. **QEMU interrupt tracing**: `BREENIX_QEMU_DEBUG_FLAGS="int,cpu_reset"` logs to file without affecting kernel timing
+2. **GDB breakpoints**: `BREENIX_GDB=1` enables GDB server
+3. **Post-mortem analysis**: Analyze logs after crashes, not during execution
+4. **Dedicated diagnostic threads**: Run diagnostics in separate threads with proper scheduling
+
+### Code Review Checklist
+
+Before approving changes to interrupt/syscall code:
+- [ ] No `serial_println!` or logging macros
+- [ ] No page table operations
+- [ ] No locks without try_lock fallback
+- [ ] No heap allocations
+- [ ] Timing-critical paths marked with comments
+
+## GDB-Only Kernel Debugging - MANDATORY
+
+**ALL kernel execution and debugging MUST be done through GDB.** This is non-negotiable.
+
+Running the kernel directly (`cargo run`, `cargo test`, `cargo run -p xtask -- boot-stages`) without GDB:
+- Provides only serial output, which is insufficient for timing-sensitive bugs
+- Cannot inspect register state, memory, or call stacks
+- Cannot set breakpoints to catch issues before they cascade
+- Cannot intercept panics to examine state
+- Burns context analyzing log output instead of actual debugging
+
+### Interactive GDB Session (PRIMARY WORKFLOW)
+
+Use `gdb_session.sh` for persistent, interactive debugging sessions:
+
+```bash
+# Start a persistent session (keeps QEMU + GDB running)
+./breenix-gdb-chat/scripts/gdb_session.sh start
+
+# Send commands one at a time, making decisions based on results
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "break kernel::syscall::time::sys_clock_gettime"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "continue"
+# Examine what happened, then decide next step...
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "info registers rax rdi rsi"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "print/x \$rdi"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "backtrace 10"
+
+# Get all serial output (kernel print statements)
+./breenix-gdb-chat/scripts/gdb_session.sh serial
+
+# Stop when done
+./breenix-gdb-chat/scripts/gdb_session.sh stop
+```
+
+This is **conversational debugging** - you send a command, see the result, think about it, and decide what to do next. Just like a human sitting at a GDB terminal.
+
+### GDB Chat Tool (Underlying Engine)
+
+The session wrapper uses `breenix-gdb-chat/scripts/gdb_chat.py`:
+
+```bash
+# Can also use directly for scripted debugging
+printf 'break kernel::kernel_main\ncontinue\ninfo registers\nquit\n' | python3 breenix-gdb-chat/scripts/gdb_chat.py
+```
+
+The tool:
+1. Starts QEMU with GDB server enabled (`BREENIX_GDB=1`)
+2. Starts GDB and connects to QEMU on localhost:1234
+3. Loads kernel symbols at the correct PIE base address (0x10000000000)
+4. Accepts commands via stdin, returns JSON responses with serial output included
+5. **No automatic interrupt** - you control the timeout per command
+
+### Essential GDB Commands
+
+**Setting breakpoints:**
+```
+break kernel::kernel_main              # Break at function
+break kernel::syscall::time::sys_clock_gettime
+break *0x10000047b60                   # Break at address
+info breakpoints                       # List all breakpoints
+delete 1                               # Delete breakpoint #1
+```
+
+**Execution control:**
+```
+continue                               # Run until breakpoint or interrupt
+stepi                                  # Step one instruction
+stepi 20                               # Step 20 instructions
+next                                   # Step over function calls
+finish                                 # Run until current function returns
+```
+
+**Inspecting state:**
+```
+info registers                         # All registers
+info registers rip rsp rax rdi rsi     # Specific registers
+backtrace 10                           # Call stack (10 frames)
+x/10i $rip                             # Disassemble 10 instructions at RIP
+x/5xg $rsp                             # Examine 5 quad-words at RSP
+x/2xg 0x7fffff032f98                   # Examine memory at address
+print/x $rax                           # Print register in hex
+```
+
+**Kernel-specific patterns:**
+```
+# Check if syscall returned correctly (RAX = 0 for success)
+info registers rax
+
+# Examine userspace timespec after clock_gettime
+x/2xg $rsi                             # tv_sec, tv_nsec
+
+# Check stack frame integrity
+x/10xg $rsp
+
+# Verify we're in userspace (CS RPL = 3)
+print $cs & 3
+```
+
+### Debugging Workflow
+
+1. **Set breakpoints BEFORE continuing:**
+   ```
+   break kernel::syscall::time::sys_clock_gettime
+   continue
+   ```
+
+2. **Examine state at breakpoint:**
+   ```
+   info registers rip rdi rsi          # RIP, syscall args
+   backtrace 5                          # Where did we come from?
+   ```
+
+3. **Step through problematic code:**
+   ```
+   stepi 10                             # Step through instructions
+   info registers rax                   # Check return value
+   ```
+
+4. **Inspect memory if needed:**
+   ```
+   x/2xg 0x7fffff032f98                 # Examine user buffer
+   ```
+
+### Symbol Loading
+
+The PIE kernel loads at base address `0x10000000000` (1 TiB). The gdb_chat.py tool handles this automatically via `add-symbol-file` with correct section offsets:
+
+- `.text` offset: varies by build
+- Runtime address = `0x10000000000 + elf_section_offset`
+
+If symbols don't resolve, verify with:
+```
+info address kernel::kernel_main
+```
+
+### When to Use GDB vs Boot Stages
+
+**Use boot stages** (`cargo run -p xtask -- boot-stages`) for:
+- Verifying a fix works
+- Checking that all subsystems initialize
+- CI/continuous testing
+- Quick sanity checks
+
+**Use GDB** for:
+- Understanding why a specific failure occurs
+- Examining register/memory state at a crash
+- Stepping through complex code paths
+- Debugging timing-sensitive issues where adding logs would change behavior
+
+### Anti-Patterns
+
+```bash
+# DON'T add logging to hot paths (syscalls, interrupts) to debug issues
+log::debug!("clock_gettime called");  # This changes timing!
+
+# DON'T loop on adding debug prints - use GDB breakpoints instead
+# If you're on your 3rd round of "add log, rebuild, run", switch to GDB
+```
+
+### GDB Debugging Example
+
+```bash
+# Start interactive GDB session
+./breenix-gdb-chat/scripts/gdb_session.sh start
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "break kernel::syscall::time::sys_clock_gettime"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "continue"
+
+# Examine state at breakpoint
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "info registers rdi rsi"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "backtrace 10"
+
+# Stop when done
+./breenix-gdb-chat/scripts/gdb_session.sh stop
+```
+
+## Work Tracking
+
+We use Beads (bd) instead of Markdown for issue tracking. Run `bd quickstart` to get started.
diff --git a/Gemini.md b/Gemini.md
new file mode 100644
index 0000000..719f55f
--- /dev/null
+++ b/Gemini.md
@@ -0,0 +1,123 @@
+# Breenix OS - Gemini Agent Guidelines
+
+## Project Overview
+
+Breenix is a production-quality x86_64 operating system kernel written in Rust. This is not a toy or learning project - we follow Linux/FreeBSD standard practices and prioritize quality over speed.
+
+## Core Mandates & Philosophy
+
+*   **Quality First**: Zero tolerance for warnings (compiler or clippy). Every build must be clean.
+*   **Testing Integrity**: Never fake a passing test. If a test fails, fix the code or explicitly disable the test with a reason.
+*   **GDB for Debugging**: **Do NOT** add logging to hot paths (syscalls, interrupts) for debugging. Use GDB.
+*   **Atomic Migrations**: Legacy code removal must happen in the *same commit* as the new feature completion.
+*   **Pristine Interrupt Paths**: Interrupt and syscall paths must remain minimal (<1000 cycles). No logging, no allocs, no locks in these paths.
+
+## Project Structure
+
+*   `kernel/`: Core kernel (no_std, no_main)
+*   `src.legacy/`: Previous implementation (being phased out)
+*   `libs/`: libbreenix, tiered_allocator
+*   `tests/`: Integration tests
+*   `docs/planning/`: Phase directories (00-15)
+*   `xtask/`: Build and test automation tool
+
+## Primary Workflows
+
+### 1. Standard Development & Verification (Boot Stages)
+For normal development, use the boot stages test to verify kernel health:
+
+```bash
+# Run boot stages test - verifies kernel progresses through all checkpoints
+cargo run -p xtask -- boot-stages
+
+# Build only (no execution)
+cargo build --release --features testing,external_test_bins --bin qemu-uefi
+```
+
+### 2. Debugging (Mandatory GDB Usage)
+Use GDB when you need to understand *why* something is failing.
+
+**Interactive GDB Session (Primary):**
+```bash
+./breenix-gdb-chat/scripts/gdb_session.sh start
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "break kernel::kernel_main"
+./breenix-gdb-chat/scripts/gdb_session.sh cmd "continue"
+# ... examine state ...
+./breenix-gdb-chat/scripts/gdb_session.sh stop
+```
+
+**Quick Debug Loop (for signal detection):**
+```bash
+breenix-kernel-debug-loop/scripts/quick_debug.py --signal "KERNEL_INITIALIZED" --timeout 15
+```
+
+### 3. CI Failure Analysis
+When a CI run fails or a local test times out/crashes:
+```bash
+breenix-ci-failure-analysis/scripts/analyze_ci_failure.py target/xtask_output.txt
+```
+
+### 4. Code Quality Check (Pre-Commit)
+Before every commit, verify zero warnings:
+```bash
+cd kernel
+cargo clippy --target x86_64-unknown-none
+cargo build --target x86_64-unknown-none 2>&1 | grep warning
+# Must produce NO output
+```
+
+## Skills Reference
+
+I am equipped with specific skills for Breenix development. I will invoke these strategies when appropriate.
+
+### Debugging & Analysis
+*   **`breenix-gdb-chat`**: Conversational GDB. Use for interactive debugging, crashes, page faults.
+*   **`breenix-kernel-debug-loop`**: Fast iterative debugging. Use for checking if specific log signals appear within a timeout.
+*   **`breenix-log-analysis`**: Search and analyze kernel logs. Use `scripts/find-in-logs`.
+*   **`breenix-systematic-debugging`**: Document-driven debugging (Problem -> Root Cause -> Solution -> Evidence).
+*   **`breenix-memory-debugging`**: diagnosing page faults, double faults, and allocator issues.
+*   **`breenix-boot-analysis`**: Analyzing boot sequence, timing, and initialization order.
+*   **`breenix-interrupt-trace`**: QEMU-based interrupt tracing for low-level analysis.
+*   **`breenix-register-watch`**: Debugging register corruption using GDB snapshots.
+
+### Development & Maintenance
+*   **`breenix-code-quality-check`**: Enforcing zero-warning policy and Breenix coding standards.
+*   **`breenix-legacy-migration`**: Systematic migration from `src.legacy/` to `kernel/`.
+*   **`breenix-integration-test-authoring`**: Creating shared QEMU tests (`tests/shared_qemu.rs`).
+*   **`breenix-github-workflow-authoring`**: Creating/updating CI workflows.
+*   **`breenix-interrupt-syscall-development`**: Guidelines for writing pristine interrupt/syscall code.
+
+## Critical Technical Guidelines
+
+### 🚨 Prohibited Code Sections 🚨
+Do NOT modify these without explicit user approval and justification (GDB is usually the answer):
+*   **Tier 1 (Forbidden):** `kernel/src/syscall/handler.rs`, `kernel/src/syscall/time.rs`, `kernel/src/syscall/entry.asm`, `kernel/src/interrupts/timer.rs`, `kernel/src/interrupts/timer_entry.asm`.
+*   **Tier 2 (High Scrutiny):** `kernel/src/interrupts/context_switch.rs`, `kernel/src/interrupts/mod.rs`, `kernel/src/gdt.rs`, `kernel/src/per_cpu.rs`.
+
+### Interrupt/Syscall Path Rules
+*   **NO** serial output or logging.
+*   **NO** memory allocation (heap).
+*   **NO** page table walks.
+*   **NO** locks (unless try_lock with fallback).
+*   Target: <1000 cycles total.
+
+### Legacy Migration
+1.  Implement feature in `kernel/`.
+2.  Verify parity with `src.legacy/` via tests.
+3.  Remove `src.legacy/` code AND update `FEATURE_COMPARISON.md` in the **SAME** commit.
+
+### Testing
+*   Most tests use `tests/shared_qemu.rs`.
+*   Tests wait for signals like `🎯 KERNEL_POST_TESTS_COMPLETE 🎯`.
+*   Userspace tests are in `userspace/tests/`.
+
+## Userland Development Stages (Reference)
+*   **Stage 1**: libbreenix (Rust) - ~80% Complete (syscall wrappers)
+*   **Stage 2**: Rust Runtime - Planned (allocator, panic handler)
+*   **Stage 3**: C libc Port - Planned
+*   **Stage 4**: Shell - Planned
+*   **Stage 5**: Coreutils - Planned
+
+## Logging & Artifacts
+*   Logs: `logs/breenix_YYYYMMDD_HHMMSS.log`
+*   View latest: `ls -t logs/*.log | head -1 | xargs less`