diff --git a/bench/render.zig b/bench/render.zig
new file mode 100644
index 0000000..64ff04d
--- /dev/null
+++ b/bench/render.zig
@@ -0,0 +1,206 @@
+//! Microbenchmark for the `boo ui` viewport render hot path.
+//!
+//! Compares serialization strategies for one repaint frame:
+//!   A_full   status quo: a fresh Allocating writer per row, all rows.
+//!   B_full   reused buffer, all rows (full-repaint frame, e.g. scroll).
+//!   C_local  reused buffer, re-serialize one changed row + reuse the
+//!            rest from cache (localized-update frame: typing, progress).
+//!
+//! Build/run: `zig build bench`. Reports ns/frame and the allocation
+//! count for A vs B.
+const std = @import("std");
+const vt = @import("ghostty-vt");
+
+const rows: u16 = 50;
+const cols: u16 = 200;
+const frames: usize = 2000;
+
+/// OSC 8 prefix; a row whose bytes contain it may have opened a hyperlink.
+const hyperlink_open = "\x1b]8;";
+/// OSC 8 with an empty URI, emitted to end any hyperlink left open.
+const hyperlink_close = "\x1b]8;;\x1b\\";
+
+/// Allocator that counts `alloc` calls and delegates every request to
+/// `backing`. Resize, remap and free are forwarded without counting.
+const CountingAllocator = struct {
+    backing: std.mem.Allocator,
+    count: usize = 0,
+
+    const vtable: std.mem.Allocator.VTable = .{
+        .alloc = alloc,
+        .resize = resize,
+        .remap = remap,
+        .free = free,
+    };
+
+    fn allocator(self: *CountingAllocator) std.mem.Allocator {
+        return .{ .ptr = self, .vtable = &vtable };
+    }
+
+    /// Recover the typed receiver from the type-erased vtable context.
+    fn fromCtx(ctx: *anyopaque) *CountingAllocator {
+        return @ptrCast(@alignCast(ctx));
+    }
+
+    fn alloc(ctx: *anyopaque, len: usize, a: std.mem.Alignment, ra: usize) ?[*]u8 {
+        const self = fromCtx(ctx);
+        self.count += 1;
+        return self.backing.rawAlloc(len, a, ra);
+    }
+
+    fn resize(ctx: *anyopaque, m: []u8, a: std.mem.Alignment, n: usize, ra: usize) bool {
+        return fromCtx(ctx).backing.rawResize(m, a, n, ra);
+    }
+
+    fn remap(ctx: *anyopaque, m: []u8, a: std.mem.Alignment, n: usize, ra: usize) ?[*]u8 {
+        return fromCtx(ctx).backing.rawRemap(m, a, n, ra);
+    }
+
+    fn free(ctx: *anyopaque, m: []u8, a: std.mem.Alignment, ra: usize) void {
+        fromCtx(ctx).backing.rawFree(m, a, ra);
+    }
+};
+
+/// Build a VT formatter limited to viewport row `y`, or null when the
+/// terminal has no columns or either end of the row cannot be pinned.
+fn rowFormatter(term: *vt.Terminal, y: u16) ?vt.formatter.ScreenFormatter {
+    if (term.cols == 0) return null;
+    const screen = term.screens.active;
+    const start = screen.pages.pin(.{ .viewport = .{ .x = 0, .y = y } }) orelse return null;
+    const end = screen.pages.pin(.{ .viewport = .{ .x = term.cols - 1, .y = y } }) orelse return null;
+    var formatter: vt.formatter.ScreenFormatter = .init(screen, .vt);
+    formatter.content = .{ .selection = vt.Selection.init(start, end, true) };
+    return formatter;
+}
+
+/// Append `hyperlink_close` when the bytes of `out` from index `from`
+/// onward contain `hyperlink_open`, so a link cannot leak past the row.
+fn closeHyperlink(alloc: std.mem.Allocator, out: *std.ArrayList(u8), from: usize) !void {
+    if (std.mem.indexOf(u8, out.items[from..], hyperlink_open) == null) return;
+    try out.appendSlice(alloc, hyperlink_close);
+}
+
+/// Status-quo serialization: a fresh Allocating writer per row, whose
+/// bytes are then copied into `out`.
+fn rowStatusQuo(alloc: std.mem.Allocator, term: *vt.Terminal, y: u16, out: *std.ArrayList(u8)) !void {
+    const formatter = rowFormatter(term, y) orelse return;
+    var aw: std.Io.Writer.Allocating = .init(alloc);
+    defer aw.deinit();
+    aw.writer.print("{f}", .{formatter}) catch return error.OutOfMemory;
+    const at = out.items.len;
+    try out.appendSlice(alloc, aw.writer.buffered());
+    try closeHyperlink(alloc, out, at);
+}
+
+/// Optimized serialization: format directly into the caller's buffer
+/// (reused across rows/frames), no per-row allocation.
+fn rowReused(alloc: std.mem.Allocator, term: *vt.Terminal, y: u16, out: *std.ArrayList(u8)) !void {
+    const formatter = rowFormatter(term, y) orelse return;
+    const at = out.items.len;
+    {
+        var aw: std.Io.Writer.Allocating = .fromArrayList(alloc, out);
+        defer out.* = aw.toArrayList();
+        aw.writer.print("{f}", .{formatter}) catch return error.OutOfMemory;
+    }
+    try closeHyperlink(alloc, out, at);
+}
+
+/// Paint every row of `term` with colored words by feeding it one VT
+/// byte stream built in a temporary buffer.
+fn fillScreen(alloc: std.mem.Allocator, term: *vt.Terminal) !void {
+    var stream = vt.TerminalStream.initAlloc(alloc, vt.TerminalStream.Handler.init(term));
+    defer stream.deinit();
+    var buf: std.ArrayList(u8) = .empty;
+    defer buf.deinit(alloc);
+    try buf.appendSlice(alloc, "\x1b[H");
+    for (0..rows) |y| {
+        // A mix of default and 256-color SGR segments per row.
+        var x: usize = 0;
+        while (x < cols - 12) : (x += 12) {
+            const color: usize = (y * 7 + x) % 231 + 16;
+            try buf.print(alloc, "\x1b[38;5;{d}mword{d:0>2} ", .{ color, (x / 12) % 100 });
+        }
+        try buf.appendSlice(alloc, "\x1b[0m");
+        if (y + 1 < rows) try buf.appendSlice(alloc, "\r\n");
+    }
+    stream.nextSlice(buf.items);
+}
+
+/// Time `frames` full repaints that serialize every row through
+/// `serializeRow`, then print ns/frame and `alloc` calls per frame
+/// after `label`. Terminal setup happens before the timer starts.
+fn benchFullRepaint(
+    report: *std.Io.Writer,
+    base: std.mem.Allocator,
+    comptime label: []const u8,
+    comptime serializeRow: anytype,
+) !void {
+    var ca: CountingAllocator = .{ .backing = base };
+    const alloc = ca.allocator();
+    var term = try vt.Terminal.init(alloc, .{ .cols = cols, .rows = rows, .max_scrollback = 512 * 1024 });
+    defer term.deinit(alloc);
+    try fillScreen(alloc, &term);
+
+    var body: std.ArrayList(u8) = .empty;
+    defer body.deinit(alloc);
+    const alloc_before = ca.count;
+    var timer = try std.time.Timer.start();
+    for (0..frames) |_| {
+        body.clearRetainingCapacity();
+        for (0..rows) |y| try serializeRow(alloc, &term, @intCast(y), &body);
+    }
+    const ns = timer.read();
+    try report.print(label ++ " {d:>7} ns/frame, {d:>7} allocs/frame\n", .{
+        ns / frames, (ca.count - alloc_before) / frames,
+    });
+}
+
+/// Time `frames` localized updates: one row per frame is re-serialized
+/// into its cache buffer and every other row is copied from the cache.
+fn benchLocalUpdate(report: *std.Io.Writer, alloc: std.mem.Allocator) !void {
+    var term = try vt.Terminal.init(alloc, .{ .cols = cols, .rows = rows, .max_scrollback = 512 * 1024 });
+    defer term.deinit(alloc);
+    try fillScreen(alloc, &term);
+
+    // Per-row cache buffers, primed once.
+    var cache: [rows]std.ArrayList(u8) = @splat(.empty);
+    defer for (&cache) |*c| c.deinit(alloc);
+    for (0..rows) |y| try rowReused(alloc, &term, @intCast(y), &cache[y]);
+
+    var body: std.ArrayList(u8) = .empty;
+    defer body.deinit(alloc);
+    var timer = try std.time.Timer.start();
+    for (0..frames) |i| {
+        body.clearRetainingCapacity();
+        // One row is "dirty" this frame; re-serialize it, reuse rest.
+        const dirty: u16 = @intCast(i % rows);
+        for (0..rows) |y| {
+            if (y == dirty) {
+                cache[y].clearRetainingCapacity();
+                try rowReused(alloc, &term, @intCast(y), &cache[y]);
+            }
+            try body.appendSlice(alloc, cache[y].items);
+        }
+    }
+    const ns = timer.read();
+    try report.print("C_local (1 dirty row, rest cached): {d:>7} ns/frame\n", .{ns / frames});
+}
+
+/// Run the three benchmark variants and print one result line for each.
+pub fn main() !void {
+    // boo runs on the C allocator at runtime (src/main.zig); benchmark
+    // with the same one so per-row allocation cost is realistic.
+    const base = std.heap.c_allocator;
+
+    var stdout_buf: [4096]u8 = undefined;
+    var stdout_w = std.fs.File.stdout().writer(&stdout_buf);
+    const out = &stdout_w.interface;
+
+    try out.print("boo ui render bench: {d} rows x {d} cols, {d} frames\n\n", .{ rows, cols, frames });
+
+    try benchFullRepaint(out, base, "A_full (status quo, all rows):", rowStatusQuo);
+    try benchFullRepaint(out, base, "B_full (reused buf, all rows):", rowReused);
+    try benchLocalUpdate(out, base);
+
+    try out.flush();
+}
diff --git a/build.zig b/build.zig
index 9e9f335..9df46a4 100644
--- a/build.zig
+++ b/build.zig
@@ -68,4 +68,25 @@ pub fn build(b: *std.Build) void {
 
     test_all_step.dependOn(test_step);
     test_all_step.dependOn(integration_step);
+
+    // Benchmark: the viewport render hot path (no TTY required).
+    const bench_mod = b.createModule(.{
+        .root_source_file = b.path("bench/render.zig"),
+        .target = target,
+        .optimize = optimize,
+        .link_libc = true,
+    });
+    if (b.lazyDependency("ghostty", .{
+        .target = target,
+        .optimize = optimize,
+    })) |dep| {
+        bench_mod.addImport("ghostty-vt", dep.module("ghostty-vt"));
+    }
+    const bench_exe = b.addExecutable(.{
+        .name = "boo-bench",
+        .root_module = bench_mod,
+    });
+    const bench_run = b.addRunArtifact(bench_exe);
+    const bench_step = b.step("bench", "Run the render microbenchmark");
+    bench_step.dependOn(&bench_run.step);
 }
diff --git a/src/ui.zig b/src/ui.zig
index fe7568c..dc4ca0b 100644
--- a/src/ui.zig
+++ b/src/ui.zig
@@ -1044,6 +1044,40 @@ pub fn run(alloc: std.mem.Allocator, dir: []const u8) !void {
     try ui.loop(pipe_fds[0]);
 }
 
+/// One viewport (terminal) row's serialized bytes together with the
+/// libghostty row identity they were produced from. Keying on identity
+/// forces a re-serialization when a clean row scrolls to a new position.
+const ViewportRow = struct {
+    /// Output of the most recent `appendTermRow` call for this row.
+    bytes: std.ArrayList(u8) = .empty,
+    /// Page node that held the row when it was cached; compared by
+    /// pointer identity only. Null before the first serialization.
+    node: ?*const anyopaque = null,
+    /// Offset of the cached row inside `node`.
+    offset: u16 = 0,
+    /// True once `bytes`, `node` and `offset` describe a serialized row.
+    valid: bool = false,
+
+    fn deinit(self: *ViewportRow, alloc: std.mem.Allocator) void {
+        self.bytes.deinit(alloc);
+    }
+};
+
+/// Report whether the cached `entry` can stand in for the row at `pin`.
+///
+/// Reuse requires all of: no forced full repaint, an entry that holds a
+/// serialized row, an unchanged libghostty identity (page node plus
+/// offset within it), and a clear dirty bit on the row. The identity
+/// test matters because scrolling the active screen can move a visual
+/// row onto a different page row while leaving it clean, which the
+/// dirty bit alone would miss.
+fn viewportRowReusable(entry: *const ViewportRow, pin: vt.Pin, full_render: bool) bool {
+    const entry_usable = entry.valid and !full_render;
+    const same_node = entry.node == @as(*const anyopaque, @ptrCast(pin.node));
+    const same_offset = entry.offset == pin.y;
+    return entry_usable and same_node and same_offset and !pin.isDirty();
+}
+
 const Ui = struct {
     alloc: std.mem.Allocator,
     dir: []const u8,
@@ -1106,6 +1140,9 @@ const Ui = struct {
     /// Per-screen-row cache of the last emitted bytes; rows that did
     /// not change are not re-sent.
     row_cache: std.ArrayList(std.ArrayList(u8)) = .empty,
+    /// Serialized viewport rows, one entry per screen row, kept so a
+    /// row libghostty reports unchanged is not formatted again.
+    viewport_cache: std.ArrayList(ViewportRow) = .empty,
     need_render: bool = true,
     /// Force every row out on the next render (resize, C-a l).
     full_render: bool = true,
@@ -1145,6 +1182,8 @@ const Ui = struct {
         self.message.deinit(self.alloc);
         for (self.row_cache.items) |*row| row.deinit(self.alloc);
         self.row_cache.deinit(self.alloc);
+        for (self.viewport_cache.items) |*row| row.deinit(self.alloc);
+        self.viewport_cache.deinit(self.alloc);
     }
 
     // -- Main loop ---------------------------------------------------------
@@ -2617,6 +2656,15 @@ const Ui = struct {
             row.deinit(alloc);
         }
 
+        // The viewport cache tracks the same rows as the row cache.
+        while (self.viewport_cache.items.len < l.rows) {
+            try self.viewport_cache.append(alloc, .{});
+        }
+        while (self.viewport_cache.items.len > l.rows) {
+            var row = self.viewport_cache.pop() orelse break;
+            row.deinit(alloc);
+        }
+
         var body: std.ArrayList(u8) = .empty;
         defer body.deinit(alloc);
 
@@ -2638,6 +2686,10 @@ const Ui = struct {
 
         const cursor = self.cursorSequence();
 
+        // The frame consumed this round's dirty bits; clear them so the
+        // next frame's viewport cache reuse reflects only new changes.
+        if (self.liveView()) |v| v.term.screens.active.pages.clearDirty();
+
         if (body.items.len == 0 and !self.full_render) {
             // Row content unchanged; the cursor may still have moved.
             try frame.appendSlice(alloc, "\x1b[?25l");
@@ -2828,6 +2880,41 @@ const Ui = struct {
         try appendClipped(alloc, out, "", w);
     }
 
+    /// Append the serialized bytes for viewport row `y` to `out`,
+    /// reusing the cached bytes when `viewportRowReusable` permits.
+    ///
+    /// The row is re-serialized when its libghostty identity (page node
+    /// and offset) changed, its dirty bit is set, or a full repaint is
+    /// forced. `composeFrame` clears the dirty bits once per frame, so
+    /// a clear bit means "unchanged since the last serialization". A
+    /// row without a pin appends nothing and invalidates its entry; a
+    /// row beyond the cache is serialized directly, without caching.
+    fn appendViewportRow(self: *Ui, v: *View, y: u16, out: *std.ArrayList(u8)) !void {
+        const alloc = self.alloc;
+        // Guard every cache access: the cache is resized to `l.rows`,
+        // whereas the caller only checks `y` against `v.term.rows`.
+        const has_entry = y < self.viewport_cache.items.len;
+        const screen = v.term.screens.active;
+        const pin = screen.pages.pin(.{ .viewport = .{ .x = 0, .y = y } }) orelse {
+            if (has_entry) self.viewport_cache.items[y].valid = false;
+            return;
+        };
+        if (!has_entry) return appendTermRow(alloc, &v.term, y, out);
+
+        const entry = &self.viewport_cache.items[y];
+        if (!viewportRowReusable(entry, pin, self.full_render)) {
+            // Invalidate before serializing: if `appendTermRow` fails,
+            // the partly written buffer must not be reused later.
+            entry.valid = false;
+            entry.bytes.clearRetainingCapacity();
+            try appendTermRow(alloc, &v.term, y, &entry.bytes);
+            entry.node = @ptrCast(pin.node);
+            entry.offset = pin.y;
+            entry.valid = true;
+        }
+        try out.appendSlice(alloc, entry.bytes.items);
+    }
+
     fn composeViewportCell(self: *Ui, y: u16, out: *std.ArrayList(u8)) !void {
         const alloc = self.alloc;
 
@@ -2864,7 +2951,7 @@ const Ui = struct {
         }
 
         if (y < v.term.rows) {
-            try appendTermRow(alloc, &v.term, y, out);
+            try self.appendViewportRow(v, y, out);
         }
 
         try out.appendSlice(alloc, sgr_reset);
@@ -2972,15 +3059,17 @@ pub fn appendTermRow(
     var formatter: vt.formatter.ScreenFormatter = .init(screen, .vt);
     formatter.content = .{ .selection = vt.Selection.init(start, end, true) };
 
-    var aw: std.Io.Writer.Allocating = .init(alloc);
-    defer aw.deinit();
-    aw.writer.print("{f}", .{formatter}) catch return error.OutOfMemory;
-
-    const bytes = aw.writer.buffered();
-    try out.appendSlice(alloc, bytes);
+    // Format straight into `out`, reusing its capacity, so a repaint
+    // does not allocate a fresh writer for every row.
+    const begin = out.items.len;
+    {
+        var aw: std.Io.Writer.Allocating = .fromArrayList(alloc, out);
+        defer out.* = aw.toArrayList();
+        aw.writer.print("{f}", .{formatter}) catch return error.OutOfMemory;
+    }
     // A row that opened a hyperlink must not leak it into the next
     // row or the sidebar.
- if (std.mem.indexOf(u8, bytes, "\x1b]8;") != null) { + if (std.mem.indexOf(u8, out.items[begin..], "\x1b]8;") != null) { try out.appendSlice(alloc, "\x1b]8;;\x1b\\"); } } @@ -3989,3 +4078,55 @@ test "appendTermRow renders styled content for one row only" { try appendTermRow(alloc, &term, 3, &out); try std.testing.expectEqual(@as(usize, 0), out.items.len); } + +test "viewportRowReusable re-serializes a clean row that scrolled away" { + const alloc = std.testing.allocator; + + var term = try vt.Terminal.init(alloc, .{ .cols = 20, .rows = 4 }); + defer term.deinit(alloc); + var stream = term.vtStream(); + defer stream.deinit(); + + stream.nextSlice("\x1b[HAAA\r\nBBB\r\nCCC\r\nDDD"); + const screen = term.screens.active; + + // Mimic a settled frame: one cache entry per viewport row, tagged + // with that row's libghostty identity, then clear the dirty bits. + var entries: [4]ViewportRow = .{ .{}, .{}, .{}, .{} }; + defer for (&entries) |*e| e.deinit(alloc); + for (0..4) |y| { + const pin = screen.pages.pin(.{ .viewport = .{ .x = 0, .y = @intCast(y) } }).?; + try appendTermRow(alloc, &term, @intCast(y), &entries[y].bytes); + entries[y].node = @ptrCast(pin.node); + entries[y].offset = pin.y; + entries[y].valid = true; + } + screen.pages.clearDirty(); + + // Nothing changed: every settled row is reusable. + for (0..4) |y| { + const pin = screen.pages.pin(.{ .viewport = .{ .x = 0, .y = @intCast(y) } }).?; + try std.testing.expect(viewportRowReusable(&entries[y], pin, false)); + } + + // Scroll one line. The rows that moved up are not marked dirty, but + // they now show different content, so their stale cache entries must + // not be reused: clean-but-moved is exactly what the dirty bit + // misses and the identity check catches. 
+ stream.nextSlice("\r\nEEE"); + var clean_moved: usize = 0; + for (0..4) |y| { + const pin = screen.pages.pin(.{ .viewport = .{ .x = 0, .y = @intCast(y) } }).?; + if (!pin.isDirty()) { + clean_moved += 1; + try std.testing.expect(!viewportRowReusable(&entries[y], pin, false)); + } + } + // The scroll must have produced a clean-but-moved row, or this test + // would not exercise the identity check at all. + try std.testing.expect(clean_moved > 0); + + // A forced full repaint never reuses, even an unchanged row. + const pin0 = screen.pages.pin(.{ .viewport = .{ .x = 0, .y = 0 } }).?; + try std.testing.expect(!viewportRowReusable(&entries[0], pin0, true)); +} diff --git a/test/integration.zig b/test/integration.zig index d562f44..42dda1c 100644 --- a/test/integration.zig +++ b/test/integration.zig @@ -1444,6 +1444,46 @@ test "ui: a row touching the viewport's right edge keeps its last cell" { try std.testing.expect(std.mem.indexOf(u8, first, "edge") != null); } +test "ui: scrolling output keeps the viewport in sync with the session" { + const alloc = std.testing.allocator; + var h = try Harness.init(alloc); + defer h.deinit(); + + try h.startDetached("scroll", &.{"sh"}); + + var ui = try PtyClient.spawn(&h, &.{"ui"}, 24, 100); + defer ui.deinit(); + try ui.waitFor("scroll"); + + // Print far more lines than the viewport is tall so the active + // screen scrolls many times. Each scroll moves every visible row + // onto a different libghostty row, so the viewport cache must key + // on row identity and not reuse a stale serialization. Wait on + // "LINE-200" (which the echoed command does not contain literally) + // so the wait cannot race the command echo. 
+ try h.sendLine("scroll", "i=1; while [ $i -le 200 ]; do echo LINE-$i; i=$((i+1)); done"); + try ui.waitFor("LINE-200"); + + const screen = try renderScreen(alloc, ui.output.items, 24, 100); + defer alloc.free(screen); + + // Every LINE-N still on screen must appear in strictly increasing + // order, and the newest line must have rendered. A stale reused row + // would put an older number out of sequence or duplicate one. + var prev: i64 = -1; + var idx: usize = 0; + while (std.mem.indexOfPos(u8, screen, idx, "LINE-")) |pos| { + var end = pos + "LINE-".len; + while (end < screen.len and std.ascii.isDigit(screen[end])) end += 1; + idx = pos + "LINE-".len; + if (end == idx) continue; // "LINE-$i" from the echoed command + const n = std.fmt.parseInt(i64, screen[idx..end], 10) catch continue; + try std.testing.expect(n > prev); + prev = n; + } + try std.testing.expectEqual(@as(i64, 200), prev); +} + test "ui: the empty state shows the ghost and the keybind hint" { const alloc = std.testing.allocator; var h = try Harness.init(alloc);