From 1a253ffbcf084bc4a5c474ffed23c1068c9810c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Soler=20Valad=C3=A9s?= Date: Sat, 21 Feb 2026 00:02:44 +0100 Subject: [PATCH 1/5] Removed inecessary allocations in the smthUpdate fields --- src/array_hash_set.zig | 826 ++++++++++++++++++++++++++++++++++ src/hash_set.zig | 997 +++++++++++++++++++++++++++++++++++++++++ src/main.zig | 67 +++ src/root.zig | 23 +- 4 files changed, 1898 insertions(+), 15 deletions(-) create mode 100644 src/array_hash_set.zig create mode 100644 src/hash_set.zig create mode 100644 src/main.zig diff --git a/src/array_hash_set.zig b/src/array_hash_set.zig new file mode 100644 index 0000000..d7d4705 --- /dev/null +++ b/src/array_hash_set.zig @@ -0,0 +1,826 @@ +/// Open Source Initiative OSI - The MIT License (MIT):Licensing +/// The MIT License (MIT) +/// Copyright (c) 2025 Ralph Caraveo (deckarep@gmail.com) +/// Permission is hereby granted, free of charge, to any person obtaining a copy of +/// this software and associated documentation files (the "Software"), to deal in +/// the Software without restriction, including without limitation the rights to +/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +/// of the Software, and to permit persons to whom the Software is furnished to do +/// so, subject to the following conditions: +/// The above copyright notice and this permission notice shall be included in all +/// copies or substantial portions of the Software. +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +/// SOFTWARE. 
+/// +/// +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; + +/// comptime selection of the map type for string vs everything else. +fn selectMap(comptime E: type) type { + comptime { + if (E == []const u8) { + return std.StringArrayHashMapUnmanaged(void); + } else { + return std.AutoArrayHashMapUnmanaged(E, void); + } + } +} + +pub fn ArraySetUnmanaged(comptime E: type) type { + return struct { + /// The type of the internal hash map + pub const Map = selectMap(E); + + unmanaged: Map, + + pub const Size = usize; + + pub const Entry = struct { + key_ptr: *E, + }; + + /// The iterator type returned by iterator(), a Key iterator doesn't exist + /// on ArrayHashMaps for some reason. + pub const Iterator = struct { + keys: [*]E, + len: usize, + index: usize = 0, + + pub fn next(it: *Iterator) ?Entry { + if (it.index >= it.len) return null; + const result = Entry{ + .key_ptr = &it.keys[it.index], + }; + it.index += 1; + return result; + } + + /// Reset the iterator to the initial index + pub fn reset(it: *Iterator) void { + it.index = 0; + } + }; + + const Self = @This(); + + pub fn init() Self { + return .{ + .unmanaged = Map{}, + }; + } + + pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { + var self = Self.init(); + try self.unmanaged.ensureTotalCapacity(allocator, num); + return self; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.unmanaged.deinit(allocator); + self.* = undefined; + } + + pub fn add(self: *Self, allocator: Allocator, element: E) Allocator.Error!bool { + const prevCount = self.unmanaged.count(); + try self.unmanaged.put(allocator, element, {}); + return prevCount != self.unmanaged.count(); + } + + /// Appends all elements from the provided slice, and may allocate. + /// appendSlice returns an Allocator.Error or Size which represents how + /// many elements added and not previously in the slice. 
+        pub fn appendSlice(self: *Self, allocator: Allocator, elements: []const E) Allocator.Error!Size {
+            const prevCount = self.unmanaged.count();
+            for (elements) |el| {
+                try self.unmanaged.put(allocator, el, {});
+            }
+            return self.unmanaged.count() - prevCount;
+        }
+
+        /// Returns the number of total elements which may be present before
+        /// it is no longer guaranteed that no allocations will be performed.
+        pub fn capacity(self: Self) Size {
+            return self.unmanaged.capacity();
+        }
+
+        /// cardinality returns the number of elements currently in the set.
+        pub fn cardinality(self: Self) Size {
+            return self.unmanaged.count();
+        }
+
+        /// Clears the set via the map's clearAndFree. Invalidates all element pointers.
+        pub fn clearAndFree(self: *Self, allocator: Allocator) void {
+            self.unmanaged.clearAndFree(allocator);
+        }
+
+        /// Clears the set via the map's clearRetainingCapacity. Invalidates all element pointers.
+        pub fn clearRetainingCapacity(self: *Self) void {
+            self.unmanaged.clearRetainingCapacity();
+        }
+
+        /// Creates a copy of this set, allocating the copy with the provided allocator.
+        /// clone may return an Allocator.Error or the cloned Set.
+        pub fn clone(self: *Self, allocator: Allocator) Allocator.Error!Self {
+            // Take a stack copy of self; its map still aliases the original's storage.
+            var cloneSelf = self.*;
+            // Replace the aliased map with a clone of the internal map.
+            cloneSelf.unmanaged = try self.unmanaged.clone(allocator);
+            return cloneSelf;
+        }
+
+        /// Returns true when the provided element exists within the Set, otherwise false.
+        pub fn contains(self: Self, element: E) bool {
+            return self.unmanaged.contains(element);
+        }
+
+        /// Returns true when all elements in the other Set are present in this Set,
+        /// otherwise false.
+        pub fn containsAll(self: Self, other: Self) bool {
+            var iter = other.iterator();
+            while (iter.next()) |el| {
+                if (!self.unmanaged.contains(el.key_ptr.*)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// Returns true when all elements in the provided slice are present, otherwise false.
+        pub fn containsAllSlice(self: Self, elements: []const E) bool {
+            for (elements) |el| {
+                if (!self.unmanaged.contains(el)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// Returns true when at least one element from the other Set exists within
+        /// this Set, otherwise false. (The iterator yields Entry values, hence key_ptr.)
+        pub fn containsAny(self: Self, other: Self) bool {
+            var iter = other.iterator();
+            while (iter.next()) |el| {
+                if (self.unmanaged.contains(el.key_ptr.*)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        pub fn ensureTotalCapacity(self: *Self, allocator: Allocator, num: Size) Allocator.Error!void {
+            return self.unmanaged.ensureTotalCapacity(allocator, num);
+        }
+
+        /// differenceOf returns the difference between this set and other:
+        /// a new set holding all elements of this set that are not also
+        /// elements of the other.
+        /// On allocation failure the partially built set is freed before returning.
+        ///
+        /// Caller owns the newly allocated/returned set.
+        pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
+            var diffSet = Self.init();
+            errdefer diffSet.deinit(allocator);
+            var iter = self.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (!other.unmanaged.contains(entry.key_ptr.*)) {
+                    _ = try diffSet.add(allocator, entry.key_ptr.*);
+                }
+            }
+            return diffSet;
+        }
+
+        /// differenceUpdate does an in-place mutation of this set
+        /// using other. This set will contain all elements of this set that are not
+        /// also elements of other. On error this set is left untouched.
+        pub fn differenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void {
+            // In-place mutation invalidates iterators therefore a temp set is needed.
+            // So instead of a temp set, just invoke the regular full function which
+            // allocates and returns a set then swap out the map internally.
+
+            // Also, this saves a step of not having to possibly discard many elements
+            // from the self set.
+
+            // Just get a new set with the normal method.
+            const diffSet = try self.differenceOf(allocator, other);
+
+            // Destroy the internal map.
+            self.unmanaged.deinit(allocator);
+
+            // Swap it out with the new set.
+            self.unmanaged = diffSet.unmanaged;
+        }
+
+        /// Returns true when at least one element from the slice exists within
+        /// this Set, otherwise false.
+        pub fn containsAnySlice(self: Self, elements: []const E) bool {
+            for (elements) |el| {
+                if (self.unmanaged.contains(el)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// eql determines if two sets are equal to each
+        /// other. If they have the same cardinality
+        /// and contain the same elements, they are
+        /// considered equal. The order in which
+        /// the elements were added is irrelevant.
+        pub fn eql(self: Self, other: Self) bool {
+            // First discriminate on cardinalities of both sets.
+            if (self.unmanaged.count() != other.unmanaged.count()) {
+                return false;
+            }
+
+            // Now check for each element one for one and exit early
+            // on the first non-match.
+            var iter = self.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (!other.unmanaged.contains(entry.key_ptr.*)) {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// intersectionOf returns a new set containing only the elements
+        /// that exist in both sets. A partially built set is freed on allocation failure.
+        ///
+        /// Caller owns the newly allocated/returned set.
+        pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
+            var interSet = Self.init();
+            errdefer interSet.deinit(allocator);
+            // Optimization: iterate over whichever set is smaller.
+            // Matters when disparity in cardinality is large.
+            var s = other;
+            var o = self;
+            if (self.unmanaged.count() < other.unmanaged.count()) {
+                s = self;
+                o = other;
+            }
+
+            var iter = s.unmanaged.iterator();
+            while (iter.next()) |entry| {
+                if (o.unmanaged.contains(entry.key_ptr.*)) {
+                    _ = try interSet.add(allocator, entry.key_ptr.*);
+                }
+            }
+
+            return interSet;
+        }
+
+        /// intersectionUpdate does an in-place intersecting update
+        /// to the current set from the other set keeping only
+        /// elements found in this Set and the other Set.
+        pub fn intersectionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void {
+            // In-place mutation invalidates iterators therefore a temp set is needed.
+            // So instead of a temp set, just invoke the regular full function which
+            // allocates and returns a set then swap out the map internally.
+
+            // Also, this saves a step of not having to possibly discard many elements
+            // from the self set.
+
+            // Just get a new set with the normal method; on error self is untouched.
+            const interSet = try self.intersectionOf(allocator, other);
+
+            // Destroy the internal map.
+            self.unmanaged.deinit(allocator);
+
+            // Swap it out with the new set.
+            self.unmanaged = interSet.unmanaged;
+        }
+
+        /// isDisjoint returns true if the two sets have no element in common,
+        /// otherwise returns false.
+        pub fn isDisjoint(self: Self, other: Self) bool {
+            // Optimization: Find the smaller of the two, and iterate over the smaller set
+            const smaller = if (self.cardinality() <= other.cardinality()) self else other;
+            const larger = if (self.cardinality() <= other.cardinality()) other else self;
+
+            var iter = smaller.iterator();
+            while (iter.next()) |el| {
+                if (larger.contains(el.key_ptr.*)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// Returns true if this Set is empty, otherwise false.
+        pub fn isEmpty(self: Self) bool {
+            return self.unmanaged.count() == 0;
+        }
+
+        /// Create an iterator over the elements in the set, in the map's entry order.
+        /// The iterator is invalidated if the set is modified during iteration.
+        pub fn iterator(self: Self) Iterator {
+            const slice = self.unmanaged.entries.slice();
+            return .{
+                .keys = slice.items(.key).ptr,
+                .len = slice.len,
+            };
+        }
+
+        /// properSubsetOf determines if every element in this set is in
+        /// the other set but the two sets are not equal.
+ pub fn properSubsetOf(self: Self, other: Self) bool { + return self.unmanaged.count() < other.unmanaged.count() and self.subsetOf(other); + } + + /// properSupersetOf determines if every element in the other set + /// is in this set but the two sets are not equal. + pub fn properSupersetOf(self: Self, other: Self) bool { + return self.unmanaged.count() > other.unmanaged.count() and self.supersetOf(other); + } + + /// subsetOf determines if every element in this set is in + /// the other set. + pub fn subsetOf(self: Self, other: Self) bool { + // First discriminate on cardinalties of both sets. + if (self.unmanaged.count() > other.unmanaged.count()) { + return false; + } + + // Now check that self set has at least some elements from other. + var iter = self.unmanaged.iterator(); + while (iter.next()) |entry| { + if (!other.unmanaged.contains(entry.key_ptr.*)) { + return false; + } + } + + return true; + } + + /// subsetOf determines if every element in the other Set is in + /// the this Set. + pub fn supersetOf(self: Self, other: Self) bool { + // This is just the converse of subsetOf. + return other.subsetOf(self); + } + + /// pop removes and returns an arbitrary ?E from the set. + /// Order is not guaranteed. + /// This safely returns null if the Set is empty. + pub fn pop(self: *Self) ?E { + if (self.unmanaged.count() > 0) { + var iter = self.unmanaged.iterator(); + // NOTE: No in-place mutation as it invalidates live iterators. + // So a temporary capture is taken. 
+ var capturedElement: E = undefined; + while (iter.next()) |entry| { + capturedElement = entry.key_ptr.*; + break; + } + _ = self.unmanaged.swapRemove(capturedElement); + return capturedElement; + } else { + return null; + } + } + + /// remove discards a single element from the Set + pub fn remove(self: *Self, element: E) bool { + return self.unmanaged.swapRemove(element); + } + + /// removesAll discards all elements passed from the other Set from + /// this Set + pub fn removeAll(self: *Self, other: Self) void { + var iter = other.iterator(); + while (iter.next()) |el| { + _ = self.unmanaged.swapRemove(el.key_ptr.*); + } + } + + /// removesAllSlice discards all elements passed as a slice from the Set + pub fn removeAllSlice(self: *Self, elements: []const E) void { + for (elements) |el| { + _ = self.unmanaged.swapRemove(el); + } + } + + /// symmetricDifferenceOf returns a new set with all elements which are + /// in either this set or the other set but not in both. + /// + /// The caller owns the newly allocated/returned Set. + pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { + var sdSet = Self.init(); + + var iter = self.unmanaged.iterator(); + while (iter.next()) |entry| { + if (!other.unmanaged.contains(entry.key_ptr.*)) { + _ = try sdSet.add(allocator, entry.key_ptr.*); + } + } + + iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + if (!self.unmanaged.contains(entry.key_ptr.*)) { + _ = try sdSet.add(allocator, entry.key_ptr.*); + } + } + + return sdSet; + } + + /// symmetricDifferenceUpdate does an in-place mutation with all elements + /// which are in either this set or the other set but not in both. + pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { + // In-place mutation invalidates iterators therefore a temp set is needed. 
+ // So instead of a temp set, just invoke the regular full function which + // allocates and returns a set then swap out the map internally. + + // Also, this saves a step of not having to possibly discard many elements + // from the self set. + + // Just get a new set with the normal method. + const sd = try self.symmetricDifferenceOf(allocator, other); + + // Destroy the internal map. + self.unmanaged.deinit(allocator); + + // Swap it out with the new set. + self.unmanaged = sd.unmanaged; + } + + /// union returns a new set with all elements in both sets. + /// + /// The caller owns the newly allocated/returned Set. + pub fn unionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { + // Sniff out larger set for capacity hint. + var n = self.unmanaged.count(); + if (other.unmanaged.count() > n) n = other.unmanaged.count(); + + var uSet = try Self.initCapacity( + allocator, + @intCast(n), + ); + + var iter = self.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try uSet.add(allocator, entry.key_ptr.*); + } + + iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try uSet.add(allocator, entry.key_ptr.*); + } + + return uSet; + } + + /// unionUpdate does an in-place union of the current Set and other Set. + /// + /// Allocations may occur. + pub fn unionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { + var iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try self.add(allocator, entry.key_ptr.*); + } + } + }; +} + +const testing = std.testing; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +test "example usage" { + // Create a set of u32s called A + var A = ArraySetUnmanaged(u32).init(); + defer A.deinit(testing.allocator); + + // Add some data + _ = try A.add(testing.allocator, 5); + _ = try A.add(testing.allocator, 6); + _ = try A.add(testing.allocator, 7); + + // Add more data; single shot, duplicate data is ignored. 
+ _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); + + // Create another set called B + var B = ArraySetUnmanaged(u32).init(); + defer B.deinit(testing.allocator); + + // Add data to B + _ = try B.appendSlice(testing.allocator, &.{ 50, 30, 20 }); + + // Get the union of A | B + var un = try A.unionOf(testing.allocator, B); + defer un.deinit(testing.allocator); + + const expectedCount = 9; + try expectEqual(expectedCount, un.cardinality()); + + // Grab an iterator and dump the contents. + var cnt: usize = 0; + var iter = un.iterator(); + while (iter.next()) |el| { + std.log.debug("element: {d}", .{el.key_ptr.*}); + cnt += 1; + } + + try expectEqual(expectedCount, cnt); +} + +test "string usage" { + var A = ArraySetUnmanaged([]const u8).init(); + defer A.deinit(testing.allocator); + + var B = ArraySetUnmanaged([]const u8).init(); + defer B.deinit(testing.allocator); + + _ = try A.add(testing.allocator, "Hello"); + _ = try B.add(testing.allocator, "World"); + + var C = try A.unionOf(testing.allocator, B); + defer C.deinit(testing.allocator); + try expectEqual(2, C.cardinality()); + try expect(C.containsAllSlice(&.{ "Hello", "World" })); +} + +test "comprehensive usage" { + var set = ArraySetUnmanaged(u32).init(); + defer set.deinit(testing.allocator); + + try expect(set.isEmpty()); + + _ = try set.add(testing.allocator, 8); + _ = try set.add(testing.allocator, 6); + _ = try set.add(testing.allocator, 7); + try expectEqual(set.cardinality(), 3); + + _ = try set.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); + + // Positive cases. + try expect(set.contains(8)); + try expect(set.containsAllSlice(&.{ 5, 3, 9 })); + try expect(set.containsAnySlice(&.{ 5, 55, 12 })); + + // Negative cases. 
+ try expect(!set.contains(99)); + try expect(!set.containsAllSlice(&.{ 8, 6, 77 })); + try expect(!set.containsAnySlice(&.{ 99, 55, 44 })); + + try expectEqual(set.cardinality(), 7); + + var other = ArraySetUnmanaged(u32).init(); + defer other.deinit(testing.allocator); + + try expect(other.isEmpty()); + + _ = try other.add(testing.allocator, 8); + _ = try other.add(testing.allocator, 6); + _ = try other.add(testing.allocator, 7); + + _ = try other.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); + + try expect(set.eql(other)); + try expectEqual(other.cardinality(), 7); + + try expect(other.remove(8)); + try expectEqual(other.cardinality(), 6); + try expect(!other.remove(55)); + try expect(!set.eql(other)); + + other.removeAllSlice(&.{ 6, 7 }); + try expectEqual(other.cardinality(), 4); + + // intersectionOf + var inter = try set.intersectionOf(testing.allocator, other); + defer inter.deinit(testing.allocator); + try expect(!inter.isEmpty()); + try expectEqual(inter.cardinality(), 4); + try expect(inter.containsAllSlice(&.{ 5, 3, 0, 9 })); + + // Union + var un = try set.unionOf(testing.allocator, other); + defer un.deinit(testing.allocator); + try expect(!un.isEmpty()); + try expectEqual(un.cardinality(), 7); + try expect(un.containsAllSlice(&.{ 8, 6, 7, 5, 3, 0, 9 })); + + // differenceOf + var diff = try set.differenceOf(testing.allocator, other); + defer diff.deinit(testing.allocator); + try expect(!diff.isEmpty()); + try expectEqual(diff.cardinality(), 3); + try expect(diff.containsAllSlice(&.{ 8, 7, 6 })); + + // symmetricDifferenceOf + _ = try set.add(testing.allocator, 11111); + _ = try set.add(testing.allocator, 9999); + _ = try other.add(testing.allocator, 7777); + var symmDiff = try set.symmetricDifferenceOf(testing.allocator, other); + defer symmDiff.deinit(testing.allocator); + try expect(!symmDiff.isEmpty()); + try expectEqual(symmDiff.cardinality(), 6); + try expect(symmDiff.containsAllSlice(&.{ 7777, 11111, 8, 7, 6, 9999 })); + + // subsetOf + + 
// supersetOf +} + +test "isDisjoint" { + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + var b = ArraySetUnmanaged(u32).init(); + defer b.deinit(testing.allocator); + _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); + + // Test the true case. + try expect(a.isDisjoint(b)); + try expect(b.isDisjoint(a)); + + // Test the false case. + var c = ArraySetUnmanaged(u32).init(); + defer c.deinit(testing.allocator); + _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); + + try expect(!a.isDisjoint(c)); + try expect(!c.isDisjoint(a)); +} + +test "clone" { + + // clone + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + var b = try a.clone(testing.allocator); + defer b.deinit(testing.allocator); + + try expect(a.eql(b)); +} + +test "clear/capacity" { + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + + try expectEqual(0, a.cardinality()); + try expectEqual(0, a.capacity()); + + const cap = 99; + var b = try ArraySetUnmanaged(u32).initCapacity(testing.allocator, cap); + defer b.deinit(testing.allocator); + + try expectEqual(0, b.cardinality()); + try expect(b.capacity() >= cap); + + for (0..cap) |val| { + _ = try b.add(testing.allocator, @intCast(val)); + } + + try expectEqual(99, b.cardinality()); + try expect(b.capacity() >= cap); + + b.clearRetainingCapacity(); + + try expectEqual(0, b.cardinality()); + try expect(b.capacity() >= cap); + + b.clearAndFree(testing.allocator); + + try expectEqual(0, b.cardinality()); + try expectEqual(b.capacity(), 0); +} + +test "iterator" { + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + var sum: u32 = 0; + var iterCount: usize = 0; + var iter = a.iterator(); + while (iter.next()) |el| { + sum += el.key_ptr.*; + 
iterCount += 1; + } + + try expectEqual(90, sum); + try expectEqual(3, iterCount); +} + +test "pop" { + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + // No assumptions can be made about pop order. + while (a.pop()) |result| { + try expect(result == 20 or result == 30 or result == 40); + } + + // At this point, set must be empty. + try expectEqual(a.cardinality(), 0); + try expect(a.isEmpty()); + + // Lastly, pop should safely return null. + try expect(a.pop() == null); +} + +test "in-place methods" { + // intersectionUpdate + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); + + var b = ArraySetUnmanaged(u32).init(); + defer b.deinit(testing.allocator); + _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); + + try a.intersectionUpdate(testing.allocator, b); + try expectEqual(a.cardinality(), 2); + try expect(a.containsAllSlice(&.{ 20, 30 })); + + // unionUpdate + var c = ArraySetUnmanaged(u32).init(); + defer c.deinit(testing.allocator); + _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); + + var d = ArraySetUnmanaged(u32).init(); + defer d.deinit(testing.allocator); + _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); + + try c.unionUpdate(testing.allocator, d); + try expectEqual(c.cardinality(), 6); + try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); + + // differenceUpdate + var e = ArraySetUnmanaged(u32).init(); + defer e.deinit(testing.allocator); + _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); + + var f = ArraySetUnmanaged(u32).init(); + defer f.deinit(testing.allocator); + _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); + + try e.differenceUpdate(testing.allocator, f); + + try expectEqual(1, e.cardinality()); + try expect(e.contains(11111)); + + // 
symmetricDifferenceUpdate + var g = ArraySetUnmanaged(u32).init(); + defer g.deinit(testing.allocator); + _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); + + var h = ArraySetUnmanaged(u32).init(); + defer h.deinit(testing.allocator); + _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); + + try g.symmetricDifferenceUpdate(testing.allocator, h); + + try expectEqual(10, g.cardinality()); + try expect(g.containsAllSlice(&.{ 1, 2, 11, 111, 22, 222, 1111, 333, 3333, 22222 })); +} + +test "removals" { + var a = ArraySetUnmanaged(u32).init(); + defer a.deinit(testing.allocator); + + _ = try a.appendSlice(testing.allocator, &.{ 5, 6, 7, 8 }); + _ = try a.appendSlice(testing.allocator, &.{ 50, 60, 70, 80 }); + _ = try a.appendSlice(testing.allocator, &.{ 111, 222, 333, 444 }); + + try expectEqual(12, a.cardinality()); + + try expect(a.remove(5)); + try expect(a.remove(6)); + try expect(a.remove(7)); + try expect(a.remove(8)); + + try expectEqual(8, a.cardinality()); + + a.removeAllSlice(&.{ 50, 60, 70, 80 }); + try expectEqual(4, a.cardinality()); + + var b = ArraySetUnmanaged(u32).init(); + defer b.deinit(testing.allocator); + + _ = try b.appendSlice(testing.allocator, &.{ 111, 222, 333, 444 }); + a.removeAll(b); + + try expectEqual(0, a.cardinality()); +} + +test "sizeOf matches" { + // No bloat guarantee, after all we're just building on top of what's good. 
+ const expectedByteSize = 40; + try expectEqual(expectedByteSize, @sizeOf(std.array_hash_map.AutoArrayHashMapUnmanaged(u32, void))); + try expectEqual(expectedByteSize, @sizeOf(ArraySetUnmanaged(u32))); +} diff --git a/src/hash_set.zig b/src/hash_set.zig new file mode 100644 index 0000000..a5bdf82 --- /dev/null +++ b/src/hash_set.zig @@ -0,0 +1,997 @@ +/// Open Source Initiative OSI - The MIT License (MIT):Licensing +/// The MIT License (MIT) +/// Copyright (c) 2025 Ralph Caraveo (deckarep@gmail.com) +/// Permission is hereby granted, free of charge, to any person obtaining a copy of +/// this software and associated documentation files (the "Software"), to deal in +/// the Software without restriction, including without limitation the rights to +/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +/// of the Software, and to permit persons to whom the Software is furnished to do +/// so, subject to the following conditions: +/// The above copyright notice and this permission notice shall be included in all +/// copies or substantial portions of the Software. +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +/// SOFTWARE. +/// +/// +const std = @import("std"); +const mem = std.mem; +const math = std.math; +const Allocator = mem.Allocator; + +/// comptime selection of the map type for string vs everything else. 
+fn selectMap(comptime E: type) type {
+    comptime {
+        if (E == []const u8) {
+            return std.StringHashMapUnmanaged(void);
+        } else {
+            return std.AutoHashMapUnmanaged(E, void);
+        }
+    }
+}
+
+/// Selects std.HashMapUnmanaged keyed by E using the given Context and max_load_percentage.
+fn selectMapWithContext(comptime E: type, comptime Context: type, comptime max_load_percentage: u8) type {
+    return std.HashMapUnmanaged(E, void, Context, max_load_percentage);
+}
+
+/// HashSet is an implementation of a Set where there is no internal
+/// allocator and all allocating methods require a first argument allocator.
+/// This is a more compact Set built on top of the HashMap
+/// data structure.
+/// Note that max_load_percentage is passed as undefined (with a void Context), because the
+/// underlying std.AutoHashMapUnmanaged/std.StringHashMapUnmanaged defaults are used.
+pub fn HashSet(comptime E: type) type {
+    return HashSetWithContext(E, void, undefined);
+}
+
+/// HashSetWithContext creates a set based on element type E with custom hashing behavior.
+/// This variant allows specifying:
+/// - A Context type that implements hash() and eql() functions for custom element hashing
+/// - A max_load_percentage (1-100) that controls hash table resizing
+/// If Context is void, then max_load_percentage is ignored.
+/// +/// The Context type must provide: +/// fn hash(self: Context, key: K) u64 +/// fn eql(self: Context, a: K, b: K) bool +pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max_load_percentage: u8) type { + return struct { + /// The type of the internal hash map + pub const Map = if (Context == void) selectMap(E) else selectMapWithContext(E, Context, max_load_percentage); + + unmanaged: Map, + context: if (Context == void) void else Context = if (Context == void) {} else undefined, + max_load_percentage: if (Context == void) void else u8 = if (Context == void) {} else max_load_percentage, + + pub const Size = Map.Size; + /// The iterator type returned by iterator(), key-only for sets + pub const Iterator = Map.KeyIterator; + + const Self = @This(); + + /// Initialize a default set without context + pub fn init() Self { + return .{ + .unmanaged = Map{}, + .context = if (Context == void) {} else undefined, + .max_load_percentage = if (Context == void) {} else max_load_percentage, + }; + } + + /// Initialize with a custom context + pub fn initContext(context: Context) Self { + return .{ + .unmanaged = Map{}, + .context = context, + .max_load_percentage = max_load_percentage, + }; + } + + /// Initialzies a Set using a capacity hint, with the given Allocator + pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { + var self = Self.init(); + try self.unmanaged.ensureTotalCapacity(allocator, num); + return self; + } + + /// Destroys the unmanaged Set. + pub fn deinit(self: *Self, allocator: Allocator) void { + self.unmanaged.deinit(allocator); + self.* = undefined; + } + + pub fn add(self: *Self, allocator: Allocator, element: E) Allocator.Error!bool { + const prevCount = self.unmanaged.count(); + try self.unmanaged.put(allocator, element, {}); + return prevCount != self.unmanaged.count(); + } + + /// Adds a single element to the set. Asserts that there is enough capacity. 
+ /// A bool is returned indicating if the element was actually added + /// if not already known. + pub fn addAssumeCapacity(self: *Self, element: E) bool { + const prevCount = self.unmanaged.count(); + self.unmanaged.putAssumeCapacity(element, {}); + return prevCount != self.unmanaged.count(); + } + + /// Appends all elements from the provided set, and may allocate. + /// append returns an Allocator.Error or Size which represents how + /// many elements added and not previously in the Set. + pub fn append(self: *Self, allocator: Allocator, other: Self) Allocator.Error!Size { + const prevCount = self.unmanaged.count(); + + try self.unionUpdate(allocator, other); + return self.unmanaged.count() - prevCount; + } + + /// Appends all elements from the provided slice, and may allocate. + /// appendSlice returns an Allocator.Error or Size which represents how + /// many elements added and not previously in the slice. + pub fn appendSlice(self: *Self, allocator: Allocator, elements: []const E) Allocator.Error!Size { + const prevCount = self.unmanaged.count(); + for (elements) |el| { + try self.unmanaged.put(allocator, el, {}); + } + return self.unmanaged.count() - prevCount; + } + + /// Returns the number of total elements which may be present before + /// it is no longer guaranteed that no allocations will be performed. + pub fn capacity(self: Self) Size { + return self.unmanaged.capacity(); + } + + /// Cardinality effectively returns the size of the set. + pub fn cardinality(self: Self) Size { + return self.unmanaged.count(); + } + + /// Invalidates all element pointers. + pub fn clearAndFree(self: *Self, allocator: Allocator) void { + self.unmanaged.clearAndFree(allocator); + } + + /// Invalidates all element pointers. + pub fn clearRetainingCapacity(self: *Self) void { + self.unmanaged.clearRetainingCapacity(); + } + + /// Creates a copy of this set, using the same allocator. + /// clone may return an Allocator.Error or the cloned Set. 
pub fn clone(self: *Self, allocator: Allocator) Allocator.Error!Self {
    // Only the backing map owns memory; duplicate it first, then carry the
    // remaining plain-value fields over unchanged.
    const map_copy = try self.unmanaged.clone(allocator);
    return .{
        .unmanaged = map_copy,
        .context = self.context,
        .max_load_percentage = self.max_load_percentage,
    };
}

/// Reports whether `element` is a member of the Set.
pub fn contains(self: Self, element: E) bool {
    return self.unmanaged.contains(element);
}

/// Reports whether every element of the other Set is also a member of
/// this Set. An empty `other` yields true.
pub fn containsAll(self: Self, other: Self) bool {
    var it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (!self.unmanaged.contains(key_ptr.*)) return false;
    }
    return true;
}

/// Reports whether every element of the slice is a member of this Set.
/// An empty slice yields true.
pub fn containsAllSlice(self: Self, elements: []const E) bool {
    for (elements) |element| {
        if (!self.unmanaged.contains(element)) return false;
    }
    return true;
}

/// Reports whether at least one element of the other Set is a member of
/// this Set. An empty `other` yields false.
pub fn containsAny(self: Self, other: Self) bool {
    var it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (self.unmanaged.contains(key_ptr.*)) return true;
    }
    return false;
}

/// Reports whether at least one element of the slice is a member of this
/// Set. An empty slice yields false.
pub fn containsAnySlice(self: Self, elements: []const E) bool {
    for (elements) |element| {
        if (self.unmanaged.contains(element)) return true;
    }
    return false;
}

/// differenceOf returns the difference between this set
/// and other. The returned set will contain
/// all elements of this set that are not also
/// elements of the other.
///
/// Caller owns the newly allocated/returned set.
pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
    var result = Self.init();
    // Release the partially built set if an insertion fails part-way;
    // otherwise the caller never receives it and its storage would leak.
    errdefer result.deinit(allocator);

    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (other.unmanaged.contains(key_ptr.*)) continue;
        try result.unmanaged.put(allocator, key_ptr.*, {});
    }
    return result;
}

/// differenceUpdate does an in-place mutation of this set: every element
/// that is also an element of other is removed, so this set ends up
/// holding only the elements that are not in other.
///
/// No allocation is performed and no error is ever returned; the error
/// union is kept in the signature so existing `try` call sites keep
/// compiling.
pub fn differenceUpdate(self: *Self, other: Self) Allocator.Error!void {
    var it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        _ = self.unmanaged.remove(key_ptr.*);
    }
}

/// Debugging aid: logs the cardinality followed by each element.
/// Elements are formatted with `{d}`.
fn dump(self: Self) void {
    std.log.err("\ncardinality: {d}\n", .{self.cardinality()});
    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        std.log.err(" element: {d}\n", .{key_ptr.*});
    }
}

/// Increases capacity, guaranteeing that insertions up until the
/// `expected_count` will not cause an allocation, and therefore cannot fail.
pub fn ensureTotalCapacity(self: *Self, allocator: Allocator, expected_count: Size) Allocator.Error!void {
    return self.unmanaged.ensureTotalCapacity(allocator, expected_count);
}

/// Increases capacity, guaranteeing that insertions up until
/// `additional_count` **more** items will not cause an allocation, and
/// therefore cannot fail.
pub fn ensureUnusedCapacity(self: *Self, allocator: Allocator, additional_count: Size) Allocator.Error!void {
    return self.unmanaged.ensureUnusedCapacity(allocator, additional_count);
}

/// eql determines if two sets are equal to each other. Sets with the
/// same cardinality and the same elements are considered equal; the
/// order in which the elements were added is irrelevant.
pub fn eql(self: Self, other: Self) bool {
    // Different cardinalities can never be equal.
    if (self.unmanaged.count() != other.unmanaged.count()) return false;

    // With equal cardinalities it is enough that every element of this
    // set is found in the other; exit on the first miss.
    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (!other.unmanaged.contains(key_ptr.*)) return false;
    }
    return true;
}

/// intersectionOf returns a new set containing only the elements
/// that exist in both sets.
///
/// Caller owns the newly allocated/returned set.
pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
    var result = Self.init();
    // Release the partially built set if an insertion fails part-way.
    errdefer result.deinit(allocator);

    // Iterate over whichever set is smaller and probe the larger one;
    // this matters when the disparity in cardinality is large.
    const self_is_smaller = self.unmanaged.count() < other.unmanaged.count();
    const walked = if (self_is_smaller) self else other;
    const probed = if (self_is_smaller) other else self;

    var it = walked.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (!probed.unmanaged.contains(key_ptr.*)) continue;
        try result.unmanaged.put(allocator, key_ptr.*, {});
    }
    return result;
}

/// intersectionUpdate does an in-place intersecting update
/// to the current set from the other set keeping only
/// elements found in this Set and the other Set.
///
/// A temporary list of the elements to drop is allocated with
/// `allocator` and freed before returning. If that allocation fails the
/// set is left unmodified.
pub fn intersectionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void {
    var to_remove: std.ArrayList(E) = .empty;
    defer to_remove.deinit(allocator);

    // First pass: collect the elements missing from other. Nothing is
    // removed while the iterator over this set is still live.
    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (other.unmanaged.contains(key_ptr.*)) continue;
        try to_remove.append(allocator, key_ptr.*);
    }

    // Second pass: drop the collected elements.
    for (to_remove.items) |item| {
        _ = self.unmanaged.remove(item);
    }
}

/// isDisjoint returns true if the intersection between two sets is the null set.
/// Otherwise returns false.
pub fn isDisjoint(self: Self, other: Self) bool {
    // Walk the smaller set and probe the larger one.
    const self_is_smaller = self.unmanaged.count() <= other.unmanaged.count();
    const walked = if (self_is_smaller) self else other;
    const probed = if (self_is_smaller) other else self;

    var it = walked.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (probed.unmanaged.contains(key_ptr.*)) return false;
    }
    return true;
}

/// Returns true when the set holds no elements.
pub fn isEmpty(self: Self) bool {
    return self.unmanaged.count() == 0;
}

/// Create an iterator over the elements in the set.
/// The iterator is invalidated if the set is modified during iteration.
pub fn iterator(self: Self) Iterator {
    return self.unmanaged.keyIterator();
}

/// properSubsetOf determines if every element in this set is in
/// the other set but the two sets are not equal.
pub fn properSubsetOf(self: Self, other: Self) bool {
    return self.unmanaged.count() < other.unmanaged.count() and self.subsetOf(other);
}

/// properSupersetOf determines if every element in the other set
/// is in this set but the two sets are not equal.
pub fn properSupersetOf(self: Self, other: Self) bool {
    return self.unmanaged.count() > other.unmanaged.count() and self.supersetOf(other);
}

/// subsetOf determines if every element in this set is in
/// the other set.
pub fn subsetOf(self: Self, other: Self) bool {
    // A set with more elements than other cannot be its subset.
    if (self.unmanaged.count() > other.unmanaged.count()) return false;

    // Every element of this set must be present in other.
    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (!other.unmanaged.contains(key_ptr.*)) return false;
    }
    return true;
}

/// supersetOf determines if every element in the other Set is in
/// this Set.
pub fn supersetOf(self: Self, other: Self) bool {
    // This is just the converse of subsetOf.
    return other.subsetOf(self);
}

/// pop removes and returns an arbitrary ?E from the set.
/// Order is not guaranteed.
/// This safely returns null if the Set is empty.
pub fn pop(self: *Self) ?E {
    var it = self.unmanaged.keyIterator();
    const key_ptr = it.next() orelse return null;
    // Copy the element out before removing it so the returned value does
    // not refer to the map's storage.
    const element = key_ptr.*;
    _ = self.unmanaged.remove(element);
    return element;
}

/// remove discards a single element from the Set.
/// Returns true when the element was present and has been removed.
pub fn remove(self: *Self, element: E) bool {
    return self.unmanaged.remove(element);
}

/// removeAll discards all elements of the other Set from this Set.
/// Elements of other that are not in this Set are ignored.
pub fn removeAll(self: *Self, other: Self) void {
    var it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        // The iterator yields key pointers; remove() takes the element
        // value, so it has to be dereferenced.
        _ = self.unmanaged.remove(key_ptr.*);
    }
}

/// removeAllSlice discards all elements passed as a slice from the Set.
/// Elements of the slice that are not in this Set are ignored.
pub fn removeAllSlice(self: *Self, elements: []const E) void {
    for (elements) |element| {
        _ = self.unmanaged.remove(element);
    }
}

/// symmetricDifferenceOf returns a new set with all elements which are
/// in either this set or the other set but not in both.
///
/// The caller owns the newly allocated/returned Set.
pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
    var result = Self.init();
    // Release the partially built set if an insertion fails part-way;
    // otherwise the caller never receives it and its storage would leak.
    errdefer result.deinit(allocator);

    // Elements of this set that other lacks.
    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (other.unmanaged.contains(key_ptr.*)) continue;
        try result.unmanaged.put(allocator, key_ptr.*, {});
    }

    // Elements of other that this set lacks.
    it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        if (self.unmanaged.contains(key_ptr.*)) continue;
        try result.unmanaged.put(allocator, key_ptr.*, {});
    }

    return result;
}

/// symmetricDifferenceUpdate does an in-place mutation with all elements
/// which are in either this set or the other set but not in both.
///
/// Allocations may occur. If an insertion fails, the elements of other
/// processed so far have already been applied to this set.
pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void {
    var it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        const element = key_ptr.*;
        // remove() reports whether the element was present: a shared
        // element is dropped, a missing one is inserted.
        if (!self.unmanaged.remove(element)) {
            try self.unmanaged.put(allocator, element, {});
        }
    }
}

/// unionOf returns a new set with all elements in both sets.
///
/// The caller owns the newly allocated/returned Set.
pub fn unionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self {
    // Use the larger cardinality as the capacity hint.
    const hint = @max(self.unmanaged.count(), other.unmanaged.count());

    var result = try Self.initCapacity(allocator, hint);
    // Release the new set if a later insertion needs to grow and fails.
    errdefer result.deinit(allocator);

    var it = self.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        try result.unmanaged.put(allocator, key_ptr.*, {});
    }

    it = other.unmanaged.keyIterator();
    while (it.next()) |key_ptr| {
        try result.unmanaged.put(allocator, key_ptr.*, {});
    }

    return result;
}

/// unionUpdate does an in-place union of the current Set and other Set.
///
/// Allocations may occur.
+ pub fn unionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { + var iter = other.unmanaged.iterator(); + while (iter.next()) |entry| { + _ = try self.add(allocator, entry.key_ptr.*); + } + } + }; +} + +const testing = std.testing; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +test "example usage" { + // Create a set of u32s called A + var A = HashSet(u32).init(); + defer A.deinit(testing.allocator); + + // Add some data + _ = try A.add(testing.allocator, 5); + _ = try A.add(testing.allocator, 6); + _ = try A.add(testing.allocator, 7); + + // Add more data; single shot, duplicate data is ignored. + _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); + + // Create another set called B + var B = HashSet(u32).init(); + defer B.deinit(testing.allocator); + + // Add data to B + _ = try B.appendSlice(testing.allocator, &.{ 50, 30, 20 }); + + // // Get the union of A | B + var un = try A.unionOf(testing.allocator, B); + defer un.deinit(testing.allocator); + + try expectEqual(9, un.cardinality()); + + // Grab an iterator and dump the contents. 
+ var iter = un.iterator(); + while (iter.next()) |el| { + std.log.debug("element: {d}", .{el.*}); + } +} + +test "string usage" { + var A = HashSet([]const u8).init(); + defer A.deinit(testing.allocator); + + var B = HashSet([]const u8).init(); + defer B.deinit(testing.allocator); + + _ = try A.add(testing.allocator, "Hello"); + _ = try B.add(testing.allocator, "World"); + + var C = try A.unionOf(testing.allocator, B); + defer C.deinit(testing.allocator); + try expectEqual(2, C.cardinality()); + try expect(C.containsAllSlice(&.{ "Hello", "World" })); +} + +test "comprehensive usage" { + var set = HashSet(u32).init(); + defer set.deinit(testing.allocator); + + try expect(set.isEmpty()); + + _ = try set.add(testing.allocator, 8); + _ = try set.add(testing.allocator, 6); + _ = try set.add(testing.allocator, 7); + try expectEqual(set.cardinality(), 3); + + _ = try set.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); + + // Positive cases. + try expect(set.contains(8)); + try expect(set.containsAllSlice(&.{ 5, 3, 9 })); + try expect(set.containsAnySlice(&.{ 5, 55, 12 })); + + // Negative cases. 
+ try expect(!set.contains(99)); + try expect(!set.containsAllSlice(&.{ 8, 6, 77 })); + try expect(!set.containsAnySlice(&.{ 99, 55, 44 })); + + try expectEqual(set.cardinality(), 7); + + var other = HashSet(u32).init(); + defer other.deinit(testing.allocator); + + try expect(other.isEmpty()); + + _ = try other.add(testing.allocator, 8); + _ = try other.add(testing.allocator, 6); + _ = try other.add(testing.allocator, 7); + + _ = try other.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); + + try expect(set.eql(other)); + try expectEqual(other.cardinality(), 7); + + try expect(other.remove(8)); + try expectEqual(other.cardinality(), 6); + try expect(!other.remove(55)); + try expect(!set.eql(other)); + + other.removeAllSlice(&.{ 6, 7 }); + try expectEqual(other.cardinality(), 4); + + // intersectionOf + var inter = try set.intersectionOf(testing.allocator, other); + defer inter.deinit(testing.allocator); + try expect(!inter.isEmpty()); + try expectEqual(inter.cardinality(), 4); + try expect(inter.containsAllSlice(&.{ 5, 3, 0, 9 })); + + // Union + var un = try set.unionOf(testing.allocator, other); + defer un.deinit(testing.allocator); + try expect(!un.isEmpty()); + try expectEqual(un.cardinality(), 7); + try expect(un.containsAllSlice(&.{ 8, 6, 7, 5, 3, 0, 9 })); + + // differenceOf + var diff = try set.differenceOf(testing.allocator, other); + defer diff.deinit(testing.allocator); + try expect(!diff.isEmpty()); + try expectEqual(diff.cardinality(), 3); + try expect(diff.containsAllSlice(&.{ 8, 7, 6 })); + + // symmetricDifferenceOf + _ = try set.add(testing.allocator, 11111); + _ = try set.add(testing.allocator, 9999); + _ = try other.add(testing.allocator, 7777); + var symmDiff = try set.symmetricDifferenceOf(testing.allocator, other); + defer symmDiff.deinit(testing.allocator); + try expect(!symmDiff.isEmpty()); + try expectEqual(symmDiff.cardinality(), 6); + try expect(symmDiff.containsAllSlice(&.{ 7777, 11111, 8, 7, 6, 9999 })); + + // subsetOf + + // 
supersetOf +} + +test "isDisjoint" { + var a = HashSet(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + var b = HashSet(u32).init(); + defer b.deinit(testing.allocator); + _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); + + // Test the true case. + try expect(a.isDisjoint(b)); + try expect(b.isDisjoint(a)); + + // Test the false case. + var c = HashSet(u32).init(); + defer c.deinit(testing.allocator); + _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); + + try expect(!a.isDisjoint(c)); + try expect(!c.isDisjoint(a)); +} + +test "clone" { + + // clone + var a = HashSet(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + var b = try a.clone(testing.allocator); + defer b.deinit(testing.allocator); + + try expect(a.eql(b)); +} + +test "clear/capacity" { + var a = HashSet(u32).init(); + defer a.deinit(testing.allocator); + + try expectEqual(0, a.cardinality()); + try expectEqual(0, a.capacity()); + + const cap = 99; + var b = try HashSet(u32).initCapacity(testing.allocator, cap); + defer b.deinit(testing.allocator); + + try expectEqual(0, b.cardinality()); + try expect(b.capacity() >= cap); + + for (0..cap) |val| { + _ = try b.add(testing.allocator, @intCast(val)); + } + + try expectEqual(99, b.cardinality()); + try expect(b.capacity() >= cap); + + b.clearRetainingCapacity(); + + try expectEqual(0, b.cardinality()); + try expect(b.capacity() >= cap); + + b.clearAndFree(testing.allocator); + + try expectEqual(0, b.cardinality()); + try expectEqual(b.capacity(), 0); +} + +test "iterator" { + var a = HashSet(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + var sum: u32 = 0; + var iterCount: usize = 0; + var iter = a.iterator(); + while (iter.next()) |el| { + sum += el.*; + iterCount += 1; + } + + try expectEqual(90, sum); + try expectEqual(3, 
iterCount); +} + +test "pop" { + var a = HashSet(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); + + // No assumptions can be made about pop order. + while (a.pop()) |result| { + try expect(result == 20 or result == 30 or result == 40); + } + + // At this point, set must be empty. + try expectEqual(a.cardinality(), 0); + try expect(a.isEmpty()); + + // Lastly, pop should safely return null. + try expect(a.pop() == null); +} + +test "in-place methods" { + // intersectionUpdate + var a = HashSet(u32).init(); + defer a.deinit(testing.allocator); + _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); + + var b = HashSet(u32).init(); + defer b.deinit(testing.allocator); + _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); + + try a.intersectionUpdate(testing.allocator, b); + try expectEqual(a.cardinality(), 2); + try expect(a.containsAllSlice(&.{ 20, 30 })); + + // unionUpdate + var c = HashSet(u32).init(); + defer c.deinit(testing.allocator); + _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); + + var d = HashSet(u32).init(); + defer d.deinit(testing.allocator); + _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); + + try c.unionUpdate(testing.allocator, d); + try expectEqual(c.cardinality(), 6); + try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); + + // differenceUpdate + var e = HashSet(u32).init(); + defer e.deinit(testing.allocator); + _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); + + var f = HashSet(u32).init(); + defer f.deinit(testing.allocator); + _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); + + try e.differenceUpdate(f); + + try expectEqual(1, e.cardinality()); + try expect(e.contains(11111)); + + // symmetricDifferenceUpdate + var g = HashSet(u32).init(); + defer g.deinit(testing.allocator); + _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); + + var h = 
HashSet(u32).init(); + defer h.deinit(testing.allocator); + _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); + + try g.symmetricDifferenceUpdate(testing.allocator, h); + + try expectEqual(10, g.cardinality()); + try expect(g.containsAllSlice(&.{ 1, 2, 11, 111, 22, 222, 1111, 333, 3333, 22222 })); +} + +test "sizeOf matches" { + // No bloat guarantee, after all we're just building on top of what's good. + // "What's good Miley!?!?"" + const expectedByteSize = 24; + const autoHashMapSize = @sizeOf(std.hash_map.AutoHashMapUnmanaged(u32, void)); + const hashSetSize = @sizeOf(HashSet(u32)); + try expectEqual(expectedByteSize, autoHashMapSize); + try expectEqual(expectedByteSize, hashSetSize); + + // The unmanaged with void context must be the same size as the unmanaged. + // The unmanaged with context must be larger by the size of the empty Context struct, + // due to the added Context and alignment padding. + const expectedContextDiff = 8; + const hashSetWithVoidContextSize = @sizeOf(HashSetWithContext(u32, void, undefined)); + const hashSetWithContextSize = @sizeOf(HashSetWithContext(u32, TestContext, 75)); + try expectEqual(0, hashSetWithVoidContextSize - hashSetSize); + try expectEqual(expectedContextDiff, hashSetWithContextSize - hashSetSize); +} + +const TestContext = struct { + const Self = @This(); + pub fn hash(_: Self, key: u32) u64 { + return @as(u64, key) *% 0x517cc1b727220a95; + } + pub fn eql(_: Self, a: u32, b: u32) bool { + return a == b; + } +}; + +test "custom hash function comprehensive" { + const context = TestContext{}; + var set = HashSetWithContext(u32, TestContext, 75).initContext(context); + defer set.deinit(testing.allocator); + + // Test basic operations + _ = try set.add(testing.allocator, 123); + _ = try set.add(testing.allocator, 456); + try expect(set.contains(123)); + try expect(set.contains(456)); + try expect(!set.contains(789)); + try expectEqual(set.cardinality(), 2); + + // Test clone with custom 
context + var cloned = try set.clone(testing.allocator); + defer cloned.deinit(testing.allocator); + try expect(cloned.contains(123)); + try expect(set.eql(cloned)); + + // Test set operations with custom context + var other = HashSetWithContext(u32, TestContext, 75).initContext(context); + defer other.deinit(testing.allocator); + _ = try other.add(testing.allocator, 456); + _ = try other.add(testing.allocator, 789); + + // Test union + var union_set = try set.unionOf(testing.allocator, other); + defer union_set.deinit(testing.allocator); + try expectEqual(union_set.cardinality(), 3); + try expect(union_set.containsAllSlice(&.{ 123, 456, 789 })); + + // Test intersection + var intersection = try set.intersectionOf(testing.allocator, other); + defer intersection.deinit(testing.allocator); + try expectEqual(intersection.cardinality(), 1); + try expect(intersection.contains(456)); + + // Test difference + var difference = try set.differenceOf(testing.allocator, other); + defer difference.deinit(testing.allocator); + try expectEqual(difference.cardinality(), 1); + try expect(difference.contains(123)); + + // Test symmetric difference + var sym_diff = try set.symmetricDifferenceOf(testing.allocator, other); + defer sym_diff.deinit(testing.allocator); + try expectEqual(sym_diff.cardinality(), 2); + try expect(sym_diff.containsAllSlice(&.{ 123, 789 })); + + // Test in-place operations + try set.unionUpdate(testing.allocator, other); + try expectEqual(set.cardinality(), 3); + try expect(set.containsAllSlice(&.{ 123, 456, 789 })); +} + +test "custom hash function with different load factors" { + const context = TestContext{}; + + // Test with low load factor + var low_load = HashSetWithContext(u32, TestContext, 25).initContext(context); + defer low_load.deinit(testing.allocator); + + // Test with high load factor + var high_load = HashSetWithContext(u32, TestContext, 90).initContext(context); + defer high_load.deinit(testing.allocator); + + // Add same elements to both + 
for (0..100) |i| { + _ = try low_load.add(testing.allocator, @intCast(i)); + _ = try high_load.add(testing.allocator, @intCast(i)); + } + + // Verify functionality is identical despite different load factors + try expectEqual(low_load.cardinality(), high_load.cardinality()); + try expect(low_load.capacity() != high_load.capacity()); // Should be different due to load factors + + // Verify both sets contain the same elements + for (0..100) |i| { + const val: u32 = @intCast(i); + try expect(low_load.contains(val) and high_load.contains(val)); + } +} + +test "custom hash function error cases" { + const context = TestContext{}; + var set = HashSetWithContext(u32, TestContext, 75).initContext(context); + defer set.deinit(testing.allocator); + + // Test allocation failures + var failing_allocator = std.testing.FailingAllocator.init(testing.allocator, .{ .fail_index = 0 }); + try std.testing.expectError(error.OutOfMemory, set.add(failing_allocator.allocator(), 123)); +} + +// String context for testing string usage with custom hash function +const StringContext = struct { + pub fn hash(self: @This(), str: []const u8) u64 { + _ = self; + // Simple FNV-1a hash + var h: u64 = 0xcbf29ce484222325; + for (str) |b| { + h = (h ^ b) *% 0x100000001b3; + } + return h; + } + + pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { + _ = self; + return std.mem.eql(u8, a, b); + } +}; + +test "custom hash function string usage" { + const context = StringContext{}; + var A = HashSetWithContext([]const u8, StringContext, 75).initContext(context); + defer A.deinit(testing.allocator); + + var B = HashSetWithContext([]const u8, StringContext, 75).initContext(context); + defer B.deinit(testing.allocator); + + _ = try A.add(testing.allocator, "Hello"); + _ = try B.add(testing.allocator, "World"); + + var C = try A.unionOf(testing.allocator, B); + defer C.deinit(testing.allocator); + try expectEqual(2, C.cardinality()); + try expect(C.containsAllSlice(&.{ "Hello", "World" })); + + // 
Test string-specific behavior + try expect(A.contains("Hello")); + try expect(!A.contains("hello")); // Case sensitive + try expect(!A.contains("Hell")); // Prefix doesn't match + try expect(!A.contains("Hello ")); // Trailing space matters + + // Test with longer strings + _ = try A.add(testing.allocator, "This is a longer string to test hash collisions"); + _ = try A.add(testing.allocator, "This is another longer string to test hash collisions"); + try expectEqual(3, A.cardinality()); + + // Test with empty string + _ = try A.add(testing.allocator, ""); + try expect(A.contains("")); + try expectEqual(4, A.cardinality()); + + // Test with strings containing special characters + _ = try A.add(testing.allocator, "Hello\n"); + _ = try A.add(testing.allocator, "Hello\r"); + _ = try A.add(testing.allocator, "Hello\t"); + try expectEqual(7, A.cardinality()); +} diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..8d05ad8 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,67 @@ +const std = @import("std"); +const set = @import("root.zig"); + +pub fn main(init: std.process.Init) !void { + + const gpa = init.gpa; + + const repetitions: usize = 1000; + const times = try gpa.alloc(i64, repetitions); + defer gpa.free(times); + + const upper: u32 = 100000; + var B = set.Set(u32).init(); + defer B.deinit(gpa); + + for (0..@divExact(upper, 2)) |i| { + const e: u32 = @intCast(i); + _ = try B.add(gpa, e); + } + + std.debug.print("starting benchkmark\n", .{}); + for (0..repetitions) |i| { + + var A = set.Set(u32).init(); + defer A.deinit(gpa); + + for (0..upper) |j| { + const e: u32 = @intCast(j); + _ = try A.add(gpa, @as(u32, e)); + } + + const startTime = std.Io.Timestamp.now(init.io, .awake); + _ = try A.differenceUpdate(B); + const elapsedTime = startTime.untilNow(init.io, .awake); + + times[i] = elapsedTime.toMilliseconds(); + } + + const stats: Stats = Stats.calculateFromData(times); + + std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Avg Time 
(ms)", stats.mean, stats.ci }); +} + +pub const Stats = struct { + mean: f64, + ci: f64, + + pub fn calculateFromData(data: []i64) Stats { + var sum: i64 = 0; + for (data) |v| sum += v; + const mean: f64 = @as(f64, @floatFromInt(sum)) / @as(f64, @floatFromInt(data.len)); + + var sum_sq_diff: f64 = 0.0; + for (data) |v| { + const diff = @as(f64, @floatFromInt(v)) - mean; + sum_sq_diff += diff * diff; + } + + const variance = sum_sq_diff / @as(f64, @floatFromInt(data.len - 1)); + const std_dev = std.math.sqrt(variance); + + const margin_error = 1.96 * (std_dev / std.math.sqrt(@as(f64, @floatFromInt(data.len)))); + + return Stats{ .mean = mean, .ci = margin_error }; + } +}; + diff --git a/src/root.zig b/src/root.zig index fd94b70..9d7e520 100644 --- a/src/root.zig +++ b/src/root.zig @@ -18,27 +18,20 @@ /// SOFTWARE. /// /// + /// Set is just a short convenient "default" alias. If you don't know /// which to pick, just use Set. -pub const Set = HashSetManaged; +pub const Set = HashSet; /// HashSetUnmanaged is a conveniently exported "unmanaged" version of a hash-based Set. /// This Hash-based is optmized for lookups. -pub const HashSetUnmanaged = @import("hash_set/unmanaged.zig").HashSetUnmanaged; - -/// HashSetManaged is a conveniently exported "managed" version of a hash_based Set. -pub const HashSetManaged = @import("hash_set/managed.zig").HashSetManaged; - -/// ArraySetUnmanaged is a conveniently exported "unmanaged" version of an array-based Set. -/// This is a bit more specialized and optimized for heavy iteration. -pub const ArraySetUnmanaged = @import("array_hash_set/unmanaged.zig").ArraySetUnmanaged; +pub const HashSet = @import("hash_set.zig").HashSet; -/// ArraySetManaged is a conveniently exported "managed" version of an array-based Set. -pub const ArraySetManaged = @import("array_hash_set/managed.zig").ArraySetManaged; +// /// ArraySetUnmanaged is a conveniently exported "unmanaged" version of an array-based Set. 
+// /// This is a bit more specialized and optimized for heavy iteration. +// pub const ArraySet = @import("array_hash_set.zig").ArraySetUnmanaged; test "tests" { - _ = @import("hash_set/unmanaged.zig"); - _ = @import("hash_set/managed.zig"); - _ = @import("array_hash_set/unmanaged.zig"); - _ = @import("array_hash_set/managed.zig"); + _ = @import("hash_set.zig"); + // _ = @import("array_hash_set.zig"); } From 7baf0a44d6135e0cd1c12f64853c99960c2587ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Soler=20Valad=C3=A9s?= Date: Sat, 21 Feb 2026 03:47:34 +0100 Subject: [PATCH 2/5] Started with the bitset implementation --- src/array_hash_set/managed.zig | 863 ---------------- src/array_hash_set/unmanaged.zig | 826 ---------------- .../unmanaged.zig => dynamic_bit_set.zig} | 245 ++--- src/hash_set.zig | 8 +- src/hash_set/managed.zig | 925 ------------------ src/main.zig | 54 +- 6 files changed, 121 insertions(+), 2800 deletions(-) delete mode 100644 src/array_hash_set/managed.zig delete mode 100644 src/array_hash_set/unmanaged.zig rename src/{hash_set/unmanaged.zig => dynamic_bit_set.zig} (77%) delete mode 100644 src/hash_set/managed.zig diff --git a/src/array_hash_set/managed.zig b/src/array_hash_set/managed.zig deleted file mode 100644 index b71bb84..0000000 --- a/src/array_hash_set/managed.zig +++ /dev/null @@ -1,863 +0,0 @@ -/// Open Source Initiative OSI - The MIT License (MIT):Licensing -/// The MIT License (MIT) -/// Copyright (c) 2025 Ralph Caraveo (deckarep@gmail.com) -/// Permission is hereby granted, free of charge, to any person obtaining a copy of -/// this software and associated documentation files (the "Software"), to deal in -/// the Software without restriction, including without limitation the rights to -/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -/// of the Software, and to permit persons to whom the Software is furnished to do -/// so, subject to the following conditions: -/// The above copyright notice and this 
permission notice shall be included in all -/// copies or substantial portions of the Software. -/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -/// SOFTWARE. -/// -/// -const std = @import("std"); -const mem = std.mem; -const Allocator = mem.Allocator; -const ArraySetUnmanaged = @import("unmanaged.zig").ArraySetUnmanaged; - -pub fn ArraySetManaged(comptime E: type) type { - return struct { - allocator: Allocator, - - unmanaged: Set, - - /// The type of the internal array hash map - pub const Set = ArraySetUnmanaged(E); - - /// The integer type used to store the size of the map, borrowed from map - pub const Size = Set.Size; - /// The iterator type returned by iterator(), key-only for sets - pub const Iterator = Set.Iterator; - - const Self = @This(); - - /// Initialzies a Set with the given Allocator - pub fn init(allocator: std.mem.Allocator) Self { - return .{ - .allocator = allocator, - .unmanaged = Set.init(), - }; - } - - /// Initialzies a Set using a capacity hint, with the given Allocator - pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { - var self = Self.init(allocator); - self.allocator = allocator; - try self.unmanaged.ensureTotalCapacity(allocator, num); - return self; - } - - /// Destory the Set - pub fn deinit(self: *Self) void { - self.unmanaged.deinit(self.allocator); - self.* = undefined; - } - - /// Adds a single element to the set and an allocation may occur. - /// add may return an Allocator.Error or bool indicating if the element - /// was actually added if not already known. 
- pub fn add(self: *Self, element: E) Allocator.Error!bool { - return self.unmanaged.add(self.allocator, element); - } - - /// Adds a single element to the set. Asserts that there is enough capacity. - /// A bool is returned indicating if the element was actually added - /// if not already known. - pub fn addAssumeCapacity(self: *Self, element: E) bool { - return self.unmanaged.add(self.allocator, element) catch unreachable; - } - - /// Appends all elements from the provided set, and may allocate. - /// append returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the Set. - pub fn append(self: *Self, other: Self) Allocator.Error!Size { - const prevCount = self.unmanaged.cardinality(); - // Directly access the underlying map instead of using unionUpdate - // We avoid double existence/capacity checks by accessing map directly - var iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try self.unmanaged.put(self.allocator, entry.key_ptr.*, {}); - } - return self.unmanaged.cardinality() - prevCount; - } - - /// Appends all elements from the provided slice, and may allocate. - /// appendSlice returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the slice. - pub fn appendSlice(self: *Self, elements: []const E) Allocator.Error!Size { - const prevCount = self.unmanaged.cardinality(); - for (elements) |el| { - _ = try self.unmanaged.add(self.allocator, el); - } - return self.unmanaged.cardinality() - prevCount; - } - - /// Returns the number of total elements which may be present before - /// it is no longer guaranteed that no allocations will be performed. - pub fn capacity(self: Self) Size { - return self.unmanaged.capacity(); - } - - /// Cardinality effectively returns the size of the set - pub fn cardinality(self: Self) Size { - return self.unmanaged.cardinality(); - } - - /// Invalidates all element pointers. 
- pub fn clearAndFree(self: *Self) void { - self.unmanaged.clearAndFree(self.allocator); - } - - /// Invalidates all element pointers. - pub fn clearRetainingCapacity(self: *Self) void { - self.unmanaged.clearRetainingCapacity(); - } - - /// Creates a copy of this set, using the same allocator. - /// clone may return an Allocator.Error or the cloned Set. - pub fn clone(self: *Self) Allocator.Error!Self { - // Take a stack copy of self. - var cloneSelf = self.*; - // Clone the interal map. - cloneSelf.unmanaged = try self.unmanaged.clone(self.allocator); - return cloneSelf; - } - - /// Creates a copy of this set, using a specified allocator. - /// cloneWithAllocator may be return an Allocator.Error or the cloned Set. - pub fn cloneWithAllocator(self: *Self, allocator: Allocator) Allocator.Error!Self { - // Directly clone the unmanaged structure with the new allocator - const clonedUnmanaged = try self.unmanaged.clone(allocator); - return Self{ - .allocator = allocator, - .unmanaged = clonedUnmanaged, - }; - } - - /// Returns true when the provided element exists within the Set otherwise false. - pub fn contains(self: Self, element: E) bool { - return self.unmanaged.contains(element); - } - - /// Returns true when all elements in the other Set are present in this Set - /// otherwise false. - pub fn containsAll(self: Self, other: Self) bool { - return self.unmanaged.containsAll(other.unmanaged); - } - - /// Returns true when all elements in the provided slice are present otherwise false. - pub fn containsAllSlice(self: Self, elements: []const E) bool { - return self.unmanaged.containsAllSlice(elements); - } - - /// Returns true when at least one or more elements from the other Set exist within - /// this Set otherwise false. 
- pub fn containsAny(self: Self, other: Self) bool { - // Delegate to the unmanaged implementation which might have optimizations - return self.unmanaged.containsAny(other.unmanaged); - } - - /// Returns true when at least one or more elements from the slice exist within - /// this Set otherwise false. - pub fn containsAnySlice(self: Self, elements: []const E) bool { - for (elements) |el| { - if (self.unmanaged.contains(el)) { - return true; - } - } - return false; - } - - /// differenceOf returns the difference between this set - /// and other. The returned set will contain - /// all elements of this set that are not also - /// elements of the other. - /// - /// Caller owns the newly allocated/returned set. - pub fn differenceOf(self: Self, other: Self) Allocator.Error!Self { - // Delegate to unmanaged implementation to avoid double iteration - const diffUnmanaged = try self.unmanaged.differenceOf(self.allocator, other.unmanaged); - return Self{ - .allocator = self.allocator, - .unmanaged = diffUnmanaged, - }; - } - - /// differenceUpdate does an in-place mutation of this set - /// and other. This set will contain all elements of this set that are not - /// also elements of other. - pub fn differenceUpdate(self: *Self, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const diffSet = try self.differenceOf(other); - - // Destroy the internal map. - self.unmanaged.deinit(self.allocator); - - // Swap it out with the new set. 
- self.unmanaged = diffSet.unmanaged; - } - - fn dump(self: Self) void { - std.log.err("\ncardinality: {d}\n", .{self.cardinality()}); - var iter = self.iterator(); - while (iter.next()) |el| { - std.log.err(" element: {d}\n", .{el.*}); - } - } - - /// Increases capacity, guaranteeing that insertions up until the - /// `expected_count` will not cause an allocation, and therefore cannot fail. - pub fn ensureTotalCapacity(self: *Self, expected_count: Size) Allocator.Error!void { - return self.unmanaged.ensureTotalCapacity(expected_count); - } - - /// Increases capacity, guaranteeing that insertions up until - /// `additional_count` **more** items will not cause an allocation, and - /// therefore cannot fail. - pub fn ensureUnusedCapacity(self: *Self, additional_count: Size) Allocator.Error!void { - return self.unmanaged.ensureUnusedCapacity(additional_count); - } - - /// eql determines if two sets are equal to each - /// other. If they have the same cardinality - /// and contain the same elements, they are - /// considered equal. The order in which - /// the elements were added is irrelevant. - pub fn eql(self: Self, other: Self) bool { - // First discriminate on cardinalities of both sets. - if (self.unmanaged.cardinality() != other.unmanaged.cardinality()) { - return false; - } - - // Now check for each element one for one and exit early - // on the first non-match. - var iter = self.unmanaged.iterator(); - while (iter.next()) |pVal| { - if (!other.unmanaged.contains(pVal.key_ptr.*)) { - return false; - } - } - - return true; - } - - /// intersectionOf returns a new set containing only the elements - /// that exist only in both sets. - /// - /// Caller owns the newly allocated/returned set. 
- pub fn intersectionOf(self: Self, other: Self) Allocator.Error!Self { - const interUnmanaged = try self.unmanaged.intersectionOf(self.allocator, other.unmanaged); - return Self{ - .allocator = self.allocator, - .unmanaged = interUnmanaged, - }; - } - - /// intersectionUpdate does an in-place intersecting update - /// to the current set from the other set keeping only - /// elements found in this Set and the other Set. - pub fn intersectionUpdate(self: *Self, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const interSet = try self.intersectionOf(other); - - // Destroy the internal map. - self.unmanaged.deinit(self.allocator); - - // Swap it out with the new set. - self.unmanaged = interSet.unmanaged; - } - - /// isDisjoint returns true if the intersection between two sets is the null set. - /// Otherwise returns false. - pub fn isDisjoint(self: Self, other: Self) bool { - return self.unmanaged.isDisjoint(other.unmanaged); - } - - /// In place style: - /// differenceOfUpdate - /// symmetric_differenceOf_update - /// Returns true if the set is empty otherwise false - pub fn isEmpty(self: Self) bool { - return self.unmanaged.cardinality() == 0; - } - - /// Create an iterator over the elements in the set. - /// The iterator is invalidated if the set is modified during iteration. - pub fn iterator(self: Self) Iterator { - return self.unmanaged.iterator(); - } - - /// properSubsetOf determines if every element in this set is in - /// the other set but the two sets are not equal. 
- pub fn properSubsetOf(self: Self, other: Self) bool { - return self.unmanaged.cardinality() < other.unmanaged.cardinality() and self.subsetOf(other); - } - - /// properSupersetOf determines if every element in the other set - /// is in this set but the two sets are not equal. - pub fn properSupersetOf(self: Self, other: Self) bool { - return self.unmanaged.cardinality() > other.unmanaged.cardinality() and self.supersetOf(other); - } - - /// subsetOf determines if every element in this set is in - /// the other set. - pub fn subsetOf(self: Self, other: Self) bool { - // First discriminate on cardinalties of both sets. - if (self.unmanaged.cardinality() > other.unmanaged.cardinality()) { - return false; - } - - // Now check that self set has at least some elements from other. - var iter = self.unmanaged.iterator(); - while (iter.next()) |pVal| { - if (!other.unmanaged.contains(pVal.key_ptr.*)) { - return false; - } - } - - return true; - } - - /// subsetOf determines if every element in the other Set is in - /// the this Set. - pub fn supersetOf(self: Self, other: Self) bool { - // This is just the converse of subsetOf. - return other.subsetOf(self); - } - - /// pop removes and returns an arbitrary ?E from the set. - /// Order is not guaranteed. - /// This safely returns null if the Set is empty. - pub fn pop(self: *Self) ?E { - if (self.unmanaged.cardinality() > 0) { - var iter = self.unmanaged.iterator(); - // NOTE: No in-place mutation as it invalidates live iterators. - // So a temporary capture is taken. 
- var capturedElement: E = undefined; - while (iter.next()) |pVal| { - capturedElement = pVal.key_ptr.*; - break; - } - _ = self.unmanaged.remove(capturedElement); - return capturedElement; - } else { - return null; - } - } - - /// remove discards a single element from the Set - pub fn remove(self: *Self, element: E) bool { - return self.unmanaged.remove(element); - } - - /// removesAll discards all elements passed from the other Set from - /// this Set - pub fn removeAll(self: *Self, other: Self) void { - var iter = other.iterator(); - while (iter.next()) |el| { - _ = self.unmanaged.remove(el); - } - } - - /// removesAllSlice discards all elements passed as a slice from the Set - pub fn removeAllSlice(self: *Self, elements: []const E) void { - for (elements) |el| { - _ = self.unmanaged.remove(el); - } - } - - /// symmetricDifferenceOf returns a new set with all elements which are - /// in either this set or the other set but not in both. - /// - /// The caller owns the newly allocated/returned Set. - pub fn symmetricDifferenceOf(self: Self, other: Self) Allocator.Error!Self { - // Use optimized unmanaged implementation - const sdUnmanaged = try self.unmanaged.symmetricDifferenceOf(self.allocator, other.unmanaged); - return Self{ - .allocator = self.allocator, - .unmanaged = sdUnmanaged, - }; - } - - /// symmetricDifferenceUpdate does an in-place mutation with all elements - /// which are in either this set or the other set but not in both. - pub fn symmetricDifferenceUpdate(self: *Self, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. 
- const sd = try self.symmetricDifferenceOf(other); - - // Destroy the internal map. - self.unmanaged.deinit(self.allocator); - - // Swap it out with the new set. - self.unmanaged = sd.unmanaged; - } - - /// union returns a new set with all elements in both sets. - /// - /// The caller owns the newly allocated/returned Set. - pub fn unionOf(self: Self, other: Self) Allocator.Error!Self { - const unionUnmanaged = try self.unmanaged.unionOf(self.allocator, other.unmanaged); - return Self{ - .allocator = self.allocator, - .unmanaged = unionUnmanaged, - }; - } - - /// unionUpdate does an in-place union of the current Set and other Set. - /// - /// Allocations may occur. - pub fn unionUpdate(self: *Self, other: Self) Allocator.Error!void { - var iter = other.unmanaged.iterator(); - while (iter.next()) |pVal| { - _ = try self.add(pVal.key_ptr.*); - } - } - }; -} - -const testing = std.testing; -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -test "example usage" { - // import the namespace. - // const set = @import("set.zig"); - - // Create a set of u32s called A - var A = ArraySetManaged(u32).init(std.testing.allocator); - defer A.deinit(); - - // Add some data - _ = try A.add(5); - _ = try A.add(6); - _ = try A.add(7); - - // Add more data; single shot, duplicate data is ignored. - _ = try A.appendSlice(&.{ 5, 3, 0, 9 }); - - // Create another set called B - var B = ArraySetManaged(u32).init(std.testing.allocator); - defer B.deinit(); - - // Add data to B - _ = try B.appendSlice(&.{ 50, 30, 20 }); - - // Get the union of A | B - var un = try A.unionOf(B); - defer un.deinit(); - - // Grab an iterator and dump the contents. 
- var iter = un.iterator(); - while (iter.next()) |el| { - std.log.debug("element: {d}", .{el.key_ptr.*}); - } -} - -test "string usage" { - var A = ArraySetManaged([]const u8).init(std.testing.allocator); - defer A.deinit(); - - var B = ArraySetManaged([]const u8).init(std.testing.allocator); - defer B.deinit(); - - _ = try A.add("Hello"); - _ = try B.add("World"); - - var C = try A.unionOf(B); - defer C.deinit(); - try expectEqual(2, C.cardinality()); - try expect(C.containsAllSlice(&.{ "Hello", "World" })); -} - -test "comprehensive usage" { - var set = ArraySetManaged(u32).init(std.testing.allocator); - defer set.deinit(); - - try expect(set.isEmpty()); - - _ = try set.add(8); - _ = try set.add(6); - _ = try set.add(7); - try expectEqual(set.cardinality(), 3); - - _ = try set.appendSlice(&.{ 5, 3, 0, 9 }); - - // Positive cases. - try expect(set.contains(8)); - try expect(set.containsAllSlice(&.{ 5, 3, 9 })); - try expect(set.containsAnySlice(&.{ 5, 55, 12 })); - - // Negative cases. 
- try expect(!set.contains(99)); - try expect(!set.containsAllSlice(&.{ 8, 6, 77 })); - try expect(!set.containsAnySlice(&.{ 99, 55, 44 })); - - try expectEqual(set.cardinality(), 7); - - var other = ArraySetManaged(u32).init(std.testing.allocator); - defer other.deinit(); - - try expect(other.isEmpty()); - - _ = try other.add(8); - _ = try other.add(6); - _ = try other.add(7); - - _ = try other.appendSlice(&.{ 5, 3, 0, 9 }); - - try expect(set.eql(other)); - try expectEqual(other.cardinality(), 7); - - try expect(other.remove(8)); - try expectEqual(other.cardinality(), 6); - try expect(!other.remove(55)); - try expect(!set.eql(other)); - - other.removeAllSlice(&.{ 6, 7 }); - try expectEqual(other.cardinality(), 4); - - // intersectionOf - var inter = try set.intersectionOf(other); - defer inter.deinit(); - try expect(!inter.isEmpty()); - try expectEqual(inter.cardinality(), 4); - try expect(inter.containsAllSlice(&.{ 5, 3, 0, 9 })); - - // Union - var un = try set.unionOf(other); - defer un.deinit(); - try expect(!un.isEmpty()); - try expectEqual(un.cardinality(), 7); - try expect(un.containsAllSlice(&.{ 8, 6, 7, 5, 3, 0, 9 })); - - // differenceOf - var diff = try set.differenceOf(other); - defer diff.deinit(); - try expect(!diff.isEmpty()); - try expectEqual(diff.cardinality(), 3); - try expect(diff.containsAllSlice(&.{ 8, 7, 6 })); - - // symmetricDifferenceOf - _ = try set.add(11111); - _ = try set.add(9999); - _ = try other.add(7777); - var symmDiff = try set.symmetricDifferenceOf(other); - defer symmDiff.deinit(); - try expect(!symmDiff.isEmpty()); - try expectEqual(symmDiff.cardinality(), 6); - try expect(symmDiff.containsAllSlice(&.{ 7777, 11111, 8, 7, 6, 9999 })); - - // subsetOf - - // supersetOf -} - -test "isDisjoint" { - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - var b = ArraySetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - _ = try b.appendSlice(&.{ 
202, 303, 403 }); - - // Test the true case. - try expect(a.isDisjoint(b)); - try expect(b.isDisjoint(a)); - - // Test the false case. - var c = ArraySetManaged(u32).init(std.testing.allocator); - defer c.deinit(); - _ = try c.appendSlice(&.{ 20, 30, 400 }); - - try expect(!a.isDisjoint(c)); - try expect(!c.isDisjoint(a)); -} - -test "clear/capacity" { - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - - try expectEqual(0, a.cardinality()); - try expectEqual(0, a.capacity()); - - const cap = 99; - var b = try ArraySetManaged(u32).initCapacity(std.testing.allocator, cap); - defer b.deinit(); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - for (0..cap) |val| { - _ = try b.add(@intCast(val)); - } - - try expectEqual(99, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearRetainingCapacity(); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearAndFree(); - - try expectEqual(0, b.cardinality()); - try expectEqual(b.capacity(), 0); -} - -test "clone" { - { - // clone - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - var b = try a.clone(); - defer b.deinit(); - - try expect(a.eql(b)); - } - - { - // cloneWithAllocator - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - // Use a different allocator than the test one. 
- var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const tmpAlloc = gpa.allocator(); - defer { - const deinit_status = gpa.deinit(); - // Fail test; can't try in defer as defer is executed after we return - if (deinit_status == .leak) expect(false) catch @panic("TEST FAIL"); - } - - var b = try a.cloneWithAllocator(tmpAlloc); - defer b.deinit(); - - try expect(a.allocator.ptr != b.allocator.ptr); - try expect(a.eql(b)); - } -} - -test "pop" { - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - // No assumptions can be made about pop order. - while (a.pop()) |result| { - try expect(result == 20 or result == 30 or result == 40); - } - - // At this point, set must be empty. - try expectEqual(a.cardinality(), 0); - try expect(a.isEmpty()); - - // Lastly, pop should safely return null. - try expect(a.pop() == null); -} - -test "subset/superset" { - { - // subsetOf - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 1, 2, 3, 5, 7 }); - - var b = ArraySetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - - // b should be a subset of a. - try expect(b.subsetOf(a)); - - _ = try b.add(72); - - // b should not be a subset of a, because 72 is not in a. - try expect(!b.subsetOf(a)); - } - - { - // supersetOf - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 9, 5, 2, 1, 11 }); - - var b = ArraySetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - _ = try b.appendSlice(&.{ 5, 2, 11 }); - - // set a should be a superset of set b - try expect(!b.supersetOf(a)); - - _ = try b.add(42); - - // TODO: figure out why this fails. 
- //set a should not be a superset of set b because b has 42 - // try expect(a.supersetOf(&b)); - } -} - -test "iterator" { - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - var sum: u32 = 0; - var iterCount: usize = 0; - var iter = a.iterator(); - while (iter.next()) |el| { - sum += el.key_ptr.*; - iterCount += 1; - } - - try expectEqual(90, sum); - try expectEqual(3, iterCount); -} - -test "in-place methods" { - // intersectionUpdate - var a = ArraySetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 10, 20, 30, 40 }); - - var b = ArraySetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - _ = try b.appendSlice(&.{ 44, 20, 30, 66 }); - - try a.intersectionUpdate(b); - try expectEqual(a.cardinality(), 2); - try expect(a.containsAllSlice(&.{ 20, 30 })); - - // unionUpdate - var c = ArraySetManaged(u32).init(std.testing.allocator); - defer c.deinit(); - _ = try c.appendSlice(&.{ 10, 20, 30, 40 }); - - var d = ArraySetManaged(u32).init(std.testing.allocator); - defer d.deinit(); - _ = try d.appendSlice(&.{ 44, 20, 30, 66 }); - - try c.unionUpdate(d); - try expectEqual(c.cardinality(), 6); - try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); - - // differenceUpdate - var e = ArraySetManaged(u32).init(std.testing.allocator); - defer e.deinit(); - _ = try e.appendSlice(&.{ 1, 11, 111, 1111, 11111 }); - - var f = ArraySetManaged(u32).init(std.testing.allocator); - defer f.deinit(); - _ = try f.appendSlice(&.{ 1, 11, 111, 222, 2222, 1111 }); - - try e.differenceUpdate(f); - - try expectEqual(1, e.cardinality()); - try expect(e.contains(11111)); - - // symmetricDifferenceUpdate - var g = ArraySetManaged(u32).init(std.testing.allocator); - defer g.deinit(); - _ = try g.appendSlice(&.{ 2, 22, 222, 2222, 22222 }); - - var h = ArraySetManaged(u32).init(std.testing.allocator); - defer h.deinit(); - _ = try h.appendSlice(&.{ 1, 11, 111, 333, 
3333, 2222, 1111 }); - - try g.symmetricDifferenceUpdate(h); - - try expectEqual(10, g.cardinality()); - try expect(g.containsAllSlice(&.{ 1, 2, 11, 111, 22, 222, 1111, 333, 3333, 22222 })); -} - -test "sizeOf" { - const unmanagedSize = @sizeOf(ArraySetUnmanaged(u32)); - const managedSize = @sizeOf(ArraySetManaged(u32)); - - // The managed must be only 16 bytes larger, the cost of the internal allocator - // otherwise we've added some CRAP! - const expectedDiff = 16; - try expectEqual(expectedDiff, managedSize - unmanagedSize); -} - -test "benchmark" { - const allocator = std.testing.allocator; - const Iterations = 10_000; - const SetSize = 1000; - - // Setup - var base = try ArraySetManaged(u32).initCapacity(allocator, SetSize); - defer base.deinit(); - for (0..SetSize) |i| _ = base.addAssumeCapacity(@intCast(i)); - - var other = try ArraySetManaged(u32).initCapacity(allocator, SetSize); - defer other.deinit(); - for (0..SetSize) |i| _ = other.addAssumeCapacity(@intCast(i + SetSize / 2)); - - // Benchmark unionOf - var union_timer = try std.time.Timer.start(); - for (0..Iterations) |_| { - var result = try base.unionOf(other); - defer result.deinit(); - } - const union_elapsed = union_timer.read(); - std.debug.print("\nunionOf: {d} ops/sec ({d:.2} ns/op)\n", .{ - Iterations * std.time.ns_per_s / union_elapsed, - @as(f64, @floatFromInt(union_elapsed)) / @as(f64, @floatFromInt(Iterations)), - }); - - // Benchmark intersectionOf - var inter_timer = try std.time.Timer.start(); - for (0..Iterations) |_| { - var result = try base.intersectionOf(other); - defer result.deinit(); - } - const inter_elapsed = inter_timer.read(); - std.debug.print("intersectionOf: {d} ops/sec ({d:.2} ns/op)\n", .{ - Iterations * std.time.ns_per_s / inter_elapsed, - @as(f64, @floatFromInt(inter_elapsed)) / @as(f64, @floatFromInt(Iterations)), - }); - - // Benchmark containsAll - var contains_timer = try std.time.Timer.start(); - for (0..Iterations) |_| { - _ = base.containsAll(other); - } - 
const contains_elapsed = contains_timer.read(); - std.debug.print("containsAll: {d} ops/sec ({d:.2} ns/op)\n", .{ - Iterations * std.time.ns_per_s / contains_elapsed, - @as(f64, @floatFromInt(contains_elapsed)) / @as(f64, @floatFromInt(Iterations)), - }); -} diff --git a/src/array_hash_set/unmanaged.zig b/src/array_hash_set/unmanaged.zig deleted file mode 100644 index d7d4705..0000000 --- a/src/array_hash_set/unmanaged.zig +++ /dev/null @@ -1,826 +0,0 @@ -/// Open Source Initiative OSI - The MIT License (MIT):Licensing -/// The MIT License (MIT) -/// Copyright (c) 2025 Ralph Caraveo (deckarep@gmail.com) -/// Permission is hereby granted, free of charge, to any person obtaining a copy of -/// this software and associated documentation files (the "Software"), to deal in -/// the Software without restriction, including without limitation the rights to -/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -/// of the Software, and to permit persons to whom the Software is furnished to do -/// so, subject to the following conditions: -/// The above copyright notice and this permission notice shall be included in all -/// copies or substantial portions of the Software. -/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -/// SOFTWARE. -/// -/// -const std = @import("std"); -const mem = std.mem; -const Allocator = mem.Allocator; - -/// comptime selection of the map type for string vs everything else. 
-fn selectMap(comptime E: type) type { - comptime { - if (E == []const u8) { - return std.StringArrayHashMapUnmanaged(void); - } else { - return std.AutoArrayHashMapUnmanaged(E, void); - } - } -} - -pub fn ArraySetUnmanaged(comptime E: type) type { - return struct { - /// The type of the internal hash map - pub const Map = selectMap(E); - - unmanaged: Map, - - pub const Size = usize; - - pub const Entry = struct { - key_ptr: *E, - }; - - /// The iterator type returned by iterator(), a Key iterator doesn't exist - /// on ArrayHashMaps for some reason. - pub const Iterator = struct { - keys: [*]E, - len: usize, - index: usize = 0, - - pub fn next(it: *Iterator) ?Entry { - if (it.index >= it.len) return null; - const result = Entry{ - .key_ptr = &it.keys[it.index], - }; - it.index += 1; - return result; - } - - /// Reset the iterator to the initial index - pub fn reset(it: *Iterator) void { - it.index = 0; - } - }; - - const Self = @This(); - - pub fn init() Self { - return .{ - .unmanaged = Map{}, - }; - } - - pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { - var self = Self.init(); - try self.unmanaged.ensureTotalCapacity(allocator, num); - return self; - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - self.unmanaged.deinit(allocator); - self.* = undefined; - } - - pub fn add(self: *Self, allocator: Allocator, element: E) Allocator.Error!bool { - const prevCount = self.unmanaged.count(); - try self.unmanaged.put(allocator, element, {}); - return prevCount != self.unmanaged.count(); - } - - /// Appends all elements from the provided slice, and may allocate. - /// appendSlice returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the slice. 
- pub fn appendSlice(self: *Self, allocator: Allocator, elements: []const E) Allocator.Error!Size { - const prevCount = self.unmanaged.count(); - for (elements) |el| { - try self.unmanaged.put(allocator, el, {}); - } - return self.unmanaged.count() - prevCount; - } - - /// Returns the number of total elements which may be present before - /// it is no longer guaranteed that no allocations will be performed. - pub fn capacity(self: Self) Size { - return self.unmanaged.capacity(); - } - - /// Cardinality effectively returns the size of the set. - pub fn cardinality(self: Self) Size { - return self.unmanaged.count(); - } - - /// Invalidates all element pointers. - pub fn clearAndFree(self: *Self, allocator: Allocator) void { - self.unmanaged.clearAndFree(allocator); - } - - /// Invalidates all element pointers. - pub fn clearRetainingCapacity(self: *Self) void { - self.unmanaged.clearRetainingCapacity(); - } - - /// Creates a copy of this set, using the same allocator. - /// clone may return an Allocator.Error or the cloned Set. - pub fn clone(self: *Self, allocator: Allocator) Allocator.Error!Self { - // Take a stack copy of self. - var cloneSelf = self.*; - // Clone the interal map. - cloneSelf.unmanaged = try self.unmanaged.clone(allocator); - return cloneSelf; - } - - /// Returns true when the provided element exists within the Set otherwise false. - pub fn contains(self: Self, element: E) bool { - return self.unmanaged.contains(element); - } - - /// Returns true when all elements in the other Set are present in this Set - /// otherwise false. - pub fn containsAll(self: Self, other: Self) bool { - var iter = other.iterator(); - while (iter.next()) |el| { - if (!self.unmanaged.contains(el.key_ptr.*)) { - return false; - } - } - return true; - } - - /// Returns true when all elements in the provided slice are present otherwise false. 
- pub fn containsAllSlice(self: Self, elements: []const E) bool { - for (elements) |el| { - if (!self.unmanaged.contains(el)) { - return false; - } - } - return true; - } - - /// Returns true when at least one or more elements from the other Set exist within - /// this Set otherwise false. - pub fn containsAny(self: Self, other: Self) bool { - var iter = other.iterator(); - while (iter.next()) |el| { - if (self.unmanaged.contains(el.*)) { - return true; - } - } - return false; - } - - pub fn ensureTotalCapacity(self: *Self, allocator: Allocator, num: Size) Allocator.Error!void { - return self.unmanaged.ensureTotalCapacity(allocator, num); - } - - /// differenceOf returns the difference between this set - /// and other. The returned set will contain - /// all elements of this set that are not also - /// elements of the other. - /// - /// Caller owns the newly allocated/returned set. - pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var diffSet = Self.init(); - - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - _ = try diffSet.add(allocator, entry.key_ptr.*); - } - } - return diffSet; - } - - /// differenceUpdate does an in-place mutation of this set - /// and other. This set will contain all elements of this set that are not - /// also elements of other. - pub fn differenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const diffSet = try self.differenceOf(allocator, other); - - // Destroy the internal map. 
- self.unmanaged.deinit(allocator); - - // Swap it out with the new set. - self.unmanaged = diffSet.unmanaged; - } - - /// Returns true when at least one or more elements from the slice exist within - /// this Set otherwise false. - pub fn containsAnySlice(self: Self, elements: []const E) bool { - for (elements) |el| { - if (self.unmanaged.contains(el)) { - return true; - } - } - return false; - } - - /// eql determines if two sets are equal to each - /// other. If they have the same cardinality - /// and contain the same elements, they are - /// considered equal. The order in which - /// the elements were added is irrelevant. - pub fn eql(self: Self, other: Self) bool { - // First discriminate on cardinalities of both sets. - if (self.unmanaged.count() != other.unmanaged.count()) { - return false; - } - - // Now check for each element one for one and exit early - // on the first non-match. - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - return false; - } - } - - return true; - } - - /// intersectionOf returns a new set containing only the elements - /// that exist only in both sets. - /// - /// Caller owns the newly allocated/returned set. - pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var interSet = Self.init(); - - // Optimization: iterate over whichever set is smaller. - // Matters when disparity in cardinality is large. - var s = other; - var o = self; - if (self.unmanaged.count() < other.unmanaged.count()) { - s = self; - o = other; - } - - var iter = s.unmanaged.iterator(); - while (iter.next()) |entry| { - if (o.unmanaged.contains(entry.key_ptr.*)) { - _ = try interSet.add(allocator, entry.key_ptr.*); - } - } - - return interSet; - } - - /// intersectionUpdate does an in-place intersecting update - /// to the current set from the other set keeping only - /// elements found in this Set and the other Set. 
- pub fn intersectionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const interSet = try self.intersectionOf(allocator, other); - - // Destroy the internal map. - self.unmanaged.deinit(allocator); - - // Swap it out with the new set. - self.unmanaged = interSet.unmanaged; - } - - /// isDisjoint returns true if the intersection between two sets is the null set. - /// Otherwise returns false. - pub fn isDisjoint(self: Self, other: Self) bool { - // Optimization: Find the smaller of the two, and iterate over the smaller set - const smaller = if (self.cardinality() <= other.cardinality()) self else other; - const larger = if (self.cardinality() <= other.cardinality()) other else self; - - var iter = smaller.iterator(); - while (iter.next()) |el| { - if (larger.contains(el.key_ptr.*)) { - return false; - } - } - return true; - } - - /// Returns true if this Set is empty otherwise false. - pub fn isEmpty(self: Self) bool { - return self.unmanaged.count() == 0; - } - - /// Create an iterator over the elements in the set. - /// The iterator is invalidated if the set is modified during iteration. - pub fn iterator(self: Self) Iterator { - const slice = self.unmanaged.entries.slice(); - return .{ - .keys = slice.items(.key).ptr, - .len = @as(u32, @intCast(slice.len)), - }; - } - - /// properSubsetOf determines if every element in this set is in - /// the other set but the two sets are not equal. 
- pub fn properSubsetOf(self: Self, other: Self) bool { - return self.unmanaged.count() < other.unmanaged.count() and self.subsetOf(other); - } - - /// properSupersetOf determines if every element in the other set - /// is in this set but the two sets are not equal. - pub fn properSupersetOf(self: Self, other: Self) bool { - return self.unmanaged.count() > other.unmanaged.count() and self.supersetOf(other); - } - - /// subsetOf determines if every element in this set is in - /// the other set. - pub fn subsetOf(self: Self, other: Self) bool { - // First discriminate on cardinalties of both sets. - if (self.unmanaged.count() > other.unmanaged.count()) { - return false; - } - - // Now check that self set has at least some elements from other. - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - return false; - } - } - - return true; - } - - /// subsetOf determines if every element in the other Set is in - /// the this Set. - pub fn supersetOf(self: Self, other: Self) bool { - // This is just the converse of subsetOf. - return other.subsetOf(self); - } - - /// pop removes and returns an arbitrary ?E from the set. - /// Order is not guaranteed. - /// This safely returns null if the Set is empty. - pub fn pop(self: *Self) ?E { - if (self.unmanaged.count() > 0) { - var iter = self.unmanaged.iterator(); - // NOTE: No in-place mutation as it invalidates live iterators. - // So a temporary capture is taken. 
- var capturedElement: E = undefined; - while (iter.next()) |entry| { - capturedElement = entry.key_ptr.*; - break; - } - _ = self.unmanaged.swapRemove(capturedElement); - return capturedElement; - } else { - return null; - } - } - - /// remove discards a single element from the Set - pub fn remove(self: *Self, element: E) bool { - return self.unmanaged.swapRemove(element); - } - - /// removesAll discards all elements passed from the other Set from - /// this Set - pub fn removeAll(self: *Self, other: Self) void { - var iter = other.iterator(); - while (iter.next()) |el| { - _ = self.unmanaged.swapRemove(el.key_ptr.*); - } - } - - /// removesAllSlice discards all elements passed as a slice from the Set - pub fn removeAllSlice(self: *Self, elements: []const E) void { - for (elements) |el| { - _ = self.unmanaged.swapRemove(el); - } - } - - /// symmetricDifferenceOf returns a new set with all elements which are - /// in either this set or the other set but not in both. - /// - /// The caller owns the newly allocated/returned Set. - pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var sdSet = Self.init(); - - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - _ = try sdSet.add(allocator, entry.key_ptr.*); - } - } - - iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!self.unmanaged.contains(entry.key_ptr.*)) { - _ = try sdSet.add(allocator, entry.key_ptr.*); - } - } - - return sdSet; - } - - /// symmetricDifferenceUpdate does an in-place mutation with all elements - /// which are in either this set or the other set but not in both. - pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. 
- // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const sd = try self.symmetricDifferenceOf(allocator, other); - - // Destroy the internal map. - self.unmanaged.deinit(allocator); - - // Swap it out with the new set. - self.unmanaged = sd.unmanaged; - } - - /// union returns a new set with all elements in both sets. - /// - /// The caller owns the newly allocated/returned Set. - pub fn unionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - // Sniff out larger set for capacity hint. - var n = self.unmanaged.count(); - if (other.unmanaged.count() > n) n = other.unmanaged.count(); - - var uSet = try Self.initCapacity( - allocator, - @intCast(n), - ); - - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try uSet.add(allocator, entry.key_ptr.*); - } - - iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try uSet.add(allocator, entry.key_ptr.*); - } - - return uSet; - } - - /// unionUpdate does an in-place union of the current Set and other Set. - /// - /// Allocations may occur. - pub fn unionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - var iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try self.add(allocator, entry.key_ptr.*); - } - } - }; -} - -const testing = std.testing; -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -test "example usage" { - // Create a set of u32s called A - var A = ArraySetUnmanaged(u32).init(); - defer A.deinit(testing.allocator); - - // Add some data - _ = try A.add(testing.allocator, 5); - _ = try A.add(testing.allocator, 6); - _ = try A.add(testing.allocator, 7); - - // Add more data; single shot, duplicate data is ignored. 
- _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); - - // Create another set called B - var B = ArraySetUnmanaged(u32).init(); - defer B.deinit(testing.allocator); - - // Add data to B - _ = try B.appendSlice(testing.allocator, &.{ 50, 30, 20 }); - - // Get the union of A | B - var un = try A.unionOf(testing.allocator, B); - defer un.deinit(testing.allocator); - - const expectedCount = 9; - try expectEqual(expectedCount, un.cardinality()); - - // Grab an iterator and dump the contents. - var cnt: usize = 0; - var iter = un.iterator(); - while (iter.next()) |el| { - std.log.debug("element: {d}", .{el.key_ptr.*}); - cnt += 1; - } - - try expectEqual(expectedCount, cnt); -} - -test "string usage" { - var A = ArraySetUnmanaged([]const u8).init(); - defer A.deinit(testing.allocator); - - var B = ArraySetUnmanaged([]const u8).init(); - defer B.deinit(testing.allocator); - - _ = try A.add(testing.allocator, "Hello"); - _ = try B.add(testing.allocator, "World"); - - var C = try A.unionOf(testing.allocator, B); - defer C.deinit(testing.allocator); - try expectEqual(2, C.cardinality()); - try expect(C.containsAllSlice(&.{ "Hello", "World" })); -} - -test "comprehensive usage" { - var set = ArraySetUnmanaged(u32).init(); - defer set.deinit(testing.allocator); - - try expect(set.isEmpty()); - - _ = try set.add(testing.allocator, 8); - _ = try set.add(testing.allocator, 6); - _ = try set.add(testing.allocator, 7); - try expectEqual(set.cardinality(), 3); - - _ = try set.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); - - // Positive cases. - try expect(set.contains(8)); - try expect(set.containsAllSlice(&.{ 5, 3, 9 })); - try expect(set.containsAnySlice(&.{ 5, 55, 12 })); - - // Negative cases. 
- try expect(!set.contains(99)); - try expect(!set.containsAllSlice(&.{ 8, 6, 77 })); - try expect(!set.containsAnySlice(&.{ 99, 55, 44 })); - - try expectEqual(set.cardinality(), 7); - - var other = ArraySetUnmanaged(u32).init(); - defer other.deinit(testing.allocator); - - try expect(other.isEmpty()); - - _ = try other.add(testing.allocator, 8); - _ = try other.add(testing.allocator, 6); - _ = try other.add(testing.allocator, 7); - - _ = try other.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); - - try expect(set.eql(other)); - try expectEqual(other.cardinality(), 7); - - try expect(other.remove(8)); - try expectEqual(other.cardinality(), 6); - try expect(!other.remove(55)); - try expect(!set.eql(other)); - - other.removeAllSlice(&.{ 6, 7 }); - try expectEqual(other.cardinality(), 4); - - // intersectionOf - var inter = try set.intersectionOf(testing.allocator, other); - defer inter.deinit(testing.allocator); - try expect(!inter.isEmpty()); - try expectEqual(inter.cardinality(), 4); - try expect(inter.containsAllSlice(&.{ 5, 3, 0, 9 })); - - // Union - var un = try set.unionOf(testing.allocator, other); - defer un.deinit(testing.allocator); - try expect(!un.isEmpty()); - try expectEqual(un.cardinality(), 7); - try expect(un.containsAllSlice(&.{ 8, 6, 7, 5, 3, 0, 9 })); - - // differenceOf - var diff = try set.differenceOf(testing.allocator, other); - defer diff.deinit(testing.allocator); - try expect(!diff.isEmpty()); - try expectEqual(diff.cardinality(), 3); - try expect(diff.containsAllSlice(&.{ 8, 7, 6 })); - - // symmetricDifferenceOf - _ = try set.add(testing.allocator, 11111); - _ = try set.add(testing.allocator, 9999); - _ = try other.add(testing.allocator, 7777); - var symmDiff = try set.symmetricDifferenceOf(testing.allocator, other); - defer symmDiff.deinit(testing.allocator); - try expect(!symmDiff.isEmpty()); - try expectEqual(symmDiff.cardinality(), 6); - try expect(symmDiff.containsAllSlice(&.{ 7777, 11111, 8, 7, 6, 9999 })); - - // subsetOf - - 
// supersetOf -} - -test "isDisjoint" { - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - var b = ArraySetUnmanaged(u32).init(); - defer b.deinit(testing.allocator); - _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); - - // Test the true case. - try expect(a.isDisjoint(b)); - try expect(b.isDisjoint(a)); - - // Test the false case. - var c = ArraySetUnmanaged(u32).init(); - defer c.deinit(testing.allocator); - _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); - - try expect(!a.isDisjoint(c)); - try expect(!c.isDisjoint(a)); -} - -test "clone" { - - // clone - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - var b = try a.clone(testing.allocator); - defer b.deinit(testing.allocator); - - try expect(a.eql(b)); -} - -test "clear/capacity" { - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - - try expectEqual(0, a.cardinality()); - try expectEqual(0, a.capacity()); - - const cap = 99; - var b = try ArraySetUnmanaged(u32).initCapacity(testing.allocator, cap); - defer b.deinit(testing.allocator); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - for (0..cap) |val| { - _ = try b.add(testing.allocator, @intCast(val)); - } - - try expectEqual(99, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearRetainingCapacity(); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearAndFree(testing.allocator); - - try expectEqual(0, b.cardinality()); - try expectEqual(b.capacity(), 0); -} - -test "iterator" { - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - var sum: u32 = 0; - var iterCount: usize = 0; - var iter = a.iterator(); - while (iter.next()) |el| { - sum += el.key_ptr.*; - 
iterCount += 1; - } - - try expectEqual(90, sum); - try expectEqual(3, iterCount); -} - -test "pop" { - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - // No assumptions can be made about pop order. - while (a.pop()) |result| { - try expect(result == 20 or result == 30 or result == 40); - } - - // At this point, set must be empty. - try expectEqual(a.cardinality(), 0); - try expect(a.isEmpty()); - - // Lastly, pop should safely return null. - try expect(a.pop() == null); -} - -test "in-place methods" { - // intersectionUpdate - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - - var b = ArraySetUnmanaged(u32).init(); - defer b.deinit(testing.allocator); - _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); - - try a.intersectionUpdate(testing.allocator, b); - try expectEqual(a.cardinality(), 2); - try expect(a.containsAllSlice(&.{ 20, 30 })); - - // unionUpdate - var c = ArraySetUnmanaged(u32).init(); - defer c.deinit(testing.allocator); - _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - - var d = ArraySetUnmanaged(u32).init(); - defer d.deinit(testing.allocator); - _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); - - try c.unionUpdate(testing.allocator, d); - try expectEqual(c.cardinality(), 6); - try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); - - // differenceUpdate - var e = ArraySetUnmanaged(u32).init(); - defer e.deinit(testing.allocator); - _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); - - var f = ArraySetUnmanaged(u32).init(); - defer f.deinit(testing.allocator); - _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); - - try e.differenceUpdate(testing.allocator, f); - - try expectEqual(1, e.cardinality()); - try expect(e.contains(11111)); - - // 
symmetricDifferenceUpdate - var g = ArraySetUnmanaged(u32).init(); - defer g.deinit(testing.allocator); - _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - - var h = ArraySetUnmanaged(u32).init(); - defer h.deinit(testing.allocator); - _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); - - try g.symmetricDifferenceUpdate(testing.allocator, h); - - try expectEqual(10, g.cardinality()); - try expect(g.containsAllSlice(&.{ 1, 2, 11, 111, 22, 222, 1111, 333, 3333, 22222 })); -} - -test "removals" { - var a = ArraySetUnmanaged(u32).init(); - defer a.deinit(testing.allocator); - - _ = try a.appendSlice(testing.allocator, &.{ 5, 6, 7, 8 }); - _ = try a.appendSlice(testing.allocator, &.{ 50, 60, 70, 80 }); - _ = try a.appendSlice(testing.allocator, &.{ 111, 222, 333, 444 }); - - try expectEqual(12, a.cardinality()); - - try expect(a.remove(5)); - try expect(a.remove(6)); - try expect(a.remove(7)); - try expect(a.remove(8)); - - try expectEqual(8, a.cardinality()); - - a.removeAllSlice(&.{ 50, 60, 70, 80 }); - try expectEqual(4, a.cardinality()); - - var b = ArraySetUnmanaged(u32).init(); - defer b.deinit(testing.allocator); - - _ = try b.appendSlice(testing.allocator, &.{ 111, 222, 333, 444 }); - a.removeAll(b); - - try expectEqual(0, a.cardinality()); -} - -test "sizeOf matches" { - // No bloat guarantee, after all we're just building on top of what's good. 
- const expectedByteSize = 40; - try expectEqual(expectedByteSize, @sizeOf(std.array_hash_map.AutoArrayHashMapUnmanaged(u32, void))); - try expectEqual(expectedByteSize, @sizeOf(ArraySetUnmanaged(u32))); -} diff --git a/src/hash_set/unmanaged.zig b/src/dynamic_bit_set.zig similarity index 77% rename from src/hash_set/unmanaged.zig rename to src/dynamic_bit_set.zig index b9f9d77..dbe46ed 100644 --- a/src/hash_set/unmanaged.zig +++ b/src/dynamic_bit_set.zig @@ -1,74 +1,17 @@ -/// Open Source Initiative OSI - The MIT License (MIT):Licensing -/// The MIT License (MIT) -/// Copyright (c) 2025 Ralph Caraveo (deckarep@gmail.com) -/// Permission is hereby granted, free of charge, to any person obtaining a copy of -/// this software and associated documentation files (the "Software"), to deal in -/// the Software without restriction, including without limitation the rights to -/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -/// of the Software, and to permit persons to whom the Software is furnished to do -/// so, subject to the following conditions: -/// The above copyright notice and this permission notice shall be included in all -/// copies or substantial portions of the Software. -/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -/// SOFTWARE. -/// -/// const std = @import("std"); const mem = std.mem; const math = std.math; const Allocator = mem.Allocator; -/// comptime selection of the map type for string vs everything else. 
-fn selectMap(comptime E: type) type { - comptime { - if (E == []const u8) { - return std.StringHashMapUnmanaged(void); - } else { - return std.AutoHashMapUnmanaged(E, void); - } - } -} +// Note: this should not work for strings. This is a very niche implementation for numbers! -/// Select a context-aware hash map type -fn selectMapWithContext(comptime E: type, comptime Context: type, comptime max_load_percentage: u8) type { - return std.HashMapUnmanaged(E, void, Context, max_load_percentage); -} -/// HashSetUnmanaged is an implementation of a Set where there is no internal -/// allocator and all allocating methods require a first argument allocator. -/// This is a more compact Set built on top of the the HashMapUnmanaged -/// datastructure. -/// Note that max_load_percentage defaults to undefined, because the underlying -/// std.AutoHashMap/std.StringHashMap defaults are used. -pub fn HashSetUnmanaged(comptime E: type) type { - return HashSetUnmanagedWithContext(E, void, undefined); -} - -/// HashSetUnmanagedWithContext creates a set based on element type E with custom hashing behavior. -/// This variant allows specifying: -/// - A Context type that implements hash() and eql() functions for custom element hashing -/// - A max_load_percentage (1-100) that controls hash table resizing -/// If Context is undefined, then max_load_percentage is ignored. -/// -/// The Context type must provide: -/// fn hash(self: Context, key: K) u64 -/// fn eql(self: Context, a: K, b: K) bool -pub fn HashSetUnmanagedWithContext(comptime E: type, comptime Context: type, comptime max_load_percentage: u8) type { +/// HashSetWithContext creates a set based on element type E with custom hashing behavior. 
+/// Helpful comments goes here +pub fn BitSetUnmanaged(comptime E: type) type { return struct { - /// The type of the internal hash map - pub const Map = if (Context == void) selectMap(E) else selectMapWithContext(E, Context, max_load_percentage); - - unmanaged: Map, - context: if (Context == void) void else Context = if (Context == void) {} else undefined, - max_load_percentage: if (Context == void) void else u8 = if (Context == void) {} else max_load_percentage, - - pub const Size = Map.Size; - /// The iterator type returned by iterator(), key-only for sets + unmanaged: std.bit_set.DynamicallyBitSetUnmanaged, + pub const Iterator = Map.KeyIterator; const Self = @This(); @@ -76,47 +19,32 @@ pub fn HashSetUnmanagedWithContext(comptime E: type, comptime Context: type, com /// Initialize a default set without context pub fn init() Self { return .{ - .unmanaged = Map{}, - .context = if (Context == void) {} else undefined, - .max_load_percentage = if (Context == void) {} else max_load_percentage, + .unmanaged = std.bit_set.DynamicallyBitSetUnmanaged{}, }; } - /// Initialize with a custom context - pub fn initContext(context: Context) Self { - return .{ - .unmanaged = Map{}, - .context = context, - .max_load_percentage = max_load_percentage, - }; - } - - /// Initialzies a Set using a capacity hint, with the given Allocator - pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { - var self = Self.init(); - try self.unmanaged.ensureTotalCapacity(allocator, num); - return self; - } - /// Destroys the unmanaged Set. 
pub fn deinit(self: *Self, allocator: Allocator) void { - self.unmanaged.deinit(allocator); - self.* = undefined; + // TODO do when done with add } - + + /// Capacity: the maximum number i can insert without allocating pub fn add(self: *Self, allocator: Allocator, element: E) Allocator.Error!bool { - const prevCount = self.unmanaged.count(); - try self.unmanaged.put(allocator, element, {}); - return prevCount != self.unmanaged.count(); + if (self.unmanaged.capacity >= e) { + // Allocate memory to hold the int + } + // la lògica és correcta ya segut + const mask_bits: comptime_int = @bitSizeOf(self.unmanaged.MaskInt); + const mask_index = @divFloor(element, mask_bits); + const bit = @mod(element, mask_bits); + self.unmanaged.masks[mask] |= @as(self.unmanaged.ShiftInt, 1) << bit; } /// Adds a single element to the set. Asserts that there is enough capacity. /// A bool is returned indicating if the element was actually added /// if not already known. pub fn addAssumeCapacity(self: *Self, element: E) bool { - const prevCount = self.unmanaged.count(); - self.unmanaged.putAssumeCapacity(element, {}); - return prevCount != self.unmanaged.count(); + //TODO } /// Appends all elements from the provided set, and may allocate. @@ -242,22 +170,12 @@ pub fn HashSetUnmanagedWithContext(comptime E: type, comptime Context: type, com /// differenceUpdate does an in-place mutation of this set /// and other. This set will contain all elements of this set that are not /// also elements of other. - pub fn differenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. 
- const diffSet = try self.differenceOf(allocator, other); - - // Destroy the internal map. - self.unmanaged.deinit(allocator); - - // Swap it out with the new set. - self.unmanaged = diffSet.unmanaged; + pub fn differenceUpdate(self: *Self, other: Self) Allocator.Error!void { + var iter = other.iterator(); + + while (iter.next()) |key_ptr| { + _ = self.remove(key_ptr.*); + } } fn dump(self: Self) void { @@ -334,21 +252,19 @@ pub fn HashSetUnmanagedWithContext(comptime E: type, comptime Context: type, com /// to the current set from the other set keeping only /// elements found in this Set and the other Set. pub fn intersectionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. + var to_remove: std.ArrayList(E) = .empty; + defer to_remove.deinit(allocator); - // Just get a new set with the normal method. - const interSet = try self.intersectionOf(allocator, other); - - // Destroy the internal map. - self.unmanaged.deinit(allocator); + var iter = self.iterator(); + while (iter.next()) |key_ptr| { + if (!other.contains(key_ptr.*)) { + try to_remove.append(allocator, key_ptr.*); + } + } - // Swap it out with the new set. - self.unmanaged = interSet.unmanaged; + for (to_remove.items) |item| { + _ = self.remove(item); + } } /// isDisjoint returns true if the intersection between two sets is the null set. @@ -483,21 +399,16 @@ pub fn HashSetUnmanagedWithContext(comptime E: type, comptime Context: type, com /// symmetricDifferenceUpdate does an in-place mutation with all elements /// which are in either this set or the other set but not in both. 
pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const sd = try self.symmetricDifferenceOf(allocator, other); - - // Destroy the internal map. - self.unmanaged.deinit(allocator); + var iter = other.iterator(); + while (iter.next()) |key_ptr| { + const element = key_ptr.*; - // Swap it out with the new set. - self.unmanaged = sd.unmanaged; + if (self.contains(element)) { + _ = self.remove(element); + } else { + _ = try self.add(allocator, element); + } + } } /// union returns a new set with all elements in both sets. @@ -544,7 +455,7 @@ const expectEqual = std.testing.expectEqual; test "example usage" { // Create a set of u32s called A - var A = HashSetUnmanaged(u32).init(); + var A = HashSet(u32).init(); defer A.deinit(testing.allocator); // Add some data @@ -556,7 +467,7 @@ test "example usage" { _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); // Create another set called B - var B = HashSetUnmanaged(u32).init(); + var B = HashSet(u32).init(); defer B.deinit(testing.allocator); // Add data to B @@ -576,10 +487,10 @@ test "example usage" { } test "string usage" { - var A = HashSetUnmanaged([]const u8).init(); + var A = HashSet([]const u8).init(); defer A.deinit(testing.allocator); - var B = HashSetUnmanaged([]const u8).init(); + var B = HashSet([]const u8).init(); defer B.deinit(testing.allocator); _ = try A.add(testing.allocator, "Hello"); @@ -592,7 +503,7 @@ test "string usage" { } test "comprehensive usage" { - var set = HashSetUnmanaged(u32).init(); + var set = HashSet(u32).init(); defer set.deinit(testing.allocator); try 
expect(set.isEmpty()); @@ -616,7 +527,7 @@ test "comprehensive usage" { try expectEqual(set.cardinality(), 7); - var other = HashSetUnmanaged(u32).init(); + var other = HashSet(u32).init(); defer other.deinit(testing.allocator); try expect(other.isEmpty()); @@ -675,11 +586,11 @@ test "comprehensive usage" { } test "isDisjoint" { - var a = HashSetUnmanaged(u32).init(); + var a = HashSet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - var b = HashSetUnmanaged(u32).init(); + var b = HashSet(u32).init(); defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); @@ -688,7 +599,7 @@ test "isDisjoint" { try expect(b.isDisjoint(a)); // Test the false case. - var c = HashSetUnmanaged(u32).init(); + var c = HashSet(u32).init(); defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); @@ -699,7 +610,7 @@ test "isDisjoint" { test "clone" { // clone - var a = HashSetUnmanaged(u32).init(); + var a = HashSet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -710,14 +621,14 @@ test "clone" { } test "clear/capacity" { - var a = HashSetUnmanaged(u32).init(); + var a = HashSet(u32).init(); defer a.deinit(testing.allocator); try expectEqual(0, a.cardinality()); try expectEqual(0, a.capacity()); const cap = 99; - var b = try HashSetUnmanaged(u32).initCapacity(testing.allocator, cap); + var b = try HashSet(u32).initCapacity(testing.allocator, cap); defer b.deinit(testing.allocator); try expectEqual(0, b.cardinality()); @@ -742,7 +653,7 @@ test "clear/capacity" { } test "iterator" { - var a = HashSetUnmanaged(u32).init(); + var a = HashSet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -759,7 +670,7 @@ test "iterator" { } test "pop" { - var a = HashSetUnmanaged(u32).init(); + var a = HashSet(u32).init(); defer 
a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -778,11 +689,11 @@ test "pop" { test "in-place methods" { // intersectionUpdate - var a = HashSetUnmanaged(u32).init(); + var a = HashSet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var b = HashSetUnmanaged(u32).init(); + var b = HashSet(u32).init(); defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -791,11 +702,11 @@ test "in-place methods" { try expect(a.containsAllSlice(&.{ 20, 30 })); // unionUpdate - var c = HashSetUnmanaged(u32).init(); + var c = HashSet(u32).init(); defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var d = HashSetUnmanaged(u32).init(); + var d = HashSet(u32).init(); defer d.deinit(testing.allocator); _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -804,25 +715,25 @@ test "in-place methods" { try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); // differenceUpdate - var e = HashSetUnmanaged(u32).init(); + var e = HashSet(u32).init(); defer e.deinit(testing.allocator); _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); - var f = HashSetUnmanaged(u32).init(); + var f = HashSet(u32).init(); defer f.deinit(testing.allocator); _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); - try e.differenceUpdate(testing.allocator, f); + try e.differenceUpdate(f); try expectEqual(1, e.cardinality()); try expect(e.contains(11111)); // symmetricDifferenceUpdate - var g = HashSetUnmanaged(u32).init(); + var g = HashSet(u32).init(); defer g.deinit(testing.allocator); _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - var h = HashSetUnmanaged(u32).init(); + var h = HashSet(u32).init(); defer h.deinit(testing.allocator); _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); @@ 
-837,7 +748,7 @@ test "sizeOf matches" { // "What's good Miley!?!?"" const expectedByteSize = 24; const autoHashMapSize = @sizeOf(std.hash_map.AutoHashMapUnmanaged(u32, void)); - const hashSetSize = @sizeOf(HashSetUnmanaged(u32)); + const hashSetSize = @sizeOf(HashSet(u32)); try expectEqual(expectedByteSize, autoHashMapSize); try expectEqual(expectedByteSize, hashSetSize); @@ -845,8 +756,8 @@ test "sizeOf matches" { // The unmanaged with context must be larger by the size of the empty Context struct, // due to the added Context and alignment padding. const expectedContextDiff = 8; - const hashSetWithVoidContextSize = @sizeOf(HashSetUnmanagedWithContext(u32, void, undefined)); - const hashSetWithContextSize = @sizeOf(HashSetUnmanagedWithContext(u32, TestContext, 75)); + const hashSetWithVoidContextSize = @sizeOf(HashSetWithContext(u32, void, undefined)); + const hashSetWithContextSize = @sizeOf(HashSetWithContext(u32, TestContext, 75)); try expectEqual(0, hashSetWithVoidContextSize - hashSetSize); try expectEqual(expectedContextDiff, hashSetWithContextSize - hashSetSize); } @@ -863,7 +774,7 @@ const TestContext = struct { test "custom hash function comprehensive" { const context = TestContext{}; - var set = HashSetUnmanagedWithContext(u32, TestContext, 75).initContext(context); + var set = HashSetWithContext(u32, TestContext, 75).initContext(context); defer set.deinit(testing.allocator); // Test basic operations @@ -881,7 +792,7 @@ test "custom hash function comprehensive" { try expect(set.eql(cloned)); // Test set operations with custom context - var other = HashSetUnmanagedWithContext(u32, TestContext, 75).initContext(context); + var other = HashSetWithContext(u32, TestContext, 75).initContext(context); defer other.deinit(testing.allocator); _ = try other.add(testing.allocator, 456); _ = try other.add(testing.allocator, 789); @@ -920,11 +831,11 @@ test "custom hash function with different load factors" { const context = TestContext{}; // Test with low load factor 
- var low_load = HashSetUnmanagedWithContext(u32, TestContext, 25).initContext(context); + var low_load = HashSetWithContext(u32, TestContext, 25).initContext(context); defer low_load.deinit(testing.allocator); // Test with high load factor - var high_load = HashSetUnmanagedWithContext(u32, TestContext, 90).initContext(context); + var high_load = HashSetWithContext(u32, TestContext, 90).initContext(context); defer high_load.deinit(testing.allocator); // Add same elements to both @@ -946,7 +857,7 @@ test "custom hash function with different load factors" { test "custom hash function error cases" { const context = TestContext{}; - var set = HashSetUnmanagedWithContext(u32, TestContext, 75).initContext(context); + var set = HashSetWithContext(u32, TestContext, 75).initContext(context); defer set.deinit(testing.allocator); // Test allocation failures @@ -974,10 +885,10 @@ const StringContext = struct { test "custom hash function string usage" { const context = StringContext{}; - var A = HashSetUnmanagedWithContext([]const u8, StringContext, 75).initContext(context); + var A = HashSetWithContext([]const u8, StringContext, 75).initContext(context); defer A.deinit(testing.allocator); - var B = HashSetUnmanagedWithContext([]const u8, StringContext, 75).initContext(context); + var B = HashSetWithContext([]const u8, StringContext, 75).initContext(context); defer B.deinit(testing.allocator); _ = try A.add(testing.allocator, "Hello"); diff --git a/src/hash_set.zig b/src/hash_set.zig index a5bdf82..9466067 100644 --- a/src/hash_set.zig +++ b/src/hash_set.zig @@ -329,17 +329,15 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max var iter = self.iterator(); while (iter.next()) |key_ptr| { - // Dereference key_ptr when checking! if (!other.contains(key_ptr.*)) { - // Dereference key_ptr when appending! try to_remove.append(allocator, key_ptr.*); } } for (to_remove.items) |item| { - // 'item' is already type E, so no .* is needed here! 
_ = self.remove(item); - } } + } + } /// isDisjoint returns true if the intersection between two sets is the null set. /// Otherwise returns false. @@ -475,7 +473,7 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { var iter = other.iterator(); while (iter.next()) |key_ptr| { - const element = key_ptr.*; // Extract it once up front + const element = key_ptr.*; if (self.contains(element)) { _ = self.remove(element); diff --git a/src/hash_set/managed.zig b/src/hash_set/managed.zig deleted file mode 100644 index 5ced230..0000000 --- a/src/hash_set/managed.zig +++ /dev/null @@ -1,925 +0,0 @@ -/// Open Source Initiative OSI - The MIT License (MIT):Licensing -/// The MIT License (MIT) -/// Copyright (c) 2025 Ralph Caraveo (deckarep@gmail.com) -/// Permission is hereby granted, free of charge, to any person obtaining a copy of -/// this software and associated documentation files (the "Software"), to deal in -/// the Software without restriction, including without limitation the rights to -/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -/// of the Software, and to permit persons to whom the Software is furnished to do -/// so, subject to the following conditions: -/// The above copyright notice and this permission notice shall be included in all -/// copies or substantial portions of the Software. -/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -/// SOFTWARE. 
-/// -/// -/// -const std = @import("std"); -const mem = std.mem; -const Allocator = mem.Allocator; -const SetUnmanaged = @import("unmanaged.zig").HashSetUnmanaged; -const SetUnmanagedWithContext = @import("unmanaged.zig").HashSetUnmanagedWithContext; - -/// fn HashSetManaged(E) creates a set based on element type E. -/// This implementation is backed by the std.AutoHashMap implementation -/// where a Value is not needed and considered to be void and -/// a Key is considered to be a Set element of type E. -/// The Set comes complete with the common set operations expected -/// in a comprehensive set-based data-structure. -/// Note that max_load_percentage is passed as undefined, because the underlying -/// std.AutoHashMap/std.StringHashMap defaults are used. -pub fn HashSetManaged(comptime E: type) type { - return HashSetManagedWithContext(E, void, undefined); -} - -/// HashSetManagedWithContext creates a set based on element type E with custom hashing behavior. -/// This variant allows specifying: -/// - A Context type that implements hash() and eql() functions for custom element hashing -/// - A max_load_percentage (1-100) that controls hash table resizing -/// If Context is undefined, then max_load_percentage is ignored. 
-/// -/// The Context type must provide: -/// fn hash(self: Context, key: K) u64 -/// fn eql(self: Context, a: K, b: K) bool -pub fn HashSetManagedWithContext(comptime E: type, comptime Context: type, comptime max_load_percentage: u8) type { - return struct { - allocator: std.mem.Allocator, - - map: Map, - context: if (Context == void) void else Context = if (Context == void) {} else undefined, - max_load_percentage: if (Context == void) void else u8 = if (Context == void) {} else max_load_percentage, - - /// The type of the internal hash map - pub const Map = SetUnmanagedWithContext(E, Context, max_load_percentage); - pub const Size = Map.Size; - /// The iterator type returned by iterator(), key-only for sets - pub const Iterator = Map.Iterator; - - const Self = @This(); - - /// Initialzies a Set with the given Allocator - pub fn init(allocator: std.mem.Allocator) Self { - return .{ - .allocator = allocator, - .map = Map.init(), - .context = if (Context == void) {} else undefined, - .max_load_percentage = if (Context == void) {} else max_load_percentage, - }; - } - - pub fn initContext(allocator: std.mem.Allocator, context: Context) Self { - return .{ - .allocator = allocator, - .map = Map.initContext(context), - .context = context, - .max_load_percentage = max_load_percentage, - }; - } - - /// Initialzies a Set using a capacity hint, with the given Allocator - pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { - var self = Self.init(allocator); - self.allocator = allocator; - try self.map.ensureTotalCapacity(allocator, num); - return self; - } - - /// Destory the Set - pub fn deinit(self: *Self) void { - self.map.deinit(self.allocator); - self.* = undefined; - } - - /// Adds a single element to the set and an allocation may occur. - /// add may return an Allocator.Error or bool indicating if the element - /// was actually added if not already known. 
- pub fn add(self: *Self, element: E) Allocator.Error!bool { - return self.map.add(self.allocator, element); - } - - /// Adds a single element to the set. Asserts that there is enough capacity. - /// A bool is returned indicating if the element was actually added - /// if not already known. - pub fn addAssumeCapacity(self: *Self, element: E) bool { - const prevCount = self.map.cardinality(); - self.map.putAssumeCapacity(self.allocator, element, {}); - return prevCount != self.map.cardinality(); - } - - /// Appends all elements from the provided set, and may allocate. - /// append returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the Set. - pub fn append(self: *Self, other: Self) Allocator.Error!Size { - const prevCount = self.map.cardinality(); - - try self.unionUpdate(self.allocator, other); - return self.map.cardinality() - prevCount; - } - - /// Appends all elements from the provided slice, and may allocate. - /// appendSlice returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the slice. - pub fn appendSlice(self: *Self, elements: []const E) Allocator.Error!Size { - const prevCount = self.map.cardinality(); - for (elements) |el| { - _ = try self.map.add(self.allocator, el); - } - return self.map.cardinality() - prevCount; - } - - /// Returns the number of total elements which may be present before - /// it is no longer guaranteed that no allocations will be performed. - pub fn capacity(self: Self) Size { - return self.map.capacity(); - } - - /// Cardinality effectively returns the size of the set - pub fn cardinality(self: Self) Size { - return self.map.cardinality(); - } - - /// Invalidates all element pointers. - pub fn clearAndFree(self: *Self) void { - self.map.clearAndFree(self.allocator); - } - - /// Invalidates all element pointers. 
- pub fn clearRetainingCapacity(self: *Self) void { - self.map.clearRetainingCapacity(); - } - - /// Creates a copy of this set, using the same allocator. - /// clone may return an Allocator.Error or the cloned Set. - pub fn clone(self: *Self) Allocator.Error!Self { - // Take a stack copy of self. - var cloneSelf = self.*; - // Clone the interal map. - cloneSelf.map = try self.map.clone(self.allocator); - return cloneSelf; - } - - /// Creates a copy of this set, using a specified allocator. - /// cloneWithAllocator may be return an Allocator.Error or the cloned Set. - pub fn cloneWithAllocator(self: *Self, allocator: Allocator) Allocator.Error!Self { - // Since we're borrowing the internal map allocator, temporarily back it up. - const prevAllocator = self.allocator; - // Restore it at the end of the func, because the self.map should use the - // original allocator. - defer self.allocator = prevAllocator; - - // The cloned map must use and refer to the new allocator only. - self.allocator = allocator; - const cloneSelf = try self.clone(); - return cloneSelf; - } - - /// Returns true when the provided element exists within the Set otherwise false. - pub fn contains(self: Self, element: E) bool { - return self.map.contains(element); - } - - /// Returns true when all elements in the other Set are present in this Set - /// otherwise false. - pub fn containsAll(self: Self, other: Self) bool { - return self.map.containsAll(other); - } - - /// Returns true when all elements in the provided slice are present otherwise false. - pub fn containsAllSlice(self: Self, elements: []const E) bool { - return self.map.containsAllSlice(elements); - } - - /// Returns true when at least one or more elements from the other Set exist within - /// this Set otherwise false. 
- pub fn containsAny(self: Self, other: Self) bool { - var iter = other.iterator(); - while (iter.next()) |el| { - if (self.map.contains(el.*)) { - return true; - } - } - return false; - } - - /// Returns true when at least one or more elements from the slice exist within - /// this Set otherwise false. - pub fn containsAnySlice(self: Self, elements: []const E) bool { - for (elements) |el| { - if (self.map.contains(el)) { - return true; - } - } - return false; - } - - /// differenceOf returns the difference between this set - /// and other. The returned set will contain - /// all elements of this set that are not also - /// elements of the other. - /// - /// Caller owns the newly allocated/returned set. - pub fn differenceOf(self: Self, other: Self) Allocator.Error!Self { - var diffSet = Self.init(self.allocator); - - var iter = self.map.iterator(); - while (iter.next()) |pVal| { - if (!other.map.contains(pVal.*)) { - _ = try diffSet.add(pVal.*); - } - } - return diffSet; - } - - /// differenceUpdate does an in-place mutation of this set - /// and other. This set will contain all elements of this set that are not - /// also elements of other. - pub fn differenceUpdate(self: *Self, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const diffSet = try self.differenceOf(other); - - // Destroy the internal map. - self.map.deinit(self.allocator); - - // Swap it out with the new set. 
- self.map = diffSet.map; - } - - fn dump(self: Self) void { - std.log.err("\ncardinality: {d}\n", .{self.cardinality()}); - var iter = self.iterator(); - while (iter.next()) |el| { - std.log.err(" element: {d}\n", .{el.*}); - } - } - - /// Increases capacity, guaranteeing that insertions up until the - /// `expected_count` will not cause an allocation, and therefore cannot fail. - pub fn ensureTotalCapacity(self: *Self, expected_count: Size) Allocator.Error!void { - return self.map.ensureTotalCapacity(expected_count); - } - - /// Increases capacity, guaranteeing that insertions up until - /// `additional_count` **more** items will not cause an allocation, and - /// therefore cannot fail. - pub fn ensureUnusedCapacity(self: *Self, additional_count: Size) Allocator.Error!void { - return self.map.ensureUnusedCapacity(additional_count); - } - - /// eql determines if two sets are equal to each - /// other. If they have the same cardinality - /// and contain the same elements, they are - /// considered equal. The order in which - /// the elements were added is irrelevant. - pub fn eql(self: Self, other: Self) bool { - // First discriminate on cardinalities of both sets. - if (self.map.cardinality() != other.map.cardinality()) { - return false; - } - - // Now check for each element one for one and exit early - // on the first non-match. - var iter = self.map.iterator(); - while (iter.next()) |pVal| { - if (!other.map.contains(pVal.*)) { - return false; - } - } - - return true; - } - - /// intersectionOf returns a new set containing only the elements - /// that exist only in both sets. - /// - /// Caller owns the newly allocated/returned set. - pub fn intersectionOf(self: Self, other: Self) Allocator.Error!Self { - var interSet = Self.init(self.allocator); - - // Optimization: iterate over whichever set is smaller. - // Matters when disparity in cardinality is large. 
- var s = other; - var o = self; - if (self.map.cardinality() < other.map.cardinality()) { - s = self; - o = other; - } - - var iter = s.map.iterator(); - while (iter.next()) |pVal| { - if (o.map.contains(pVal.*)) { - _ = try interSet.add(pVal.*); - } - } - - return interSet; - } - - /// intersectionUpdate does an in-place intersecting update - /// to the current set from the other set keeping only - /// elements found in this Set and the other Set. - pub fn intersectionUpdate(self: *Self, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. - - // Just get a new set with the normal method. - const interSet = try self.intersectionOf(other); - - // Destroy the internal map. - self.map.deinit(self.allocator); - - // Swap it out with the new set. - self.map = interSet.map; - } - - /// isDisjoint returns true if the intersection between two sets is the null set. - /// Otherwise returns false. - pub fn isDisjoint(self: Self, other: Self) bool { - return self.map.isDisjoint(other.map); - } - - /// In place style: - /// differenceOfUpdate - /// symmetric_differenceOf_update - /// Returns true if the set is empty otherwise false - pub fn isEmpty(self: Self) bool { - return self.map.cardinality() == 0; - } - - /// Create an iterator over the elements in the set. - /// The iterator is invalidated if the set is modified during iteration. - pub fn iterator(self: Self) Iterator { - return self.map.iterator(); - } - - /// properSubsetOf determines if every element in this set is in - /// the other set but the two sets are not equal. 
- pub fn properSubsetOf(self: Self, other: Self) bool { - return self.map.cardinality() < other.map.cardinality() and self.subsetOf(other); - } - - /// properSupersetOf determines if every element in the other set - /// is in this set but the two sets are not equal. - pub fn properSupersetOf(self: Self, other: Self) bool { - return self.map.cardinality() > other.map.cardinality() and self.supersetOf(other); - } - - /// subsetOf determines if every element in this set is in - /// the other set. - pub fn subsetOf(self: Self, other: Self) bool { - // First discriminate on cardinalties of both sets. - if (self.map.cardinality() > other.map.cardinality()) { - return false; - } - - // Now check that self set has at least some elements from other. - var iter = self.map.iterator(); - while (iter.next()) |pVal| { - if (!other.map.contains(pVal.*)) { - return false; - } - } - - return true; - } - - /// subsetOf determines if every element in the other Set is in - /// the this Set. - pub fn supersetOf(self: Self, other: Self) bool { - // This is just the converse of subsetOf. - return other.subsetOf(self); - } - - /// pop removes and returns an arbitrary ?E from the set. - /// Order is not guaranteed. - /// This safely returns null if the Set is empty. - pub fn pop(self: *Self) ?E { - if (self.map.cardinality() > 0) { - var iter = self.map.iterator(); - // NOTE: No in-place mutation as it invalidates live iterators. - // So a temporary capture is taken. 
- var capturedElement: E = undefined; - while (iter.next()) |pVal| { - capturedElement = pVal.*; - break; - } - _ = self.map.remove(capturedElement); - return capturedElement; - } else { - return null; - } - } - - /// remove discards a single element from the Set - pub fn remove(self: *Self, element: E) bool { - return self.map.remove(element); - } - - /// removesAll discards all elements passed from the other Set from - /// this Set - pub fn removeAll(self: *Self, other: Self) void { - var iter = other.iterator(); - while (iter.next()) |el| { - _ = self.map.remove(el); - } - } - - /// removesAllSlice discards all elements passed as a slice from the Set - pub fn removeAllSlice(self: *Self, elements: []const E) void { - for (elements) |el| { - _ = self.map.remove(el); - } - } - - /// symmetricDifferenceOf returns a new set with all elements which are - /// in either this set or the other set but not in both. - /// - /// The caller owns the newly allocated/returned Set. - pub fn symmetricDifferenceOf(self: Self, other: Self) Allocator.Error!Self { - var sdSet = Self.init(self.allocator); - - var iter = self.map.iterator(); - while (iter.next()) |pVal| { - if (!other.map.contains(pVal.*)) { - _ = try sdSet.add(pVal.*); - } - } - - iter = other.map.iterator(); - while (iter.next()) |pVal| { - if (!self.map.contains(pVal.*)) { - _ = try sdSet.add(pVal.*); - } - } - - return sdSet; - } - - /// symmetricDifferenceUpdate does an in-place mutation with all elements - /// which are in either this set or the other set but not in both. - pub fn symmetricDifferenceUpdate(self: *Self, other: Self) Allocator.Error!void { - // In-place mutation invalidates iterators therefore a temp set is needed. - // So instead of a temp set, just invoke the regular full function which - // allocates and returns a set then swap out the map internally. - - // Also, this saves a step of not having to possibly discard many elements - // from the self set. 
- - // Just get a new set with the normal method. - const sd = try self.symmetricDifferenceOf(other); - - // Destroy the internal map. - self.map.deinit(self.allocator); - - // Swap it out with the new set. - self.map = sd.map; - } - - /// union returns a new set with all elements in both sets. - /// - /// The caller owns the newly allocated/returned Set. - pub fn unionOf(self: Self, other: Self) Allocator.Error!Self { - // Sniff out larger set for capacity hint. - var n = self.map.cardinality(); - if (other.map.cardinality() > n) n = other.map.cardinality(); - - var uSet = try Self.initCapacity( - self.allocator, - @intCast(n), - ); - - var iter = self.map.iterator(); - while (iter.next()) |pVal| { - _ = try uSet.add(pVal.*); - } - - iter = other.map.iterator(); - while (iter.next()) |pVal| { - _ = try uSet.add(pVal.*); - } - - return uSet; - } - - /// unionUpdate does an in-place union of the current Set and other Set. - /// - /// Allocations may occur. - pub fn unionUpdate(self: *Self, other: Self) Allocator.Error!void { - var iter = other.map.iterator(); - while (iter.next()) |pVal| { - _ = try self.add(pVal.*); - } - } - }; -} - -const testing = std.testing; -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -test "example usage" { - // import the namespace. - // const set = @import("set.zig"); - - // Create a set of u32s called A - var A = HashSetManaged(u32).init(std.testing.allocator); - defer A.deinit(); - - // Add some data - _ = try A.add(5); - _ = try A.add(6); - _ = try A.add(7); - - // Add more data; single shot, duplicate data is ignored. - _ = try A.appendSlice(&.{ 5, 3, 0, 9 }); - - // Create another set called B - var B = HashSetManaged(u32).init(std.testing.allocator); - defer B.deinit(); - - // Add data to B - _ = try B.appendSlice(&.{ 50, 30, 20 }); - - // Get the union of A | B - var un = try A.unionOf(B); - defer un.deinit(); - - // Grab an iterator and dump the contents. 
- var iter = un.iterator(); - while (iter.next()) |el| { - std.log.debug("element: {d}", .{el.*}); - } -} - -test "string usage" { - var A = HashSetManaged([]const u8).init(std.testing.allocator); - defer A.deinit(); - - var B = HashSetManaged([]const u8).init(std.testing.allocator); - defer B.deinit(); - - _ = try A.add("Hello"); - _ = try B.add("World"); - - var C = try A.unionOf(B); - defer C.deinit(); - try expectEqual(2, C.cardinality()); - try expect(C.containsAllSlice(&.{ "Hello", "World" })); -} - -test "comprehensive usage" { - var set = HashSetManaged(u32).init(std.testing.allocator); - defer set.deinit(); - - try expect(set.isEmpty()); - - _ = try set.add(8); - _ = try set.add(6); - _ = try set.add(7); - try expectEqual(set.cardinality(), 3); - - _ = try set.appendSlice(&.{ 5, 3, 0, 9 }); - - // Positive cases. - try expect(set.contains(8)); - try expect(set.containsAllSlice(&.{ 5, 3, 9 })); - try expect(set.containsAnySlice(&.{ 5, 55, 12 })); - - // Negative cases. - try expect(!set.contains(99)); - try expect(!set.containsAllSlice(&.{ 8, 6, 77 })); - try expect(!set.containsAnySlice(&.{ 99, 55, 44 })); - - try expectEqual(set.cardinality(), 7); - - var other = HashSetManaged(u32).init(std.testing.allocator); - defer other.deinit(); - - try expect(other.isEmpty()); - - _ = try other.add(8); - _ = try other.add(6); - _ = try other.add(7); - - _ = try other.appendSlice(&.{ 5, 3, 0, 9 }); - - try expect(set.eql(other)); - try expectEqual(other.cardinality(), 7); - - try expect(other.remove(8)); - try expectEqual(other.cardinality(), 6); - try expect(!other.remove(55)); - try expect(!set.eql(other)); - - other.removeAllSlice(&.{ 6, 7 }); - try expectEqual(other.cardinality(), 4); - - // intersectionOf - var inter = try set.intersectionOf(other); - defer inter.deinit(); - try expect(!inter.isEmpty()); - try expectEqual(inter.cardinality(), 4); - try expect(inter.containsAllSlice(&.{ 5, 3, 0, 9 })); - - // Union - var un = try set.unionOf(other); - defer 
un.deinit(); - try expect(!un.isEmpty()); - try expectEqual(un.cardinality(), 7); - try expect(un.containsAllSlice(&.{ 8, 6, 7, 5, 3, 0, 9 })); - - // differenceOf - var diff = try set.differenceOf(other); - defer diff.deinit(); - try expect(!diff.isEmpty()); - try expectEqual(diff.cardinality(), 3); - try expect(diff.containsAllSlice(&.{ 8, 7, 6 })); - - // symmetricDifferenceOf - _ = try set.add(11111); - _ = try set.add(9999); - _ = try other.add(7777); - var symmDiff = try set.symmetricDifferenceOf(other); - defer symmDiff.deinit(); - try expect(!symmDiff.isEmpty()); - try expectEqual(symmDiff.cardinality(), 6); - try expect(symmDiff.containsAllSlice(&.{ 7777, 11111, 8, 7, 6, 9999 })); - - // subsetOf - - // supersetOf -} - -test "isDisjoint" { - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - var b = HashSetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - _ = try b.appendSlice(&.{ 202, 303, 403 }); - - // Test the true case. - try expect(a.isDisjoint(b)); - try expect(b.isDisjoint(a)); - - // Test the false case. 
- var c = HashSetManaged(u32).init(std.testing.allocator); - defer c.deinit(); - _ = try c.appendSlice(&.{ 20, 30, 400 }); - - try expect(!a.isDisjoint(c)); - try expect(!c.isDisjoint(a)); -} - -test "clear/capacity" { - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - - try expectEqual(0, a.cardinality()); - try expectEqual(0, a.capacity()); - - const cap = 99; - var b = try HashSetManaged(u32).initCapacity(std.testing.allocator, cap); - defer b.deinit(); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - for (0..cap) |val| { - _ = try b.add(@intCast(val)); - } - - try expectEqual(99, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearRetainingCapacity(); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearAndFree(); - - try expectEqual(0, b.cardinality()); - try expectEqual(b.capacity(), 0); -} - -test "clone" { - { - // clone - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - var b = try a.clone(); - defer b.deinit(); - - try expect(a.eql(b)); - } - - { - // cloneWithAllocator - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - // Use a different allocator than the test one. - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const tmpAlloc = gpa.allocator(); - defer { - const deinit_status = gpa.deinit(); - // Fail test; can't try in defer as defer is executed after we return - if (deinit_status == .leak) expect(false) catch @panic("TEST FAIL"); - } - - var b = try a.cloneWithAllocator(tmpAlloc); - defer b.deinit(); - - try expect(a.allocator.ptr != b.allocator.ptr); - try expect(a.eql(b)); - } -} - -test "pop" { - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - // No assumptions can be made about pop order. 
- while (a.pop()) |result| { - try expect(result == 20 or result == 30 or result == 40); - } - - // At this point, set must be empty. - try expectEqual(a.cardinality(), 0); - try expect(a.isEmpty()); - - // Lastly, pop should safely return null. - try expect(a.pop() == null); -} - -test "subset/superset" { - { - // subsetOf - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 1, 2, 3, 5, 7 }); - - var b = HashSetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - - // b should be a subset of a. - try expect(b.subsetOf(a)); - - _ = try b.add(72); - - // b should not be a subset of a, because 72 is not in a. - try expect(!b.subsetOf(a)); - } - - { - // supersetOf - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 9, 5, 2, 1, 11 }); - - var b = HashSetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - _ = try b.appendSlice(&.{ 5, 2, 11 }); - - // set a should be a superset of set b - try expect(!b.supersetOf(a)); - - _ = try b.add(42); - - // TODO: figure out why this fails. 
- //set a should not be a superset of set b because b has 42 - // try expect(a.supersetOf(&b)); - } -} - -test "iterator" { - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 20, 30, 40 }); - - var sum: u32 = 0; - var iterCount: usize = 0; - var iter = a.iterator(); - while (iter.next()) |el| { - sum += el.*; - iterCount += 1; - } - - try expectEqual(90, sum); - try expectEqual(3, iterCount); -} - -test "in-place methods" { - // intersectionUpdate - var a = HashSetManaged(u32).init(std.testing.allocator); - defer a.deinit(); - _ = try a.appendSlice(&.{ 10, 20, 30, 40 }); - - var b = HashSetManaged(u32).init(std.testing.allocator); - defer b.deinit(); - _ = try b.appendSlice(&.{ 44, 20, 30, 66 }); - - try a.intersectionUpdate(b); - try expectEqual(a.cardinality(), 2); - try expect(a.containsAllSlice(&.{ 20, 30 })); - - // unionUpdate - var c = HashSetManaged(u32).init(std.testing.allocator); - defer c.deinit(); - _ = try c.appendSlice(&.{ 10, 20, 30, 40 }); - - var d = HashSetManaged(u32).init(std.testing.allocator); - defer d.deinit(); - _ = try d.appendSlice(&.{ 44, 20, 30, 66 }); - - try c.unionUpdate(d); - try expectEqual(c.cardinality(), 6); - try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); - - // differenceUpdate - var e = HashSetManaged(u32).init(std.testing.allocator); - defer e.deinit(); - _ = try e.appendSlice(&.{ 1, 11, 111, 1111, 11111 }); - - var f = HashSetManaged(u32).init(std.testing.allocator); - defer f.deinit(); - _ = try f.appendSlice(&.{ 1, 11, 111, 222, 2222, 1111 }); - - try e.differenceUpdate(f); - - try expectEqual(1, e.cardinality()); - try expect(e.contains(11111)); - - // symmetricDifferenceUpdate - var g = HashSetManaged(u32).init(std.testing.allocator); - defer g.deinit(); - _ = try g.appendSlice(&.{ 2, 22, 222, 2222, 22222 }); - - var h = HashSetManaged(u32).init(std.testing.allocator); - defer h.deinit(); - _ = try h.appendSlice(&.{ 1, 11, 111, 333, 3333, 2222, 1111 }); 
- - try g.symmetricDifferenceUpdate(h); - - try expectEqual(10, g.cardinality()); - try expect(g.containsAllSlice(&.{ 1, 2, 11, 111, 22, 222, 1111, 333, 3333, 22222 })); -} - -test "sizeOf" { - const unmanagedSize = @sizeOf(SetUnmanaged(u32)); - const managedSize = @sizeOf(HashSetManaged(u32)); - const managedWithVoidContextSize = @sizeOf(HashSetManagedWithContext(u32, void, undefined)); - const managedWithContextSize = @sizeOf(HashSetManagedWithContext(u32, TestContext, 75)); - - // The managed must be only 16 bytes larger, the cost of the internal allocator - // otherwise we've added some CRAP! - const expectedDiff = 16; - try expectEqual(expectedDiff, managedSize - unmanagedSize); - - // The managed with void context must be the same size as the managed. - // The managed with context must be larger by the size of the Context type, - // due to the added Context + allocator and alignment padding. - const expectedContextDiff = 16; - try expectEqual(expectedDiff, managedWithVoidContextSize - unmanagedSize); - try expectEqual(expectedContextDiff, managedWithContextSize - managedSize); -} - -const TestContext = struct { - const Self = @This(); - pub fn hash(_: Self, key: u32) u64 { - return @as(u64, key) *% 0x517cc1b727220a95; - } - pub fn eql(_: Self, a: u32, b: u32) bool { - return a == b; - } -}; - -test "custom hash function" { - const context = TestContext{}; - var set = HashSetManagedWithContext(u32, TestContext, 75).initContext(testing.allocator, context); - defer set.deinit(); - - _ = try set.add(123); - try expect(set.contains(123)); -} diff --git a/src/main.zig b/src/main.zig index 8d05ad8..dd71c05 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,12 +2,18 @@ const std = @import("std"); const set = @import("root.zig"); pub fn main(init: std.process.Init) !void { - const gpa = init.gpa; const repetitions: usize = 1000; - const times = try gpa.alloc(i64, repetitions); - defer gpa.free(times); + + const times_diff = try gpa.alloc(i64, repetitions); + defer 
gpa.free(times_diff); + + const times_sym = try gpa.alloc(i64, repetitions); + defer gpa.free(times_sym); + + const times_inter = try gpa.alloc(i64, repetitions); + defer gpa.free(times_inter); const upper: u32 = 100000; var B = set.Set(u32).init(); @@ -18,27 +24,48 @@ pub fn main(init: std.process.Init) !void { _ = try B.add(gpa, e); } - std.debug.print("starting benchkmark\n", .{}); + std.debug.print("starting benchmark\n", .{}); + for (0..repetitions) |i| { + var A_diff = set.Set(u32).init(); + defer A_diff.deinit(gpa); + + var A_sym = set.Set(u32).init(); + defer A_sym.deinit(gpa); - var A = set.Set(u32).init(); - defer A.deinit(gpa); + var A_inter = set.Set(u32).init(); + defer A_inter.deinit(gpa); for (0..upper) |j| { const e: u32 = @intCast(j); - _ = try A.add(gpa, @as(u32, e)); + _ = try A_diff.add(gpa, e); + _ = try A_sym.add(gpa, e); + _ = try A_inter.add(gpa, e); } - const startTime = std.Io.Timestamp.now(init.io, .awake); - _ = try A.differenceUpdate(B); - const elapsedTime = startTime.untilNow(init.io, .awake); + const start_diff = std.Io.Timestamp.now(init.io, .awake); + _ = try A_diff.differenceUpdate(B); + const elapsed_diff = start_diff.untilNow(init.io, .awake); + times_diff[i] = elapsed_diff.toMilliseconds(); - times[i] = elapsedTime.toMilliseconds(); + const start_sym = std.Io.Timestamp.now(init.io, .awake); + _ = try A_sym.symmetricDifferenceUpdate(gpa, B); + const elapsed_sym = start_sym.untilNow(init.io, .awake); + times_sym[i] = elapsed_sym.toMilliseconds(); + + const start_inter = std.Io.Timestamp.now(init.io, .awake); + _ = try A_inter.intersectionUpdate(gpa, B); + const elapsed_inter = start_inter.untilNow(init.io, .awake); + times_inter[i] = elapsed_inter.toMilliseconds(); } - const stats: Stats = Stats.calculateFromData(times); + const stats_diff = Stats.calculateFromData(times_diff); + const stats_sym = Stats.calculateFromData(times_sym); + const stats_inter = Stats.calculateFromData(times_inter); - std.debug.print("{s: <24}: {d:.4} 
+/- {d:.6} (95% CI)\n", .{ "Avg Time (ms)", stats.mean, stats.ci }); + std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Difference (ms)", stats_diff.mean, stats_diff.ci }); + std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Sym Difference (ms)", stats_sym.mean, stats_sym.ci }); + std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Intersection (ms)", stats_inter.mean, stats_inter.ci }); } pub const Stats = struct { @@ -64,4 +91,3 @@ pub const Stats = struct { return Stats{ .mean = mean, .ci = margin_error }; } }; - From b47c9a6c3b00e5b2c69f205a71e30a9c7d332424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Soler=20Valad=C3=A9s?= Date: Wed, 25 Feb 2026 19:24:12 +0100 Subject: [PATCH 3/5] Removed managed variants --- src/array_hash_set.zig | 52 +-- src/dynamic_bit_set.zig | 923 ---------------------------------------- src/main.zig | 93 ---- src/root.zig | 8 +- 4 files changed, 30 insertions(+), 1046 deletions(-) delete mode 100644 src/dynamic_bit_set.zig delete mode 100644 src/main.zig diff --git a/src/array_hash_set.zig b/src/array_hash_set.zig index d7d4705..17ee324 100644 --- a/src/array_hash_set.zig +++ b/src/array_hash_set.zig @@ -33,7 +33,7 @@ fn selectMap(comptime E: type) type { } } -pub fn ArraySetUnmanaged(comptime E: type) type { +pub fn ArraySet(comptime E: type) type { return struct { /// The type of the internal hash map pub const Map = selectMap(E); @@ -496,7 +496,7 @@ const expectEqual = std.testing.expectEqual; test "example usage" { // Create a set of u32s called A - var A = ArraySetUnmanaged(u32).init(); + var A = ArraySet(u32).init(); defer A.deinit(testing.allocator); // Add some data @@ -508,7 +508,7 @@ test "example usage" { _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); // Create another set called B - var B = ArraySetUnmanaged(u32).init(); + var B = ArraySet(u32).init(); defer B.deinit(testing.allocator); // Add data to B @@ -533,10 +533,10 @@ test "example usage" { } test "string usage" { - 
var A = ArraySetUnmanaged([]const u8).init(); + var A = ArraySet([]const u8).init(); defer A.deinit(testing.allocator); - var B = ArraySetUnmanaged([]const u8).init(); + var B = ArraySet([]const u8).init(); defer B.deinit(testing.allocator); _ = try A.add(testing.allocator, "Hello"); @@ -549,7 +549,7 @@ test "string usage" { } test "comprehensive usage" { - var set = ArraySetUnmanaged(u32).init(); + var set = ArraySet(u32).init(); defer set.deinit(testing.allocator); try expect(set.isEmpty()); @@ -573,7 +573,7 @@ test "comprehensive usage" { try expectEqual(set.cardinality(), 7); - var other = ArraySetUnmanaged(u32).init(); + var other = ArraySet(u32).init(); defer other.deinit(testing.allocator); try expect(other.isEmpty()); @@ -632,11 +632,11 @@ test "comprehensive usage" { } test "isDisjoint" { - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - var b = ArraySetUnmanaged(u32).init(); + var b = ArraySet(u32).init(); defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); @@ -645,7 +645,7 @@ test "isDisjoint" { try expect(b.isDisjoint(a)); // Test the false case. 
- var c = ArraySetUnmanaged(u32).init(); + var c = ArraySet(u32).init(); defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); @@ -656,7 +656,7 @@ test "isDisjoint" { test "clone" { // clone - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -667,14 +667,14 @@ test "clone" { } test "clear/capacity" { - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); try expectEqual(0, a.cardinality()); try expectEqual(0, a.capacity()); const cap = 99; - var b = try ArraySetUnmanaged(u32).initCapacity(testing.allocator, cap); + var b = try ArraySet(u32).initCapacity(testing.allocator, cap); defer b.deinit(testing.allocator); try expectEqual(0, b.cardinality()); @@ -699,7 +699,7 @@ test "clear/capacity" { } test "iterator" { - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -716,7 +716,7 @@ test "iterator" { } test "pop" { - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -735,11 +735,11 @@ test "pop" { test "in-place methods" { // intersectionUpdate - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var b = ArraySetUnmanaged(u32).init(); + var b = ArraySet(u32).init(); defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -748,11 +748,11 @@ test "in-place methods" { try expect(a.containsAllSlice(&.{ 20, 30 })); // unionUpdate - var c = ArraySetUnmanaged(u32).init(); + var c = ArraySet(u32).init(); defer c.deinit(testing.allocator); _ = try 
c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var d = ArraySetUnmanaged(u32).init(); + var d = ArraySet(u32).init(); defer d.deinit(testing.allocator); _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -761,11 +761,11 @@ test "in-place methods" { try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); // differenceUpdate - var e = ArraySetUnmanaged(u32).init(); + var e = ArraySet(u32).init(); defer e.deinit(testing.allocator); _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); - var f = ArraySetUnmanaged(u32).init(); + var f = ArraySet(u32).init(); defer f.deinit(testing.allocator); _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); @@ -775,11 +775,11 @@ test "in-place methods" { try expect(e.contains(11111)); // symmetricDifferenceUpdate - var g = ArraySetUnmanaged(u32).init(); + var g = ArraySet(u32).init(); defer g.deinit(testing.allocator); _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - var h = ArraySetUnmanaged(u32).init(); + var h = ArraySet(u32).init(); defer h.deinit(testing.allocator); _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); @@ -790,7 +790,7 @@ test "in-place methods" { } test "removals" { - var a = ArraySetUnmanaged(u32).init(); + var a = ArraySet(u32).init(); defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 5, 6, 7, 8 }); @@ -809,7 +809,7 @@ test "removals" { a.removeAllSlice(&.{ 50, 60, 70, 80 }); try expectEqual(4, a.cardinality()); - var b = ArraySetUnmanaged(u32).init(); + var b = ArraySet(u32).init(); defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 111, 222, 333, 444 }); @@ -822,5 +822,5 @@ test "sizeOf matches" { // No bloat guarantee, after all we're just building on top of what's good. 
const expectedByteSize = 40; try expectEqual(expectedByteSize, @sizeOf(std.array_hash_map.AutoArrayHashMapUnmanaged(u32, void))); - try expectEqual(expectedByteSize, @sizeOf(ArraySetUnmanaged(u32))); + try expectEqual(expectedByteSize, @sizeOf(ArraySet(u32))); } diff --git a/src/dynamic_bit_set.zig b/src/dynamic_bit_set.zig deleted file mode 100644 index dbe46ed..0000000 --- a/src/dynamic_bit_set.zig +++ /dev/null @@ -1,923 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const math = std.math; -const Allocator = mem.Allocator; - -// Note: this should not work for strings. This is a very niche implementation for numbers! - - -/// HashSetWithContext creates a set based on element type E with custom hashing behavior. -/// Helpful comments goes here -pub fn BitSetUnmanaged(comptime E: type) type { - return struct { - unmanaged: std.bit_set.DynamicallyBitSetUnmanaged, - - pub const Iterator = Map.KeyIterator; - - const Self = @This(); - - /// Initialize a default set without context - pub fn init() Self { - return .{ - .unmanaged = std.bit_set.DynamicallyBitSetUnmanaged{}, - }; - } - - /// Destroys the unmanaged Set. - pub fn deinit(self: *Self, allocator: Allocator) void { - // TODO do when done with add - } - - /// Capacity: the maximum number i can insert without allocating - pub fn add(self: *Self, allocator: Allocator, element: E) Allocator.Error!bool { - if (self.unmanaged.capacity >= e) { - // Allocate memory to hold the int - } - // la lògica és correcta ya segut - const mask_bits: comptime_int = @bitSizeOf(self.unmanaged.MaskInt); - const mask_index = @divFloor(element, mask_bits); - const bit = @mod(element, mask_bits); - self.unmanaged.masks[mask] |= @as(self.unmanaged.ShiftInt, 1) << bit; - } - - /// Adds a single element to the set. Asserts that there is enough capacity. - /// A bool is returned indicating if the element was actually added - /// if not already known. 
- pub fn addAssumeCapacity(self: *Self, element: E) bool { - //TODO - } - - /// Appends all elements from the provided set, and may allocate. - /// append returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the Set. - pub fn append(self: *Self, allocator: Allocator, other: Self) Allocator.Error!Size { - const prevCount = self.unmanaged.count(); - - try self.unionUpdate(allocator, other); - return self.unmanaged.count() - prevCount; - } - - /// Appends all elements from the provided slice, and may allocate. - /// appendSlice returns an Allocator.Error or Size which represents how - /// many elements added and not previously in the slice. - pub fn appendSlice(self: *Self, allocator: Allocator, elements: []const E) Allocator.Error!Size { - const prevCount = self.unmanaged.count(); - for (elements) |el| { - try self.unmanaged.put(allocator, el, {}); - } - return self.unmanaged.count() - prevCount; - } - - /// Returns the number of total elements which may be present before - /// it is no longer guaranteed that no allocations will be performed. - pub fn capacity(self: Self) Size { - return self.unmanaged.capacity(); - } - - /// Cardinality effectively returns the size of the set. - pub fn cardinality(self: Self) Size { - return self.unmanaged.count(); - } - - /// Invalidates all element pointers. - pub fn clearAndFree(self: *Self, allocator: Allocator) void { - self.unmanaged.clearAndFree(allocator); - } - - /// Invalidates all element pointers. - pub fn clearRetainingCapacity(self: *Self) void { - self.unmanaged.clearRetainingCapacity(); - } - - /// Creates a copy of this set, using the same allocator. - /// clone may return an Allocator.Error or the cloned Set. - pub fn clone(self: *Self, allocator: Allocator) Allocator.Error!Self { - // Take a stack copy of self. - var cloneSelf = self.*; - // Clone the interal map. 
- cloneSelf.unmanaged = try self.unmanaged.clone(allocator); - return cloneSelf; - } - - /// Returns true when the provided element exists within the Set otherwise false. - pub fn contains(self: Self, element: E) bool { - return self.unmanaged.contains(element); - } - - /// Returns true when all elements in the other Set are present in this Set - /// otherwise false. - pub fn containsAll(self: Self, other: Self) bool { - var iter = other.iterator(); - while (iter.next()) |el| { - if (!self.unmanaged.contains(el.*)) { - return false; - } - } - return true; - } - - /// Returns true when all elements in the provided slice are present otherwise false. - pub fn containsAllSlice(self: Self, elements: []const E) bool { - for (elements) |el| { - if (!self.unmanaged.contains(el)) { - return false; - } - } - return true; - } - - /// Returns true when at least one or more elements from the other Set exist within - /// this Set otherwise false. - pub fn containsAny(self: Self, other: Self) bool { - var iter = other.iterator(); - while (iter.next()) |el| { - if (self.unmanaged.contains(el.*)) { - return true; - } - } - return false; - } - - /// Returns true when at least one or more elements from the slice exist within - /// this Set otherwise false. - pub fn containsAnySlice(self: Self, elements: []const E) bool { - for (elements) |el| { - if (self.unmanaged.contains(el)) { - return true; - } - } - return false; - } - - /// differenceOf returns the difference between this set - /// and other. The returned set will contain - /// all elements of this set that are not also - /// elements of the other. - /// - /// Caller owns the newly allocated/returned set. 
- pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var diffSet = Self.init(); - - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - _ = try diffSet.add(allocator, entry.key_ptr.*); - } - } - return diffSet; - } - - /// differenceUpdate does an in-place mutation of this set - /// and other. This set will contain all elements of this set that are not - /// also elements of other. - pub fn differenceUpdate(self: *Self, other: Self) Allocator.Error!void { - var iter = other.iterator(); - - while (iter.next()) |key_ptr| { - _ = self.remove(key_ptr.*); - } - } - - fn dump(self: Self) void { - std.log.err("\ncardinality: {d}\n", .{self.cardinality()}); - var iter = self.iterator(); - while (iter.next()) |el| { - std.log.err(" element: {d}\n", .{el.*}); - } - } - - /// Increases capacity, guaranteeing that insertions up until the - /// `expected_count` will not cause an allocation, and therefore cannot fail. - pub fn ensureTotalCapacity(self: *Self, allocator: Allocator, expected_count: Size) Allocator.Error!void { - return self.unmanaged.ensureTotalCapacity(allocator, expected_count); - } - - /// Increases capacity, guaranteeing that insertions up until - /// `additional_count` **more** items will not cause an allocation, and - /// therefore cannot fail. - pub fn ensureUnusedCapacity(self: *Self, allocator: Allocator, additional_count: Size) Allocator.Error!void { - return self.unmanaged.ensureUnusedCapacity(allocator, additional_count); - } - - /// eql determines if two sets are equal to each - /// other. If they have the same cardinality - /// and contain the same elements, they are - /// considered equal. The order in which - /// the elements were added is irrelevant. - pub fn eql(self: Self, other: Self) bool { - // First discriminate on cardinalities of both sets. 
- if (self.unmanaged.count() != other.unmanaged.count()) { - return false; - } - - // Now check for each element one for one and exit early - // on the first non-match. - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - return false; - } - } - - return true; - } - - /// intersectionOf returns a new set containing only the elements - /// that exist only in both sets. - /// - /// Caller owns the newly allocated/returned set. - pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var interSet = Self.init(); - - // Optimization: iterate over whichever set is smaller. - // Matters when disparity in cardinality is large. - var s = other; - var o = self; - if (self.unmanaged.count() < other.unmanaged.count()) { - s = self; - o = other; - } - - var iter = s.unmanaged.iterator(); - while (iter.next()) |entry| { - if (o.unmanaged.contains(entry.key_ptr.*)) { - _ = try interSet.add(allocator, entry.key_ptr.*); - } - } - - return interSet; - } - - /// intersectionUpdate does an in-place intersecting update - /// to the current set from the other set keeping only - /// elements found in this Set and the other Set. - pub fn intersectionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - var to_remove: std.ArrayList(E) = .empty; - defer to_remove.deinit(allocator); - - var iter = self.iterator(); - while (iter.next()) |key_ptr| { - if (!other.contains(key_ptr.*)) { - try to_remove.append(allocator, key_ptr.*); - } - } - - for (to_remove.items) |item| { - _ = self.remove(item); - } - } - - /// isDisjoint returns true if the intersection between two sets is the null set. - /// Otherwise returns false. 
- pub fn isDisjoint(self: Self, other: Self) bool { - // Optimization: Find the smaller of the two, and iterate over the smaller set - const smaller = if (self.cardinality() <= other.cardinality()) self else other; - const larger = if (self.cardinality() <= other.cardinality()) other else self; - - var iter = smaller.iterator(); - while (iter.next()) |el| { - if (larger.contains(el.*)) { - return false; - } - } - return true; - } - - pub fn isEmpty(self: Self) bool { - return self.unmanaged.count() == 0; - } - - /// Create an iterator over the elements in the set. - /// The iterator is invalidated if the set is modified during iteration. - pub fn iterator(self: Self) Iterator { - return self.unmanaged.keyIterator(); - } - - /// properSubsetOf determines if every element in this set is in - /// the other set but the two sets are not equal. - pub fn properSubsetOf(self: Self, other: Self) bool { - return self.unmanaged.count() < other.unmanaged.count() and self.subsetOf(other); - } - - /// properSupersetOf determines if every element in the other set - /// is in this set but the two sets are not equal. - pub fn properSupersetOf(self: Self, other: Self) bool { - return self.unmanaged.count() > other.unmanaged.count() and self.supersetOf(other); - } - - /// subsetOf determines if every element in this set is in - /// the other set. - pub fn subsetOf(self: Self, other: Self) bool { - // First discriminate on cardinalties of both sets. - if (self.unmanaged.count() > other.unmanaged.count()) { - return false; - } - - // Now check that self set has at least some elements from other. - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - return false; - } - } - - return true; - } - - /// subsetOf determines if every element in the other Set is in - /// the this Set. - pub fn supersetOf(self: Self, other: Self) bool { - // This is just the converse of subsetOf. 
- return other.subsetOf(self); - } - - /// pop removes and returns an arbitrary ?E from the set. - /// Order is not guaranteed. - /// This safely returns null if the Set is empty. - pub fn pop(self: *Self) ?E { - if (self.unmanaged.count() > 0) { - var iter = self.unmanaged.iterator(); - // NOTE: No in-place mutation as it invalidates live iterators. - // So a temporary capture is taken. - var capturedElement: E = undefined; - while (iter.next()) |entry| { - capturedElement = entry.key_ptr.*; - break; - } - _ = self.unmanaged.remove(capturedElement); - return capturedElement; - } else { - return null; - } - } - - /// remove discards a single element from the Set - pub fn remove(self: *Self, element: E) bool { - return self.unmanaged.remove(element); - } - - /// removesAll discards all elements passed from the other Set from - /// this Set - pub fn removeAll(self: *Self, other: Self) void { - var iter = other.iterator(); - while (iter.next()) |el| { - _ = self.unmanaged.remove(el); - } - } - - /// removesAllSlice discards all elements passed as a slice from the Set - pub fn removeAllSlice(self: *Self, elements: []const E) void { - for (elements) |el| { - _ = self.unmanaged.remove(el); - } - } - - /// symmetricDifferenceOf returns a new set with all elements which are - /// in either this set or the other set but not in both. - /// - /// The caller owns the newly allocated/returned Set. 
- pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var sdSet = Self.init(); - - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!other.unmanaged.contains(entry.key_ptr.*)) { - _ = try sdSet.add(allocator, entry.key_ptr.*); - } - } - - iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - if (!self.unmanaged.contains(entry.key_ptr.*)) { - _ = try sdSet.add(allocator, entry.key_ptr.*); - } - } - - return sdSet; - } - - /// symmetricDifferenceUpdate does an in-place mutation with all elements - /// which are in either this set or the other set but not in both. - pub fn symmetricDifferenceUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - var iter = other.iterator(); - while (iter.next()) |key_ptr| { - const element = key_ptr.*; - - if (self.contains(element)) { - _ = self.remove(element); - } else { - _ = try self.add(allocator, element); - } - } - } - - /// union returns a new set with all elements in both sets. - /// - /// The caller owns the newly allocated/returned Set. - pub fn unionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - // Sniff out larger set for capacity hint. - var n = self.unmanaged.count(); - if (other.unmanaged.count() > n) n = other.unmanaged.count(); - - var uSet = try Self.initCapacity( - allocator, - @intCast(n), - ); - - var iter = self.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try uSet.add(allocator, entry.key_ptr.*); - } - - iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try uSet.add(allocator, entry.key_ptr.*); - } - - return uSet; - } - - /// unionUpdate does an in-place union of the current Set and other Set. - /// - /// Allocations may occur. 
- pub fn unionUpdate(self: *Self, allocator: Allocator, other: Self) Allocator.Error!void { - var iter = other.unmanaged.iterator(); - while (iter.next()) |entry| { - _ = try self.add(allocator, entry.key_ptr.*); - } - } - }; -} - -const testing = std.testing; -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; - -test "example usage" { - // Create a set of u32s called A - var A = HashSet(u32).init(); - defer A.deinit(testing.allocator); - - // Add some data - _ = try A.add(testing.allocator, 5); - _ = try A.add(testing.allocator, 6); - _ = try A.add(testing.allocator, 7); - - // Add more data; single shot, duplicate data is ignored. - _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); - - // Create another set called B - var B = HashSet(u32).init(); - defer B.deinit(testing.allocator); - - // Add data to B - _ = try B.appendSlice(testing.allocator, &.{ 50, 30, 20 }); - - // // Get the union of A | B - var un = try A.unionOf(testing.allocator, B); - defer un.deinit(testing.allocator); - - try expectEqual(9, un.cardinality()); - - // Grab an iterator and dump the contents. 
- var iter = un.iterator(); - while (iter.next()) |el| { - std.log.debug("element: {d}", .{el.*}); - } -} - -test "string usage" { - var A = HashSet([]const u8).init(); - defer A.deinit(testing.allocator); - - var B = HashSet([]const u8).init(); - defer B.deinit(testing.allocator); - - _ = try A.add(testing.allocator, "Hello"); - _ = try B.add(testing.allocator, "World"); - - var C = try A.unionOf(testing.allocator, B); - defer C.deinit(testing.allocator); - try expectEqual(2, C.cardinality()); - try expect(C.containsAllSlice(&.{ "Hello", "World" })); -} - -test "comprehensive usage" { - var set = HashSet(u32).init(); - defer set.deinit(testing.allocator); - - try expect(set.isEmpty()); - - _ = try set.add(testing.allocator, 8); - _ = try set.add(testing.allocator, 6); - _ = try set.add(testing.allocator, 7); - try expectEqual(set.cardinality(), 3); - - _ = try set.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); - - // Positive cases. - try expect(set.contains(8)); - try expect(set.containsAllSlice(&.{ 5, 3, 9 })); - try expect(set.containsAnySlice(&.{ 5, 55, 12 })); - - // Negative cases. 
- try expect(!set.contains(99)); - try expect(!set.containsAllSlice(&.{ 8, 6, 77 })); - try expect(!set.containsAnySlice(&.{ 99, 55, 44 })); - - try expectEqual(set.cardinality(), 7); - - var other = HashSet(u32).init(); - defer other.deinit(testing.allocator); - - try expect(other.isEmpty()); - - _ = try other.add(testing.allocator, 8); - _ = try other.add(testing.allocator, 6); - _ = try other.add(testing.allocator, 7); - - _ = try other.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); - - try expect(set.eql(other)); - try expectEqual(other.cardinality(), 7); - - try expect(other.remove(8)); - try expectEqual(other.cardinality(), 6); - try expect(!other.remove(55)); - try expect(!set.eql(other)); - - other.removeAllSlice(&.{ 6, 7 }); - try expectEqual(other.cardinality(), 4); - - // intersectionOf - var inter = try set.intersectionOf(testing.allocator, other); - defer inter.deinit(testing.allocator); - try expect(!inter.isEmpty()); - try expectEqual(inter.cardinality(), 4); - try expect(inter.containsAllSlice(&.{ 5, 3, 0, 9 })); - - // Union - var un = try set.unionOf(testing.allocator, other); - defer un.deinit(testing.allocator); - try expect(!un.isEmpty()); - try expectEqual(un.cardinality(), 7); - try expect(un.containsAllSlice(&.{ 8, 6, 7, 5, 3, 0, 9 })); - - // differenceOf - var diff = try set.differenceOf(testing.allocator, other); - defer diff.deinit(testing.allocator); - try expect(!diff.isEmpty()); - try expectEqual(diff.cardinality(), 3); - try expect(diff.containsAllSlice(&.{ 8, 7, 6 })); - - // symmetricDifferenceOf - _ = try set.add(testing.allocator, 11111); - _ = try set.add(testing.allocator, 9999); - _ = try other.add(testing.allocator, 7777); - var symmDiff = try set.symmetricDifferenceOf(testing.allocator, other); - defer symmDiff.deinit(testing.allocator); - try expect(!symmDiff.isEmpty()); - try expectEqual(symmDiff.cardinality(), 6); - try expect(symmDiff.containsAllSlice(&.{ 7777, 11111, 8, 7, 6, 9999 })); - - // subsetOf - - // 
supersetOf -} - -test "isDisjoint" { - var a = HashSet(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - var b = HashSet(u32).init(); - defer b.deinit(testing.allocator); - _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); - - // Test the true case. - try expect(a.isDisjoint(b)); - try expect(b.isDisjoint(a)); - - // Test the false case. - var c = HashSet(u32).init(); - defer c.deinit(testing.allocator); - _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); - - try expect(!a.isDisjoint(c)); - try expect(!c.isDisjoint(a)); -} - -test "clone" { - - // clone - var a = HashSet(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - var b = try a.clone(testing.allocator); - defer b.deinit(testing.allocator); - - try expect(a.eql(b)); -} - -test "clear/capacity" { - var a = HashSet(u32).init(); - defer a.deinit(testing.allocator); - - try expectEqual(0, a.cardinality()); - try expectEqual(0, a.capacity()); - - const cap = 99; - var b = try HashSet(u32).initCapacity(testing.allocator, cap); - defer b.deinit(testing.allocator); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - for (0..cap) |val| { - _ = try b.add(testing.allocator, @intCast(val)); - } - - try expectEqual(99, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearRetainingCapacity(); - - try expectEqual(0, b.cardinality()); - try expect(b.capacity() >= cap); - - b.clearAndFree(testing.allocator); - - try expectEqual(0, b.cardinality()); - try expectEqual(b.capacity(), 0); -} - -test "iterator" { - var a = HashSet(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - var sum: u32 = 0; - var iterCount: usize = 0; - var iter = a.iterator(); - while (iter.next()) |el| { - sum += el.*; - iterCount += 1; - } - - try expectEqual(90, sum); - try expectEqual(3, 
iterCount); -} - -test "pop" { - var a = HashSet(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - - // No assumptions can be made about pop order. - while (a.pop()) |result| { - try expect(result == 20 or result == 30 or result == 40); - } - - // At this point, set must be empty. - try expectEqual(a.cardinality(), 0); - try expect(a.isEmpty()); - - // Lastly, pop should safely return null. - try expect(a.pop() == null); -} - -test "in-place methods" { - // intersectionUpdate - var a = HashSet(u32).init(); - defer a.deinit(testing.allocator); - _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - - var b = HashSet(u32).init(); - defer b.deinit(testing.allocator); - _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); - - try a.intersectionUpdate(testing.allocator, b); - try expectEqual(a.cardinality(), 2); - try expect(a.containsAllSlice(&.{ 20, 30 })); - - // unionUpdate - var c = HashSet(u32).init(); - defer c.deinit(testing.allocator); - _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - - var d = HashSet(u32).init(); - defer d.deinit(testing.allocator); - _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); - - try c.unionUpdate(testing.allocator, d); - try expectEqual(c.cardinality(), 6); - try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); - - // differenceUpdate - var e = HashSet(u32).init(); - defer e.deinit(testing.allocator); - _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); - - var f = HashSet(u32).init(); - defer f.deinit(testing.allocator); - _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); - - try e.differenceUpdate(f); - - try expectEqual(1, e.cardinality()); - try expect(e.contains(11111)); - - // symmetricDifferenceUpdate - var g = HashSet(u32).init(); - defer g.deinit(testing.allocator); - _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - - var h = 
HashSet(u32).init(); - defer h.deinit(testing.allocator); - _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); - - try g.symmetricDifferenceUpdate(testing.allocator, h); - - try expectEqual(10, g.cardinality()); - try expect(g.containsAllSlice(&.{ 1, 2, 11, 111, 22, 222, 1111, 333, 3333, 22222 })); -} - -test "sizeOf matches" { - // No bloat guarantee, after all we're just building on top of what's good. - // "What's good Miley!?!?"" - const expectedByteSize = 24; - const autoHashMapSize = @sizeOf(std.hash_map.AutoHashMapUnmanaged(u32, void)); - const hashSetSize = @sizeOf(HashSet(u32)); - try expectEqual(expectedByteSize, autoHashMapSize); - try expectEqual(expectedByteSize, hashSetSize); - - // The unmanaged with void context must be the same size as the unmanaged. - // The unmanaged with context must be larger by the size of the empty Context struct, - // due to the added Context and alignment padding. - const expectedContextDiff = 8; - const hashSetWithVoidContextSize = @sizeOf(HashSetWithContext(u32, void, undefined)); - const hashSetWithContextSize = @sizeOf(HashSetWithContext(u32, TestContext, 75)); - try expectEqual(0, hashSetWithVoidContextSize - hashSetSize); - try expectEqual(expectedContextDiff, hashSetWithContextSize - hashSetSize); -} - -const TestContext = struct { - const Self = @This(); - pub fn hash(_: Self, key: u32) u64 { - return @as(u64, key) *% 0x517cc1b727220a95; - } - pub fn eql(_: Self, a: u32, b: u32) bool { - return a == b; - } -}; - -test "custom hash function comprehensive" { - const context = TestContext{}; - var set = HashSetWithContext(u32, TestContext, 75).initContext(context); - defer set.deinit(testing.allocator); - - // Test basic operations - _ = try set.add(testing.allocator, 123); - _ = try set.add(testing.allocator, 456); - try expect(set.contains(123)); - try expect(set.contains(456)); - try expect(!set.contains(789)); - try expectEqual(set.cardinality(), 2); - - // Test clone with custom 
context - var cloned = try set.clone(testing.allocator); - defer cloned.deinit(testing.allocator); - try expect(cloned.contains(123)); - try expect(set.eql(cloned)); - - // Test set operations with custom context - var other = HashSetWithContext(u32, TestContext, 75).initContext(context); - defer other.deinit(testing.allocator); - _ = try other.add(testing.allocator, 456); - _ = try other.add(testing.allocator, 789); - - // Test union - var union_set = try set.unionOf(testing.allocator, other); - defer union_set.deinit(testing.allocator); - try expectEqual(union_set.cardinality(), 3); - try expect(union_set.containsAllSlice(&.{ 123, 456, 789 })); - - // Test intersection - var intersection = try set.intersectionOf(testing.allocator, other); - defer intersection.deinit(testing.allocator); - try expectEqual(intersection.cardinality(), 1); - try expect(intersection.contains(456)); - - // Test difference - var difference = try set.differenceOf(testing.allocator, other); - defer difference.deinit(testing.allocator); - try expectEqual(difference.cardinality(), 1); - try expect(difference.contains(123)); - - // Test symmetric difference - var sym_diff = try set.symmetricDifferenceOf(testing.allocator, other); - defer sym_diff.deinit(testing.allocator); - try expectEqual(sym_diff.cardinality(), 2); - try expect(sym_diff.containsAllSlice(&.{ 123, 789 })); - - // Test in-place operations - try set.unionUpdate(testing.allocator, other); - try expectEqual(set.cardinality(), 3); - try expect(set.containsAllSlice(&.{ 123, 456, 789 })); -} - -test "custom hash function with different load factors" { - const context = TestContext{}; - - // Test with low load factor - var low_load = HashSetWithContext(u32, TestContext, 25).initContext(context); - defer low_load.deinit(testing.allocator); - - // Test with high load factor - var high_load = HashSetWithContext(u32, TestContext, 90).initContext(context); - defer high_load.deinit(testing.allocator); - - // Add same elements to both - 
for (0..100) |i| { - _ = try low_load.add(testing.allocator, @intCast(i)); - _ = try high_load.add(testing.allocator, @intCast(i)); - } - - // Verify functionality is identical despite different load factors - try expectEqual(low_load.cardinality(), high_load.cardinality()); - try expect(low_load.capacity() != high_load.capacity()); // Should be different due to load factors - - // Verify both sets contain the same elements - for (0..100) |i| { - const val: u32 = @intCast(i); - try expect(low_load.contains(val) and high_load.contains(val)); - } -} - -test "custom hash function error cases" { - const context = TestContext{}; - var set = HashSetWithContext(u32, TestContext, 75).initContext(context); - defer set.deinit(testing.allocator); - - // Test allocation failures - var failing_allocator = std.testing.FailingAllocator.init(testing.allocator, .{ .fail_index = 0 }); - try std.testing.expectError(error.OutOfMemory, set.add(failing_allocator.allocator(), 123)); -} - -// String context for testing string usage with custom hash function -const StringContext = struct { - pub fn hash(self: @This(), str: []const u8) u64 { - _ = self; - // Simple FNV-1a hash - var h: u64 = 0xcbf29ce484222325; - for (str) |b| { - h = (h ^ b) *% 0x100000001b3; - } - return h; - } - - pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { - _ = self; - return std.mem.eql(u8, a, b); - } -}; - -test "custom hash function string usage" { - const context = StringContext{}; - var A = HashSetWithContext([]const u8, StringContext, 75).initContext(context); - defer A.deinit(testing.allocator); - - var B = HashSetWithContext([]const u8, StringContext, 75).initContext(context); - defer B.deinit(testing.allocator); - - _ = try A.add(testing.allocator, "Hello"); - _ = try B.add(testing.allocator, "World"); - - var C = try A.unionOf(testing.allocator, B); - defer C.deinit(testing.allocator); - try expectEqual(2, C.cardinality()); - try expect(C.containsAllSlice(&.{ "Hello", "World" })); - - // 
Test string-specific behavior - try expect(A.contains("Hello")); - try expect(!A.contains("hello")); // Case sensitive - try expect(!A.contains("Hell")); // Prefix doesn't match - try expect(!A.contains("Hello ")); // Trailing space matters - - // Test with longer strings - _ = try A.add(testing.allocator, "This is a longer string to test hash collisions"); - _ = try A.add(testing.allocator, "This is another longer string to test hash collisions"); - try expectEqual(3, A.cardinality()); - - // Test with empty string - _ = try A.add(testing.allocator, ""); - try expect(A.contains("")); - try expectEqual(4, A.cardinality()); - - // Test with strings containing special characters - _ = try A.add(testing.allocator, "Hello\n"); - _ = try A.add(testing.allocator, "Hello\r"); - _ = try A.add(testing.allocator, "Hello\t"); - try expectEqual(7, A.cardinality()); -} diff --git a/src/main.zig b/src/main.zig deleted file mode 100644 index dd71c05..0000000 --- a/src/main.zig +++ /dev/null @@ -1,93 +0,0 @@ -const std = @import("std"); -const set = @import("root.zig"); - -pub fn main(init: std.process.Init) !void { - const gpa = init.gpa; - - const repetitions: usize = 1000; - - const times_diff = try gpa.alloc(i64, repetitions); - defer gpa.free(times_diff); - - const times_sym = try gpa.alloc(i64, repetitions); - defer gpa.free(times_sym); - - const times_inter = try gpa.alloc(i64, repetitions); - defer gpa.free(times_inter); - - const upper: u32 = 100000; - var B = set.Set(u32).init(); - defer B.deinit(gpa); - - for (0..@divExact(upper, 2)) |i| { - const e: u32 = @intCast(i); - _ = try B.add(gpa, e); - } - - std.debug.print("starting benchmark\n", .{}); - - for (0..repetitions) |i| { - var A_diff = set.Set(u32).init(); - defer A_diff.deinit(gpa); - - var A_sym = set.Set(u32).init(); - defer A_sym.deinit(gpa); - - var A_inter = set.Set(u32).init(); - defer A_inter.deinit(gpa); - - for (0..upper) |j| { - const e: u32 = @intCast(j); - _ = try A_diff.add(gpa, e); - _ = try 
A_sym.add(gpa, e); - _ = try A_inter.add(gpa, e); - } - - const start_diff = std.Io.Timestamp.now(init.io, .awake); - _ = try A_diff.differenceUpdate(B); - const elapsed_diff = start_diff.untilNow(init.io, .awake); - times_diff[i] = elapsed_diff.toMilliseconds(); - - const start_sym = std.Io.Timestamp.now(init.io, .awake); - _ = try A_sym.symmetricDifferenceUpdate(gpa, B); - const elapsed_sym = start_sym.untilNow(init.io, .awake); - times_sym[i] = elapsed_sym.toMilliseconds(); - - const start_inter = std.Io.Timestamp.now(init.io, .awake); - _ = try A_inter.intersectionUpdate(gpa, B); - const elapsed_inter = start_inter.untilNow(init.io, .awake); - times_inter[i] = elapsed_inter.toMilliseconds(); - } - - const stats_diff = Stats.calculateFromData(times_diff); - const stats_sym = Stats.calculateFromData(times_sym); - const stats_inter = Stats.calculateFromData(times_inter); - - std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Difference (ms)", stats_diff.mean, stats_diff.ci }); - std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Sym Difference (ms)", stats_sym.mean, stats_sym.ci }); - std.debug.print("{s: <24}: {d:.4} +/- {d:.6} (95% CI)\n", .{ "Intersection (ms)", stats_inter.mean, stats_inter.ci }); -} - -pub const Stats = struct { - mean: f64, - ci: f64, - - pub fn calculateFromData(data: []i64) Stats { - var sum: i64 = 0; - for (data) |v| sum += v; - const mean: f64 = @as(f64, @floatFromInt(sum)) / @as(f64, @floatFromInt(data.len)); - - var sum_sq_diff: f64 = 0.0; - for (data) |v| { - const diff = @as(f64, @floatFromInt(v)) - mean; - sum_sq_diff += diff * diff; - } - - const variance = sum_sq_diff / @as(f64, @floatFromInt(data.len - 1)); - const std_dev = std.math.sqrt(variance); - - const margin_error = 1.96 * (std_dev / std.math.sqrt(@as(f64, @floatFromInt(data.len)))); - - return Stats{ .mean = mean, .ci = margin_error }; - } -}; diff --git a/src/root.zig b/src/root.zig index 9d7e520..3d9ab97 100644 --- a/src/root.zig +++ 
b/src/root.zig @@ -27,11 +27,11 @@ pub const Set = HashSet; /// This Hash-based is optmized for lookups. pub const HashSet = @import("hash_set.zig").HashSet; -// /// ArraySetUnmanaged is a conveniently exported "unmanaged" version of an array-based Set. -// /// This is a bit more specialized and optimized for heavy iteration. -// pub const ArraySet = @import("array_hash_set.zig").ArraySetUnmanaged; +/// ArraySetUnmanaged is a conveniently exported "unmanaged" version of an array-based Set. +/// This is a bit more specialized and optimized for heavy iteration. +pub const ArraySet = @import("array_hash_set.zig").ArraySet; test "tests" { _ = @import("hash_set.zig"); - // _ = @import("array_hash_set.zig"); + _ = @import("array_hash_set.zig"); } From 78f49a732b6327fb6bfc3dd5838ccbe093009ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Soler=20Valad=C3=A9s?= Date: Wed, 25 Feb 2026 20:12:33 +0100 Subject: [PATCH 4/5] added .empty and initContext --- src/array_hash_set.zig | 60 +++++++++++++++++++++++------------------- src/hash_set.zig | 26 +++++++++++------- src/main.zig | 31 ++++++++++++++++++++++ src/root.zig | 4 +-- 4 files changed, 83 insertions(+), 38 deletions(-) create mode 100644 src/main.zig diff --git a/src/array_hash_set.zig b/src/array_hash_set.zig index 17ee324..ce1ef9d 100644 --- a/src/array_hash_set.zig +++ b/src/array_hash_set.zig @@ -69,12 +69,16 @@ pub fn ArraySet(comptime E: type) type { }; const Self = @This(); - - pub fn init() Self { - return .{ - .unmanaged = Map{}, - }; - } + + pub const empty: Self = .{ + .unmanaged = Map{}, + }; + + // pub fn init() Self { + // return .{ + // .unmanaged = Map{}, + // }; + // } pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { var self = Self.init(); @@ -82,6 +86,8 @@ pub fn ArraySet(comptime E: type) type { return self; } + /// TODO: zig has still not changed neither of the two maps from selectMap 0.16, + /// so we need to pass the allocator to deinit that. 
pub fn deinit(self: *Self, allocator: Allocator) void { self.unmanaged.deinit(allocator); self.* = undefined; @@ -496,7 +502,7 @@ const expectEqual = std.testing.expectEqual; test "example usage" { // Create a set of u32s called A - var A = ArraySet(u32).init(); + var A = ArraySet(u32) = .empty; defer A.deinit(testing.allocator); // Add some data @@ -508,7 +514,7 @@ test "example usage" { _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); // Create another set called B - var B = ArraySet(u32).init(); + var B = ArraySet(u32) = .empty; defer B.deinit(testing.allocator); // Add data to B @@ -549,7 +555,7 @@ test "string usage" { } test "comprehensive usage" { - var set = ArraySet(u32).init(); + var set = ArraySet(u32) = .empty; defer set.deinit(testing.allocator); try expect(set.isEmpty()); @@ -573,7 +579,7 @@ test "comprehensive usage" { try expectEqual(set.cardinality(), 7); - var other = ArraySet(u32).init(); + var other = ArraySet(u32) = .empty; defer other.deinit(testing.allocator); try expect(other.isEmpty()); @@ -632,11 +638,11 @@ test "comprehensive usage" { } test "isDisjoint" { - var a = ArraySet(u32).init(); + var a = ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - var b = ArraySet(u32).init(); + var b = ArraySet(u32) = .empty; defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); @@ -645,7 +651,7 @@ test "isDisjoint" { try expect(b.isDisjoint(a)); // Test the false case. 
- var c = ArraySet(u32).init(); + var c = ArraySet(u32) = .empty; defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); @@ -656,7 +662,7 @@ test "isDisjoint" { test "clone" { // clone - var a = ArraySet(u32).init(); + var a = ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -667,7 +673,7 @@ test "clone" { } test "clear/capacity" { - var a = ArraySet(u32).init(); + var a = ArraySet(u32) = .empty; defer a.deinit(testing.allocator); try expectEqual(0, a.cardinality()); @@ -699,7 +705,7 @@ test "clear/capacity" { } test "iterator" { - var a = ArraySet(u32).init(); + var a = ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -716,7 +722,7 @@ test "iterator" { } test "pop" { - var a = ArraySet(u32).init(); + var a = ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -735,11 +741,11 @@ test "pop" { test "in-place methods" { // intersectionUpdate - var a = ArraySet(u32).init(); + var a: ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var b = ArraySet(u32).init(); + var b: ArraySet(u32) = .empty; defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -748,11 +754,11 @@ test "in-place methods" { try expect(a.containsAllSlice(&.{ 20, 30 })); // unionUpdate - var c = ArraySet(u32).init(); + var c: ArraySet(u32) = .empty; defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var d = ArraySet(u32).init(); + var d: ArraySet(u32) = .empty; defer d.deinit(testing.allocator); _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -761,11 +767,11 @@ test "in-place methods" { try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); // differenceUpdate 
- var e = ArraySet(u32).init(); + var e: ArraySet(u32) = .empty; defer e.deinit(testing.allocator); _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); - var f = ArraySet(u32).init(); + var f: ArraySet(u32) = .empty; defer f.deinit(testing.allocator); _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); @@ -775,11 +781,11 @@ test "in-place methods" { try expect(e.contains(11111)); // symmetricDifferenceUpdate - var g = ArraySet(u32).init(); + var g: ArraySet(u32) = .empty; defer g.deinit(testing.allocator); _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - var h = ArraySet(u32).init(); + var h = ArraySet(u32) = .empty; defer h.deinit(testing.allocator); _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); @@ -790,7 +796,7 @@ test "in-place methods" { } test "removals" { - var a = ArraySet(u32).init(); + var a: ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 5, 6, 7, 8 }); @@ -809,7 +815,7 @@ test "removals" { a.removeAllSlice(&.{ 50, 60, 70, 80 }); try expectEqual(4, a.cardinality()); - var b = ArraySet(u32).init(); + var b: ArraySet(u32) = .empty; defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 111, 222, 333, 444 }); diff --git a/src/hash_set.zig b/src/hash_set.zig index 9466067..a12faeb 100644 --- a/src/hash_set.zig +++ b/src/hash_set.zig @@ -72,15 +72,21 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max pub const Iterator = Map.KeyIterator; const Self = @This(); - - /// Initialize a default set without context - pub fn init() Self { - return .{ - .unmanaged = Map{}, - .context = if (Context == void) {} else undefined, - .max_load_percentage = if (Context == void) {} else max_load_percentage, - }; - } + + pub const empty: Self = if (@sizeOf(Context) == 0) .{ + .unmanaged = Map{}, + .context = if (Context == void) {} else undefined, + 
.max_load_percentage = if (Context == void) {} else max_load_percentage, + } else @compileError("Cannot init as empty if context was non void"); + + // /// Initialize a default set without context + // pub fn init() Self { + // return .{ + // .unmanaged = Map{}, + // .context = if (Context == void) {} else undefined, + // .max_load_percentage = if (Context == void) {} else max_load_percentage, + // }; + // } /// Initialize with a custom context pub fn initContext(context: Context) Self { @@ -99,6 +105,8 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max } /// Destroys the unmanaged Set. + /// TODO: zig has still not changed the HashMap in 0.16, so we need to pass + /// the allocator here. It needs to be removed. pub fn deinit(self: *Self, allocator: Allocator) void { self.unmanaged.deinit(allocator); self.* = undefined; diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..1a4f836 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,31 @@ +const std = @import("std"); +const set = @import("root.zig"); + +const HashSet = set.HashSet; +const ArraySet = set.ArraySet; + +const SimpleHasher = struct { + const Self = @This(); + pub fn hash(_: Self, key: u32) u64 { + return @as(u64, key) *% 0x517cc1b727220a95; + } + pub fn eql(_: Self, a: u32, b: u32) bool { + return a == b; + } +}; + +pub fn main(init: std.process.Init) void { + const gpa = init.gpa; + + // now we can initialize a HashSet with empty if no context is provided + var A: HashSet(u32) = .empty; + defer A.deinit(gpa); + + var B: ArraySet(u32) = .empty; + defer B.deinit(gpa); + + const ctx = SimpleHasher{}; + var C: set.HashSetContext(u32, SimpleHasher, 75) = .initContext(ctx); + defer C.deinit(gpa); + +} diff --git a/src/root.zig b/src/root.zig index 3d9ab97..627c717 100644 --- a/src/root.zig +++ b/src/root.zig @@ -23,14 +23,14 @@ /// which to pick, just use Set. 
pub const Set = HashSet; -/// HashSetUnmanaged is a conveniently exported "unmanaged" version of a hash-based Set. /// This Hash-based is optmized for lookups. pub const HashSet = @import("hash_set.zig").HashSet; +pub const HashSetContext = @import("hash_set.zig").HashSetWithContext; -/// ArraySetUnmanaged is a conveniently exported "unmanaged" version of an array-based Set. /// This is a bit more specialized and optimized for heavy iteration. pub const ArraySet = @import("array_hash_set.zig").ArraySet; + test "tests" { _ = @import("hash_set.zig"); _ = @import("array_hash_set.zig"); From fd8f144253bf060ee4c6dd43931ee7bbc0f47a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pau=20Soler=20Valad=C3=A9s?= Date: Wed, 25 Feb 2026 20:25:31 +0100 Subject: [PATCH 5/5] Refactored tests with the .empty and :Self inside the functions --- src/array_hash_set.zig | 36 +++++++++++++++--------------- src/hash_set.zig | 50 +++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/array_hash_set.zig b/src/array_hash_set.zig index ce1ef9d..17330c2 100644 --- a/src/array_hash_set.zig +++ b/src/array_hash_set.zig @@ -81,7 +81,7 @@ pub fn ArraySet(comptime E: type) type { // } pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { - var self = Self.init(); + var self: Self = .empty; try self.unmanaged.ensureTotalCapacity(allocator, num); return self; } @@ -191,7 +191,7 @@ pub fn ArraySet(comptime E: type) type { /// /// Caller owns the newly allocated/returned set. pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var diffSet = Self.init(); + var diffSet: Self = .empty; var iter = self.unmanaged.iterator(); while (iter.next()) |entry| { @@ -262,7 +262,7 @@ pub fn ArraySet(comptime E: type) type { /// /// Caller owns the newly allocated/returned set. 
pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var interSet = Self.init(); + var interSet: Self = .empty; // Optimization: iterate over whichever set is smaller. // Matters when disparity in cardinality is large. @@ -419,7 +419,7 @@ pub fn ArraySet(comptime E: type) type { /// /// The caller owns the newly allocated/returned Set. pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var sdSet = Self.init(); + var sdSet: Self = .empty; var iter = self.unmanaged.iterator(); while (iter.next()) |entry| { @@ -502,7 +502,7 @@ const expectEqual = std.testing.expectEqual; test "example usage" { // Create a set of u32s called A - var A = ArraySet(u32) = .empty; + var A: ArraySet(u32) = .empty; defer A.deinit(testing.allocator); // Add some data @@ -514,7 +514,7 @@ test "example usage" { _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); // Create another set called B - var B = ArraySet(u32) = .empty; + var B: ArraySet(u32) = .empty; defer B.deinit(testing.allocator); // Add data to B @@ -539,10 +539,10 @@ test "example usage" { } test "string usage" { - var A = ArraySet([]const u8).init(); + var A: ArraySet([]const u8) = .empty; defer A.deinit(testing.allocator); - var B = ArraySet([]const u8).init(); + var B: ArraySet([]const u8) = .empty; defer B.deinit(testing.allocator); _ = try A.add(testing.allocator, "Hello"); @@ -555,7 +555,7 @@ test "string usage" { } test "comprehensive usage" { - var set = ArraySet(u32) = .empty; + var set: ArraySet(u32) = .empty; defer set.deinit(testing.allocator); try expect(set.isEmpty()); @@ -579,7 +579,7 @@ test "comprehensive usage" { try expectEqual(set.cardinality(), 7); - var other = ArraySet(u32) = .empty; + var other: ArraySet(u32) = .empty; defer other.deinit(testing.allocator); try expect(other.isEmpty()); @@ -638,11 +638,11 @@ test "comprehensive usage" { } test "isDisjoint" { - var a = ArraySet(u32) = .empty; + var a: 
ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - var b = ArraySet(u32) = .empty; + var b: ArraySet(u32) = .empty; defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); @@ -651,7 +651,7 @@ test "isDisjoint" { try expect(b.isDisjoint(a)); // Test the false case. - var c = ArraySet(u32) = .empty; + var c: ArraySet(u32) = .empty; defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); @@ -662,7 +662,7 @@ test "isDisjoint" { test "clone" { // clone - var a = ArraySet(u32) = .empty; + var a: ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -673,7 +673,7 @@ test "clone" { } test "clear/capacity" { - var a = ArraySet(u32) = .empty; + var a: ArraySet(u32) = .empty; defer a.deinit(testing.allocator); try expectEqual(0, a.cardinality()); @@ -705,7 +705,7 @@ test "clear/capacity" { } test "iterator" { - var a = ArraySet(u32) = .empty; + var a: ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -722,7 +722,7 @@ test "iterator" { } test "pop" { - var a = ArraySet(u32) = .empty; + var a: ArraySet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -785,7 +785,7 @@ test "in-place methods" { defer g.deinit(testing.allocator); _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - var h = ArraySet(u32) = .empty; + var h: ArraySet(u32) = .empty; defer h.deinit(testing.allocator); _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 }); diff --git a/src/hash_set.zig b/src/hash_set.zig index a12faeb..e26fad6 100644 --- a/src/hash_set.zig +++ b/src/hash_set.zig @@ -99,7 +99,7 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max /// Initialzies a Set using a 
capacity hint, with the given Allocator pub fn initCapacity(allocator: Allocator, num: Size) Allocator.Error!Self { - var self = Self.init(); + var self: Self = .empty; try self.unmanaged.ensureTotalCapacity(allocator, num); return self; } @@ -236,7 +236,7 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max /// /// Caller owns the newly allocated/returned set. pub fn differenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var diffSet = Self.init(); + var diffSet: Self = .empty; var iter = self.unmanaged.iterator(); while (iter.next()) |entry| { @@ -307,7 +307,7 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max /// /// Caller owns the newly allocated/returned set. pub fn intersectionOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var interSet = Self.init(); + var interSet: Self = .empty; // Optimization: iterate over whichever set is smaller. // Matters when disparity in cardinality is large. @@ -457,7 +457,7 @@ pub fn HashSetWithContext(comptime E: type, comptime Context: type, comptime max /// /// The caller owns the newly allocated/returned Set. 
pub fn symmetricDifferenceOf(self: Self, allocator: Allocator, other: Self) Allocator.Error!Self { - var sdSet = Self.init(); + var sdSet: Self = .empty; var iter = self.unmanaged.iterator(); while (iter.next()) |entry| { @@ -535,7 +535,7 @@ const expectEqual = std.testing.expectEqual; test "example usage" { // Create a set of u32s called A - var A = HashSet(u32).init(); + var A: HashSet(u32) = .empty; defer A.deinit(testing.allocator); // Add some data @@ -547,7 +547,7 @@ test "example usage" { _ = try A.appendSlice(testing.allocator, &.{ 5, 3, 0, 9 }); // Create another set called B - var B = HashSet(u32).init(); + var B: HashSet(u32) = .empty; defer B.deinit(testing.allocator); // Add data to B @@ -567,10 +567,10 @@ test "example usage" { } test "string usage" { - var A = HashSet([]const u8).init(); + var A: HashSet([]const u8) = .empty; defer A.deinit(testing.allocator); - var B = HashSet([]const u8).init(); + var B: HashSet([]const u8) = .empty; defer B.deinit(testing.allocator); _ = try A.add(testing.allocator, "Hello"); @@ -583,7 +583,7 @@ test "string usage" { } test "comprehensive usage" { - var set = HashSet(u32).init(); + var set: HashSet(u32) = .empty; defer set.deinit(testing.allocator); try expect(set.isEmpty()); @@ -607,7 +607,7 @@ test "comprehensive usage" { try expectEqual(set.cardinality(), 7); - var other = HashSet(u32).init(); + var other: HashSet(u32) = .empty; defer other.deinit(testing.allocator); try expect(other.isEmpty()); @@ -666,11 +666,11 @@ test "comprehensive usage" { } test "isDisjoint" { - var a = HashSet(u32).init(); + var a: HashSet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); - var b = HashSet(u32).init(); + var b: HashSet(u32) = .empty; defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 202, 303, 403 }); @@ -679,7 +679,7 @@ test "isDisjoint" { try expect(b.isDisjoint(a)); // Test the false case. 
- var c = HashSet(u32).init(); + var c: HashSet(u32) = .empty; defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 20, 30, 400 }); @@ -690,7 +690,7 @@ test "isDisjoint" { test "clone" { // clone - var a = HashSet(u32).init(); + var a: HashSet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -701,7 +701,7 @@ test "clone" { } test "clear/capacity" { - var a = HashSet(u32).init(); + var a: HashSet(u32) = .empty; defer a.deinit(testing.allocator); try expectEqual(0, a.cardinality()); @@ -733,7 +733,7 @@ test "clear/capacity" { } test "iterator" { - var a = HashSet(u32).init(); + var a: HashSet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -750,7 +750,7 @@ test "iterator" { } test "pop" { - var a = HashSet(u32).init(); + var a: HashSet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 20, 30, 40 }); @@ -769,11 +769,11 @@ test "pop" { test "in-place methods" { // intersectionUpdate - var a = HashSet(u32).init(); + var a: HashSet(u32) = .empty; defer a.deinit(testing.allocator); _ = try a.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var b = HashSet(u32).init(); + var b: HashSet(u32) = .empty; defer b.deinit(testing.allocator); _ = try b.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -782,11 +782,11 @@ test "in-place methods" { try expect(a.containsAllSlice(&.{ 20, 30 })); // unionUpdate - var c = HashSet(u32).init(); + var c: HashSet(u32) = .empty; defer c.deinit(testing.allocator); _ = try c.appendSlice(testing.allocator, &.{ 10, 20, 30, 40 }); - var d = HashSet(u32).init(); + var d: HashSet(u32) = .empty; defer d.deinit(testing.allocator); _ = try d.appendSlice(testing.allocator, &.{ 44, 20, 30, 66 }); @@ -795,11 +795,11 @@ test "in-place methods" { try expect(c.containsAllSlice(&.{ 10, 20, 30, 40, 66 })); // differenceUpdate - var e = 
HashSet(u32).init(); + var e: HashSet(u32) = .empty; defer e.deinit(testing.allocator); _ = try e.appendSlice(testing.allocator, &.{ 1, 11, 111, 1111, 11111 }); - var f = HashSet(u32).init(); + var f: HashSet(u32) = .empty; defer f.deinit(testing.allocator); _ = try f.appendSlice(testing.allocator, &.{ 1, 11, 111, 222, 2222, 1111 }); @@ -809,11 +809,11 @@ test "in-place methods" { try expect(e.contains(11111)); // symmetricDifferenceUpdate - var g = HashSet(u32).init(); + var g: HashSet(u32) = .empty; defer g.deinit(testing.allocator); _ = try g.appendSlice(testing.allocator, &.{ 2, 22, 222, 2222, 22222 }); - var h = HashSet(u32).init(); + var h: HashSet(u32) = .empty; defer h.deinit(testing.allocator); _ = try h.appendSlice(testing.allocator, &.{ 1, 11, 111, 333, 3333, 2222, 1111 });