diff --git a/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs b/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs index 630d7f56a9bd..95d81dd5f49d 100644 --- a/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs +++ b/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs @@ -124,7 +124,9 @@ public void BuildCollectionGroups_Uniform_AssignsGroupsToEveryCollection() [Fact] public void BuildCollectionUsers_AllCollectionIdsAreValid() { - var result = CreateCollectionsStep.BuildCollectionUsers(_collectionIds, _userIds, 10); + var step = CreateStep(CollectionFanOutShape.Uniform, min: 1, max: 3); + + var result = step.BuildCollectionUsers(_collectionIds, _userIds, 10); Assert.All(result, cu => Assert.Contains(cu.CollectionId, _collectionIds)); } @@ -132,7 +134,9 @@ public void BuildCollectionUsers_AllCollectionIdsAreValid() [Fact] public void BuildCollectionUsers_AssignsOneToThreeCollectionsPerUser() { - var result = CreateCollectionsStep.BuildCollectionUsers(_collectionIds, _userIds, 10); + var step = CreateStep(CollectionFanOutShape.Uniform, min: 1, max: 3); + + var result = step.BuildCollectionUsers(_collectionIds, _userIds, 10); var perUser = result.GroupBy(cu => cu.OrganizationUserId).ToList(); Assert.All(perUser, group => Assert.InRange(group.Count(), 1, 3)); @@ -141,7 +145,9 @@ public void BuildCollectionUsers_AssignsOneToThreeCollectionsPerUser() [Fact] public void BuildCollectionUsers_RespectsDirectUserCount() { - var result = CreateCollectionsStep.BuildCollectionUsers(_collectionIds, _userIds, 5); + var step = CreateStep(CollectionFanOutShape.Uniform, min: 1, max: 3); + + var result = step.BuildCollectionUsers(_collectionIds, _userIds, 5); var distinctUsers = result.Select(cu => cu.OrganizationUserId).Distinct().ToList(); Assert.Equal(5, distinctUsers.Count); @@ -201,6 +207,67 @@ public void ComputeFanOut_Uniform_CyclesThroughRange() Assert.Equal(1, step.ComputeFanOut(3, 10, 1, 3)); } + [Fact] + public void ComputeCollectionsPerUser_Uniform_CyclesThroughRange() + { + var step = CreateUserCollectionStep(CollectionFanOutShape.Uniform, min: 1, max: 5); + + Assert.Equal(1, step.ComputeCollectionsPerUser(0, 100, 1, 5)); + Assert.Equal(2, step.ComputeCollectionsPerUser(1, 100, 1, 5)); + Assert.Equal(5, step.ComputeCollectionsPerUser(4, 100, 1, 5)); + Assert.Equal(1, step.ComputeCollectionsPerUser(5, 100, 1, 5)); + } + + [Fact] + public void ComputeCollectionsPerUser_PowerLaw_FirstUserGetsMax() + { + var step = CreateUserCollectionStep(CollectionFanOutShape.PowerLaw, min: 1, max: 50, skew: 0.7); + + Assert.Equal(50, step.ComputeCollectionsPerUser(0, 1000, 1, 50)); + } + + [Fact] + public void ComputeCollectionsPerUser_PowerLaw_LastUsersGetMin() + { + var step = CreateUserCollectionStep(CollectionFanOutShape.PowerLaw, min: 1, max: 50, skew: 0.7); + + Assert.Equal(1, step.ComputeCollectionsPerUser(999, 1000, 1, 50)); + } + + [Fact] + public void ComputeCollectionsPerUser_PowerLaw_DecaysMonotonically() + { + var step = CreateUserCollectionStep(CollectionFanOutShape.PowerLaw, min: 1, max: 25, skew: 0.6); + + var prev = step.ComputeCollectionsPerUser(0, 500, 1, 25); + for (var i = 1; i < 500; i++) + { + var current = step.ComputeCollectionsPerUser(i, 500, 1, 25); + Assert.True(current <= prev, $"Index {i}: {current} > {prev}"); + prev = current; + } + } + + [Fact] + public void ComputeCollectionsPerUser_FrontLoaded_FirstTenPercentGetMax() + { + var step = CreateUserCollectionStep(CollectionFanOutShape.FrontLoaded, min: 1, max: 20); + + Assert.Equal(20, step.ComputeCollectionsPerUser(0, 100, 1, 20)); + Assert.Equal(20, step.ComputeCollectionsPerUser(9, 100, 1, 20)); + Assert.Equal(1, step.ComputeCollectionsPerUser(10, 100, 1, 20)); + Assert.Equal(1, step.ComputeCollectionsPerUser(99, 100, 1, 20)); + } + + [Fact] + public void ComputeCollectionsPerUser_RangeOfOne_ReturnsMin() + { + var step = CreateUserCollectionStep(CollectionFanOutShape.PowerLaw, min: 3, max: 3, skew: 0.8); + + Assert.Equal(3, step.ComputeCollectionsPerUser(0, 100, 3, 3)); + Assert.Equal(3, step.ComputeCollectionsPerUser(99, 100, 3, 3)); + } + private static CreateCollectionsStep CreateStep(CollectionFanOutShape shape, int min, int max) { var density = new DensityProfile @@ -211,4 +278,17 @@ private static CreateCollectionsStep CreateStep(CollectionFanOutShape shape, int }; return CreateCollectionsStep.FromCount(0, density); } + + private static CreateCollectionsStep CreateUserCollectionStep( + CollectionFanOutShape shape, int min, int max, double skew = 0) + { + var density = new DensityProfile + { + UserCollectionShape = shape, + UserCollectionMin = min, + UserCollectionMax = max, + UserCollectionSkew = skew + }; + return CreateCollectionsStep.FromCount(0, density); + } } diff --git a/test/SeederApi.IntegrationTest/DensityModel/MultiCollectionAssignmentTests.cs b/test/SeederApi.IntegrationTest/DensityModel/MultiCollectionAssignmentTests.cs new file mode 100644 index 000000000000..423cb3c5ddf7 --- /dev/null +++ b/test/SeederApi.IntegrationTest/DensityModel/MultiCollectionAssignmentTests.cs @@ -0,0 +1,109 @@ +using Xunit; + +namespace Bit.SeederApi.IntegrationTest.DensityModel; + +/// +/// Validates the multi-collection cipher assignment math from GenerateCiphersStep +/// to ensure no duplicate (CipherId, CollectionId) pairs are produced. +/// +public class MultiCollectionAssignmentTests +{ + /// + /// Simulates the secondary collection assignment loop from GenerateCiphersStep + /// with the extraCount clamp fix applied. Returns the list of (cipherIndex, collectionIndex) pairs. + /// + private static List<(int CipherIndex, int CollectionIndex)> SimulateMultiCollectionAssignment( + int cipherCount, + int collectionCount, + double multiCollectionRate, + int maxCollectionsPerCipher) + { + var primaryIndices = new int[cipherCount]; + var pairs = new List<(int, int)>(); + + for (var i = 0; i < cipherCount; i++) + { + primaryIndices[i] = i % collectionCount; + pairs.Add((i, primaryIndices[i])); + } + + if (multiCollectionRate > 0 && collectionCount > 1) + { + var multiCount = (int)(cipherCount * multiCollectionRate); + for (var i = 0; i < multiCount; i++) + { + var extraCount = 1 + (i % Math.Max(maxCollectionsPerCipher - 1, 1)); + extraCount = Math.Min(extraCount, collectionCount - 1); + for (var j = 0; j < extraCount; j++) + { + var secondaryIndex = (primaryIndices[i] + 1 + j) % collectionCount; + pairs.Add((i, secondaryIndex)); + } + } + } + + return pairs; + } + + [Fact] + public void MultiCollectionAssignment_SmallCollectionCount_NoDuplicates() + { + var pairs = SimulateMultiCollectionAssignment( + cipherCount: 20, + collectionCount: 3, + multiCollectionRate: 1.0, + maxCollectionsPerCipher: 5); + + var grouped = pairs.GroupBy(p => p); + Assert.All(grouped, g => Assert.Single(g)); + } + + [Fact] + public void MultiCollectionAssignment_TwoCollections_NoDuplicates() + { + var pairs = SimulateMultiCollectionAssignment( + cipherCount: 50, + collectionCount: 2, + multiCollectionRate: 1.0, + maxCollectionsPerCipher: 10); + + var grouped = pairs.GroupBy(p => p); + Assert.All(grouped, g => Assert.Single(g)); + } + + [Fact] + public void MultiCollectionAssignment_ExtraCountClamped_ToAvailableCollections() + { + // With 2 collections, extraCount should never exceed 1 (collectionCount - 1) + var collectionCount = 2; + var maxCollectionsPerCipher = 10; + var cipherCount = 20; + + for (var i = 0; i < cipherCount; i++) + { + var extraCount = 1 + (i % Math.Max(maxCollectionsPerCipher - 1, 1)); + extraCount = Math.Min(extraCount, collectionCount - 1); + Assert.True(extraCount <= collectionCount - 1, + $"extraCount {extraCount} exceeds available secondary slots {collectionCount - 1} at i={i}"); + } + } + + [Fact] + public void MultiCollectionAssignment_SecondaryNeverEqualsPrimary() + { + var pairs = SimulateMultiCollectionAssignment( + cipherCount: 30, + collectionCount: 3, + multiCollectionRate: 1.0, + maxCollectionsPerCipher: 5); + + // Group by cipher index — for each cipher, no secondary should equal primary + var byCipher = pairs.GroupBy(p => p.CipherIndex); + foreach (var group in byCipher) + { + var primary = group.First().CollectionIndex; + var secondaries = group.Skip(1).Select(p => p.CollectionIndex); + Assert.DoesNotContain(primary, secondaries); + } + } +} diff --git a/test/SeederApi.IntegrationTest/DensityModel/RangeCalculationTests.cs b/test/SeederApi.IntegrationTest/DensityModel/RangeCalculationTests.cs new file mode 100644 index 000000000000..7626545dfc60 --- /dev/null +++ b/test/SeederApi.IntegrationTest/DensityModel/RangeCalculationTests.cs @@ -0,0 +1,53 @@ +using Xunit; + +namespace Bit.SeederApi.IntegrationTest.DensityModel; + +/// +/// Validates the range calculation formula used in GeneratePersonalCiphersStep and GenerateFoldersStep. +/// The formula: range.Min + (index % Math.Max(range.Max - range.Min + 1, 1)) +/// +public class RangeCalculationTests +{ + private static int ComputeFromRange(int min, int max, int index) + { + return min + (index % Math.Max(max - min + 1, 1)); + } + + [Fact] + public void RangeFormula_SmallRange_ProducesBothMinAndMax() + { + var values = Enumerable.Range(0, 100).Select(i => ComputeFromRange(0, 1, i)).ToHashSet(); + + Assert.Contains(0, values); + Assert.Contains(1, values); + } + + [Fact] + public void RangeFormula_LargerRange_MaxIsReachable() + { + var values = Enumerable.Range(0, 1000).Select(i => ComputeFromRange(5, 15, i)).ToHashSet(); + + Assert.Contains(5, values); + Assert.Contains(15, values); + Assert.Equal(11, values.Count); // 5,6,7,...,15 + } + + [Fact] + public void RangeFormula_SingleValue_AlwaysReturnsMin() + { + var values = Enumerable.Range(0, 50).Select(i => ComputeFromRange(3, 3, i)).Distinct().ToList(); + + Assert.Single(values); + Assert.Equal(3, values[0]); + } + + [Fact] + public void RangeFormula_AllValuesInBounds() + { + for (var i = 0; i < 500; i++) + { + var result = ComputeFromRange(50, 200, i); + Assert.InRange(result, 50, 200); + } + } +} diff --git a/test/SeederApi.IntegrationTest/DistributionTests.cs b/test/SeederApi.IntegrationTest/DistributionTests.cs index 808e31c273cd..de4c48fff222 100644 --- a/test/SeederApi.IntegrationTest/DistributionTests.cs +++ b/test/SeederApi.IntegrationTest/DistributionTests.cs @@ -172,4 +172,42 @@ public void Select_IsDeterministic_SameInputSameOutput() Assert.Equal(first, second); } } + + [Fact] + public void Select_ZeroWeightBucket_NeverSelected() + { + var distribution = new Distribution( + ("Manage", 0.50), + ("ReadWrite", 0.40), + ("ReadOnly", 0.10), + ("HidePasswords", 0.0) + ); + + for (var i = 0; i < 7; i++) + { + Assert.NotEqual("HidePasswords", distribution.Select(i, 7)); + } + } + + [Fact] + public void GetCounts_SmallTotal_RemainderGoesToLargestFraction() + { + var distribution = new Distribution( + ("A", 0.50), + ("B", 0.40), + ("C", 0.10), + ("D", 0.0) + ); + + var counts = distribution.GetCounts(7).ToList(); + + // Exact: A=3.5, B=2.8, C=0.7, D=0.0 + // Floors: A=3, B=2, C=0, D=0 (sum=5, deficit=2) + // Remainders: A=0.5, B=0.8, C=0.7, D=0.0 + // Deficit 1 → B (0.8), Deficit 2 → C (0.7) + Assert.Equal(("A", 3), counts[0]); + Assert.Equal(("B", 3), counts[1]); + Assert.Equal(("C", 1), counts[2]); + Assert.Equal(("D", 0), counts[3]); + } } diff --git a/util/Seeder/Data/Distributions/Distribution.cs b/util/Seeder/Data/Distributions/Distribution.cs index 8a44a46e320e..27b96cc70b5a 100644 --- a/util/Seeder/Data/Distributions/Distribution.cs +++ b/util/Seeder/Data/Distributions/Distribution.cs @@ -26,40 +26,68 @@ public Distribution(params (T Value, double Percentage)[] buckets) /// /// Selects a value deterministically based on index position within a total count. - /// Items 0 to (total * percentage1 - 1) get value1, and so on. + /// Remainder items go to buckets with the largest fractional parts, + /// not unconditionally to the last bucket. /// /// Zero-based index of the item. - /// Total number of items being distributed. For best accuracy, use totals >= 100. + /// Total number of items being distributed. /// The value assigned to this index position. public T Select(int index, int total) { var cumulative = 0; - foreach (var (value, percentage) in _buckets) + foreach (var (value, count) in GetCounts(total)) { - cumulative += (int)(total * percentage); + cumulative += count; if (index < cumulative) { return value; } } + return _buckets[^1].Value; } /// /// Returns all values with their calculated counts for a given total. - /// The last bucket receives any remainder from rounding. + /// Each bucket gets its truncated share, then the deficit is distributed one-at-a-time + /// to buckets with the largest fractional remainders. + /// Zero-weight buckets always receive exactly zero items. /// /// Total number of items to distribute. /// Sequence of value-count pairs. public IEnumerable<(T Value, int Count)> GetCounts(int total) { - var remaining = total; - for (var i = 0; i < _buckets.Length - 1; i++) + var counts = new int[_buckets.Length]; + var remainders = new double[_buckets.Length]; + var allocated = 0; + + for (var i = 0; i < _buckets.Length; i++) + { + var exact = total * _buckets[i].Percentage; + counts[i] = (int)exact; + remainders[i] = exact - counts[i]; + allocated += counts[i]; + } + + var deficit = total - allocated; + for (var d = 0; d < deficit; d++) + { + var bestIdx = 0; + for (var i = 1; i < remainders.Length; i++) + { + if (remainders[i] > remainders[bestIdx]) + { + bestIdx = i; + } + } + + counts[bestIdx]++; + remainders[bestIdx] = -1.0; + } + + for (var i = 0; i < _buckets.Length; i++) { - var count = (int)(total * _buckets[i].Percentage); - yield return (_buckets[i].Value, count); - remaining -= count; + yield return (_buckets[i].Value, counts[i]); } - yield return (_buckets[^1].Value, remaining); } } diff --git a/util/Seeder/Data/Distributions/FolderCountDistributions.cs b/util/Seeder/Data/Distributions/FolderCountDistributions.cs index c8811f2aa8ee..73c4291571c6 100644 --- a/util/Seeder/Data/Distributions/FolderCountDistributions.cs +++ b/util/Seeder/Data/Distributions/FolderCountDistributions.cs @@ -7,7 +7,7 @@ public static class FolderCountDistributions { /// /// Realistic distribution of folders per user. - /// 35% have zero, 35% have 1-3, 20% have 4-7, 10% have 10-15. + /// 35% have 0-1, 35% have 1-4, 20% have 4-8, 10% have 10-16. /// Values are (Min, Max) ranges for deterministic selection. /// public static Distribution<(int Min, int Max)> Realistic { get; } = new( @@ -16,4 +16,23 @@ public static class FolderCountDistributions ((4, 8), 0.20), ((10, 16), 0.10) ); + + /// + /// Enterprise: more structured organizations with heavier folder usage. + /// + public static Distribution<(int Min, int Max)> Enterprise { get; } = new( + ((0, 1), 0.20), + ((2, 5), 0.30), + ((5, 10), 0.30), + ((10, 25), 0.20) + ); + + /// + /// Minimal: most users don't bother organizing into folders. + /// + public static Distribution<(int Min, int Max)> Minimal { get; } = new( + ((0, 1), 0.70), + ((1, 3), 0.25), + ((3, 6), 0.05) + ); } diff --git a/util/Seeder/Data/Distributions/PersonalCipherDistributions.cs b/util/Seeder/Data/Distributions/PersonalCipherDistributions.cs new file mode 100644 index 000000000000..d2a7f2df61e2 --- /dev/null +++ b/util/Seeder/Data/Distributions/PersonalCipherDistributions.cs @@ -0,0 +1,37 @@ +namespace Bit.Seeder.Data.Distributions; + +/// +/// Pre-configured personal cipher count distributions per user. +/// +public static class PersonalCipherDistributions +{ + /// + /// Realistic enterprise mix: 30% have none, power users have 50-200. + /// + public static Distribution<(int Min, int Max)> Realistic { get; } = new( + ((0, 1), 0.30), + ((1, 5), 0.25), + ((5, 15), 0.25), + ((15, 50), 0.15), + ((50, 200), 0.05) + ); + + /// + /// Light usage: most users don't use personal vaults. + /// + public static Distribution<(int Min, int Max)> LightUsage { get; } = new( + ((0, 1), 0.60), + ((1, 5), 0.30), + ((5, 15), 0.10) + ); + + /// + /// Heavy usage: power users dominate, everyone has personal items. + /// + public static Distribution<(int Min, int Max)> HeavyUsage { get; } = new( + ((1, 5), 0.10), + ((5, 20), 0.30), + ((20, 100), 0.40), + ((100, 500), 0.20) + ); +} diff --git a/util/Seeder/Models/SeedPresetDensity.cs b/util/Seeder/Models/SeedPresetDensity.cs index 22adeb9b59a4..9d033a2372ed 100644 --- a/util/Seeder/Models/SeedPresetDensity.cs +++ b/util/Seeder/Models/SeedPresetDensity.cs @@ -14,6 +14,62 @@ internal record SeedPresetDensity public SeedPresetPermissions? Permissions { get; init; } public SeedPresetCipherAssignment? CipherAssignment { get; init; } + + public SeedPresetUserCollections? UserCollections { get; init; } + + public SeedPresetCipherTypes? CipherTypes { get; init; } + + public SeedPresetDensityPersonalCiphers? PersonalCiphers { get; init; } + + public SeedPresetDensityFolders? Folders { get; init; } +} + +/// +/// Folder count distribution per user: a named preset shape. +/// +internal record SeedPresetDensityFolders +{ + public string? Shape { get; init; } +} + +/// +/// Personal cipher count distribution per user: a named preset shape. +/// +internal record SeedPresetDensityPersonalCiphers +{ + public string? Shape { get; init; } +} + +/// +/// Cipher type distribution: a named preset or custom weights per type. +/// +internal record SeedPresetCipherTypes +{ + public string? Preset { get; init; } + + public double? Login { get; init; } + + public double? SecureNote { get; init; } + + public double? Card { get; init; } + + public double? Identity { get; init; } + + public double? SshKey { get; init; } +} + +/// +/// How many direct collections each user receives: range, distribution shape, and skew. +/// +internal record SeedPresetUserCollections +{ + public int? Min { get; init; } + + public int? Max { get; init; } + + public string? Shape { get; init; } + + public double? Skew { get; init; } } /// @@ -62,4 +118,8 @@ internal record SeedPresetCipherAssignment public string? Skew { get; init; } public double? OrphanRate { get; init; } + + public double? MultiCollectionRate { get; init; } + + public int? MaxCollectionsPerCipher { get; init; } } diff --git a/util/Seeder/Options/DensityProfile.cs b/util/Seeder/Options/DensityProfile.cs index 0b6317bef412..c98aa9e57318 100644 --- a/util/Seeder/Options/DensityProfile.cs +++ b/util/Seeder/Options/DensityProfile.cs @@ -1,4 +1,5 @@ -using Bit.Seeder.Data.Distributions; +using Bit.Core.Vault.Enums; +using Bit.Seeder.Data.Distributions; using Bit.Seeder.Data.Enums; namespace Bit.Seeder.Options; @@ -50,6 +51,31 @@ public class DensityProfile /// public Distribution PermissionDistribution { get; init; } = PermissionDistributions.Enterprise; + /// + /// Minimum direct collections per user. + /// + public int UserCollectionMin { get; init; } = 1; + + /// + /// Maximum direct collections per user. + /// + public int UserCollectionMax { get; init; } = 3; + + /// + /// Distribution shape for user-to-collection direct assignments. + /// + public CollectionFanOutShape UserCollectionShape { get; init; } = CollectionFanOutShape.Uniform; + + /// + /// Skew intensity for PowerLaw user-collection shape (0.0-1.0). Ignored for Uniform/FrontLoaded. + /// + public double UserCollectionSkew { get; init; } + + /// + /// Cipher type distribution override. When null, falls through to Realistic. + /// + public Distribution? CipherTypeDistribution { get; init; } + /// /// Cipher-to-collection assignment skew shape. /// @@ -59,4 +85,24 @@ public class DensityProfile /// Fraction of org ciphers with no collection assignment (0.0-1.0). /// public double OrphanCipherRate { get; init; } + + /// + /// Fraction of non-orphan ciphers assigned to more than one collection (0.0-1.0). + /// + public double MultiCollectionRate { get; init; } + + /// + /// Maximum number of collections a multi-collection cipher can belong to. + /// + public int MaxCollectionsPerCipher { get; init; } = 2; + + /// + /// Personal cipher count distribution override. When null, uses flat countPerUser. + /// + public Distribution<(int Min, int Max)>? PersonalCipherDistribution { get; init; } + + /// + /// Folder count distribution override. When null, uses FolderCountDistributions.Realistic. + /// + public Distribution<(int Min, int Max)>? FolderDistribution { get; init; } } diff --git a/util/Seeder/Pipeline/PresetLoader.cs b/util/Seeder/Pipeline/PresetLoader.cs index b2ce21f49398..992cf46df23b 100644 --- a/util/Seeder/Pipeline/PresetLoader.cs +++ b/util/Seeder/Pipeline/PresetLoader.cs @@ -1,4 +1,5 @@ -using Bit.Seeder.Data.Distributions; +using Bit.Core.Vault.Enums; +using Bit.Seeder.Data.Distributions; using Bit.Seeder.Data.Enums; using Bit.Seeder.Factories; using Bit.Seeder.Models; @@ -75,8 +76,15 @@ private static void BuildRecipe(string presetName, SeedPreset preset, ISeedReade builder.AddOwner(); } + var density = ParseDensity(preset.Density); + // Generator requires a domain and is needed for generated ciphers, personal ciphers, or folders - if (domain is not null && (preset.Ciphers?.Count > 0 || preset.PersonalCiphers?.CountPerUser > 0 || preset.Folders == true)) + if (domain is not null && ( + preset.Ciphers?.Count > 0 || + preset.PersonalCiphers?.CountPerUser > 0 || + preset.Folders == true || + density?.FolderDistribution is not null || + density?.PersonalCipherDistribution is not null)) { builder.WithGenerator(domain); } @@ -86,8 +94,6 @@ private static void BuildRecipe(string presetName, SeedPreset preset, ISeedReade builder.AddUsers(preset.Users.Count, preset.Users.RealisticStatusMix); } - var density = ParseDensity(preset.Density); - if (preset.Groups is not null) { builder.AddGroups(preset.Groups.Count, density); @@ -98,9 +104,9 @@ private static void BuildRecipe(string presetName, SeedPreset preset, ISeedReade builder.AddCollections(preset.Collections.Count, density); } - if (preset.Folders == true) + if (preset.Folders == true || density?.FolderDistribution is not null) { - builder.AddFolders(); + builder.AddFolders(density); } if (preset.Ciphers?.Fixture is not null) @@ -114,7 +120,11 @@ private static void BuildRecipe(string presetName, SeedPreset preset, ISeedReade if (preset.PersonalCiphers is not null && preset.PersonalCiphers.CountPerUser > 0) { - builder.AddPersonalCiphers(preset.PersonalCiphers.CountPerUser); + builder.AddPersonalCiphers(preset.PersonalCiphers.CountPerUser, density: density); + } + else if (density?.PersonalCipherDistribution is not null) + { + builder.AddPersonalCiphers(0, density: density); } builder.Validate(); @@ -139,6 +149,15 @@ private static void BuildRecipe(string presetName, SeedPreset preset, ISeedReade PermissionDistribution = ParsePermissions(preset.Permissions), CipherSkew = ParseEnum(preset.CipherAssignment?.Skew, CipherCollectionSkew.Uniform), OrphanCipherRate = preset.CipherAssignment?.OrphanRate ?? 0, + MultiCollectionRate = preset.CipherAssignment?.MultiCollectionRate ?? 0, + MaxCollectionsPerCipher = preset.CipherAssignment?.MaxCollectionsPerCipher ?? 2, + UserCollectionMin = preset.UserCollections?.Min ?? 1, + UserCollectionMax = preset.UserCollections?.Max ?? 3, + UserCollectionShape = ParseEnum(preset.UserCollections?.Shape, CollectionFanOutShape.Uniform), + UserCollectionSkew = preset.UserCollections?.Skew ?? 0, + CipherTypeDistribution = ParseCipherTypes(preset.CipherTypes), + PersonalCipherDistribution = ParsePersonalCipherDistribution(preset.PersonalCiphers?.Shape), + FolderDistribution = ParseFolderDistribution(preset.Folders?.Shape), }; } @@ -167,6 +186,88 @@ private static Distribution ParsePermissions(SeedPresetPermiss (PermissionWeight.HidePasswords, hidePasswords)); } - private static T ParseEnum(string? value, T defaultValue) where T : struct, Enum => - value is not null && Enum.TryParse(value, ignoreCase: true, out var result) ? result : defaultValue; + private static Distribution? ParseCipherTypes(SeedPresetCipherTypes? cipherTypes) + { + if (cipherTypes is null) + { + return null; + } + + if (cipherTypes.Preset is not null) + { + return cipherTypes.Preset.ToLowerInvariant() switch + { + "realistic" => CipherTypeDistributions.Realistic, + "loginonly" => CipherTypeDistributions.LoginOnly, + "documentationheavy" => CipherTypeDistributions.DocumentationHeavy, + "developerfocused" => CipherTypeDistributions.DeveloperFocused, + _ => throw new InvalidOperationException( + $"Unknown cipher type preset '{cipherTypes.Preset}'. Valid values: realistic, loginOnly, documentationHeavy, developerFocused."), + }; + } + + var login = cipherTypes.Login ?? 0; + var secureNote = cipherTypes.SecureNote ?? 0; + var card = cipherTypes.Card ?? 0; + var identity = cipherTypes.Identity ?? 0; + var sshKey = cipherTypes.SshKey ?? 0; + + return new Distribution( + (CipherType.Login, login), + (CipherType.SecureNote, secureNote), + (CipherType.Card, card), + (CipherType.Identity, identity), + (CipherType.SSHKey, sshKey)); + } + + private static Distribution<(int Min, int Max)>? ParsePersonalCipherDistribution(string? shape) + { + if (shape is null) + { + return null; + } + + return shape.ToLowerInvariant() switch + { + "realistic" => PersonalCipherDistributions.Realistic, + "lightusage" => PersonalCipherDistributions.LightUsage, + "heavyusage" => PersonalCipherDistributions.HeavyUsage, + _ => throw new InvalidOperationException( + $"Unknown personal cipher distribution '{shape}'. Valid values: realistic, lightUsage, heavyUsage."), + }; + } + + private static Distribution<(int Min, int Max)>? ParseFolderDistribution(string? shape) + { + if (shape is null) + { + return null; + } + + return shape.ToLowerInvariant() switch + { + "realistic" => FolderCountDistributions.Realistic, + "enterprise" => FolderCountDistributions.Enterprise, + "minimal" => FolderCountDistributions.Minimal, + _ => throw new InvalidOperationException( + $"Unknown folder distribution '{shape}'. Valid values: realistic, enterprise, minimal."), + }; + } + + private static T ParseEnum(string? value, T defaultValue) where T : struct, Enum + { + if (value is null) + { + return defaultValue; + } + + if (!Enum.TryParse(value, ignoreCase: true, out var result)) + { + var valid = string.Join(", ", Enum.GetNames()); + throw new InvalidOperationException( + $"Unknown {typeof(T).Name} '{value}'. Valid values: {valid}."); + } + + return result; + } } diff --git a/util/Seeder/Pipeline/RecipeBuilderExtensions.cs b/util/Seeder/Pipeline/RecipeBuilderExtensions.cs index 58a93fd4f6d9..7922105c7447 100644 --- a/util/Seeder/Pipeline/RecipeBuilderExtensions.cs +++ b/util/Seeder/Pipeline/RecipeBuilderExtensions.cs @@ -126,6 +126,7 @@ public static RecipeBuilder AddUsers(this RecipeBuilder builder, int count, bool /// /// The recipe builder /// Number of groups to generate + /// Optional density profile for membership distribution control /// The builder for fluent chaining /// Thrown when no users exist public static RecipeBuilder AddGroups(this RecipeBuilder builder, int count, DensityProfile? density = null) @@ -145,6 +146,7 @@ public static RecipeBuilder AddGroups(this RecipeBuilder builder, int count, Den /// /// The recipe builder /// Number of collections to generate + /// Optional density profile for collection fan-out and permission control /// The builder for fluent chaining /// Thrown when no users exist public static RecipeBuilder AddCollections(this RecipeBuilder builder, int count, DensityProfile? density = null) @@ -179,9 +181,13 @@ public static RecipeBuilder AddCollections(this RecipeBuilder builder, OrgStruct } /// - /// Generate folders for each user using a realistic distribution. + /// Generate folders for each user using a configurable distribution. /// - public static RecipeBuilder AddFolders(this RecipeBuilder builder) + /// The recipe builder + /// Optional density profile for folder count distribution override + /// The builder for fluent chaining + /// Thrown when no users exist + public static RecipeBuilder AddFolders(this RecipeBuilder builder, DensityProfile? density = null) { if (!builder.HasRosterUsers && !builder.HasGeneratedUsers) { @@ -190,7 +196,7 @@ public static RecipeBuilder AddFolders(this RecipeBuilder builder) } builder.HasFolders = true; - builder.AddStep(_ => new GenerateFoldersStep()); + builder.AddStep(_ => new GenerateFoldersStep(density)); return builder; } @@ -222,6 +228,7 @@ public static RecipeBuilder UseCiphers(this RecipeBuilder builder, string fixtur /// Distribution of cipher types. Uses realistic defaults if null. /// Distribution of password strengths. Uses realistic defaults if null. /// When true, assigns ciphers to user folders round-robin. + /// Optional density profile for cipher-to-collection assignment control /// The builder for fluent chaining /// Thrown when UseCiphers() was already called public static RecipeBuilder AddCiphers( @@ -254,12 +261,14 @@ public static RecipeBuilder AddCiphers( /// Number of personal ciphers per user /// Distribution of cipher types. Uses realistic defaults if null. /// Distribution of password strengths. Uses realistic defaults if null. + /// Optional density profile for per-user personal cipher count distribution /// The builder for fluent chaining /// Thrown when no users exist public static RecipeBuilder AddPersonalCiphers( this RecipeBuilder builder, int countPerUser, Distribution? typeDist = null, - Distribution? pwDist = null) + Distribution? pwDist = null, + DensityProfile? density = null) { if (!builder.HasRosterUsers && !builder.HasGeneratedUsers) { @@ -268,7 +277,7 @@ public static RecipeBuilder AddPersonalCiphers( } builder.HasPersonalCiphers = true; - builder.AddStep(_ => new GeneratePersonalCiphersStep(countPerUser, typeDist, pwDist)); + builder.AddStep(_ => new GeneratePersonalCiphersStep(countPerUser, typeDist, pwDist, density)); return builder; } diff --git a/util/Seeder/Pipeline/RecipeOrchestrator.cs b/util/Seeder/Pipeline/RecipeOrchestrator.cs index 1cebfe210f46..8f919f80cae8 100644 --- a/util/Seeder/Pipeline/RecipeOrchestrator.cs +++ b/util/Seeder/Pipeline/RecipeOrchestrator.cs @@ -82,7 +82,7 @@ internal ExecutionResult Execute( if (options.Ciphers > 0) { - builder.AddFolders(); + builder.AddFolders(options.Density); builder.AddCiphers(options.Ciphers, options.CipherTypeDistribution, options.PasswordDistribution, density: options.Density); } diff --git a/util/Seeder/Seeds/schemas/preset.schema.json b/util/Seeder/Seeds/schemas/preset.schema.json index f8a6cdced388..64067f499137 100644 --- a/util/Seeder/Seeds/schemas/preset.schema.json +++ b/util/Seeder/Seeds/schemas/preset.schema.json @@ -280,6 +280,112 @@ "minimum": 0.0, "maximum": 1.0, "description": "Fraction of org ciphers with no collection assignment." + }, + "multiCollectionRate": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Fraction of non-orphan ciphers assigned to more than one collection. Default: 0." + }, + "maxCollectionsPerCipher": { + "type": "integer", + "minimum": 1, + "maximum": 10, + "description": "Maximum number of collections a multi-collection cipher can belong to. Default: 2." + } + } + }, + "userCollections": { + "type": "object", + "description": "How many direct collections each user receives via CollectionUser records.", + "additionalProperties": false, + "properties": { + "min": { + "type": "integer", + "minimum": 1, + "description": "Minimum direct collections per user. Default: 1." + }, + "max": { + "type": "integer", + "minimum": 1, + "description": "Maximum direct collections per user. Default: 3." + }, + "shape": { + "type": "string", + "enum": ["uniform", "powerLaw", "frontLoaded"], + "description": "Distribution shape for user-collection assignments. Default: uniform." + }, + "skew": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Skew intensity for powerLaw shape. Ignored for uniform/frontLoaded. Default: 0." + } + } + }, + "cipherTypes": { + "type": "object", + "description": "Cipher type distribution for generated ciphers. Use 'preset' for a named distribution or specify custom weights per type.", + "additionalProperties": false, + "properties": { + "preset": { + "type": "string", + "enum": ["realistic", "loginOnly", "documentationHeavy", "developerFocused"], + "description": "Named cipher type distribution. Mutually exclusive with custom weights." + }, + "login": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for Login ciphers. All custom weights must sum to 1.0." + }, + "secureNote": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for SecureNote ciphers." + }, + "card": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for Card ciphers." + }, + "identity": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for Identity ciphers." + }, + "sshKey": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for SSH Key ciphers." + } + } + }, + "personalCiphers": { + "type": "object", + "description": "Personal cipher count distribution per user. Overrides the top-level personalCiphers.countPerUser with a variable distribution.", + "additionalProperties": false, + "properties": { + "shape": { + "type": "string", + "enum": ["realistic", "lightUsage", "heavyUsage"], + "description": "Named distribution for personal cipher counts per user. Default: flat countPerUser." + } + } + }, + "folders": { + "type": "object", + "description": "Folder count distribution per user. Overrides the default realistic distribution.", + "additionalProperties": false, + "properties": { + "shape": { + "type": "string", + "enum": ["realistic", "enterprise", "minimal"], + "description": "Named folder count distribution. Default: realistic." } } } diff --git a/util/Seeder/Steps/CreateCollectionsStep.cs b/util/Seeder/Steps/CreateCollectionsStep.cs index 9747e2eb79fd..14fabc59acf6 100644 --- a/util/Seeder/Steps/CreateCollectionsStep.cs +++ b/util/Seeder/Steps/CreateCollectionsStep.cs @@ -135,7 +135,6 @@ internal int ComputeFanOut(int collectionIndex, int collectionCount, int min, in return min + (int)(weight * (range - 1) + 0.5); case CollectionFanOutShape.FrontLoaded: - // First 10% of collections get max fan-out, rest get min var topCount = Math.Max(1, collectionCount / 10); return collectionIndex < topCount ? max : min; @@ -148,14 +147,18 @@ internal int ComputeFanOut(int collectionIndex, int collectionCount, int min, in } } - internal static List BuildCollectionUsers( + internal List BuildCollectionUsers( List collectionIds, List userIds, int directUserCount) { - var result = new List(directUserCount * 2); + var min = _density!.UserCollectionMin; + var max = _density.UserCollectionMax; + var result = new List(directUserCount * (min + max + 1) / 2); for (var i = 0; i < directUserCount; i++) { - var maxAssignments = Math.Min((i % 3) + 1, collectionIds.Count); - for (var j = 0; j < maxAssignments; j++) + var assignmentCount = Math.Min( + ComputeCollectionsPerUser(i, directUserCount, min, max), + collectionIds.Count); + for (var j = 0; j < assignmentCount; j++) { result.Add(CollectionUserSeeder.Create( collectionIds[(i + j) % collectionIds.Count], @@ -165,6 +168,34 @@ internal static List BuildCollectionUsers( return result; } + internal int ComputeCollectionsPerUser(int userIndex, int userCount, int min, int max) + { + var range = max - min + 1; + if (range <= 1) + { + return min; + } + + switch (_density!.UserCollectionShape) + { + case CollectionFanOutShape.PowerLaw: + var exponent = 0.5 + _density.UserCollectionSkew * 1.5; + var weight = 1.0 / Math.Pow(userIndex + 1, exponent); + return min + (int)(weight * (range - 1) + 0.5); + + case CollectionFanOutShape.FrontLoaded: + var topCount = Math.Max(1, userCount / 10); + return userIndex < topCount ? max : min; + + case CollectionFanOutShape.Uniform: + return min + (userIndex % range); + + default: + throw new InvalidOperationException( + $"Unhandled CollectionFanOutShape: {_density.UserCollectionShape}"); + } + } + private static (bool ReadOnly, bool HidePasswords, bool Manage) ResolvePermission( Distribution distribution, int index, int total) { diff --git a/util/Seeder/Steps/GenerateCiphersStep.cs b/util/Seeder/Steps/GenerateCiphersStep.cs index 517750db65da..5becbfaadfbb 100644 --- a/util/Seeder/Steps/GenerateCiphersStep.cs +++ b/util/Seeder/Steps/GenerateCiphersStep.cs @@ -43,7 +43,7 @@ public void Execute(SeederContext context) var orgId = context.RequireOrgId(); var orgKey = context.RequireOrgKey(); var collectionIds = context.Registry.CollectionIds; - var typeDistribution = typeDist ?? CipherTypeDistributions.Realistic; + var typeDistribution = typeDist ?? _density?.CipherTypeDistribution ?? CipherTypeDistributions.Realistic; var passwordDistribution = pwDist ?? PasswordDistributions.Realistic; var companies = Companies.All; @@ -95,6 +95,7 @@ public void Execute(SeederContext context) { var orphanCount = (int)(count * _density.OrphanCipherRate); var nonOrphanCount = count - orphanCount; + var primaryIndices = new int[nonOrphanCount]; for (var i = 0; i < nonOrphanCount; i++) { @@ -110,14 +111,33 @@ public void Execute(SeederContext context) collectionIndex = i % collectionIds.Count; } - var collectionId = collectionIds[collectionIndex]; + primaryIndices[i] = collectionIndex; collectionCiphers.Add(new CollectionCipher { CipherId = ciphers[i].Id, - CollectionId = collectionId + CollectionId = collectionIds[collectionIndex] }); } + + if (_density.MultiCollectionRate > 0 && collectionIds.Count > 1) + { + var multiCount = (int)(nonOrphanCount * _density.MultiCollectionRate); + for (var i = 0; i < multiCount; i++) + { + var extraCount = 1 + (i % Math.Max(_density.MaxCollectionsPerCipher - 1, 1)); + extraCount = Math.Min(extraCount, collectionIds.Count - 1); + for (var j = 0; j < extraCount; j++) + { + var secondaryIndex = (primaryIndices[i] + 1 + j) % collectionIds.Count; + collectionCiphers.Add(new CollectionCipher + { + CipherId = ciphers[i].Id, + CollectionId = collectionIds[secondaryIndex] + }); + } + } + } } } diff --git a/util/Seeder/Steps/GenerateFoldersStep.cs b/util/Seeder/Steps/GenerateFoldersStep.cs index bd856a780586..61293ba1c006 100644 --- a/util/Seeder/Steps/GenerateFoldersStep.cs +++ b/util/Seeder/Steps/GenerateFoldersStep.cs @@ -1,25 +1,26 @@ using Bit.Seeder.Data.Distributions; using Bit.Seeder.Factories; +using Bit.Seeder.Options; using Bit.Seeder.Pipeline; namespace Bit.Seeder.Steps; /// -/// Generates folders for each user based on a realistic distribution, encrypted with each user's symmetric key. +/// Generates folders for each user based on a configurable distribution, encrypted with each user's symmetric key. /// -internal sealed class GenerateFoldersStep : IStep +internal sealed class GenerateFoldersStep(DensityProfile? density = null) : IStep { public void Execute(SeederContext context) { var generator = context.RequireGenerator(); var userDigests = context.Registry.UserDigests; - var distribution = FolderCountDistributions.Realistic; + var distribution = density?.FolderDistribution ?? FolderCountDistributions.Realistic; for (var index = 0; index < userDigests.Count; index++) { var digest = userDigests[index]; var range = distribution.Select(index, userDigests.Count); - var count = range.Min + (index % Math.Max(range.Max - range.Min, 1)); + var count = range.Min + (index % Math.Max(range.Max - range.Min + 1, 1)); var folderIds = new List(count); for (var i = 0; i < count; i++) diff --git a/util/Seeder/Steps/GeneratePersonalCiphersStep.cs b/util/Seeder/Steps/GeneratePersonalCiphersStep.cs index a9e0391f2319..31c29c3a53a2 100644 --- a/util/Seeder/Steps/GeneratePersonalCiphersStep.cs +++ b/util/Seeder/Steps/GeneratePersonalCiphersStep.cs @@ -4,26 +4,29 @@ using Bit.Seeder.Data.Enums; using Bit.Seeder.Data.Static; using Bit.Seeder.Factories; +using Bit.Seeder.Options; using Bit.Seeder.Pipeline; namespace Bit.Seeder.Steps; /// -/// Creates N personal cipher entities per user, encrypted with each user's symmetric key. +/// Creates personal cipher entities per user, encrypted with each user's symmetric key. /// /// /// Iterates over and creates ciphers with /// UserId set and OrganizationId null. Personal ciphers are not assigned -/// to collections. +/// to collections. When a is set, +/// each user's count varies according to the distribution instead of using a flat count. /// internal sealed class GeneratePersonalCiphersStep( int countPerUser, Distribution? typeDist = null, - Distribution? pwDist = null) : IStep + Distribution? pwDist = null, + DensityProfile? density = null) : IStep { public void Execute(SeederContext context) { - if (countPerUser == 0) + if (countPerUser == 0 && density?.PersonalCipherDistribution is null) { return; } @@ -34,16 +37,28 @@ public void Execute(SeederContext context) var typeDistribution = typeDist ?? CipherTypeDistributions.Realistic; var passwordDistribution = pwDist ?? PasswordDistributions.Realistic; var companies = Companies.All; + var personalDist = density?.PersonalCipherDistribution; + var expectedTotal = personalDist is not null + ? EstimateTotal(userDigests.Count, personalDist) + : userDigests.Count * countPerUser; - var ciphers = new List(userDigests.Count * countPerUser); - var cipherIds = new List(userDigests.Count * countPerUser); + var ciphers = new List(expectedTotal); + var cipherIds = new List(expectedTotal); var globalIndex = 0; - foreach (var userDigest in userDigests) + for (var userIndex = 0; userIndex < userDigests.Count; userIndex++) { - for (var i = 0; i < countPerUser; i++) + var userDigest = userDigests[userIndex]; + var userCount = countPerUser; + if (personalDist is not null) { - var cipherType = typeDistribution.Select(globalIndex, userDigests.Count * countPerUser); + var range = personalDist.Select(userIndex, userDigests.Count); + userCount = range.Min + (userIndex % Math.Max(range.Max - range.Min + 1, 1)); + } + + for (var i = 0; i < userCount; i++) + { + var cipherType = typeDistribution.Select(globalIndex, expectedTotal); var cipher = CipherComposer.Compose(globalIndex, cipherType, userDigest.SymmetricKey, companies, generator, passwordDistribution, userId: userDigest.UserId); CipherComposer.AssignFolder(cipher, userDigest.UserId, i, context.Registry.UserFolderIds); @@ -57,4 +72,16 @@ public void Execute(SeederContext context) context.Ciphers.AddRange(ciphers); context.Registry.CipherIds.AddRange(cipherIds); } + + private static int EstimateTotal(int userCount, Distribution<(int Min, int Max)> dist) + { + var total = 0; + for (var i = 0; i < userCount; i++) + { + var range = dist.Select(i, userCount); + total += range.Min + (i % Math.Max(range.Max - range.Min + 1, 1)); + } + + return Math.Max(total, 1); + } }