From 7b3d48da9da5225f7621d7009af6f99f5915e149 Mon Sep 17 00:00:00 2001 From: djstrong Date: Sun, 22 Feb 2026 12:27:03 +0100 Subject: [PATCH 01/10] feat(plans): add Auto-Cleanup Architecture Plan for resolver record normalization --- ...cleanup_architecture_plan_50f1bdfb.plan.md | 514 ++++++++++++++++++ 1 file changed, 514 insertions(+) create mode 100644 .cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md new file mode 100644 index 000000000..8e0e0fb86 --- /dev/null +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -0,0 +1,514 @@ +--- +name: Auto-Cleanup Architecture Plan +overview: A comprehensive design specification for the resolver record normalization feature for ENS, covering the pure in-memory data model (Phase 1), the transform registry, and the API integration (Phase 3). +todos: [] +isProject: false +--- + +# Resolver Record Normalization — Architecture Specification + +## Goals (from issue #1061) + +1. Clients can request that all resolver records are **validated** (value matches expected format) and **normalized** (value is in a single canonical form regardless of how it was stored). +2. Keys are also normalized: legacy/fallback key variants (e.g. `vnd.twitter`, `twitter`) are resolved and the value is returned under the canonical key (e.g. `com.twitter`). +3. Unknown keys (no normalization logic defined) pass through unchanged. +4. Full normalization metadata is optionally returned so UIs can inspect and explain how each record was processed. +5. Normalized records carry UI-friendly enrichment: `displayKey`, `displayValue`, `url`. + +--- + +## Architecture: Phase separation + +The feature is split into three phases that can be developed in parallel: + +- **Phase 1** (current focus): Pure in-memory data model and functions in `@ensnode/ensnode-sdk`. No I/O, no resolution logic. 
This layer is independently testable and reusable. +- **Phase 2**: Resolution API refactors (tracked separately in issue #1471). +- **Phase 3**: Integrate Phase 1 logic into the APIs built in Phase 2. + +Phases 1 and 2 proceed in parallel. Phase 3 begins once both are sufficiently mature. + +--- + +## Phase 1: Pure in-memory data model and functions + +All code lives in `packages/ensnode-sdk/src/resolution/normalization/`. +Every function is a pure in-memory data operation with no I/O dependencies. + +### 1.1 Text record key definitions + +Each logical record type has a **normalized key** (the canonical primary key) and an ordered list of **unnormalized key variants** (legacy/alternative names tried as fallbacks): + +```typescript +type TextRecordKeyDef = { + /** Canonical key used in API responses and UI labels. e.g. "com.twitter" */ + normalizedKey: string; + /** Human-friendly label for UIs. e.g. "Twitter" */ + displayKey: string; + /** + * Ordered list of unnormalized key variants. + * The primary key (normalizedKey) itself is NOT in this list. + * Order determines priority if multiple variants have a value. + */ + unnormalizedKeys: readonly string[]; + /** Validate raw value. Returns ok + trimmed value, or ok:false with reason. */ + validate: (rawValue: string) => ValidationResult; + /** Convert a validated value to canonical form. */ + normalize: (validatedValue: string) => NormalizationResult; + /** Human-friendly value for UIs. e.g. "@alice" from "alice" */ + displayValue: (normalizedValue: string) => string; + /** Full URL to the related resource. e.g. "https://x.com/alice" */ + url: (normalizedValue: string) => string; +}; +``` + +Invariants (enforced at registry construction): + +- No two `normalizedKey` values are the same. +- No two `unnormalizedKey` values are the same across all definitions. +- No `unnormalizedKey` equals any `normalizedKey`. 
+ +### 1.1a Supporting return types + +These types are used by the `validate` and `normalize` functions on each `TextRecordKeyDef`: + +```typescript +type ValidationResult = + | { ok: true; value: string } // trimmed, sanitized value ready for normalization + | { ok: false; reason: string }; // human-readable reason for rejection + +type NormalizationResult = + | { ok: true; value: string } // canonical normalized value + | { ok: false; reason: string }; // human-readable reason normalization failed +``` + +**Null value handling**: if `rawValue` is `null` (the resolver has no value for that key), `normalizeRecord` produces `op: "Unnormalizable"` with `reason: "no value set"`. The key is still recognized; the record participates in the set-level priority contest but always loses to any record with a non-null rawValue. + +### 1.2 Key normalization types (per key) + +```typescript +type KeyNormalizationOp = "AlreadyNormalized" | "Normalized" | "Unrecognized"; + +type KeyNormalizationResult = + /** rawKey is the canonical normalizedKey; no change was needed */ + | { op: "AlreadyNormalized"; rawKey: string; normalizedKey: string } + /** rawKey was a fallback variant; it was mapped to normalizedKey in the response */ + | { op: "Normalized"; rawKey: string; normalizedKey: string } + /** rawKey has no known definition; passed through as-is */ + | { op: "Unrecognized"; rawKey: string; normalizedKey: null }; +``` + +### 1.3 Value normalization types (per value) + +```typescript +type ValueNormalizationOp = + | "AlreadyNormalized" + | "Normalized" + | "Unnormalizable" + | "Unrecognized"; + +type ValueNormalizationResult = + /** Value was already in canonical form; rawValue === normalizedValue */ + | { op: "AlreadyNormalized"; rawValue: string; normalizedValue: string } + /** Value was successfully transformed; rawValue !== normalizedValue */ + | { op: "Normalized"; rawValue: string; normalizedValue: string } + /** Key was recognized but no candidate produced a valid 
normalized value */ + | { op: "Unnormalizable"; rawValue: string; normalizedValue: null } + /** Key is unrecognized; value passed through without validation/normalization */ + | { op: "Unrecognized"; rawValue: string; normalizedValue: null }; +``` + +### 1.4 Layer 1: Individual record normalization + +An individual record normalization result pairs key and value ops: + +```typescript +type IndividualRecordNormalizationResult = { + keyResult: KeyNormalizationResult; + valueResult: ValueNormalizationResult; +}; +``` + +**Key function — normalize one record:** + +```typescript +function normalizeRecord( + rawKey: string, + rawValue: string | null, + registry: TextRecordNormalizationRegistry, +): IndividualRecordNormalizationResult +``` + +Logic: + +- Look up `rawKey` in registry (by normalizedKey or unnormalizedKey). +- If found: determine `keyResult` (AlreadyNormalized if rawKey === normalizedKey, else Normalized). + - Run validate + normalize on rawValue. + - If succeeded: `valueResult` is AlreadyNormalized or Normalized. + - If failed: `valueResult` is Unnormalizable. +- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. + +### 1.5 Layer 2: Set-level normalization + +After individually normalizing each record, the set is consolidated so only one normalized key is retained as the "winner" when multiple records map to the same normalized key. + +```typescript +type RecordNormalizationOp = + /** Key recognized, value valid — this record is the winner for its normalized key */ + | "Normalized" + /** Key unrecognized — both key and value are passed through unchanged */ + | "UnrecognizedKeyAndValue" + /** + * Key recognized (AlreadyNormalized or Normalized) but value op is Unrecognized. + * Distinct from UnnormalizableValue: the value could not even be attempted + * (e.g., schema mismatch), whereas UnnormalizableValue means validation was + * attempted and failed. 
+ */ + | "UnrecognizedValue" + /** Key recognized, value attempted but could not be normalized — excluded from clean output */ + | "UnnormalizableValue" + /** Another record already claimed this normalized key (lower priority variant) */ + | "DuplicateNormalizedKey"; + +type RecordNormalizationResult = { + op: RecordNormalizationOp; + individual: IndividualRecordNormalizationResult; + /** Present when op is "Normalized" */ + normalizedKey?: string; + normalizedValue?: string; + displayKey?: string; + displayValue?: string; + url?: string; +}; + +type NormalizedRecordSet = { + /** + * Maps each normalized key to its winning RecordNormalizationResult. + * Also contains UnrecognizedKeyAndValue records (keyed by their rawKey). + */ + normalizedRecords: Record<string, RecordNormalizationResult>; + /** + * Records that did not make it into normalizedRecords: + * UnnormalizableValue and DuplicateNormalizedKey. + */ + unnormalizedRecords: RecordNormalizationResult[]; +}; +``` + +**Priority rule** when multiple records share the same normalized key: + +1. The record whose `rawKey === normalizedKey` wins unconditionally. +2. Among remaining candidates, priority follows the ordering in `unnormalizedKeys`. +3. Losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. + +**Key function — build the set:** + +```typescript +function normalizeRecordSet( + records: Array<{ rawKey: string; rawValue: string | null }>, + registry: TextRecordNormalizationRegistry, +): NormalizedRecordSet +``` + +### 1.6 Layer 3: Stripped output + +For clients that only want clean values without metadata: + +```typescript +function stripNormalizationMetadata( + set: NormalizedRecordSet, +): Record<string, string | null> +``` + +Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. 
+ +### 1.7 Pre-resolution: key expansion + +Before resolution, normalized keys are expanded into the full set of candidate keys that the resolver should be queried for: + +```typescript +function expandNormalizedKeys( + normalizedKeys: readonly string[], + registry: TextRecordNormalizationRegistry, +): string[] +``` + +Returns: `[normalizedKey, ...unnormalizedKeys]` for each known key, deduplicated. Unknown keys are kept as-is. + +--- + +## Phase 1: Transform registry + +The registry maps normalized keys to `TextRecordKeyDef`. All lookups support both normalized keys and unnormalized variants. + +**Initial set of recognized keys:** + + +| Normalized key | Display key | Unnormalized key variants | +| --------------------------- | ----------- | ---------------------------------------- | +| `com.twitter` (or `com.x`?) | Twitter / X | `vnd.twitter`, `twitter`, `Twitter` | +| `com.github` | GitHub | `vnd.github`, `github` | +| `xyz.farcaster` | Farcaster | `com.warpcast`, `Farcaster`, `farcaster` | +| `com.discord` | Discord | `discord` | +| `org.telegram` | Telegram | `telegram`, `com.telegram`, `Telegram` | +| `com.reddit` | Reddit | `reddit` | +| `url` | Website | `URL`, `Website`, `website` | +| `email` | Email | `Email` | +| `avatar` | Avatar | `Avatar` | + + +**Open question:** Should the normalized key for Twitter be `com.twitter` or `com.x` (reflecting the platform rebrand)? If `com.x`, what are the unnormalized variants to include? + +### Registry construction + +The registry is constructed once at module initialization as a plain object with two internal lookup maps built from the definitions: + +- `byNormalizedKey: Map` — keyed by `normalizedKey`. +- `byUnnormalizedKey: Map` — keyed by each entry in `unnormalizedKeys`, pointing to the owning def. + +At construction time the registry validates its own invariants and throws synchronously if any are violated (fail fast). No lazy initialization. 
+ +### Per-key transform specifications + +The following specifies validation, normalization, and UI enrichment for each of the 9 initial keys. "Accepted input formats" lists formats that pass validation. "Canonical form" is the `normalizedValue` stored and returned. Values are first stripped of leading/trailing whitespace before validation. + +--- + +#### Twitter / X (`com.twitter` or `com.x` — see open question) + +Unnormalized variants: `vnd.twitter`, `twitter`, `Twitter` + +**Accepted input formats**: + +- Plain username: `alice` +- Prefixed: `@alice` +- twitter.com URL: `https://twitter.com/alice`, `http://twitter.com/alice`, `twitter.com/alice` +- x.com URL: `https://x.com/alice`, `http://x.com/alice`, `x.com/alice` + +**Validation**: extracted username must match `^[a-zA-Z0-9_]{1,15}$`. + +**Canonical form**: lowercase username without `@` prefix (e.g. `alice`). + +**displayValue**: `@{username}` (e.g. `@alice`). + +**url**: `https://x.com/{username}`. + +--- + +#### GitHub (`com.github`) + +Unnormalized variants: `vnd.github`, `github` + +**Accepted input formats**: + +- Plain username: `alice` +- Prefixed: `@alice` +- github.com URL: `https://github.com/alice`, `http://github.com/alice`, `github.com/alice` + +**Validation**: extracted username must match `^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$` (1–39 chars, alphanumeric and hyphens, no leading/trailing hyphen). + +**Canonical form**: lowercase username (e.g. `alice`). + +**displayValue**: `@{username}`. + +**url**: `https://github.com/{username}`. + +--- + +#### Farcaster (`xyz.farcaster`) + +Unnormalized variants: `com.warpcast`, `Farcaster`, `farcaster` + +**Accepted input formats**: + +- Plain username: `alice` +- Prefixed: `@alice` +- Warpcast URL: `https://warpcast.com/alice`, `http://warpcast.com/alice` + +**Validation**: extracted username must match `^[a-z0-9][a-z0-9-]{0,15}$` (Farcaster usernames are lowercase-only, 1–16 chars). + +**Canonical form**: lowercase username (e.g. `alice`). 
+ +**displayValue**: `@{username}`. + +**url**: `https://warpcast.com/{username}`. + +--- + +#### Discord (`com.discord`) + +Unnormalized variants: `discord` + +Discord supports two username formats: the new format (post-2023, no discriminator) and the legacy format (with `#NNNN` discriminator). Both are accepted and preserved as-is. + +**Accepted input formats**: + +- New username: `alice` (2–32 chars, lowercase alphanumeric, underscores, periods) +- Legacy username: `alice#1234` + +**Validation**: + +- New format: must match `^[a-z0-9_.]{2,32}$`. +- Legacy format: must match `^.{2,32}#[0-9]{4}$`. + +**Canonical form**: the username as provided (lowercased for new format, `username#NNNN` preserved for legacy). + +**displayValue**: same as canonical form. + +**url**: Discord does not provide a reliable public profile URL by username (profile URLs use numeric user IDs). Returns `null`. + +--- + +#### Telegram (`org.telegram`) + +Unnormalized variants: `telegram`, `com.telegram`, `Telegram` + +**Accepted input formats**: + +- Plain username: `alice` +- Prefixed: `@alice` +- t.me URL: `https://t.me/alice`, `http://t.me/alice`, `t.me/alice` +- telegram.me URL: `https://telegram.me/alice` + +**Validation**: extracted username must match `^[a-zA-Z0-9_]{5,32}$`. + +**Canonical form**: lowercase username (e.g. `alice`). + +**displayValue**: `@{username}`. + +**url**: `https://t.me/{username}`. + +--- + +#### Reddit (`com.reddit`) + +Unnormalized variants: `reddit` + +**Accepted input formats**: + +- Plain username: `alice` +- Prefixed: `u/alice`, `/u/alice` +- reddit.com URL: `https://reddit.com/u/alice`, `https://www.reddit.com/u/alice`, `https://reddit.com/user/alice` + +**Validation**: extracted username must match `^[a-zA-Z0-9_-]{3,20}$`. + +**Canonical form**: username only, case preserved (e.g. `alice`). + +**displayValue**: `u/{username}`. + +**url**: `https://www.reddit.com/u/{username}`. 
+ +--- + +#### Website URL (`url`) + +Unnormalized variants: `URL`, `Website`, `website` + +**Accepted input formats**: any string that parses as a valid URL with `http` or `https` scheme. + +**Validation**: `new URL(value)` must not throw and `scheme` must be `http:` or `https:`. + +**Canonical form**: `new URL(value).href` (the browser-canonical URL string, e.g. trailing slash normalized). + +**displayValue**: same as canonical form. + +**url**: same as canonical form. + +--- + +#### Email (`email`) + +Unnormalized variants: `Email` + +**Accepted input formats**: any string matching a standard email format. + +**Validation**: must match `^[^\s@]+@[^\s@]+\.[^\s@]+$` (basic structural check; full RFC 5322 compliance is not required). + +**Canonical form**: lowercased email address. + +**displayValue**: same as canonical form. + +**url**: `mailto:{email}`. + +--- + +#### Avatar (`avatar`) + +Unnormalized variants: `Avatar` + +Avatar values are complex — they can be HTTPS URLs, IPFS URIs, NFT references (EIP-155), or data URIs. Normalization preserves the value as-is after validation. + +**Accepted input formats**: + +- HTTPS/HTTP URL: `https://example.com/avatar.png` +- IPFS URI: `ipfs://Qm...`, `ipfs://bafy...` +- EIP-155 NFT reference: `eip155:1/erc721:0x.../1`, `eip155:1/erc1155:0x.../1` +- Data URI: `data:image/png;base64,...` + +**Validation**: must begin with one of the recognized prefixes (`https://`, `http://`, `ipfs://`, `eip155:`, `data:image/`). + +**Canonical form**: value as-is (no transformation applied). + +**displayValue**: same as canonical form. + +**url**: for `https://`/`http://` — same as value; for `ipfs://` — convert to `https://ipfs.io/ipfs/{cid}`; for `eip155:` and `data:` URIs — `null` (requires off-chain resolution beyond this layer). + +--- + +> **Note**: all existing code at `packages/ensnode-sdk/src/resolution/auto-cleanup/` will be deleted before implementation of this specification begins. No migration is required. 
+ +--- + +## Phase 3: API integration + +### Design decisions + +#### Key expansion in the RPC path + +When `normalize=true`, client-requested keys are expanded to include all unnormalized variants before resolution. This expansion must happen regardless of the resolution path (indexed or RPC). + +For the RPC path, ENS resolution already uses a multicall pattern: all record lookups for a given name are batched into a single `eth_call`. Key expansion therefore does **not** require extra round trips — the expanded key list is simply added to the same multicall batch. The overall overhead is one additional text slot per unnormalized variant per expanded key, within the same single RPC call. + +### Parameters + +Two query parameters on `GET /records/:name`: + + +| Parameter | Type | Default | Description | +| ----------------------- | ------- | ------- | -------------------------------------------------------------------------------------------------------------- | +| `normalize` | boolean | `true` | Normalize keys and values. If true, expand keys pre-resolution and run normalization pipeline post-resolution. | +| `normalizationMetadata` | boolean | `false` | Include the full `NormalizedRecordSet` metadata in the response. Only meaningful when `normalize=true`. | + + +### Key expansion behavior + +When `normalize=true`: + +1. The requested text keys are passed through `expandNormalizedKeys` to produce a full candidate list. +2. **Indexed path**: the index is queried for all candidate keys directly (no extra cost, single query). +3. **RPC path**: all candidate keys are included in the same multicall batch used to resolve all other records. No additional RPC round trips are incurred. +4. The resolved raw records (potentially including fallback key variants) are then passed through the normalization pipeline. 
+ +### Response shape + +```typescript +interface ResolveRecordsResponse { + records: ResolverRecordsResponse; + /** Only present when normalize=true AND normalizationMetadata=true */ + normalizationMetadata?: NormalizedRecordSet; + accelerationRequested: boolean; + accelerationAttempted: boolean; + trace?: TracingTrace; +} +``` + +The `records.texts` field always contains the stripped, clean output when `normalize=true` (normalized keys mapping to normalized values, unrecognized keys passed through). + +--- + +## Open questions + +1. **Normalized key for Twitter**: `com.twitter` or `com.x` (reflecting the platform rebrand)? What unnormalized variants should be included? +2. **Parameter name for metadata field**: `normalizationMetadata`, `includeNormalizationMetadata`, or another name? +3. **Unnormalizable behavior**: When a key is recognized but no candidate produces a valid value, should `records.texts` contain `null` for that key, or should the key be omitted from the response entirely? +4. `**displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. + From cd22b13c5f04e121afd2acdc154cfa20ef92b0a7 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 23 Feb 2026 16:33:34 +0100 Subject: [PATCH 02/10] refactor(plans): enhance normalization definitions for text records - Updated `url` type to allow null values when no URL can be derived. - Improved clarity in `Unnormalizable` and `Unrecognized` operations to include handling of null values and reasons for normalization failures. - Refined comments and invariants for better understanding of normalization logic. 
--- ...cleanup_architecture_plan_50f1bdfb.plan.md | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 8e0e0fb86..54d45d5b6 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -56,12 +56,14 @@ type TextRecordKeyDef = { normalize: (validatedValue: string) => NormalizationResult; /** Human-friendly value for UIs. e.g. "@alice" from "alice" */ displayValue: (normalizedValue: string) => string; - /** Full URL to the related resource. e.g. "https://x.com/alice" */ - url: (normalizedValue: string) => string; + /** Full URL to the related resource, or null if no URL can be derived (e.g. Discord, NFT avatar). */ + url: (normalizedValue: string) => string | null; }; ``` -Invariants (enforced at registry construction): +**Key lookup is case-sensitive and exact-match only.** A `rawKey` of `TWITTER` does not match `twitter` or `Twitter`. Every cased variant that a production ENS resolver may use must be listed explicitly in `unnormalizedKeys`. There is no implicit case folding at lookup time. + +Invariants (enforced at `TextRecordNormalizationDefs` construction): - No two `normalizedKey` values are the same. - No two `unnormalizedKey` values are the same across all definitions. 
@@ -111,10 +113,17 @@ type ValueNormalizationResult = | { op: "AlreadyNormalized"; rawValue: string; normalizedValue: string } /** Value was successfully transformed; rawValue !== normalizedValue */ | { op: "Normalized"; rawValue: string; normalizedValue: string } - /** Key was recognized but no candidate produced a valid normalized value */ - | { op: "Unnormalizable"; rawValue: string; normalizedValue: null } - /** Key is unrecognized; value passed through without validation/normalization */ - | { op: "Unrecognized"; rawValue: string; normalizedValue: null }; + /** + * Key was recognized but value could not be normalized. + * rawValue is null when the resolver had no value for this key. + * reason carries the validation/normalization failure message. + */ + | { op: "Unnormalizable"; rawValue: string | null; normalizedValue: null; reason: string } + /** + * Key is unrecognized; value passed through without validation/normalization. + * rawValue is null when the resolver had no value for this unrecognized key. + */ + | { op: "Unrecognized"; rawValue: string | null; normalizedValue: null }; ``` ### 1.4 Layer 1: Individual record normalization @@ -142,9 +151,10 @@ Logic: - Look up `rawKey` in registry (by normalizedKey or unnormalizedKey). - If found: determine `keyResult` (AlreadyNormalized if rawKey === normalizedKey, else Normalized). - - Run validate + normalize on rawValue. - - If succeeded: `valueResult` is AlreadyNormalized or Normalized. - - If failed: `valueResult` is Unnormalizable. + - If `rawValue` is null: `valueResult` is Unnormalizable with reason `"no value set"`. + - If `rawValue` is a string: run validate + normalize on rawValue. + - If succeeded: `valueResult` is AlreadyNormalized (when validate + normalize produced the same string) or Normalized (when the string changed). + - If failed: `valueResult` is Unnormalizable with the reason from the failing step. - If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. 
### 1.5 Layer 2: Set-level normalization @@ -158,13 +168,12 @@ type RecordNormalizationOp = /** Key unrecognized — both key and value are passed through unchanged */ | "UnrecognizedKeyAndValue" /** - * Key recognized (AlreadyNormalized or Normalized) but value op is Unrecognized. - * Distinct from UnnormalizableValue: the value could not even be attempted - * (e.g., schema mismatch), whereas UnnormalizableValue means validation was - * attempted and failed. + * Key recognized, value could not be normalized — excluded from clean output. + * Covers all failure cases for a recognized key: null value, format mismatch, + * validation failure, etc. Note: a separate "UnrecognizedValue" op is not needed + * because ValueNormalizationOp "Unrecognized" is by definition only reachable + * when the key itself is unrecognized (captured by "UnrecognizedKeyAndValue" above). */ - | "UnrecognizedValue" - /** Key recognized, value attempted but could not be normalized — excluded from clean output */ | "UnnormalizableValue" /** Another record already claimed this normalized key (lower priority variant) */ | "DuplicateNormalizedKey"; @@ -177,7 +186,7 @@ type RecordNormalizationResult = { normalizedValue?: string; displayKey?: string; displayValue?: string; - url?: string; + url?: string | null; }; type NormalizedRecordSet = { From c9b1d62566c4e1d1c1c8beb0eba524d4a803f7cf Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 23 Feb 2026 17:01:45 +0100 Subject: [PATCH 03/10] refactor(plans): enhance text record normalization logic and definitions - Introduced `TextRecordNormalizationDefs` to streamline key lookups with two maps for normalized and unnormalized keys. - Updated normalization functions to utilize the new definitions, improving clarity and efficiency. - Enhanced comments and invariants to better explain normalization operations and priority rules for handling records. 
--- ...cleanup_architecture_plan_50f1bdfb.plan.md | 92 ++++++++++++------- 1 file changed, 57 insertions(+), 35 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 54d45d5b6..a6c08f7d2 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -39,6 +39,11 @@ Every function is a pure in-memory data operation with no I/O dependencies. Each logical record type has a **normalized key** (the canonical primary key) and an ordered list of **unnormalized key variants** (legacy/alternative names tried as fallbacks): ```typescript +type TextRecordNormalizationDefs = { + byNormalizedKey: Map<string, TextRecordKeyDef>; + byUnnormalizedKey: Map<string, TextRecordKeyDef>; +}; + type TextRecordKeyDef = { /** Canonical key used in API responses and UI labels. e.g. "com.twitter" */ normalizedKey: string; @@ -135,6 +140,8 @@ type IndividualRecordNormalizationResult = { keyResult: KeyNormalizationResult; valueResult: ValueNormalizationResult; }; +// Invariant: if keyResult.op === "Unrecognized" then valueResult.op === "Unrecognized". +// A recognized key can never produce a valueResult with op "Unrecognized". ``` **Key function — normalize one record:** ```typescript function normalizeRecord( rawKey: string, rawValue: string | null, - registry: TextRecordNormalizationRegistry, + defs: TextRecordNormalizationDefs, ): IndividualRecordNormalizationResult ``` Logic: -- Look up `rawKey` in registry (by normalizedKey or unnormalizedKey). +- Look up `rawKey` in `defs` (via `byNormalizedKey` or `byUnnormalizedKey`). - If found: determine `keyResult` (AlreadyNormalized if rawKey === normalizedKey, else Normalized). - If `rawValue` is null: `valueResult` is Unnormalizable with reason `"no value set"`. - If `rawValue` is a string: run validate + normalize on rawValue. 
- - If succeeded: `valueResult` is AlreadyNormalized (when validate + normalize produced the same string) or Normalized (when the string changed). - - If failed: `valueResult` is Unnormalizable with the reason from the failing step. + - If succeeded: `valueResult` is `AlreadyNormalized` iff `normalizedValue === rawValue` (validate + normalize produced a value identical to the original rawValue); otherwise `valueResult` is `Normalized` (normalizedValue differs from rawValue). + - If failed: `valueResult` is `Unnormalizable` with the reason from the failing step. - If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. ### 1.5 Layer 2: Set-level normalization @@ -178,21 +185,26 @@ type RecordNormalizationOp = /** Another record already claimed this normalized key (lower priority variant) */ | "DuplicateNormalizedKey"; -type RecordNormalizationResult = { - op: RecordNormalizationOp; - individual: IndividualRecordNormalizationResult; - /** Present when op is "Normalized" */ - normalizedKey?: string; - normalizedValue?: string; - displayKey?: string; - displayValue?: string; - url?: string | null; -}; +type RecordNormalizationResult = + | { + op: "Normalized"; + individual: IndividualRecordNormalizationResult; + normalizedKey: string; + normalizedValue: string; + displayKey: string; + displayValue: string; + url: string | null; + } + | { + op: "UnrecognizedKeyAndValue" | "UnnormalizableValue" | "DuplicateNormalizedKey"; + individual: IndividualRecordNormalizationResult; + }; type NormalizedRecordSet = { /** - * Maps each normalized key to its winning RecordNormalizationResult. - * Also contains UnrecognizedKeyAndValue records (keyed by their rawKey). + * Two distinct kinds of entries are keyed here: + * - op "Normalized": keyed by normalizedKey (the canonical key for the winner). + * - op "UnrecognizedKeyAndValue": keyed by rawKey (passed through as-is). 
*/ normalizedRecords: Record<string, RecordNormalizationResult>; /** @@ -203,18 +215,28 @@ }; ``` -**Priority rule** when multiple records share the same normalized key: +**Priority rule** when multiple records share the same normalized key — two-pass algorithm: + +**Pass 1 — normalizable candidates** (value op is `AlreadyNormalized` or `Normalized`): +1. Among these, the record whose `rawKey === normalizedKey` wins first. +2. If none match the normalized key, the first in `unnormalizedKeys` order wins. +3. The winner gets `op: "Normalized"` and is placed in `normalizedRecords`. +4. Pass-1 losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. + +**Pass 2 — only if Pass 1 found no winner** (all candidates are `Unnormalizable`): +1. Among Unnormalizable candidates, the record whose `rawKey === normalizedKey` wins first. +2. If none match the normalized key, the first in `unnormalizedKeys` order wins. +3. The winner gets `op: "UnnormalizableValue"` and goes into `unnormalizedRecords` (no valid value exists, so `normalizedRecords` has no entry for this normalized key). +4. Pass-2 losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. -1. The record whose `rawKey === normalizedKey` wins unconditionally. -2. Among remaining candidates, priority follows the ordering in `unnormalizedKeys`. -3. Losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. +This ensures a valid value from any fallback key always beats an invalid or null value on the canonical key. 
**Key function — build the set:** ```typescript function normalizeRecordSet( records: Array<{ rawKey: string; rawValue: string | null }>, - registry: TextRecordNormalizationRegistry, + defs: TextRecordNormalizationDefs, ): NormalizedRecordSet ``` @@ -228,7 +250,7 @@ function stripNormalizationMetadata( ): Record<string, string | null> ``` -Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. +Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. Unrecognized keys are always included even when `rawValue` is null — producing `{ [rawKey]: null }` — so the caller receives a complete picture of every key that was present in the input. ### 1.7 Pre-resolution: key expansion @@ -237,7 +259,7 @@ Before resolution, normalized keys are expanded into the full set of candidate k ```typescript function expandNormalizedKeys( normalizedKeys: readonly string[], - registry: TextRecordNormalizationRegistry, + defs: TextRecordNormalizationDefs, ): string[] ``` @@ -245,9 +267,9 @@ Returns: `[normalizedKey, ...unnormalizedKeys]` for each known key, deduplicated --- -## Phase 1: Transform registry +## Phase 1: Initial `TextRecordNormalizationDefs` -The registry maps normalized keys to `TextRecordKeyDef`. All lookups support both normalized keys and unnormalized variants. +The initial definitions cover the 9 most common ENS text record key types. All lookups support both normalized keys and unnormalized variants via the two maps on `TextRecordNormalizationDefs`. **Initial set of recognized keys:** @@ -267,14 +289,14 @@ The registry maps normalized keys to `TextRecordKeyDef`. All lookups support bot **Open question:** Should the normalized key for Twitter be `com.twitter` or `com.x` (reflecting the platform rebrand)? If `com.x`, what are the unnormalized variants to include? 
-### Registry construction +### `TextRecordNormalizationDefs` construction -The registry is constructed once at module initialization as a plain object with two internal lookup maps built from the definitions: +A `TextRecordNormalizationDefs` is built once from the array of `TextRecordKeyDef` objects. Its two maps provide O(1) lookup by either key form: -- `byNormalizedKey: Map` — keyed by `normalizedKey`. -- `byUnnormalizedKey: Map` — keyed by each entry in `unnormalizedKeys`, pointing to the owning def. +- `byNormalizedKey` — keyed by each `normalizedKey`. +- `byUnnormalizedKey` — keyed by each entry in `unnormalizedKeys`, pointing to the owning def. -At construction time the registry validates its own invariants and throws synchronously if any are violated (fail fast). No lazy initialization. +At construction time the invariants are validated and any violation throws synchronously (fail fast). No lazy initialization. ### Per-key transform specifications @@ -313,7 +335,7 @@ Unnormalized variants: `vnd.github`, `github` - Prefixed: `@alice` - github.com URL: `https://github.com/alice`, `http://github.com/alice`, `github.com/alice` -**Validation**: extracted username must match `^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$` (1–39 chars, alphanumeric and hyphens, no leading/trailing hyphen). +**Validation**: extracted username must match `^(?!.*--)[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$` (1–39 chars, alphanumeric and hyphens, no leading/trailing hyphen, no consecutive hyphens). **Canonical form**: lowercase username (e.g. `alice`). @@ -331,7 +353,7 @@ Unnormalized variants: `com.warpcast`, `Farcaster`, `farcaster` - Plain username: `alice` - Prefixed: `@alice` -- Warpcast URL: `https://warpcast.com/alice`, `http://warpcast.com/alice` +- Warpcast URL: `https://warpcast.com/alice`, `http://warpcast.com/alice`, `warpcast.com/alice` **Validation**: extracted username must match `^[a-z0-9][a-z0-9-]{0,15}$` (Farcaster usernames are lowercase-only, 1–16 chars). 
@@ -356,8 +378,8 @@ Discord supports two username formats: the new format (post-2023, no discriminat **Validation**: -- New format: must match `^[a-z0-9_.]{2,32}$`. -- Legacy format: must match `^.{2,32}#[0-9]{4}$`. +- New format: must match `^(?!.*\.\.)[a-z0-9_.]{2,32}$` (no consecutive periods). +- Legacy format: must match `^[^\x00-\x1F\x7F]{2,32}#[0-9]{4}$` (printable characters only before the `#` discriminator). **Canonical form**: the username as provided (lowercased for new format, `username#NNNN` preserved for legacy). @@ -519,5 +541,5 @@ The `records.texts` field always contains the stripped, clean output when `norma 1. **Normalized key for Twitter**: `com.twitter` or `com.x` (reflecting the platform rebrand)? What unnormalized variants should be included? 2. **Parameter name for metadata field**: `normalizationMetadata`, `includeNormalizationMetadata`, or another name? 3. **Unnormalizable behavior**: When a key is recognized but no candidate produces a valid value, should `records.texts` contain `null` for that key, or should the key be omitted from the response entirely? -4. `**displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. +4. **`displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. 
From 463048d262402575d640e6640cfa9c5743b98330 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 23 Feb 2026 18:10:46 +0100 Subject: [PATCH 04/10] refactor(plans): improve null value handling and normalization logic - Updated `normalizeRecord` to return an `IndividualRecordNormalizationResult` for null values, clarifying its treatment in the normalization process. - Refined the `normalizedRecords` and `unnormalizedRecords` types to ensure only relevant operation values are included. - Enhanced comments to provide clearer explanations of the priority rules and preconditions for key normalization. --- ...cleanup_architecture_plan_50f1bdfb.plan.md | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index a6c08f7d2..88ea146de 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -88,7 +88,7 @@ type NormalizationResult = | { ok: false; reason: string }; // human-readable reason normalization failed ``` -**Null value handling**: if `rawValue` is `null` (the resolver has no value for that key), `normalizeRecord` produces `op: "Unnormalizable"` with `reason: "no value set"`. The key is still recognized; the record participates in the set-level priority contest but always loses to any record with a non-null rawValue. +**Null value handling**: if `rawValue` is `null` (the resolver has no value for that key), `normalizeRecord` returns an `IndividualRecordNormalizationResult` whose `valueResult` has `op: "Unnormalizable"` with `reason: "no value set"`. At the set level, a null-value record is treated identically to any other `Unnormalizable` record: it only participates in Pass 2 (which runs only when no candidate successfully normalizes), and loses to any record whose value op is `AlreadyNormalized` or `Normalized`. 
### 1.2 Key normalization types (per key) @@ -205,25 +205,34 @@ type NormalizedRecordSet = { * Two distinct kinds of entries are keyed here: * - op "Normalized": keyed by normalizedKey (the canonical key for the winner). * - op "UnrecognizedKeyAndValue": keyed by rawKey (passed through as-is). + * No other op values appear in this map. */ - normalizedRecords: Record; + normalizedRecords: Record< + string, + Extract + >; /** * Records that did not make it into normalizedRecords: * UnnormalizableValue and DuplicateNormalizedKey. */ - unnormalizedRecords: RecordNormalizationResult[]; + unnormalizedRecords: Extract< + RecordNormalizationResult, + { op: "UnnormalizableValue" | "DuplicateNormalizedKey" } + >[]; }; ``` **Priority rule** when multiple records share the same normalized key — two-pass algorithm: **Pass 1 — normalizable candidates** (value op is `AlreadyNormalized` or `Normalized`): + 1. Among these, the record whose `rawKey === normalizedKey` wins first. 2. If none match the normalized key, the first in `unnormalizedKeys` order wins. 3. The winner gets `op: "Normalized"` and is placed in `normalizedRecords`. 4. Pass-1 losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. **Pass 2 — only if Pass 1 found no winner** (all candidates are `Unnormalizable`): + 1. Among Unnormalizable candidates, the record whose `rawKey === normalizedKey` wins first. 2. If none match the normalized key, the first in `unnormalizedKeys` order wins. 3. The winner gets `op: "UnnormalizableValue"` and goes into `unnormalizedRecords` (no valid value exists, so `normalizedRecords` has no entry for this normalized key). @@ -263,7 +272,9 @@ function expandNormalizedKeys( ): string[] ``` -Returns: `[normalizedKey, ...unnormalizedKeys]` for each known key, deduplicated. Unknown keys are kept as-is. +**Precondition**: no element of `normalizedKeys` may be an unnormalized key variant (i.e. present in `defs.byUnnormalizedKey` but not in `defs.byNormalizedKey`). 
Passing `vnd.twitter` where `com.twitter` is expected is a caller error and must throw synchronously with a clear message listing the offending keys. Completely unknown keys (absent from both maps) are not an error — they are passed through as-is, supporting arbitrary user-defined keys. + +Returns: `[normalizedKey, ...unnormalizedKeys]` for each key found in `defs.byNormalizedKey`, followed by any unrecognized keys as-is. The result is deduplicated by first-occurrence: if the same key appears more than once, its first position is kept and subsequent occurrences are dropped. Ordering is otherwise stable and deterministic, ensuring consistent multicall construction and reproducible traces. --- @@ -398,7 +409,7 @@ Unnormalized variants: `telegram`, `com.telegram`, `Telegram` - Plain username: `alice` - Prefixed: `@alice` - t.me URL: `https://t.me/alice`, `http://t.me/alice`, `t.me/alice` -- telegram.me URL: `https://telegram.me/alice` +- telegram.me URL: `https://telegram.me/alice`, `http://telegram.me/alice`, `telegram.me/alice` **Validation**: extracted username must match `^[a-zA-Z0-9_]{5,32}$`. @@ -436,7 +447,7 @@ Unnormalized variants: `URL`, `Website`, `website` **Accepted input formats**: any string that parses as a valid URL with `http` or `https` scheme. -**Validation**: `new URL(value)` must not throw and `scheme` must be `http:` or `https:`. +**Validation**: `new URL(value)` must not throw and `url.protocol` must be `"http:"` or `"https:"`. **Canonical form**: `new URL(value).href` (the browser-canonical URL string, e.g. trailing slash normalized). @@ -542,4 +553,5 @@ The `records.texts` field always contains the stripped, clean output when `norma 2. **Parameter name for metadata field**: `normalizationMetadata`, `includeNormalizationMetadata`, or another name? 3. 
**Unnormalizable behavior**: When a key is recognized but no candidate produces a valid value, should `records.texts` contain `null` for that key, or should the key be omitted from the response entirely? 4. **`displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. +5. **Client requesting an unnormalized key directly**: If a client passes `texts=vnd.twitter` (an unnormalized variant) instead of `texts=com.twitter`, should `expandNormalizedKeys` throw immediately (current spec — fail fast, caller error), or silently map it to the canonical key and expand from there (more forgiving for legacy integrations)? The issue does not address this case. From ea56f46839453bb27fb62535ae45653282db73f7 Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 25 Feb 2026 13:07:48 +0100 Subject: [PATCH 05/10] refactor(plans): clarify normalization logic and handling of unrecognized records - Expanded the logic for handling unrecognized keys and values in the normalization process, ensuring that duplicates are correctly categorized in `unnormalizedRecords`. - Updated validation rules for extracted usernames to enforce a minimum length of 4 characters. - Enhanced comments to clarify the treatment of `Unnormalizable` records and the priority of canonical keys in normalization decisions. 
--- ...-cleanup_architecture_plan_50f1bdfb.plan.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 88ea146de..97b14956d 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -229,7 +229,8 @@ type NormalizedRecordSet = { 1. Among these, the record whose `rawKey === normalizedKey` wins first. 2. If none match the normalized key, the first in `unnormalizedKeys` order wins. 3. The winner gets `op: "Normalized"` and is placed in `normalizedRecords`. -4. Pass-1 losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. +4. Pass-1 losers (normalizable but not the winner) get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. +5. All `Unnormalizable` candidates are excluded from Pass 1. If Pass 1 found a winner, each excluded `Unnormalizable` record gets `op: "UnnormalizableValue"` and goes into `unnormalizedRecords`. Their raw values remain accessible via `individual.valueResult.rawValue` when `normalizationMetadata` is requested. **Pass 2 — only if Pass 1 found no winner** (all candidates are `Unnormalizable`): @@ -238,7 +239,7 @@ type NormalizedRecordSet = { 3. The winner gets `op: "UnnormalizableValue"` and goes into `unnormalizedRecords` (no valid value exists, so `normalizedRecords` has no entry for this normalized key). 4. Pass-2 losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. -This ensures a valid value from any fallback key always beats an invalid or null value on the canonical key. +This ensures a valid value from any fallback key always beats an invalid or null value on the canonical key. 
Example: if `com.twitter = ""` (invalid) and `vnd.twitter = "alice"` (valid), Pass 1 selects `vnd.twitter` as winner; `com.twitter` gets `op: "UnnormalizableValue"` in `unnormalizedRecords` with its bad raw value still accessible via `normalizationMetadata`. **Key function — build the set:** @@ -326,7 +327,7 @@ Unnormalized variants: `vnd.twitter`, `twitter`, `Twitter` - twitter.com URL: `https://twitter.com/alice`, `http://twitter.com/alice`, `twitter.com/alice` - x.com URL: `https://x.com/alice`, `http://x.com/alice`, `x.com/alice` -**Validation**: extracted username must match `^[a-zA-Z0-9_]{1,15}$`. +**Validation**: extracted username must match `^[a-zA-Z0-9_]{4,15}$`. **Canonical form**: lowercase username without `@` prefix (e.g. `alice`). @@ -494,12 +495,6 @@ Avatar values are complex — they can be HTTPS URLs, IPFS URIs, NFT references **url**: for `https://`/`http://` — same as value; for `ipfs://` — convert to `https://ipfs.io/ipfs/{cid}`; for `eip155:` and `data:` URIs — `null` (requires off-chain resolution beyond this layer). ---- - -> **Note**: all existing code at `packages/ensnode-sdk/src/resolution/auto-cleanup/` will be deleted before implementation of this specification begins. No migration is required. - ---- - ## Phase 3: API integration ### Design decisions @@ -552,6 +547,9 @@ The `records.texts` field always contains the stripped, clean output when `norma 1. **Normalized key for Twitter**: `com.twitter` or `com.x` (reflecting the platform rebrand)? What unnormalized variants should be included? 2. **Parameter name for metadata field**: `normalizationMetadata`, `includeNormalizationMetadata`, or another name? 3. **Unnormalizable behavior**: When a key is recognized but no candidate produces a valid value, should `records.texts` contain `null` for that key, or should the key be omitted from the response entirely? -4. 
**`displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. +4. `**displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. 5. **Client requesting an unnormalized key directly**: If a client passes `texts=vnd.twitter` (an unnormalized variant) instead of `texts=com.twitter`, should `expandNormalizedKeys` throw immediately (current spec — fail fast, caller error), or silently map it to the canonical key and expand from there (more forgiving for legacy integrations)? The issue does not address this case. +6. **Placement of `UnrecognizedKeyAndValue` records**: The current spec places them in `normalizedRecords` (keyed by `rawKey`) so that `stripNormalizationMetadata` only needs to iterate one map to produce the full output. The issue's own description says `normalizedRecords` maps *normalized keys* and `unnormalizedRecords` holds "all records that were unnormalized for one reason or another" — which by that framing would put unrecognized records in `unnormalizedRecords`. Decision: should `unnormalizedRecords` mean "records that don't appear in output" (current spec) or "records that weren't normalized" (issue's framing)? +7. **Verify validation rules against each service's official constraints**: The regexes and accepted input formats in this spec were derived from best-effort research and may not match each platform's current actual rules. 
Before finalising implementation, verify against official documentation or source code for: Twitter/X (username charset, 15-char limit), GitHub (39-char limit, hyphen rules), Farcaster (lowercase-only, length), Discord (new-format charset, period rules, legacy discriminator format), Telegram (5–32 chars, charset), Reddit (3–20 chars, charset), email (RFC compliance level), avatar (supported URI schemes). Flag any discrepancy as a bug in the transform definition. +8. **Canonical key priority when multiple valid values exist**: The issue states "the rawKey equal to the normalizedKey always gets top priority." But if both `com.twitter = "alice"` and `vnd.twitter = "bob"` are valid normalized values, should `com.twitter` win unconditionally (current spec — structural rule, ignores recency), or should priority be purely order-based (treating `normalizedKey` as simply position 0 in a unified key sequence, allowing it to be overridden per definition)? The issue states the rule but does not justify why the canonical key should beat other valid values. From c3c15722ffa5e402a8a75e86d68fba6e50d31f66 Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 25 Feb 2026 13:17:01 +0100 Subject: [PATCH 06/10] refactor(plans): clarify handling of unrecognized keys and values in normalization - Expanded the logic for unrecognized `rawKey` handling, ensuring the first occurrence is categorized as `UnrecognizedKeyAndValue` and subsequent duplicates are marked as `DuplicateNormalizedKey`. - Enhanced comments to clarify the implications of normalization decisions and the treatment of records in `normalizedRecords` and `unnormalizedRecords`. 
--- .cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 97b14956d..992b58363 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -162,7 +162,7 @@ Logic: - If `rawValue` is a string: run validate + normalize on rawValue. - If succeeded: `valueResult` is `AlreadyNormalized` iff `normalizedValue === rawValue` (validate + normalize produced a value identical to the original rawValue); otherwise `valueResult` is `Normalized` (normalizedValue differs from rawValue). - If failed: `valueResult` is `Unnormalizable` with the reason from the failing step. -- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. +- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. At the set level, the first record with a given unrecognized `rawKey` is written into `normalizedRecords` as `UnrecognizedKeyAndValue`; any subsequent record with the same `rawKey` gets `op: "DuplicateNormalizedKey"` and goes into `unnormalizedRecords` (consistent with how recognized-key duplicates are handled). ### 1.5 Layer 2: Set-level normalization @@ -551,5 +551,4 @@ The `records.texts` field always contains the stripped, clean output when `norma 5. **Client requesting an unnormalized key directly**: If a client passes `texts=vnd.twitter` (an unnormalized variant) instead of `texts=com.twitter`, should `expandNormalizedKeys` throw immediately (current spec — fail fast, caller error), or silently map it to the canonical key and expand from there (more forgiving for legacy integrations)? The issue does not address this case. 6. 
**Placement of `UnrecognizedKeyAndValue` records**: The current spec places them in `normalizedRecords` (keyed by `rawKey`) so that `stripNormalizationMetadata` only needs to iterate one map to produce the full output. The issue's own description says `normalizedRecords` maps *normalized keys* and `unnormalizedRecords` holds "all records that were unnormalized for one reason or another" — which by that framing would put unrecognized records in `unnormalizedRecords`. Decision: should `unnormalizedRecords` mean "records that don't appear in output" (current spec) or "records that weren't normalized" (issue's framing)? 7. **Verify validation rules against each service's official constraints**: The regexes and accepted input formats in this spec were derived from best-effort research and may not match each platform's current actual rules. Before finalising implementation, verify against official documentation or source code for: Twitter/X (username charset, 15-char limit), GitHub (39-char limit, hyphen rules), Farcaster (lowercase-only, length), Discord (new-format charset, period rules, legacy discriminator format), Telegram (5–32 chars, charset), Reddit (3–20 chars, charset), email (RFC compliance level), avatar (supported URI schemes). Flag any discrepancy as a bug in the transform definition. -8. **Canonical key priority when multiple valid values exist**: The issue states "the rawKey equal to the normalizedKey always gets top priority." But if both `com.twitter = "alice"` and `vnd.twitter = "bob"` are valid normalized values, should `com.twitter` win unconditionally (current spec — structural rule, ignores recency), or should priority be purely order-based (treating `normalizedKey` as simply position 0 in a unified key sequence, allowing it to be overridden per definition)? The issue states the rule but does not justify why the canonical key should beat other valid values. 
From 67f17c4c3bb62cbd4034ebebe8e8ceef83c1742a Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 2 Mar 2026 23:33:17 +0100 Subject: [PATCH 07/10] refactor(plans): update normalization logic and documentation for key handling --- ...cleanup_architecture_plan_50f1bdfb.plan.md | 85 +++++++++++++------ 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 992b58363..0dc16af03 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -10,11 +10,13 @@ isProject: false ## Goals (from issue #1061) 1. Clients can request that all resolver records are **validated** (value matches expected format) and **normalized** (value is in a single canonical form regardless of how it was stored). -2. Keys are also normalized: legacy/fallback key variants (e.g. `vnd.twitter`, `twitter`) are resolved and the value is returned under the canonical key (e.g. `com.twitter`). +2. Keys are also normalized: legacy/fallback key variants (e.g. `com.twitter`, `vnd.twitter`, `twitter`) are resolved and the value is returned under the canonical key (e.g. `com.x`). 3. Unknown keys (no normalization logic defined) pass through unchanged. 4. Full normalization metadata is optionally returned so UIs can inspect and explain how each record was processed. 5. Normalized records carry UI-friendly enrichment: `displayKey`, `displayValue`, `url`. +**Phase 1 scope (this implementation):** Pure in-memory library only. Input = a raw set of key/value pairs (already resolved from a resolver). Output = a normalized record set and optionally stripped/enriched output. No I/O, no resolution logic, no API changes, no client key-request behavior. 
+ --- ## Architecture: Phase separation @@ -45,7 +47,7 @@ type TextRecordNormalizationDefs = { }; type TextRecordKeyDef = { - /** Canonical key used in API responses and UI labels. e.g. "com.twitter" */ + /** Canonical key used in API responses and UI labels. e.g. "com.x" */ normalizedKey: string; /** Human-friendly label for UIs. e.g. "Twitter" */ displayKey: string; @@ -162,7 +164,7 @@ Logic: - If `rawValue` is a string: run validate + normalize on rawValue. - If succeeded: `valueResult` is `AlreadyNormalized` iff `normalizedValue === rawValue` (validate + normalize produced a value identical to the original rawValue); otherwise `valueResult` is `Normalized` (normalizedValue differs from rawValue). - If failed: `valueResult` is `Unnormalizable` with the reason from the failing step. -- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. At the set level, the first record with a given unrecognized `rawKey` is written into `normalizedRecords` as `UnrecognizedKeyAndValue`; any subsequent record with the same `rawKey` gets `op: "DuplicateNormalizedKey"` and goes into `unnormalizedRecords` (consistent with how recognized-key duplicates are handled). +- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. At the set level, the first record with a given unrecognized `rawKey` is written into `records` as `UnrecognizedKeyAndValue`; any subsequent record with the same `rawKey` gets `op: "DuplicateNormalizedKey"` and goes into `excludedRecords` (consistent with how recognized-key duplicates are handled). ### 1.5 Layer 2: Set-level normalization @@ -202,20 +204,20 @@ type RecordNormalizationResult = type NormalizedRecordSet = { /** - * Two distinct kinds of entries are keyed here: + * Records that appear in the output. Two distinct kinds of entries are keyed here: * - op "Normalized": keyed by normalizedKey (the canonical key for the winner). * - op "UnrecognizedKeyAndValue": keyed by rawKey (passed through as-is). 
* No other op values appear in this map. */ - normalizedRecords: Record< + records: Record< string, Extract >; /** - * Records that did not make it into normalizedRecords: + * Records excluded from the main output: * UnnormalizableValue and DuplicateNormalizedKey. */ - unnormalizedRecords: Extract< + excludedRecords: Extract< RecordNormalizationResult, { op: "UnnormalizableValue" | "DuplicateNormalizedKey" } >[]; @@ -228,18 +230,18 @@ type NormalizedRecordSet = { 1. Among these, the record whose `rawKey === normalizedKey` wins first. 2. If none match the normalized key, the first in `unnormalizedKeys` order wins. -3. The winner gets `op: "Normalized"` and is placed in `normalizedRecords`. -4. Pass-1 losers (normalizable but not the winner) get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. -5. All `Unnormalizable` candidates are excluded from Pass 1. If Pass 1 found a winner, each excluded `Unnormalizable` record gets `op: "UnnormalizableValue"` and goes into `unnormalizedRecords`. Their raw values remain accessible via `individual.valueResult.rawValue` when `normalizationMetadata` is requested. +3. The winner gets `op: "Normalized"` and is placed in `records`. +4. Pass-1 losers (normalizable but not the winner) get `op: "DuplicateNormalizedKey"` and go into `excludedRecords`. +5. All `Unnormalizable` candidates are excluded from Pass 1. If Pass 1 found a winner, each excluded `Unnormalizable` record gets `op: "UnnormalizableValue"` and goes into `excludedRecords`. Their raw values remain accessible via `individual.valueResult.rawValue`. **Pass 2 — only if Pass 1 found no winner** (all candidates are `Unnormalizable`): 1. Among Unnormalizable candidates, the record whose `rawKey === normalizedKey` wins first. 2. If none match the normalized key, the first in `unnormalizedKeys` order wins. -3. 
The winner gets `op: "UnnormalizableValue"` and goes into `unnormalizedRecords` (no valid value exists, so `normalizedRecords` has no entry for this normalized key). -4. Pass-2 losers get `op: "DuplicateNormalizedKey"` and go into `unnormalizedRecords`. +3. The winner gets `op: "UnnormalizableValue"` and goes into `excludedRecords` (no valid value exists, so `records` has no entry for this normalized key). +4. Pass-2 losers get `op: "DuplicateNormalizedKey"` and go into `excludedRecords`. -This ensures a valid value from any fallback key always beats an invalid or null value on the canonical key. Example: if `com.twitter = ""` (invalid) and `vnd.twitter = "alice"` (valid), Pass 1 selects `vnd.twitter` as winner; `com.twitter` gets `op: "UnnormalizableValue"` in `unnormalizedRecords` with its bad raw value still accessible via `normalizationMetadata`. +This ensures a valid value from any fallback key always beats an invalid or null value on the canonical key. Example: if `com.x = ""` (invalid) and `vnd.twitter = "alice"` (valid), Pass 1 selects `vnd.twitter` as winner; `com.x` gets `op: "UnnormalizableValue"` in `excludedRecords` with its bad raw value still accessible via `individual.valueResult.rawValue`. **Key function — build the set:** @@ -260,9 +262,27 @@ function stripNormalizationMetadata( ): Record ``` -Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. Unrecognized keys are always included even when `rawValue` is null — producing `{ [rawKey]: null }` — so the caller receives a complete picture of every key that was present in the input. +Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. 
For recognized keys where no candidate produced a valid value (winner is "UnnormalizableValue" in `excludedRecords`), the key is included with value `null` (e.g. `{ "com.x": null }`), so callers can distinguish "key present but invalid" from "key not present." Unrecognized keys are always included even when `rawValue` is null — producing `{ [rawKey]: null }` — so the caller receives a complete picture of every key that was present in the input. Thus the result is built from both `set.records` and, for each "UnnormalizableValue" in `set.excludedRecords`, an entry `normalizedKey → null`. + +For clients that want UI-friendly enrichment (displayValue, url) without holding the full `NormalizedRecordSet`: + +```typescript +type EnrichedRecord = { + value: string | null; + displayValue: string | null; + url: string | null; +}; + +function stripNormalizationMetadataWithEnrichment( + set: NormalizedRecordSet, +): Record +``` + +Returns the same key set as `stripNormalizationMetadata`, but each entry includes `displayValue` and `url` derived from the def's enrichment functions when the key is recognized and the value is normalized. For "UnrecognizedKeyAndValue" and "UnnormalizableValue" (key present but invalid) records, `displayValue` and `url` are `null`. This allows UI callers to render enriched records without needing to inspect the full metadata. -### 1.7 Pre-resolution: key expansion +### 1.7 Pre-resolution: key expansion *(Phase 2+ — out of scope for Phase 1)* + +> **Note:** Key expansion and client-requested key behavior are out of scope for Phase 1. Phase 1 assumes input is already a set of raw key/value pairs obtained from some prior resolution step. Key expansion into candidate keys before resolution is handled in Phase 2/3. Before resolution, normalized keys are expanded into the full set of candidate keys that the resolver should be queried for: @@ -288,7 +308,7 @@ The initial definitions cover the 9 most common ENS text record key types. 
All l | Normalized key | Display key | Unnormalized key variants | | --------------------------- | ----------- | ---------------------------------------- | -| `com.twitter` (or `com.x`?) | Twitter / X | `vnd.twitter`, `twitter`, `Twitter` | +| `com.x` | X (Twitter) | `com.twitter`, `vnd.twitter`, `twitter`, `Twitter` | | `com.github` | GitHub | `vnd.github`, `github` | | `xyz.farcaster` | Farcaster | `com.warpcast`, `Farcaster`, `farcaster` | | `com.discord` | Discord | `discord` | @@ -299,8 +319,6 @@ The initial definitions cover the 9 most common ENS text record key types. All l | `avatar` | Avatar | `Avatar` | -**Open question:** Should the normalized key for Twitter be `com.twitter` or `com.x` (reflecting the platform rebrand)? If `com.x`, what are the unnormalized variants to include? - ### `TextRecordNormalizationDefs` construction A `TextRecordNormalizationDefs` is built once from the array of `TextRecordKeyDef` objects. Its two maps provide O(1) lookup by either key form: @@ -316,9 +334,9 @@ The following specifies validation, normalization, and UI enrichment for each of --- -#### Twitter / X (`com.twitter` or `com.x` — see open question) +#### X / Twitter (`com.x`) -Unnormalized variants: `vnd.twitter`, `twitter`, `Twitter` +Unnormalized variants: `com.twitter`, `vnd.twitter`, `twitter`, `Twitter` **Accepted input formats**: @@ -542,13 +560,26 @@ The `records.texts` field always contains the stripped, clean output when `norma --- +## Scope of initial implementation + +When this plan is accepted, **only Phase 1** is implemented. Phases 2 and 3 are not in scope for the current implementation. + +**In scope:** +- Pure in-memory data model and functions in `@ensnode/ensnode-sdk` (`packages/ensnode-sdk/src/resolution/normalization/`). +- `normalizeRecord`, `normalizeRecordSet`, `stripNormalizationMetadata`, `stripNormalizationMetadataWithEnrichment`. +- Initial `TextRecordNormalizationDefs` covering the 9 recognized key types. 
+- Unit tests for all functions and transform definitions. + +**Out of scope:** +- API changes, query parameters, HTTP handlers. +- Client-requested key behavior and `expandNormalizedKeys` usage in request handling. +- Integration with the indexed or RPC resolution paths (Phase 3). + +--- + ## Open questions -1. **Normalized key for Twitter**: `com.twitter` or `com.x` (reflecting the platform rebrand)? What unnormalized variants should be included? -2. **Parameter name for metadata field**: `normalizationMetadata`, `includeNormalizationMetadata`, or another name? -3. **Unnormalizable behavior**: When a key is recognized but no candidate produces a valid value, should `records.texts` contain `null` for that key, or should the key be omitted from the response entirely? -4. `**displayValue` and `url` placement**: Should these enrichment fields be part of `records.texts` (when `normalize=true`) or only inside `normalizationMetadata`? Including them in the main response is more convenient for UI clients but changes the primary response shape significantly for all callers. -5. **Client requesting an unnormalized key directly**: If a client passes `texts=vnd.twitter` (an unnormalized variant) instead of `texts=com.twitter`, should `expandNormalizedKeys` throw immediately (current spec — fail fast, caller error), or silently map it to the canonical key and expand from there (more forgiving for legacy integrations)? The issue does not address this case. -6. **Placement of `UnrecognizedKeyAndValue` records**: The current spec places them in `normalizedRecords` (keyed by `rawKey`) so that `stripNormalizationMetadata` only needs to iterate one map to produce the full output. The issue's own description says `normalizedRecords` maps *normalized keys* and `unnormalizedRecords` holds "all records that were unnormalized for one reason or another" — which by that framing would put unrecognized records in `unnormalizedRecords`. 
Decision: should `unnormalizedRecords` mean "records that don't appear in output" (current spec) or "records that weren't normalized" (issue's framing)? -7. **Verify validation rules against each service's official constraints**: The regexes and accepted input formats in this spec were derived from best-effort research and may not match each platform's current actual rules. Before finalising implementation, verify against official documentation or source code for: Twitter/X (username charset, 15-char limit), GitHub (39-char limit, hyphen rules), Farcaster (lowercase-only, length), Discord (new-format charset, period rules, legacy discriminator format), Telegram (5–32 chars, charset), Reddit (3–20 chars, charset), email (RFC compliance level), avatar (supported URI schemes). Flag any discrepancy as a bug in the transform definition. +1. **Parameter name for metadata field** *(Phase 3 only — not applicable to Phase 1)*: `normalizationMetadata`, `includeNormalizationMetadata`, or another name? +2. **Client requesting an unnormalized key directly** *(Phase 2/3 — out of scope for Phase 1)*: If a client passes `texts=vnd.twitter` (an unnormalized variant) instead of `texts=com.x`, should `expandNormalizedKeys` throw immediately (current spec — fail fast, caller error), or silently map it to the canonical key and expand from there (more forgiving for legacy integrations)? The issue does not address this case. +3. **Verify validation rules against each service's official constraints**: The regexes and accepted input formats in this spec were derived from best-effort research and may not match each platform's current actual rules. 
Before finalizing implementation, verify against official documentation or source code for: Twitter/X (username charset, 15-char limit), GitHub (39-char limit, hyphen rules), Farcaster (lowercase-only, length), Discord (new-format charset, period rules, legacy discriminator format), Telegram (5–32 chars, charset), Reddit (3–20 chars, charset), email (RFC compliance level), avatar (supported URI schemes). Flag any discrepancy as a bug in the transform definition. These rules will additionally be verified against real-world resolver record data. From 844c0651336408917492db3979f54e923b1766bb Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 3 Mar 2026 00:23:50 +0100 Subject: [PATCH 08/10] refactor(plans): enhance documentation and clarify duplicate key handling in normalization --- ...cleanup_architecture_plan_50f1bdfb.plan.md | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 0dc16af03..0851e24ff 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -17,6 +17,8 @@ isProject: false **Phase 1 scope (this implementation):** Pure in-memory library only. Input = a raw set of key/value pairs (already resolved from a resolver). Output = a normalized record set and optionally stripped/enriched output. No I/O, no resolution logic, no API changes, no client key-request behavior. +**Assumption — unique keys in resolver records:** Resolver records (the key/value set passed into the normalizer) are assumed to have unique keys: each key appears at most once. Duplicate handling in this spec applies only to *recognized* keys: multiple *different* raw keys (e.g. `com.twitter`, `vnd.twitter`) can map to the same normalized key (e.g. `com.x`); only one record wins per normalized key. 
The `DuplicateNormalizedKey` op is used only for those recognized-key losers, not for repeated occurrences of the same raw key. By contrast, the *requested* keys (e.g. the list passed to key expansion / multicall) may contain duplicates—the user may ask for the same key more than once—and the generated query should deduplicate so each key is requested at most once. + --- ## Architecture: Phase separation @@ -164,11 +166,11 @@ Logic: - If `rawValue` is a string: run validate + normalize on rawValue. - If succeeded: `valueResult` is `AlreadyNormalized` iff `normalizedValue === rawValue` (validate + normalize produced a value identical to the original rawValue); otherwise `valueResult` is `Normalized` (normalizedValue differs from rawValue). - If failed: `valueResult` is `Unnormalizable` with the reason from the failing step. -- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. At the set level, the first record with a given unrecognized `rawKey` is written into `records` as `UnrecognizedKeyAndValue`; any subsequent record with the same `rawKey` gets `op: "DuplicateNormalizedKey"` and goes into `excludedRecords` (consistent with how recognized-key duplicates are handled). +- If not found: `keyResult` is Unrecognized; `valueResult` is Unrecognized. ### 1.5 Layer 2: Set-level normalization -After individually normalizing each record, the set is consolidated so only one normalized key is retained as the "winner" when multiple records map to the same normalized key. +After individually normalizing each record, the set is consolidated so only one normalized key is retained as the "winner" when multiple records map to the same normalized key. Duplicate detection requires set context: if a key is not found in defs, at the set level the first record with that unrecognized `rawKey` is written into `records` as `UnrecognizedKeyAndValue`; subsequent records with the same `rawKey` are marked `op: "DuplicateNormalizedKey"` and placed into `excludedRecords`. 
```typescript type RecordNormalizationOp = @@ -262,6 +264,8 @@ function stripNormalizationMetadata( ): Record<string, string | null> +**Return type note:** The type is intentionally `Record<string, string | null>` because the result is a single flat object that mixes (1) entries from "Normalized" records, where the value is always a string, and (2) entries from "UnrecognizedKeyAndValue" or "UnnormalizableValue", where the value can be null. TypeScript cannot express this per-key distinction in one record type, so the union `string | null` is used for all keys. + Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. For recognized keys where no candidate produced a valid value (winner is "UnnormalizableValue" in `excludedRecords`), the key is included with value `null` (e.g. `{ "com.x": null }`), so callers can distinguish "key present but invalid" from "key not present." Unrecognized keys are always included even when `rawValue` is null — producing `{ [rawKey]: null }` — so the caller receives a complete picture of every key that was present in the input. Thus the result is built from both `set.records` and, for each "UnnormalizableValue" in `set.excludedRecords`, an entry `normalizedKey → null`. For clients that want UI-friendly enrichment (displayValue, url) without holding the full `NormalizedRecordSet`: @@ -293,9 +297,9 @@ function expandNormalizedKeys( ): string[] -**Precondition**: no element of `normalizedKeys` may be an unnormalized key variant (i.e. present in `defs.byUnnormalizedKey` but not in `defs.byNormalizedKey`). Passing `vnd.twitter` where `com.twitter` is expected is a caller error and must throw synchronously with a clear message listing the offending keys. Completely unknown keys (absent from both maps) are not an error — they are passed through as-is, supporting arbitrary user-defined keys. 
+**Precondition**: no element of `normalizedKeys` may be an unnormalized key variant (i.e. present in `defs.byUnnormalizedKey` but not in `defs.byNormalizedKey`). Passing `vnd.twitter` where `com.x` is expected is a caller error and must throw synchronously with a clear message listing the offending keys. Completely unknown keys (absent from both maps) are not an error — they are passed through as-is, supporting arbitrary user-defined keys. -Returns: `[normalizedKey, ...unnormalizedKeys]` for each key found in `defs.byNormalizedKey`, followed by any unrecognized keys as-is. The result is deduplicated by first-occurrence: if the same key appears more than once, its first position is kept and subsequent occurrences are dropped. Ordering is otherwise stable and deterministic, ensuring consistent multicall construction and reproducible traces. +Returns: `[normalizedKey, ...unnormalizedKeys]` for each key found in `defs.byNormalizedKey`, followed by any unrecognized keys as-is. The result is deduplicated by first-occurrence: the caller may request the same key more than once (e.g. `["com.x", "com.twitter", "com.x"]`); the generated query list must contain each key at most once, so the first occurrence is kept and subsequent duplicates are dropped. Ordering is otherwise stable and deterministic, ensuring consistent multicall construction and reproducible traces. --- @@ -306,17 +310,17 @@ The initial definitions cover the 9 most common ENS text record key types. 
All l **Initial set of recognized keys:** -| Normalized key | Display key | Unnormalized key variants | -| --------------------------- | ----------- | ---------------------------------------- | -| `com.x` | X (Twitter) | `com.twitter`, `vnd.twitter`, `twitter`, `Twitter` | -| `com.github` | GitHub | `vnd.github`, `github` | -| `xyz.farcaster` | Farcaster | `com.warpcast`, `Farcaster`, `farcaster` | -| `com.discord` | Discord | `discord` | -| `org.telegram` | Telegram | `telegram`, `com.telegram`, `Telegram` | -| `com.reddit` | Reddit | `reddit` | -| `url` | Website | `URL`, `Website`, `website` | -| `email` | Email | `Email` | -| `avatar` | Avatar | `Avatar` | +| Normalized key | Display key | Unnormalized key variants | +| --------------- | ----------- | -------------------------------------------------- | +| `com.x` | X (Twitter) | `com.twitter`, `vnd.twitter`, `twitter`, `Twitter` | +| `com.github` | GitHub | `vnd.github`, `github` | +| `xyz.farcaster` | Farcaster | `com.warpcast`, `Farcaster`, `farcaster` | +| `com.discord` | Discord | `discord` | +| `org.telegram` | Telegram | `telegram`, `com.telegram`, `Telegram` | +| `com.reddit` | Reddit | `reddit` | +| `url` | Website | `URL`, `Website`, `website` | +| `email` | Email | `Email` | +| `avatar` | Avatar | `Avatar` | ### `TextRecordNormalizationDefs` construction @@ -409,7 +413,7 @@ Discord supports two username formats: the new format (post-2023, no discriminat **Validation**: - New format: must match `^(?!.*\.\.)[a-z0-9_.]{2,32}$` (no consecutive periods). -- Legacy format: must match `^[^\x00-\x1F\x7F]{2,32}#[0-9]{4}$` (printable characters only before the `#` discriminator). +- Legacy format: must match `^[^\x00-\x1F\x7F#]{2,32}#[0-9]{4}$` (printable characters only before the `#` discriminator; `#` is excluded from the username portion so inputs like `alice#bob#1234` are rejected). **Canonical form**: the username as provided (lowercased for new format, `username#NNNN` preserved for legacy). 
@@ -430,7 +434,7 @@ Unnormalized variants: `telegram`, `com.telegram`, `Telegram` - t.me URL: `https://t.me/alice`, `http://t.me/alice`, `t.me/alice` - telegram.me URL: `https://telegram.me/alice`, `http://telegram.me/alice`, `telegram.me/alice` -**Validation**: extracted username must match `^[a-zA-Z0-9_]{5,32}$`. +**Validation**: extracted username must match `^(?!_)(?!.*_$)(?!.*__)[a-zA-Z0-9_]{5,32}$` (5–32 chars from `[a-zA-Z0-9_]`, but no leading underscore, no trailing underscore, and no consecutive underscores; e.g. `_alice`, `alice_`, and `alice__bob` are rejected). **Canonical form**: lowercase username (e.g. `alice`). @@ -527,6 +531,7 @@ For the RPC path, ENS resolution already uses a multicall pattern: all record lo Two query parameters on `GET /records/:name`: +> **Breaking change:** The default `normalize=true` alters the response shape (e.g. `records.texts` will contain normalized keys and values instead of raw resolver output). Callers that rely on the previous raw key/value shape must pass `normalize=false` explicitly. | Parameter | Type | Default | Description | | ----------------------- | ------- | ------- | -------------------------------------------------------------------------------------------------------------- | @@ -556,7 +561,7 @@ interface ResolveRecordsResponse { } ``` -The `records.texts` field always contains the stripped, clean output when `normalize=true` (normalized keys mapping to normalized values, unrecognized keys passed through). +When `normalize=true`, the `records.texts` field contains the stripped, clean output (normalized keys mapping to normalized values, unrecognized keys passed through). When `normalize=false`, the expansion/normalization pipeline is not run and `records` keep the original resolved shape. --- @@ -565,12 +570,14 @@ The `records.texts` field always contains the stripped, clean output when `norma When this plan is accepted, **only Phase 1** is implemented. 
Phases 2 and 3 are not in scope for the current implementation. **In scope:** + - Pure in-memory data model and functions in `@ensnode/ensnode-sdk` (`packages/ensnode-sdk/src/resolution/normalization/`). - `normalizeRecord`, `normalizeRecordSet`, `stripNormalizationMetadata`, `stripNormalizationMetadataWithEnrichment`. - Initial `TextRecordNormalizationDefs` covering the 9 recognized key types. - Unit tests for all functions and transform definitions. **Out of scope:** + - API changes, query parameters, HTTP handlers. - Client-requested key behavior and `expandNormalizedKeys` usage in request handling. - Integration with the indexed or RPC resolution paths (Phase 3). From 4f170eab82b0012913aa6053e8705841149c1c76 Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 3 Mar 2026 00:44:58 +0100 Subject: [PATCH 09/10] refactor(plans): further clarify duplicate key handling and update normalization logic --- ...cleanup_architecture_plan_50f1bdfb.plan.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 0851e24ff..582b0ca89 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -17,7 +17,7 @@ isProject: false **Phase 1 scope (this implementation):** Pure in-memory library only. Input = a raw set of key/value pairs (already resolved from a resolver). Output = a normalized record set and optionally stripped/enriched output. No I/O, no resolution logic, no API changes, no client key-request behavior. -**Assumption — unique keys in resolver records:** Resolver records (the key/value set passed into the normalizer) are assumed to have unique keys: each key appears at most once. Duplicate handling in this spec applies only to *recognized* keys: multiple *different* raw keys (e.g. 
`com.twitter`, `vnd.twitter`) can map to the same normalized key (e.g. `com.x`); only one record wins per normalized key. The `DuplicateNormalizedKey` op is used only for those recognized-key losers, not for repeated occurrences of the same raw key. By contrast, the *requested* keys (e.g. the list passed to key expansion / multicall) may contain duplicates—the user may ask for the same key more than once—and the generated query should deduplicate so each key is requested at most once. +**Assumption — unique keys in resolver records:** Resolver records (the key/value set passed into the normalizer) are assumed to have unique keys: each key appears at most once. Duplicate handling in this spec applies only to *recognized* keys: multiple *different* raw keys (e.g. `com.twitter`, `vnd.twitter`) can map to the same normalized key (e.g. `com.x`); only one record wins per normalized key. The `DuplicateNormalizedKey` op is used only for those recognized-key losers. If the input violates the uniqueness assumption (same unrecognized `rawKey` appears more than once), subsequent occurrences are marked `DuplicateRawKey` and placed in `excludedRecords`—this should not be possible when resolver keys are unique. By contrast, the *requested* keys (e.g. the list passed to key expansion / multicall) may contain duplicates—the user may ask for the same key more than once—and the generated query should deduplicate so each key is requested at most once. --- @@ -170,7 +170,7 @@ Logic: ### 1.5 Layer 2: Set-level normalization -After individually normalizing each record, the set is consolidated so only one normalized key is retained as the "winner" when multiple records map to the same normalized key. 
Duplicate detection requires set context: if a key is not found in defs, at the set level the first record with that unrecognized `rawKey` is written into `records` as `UnrecognizedKeyAndValue`; subsequent records with the same `rawKey` are marked `op: "DuplicateNormalizedKey"` and placed into `excludedRecords`. +After individually normalizing each record, the set is consolidated so only one normalized key is retained as the "winner" when multiple records map to the same normalized key. Duplicate detection requires set context: if a key is not found in defs, at the set level the first record with that unrecognized `rawKey` is written into `records` as `UnrecognizedKeyAndValue`; subsequent records with the same `rawKey` are marked `op: "DuplicateRawKey"` and placed into `excludedRecords` (this should not occur when resolver keys are unique; see assumption above). ```typescript type RecordNormalizationOp = @@ -187,7 +187,12 @@ type RecordNormalizationOp = */ | "UnnormalizableValue" /** Another record already claimed this normalized key (lower priority variant) */ - | "DuplicateNormalizedKey"; + | "DuplicateNormalizedKey" + /** + * Same unrecognized rawKey appeared again; first occurrence won. Defensive only — should not + * occur when resolver records have unique keys (see assumption above). + */ + | "DuplicateRawKey"; type RecordNormalizationResult = | { @@ -200,7 +205,7 @@ type RecordNormalizationResult = url: string | null; } | { - op: "UnrecognizedKeyAndValue" | "UnnormalizableValue" | "DuplicateNormalizedKey"; + op: "UnrecognizedKeyAndValue" | "UnnormalizableValue" | "DuplicateNormalizedKey" | "DuplicateRawKey"; individual: IndividualRecordNormalizationResult; }; @@ -217,11 +222,11 @@ type NormalizedRecordSet = { >; /** * Records excluded from the main output: - * UnnormalizableValue and DuplicateNormalizedKey. + * UnnormalizableValue, DuplicateNormalizedKey, and DuplicateRawKey. 
*/ excludedRecords: Extract< RecordNormalizationResult, - { op: "UnnormalizableValue" | "DuplicateNormalizedKey" } + { op: "UnnormalizableValue" | "DuplicateNormalizedKey" | "DuplicateRawKey" } >[]; }; ``` @@ -299,7 +304,7 @@ function expandNormalizedKeys( **Precondition**: no element of `normalizedKeys` may be an unnormalized key variant (i.e. present in `defs.byUnnormalizedKey` but not in `defs.byNormalizedKey`). Passing `vnd.twitter` where `com.x` is expected is a caller error and must throw synchronously with a clear message listing the offending keys. Completely unknown keys (absent from both maps) are not an error — they are passed through as-is, supporting arbitrary user-defined keys. -Returns: `[normalizedKey, ...unnormalizedKeys]` for each key found in `defs.byNormalizedKey`, followed by any unrecognized keys as-is. The result is deduplicated by first-occurrence: the caller may request the same key more than once (e.g. `["com.x", "com.twitter", "com.x"]`); the generated query list must contain each key at most once, so the first occurrence is kept and subsequent duplicates are dropped. Ordering is otherwise stable and deterministic, ensuring consistent multicall construction and reproducible traces. +Returns: `[normalizedKey, ...unnormalizedKeys]` for each key found in `defs.byNormalizedKey`, followed by any unrecognized keys as-is. The result is deduplicated by first-occurrence: the caller may request the same key more than once (e.g. `["com.x", "com.x"]`); the generated query list must contain each key at most once, so the first occurrence is kept and subsequent duplicates are dropped. Ordering is otherwise stable and deterministic, ensuring consistent multicall construction and reproducible traces. 
--- From e1e70f1f98a886a80db2abbd4fdd836dbaf3fc56 Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 3 Mar 2026 00:54:04 +0100 Subject: [PATCH 10/10] refactor(plans): refine normalization logic for handling null values and key presence --- .cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md index 582b0ca89..09558cf43 100644 --- a/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md +++ b/.cursor/plans/auto-cleanup_architecture_plan_50f1bdfb.plan.md @@ -271,7 +271,7 @@ function stripNormalizationMetadata( **Return type note:** The type is intentionally `Record` because the result is a single flat object that mixes (1) entries from "Normalized" records, where the value is always a string, and (2) entries from "UnrecognizedKeyAndValue" or "UnnormalizableValue", where the value can be null. TypeScript cannot express this per-key distinction in one record type, so the union `string | null` is used for all keys. -Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. For recognized keys where no candidate produced a valid value (winner is "UnnormalizableValue" in `excludedRecords`), the key is included with value `null` (e.g. `{ "com.x": null }`), so callers can distinguish "key present but invalid" from "key not present." Unrecognized keys are always included even when `rawValue` is null — producing `{ [rawKey]: null }` — so the caller receives a complete picture of every key that was present in the input. Thus the result is built from both `set.records` and, for each "UnnormalizableValue" in `set.excludedRecords`, an entry `normalizedKey → null`. 
+Returns only the `normalizedKey → normalizedValue` pairs from the "Normalized" records, plus `rawKey → rawValue` passthrough for "UnrecognizedKeyAndValue" records. For recognized keys where no candidate produced a valid value (winner is "UnnormalizableValue" in `excludedRecords`), the key is included with value `null` (e.g. `{ "com.x": null }`), so callers can distinguish "key present but invalid" from "key not present." Unrecognized keys are always included even when `rawValue` is null — producing `{ [rawKey]: null }` — so the caller receives a complete picture of every key that was present in the input. Thus the result is built from `set.records` first; then, for each "UnnormalizableValue" in `set.excludedRecords`, add `normalizedKey → null` **only when** `set.records[normalizedKey]` is absent (i.e. when no candidate successfully normalized for that key). This avoids overwriting a valid winner with null when `excludedRecords` contains Pass-1 losers (unnormalizable candidates for a key that had a normalizable winner). For clients that want UI-friendly enrichment (displayValue, url) without holding the full `NormalizedRecordSet`: