From d9623a14acb6c0a23f6126cf0d57116ad77c4bc5 Mon Sep 17 00:00:00 2001 From: Elle Mouton Date: Thu, 4 Dec 2025 11:23:42 +0200 Subject: [PATCH 1/4] graph/db/models: fix race condition in Node.PubKey The PubKey method had a race condition where concurrent calls could all pass the nil check and race to write to the cached pubKey field. This is a classic check-then-act race. Remove the caching entirely to fix the race. The overhead of parsing a public key is minimal and doesn't justify the added complexity and race risk of caching. --- graph/db/models/node.go | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/graph/db/models/node.go b/graph/db/models/node.go index 8bbd837db5b..9a045d53c36 100644 --- a/graph/db/models/node.go +++ b/graph/db/models/node.go @@ -22,7 +22,6 @@ type Node struct { // PubKeyBytes is the raw bytes of the public key of the target node. PubKeyBytes [33]byte - pubKey *btcec.PublicKey // LastUpdate is the last time the vertex information for this node has // been updated. @@ -129,21 +128,8 @@ func (n *Node) HaveAnnouncement() bool { // PubKey is the node's long-term identity public key. This key will be used to // authenticated any advertisements/updates sent by the node. -// -// NOTE: By having this method to access an attribute, we ensure we only need -// to fully deserialize the pubkey if absolutely necessary. func (n *Node) PubKey() (*btcec.PublicKey, error) { - if n.pubKey != nil { - return n.pubKey, nil - } - - key, err := btcec.ParsePubKey(n.PubKeyBytes[:]) - if err != nil { - return nil, err - } - n.pubKey = key - - return key, nil + return btcec.ParsePubKey(n.PubKeyBytes[:]) } // NodeAnnouncement retrieves the latest node announcement of the node. From 49752f3b773ba2de25b98e88f503ddd66ddd61ef Mon Sep 17 00:00:00 2001 From: Elle Mouton Date: Thu, 4 Dec 2025 11:23:53 +0200 Subject: [PATCH 2/4] graph/db/models: fix race conditions in ChannelEdgeInfo Both NodeKey1 and NodeKey2 methods had the same race condition as the Node.PubKey method, where concurrent calls could race to write to the cached fields. Remove the caching for the same reasons: parsing overhead is minimal and doesn't justify the complexity and race risk. --- graph/db/models/channel_edge_info.go | 32 ++-------------------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/graph/db/models/channel_edge_info.go b/graph/db/models/channel_edge_info.go index b86c140bc1d..0d6cd04f0f2 100644 --- a/graph/db/models/channel_edge_info.go +++ b/graph/db/models/channel_edge_info.go @@ -30,11 +30,9 @@ type ChannelEdgeInfo struct { // NodeKey1Bytes is the raw public key of the first node. NodeKey1Bytes [33]byte - nodeKey1 *btcec.PublicKey // NodeKey2Bytes is the raw public key of the first node. NodeKey2Bytes [33]byte - nodeKey2 *btcec.PublicKey // BitcoinKey1Bytes is the raw public key of the first node. BitcoinKey1Bytes [33]byte @@ -78,42 +76,16 @@ type ChannelEdgeInfo struct { // the creation of this channel. A node is considered "first" if the // lexicographical ordering the its serialized public key is "smaller" than // that of the other node involved in channel creation. -// -// NOTE: By having this method to access an attribute, we ensure we only need -// to fully deserialize the pubkey if absolutely necessary. func (c *ChannelEdgeInfo) NodeKey1() (*btcec.PublicKey, error) { - if c.nodeKey1 != nil { - return c.nodeKey1, nil - } - - key, err := btcec.ParsePubKey(c.NodeKey1Bytes[:]) - if err != nil { - return nil, err - } - c.nodeKey1 = key - - return key, nil + return btcec.ParsePubKey(c.NodeKey1Bytes[:]) } // NodeKey2 is the identity public key of the "second" node that was involved in // the creation of this channel. A node is considered "second" if the // lexicographical ordering the its serialized public key is "larger" than that // of the other node involved in channel creation. -// -// NOTE: By having this method to access an attribute, we ensure we only need -// to fully deserialize the pubkey if absolutely necessary. func (c *ChannelEdgeInfo) NodeKey2() (*btcec.PublicKey, error) { - if c.nodeKey2 != nil { - return c.nodeKey2, nil - } - - key, err := btcec.ParsePubKey(c.NodeKey2Bytes[:]) - if err != nil { - return nil, err - } - c.nodeKey2 = key - - return key, nil + return btcec.ParsePubKey(c.NodeKey2Bytes[:]) } // OtherNodeKeyBytes returns the node key bytes of the other end of the channel. From 65dd7f25d7a7033a2a954e700c70b4628e9b3b66 Mon Sep 17 00:00:00 2001 From: Elle Mouton Date: Thu, 4 Dec 2025 11:24:56 +0200 Subject: [PATCH 3/4] graph/db: fix race in DisconnectBlockAtHeight cache access The DisconnectBlockAtHeight method was modifying the rejectCache and chanCache without holding the cacheMu lock. This caused races with other operations that properly held the lock, such as AddChannelEdge which modifies the caches in its OnCommit callback while the batch scheduler holds cacheMu. Fix by acquiring cacheMu before removing channels from the caches. --- graph/db/sql_store.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/graph/db/sql_store.go b/graph/db/sql_store.go index 65077072619..2bfcce530a0 100644 --- a/graph/db/sql_store.go +++ b/graph/db/sql_store.go @@ -2911,10 +2911,12 @@ func (s *SQLStore) DisconnectBlockAtHeight(height uint32) ( "height: %w", err) } + s.cacheMu.Lock() for _, channel := range removedChans { s.rejectCache.remove(channel.ChannelID) s.chanCache.remove(channel.ChannelID) } + s.cacheMu.Unlock() return removedChans, nil } From 1acf309850ac685b8c42bfd5663987009857cc95 Mon Sep 17 00:00:00 2001 From: Elle Mouton Date: Thu, 4 Dec 2025 12:52:42 +0200 Subject: [PATCH 4/4] docs: add release notes for race condition fixes --- docs/release-notes/release-notes-0.21.0.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/release-notes/release-notes-0.21.0.md b/docs/release-notes/release-notes-0.21.0.md index d15df411416..2fcdf7c7abf 100644 --- a/docs/release-notes/release-notes-0.21.0.md +++ b/docs/release-notes/release-notes-0.21.0.md @@ -31,6 +31,14 @@ or key) existed. The manager now correctly regenerates both files when either is missing, preventing "file not found" errors on startup. +- [Fixed race conditions](https://github.com/lightningnetwork/lnd/pull/XXXXX) in + the channel graph database. The `Node.PubKey()` and + `ChannelEdgeInfo.NodeKey1/NodeKey2()` methods had check-then-act races when + caching parsed public keys. Additionally, `DisconnectBlockAtHeight` was + accessing the reject and channel caches without proper locking. The caching + has been removed from the public key parsing methods, and proper mutex + protection has been added to the cache access in `DisconnectBlockAtHeight`. + # New Features - Basic Support for [onion messaging forwarding](https://github.com/lightningnetwork/lnd/pull/9868)