From 3ca33abf6b600a409cb5772a92455fce45632072 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Thu, 18 Dec 2025 21:19:22 +0000 Subject: [PATCH] Optimize find_last_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimization transforms an O(n*m) algorithm into an O(n+m) algorithm by eliminating redundant edge scanning. **Key Changes:** - **Pre-computed source set**: Creates a set `sources = {e["source"] for e in edges}` containing all source node IDs from edges - **O(1) membership testing**: Replaces `all(e["source"] != n["id"] for e in edges)` with `n["id"] not in sources` **Why This Is Faster:** The original code performs a linear scan through all edges for every node being checked. With n nodes and m edges, this creates O(n*m) time complexity. For each node, it checks every edge to ensure that node isn't a source anywhere. The optimized version builds the source set once in O(m) time, then performs O(1) hash table lookups for each node, resulting in O(n+m) total complexity. **Performance Impact:** The 245x speedup (from 101ms to 410μs) demonstrates the dramatic improvement, especially evident in large-scale test cases: - `test_large_linear_chain` (1000 nodes): Benefits significantly as it avoids 1000×999 = 999,000 edge comparisons - `test_large_fan_in` (1000 nodes): Similarly optimized from quadratic to linear scanning - Small graphs see less dramatic but still substantial improvements **Test Case Performance:** The optimization is most beneficial for graphs with many edges relative to nodes, where the original's repeated edge scanning becomes a bottleneck. Even simple cases like `test_three_nodes_linear` benefit from avoiding redundant edge iterations. --- src/algorithms/graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/algorithms/graph.py b/src/algorithms/graph.py index 777ea3b..f23d356 100644 --- a/src/algorithms/graph.py +++ b/src/algorithms/graph.py @@ -47,7 +47,8 @@ def find_shortest_path(self, start: str, end: str) -> list[str]: def find_last_node(nodes, edges): """This function receives a flow and returns the last node.""" - return next((n for n in nodes if all(e["source"] != n["id"] for e in edges)), None) + sources = {e["source"] for e in edges} + return next((n for n in nodes if n["id"] not in sources), None) def find_leaf_nodes(nodes: list[dict], edges: list[dict]) -> list[dict]: