From a460773494386ae95a32cacf9828a6191b5b5546 Mon Sep 17 00:00:00 2001 From: Tony Li Date: Tue, 5 May 2026 09:42:34 +1200 Subject: [PATCH 1/3] Tolerate URL normalization differences in cache lookups Register a `urls_eq` SQLite scalar function that compares two URL strings via `ParsedUrl` parsing (with raw-string fast path) and use it in `WHERE urls_eq(url, ?)` for `self_hosted_sites.url` reads. Lookups now match the same site whether it was stored as `http://localhost` or `http://localhost/`, which fixes the trunk integration-test break in `test_remove_site.rs` and removes the latent risk of duplicate rows on devices upgrading across PR #1239. Replaces the narrower `url_candidates` workaround (and the `delete_self_hosted_site_by_url(&[String])` overload that consumed it), so every URL-keyed read goes through one mechanism. --- CLAUDE.md | 1 + wp_mobile_cache/Cargo.toml | 2 +- wp_mobile_cache/src/lib.rs | 128 ++++++++++++++++++++---- wp_mobile_cache/src/repository/sites.rs | 89 +++------------- 4 files changed, 120 insertions(+), 100 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 48e5f598e..5570971e3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -71,6 +71,7 @@ Test credentials are configured in: ## Development Tips - Platform bindings are generated automatically - don't edit generated files directly +- For URL columns in the `wp_mobile_cache` database (e.g. `self_hosted_sites.url`), compare with the `urls_eq(a, b)` scalar function rather than `=`. It compares via `ParsedUrl` parsing, so lookups tolerate trailing-slash and other URL-normalization differences. ## PR Description Format diff --git a/wp_mobile_cache/Cargo.toml b/wp_mobile_cache/Cargo.toml index 6764bfafa..ce667f28b 100644 --- a/wp_mobile_cache/Cargo.toml +++ b/wp_mobile_cache/Cargo.toml @@ -9,7 +9,7 @@ test-helpers = ["dep:chrono", "dep:integration_test_credentials", "dep:rstest"] [dependencies] log = { workspace = true } -rusqlite = { workspace = true, features = ["hooks"] } +rusqlite = { workspace = true, features = ["functions", "hooks"] } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/wp_mobile_cache/src/lib.rs b/wp_mobile_cache/src/lib.rs index 02f590801..fad900bb2 100644 --- a/wp_mobile_cache/src/lib.rs +++ b/wp_mobile_cache/src/lib.rs @@ -1,3 +1,4 @@ +use rusqlite::functions::FunctionFlags; use rusqlite::hooks::Action; use rusqlite::types::{FromSql, FromSqlResult, ToSql, ToSqlOutput}; use rusqlite::{Connection, Result as SqliteResult, params}; @@ -356,9 +357,13 @@ impl WpApiCache { /// Returns `true` if the site was found and removed, `false` if no site /// with the given URL exists. pub fn remove_self_hosted_site(&self, url: Arc) -> Result { - let candidates = url_candidates(&url); self.execute(|connection| { - SiteRepository.delete_self_hosted_site_by_url(connection, &candidates) + let Some(full_entity) = + SiteRepository.select_self_hosted_site_by_url(connection, &url.url())? + else { + return Ok(false); + }; + SiteRepository.delete_site(connection, &full_entity.data.0) }) } @@ -428,6 +433,7 @@ impl From for WpApiCache { /// This is typically used in tests to create a cache from an already-migrated /// in-memory database connection. fn from(connection: Connection) -> Self { + register_url_functions(&connection).expect("Registering URL SQL functions should succeed"); Self { inner: DBManager { connection: Mutex::new(connection), @@ -458,6 +464,11 @@ pub struct MigrationManager<'a> { impl<'a> MigrationManager<'a> { pub fn new(connection: &'a Connection) -> Result { + // Cover raw-connection paths that bypass WpApiCache construction + // (test fixtures that hand a Connection straight to the repository + // layer). The registration is idempotent, so connections that were + // already prepared by DBManager / From are unaffected. + register_url_functions(connection)?; Ok(Self { connection }) } @@ -592,21 +603,42 @@ impl DBManager { Connection::open_in_memory()? }; + register_url_functions(&connection)?; + Ok(Self { connection: Mutex::new(connection), }) } } -fn url_candidates(url: &ParsedUrl) -> [String; 2] { - let s = url.url(); - if let Some(stripped) = s.strip_suffix('/') { - let stripped = stripped.to_string(); - [s, stripped] - } else { - let with_slash = format!("{}/", s); - [s, with_slash] - } +/// Register custom SQL functions on the given connection. +/// +/// Currently registers `urls_eq(a, b)`: a deterministic, UTF-8 scalar that +/// reports semantic URL equivalence (per `ParsedUrl == ParsedUrl`), with a +/// raw-string fallback when either side fails to parse. +/// +/// Why: the `self_hosted_sites.url` column may legitimately hold either +/// `http://example.com` or `http://example.com/` depending on whether it +/// was inserted before or after `ParsedUrl`-based normalization landed +/// (see PR #1239). Lookups must tolerate either shape without forcing a +/// data migration on existing user caches. +pub(crate) fn register_url_functions(conn: &Connection) -> SqliteResult<()> { + conn.create_scalar_function( + "urls_eq", + 2, + FunctionFlags::SQLITE_DETERMINISTIC | FunctionFlags::SQLITE_UTF8, + |ctx| { + let a: String = ctx.get(0)?; + let b: String = ctx.get(1)?; + if a == b { + return Ok(true); + } + Ok(match (ParsedUrl::parse(&a), ParsedUrl::parse(&b)) { + (Ok(parsed_a), Ok(parsed_b)) => parsed_a == parsed_b, + _ => false, + }) + }, + ) } uniffi::setup_scaffolding!(); @@ -789,19 +821,71 @@ mod tests { } #[test] - fn test_url_candidates_with_trailing_slash() { - let url = ParsedUrl::parse("https://example.com/").unwrap(); - let candidates = url_candidates(&url); - assert_eq!(candidates, ["https://example.com/", "https://example.com"]); + fn test_urls_eq_matches_across_trailing_slash() { + let conn = Connection::open_in_memory().unwrap(); + register_url_functions(&conn).unwrap(); + + for (a, b) in [ + ("https://example.com", "https://example.com/"), + ("https://example.com/", "https://example.com"), + ("https://example.com", "https://example.com"), + ] { + let result: bool = conn + .query_row("SELECT urls_eq(?, ?)", [a, b], |row| row.get(0)) + .unwrap(); + assert!(result, "{a:?} should equal {b:?}"); + } } #[test] - fn test_url_candidates_without_trailing_slash() { - let url = ParsedUrl::parse("https://example.com/path").unwrap(); - let candidates = url_candidates(&url); - assert_eq!( - candidates, - ["https://example.com/path", "https://example.com/path/"] - ); + fn test_urls_eq_is_case_insensitive_in_scheme_and_host() { + let conn = Connection::open_in_memory().unwrap(); + register_url_functions(&conn).unwrap(); + + let result: bool = conn + .query_row( + "SELECT urls_eq(?, ?)", + ["http://localhost", "HTTP://LOCALHOST"], + |row| row.get(0), + ) + .unwrap(); + assert!(result); + } + + #[test] + fn test_urls_eq_returns_false_for_different_hosts() { + let conn = Connection::open_in_memory().unwrap(); + register_url_functions(&conn).unwrap(); + + let result: bool = conn + .query_row( + "SELECT urls_eq(?, ?)", + ["https://example.com", "https://other.example.com"], + |row| row.get(0), + ) + .unwrap(); + assert!(!result); + } + + #[test] + fn test_urls_eq_falls_back_to_string_equality_for_unparseable_input() { + let conn = Connection::open_in_memory().unwrap(); + register_url_functions(&conn).unwrap(); + + let same: bool = conn + .query_row("SELECT urls_eq(?, ?)", ["not a url", "not a url"], |row| { + row.get(0) + }) + .unwrap(); + assert!(same, "identical unparseable strings should match"); + + let different: bool = conn + .query_row( + "SELECT urls_eq(?, ?)", + ["not a url", "also not a url"], + |row| row.get(0), + ) + .unwrap(); + assert!(!different, "distinct unparseable strings should not match"); } } diff --git a/wp_mobile_cache/src/repository/sites.rs b/wp_mobile_cache/src/repository/sites.rs index fd8abaea0..2e963cfa9 100644 --- a/wp_mobile_cache/src/repository/sites.rs +++ b/wp_mobile_cache/src/repository/sites.rs @@ -125,9 +125,11 @@ impl SiteRepository { executor: &impl QueryExecutor, url: &str, ) -> Result>, SqliteDbError> { - // First get the self_hosted_site + // First get the self_hosted_site. Use the `urls_eq` UDF so the lookup + // tolerates trailing-slash and other URL-normalization differences + // between the stored value and the caller-provided string. let sql = format!( - "SELECT * FROM {} WHERE url = ?", + "SELECT * FROM {} WHERE urls_eq(url, ?)", DbTable::SelfHostedSites.table_name() ); let mut stmt = executor.prepare(&sql)?; @@ -259,25 +261,6 @@ impl SiteRepository { Ok(sites_deleted > 0) } - /// Delete a self-hosted site by trying multiple URL candidates. - /// - /// Useful when the caller's URL representation may differ from the stored - /// form (e.g. trailing slash normalization). Returns `true` if a site was - /// deleted, `false` if none of the candidates matched. - pub fn delete_self_hosted_site_by_url( - &self, - transaction_manager: &mut impl TransactionManager, - url_candidates: &[String], - ) -> Result { - for candidate in url_candidates { - let site_data = self.select_self_hosted_site_by_url(transaction_manager, candidate)?; - if let Some(full_entity) = site_data { - return self.delete_site(transaction_manager, &full_entity.data.0); - } - } - Ok(false) - } - /// Upsert a WordPress.com site and return its EntityId (atomic transaction). /// /// If a site with the given site_id already exists, reuses it. Otherwise creates a new one. @@ -822,74 +805,26 @@ mod tests { } #[rstest] - fn test_delete_self_hosted_site_by_url_deletes_site(mut test_conn: Connection) { - let repo = SiteRepository; - let url = "https://example.com"; - let site = SelfHostedSite { - url: url.to_string(), - api_root: "https://example.com/wp-json".to_string(), - }; - - repo.upsert_self_hosted_site(&mut test_conn, &site) - .expect("Failed to upsert site"); - - let deleted = repo - .delete_self_hosted_site_by_url(&mut test_conn, &[url.to_string(), format!("{}/", url)]) - .expect("Failed to delete site"); - assert!(deleted, "Should return true when site is deleted"); - - let after_delete = repo - .select_self_hosted_site_by_url(&test_conn, url) - .expect("Failed to select site by URL"); - assert_eq!(after_delete, None, "Site should be deleted"); - } - - #[rstest] - fn test_delete_self_hosted_site_by_url_with_trailing_slash_mismatch(mut test_conn: Connection) { + fn test_select_self_hosted_site_by_url_tolerates_trailing_slash(mut test_conn: Connection) { let repo = SiteRepository; - // Store WITHOUT trailing slash + // Store WITHOUT trailing slash (the shape historical caches may have). let site = SelfHostedSite { url: "https://example.com".to_string(), api_root: "https://example.com/wp-json".to_string(), }; - repo.upsert_self_hosted_site(&mut test_conn, &site) .expect("Failed to upsert site"); - // Delete WITH trailing slash first (as ParsedUrl would produce) - let deleted = repo - .delete_self_hosted_site_by_url( - &mut test_conn, - &[ - "https://example.com/".to_string(), - "https://example.com".to_string(), - ], - ) - .expect("Failed to delete site"); + // Look up WITH trailing slash (what ParsedUrl::url() produces today). + let retrieved = repo + .select_self_hosted_site_by_url(&test_conn, "https://example.com/") + .expect("select should succeed"); assert!( - deleted, - "Should find and delete site despite trailing slash mismatch" + retrieved.is_some(), + "lookup with trailing slash should match a row stored without one" ); } - #[rstest] - fn test_delete_self_hosted_site_by_url_returns_false_for_non_existent( - mut test_conn: Connection, - ) { - let repo = SiteRepository; - - let deleted = repo - .delete_self_hosted_site_by_url( - &mut test_conn, - &[ - "https://non-existent.com".to_string(), - "https://non-existent.com/".to_string(), - ], - ) - .expect("Failed to delete site"); - assert!(!deleted, "Should return false when site doesn't exist"); - } - // WordPress.com site tests #[rstest] From 2ac835fe34f66e265a9e62d6b739fbaf456bd855 Mon Sep 17 00:00:00 2001 From: Tony Li Date: Tue, 5 May 2026 09:46:49 +1200 Subject: [PATCH 2/3] Make WpApiCache::from(Connection) fallible `register_url_functions` can fail at the SQLite layer; switch the impl from `From` to `TryFrom` so callers surface the error as `SqliteDbError` rather than panic. The four existing call sites are unit-test setup, which now `.expect(...)` the result. --- wp_mobile/src/service/metadata.rs | 2 +- wp_mobile/src/service/post_types.rs | 2 +- wp_mobile/src/service/posts.rs | 4 ++-- wp_mobile_cache/src/lib.rs | 12 +++++++----- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/wp_mobile/src/service/metadata.rs b/wp_mobile/src/service/metadata.rs index 8ddc07a5e..5351129f8 100644 --- a/wp_mobile/src/service/metadata.rs +++ b/wp_mobile/src/service/metadata.rs @@ -692,7 +692,7 @@ mod tests { .expect("Site creation should succeed") .db_site; - let cache = Arc::new(WpApiCache::from(conn)); + let cache = Arc::new(WpApiCache::try_from(conn).expect("Cache creation should succeed")); let db_site = Arc::new(db_site); let service = MetadataService::new(db_site, cache.clone()); diff --git a/wp_mobile/src/service/post_types.rs b/wp_mobile/src/service/post_types.rs index da3ccc678..a8d7c566b 100644 --- a/wp_mobile/src/service/post_types.rs +++ b/wp_mobile/src/service/post_types.rs @@ -223,7 +223,7 @@ mod tests { }; let db_site = create_test_site(&mut conn, &self_hosted_site); - let cache = Arc::new(WpApiCache::from(conn)); + let cache = Arc::new(WpApiCache::try_from(conn).expect("Cache creation should succeed")); let api_client = mock_api_client(); let post_type_service = PostTypeService::new(api_client, Arc::new(db_site), cache.clone()); diff --git a/wp_mobile/src/service/posts.rs b/wp_mobile/src/service/posts.rs index 5d5383c3e..4f61e7d98 100644 --- a/wp_mobile/src/service/posts.rs +++ b/wp_mobile/src/service/posts.rs @@ -1423,7 +1423,7 @@ mod tests { .expect("Site creation") .db_site; - let cache = Arc::new(WpApiCache::from(conn)); + let cache = Arc::new(WpApiCache::try_from(conn).expect("Cache creation should succeed")); let db_site_arc = Arc::new(db_site); PostService::new(api_client, db_site_arc, cache) } @@ -1500,7 +1500,7 @@ mod tests { .db_site; // Setup: Create PostService with cache - let cache = Arc::new(WpApiCache::from(conn)); + let cache = Arc::new(WpApiCache::try_from(conn).expect("Cache creation should succeed")); let db_site_arc = Arc::new(db_site); let post_service = PostService::new(mock_api_client, db_site_arc.clone(), cache.clone()); diff --git a/wp_mobile_cache/src/lib.rs b/wp_mobile_cache/src/lib.rs index fad900bb2..121a72874 100644 --- a/wp_mobile_cache/src/lib.rs +++ b/wp_mobile_cache/src/lib.rs @@ -427,18 +427,20 @@ impl WpApiCache { } } -impl From for WpApiCache { +impl TryFrom for WpApiCache { + type Error = SqliteDbError; + /// Create a WpApiCache from an existing connection. /// /// This is typically used in tests to create a cache from an already-migrated /// in-memory database connection. - fn from(connection: Connection) -> Self { - register_url_functions(&connection).expect("Registering URL SQL functions should succeed"); - Self { + fn try_from(connection: Connection) -> Result { + register_url_functions(&connection)?; + Ok(Self { inner: DBManager { connection: Mutex::new(connection), }, - } + }) } } From 84e0f98ed3889fb067dd3b06d8e5b5035b435ba0 Mon Sep 17 00:00:00 2001 From: Tony Li Date: Tue, 5 May 2026 10:00:46 +1200 Subject: [PATCH 3/3] Add a change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f210a302a..fa736cb0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [Support both Integer and String for `WPApiDetails.gmt_offset`](https://github.com/Automattic/wordpress-rs/pull/209) - Pin `wp-cli/restful` to v0.4.1 to fix Docker build (v0.4.2+ requires unreleased `wp-cli ^2.13`) +- **Internal:** `WpApiCache` lookups for a self-hosted site by URL now tolerate trailing-slash and other URL-normalization differences. ## [0.1]