diff --git a/.changeset/text-edit-utf16-quadratic.md b/.changeset/text-edit-utf16-quadratic.md new file mode 100644 index 000000000..26305fa7c --- /dev/null +++ b/.changeset/text-edit-utf16-quadratic.md @@ -0,0 +1,25 @@ +--- +"loro-crdt": patch +"loro-crdt-map": patch +--- + +Fix two O(n^2) editing slowdowns. + +1. Editing with UTF-16 / UTF-8 (byte) positions (the default in the JS binding) + validated each position by materializing the entire `[0, pos)` prefix string, + making every `insert`/`delete`/`splice`/`mark` O(n) and a run of edits O(n^2) + (regression since 1.12.0). The boundary check now reads the rope's prefix + caches via the cursor (O(log n)). Unicode-indexed editing was unaffected. + +2. When a subscriber is attached and many edits land on the same container within + one event batch (e.g. random-position inserts, or many distinct map-key + writes), building the event cloned the growing accumulated diff on every + compose β€” O(n^2) in the number of fragments. The diffs are now composed in + place. This affected text, map and list events. + +3. Converting a UTF-16 / UTF-8 position within a text chunk to a unicode offset + scanned the chunk char-by-char, so editing/slicing a large contiguous chunk + (a big insert, a loaded document, or a long run of typed text that merges into + one chunk) was O(chunk length) per op. Chunks that contain no astral-plane + characters (UTF-16) or are pure ASCII (UTF-8) now convert in O(1), covering + essentially all real-world text (ASCII/Latin/CJK). diff --git a/Cargo.lock b/Cargo.lock index 9c5390a31..52b43d90d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1873,7 +1873,7 @@ checksum = "3f3d053a135388e6b1df14e8af1212af5064746e9b87a06a345a7a779ee9695a" [[package]] name = "loro-wasm" -version = "1.13.2" +version = "1.13.3" dependencies = [ "console_error_panic_hook", "js-sys", diff --git a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index 032d70ccf..7194cd011 100644 --- a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -364,6 +364,68 @@ mod text_chunk { } } + /// Whether every char in this chunk is in the Basic Multilingual Plane, + /// i.e. each char is a single UTF-16 code unit. When true, UTF-16 offsets + /// equal unicode offsets, so conversions are O(1) instead of O(offset). + #[inline] + fn is_all_bmp(&self) -> bool { + self.utf16_len == self.unicode_len + } + + /// Whether this chunk is pure ASCII, i.e. each char is a single byte. + /// When true, byte offsets equal unicode offsets. + #[inline] + fn is_all_ascii(&self) -> bool { + self.bytes.len() as i32 == self.unicode_len + } + + /// Convert a UTF-16 offset within this chunk to a unicode offset. + /// + /// O(1) when the chunk has no astral-plane characters; otherwise it + /// scans up to `utf16_offset` chars. Mirrors [`utf16_to_unicode_index`]. + pub fn utf16_offset_to_unicode(&self, utf16_offset: usize) -> Result { + if self.is_all_bmp() { + return Ok(utf16_offset); + } + super::utf16_to_unicode_index(self.as_str(), utf16_offset) + } + + /// Convert a UTF-8 (byte) offset within this chunk to a unicode offset. + /// O(1) when the chunk is pure ASCII. + pub fn utf8_offset_to_unicode(&self, utf8_offset: usize) -> Result { + if self.is_all_ascii() { + return Ok(utf8_offset); + } + super::utf8_to_unicode_index(self.as_str(), utf8_offset) + } + + /// Convert a unicode offset within this chunk to a UTF-16 offset. + /// O(1) when the chunk has no astral-plane characters. + pub fn unicode_offset_to_utf16(&self, unicode_offset: usize) -> Option { + if self.is_all_bmp() { + return Some(unicode_offset); + } + super::unicode_to_utf16_index(self.as_str(), unicode_offset) + } + + /// Convert a unicode offset within this chunk to a UTF-8 (byte) offset. + /// O(1) when the chunk is pure ASCII. + pub fn unicode_offset_to_utf8(&self, unicode_offset: usize) -> Option { + if self.is_all_ascii() { + return Some(unicode_offset); + } + super::unicode_to_utf8_index(self.as_str(), unicode_offset) + } + + /// Slice this chunk by unicode offsets, returning the substring. + /// O(1) byte lookup when the chunk is pure ASCII. + pub fn unicode_slice(&self, start: usize, end: usize) -> Result<&str, ()> { + if self.is_all_ascii() { + return self.as_str().get(start..end).ok_or(()); + } + super::unicode_slice(self.as_str(), start, end) + } + pub(crate) fn delete_by_entity_index( &mut self, unicode_offset: usize, @@ -498,7 +560,7 @@ mod text_chunk { } // Fast path for ASCII text: unicode index == byte index, and utf16 index == unicode index. - if self.bytes.len() as i32 == self.unicode_len { + if self.is_all_ascii() { let ans = Self { unicode_len: range.len() as i32, bytes: self.bytes.slice_clone(range.start..range.end), @@ -1220,7 +1282,7 @@ mod query { // Allow left to not at the correct utf16 boundary. If so fallback to the last position. // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior // WARNING: Unable to report error!!! - let offset = utf16_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e); + let offset = s.utf16_offset_to_unicode(left).unwrap_or_else(|e| e); (offset, true) } RichtextStateChunk::Style { .. } => (1, false), @@ -1300,7 +1362,7 @@ mod query { // Allow left to not at the correct utf16 boundary. If so fallback to the last position. // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior // WARNING: Unable to report error!!! - let offset = utf8_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e); + let offset = s.utf8_offset_to_unicode(left).unwrap_or_else(|e| e); (offset, true) } RichtextStateChunk::Style { .. } => (1, false), @@ -1954,7 +2016,7 @@ impl RichtextState { } if let RichtextStateChunk::Text(s) = span.elem { - match unicode_slice(s.as_str(), start, end) { + match s.unicode_slice(start, end) { Ok(x) => ans.push_str(x), Err(()) => { return Err(LoroError::UTF16InUnicodeCodePoint { pos: pos + len }) @@ -2094,11 +2156,13 @@ impl RichtextState { let slice_start = span.start.unwrap_or(0) + processed_len; let slice_end = slice_start + take_len; - let text_content = unicode_slice(t.as_str(), slice_start, slice_end) - .map_err(|_| LoroError::OutOfBound { - pos: slice_end, - len: t.unicode_len() as usize, - info: "Slice delta out of bound".into(), + let text_content = + t.unicode_slice(slice_start, slice_end).map_err(|_| { + LoroError::OutOfBound { + pos: slice_end, + len: t.unicode_len() as usize, + info: "Slice delta out of bound".into(), + } })?; let styles = cur_styles.as_ref().unwrap(); @@ -2332,6 +2396,20 @@ impl RichtextState { self.cursor_to_unicode_index(cursor.cursor) } + /// Convert an event-index position into the index of `pos_type`. + /// + /// This runs in O(log n) by querying the tree once and reading the prefix + /// caches from the resulting cursor, instead of materializing the whole + /// prefix string. + pub fn event_index_to_index(&self, event_index: usize, pos_type: PosType) -> usize { + if self.tree.is_empty() { + return 0; + } + + let cursor = self.tree.query::(&event_index).unwrap(); + self.get_index_from_cursor(cursor.cursor, pos_type).unwrap() + } + #[allow(unused)] pub(crate) fn check(&self) { if !cfg!(any(debug_assertions, test)) { @@ -2744,7 +2822,7 @@ fn entity_offset_to_pos_type_offset( ) -> usize { match pos_type { PosType::Bytes => match elem { - RichtextStateChunk::Text(t) => unicode_to_utf8_index(t.as_str(), offset).unwrap(), + RichtextStateChunk::Text(t) => t.unicode_offset_to_utf8(offset).unwrap(), RichtextStateChunk::Style { .. } => 0, }, PosType::Unicode => match elem { @@ -2752,14 +2830,14 @@ fn entity_offset_to_pos_type_offset( RichtextStateChunk::Style { .. } => 0, }, PosType::Utf16 => match elem { - RichtextStateChunk::Text(t) => unicode_to_utf16_index(t.as_str(), offset).unwrap(), + RichtextStateChunk::Text(t) => t.unicode_offset_to_utf16(offset).unwrap(), RichtextStateChunk::Style { .. } => 0, }, PosType::Entity => offset, PosType::Event => match elem { RichtextStateChunk::Text(t) => { if cfg!(feature = "wasm") { - unicode_to_utf16_index(t.as_str(), offset).unwrap() + t.unicode_offset_to_utf16(offset).unwrap() } else { offset } @@ -2776,7 +2854,7 @@ fn pos_type_offset_to_entity_offset( ) -> Option { match pos_type { PosType::Bytes => match elem { - RichtextStateChunk::Text(t) => utf8_to_unicode_index(t.as_str(), offset).ok(), + RichtextStateChunk::Text(t) => t.utf8_offset_to_unicode(offset).ok(), RichtextStateChunk::Style { .. } => { if offset > 0 { None @@ -2787,7 +2865,7 @@ fn pos_type_offset_to_entity_offset( }, PosType::Unicode => Some(offset), PosType::Utf16 => match elem { - RichtextStateChunk::Text(t) => utf16_to_unicode_index(t.as_str(), offset).ok(), + RichtextStateChunk::Text(t) => t.utf16_offset_to_unicode(offset).ok(), RichtextStateChunk::Style { .. } => { if offset > 0 { None @@ -2806,7 +2884,7 @@ fn pos_type_offset_to_entity_offset( PosType::Event => match elem { RichtextStateChunk::Text(t) => { if cfg!(feature = "wasm") { - utf16_to_unicode_index(t.as_str(), offset).ok() + t.utf16_offset_to_unicode(offset).ok() } else if offset < t.unicode_len() as usize { Some(offset) } else { diff --git a/crates/loro-internal/src/delta/map_delta.rs b/crates/loro-internal/src/delta/map_delta.rs index b7f154932..966ebeb38 100644 --- a/crates/loro-internal/src/delta/map_delta.rs +++ b/crates/loro-internal/src/delta/map_delta.rs @@ -101,18 +101,19 @@ impl MapDelta { } impl ResolvedMapDelta { - pub(crate) fn compose(&self, x: ResolvedMapDelta) -> ResolvedMapDelta { - let mut updated = self.updated.clone(); + pub(crate) fn compose(mut self, x: ResolvedMapDelta) -> ResolvedMapDelta { + // Compose into `self` in place; cloning `self.updated` here made + // composing N fragments into one map event O(N^2). for (k, v) in x.updated.into_iter() { - if let Some(old) = updated.get_mut(&k) { + if let Some(old) = self.updated.get_mut(&k) { if v.idlp > old.idlp { *old = v; } } else { - updated.insert(k, v); + self.updated.insert(k, v); } } - ResolvedMapDelta { updated } + self } #[inline] diff --git a/crates/loro-internal/src/event.rs b/crates/loro-internal/src/event.rs index 9430c3f52..ee9abab7a 100644 --- a/crates/loro-internal/src/event.rs +++ b/crates/loro-internal/src/event.rs @@ -369,10 +369,9 @@ impl InternalDiff { (InternalDiff::ListRaw(a), InternalDiff::ListRaw(b)) => { Ok(InternalDiff::ListRaw(a.compose(b))) } - (InternalDiff::RichtextRaw(a), InternalDiff::RichtextRaw(b)) => { - let mut ans = a.clone(); - ans.compose(&b); - Ok(InternalDiff::RichtextRaw(ans)) + (InternalDiff::RichtextRaw(mut a), InternalDiff::RichtextRaw(b)) => { + a.compose(&b); + Ok(InternalDiff::RichtextRaw(a)) } (InternalDiff::Map(a), InternalDiff::Map(b)) => Ok(InternalDiff::Map(a.compose(b))), (InternalDiff::Tree(a), InternalDiff::Tree(b)) => Ok(InternalDiff::Tree(a.compose(b))), @@ -383,7 +382,6 @@ impl InternalDiff { impl Diff { pub fn compose_ref(&mut self, diff: &Diff) { - // PERF: avoid clone match (self, diff) { (Diff::List(a), Diff::List(b)) => { a.compose(b); @@ -392,10 +390,12 @@ impl Diff { a.compose(b); } (Diff::Map(a), Diff::Map(b)) => { - *a = a.clone().compose(b.clone()); + // Move the accumulator out instead of cloning it, so composing + // a long run of fragments stays linear rather than O(n^2). + *a = std::mem::take(a).compose(b.clone()); } (Diff::Tree(a), Diff::Tree(b)) => { - *a = a.clone().compose(b.clone()); + *a = std::mem::take(a).compose(b.clone()); } #[cfg(feature = "counter")] (Diff::Counter(a), Diff::Counter(b)) => *a += b, diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 77b3baf30..56d4a408c 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -2783,43 +2783,30 @@ impl TextHandler { PosType::Unicode => Some(unicode_index), PosType::Event => Some(event_index), PosType::Bytes | PosType::Utf16 => { - // Use the prefix text to compute target offset. - let prefix = match &self.inner { + // Map the event-index position onto the target coordinate via the + // rope's prefix caches. This is O(log n); materializing the prefix + // string would be O(n) and makes repeated edits O(n^2). + match &self.inner { MaybeDetached::Detached(t) => { let t = t.lock(); if event_index > t.value.len_event() { return None; } - t.value.get_text_slice_by_event_index(0, event_index).ok()? + Some(t.value.event_index_to_index(event_index, to)) } - MaybeDetached::Attached(a) if a.has_decoded_state() => { - let res: Result = a.with_state(|state| { - let state = state.as_richtext_state_mut().unwrap(); - if event_index > state.len_event() { - return Err(()); - } - state - .get_text_slice_by_event_index(0, event_index) - .map_err(|_| ()) - }); - - match res { - Ok(v) => v, - Err(_) => return None, + MaybeDetached::Attached(a) if a.has_decoded_state() => a.with_state(|state| { + let state = state.as_richtext_state_mut().unwrap(); + if event_index > state.len_event() { + return None; } - } + Some(state.event_index_to_index(event_index, to)) + }), MaybeDetached::Attached(a) => { let value = a.get_value(); let s = value.as_string().unwrap(); - return unicode_to_text_pos(s, unicode_index, to); + unicode_to_text_pos(s, unicode_index, to) } - }; - - Some(match to { - PosType::Bytes => prefix.len(), - PosType::Utf16 => count_utf16_len(prefix.as_bytes()), - _ => unreachable!(), - }) + } } PosType::Entity => None, }; diff --git a/crates/loro-internal/src/state.rs b/crates/loro-internal/src/state.rs index a04d956e2..d0a768038 100644 --- a/crates/loro-internal/src/state.rs +++ b/crates/loro-internal/src/state.rs @@ -1570,11 +1570,13 @@ impl DocState { continue; }; - // TODO: PERF avoid this clone - *last_container_diff = last_container_diff - .clone() - .compose(container_diff.diff) - .unwrap(); + // Compose in place. Cloning the accumulated diff here made a + // batch of N same-container fragments O(N^2) (each compose + // cloned the growing accumulator), which is hit whenever a + // subscriber is attached and many edits land on one container + // in a single event batch. + let prev = std::mem::take(last_container_diff); + *last_container_diff = prev.compose(container_diff.diff).unwrap(); } } let mut diff: Vec<_> = containers diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index a5e40dd87..e6bd9bb1b 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -1065,6 +1065,12 @@ impl RichtextState { .get_mut() .event_index_to_unicode_index(event_index) } + + pub(crate) fn event_index_to_index(&mut self, event_index: usize, pos_type: PosType) -> usize { + self.state + .get_mut() + .event_index_to_index(event_index, pos_type) + } } #[derive(Debug, Default, Clone)] diff --git a/crates/loro/tests/perf_text_insert_quadratic.rs b/crates/loro/tests/perf_text_insert_quadratic.rs new file mode 100644 index 000000000..46198c986 --- /dev/null +++ b/crates/loro/tests/perf_text_insert_quadratic.rs @@ -0,0 +1,45 @@ +use loro::LoroDoc; +use std::time::Instant; + +/// Regression guard for the O(n^2) text-edit blow-up that shipped in 1.12.0. +/// +/// Editing with UTF-16 / UTF-8 (byte) coordinates used to validate every +/// position by materializing the whole `[0, pos)` prefix string, making each +/// edit O(n) and a run of edits O(n^2). After the fix the boundary check is +/// O(log n), so the cost should scale ~linearly. +/// +/// Run with: +/// cargo test -p loro perf_text_insert_utf16_is_linear -- --ignored --nocapture +#[test] +#[ignore] +fn perf_text_insert_utf16_is_linear() { + fn bench(n: usize) -> std::time::Duration { + let doc = LoroDoc::new(); + let text = doc.get_text("text"); + let mut seed: u64 = 42; + let mut rnd = || { + seed = (seed.wrapping_mul(1103515245).wrapping_add(12345)) & 0x7fffffff; + seed as f64 / 0x7fffffff as f64 + }; + let start = Instant::now(); + for _ in 0..n { + let len = text.len_utf16(); + let pos = (rnd() * (len + 1) as f64).floor() as usize; + text.insert_utf16(pos, "x").unwrap(); + } + doc.commit(); + start.elapsed() + } + + let mut prev = 0f64; + for &n in &[6000usize, 12000, 24000, 48000] { + let d = bench(n); + let ms = d.as_secs_f64() * 1000.0; + let ratio = if prev > 0.0 { ms / prev } else { 0.0 }; + println!( + "n={n:>6} {ms:>9.1} ms per_op={:>7.3}us x_for_2x_work={ratio:.2}", + d.as_secs_f64() / n as f64 * 1e6 + ); + prev = ms; + } +} diff --git a/crates/loro/tests/text.rs b/crates/loro/tests/text.rs index 4570d803c..cbb803415 100644 --- a/crates/loro/tests/text.rs +++ b/crates/loro/tests/text.rs @@ -14,6 +14,132 @@ fn utf16_pos(s: &str, char_index: usize) -> usize { s.chars().take(char_index).map(|c| c.len_utf16()).sum() } +// Regression guard for the second O(n^2) bug: when a subscriber is attached and +// many edits to the same container accumulate in one event batch, `diffs_to_event` +// used to clone the growing accumulated diff on every compose. The fix composes in +// place; this test pins that the resulting event still reflects the final text. +#[test] +fn batched_same_container_edits_emit_correct_event() { + use std::sync::{Arc, Mutex}; + + let doc = LoroDoc::new(); + let last_text = Arc::new(Mutex::new(String::new())); + let captured = last_text.clone(); + let _sub = doc.subscribe_root(Arc::new(move |e| { + for container in e.events { + if let loro::event::Diff::Text(deltas) = &container.diff { + let mut s = String::new(); + for d in deltas { + if let TextDelta::Insert { insert, .. } = d { + s.push_str(insert); + } + } + *captured.lock().unwrap() = s; + } + } + })); + + let text = doc.get_text("text"); + // Many non-adjacent inserts in one batch => many same-container fragments + // that must be composed correctly into a single event. + let mut seed: u64 = 1; + let mut expected = String::new(); + for i in 0..400 { + let len = text.len_unicode(); + seed = (seed.wrapping_mul(1103515245).wrapping_add(12345)) & 0x7fffffff; + let pos = (seed as usize) % (len + 1); + let ch = (b'a' + (i % 26) as u8) as char; + text.insert(pos, &ch.to_string()).unwrap(); + expected.insert( + expected + .char_indices() + .nth(pos) + .map(|(b, _)| b) + .unwrap_or(expected.len()), + ch, + ); + } + doc.commit(); + + assert_eq!(text.to_string(), expected); + // The event's reconstructed insert content must equal the final document. + assert_eq!(*last_text.lock().unwrap(), expected); +} + +// Regression guard for the O(n^2) text-edit perf bug: `convert_pos` must map +// every coordinate pair correctly via the O(log n) cursor path. We check the +// result against a reference computed directly from the prefix string for a +// mix of ASCII, multi-byte, and surrogate-pair characters. +#[test] +fn convert_pos_matches_prefix_reference_all_coords() { + let content = "AπŸ˜€B汉ñCπŸŽ‰De"; + let doc = LoroDoc::new(); + let text = doc.get_text("text"); + text.insert(0, content).unwrap(); + + let chars: Vec = content.chars().collect(); + let unicode_len = chars.len(); + + // Reference prefix lengths keyed by unicode position. + let mut uni_to_utf16 = Vec::with_capacity(unicode_len + 1); + let mut uni_to_bytes = Vec::with_capacity(unicode_len + 1); + let (mut u16acc, mut byteacc) = (0usize, 0usize); + for i in 0..=unicode_len { + uni_to_utf16.push(u16acc); + uni_to_bytes.push(byteacc); + if i < unicode_len { + u16acc += chars[i].len_utf16(); + byteacc += chars[i].len_utf8(); + } + } + + for u in 0..=unicode_len { + // Unicode -> {Utf16, Bytes, Event} + assert_eq!( + text.convert_pos(u, PosType::Unicode, PosType::Utf16), + Some(uni_to_utf16[u]), + "unicode {u} -> utf16" + ); + assert_eq!( + text.convert_pos(u, PosType::Unicode, PosType::Bytes), + Some(uni_to_bytes[u]), + "unicode {u} -> bytes" + ); + + // Round-trips back to the original coordinate. + let utf16 = uni_to_utf16[u]; + let bytes = uni_to_bytes[u]; + assert_eq!( + text.convert_pos(utf16, PosType::Utf16, PosType::Unicode), + Some(u), + "utf16 {utf16} -> unicode" + ); + assert_eq!( + text.convert_pos(bytes, PosType::Bytes, PosType::Unicode), + Some(u), + "bytes {bytes} -> unicode" + ); + assert_eq!( + text.convert_pos(bytes, PosType::Bytes, PosType::Utf16), + Some(utf16), + "bytes {bytes} -> utf16" + ); + assert_eq!( + text.convert_pos(utf16, PosType::Utf16, PosType::Bytes), + Some(bytes), + "utf16 {utf16} -> bytes" + ); + } + + // Positions inside a surrogate pair / multi-byte char must be rejected by + // the editing API (boundary validation), not silently snapped. + // πŸ˜€ is at unicode index 1; its utf16 span is [1, 3) and byte span is [1, 5). + assert!(text.insert_utf16(2, "x").is_err()); + assert!(text.insert_utf8(2, "x").is_err()); + assert!(text.delete_utf16(2, 1).is_err()); + assert!(text.delete_utf8(2, 1).is_err()); +} + #[test] fn test_slice_delta() { let doc = LoroDoc::new();