diff --git a/Cargo.lock b/Cargo.lock
index e6dd6fe..6fe4d74 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -58,6 +58,12 @@ dependencies = [
"windows-sys",
]
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
[[package]]
name = "bumpalo"
version = "3.20.2"
@@ -250,6 +256,7 @@ dependencies = [
name = "sdocx-cli"
version = "0.4.0"
dependencies = [
+ "base64",
"clap",
"sdocx",
]
diff --git a/crates/sdocx-cli/Cargo.toml b/crates/sdocx-cli/Cargo.toml
index e378be8..5212bbc 100644
--- a/crates/sdocx-cli/Cargo.toml
+++ b/crates/sdocx-cli/Cargo.toml
@@ -9,4 +9,5 @@ repository = "https://github.com/twangodev/sdocx"
[dependencies]
sdocx = { version = "0.4.0", path = "../sdocx" }
-clap = { version = "4.5.60", features = ["derive"] }
\ No newline at end of file
+clap = { version = "4.5.60", features = ["derive"] }
+base64 = "0.22"
diff --git a/crates/sdocx-cli/src/main.rs b/crates/sdocx-cli/src/main.rs
index 7721fa8..a49f18f 100644
--- a/crates/sdocx-cli/src/main.rs
+++ b/crates/sdocx-cli/src/main.rs
@@ -1,10 +1,19 @@
+use base64::Engine as _;
use clap::Parser;
-use sdocx::{Color, Document, Page, Stroke};
+use sdocx::{
+ Color, Document, MediaAsset, Page, PageElement, PageTemplate, PageTemplateSource, RichTextBox,
+ RichTextRun, Stroke,
+};
use std::fmt::Write as FmtWrite;
use std::fs;
use std::path::PathBuf;
-const DEFAULT_INK: &str = "#ffffff";
+// Default ink for uncolored strokes, by canvas: light on dark, dark on light.
+const DEFAULT_INK_DARK_MODE: &str = "#ffffff";
+const DEFAULT_INK_LIGHT_MODE: &str = "#1a1a1a";
+// Fallback canvas when a note carries no background color, matched to the ink.
+const FALLBACK_BG_DARK_MODE: &str = "#252525";
+const FALLBACK_BG_LIGHT_MODE: &str = "#fcfcfc";
// Pressure channel on v4.4.x files can be present but all-zero; treat as absent.
const PRESSURE_PRESENT_EPSILON: f64 = 0.01;
@@ -23,37 +32,33 @@ fn color_hex(c: &Color) -> String {
format!("#{:02x}{:02x}{:02x}", c.r, c.g, c.b)
}
-fn render_page_svg(page: &Page, bg_color: Option<&Color>) -> String {
- let bg = bg_color.map(color_hex).unwrap_or_else(|| "#252525".into());
-
- // Compute content bounding box from all stroke points
- let mut min_x = f64::MAX;
- let mut min_y = f64::MAX;
- let mut max_x = f64::MIN;
- let mut max_y = f64::MIN;
-
- for stroke in &page.strokes {
- for pt in &stroke.points {
- min_x = min_x.min(pt.x);
- min_y = min_y.min(pt.y);
- max_x = max_x.max(pt.x);
- max_y = max_y.max(pt.y);
+fn render_page_svg(
+ page: &Page,
+ fallback_bg_color: Option<&Color>,
+ media_assets: &[MediaAsset],
+ dark_mode: bool,
+) -> String {
+ // Dark-mode notes have light ink, so prefer the document's dark background
+ // over the light page template; otherwise keep the template background.
+ let bg_color = if dark_mode {
+ fallback_bg_color.or(page.background_color.as_ref())
+ } else {
+ page.background_color.as_ref().or(fallback_bg_color)
+ };
+ let bg = bg_color.map(color_hex).unwrap_or_else(|| {
+ if dark_mode {
+ FALLBACK_BG_DARK_MODE
+ } else {
+ FALLBACK_BG_LIGHT_MODE
}
- }
-
- if page.strokes.is_empty() || min_x > max_x {
- return r#""#.into();
- }
-
- let margin = 10.0;
- let vb_x = min_x - margin;
- let vb_y = min_y - margin;
- let vb_w = max_x - min_x + 2.0 * margin;
- let vb_h = max_y - min_y + 2.0 * margin;
-
- let scale = (1200.0 / vb_w).min(1600.0 / vb_h).min(1.0);
- let svg_w = (vb_w * scale) as u32;
- let svg_h = (vb_h * scale) as u32;
+ .into()
+ });
+ let vb_x = 0.0;
+ let vb_y = 0.0;
+ let vb_w = page.width as f64;
+ let vb_h = page.height as f64;
+ let svg_w = page.width;
+ let svg_h = page.height;
let mut svg = String::with_capacity(page.strokes.len() * 256);
@@ -69,15 +74,210 @@ fn render_page_svg(page: &Page, bg_color: Option<&Color>) -> String {
)
.unwrap();
+ let default_ink = if dark_mode {
+ DEFAULT_INK_DARK_MODE
+ } else {
+ DEFAULT_INK_LIGHT_MODE
+ };
for stroke in &page.strokes {
- render_stroke(&mut svg, stroke);
+ render_stroke(&mut svg, stroke, default_ink);
+ }
+ for element in &page.elements {
+ render_element(&mut svg, element, page, media_assets);
}
svg.push_str("\n");
svg
}
-fn render_stroke(svg: &mut String, stroke: &Stroke) {
+/// Appends the markup for a single page element to `svg`.
+///
+/// Images are embedded as base64-encoded data taken from `media_assets`; text boxes are
+/// delegated to `render_text_box`.
+fn render_element(
+ svg: &mut String,
+ element: &PageElement,
+ page: &Page,
+ media_assets: &[MediaAsset],
+) {
+ match element {
+ PageElement::Image { bbox, media_index } => {
+ // An image whose index has no matching asset is skipped silently.
+ let Some(asset) = media_assets.get(*media_index) else {
+ return;
+ };
+ let encoded = base64::engine::general_purpose::STANDARD.encode(&asset.data);
+ // Arguments in order: x, y, width, height, MIME type, base64 payload.
+ // NOTE(review): the format literal below shows no placeholders in this view;
+ // it looks like its markup was lost in extraction -- confirm against the real file.
+ writeln!(
+ svg,
+ r#" "#,
+ bbox.x_min,
+ bbox.y_min,
+ bbox.x_max - bbox.x_min,
+ bbox.y_max - bbox.y_min,
+ asset.mime_type,
+ encoded,
+ )
+ .unwrap();
+ }
+ PageElement::TextBox(text_box) => render_text_box(svg, text_box, page),
+ }
+}
+
+/// Appends the markup for one rich-text box to `svg`.
+///
+/// Boxes whose text is blank after trimming are skipped. A box with a degenerate bounding box
+/// (`x_max <= x_min` or `y_max <= y_min`) is treated as the note body and laid out across the
+/// page with a 50-unit inset on the left and right.
+///
+/// NOTE(review): the format literals in this function show no markup in this view (it looks
+/// like it was lost in extraction); they are reproduced unchanged.
+fn render_text_box(svg: &mut String, text_box: &RichTextBox, page: &Page) {
+    let text = text_box.text.trim_end_matches('\n');
+    if text.trim().is_empty() {
+        return;
+    }
+
+    let is_note_body =
+        text_box.bbox.x_max <= text_box.bbox.x_min || text_box.bbox.y_max <= text_box.bbox.y_min;
+    let (x, y, width, height) = if is_note_body {
+        (50.0, 0.0, page.width as f64 - 100.0, page.height as f64)
+    } else {
+        (
+            text_box.bbox.x_min,
+            text_box.bbox.y_min,
+            text_box.bbox.x_max - text_box.bbox.x_min,
+            text_box.bbox.y_max - text_box.bbox.y_min,
+        )
+    };
+    let color = text_box
+        .color
+        .as_ref()
+        .map(color_hex)
+        .unwrap_or_else(|| "#252525".into());
+    let font_size = text_box.font_size.map(samsung_font_to_svg).unwrap_or(37.0);
+    let line_height = font_size * 1.35;
+    let mut transform = String::new();
+    if let Some(rotation) = text_box.rotation_degrees {
+        // Rotate about the centre of the box.
+        let cx = x + width / 2.0;
+        let cy = y + height / 2.0;
+        transform = format!(r#" transform="rotate({rotation:.2} {cx:.2} {cy:.2})""#);
+    }
+
+    writeln!(svg, r#" "#).unwrap();
+    if let Some(highlight) = text_box.highlight_color.as_ref() {
+        writeln!(
+            svg,
+            r#" "#,
+            color_hex(highlight),
+        )
+        .unwrap();
+    }
+    // Underline applies to the whole box, so it is resolved once rather than per line.
+    let decoration = if text_box.underline {
+        r#" text-decoration="underline""#
+    } else {
+        ""
+    };
+    // Char offset of the current line within `text`. Each earlier line contributes its char
+    // count plus one for the separator; carrying this forward avoids rescanning every
+    // previous line for each line.
+    let mut next_line_start = 0usize;
+    for (line_idx, line) in text.lines().enumerate() {
+        let line_start = next_line_start;
+        next_line_start += line.chars().count() + 1;
+        // Empty lines emit nothing but still advance the offset and the vertical position.
+        if line.is_empty() {
+            continue;
+        }
+        let text_y = y + font_size + line_idx as f64 * line_height;
+        let spans = styled_line_spans(line, line_start, &text_box.runs);
+        write!(
+            svg,
+            r#" "#,
+        )
+        .unwrap();
+        for span in spans {
+            write!(
+                svg,
+                r#"{}"#,
+                if span.bold {
+                    r#" font-weight="bold""#
+                } else {
+                    ""
+                },
+                if span.italic {
+                    r#" font-style="italic""#
+                } else {
+                    ""
+                },
+                escape_xml(span.text),
+            )
+            .unwrap();
+        }
+        svg.push_str("\n");
+    }
+    svg.push_str(" \n");
+}
+
+/// A slice of one text line together with the bold/italic flags that apply to all of it.
+struct StyledSpan<'a> {
+ // Borrowed from the line being rendered.
+ text: &'a str,
+ bold: bool,
+ italic: bool,
+}
+
+/// Splits `line` into consecutive spans that each carry uniform bold/italic styling.
+///
+/// `line_start` is the char offset of `line` within the whole text, and `runs` are expressed
+/// in those same whole-text char offsets. A span is bold (or italic) when any run that
+/// overlaps it has that flag set.
+fn styled_line_spans<'a>(
+    line: &'a str,
+    line_start: usize,
+    runs: &[RichTextRun],
+) -> Vec<StyledSpan<'a>> {
+    let char_count = line.chars().count();
+    // Cut points in line-local char offsets; always include both ends of the line.
+    let mut boundaries = vec![0, char_count];
+    for run in runs {
+        // Translate to line-local offsets, clipping the run to this line.
+        let start = run.start.saturating_sub(line_start).min(char_count);
+        let end = run.end.saturating_sub(line_start).min(char_count);
+        if start < end {
+            boundaries.push(start);
+            boundaries.push(end);
+        }
+    }
+    boundaries.sort_unstable();
+    boundaries.dedup();
+
+    let byte_offsets = char_byte_offsets(line);
+    let mut spans = Vec::new();
+    // After sort + dedup every adjacent pair is strictly increasing, so no span is empty.
+    for pair in boundaries.windows(2) {
+        let (start, end) = (pair[0], pair[1]);
+        let global_start = line_start + start;
+        let global_end = line_start + end;
+        let mut bold = false;
+        let mut italic = false;
+        for run in runs {
+            if run.start < global_end && run.end > global_start {
+                bold |= run.bold;
+                italic |= run.italic;
+            }
+        }
+        spans.push(StyledSpan {
+            // Boundaries never exceed `char_count`, so both lookups are in range.
+            text: &line[byte_offsets[start]..byte_offsets[end]],
+            bold,
+            italic,
+        });
+    }
+    spans
+}
+
+/// Returns the byte offset at which each char of `text` starts, followed by `text.len()`.
+///
+/// The result has one more entry than `text` has chars, so entry `i` is the byte position of
+/// char index `i` and the final entry is the end of the string.
+fn char_byte_offsets(text: &str) -> Vec<usize> {
+    let mut offsets: Vec<usize> = text.char_indices().map(|(offset, _)| offset).collect();
+    offsets.push(text.len());
+    offsets
+}
+
+/// Converts a stored font size to the size used in the SVG output.
+///
+/// Finite positive sizes are scaled by 2.18 and clamped to `8.0..=96.0`; anything else
+/// (zero, negative, NaN, infinite) yields 37.0.
+fn samsung_font_to_svg(size: f32) -> f64 {
+    let stored = size as f64;
+    if !stored.is_finite() || stored <= 0.0 {
+        return 37.0;
+    }
+    let scaled = stored * 2.18;
+    scaled.clamp(8.0, 96.0)
+}
+
+/// Escapes the characters that are significant in XML text and double-quoted attributes.
+///
+/// `&`, `<`, `>` and `"` are replaced by their named entities; every other character is
+/// copied through unchanged. The input is walked once, so an `&` produced by an earlier
+/// replacement can never be escaped a second time.
+fn escape_xml(input: &str) -> String {
+    let mut escaped = String::with_capacity(input.len());
+    for ch in input.chars() {
+        match ch {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            other => escaped.push(other),
+        }
+    }
+    escaped
+}
+
+fn render_stroke(svg: &mut String, stroke: &Stroke, default_ink: &str) {
if stroke.points.len() < 2 {
return;
}
@@ -86,8 +286,8 @@ fn render_stroke(svg: &mut String, stroke: &Stroke) {
.color
.as_ref()
.map(color_hex)
- .unwrap_or_else(|| DEFAULT_INK.into());
- let base_width = stroke.pen_width as f64 / 2.5;
+ .unwrap_or_else(|| default_ink.into());
+ let base_width = normalized_stroke_width(stroke.pen_width);
let has_pressure = stroke.pressures.len() >= stroke.points.len() - 1
&& stroke
.pressures
@@ -124,12 +324,24 @@ fn render_stroke(svg: &mut String, stroke: &Stroke) {
}
}
+/// Maps a stored pen width to an SVG stroke width.
+///
+/// The stored width is divided by 2.5 and clamped to `0.4..=12.0`. A result that is not
+/// finite or not positive falls back to 1.0.
+fn normalized_stroke_width(pen_width: f32) -> f64 {
+    let scaled = pen_width as f64 / 2.5;
+    let usable = scaled.is_finite() && scaled > 0.0;
+    if !usable {
+        return 1.0;
+    }
+    scaled.clamp(0.4, 12.0)
+}
+
fn print_info(doc: &Document) {
if let Some(dims) = doc.metadata.page_dimensions {
eprintln!("Page dimensions: {} x {}", dims.0, dims.1);
}
if let Some(bg) = doc.metadata.background_color {
- eprintln!("Background: #{:02x}{:02x}{:02x}", bg.r, bg.g, bg.b);
+ eprintln!("Document background: #{:02x}{:02x}{:02x}", bg.r, bg.g, bg.b);
+ }
+ if let Some(enabled) = doc.metadata.dark_mode_compatibility {
+ eprintln!("Dark mode compatibility: {enabled}");
}
eprintln!("{} page(s)", doc.pages.len());
for (i, page) in doc.pages.iter().enumerate() {
@@ -141,10 +353,16 @@ fn print_info(doc: &Document) {
.filter(|s| !s.pressures.is_empty())
.count();
eprintln!(
- " Page {}: {} x {}, {} strokes, {} points, {} colors, {} with pressure",
+ " Page {}: {} x {}, background {}, template {}, {} strokes, {} points, {} colors, {} with pressure",
i,
page.width,
page.height,
+ page.background_color
+ .map(|color| format!("#{:02x}{:02x}{:02x}", color.r, color.g, color.b))
+ .unwrap_or_else(|| "none".to_string()),
+ page.template
+ .map(format_template)
+ .unwrap_or_else(|| "none".to_string()),
page.strokes.len(),
total_points,
colors.len(),
@@ -153,6 +371,118 @@ fn print_info(doc: &Document) {
}
}
+/// Describes a page template for the `print_info` listing.
+///
+/// Built-in templates are shown by id; custom PDF templates are shown by their page number,
+/// counted from one.
+fn format_template(template: PageTemplate) -> String {
+    let PageTemplate { id, source } = template;
+    match source {
+        PageTemplateSource::CustomPdf { page_index } => {
+            let page_number = page_index + 1;
+            format!("custom PDF page {}", page_number)
+        }
+        PageTemplateSource::BuiltIn => format!("built-in {}", id),
+    }
+}
+
+// Unit tests for stroke-width normalisation and for the page renderer's choice of canvas
+// size, background and default ink.
+#[cfg(test)]
+mod tests {
+ use super::{normalized_stroke_width, render_page_svg};
+ use sdocx::{BoundingBox, Color, Page, Point, Stroke};
+
+ // Non-finite, zero and negative widths all fall back to 1.0.
+ #[test]
+ fn normalizes_invalid_stroke_widths() {
+ assert_eq!(normalized_stroke_width(f32::NAN), 1.0);
+ assert_eq!(normalized_stroke_width(f32::INFINITY), 1.0);
+ assert_eq!(normalized_stroke_width(0.0), 1.0);
+ assert_eq!(normalized_stroke_width(-1.0), 1.0);
+ }
+
+ // Widths are divided by 2.5 and then clamped to 0.4..=12.0.
+ #[test]
+ fn clamps_extreme_stroke_widths() {
+ assert_eq!(normalized_stroke_width(0.1), 0.4);
+ assert_eq!(normalized_stroke_width(10_000.0), 12.0);
+ assert_eq!(normalized_stroke_width(5.0), 2.0);
+ }
+
+ // A page without strokes still renders at the page's own size and background color.
+ #[test]
+ fn renders_empty_page_with_page_dimensions_and_background() {
+ let page = Page {
+ uuid: "page".into(),
+ width: 1080,
+ height: 1527,
+ content_bbox: BoundingBox::default(),
+ background_color: Some(Color {
+ r: 0xcb,
+ g: 0xda,
+ b: 0xdd,
+ }),
+ template: None,
+ strokes: Vec::new(),
+ elements: Vec::new(),
+ };
+
+ let svg = render_page_svg(&page, None, &[], false);
+
+ assert!(svg.contains(r#"viewBox="0.0 0.0 1080.0 1527.0""#));
+ assert!(svg.contains(r#"width="1080" height="1527""#));
+ assert!(svg.contains(r##"fill="#cbdadd""##));
+ }
+
+ // Fixture: a 100x100 page with no background and one two-point stroke that has no color.
+ fn page_with_uncolored_stroke() -> Page {
+ Page {
+ uuid: "page".into(),
+ width: 100,
+ height: 100,
+ content_bbox: BoundingBox::default(),
+ background_color: None,
+ template: None,
+ strokes: vec![Stroke {
+ bbox: BoundingBox::default(),
+ points: vec![Point { x: 1.0, y: 1.0 }, Point { x: 9.0, y: 9.0 }],
+ pressures: Vec::new(),
+ timestamps: Vec::new(),
+ tilt_x: Vec::new(),
+ tilt_y: Vec::new(),
+ color: None,
+ pen_width: 2.0,
+ }],
+ elements: Vec::new(),
+ }
+ }
+
+ #[test]
+ fn uncolored_stroke_defaults_to_dark_ink_in_light_mode() {
+ let svg = render_page_svg(&page_with_uncolored_stroke(), None, &[], false);
+ assert!(
+ svg.contains(r##"stroke="#1a1a1a""##),
+ "light-mode default ink"
+ );
+ assert!(!svg.contains(r##"stroke="#ffffff""##));
+ }
+
+ #[test]
+ fn uncolored_stroke_defaults_to_light_ink_in_dark_mode() {
+ let svg = render_page_svg(&page_with_uncolored_stroke(), None, &[], true);
+ assert!(
+ svg.contains(r##"stroke="#ffffff""##),
+ "dark-mode default ink"
+ );
+ assert!(!svg.contains(r##"stroke="#1a1a1a""##));
+ }
+
+ #[test]
+ fn missing_background_falls_back_to_mode_matched_canvas() {
+ // No page or document background: the fallback canvas must match the ink
+ // mode, or dark ink lands on a dark fallback (or vice versa) and vanishes.
+ let light = render_page_svg(&page_with_uncolored_stroke(), None, &[], false);
+ assert!(
+ light.contains(r##"fill="#fcfcfc""##),
+ "light-mode fallback bg"
+ );
+ let dark = render_page_svg(&page_with_uncolored_stroke(), None, &[], true);
+ assert!(
+ dark.contains(r##"fill="#252525""##),
+ "dark-mode fallback bg"
+ );
+ }
+}
+
fn main() {
let cli = Cli::parse();
@@ -169,7 +499,12 @@ fn main() {
let output_base = cli.output.unwrap_or_else(|| cli.path.with_extension("svg"));
if doc.pages.len() == 1 {
- let svg = render_page_svg(&doc.pages[0], doc.metadata.background_color.as_ref());
+ let svg = render_page_svg(
+ &doc.pages[0],
+ doc.metadata.background_color.as_ref(),
+ &doc.metadata.media_assets,
+ doc.metadata.dark_mode_compatibility.unwrap_or(false),
+ );
fs::write(&output_base, &svg).expect("failed to write SVG");
eprintln!("Wrote {} ({} bytes)", output_base.display(), svg.len());
} else {
@@ -183,7 +518,12 @@ fn main() {
.unwrap_or_default()
.to_string_lossy();
let path = output_base.with_file_name(format!("{stem}_page{i}.{ext}"));
- let svg = render_page_svg(page, doc.metadata.background_color.as_ref());
+ let svg = render_page_svg(
+ page,
+ doc.metadata.background_color.as_ref(),
+ &doc.metadata.media_assets,
+ doc.metadata.dark_mode_compatibility.unwrap_or(false),
+ );
fs::write(&path, &svg).expect("failed to write SVG");
eprintln!("Wrote {} ({} bytes)", path.display(), svg.len());
}
diff --git a/crates/sdocx/src/container.rs b/crates/sdocx/src/container.rs
index e7e1f33..4e7b6d8 100644
--- a/crates/sdocx/src/container.rs
+++ b/crates/sdocx/src/container.rs
@@ -2,7 +2,9 @@ use std::io::{Read, Seek};
use crate::error::{Error, Result};
use crate::page::parse_page;
-use crate::types::{Color, Document, DocumentMetadata, Page};
+use crate::types::{
+ BoundingBox, Color, Document, DocumentMetadata, MediaAsset, Page, RichTextBox, RichTextRun,
+};
/// Parse a `.sdocx` ZIP archive from a reader.
pub fn parse_from_reader(reader: R) -> Result {
@@ -17,11 +19,14 @@ pub fn parse_from_reader(reader: R) -> Result {
parse_end_tag(&buf, &mut metadata);
}
+ let mut note_text = None;
+
// Parse note.note (optional)
if let Ok(mut entry) = archive.by_name("note.note") {
let mut buf = Vec::new();
entry.read_to_end(&mut buf)?;
parse_note_note(&buf, &mut metadata);
+ note_text = parse_note_text(&buf);
}
// Parse pageIdInfo.dat (optional)
@@ -48,6 +53,8 @@ pub fn parse_from_reader(reader: R) -> Result {
return Err(Error::Format("no .page files found in archive".into()));
}
+ metadata.media_assets = parse_media_assets(&mut archive)?;
+
let mut pages: Vec = Vec::with_capacity(page_names.len());
for name in &page_names {
let mut entry = archive.by_name(name)?;
@@ -57,6 +64,11 @@ pub fn parse_from_reader(reader: R) -> Result {
pages.push(page);
}
+ if let (Some(page), Some(text)) = (pages.first_mut(), note_text.clone()) {
+ page.elements.push(crate::types::PageElement::TextBox(text));
+ }
+ metadata.note_text = note_text;
+
Ok(Document { pages, metadata })
}
@@ -73,6 +85,11 @@ fn parse_end_tag(data: &[u8], metadata: &mut DocumentMetadata) {
/// Extract background color and page dimensions from `note.note`.
fn parse_note_note(data: &[u8], metadata: &mut DocumentMetadata) {
+ if data.len() >= 0x08 {
+ let flags = u32::from_le_bytes(data[0x04..0x08].try_into().unwrap());
+ metadata.dark_mode_compatibility = Some(flags & 0x0800 != 0);
+ }
+
// Page dimensions at 0x28, 0x2C
if data.len() >= 0x30 {
let w = u32::from_le_bytes(data[0x28..0x2C].try_into().unwrap());
@@ -101,6 +118,187 @@ fn parse_note_note(data: &[u8], metadata: &mut DocumentMetadata) {
}
}
+/// Loads the raster images stored under `media/` in the archive.
+///
+/// Only entries whose name starts with `media/` and ends (case-insensitively) in `.jpg`,
+/// `.jpeg`, `.png` or `.webp` are read. Assets are ordered by the number that precedes the
+/// first `@` in the file name; names without such a number sort last.
+///
+/// # Errors
+///
+/// Returns an error if a selected entry cannot be opened or read.
+fn parse_media_assets<R: Read + Seek>(
+    archive: &mut zip::ZipArchive<R>,
+) -> Result<Vec<MediaAsset>> {
+    // The size recorded in the archive is untrusted input, so it is only used as a bounded
+    // preallocation hint; `read_to_end` still grows the buffer to the real length.
+    const MAX_PREALLOCATION: u64 = 16 * 1024 * 1024;
+
+    let mut names: Vec<String> = (0..archive.len())
+        .filter_map(|i| {
+            let entry = archive.by_index(i).ok()?;
+            let name = entry.name().to_string();
+            let lower = name.to_ascii_lowercase();
+            if name.starts_with("media/")
+                && (lower.ends_with(".jpg")
+                    || lower.ends_with(".jpeg")
+                    || lower.ends_with(".png")
+                    || lower.ends_with(".webp"))
+            {
+                Some(name)
+            } else {
+                None
+            }
+        })
+        .collect();
+    names.sort_by_key(|name| {
+        name.rsplit('/')
+            .next()
+            .and_then(|file| file.split('@').next())
+            .and_then(|prefix| prefix.parse::<usize>().ok())
+            .unwrap_or(usize::MAX)
+    });
+
+    let mut assets = Vec::with_capacity(names.len());
+    for name in names {
+        let mut entry = archive.by_name(&name)?;
+        let mut data = Vec::with_capacity(entry.size().min(MAX_PREALLOCATION) as usize);
+        entry.read_to_end(&mut data)?;
+        let lower = name.to_ascii_lowercase();
+        // The filter above admits only four extensions, so anything else here is JPEG.
+        let mime_type = if lower.ends_with(".png") {
+            "image/png"
+        } else if lower.ends_with(".webp") {
+            "image/webp"
+        } else {
+            "image/jpeg"
+        };
+        assets.push(MediaAsset {
+            name,
+            mime_type: mime_type.to_string(),
+            data,
+        });
+    }
+    Ok(assets)
+}
+
+/// Extracts the typed note body from `note.note` as a text box, if one is found.
+///
+/// The text is the first UTF-16 run accepted by `first_utf16_text`; the bytes after it are
+/// searched for the color (tag `0x01`), the font size (tag `0x03`) and the bold/italic runs.
+fn parse_note_text(data: &[u8]) -> Option<RichTextBox> {
+    let (text, text_end) = first_utf16_text(data)?;
+    let styles = &data[text_end..];
+    let color = tlv_color(styles, 0x01);
+    let font_size = tlv_f32(styles, 0x03);
+    let runs = parse_rich_text_runs(styles, text.chars().count());
+    Some(RichTextBox {
+        // `note.note` stores the typed note body as the default page text layer. The body itself
+        // carries leading blank lines, so the renderer can place it from the page origin.
+        bbox: BoundingBox {
+            x_min: 0.0,
+            y_min: 0.0,
+            x_max: 0.0,
+            y_max: 0.0,
+        },
+        rotation_degrees: None,
+        text,
+        color,
+        highlight_color: None,
+        underline: false,
+        font_size,
+        runs,
+    })
+}
+
+/// Finds the first run of UTF-16LE text in `data` that looks like note content.
+///
+/// Candidate runs are tried from every even byte offset. A run extends while each unit is a
+/// line feed (`0x0A`) or lies in `0x20..=0xD7FF`. It is accepted when its trimmed text has at
+/// least three non-whitespace chars and passes `looks_like_note_text`.
+///
+/// Returns the untrimmed text and the byte offset just past the run.
+fn first_utf16_text(data: &[u8]) -> Option<(String, usize)> {
+    let mut offset = 0;
+    while offset + 6 <= data.len() {
+        let units: Vec<u16> = data[offset..]
+            .chunks_exact(2)
+            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
+            .take_while(|unit| *unit == 0x0A || (0x20..=0xD7FF).contains(unit))
+            .collect();
+        let end = offset + 2 * units.len();
+        let text = String::from_utf16(&units).ok()?;
+        let trimmed = text.trim();
+        let visible_chars = trimmed.chars().filter(|c| !c.is_whitespace()).count();
+        if visible_chars >= 3 && looks_like_note_text(trimmed) {
+            return Some((text, end));
+        }
+        offset += 2;
+    }
+    None
+}
+
+/// Heuristic check that `text` is plausibly typed note content.
+///
+/// Whitespace is ignored. The text passes when it has at least three remaining chars and at
+/// least three quarters of them are ASCII alphanumerics or ASCII punctuation.
+fn looks_like_note_text(text: &str) -> bool {
+    let (total, common) = text
+        .chars()
+        .filter(|ch| !ch.is_whitespace())
+        .fold((0, 0), |(seen, ascii_like), ch| {
+            let is_common = ch.is_ascii_alphanumeric() || ch.is_ascii_punctuation();
+            (seen + 1, ascii_like + i32::from(is_common))
+        });
+    total >= 3 && common * 4 >= total * 3
+}
+
+/// Searches `data` for the first record with the given `tag` that carries an opaque color.
+///
+/// A record starts with the bytes `18 00` followed by the little-endian tag. It matches when
+/// the byte at record offset 21 is `0xFF`; blue, green and red are then read from record
+/// offsets 18, 19 and 20.
+fn tlv_color(data: &[u8], tag: u16) -> Option<Color> {
+    let [tag_lo, tag_hi] = tag.to_le_bytes();
+    let marker = [0x18, 0x00, tag_lo, tag_hi];
+    // The range bound keeps `offset + 21` inside `data`.
+    (0..data.len().saturating_sub(22))
+        .find(|&offset| data[offset..offset + 4] == marker && data[offset + 21] == 0xFF)
+        .map(|offset| Color {
+            r: data[offset + 20],
+            g: data[offset + 19],
+            b: data[offset + 18],
+        })
+}
+
+/// Searches `data` for a record with the given `tag` and returns a plausible `f32` from it.
+///
+/// A record starts with the bytes `18 00` followed by the little-endian tag. For each record
+/// the little-endian `f32` values at record offsets 18, 20 and 24 are tried in that order,
+/// and the first finite value within `4.0..=96.0` is returned.
+///
+/// Candidate positions that would extend past the end of `data` are skipped, so a record
+/// near the end of the buffer cannot cause an out-of-bounds panic.
+fn tlv_f32(data: &[u8], tag: u16) -> Option<f32> {
+    let [tag_lo, tag_hi] = tag.to_le_bytes();
+    let marker = [0x18, 0x00, tag_lo, tag_hi];
+    for offset in 0..data.len().saturating_sub(24) {
+        if data[offset..offset + 4] != marker {
+            continue;
+        }
+        for value_offset in [18, 20, 24] {
+            let start = offset + value_offset;
+            // The loop bound only guarantees `offset + 24` is in range; the candidate at
+            // record offset 24 needs four more bytes, so check every read explicitly.
+            let Some(bytes) = data.get(start..start + 4) else {
+                continue;
+            };
+            let value = f32::from_le_bytes(bytes.try_into().ok()?);
+            if value.is_finite() && (4.0..=96.0).contains(&value) {
+                return Some(value);
+            }
+        }
+    }
+    None
+}
+
+/// Collects the bold (tag `0x05`) and italic (tag `0x06`) runs found in `data`.
+///
+/// `text_len` is the length of the text in chars; runs ending beyond it are dropped by
+/// `collect_style_runs`. Bold runs come first in the result, followed by italic runs.
+fn parse_rich_text_runs(data: &[u8], text_len: usize) -> Vec<RichTextRun> {
+    let mut runs = Vec::new();
+    collect_style_runs(data, text_len, 0x05, true, false, &mut runs);
+    collect_style_runs(data, text_len, 0x06, false, true, &mut runs);
+    runs
+}
+
+/// Appends to `runs` every enabled style record in `data` that has the given `tag`.
+///
+/// A record starts with the bytes `18 00` followed by the little-endian tag. Its start and
+/// end are little-endian `u32`s at record offsets 6 and 10, and a non-zero `u32` at record
+/// offset 18 marks it as enabled. A record is kept only when it is enabled, non-empty and
+/// ends within `text_len`; each kept run receives the supplied `bold` and `italic` flags.
+fn collect_style_runs(
+    data: &[u8],
+    text_len: usize,
+    tag: u16,
+    bold: bool,
+    italic: bool,
+    runs: &mut Vec<RichTextRun>,
+) {
+    let [tag_lo, tag_hi] = tag.to_le_bytes();
+    let marker = [0x18, 0x00, tag_lo, tag_hi];
+    // The loop bound keeps every read below within `data`: the furthest one ends at
+    // `offset + 22`.
+    let u32_at = |at: usize| u32::from_le_bytes(data[at..at + 4].try_into().unwrap());
+    for offset in 0..data.len().saturating_sub(22) {
+        if data[offset..offset + 4] != marker {
+            continue;
+        }
+        let start = u32_at(offset + 6) as usize;
+        let end = u32_at(offset + 10) as usize;
+        let enabled = u32_at(offset + 18) != 0;
+        if enabled && start < end && end <= text_len {
+            runs.push(RichTextRun {
+                start,
+                end,
+                bold,
+                italic,
+            });
+        }
+    }
+}
+
/// Extract page UUIDs from `pageIdInfo.dat`.
fn parse_page_id_info(data: &[u8], metadata: &mut DocumentMetadata) {
if data.len() < 0x24 {
@@ -127,3 +325,25 @@ fn parse_page_id_info(data: &[u8], metadata: &mut DocumentMetadata) {
offset += char_len * 2;
}
}
+
+// Unit tests for the `note.note` header parsing.
+#[cfg(test)]
+mod tests {
+ use super::parse_note_note;
+ use crate::types::DocumentMetadata;
+
+ // Bit 0x0800 of the little-endian u32 at offset 0x04 drives `dark_mode_compatibility`.
+ #[test]
+ fn parses_dark_mode_compatibility_flag() {
+ let mut metadata = DocumentMetadata::default();
+ let mut data = vec![0; 0x30];
+ data[0x04..0x08].copy_from_slice(&0x0804_u32.to_le_bytes());
+
+ parse_note_note(&data, &mut metadata);
+
+ assert_eq!(metadata.dark_mode_compatibility, Some(true));
+
+ // Clearing the bit and re-parsing into the same metadata must overwrite the value.
+ data[0x04..0x08].copy_from_slice(&0x0004_u32.to_le_bytes());
+ parse_note_note(&data, &mut metadata);
+
+ assert_eq!(metadata.dark_mode_compatibility, Some(false));
+ }
+}
diff --git a/crates/sdocx/src/page.rs b/crates/sdocx/src/page.rs
index 06233e5..750efd4 100644
--- a/crates/sdocx/src/page.rs
+++ b/crates/sdocx/src/page.rs
@@ -1,11 +1,19 @@
use crate::decode::{decode_coordinates, decode_trailing};
use crate::error::{Error, Result};
-use crate::types::{BoundingBox, Page, Stroke};
+use crate::types::{
+ BoundingBox, Color, Page, PageElement, PageTemplate, PageTemplateSource, RichTextBox,
+ RichTextRun, Stroke,
+};
const PRE_STROKE_RECORD_LEN: usize = 71;
const STROKE_HEADER_LEN: usize = 89; // bbox(32) + meta(41) + start(16)
const EXTRA_LEN_BIAS: u8 = 0x79; // byte value at record+3 when no extras are present
+/// Result of decoding one stroke record.
+struct ParsedStroke {
+ // The decoded stroke.
+ stroke: Stroke,
+ // Byte offset at which `parse_page` continues with the next record.
+ next_record_off: usize,
+}
+
/// Parse a `.page` binary file into a `Page`.
///
/// Layout: `base = u32 @ 0x00`; stroke count at `base + 0x66`; first stroke at `base + 0xB5`.
@@ -40,6 +48,9 @@ pub fn parse_page(data: &[u8]) -> Result {
y_max: f64::from_le_bytes(data[0x98..0xA0].try_into().unwrap()),
};
+ let background_color = page_background_color(data, base);
+ let template = background_color.and_then(|_| page_template(data, base));
+
// Stroke count at base + 0x66
let sc_off = base + 0x66;
if sc_off + 4 > data.len() {
@@ -63,38 +74,91 @@ pub fn parse_page(data: &[u8]) -> Result {
break;
}
- // 1) Bounding box (32 bytes, 4 x f64)
- let bbox = BoundingBox {
- x_min: f64::from_le_bytes(data[off..off + 8].try_into().unwrap()),
- y_min: f64::from_le_bytes(data[off + 8..off + 16].try_into().unwrap()),
- x_max: f64::from_le_bytes(data[off + 16..off + 24].try_into().unwrap()),
- y_max: f64::from_le_bytes(data[off + 24..off + 32].try_into().unwrap()),
+ let current = parse_stroke(data, off, extra_len, StrokeLayout::Current);
+ let shifted = parse_stroke(data, off, extra_len, StrokeLayout::StartPointMinusThree);
+
+ let parsed = match (current, shifted) {
+ (Some(current), Some(shifted))
+ if shifted.stroke.points.len() > current.stroke.points.len() =>
+ {
+ shifted
+ }
+ (Some(current), _) => current,
+ (None, Some(shifted)) => shifted,
+ (None, None) => break,
};
- // 2) Metadata (41 + extra_len bytes): data_len @ 21, n_points @ 39
- let meta_off = off + 32 + extra_len;
- let data_len =
- u32::from_le_bytes(data[meta_off + 21..meta_off + 25].try_into().unwrap()) as usize;
- let n_points =
- u16::from_le_bytes(data[meta_off + 39..meta_off + 41].try_into().unwrap()) as usize;
-
- // 3) Start point (16 bytes, 2 x f64)
- let sp_off = off + 73 + extra_len;
- let start_x = f64::from_le_bytes(data[sp_off..sp_off + 8].try_into().unwrap());
- let start_y = f64::from_le_bytes(data[sp_off + 8..sp_off + 16].try_into().unwrap());
-
- // 4) Delta data
- let data_off = sp_off + 16;
- if data_off + data_len > data.len() {
- break;
- }
- let data_blob = &data[data_off..data_off + data_len];
+ record_off = parsed.next_record_off;
+ strokes.push(parsed.stroke);
+ }
+
+ let elements = parse_page_elements(data, record_off, width, height);
- let (points, n_coord_bytes) =
- decode_coordinates(data_blob, start_x, start_y, n_points.saturating_sub(1));
- let trailing = decode_trailing(data_blob, n_coord_bytes, points.len().saturating_sub(1));
+ Ok(Page {
+ uuid,
+ width,
+ height,
+ content_bbox,
+ background_color,
+ template,
+ strokes,
+ elements,
+ })
+}
- strokes.push(Stroke {
+/// Byte layout that `parse_stroke` assumes for a stroke record.
+#[derive(Clone, Copy)]
+enum StrokeLayout {
+ // Metadata at `off + 32 + extra_len`, point count at metadata offset 39, start point at
+ // `off + 73 + extra_len`.
+ Current,
+ // Metadata at `off + 32`, point count at metadata offset 36, start point at `off + 70`;
+ // the next record begins three bytes after the delta data.
+ StartPointMinusThree,
+}
+
+/// Parses one stroke record that starts at byte `off` of `data`.
+///
+/// Returns `None` when any field lies outside `data`, when the start point is not finite,
+/// when the point count is zero, when the stroke data would run past the end of `data`, or
+/// when decoding yields no points or a non-finite point. On success the result carries the
+/// stroke together with the offset at which the next record is expected.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body builds a `ParsedStroke`) — confirm against the real source.
+fn parse_stroke(
+ data: &[u8],
+ off: usize,
+ extra_len: usize,
+ layout: StrokeLayout,
+) -> Option {
+ // The record opens with four consecutive little-endian f64 values.
+ let bbox = BoundingBox {
+ x_min: read_f64(data, off)?,
+ y_min: read_f64(data, off + 8)?,
+ x_max: read_f64(data, off + 16)?,
+ y_max: read_f64(data, off + 24)?,
+ };
+
+ // Field positions differ per layout; the last element is added to the end of the
+ // stroke data to obtain the next record offset.
+ let (meta_off, n_points_off, sp_off, next_record_adjust) = match layout {
+ StrokeLayout::Current => (off + 32 + extra_len, 39, off + 73 + extra_len, 0),
+ StrokeLayout::StartPointMinusThree => (off + 32, 36, off + 70, 3),
+ };
+
+ let data_len = read_u32(data, meta_off + 21)? as usize;
+ let n_points = read_u16(data, meta_off + n_points_off)? as usize;
+ let start_x = read_f64(data, sp_off)?;
+ let start_y = read_f64(data, sp_off + 8)?;
+ if !start_x.is_finite() || !start_y.is_finite() || n_points == 0 {
+ return None;
+ }
+
+ // The stroke data blob follows the 16-byte start point and is `data_len` bytes long.
+ let data_off = sp_off + 16;
+ let data_end = data_off.checked_add(data_len)?;
+ if data_end > data.len() {
+ return None;
+ }
+ let data_blob = &data[data_off..data_end];
+
+ // The start point is passed separately, so only `n_points - 1` further points are
+ // requested from the blob.
+ let (points, n_coord_bytes) =
+ decode_coordinates(data_blob, start_x, start_y, n_points.saturating_sub(1));
+ if points.is_empty()
+ || points
+ .iter()
+ .any(|point| !point.x.is_finite() || !point.y.is_finite())
+ {
+ return None;
+ }
+
+ // Per-point attributes are decoded from the bytes after the coordinate section.
+ let trailing = decode_trailing(data_blob, n_coord_bytes, points.len().saturating_sub(1));
+
+ Some(ParsedStroke {
+ stroke: Stroke {
bbox,
points,
pressures: trailing.pressures,
@@ -103,17 +167,443 @@ pub fn parse_page(data: &[u8]) -> Result {
tilt_y: trailing.tilt_y,
color: trailing.color,
pen_width: trailing.pen_width,
- });
+ },
+ next_record_off: data_end + next_record_adjust,
+ })
+}
+
+/// Reads a little-endian `u16` starting at `offset`.
+///
+/// Returns `None` when the two bytes are not fully inside `data`, including the case where
+/// `offset + 2` would overflow `usize` (offsets are derived from untrusted file contents).
+fn read_u16(data: &[u8], offset: usize) -> Option<u16> {
+    let end = offset.checked_add(2)?;
+    let bytes = data.get(offset..end)?;
+    Some(u16::from_le_bytes(bytes.try_into().ok()?))
+}
+
+/// Reads a little-endian `u32` starting at `offset`.
+///
+/// Returns `None` when the four bytes are not fully inside `data`, including the case where
+/// `offset + 4` would overflow `usize` (offsets are derived from untrusted file contents).
+fn read_u32(data: &[u8], offset: usize) -> Option<u32> {
+    let end = offset.checked_add(4)?;
+    let bytes = data.get(offset..end)?;
+    Some(u32::from_le_bytes(bytes.try_into().ok()?))
+}
+
+/// Reads a little-endian `f64` starting at `offset`.
+///
+/// Returns `None` when the eight bytes are not fully inside `data`, including the case where
+/// `offset + 8` would overflow `usize` (offsets are derived from untrusted file contents).
+/// The value itself is not validated; callers check finiteness where they need it.
+fn read_f64(data: &[u8], offset: usize) -> Option<f64> {
+    let end = offset.checked_add(8)?;
+    let bytes = data.get(offset..end)?;
+    Some(f64::from_le_bytes(bytes.try_into().ok()?))
+}
- // Next pre-stroke record starts immediately after this stroke's delta data.
- record_off = data_off + data_len;
+/// Reads the page background color from the page header, if one is stored.
+///
+/// The color's position depends on `base`: 0x84 for `base == 0x90`, 0x80 for
+/// `base == 0xA6`, and 0xA4 otherwise. The four bytes are interpreted as blue, green, red
+/// and a final byte that must be 0xFF; any other final byte, or a buffer too short to hold
+/// the field, yields `None`.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body builds a `crate::types::Color`) — confirm against the real source.
+fn page_background_color(data: &[u8], base: usize) -> Option {
+ let offset = match base {
+ 0x90 => 0x84,
+ 0xA6 => 0x80,
+ _ => 0xA4,
+ };
+ // Only a color whose fourth byte is 0xFF is accepted.
+ if data.len() >= offset + 4 && data[offset + 3] == 0xFF {
+ Some(crate::types::Color {
+ r: data[offset + 2],
+ g: data[offset + 1],
+ b: data[offset],
+ })
+ } else {
+ None
}
+}
- Ok(Page {
- uuid,
- width,
- height,
- content_bbox,
- strokes,
+/// Reads the page template reference from the page header.
+///
+/// The field location and meaning depend on `base`:
+/// - `0x90`: a built-in template id stored as a `u32` at 0x8C.
+/// - `0xA6`: a custom PDF template; the page index is the high 16 bits of the `u32` at 0x8C
+///   and is also reported as the template `id`.
+/// - anything else: a built-in template id at 0xAC when `base >= 0xE7`, otherwise at 0xB4.
+///
+/// Built-in ids that fail `is_builtin_template_id` produce `None`, as does a header too
+/// short to contain the field.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body builds a `PageTemplate`) — confirm against the real source.
+fn page_template(data: &[u8], base: usize) -> Option {
+ match base {
+ // Short built-in template page records store the template id in the compact header.
+ 0x90 => {
+ let id = read_u32(data, 0x8C)?;
+ is_builtin_template_id(id).then_some(PageTemplate {
+ id,
+ source: PageTemplateSource::BuiltIn,
+ })
+ }
+ // Custom downloaded templates are backed by media PDFs. The compact header stores the
+ // zero-based PDF page index in the high 16 bits of this field.
+ 0xA6 => {
+ let page_index = read_u32(data, 0x8C)? >> 16;
+ Some(PageTemplate {
+ id: page_index,
+ source: PageTemplateSource::CustomPdf { page_index },
+ })
+ }
+ // All other header sizes: the id offset depends on whether `base` reaches 0xE7.
+ _ => {
+ let id = if base >= 0xE7 {
+ read_u32(data, 0xAC)?
+ } else {
+ read_u32(data, 0xB4)?
+ };
+ is_builtin_template_id(id).then_some(PageTemplate {
+ id,
+ source: PageTemplateSource::BuiltIn,
+ })
+ }
+ }
+}
+
+/// Returns `true` when `id` is a usable built-in template id: non-zero and at most 0xFFFF.
+fn is_builtin_template_id(id: u32) -> bool {
+    (1..=0xFFFF).contains(&id)
+}
+
+/// Collects the non-stroke objects (text boxes and images) found at or after `start`.
+///
+/// Every ASCII UUID located from `start` onward is treated as the beginning of an object
+/// record that extends to the next UUID, or to the end of `data` for the last one. Records
+/// without a plausible bounding box are skipped. A record that yields text becomes a
+/// `PageElement::TextBox`; otherwise, if it looks like an image record, it becomes a
+/// `PageElement::Image` whose `media_index` is the running count of images emitted so far
+/// for this page.
+fn parse_page_elements(data: &[u8], start: usize, width: u32, height: u32) -> Vec<PageElement> {
+    // Scan for UUIDs once. The scanner resumes 36 bytes after each hit, so the entry that
+    // follows a UUID in this list is exactly what a fresh scan from `uuid_off + 36` would
+    // find first; re-scanning the tail for every record is unnecessary.
+    let uuid_offsets = find_ascii_uuid_offsets(data, start);
+    let mut elements = Vec::new();
+    let mut image_count = 0;
+
+    for (index, &uuid_off) in uuid_offsets.iter().enumerate() {
+        let Some(bbox) = find_object_bbox(data, uuid_off, width, height) else {
+            continue;
+        };
+
+        let record_end = uuid_offsets
+            .get(index + 1)
+            .copied()
+            .unwrap_or(data.len());
+        let record = &data[uuid_off..record_end];
+
+        // Text takes precedence: a record is only considered an image when no text is found.
+        if let Some(text_box) = parse_text_box_record(record, bbox) {
+            elements.push(PageElement::TextBox(text_box));
+        } else if looks_like_image_record(record) {
+            elements.push(PageElement::Image {
+                bbox,
+                media_index: image_count,
+            });
+            image_count += 1;
+        }
+    }
+
+    elements
+}
+
+/// Returns, in ascending order, the offsets at or after `start` where a 36-byte ASCII UUID
+/// begins.
+///
+/// Matches never overlap: after a hit the scan resumes 36 bytes later, otherwise it
+/// advances one byte at a time. A `start` too close to the end of `data` yields an empty
+/// list.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the vector holds `usize` offsets) — confirm against the real source.
+fn find_ascii_uuid_offsets(data: &[u8], start: usize) -> Vec {
+ let mut offsets = Vec::new();
+ let mut offset = start;
+ while offset + 36 <= data.len() {
+ if is_ascii_uuid(&data[offset..offset + 36]) {
+ offsets.push(offset);
+ offset += 36;
+ } else {
+ offset += 1;
+ }
+ }
+ offsets
+}
+
+/// Returns `true` when `bytes` is exactly a 36-byte textual UUID: hyphens at positions 8, 13,
+/// 18 and 23, and ASCII hex digits (either case) everywhere else.
+fn is_ascii_uuid(bytes: &[u8]) -> bool {
+    const HYPHEN_POSITIONS: [usize; 4] = [8, 13, 18, 23];
+
+    if bytes.len() != 36 {
+        return false;
+    }
+    bytes.iter().enumerate().all(|(index, byte)| {
+        if HYPHEN_POSITIONS.contains(&index) {
+            *byte == b'-'
+        } else {
+            byte.is_ascii_hexdigit()
+        }
+    })
+}
+
+/// Searches the bytes after an object's UUID for the first plausible bounding box.
+///
+/// Candidate positions run from `uuid_off + 36` through `uuid_off + 128`, capped so that a
+/// full 32-byte box still fits inside `data`. At each byte position four consecutive
+/// little-endian `f64` values are read as `x_min`, `y_min`, `x_max`, `y_max`, and the first
+/// candidate accepted by `plausible_bbox` is returned. Returns `None` when no candidate
+/// qualifies.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body returns a `BoundingBox`) — confirm against the real source.
+fn find_object_bbox(data: &[u8], uuid_off: usize, width: u32, height: u32) -> Option {
+ // Last position at which a whole 32-byte box can still be read.
+ let search_end = (uuid_off + 128).min(data.len().saturating_sub(32));
+ for offset in uuid_off + 36..=search_end {
+ let bbox = BoundingBox {
+ x_min: read_f64(data, offset)?,
+ y_min: read_f64(data, offset + 8)?,
+ x_max: read_f64(data, offset + 16)?,
+ y_max: read_f64(data, offset + 24)?,
+ };
+ if plausible_bbox(bbox, width, height) {
+ return Some(bbox);
+ }
+ }
+
+ None
+}
+
+/// Heuristic filter deciding whether four decoded doubles form a believable object box.
+///
+/// A box is accepted when all coordinates are finite, its minimum corner is at least
+/// `(1.0, 1.0)`, its maximum corner lies strictly beyond the minimum and within 125% of the
+/// page `width`/`height`, and it is more than 8 units wide and tall.
+fn plausible_bbox(bbox: BoundingBox, width: u32, height: u32) -> bool {
+    let BoundingBox {
+        x_min,
+        y_min,
+        x_max,
+        y_max,
+    } = bbox;
+
+    if ![x_min, y_min, x_max, y_max].iter().all(|value| value.is_finite()) {
+        return false;
+    }
+
+    let starts_on_page = x_min >= 1.0 && y_min >= 1.0;
+    let corners_ordered = x_max > x_min && y_max > y_min;
+    let within_page_margin = x_max <= width as f64 * 1.25 && y_max <= height as f64 * 1.25;
+    let large_enough = x_max - x_min > 8.0 && y_max - y_min > 8.0;
+
+    starts_on_page && corners_ordered && within_page_margin && large_enough
+}
+
+/// Heuristically decides whether an object record describes an image by looking for one of
+/// two byte signatures anywhere in `record`.
+// NOTE(review): the first check compares 4-byte windows against the 2-byte literal
+// `b"Re"`, which can never be equal, so only the second signature can match as written.
+// The literal may have been truncated — confirm the intended 4-byte marker.
+fn looks_like_image_record(record: &[u8]) -> bool {
+ record.windows(4).any(|window| window == b"Re")
+ || record
+ .windows(4)
+ .any(|window| window == b"\x01\x00\x04\x20")
+}
+
+/// Attempts to interpret an object record as a rich text box.
+///
+/// Returns `None` when no note-like UTF-16 text is found in `record`. Otherwise the bytes
+/// after the text are searched for style records: tag 0x01 for the text color, tag 0x11 for
+/// the highlight color, tags 0x07/0x06 for underline, and tag 0x03 for the font size. Bold
+/// and italic runs and an optional rotation are derived by the respective helpers.
+// NOTE(review): tag 0x06 counts as underline here but is collected as italic in
+// `parse_rich_text_runs` — confirm which meaning is correct.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body builds a `RichTextBox`) — confirm against the real source.
+fn parse_text_box_record(record: &[u8], bbox: BoundingBox) -> Option {
+ let (text, text_end) = first_utf16_text(record)?;
+ // Style records are only looked for after the end of the text run.
+ let styles = &record[text_end..];
+ let color = tlv_color(styles, 0x01);
+ let highlight_color = tlv_color(styles, 0x11);
+ let underline = tlv_u32(styles, 0x07).is_some_and(|value| value != 0)
+ || tlv_u32(styles, 0x06).is_some_and(|value| value != 0);
+ let font_size = tlv_f32(styles, 0x03);
+ let rotation_degrees = infer_rotation_degrees(record, bbox);
+ // Run bounds are validated against the number of characters in the text.
+ let runs = parse_rich_text_runs(styles, text.chars().count());
+
+ Some(RichTextBox {
+ bbox,
+ rotation_degrees,
+ text,
+ color,
+ highlight_color,
+ underline,
+ font_size,
+ runs,
})
}
+
+/// Finds the first run of UTF-16LE code units in `data` that looks like typed note text.
+///
+/// Candidate runs start at every even offset. A run consists of consecutive code units that
+/// are either a line feed (0x0A) or within 0x20..=0xD7FF. The first run that contains at
+/// least three non-whitespace characters and passes `looks_like_note_text` is returned
+/// untrimmed, together with the byte offset just past the run.
+fn first_utf16_text(data: &[u8]) -> Option<(String, usize)> {
+    let is_text_unit = |unit: u16| unit == 0x0A || (0x20..=0xD7FF).contains(&unit);
+
+    let mut start = 0;
+    while start + 6 <= data.len() {
+        let units: Vec<u16> = data[start..]
+            .chunks_exact(2)
+            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
+            .take_while(|&unit| is_text_unit(unit))
+            .collect();
+        let run_end = start + units.len() * 2;
+
+        let text = String::from_utf16(&units).ok()?;
+        let visible = text
+            .trim()
+            .chars()
+            .filter(|ch| !ch.is_whitespace())
+            .count();
+        if visible >= 3 && looks_like_note_text(text.trim()) {
+            return Some((text, run_end));
+        }
+
+        start += 2;
+    }
+    None
+}
+
+/// Returns `true` when `text` plausibly is human-typed note text rather than binary noise.
+///
+/// Whitespace is ignored. The remaining characters must number at least three, and at least
+/// three quarters of them must be ASCII letters, digits or punctuation.
+fn looks_like_note_text(text: &str) -> bool {
+    let (total, common) = text
+        .chars()
+        .filter(|ch| !ch.is_whitespace())
+        .fold((0_i32, 0_i32), |(total, common), ch| {
+            let is_common = ch.is_ascii_alphanumeric() || ch.is_ascii_punctuation();
+            (total + 1, common + i32::from(is_common))
+        });
+
+    total >= 3 && common * 4 >= total * 3
+}
+
+/// Finds the first style record tagged `tag` that carries an accepted color.
+///
+/// A record is recognized by the 4-byte marker `0x18, 0x00, tag_lo, tag_hi`. Relative to the
+/// marker, bytes 18, 19 and 20 are read as blue, green and red, and byte 21 must be 0xFF
+/// for the color to be accepted. Markers whose byte 21 differs are skipped and the scan
+/// continues.
+// NOTE(review): the scan stops one position short of the last offset at which a full
+// 22-byte record would still fit — confirm whether that is intended.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body builds a `Color`) — confirm against the real source.
+fn tlv_color(data: &[u8], tag: u16) -> Option {
+ let marker = [0x18, 0x00, tag as u8, (tag >> 8) as u8];
+ for offset in 0..data.len().saturating_sub(22) {
+ if data[offset..offset + 4] == marker && data[offset + 21] == 0xFF {
+ return Some(Color {
+ r: data[offset + 20],
+ g: data[offset + 19],
+ b: data[offset + 18],
+ });
+ }
+ }
+ None
+}
+
+/// Returns the little-endian `u32` stored 18 bytes after the first style record tagged `tag`.
+///
+/// A record is recognized by the 4-byte marker `0x18, 0x00, tag_lo, tag_hi`. Only the first
+/// marker occurrence is considered. Returns `None` when no marker is found.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body returns the result of `read_u32`) — confirm against the real source.
+fn tlv_u32(data: &[u8], tag: u16) -> Option {
+ let marker = [0x18, 0x00, tag as u8, (tag >> 8) as u8];
+ for offset in 0..data.len().saturating_sub(22) {
+ if data[offset..offset + 4] == marker {
+ return read_u32(data, offset + 18);
+ }
+ }
+ None
+}
+
+/// Finds the first plausible `f32` value attached to a style record tagged `tag`.
+///
+/// A record is recognized by the 4-byte marker `0x18, 0x00, tag_lo, tag_hi`. For every marker
+/// occurrence the little-endian `f32` values at relative offsets 18, 20 and 24 are tried in
+/// that order, and the first finite value within `4.0..=96.0` is returned. Returns `None`
+/// when no marker yields such a value.
+fn tlv_f32(data: &[u8], tag: u16) -> Option<f32> {
+    let marker = [0x18, 0x00, tag as u8, (tag >> 8) as u8];
+    for offset in 0..data.len().saturating_sub(24) {
+        if data[offset..offset + 4] != marker {
+            continue;
+        }
+        for value_offset in [18, 20, 24] {
+            let value_start = offset + value_offset;
+            // A marker close to the end of the buffer may not leave room for the later
+            // candidates (offset 24 needs 28 bytes); skip them instead of slicing out of
+            // bounds.
+            let Some(raw) = data.get(value_start..value_start + 4) else {
+                continue;
+            };
+            let value = f32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]);
+            if value.is_finite() && (4.0..=96.0).contains(&value) {
+                return Some(value);
+            }
+        }
+    }
+    None
+}
+
+/// Collects style runs from the style bytes of a text box: tag 0x05 records become bold
+/// runs, followed by tag 0x06 records as italic runs. `text_len` is the upper bound for run
+/// end indexes.
+// NOTE(review): tag 0x06 is also read as an underline flag in `parse_text_box_record` —
+// confirm which meaning is correct.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the vector holds `RichTextRun` values) — confirm against the real source.
+fn parse_rich_text_runs(data: &[u8], text_len: usize) -> Vec {
+ let mut runs = Vec::new();
+ collect_style_runs(data, text_len, 0x05, true, false, &mut runs);
+ collect_style_runs(data, text_len, 0x06, false, true, &mut runs);
+ runs
+}
+
+/// Appends one `RichTextRun` to `runs` for every enabled style record tagged `tag`.
+///
+/// A record is recognized by the 4-byte marker `0x18, 0x00, tag_lo, tag_hi`. Relative to the
+/// marker, the `u32` at offset 6 is the run start, the `u32` at offset 10 is the run end, and
+/// a non-zero `u32` at offset 18 marks the style as enabled. A run is kept only when it is
+/// enabled, non-empty (`start < end`) and ends within `text_len`. Each pushed run carries
+/// the `bold` and `italic` flags passed in.
+// NOTE(review): the `runs` parameter type below appears to have lost its type argument
+// (`RichTextRun` values are pushed) — confirm against the real source.
+fn collect_style_runs(
+ data: &[u8],
+ text_len: usize,
+ tag: u16,
+ bold: bool,
+ italic: bool,
+ runs: &mut Vec,
+) {
+ let marker = [0x18, 0x00, tag as u8, (tag >> 8) as u8];
+ for offset in 0..data.len().saturating_sub(18) {
+ if data[offset..offset + 4] != marker {
+ continue;
+ }
+ let Some(start) = read_u32(data, offset + 6).map(|value| value as usize) else {
+ continue;
+ };
+ let Some(end) = read_u32(data, offset + 10).map(|value| value as usize) else {
+ continue;
+ };
+ // A flag that cannot be read (record truncated) counts as disabled.
+ let enabled = read_u32(data, offset + 18).is_some_and(|value| value != 0);
+ if enabled && start < end && end <= text_len {
+ runs.push(RichTextRun {
+ start,
+ end,
+ bold,
+ italic,
+ });
+ }
+ }
+}
+
+/// Heuristically estimates a text box rotation from coordinate-like doubles in its record.
+///
+/// Every byte offset of `record` is read as a pair of little-endian `f64` values `(x, y)`.
+/// Pairs are kept as candidate points when both values are finite, `x` lies within
+/// `[x_min - x_max, 2 * x_max]` and `y` lies within `[y_min - y_max, 2 * y_max]`. For each
+/// two consecutive candidates more than 40 units apart, the angle of the connecting segment
+/// is computed with `atan2(dy, dx)`; the first angle whose magnitude lies strictly between
+/// 5 and 85 degrees is returned. Returns `None` when no such segment exists.
+// NOTE(review): the return type below appears to have lost its type argument
+// (the body returns an `f64` angle) — confirm against the real source.
+fn infer_rotation_degrees(record: &[u8], bbox: BoundingBox) -> Option {
+ let mut points = Vec::new();
+ for offset in 0..record.len().saturating_sub(16) {
+ let x = read_f64(record, offset)?;
+ let y = read_f64(record, offset + 8)?;
+ if x.is_finite()
+ && y.is_finite()
+ && x >= bbox.x_min - bbox.x_max
+ && x <= bbox.x_max + bbox.x_max
+ && y >= bbox.y_min - bbox.y_max
+ && y <= bbox.y_max + bbox.y_max
+ {
+ points.push((x, y));
+ }
+ }
+ for pair in points.windows(2) {
+ let dx = pair[1].0 - pair[0].0;
+ let dy = pair[1].1 - pair[0].1;
+ let distance = dx.hypot(dy);
+ if distance > 40.0 {
+ let degrees = dy.atan2(dx).to_degrees();
+ // Near-horizontal and near-vertical segments are not reported as a rotation.
+ if degrees.abs() > 5.0 && degrees.abs() < 85.0 {
+ return Some(degrees);
+ }
+ }
+ }
+ None
+}
+
+// Header-parsing tests built on synthetic, mostly zero-filled page buffers.
+// Each test writes 1080 at 0x16 and a second dimension at 0x1A (presumably the page width
+// and height — confirm against `parse_page`), plus the header fields under test.
+#[cfg(test)]
+mod tests {
+ use super::parse_page;
+ use crate::types::{Color, PageTemplate, PageTemplateSource};
+
+ // With the leading four bytes left at zero, the color is expected at 0xA4, stored as
+ // blue, green, red, 0xFF.
+ #[test]
+ fn parses_page_header_background_color() {
+ let mut data = vec![0; 0xA8];
+ data[0x16..0x1A].copy_from_slice(&1080_u32.to_le_bytes());
+ data[0x1A..0x1E].copy_from_slice(&1527_u32.to_le_bytes());
+ data[0xA4..0xA8].copy_from_slice(&[0xDD, 0xDD, 0xF5, 0xFF]);
+
+ let page = parse_page(&data).unwrap();
+
+ assert_eq!(
+ page.background_color,
+ Some(Color {
+ r: 0xF5,
+ g: 0xDD,
+ b: 0xDD,
+ })
+ );
+ }
+
+ // Leading u32 of 0xE7: the built-in template id is expected at 0xAC.
+ #[test]
+ fn parses_page_template_id() {
+ let mut data = vec![0; 0x200];
+ data[0x00..0x04].copy_from_slice(&0xE7_u32.to_le_bytes());
+ data[0x16..0x1A].copy_from_slice(&1080_u32.to_le_bytes());
+ data[0x1A..0x1E].copy_from_slice(&1527_u32.to_le_bytes());
+ data[0xA4..0xA8].copy_from_slice(&[0xDD, 0xDA, 0xCB, 0xFF]);
+ data[0xAC..0xB0].copy_from_slice(&1_u32.to_le_bytes());
+
+ let page = parse_page(&data).unwrap();
+
+ assert_eq!(
+ page.template,
+ Some(PageTemplate {
+ id: 1,
+ source: PageTemplateSource::BuiltIn,
+ })
+ );
+ }
+
+ // Leading u32 of 0x90: the built-in template id is expected at 0x8C.
+ #[test]
+ fn parses_short_page_template_id() {
+ let mut data = vec![0; 0x200];
+ data[0x00..0x04].copy_from_slice(&0x90_u32.to_le_bytes());
+ data[0x16..0x1A].copy_from_slice(&1080_u32.to_le_bytes());
+ data[0x1A..0x1E].copy_from_slice(&1527_u32.to_le_bytes());
+ data[0x84..0x88].copy_from_slice(&[0xDD, 0xDA, 0xCB, 0xFF]);
+ data[0x8C..0x90].copy_from_slice(&10_u32.to_le_bytes());
+
+ let page = parse_page(&data).unwrap();
+
+ assert_eq!(
+ page.template,
+ Some(PageTemplate {
+ id: 10,
+ source: PageTemplateSource::BuiltIn,
+ })
+ );
+ }
+
+ // Leading u32 of 0xA6: the PDF page index sits in the high 16 bits of the u32 at 0x8C.
+ #[test]
+ fn parses_custom_pdf_page_template() {
+ let mut data = vec![0; 0x200];
+ data[0x00..0x04].copy_from_slice(&0xA6_u32.to_le_bytes());
+ data[0x16..0x1A].copy_from_slice(&1080_u32.to_le_bytes());
+ data[0x1A..0x1E].copy_from_slice(&1528_u32.to_le_bytes());
+ data[0x80..0x84].copy_from_slice(&[0xDD, 0xDA, 0xCB, 0xFF]);
+ data[0x8C..0x90].copy_from_slice(&(3_u32 << 16).to_le_bytes());
+
+ let page = parse_page(&data).unwrap();
+
+ assert_eq!(
+ page.template,
+ Some(PageTemplate {
+ id: 3,
+ source: PageTemplateSource::CustomPdf { page_index: 3 },
+ })
+ );
+ }
+
+ // Leading u32 of 0xE3 (below 0xE7): the id is read from 0xB4, so the non-zero value
+ // written at 0xAC must be ignored and the zero at 0xB4 means "no template".
+ #[test]
+ fn parses_older_page_template_id_offset_as_absent_when_zero() {
+ let mut data = vec![0; 0x200];
+ data[0x00..0x04].copy_from_slice(&0xE3_u32.to_le_bytes());
+ data[0x16..0x1A].copy_from_slice(&1080_u32.to_le_bytes());
+ data[0x1A..0x1E].copy_from_slice(&1527_u32.to_le_bytes());
+ data[0xA4..0xA8].copy_from_slice(&[0xFC, 0xFC, 0xFC, 0xFF]);
+ data[0xAC..0xB0].copy_from_slice(&1_u32.to_le_bytes());
+ data[0xB4..0xB8].copy_from_slice(&0_u32.to_le_bytes());
+
+ let page = parse_page(&data).unwrap();
+
+ assert_eq!(page.template, None);
+ }
+}
diff --git a/crates/sdocx/src/types.rs b/crates/sdocx/src/types.rs
index 54724ab..b4f09d0 100644
--- a/crates/sdocx/src/types.rs
+++ b/crates/sdocx/src/types.rs
@@ -18,10 +18,16 @@ pub struct DocumentMetadata {
pub modified_ms: Option,
/// Background color of the document.
pub background_color: Option,
+ /// Whether Samsung Notes dark-mode compatibility is enabled.
+ pub dark_mode_compatibility: Option,
/// Default page dimensions as `(width, height)` in pixels.
pub page_dimensions: Option<(u32, u32)>,
/// Ordered list of page UUIDs.
pub page_ids: Vec,
+ /// Embedded media assets from the archive.
+ pub media_assets: Vec,
+ /// Top-level typed note text from `note.note`, if present.
+ pub note_text: Option,
}
/// A single page within a document.
@@ -36,8 +42,100 @@ pub struct Page {
pub height: u32,
/// Bounding box enclosing all stroke content.
pub content_bbox: BoundingBox,
+ /// Page background color, if present in the page header.
+ pub background_color: Option,
+ /// Page template metadata, if present in the page header.
+ pub template: Option,
/// The strokes drawn on this page.
pub strokes: Vec,
+ /// Non-stroke page objects parsed from the page stream.
+ pub elements: Vec,
+}
+
+/// An embedded media asset.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct MediaAsset {
+ /// Archive path.
+ pub name: String,
+ /// MIME type, when recognized.
+ // NOTE(review): the field is not optional — confirm which value is stored for media
+ // whose type is not recognized.
+ pub mime_type: String,
+ /// Raw media bytes.
+ // NOTE(review): the element type argument appears to be missing here (presumably `u8`)
+ // — confirm against the real source.
+ pub data: Vec,
+}
+
+/// A non-stroke page element.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum PageElement {
+ /// A placed image object.
+ Image {
+ /// Placement box in page coordinates.
+ bbox: BoundingBox,
+ /// Index into `DocumentMetadata::media_assets`.
+ // NOTE(review): the page parser assigns this as a running count of image records
+ // within a single page, starting at zero — confirm how that maps onto the
+ // document-wide asset list for multi-page documents.
+ media_index: usize,
+ },
+ /// A rich text object.
+ TextBox(RichTextBox),
+}
+
+/// Parsed rich text box data.
+///
+/// All fields except `bbox` and `text` are derived heuristically by the page parser and may
+/// be absent even when the original note had the corresponding styling.
+// NOTE(review): several `Option`/`Vec` fields below appear to have lost their type
+// arguments — confirm against the real source.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct RichTextBox {
+ /// Placement box in page coordinates.
+ pub bbox: BoundingBox,
+ /// Clockwise rotation in degrees, if present.
+ pub rotation_degrees: Option,
+ /// Full text content.
+ pub text: String,
+ /// Text foreground color.
+ pub color: Option,
+ /// Text highlight/fill color.
+ pub highlight_color: Option,
+ /// Whether underline styling is present.
+ pub underline: bool,
+ /// Font size in Samsung Notes logical units, when present.
+ pub font_size: Option,
+ /// Style runs using character indexes into `text`.
+ pub runs: Vec,
+}
+
+/// A rich text style run.
+///
+/// The page parser emits bold runs and italic runs separately, so a run it produces has
+/// exactly one of `bold` / `italic` set; overlapping styles appear as overlapping runs.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct RichTextRun {
+ /// Start character index, inclusive.
+ pub start: usize,
+ /// End character index, exclusive.
+ pub end: usize,
+ /// Whether the run is bold.
+ pub bold: bool,
+ /// Whether the run is italic.
+ pub italic: bool,
+}
+
+/// Page template metadata.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct PageTemplate {
+ /// Raw Samsung Notes template identifier.
+ ///
+ /// For `PageTemplateSource::CustomPdf` templates the page parser stores the PDF page
+ /// index here as well.
+ pub id: u32,
+ /// Template backing source.
+ pub source: PageTemplateSource,
+}
+
+/// Page template backing source.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum PageTemplateSource {
+ /// Built-in Samsung Notes page template.
+ BuiltIn,
+ /// Custom PDF-backed page template.
+ CustomPdf {
+ /// Zero-based PDF page index used as the template.
+ ///
+ /// The page parser takes it from the high 16 bits of a page header field.
+ page_index: u32,
+ },
}
/// A single pen stroke consisting of points and associated data.