diff --git a/README.md b/README.md index bc9dba9..438a2c5 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ xml-disassembler parse | `--ignore-path ` | Path to the ignore file | .xmldisassemblerignore | | `--format ` | Output format: xml, json, json5, yaml | xml | | `--strategy ` | unique-id or grouped-by-tag | unique-id | +| `-p`, `--split-tags ` | With grouped-by-tag: split or group nested tags into subdirs (e.g. `objectPermissions:split:object,fieldPermissions:group:field`) | (none) | | `--multi-level ` | Further disassemble matching files: `file_pattern:root_to_strip:unique_id_elements` | (none) | #### Reassemble options @@ -170,6 +171,21 @@ xml-disassembler disassemble ./my.xml --strategy grouped-by-tag --format yaml Reassembly preserves element content and structure. +#### Split tags (`-p` / `--split-tags`) + +With `--strategy grouped-by-tag`, you can optionally **split** or **group** specific nested tags into subdirectories instead of a single file per tag. Useful for permission sets and similar metadata: e.g. one file per `objectPermissions` under `objectPermissions/`, and `fieldPermissions` grouped by object under `fieldPermissions/`. + +Spec: comma-separated rules. Each rule is `tag:mode:field` or `tag:path:mode:field` (path defaults to tag). **mode** is `split` (one file per array item, filename from `field`) or `group` (group array items by the value of `field`, using only the part before the first `.` as the group key, e.g. `Account.Name` → `Account`; one file per group). + +```bash +# Permission set: objectPermissions → one file per object; fieldPermissions → one file per object (the part of the field value before the first '.') +xml-disassembler disassemble fixtures/split-tags/HR_Admin.permissionset-meta.xml \ + --strategy grouped-by-tag \ + -p "objectPermissions:split:object,fieldPermissions:group:field" +``` + +Creates `HR_Admin/` with e.g. `objectPermissions/Job_Request__c.objectPermissions-meta.xml`, `objectPermissions/Account.objectPermissions-meta.xml`, `fieldPermissions/Account.fieldPermissions-meta.xml`, `fieldPermissions/Job_Request__c.fieldPermissions-meta.xml`, plus the main `HR_Admin.permissionset-meta.xml` with the rest. 
Reassembly requires no changes: the existing reassemble command merges subdirs and files back into one XML. + ### Multi-level disassembly For advanced use cases (e.g. Salesforce Loyalty Program Setup metadata), you can further disassemble specific output files by stripping a root element and re-running disassembly with different unique-id elements. diff --git a/fixtures/split-tags/HR_Admin.permissionset-meta.xml b/fixtures/split-tags/HR_Admin.permissionset-meta.xml new file mode 100644 index 0000000..610cc57 --- /dev/null +++ b/fixtures/split-tags/HR_Admin.permissionset-meta.xml @@ -0,0 +1,68 @@ + + + + JobApps__Recruiting + true + + + Send_Email_Confirmation + true + + + true + Account.Name + true + + + true + Account.Type + true + + + true + Job_Request__c.SalaryPay__c + true + + + true + Job_Request__c.Salary__c + true + + Grants all rights needed for an HR administrator to manage employees. + + Salesforce + + true + true + false + true + true + false + Account + + + true + true + true + true + true + true + Job_Request__c + + + Job_Request_Web_Form + true + + + Recruiting.DevManager + true + + + Job_Request__c + Available + + + true + APIEnabled + + \ No newline at end of file diff --git a/src/builders/build_disassembled_files.rs b/src/builders/build_disassembled_files.rs index 994a638..ed487c4 100644 --- a/src/builders/build_disassembled_files.rs +++ b/src/builders/build_disassembled_files.rs @@ -4,8 +4,11 @@ use crate::builders::{build_disassembled_file, extract_root_attributes}; use crate::parsers::{ extract_xml_declaration_from_raw, extract_xmlns_from_raw, parse_element_unified, }; -use crate::types::{BuildDisassembledFilesOptions, XmlElementArrayMap, XmlElementParams}; +use crate::types::{ + BuildDisassembledFilesOptions, DecomposeRule, XmlElementArrayMap, XmlElementParams, +}; use serde_json::{Map, Value}; +use std::collections::HashMap; use tokio::fs; const BATCH_SIZE: usize = 20; @@ -108,6 +111,39 @@ async fn disassemble_element_keys( (leaf_content, 
nested_groups, leaf_count, has_nested_elements) } +/// Extract string from an element's field - handles direct strings and objects with #text (XML leaf elements). +fn get_field_value(element: &Value, field: &str) -> Option { + let obj = element.as_object()?; + let v = obj.get(field)?; + if let Some(s) = v.as_str() { + return Some(s.to_string()); + } + if let Some(child) = v.as_object() { + if let Some(text) = child.get("#text").and_then(|t| t.as_str()) { + return Some(text.to_string()); + } + } + None +} + +/// For group mode: use the segment before the first '.' as key when present (e.g. "Account.Name" -> "Account"). +fn group_key_from_field_value(s: &str) -> &str { + s.find('.').map(|i| &s[..i]).unwrap_or(s) +} + +/// Sanitize a string for use as a filename (no path separators or invalid chars). +fn sanitize_filename(s: &str) -> String { + s.chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' { + c + } else { + '_' + } + }) + .collect() +} + async fn write_nested_groups( nested_groups: &XmlElementArrayMap, strategy: &str, @@ -116,30 +152,113 @@ async fn write_nested_groups( if strategy != "grouped-by-tag" { return; } + let decompose_by_tag: HashMap<&str, &DecomposeRule> = options + .decompose_rules + .map(|rules| rules.iter().map(|r| (r.tag.as_str(), r)).collect()) + .unwrap_or_default(); + for (tag, arr) in nested_groups { - let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions { - content: Value::Array(arr.clone()), - disassembled_path: options.disassembled_path, - output_file_name: Some(&format!("{}.{}", tag, options.format)), - subdirectory: None, - wrap_key: Some(tag), - is_grouped_array: true, - root_element_name: options.root_element_name, - root_attributes: options.root_attributes.clone(), - format: options.format, - xml_declaration: options.xml_declaration.clone(), - unique_id_elements: None, - }) - .await; + let rule = decompose_by_tag.get(tag.as_str()); + let path_segment = rule + .map(|r| { + if 
r.path_segment.is_empty() { + &r.tag + } else { + &r.path_segment + } + }) + .unwrap_or(tag); + + if let Some(r) = rule { + if r.mode == "split" { + for (idx, item) in arr.iter().enumerate() { + let name = get_field_value(item, &r.field) + .as_deref() + .map(sanitize_filename) + .filter(|s: &String| !s.is_empty()) + .unwrap_or_else(|| idx.to_string()); + let file_name = format!("{}.{}-meta.{}", name, tag, options.format); + let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions { + content: item.clone(), + disassembled_path: options.disassembled_path, + output_file_name: Some(&file_name), + subdirectory: Some(path_segment), + wrap_key: Some(tag), + is_grouped_array: false, + root_element_name: options.root_element_name, + root_attributes: options.root_attributes.clone(), + format: options.format, + xml_declaration: options.xml_declaration.clone(), + unique_id_elements: None, + }) + .await; + } + } else if r.mode == "group" { + let mut by_key: HashMap> = HashMap::new(); + for item in arr { + let key = get_field_value(item, &r.field) + .as_deref() + .map(group_key_from_field_value) + .map(sanitize_filename) + .filter(|s: &String| !s.is_empty()) + .unwrap_or_else(|| "unknown".to_string()); + by_key.entry(key).or_default().push(item.clone()); + } + for (key, group) in by_key { + let file_name = format!("{}.{}-meta.{}", key, tag, options.format); + let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions { + content: Value::Array(group), + disassembled_path: options.disassembled_path, + output_file_name: Some(&file_name), + subdirectory: Some(path_segment), + wrap_key: Some(tag), + is_grouped_array: true, + root_element_name: options.root_element_name, + root_attributes: options.root_attributes.clone(), + format: options.format, + xml_declaration: options.xml_declaration.clone(), + unique_id_elements: None, + }) + .await; + } + } else { + fallback_write_one_file(tag, arr, path_segment, options).await; + } + } else { + 
fallback_write_one_file(tag, arr, path_segment, options).await; + } } } +async fn fallback_write_one_file( + tag: &str, + arr: &[Value], + _path_segment: &str, + options: &WriteNestedOptions<'_>, +) { + let _ = build_disassembled_file(crate::types::BuildDisassembledFileOptions { + content: Value::Array(arr.to_vec()), + disassembled_path: options.disassembled_path, + output_file_name: Some(&format!("{}.{}", tag, options.format)), + subdirectory: None, + wrap_key: Some(tag), + is_grouped_array: true, + root_element_name: options.root_element_name, + root_attributes: options.root_attributes.clone(), + format: options.format, + xml_declaration: options.xml_declaration.clone(), + unique_id_elements: None, + }) + .await; +} + struct WriteNestedOptions<'a> { disassembled_path: &'a str, root_element_name: &'a str, root_attributes: Value, xml_declaration: Option, format: &'a str, + decompose_rules: Option<&'a [DecomposeRule]>, } pub async fn build_disassembled_files_unified( @@ -153,6 +272,7 @@ pub async fn build_disassembled_files_unified( format, unique_id_elements, strategy, + decompose_rules, } = options; let xml_content = match fs::read_to_string(file_path).await { @@ -215,6 +335,7 @@ pub async fn build_disassembled_files_unified( root_attributes: root_attributes.clone(), xml_declaration: xml_declaration.clone(), format, + decompose_rules, }; write_nested_groups(&nested_groups, strategy, &write_opts).await; diff --git a/src/handlers/disassemble.rs b/src/handlers/disassemble.rs index 98a0b1d..654fafb 100644 --- a/src/handlers/disassemble.rs +++ b/src/handlers/disassemble.rs @@ -6,7 +6,7 @@ use crate::multi_level::{ strip_root_and_build_xml, }; use crate::parsers::parse_xml; -use crate::types::{BuildDisassembledFilesOptions, MultiLevelRule}; +use crate::types::{BuildDisassembledFilesOptions, DecomposeRule, MultiLevelRule}; use ignore::gitignore::GitignoreBuilder; use std::path::Path; use tokio::fs; @@ -62,6 +62,7 @@ impl DisassembleXmlFileHandler { ignore_path: &str, 
format: &str, multi_level_rule: Option<&MultiLevelRule>, + decompose_rules: Option<&[DecomposeRule]>, ) -> Result<(), Box> { let strategy = strategy.unwrap_or("unique-id"); let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) { @@ -92,6 +93,7 @@ impl DisassembleXmlFileHandler { post_purge, format, multi_level_rule, + decompose_rules, ) .await?; } else if meta.is_dir() { @@ -103,6 +105,7 @@ impl DisassembleXmlFileHandler { post_purge, format, multi_level_rule, + decompose_rules, ) .await?; } @@ -121,6 +124,7 @@ impl DisassembleXmlFileHandler { post_purge: bool, format: &str, multi_level_rule: Option<&MultiLevelRule>, + decompose_rules: Option<&[DecomposeRule]>, ) -> Result<(), Box> { let resolved = Path::new(file_path) .canonicalize() @@ -150,6 +154,7 @@ impl DisassembleXmlFileHandler { post_purge, format, multi_level_rule, + decompose_rules, ) .await } @@ -164,6 +169,7 @@ impl DisassembleXmlFileHandler { post_purge: bool, format: &str, multi_level_rule: Option<&MultiLevelRule>, + decompose_rules: Option<&[DecomposeRule]>, ) -> Result<(), Box> { let mut entries = fs::read_dir(dir_path).await?; let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf()); @@ -190,6 +196,7 @@ impl DisassembleXmlFileHandler { post_purge, format, multi_level_rule, + decompose_rules, ) .await?; } @@ -209,6 +216,7 @@ impl DisassembleXmlFileHandler { post_purge: bool, format: &str, multi_level_rule: Option<&MultiLevelRule>, + decompose_rules: Option<&[DecomposeRule]>, ) -> Result<(), Box> { log::debug!("Parsing file to disassemble: {}", file_path); @@ -231,6 +239,7 @@ impl DisassembleXmlFileHandler { format, unique_id_elements, strategy, + decompose_rules, }) .await?; @@ -323,6 +332,7 @@ impl DisassembleXmlFileHandler { format, unique_id_elements: Some(&rule.unique_id_elements), strategy: "unique-id", + decompose_rules: None, }) .await?; diff --git a/src/lib.rs b/src/lib.rs index ee3d416..178d03d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,4 +17,4 
@@ pub use multi_level::{ }; pub use parsers::parse_xml; pub use transformers::{transform_to_json, transform_to_json5, transform_to_yaml}; -pub use types::{MultiLevelConfig, MultiLevelRule, XmlElement}; +pub use types::{DecomposeRule, MultiLevelConfig, MultiLevelRule, XmlElement}; diff --git a/src/main.rs b/src/main.rs index ffa2c49..6da67ad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use std::env; use xml_disassembler::{ - build_xml_string, parse_xml, DisassembleXmlFileHandler, MultiLevelRule, + build_xml_string, parse_xml, DecomposeRule, DisassembleXmlFileHandler, MultiLevelRule, ReassembleXmlFileHandler, }; @@ -16,6 +16,44 @@ struct DisassembleOpts<'a> { format: &'a str, strategy: Option<&'a str>, multi_level: Option, + split_tags: Option, +} + +/// Parse --split-tags spec for grouped-by-tag. Comma-separated rules; each rule: +/// tag:mode:field (path_segment defaults to tag) or tag:path:mode:field. +/// mode = "split" (one file per item, filename from field) or "group" (group by the part of field's value before the first '.'). +/// e.g. 
"objectPermissions:split:object,fieldPermissions:group:field" +fn parse_decompose_spec(spec: &str) -> Vec { + let mut rules = Vec::new(); + for part in spec.split(',') { + let part = part.trim(); + let segments: Vec<&str> = part.splitn(4, ':').collect(); + if segments.len() >= 3 { + let tag = segments[0].to_string(); + let (path_segment, mode, field) = if segments.len() == 3 { + ( + tag.clone(), + segments[1].to_string(), + segments[2].to_string(), + ) + } else { + ( + segments[1].to_string(), + segments[2].to_string(), + segments[3].to_string(), + ) + }; + if !tag.is_empty() && !mode.is_empty() && !field.is_empty() { + rules.push(DecomposeRule { + tag, + path_segment, + mode, + field, + }); + } + } + } + rules } /// Parse --multi-level spec: "file_pattern:root_to_strip:unique_id_elements" @@ -51,6 +89,7 @@ fn parse_disassemble_args(args: &[String]) -> DisassembleOpts<'_> { let mut format = "xml"; let mut strategy = None; let mut multi_level = None; + let mut split_tags = None; let mut i = 0; while i < args.len() { @@ -106,6 +145,15 @@ fn parse_disassemble_args(args: &[String]) -> DisassembleOpts<'_> { multi_level = Some(args[i].clone()); i += 1; } + } else if arg.starts_with("--split-tags=") { + split_tags = Some(arg.trim_start_matches("--split-tags=").to_string()); + i += 1; + } else if arg == "--split-tags" || arg == "-p" { + i += 1; + if i < args.len() { + split_tags = Some(args[i].clone()); + i += 1; + } } else if arg.starts_with("--") { i += 1; } else if path.is_none() { @@ -125,6 +173,7 @@ fn parse_disassemble_args(args: &[String]) -> DisassembleOpts<'_> { format, strategy, multi_level, + split_tags, } } @@ -163,6 +212,7 @@ async fn main() -> Result<(), Box> { " --strategy - unique-id or grouped-by-tag (default: unique-id)" ); eprintln!(" --multi-level - Further disassemble matching files: file_pattern:root_to_strip:unique_id_elements"); + eprintln!(" -p, --split-tags - With grouped-by-tag: split/group nested tags (e.g. 
objectPermissions:split:object,fieldPermissions:group:field)"); eprintln!(" reassemble [extension] [--postpurge] - Reassemble directory (default extension: xml)"); eprintln!(" parse - Parse and rebuild XML (test)"); return Ok(()); @@ -175,6 +225,7 @@ async fn main() -> Result<(), Box> { "disassemble" => { let opts = parse_disassemble_args(&args[2..]); let path = opts.path.unwrap_or("."); + let strategy = opts.strategy.unwrap_or("unique-id"); let multi_level_rule = opts .multi_level .as_ref() @@ -184,17 +235,31 @@ async fn main() -> Result<(), Box> { "Invalid --multi-level spec; use file_pattern:root_to_strip:unique_id_elements" ); } + let decompose_rules: Vec = if strategy == "grouped-by-tag" { + opts.split_tags + .as_ref() + .map(|s| parse_decompose_spec(s)) + .unwrap_or_default() + } else { + Vec::new() + }; + let decompose_rules_ref = if decompose_rules.is_empty() { + None + } else { + Some(decompose_rules.as_slice()) + }; let mut handler = DisassembleXmlFileHandler::new(); handler .disassemble( path, opts.unique_id_elements, - opts.strategy.or(Some("unique-id")), + Some(strategy), opts.pre_purge, opts.post_purge, opts.ignore_path, opts.format, multi_level_rule.as_ref(), + decompose_rules_ref, ) .await?; } diff --git a/src/types.rs b/src/types.rs index e3f992f..15f2878 100644 --- a/src/types.rs +++ b/src/types.rs @@ -55,6 +55,20 @@ pub struct UnifiedParseResult { /// Map of tag name to array of elements. pub type XmlElementArrayMap = std::collections::HashMap>; +/// Rule for decomposing a nested tag when using grouped-by-tag strategy. +/// E.g. write each <objectPermissions> to its own file, or group <fieldPermissions> by object. +#[derive(Debug, Clone)] +pub struct DecomposeRule { + /// Element tag to decompose (e.g. "objectPermissions", "fieldPermissions"). + pub tag: String, + /// Subdirectory under disassembled path (defaults to tag if empty). 
+ pub path_segment: String, + /// "split" = one file per array item (filename from field); "group" = group by field, one file per group. + pub mode: String, + /// Field name: for split, used for filename; for group, used to group items. + pub field: String, +} + /// Options for building disassembled files from a source file. #[derive(Debug, Clone)] pub struct BuildDisassembledFilesOptions<'a> { @@ -65,6 +79,8 @@ pub struct BuildDisassembledFilesOptions<'a> { pub format: &'a str, pub unique_id_elements: Option<&'a str>, pub strategy: &'a str, + /// When strategy is grouped-by-tag, optionally decompose specific tags (split or group by field). + pub decompose_rules: Option<&'a [DecomposeRule]>, } /// Parameters for writing leaf content. diff --git a/tests/disassemble_reassemble.rs b/tests/disassemble_reassemble.rs index 4477b51..29ad33e 100644 --- a/tests/disassemble_reassemble.rs +++ b/tests/disassemble_reassemble.rs @@ -2,7 +2,7 @@ //! the reassembled XML matches the original file contents (same as original TypeScript tests). 
use std::path::Path; -use xml_disassembler::{DisassembleXmlFileHandler, ReassembleXmlFileHandler}; +use xml_disassembler::{DecomposeRule, DisassembleXmlFileHandler, ReassembleXmlFileHandler}; #[tokio::test] async fn disassemble_then_reassemble_matches_original_xml() { @@ -37,6 +37,7 @@ async fn disassemble_then_reassemble_matches_original_xml() { ".xmldisassemblerignore", "xml", None, + None, ) .await .expect("disassemble"); @@ -94,6 +95,7 @@ async fn cdata_preserved_round_trip() { ".xmldisassemblerignore", "xml", None, + None, ) .await .expect("disassemble"); @@ -155,6 +157,7 @@ async fn comments_preserved_round_trip() { ".xmldisassemblerignore", "xml", None, + None, ) .await .expect("disassemble"); @@ -219,6 +222,7 @@ async fn deeply_nested_unique_id_elements_round_trip() { ".xmldisassemblerignore", "xml", None, + None, ) .await .expect("disassemble"); @@ -291,6 +295,7 @@ async fn multi_level_disassemble_then_reassemble_matches_original() { ".xmldisassemblerignore", "xml", Some(&rule), + None, ) .await .expect("disassemble"); @@ -316,3 +321,78 @@ async fn multi_level_disassemble_then_reassemble_matches_original() { "Reassembled XML must match original (multi-level round-trip)" ); } + +/// Grouped-by-tag with --split-tags: objectPermissions split by object, fieldPermissions grouped by object (from field). +/// Reassemble and compare to original fixture. 
+#[tokio::test] +async fn split_tags_disassemble_then_reassemble_matches_original() { + let _ = env_logger::try_init(); + + let fixture = "fixtures/split-tags/HR_Admin.permissionset-meta.xml"; + assert!( + Path::new(fixture).exists(), + "Fixture {} must exist (run from project root)", + fixture + ); + + let original_content = std::fs::read_to_string(fixture).expect("read original fixture"); + + let temp_dir = tempfile::tempdir().expect("temp dir"); + let base = temp_dir.path(); + let disassembled_dir = base.join("HR_Admin"); + + let source_in_temp = base.join("HR_Admin.permissionset-meta.xml"); + std::fs::copy(fixture, &source_in_temp).expect("copy fixture to temp"); + + let split_tags_rules = vec![ + DecomposeRule { + tag: "objectPermissions".to_string(), + path_segment: "objectPermissions".to_string(), + mode: "split".to_string(), + field: "object".to_string(), + }, + DecomposeRule { + tag: "fieldPermissions".to_string(), + path_segment: "fieldPermissions".to_string(), + mode: "group".to_string(), + field: "field".to_string(), + }, + ]; + + let mut disassemble = DisassembleXmlFileHandler::new(); + disassemble + .disassemble( + source_in_temp.to_str().unwrap(), + None, + Some("grouped-by-tag"), + false, + false, + ".xmldisassemblerignore", + "xml", + None, + Some(&split_tags_rules), + ) + .await + .expect("disassemble"); + + assert!( + disassembled_dir.exists(), + "Disassembled directory should exist" + ); + + let reassemble_handler = ReassembleXmlFileHandler::new(); + reassemble_handler + .reassemble(disassembled_dir.to_str().unwrap(), Some("xml"), false) + .await + .expect("reassemble"); + + let reassembled_path = base.join("HR_Admin.xml"); + assert!(reassembled_path.exists(), "Reassembled file should exist"); + + let reassembled_content = std::fs::read_to_string(&reassembled_path).expect("read reassembled"); + + assert_eq!( + original_content, reassembled_content, + "Reassembled XML must match original (split-tags round-trip)" + ); +}