Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ xml-disassembler parse <path>
| `--ignore-path <path>` | Path to the ignore file | .xmldisassemblerignore |
| `--format <fmt>` | Output format: xml, json, json5, yaml | xml |
| `--strategy <name>` | unique-id or grouped-by-tag | unique-id |
| `--multi-level <spec>` | Further disassemble matching files: `file_pattern:root_to_strip:unique_id_elements` | (none) |

#### Reassemble options

Expand Down Expand Up @@ -169,6 +170,30 @@ xml-disassembler disassemble ./my.xml --strategy grouped-by-tag --format yaml

Reassembly preserves element content and structure.

### Multi-level disassembly

For advanced use cases (e.g. Salesforce Loyalty Program Setup metadata), you can further disassemble specific output files by stripping a root element and re-running disassembly with different unique-id elements.

Use `--multi-level <spec>` where the spec is:

`file_pattern:root_to_strip:unique_id_elements`

- **file_pattern** – Substring used to select files: an XML file is matched when its file name or its path contains this text (e.g. `programProcesses` or `programProcesses-meta`).
- **root_to_strip** – Element to strip/unwrap: if it is the root, its inner content becomes the new document; if it is a child (e.g. `programProcesses` under `LoyaltyProgramSetup`), it is unwrapped so its inner content becomes the root’s direct children.
- **unique_id_elements** – Comma-separated element names for the second-level disassembly (e.g. `parameterName,ruleName`).

Example (loyalty program): strip the child `programProcesses` in each process file so parameters/rules can be disassembled:

```bash
xml-disassembler disassemble ./Cloud_Kicks_Inner_Circle.loyaltyProgramSetup-meta.xml \
--unique-id-elements "fullName,name,processName" \
--multi-level "programProcesses:programProcesses:parameterName,ruleName"
```

A `.multi_level.json` config is written in the disassembly root so **reassemble** automatically does inner-level reassembly first, wraps files with the original root, then reassembles the top level. No extra flags are needed for reassembly.

**Caveat:** Multi-level reassembly removes disassembled directories after reassembling each level, even when you do not pass `--postpurge`. This is required so the next level can merge the reassembled XML files. Use version control (e.g. Git) to recover the tree if needed, or run reassembly only in a pipeline where these changes can be discarded.

## Ignore file

Exclude files or directories from disassembly using an ignore file (default: `.xmldisassemblerignore`). The Rust implementation uses the [ignore](https://crates.io/crates/ignore) crate with `.gitignore`-style syntax.
Expand Down
5,499 changes: 5,499 additions & 0 deletions fixtures/multi-level/Cloud_Kicks_Inner_Circle.loyaltyProgramSetup-meta.xml

Large diffs are not rendered by default.

148 changes: 146 additions & 2 deletions src/handlers/disassemble.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
//! Disassemble XML file handler.

use crate::builders::build_disassembled_files_unified;
use crate::types::BuildDisassembledFilesOptions;
use crate::multi_level::{
capture_xmlns_from_root, path_segment_from_file_pattern, save_multi_level_config,
strip_root_and_build_xml,
};
use crate::parsers::parse_xml;
use crate::types::{BuildDisassembledFilesOptions, MultiLevelRule};
use ignore::gitignore::GitignoreBuilder;
use std::path::Path;
use tokio::fs;
Expand Down Expand Up @@ -56,6 +61,7 @@ impl DisassembleXmlFileHandler {
post_purge: bool,
ignore_path: &str,
format: &str,
multi_level_rule: Option<&MultiLevelRule>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let strategy = strategy.unwrap_or("unique-id");
let strategy = if ["unique-id", "grouped-by-tag"].contains(&strategy) {
Expand Down Expand Up @@ -85,6 +91,7 @@ impl DisassembleXmlFileHandler {
pre_purge,
post_purge,
format,
multi_level_rule,
)
.await?;
} else if meta.is_dir() {
Expand All @@ -95,6 +102,7 @@ impl DisassembleXmlFileHandler {
pre_purge,
post_purge,
format,
multi_level_rule,
)
.await?;
}
Expand All @@ -112,6 +120,7 @@ impl DisassembleXmlFileHandler {
pre_purge: bool,
post_purge: bool,
format: &str,
multi_level_rule: Option<&MultiLevelRule>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let resolved = Path::new(file_path)
.canonicalize()
Expand Down Expand Up @@ -140,10 +149,12 @@ impl DisassembleXmlFileHandler {
pre_purge,
post_purge,
format,
multi_level_rule,
)
.await
}

#[allow(clippy::too_many_arguments)]
async fn handle_directory(
&self,
dir_path: &str,
Expand All @@ -152,6 +163,7 @@ impl DisassembleXmlFileHandler {
pre_purge: bool,
post_purge: bool,
format: &str,
multi_level_rule: Option<&MultiLevelRule>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let mut entries = fs::read_dir(dir_path).await?;
let cwd = std::env::current_dir().unwrap_or_else(|_| Path::new(".").to_path_buf());
Expand All @@ -177,6 +189,7 @@ impl DisassembleXmlFileHandler {
pre_purge,
post_purge,
format,
multi_level_rule,
)
.await?;
}
Expand All @@ -195,6 +208,7 @@ impl DisassembleXmlFileHandler {
pre_purge: bool,
post_purge: bool,
format: &str,
multi_level_rule: Option<&MultiLevelRule>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
log::debug!("Parsing file to disassemble: {}", file_path);

Expand All @@ -218,7 +232,137 @@ impl DisassembleXmlFileHandler {
unique_id_elements,
strategy,
})
.await
.await?;

if let Some(rule) = multi_level_rule {
self.recursively_disassemble_multi_level(&output_path, rule, format)
.await?;
}

Ok(())
}

/// Walk the disassembly output under `dir_path` and apply `rule` to every matching XML file.
///
/// The walk is iterative (an explicit stack of directories), not call-recursive. A file is
/// processed when all of the following hold:
///
/// - its file name ends with `.xml`;
/// - its file name or its path contains `rule.file_pattern`;
/// - it parses, and `rule.root_to_strip` is either the document root or a direct child key
///   of the document root.
///
/// Each processed file is overwritten with the stripped XML and then disassembled again with
/// the `unique-id` strategy and `rule.unique_id_elements`, into a sibling directory named
/// after the part of the file stem before the first `.`. The second-level disassembly is run
/// with `post_purge: true`.
///
/// After the walk, the multi-level config is saved to `dir_path` if it holds at least one rule.
///
/// # Errors
///
/// Returns an error if a directory cannot be listed, a stripped file cannot be written, the
/// second-level output path is not valid UTF-8, the second-level disassembly fails, or the
/// config cannot be saved. Files that do not parse or do not contain the element to strip are
/// skipped without error.
async fn recursively_disassemble_multi_level(
    &self,
    dir_path: &Path,
    rule: &MultiLevelRule,
    format: &str,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Start from whatever config is already stored for this directory, or the default one
    // when loading yields nothing.
    let mut config = crate::multi_level::load_multi_level_config(dir_path)
        .await
        .unwrap_or_default();

    let mut stack = vec![dir_path.to_path_buf()];
    while let Some(current) = stack.pop() {
        // Collect the whole listing up front so it is fixed before any file in this
        // directory is rewritten or any output directory is created next to it.
        let mut entries = Vec::new();
        let mut read_dir = fs::read_dir(&current).await?;
        while let Some(entry) = read_dir.next_entry().await? {
            entries.push(entry);
        }

        for entry in entries {
            let path = entry.path();

            // Async metadata lookup instead of the blocking `Path::is_dir`/`Path::is_file`.
            // It follows symlinks like those helpers do; an entry whose metadata cannot be
            // read is skipped, matching the `false` they return on error.
            let metadata = match fs::metadata(&path).await {
                Ok(m) => m,
                Err(_) => continue,
            };
            if metadata.is_dir() {
                stack.push(path);
                continue;
            }
            if !metadata.is_file() {
                continue;
            }

            let path_str = path.to_string_lossy().to_string();
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            let matches_pattern =
                name.contains(&rule.file_pattern) || path_str.contains(&rule.file_pattern);
            if !name.ends_with(".xml") || !matches_pattern {
                continue;
            }

            let parsed = match parse_xml(&path_str).await {
                Some(p) => p,
                None => continue,
            };

            // The element to strip must be the document root itself or one of its direct
            // children; the `?xml` declaration key is not considered a root.
            let has_element_to_strip = parsed
                .as_object()
                .and_then(|o| {
                    let root_key = o.keys().find(|k| *k != "?xml")?;
                    let root_val = o.get(root_key)?.as_object()?;
                    Some(
                        root_key == &rule.root_to_strip
                            || root_val.contains_key(&rule.root_to_strip),
                    )
                })
                .unwrap_or(false);
            if !has_element_to_strip {
                continue;
            }

            let wrap_xmlns = capture_xmlns_from_root(&parsed).unwrap_or_default();

            let stripped_xml = match strip_root_and_build_xml(&parsed, &rule.root_to_strip) {
                Some(xml) => xml,
                None => continue,
            };

            // Resolve the output location before touching the file on disk.
            let file_stem = path
                .file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or("output");
            let output_dir_name = file_stem.split('.').next().unwrap_or(file_stem);
            let parent = path.parent().unwrap_or(dir_path);
            let second_level_output = parent.join(output_dir_name);
            // Fail loudly rather than falling back to "." and scattering the second-level
            // output into the process working directory.
            let second_level_output_str = second_level_output.to_str().ok_or_else(|| {
                format!(
                    "Multi-level output path is not valid UTF-8: {}",
                    second_level_output.display()
                )
            })?;

            // The file is replaced by its stripped form, which is what gets disassembled.
            fs::write(&path, stripped_xml).await?;

            build_disassembled_files_unified(BuildDisassembledFilesOptions {
                file_path: &path_str,
                disassembled_path: second_level_output_str,
                base_name: output_dir_name,
                post_purge: true,
                format,
                unique_id_elements: Some(&rule.unique_id_elements),
                strategy: "unique-id",
            })
            .await?;

            if config.rules.is_empty() {
                // First processed file with no stored rule: record the rule, filling in
                // any fields the caller left empty from what this file shows.
                let wrap_root = parsed
                    .as_object()
                    .and_then(|o| o.keys().find(|k| *k != "?xml").cloned())
                    .unwrap_or_else(|| rule.wrap_root_element.clone());
                config.rules.push(MultiLevelRule {
                    file_pattern: rule.file_pattern.clone(),
                    root_to_strip: rule.root_to_strip.clone(),
                    unique_id_elements: rule.unique_id_elements.clone(),
                    path_segment: if rule.path_segment.is_empty() {
                        path_segment_from_file_pattern(&rule.file_pattern)
                    } else {
                        rule.path_segment.clone()
                    },
                    // Persist document root (e.g. LoyaltyProgramSetup) so reassembly uses it as root with xmlns;
                    // path_segment (e.g. programProcesses) is the inner wrapper in each file.
                    wrap_root_element: wrap_root,
                    wrap_xmlns: if rule.wrap_xmlns.is_empty() {
                        wrap_xmlns
                    } else {
                        rule.wrap_xmlns.clone()
                    },
                });
            } else if let Some(r) = config.rules.first_mut() {
                // A rule is already recorded: only backfill a missing xmlns on the first one.
                if r.wrap_xmlns.is_empty() {
                    r.wrap_xmlns = wrap_xmlns;
                }
            }
        }
    }

    if !config.rules.is_empty() {
        save_multi_level_config(dir_path, &config).await?;
    }

    Ok(())
}
}

Expand Down
Loading